author		Artem Bityutskiy <Artem.Bityutskiy@nokia.com>	2009-09-21 05:09:22 -0400
committer	Artem Bityutskiy <Artem.Bityutskiy@nokia.com>	2009-09-21 05:09:22 -0400
commit		7cce2f4cb7f5f641f78c8e3eea4e7b1b96cb71c0 (patch)
tree		b064d077928cf224660ab1e1841cdab2c9fd8b08 /fs
parent		e055f7e873d900925c222cf2d1ec955af4a9ca90 (diff)
parent		ebc79c4f8da0f92efa968e0328f32334a2ce80cf (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into linux-next

Conflicts:
	fs/ubifs/super.c

Merge the upstream tree in order to resolve a conflict with the
per-bdi writeback changes from the linux-2.6-block tree.
Diffstat (limited to 'fs')
292 files changed, 9459 insertions, 6600 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 332b5ff02fec..f7003cfac63d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -76,7 +76,7 @@ static const match_table_t tokens = {
  * Return 0 upon success, -ERRNO upon failure.
  */
 
-static int v9fs_parse_options(struct v9fs_session_info *v9ses)
+static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 {
 	char *options;
 	substring_t args[MAX_OPT_ARGS];
@@ -90,10 +90,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses)
 	v9ses->debug = 0;
 	v9ses->cache = 0;
 
-	if (!v9ses->options)
+	if (!opts)
 		return 0;
 
-	options = kstrdup(v9ses->options, GFP_KERNEL);
+	options = kstrdup(opts, GFP_KERNEL);
 	if (!options) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
 			   "failed to allocate copy of option string\n");
@@ -206,24 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	v9ses->uid = ~0;
 	v9ses->dfltuid = V9FS_DEFUID;
 	v9ses->dfltgid = V9FS_DEFGID;
-	if (data) {
-		v9ses->options = kstrdup(data, GFP_KERNEL);
-		if (!v9ses->options) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "failed to allocate copy of option string\n");
-			retval = -ENOMEM;
-			goto error;
-		}
-	}
 
-	rc = v9fs_parse_options(v9ses);
+	rc = v9fs_parse_options(v9ses, data);
 	if (rc < 0) {
 		retval = rc;
 		goto error;
 	}
 
-	v9ses->clnt = p9_client_create(dev_name, v9ses->options);
-
+	v9ses->clnt = p9_client_create(dev_name, data);
 	if (IS_ERR(v9ses->clnt)) {
 		retval = PTR_ERR(v9ses->clnt);
 		v9ses->clnt = NULL;
@@ -280,7 +270,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 
 	__putname(v9ses->uname);
 	__putname(v9ses->aname);
-	kfree(v9ses->options);
 }
 
 /**
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a7d567192998..38762bf102a9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -85,7 +85,6 @@ struct v9fs_session_info {
 	unsigned int afid;
 	unsigned int cache;
 
-	char *options;	/* copy of mount options */
 	char *uname;	/* user name to mount as */
 	char *aname;	/* name of remote hierarchy being mounted */
 	unsigned int maxdata;	/* max data for client interface */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 81f8bbf12f9f..06a223d50a81 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended)
 
 /**
  * v9fs_blank_wstat - helper function to setup a 9P stat structure
- * @v9ses: 9P session info (for determining extended mode)
  * @wstat: structure to initialize
  *
  */
@@ -207,65 +206,72 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 {
+	int err;
 	struct inode *inode;
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
 
 	inode = new_inode(sb);
-	if (inode) {
-		inode->i_mode = mode;
-		inode->i_uid = current_fsuid();
-		inode->i_gid = current_fsgid();
-		inode->i_blocks = 0;
-		inode->i_rdev = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		inode->i_mapping->a_ops = &v9fs_addr_operations;
-
-		switch (mode & S_IFMT) {
-		case S_IFIFO:
-		case S_IFBLK:
-		case S_IFCHR:
-		case S_IFSOCK:
-			if (!v9fs_extended(v9ses)) {
-				P9_DPRINTK(P9_DEBUG_ERROR,
-					   "special files without extended mode\n");
-				return ERR_PTR(-EINVAL);
-			}
-			init_special_inode(inode, inode->i_mode,
-					   inode->i_rdev);
-			break;
-		case S_IFREG:
-			inode->i_op = &v9fs_file_inode_operations;
-			inode->i_fop = &v9fs_file_operations;
-			break;
-		case S_IFLNK:
-			if (!v9fs_extended(v9ses)) {
-				P9_DPRINTK(P9_DEBUG_ERROR,
-					   "extended modes used w/o 9P2000.u\n");
-				return ERR_PTR(-EINVAL);
-			}
-			inode->i_op = &v9fs_symlink_inode_operations;
-			break;
-		case S_IFDIR:
-			inc_nlink(inode);
-			if (v9fs_extended(v9ses))
-				inode->i_op = &v9fs_dir_inode_operations_ext;
-			else
-				inode->i_op = &v9fs_dir_inode_operations;
-			inode->i_fop = &v9fs_dir_operations;
-			break;
-		default:
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "BAD mode 0x%x S_IFMT 0x%x\n",
-				   mode, mode & S_IFMT);
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
+	if (!inode) {
 		P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
 		return ERR_PTR(-ENOMEM);
 	}
+
+	inode->i_mode = mode;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_blocks = 0;
+	inode->i_rdev = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_mapping->a_ops = &v9fs_addr_operations;
+
+	switch (mode & S_IFMT) {
+	case S_IFIFO:
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFSOCK:
+		if (!v9fs_extended(v9ses)) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				   "special files without extended mode\n");
+			err = -EINVAL;
+			goto error;
+		}
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		break;
+	case S_IFREG:
+		inode->i_op = &v9fs_file_inode_operations;
+		inode->i_fop = &v9fs_file_operations;
+		break;
+	case S_IFLNK:
+		if (!v9fs_extended(v9ses)) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				   "extended modes used w/o 9P2000.u\n");
+			err = -EINVAL;
+			goto error;
+		}
+		inode->i_op = &v9fs_symlink_inode_operations;
+		break;
+	case S_IFDIR:
+		inc_nlink(inode);
+		if (v9fs_extended(v9ses))
+			inode->i_op = &v9fs_dir_inode_operations_ext;
+		else
+			inode->i_op = &v9fs_dir_inode_operations;
+		inode->i_fop = &v9fs_dir_operations;
+		break;
+	default:
+		P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+			   mode, mode & S_IFMT);
+		err = -EINVAL;
+		goto error;
+	}
+
 	return inode;
+
+error:
+	iput(inode);
+	return ERR_PTR(err);
 }
 
 /*
@@ -338,30 +344,25 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 
 	ret = NULL;
 	st = p9_client_stat(fid);
-	if (IS_ERR(st)) {
-		err = PTR_ERR(st);
-		st = NULL;
-		goto error;
-	}
+	if (IS_ERR(st))
+		return ERR_CAST(st);
 
 	umode = p9mode2unixmode(v9ses, st->mode);
 	ret = v9fs_get_inode(sb, umode);
 	if (IS_ERR(ret)) {
 		err = PTR_ERR(ret);
-		ret = NULL;
 		goto error;
 	}
 
 	v9fs_stat2inode(st, ret, sb);
 	ret->i_ino = v9fs_qid2ino(&st->qid);
+	p9stat_free(st);
 	kfree(st);
 	return ret;
 
 error:
+	p9stat_free(st);
 	kfree(st);
-	if (ret)
-		iput(ret);
-
 	return ERR_PTR(err);
 }
 
@@ -403,9 +404,9 @@ v9fs_open_created(struct inode *inode, struct file *file)
  * @v9ses: session information
  * @dir: directory that dentry is being created in
  * @dentry: dentry that is being created
+ * @extension: 9p2000.u extension string to support devices, etc.
  * @perm: create permissions
  * @mode: open mode
- * @extension: 9p2000.u extension string to support devices, etc.
  *
  */
 static struct p9_fid *
@@ -470,7 +471,10 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	dentry->d_op = &v9fs_dentry_operations;
 
 	d_instantiate(dentry, inode);
-	v9fs_fid_add(dentry, fid);
+	err = v9fs_fid_add(dentry, fid);
+	if (err < 0)
+		goto error;
+
 	return ofid;
 
 error:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 38d695d66a0b..8961f1a8f668 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -81,7 +81,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
 
 static void
 v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
-		int flags)
+		int flags, void *data)
 {
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
@@ -91,6 +91,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
 		      MS_NOATIME;
+
+	save_mount_options(sb, data);
 }
 
 /**
@@ -113,14 +115,11 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	struct v9fs_session_info *v9ses = NULL;
 	struct p9_wstat *st = NULL;
 	int mode = S_IRWXUGO | S_ISVTX;
-	uid_t uid = current_fsuid();
-	gid_t gid = current_fsgid();
 	struct p9_fid *fid;
 	int retval = 0;
 
 	P9_DPRINTK(P9_DEBUG_VFS, " \n");
 
-	st = NULL;
 	v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
 	if (!v9ses)
 		return -ENOMEM;
@@ -142,7 +141,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		retval = PTR_ERR(sb);
 		goto free_stat;
 	}
-	v9fs_fill_super(sb, v9ses, flags);
+	v9fs_fill_super(sb, v9ses, flags, data);
 
 	inode = v9fs_get_inode(sb, S_IFDIR | mode);
 	if (IS_ERR(inode)) {
@@ -150,9 +149,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		goto release_sb;
 	}
 
-	inode->i_uid = uid;
-	inode->i_gid = gid;
-
 	root = d_alloc_root(inode);
 	if (!root) {
 		iput(inode);
@@ -173,10 +169,8 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
-release_sb:
-	deactivate_locked_super(sb);
-
 free_stat:
+	p9stat_free(st);
 	kfree(st);
 
 clunk_fid:
@@ -185,7 +179,12 @@ clunk_fid:
 close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
+	return retval;
 
+release_sb:
+	p9stat_free(st);
+	kfree(st);
+	deactivate_locked_super(sb);
 	return retval;
 }
 
@@ -207,24 +206,10 @@ static void v9fs_kill_super(struct super_block *s)
 
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
+	s->s_fs_info = NULL;
 	P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
 }
 
-/**
- * v9fs_show_options - Show mount options in /proc/mounts
- * @m: seq_file to write to
- * @mnt: mount descriptor
- *
- */
-
-static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
-{
-	struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
-
-	seq_printf(m, "%s", v9ses->options);
-	return 0;
-}
-
 static void
 v9fs_umount_begin(struct super_block *sb)
 {
@@ -237,7 +222,7 @@ v9fs_umount_begin(struct super_block *sb)
 static const struct super_operations v9fs_super_ops = {
 	.statfs = simple_statfs,
 	.clear_inode = v9fs_clear_inode,
-	.show_options = v9fs_show_options,
+	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
 };
 
diff --git a/fs/Kconfig b/fs/Kconfig
index 0e7da7bb5d93..455aa207e67e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -43,6 +43,7 @@ source "fs/xfs/Kconfig"
 source "fs/gfs2/Kconfig"
 source "fs/ocfs2/Kconfig"
 source "fs/btrfs/Kconfig"
+source "fs/nilfs2/Kconfig"
 
 endif # BLOCK
 
@@ -186,7 +187,6 @@ source "fs/romfs/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
-source "fs/nilfs2/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0149dab365e7..681c2a7b013f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -134,9 +134,16 @@ static int afs_readpage(struct file *file, struct page *page)
 
 	inode = page->mapping->host;
 
-	ASSERT(file != NULL);
-	key = file->private_data;
-	ASSERT(key != NULL);
+	if (file) {
+		key = file->private_data;
+		ASSERT(key != NULL);
+	} else {
+		key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
+		if (IS_ERR(key)) {
+			ret = PTR_ERR(key);
+			goto error_nokey;
+		}
+	}
 
 	_enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
 
@@ -207,12 +214,17 @@ static int afs_readpage(struct file *file, struct page *page)
 		unlock_page(page);
 	}
 
+	if (!file)
+		key_put(key);
 	_leave(" = 0");
 	return 0;
 
 error:
 	SetPageError(page);
 	unlock_page(page);
+	if (!file)
+		key_put(key);
+error_nokey:
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c2e7a7ff0080..c63a3c8beb73 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -712,7 +712,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
-		.for_writepages = 1,
 		.range_cyclic = 1,
 	};
 	int ret;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index aa39ae83f019..3da18d453488 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -77,7 +77,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	}
 
 	/* Update the expiry counter if fs is busy */
-	if (!may_umount_tree(mnt)) {
+	if (!may_umount_tree(path.mnt)) {
 		struct autofs_info *ino = autofs4_dentry_ino(top);
 		ino->last_used = jiffies;
 		goto done;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b7c1603cd4bd..7c1e65d54872 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		}
 	}
 
-	/*
-	 * Now fill out the bss section. First pad the last page up
-	 * to the page boundary, and then perform a mmap to make sure
-	 * that there are zero-mapped pages up to and including the
-	 * last bss page.
-	 */
-	if (padzero(elf_bss)) {
-		error = -EFAULT;
-		goto out_close;
-	}
+	if (last_bss > elf_bss) {
+		/*
+		 * Now fill out the bss section. First pad the last page up
+		 * to the page boundary, and then perform a mmap to make sure
+		 * that there are zero-mapped pages up to and including the
+		 * last bss page.
+		 */
+		if (padzero(elf_bss)) {
+			error = -EFAULT;
+			goto out_close;
+		}
 
-	/* What we have mapped so far */
-	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
+		/* What we have mapped so far */
+		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 
-	/* Map the last of the bss segment */
-	if (last_bss > elf_bss) {
+		/* Map the last of the bss segment */
 		down_write(&current->mm->mmap_sem);
 		error = do_brk(elf_bss, last_bss - elf_bss);
 		up_write(&current->mm->mmap_sem);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 697f6b5f1313..e92f229e3c6e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -828,15 +828,22 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 	if (IS_ERR(bprm.file))
 		return res;
 
+	bprm.cred = prepare_exec_creds();
+	res = -ENOMEM;
+	if (!bprm.cred)
+		goto out;
+
 	res = prepare_binprm(&bprm);
 
 	if (res <= (unsigned long)-4096)
 		res = load_flat_file(&bprm, libs, id, NULL);
-	if (bprm.file) {
-		allow_write_access(bprm.file);
-		fput(bprm.file);
-		bprm.file = NULL;
-	}
+
+	abort_creds(bprm.cred);
+
+out:
+	allow_write_access(bprm.file);
+	fput(bprm.file);
+
 	return(res);
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3a6d4fb2a329..71e7e03ac343 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -420,7 +420,6 @@ static void bdev_destroy_inode(struct inode *inode)
 {
 	struct bdev_inode *bdi = BDEV_I(inode);
 
-	bdi->bdev.bd_inode_backing_dev_info = NULL;
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
@@ -564,6 +563,16 @@ struct block_device *bdget(dev_t dev)
 
 EXPORT_SYMBOL(bdget);
 
+/**
+ * bdgrab -- Grab a reference to an already referenced block device
+ * @bdev: Block device to grab a reference to.
+ */
+struct block_device *bdgrab(struct block_device *bdev)
+{
+	atomic_inc(&bdev->bd_inode->i_count);
+	return bdev;
+}
+
 long nr_blockdev_pages(void)
 {
 	struct block_device *bdev;
@@ -1395,6 +1404,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 }
 
 /*
+ * Write data to the block device. Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ */
+ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
+			 unsigned long nr_segs, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	ssize_t ret;
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+	if (ret > 0 || ret == -EIOCBQUEUED) {
+		ssize_t err;
+
+		err = generic_write_sync(file, pos, ret);
+		if (err < 0 && ret > 0)
+			ret = err;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_aio_write);
+
+/*
  * Try to release a page associated with block device when the system
  * is under memory pressure.
  */
@@ -1426,7 +1462,7 @@ const struct file_operations def_blk_fops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
 	.aio_read = generic_file_aio_read,
-	.aio_write = generic_file_aio_write_nolock,
+	.aio_write = blkdev_aio_write,
 	.mmap = generic_file_mmap,
 	.fsync = block_fsync,
 	.unlocked_ioctl = block_ioctl,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 6e4f6c50a120..019e8af449ab 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -424,11 +424,11 @@ int btrfs_requeue_work(struct btrfs_work *work)
 	 * list
 	 */
 	if (worker->idle) {
-		spin_lock_irqsave(&worker->workers->lock, flags);
+		spin_lock(&worker->workers->lock);
 		worker->idle = 0;
 		list_move_tail(&worker->worker_list,
 			       &worker->workers->worker_list);
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
+		spin_unlock(&worker->workers->lock);
 	}
 	if (!worker->working) {
 		wake = 1;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 60a45f3a4e91..3fdcc0512d3a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -557,19 +557,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 
 	btrfs_disk_key_to_cpu(&k1, disk);
 
-	if (k1.objectid > k2->objectid)
-		return 1;
-	if (k1.objectid < k2->objectid)
-		return -1;
-	if (k1.type > k2->type)
-		return 1;
-	if (k1.type < k2->type)
-		return -1;
-	if (k1.offset > k2->offset)
-		return 1;
-	if (k1.offset < k2->offset)
-		return -1;
-	return 0;
+	return btrfs_comp_cpu_keys(&k1, k2);
 }
 
 /*
@@ -1052,9 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
-	if (btrfs_header_nritems(mid) > 2)
-		return 0;
-
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;
 
@@ -1701,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	struct extent_buffer *b;
 	int slot;
 	int ret;
+	int err;
 	int level;
 	int lowest_unlock = 1;
 	u8 lowest_level = 0;
@@ -1737,8 +1723,6 @@ again:
 		p->locks[level] = 1;
 
 		if (cow) {
-			int wret;
-
 			/*
 			 * if we don't really need to cow this block
 			 * then we don't want to set the path blocking,
@@ -1749,12 +1733,12 @@ again:
 
 			btrfs_set_path_blocking(p);
 
-			wret = btrfs_cow_block(trans, root, b,
+			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
-			if (wret) {
+			if (err) {
 				free_extent_buffer(b);
-				ret = wret;
+				ret = err;
 				goto done;
 			}
 		}
@@ -1793,41 +1777,45 @@ cow_done:
 		ret = bin_search(b, key, level, &slot);
 
 		if (level != 0) {
-			if (ret && slot > 0)
+			int dec = 0;
+			if (ret && slot > 0) {
+				dec = 1;
 				slot -= 1;
+			}
 			p->slots[level] = slot;
-			ret = setup_nodes_for_search(trans, root, p, b, level,
+			err = setup_nodes_for_search(trans, root, p, b, level,
 						     ins_len);
-			if (ret == -EAGAIN)
+			if (err == -EAGAIN)
 				goto again;
-			else if (ret)
+			if (err) {
+				ret = err;
 				goto done;
+			}
 			b = p->nodes[level];
 			slot = p->slots[level];
 
 			unlock_up(p, level, lowest_unlock);
 
-			/* this is only true while dropping a snapshot */
 			if (level == lowest_level) {
-				ret = 0;
+				if (dec)
+					p->slots[level]++;
 				goto done;
 			}
 
-			ret = read_block_for_search(trans, root, p,
+			err = read_block_for_search(trans, root, p,
 						&b, level, slot, key);
-			if (ret == -EAGAIN)
+			if (err == -EAGAIN)
 				goto again;
-
-			if (ret == -EIO)
+			if (err) {
+				ret = err;
 				goto done;
+			}
 
 			if (!p->skip_locking) {
-				int lret;
-
 				btrfs_clear_path_blocking(p, NULL);
-				lret = btrfs_try_spin_lock(b);
+				err = btrfs_try_spin_lock(b);
 
-				if (!lret) {
+				if (!err) {
 					btrfs_set_path_blocking(p);
 					btrfs_tree_lock(b);
 					btrfs_clear_path_blocking(p, b);
@@ -1837,16 +1825,14 @@ cow_done:
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
-				int sret;
-
 				btrfs_set_path_blocking(p);
-				sret = split_leaf(trans, root, key,
+				err = split_leaf(trans, root, key,
 				      p, ins_len, ret == 0);
 				btrfs_clear_path_blocking(p, NULL);
 
-				BUG_ON(sret > 0);
-				if (sret) {
-					ret = sret;
+				BUG_ON(err > 0);
+				if (err) {
+					ret = err;
 					goto done;
 				}
 			}
@@ -3807,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	}
 
 	/* delete the leaf if it is mostly empty */
-	if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
+	if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
 		/* push_leaf_left fixes the path.
 		 * make sure the path still points to our leaf
 		 * for possible call to del_ptr below
@@ -4042,10 +4028,9 @@ out:
 * calling this function.
 */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *key, int lowest_level,
+			struct btrfs_key *key, int level,
 			int cache_only, u64 min_trans)
 {
-	int level = lowest_level;
 	int slot;
 	struct extent_buffer *c;
 
@@ -4058,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
 		c = path->nodes[level];
 next:
 		if (slot >= btrfs_header_nritems(c)) {
-			level++;
-			if (level == BTRFS_MAX_LEVEL)
+			int ret;
+			int orig_lowest;
+			struct btrfs_key cur_key;
+			if (level + 1 >= BTRFS_MAX_LEVEL ||
+			    !path->nodes[level + 1])
 				return 1;
-			continue;
+
+			if (path->locks[level + 1]) {
+				level++;
+				continue;
+			}
+
+			slot = btrfs_header_nritems(c) - 1;
+			if (level == 0)
+				btrfs_item_key_to_cpu(c, &cur_key, slot);
+			else
+				btrfs_node_key_to_cpu(c, &cur_key, slot);
+
+			orig_lowest = path->lowest_level;
+			btrfs_release_path(root, path);
+			path->lowest_level = level;
+			ret = btrfs_search_slot(NULL, root, &cur_key, path,
+						0, 0);
+			path->lowest_level = orig_lowest;
+			if (ret < 0)
+				return ret;
+
+			c = path->nodes[level];
+			slot = path->slots[level];
+			if (ret == 0)
+				slot++;
+			goto next;
 		}
+
 		if (level == 0)
 			btrfs_item_key_to_cpu(c, key, slot);
 		else {
@@ -4146,7 +4160,8 @@ again:
 	 * advance the path if there are now more items available.
 	 */
 	if (nritems > 0 && path->slots[0] < nritems - 1) {
-		path->slots[0]++;
+		if (ret == 0)
+			path->slots[0]++;
 		ret = 0;
 		goto done;
 	}
@@ -4278,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root,
 		path->slots[0]--;
 
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.type == type)
-			return 0;
 		if (found_key.objectid < min_objectid)
 			break;
+		if (found_key.type == type)
+			return 0;
 		if (found_key.objectid == min_objectid &&
 		    found_key.type < type)
 			break;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 98a873838717..837435ce84ca 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -481,7 +481,7 @@ struct btrfs_shared_data_ref {
 
 struct btrfs_extent_inline_ref {
 	u8 type;
-	u64 offset;
+	__le64 offset;
 } __attribute__ ((__packed__));
 
 /* old style backrefs item */
@@ -689,6 +689,7 @@ struct btrfs_space_info {
 	struct list_head block_groups;
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
+	atomic_t caching_threads;
 };
 
 /*
@@ -707,6 +708,9 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;
 
+	/* if this cluster simply points at a bitmap in the block group */
+	bool points_to_bitmap;
+
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -716,24 +720,37 @@ struct btrfs_free_cluster {
 	struct list_head block_group_list;
 };
 
+enum btrfs_caching_type {
+	BTRFS_CACHE_NO = 0,
+	BTRFS_CACHE_STARTED = 1,
+	BTRFS_CACHE_FINISHED = 2,
+};
+
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
+	struct btrfs_fs_info *fs_info;
 	spinlock_t lock;
-	struct mutex cache_mutex;
 	u64 pinned;
 	u64 reserved;
 	u64 flags;
-	int cached;
+	u64 sectorsize;
+	int extents_thresh;
+	int free_extents;
+	int total_bitmaps;
 	int ro;
 	int dirty;
 
+	/* cache tracking stuff */
+	wait_queue_head_t caching_q;
+	int cached;
+
 	struct btrfs_space_info *space_info;
 
 	/* free space cache stuff */
 	spinlock_t tree_lock;
-	struct rb_root free_space_bytes;
 	struct rb_root free_space_offset;
+	u64 free_space;
 
 	/* block group cache stuff */
 	struct rb_node cache_node;
@@ -808,6 +825,7 @@ struct btrfs_fs_info {
 	struct mutex drop_mutex;
 	struct mutex volume_mutex;
 	struct mutex tree_reloc_mutex;
+	struct rw_semaphore extent_commit_sem;
 
 	/*
 	 * this protects the ordered operations list only while we are
@@ -1988,6 +2006,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
 				  u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			       u64 bytes);
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d28d29c95f7c..8b8192790011 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1352,6 +1352,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
 	int err;
 
+	bdi->name = "btrfs";
 	bdi->capabilities = BDI_CAP_MAP_COPY;
 	err = bdi_init(bdi);
 	if (err)
@@ -1599,6 +1600,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	sb->s_blocksize = 4096;
 	sb->s_blocksize_bits = blksize_bits(4096);
+	sb->s_bdi = &fs_info->bdi;
 
 	/*
 	 * we set the i_size on the btree inode to the max possible int.
@@ -1639,6 +1641,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->cleaner_mutex);
 	mutex_init(&fs_info->volume_mutex);
 	mutex_init(&fs_info->tree_reloc_mutex);
+	init_rwsem(&fs_info->extent_commit_sem);
 
 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1799,6 +1802,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
 	BUG_ON(!chunk_root->node);
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+		       sb->s_id);
+		goto fail_chunk_root;
+	}
 	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
 	chunk_root->commit_root = btrfs_root_node(chunk_root);
 
@@ -1826,6 +1834,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					  blocksize, generation);
 	if (!tree_root->node)
 		goto fail_chunk_root;
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+		printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+		       sb->s_id);
+		goto fail_tree_root;
+	}
 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
 	tree_root->commit_root = btrfs_root_node(tree_root);
 
@@ -2322,6 +2335,9 @@ int close_ctree(struct btrfs_root *root)
 		printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}
 
+	fs_info->closing = 2;
+	smp_mb();
+
 	if (fs_info->delalloc_bytes) {
 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
 		       (unsigned long long)fs_info->delalloc_bytes);
@@ -2343,6 +2359,7 @@ int close_ctree(struct btrfs_root *root)
 	free_extent_buffer(root->fs_info->csum_root->commit_root);
 
 	btrfs_free_block_groups(root->fs_info);
+	btrfs_free_pinned_extents(root->fs_info);
 
 	del_fs_roots(fs_info);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a5aca3997d42..535f85ba104f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/sort.h> | 22 | #include <linux/sort.h> |
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/kthread.h> | ||
24 | #include "compat.h" | 25 | #include "compat.h" |
25 | #include "hash.h" | 26 | #include "hash.h" |
26 | #include "ctree.h" | 27 | #include "ctree.h" |
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
61 | struct btrfs_root *extent_root, u64 alloc_bytes, | 62 | struct btrfs_root *extent_root, u64 alloc_bytes, |
62 | u64 flags, int force); | 63 | u64 flags, int force); |
63 | 64 | ||
65 | static noinline int | ||
66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
67 | { | ||
68 | smp_mb(); | ||
69 | return cache->cached == BTRFS_CACHE_FINISHED; | ||
70 | } | ||
71 | |||
64 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | 72 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) |
65 | { | 73 | { |
66 | return (cache->flags & bits) == bits; | 74 | return (cache->flags & bits) == bits; |
@@ -146,20 +154,70 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
146 | } | 154 | } |
147 | 155 | ||
148 | /* | 156 | /* |
157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | ||
158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
159 | * with pinned extents still sitting there because we had a block group caching, | ||
160 | * we need to clear those now, since we are done. | ||
161 | */ | ||
162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
163 | { | ||
164 | u64 start, end, last = 0; | ||
165 | int ret; | ||
166 | |||
167 | while (1) { | ||
168 | ret = find_first_extent_bit(&info->pinned_extents, last, | ||
169 | &start, &end, | ||
170 | EXTENT_LOCKED|EXTENT_DIRTY); | ||
171 | if (ret) | ||
172 | break; | ||
173 | |||
174 | clear_extent_bits(&info->pinned_extents, start, end, | ||
175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | ||
176 | last = end+1; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | static int remove_sb_from_cache(struct btrfs_root *root, | ||
181 | struct btrfs_block_group_cache *cache) | ||
182 | { | ||
183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
184 | u64 bytenr; | ||
185 | u64 *logical; | ||
186 | int stripe_len; | ||
187 | int i, nr, ret; | ||
188 | |||
189 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
190 | bytenr = btrfs_sb_offset(i); | ||
191 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | ||
192 | cache->key.objectid, bytenr, | ||
193 | 0, &logical, &nr, &stripe_len); | ||
194 | BUG_ON(ret); | ||
195 | while (nr--) { | ||
196 | try_lock_extent(&fs_info->pinned_extents, | ||
197 | logical[nr], | ||
198 | logical[nr] + stripe_len - 1, GFP_NOFS); | ||
199 | } | ||
200 | kfree(logical); | ||
201 | } | ||
202 | |||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* | ||
149 | * this is only called by cache_block_group, since we could have freed extents | 207 | * this is only called by cache_block_group, since we could have freed extents |
150 | * we need to check the pinned_extents for any extents that can't be used yet | 208 | * we need to check the pinned_extents for any extents that can't be used yet |
151 | * since their free space will be released as soon as the transaction commits. | 209 | * since their free space will be released as soon as the transaction commits. |
152 | */ | 210 | */ |
153 | static int add_new_free_space(struct btrfs_block_group_cache *block_group, | 211 | static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, |
154 | struct btrfs_fs_info *info, u64 start, u64 end) | 212 | struct btrfs_fs_info *info, u64 start, u64 end) |
155 | { | 213 | { |
156 | u64 extent_start, extent_end, size; | 214 | u64 extent_start, extent_end, size, total_added = 0; |
157 | int ret; | 215 | int ret; |
158 | 216 | ||
159 | while (start < end) { | 217 | while (start < end) { |
160 | ret = find_first_extent_bit(&info->pinned_extents, start, | 218 | ret = find_first_extent_bit(&info->pinned_extents, start, |
161 | &extent_start, &extent_end, | 219 | &extent_start, &extent_end, |
162 | EXTENT_DIRTY); | 220 | EXTENT_DIRTY|EXTENT_LOCKED); |
163 | if (ret) | 221 | if (ret) |
164 | break; | 222 | break; |
165 | 223 | ||
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
167 | start = extent_end + 1; | 225 | start = extent_end + 1; |
168 | } else if (extent_start > start && extent_start < end) { | 226 | } else if (extent_start > start && extent_start < end) { |
169 | size = extent_start - start; | 227 | size = extent_start - start; |
228 | total_added += size; | ||
170 | ret = btrfs_add_free_space(block_group, start, | 229 | ret = btrfs_add_free_space(block_group, start, |
171 | size); | 230 | size); |
172 | BUG_ON(ret); | 231 | BUG_ON(ret); |
@@ -178,84 +237,93 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
178 | 237 | ||
179 | if (start < end) { | 238 | if (start < end) { |
180 | size = end - start; | 239 | size = end - start; |
240 | total_added += size; | ||
181 | ret = btrfs_add_free_space(block_group, start, size); | 241 | ret = btrfs_add_free_space(block_group, start, size); |
182 | BUG_ON(ret); | 242 | BUG_ON(ret); |
183 | } | 243 | } |
184 | 244 | ||
185 | return 0; | 245 | return total_added; |
186 | } | ||
187 | |||
188 | static int remove_sb_from_cache(struct btrfs_root *root, | ||
189 | struct btrfs_block_group_cache *cache) | ||
190 | { | ||
191 | u64 bytenr; | ||
192 | u64 *logical; | ||
193 | int stripe_len; | ||
194 | int i, nr, ret; | ||
195 | |||
196 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
197 | bytenr = btrfs_sb_offset(i); | ||
198 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | ||
199 | cache->key.objectid, bytenr, 0, | ||
200 | &logical, &nr, &stripe_len); | ||
201 | BUG_ON(ret); | ||
202 | while (nr--) { | ||
203 | btrfs_remove_free_space(cache, logical[nr], | ||
204 | stripe_len); | ||
205 | } | ||
206 | kfree(logical); | ||
207 | } | ||
208 | return 0; | ||
209 | } | 246 | } |
210 | 247 | ||
211 | static int cache_block_group(struct btrfs_root *root, | 248 | static int caching_kthread(void *data) |
212 | struct btrfs_block_group_cache *block_group) | ||
213 | { | 249 | { |
250 | struct btrfs_block_group_cache *block_group = data; | ||
251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
252 | u64 last = 0; | ||
214 | struct btrfs_path *path; | 253 | struct btrfs_path *path; |
215 | int ret = 0; | 254 | int ret = 0; |
216 | struct btrfs_key key; | 255 | struct btrfs_key key; |
217 | struct extent_buffer *leaf; | 256 | struct extent_buffer *leaf; |
218 | int slot; | 257 | int slot; |
219 | u64 last; | 258 | u64 total_found = 0; |
220 | |||
221 | if (!block_group) | ||
222 | return 0; | ||
223 | |||
224 | root = root->fs_info->extent_root; | ||
225 | 259 | ||
226 | if (block_group->cached) | 260 | BUG_ON(!fs_info); |
227 | return 0; | ||
228 | 261 | ||
229 | path = btrfs_alloc_path(); | 262 | path = btrfs_alloc_path(); |
230 | if (!path) | 263 | if (!path) |
231 | return -ENOMEM; | 264 | return -ENOMEM; |
232 | 265 | ||
233 | path->reada = 2; | 266 | atomic_inc(&block_group->space_info->caching_threads); |
267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | ||
234 | /* | 268 | /* |
235 | * we get into deadlocks with paths held by callers of this function. | 269 | * We don't want to deadlock with somebody trying to allocate a new |
236 | * since the alloc_mutex is protecting things right now, just | 270 | * extent for the extent root while also trying to search the extent |
237 | * skip the locking here | 271 | * root to add free space. So we skip locking and search the commit |
272 | * root, since its read-only | ||
238 | */ | 273 | */ |
239 | path->skip_locking = 1; | 274 | path->skip_locking = 1; |
240 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 275 | path->search_commit_root = 1; |
276 | path->reada = 2; | ||
277 | |||
241 | key.objectid = last; | 278 | key.objectid = last; |
242 | key.offset = 0; | 279 | key.offset = 0; |
243 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); |
244 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 281 | again: |
282 | /* need to make sure the commit_root doesn't disappear */ | ||
283 | down_read(&fs_info->extent_commit_sem); | ||
284 | |||
285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | ||
245 | if (ret < 0) | 286 | if (ret < 0) |
246 | goto err; | 287 | goto err; |
247 | 288 | ||
248 | while (1) { | 289 | while (1) { |
290 | smp_mb(); | ||
291 | if (block_group->fs_info->closing > 1) { | ||
292 | last = (u64)-1; | ||
293 | break; | ||
294 | } | ||
295 | |||
249 | leaf = path->nodes[0]; | 296 | leaf = path->nodes[0]; |
250 | slot = path->slots[0]; | 297 | slot = path->slots[0]; |
251 | if (slot >= btrfs_header_nritems(leaf)) { | 298 | if (slot >= btrfs_header_nritems(leaf)) { |
252 | ret = btrfs_next_leaf(root, path); | 299 | ret = btrfs_next_leaf(fs_info->extent_root, path); |
253 | if (ret < 0) | 300 | if (ret < 0) |
254 | goto err; | 301 | goto err; |
255 | if (ret == 0) | 302 | else if (ret) |
256 | continue; | ||
257 | else | ||
258 | break; | 303 | break; |
304 | |||
305 | if (need_resched() || | ||
306 | btrfs_transaction_in_commit(fs_info)) { | ||
307 | leaf = path->nodes[0]; | ||
308 | |||
309 | /* this shouldn't happen, but if the | ||
310 | * leaf is empty just move on. | ||
311 | */ | ||
312 | if (btrfs_header_nritems(leaf) == 0) | ||
313 | break; | ||
314 | /* | ||
315 | * we need to copy the key out so that | ||
316 | * we are sure the next search advances | ||
317 | * us forward in the btree. | ||
318 | */ | ||
319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
320 | btrfs_release_path(fs_info->extent_root, path); | ||
321 | up_read(&fs_info->extent_commit_sem); | ||
322 | schedule_timeout(1); | ||
323 | goto again; | ||
324 | } | ||
325 | |||
326 | continue; | ||
259 | } | 327 | } |
260 | btrfs_item_key_to_cpu(leaf, &key, slot); | 328 | btrfs_item_key_to_cpu(leaf, &key, slot); |
261 | if (key.objectid < block_group->key.objectid) | 329 | if (key.objectid < block_group->key.objectid) |
@@ -266,24 +334,59 @@ static int cache_block_group(struct btrfs_root *root, | |||
266 | break; | 334 | break; |
267 | 335 | ||
268 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { |
269 | add_new_free_space(block_group, root->fs_info, last, | 337 | total_found += add_new_free_space(block_group, |
270 | key.objectid); | 338 | fs_info, last, |
271 | 339 | key.objectid); | |
272 | last = key.objectid + key.offset; | 340 | last = key.objectid + key.offset; |
273 | } | 341 | } |
342 | |||
343 | if (total_found > (1024 * 1024 * 2)) { | ||
344 | total_found = 0; | ||
345 | wake_up(&block_group->caching_q); | ||
346 | } | ||
274 | next: | 347 | next: |
275 | path->slots[0]++; | 348 | path->slots[0]++; |
276 | } | 349 | } |
350 | ret = 0; | ||
277 | 351 | ||
278 | add_new_free_space(block_group, root->fs_info, last, | 352 | total_found += add_new_free_space(block_group, fs_info, last, |
279 | block_group->key.objectid + | 353 | block_group->key.objectid + |
280 | block_group->key.offset); | 354 | block_group->key.offset); |
355 | |||
356 | spin_lock(&block_group->lock); | ||
357 | block_group->cached = BTRFS_CACHE_FINISHED; | ||
358 | spin_unlock(&block_group->lock); | ||
281 | 359 | ||
282 | block_group->cached = 1; | ||
283 | remove_sb_from_cache(root, block_group); | ||
284 | ret = 0; | ||
285 | err: | 360 | err: |
286 | btrfs_free_path(path); | 361 | btrfs_free_path(path); |
362 | up_read(&fs_info->extent_commit_sem); | ||
363 | atomic_dec(&block_group->space_info->caching_threads); | ||
364 | wake_up(&block_group->caching_q); | ||
365 | |||
366 | return 0; | ||
367 | } | ||
368 | |||
369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | ||
370 | { | ||
371 | struct task_struct *tsk; | ||
372 | int ret = 0; | ||
373 | |||
374 | spin_lock(&cache->lock); | ||
375 | if (cache->cached != BTRFS_CACHE_NO) { | ||
376 | spin_unlock(&cache->lock); | ||
377 | return ret; | ||
378 | } | ||
379 | cache->cached = BTRFS_CACHE_STARTED; | ||
380 | spin_unlock(&cache->lock); | ||
381 | |||
382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu", | ||
383 | cache->key.objectid); | ||
384 | if (IS_ERR(tsk)) { | ||
385 | ret = PTR_ERR(tsk); | ||
386 | printk(KERN_ERR "error running thread %d\n", ret); | ||
387 | BUG(); | ||
388 | } | ||
389 | |||
287 | return ret; | 390 | return ret; |
288 | } | 391 | } |
289 | 392 | ||
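The hunk above is the heart of the change: cache_block_group() no longer scans inline, it just flips the state to BTRFS_CACHE_STARTED and hands the scan to caching_kthread(), which publishes free space as it goes and wakes anyone sleeping on caching_q. A minimal sketch of that kthread-plus-waitqueue pattern, with hypothetical names (cache_job, cache_fn) standing in for the btrfs structures:

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/err.h>

struct cache_job {
        wait_queue_head_t waitq;        /* allocators sleep here          */
        int done;                       /* set once the scan has finished */
};

static int cache_fn(void *data)
{
        struct cache_job *job = data;

        /* ... scan metadata, publishing free space as it is found ... */

        job->done = 1;
        wake_up(&job->waitq);           /* release anyone stuck waiting */
        return 0;
}

static int start_cache_job(struct cache_job *job)
{
        struct task_struct *tsk;

        job->done = 0;
        init_waitqueue_head(&job->waitq);
        tsk = kthread_run(cache_fn, job, "cache-job");
        return IS_ERR(tsk) ? PTR_ERR(tsk) : 0;
}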
@@ -1408,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1408 | static void btrfs_issue_discard(struct block_device *bdev, | 1511 | static void btrfs_issue_discard(struct block_device *bdev, |
1409 | u64 start, u64 len) | 1512 | u64 start, u64 len) |
1410 | { | 1513 | { |
1411 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); | 1514 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
1515 | DISCARD_FL_BARRIER); | ||
1412 | } | 1516 | } |
1413 | #endif | 1517 | #endif |
1414 | 1518 | ||
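The shift by 9 in btrfs_issue_discard() converts byte offsets and lengths into the 512-byte sectors that blkdev_issue_discard() works in; the only functional change in this hunk is the new flags argument. The unit conversion in isolation:

/* 1 << 9 == 512, the block layer's sector size */
static inline sector_t bytes_to_sectors(u64 bytes)
{
        return (sector_t)(bytes >> 9);
}
/* e.g. bytes_to_sectors(1024 * 1024) == 2048 sectors */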
@@ -2387,13 +2491,29 @@ fail: | |||
2387 | 2491 | ||
2388 | } | 2492 | } |
2389 | 2493 | ||
2494 | static struct btrfs_block_group_cache * | ||
2495 | next_block_group(struct btrfs_root *root, | ||
2496 | struct btrfs_block_group_cache *cache) | ||
2497 | { | ||
2498 | struct rb_node *node; | ||
2499 | spin_lock(&root->fs_info->block_group_cache_lock); | ||
2500 | node = rb_next(&cache->cache_node); | ||
2501 | btrfs_put_block_group(cache); | ||
2502 | if (node) { | ||
2503 | cache = rb_entry(node, struct btrfs_block_group_cache, | ||
2504 | cache_node); | ||
2505 | atomic_inc(&cache->count); | ||
2506 | } else | ||
2507 | cache = NULL; | ||
2508 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
2509 | return cache; | ||
2510 | } | ||
2511 | |||
2390 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2512 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2391 | struct btrfs_root *root) | 2513 | struct btrfs_root *root) |
2392 | { | 2514 | { |
2393 | struct btrfs_block_group_cache *cache, *entry; | 2515 | struct btrfs_block_group_cache *cache; |
2394 | struct rb_node *n; | ||
2395 | int err = 0; | 2516 | int err = 0; |
2396 | int werr = 0; | ||
2397 | struct btrfs_path *path; | 2517 | struct btrfs_path *path; |
2398 | u64 last = 0; | 2518 | u64 last = 0; |
2399 | 2519 | ||
@@ -2402,39 +2522,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2402 | return -ENOMEM; | 2522 | return -ENOMEM; |
2403 | 2523 | ||
2404 | while (1) { | 2524 | while (1) { |
2405 | cache = NULL; | 2525 | if (last == 0) { |
2406 | spin_lock(&root->fs_info->block_group_cache_lock); | 2526 | err = btrfs_run_delayed_refs(trans, root, |
2407 | for (n = rb_first(&root->fs_info->block_group_cache_tree); | 2527 | (unsigned long)-1); |
2408 | n; n = rb_next(n)) { | 2528 | BUG_ON(err); |
2409 | entry = rb_entry(n, struct btrfs_block_group_cache, | ||
2410 | cache_node); | ||
2411 | if (entry->dirty) { | ||
2412 | cache = entry; | ||
2413 | break; | ||
2414 | } | ||
2415 | } | 2529 | } |
2416 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
2417 | 2530 | ||
2418 | if (!cache) | 2531 | cache = btrfs_lookup_first_block_group(root->fs_info, last); |
2419 | break; | 2532 | while (cache) { |
2533 | if (cache->dirty) | ||
2534 | break; | ||
2535 | cache = next_block_group(root, cache); | ||
2536 | } | ||
2537 | if (!cache) { | ||
2538 | if (last == 0) | ||
2539 | break; | ||
2540 | last = 0; | ||
2541 | continue; | ||
2542 | } | ||
2420 | 2543 | ||
2421 | cache->dirty = 0; | 2544 | cache->dirty = 0; |
2422 | last += cache->key.offset; | 2545 | last = cache->key.objectid + cache->key.offset; |
2423 | 2546 | ||
2424 | err = write_one_cache_group(trans, root, | 2547 | err = write_one_cache_group(trans, root, path, cache); |
2425 | path, cache); | 2548 | BUG_ON(err); |
2426 | /* | 2549 | btrfs_put_block_group(cache); |
2427 | * if we fail to write the cache group, we want | ||
2428 | * to keep it marked dirty in hopes that a later | ||
2429 | * write will work | ||
2430 | */ | ||
2431 | if (err) { | ||
2432 | werr = err; | ||
2433 | continue; | ||
2434 | } | ||
2435 | } | 2550 | } |
2551 | |||
2436 | btrfs_free_path(path); | 2552 | btrfs_free_path(path); |
2437 | return werr; | 2553 | return 0; |
2438 | } | 2554 | } |
2439 | 2555 | ||
2440 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | 2556 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) |
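next_block_group() keeps the reference counts balanced while walking the rbtree: under block_group_cache_lock it drops the reference on the group it was handed and pins the successor. The iteration discipline the rewritten loop above relies on, reduced to a fragment:

cache = btrfs_lookup_first_block_group(root->fs_info, 0);
while (cache) {
        /* cache holds a reference here, so it cannot be freed */
        cache = next_block_group(root, cache);  /* puts old, pins next */
}
/* the walk ends with cache == NULL, so there is nothing left to put */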
@@ -2484,6 +2600,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2484 | found->force_alloc = 0; | 2600 | found->force_alloc = 0; |
2485 | *space_info = found; | 2601 | *space_info = found; |
2486 | list_add_rcu(&found->list, &info->space_info); | 2602 | list_add_rcu(&found->list, &info->space_info); |
2603 | atomic_set(&found->caching_threads, 0); | ||
2487 | return 0; | 2604 | return 0; |
2488 | } | 2605 | } |
2489 | 2606 | ||
@@ -2947,13 +3064,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2947 | struct btrfs_block_group_cache *cache; | 3064 | struct btrfs_block_group_cache *cache; |
2948 | struct btrfs_fs_info *fs_info = root->fs_info; | 3065 | struct btrfs_fs_info *fs_info = root->fs_info; |
2949 | 3066 | ||
2950 | if (pin) { | 3067 | if (pin) |
2951 | set_extent_dirty(&fs_info->pinned_extents, | 3068 | set_extent_dirty(&fs_info->pinned_extents, |
2952 | bytenr, bytenr + num - 1, GFP_NOFS); | 3069 | bytenr, bytenr + num - 1, GFP_NOFS); |
2953 | } else { | ||
2954 | clear_extent_dirty(&fs_info->pinned_extents, | ||
2955 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
2956 | } | ||
2957 | 3070 | ||
2958 | while (num > 0) { | 3071 | while (num > 0) { |
2959 | cache = btrfs_lookup_block_group(fs_info, bytenr); | 3072 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
@@ -2969,14 +3082,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2969 | spin_unlock(&cache->space_info->lock); | 3082 | spin_unlock(&cache->space_info->lock); |
2970 | fs_info->total_pinned += len; | 3083 | fs_info->total_pinned += len; |
2971 | } else { | 3084 | } else { |
3085 | int unpin = 0; | ||
3086 | |||
3087 | /* | ||
3088 | * in order to not race with the block group caching, we | ||
3089 | * only want to unpin the extent if we are cached. If | ||
3090 | * we aren't cached, we want to start async caching this | ||
3091 | * block group so we can free the extent the next time | ||
3092 | * around. | ||
3093 | */ | ||
2972 | spin_lock(&cache->space_info->lock); | 3094 | spin_lock(&cache->space_info->lock); |
2973 | spin_lock(&cache->lock); | 3095 | spin_lock(&cache->lock); |
2974 | cache->pinned -= len; | 3096 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); |
2975 | cache->space_info->bytes_pinned -= len; | 3097 | if (likely(unpin)) { |
3098 | cache->pinned -= len; | ||
3099 | cache->space_info->bytes_pinned -= len; | ||
3100 | fs_info->total_pinned -= len; | ||
3101 | } | ||
2976 | spin_unlock(&cache->lock); | 3102 | spin_unlock(&cache->lock); |
2977 | spin_unlock(&cache->space_info->lock); | 3103 | spin_unlock(&cache->space_info->lock); |
2978 | fs_info->total_pinned -= len; | 3104 | |
2979 | if (cache->cached) | 3105 | if (likely(unpin)) |
3106 | clear_extent_dirty(&fs_info->pinned_extents, | ||
3107 | bytenr, bytenr + len - 1, | ||
3108 | GFP_NOFS); | ||
3109 | else | ||
3110 | cache_block_group(cache); | ||
3111 | |||
3112 | if (unpin) | ||
2980 | btrfs_add_free_space(cache, bytenr, len); | 3113 | btrfs_add_free_space(cache, bytenr, len); |
2981 | } | 3114 | } |
2982 | btrfs_put_block_group(cache); | 3115 | btrfs_put_block_group(cache); |
@@ -3030,6 +3163,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
3030 | &start, &end, EXTENT_DIRTY); | 3163 | &start, &end, EXTENT_DIRTY); |
3031 | if (ret) | 3164 | if (ret) |
3032 | break; | 3165 | break; |
3166 | |||
3033 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3167 | set_extent_dirty(copy, start, end, GFP_NOFS); |
3034 | last = end + 1; | 3168 | last = end + 1; |
3035 | } | 3169 | } |
@@ -3058,6 +3192,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3058 | 3192 | ||
3059 | cond_resched(); | 3193 | cond_resched(); |
3060 | } | 3194 | } |
3195 | |||
3061 | return ret; | 3196 | return ret; |
3062 | } | 3197 | } |
3063 | 3198 | ||
@@ -3436,6 +3571,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) | |||
3436 | } | 3571 | } |
3437 | 3572 | ||
3438 | /* | 3573 | /* |
3574 | * when we wait for progress in the block group caching, it's because | ||
3575 | * our allocation attempt failed at least once. So, we must sleep | ||
3576 | * and let some progress happen before we try again. | ||
3577 | * | ||
3578 | * This function will sleep at least once waiting for new free space to | ||
3579 | * show up, and then it will check the block group free space numbers | ||
3580 | * for our min num_bytes. Another option is to have it go ahead | ||
3581 | * and look in the rbtree for a free extent of a given size, but this | ||
3582 | * is a good start. | ||
3583 | */ | ||
3584 | static noinline int | ||
3585 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | ||
3586 | u64 num_bytes) | ||
3587 | { | ||
3588 | DEFINE_WAIT(wait); | ||
3589 | |||
3590 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | ||
3591 | |||
3592 | if (block_group_cache_done(cache)) { | ||
3593 | finish_wait(&cache->caching_q, &wait); | ||
3594 | return 0; | ||
3595 | } | ||
3596 | schedule(); | ||
3597 | finish_wait(&cache->caching_q, &wait); | ||
3598 | |||
3599 | wait_event(cache->caching_q, block_group_cache_done(cache) || | ||
3600 | (cache->free_space >= num_bytes)); | ||
3601 | return 0; | ||
3602 | } | ||
3603 | |||
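This is the consumer half of a handshake with caching_kthread() earlier in the diff, which calls wake_up(&block_group->caching_q) both each time roughly 2MB of new free space has been published and once more when it finishes. Reduced to its core, the pairing looks like:

/* producer, in the caching thread */
total_found += add_new_free_space(block_group, fs_info, last, key.objectid);
if (total_found > (1024 * 1024 * 2)) {
        total_found = 0;
        wake_up(&block_group->caching_q);
}

/* consumer, in the allocator */
wait_event(cache->caching_q, block_group_cache_done(cache) ||
           (cache->free_space >= num_bytes));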
3604 | enum btrfs_loop_type { | ||
3605 | LOOP_CACHED_ONLY = 0, | ||
3606 | LOOP_CACHING_NOWAIT = 1, | ||
3607 | LOOP_CACHING_WAIT = 2, | ||
3608 | LOOP_ALLOC_CHUNK = 3, | ||
3609 | LOOP_NO_EMPTY_SIZE = 4, | ||
3610 | }; | ||
3611 | |||
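Each enum value names one fallback stage: a failed pass through find_free_extent() bumps loop and retries with one constraint relaxed, until LOOP_NO_EMPTY_SIZE also fails and the allocator reports no space. Schematically (try_alloc() is a made-up stand-in for a single search pass, not a btrfs function):

static int alloc_with_fallbacks(void)
{
        int loop = LOOP_CACHED_ONLY;

        for (;;) {
                if (try_alloc(loop) == 0)
                        return 0;               /* found space          */
                if (loop == LOOP_NO_EMPTY_SIZE)
                        return -ENOSPC;         /* no fallbacks left    */
                loop++;                         /* relax one constraint */
        }
}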
3612 | /* | ||
3439 | * walks the btree of allocated extents and finds a hole of a given size. | 3613 |
3440 | * The key ins is changed to record the hole: | 3614 | * The key ins is changed to record the hole: |
3441 | * ins->objectid == block start | 3615 | * ins->objectid == block start |
@@ -3460,6 +3634,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3460 | struct btrfs_space_info *space_info; | 3634 | struct btrfs_space_info *space_info; |
3461 | int last_ptr_loop = 0; | 3635 | int last_ptr_loop = 0; |
3462 | int loop = 0; | 3636 | int loop = 0; |
3637 | bool found_uncached_bg = false; | ||
3463 | 3638 | ||
3464 | WARN_ON(num_bytes < root->sectorsize); | 3639 | WARN_ON(num_bytes < root->sectorsize); |
3465 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 3640 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3491,15 +3666,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3491 | search_start = max(search_start, first_logical_byte(root, 0)); | 3666 | search_start = max(search_start, first_logical_byte(root, 0)); |
3492 | search_start = max(search_start, hint_byte); | 3667 | search_start = max(search_start, hint_byte); |
3493 | 3668 | ||
3494 | if (!last_ptr) { | 3669 | if (!last_ptr) |
3495 | empty_cluster = 0; | 3670 | empty_cluster = 0; |
3496 | loop = 1; | ||
3497 | } | ||
3498 | 3671 | ||
3499 | if (search_start == hint_byte) { | 3672 | if (search_start == hint_byte) { |
3500 | block_group = btrfs_lookup_block_group(root->fs_info, | 3673 | block_group = btrfs_lookup_block_group(root->fs_info, |
3501 | search_start); | 3674 | search_start); |
3502 | if (block_group && block_group_bits(block_group, data)) { | 3675 | /* |
3676 | * we don't want to use the block group if it doesn't match our | ||
3677 | * allocation bits, or if it's not cached. | ||
3678 | */ | ||
3679 | if (block_group && block_group_bits(block_group, data) && | ||
3680 | block_group_cache_done(block_group)) { | ||
3503 | down_read(&space_info->groups_sem); | 3681 | down_read(&space_info->groups_sem); |
3504 | if (list_empty(&block_group->list) || | 3682 | if (list_empty(&block_group->list) || |
3505 | block_group->ro) { | 3683 | block_group->ro) { |
@@ -3522,21 +3700,35 @@ search: | |||
3522 | down_read(&space_info->groups_sem); | 3700 | down_read(&space_info->groups_sem); |
3523 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 3701 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
3524 | u64 offset; | 3702 | u64 offset; |
3703 | int cached; | ||
3525 | 3704 | ||
3526 | atomic_inc(&block_group->count); | 3705 | atomic_inc(&block_group->count); |
3527 | search_start = block_group->key.objectid; | 3706 | search_start = block_group->key.objectid; |
3528 | 3707 | ||
3529 | have_block_group: | 3708 | have_block_group: |
3530 | if (unlikely(!block_group->cached)) { | 3709 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
3531 | mutex_lock(&block_group->cache_mutex); | 3710 | /* |
3532 | ret = cache_block_group(root, block_group); | 3711 | * we want to start caching kthreads, but not too many |
3533 | mutex_unlock(&block_group->cache_mutex); | 3712 | * right off the bat so we don't overwhelm the system, |
3534 | if (ret) { | 3713 | * so only start them if there are fewer than 2 and we're |
3535 | btrfs_put_block_group(block_group); | 3714 | * in the initial allocation phase. |
3536 | break; | 3715 | */ |
3716 | if (loop > LOOP_CACHING_NOWAIT || | ||
3717 | atomic_read(&space_info->caching_threads) < 2) { | ||
3718 | ret = cache_block_group(block_group); | ||
3719 | BUG_ON(ret); | ||
3537 | } | 3720 | } |
3538 | } | 3721 | } |
3539 | 3722 | ||
3723 | cached = block_group_cache_done(block_group); | ||
3724 | if (unlikely(!cached)) { | ||
3725 | found_uncached_bg = true; | ||
3726 | |||
3727 | /* if we only want cached bgs, loop */ | ||
3728 | if (loop == LOOP_CACHED_ONLY) | ||
3729 | goto loop; | ||
3730 | } | ||
3731 | |||
3540 | if (unlikely(block_group->ro)) | 3732 | if (unlikely(block_group->ro)) |
3541 | goto loop; | 3733 | goto loop; |
3542 | 3734 | ||
@@ -3615,14 +3807,21 @@ refill_cluster: | |||
3615 | spin_unlock(&last_ptr->refill_lock); | 3807 | spin_unlock(&last_ptr->refill_lock); |
3616 | goto checks; | 3808 | goto checks; |
3617 | } | 3809 | } |
3810 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | ||
3811 | spin_unlock(&last_ptr->refill_lock); | ||
3812 | |||
3813 | wait_block_group_cache_progress(block_group, | ||
3814 | num_bytes + empty_cluster + empty_size); | ||
3815 | goto have_block_group; | ||
3618 | } | 3816 | } |
3817 | |||
3619 | /* | 3818 | /* |
3620 | * at this point we either didn't find a cluster | 3819 | * at this point we either didn't find a cluster |
3621 | * or we weren't able to allocate a block from our | 3820 | * or we weren't able to allocate a block from our |
3622 | * cluster. Free the cluster we've been trying | 3821 | * cluster. Free the cluster we've been trying |
3623 | * to use, and go to the next block group | 3822 | * to use, and go to the next block group |
3624 | */ | 3823 | */ |
3625 | if (loop < 2) { | 3824 | if (loop < LOOP_NO_EMPTY_SIZE) { |
3626 | btrfs_return_cluster_to_free_space(NULL, | 3825 | btrfs_return_cluster_to_free_space(NULL, |
3627 | last_ptr); | 3826 | last_ptr); |
3628 | spin_unlock(&last_ptr->refill_lock); | 3827 | spin_unlock(&last_ptr->refill_lock); |
@@ -3633,11 +3832,17 @@ refill_cluster: | |||
3633 | 3832 | ||
3634 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 3833 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3635 | num_bytes, empty_size); | 3834 | num_bytes, empty_size); |
3636 | if (!offset) | 3835 | if (!offset && (cached || (!cached && |
3836 | loop == LOOP_CACHING_NOWAIT))) { | ||
3637 | goto loop; | 3837 | goto loop; |
3838 | } else if (!offset && (!cached && | ||
3839 | loop > LOOP_CACHING_NOWAIT)) { | ||
3840 | wait_block_group_cache_progress(block_group, | ||
3841 | num_bytes + empty_size); | ||
3842 | goto have_block_group; | ||
3843 | } | ||
3638 | checks: | 3844 | checks: |
3639 | search_start = stripe_align(root, offset); | 3845 | search_start = stripe_align(root, offset); |
3640 | |||
3641 | /* move on to the next group */ | 3846 | /* move on to the next group */ |
3642 | if (search_start + num_bytes >= search_end) { | 3847 | if (search_start + num_bytes >= search_end) { |
3643 | btrfs_add_free_space(block_group, offset, num_bytes); | 3848 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -3683,13 +3888,26 @@ loop: | |||
3683 | } | 3888 | } |
3684 | up_read(&space_info->groups_sem); | 3889 | up_read(&space_info->groups_sem); |
3685 | 3890 | ||
3686 | /* loop == 0, try to find a clustered alloc in every block group | 3891 | /* LOOP_CACHED_ONLY, only search fully cached block groups |
3687 | * loop == 1, try again after forcing a chunk allocation | 3892 | * LOOP_CACHING_NOWAIT, search partially cached block groups, but |
3688 | * loop == 2, set empty_size and empty_cluster to 0 and try again | 3893 | * don't wait for them to finish caching |
3894 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching | ||
3895 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again | ||
3896 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | ||
3897 | * again | ||
3689 | */ | 3898 | */ |
3690 | if (!ins->objectid && loop < 3 && | 3899 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
3691 | (empty_size || empty_cluster || allowed_chunk_alloc)) { | 3900 | (found_uncached_bg || empty_size || empty_cluster || |
3692 | if (loop >= 2) { | 3901 | allowed_chunk_alloc)) { |
3902 | if (found_uncached_bg) { | ||
3903 | found_uncached_bg = false; | ||
3904 | if (loop < LOOP_CACHING_WAIT) { | ||
3905 | loop++; | ||
3906 | goto search; | ||
3907 | } | ||
3908 | } | ||
3909 | |||
3910 | if (loop == LOOP_ALLOC_CHUNK) { | ||
3693 | empty_size = 0; | 3911 | empty_size = 0; |
3694 | empty_cluster = 0; | 3912 | empty_cluster = 0; |
3695 | } | 3913 | } |
@@ -3702,7 +3920,7 @@ loop: | |||
3702 | space_info->force_alloc = 1; | 3920 | space_info->force_alloc = 1; |
3703 | } | 3921 | } |
3704 | 3922 | ||
3705 | if (loop < 3) { | 3923 | if (loop < LOOP_NO_EMPTY_SIZE) { |
3706 | loop++; | 3924 | loop++; |
3707 | goto search; | 3925 | goto search; |
3708 | } | 3926 | } |
@@ -3798,7 +4016,7 @@ again: | |||
3798 | num_bytes, data, 1); | 4016 | num_bytes, data, 1); |
3799 | goto again; | 4017 | goto again; |
3800 | } | 4018 | } |
3801 | if (ret) { | 4019 | if (ret == -ENOSPC) { |
3802 | struct btrfs_space_info *sinfo; | 4020 | struct btrfs_space_info *sinfo; |
3803 | 4021 | ||
3804 | sinfo = __find_space_info(root->fs_info, data); | 4022 | sinfo = __find_space_info(root->fs_info, data); |
@@ -3806,7 +4024,6 @@ again: | |||
3806 | "wanted %llu\n", (unsigned long long)data, | 4024 | "wanted %llu\n", (unsigned long long)data, |
3807 | (unsigned long long)num_bytes); | 4025 | (unsigned long long)num_bytes); |
3808 | dump_space_info(sinfo, num_bytes); | 4026 | dump_space_info(sinfo, num_bytes); |
3809 | BUG(); | ||
3810 | } | 4027 | } |
3811 | 4028 | ||
3812 | return ret; | 4029 | return ret; |
@@ -3844,7 +4061,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3844 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | 4061 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, |
3845 | empty_size, hint_byte, search_end, ins, | 4062 | empty_size, hint_byte, search_end, ins, |
3846 | data); | 4063 | data); |
3847 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | 4064 | if (!ret) |
4065 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4066 | |||
3848 | return ret; | 4067 | return ret; |
3849 | } | 4068 | } |
3850 | 4069 | ||
@@ -4006,9 +4225,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4006 | struct btrfs_block_group_cache *block_group; | 4225 | struct btrfs_block_group_cache *block_group; |
4007 | 4226 | ||
4008 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4227 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4009 | mutex_lock(&block_group->cache_mutex); | 4228 | cache_block_group(block_group); |
4010 | cache_block_group(root, block_group); | 4229 | wait_event(block_group->caching_q, |
4011 | mutex_unlock(&block_group->cache_mutex); | 4230 | block_group_cache_done(block_group)); |
4012 | 4231 | ||
4013 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4232 | ret = btrfs_remove_free_space(block_group, ins->objectid, |
4014 | ins->offset); | 4233 | ins->offset); |
@@ -4039,7 +4258,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4039 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4258 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4040 | empty_size, hint_byte, search_end, | 4259 | empty_size, hint_byte, search_end, |
4041 | ins, 0); | 4260 | ins, 0); |
4042 | BUG_ON(ret); | 4261 | if (ret) |
4262 | return ret; | ||
4043 | 4263 | ||
4044 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | 4264 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { |
4045 | if (parent == 0) | 4265 | if (parent == 0) |
@@ -6955,11 +7175,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
6955 | &info->block_group_cache_tree); | 7175 | &info->block_group_cache_tree); |
6956 | spin_unlock(&info->block_group_cache_lock); | 7176 | spin_unlock(&info->block_group_cache_lock); |
6957 | 7177 | ||
6958 | btrfs_remove_free_space_cache(block_group); | ||
6959 | down_write(&block_group->space_info->groups_sem); | 7178 | down_write(&block_group->space_info->groups_sem); |
6960 | list_del(&block_group->list); | 7179 | list_del(&block_group->list); |
6961 | up_write(&block_group->space_info->groups_sem); | 7180 | up_write(&block_group->space_info->groups_sem); |
6962 | 7181 | ||
7182 | if (block_group->cached == BTRFS_CACHE_STARTED) | ||
7183 | wait_event(block_group->caching_q, | ||
7184 | block_group_cache_done(block_group)); | ||
7185 | |||
7186 | btrfs_remove_free_space_cache(block_group); | ||
7187 | |||
6963 | WARN_ON(atomic_read(&block_group->count) != 1); | 7188 | WARN_ON(atomic_read(&block_group->count) != 1); |
6964 | kfree(block_group); | 7189 | kfree(block_group); |
6965 | 7190 | ||
@@ -7025,9 +7250,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7025 | atomic_set(&cache->count, 1); | 7250 | atomic_set(&cache->count, 1); |
7026 | spin_lock_init(&cache->lock); | 7251 | spin_lock_init(&cache->lock); |
7027 | spin_lock_init(&cache->tree_lock); | 7252 | spin_lock_init(&cache->tree_lock); |
7028 | mutex_init(&cache->cache_mutex); | 7253 | cache->fs_info = info; |
7254 | init_waitqueue_head(&cache->caching_q); | ||
7029 | INIT_LIST_HEAD(&cache->list); | 7255 | INIT_LIST_HEAD(&cache->list); |
7030 | INIT_LIST_HEAD(&cache->cluster_list); | 7256 | INIT_LIST_HEAD(&cache->cluster_list); |
7257 | |||
7258 | /* | ||
7259 | * we only want to have 32k of RAM per block group for keeping | ||
7260 | * track of free space, and if we pass 1/2 of that we want to | ||
7261 | * start converting things over to using bitmaps | ||
7262 | */ | ||
7263 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
7264 | sizeof(struct btrfs_free_space); | ||
7265 | |||
7031 | read_extent_buffer(leaf, &cache->item, | 7266 | read_extent_buffer(leaf, &cache->item, |
7032 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 7267 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
7033 | sizeof(cache->item)); | 7268 | sizeof(cache->item)); |
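For a sense of scale on the extents_thresh computation above: half of the 32k budget is 16k. Assuming sizeof(struct btrfs_free_space) works out to 64 bytes on a 64-bit build (an illustrative figure; the real value depends on arch and config), the threshold comes to about 256 extent entries before the code starts converting to bitmaps:

/* illustrative arithmetic only; ENTRY_SIZE is an assumption */
#define CACHE_BYTES     (32 * 1024)     /* per-group tracking budget  */
#define ENTRY_SIZE      64              /* assumed sizeof() on 64-bit */

int extents_thresh = (CACHE_BYTES / 2) / ENTRY_SIZE;    /* == 256 */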
@@ -7036,6 +7271,26 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7036 | key.objectid = found_key.objectid + found_key.offset; | 7271 | key.objectid = found_key.objectid + found_key.offset; |
7037 | btrfs_release_path(root, path); | 7272 | btrfs_release_path(root, path); |
7038 | cache->flags = btrfs_block_group_flags(&cache->item); | 7273 | cache->flags = btrfs_block_group_flags(&cache->item); |
7274 | cache->sectorsize = root->sectorsize; | ||
7275 | |||
7276 | remove_sb_from_cache(root, cache); | ||
7277 | |||
7278 | /* | ||
7279 | * check for two cases, either we are full, and therefore | ||
7280 | * don't need to bother with the caching work since we won't | ||
7281 | * find any space, or we are empty, and we can just add all | ||
7282 | * the space in and be done with it. This saves us _a lot_ of | ||
7283 | * time, particularly in the full case. | ||
7284 | */ | ||
7285 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | ||
7286 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7287 | } else if (btrfs_block_group_used(&cache->item) == 0) { | ||
7288 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7289 | add_new_free_space(cache, root->fs_info, | ||
7290 | found_key.objectid, | ||
7291 | found_key.objectid + | ||
7292 | found_key.offset); | ||
7293 | } | ||
7039 | 7294 | ||
7040 | ret = update_space_info(info, cache->flags, found_key.offset, | 7295 | ret = update_space_info(info, cache->flags, found_key.offset, |
7041 | btrfs_block_group_used(&cache->item), | 7296 | btrfs_block_group_used(&cache->item), |
@@ -7079,10 +7334,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7079 | cache->key.objectid = chunk_offset; | 7334 | cache->key.objectid = chunk_offset; |
7080 | cache->key.offset = size; | 7335 | cache->key.offset = size; |
7081 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 7336 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
7337 | cache->sectorsize = root->sectorsize; | ||
7338 | |||
7339 | /* | ||
7340 | * we only want to have 32k of RAM per block group for keeping track | ||
7341 | * of free space, and if we pass 1/2 of that we want to start | ||
7342 | * converting things over to using bitmaps | ||
7343 | */ | ||
7344 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
7345 | sizeof(struct btrfs_free_space); | ||
7082 | atomic_set(&cache->count, 1); | 7346 | atomic_set(&cache->count, 1); |
7083 | spin_lock_init(&cache->lock); | 7347 | spin_lock_init(&cache->lock); |
7084 | spin_lock_init(&cache->tree_lock); | 7348 | spin_lock_init(&cache->tree_lock); |
7085 | mutex_init(&cache->cache_mutex); | 7349 | init_waitqueue_head(&cache->caching_q); |
7086 | INIT_LIST_HEAD(&cache->list); | 7350 | INIT_LIST_HEAD(&cache->list); |
7087 | INIT_LIST_HEAD(&cache->cluster_list); | 7351 | INIT_LIST_HEAD(&cache->cluster_list); |
7088 | 7352 | ||
@@ -7091,6 +7355,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7091 | cache->flags = type; | 7355 | cache->flags = type; |
7092 | btrfs_set_block_group_flags(&cache->item, type); | 7356 | btrfs_set_block_group_flags(&cache->item, type); |
7093 | 7357 | ||
7358 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7359 | remove_sb_from_cache(root, cache); | ||
7360 | |||
7361 | add_new_free_space(cache, root->fs_info, chunk_offset, | ||
7362 | chunk_offset + size); | ||
7363 | |||
7094 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7364 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7095 | &cache->space_info); | 7365 | &cache->space_info); |
7096 | BUG_ON(ret); | 7366 | BUG_ON(ret); |
@@ -7149,7 +7419,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7149 | rb_erase(&block_group->cache_node, | 7419 | rb_erase(&block_group->cache_node, |
7150 | &root->fs_info->block_group_cache_tree); | 7420 | &root->fs_info->block_group_cache_tree); |
7151 | spin_unlock(&root->fs_info->block_group_cache_lock); | 7421 | spin_unlock(&root->fs_info->block_group_cache_lock); |
7152 | btrfs_remove_free_space_cache(block_group); | 7422 | |
7153 | down_write(&block_group->space_info->groups_sem); | 7423 | down_write(&block_group->space_info->groups_sem); |
7154 | /* | 7424 | /* |
7155 | * we must use list_del_init so people can check to see if they | 7425 | * we must use list_del_init so people can check to see if they |
@@ -7158,11 +7428,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7158 | list_del_init(&block_group->list); | 7428 | list_del_init(&block_group->list); |
7159 | up_write(&block_group->space_info->groups_sem); | 7429 | up_write(&block_group->space_info->groups_sem); |
7160 | 7430 | ||
7431 | if (block_group->cached == BTRFS_CACHE_STARTED) | ||
7432 | wait_event(block_group->caching_q, | ||
7433 | block_group_cache_done(block_group)); | ||
7434 | |||
7435 | btrfs_remove_free_space_cache(block_group); | ||
7436 | |||
7161 | spin_lock(&block_group->space_info->lock); | 7437 | spin_lock(&block_group->space_info->lock); |
7162 | block_group->space_info->total_bytes -= block_group->key.offset; | 7438 | block_group->space_info->total_bytes -= block_group->key.offset; |
7163 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 7439 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
7164 | spin_unlock(&block_group->space_info->lock); | 7440 | spin_unlock(&block_group->space_info->lock); |
7165 | block_group->space_info->full = 0; | 7441 | |
7442 | btrfs_clear_space_info_full(root->fs_info); | ||
7166 | 7443 | ||
7167 | btrfs_put_block_group(block_group); | 7444 | btrfs_put_block_group(block_group); |
7168 | btrfs_put_block_group(block_group); | 7445 | btrfs_put_block_group(block_group); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4538e48581a5..5edcee3a617f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -16,45 +16,46 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/pagemap.h> | ||
19 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/math64.h> | ||
20 | #include "ctree.h" | 22 | #include "ctree.h" |
21 | #include "free-space-cache.h" | 23 | #include "free-space-cache.h" |
22 | #include "transaction.h" | 24 | #include "transaction.h" |
23 | 25 | ||
24 | struct btrfs_free_space { | 26 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
25 | struct rb_node bytes_index; | 27 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
26 | struct rb_node offset_index; | ||
27 | u64 offset; | ||
28 | u64 bytes; | ||
29 | }; | ||
30 | 28 | ||
31 | static int tree_insert_offset(struct rb_root *root, u64 offset, | 29 | static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, |
32 | struct rb_node *node) | 30 | u64 offset) |
33 | { | 31 | { |
34 | struct rb_node **p = &root->rb_node; | 32 | BUG_ON(offset < bitmap_start); |
35 | struct rb_node *parent = NULL; | 33 | offset -= bitmap_start; |
36 | struct btrfs_free_space *info; | 34 | return (unsigned long)(div64_u64(offset, sectorsize)); |
35 | } | ||
37 | 36 | ||
38 | while (*p) { | 37 | static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) |
39 | parent = *p; | 38 | { |
40 | info = rb_entry(parent, struct btrfs_free_space, offset_index); | 39 | return (unsigned long)(div64_u64(bytes, sectorsize)); |
40 | } | ||
41 | 41 | ||
42 | if (offset < info->offset) | 42 | static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, |
43 | p = &(*p)->rb_left; | 43 | u64 offset) |
44 | else if (offset > info->offset) | 44 | { |
45 | p = &(*p)->rb_right; | 45 | u64 bitmap_start; |
46 | else | 46 | u64 bytes_per_bitmap; |
47 | return -EEXIST; | ||
48 | } | ||
49 | 47 | ||
50 | rb_link_node(node, parent, p); | 48 | bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; |
51 | rb_insert_color(node, root); | 49 | bitmap_start = offset - block_group->key.objectid; |
50 | bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); | ||
51 | bitmap_start *= bytes_per_bitmap; | ||
52 | bitmap_start += block_group->key.objectid; | ||
52 | 53 | ||
53 | return 0; | 54 | return bitmap_start; |
54 | } | 55 | } |
55 | 56 | ||
56 | static int tree_insert_bytes(struct rb_root *root, u64 bytes, | 57 | static int tree_insert_offset(struct rb_root *root, u64 offset, |
57 | struct rb_node *node) | 58 | struct rb_node *node, int bitmap) |
58 | { | 59 | { |
59 | struct rb_node **p = &root->rb_node; | 60 | struct rb_node **p = &root->rb_node; |
60 | struct rb_node *parent = NULL; | 61 | struct rb_node *parent = NULL; |
@@ -62,12 +63,34 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes, | |||
62 | 63 | ||
63 | while (*p) { | 64 | while (*p) { |
64 | parent = *p; | 65 | parent = *p; |
65 | info = rb_entry(parent, struct btrfs_free_space, bytes_index); | 66 | info = rb_entry(parent, struct btrfs_free_space, offset_index); |
66 | 67 | ||
67 | if (bytes < info->bytes) | 68 | if (offset < info->offset) { |
68 | p = &(*p)->rb_left; | 69 | p = &(*p)->rb_left; |
69 | else | 70 | } else if (offset > info->offset) { |
70 | p = &(*p)->rb_right; | 71 | p = &(*p)->rb_right; |
72 | } else { | ||
73 | /* | ||
74 | * we could have a bitmap entry and an extent entry | ||
75 | * share the same offset. If this is the case, we want | ||
76 | * the extent entry to always be found first if we do a | ||
77 | * linear search through the tree, since we want to have | ||
78 | * the quickest allocation time, and allocating from an | ||
79 | * extent is faster than allocating from a bitmap. So | ||
80 | * if we're inserting a bitmap and we find an entry at | ||
81 | * this offset, we want to go right, or after this entry | ||
82 | * logically. If we are inserting an extent and we've | ||
83 | * found a bitmap, we want to go left, or before | ||
84 | * logically. | ||
85 | */ | ||
86 | if (bitmap) { | ||
87 | WARN_ON(info->bitmap); | ||
88 | p = &(*p)->rb_right; | ||
89 | } else { | ||
90 | WARN_ON(!info->bitmap); | ||
91 | p = &(*p)->rb_left; | ||
92 | } | ||
93 | } | ||
71 | } | 94 | } |
72 | 95 | ||
73 | rb_link_node(node, parent, p); | 96 | rb_link_node(node, parent, p); |
@@ -79,110 +102,143 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes, | |||
79 | /* | 102 | /* |
80 | * searches the tree for the given offset. | 103 | * searches the tree for the given offset. |
81 | * | 104 | * |
82 | * fuzzy == 1: this is used for allocations where we are given a hint of where | 105 | * fuzzy - If this is set, then we are trying to make an allocation, and we just |
83 | * to look for free space. Because the hint may not be completely on an offset | 106 | * want a section that is at least 'bytes' in size and comes at or after the given |
84 | * mark, or the hint may no longer point to free space we need to fudge our | 107 | * offset. |
85 | * results a bit. So we look for free space starting at or after offset with at | ||
86 | * least bytes size. We prefer to find as close to the given offset as we can. | ||
87 | * Also if the offset is within a free space range, then we will return the free | ||
88 | * space that contains the given offset, which means we can return a free space | ||
89 | * chunk with an offset before the provided offset. | ||
90 | * | ||
91 | * fuzzy == 0: this is just a normal tree search. Give us the free space that | ||
92 | * starts at the given offset which is at least bytes size, and if its not there | ||
93 | * return NULL. | ||
94 | */ | 108 | */ |
95 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | 109 | static struct btrfs_free_space * |
96 | u64 offset, u64 bytes, | 110 | tree_search_offset(struct btrfs_block_group_cache *block_group, |
97 | int fuzzy) | 111 | u64 offset, int bitmap_only, int fuzzy) |
98 | { | 112 | { |
99 | struct rb_node *n = root->rb_node; | 113 | struct rb_node *n = block_group->free_space_offset.rb_node; |
100 | struct btrfs_free_space *entry, *ret = NULL; | 114 | struct btrfs_free_space *entry, *prev = NULL; |
115 | |||
116 | /* find entry that is closest to the 'offset' */ | ||
117 | while (1) { | ||
118 | if (!n) { | ||
119 | entry = NULL; | ||
120 | break; | ||
121 | } | ||
101 | 122 | ||
102 | while (n) { | ||
103 | entry = rb_entry(n, struct btrfs_free_space, offset_index); | 123 | entry = rb_entry(n, struct btrfs_free_space, offset_index); |
124 | prev = entry; | ||
104 | 125 | ||
105 | if (offset < entry->offset) { | 126 | if (offset < entry->offset) |
106 | if (fuzzy && | ||
107 | (!ret || entry->offset < ret->offset) && | ||
108 | (bytes <= entry->bytes)) | ||
109 | ret = entry; | ||
110 | n = n->rb_left; | 127 | n = n->rb_left; |
111 | } else if (offset > entry->offset) { | 128 | else if (offset > entry->offset) |
112 | if (fuzzy && | ||
113 | (entry->offset + entry->bytes - 1) >= offset && | ||
114 | bytes <= entry->bytes) { | ||
115 | ret = entry; | ||
116 | break; | ||
117 | } | ||
118 | n = n->rb_right; | 129 | n = n->rb_right; |
119 | } else { | 130 | else |
120 | if (bytes > entry->bytes) { | ||
121 | n = n->rb_right; | ||
122 | continue; | ||
123 | } | ||
124 | ret = entry; | ||
125 | break; | 131 | break; |
126 | } | ||
127 | } | 132 | } |
128 | 133 | ||
129 | return ret; | 134 | if (bitmap_only) { |
130 | } | 135 | if (!entry) |
136 | return NULL; | ||
137 | if (entry->bitmap) | ||
138 | return entry; | ||
131 | 139 | ||
132 | /* | 140 | /* |
133 | * return a chunk at least bytes size, as close to offset that we can get. | 141 | * bitmap entry and extent entry may share same offset, |
134 | */ | 142 | * in that case, bitmap entry comes after extent entry. |
135 | static struct btrfs_free_space *tree_search_bytes(struct rb_root *root, | 143 | */ |
136 | u64 offset, u64 bytes) | 144 | n = rb_next(n); |
137 | { | 145 | if (!n) |
138 | struct rb_node *n = root->rb_node; | 146 | return NULL; |
139 | struct btrfs_free_space *entry, *ret = NULL; | 147 | entry = rb_entry(n, struct btrfs_free_space, offset_index); |
140 | 148 | if (entry->offset != offset) | |
141 | while (n) { | 149 | return NULL; |
142 | entry = rb_entry(n, struct btrfs_free_space, bytes_index); | ||
143 | 150 | ||
144 | if (bytes < entry->bytes) { | 151 | WARN_ON(!entry->bitmap); |
152 | return entry; | ||
153 | } else if (entry) { | ||
154 | if (entry->bitmap) { | ||
145 | /* | 155 | /* |
146 | * We prefer to get a hole size as close to the size we | 156 | * if previous extent entry covers the offset, |
147 | * are asking for so we don't take small slivers out of | 157 | * we should return it instead of the bitmap entry |
148 | * huge holes, but we also want to get as close to the | ||
149 | * offset as possible so we don't have a whole lot of | ||
150 | * fragmentation. | ||
151 | */ | 158 | */ |
152 | if (offset <= entry->offset) { | 159 | n = &entry->offset_index; |
153 | if (!ret) | 160 | while (1) { |
154 | ret = entry; | 161 | n = rb_prev(n); |
155 | else if (entry->bytes < ret->bytes) | 162 | if (!n) |
156 | ret = entry; | 163 | break; |
157 | else if (entry->offset < ret->offset) | 164 | prev = rb_entry(n, struct btrfs_free_space, |
158 | ret = entry; | 165 | offset_index); |
166 | if (!prev->bitmap) { | ||
167 | if (prev->offset + prev->bytes > offset) | ||
168 | entry = prev; | ||
169 | break; | ||
170 | } | ||
159 | } | 171 | } |
160 | n = n->rb_left; | 172 | } |
161 | } else if (bytes > entry->bytes) { | 173 | return entry; |
162 | n = n->rb_right; | 174 | } |
175 | |||
176 | if (!prev) | ||
177 | return NULL; | ||
178 | |||
179 | /* find last entry before the 'offset' */ | ||
180 | entry = prev; | ||
181 | if (entry->offset > offset) { | ||
182 | n = rb_prev(&entry->offset_index); | ||
183 | if (n) { | ||
184 | entry = rb_entry(n, struct btrfs_free_space, | ||
185 | offset_index); | ||
186 | BUG_ON(entry->offset > offset); | ||
163 | } else { | 187 | } else { |
164 | /* | 188 | if (fuzzy) |
165 | * Ok we may have multiple chunks of the wanted size, | 189 | return entry; |
166 | * so we don't want to take the first one we find, we | 190 | else |
167 | * want to take the one closest to our given offset, so | 191 | return NULL; |
168 | * keep searching just in case theres a better match. | ||
169 | */ | ||
170 | n = n->rb_right; | ||
171 | if (offset > entry->offset) | ||
172 | continue; | ||
173 | else if (!ret || entry->offset < ret->offset) | ||
174 | ret = entry; | ||
175 | } | 192 | } |
176 | } | 193 | } |
177 | 194 | ||
178 | return ret; | 195 | if (entry->bitmap) { |
196 | n = &entry->offset_index; | ||
197 | while (1) { | ||
198 | n = rb_prev(n); | ||
199 | if (!n) | ||
200 | break; | ||
201 | prev = rb_entry(n, struct btrfs_free_space, | ||
202 | offset_index); | ||
203 | if (!prev->bitmap) { | ||
204 | if (prev->offset + prev->bytes > offset) | ||
205 | return prev; | ||
206 | break; | ||
207 | } | ||
208 | } | ||
209 | if (entry->offset + BITS_PER_BITMAP * | ||
210 | block_group->sectorsize > offset) | ||
211 | return entry; | ||
212 | } else if (entry->offset + entry->bytes > offset) | ||
213 | return entry; | ||
214 | |||
215 | if (!fuzzy) | ||
216 | return NULL; | ||
217 | |||
218 | while (1) { | ||
219 | if (entry->bitmap) { | ||
220 | if (entry->offset + BITS_PER_BITMAP * | ||
221 | block_group->sectorsize > offset) | ||
222 | break; | ||
223 | } else { | ||
224 | if (entry->offset + entry->bytes > offset) | ||
225 | break; | ||
226 | } | ||
227 | |||
228 | n = rb_next(&entry->offset_index); | ||
229 | if (!n) | ||
230 | return NULL; | ||
231 | entry = rb_entry(n, struct btrfs_free_space, offset_index); | ||
232 | } | ||
233 | return entry; | ||
179 | } | 234 | } |
180 | 235 | ||
181 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | 236 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, |
182 | struct btrfs_free_space *info) | 237 | struct btrfs_free_space *info) |
183 | { | 238 | { |
184 | rb_erase(&info->offset_index, &block_group->free_space_offset); | 239 | rb_erase(&info->offset_index, &block_group->free_space_offset); |
185 | rb_erase(&info->bytes_index, &block_group->free_space_bytes); | 240 | block_group->free_extents--; |
241 | block_group->free_space -= info->bytes; | ||
186 | } | 242 | } |
187 | 243 | ||
188 | static int link_free_space(struct btrfs_block_group_cache *block_group, | 244 | static int link_free_space(struct btrfs_block_group_cache *block_group, |
@@ -190,17 +246,353 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
190 | { | 246 | { |
191 | int ret = 0; | 247 | int ret = 0; |
192 | 248 | ||
193 | 249 | BUG_ON(!info->bitmap && !info->bytes); | |
194 | BUG_ON(!info->bytes); | ||
195 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, | 250 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, |
196 | &info->offset_index); | 251 | &info->offset_index, (info->bitmap != NULL)); |
197 | if (ret) | 252 | if (ret) |
198 | return ret; | 253 | return ret; |
199 | 254 | ||
200 | ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes, | 255 | block_group->free_space += info->bytes; |
201 | &info->bytes_index); | 256 | block_group->free_extents++; |
202 | if (ret) | 257 | return ret; |
203 | return ret; | 258 | } |
259 | |||
260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | ||
261 | { | ||
262 | u64 max_bytes, possible_bytes; | ||
263 | |||
264 | /* | ||
265 | * The goal is to keep the total amount of memory used per 1GB of space | ||
266 | * at or below 32k, so we need to adjust how much memory we allow to be | ||
267 | * used by extent-based free space tracking | ||
268 | */ | ||
269 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | ||
270 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | ||
271 | |||
272 | possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + | ||
273 | (sizeof(struct btrfs_free_space) * | ||
274 | block_group->extents_thresh); | ||
275 | |||
276 | if (possible_bytes > max_bytes) { | ||
277 | int extent_bytes = max_bytes - | ||
278 | (block_group->total_bitmaps * PAGE_CACHE_SIZE); | ||
279 | |||
280 | if (extent_bytes <= 0) { | ||
281 | block_group->extents_thresh = 0; | ||
282 | return; | ||
283 | } | ||
284 | |||
285 | block_group->extents_thresh = extent_bytes / | ||
286 | (sizeof(struct btrfs_free_space)); | ||
287 | } | ||
288 | } | ||
289 | |||
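Plugging numbers into recalculate_thresholds(): a 1GiB block group gets max_bytes = 32k, and because div64_u64() truncates, the budget scales in whole-GiB steps. If four bitmap pages are in use at 4KiB each, bitmaps already consume 16k, leaving 16k for extent entries, i.e. roughly 256 of them at the 64-byte entry size assumed earlier:

/* assumed: 1GiB group, 4KiB pages, 4 bitmaps, 64-byte entries */
u64 max_bytes      = 32768;                     /* 32k * 1GiB     */
u64 bitmap_bytes   = 4 * 4096;                  /* 16k in bitmaps */
int extent_bytes   = max_bytes - bitmap_bytes;  /* 16k left over  */
int extents_thresh = extent_bytes / 64;         /* == 256 entries */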
290 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, | ||
291 | struct btrfs_free_space *info, u64 offset, | ||
292 | u64 bytes) | ||
293 | { | ||
294 | unsigned long start, end; | ||
295 | unsigned long i; | ||
296 | |||
297 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); | ||
298 | end = start + bytes_to_bits(bytes, block_group->sectorsize); | ||
299 | BUG_ON(end > BITS_PER_BITMAP); | ||
300 | |||
301 | for (i = start; i < end; i++) | ||
302 | clear_bit(i, info->bitmap); | ||
303 | |||
304 | info->bytes -= bytes; | ||
305 | block_group->free_space -= bytes; | ||
306 | } | ||
307 | |||
308 | static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, | ||
309 | struct btrfs_free_space *info, u64 offset, | ||
310 | u64 bytes) | ||
311 | { | ||
312 | unsigned long start, end; | ||
313 | unsigned long i; | ||
314 | |||
315 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); | ||
316 | end = start + bytes_to_bits(bytes, block_group->sectorsize); | ||
317 | BUG_ON(end > BITS_PER_BITMAP); | ||
318 | |||
319 | for (i = start; i < end; i++) | ||
320 | set_bit(i, info->bitmap); | ||
321 | |||
322 | info->bytes += bytes; | ||
323 | block_group->free_space += bytes; | ||
324 | } | ||
325 | |||
326 | static int search_bitmap(struct btrfs_block_group_cache *block_group, | ||
327 | struct btrfs_free_space *bitmap_info, u64 *offset, | ||
328 | u64 *bytes) | ||
329 | { | ||
330 | unsigned long found_bits = 0; | ||
331 | unsigned long bits, i; | ||
332 | unsigned long next_zero; | ||
333 | |||
334 | i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, | ||
335 | max_t(u64, *offset, bitmap_info->offset)); | ||
336 | bits = bytes_to_bits(*bytes, block_group->sectorsize); | ||
337 | |||
338 | for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); | ||
339 | i < BITS_PER_BITMAP; | ||
340 | i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
341 | next_zero = find_next_zero_bit(bitmap_info->bitmap, | ||
342 | BITS_PER_BITMAP, i); | ||
343 | if ((next_zero - i) >= bits) { | ||
344 | found_bits = next_zero - i; | ||
345 | break; | ||
346 | } | ||
347 | i = next_zero; | ||
348 | } | ||
349 | |||
350 | if (found_bits) { | ||
351 | *offset = (u64)(i * block_group->sectorsize) + | ||
352 | bitmap_info->offset; | ||
353 | *bytes = (u64)(found_bits) * block_group->sectorsize; | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | return -1; | ||
358 | } | ||
359 | |||
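search_bitmap() uses the standard bitops idiom for run-finding: find_next_bit() gives the start of a run of set bits, find_next_zero_bit() gives its end, and the difference is the run length. The same idiom in isolation, as a sketch rather than the btrfs function:

#include <linux/bitops.h>

/* return the start of the first run of at least 'bits' set bits,
 * or 'size' if no such run exists */
static unsigned long find_set_run(const unsigned long *map,
                                  unsigned long size, unsigned long bits)
{
        unsigned long i, next_zero;

        for (i = find_next_bit(map, size, 0);
             i < size;
             i = find_next_bit(map, size, next_zero)) {
                next_zero = find_next_zero_bit(map, size, i);
                if (next_zero - i >= bits)
                        return i;
        }
        return size;
}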
360 | static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache | ||
361 | *block_group, u64 *offset, | ||
362 | u64 *bytes, int debug) | ||
363 | { | ||
364 | struct btrfs_free_space *entry; | ||
365 | struct rb_node *node; | ||
366 | int ret; | ||
367 | |||
368 | if (!block_group->free_space_offset.rb_node) | ||
369 | return NULL; | ||
370 | |||
371 | entry = tree_search_offset(block_group, | ||
372 | offset_to_bitmap(block_group, *offset), | ||
373 | 0, 1); | ||
374 | if (!entry) | ||
375 | return NULL; | ||
376 | |||
377 | for (node = &entry->offset_index; node; node = rb_next(node)) { | ||
378 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
379 | if (entry->bytes < *bytes) | ||
380 | continue; | ||
381 | |||
382 | if (entry->bitmap) { | ||
383 | ret = search_bitmap(block_group, entry, offset, bytes); | ||
384 | if (!ret) | ||
385 | return entry; | ||
386 | continue; | ||
387 | } | ||
388 | |||
389 | *offset = entry->offset; | ||
390 | *bytes = entry->bytes; | ||
391 | return entry; | ||
392 | } | ||
393 | |||
394 | return NULL; | ||
395 | } | ||
396 | |||
397 | static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | ||
398 | struct btrfs_free_space *info, u64 offset) | ||
399 | { | ||
400 | u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; | ||
401 | int max_bitmaps = (int)div64_u64(block_group->key.offset + | ||
402 | bytes_per_bg - 1, bytes_per_bg); | ||
403 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); | ||
404 | |||
405 | info->offset = offset_to_bitmap(block_group, offset); | ||
406 | link_free_space(block_group, info); | ||
407 | block_group->total_bitmaps++; | ||
408 | |||
409 | recalculate_thresholds(block_group); | ||
410 | } | ||
411 | |||
412 | static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, | ||
413 | struct btrfs_free_space *bitmap_info, | ||
414 | u64 *offset, u64 *bytes) | ||
415 | { | ||
416 | u64 end; | ||
417 | u64 search_start, search_bytes; | ||
418 | int ret; | ||
419 | |||
420 | again: | ||
421 | end = bitmap_info->offset + | ||
422 | (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; | ||
423 | |||
424 | /* | ||
425 | * XXX - this can go away after a few releases. | ||
426 | * | ||
427 | * since the only user of btrfs_remove_free_space is the tree logging | ||
428 | * stuff, and the only way to test that is under crash conditions, we | ||
429 | * want to have this debug stuff here just in case something's not | ||
430 | * working. Search the bitmap for the space we are trying to use to | ||
431 | * make sure it's actually there. If it's not there then we need to stop | ||
432 | * because something has gone wrong. | ||
433 | */ | ||
434 | search_start = *offset; | ||
435 | search_bytes = *bytes; | ||
436 | ret = search_bitmap(block_group, bitmap_info, &search_start, | ||
437 | &search_bytes); | ||
438 | BUG_ON(ret < 0 || search_start != *offset); | ||
439 | |||
440 | if (*offset > bitmap_info->offset && *offset + *bytes > end) { | ||
441 | bitmap_clear_bits(block_group, bitmap_info, *offset, | ||
442 | end - *offset + 1); | ||
443 | *bytes -= end - *offset + 1; | ||
444 | *offset = end + 1; | ||
445 | } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { | ||
446 | bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); | ||
447 | *bytes = 0; | ||
448 | } | ||
449 | |||
450 | if (*bytes) { | ||
451 | struct rb_node *next = rb_next(&bitmap_info->offset_index); | ||
452 | if (!bitmap_info->bytes) { | ||
453 | unlink_free_space(block_group, bitmap_info); | ||
454 | kfree(bitmap_info->bitmap); | ||
455 | kfree(bitmap_info); | ||
456 | block_group->total_bitmaps--; | ||
457 | recalculate_thresholds(block_group); | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * no entry after this bitmap, but we still have bytes to | ||
462 | * remove, so something has gone wrong. | ||
463 | */ | ||
464 | if (!next) | ||
465 | return -EINVAL; | ||
466 | |||
467 | bitmap_info = rb_entry(next, struct btrfs_free_space, | ||
468 | offset_index); | ||
469 | |||
470 | /* | ||
471 | * if the next entry isn't a bitmap we need to return to let the | ||
472 | * extent stuff do its work. | ||
473 | */ | ||
474 | if (!bitmap_info->bitmap) | ||
475 | return -EAGAIN; | ||
476 | |||
477 | /* | ||
478 | * Ok the next item is a bitmap, but it may not actually hold | ||
479 | * the information for the rest of this free space stuff, so | ||
480 | * look for it, and if we don't find it return so we can try | ||
481 | * everything over again. | ||
482 | */ | ||
483 | search_start = *offset; | ||
484 | search_bytes = *bytes; | ||
485 | ret = search_bitmap(block_group, bitmap_info, &search_start, | ||
486 | &search_bytes); | ||
487 | if (ret < 0 || search_start != *offset) | ||
488 | return -EAGAIN; | ||
489 | |||
490 | goto again; | ||
491 | } else if (!bitmap_info->bytes) { | ||
492 | unlink_free_space(block_group, bitmap_info); | ||
493 | kfree(bitmap_info->bitmap); | ||
494 | kfree(bitmap_info); | ||
495 | block_group->total_bitmaps--; | ||
496 | recalculate_thresholds(block_group); | ||
497 | } | ||
498 | |||
499 | return 0; | ||
500 | } | ||
501 | |||
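The again: loop above peels a removal one bitmap at a time: whatever extends past the current bitmap's end is carried into the next iteration against the following rbtree entry, and a non-bitmap successor bounces the remainder back to the caller with -EAGAIN. Worked numbers, reusing the assumed 128MiB-per-bitmap coverage from earlier:

/* removing 16MiB starting at bg + 120MiB:
 *   pass 1: bitmap covers [bg, bg + 128MiB)
 *           clear [bg + 120MiB, bg + 128MiB), 8MiB still to remove
 *   pass 2: next entry must cover bg + 128MiB
 *           clear [bg + 128MiB, bg + 136MiB), bytes == 0, done
 * if the next entry had been an extent instead of a bitmap, the
 * function would return -EAGAIN and let the extent path finish up
 */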
502 | static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, | ||
503 | struct btrfs_free_space *info) | ||
504 | { | ||
505 | struct btrfs_free_space *bitmap_info; | ||
506 | int added = 0; | ||
507 | u64 bytes, offset, end; | ||
508 | int ret; | ||
509 | |||
510 | /* | ||
511 | * If we are below the extents threshold then we can add this as an | ||
512 | * extent, and don't have to deal with the bitmap | ||
513 | */ | ||
514 | if (block_group->free_extents < block_group->extents_thresh && | ||
515 | info->bytes > block_group->sectorsize * 4) | ||
516 | return 0; | ||
517 | |||
518 | /* | ||
519 | * some block groups are so tiny they can't be enveloped by a bitmap, so | ||
520 | * don't even bother to create a bitmap for this | ||
521 | */ | ||
522 | if (BITS_PER_BITMAP * block_group->sectorsize > | ||
523 | block_group->key.offset) | ||
524 | return 0; | ||
525 | |||
526 | bytes = info->bytes; | ||
527 | offset = info->offset; | ||
528 | |||
529 | again: | ||
530 | bitmap_info = tree_search_offset(block_group, | ||
531 | offset_to_bitmap(block_group, offset), | ||
532 | 1, 0); | ||
533 | if (!bitmap_info) { | ||
534 | BUG_ON(added); | ||
535 | goto new_bitmap; | ||
536 | } | ||
537 | |||
538 | end = bitmap_info->offset + | ||
539 | (u64)(BITS_PER_BITMAP * block_group->sectorsize); | ||
540 | |||
541 | if (offset >= bitmap_info->offset && offset + bytes > end) { | ||
542 | bitmap_set_bits(block_group, bitmap_info, offset, | ||
543 | end - offset); | ||
544 | bytes -= end - offset; | ||
545 | offset = end; | ||
546 | added = 0; | ||
547 | } else if (offset >= bitmap_info->offset && offset + bytes <= end) { | ||
548 | bitmap_set_bits(block_group, bitmap_info, offset, bytes); | ||
549 | bytes = 0; | ||
550 | } else { | ||
551 | BUG(); | ||
552 | } | ||
553 | |||
554 | if (!bytes) { | ||
555 | ret = 1; | ||
556 | goto out; | ||
557 | } else | ||
558 | goto again; | ||
559 | |||
560 | new_bitmap: | ||
561 | if (info && info->bitmap) { | ||
562 | add_new_bitmap(block_group, info, offset); | ||
563 | added = 1; | ||
564 | info = NULL; | ||
565 | goto again; | ||
566 | } else { | ||
567 | spin_unlock(&block_group->tree_lock); | ||
568 | |||
569 | /* no pre-allocated info, allocate a new one */ | ||
570 | if (!info) { | ||
571 | info = kzalloc(sizeof(struct btrfs_free_space), | ||
572 | GFP_NOFS); | ||
573 | if (!info) { | ||
574 | spin_lock(&block_group->tree_lock); | ||
575 | ret = -ENOMEM; | ||
576 | goto out; | ||
577 | } | ||
578 | } | ||
579 | |||
580 | /* allocate the bitmap */ | ||
581 | info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
582 | spin_lock(&block_group->tree_lock); | ||
583 | if (!info->bitmap) { | ||
584 | ret = -ENOMEM; | ||
585 | goto out; | ||
586 | } | ||
587 | goto again; | ||
588 | } | ||
589 | |||
590 | out: | ||
591 | if (info) { | ||
592 | if (info->bitmap) | ||
593 | kfree(info->bitmap); | ||
594 | kfree(info); | ||
595 | } | ||
204 | 596 | ||
205 | return ret; | 597 | return ret; |
206 | } | 598 | } |
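insert_into_bitmap() keys every lookup by the start offset of the bitmap that would cover a given byte, via offset_to_bitmap(). A sketch of that rounding helper, under the assumption that bitmaps are laid out back to back from the block group's start (key.objectid):

	static u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				    u64 offset)
	{
		u64 bytes_per_bitmap = (u64)BITS_PER_BITMAP * block_group->sectorsize;
		u64 n;

		/* round down to a bitmap boundary relative to the group start */
		n = div64_u64(offset - block_group->key.objectid, bytes_per_bitmap);
		return block_group->key.objectid + n * bytes_per_bitmap;
	}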
@@ -208,8 +600,8 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
208 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 600 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, |
209 | u64 offset, u64 bytes) | 601 | u64 offset, u64 bytes) |
210 | { | 602 | { |
211 | struct btrfs_free_space *right_info; | 603 | struct btrfs_free_space *right_info = NULL; |
212 | struct btrfs_free_space *left_info; | 604 | struct btrfs_free_space *left_info = NULL; |
213 | struct btrfs_free_space *info = NULL; | 605 | struct btrfs_free_space *info = NULL; |
214 | int ret = 0; | 606 | int ret = 0; |
215 | 607 | ||
@@ -227,18 +619,38 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
227 | * are adding, if there is remove that struct and add a new one to | 619 | * are adding, if there is remove that struct and add a new one to |
228 | * cover the entire range | 620 | * cover the entire range |
229 | */ | 621 | */ |
230 | right_info = tree_search_offset(&block_group->free_space_offset, | 622 | right_info = tree_search_offset(block_group, offset + bytes, 0, 0); |
231 | offset+bytes, 0, 0); | 623 | if (right_info && rb_prev(&right_info->offset_index)) |
232 | left_info = tree_search_offset(&block_group->free_space_offset, | 624 | left_info = rb_entry(rb_prev(&right_info->offset_index), |
233 | offset-1, 0, 1); | 625 | struct btrfs_free_space, offset_index); |
626 | else | ||
627 | left_info = tree_search_offset(block_group, offset - 1, 0, 0); | ||
628 | |||
629 | /* | ||
630 | * If there was no extent directly to the left or right of this new | ||
631 | * extent then we know we're going to have to allocate a new extent, so | ||
632 | * before we do that, see if we need to drop this into a bitmap | ||

633 | */ | ||
634 | if ((!left_info || left_info->bitmap) && | ||
635 | (!right_info || right_info->bitmap)) { | ||
636 | ret = insert_into_bitmap(block_group, info); | ||
637 | |||
638 | if (ret < 0) { | ||
639 | goto out; | ||
640 | } else if (ret) { | ||
641 | ret = 0; | ||
642 | goto out; | ||
643 | } | ||
644 | } | ||
234 | 645 | ||
235 | if (right_info) { | 646 | if (right_info && !right_info->bitmap) { |
236 | unlink_free_space(block_group, right_info); | 647 | unlink_free_space(block_group, right_info); |
237 | info->bytes += right_info->bytes; | 648 | info->bytes += right_info->bytes; |
238 | kfree(right_info); | 649 | kfree(right_info); |
239 | } | 650 | } |
240 | 651 | ||
241 | if (left_info && left_info->offset + left_info->bytes == offset) { | 652 | if (left_info && !left_info->bitmap && |
653 | left_info->offset + left_info->bytes == offset) { | ||
242 | unlink_free_space(block_group, left_info); | 654 | unlink_free_space(block_group, left_info); |
243 | info->offset = left_info->offset; | 655 | info->offset = left_info->offset; |
244 | info->bytes += left_info->bytes; | 656 | info->bytes += left_info->bytes; |
@@ -248,11 +660,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
248 | ret = link_free_space(block_group, info); | 660 | ret = link_free_space(block_group, info); |
249 | if (ret) | 661 | if (ret) |
250 | kfree(info); | 662 | kfree(info); |
251 | 663 | out: | |
252 | spin_unlock(&block_group->tree_lock); | 664 | spin_unlock(&block_group->tree_lock); |
253 | 665 | ||
254 | if (ret) { | 666 | if (ret) { |
255 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); | 667 | printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); |
256 | BUG_ON(ret == -EEXIST); | 668 | BUG_ON(ret == -EEXIST); |
257 | } | 669 | } |
258 | 670 | ||
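The add path above stamps ranges into a bitmap with bitmap_set_bits(). A minimal sketch of what such a helper has to do; any accounting it updates beyond info->bytes is an assumption on my part:

	static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
				    struct btrfs_free_space *info,
				    u64 offset, u64 bytes)
	{
		unsigned long start, end, i;

		start = offset_to_bit(info->offset, block_group->sectorsize, offset);
		end = start + bytes_to_bits(bytes, block_group->sectorsize);
		BUG_ON(end > BITS_PER_BITMAP);

		for (i = start; i < end; i++)
			set_bit(i, info->bitmap);

		info->bytes += bytes;	/* keep the entry's byte count in sync */
	}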
@@ -263,40 +675,74 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
263 | u64 offset, u64 bytes) | 675 | u64 offset, u64 bytes) |
264 | { | 676 | { |
265 | struct btrfs_free_space *info; | 677 | struct btrfs_free_space *info; |
678 | struct btrfs_free_space *next_info = NULL; | ||
266 | int ret = 0; | 679 | int ret = 0; |
267 | 680 | ||
268 | spin_lock(&block_group->tree_lock); | 681 | spin_lock(&block_group->tree_lock); |
269 | 682 | ||
270 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, | 683 | again: |
271 | 1); | 684 | info = tree_search_offset(block_group, offset, 0, 0); |
272 | if (info && info->offset == offset) { | 685 | if (!info) { |
273 | if (info->bytes < bytes) { | 686 | /* |
274 | printk(KERN_ERR "Found free space at %llu, size %llu," | 687 | * oops, didn't find an extent that matched the space we wanted |
275 | "trying to use %llu\n", | 688 | * to remove, look for a bitmap instead |
276 | (unsigned long long)info->offset, | 689 | */ |
277 | (unsigned long long)info->bytes, | 690 | info = tree_search_offset(block_group, |
278 | (unsigned long long)bytes); | 691 | offset_to_bitmap(block_group, offset), |
692 | 1, 0); | ||
693 | if (!info) { | ||
694 | WARN_ON(1); | ||
695 | goto out_lock; | ||
696 | } | ||
697 | } | ||
698 | |||
699 | if (info->bytes < bytes && rb_next(&info->offset_index)) { | ||
700 | u64 end; | ||
701 | next_info = rb_entry(rb_next(&info->offset_index), | ||
702 | struct btrfs_free_space, | ||
703 | offset_index); | ||
704 | |||
705 | if (next_info->bitmap) | ||
706 | end = next_info->offset + BITS_PER_BITMAP * | ||
707 | block_group->sectorsize - 1; | ||
708 | else | ||
709 | end = next_info->offset + next_info->bytes; | ||
710 | |||
711 | if (next_info->bytes < bytes || | ||
712 | next_info->offset > offset || offset > end) { | ||
713 | printk(KERN_CRIT "Found free space at %llu, size %llu," | ||
714 | " trying to use %llu\n", | ||
715 | (unsigned long long)info->offset, | ||
716 | (unsigned long long)info->bytes, | ||
717 | (unsigned long long)bytes); | ||
279 | WARN_ON(1); | 718 | WARN_ON(1); |
280 | ret = -EINVAL; | 719 | ret = -EINVAL; |
281 | spin_unlock(&block_group->tree_lock); | 720 | goto out_lock; |
282 | goto out; | ||
283 | } | 721 | } |
284 | unlink_free_space(block_group, info); | ||
285 | 722 | ||
286 | if (info->bytes == bytes) { | 723 | info = next_info; |
287 | kfree(info); | 724 | } |
288 | spin_unlock(&block_group->tree_lock); | 725 | |
289 | goto out; | 726 | if (info->bytes == bytes) { |
727 | unlink_free_space(block_group, info); | ||
728 | if (info->bitmap) { | ||
729 | kfree(info->bitmap); | ||
730 | block_group->total_bitmaps--; | ||
290 | } | 731 | } |
732 | kfree(info); | ||
733 | goto out_lock; | ||
734 | } | ||
291 | 735 | ||
736 | if (!info->bitmap && info->offset == offset) { | ||
737 | unlink_free_space(block_group, info); | ||
292 | info->offset += bytes; | 738 | info->offset += bytes; |
293 | info->bytes -= bytes; | 739 | info->bytes -= bytes; |
740 | link_free_space(block_group, info); | ||
741 | goto out_lock; | ||
742 | } | ||
294 | 743 | ||
295 | ret = link_free_space(block_group, info); | 744 | if (!info->bitmap && info->offset <= offset && |
296 | spin_unlock(&block_group->tree_lock); | 745 | info->offset + info->bytes >= offset + bytes) { |
297 | BUG_ON(ret); | ||
298 | } else if (info && info->offset < offset && | ||
299 | info->offset + info->bytes >= offset + bytes) { | ||
300 | u64 old_start = info->offset; | 746 | u64 old_start = info->offset; |
301 | /* | 747 | /* |
302 | * we're freeing space in the middle of the info, | 748 | * we're freeing space in the middle of the info, |
@@ -312,7 +758,9 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
312 | info->offset = offset + bytes; | 758 | info->offset = offset + bytes; |
313 | info->bytes = old_end - info->offset; | 759 | info->bytes = old_end - info->offset; |
314 | ret = link_free_space(block_group, info); | 760 | ret = link_free_space(block_group, info); |
315 | BUG_ON(ret); | 761 | WARN_ON(ret); |
762 | if (ret) | ||
763 | goto out_lock; | ||
316 | } else { | 764 | } else { |
317 | /* the hole we're creating ends at the end | 765 | /* the hole we're creating ends at the end |
318 | * of the info struct, just free the info | 766 | * of the info struct, just free the info |
@@ -320,32 +768,22 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
320 | kfree(info); | 768 | kfree(info); |
321 | } | 769 | } |
322 | spin_unlock(&block_group->tree_lock); | 770 | spin_unlock(&block_group->tree_lock); |
323 | /* step two, insert a new info struct to cover anything | 771 | |
324 | * before the hole | 772 | /* step two, insert a new info struct to cover |
773 | * anything before the hole | ||
325 | */ | 774 | */ |
326 | ret = btrfs_add_free_space(block_group, old_start, | 775 | ret = btrfs_add_free_space(block_group, old_start, |
327 | offset - old_start); | 776 | offset - old_start); |
328 | BUG_ON(ret); | 777 | WARN_ON(ret); |
329 | } else { | 778 | goto out; |
330 | spin_unlock(&block_group->tree_lock); | ||
331 | if (!info) { | ||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | ||
333 | (unsigned long long)offset); | ||
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | ||
335 | block_group->cached, | ||
336 | (unsigned long long)block_group->key.objectid, | ||
337 | (unsigned long long)block_group->key.offset); | ||
338 | btrfs_dump_free_space(block_group, bytes); | ||
339 | } else if (info) { | ||
340 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | ||
341 | "but wanted offset=%llu bytes=%llu\n", | ||
342 | (unsigned long long)info->offset, | ||
343 | (unsigned long long)info->bytes, | ||
344 | (unsigned long long)offset, | ||
345 | (unsigned long long)bytes); | ||
346 | } | ||
347 | WARN_ON(1); | ||
348 | } | 779 | } |
780 | |||
781 | ret = remove_from_bitmap(block_group, info, &offset, &bytes); | ||
782 | if (ret == -EAGAIN) | ||
783 | goto again; | ||
784 | BUG_ON(ret); | ||
785 | out_lock: | ||
786 | spin_unlock(&block_group->tree_lock); | ||
349 | out: | 787 | out: |
350 | return ret; | 788 | return ret; |
351 | } | 789 | } |
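The mid-extent branch above is easiest to follow with concrete numbers: freeing [offset, offset + bytes) out of [old_start, old_end) leaves a tail that is relinked in place and a head that is re-added once the lock is dropped. Restating the hunk with old_start = 0, info->bytes = 1M, offset = 256K, bytes = 128K:

	u64 old_start = info->offset;            /* 0  */
	u64 old_end   = old_start + info->bytes; /* 1M */

	info->offset = offset + bytes;           /* 384K: tail begins after the hole */
	info->bytes  = old_end - info->offset;   /* 640K: tail length                */
	ret = link_free_space(block_group, info);

	/* ... unlock, then re-add the 256K head [old_start, offset) */
	ret = btrfs_add_free_space(block_group, old_start, offset - old_start);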
@@ -361,10 +799,13 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
361 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 799 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
362 | if (info->bytes >= bytes) | 800 | if (info->bytes >= bytes) |
363 | count++; | 801 | count++; |
364 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", | 802 | printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", |
365 | (unsigned long long)info->offset, | 803 | (unsigned long long)info->offset, |
366 | (unsigned long long)info->bytes); | 804 | (unsigned long long)info->bytes, |
805 | (info->bitmap) ? "yes" : "no"); | ||
367 | } | 806 | } |
807 | printk(KERN_INFO "block group has cluster?: %s\n", | ||
808 | list_empty(&block_group->cluster_list) ? "no" : "yes"); | ||
368 | printk(KERN_INFO "%d blocks of free space at or bigger than 'bytes'" | 809 | printk(KERN_INFO "%d blocks of free space at or bigger than 'bytes'" |
369 | "\n", count); | 810 | "\n", count); |
370 | } | 811 | } |
@@ -397,26 +838,35 @@ __btrfs_return_cluster_to_free_space( | |||
397 | { | 838 | { |
398 | struct btrfs_free_space *entry; | 839 | struct btrfs_free_space *entry; |
399 | struct rb_node *node; | 840 | struct rb_node *node; |
841 | bool bitmap; | ||
400 | 842 | ||
401 | spin_lock(&cluster->lock); | 843 | spin_lock(&cluster->lock); |
402 | if (cluster->block_group != block_group) | 844 | if (cluster->block_group != block_group) |
403 | goto out; | 845 | goto out; |
404 | 846 | ||
847 | bitmap = cluster->points_to_bitmap; | ||
848 | cluster->block_group = NULL; | ||
405 | cluster->window_start = 0; | 849 | cluster->window_start = 0; |
850 | list_del_init(&cluster->block_group_list); | ||
851 | cluster->points_to_bitmap = false; | ||
852 | |||
853 | if (bitmap) | ||
854 | goto out; | ||
855 | |||
406 | node = rb_first(&cluster->root); | 856 | node = rb_first(&cluster->root); |
407 | while(node) { | 857 | while (node) { |
408 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 858 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
409 | node = rb_next(&entry->offset_index); | 859 | node = rb_next(&entry->offset_index); |
410 | rb_erase(&entry->offset_index, &cluster->root); | 860 | rb_erase(&entry->offset_index, &cluster->root); |
411 | link_free_space(block_group, entry); | 861 | BUG_ON(entry->bitmap); |
862 | tree_insert_offset(&block_group->free_space_offset, | ||
863 | entry->offset, &entry->offset_index, 0); | ||
412 | } | 864 | } |
413 | list_del_init(&cluster->block_group_list); | ||
414 | |||
415 | btrfs_put_block_group(cluster->block_group); | ||
416 | cluster->block_group = NULL; | ||
417 | cluster->root.rb_node = NULL; | 865 | cluster->root.rb_node = NULL; |
866 | |||
418 | out: | 867 | out: |
419 | spin_unlock(&cluster->lock); | 868 | spin_unlock(&cluster->lock); |
869 | btrfs_put_block_group(block_group); | ||
420 | return 0; | 870 | return 0; |
421 | } | 871 | } |
422 | 872 | ||
@@ -425,20 +875,28 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
425 | struct btrfs_free_space *info; | 875 | struct btrfs_free_space *info; |
426 | struct rb_node *node; | 876 | struct rb_node *node; |
427 | struct btrfs_free_cluster *cluster; | 877 | struct btrfs_free_cluster *cluster; |
428 | struct btrfs_free_cluster *safe; | 878 | struct list_head *head; |
429 | 879 | ||
430 | spin_lock(&block_group->tree_lock); | 880 | spin_lock(&block_group->tree_lock); |
431 | 881 | while ((head = block_group->cluster_list.next) != | |
432 | list_for_each_entry_safe(cluster, safe, &block_group->cluster_list, | 882 | &block_group->cluster_list) { |
433 | block_group_list) { | 883 | cluster = list_entry(head, struct btrfs_free_cluster, |
884 | block_group_list); | ||
434 | 885 | ||
435 | WARN_ON(cluster->block_group != block_group); | 886 | WARN_ON(cluster->block_group != block_group); |
436 | __btrfs_return_cluster_to_free_space(block_group, cluster); | 887 | __btrfs_return_cluster_to_free_space(block_group, cluster); |
888 | if (need_resched()) { | ||
889 | spin_unlock(&block_group->tree_lock); | ||
890 | cond_resched(); | ||
891 | spin_lock(&block_group->tree_lock); | ||
892 | } | ||
437 | } | 893 | } |
438 | 894 | ||
439 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { | 895 | while ((node = rb_last(&block_group->free_space_offset)) != NULL) { |
440 | info = rb_entry(node, struct btrfs_free_space, bytes_index); | 896 | info = rb_entry(node, struct btrfs_free_space, offset_index); |
441 | unlink_free_space(block_group, info); | 897 | unlink_free_space(block_group, info); |
898 | if (info->bitmap) | ||
899 | kfree(info->bitmap); | ||
442 | kfree(info); | 900 | kfree(info); |
443 | if (need_resched()) { | 901 | if (need_resched()) { |
444 | spin_unlock(&block_group->tree_lock); | 902 | spin_unlock(&block_group->tree_lock); |
@@ -446,6 +904,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
446 | spin_lock(&block_group->tree_lock); | 904 | spin_lock(&block_group->tree_lock); |
447 | } | 905 | } |
448 | } | 906 | } |
907 | |||
449 | spin_unlock(&block_group->tree_lock); | 908 | spin_unlock(&block_group->tree_lock); |
450 | } | 909 | } |
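Note the cluster drain above re-reads the list head on every pass instead of using list_for_each_entry_safe(): tree_lock may be dropped inside the loop, so a cached next pointer could go stale. Progress is still guaranteed because __btrfs_return_cluster_to_free_space() unlinks the cluster via list_del_init(). The idiom in isolation, with the reasoning as comments:

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);
		/* unlinks 'cluster', so the next iteration sees a new head */
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			/* safe only because nothing is cached across this gap */
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}
	spin_unlock(&block_group->tree_lock);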
451 | 910 | ||
@@ -453,25 +912,35 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
453 | u64 offset, u64 bytes, u64 empty_size) | 912 | u64 offset, u64 bytes, u64 empty_size) |
454 | { | 913 | { |
455 | struct btrfs_free_space *entry = NULL; | 914 | struct btrfs_free_space *entry = NULL; |
915 | u64 bytes_search = bytes + empty_size; | ||
456 | u64 ret = 0; | 916 | u64 ret = 0; |
457 | 917 | ||
458 | spin_lock(&block_group->tree_lock); | 918 | spin_lock(&block_group->tree_lock); |
459 | entry = tree_search_offset(&block_group->free_space_offset, offset, | 919 | entry = find_free_space(block_group, &offset, &bytes_search, 0); |
460 | bytes + empty_size, 1); | ||
461 | if (!entry) | 920 | if (!entry) |
462 | entry = tree_search_bytes(&block_group->free_space_bytes, | 921 | goto out; |
463 | offset, bytes + empty_size); | 922 | |
464 | if (entry) { | 923 | ret = offset; |
924 | if (entry->bitmap) { | ||
925 | bitmap_clear_bits(block_group, entry, offset, bytes); | ||
926 | if (!entry->bytes) { | ||
927 | unlink_free_space(block_group, entry); | ||
928 | kfree(entry->bitmap); | ||
929 | kfree(entry); | ||
930 | block_group->total_bitmaps--; | ||
931 | recalculate_thresholds(block_group); | ||
932 | } | ||
933 | } else { | ||
465 | unlink_free_space(block_group, entry); | 934 | unlink_free_space(block_group, entry); |
466 | ret = entry->offset; | ||
467 | entry->offset += bytes; | 935 | entry->offset += bytes; |
468 | entry->bytes -= bytes; | 936 | entry->bytes -= bytes; |
469 | |||
470 | if (!entry->bytes) | 937 | if (!entry->bytes) |
471 | kfree(entry); | 938 | kfree(entry); |
472 | else | 939 | else |
473 | link_free_space(block_group, entry); | 940 | link_free_space(block_group, entry); |
474 | } | 941 | } |
942 | |||
943 | out: | ||
475 | spin_unlock(&block_group->tree_lock); | 944 | spin_unlock(&block_group->tree_lock); |
476 | 945 | ||
477 | return ret; | 946 | return ret; |
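btrfs_find_space_for_alloc() now funnels through find_free_space(), which understands both entry types. A hedged reconstruction of its contract from the call site above — the fourth parameter's name and the starting-point heuristic are assumptions: walk entries from the first plausible candidate onward; extents report their own offset and size, bitmaps defer to search_bitmap() (sketched below):

	static struct btrfs_free_space *
	find_free_space(struct btrfs_block_group_cache *block_group,
			u64 *offset, u64 *bytes, int debug /* name assumed */)
	{
		struct btrfs_free_space *entry;
		struct rb_node *node;

		/* fuzzy search from the bitmap that could cover *offset */
		entry = tree_search_offset(block_group,
					   offset_to_bitmap(block_group, *offset),
					   0, 1);
		if (!entry)
			return NULL;

		for (node = &entry->offset_index; node; node = rb_next(node)) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			if (entry->bytes < *bytes)
				continue;

			if (entry->bitmap) {
				if (!search_bitmap(block_group, entry,
						   offset, bytes))
					return entry;
				continue;
			}

			*offset = entry->offset;
			*bytes = entry->bytes;
			return entry;
		}

		return NULL;
	}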
@@ -517,6 +986,54 @@ int btrfs_return_cluster_to_free_space( | |||
517 | return ret; | 986 | return ret; |
518 | } | 987 | } |
519 | 988 | ||
989 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | ||
990 | struct btrfs_free_cluster *cluster, | ||
991 | u64 bytes, u64 min_start) | ||
992 | { | ||
993 | struct btrfs_free_space *entry; | ||
994 | int err; | ||
995 | u64 search_start = cluster->window_start; | ||
996 | u64 search_bytes = bytes; | ||
997 | u64 ret = 0; | ||
998 | |||
999 | spin_lock(&block_group->tree_lock); | ||
1000 | spin_lock(&cluster->lock); | ||
1001 | |||
1002 | if (!cluster->points_to_bitmap) | ||
1003 | goto out; | ||
1004 | |||
1005 | if (cluster->block_group != block_group) | ||
1006 | goto out; | ||
1007 | |||
1008 | /* | ||
1009 | * search_start is the beginning of the bitmap, but at some point it may | ||
1010 | * be a good idea to point to the actual start of the free area in the | ||
1011 | * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only | ||
1012 | * to 1 to make sure we get the bitmap entry | ||
1013 | */ | ||
1014 | entry = tree_search_offset(block_group, | ||
1015 | offset_to_bitmap(block_group, search_start), | ||
1016 | 1, 0); | ||
1017 | if (!entry || !entry->bitmap) | ||
1018 | goto out; | ||
1019 | |||
1020 | search_start = min_start; | ||
1021 | search_bytes = bytes; | ||
1022 | |||
1023 | err = search_bitmap(block_group, entry, &search_start, | ||
1024 | &search_bytes); | ||
1025 | if (err) | ||
1026 | goto out; | ||
1027 | |||
1028 | ret = search_start; | ||
1029 | bitmap_clear_bits(block_group, entry, ret, bytes); | ||
1030 | out: | ||
1031 | spin_unlock(&cluster->lock); | ||
1032 | spin_unlock(&block_group->tree_lock); | ||
1033 | |||
1034 | return ret; | ||
1035 | } | ||
1036 | |||
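search_bitmap(), used by remove_from_bitmap() and btrfs_alloc_from_bitmap() above, finds the first run of set bits long enough to hold *bytes at or after *offset and reports the hit through its pointer arguments; a nonzero return means no fit, which the remove path turns into -EAGAIN. A sketch built on the conversion helpers from earlier:

	static int search_bitmap(struct btrfs_block_group_cache *block_group,
				 struct btrfs_free_space *bitmap_info,
				 u64 *offset, u64 *bytes)
	{
		unsigned long found_bits = 0;
		unsigned long bits, i, next_zero;

		i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
				  max_t(u64, *offset, bitmap_info->offset));
		bits = bytes_to_bits(*bytes, block_group->sectorsize);

		for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
		     i < BITS_PER_BITMAP;
		     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
			next_zero = find_next_zero_bit(bitmap_info->bitmap,
						       BITS_PER_BITMAP, i);
			if (next_zero - i >= bits) {
				found_bits = next_zero - i;
				break;
			}
			/* run too short: skip past it before searching again */
			i = next_zero;
		}

		if (found_bits) {
			*offset = bitmap_info->offset +
				(u64)i * block_group->sectorsize;
			*bytes = (u64)found_bits * block_group->sectorsize;
			return 0;
		}

		return -1;
	}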
520 | /* | 1037 | /* |
521 | * given a cluster, try to allocate 'bytes' from it, returns 0 | 1038 | * given a cluster, try to allocate 'bytes' from it, returns 0 |
522 | * if it couldn't find anything suitably large, or a logical disk offset | 1039 | * if it couldn't find anything suitably large, or a logical disk offset |
@@ -530,6 +1047,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
530 | struct rb_node *node; | 1047 | struct rb_node *node; |
531 | u64 ret = 0; | 1048 | u64 ret = 0; |
532 | 1049 | ||
1050 | if (cluster->points_to_bitmap) | ||
1051 | return btrfs_alloc_from_bitmap(block_group, cluster, bytes, | ||
1052 | min_start); | ||
1053 | |||
533 | spin_lock(&cluster->lock); | 1054 | spin_lock(&cluster->lock); |
534 | if (bytes > cluster->max_size) | 1055 | if (bytes > cluster->max_size) |
535 | goto out; | 1056 | goto out; |
@@ -567,9 +1088,73 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
567 | } | 1088 | } |
568 | out: | 1089 | out: |
569 | spin_unlock(&cluster->lock); | 1090 | spin_unlock(&cluster->lock); |
1091 | |||
570 | return ret; | 1092 | return ret; |
571 | } | 1093 | } |
572 | 1094 | ||
1095 | static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | ||
1096 | struct btrfs_free_space *entry, | ||
1097 | struct btrfs_free_cluster *cluster, | ||
1098 | u64 offset, u64 bytes, u64 min_bytes) | ||
1099 | { | ||
1100 | unsigned long next_zero; | ||
1101 | unsigned long i; | ||
1102 | unsigned long search_bits; | ||
1103 | unsigned long total_bits; | ||
1104 | unsigned long found_bits; | ||
1105 | unsigned long start = 0; | ||
1106 | unsigned long total_found = 0; | ||
1107 | bool found = false; | ||
1108 | |||
1109 | i = offset_to_bit(entry->offset, block_group->sectorsize, | ||
1110 | max_t(u64, offset, entry->offset)); | ||
1111 | search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | ||
1112 | total_bits = bytes_to_bits(bytes, block_group->sectorsize); | ||
1113 | |||
1114 | again: | ||
1115 | found_bits = 0; | ||
1116 | for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); | ||
1117 | i < BITS_PER_BITMAP; | ||
1118 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
1119 | next_zero = find_next_zero_bit(entry->bitmap, | ||
1120 | BITS_PER_BITMAP, i); | ||
1121 | if (next_zero - i >= search_bits) { | ||
1122 | found_bits = next_zero - i; | ||
1123 | break; | ||
1124 | } | ||
1125 | i = next_zero; | ||
1126 | } | ||
1127 | |||
1128 | if (!found_bits) | ||
1129 | return -1; | ||
1130 | |||
1131 | if (!found) { | ||
1132 | start = i; | ||
1133 | found = true; | ||
1134 | } | ||
1135 | |||
1136 | total_found += found_bits; | ||
1137 | |||
1138 | if (cluster->max_size < found_bits * block_group->sectorsize) | ||
1139 | cluster->max_size = found_bits * block_group->sectorsize; | ||
1140 | |||
1141 | if (total_found < total_bits) { | ||
1142 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); | ||
1143 | if (i - start > total_bits * 2) { | ||
1144 | total_found = 0; | ||
1145 | cluster->max_size = 0; | ||
1146 | found = false; | ||
1147 | } | ||
1148 | goto again; | ||
1149 | } | ||
1150 | |||
1151 | cluster->window_start = start * block_group->sectorsize + | ||
1152 | entry->offset; | ||
1153 | cluster->points_to_bitmap = true; | ||
1154 | |||
1155 | return 0; | ||
1156 | } | ||
1157 | |||
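The allocation paths (btrfs_find_space_for_alloc(), btrfs_alloc_from_bitmap()) carve space back out with bitmap_clear_bits(), the mirror image of the set helper sketched earlier; as the hunks show, callers free the entry once info->bytes reaches zero:

	static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
				      struct btrfs_free_space *info,
				      u64 offset, u64 bytes)
	{
		unsigned long start, end, i;

		start = offset_to_bit(info->offset, block_group->sectorsize, offset);
		end = start + bytes_to_bits(bytes, block_group->sectorsize);
		BUG_ON(end > BITS_PER_BITMAP);

		for (i = start; i < end; i++)
			clear_bit(i, info->bitmap);

		info->bytes -= bytes;
	}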
573 | /* | 1158 | /* |
574 | * here we try to find a cluster of blocks in a block group. The goal | 1159 | * here we try to find a cluster of blocks in a block group. The goal |
575 | * is to find at least bytes free and up to empty_size + bytes free. | 1160 | * is to find at least bytes free and up to empty_size + bytes free. |
@@ -587,12 +1172,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
587 | struct btrfs_free_space *entry = NULL; | 1172 | struct btrfs_free_space *entry = NULL; |
588 | struct rb_node *node; | 1173 | struct rb_node *node; |
589 | struct btrfs_free_space *next; | 1174 | struct btrfs_free_space *next; |
590 | struct btrfs_free_space *last; | 1175 | struct btrfs_free_space *last = NULL; |
591 | u64 min_bytes; | 1176 | u64 min_bytes; |
592 | u64 window_start; | 1177 | u64 window_start; |
593 | u64 window_free; | 1178 | u64 window_free; |
594 | u64 max_extent = 0; | 1179 | u64 max_extent = 0; |
595 | int total_retries = 0; | 1180 | bool found_bitmap = false; |
596 | int ret; | 1181 | int ret; |
597 | 1182 | ||
598 | /* for metadata, allow allocates with more holes */ | 1183 | /* for metadata, allow allocates with more holes */ |
@@ -620,31 +1205,80 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
620 | goto out; | 1205 | goto out; |
621 | } | 1206 | } |
622 | again: | 1207 | again: |
623 | min_bytes = min(min_bytes, bytes + empty_size); | 1208 | entry = tree_search_offset(block_group, offset, found_bitmap, 1); |
624 | entry = tree_search_bytes(&block_group->free_space_bytes, | ||
625 | offset, min_bytes); | ||
626 | if (!entry) { | 1209 | if (!entry) { |
627 | ret = -ENOSPC; | 1210 | ret = -ENOSPC; |
628 | goto out; | 1211 | goto out; |
629 | } | 1212 | } |
1213 | |||
1214 | /* | ||
1215 | * If found_bitmap is true, we exhausted our search for extent entries, | ||
1216 | * and we just want to search all of the bitmaps that we can find, and | ||
1217 | * ignore any extent entries we find. | ||
1218 | */ | ||
1219 | while (entry->bitmap || found_bitmap || | ||
1220 | (!entry->bitmap && entry->bytes < min_bytes)) { | ||
1221 | struct rb_node *node = rb_next(&entry->offset_index); | ||
1222 | |||
1223 | if (entry->bitmap && entry->bytes > bytes + empty_size) { | ||
1224 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, | ||
1225 | offset, bytes + empty_size, | ||
1226 | min_bytes); | ||
1227 | if (!ret) | ||
1228 | goto got_it; | ||
1229 | } | ||
1230 | |||
1231 | if (!node) { | ||
1232 | ret = -ENOSPC; | ||
1233 | goto out; | ||
1234 | } | ||
1235 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1236 | } | ||
1237 | |||
1238 | /* | ||
1239 | * We already searched all the extent entries from the passed in offset | ||
1240 | * to the end and didn't find enough space for the cluster, and we also | ||
1241 | * didn't find any bitmaps that met our criteria, just go ahead and exit | ||
1242 | */ | ||
1243 | if (found_bitmap) { | ||
1244 | ret = -ENOSPC; | ||
1245 | goto out; | ||
1246 | } | ||
1247 | |||
1248 | cluster->points_to_bitmap = false; | ||
630 | window_start = entry->offset; | 1249 | window_start = entry->offset; |
631 | window_free = entry->bytes; | 1250 | window_free = entry->bytes; |
632 | last = entry; | 1251 | last = entry; |
633 | max_extent = entry->bytes; | 1252 | max_extent = entry->bytes; |
634 | 1253 | ||
635 | while(1) { | 1254 | while (1) { |
637 | /* our window is just right, let's fill it */ | 1256 | /* our window is just right, let's fill it */
637 | if (window_free >= bytes + empty_size) | 1256 | if (window_free >= bytes + empty_size) |
638 | break; | 1257 | break; |
639 | 1258 | ||
640 | node = rb_next(&last->offset_index); | 1259 | node = rb_next(&last->offset_index); |
641 | if (!node) { | 1260 | if (!node) { |
1261 | if (found_bitmap) | ||
1262 | goto again; | ||
642 | ret = -ENOSPC; | 1263 | ret = -ENOSPC; |
643 | goto out; | 1264 | goto out; |
644 | } | 1265 | } |
645 | next = rb_entry(node, struct btrfs_free_space, offset_index); | 1266 | next = rb_entry(node, struct btrfs_free_space, offset_index); |
646 | 1267 | ||
647 | /* | 1268 | /* |
1269 | * we found a bitmap, so if this search doesn't result in a | ||
1270 | * cluster, we know to go and search again for the bitmaps and | ||
1271 | * start looking for space there | ||
1272 | */ | ||
1273 | if (next->bitmap) { | ||
1274 | if (!found_bitmap) | ||
1275 | offset = next->offset; | ||
1276 | found_bitmap = true; | ||
1277 | last = next; | ||
1278 | continue; | ||
1279 | } | ||
1280 | |||
1281 | /* | ||
648 | * we haven't filled the empty size and the window is | 1282 | * we haven't filled the empty size and the window is |
649 | * very large. reset and try again | 1283 | * very large. reset and try again |
650 | */ | 1284 | */ |
@@ -655,19 +1289,6 @@ again: | |||
655 | window_free = entry->bytes; | 1289 | window_free = entry->bytes; |
656 | last = entry; | 1290 | last = entry; |
657 | max_extent = 0; | 1291 | max_extent = 0; |
658 | total_retries++; | ||
659 | if (total_retries % 64 == 0) { | ||
660 | if (min_bytes >= (bytes + empty_size)) { | ||
661 | ret = -ENOSPC; | ||
662 | goto out; | ||
663 | } | ||
664 | /* | ||
665 | * grow our allocation a bit, we're not having | ||
666 | * much luck | ||
667 | */ | ||
668 | min_bytes *= 2; | ||
669 | goto again; | ||
670 | } | ||
671 | } else { | 1292 | } else { |
672 | last = next; | 1293 | last = next; |
673 | window_free += next->bytes; | 1294 | window_free += next->bytes; |
@@ -685,11 +1306,19 @@ again: | |||
685 | * The cluster includes an rbtree, but only uses the offset index | 1306 | * The cluster includes an rbtree, but only uses the offset index |
686 | * of each free space cache entry. | 1307 | * of each free space cache entry. |
687 | */ | 1308 | */ |
688 | while(1) { | 1309 | while (1) { |
689 | node = rb_next(&entry->offset_index); | 1310 | node = rb_next(&entry->offset_index); |
690 | unlink_free_space(block_group, entry); | 1311 | if (entry->bitmap && node) { |
1312 | entry = rb_entry(node, struct btrfs_free_space, | ||
1313 | offset_index); | ||
1314 | continue; | ||
1315 | } else if (entry->bitmap && !node) { | ||
1316 | break; | ||
1317 | } | ||
1318 | |||
1319 | rb_erase(&entry->offset_index, &block_group->free_space_offset); | ||
691 | ret = tree_insert_offset(&cluster->root, entry->offset, | 1320 | ret = tree_insert_offset(&cluster->root, entry->offset, |
692 | &entry->offset_index); | 1321 | &entry->offset_index, 0); |
693 | BUG_ON(ret); | 1322 | BUG_ON(ret); |
694 | 1323 | ||
695 | if (!node || entry == last) | 1324 | if (!node || entry == last) |
@@ -697,8 +1326,10 @@ again: | |||
697 | 1326 | ||
698 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1327 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
699 | } | 1328 | } |
700 | ret = 0; | 1329 | |
701 | cluster->max_size = max_extent; | 1330 | cluster->max_size = max_extent; |
1331 | got_it: | ||
1332 | ret = 0; | ||
702 | atomic_inc(&block_group->count); | 1333 | atomic_inc(&block_group->count); |
703 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | 1334 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); |
704 | cluster->block_group = block_group; | 1335 | cluster->block_group = block_group; |
@@ -718,6 +1349,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
718 | spin_lock_init(&cluster->refill_lock); | 1349 | spin_lock_init(&cluster->refill_lock); |
719 | cluster->root.rb_node = NULL; | 1350 | cluster->root.rb_node = NULL; |
720 | cluster->max_size = 0; | 1351 | cluster->max_size = 0; |
1352 | cluster->points_to_bitmap = false; | ||
721 | INIT_LIST_HEAD(&cluster->block_group_list); | 1353 | INIT_LIST_HEAD(&cluster->block_group_list); |
722 | cluster->block_group = NULL; | 1354 | cluster->block_group = NULL; |
723 | } | 1355 | } |
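Putting the cluster API together, a caller's round-trip looks roughly like the following; trans, root, block_group and the size parameters are assumed to be in scope, and error handling is elided:

	struct btrfs_free_cluster cluster;
	u64 start;

	btrfs_init_free_cluster(&cluster);

	/* gather at least 'bytes' (ideally bytes + empty_size) of space */
	if (!btrfs_find_space_cluster(trans, root, block_group, &cluster,
				      offset, bytes, empty_size)) {
		/* works for both extent-backed clusters and the new
		 * bitmap-backed (points_to_bitmap) ones */
		start = btrfs_alloc_from_cluster(block_group, &cluster,
						 bytes, min_start);
	}

	/* hand anything unused back to the block group's free space tree */
	btrfs_return_cluster_to_free_space(block_group, &cluster);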
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 266fb8764054..890a8e79011b 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -19,6 +19,14 @@ | |||
19 | #ifndef __BTRFS_FREE_SPACE_CACHE | 19 | #ifndef __BTRFS_FREE_SPACE_CACHE |
20 | #define __BTRFS_FREE_SPACE_CACHE | 20 | #define __BTRFS_FREE_SPACE_CACHE |
21 | 21 | ||
22 | struct btrfs_free_space { | ||
23 | struct rb_node offset_index; | ||
24 | u64 offset; | ||
25 | u64 bytes; | ||
26 | unsigned long *bitmap; | ||
27 | struct list_head list; | ||
28 | }; | ||
29 | |||
22 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 30 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, |
23 | u64 bytenr, u64 size); | 31 | u64 bytenr, u64 size); |
24 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 32 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
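One convention worth spelling out about the struct made public here: the bitmap pointer doubles as the type tag, NULL meaning a plain extent entry, which is what every entry->bitmap test in the .c file relies on. A hypothetical helper (not in the patch) that names the convention:

	static inline bool entry_is_bitmap(const struct btrfs_free_space *entry)
	{
		return entry->bitmap != NULL;	/* NULL => extent entry */
	}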
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 791eab19e330..59cba180fe83 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -2603,8 +2603,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2603 | if (root->ref_cows) | 2603 | if (root->ref_cows) |
2604 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 2604 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); |
2605 | path = btrfs_alloc_path(); | 2605 | path = btrfs_alloc_path(); |
2606 | path->reada = -1; | ||
2607 | BUG_ON(!path); | 2606 | BUG_ON(!path); |
2607 | path->reada = -1; | ||
2608 | 2608 | ||
2609 | /* FIXME, add redo link to tree so we don't leak on crash */ | 2609 | /* FIXME, add redo link to tree so we don't leak on crash */ |
2610 | key.objectid = inode->i_ino; | 2610 | key.objectid = inode->i_ino; |
@@ -3099,8 +3099,12 @@ static void inode_tree_add(struct inode *inode) | |||
3099 | { | 3099 | { |
3100 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3100 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3101 | struct btrfs_inode *entry; | 3101 | struct btrfs_inode *entry; |
3102 | struct rb_node **p = &root->inode_tree.rb_node; | 3102 | struct rb_node **p; |
3103 | struct rb_node *parent = NULL; | 3103 | struct rb_node *parent; |
3104 | |||
3105 | again: | ||
3106 | p = &root->inode_tree.rb_node; | ||
3107 | parent = NULL; | ||
3104 | 3108 | ||
3105 | spin_lock(&root->inode_lock); | 3109 | spin_lock(&root->inode_lock); |
3106 | while (*p) { | 3110 | while (*p) { |
@@ -3108,13 +3112,16 @@ static void inode_tree_add(struct inode *inode) | |||
3108 | entry = rb_entry(parent, struct btrfs_inode, rb_node); | 3112 | entry = rb_entry(parent, struct btrfs_inode, rb_node); |
3109 | 3113 | ||
3110 | if (inode->i_ino < entry->vfs_inode.i_ino) | 3114 | if (inode->i_ino < entry->vfs_inode.i_ino) |
3111 | p = &(*p)->rb_left; | 3115 | p = &parent->rb_left; |
3112 | else if (inode->i_ino > entry->vfs_inode.i_ino) | 3116 | else if (inode->i_ino > entry->vfs_inode.i_ino) |
3113 | p = &(*p)->rb_right; | 3117 | p = &parent->rb_right; |
3114 | else { | 3118 | else { |
3115 | WARN_ON(!(entry->vfs_inode.i_state & | 3119 | WARN_ON(!(entry->vfs_inode.i_state & |
3116 | (I_WILL_FREE | I_FREEING | I_CLEAR))); | 3120 | (I_WILL_FREE | I_FREEING | I_CLEAR))); |
3117 | break; | 3121 | rb_erase(parent, &root->inode_tree); |
3122 | RB_CLEAR_NODE(parent); | ||
3123 | spin_unlock(&root->inode_lock); | ||
3124 | goto again; | ||
3118 | } | 3125 | } |
3119 | } | 3126 | } |
3120 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); | 3127 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); |
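The inode_tree_add() change above replaces a silent break on collision with evict-and-retry: an inode headed for destruction can still occupy the same i_ino slot, so the stale node is erased and the descent restarts from the root. Restarting is required rather than a convenience, because rb_erase() rebalances the tree and invalidates the saved parent and link pointer. The shape of the idiom, condensed from the hunk:

	again:
		p = &root->inode_tree.rb_node;
		parent = NULL;
		spin_lock(&root->inode_lock);
		while (*p) {
			parent = *p;
			entry = rb_entry(parent, struct btrfs_inode, rb_node);
			if (inode->i_ino < entry->vfs_inode.i_ino)
				p = &parent->rb_left;
			else if (inode->i_ino > entry->vfs_inode.i_ino)
				p = &parent->rb_right;
			else {
				/* dying inode still in the tree: evict it and
				 * restart, since rb_erase() rebalancing has
				 * invalidated 'parent' and 'p' */
				rb_erase(parent, &root->inode_tree);
				RB_CLEAR_NODE(parent);
				spin_unlock(&root->inode_lock);
				goto again;
			}
		}
		rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
		rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
		spin_unlock(&root->inode_lock);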
@@ -3126,12 +3133,12 @@ static void inode_tree_del(struct inode *inode) | |||
3126 | { | 3133 | { |
3127 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3134 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3128 | 3135 | ||
3136 | spin_lock(&root->inode_lock); | ||
3129 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
3130 | spin_lock(&root->inode_lock); | ||
3131 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
3132 | spin_unlock(&root->inode_lock); | ||
3133 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
3134 | } | 3140 | } |
3141 | spin_unlock(&root->inode_lock); | ||
3135 | } | 3142 | } |
3136 | 3143 | ||
3137 | static noinline void init_btrfs_i(struct inode *inode) | 3144 | static noinline void init_btrfs_i(struct inode *inode) |
@@ -4785,8 +4792,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4785 | * and the replacement file is large. Start IO on it now so | 4792 | * and the replacement file is large. Start IO on it now so |
4786 | * we don't add too much work to the end of the transaction | 4793 | * we don't add too much work to the end of the transaction |
4787 | */ | 4794 | */ |
4788 | if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && | 4795 | if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && |
4789 | new_inode->i_size && | ||
4790 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 4796 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
4791 | filemap_flush(old_inode->i_mapping); | 4797 | filemap_flush(old_inode->i_mapping); |
4792 | 4798 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d6f0806c682f..7b2f401e604e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -740,7 +740,6 @@ int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | |||
740 | .nr_to_write = mapping->nrpages * 2, | 740 | .nr_to_write = mapping->nrpages * 2, |
741 | .range_start = start, | 741 | .range_start = start, |
742 | .range_end = end, | 742 | .range_end = end, |
743 | .for_writepages = 1, | ||
744 | }; | 743 | }; |
745 | return btrfs_writepages(mapping, &wbc); | 744 | return btrfs_writepages(mapping, &wbc); |
746 | } | 745 | } |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 6d6523da0a30..0d126be22b63 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -309,7 +309,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) | |||
309 | } | 309 | } |
310 | printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", | 310 | printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", |
311 | (unsigned long long)btrfs_header_bytenr(c), | 311 | (unsigned long long)btrfs_header_bytenr(c), |
312 | btrfs_header_level(c), nr, | 312 | level, nr, |
313 | (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); | 313 | (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); |
314 | for (i = 0; i < nr; i++) { | 314 | for (i = 0; i < nr; i++) { |
315 | btrfs_node_key_to_cpu(c, &key, i); | 315 | btrfs_node_key_to_cpu(c, &key, i); |
@@ -326,10 +326,10 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) | |||
326 | btrfs_level_size(root, level - 1), | 326 | btrfs_level_size(root, level - 1), |
327 | btrfs_node_ptr_generation(c, i)); | 327 | btrfs_node_ptr_generation(c, i)); |
328 | if (btrfs_is_leaf(next) && | 328 | if (btrfs_is_leaf(next) && |
329 | btrfs_header_level(c) != 1) | 329 | level != 1) |
330 | BUG(); | 330 | BUG(); |
331 | if (btrfs_header_level(next) != | 331 | if (btrfs_header_level(next) != |
332 | btrfs_header_level(c) - 1) | 332 | level - 1) |
333 | BUG(); | 333 | BUG(); |
334 | btrfs_print_tree(root, next); | 334 | btrfs_print_tree(root, next); |
335 | free_extent_buffer(next); | 335 | free_extent_buffer(next); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 008397934778..c04f7f212602 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -670,6 +670,8 @@ again: | |||
670 | err = ret; | 670 | err = ret; |
671 | goto out; | 671 | goto out; |
672 | } | 672 | } |
673 | if (ret > 0 && path2->slots[level] > 0) | ||
674 | path2->slots[level]--; | ||
673 | 675 | ||
674 | eb = path2->nodes[level]; | 676 | eb = path2->nodes[level]; |
675 | WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != | 677 | WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != |
@@ -1609,6 +1611,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1609 | BUG_ON(level == 0); | 1611 | BUG_ON(level == 0); |
1610 | path->lowest_level = level; | 1612 | path->lowest_level = level; |
1611 | ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); | 1613 | ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); |
1614 | path->lowest_level = 0; | ||
1612 | if (ret < 0) { | 1615 | if (ret < 0) { |
1613 | btrfs_free_path(path); | 1616 | btrfs_free_path(path); |
1614 | return ret; | 1617 | return ret; |
@@ -2550,8 +2553,13 @@ int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | |||
2550 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | 2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; |
2551 | 2554 | ||
2552 | /* make sure the dirty trick played by the caller work */ | 2555 | /* make sure the dirty trick played by the caller work */ |
2553 | ret = invalidate_inode_pages2_range(inode->i_mapping, | 2556 | while (1) { |
2554 | first_index, last_index); | 2557 | ret = invalidate_inode_pages2_range(inode->i_mapping, |
2558 | first_index, last_index); | ||
2559 | if (ret != -EBUSY) | ||
2560 | break; | ||
2561 | schedule_timeout(HZ/10); | ||
2562 | } | ||
2555 | if (ret) | 2563 | if (ret) |
2556 | goto out_unlock; | 2564 | goto out_unlock; |
2557 | 2565 | ||
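One caveat on the retry loop above: schedule_timeout() only sleeps if the task state was changed away from TASK_RUNNING beforehand, so as written the HZ/10 amounts to little more than a yield between -EBUSY retries. If an actual delay is intended, the conventional spellings are:

	while (1) {
		ret = invalidate_inode_pages2_range(inode->i_mapping,
						    first_index, last_index);
		if (ret != -EBUSY)
			break;
		/* either set the state explicitly ... */
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ / 10);
		/* ... or simply: msleep(100); */
	}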
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2dbf1c1f56ee..cdbb5022da52 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -40,6 +40,12 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
40 | } | 40 | } |
41 | } | 41 | } |
42 | 42 | ||
43 | static noinline void switch_commit_root(struct btrfs_root *root) | ||
44 | { | ||
45 | free_extent_buffer(root->commit_root); | ||
46 | root->commit_root = btrfs_root_node(root); | ||
47 | } | ||
48 | |||
43 | /* | 49 | /* |
44 | * either allocate a new transaction or hop into the existing one | 50 | * either allocate a new transaction or hop into the existing one |
45 | */ | 51 | */ |
@@ -444,9 +450,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
444 | 450 | ||
445 | btrfs_write_dirty_block_groups(trans, root); | 451 | btrfs_write_dirty_block_groups(trans, root); |
446 | 452 | ||
447 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
448 | BUG_ON(ret); | ||
449 | |||
450 | while (1) { | 453 | while (1) { |
451 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 454 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
452 | if (old_root_bytenr == root->node->start) | 455 | if (old_root_bytenr == root->node->start) |
@@ -457,13 +460,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
457 | &root->root_key, | 460 | &root->root_key, |
458 | &root->root_item); | 461 | &root->root_item); |
459 | BUG_ON(ret); | 462 | BUG_ON(ret); |
460 | btrfs_write_dirty_block_groups(trans, root); | ||
461 | 463 | ||
462 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 464 | ret = btrfs_write_dirty_block_groups(trans, root); |
463 | BUG_ON(ret); | 465 | BUG_ON(ret); |
464 | } | 466 | } |
465 | free_extent_buffer(root->commit_root); | 467 | |
466 | root->commit_root = btrfs_root_node(root); | 468 | if (root != root->fs_info->extent_root) |
469 | switch_commit_root(root); | ||
470 | |||
467 | return 0; | 471 | return 0; |
468 | } | 472 | } |
469 | 473 | ||
@@ -495,10 +499,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
495 | root = list_entry(next, struct btrfs_root, dirty_list); | 499 | root = list_entry(next, struct btrfs_root, dirty_list); |
496 | 500 | ||
497 | update_cowonly_root(trans, root); | 501 | update_cowonly_root(trans, root); |
498 | |||
499 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
500 | BUG_ON(ret); | ||
501 | } | 502 | } |
503 | |||
504 | down_write(&fs_info->extent_commit_sem); | ||
505 | switch_commit_root(fs_info->extent_root); | ||
506 | up_write(&fs_info->extent_commit_sem); | ||
507 | |||
502 | return 0; | 508 | return 0; |
503 | } | 509 | } |
504 | 510 | ||
@@ -544,8 +550,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
544 | btrfs_update_reloc_root(trans, root); | 550 | btrfs_update_reloc_root(trans, root); |
545 | 551 | ||
546 | if (root->commit_root != root->node) { | 552 | if (root->commit_root != root->node) { |
547 | free_extent_buffer(root->commit_root); | 553 | switch_commit_root(root); |
548 | root->commit_root = btrfs_root_node(root); | ||
549 | btrfs_set_root_node(&root->root_item, | 554 | btrfs_set_root_node(&root->root_item, |
550 | root->node); | 555 | root->node); |
551 | } | 556 | } |
@@ -852,6 +857,16 @@ static void update_super_roots(struct btrfs_root *root) | |||
852 | super->root_level = root_item->level; | 857 | super->root_level = root_item->level; |
853 | } | 858 | } |
854 | 859 | ||
860 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | ||
861 | { | ||
862 | int ret = 0; | ||
863 | spin_lock(&info->new_trans_lock); | ||
864 | if (info->running_transaction) | ||
865 | ret = info->running_transaction->in_commit; | ||
866 | spin_unlock(&info->new_trans_lock); | ||
867 | return ret; | ||
868 | } | ||
869 | |||
855 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 870 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
856 | struct btrfs_root *root) | 871 | struct btrfs_root *root) |
857 | { | 872 | { |
@@ -943,9 +958,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
943 | 958 | ||
944 | mutex_unlock(&root->fs_info->trans_mutex); | 959 | mutex_unlock(&root->fs_info->trans_mutex); |
945 | 960 | ||
946 | if (flush_on_commit || snap_pending) { | 961 | if (flush_on_commit) { |
947 | if (flush_on_commit) | 962 | btrfs_start_delalloc_inodes(root); |
948 | btrfs_start_delalloc_inodes(root); | 963 | ret = btrfs_wait_ordered_extents(root, 0); |
964 | BUG_ON(ret); | ||
965 | } else if (snap_pending) { | ||
949 | ret = btrfs_wait_ordered_extents(root, 1); | 966 | ret = btrfs_wait_ordered_extents(root, 1); |
950 | BUG_ON(ret); | 967 | BUG_ON(ret); |
951 | } | 968 | } |
@@ -1009,15 +1026,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1009 | 1026 | ||
1010 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1027 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1011 | root->fs_info->tree_root->node); | 1028 | root->fs_info->tree_root->node); |
1012 | free_extent_buffer(root->fs_info->tree_root->commit_root); | 1029 | switch_commit_root(root->fs_info->tree_root); |
1013 | root->fs_info->tree_root->commit_root = | ||
1014 | btrfs_root_node(root->fs_info->tree_root); | ||
1015 | 1030 | ||
1016 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, | 1031 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, |
1017 | root->fs_info->chunk_root->node); | 1032 | root->fs_info->chunk_root->node); |
1018 | free_extent_buffer(root->fs_info->chunk_root->commit_root); | 1033 | switch_commit_root(root->fs_info->chunk_root); |
1019 | root->fs_info->chunk_root->commit_root = | ||
1020 | btrfs_root_node(root->fs_info->chunk_root); | ||
1021 | 1034 | ||
1022 | update_super_roots(root); | 1035 | update_super_roots(root); |
1023 | 1036 | ||
@@ -1057,6 +1070,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1057 | cur_trans->commit_done = 1; | 1070 | cur_trans->commit_done = 1; |
1058 | 1071 | ||
1059 | root->fs_info->last_trans_committed = cur_trans->transid; | 1072 | root->fs_info->last_trans_committed = cur_trans->transid; |
1073 | |||
1060 | wake_up(&cur_trans->commit_wait); | 1074 | wake_up(&cur_trans->commit_wait); |
1061 | 1075 | ||
1062 | put_transaction(cur_trans); | 1076 | put_transaction(cur_trans); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 961c3ee5a2e1..663c67404918 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -107,4 +107,5 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
107 | struct btrfs_root *root); | 107 | struct btrfs_root *root); |
108 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 108 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
109 | struct extent_io_tree *dirty_pages); | 109 | struct extent_io_tree *dirty_pages); |
110 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | ||
110 | #endif | 111 | #endif |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c13922206d1b..d91b0de7c502 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -797,7 +797,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
797 | return -ENOENT; | 797 | return -ENOENT; |
798 | 798 | ||
799 | inode = read_one_inode(root, key->objectid); | 799 | inode = read_one_inode(root, key->objectid); |
800 | BUG_ON(!dir); | 800 | BUG_ON(!inode); |
801 | 801 | ||
802 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | 802 | ref_ptr = btrfs_item_ptr_offset(eb, slot); |
803 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | 803 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3ab80e9cd767..5cf405b0828d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -260,7 +260,7 @@ loop_lock: | |||
260 | num_run++; | 260 | num_run++; |
261 | batch_run++; | 261 | batch_run++; |
262 | 262 | ||
263 | if (bio_sync(cur)) | 263 | if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) |
264 | num_sync_run++; | 264 | num_sync_run++; |
265 | 265 | ||
266 | if (need_resched()) { | 266 | if (need_resched()) { |
@@ -721,7 +721,8 @@ error: | |||
721 | */ | 721 | */ |
722 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | 722 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, |
723 | struct btrfs_device *device, | 723 | struct btrfs_device *device, |
724 | u64 num_bytes, u64 *start) | 724 | u64 num_bytes, u64 *start, |
725 | u64 *max_avail) | ||
725 | { | 726 | { |
726 | struct btrfs_key key; | 727 | struct btrfs_key key; |
727 | struct btrfs_root *root = device->dev_root; | 728 | struct btrfs_root *root = device->dev_root; |
@@ -758,9 +759,13 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
758 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 759 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
759 | if (ret < 0) | 760 | if (ret < 0) |
760 | goto error; | 761 | goto error; |
761 | ret = btrfs_previous_item(root, path, 0, key.type); | 762 | if (ret > 0) { |
762 | if (ret < 0) | 763 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
763 | goto error; | 764 | if (ret < 0) |
765 | goto error; | ||
766 | if (ret > 0) | ||
767 | start_found = 1; | ||
768 | } | ||
764 | l = path->nodes[0]; | 769 | l = path->nodes[0]; |
765 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 770 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
766 | while (1) { | 771 | while (1) { |
@@ -803,6 +808,10 @@ no_more_items: | |||
803 | if (last_byte < search_start) | 808 | if (last_byte < search_start) |
804 | last_byte = search_start; | 809 | last_byte = search_start; |
805 | hole_size = key.offset - last_byte; | 810 | hole_size = key.offset - last_byte; |
811 | |||
812 | if (hole_size > *max_avail) | ||
813 | *max_avail = hole_size; | ||
814 | |||
806 | if (key.offset > last_byte && | 815 | if (key.offset > last_byte && |
807 | hole_size >= num_bytes) { | 816 | hole_size >= num_bytes) { |
808 | *start = last_byte; | 817 | *start = last_byte; |
@@ -1621,6 +1630,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
1621 | device->fs_devices->total_rw_bytes += diff; | 1630 | device->fs_devices->total_rw_bytes += diff; |
1622 | 1631 | ||
1623 | device->total_bytes = new_size; | 1632 | device->total_bytes = new_size; |
1633 | device->disk_total_bytes = new_size; | ||
1624 | btrfs_clear_space_info_full(device->dev_root->fs_info); | 1634 | btrfs_clear_space_info_full(device->dev_root->fs_info); |
1625 | 1635 | ||
1626 | return btrfs_update_device(trans, device); | 1636 | return btrfs_update_device(trans, device); |
@@ -2007,7 +2017,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2007 | goto done; | 2017 | goto done; |
2008 | if (ret) { | 2018 | if (ret) { |
2009 | ret = 0; | 2019 | ret = 0; |
2010 | goto done; | 2020 | break; |
2011 | } | 2021 | } |
2012 | 2022 | ||
2013 | l = path->nodes[0]; | 2023 | l = path->nodes[0]; |
@@ -2015,7 +2025,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2015 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 2025 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
2016 | 2026 | ||
2017 | if (key.objectid != device->devid) | 2027 | if (key.objectid != device->devid) |
2018 | goto done; | 2028 | break; |
2019 | 2029 | ||
2020 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 2030 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
2021 | length = btrfs_dev_extent_length(l, dev_extent); | 2031 | length = btrfs_dev_extent_length(l, dev_extent); |
@@ -2171,6 +2181,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2171 | max_chunk_size); | 2181 | max_chunk_size); |
2172 | 2182 | ||
2173 | again: | 2183 | again: |
2184 | max_avail = 0; | ||
2174 | if (!map || map->num_stripes != num_stripes) { | 2185 | if (!map || map->num_stripes != num_stripes) { |
2175 | kfree(map); | 2186 | kfree(map); |
2176 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | 2187 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); |
@@ -2219,7 +2230,8 @@ again: | |||
2219 | 2230 | ||
2220 | if (device->in_fs_metadata && avail >= min_free) { | 2231 | if (device->in_fs_metadata && avail >= min_free) { |
2221 | ret = find_free_dev_extent(trans, device, | 2232 | ret = find_free_dev_extent(trans, device, |
2222 | min_free, &dev_offset); | 2233 | min_free, &dev_offset, |
2234 | &max_avail); | ||
2223 | if (ret == 0) { | 2235 | if (ret == 0) { |
2224 | list_move_tail(&device->dev_alloc_list, | 2236 | list_move_tail(&device->dev_alloc_list, |
2225 | &private_devs); | 2237 | &private_devs); |
@@ -2795,26 +2807,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
2795 | } | 2807 | } |
2796 | } | 2808 | } |
2797 | 2809 | ||
2798 | for (i = 0; i > nr; i++) { | ||
2799 | struct btrfs_multi_bio *multi; | ||
2800 | struct btrfs_bio_stripe *stripe; | ||
2801 | int ret; | ||
2802 | |||
2803 | length = 1; | ||
2804 | ret = btrfs_map_block(map_tree, WRITE, buf[i], | ||
2805 | &length, &multi, 0); | ||
2806 | BUG_ON(ret); | ||
2807 | |||
2808 | stripe = multi->stripes; | ||
2809 | for (j = 0; j < multi->num_stripes; j++) { | ||
2810 | if (stripe->physical >= physical && | ||
2811 | physical < stripe->physical + length) | ||
2812 | break; | ||
2813 | } | ||
2814 | BUG_ON(j >= multi->num_stripes); | ||
2815 | kfree(multi); | ||
2816 | } | ||
2817 | |||
2818 | *logical = buf; | 2810 | *logical = buf; |
2819 | *naddrs = nr; | 2811 | *naddrs = nr; |
2820 | *stripe_len = map->stripe_len; | 2812 | *stripe_len = map->stripe_len; |
@@ -2911,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2911 | bio->bi_rw |= rw; | 2903 | bio->bi_rw |= rw; |
2912 | 2904 | ||
2913 | spin_lock(&device->io_lock); | 2905 | spin_lock(&device->io_lock); |
2914 | if (bio_sync(bio)) | 2906 | if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) |
2915 | pending_bios = &device->pending_sync_bios; | 2907 | pending_bios = &device->pending_sync_bios; |
2916 | else | 2908 | else |
2917 | pending_bios = &device->pending_bios; | 2909 | pending_bios = &device->pending_bios; |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index ecfbce836d32..3e2b90eaa239 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -208,7 +208,7 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
208 | *total_in = 0; | 208 | *total_in = 0; |
209 | 209 | ||
210 | workspace = find_zlib_workspace(); | 210 | workspace = find_zlib_workspace(); |
211 | if (!workspace) | 211 | if (IS_ERR(workspace)) |
212 | return -1; | 212 | return -1; |
213 | 213 | ||
214 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 214 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
@@ -366,7 +366,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
366 | char *kaddr; | 366 | char *kaddr; |
367 | 367 | ||
368 | workspace = find_zlib_workspace(); | 368 | workspace = find_zlib_workspace(); |
369 | if (!workspace) | 369 | if (IS_ERR(workspace)) |
370 | return -ENOMEM; | 370 | return -ENOMEM; |
371 | 371 | ||
372 | data_in = kmap(pages_in[page_in_index]); | 372 | data_in = kmap(pages_in[page_in_index]); |
@@ -547,7 +547,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
547 | return -ENOMEM; | 547 | return -ENOMEM; |
548 | 548 | ||
549 | workspace = find_zlib_workspace(); | 549 | workspace = find_zlib_workspace(); |
550 | if (!workspace) | 550 | if (IS_ERR(workspace)) |
551 | return -ENOMEM; | 551 | return -ENOMEM; |
552 | 552 | ||
553 | workspace->inf_strm.next_in = data_in; | 553 | workspace->inf_strm.next_in = data_in; |
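The three zlib hunks above work because find_zlib_workspace() reports failure with the ERR_PTR convention rather than NULL, so the old NULL checks could never fire. The general shape of the idiom (the hunks keep their original literal return values; PTR_ERR() is the usual way to propagate the encoded errno):

	#include <linux/err.h>

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return PTR_ERR(workspace);	/* e.g. -ENOMEM, carried in the pointer */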
diff --git a/fs/buffer.c b/fs/buffer.c index a3ef091a45bd..90a98865b0cc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -281,7 +281,7 @@ static void free_more_memory(void) | |||
281 | struct zone *zone; | 281 | struct zone *zone; |
282 | int nid; | 282 | int nid; |
283 | 283 | ||
284 | wakeup_pdflush(1024); | 284 | wakeup_flusher_threads(1024); |
285 | yield(); | 285 | yield(); |
286 | 286 | ||
287 | for_each_online_node(nid) { | 287 | for_each_online_node(nid) { |
@@ -1165,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
1165 | 1165 | ||
1166 | if (!test_set_buffer_dirty(bh)) { | 1166 | if (!test_set_buffer_dirty(bh)) { |
1167 | struct page *page = bh->b_page; | 1167 | struct page *page = bh->b_page; |
1168 | if (!TestSetPageDirty(page)) | 1168 | if (!TestSetPageDirty(page)) { |
1169 | __set_page_dirty(page, page_mapping(page), 0); | 1169 | struct address_space *mapping = page_mapping(page); |
1170 | if (mapping) | ||
1171 | __set_page_dirty(page, mapping, 0); | ||
1172 | } | ||
1170 | } | 1173 | } |
1171 | } | 1174 | } |
1172 | 1175 | ||
diff --git a/fs/char_dev.c b/fs/char_dev.c index a173551e19d7..3cbc57f932d2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -31,6 +31,7 @@ | |||
31 | * - no readahead or I/O queue unplugging required | 31 | * - no readahead or I/O queue unplugging required |
32 | */ | 32 | */ |
33 | struct backing_dev_info directly_mappable_cdev_bdi = { | 33 | struct backing_dev_info directly_mappable_cdev_bdi = { |
34 | .name = "char", | ||
34 | .capabilities = ( | 35 | .capabilities = ( |
35 | #ifdef CONFIG_MMU | 36 | #ifdef CONFIG_MMU |
36 | /* permit private copies of the data to be taken */ | 37 | /* permit private copies of the data to be taken */ |
@@ -237,8 +238,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, | |||
237 | } | 238 | } |
238 | 239 | ||
239 | /** | 240 | /** |
240 | * register_chrdev() - Register a major number for character devices. | 241 | * __register_chrdev() - create and register a cdev occupying a range of minors |
241 | * @major: major device number or 0 for dynamic allocation | 242 | * @major: major device number or 0 for dynamic allocation |
243 | * @baseminor: first of the requested range of minor numbers | ||
244 | * @count: the number of minor numbers required | ||
242 | * @name: name of this range of devices | 245 | * @name: name of this range of devices |
243 | * @fops: file operations associated with this devices | 246 | * @fops: file operations associated with this devices |
244 | * | 247 | * |
@@ -254,19 +257,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, | |||
254 | * /dev. It only helps to keep track of the different owners of devices. If | 257 | * /dev. It only helps to keep track of the different owners of devices. If |
255 | * your module name has only one type of devices it's ok to use e.g. the name | 258 | * your module name has only one type of devices it's ok to use e.g. the name |
256 | * of the module here. | 259 | * of the module here. |
257 | * | ||
258 | * This function registers a range of 256 minor numbers. The first minor number | ||
259 | * is 0. | ||
260 | */ | 260 | */ |
261 | int register_chrdev(unsigned int major, const char *name, | 261 | int __register_chrdev(unsigned int major, unsigned int baseminor, |
262 | const struct file_operations *fops) | 262 | unsigned int count, const char *name, |
263 | const struct file_operations *fops) | ||
263 | { | 264 | { |
264 | struct char_device_struct *cd; | 265 | struct char_device_struct *cd; |
265 | struct cdev *cdev; | 266 | struct cdev *cdev; |
266 | char *s; | 267 | char *s; |
267 | int err = -ENOMEM; | 268 | int err = -ENOMEM; |
268 | 269 | ||
269 | cd = __register_chrdev_region(major, 0, 256, name); | 270 | cd = __register_chrdev_region(major, baseminor, count, name); |
270 | if (IS_ERR(cd)) | 271 | if (IS_ERR(cd)) |
271 | return PTR_ERR(cd); | 272 | return PTR_ERR(cd); |
272 | 273 | ||
@@ -280,7 +281,7 @@ int register_chrdev(unsigned int major, const char *name, | |||
280 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) | 281 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) |
281 | *s = '!'; | 282 | *s = '!'; |
282 | 283 | ||
283 | err = cdev_add(cdev, MKDEV(cd->major, 0), 256); | 284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); |
284 | if (err) | 285 | if (err) |
285 | goto out; | 286 | goto out; |
286 | 287 | ||
@@ -290,7 +291,7 @@ int register_chrdev(unsigned int major, const char *name, | |||
290 | out: | 291 | out: |
291 | kobject_put(&cdev->kobj); | 292 | kobject_put(&cdev->kobj); |
292 | out2: | 293 | out2: |
293 | kfree(__unregister_chrdev_region(cd->major, 0, 256)); | 294 | kfree(__unregister_chrdev_region(cd->major, baseminor, count)); |
294 | return err; | 295 | return err; |
295 | } | 296 | } |
296 | 297 | ||
@@ -316,10 +317,23 @@ void unregister_chrdev_region(dev_t from, unsigned count) | |||
316 | } | 317 | } |
317 | } | 318 | } |
318 | 319 | ||
319 | void unregister_chrdev(unsigned int major, const char *name) | 320 | /** |
321 | * __unregister_chrdev - unregister and destroy a cdev | ||
322 | * @major: major device number | ||
323 | * @baseminor: first of the range of minor numbers | ||
324 | * @count: the number of minor numbers this cdev is occupying | ||
325 | * @name: name of this range of devices | ||
326 | * | ||
327 | * Unregister and destroy the cdev occupying the region described by | ||
328 | * @major, @baseminor and @count. This function undoes what | ||
329 | * __register_chrdev() did. | ||
330 | */ | ||
331 | void __unregister_chrdev(unsigned int major, unsigned int baseminor, | ||
332 | unsigned int count, const char *name) | ||
320 | { | 333 | { |
321 | struct char_device_struct *cd; | 334 | struct char_device_struct *cd; |
322 | cd = __unregister_chrdev_region(major, 0, 256); | 335 | |
336 | cd = __unregister_chrdev_region(major, baseminor, count); | ||
323 | if (cd && cd->cdev) | 337 | if (cd && cd->cdev) |
324 | cdev_del(cd->cdev); | 338 | cdev_del(cd->cdev); |
325 | kfree(cd); | 339 | kfree(cd); |
@@ -568,6 +582,6 @@ EXPORT_SYMBOL(cdev_alloc); | |||
568 | EXPORT_SYMBOL(cdev_del); | 582 | EXPORT_SYMBOL(cdev_del); |
569 | EXPORT_SYMBOL(cdev_add); | 583 | EXPORT_SYMBOL(cdev_add); |
570 | EXPORT_SYMBOL(cdev_index); | 584 | EXPORT_SYMBOL(cdev_index); |
571 | EXPORT_SYMBOL(register_chrdev); | 585 | EXPORT_SYMBOL(__register_chrdev); |
572 | EXPORT_SYMBOL(unregister_chrdev); | 586 | EXPORT_SYMBOL(__unregister_chrdev); |
573 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); | 587 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); |
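With the range-aware __register_chrdev()/__unregister_chrdev() exported, the historical fixed-range calls can survive as thin wrappers. A sketch of how such wrappers would look; the real tree keeps them as static inlines in a header, and their exact placement is an assumption here:

    /* old API expressed via the new one: always minors 0..255 */
    static inline int register_chrdev(unsigned int major, const char *name,
                                      const struct file_operations *fops)
    {
            return __register_chrdev(major, 0, 256, name, fops);
    }

    static inline void unregister_chrdev(unsigned int major, const char *name)
    {
            __unregister_chrdev(major, 0, 256, name);
    }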
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 92888aa90749..145540a316ab 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -1,3 +1,13 @@ | |||
1 | Version 1.60 | ||
2 | ------------- | ||
3 | Fix memory leak in reconnect. Fix oops in DFS mount error path. | ||
4 | Set s_maxbytes to a smaller value (the maximum the VFS can handle) | ||
5 | so that sendfile works over cifs mounts again. Add noforcegid and | ||
6 | noforceuid mount parameters. Fix a small memory leak when using | ||
7 | ntlmv2. Allow a second mount to the same server with a different | ||
8 | port (rather than reusing the first port), but only when the user | ||
9 | explicitly overrides the port on the second mount. | ||
10 | |||
1 | Version 1.59 | 11 | Version 1.59 |
2 | ------------ | 12 | ------------ |
3 | Client uses server inode numbers (which are persistent) rather than | 13 | Client uses server inode numbers (which are persistent) rather than |
diff --git a/fs/cifs/README b/fs/cifs/README index ad92921dbde4..79c1a93400be 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -262,11 +262,11 @@ A partial list of the supported mount options follows: | |||
262 | mount. | 262 | mount. |
263 | domain Set the SMB/CIFS workgroup name prepended to the | 263 | domain Set the SMB/CIFS workgroup name prepended to the |
264 | username during CIFS session establishment | 264 | username during CIFS session establishment |
265 | forceuid Set the default uid for inodes based on the uid | 265 | forceuid Set the default uid for inodes to the uid |
266 | passed in. For mounts to servers | 266 | passed in on mount. For mounts to servers |
267 | which do support the CIFS Unix extensions, such as a | 267 | which do support the CIFS Unix extensions, such as a |
268 | properly configured Samba server, the server provides | 268 | properly configured Samba server, the server provides |
269 | the uid, gid and mode so this parameter should not be | 269 | the uid, gid and mode so this parameter should not be |
270 | specified unless the server's and clients' uid and gid | 270 | specified unless the server's and clients' uid and gid |
271 | numbering differ. If the server and client are in the | 271 | numbering differ. If the server and client are in the |
272 | same domain (e.g. running winbind or nss_ldap) and | 272 | same domain (e.g. running winbind or nss_ldap) and |
@@ -278,11 +278,7 @@ A partial list of the supported mount options follows: | |||
278 | of existing files will be the uid (gid) of the person | 278 | of existing files will be the uid (gid) of the person |
279 | who executed the mount (root, except when mount.cifs | 279 | who executed the mount (root, except when mount.cifs |
280 | is configured setuid for user mounts) unless the "uid=" | 280 | is configured setuid for user mounts) unless the "uid=" |
281 | (gid) mount option is specified. For the uid (gid) of newly | 281 | (gid) mount option is specified. Also note that permission |
282 | created files and directories, ie files created since | ||
283 | the last mount of the server share, the expected uid | ||
284 | (gid) is cached as long as the inode remains in | ||
285 | memory on the client. Also note that permission | ||
286 | checks (authorization checks) on accesses to a file occur | 282 | checks (authorization checks) on accesses to a file occur |
287 | at the server, but there are cases in which an administrator | 283 | at the server, but there are cases in which an administrator |
288 | may want to restrict at the client as well. For those | 284 | may want to restrict at the client as well. For those |
@@ -290,12 +286,15 @@ A partial list of the supported mount options follows: | |||
290 | (such as Windows), permissions can also be checked at the | 286 | (such as Windows), permissions can also be checked at the |
291 | client, and a crude form of client side permission checking | 287 | client, and a crude form of client side permission checking |
292 | can be enabled by specifying file_mode and dir_mode on | 288 | can be enabled by specifying file_mode and dir_mode on |
293 | the client. Note that the mount.cifs helper must be | 289 | the client. (default) |
294 | at version 1.10 or higher to support specifying the uid | 290 | forcegid (similar to above but for the groupid instead of uid) (default) |
295 | (or gid) in non-numeric form. | 291 | noforceuid Fill in file owner information (uid) by requesting it from |
296 | forcegid (similar to above but for the groupid instead of uid) | 292 | the server if possible. With this option, the value given in |
293 | the uid= option (on mount) will only be used if the server | ||
294 | cannot return uids on inodes. | ||
295 | noforcegid (similar to above but for the group owner, gid, instead of uid) | ||
297 | uid Set the default uid for inodes, and indicate to the | 296 | uid Set the default uid for inodes, and indicate to the |
298 | cifs kernel driver which local user mounted . If the server | 297 | cifs kernel driver which local user mounted. If the server |
299 | supports the unix extensions the default uid is | 298 | supports the unix extensions the default uid is |
300 | not used to fill in the owner fields of inodes (files) | 299 | not used to fill in the owner fields of inodes (files) |
301 | unless the "forceuid" parameter is specified. | 300 | unless the "forceuid" parameter is specified. |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 3bb11be8b6a8..606912d8f2a8 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -55,7 +55,7 @@ void cifs_dfs_release_automount_timer(void) | |||
55 | * i.e. strips from the UNC the trailing path that is not part of | 55 | * i.e. strips from the UNC the trailing path that is not part of |
56 | * the share name, and fixes up a missing '\' at the beginning of | 56 | * the share name, and fixes up a missing '\' at the beginning of |
57 | * the DFS node referral if necessary. | 57 | * the DFS node referral if necessary. |
58 | * Returns pointer to share name on success or NULL on error. | 58 | * Returns pointer to share name on success or ERR_PTR on error. |
59 | * Caller is responsible for freeing returned string. | 59 | * Caller is responsible for freeing returned string. |
60 | */ | 60 | */ |
61 | static char *cifs_get_share_name(const char *node_name) | 61 | static char *cifs_get_share_name(const char *node_name) |
@@ -68,7 +68,7 @@ static char *cifs_get_share_name(const char *node_name) | |||
68 | UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */, | 68 | UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */, |
69 | GFP_KERNEL); | 69 | GFP_KERNEL); |
70 | if (!UNC) | 70 | if (!UNC) |
71 | return NULL; | 71 | return ERR_PTR(-ENOMEM); |
72 | 72 | ||
73 | /* get share name and server name */ | 73 | /* get share name and server name */ |
74 | if (node_name[1] != '\\') { | 74 | if (node_name[1] != '\\') { |
@@ -87,7 +87,7 @@ static char *cifs_get_share_name(const char *node_name) | |||
87 | cERROR(1, ("%s: no server name end in node name: %s", | 87 | cERROR(1, ("%s: no server name end in node name: %s", |
88 | __func__, node_name)); | 88 | __func__, node_name)); |
89 | kfree(UNC); | 89 | kfree(UNC); |
90 | return NULL; | 90 | return ERR_PTR(-EINVAL); |
91 | } | 91 | } |
92 | 92 | ||
93 | /* find sharename end */ | 93 | /* find sharename end */ |
@@ -133,6 +133,12 @@ char *cifs_compose_mount_options(const char *sb_mountdata, | |||
133 | return ERR_PTR(-EINVAL); | 133 | return ERR_PTR(-EINVAL); |
134 | 134 | ||
135 | *devname = cifs_get_share_name(ref->node_name); | 135 | *devname = cifs_get_share_name(ref->node_name); |
136 | if (IS_ERR(*devname)) { | ||
137 | rc = PTR_ERR(*devname); | ||
138 | *devname = NULL; | ||
139 | goto compose_mount_options_err; | ||
140 | } | ||
141 | |||
136 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); | 142 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); |
137 | if (rc != 0) { | 143 | if (rc != 0) { |
138 | cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", | 144 | cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 051caecf7d67..8ec7736ce954 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -125,7 +125,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
125 | if (server->addr.sockAddr.sin_family == AF_INET) | 125 | if (server->addr.sockAddr.sin_family == AF_INET) |
126 | sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr); | 126 | sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr); |
127 | else if (server->addr.sockAddr.sin_family == AF_INET6) | 127 | else if (server->addr.sockAddr.sin_family == AF_INET6) |
128 | sprintf(dp, "ip6=%pi6", &server->addr.sockAddr6.sin6_addr); | 128 | sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr); |
129 | else | 129 | else |
130 | goto out; | 130 | goto out; |
131 | 131 | ||
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 60e3c4253de0..714a542cbafc 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
@@ -44,7 +44,7 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, | |||
44 | int maxwords = maxbytes / 2; | 44 | int maxwords = maxbytes / 2; |
45 | char tmp[NLS_MAX_CHARSET_SIZE]; | 45 | char tmp[NLS_MAX_CHARSET_SIZE]; |
46 | 46 | ||
47 | for (i = 0; from[i] && i < maxwords; i++) { | 47 | for (i = 0; i < maxwords && from[i]; i++) { |
48 | charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, | 48 | charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, |
49 | NLS_MAX_CHARSET_SIZE); | 49 | NLS_MAX_CHARSET_SIZE); |
50 | if (charlen > 0) | 50 | if (charlen > 0) |
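The reordering is a real fix, not style: with `from[i] && i < maxwords`, from[maxwords] is dereferenced before the bound is checked, reading one word past the permitted range. Putting the bound first lets && short-circuit the access. The idiom in isolation (consume() is a hypothetical placeholder):

    /* walk at most maxwords entries, stopping early at a NUL word;
     * from[i] is only evaluated once i < maxwords has held, so the
     * loop never reads past the caller-supplied limit */
    for (i = 0; i < maxwords && from[i]; i++)
            consume(le16_to_cpu(from[i]));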
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 6941c22398a6..7dfe0842a6f6 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -607,7 +607,7 @@ static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | |||
607 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); | 607 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); |
608 | 608 | ||
609 | pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); | 609 | pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); |
610 | atomic_dec(&open_file->wrtPending); | 610 | cifsFileInfo_put(open_file); |
611 | return pntsd; | 611 | return pntsd; |
612 | } | 612 | } |
613 | 613 | ||
@@ -665,7 +665,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | |||
665 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); | 665 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); |
666 | 666 | ||
667 | rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); | 667 | rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); |
668 | atomic_dec(&open_file->wrtPending); | 668 | cifsFileInfo_put(open_file); |
669 | return rc; | 669 | return rc; |
670 | } | 670 | } |
671 | 671 | ||
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 7c9809523f42..7efe1745494d 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -373,6 +373,7 @@ calc_exit_2: | |||
373 | compare with the NTLM example */ | 373 | compare with the NTLM example */ |
374 | hmac_md5_final(ses->server->ntlmv2_hash, pctxt); | 374 | hmac_md5_final(ses->server->ntlmv2_hash, pctxt); |
375 | 375 | ||
376 | kfree(pctxt); | ||
376 | return rc; | 377 | return rc; |
377 | } | 378 | } |
378 | 379 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 44f30504b82d..3610e9958b4c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -361,13 +361,10 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) | |||
361 | static int | 361 | static int |
362 | cifs_show_options(struct seq_file *s, struct vfsmount *m) | 362 | cifs_show_options(struct seq_file *s, struct vfsmount *m) |
363 | { | 363 | { |
364 | struct cifs_sb_info *cifs_sb; | 364 | struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); |
365 | struct cifsTconInfo *tcon; | 365 | struct cifsTconInfo *tcon = cifs_sb->tcon; |
366 | |||
367 | cifs_sb = CIFS_SB(m->mnt_sb); | ||
368 | tcon = cifs_sb->tcon; | ||
369 | 366 | ||
370 | seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); | 367 | seq_printf(s, ",unc=%s", tcon->treeName); |
371 | if (tcon->ses->userName) | 368 | if (tcon->ses->userName) |
372 | seq_printf(s, ",username=%s", tcon->ses->userName); | 369 | seq_printf(s, ",username=%s", tcon->ses->userName); |
373 | if (tcon->ses->domainName) | 370 | if (tcon->ses->domainName) |
@@ -376,10 +373,14 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) | |||
376 | seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); | 373 | seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); |
377 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) | 374 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) |
378 | seq_printf(s, ",forceuid"); | 375 | seq_printf(s, ",forceuid"); |
376 | else | ||
377 | seq_printf(s, ",noforceuid"); | ||
379 | 378 | ||
380 | seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); | 379 | seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); |
381 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) | 380 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) |
382 | seq_printf(s, ",forcegid"); | 381 | seq_printf(s, ",forcegid"); |
382 | else | ||
383 | seq_printf(s, ",noforcegid"); | ||
383 | 384 | ||
384 | cifs_show_address(s, tcon->ses->server); | 385 | cifs_show_address(s, tcon->ses->server); |
385 | 386 | ||
@@ -985,19 +986,19 @@ static int cifs_oplock_thread(void *dummyarg) | |||
985 | if (try_to_freeze()) | 986 | if (try_to_freeze()) |
986 | continue; | 987 | continue; |
987 | 988 | ||
988 | spin_lock(&GlobalMid_Lock); | 989 | spin_lock(&cifs_oplock_lock); |
989 | if (list_empty(&GlobalOplock_Q)) { | 990 | if (list_empty(&cifs_oplock_list)) { |
990 | spin_unlock(&GlobalMid_Lock); | 991 | spin_unlock(&cifs_oplock_lock); |
991 | set_current_state(TASK_INTERRUPTIBLE); | 992 | set_current_state(TASK_INTERRUPTIBLE); |
992 | schedule_timeout(39*HZ); | 993 | schedule_timeout(39*HZ); |
993 | } else { | 994 | } else { |
994 | oplock_item = list_entry(GlobalOplock_Q.next, | 995 | oplock_item = list_entry(cifs_oplock_list.next, |
995 | struct oplock_q_entry, qhead); | 996 | struct oplock_q_entry, qhead); |
996 | cFYI(1, ("found oplock item to write out")); | 997 | cFYI(1, ("found oplock item to write out")); |
997 | pTcon = oplock_item->tcon; | 998 | pTcon = oplock_item->tcon; |
998 | inode = oplock_item->pinode; | 999 | inode = oplock_item->pinode; |
999 | netfid = oplock_item->netfid; | 1000 | netfid = oplock_item->netfid; |
1000 | spin_unlock(&GlobalMid_Lock); | 1001 | spin_unlock(&cifs_oplock_lock); |
1001 | DeleteOplockQEntry(oplock_item); | 1002 | DeleteOplockQEntry(oplock_item); |
1002 | /* can not grab inode sem here since it would | 1003 | /* can not grab inode sem here since it would |
1003 | deadlock when oplock received on delete | 1004 | deadlock when oplock received on delete |
@@ -1054,7 +1055,7 @@ init_cifs(void) | |||
1054 | int rc = 0; | 1055 | int rc = 0; |
1055 | cifs_proc_init(); | 1056 | cifs_proc_init(); |
1056 | INIT_LIST_HEAD(&cifs_tcp_ses_list); | 1057 | INIT_LIST_HEAD(&cifs_tcp_ses_list); |
1057 | INIT_LIST_HEAD(&GlobalOplock_Q); | 1058 | INIT_LIST_HEAD(&cifs_oplock_list); |
1058 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 1059 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
1059 | INIT_LIST_HEAD(&GlobalDnotifyReqList); | 1060 | INIT_LIST_HEAD(&GlobalDnotifyReqList); |
1060 | INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); | 1061 | INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); |
@@ -1083,6 +1084,7 @@ init_cifs(void) | |||
1083 | rwlock_init(&GlobalSMBSeslock); | 1084 | rwlock_init(&GlobalSMBSeslock); |
1084 | rwlock_init(&cifs_tcp_ses_lock); | 1085 | rwlock_init(&cifs_tcp_ses_lock); |
1085 | spin_lock_init(&GlobalMid_Lock); | 1086 | spin_lock_init(&GlobalMid_Lock); |
1087 | spin_lock_init(&cifs_oplock_lock); | ||
1086 | 1088 | ||
1087 | if (cifs_max_pending < 2) { | 1089 | if (cifs_max_pending < 2) { |
1088 | cifs_max_pending = 2; | 1090 | cifs_max_pending = 2; |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 6c170948300d..094325e3f714 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
113 | extern const struct export_operations cifs_export_ops; | 113 | extern const struct export_operations cifs_export_ops; |
114 | #endif /* EXPERIMENTAL */ | 114 | #endif /* EXPERIMENTAL */ |
115 | 115 | ||
116 | #define CIFS_VERSION "1.60" | 116 | #define CIFS_VERSION "1.61" |
117 | #endif /* _CIFSFS_H */ | 117 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6084d6379c03..6cfc81a32703 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -351,11 +351,24 @@ struct cifsFileInfo { | |||
351 | bool closePend:1; /* file is marked to close */ | 351 | bool closePend:1; /* file is marked to close */ |
352 | bool invalidHandle:1; /* file closed via session abend */ | 352 | bool invalidHandle:1; /* file closed via session abend */ |
353 | bool messageMode:1; /* for pipes: message vs byte mode */ | 353 | bool messageMode:1; /* for pipes: message vs byte mode */ |
354 | atomic_t wrtPending; /* handle in use - defer close */ | 354 | atomic_t count; /* reference count */ |
355 | struct mutex fh_mutex; /* prevents reopen race after dead ses*/ | 355 | struct mutex fh_mutex; /* prevents reopen race after dead ses*/ |
356 | struct cifs_search_info srch_inf; | 356 | struct cifs_search_info srch_inf; |
357 | }; | 357 | }; |
358 | 358 | ||
359 | /* Take a reference on the file private data */ | ||
360 | static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) | ||
361 | { | ||
362 | atomic_inc(&cifs_file->count); | ||
363 | } | ||
364 | |||
365 | /* Release a reference on the file private data */ | ||
366 | static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | ||
367 | { | ||
368 | if (atomic_dec_and_test(&cifs_file->count)) | ||
369 | kfree(cifs_file); | ||
370 | } | ||
371 | |||
359 | /* | 372 | /* |
360 | * One of these for each file inode | 373 | * One of these for each file inode |
361 | */ | 374 | */ |
@@ -656,7 +669,11 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; | |||
656 | */ | 669 | */ |
657 | GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; | 670 | GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; |
658 | 671 | ||
659 | GLOBAL_EXTERN struct list_head GlobalOplock_Q; | 672 | /* Global list of oplocks */ |
673 | GLOBAL_EXTERN struct list_head cifs_oplock_list; | ||
674 | |||
675 | /* Protects the cifs_oplock_list */ | ||
676 | GLOBAL_EXTERN spinlock_t cifs_oplock_lock; | ||
660 | 677 | ||
661 | /* Outstanding dir notify requests */ | 678 | /* Outstanding dir notify requests */ |
662 | GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; | 679 | GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; |
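The file.c and inode.c hunks below all follow the same pattern built on these two helpers: pin the handle while a lock is dropped, release it when done. A sketch of a typical caller, under the assumption that the handle was found on a list protected by GlobalSMBSeslock:

    read_lock(&GlobalSMBSeslock);
    open_file = ...;                /* locate handle on the inode's list */
    cifsFileInfo_get(open_file);    /* cannot be freed while we hold a ref */
    read_unlock(&GlobalSMBSeslock);

    /* ... issue I/O against open_file->netfid ... */

    cifsFileInfo_put(open_file);    /* kfree()d here on the last reference */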
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1866bc2927d4..301e307e1279 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -100,110 +100,138 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) | |||
100 | to this tcon */ | 100 | to this tcon */ |
101 | } | 101 | } |
102 | 102 | ||
103 | /* Allocate and return pointer to an SMB request buffer, and set basic | 103 | /* reconnect the socket, tcon, and smb session if needed */ |
104 | SMB information in the SMB header. If the return code is zero, this | ||
105 | function must have filled in request_buf pointer */ | ||
106 | static int | 104 | static int |
107 | small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | 105 | cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) |
108 | void **request_buf) | ||
109 | { | 106 | { |
110 | int rc = 0; | 107 | int rc = 0; |
108 | struct cifsSesInfo *ses; | ||
109 | struct TCP_Server_Info *server; | ||
110 | struct nls_table *nls_codepage; | ||
111 | 111 | ||
112 | /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so | 112 | /* |
113 | check for tcp and smb session status done differently | 113 | * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for |
114 | for those three - in the calling routine */ | 114 | * tcp and smb session status done differently for those three - in the |
115 | if (tcon) { | 115 | * calling routine |
116 | if (tcon->tidStatus == CifsExiting) { | 116 | */ |
117 | /* only tree disconnect, open, and write, | 117 | if (!tcon) |
118 | (and ulogoff which does not have tcon) | 118 | return 0; |
119 | are allowed as we start force umount */ | 119 | |
120 | if ((smb_command != SMB_COM_WRITE_ANDX) && | 120 | ses = tcon->ses; |
121 | (smb_command != SMB_COM_OPEN_ANDX) && | 121 | server = ses->server; |
122 | (smb_command != SMB_COM_TREE_DISCONNECT)) { | 122 | |
123 | cFYI(1, ("can not send cmd %d while umounting", | 123 | /* |
124 | smb_command)); | 124 | * only tree disconnect, open, and write, (and ulogoff which does not |
125 | return -ENODEV; | 125 | * have tcon) are allowed as we start force umount |
126 | } | 126 | */ |
127 | if (tcon->tidStatus == CifsExiting) { | ||
128 | if (smb_command != SMB_COM_WRITE_ANDX && | ||
129 | smb_command != SMB_COM_OPEN_ANDX && | ||
130 | smb_command != SMB_COM_TREE_DISCONNECT) { | ||
131 | cFYI(1, ("can not send cmd %d while umounting", | ||
132 | smb_command)); | ||
133 | return -ENODEV; | ||
127 | } | 134 | } |
128 | if ((tcon->ses) && (tcon->ses->status != CifsExiting) && | 135 | } |
129 | (tcon->ses->server)) { | ||
130 | struct nls_table *nls_codepage; | ||
131 | /* Give Demultiplex thread up to 10 seconds to | ||
132 | reconnect, should be greater than cifs socket | ||
133 | timeout which is 7 seconds */ | ||
134 | while (tcon->ses->server->tcpStatus == | ||
135 | CifsNeedReconnect) { | ||
136 | wait_event_interruptible_timeout(tcon->ses->server->response_q, | ||
137 | (tcon->ses->server->tcpStatus == | ||
138 | CifsGood), 10 * HZ); | ||
139 | if (tcon->ses->server->tcpStatus == | ||
140 | CifsNeedReconnect) { | ||
141 | /* on "soft" mounts we wait once */ | ||
142 | if (!tcon->retry || | ||
143 | (tcon->ses->status == CifsExiting)) { | ||
144 | cFYI(1, ("gave up waiting on " | ||
145 | "reconnect in smb_init")); | ||
146 | return -EHOSTDOWN; | ||
147 | } /* else "hard" mount - keep retrying | ||
148 | until process is killed or server | ||
149 | comes back on-line */ | ||
150 | } else /* TCP session is reestablished now */ | ||
151 | break; | ||
152 | } | ||
153 | 136 | ||
154 | nls_codepage = load_nls_default(); | 137 | if (ses->status == CifsExiting) |
155 | /* need to prevent multiple threads trying to | 138 | return -EIO; |
156 | simultaneously reconnect the same SMB session */ | ||
157 | down(&tcon->ses->sesSem); | ||
158 | if (tcon->ses->need_reconnect) | ||
159 | rc = cifs_setup_session(0, tcon->ses, | ||
160 | nls_codepage); | ||
161 | if (!rc && (tcon->need_reconnect)) { | ||
162 | mark_open_files_invalid(tcon); | ||
163 | rc = CIFSTCon(0, tcon->ses, tcon->treeName, | ||
164 | tcon, nls_codepage); | ||
165 | up(&tcon->ses->sesSem); | ||
166 | /* BB FIXME add code to check if wsize needs | ||
167 | update due to negotiated smb buffer size | ||
168 | shrinking */ | ||
169 | if (rc == 0) { | ||
170 | atomic_inc(&tconInfoReconnectCount); | ||
171 | /* tell server Unix caps we support */ | ||
172 | if (tcon->ses->capabilities & CAP_UNIX) | ||
173 | reset_cifs_unix_caps( | ||
174 | 0 /* no xid */, | ||
175 | tcon, | ||
176 | NULL /* we do not know sb */, | ||
177 | NULL /* no vol info */); | ||
178 | } | ||
179 | 139 | ||
180 | cFYI(1, ("reconnect tcon rc = %d", rc)); | 140 | /* |
181 | /* Removed call to reopen open files here. | 141 | * Give demultiplex thread up to 10 seconds to reconnect, should be |
182 | It is safer (and faster) to reopen files | 142 | * greater than cifs socket timeout which is 7 seconds |
183 | one at a time as needed in read and write */ | 143 | */ |
184 | 144 | while (server->tcpStatus == CifsNeedReconnect) { | |
185 | /* Check if handle based operation so we | 145 | wait_event_interruptible_timeout(server->response_q, |
186 | know whether we can continue or not without | 146 | (server->tcpStatus == CifsGood), 10 * HZ); |
187 | returning to caller to reset file handle */ | ||
188 | switch (smb_command) { | ||
189 | case SMB_COM_READ_ANDX: | ||
190 | case SMB_COM_WRITE_ANDX: | ||
191 | case SMB_COM_CLOSE: | ||
192 | case SMB_COM_FIND_CLOSE2: | ||
193 | case SMB_COM_LOCKING_ANDX: { | ||
194 | unload_nls(nls_codepage); | ||
195 | return -EAGAIN; | ||
196 | } | ||
197 | } | ||
198 | } else { | ||
199 | up(&tcon->ses->sesSem); | ||
200 | } | ||
201 | unload_nls(nls_codepage); | ||
202 | 147 | ||
203 | } else { | 148 | /* is the TCP session reestablished now? */ |
204 | return -EIO; | 149 | if (server->tcpStatus != CifsNeedReconnect) |
150 | break; | ||
151 | |||
152 | /* | ||
153 | * on "soft" mounts we wait once. Hard mounts keep | ||
154 | * retrying until the process is killed or the server comes | ||
155 | * back on-line | ||
156 | */ | ||
157 | if (!tcon->retry || ses->status == CifsExiting) { | ||
158 | cFYI(1, ("gave up waiting on reconnect in smb_init")); | ||
159 | return -EHOSTDOWN; | ||
205 | } | 160 | } |
206 | } | 161 | } |
162 | |||
163 | if (!ses->need_reconnect && !tcon->need_reconnect) | ||
164 | return 0; | ||
165 | |||
166 | nls_codepage = load_nls_default(); | ||
167 | |||
168 | /* | ||
169 | * need to prevent multiple threads trying to simultaneously | ||
170 | * reconnect the same SMB session | ||
171 | */ | ||
172 | down(&ses->sesSem); | ||
173 | if (ses->need_reconnect) | ||
174 | rc = cifs_setup_session(0, ses, nls_codepage); | ||
175 | |||
176 | /* do we need to reconnect tcon? */ | ||
177 | if (rc || !tcon->need_reconnect) { | ||
178 | up(&ses->sesSem); | ||
179 | goto out; | ||
180 | } | ||
181 | |||
182 | mark_open_files_invalid(tcon); | ||
183 | rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage); | ||
184 | up(&ses->sesSem); | ||
185 | cFYI(1, ("reconnect tcon rc = %d", rc)); | ||
186 | |||
187 | if (rc) | ||
188 | goto out; | ||
189 | |||
190 | /* | ||
191 | * FIXME: check if wsize needs updated due to negotiated smb buffer | ||
192 | * size shrinking | ||
193 | */ | ||
194 | atomic_inc(&tconInfoReconnectCount); | ||
195 | |||
196 | /* tell server Unix caps we support */ | ||
197 | if (ses->capabilities & CAP_UNIX) | ||
198 | reset_cifs_unix_caps(0, tcon, NULL, NULL); | ||
199 | |||
200 | /* | ||
201 | * Removed call to reopen open files here. It is safer (and faster) to | ||
202 | * reopen files one at a time as needed in read and write. | ||
203 | * | ||
204 | * FIXME: what about file locks? don't we need to reclaim them ASAP? | ||
205 | */ | ||
206 | |||
207 | out: | ||
208 | /* | ||
209 | * Check if handle based operation so we know whether we can continue | ||
210 | * or not without returning to caller to reset file handle | ||
211 | */ | ||
212 | switch (smb_command) { | ||
213 | case SMB_COM_READ_ANDX: | ||
214 | case SMB_COM_WRITE_ANDX: | ||
215 | case SMB_COM_CLOSE: | ||
216 | case SMB_COM_FIND_CLOSE2: | ||
217 | case SMB_COM_LOCKING_ANDX: | ||
218 | rc = -EAGAIN; | ||
219 | } | ||
220 | |||
221 | unload_nls(nls_codepage); | ||
222 | return rc; | ||
223 | } | ||
224 | |||
225 | /* Allocate and return pointer to an SMB request buffer, and set basic | ||
226 | SMB information in the SMB header. If the return code is zero, this | ||
227 | function must have filled in request_buf pointer */ | ||
228 | static int | ||
229 | small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | ||
230 | void **request_buf) | ||
231 | { | ||
232 | int rc = 0; | ||
233 | |||
234 | rc = cifs_reconnect_tcon(tcon, smb_command); | ||
207 | if (rc) | 235 | if (rc) |
208 | return rc; | 236 | return rc; |
209 | 237 | ||
@@ -256,101 +284,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | |||
256 | { | 284 | { |
257 | int rc = 0; | 285 | int rc = 0; |
258 | 286 | ||
259 | /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so | 287 | rc = cifs_reconnect_tcon(tcon, smb_command); |
260 | check for tcp and smb session status done differently | ||
261 | for those three - in the calling routine */ | ||
262 | if (tcon) { | ||
263 | if (tcon->tidStatus == CifsExiting) { | ||
264 | /* only tree disconnect, open, and write, | ||
265 | (and ulogoff which does not have tcon) | ||
266 | are allowed as we start force umount */ | ||
267 | if ((smb_command != SMB_COM_WRITE_ANDX) && | ||
268 | (smb_command != SMB_COM_OPEN_ANDX) && | ||
269 | (smb_command != SMB_COM_TREE_DISCONNECT)) { | ||
270 | cFYI(1, ("can not send cmd %d while umounting", | ||
271 | smb_command)); | ||
272 | return -ENODEV; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | if ((tcon->ses) && (tcon->ses->status != CifsExiting) && | ||
277 | (tcon->ses->server)) { | ||
278 | struct nls_table *nls_codepage; | ||
279 | /* Give Demultiplex thread up to 10 seconds to | ||
280 | reconnect, should be greater than cifs socket | ||
281 | timeout which is 7 seconds */ | ||
282 | while (tcon->ses->server->tcpStatus == | ||
283 | CifsNeedReconnect) { | ||
284 | wait_event_interruptible_timeout(tcon->ses->server->response_q, | ||
285 | (tcon->ses->server->tcpStatus == | ||
286 | CifsGood), 10 * HZ); | ||
287 | if (tcon->ses->server->tcpStatus == | ||
288 | CifsNeedReconnect) { | ||
289 | /* on "soft" mounts we wait once */ | ||
290 | if (!tcon->retry || | ||
291 | (tcon->ses->status == CifsExiting)) { | ||
292 | cFYI(1, ("gave up waiting on " | ||
293 | "reconnect in smb_init")); | ||
294 | return -EHOSTDOWN; | ||
295 | } /* else "hard" mount - keep retrying | ||
296 | until process is killed or server | ||
297 | comes on-line */ | ||
298 | } else /* TCP session is reestablished now */ | ||
299 | break; | ||
300 | } | ||
301 | nls_codepage = load_nls_default(); | ||
302 | /* need to prevent multiple threads trying to | ||
303 | simultaneously reconnect the same SMB session */ | ||
304 | down(&tcon->ses->sesSem); | ||
305 | if (tcon->ses->need_reconnect) | ||
306 | rc = cifs_setup_session(0, tcon->ses, | ||
307 | nls_codepage); | ||
308 | if (!rc && (tcon->need_reconnect)) { | ||
309 | mark_open_files_invalid(tcon); | ||
310 | rc = CIFSTCon(0, tcon->ses, tcon->treeName, | ||
311 | tcon, nls_codepage); | ||
312 | up(&tcon->ses->sesSem); | ||
313 | /* BB FIXME add code to check if wsize needs | ||
314 | update due to negotiated smb buffer size | ||
315 | shrinking */ | ||
316 | if (rc == 0) { | ||
317 | atomic_inc(&tconInfoReconnectCount); | ||
318 | /* tell server Unix caps we support */ | ||
319 | if (tcon->ses->capabilities & CAP_UNIX) | ||
320 | reset_cifs_unix_caps( | ||
321 | 0 /* no xid */, | ||
322 | tcon, | ||
323 | NULL /* do not know sb */, | ||
324 | NULL /* no vol info */); | ||
325 | } | ||
326 | |||
327 | cFYI(1, ("reconnect tcon rc = %d", rc)); | ||
328 | /* Removed call to reopen open files here. | ||
329 | It is safer (and faster) to reopen files | ||
330 | one at a time as needed in read and write */ | ||
331 | |||
332 | /* Check if handle based operation so we | ||
333 | know whether we can continue or not without | ||
334 | returning to caller to reset file handle */ | ||
335 | switch (smb_command) { | ||
336 | case SMB_COM_READ_ANDX: | ||
337 | case SMB_COM_WRITE_ANDX: | ||
338 | case SMB_COM_CLOSE: | ||
339 | case SMB_COM_FIND_CLOSE2: | ||
340 | case SMB_COM_LOCKING_ANDX: { | ||
341 | unload_nls(nls_codepage); | ||
342 | return -EAGAIN; | ||
343 | } | ||
344 | } | ||
345 | } else { | ||
346 | up(&tcon->ses->sesSem); | ||
347 | } | ||
348 | unload_nls(nls_codepage); | ||
349 | |||
350 | } else { | ||
351 | return -EIO; | ||
352 | } | ||
353 | } | ||
354 | if (rc) | 288 | if (rc) |
355 | return rc; | 289 | return rc; |
356 | 290 | ||
@@ -3961,6 +3895,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, | |||
3961 | if (is_unicode) { | 3895 | if (is_unicode) { |
3962 | __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, | 3896 | __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, |
3963 | GFP_KERNEL); | 3897 | GFP_KERNEL); |
3898 | if (tmp == NULL) { | ||
3899 | rc = -ENOMEM; | ||
3900 | goto parse_DFS_referrals_exit; | ||
3901 | } | ||
3964 | cifsConvertToUCS((__le16 *) tmp, searchName, | 3902 | cifsConvertToUCS((__le16 *) tmp, searchName, |
3965 | PATH_MAX, nls_codepage, remap); | 3903 | PATH_MAX, nls_codepage, remap); |
3966 | node->path_consumed = cifs_ucs2_bytes(tmp, | 3904 | node->path_consumed = cifs_ucs2_bytes(tmp, |
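cifs_reconnect_tcon() deliberately converts success into -EAGAIN for the handle-based commands listed in its switch, because a rebuilt session invalidates every old netfid. A hedged sketch of how a caller is expected to react; this is illustrative, not a verbatim caller, and the argument lists are elided:

    rc = CIFSSMBWrite(xid, tcon, netfid, ...);
    if (rc == -EAGAIN) {
            /* session was torn down and rebuilt: the old file handle
             * is stale, so reopen it and retry instead of failing */
            rc = cifs_reopen_file(file, false);
            if (rc == 0)
                    rc = CIFSSMBWrite(xid, tcon, netfid, ...);
    }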
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9bb5c8750736..d49682433c20 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -803,6 +803,10 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
803 | char *data; | 803 | char *data; |
804 | unsigned int temp_len, i, j; | 804 | unsigned int temp_len, i, j; |
805 | char separator[2]; | 805 | char separator[2]; |
806 | short int override_uid = -1; | ||
807 | short int override_gid = -1; | ||
808 | bool uid_specified = false; | ||
809 | bool gid_specified = false; | ||
806 | 810 | ||
807 | separator[0] = ','; | 811 | separator[0] = ','; |
808 | separator[1] = 0; | 812 | separator[1] = 0; |
@@ -1093,18 +1097,20 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1093 | "too long.\n"); | 1097 | "too long.\n"); |
1094 | return 1; | 1098 | return 1; |
1095 | } | 1099 | } |
1096 | } else if (strnicmp(data, "uid", 3) == 0) { | 1100 | } else if (!strnicmp(data, "uid", 3) && value && *value) { |
1097 | if (value && *value) | 1101 | vol->linux_uid = simple_strtoul(value, &value, 0); |
1098 | vol->linux_uid = | 1102 | uid_specified = true; |
1099 | simple_strtoul(value, &value, 0); | 1103 | } else if (!strnicmp(data, "forceuid", 8)) { |
1100 | } else if (strnicmp(data, "forceuid", 8) == 0) { | 1104 | override_uid = 1; |
1101 | vol->override_uid = 1; | 1105 | } else if (!strnicmp(data, "noforceuid", 10)) { |
1102 | } else if (strnicmp(data, "gid", 3) == 0) { | 1106 | override_uid = 0; |
1103 | if (value && *value) | 1107 | } else if (!strnicmp(data, "gid", 3) && value && *value) { |
1104 | vol->linux_gid = | 1108 | vol->linux_gid = simple_strtoul(value, &value, 0); |
1105 | simple_strtoul(value, &value, 0); | 1109 | gid_specified = true; |
1106 | } else if (strnicmp(data, "forcegid", 8) == 0) { | 1110 | } else if (!strnicmp(data, "forcegid", 8)) { |
1107 | vol->override_gid = 1; | 1111 | override_gid = 1; |
1112 | } else if (!strnicmp(data, "noforcegid", 10)) { | ||
1113 | override_gid = 0; | ||
1108 | } else if (strnicmp(data, "file_mode", 4) == 0) { | 1114 | } else if (strnicmp(data, "file_mode", 4) == 0) { |
1109 | if (value && *value) { | 1115 | if (value && *value) { |
1110 | vol->file_mode = | 1116 | vol->file_mode = |
@@ -1355,11 +1361,23 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1355 | if (vol->UNCip == NULL) | 1361 | if (vol->UNCip == NULL) |
1356 | vol->UNCip = &vol->UNC[2]; | 1362 | vol->UNCip = &vol->UNC[2]; |
1357 | 1363 | ||
1364 | if (uid_specified) | ||
1365 | vol->override_uid = override_uid; | ||
1366 | else if (override_uid == 1) | ||
1367 | printk(KERN_NOTICE "CIFS: ignoring forceuid mount option " | ||
1368 | "specified with no uid= option.\n"); | ||
1369 | |||
1370 | if (gid_specified) | ||
1371 | vol->override_gid = override_gid; | ||
1372 | else if (override_gid == 1) | ||
1373 | printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " | ||
1374 | "specified with no gid= option.\n"); | ||
1375 | |||
1358 | return 0; | 1376 | return 0; |
1359 | } | 1377 | } |
1360 | 1378 | ||
1361 | static struct TCP_Server_Info * | 1379 | static struct TCP_Server_Info * |
1362 | cifs_find_tcp_session(struct sockaddr_storage *addr) | 1380 | cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port) |
1363 | { | 1381 | { |
1364 | struct list_head *tmp; | 1382 | struct list_head *tmp; |
1365 | struct TCP_Server_Info *server; | 1383 | struct TCP_Server_Info *server; |
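forceuid/noforceuid (and the gid twins) are only staged in local variables during the token loop and committed after parsing finishes. That lets the last of forceuid/noforceuid win regardless of where it appears relative to uid=, and makes it possible to warn when forceuid is given without any uid= to enforce: in shell terms, "-o uid=500,forceuid" and "-o forceuid,uid=500" now behave identically. The two-phase shape, reduced to its core:

    bool uid_specified = false;
    short int override_uid = -1;    /* -1: neither force nor noforce seen */

    /* ...token loop: uid= sets uid_specified, forceuid/noforceuid set
     * override_uid to 1/0, in whatever order they appear... */

    if (uid_specified)
            vol->override_uid = override_uid;
    else if (override_uid == 1)
            printk(KERN_NOTICE "CIFS: ignoring forceuid mount option "
                   "specified with no uid= option.\n");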
@@ -1379,16 +1397,37 @@ cifs_find_tcp_session(struct sockaddr_storage *addr) | |||
1379 | if (server->tcpStatus == CifsNew) | 1397 | if (server->tcpStatus == CifsNew) |
1380 | continue; | 1398 | continue; |
1381 | 1399 | ||
1382 | if (addr->ss_family == AF_INET && | 1400 | switch (addr->ss_family) { |
1383 | (addr4->sin_addr.s_addr != | 1401 | case AF_INET: |
1384 | server->addr.sockAddr.sin_addr.s_addr)) | 1402 | if (addr4->sin_addr.s_addr == |
1385 | continue; | 1403 | server->addr.sockAddr.sin_addr.s_addr) { |
1386 | else if (addr->ss_family == AF_INET6 && | 1404 | addr4->sin_port = htons(port); |
1387 | (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, | 1405 | /* user overrode default port? */ |
1388 | &addr6->sin6_addr) || | 1406 | if (addr4->sin_port) { |
1389 | server->addr.sockAddr6.sin6_scope_id != | 1407 | if (addr4->sin_port != |
1390 | addr6->sin6_scope_id)) | 1408 | server->addr.sockAddr.sin_port) |
1391 | continue; | 1409 | continue; |
1410 | } | ||
1411 | break; | ||
1412 | } else | ||
1413 | continue; | ||
1414 | |||
1415 | case AF_INET6: | ||
1416 | if (ipv6_addr_equal(&addr6->sin6_addr, | ||
1417 | &server->addr.sockAddr6.sin6_addr) && | ||
1418 | (addr6->sin6_scope_id == | ||
1419 | server->addr.sockAddr6.sin6_scope_id)) { | ||
1420 | addr6->sin6_port = htons(port); | ||
1421 | /* user overrode default port? */ | ||
1422 | if (addr6->sin6_port) { | ||
1423 | if (addr6->sin6_port != | ||
1424 | server->addr.sockAddr6.sin6_port) | ||
1425 | continue; | ||
1426 | } | ||
1427 | break; | ||
1428 | } else | ||
1429 | continue; | ||
1430 | } | ||
1392 | 1431 | ||
1393 | ++server->srv_count; | 1432 | ++server->srv_count; |
1394 | write_unlock(&cifs_tcp_ses_lock); | 1433 | write_unlock(&cifs_tcp_ses_lock); |
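The port handling above is what lets a mount that explicitly overrides the port get a fresh TCP session instead of silently reusing an existing one on the default port (the fix noted in the 1.60 CHANGES entry). The matching rule reduced to a predicate; port_matches() is a hypothetical helper, not part of the patch:

    /* port 0 means the user gave no override, so any existing session
     * port is acceptable; a nonzero request must match exactly */
    static bool port_matches(unsigned short requested, __be16 existing)
    {
            return requested == 0 || htons(requested) == existing;
    }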
@@ -1457,7 +1496,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1457 | } | 1496 | } |
1458 | 1497 | ||
1459 | /* see if we already have a matching tcp_ses */ | 1498 | /* see if we already have a matching tcp_ses */ |
1460 | tcp_ses = cifs_find_tcp_session(&addr); | 1499 | tcp_ses = cifs_find_tcp_session(&addr, volume_info->port); |
1461 | if (tcp_ses) | 1500 | if (tcp_ses) |
1462 | return tcp_ses; | 1501 | return tcp_ses; |
1463 | 1502 | ||
@@ -2452,10 +2491,10 @@ try_mount_again: | |||
2452 | tcon->local_lease = volume_info->local_lease; | 2491 | tcon->local_lease = volume_info->local_lease; |
2453 | } | 2492 | } |
2454 | if (pSesInfo) { | 2493 | if (pSesInfo) { |
2455 | if (pSesInfo->capabilities & CAP_LARGE_FILES) { | 2494 | if (pSesInfo->capabilities & CAP_LARGE_FILES) |
2456 | sb->s_maxbytes = (u64) 1 << 63; | 2495 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
2457 | } else | 2496 | else |
2458 | sb->s_maxbytes = (u64) 1 << 31; /* 2 GB */ | 2497 | sb->s_maxbytes = MAX_NON_LFS; |
2459 | } | 2498 | } |
2460 | 2499 | ||
2461 | /* BB FIXME fix time_gran to be larger for LANMAN sessions */ | 2500 | /* BB FIXME fix time_gran to be larger for LANMAN sessions */ |
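MAX_NON_LFS and MAX_LFS_FILESIZE are the VFS's own ceilings for file offsets without and with large-file support; clamping s_maxbytes to them, rather than the open-coded 1<<63 that overflows the VFS's signed offset checks, is what makes sendfile work again per the 1.60 CHANGES entry. Roughly what include/linux/fs.h of this era defines, quoted from memory and so approximate:

    #define MAX_NON_LFS     ((1UL << 31) - 1)
    #if BITS_PER_LONG == 32
    #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG - 1)) - 1)
    #elif BITS_PER_LONG == 64
    #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
    #endif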
@@ -2544,11 +2583,20 @@ remote_path_check: | |||
2544 | 2583 | ||
2545 | if (mount_data != mount_data_global) | 2584 | if (mount_data != mount_data_global) |
2546 | kfree(mount_data); | 2585 | kfree(mount_data); |
2586 | |||
2547 | mount_data = cifs_compose_mount_options( | 2587 | mount_data = cifs_compose_mount_options( |
2548 | cifs_sb->mountdata, full_path + 1, | 2588 | cifs_sb->mountdata, full_path + 1, |
2549 | referrals, &fake_devname); | 2589 | referrals, &fake_devname); |
2550 | kfree(fake_devname); | 2590 | |
2551 | free_dfs_info_array(referrals, num_referrals); | 2591 | free_dfs_info_array(referrals, num_referrals); |
2592 | kfree(fake_devname); | ||
2593 | kfree(full_path); | ||
2594 | |||
2595 | if (IS_ERR(mount_data)) { | ||
2596 | rc = PTR_ERR(mount_data); | ||
2597 | mount_data = NULL; | ||
2598 | goto mount_fail_check; | ||
2599 | } | ||
2552 | 2600 | ||
2553 | if (tcon) | 2601 | if (tcon) |
2554 | cifs_put_tcon(tcon); | 2602 | cifs_put_tcon(tcon); |
@@ -2556,8 +2604,6 @@ remote_path_check: | |||
2556 | cifs_put_smb_ses(pSesInfo); | 2604 | cifs_put_smb_ses(pSesInfo); |
2557 | 2605 | ||
2558 | cleanup_volume_info(&volume_info); | 2606 | cleanup_volume_info(&volume_info); |
2559 | FreeXid(xid); | ||
2560 | kfree(full_path); | ||
2561 | referral_walks_count++; | 2607 | referral_walks_count++; |
2562 | goto try_mount_again; | 2608 | goto try_mount_again; |
2563 | } | 2609 | } |
@@ -2611,9 +2657,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
2611 | return -EIO; | 2657 | return -EIO; |
2612 | 2658 | ||
2613 | smb_buffer = cifs_buf_get(); | 2659 | smb_buffer = cifs_buf_get(); |
2614 | if (smb_buffer == NULL) { | 2660 | if (smb_buffer == NULL) |
2615 | return -ENOMEM; | 2661 | return -ENOMEM; |
2616 | } | 2662 | |
2617 | smb_buffer_response = smb_buffer; | 2663 | smb_buffer_response = smb_buffer; |
2618 | 2664 | ||
2619 | header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, | 2665 | header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 4326ffd90fa9..a6424cfc0121 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -153,7 +153,7 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle, | |||
153 | mutex_init(&pCifsFile->fh_mutex); | 153 | mutex_init(&pCifsFile->fh_mutex); |
154 | mutex_init(&pCifsFile->lock_mutex); | 154 | mutex_init(&pCifsFile->lock_mutex); |
155 | INIT_LIST_HEAD(&pCifsFile->llist); | 155 | INIT_LIST_HEAD(&pCifsFile->llist); |
156 | atomic_set(&pCifsFile->wrtPending, 0); | 156 | atomic_set(&pCifsFile->count, 1); |
157 | 157 | ||
158 | /* set the following in open now | 158 | /* set the following in open now |
159 | pCifsFile->pfile = file; */ | 159 | pCifsFile->pfile = file; */ |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c34b7f8a217b..fa7beac8b80e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -53,11 +53,9 @@ static inline struct cifsFileInfo *cifs_init_private( | |||
53 | private_data->pInode = inode; | 53 | private_data->pInode = inode; |
54 | private_data->invalidHandle = false; | 54 | private_data->invalidHandle = false; |
55 | private_data->closePend = false; | 55 | private_data->closePend = false; |
56 | /* we have to track num writers to the inode, since writepages | 56 | /* Initialize reference count to one. The private data is |
57 | does not tell us which handle the write is for so there can | 57 | freed on the release of the last reference */ |
58 | be a close (overlapping with write) of the filehandle that | 58 | atomic_set(&private_data->count, 1); |
59 | cifs_writepages chose to use */ | ||
60 | atomic_set(&private_data->wrtPending, 0); | ||
61 | 59 | ||
62 | return private_data; | 60 | return private_data; |
63 | } | 61 | } |
@@ -643,7 +641,7 @@ int cifs_close(struct inode *inode, struct file *file) | |||
643 | if (!pTcon->need_reconnect) { | 641 | if (!pTcon->need_reconnect) { |
644 | write_unlock(&GlobalSMBSeslock); | 642 | write_unlock(&GlobalSMBSeslock); |
645 | timeout = 2; | 643 | timeout = 2; |
646 | while ((atomic_read(&pSMBFile->wrtPending) != 0) | 644 | while ((atomic_read(&pSMBFile->count) != 1) |
647 | && (timeout <= 2048)) { | 645 | && (timeout <= 2048)) { |
648 | /* Give write a better chance to get to | 646 | /* Give write a better chance to get to |
649 | server ahead of the close. We do not | 647 | server ahead of the close. We do not |
@@ -657,8 +655,6 @@ int cifs_close(struct inode *inode, struct file *file) | |||
657 | msleep(timeout); | 655 | msleep(timeout); |
658 | timeout *= 4; | 656 | timeout *= 4; |
659 | } | 657 | } |
660 | if (atomic_read(&pSMBFile->wrtPending)) | ||
661 | cERROR(1, ("close with pending write")); | ||
662 | if (!pTcon->need_reconnect && | 658 | if (!pTcon->need_reconnect && |
663 | !pSMBFile->invalidHandle) | 659 | !pSMBFile->invalidHandle) |
664 | rc = CIFSSMBClose(xid, pTcon, | 660 | rc = CIFSSMBClose(xid, pTcon, |
@@ -681,24 +677,7 @@ int cifs_close(struct inode *inode, struct file *file) | |||
681 | list_del(&pSMBFile->flist); | 677 | list_del(&pSMBFile->flist); |
682 | list_del(&pSMBFile->tlist); | 678 | list_del(&pSMBFile->tlist); |
683 | write_unlock(&GlobalSMBSeslock); | 679 | write_unlock(&GlobalSMBSeslock); |
684 | timeout = 10; | 680 | cifsFileInfo_put(file->private_data); |
685 | /* We waited above to give the SMBWrite a chance to issue | ||
686 | on the wire (so we do not get SMBWrite returning EBADF | ||
687 | if writepages is racing with close. Note that writepages | ||
688 | does not specify a file handle, so it is possible for a file | ||
689 | to be opened twice, and the application close the "wrong" | ||
690 | file handle - in these cases we delay long enough to allow | ||
691 | the SMBWrite to get on the wire before the SMB Close. | ||
692 | We allow total wait here over 45 seconds, more than | ||
693 | oplock break time, and more than enough to allow any write | ||
694 | to complete on the server, or to time out on the client */ | ||
695 | while ((atomic_read(&pSMBFile->wrtPending) != 0) | ||
696 | && (timeout <= 50000)) { | ||
697 | cERROR(1, ("writes pending, delay free of handle")); | ||
698 | msleep(timeout); | ||
699 | timeout *= 8; | ||
700 | } | ||
701 | kfree(file->private_data); | ||
702 | file->private_data = NULL; | 681 | file->private_data = NULL; |
703 | } else | 682 | } else |
704 | rc = -EBADF; | 683 | rc = -EBADF; |
@@ -1236,7 +1215,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | |||
1236 | if (!open_file->invalidHandle) { | 1215 | if (!open_file->invalidHandle) { |
1237 | /* found a good file */ | 1216 | /* found a good file */ |
1238 | /* lock it so it will not be closed on us */ | 1217 | /* lock it so it will not be closed on us */ |
1239 | atomic_inc(&open_file->wrtPending); | 1218 | cifsFileInfo_get(open_file); |
1240 | read_unlock(&GlobalSMBSeslock); | 1219 | read_unlock(&GlobalSMBSeslock); |
1241 | return open_file; | 1220 | return open_file; |
1242 | } /* else might as well continue, and look for | 1221 | } /* else might as well continue, and look for |
@@ -1276,7 +1255,7 @@ refind_writable: | |||
1276 | if (open_file->pfile && | 1255 | if (open_file->pfile && |
1277 | ((open_file->pfile->f_flags & O_RDWR) || | 1256 | ((open_file->pfile->f_flags & O_RDWR) || |
1278 | (open_file->pfile->f_flags & O_WRONLY))) { | 1257 | (open_file->pfile->f_flags & O_WRONLY))) { |
1279 | atomic_inc(&open_file->wrtPending); | 1258 | cifsFileInfo_get(open_file); |
1280 | 1259 | ||
1281 | if (!open_file->invalidHandle) { | 1260 | if (!open_file->invalidHandle) { |
1282 | /* found a good writable file */ | 1261 | /* found a good writable file */ |
@@ -1293,7 +1272,7 @@ refind_writable: | |||
1293 | else { /* start over in case this was deleted */ | 1272 | else { /* start over in case this was deleted */ |
1294 | /* since the list could be modified */ | 1273 | /* since the list could be modified */ |
1295 | read_lock(&GlobalSMBSeslock); | 1274 | read_lock(&GlobalSMBSeslock); |
1296 | atomic_dec(&open_file->wrtPending); | 1275 | cifsFileInfo_put(open_file); |
1297 | goto refind_writable; | 1276 | goto refind_writable; |
1298 | } | 1277 | } |
1299 | } | 1278 | } |
@@ -1309,7 +1288,7 @@ refind_writable: | |||
1309 | read_lock(&GlobalSMBSeslock); | 1288 | read_lock(&GlobalSMBSeslock); |
1310 | /* can not use this handle, no write | 1289 | /* can not use this handle, no write |
1311 | pending on this one after all */ | 1290 | pending on this one after all */ |
1312 | atomic_dec(&open_file->wrtPending); | 1291 | cifsFileInfo_put(open_file); |
1313 | 1292 | ||
1314 | if (open_file->closePend) /* list could have changed */ | 1293 | if (open_file->closePend) /* list could have changed */ |
1315 | goto refind_writable; | 1294 | goto refind_writable; |
@@ -1373,7 +1352,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1373 | if (open_file) { | 1352 | if (open_file) { |
1374 | bytes_written = cifs_write(open_file->pfile, write_data, | 1353 | bytes_written = cifs_write(open_file->pfile, write_data, |
1375 | to-from, &offset); | 1354 | to-from, &offset); |
1376 | atomic_dec(&open_file->wrtPending); | 1355 | cifsFileInfo_put(open_file); |
1377 | /* Does mm or vfs already set times? */ | 1356 | /* Does mm or vfs already set times? */ |
1378 | inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); | 1357 | inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); |
1379 | if ((bytes_written > 0) && (offset)) | 1358 | if ((bytes_written > 0) && (offset)) |
@@ -1562,7 +1541,7 @@ retry: | |||
1562 | bytes_to_write, offset, | 1541 | bytes_to_write, offset, |
1563 | &bytes_written, iov, n_iov, | 1542 | &bytes_written, iov, n_iov, |
1564 | long_op); | 1543 | long_op); |
1565 | atomic_dec(&open_file->wrtPending); | 1544 | cifsFileInfo_put(open_file); |
1566 | cifs_update_eof(cifsi, offset, bytes_written); | 1545 | cifs_update_eof(cifsi, offset, bytes_written); |
1567 | 1546 | ||
1568 | if (rc || bytes_written < bytes_to_write) { | 1547 | if (rc || bytes_written < bytes_to_write) { |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 18afe57b2461..1f09c7619319 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -212,7 +212,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, | |||
212 | * junction to the new submount (ie to setup the fake directory | 212 | * junction to the new submount (ie to setup the fake directory |
213 | * which represents a DFS referral). | 213 | * which represents a DFS referral). |
214 | */ | 214 | */ |
215 | void | 215 | static void |
216 | cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) | 216 | cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) |
217 | { | 217 | { |
218 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 218 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
@@ -388,7 +388,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, | |||
388 | } | 388 | } |
389 | 389 | ||
390 | /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ | 390 | /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ |
391 | void | 391 | static void |
392 | cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, | 392 | cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, |
393 | struct cifs_sb_info *cifs_sb, bool adjust_tz) | 393 | struct cifs_sb_info *cifs_sb, bool adjust_tz) |
394 | { | 394 | { |
@@ -513,9 +513,12 @@ int cifs_get_inode_info(struct inode **pinode, | |||
513 | cifs_sb->mnt_cifs_flags & | 513 | cifs_sb->mnt_cifs_flags & |
514 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 514 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
515 | if (rc1) { | 515 | if (rc1) { |
516 | /* BB EOPNOSUPP disable SERVER_INUM? */ | ||
517 | cFYI(1, ("GetSrvInodeNum rc %d", rc1)); | 516 | cFYI(1, ("GetSrvInodeNum rc %d", rc1)); |
518 | fattr.cf_uniqueid = iunique(sb, ROOT_I); | 517 | fattr.cf_uniqueid = iunique(sb, ROOT_I); |
518 | /* disable serverino if call not supported */ | ||
519 | if (rc1 == -EINVAL) | ||
520 | cifs_sb->mnt_cifs_flags &= | ||
521 | ~CIFS_MOUNT_SERVER_INUM; | ||
519 | } | 522 | } |
520 | } else { | 523 | } else { |
521 | fattr.cf_uniqueid = iunique(sb, ROOT_I); | 524 | fattr.cf_uniqueid = iunique(sb, ROOT_I); |
@@ -797,7 +800,7 @@ set_via_filehandle: | |||
797 | if (open_file == NULL) | 800 | if (open_file == NULL) |
798 | CIFSSMBClose(xid, pTcon, netfid); | 801 | CIFSSMBClose(xid, pTcon, netfid); |
799 | else | 802 | else |
800 | atomic_dec(&open_file->wrtPending); | 803 | cifsFileInfo_put(open_file); |
801 | out: | 804 | out: |
802 | return rc; | 805 | return rc; |
803 | } | 806 | } |
@@ -1632,7 +1635,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1632 | __u32 npid = open_file->pid; | 1635 | __u32 npid = open_file->pid; |
1633 | rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, | 1636 | rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, |
1634 | npid, false); | 1637 | npid, false); |
1635 | atomic_dec(&open_file->wrtPending); | 1638 | cifsFileInfo_put(open_file); |
1636 | cFYI(1, ("SetFSize for attrs rc = %d", rc)); | 1639 | cFYI(1, ("SetFSize for attrs rc = %d", rc)); |
1637 | if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { | 1640 | if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { |
1638 | unsigned int bytes_written; | 1641 | unsigned int bytes_written; |
@@ -1787,7 +1790,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
1787 | u16 nfid = open_file->netfid; | 1790 | u16 nfid = open_file->netfid; |
1788 | u32 npid = open_file->pid; | 1791 | u32 npid = open_file->pid; |
1789 | rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); | 1792 | rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); |
1790 | atomic_dec(&open_file->wrtPending); | 1793 | cifsFileInfo_put(open_file); |
1791 | } else { | 1794 | } else { |
1792 | rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, | 1795 | rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, |
1793 | cifs_sb->local_nls, | 1796 | cifs_sb->local_nls, |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 0ad3e2d116a6..1da4ab250eae 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -119,20 +119,19 @@ AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon) | |||
119 | temp->pinode = pinode; | 119 | temp->pinode = pinode; |
120 | temp->tcon = tcon; | 120 | temp->tcon = tcon; |
121 | temp->netfid = fid; | 121 | temp->netfid = fid; |
122 | spin_lock(&GlobalMid_Lock); | 122 | spin_lock(&cifs_oplock_lock); |
123 | list_add_tail(&temp->qhead, &GlobalOplock_Q); | 123 | list_add_tail(&temp->qhead, &cifs_oplock_list); |
124 | spin_unlock(&GlobalMid_Lock); | 124 | spin_unlock(&cifs_oplock_lock); |
125 | } | 125 | } |
126 | return temp; | 126 | return temp; |
127 | |||
128 | } | 127 | } |
129 | 128 | ||
130 | void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry) | 129 | void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry) |
131 | { | 130 | { |
132 | spin_lock(&GlobalMid_Lock); | 131 | spin_lock(&cifs_oplock_lock); |
133 | /* should we check if list empty first? */ | 132 | /* should we check if list empty first? */ |
134 | list_del(&oplockEntry->qhead); | 133 | list_del(&oplockEntry->qhead); |
135 | spin_unlock(&GlobalMid_Lock); | 134 | spin_unlock(&cifs_oplock_lock); |
136 | kmem_cache_free(cifs_oplock_cachep, oplockEntry); | 135 | kmem_cache_free(cifs_oplock_cachep, oplockEntry); |
137 | } | 136 | } |
138 | 137 | ||
@@ -144,14 +143,14 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) | |||
144 | if (tcon == NULL) | 143 | if (tcon == NULL) |
145 | return; | 144 | return; |
146 | 145 | ||
147 | spin_lock(&GlobalMid_Lock); | 146 | spin_lock(&cifs_oplock_lock); |
148 | list_for_each_entry(temp, &GlobalOplock_Q, qhead) { | 147 | list_for_each_entry(temp, &cifs_oplock_list, qhead) { |
149 | if ((temp->tcon) && (temp->tcon == tcon)) { | 148 | if ((temp->tcon) && (temp->tcon == tcon)) { |
150 | list_del(&temp->qhead); | 149 | list_del(&temp->qhead); |
151 | kmem_cache_free(cifs_oplock_cachep, temp); | 150 | kmem_cache_free(cifs_oplock_cachep, temp); |
152 | } | 151 | } |
153 | } | 152 | } |
154 | spin_unlock(&GlobalMid_Lock); | 153 | spin_unlock(&cifs_oplock_lock); |
155 | } | 154 | } |
156 | 155 | ||
157 | static int | 156 | static int |
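These transport.c hunks move the oplock queue off GlobalMid_Lock and onto a lock of its own. The list handling is unchanged; the win is purely reduced contention, since MID processing and oplock queue manipulation no longer serialize against each other. The shape of the new scheme, assuming the declarations land in the usual cifsfs glue code:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    /* One lock per data structure, instead of one global lock for all. */
    static LIST_HEAD(cifs_oplock_list);
    static DEFINE_SPINLOCK(cifs_oplock_lock);

    static void oplock_enqueue(struct list_head *entry)
    {
            spin_lock(&cifs_oplock_lock);
            list_add_tail(entry, &cifs_oplock_list);
            spin_unlock(&cifs_oplock_lock);
    }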
diff --git a/fs/compat.c b/fs/compat.c index 94502dab972a..6d6f98fe64a0 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename, | |||
1485 | if (!bprm) | 1485 | if (!bprm) |
1486 | goto out_files; | 1486 | goto out_files; |
1487 | 1487 | ||
1488 | retval = -ERESTARTNOINTR; | 1488 | retval = prepare_bprm_creds(bprm); |
1489 | if (mutex_lock_interruptible(&current->cred_guard_mutex)) | 1489 | if (retval) |
1490 | goto out_free; | 1490 | goto out_free; |
1491 | current->in_execve = 1; | ||
1492 | |||
1493 | retval = -ENOMEM; | ||
1494 | bprm->cred = prepare_exec_creds(); | ||
1495 | if (!bprm->cred) | ||
1496 | goto out_unlock; | ||
1497 | 1491 | ||
1498 | retval = check_unsafe_exec(bprm); | 1492 | retval = check_unsafe_exec(bprm); |
1499 | if (retval < 0) | 1493 | if (retval < 0) |
1500 | goto out_unlock; | 1494 | goto out_free; |
1501 | clear_in_exec = retval; | 1495 | clear_in_exec = retval; |
1496 | current->in_execve = 1; | ||
1502 | 1497 | ||
1503 | file = open_exec(filename); | 1498 | file = open_exec(filename); |
1504 | retval = PTR_ERR(file); | 1499 | retval = PTR_ERR(file); |
@@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename, | |||
1547 | /* execve succeeded */ | 1542 | /* execve succeeded */ |
1548 | current->fs->in_exec = 0; | 1543 | current->fs->in_exec = 0; |
1549 | current->in_execve = 0; | 1544 | current->in_execve = 0; |
1550 | mutex_unlock(&current->cred_guard_mutex); | ||
1551 | acct_update_integrals(current); | 1545 | acct_update_integrals(current); |
1552 | free_bprm(bprm); | 1546 | free_bprm(bprm); |
1553 | if (displaced) | 1547 | if (displaced) |
@@ -1567,10 +1561,7 @@ out_file: | |||
1567 | out_unmark: | 1561 | out_unmark: |
1568 | if (clear_in_exec) | 1562 | if (clear_in_exec) |
1569 | current->fs->in_exec = 0; | 1563 | current->fs->in_exec = 0; |
1570 | |||
1571 | out_unlock: | ||
1572 | current->in_execve = 0; | 1564 | current->in_execve = 0; |
1573 | mutex_unlock(&current->cred_guard_mutex); | ||
1574 | 1565 | ||
1575 | out_free: | 1566 | out_free: |
1576 | free_bprm(bprm); | 1567 | free_bprm(bprm); |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index f28f070a60fc..f91fd51b32e3 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -1905,6 +1905,7 @@ COMPATIBLE_IOCTL(FIONCLEX) | |||
1905 | COMPATIBLE_IOCTL(FIOASYNC) | 1905 | COMPATIBLE_IOCTL(FIOASYNC) |
1906 | COMPATIBLE_IOCTL(FIONBIO) | 1906 | COMPATIBLE_IOCTL(FIONBIO) |
1907 | COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ | 1907 | COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ |
1908 | COMPATIBLE_IOCTL(FS_IOC_FIEMAP) | ||
1908 | /* 0x00 */ | 1909 | /* 0x00 */ |
1909 | COMPATIBLE_IOCTL(FIBMAP) | 1910 | COMPATIBLE_IOCTL(FIBMAP) |
1910 | COMPATIBLE_IOCTL(FIGETBSZ) | 1911 | COMPATIBLE_IOCTL(FIGETBSZ) |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 4921e7426d95..a2f746066c5d 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -51,6 +51,7 @@ static const struct address_space_operations configfs_aops = { | |||
51 | }; | 51 | }; |
52 | 52 | ||
53 | static struct backing_dev_info configfs_backing_dev_info = { | 53 | static struct backing_dev_info configfs_backing_dev_info = { |
54 | .name = "configfs", | ||
54 | .ra_pages = 0, /* No readahead */ | 55 | .ra_pages = 0, /* No readahead */ |
55 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 56 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
56 | }; | 57 | }; |
diff --git a/fs/dcache.c b/fs/dcache.c index 9e5cd3c3a6ba..a100fa35a48f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/swap.h> | 32 | #include <linux/swap.h> |
33 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
34 | #include <linux/fs_struct.h> | 34 | #include <linux/fs_struct.h> |
35 | #include <linux/hardirq.h> | ||
35 | #include "internal.h" | 36 | #include "internal.h" |
36 | 37 | ||
37 | int sysctl_vfs_cache_pressure __read_mostly = 100; | 38 | int sysctl_vfs_cache_pressure __read_mostly = 100; |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 618a60f03886..240cef14fe58 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -106,6 +106,7 @@ struct connection { | |||
106 | #define CF_CONNECT_PENDING 3 | 106 | #define CF_CONNECT_PENDING 3 |
107 | #define CF_INIT_PENDING 4 | 107 | #define CF_INIT_PENDING 4 |
108 | #define CF_IS_OTHERCON 5 | 108 | #define CF_IS_OTHERCON 5 |
109 | #define CF_CLOSE 6 | ||
109 | struct list_head writequeue; /* List of outgoing writequeue_entries */ | 110 | struct list_head writequeue; /* List of outgoing writequeue_entries */ |
110 | spinlock_t writequeue_lock; | 111 | spinlock_t writequeue_lock; |
111 | int (*rx_action) (struct connection *); /* What to do when active */ | 112 | int (*rx_action) (struct connection *); /* What to do when active */ |
@@ -299,6 +300,8 @@ static void lowcomms_write_space(struct sock *sk) | |||
299 | 300 | ||
300 | static inline void lowcomms_connect_sock(struct connection *con) | 301 | static inline void lowcomms_connect_sock(struct connection *con) |
301 | { | 302 | { |
303 | if (test_bit(CF_CLOSE, &con->flags)) | ||
304 | return; | ||
302 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) | 305 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) |
303 | queue_work(send_workqueue, &con->swork); | 306 | queue_work(send_workqueue, &con->swork); |
304 | } | 307 | } |
@@ -926,10 +929,8 @@ static void tcp_connect_to_sock(struct connection *con) | |||
926 | goto out_err; | 929 | goto out_err; |
927 | 930 | ||
928 | memset(&saddr, 0, sizeof(saddr)); | 931 | memset(&saddr, 0, sizeof(saddr)); |
929 | if (dlm_nodeid_to_addr(con->nodeid, &saddr)) { | 932 | if (dlm_nodeid_to_addr(con->nodeid, &saddr)) |
930 | sock_release(sock); | ||
931 | goto out_err; | 933 | goto out_err; |
932 | } | ||
933 | 934 | ||
934 | sock->sk->sk_user_data = con; | 935 | sock->sk->sk_user_data = con; |
935 | con->rx_action = receive_from_sock; | 936 | con->rx_action = receive_from_sock; |
@@ -1284,7 +1285,6 @@ out: | |||
1284 | static void send_to_sock(struct connection *con) | 1285 | static void send_to_sock(struct connection *con) |
1285 | { | 1286 | { |
1286 | int ret = 0; | 1287 | int ret = 0; |
1287 | ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int); | ||
1288 | const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; | 1288 | const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; |
1289 | struct writequeue_entry *e; | 1289 | struct writequeue_entry *e; |
1290 | int len, offset; | 1290 | int len, offset; |
@@ -1293,8 +1293,6 @@ static void send_to_sock(struct connection *con) | |||
1293 | if (con->sock == NULL) | 1293 | if (con->sock == NULL) |
1294 | goto out_connect; | 1294 | goto out_connect; |
1295 | 1295 | ||
1296 | sendpage = con->sock->ops->sendpage; | ||
1297 | |||
1298 | spin_lock(&con->writequeue_lock); | 1296 | spin_lock(&con->writequeue_lock); |
1299 | for (;;) { | 1297 | for (;;) { |
1300 | e = list_entry(con->writequeue.next, struct writequeue_entry, | 1298 | e = list_entry(con->writequeue.next, struct writequeue_entry, |
@@ -1309,8 +1307,8 @@ static void send_to_sock(struct connection *con) | |||
1309 | 1307 | ||
1310 | ret = 0; | 1308 | ret = 0; |
1311 | if (len) { | 1309 | if (len) { |
1312 | ret = sendpage(con->sock, e->page, offset, len, | 1310 | ret = kernel_sendpage(con->sock, e->page, offset, len, |
1313 | msg_flags); | 1311 | msg_flags); |
1314 | if (ret == -EAGAIN || ret == 0) { | 1312 | if (ret == -EAGAIN || ret == 0) { |
1315 | cond_resched(); | 1313 | cond_resched(); |
1316 | goto out; | 1314 | goto out; |
@@ -1370,6 +1368,13 @@ int dlm_lowcomms_close(int nodeid) | |||
1370 | log_print("closing connection to node %d", nodeid); | 1368 | log_print("closing connection to node %d", nodeid); |
1371 | con = nodeid2con(nodeid, 0); | 1369 | con = nodeid2con(nodeid, 0); |
1372 | if (con) { | 1370 | if (con) { |
1371 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
1372 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
1373 | set_bit(CF_CLOSE, &con->flags); | ||
1374 | if (cancel_work_sync(&con->swork)) | ||
1375 | log_print("canceled swork for node %d", nodeid); | ||
1376 | if (cancel_work_sync(&con->rwork)) | ||
1377 | log_print("canceled rwork for node %d", nodeid); | ||
1373 | clean_one_writequeue(con); | 1378 | clean_one_writequeue(con); |
1374 | close_connection(con, true); | 1379 | close_connection(con, true); |
1375 | } | 1380 | } |
@@ -1395,9 +1400,10 @@ static void process_send_sockets(struct work_struct *work) | |||
1395 | 1400 | ||
1396 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 1401 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { |
1397 | con->connect_action(con); | 1402 | con->connect_action(con); |
1403 | set_bit(CF_WRITE_PENDING, &con->flags); | ||
1398 | } | 1404 | } |
1399 | clear_bit(CF_WRITE_PENDING, &con->flags); | 1405 | if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags)) |
1400 | send_to_sock(con); | 1406 | send_to_sock(con); |
1401 | } | 1407 | } |
1402 | 1408 | ||
1403 | 1409 | ||
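The lowcomms changes give connection shutdown a defined ordering: dlm_lowcomms_close() clears the pending bits, sets CF_CLOSE so lowcomms_connect_sock() refuses to queue new connect attempts, then cancels both work items synchronously before tearing the socket down. A sketch of that ordering, reusing the struct connection fields from the hunks above:

    static void connection_shutdown(struct connection *con)
    {
            clear_bit(CF_CONNECT_PENDING, &con->flags);
            clear_bit(CF_WRITE_PENDING, &con->flags);
            set_bit(CF_CLOSE, &con->flags); /* future connects bail out early */

            /* cancel_work_sync() also waits for work already executing, so
             * no swork/rwork can touch the socket after this returns. */
            cancel_work_sync(&con->swork);
            cancel_work_sync(&con->rwork);
    }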
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index ccc9d62c462d..55ea369f43a9 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
@@ -63,7 +63,7 @@ static int send_data(struct sk_buff *skb) | |||
63 | return rv; | 63 | return rv; |
64 | } | 64 | } |
65 | 65 | ||
66 | return genlmsg_unicast(skb, listener_nlpid); | 66 | return genlmsg_unicast(&init_net, skb, listener_nlpid); |
67 | } | 67 | } |
68 | 68 | ||
69 | static int user_cmd(struct sk_buff *skb, struct genl_info *info) | 69 | static int user_cmd(struct sk_buff *skb, struct genl_info *info) |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index af737bb56cb7..259525c9abb8 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -1303,6 +1303,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, | |||
1303 | } | 1303 | } |
1304 | (*new_auth_tok)->session_key.encrypted_key_size = | 1304 | (*new_auth_tok)->session_key.encrypted_key_size = |
1305 | (body_size - (ECRYPTFS_SALT_SIZE + 5)); | 1305 | (body_size - (ECRYPTFS_SALT_SIZE + 5)); |
1306 | if ((*new_auth_tok)->session_key.encrypted_key_size | ||
1307 | > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { | ||
1308 | printk(KERN_WARNING "Tag 3 packet contains key larger " | ||
1309 | "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n"); | ||
1310 | rc = -EINVAL; | ||
1311 | goto out_free; | ||
1312 | } | ||
1306 | if (unlikely(data[(*packet_size)++] != 0x04)) { | 1313 | if (unlikely(data[(*packet_size)++] != 0x04)) { |
1307 | printk(KERN_WARNING "Unknown version number [%d]\n", | 1314 | printk(KERN_WARNING "Unknown version number [%d]\n", |
1308 | data[(*packet_size) - 1]); | 1315 | data[(*packet_size) - 1]); |
@@ -1449,6 +1456,12 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents, | |||
1449 | rc = -EINVAL; | 1456 | rc = -EINVAL; |
1450 | goto out; | 1457 | goto out; |
1451 | } | 1458 | } |
1459 | if (unlikely((*tag_11_contents_size) > max_contents_bytes)) { | ||
1460 | printk(KERN_ERR "Literal data section in tag 11 packet exceeds " | ||
1461 | "expected size\n"); | ||
1462 | rc = -EINVAL; | ||
1463 | goto out; | ||
1464 | } | ||
1452 | if (data[(*packet_size)++] != 0x62) { | 1465 | if (data[(*packet_size)++] != 0x62) { |
1453 | printk(KERN_WARNING "Unrecognizable packet\n"); | 1466 | printk(KERN_WARNING "Unrecognizable packet\n"); |
1454 | rc = -EINVAL; | 1467 | rc = -EINVAL; |
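Both keystore hunks apply the same hardening: a length field parsed out of untrusted packet data is validated against the destination's capacity before anything is copied. A minimal sketch of the pattern; the names are illustrative, not the eCryptfs API:

    #include <linux/errno.h>
    #include <linux/string.h>
    #include <linux/types.h>

    static int copy_packet_field(u8 *dst, size_t dst_size,
                                 const u8 *src, size_t claimed_len)
    {
            /* Reject oversized claims outright; silent truncation would
             * just move the corruption somewhere harder to debug. */
            if (claimed_len > dst_size)
                    return -EINVAL;
            memcpy(dst, src, claimed_len);
            return 0;
    }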
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -678,8 +678,8 @@ exit: | |||
678 | } | 678 | } |
679 | EXPORT_SYMBOL(open_exec); | 679 | EXPORT_SYMBOL(open_exec); |
680 | 680 | ||
681 | int kernel_read(struct file *file, unsigned long offset, | 681 | int kernel_read(struct file *file, loff_t offset, |
682 | char *addr, unsigned long count) | 682 | char *addr, unsigned long count) |
683 | { | 683 | { |
684 | mm_segment_t old_fs; | 684 | mm_segment_t old_fs; |
685 | loff_t pos = offset; | 685 | loff_t pos = offset; |
@@ -1016,6 +1016,35 @@ out: | |||
1016 | EXPORT_SYMBOL(flush_old_exec); | 1016 | EXPORT_SYMBOL(flush_old_exec); |
1017 | 1017 | ||
1018 | /* | 1018 | /* |
1019 | * Prepare credentials and lock ->cred_guard_mutex. | ||
1020 | * install_exec_creds() commits the new creds and drops the lock. | ||
1021 | * Or, if exec fails before, free_bprm() should release ->cred and | ||
1022 | * unlock. | ||
1023 | */ | ||
1024 | int prepare_bprm_creds(struct linux_binprm *bprm) | ||
1025 | { | ||
1026 | if (mutex_lock_interruptible(&current->cred_guard_mutex)) | ||
1027 | return -ERESTARTNOINTR; | ||
1028 | |||
1029 | bprm->cred = prepare_exec_creds(); | ||
1030 | if (likely(bprm->cred)) | ||
1031 | return 0; | ||
1032 | |||
1033 | mutex_unlock(&current->cred_guard_mutex); | ||
1034 | return -ENOMEM; | ||
1035 | } | ||
1036 | |||
1037 | void free_bprm(struct linux_binprm *bprm) | ||
1038 | { | ||
1039 | free_arg_pages(bprm); | ||
1040 | if (bprm->cred) { | ||
1041 | mutex_unlock(&current->cred_guard_mutex); | ||
1042 | abort_creds(bprm->cred); | ||
1043 | } | ||
1044 | kfree(bprm); | ||
1045 | } | ||
1046 | |||
1047 | /* | ||
1019 | * install the new credentials for this executable | 1048 | * install the new credentials for this executable |
1020 | */ | 1049 | */ |
1021 | void install_exec_creds(struct linux_binprm *bprm) | 1050 | void install_exec_creds(struct linux_binprm *bprm) |
@@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm) | |||
1024 | 1053 | ||
1025 | commit_creds(bprm->cred); | 1054 | commit_creds(bprm->cred); |
1026 | bprm->cred = NULL; | 1055 | bprm->cred = NULL; |
1027 | 1056 | /* | |
1028 | /* cred_guard_mutex must be held at least to this point to prevent | 1057 | * cred_guard_mutex must be held at least to this point to prevent |
1029 | * ptrace_attach() from altering our determination of the task's | 1058 | * ptrace_attach() from altering our determination of the task's |
1030 | * credentials; any time after this it may be unlocked */ | 1059 | * credentials; any time after this it may be unlocked. |
1031 | 1060 | */ | |
1032 | security_bprm_committed_creds(bprm); | 1061 | security_bprm_committed_creds(bprm); |
1062 | mutex_unlock(&current->cred_guard_mutex); | ||
1033 | } | 1063 | } |
1034 | EXPORT_SYMBOL(install_exec_creds); | 1064 | EXPORT_SYMBOL(install_exec_creds); |
1035 | 1065 | ||
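With prepare_bprm_creds() and the relocated free_bprm() above, cred_guard_mutex has exactly two owners of its release: install_exec_creds() on success and free_bprm() on any failure, keyed off whether bprm->cred is still set. That is what lets both do_execve() and compat_do_execve() drop their out_unlock labels below. A condensed caller-side view of the invariant; do_the_exec() is a stand-in for the real binfmt loading:

    int do_the_exec(struct linux_binprm *bprm);  /* stand-in, not a real symbol */

    static int execve_sketch(struct linux_binprm *bprm)
    {
            int retval;

            retval = prepare_bprm_creds(bprm);      /* takes cred_guard_mutex */
            if (retval)
                    return retval;                  /* mutex was never taken */

            /* On success this path ends in install_exec_creds(), which
             * commits bprm->cred, clears it, and drops the mutex. */
            retval = do_the_exec(bprm);

            /* Unconditional: free_bprm() unlocks and aborts the creds only
             * if bprm->cred survived, i.e. only on failure before commit. */
            free_bprm(bprm);
            return retval;
    }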
@@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1246 | 1276 | ||
1247 | EXPORT_SYMBOL(search_binary_handler); | 1277 | EXPORT_SYMBOL(search_binary_handler); |
1248 | 1278 | ||
1249 | void free_bprm(struct linux_binprm *bprm) | ||
1250 | { | ||
1251 | free_arg_pages(bprm); | ||
1252 | if (bprm->cred) | ||
1253 | abort_creds(bprm->cred); | ||
1254 | kfree(bprm); | ||
1255 | } | ||
1256 | |||
1257 | /* | 1279 | /* |
1258 | * sys_execve() executes a new program. | 1280 | * sys_execve() executes a new program. |
1259 | */ | 1281 | */ |
@@ -1277,20 +1299,15 @@ int do_execve(char * filename, | |||
1277 | if (!bprm) | 1299 | if (!bprm) |
1278 | goto out_files; | 1300 | goto out_files; |
1279 | 1301 | ||
1280 | retval = -ERESTARTNOINTR; | 1302 | retval = prepare_bprm_creds(bprm); |
1281 | if (mutex_lock_interruptible(&current->cred_guard_mutex)) | 1303 | if (retval) |
1282 | goto out_free; | 1304 | goto out_free; |
1283 | current->in_execve = 1; | ||
1284 | |||
1285 | retval = -ENOMEM; | ||
1286 | bprm->cred = prepare_exec_creds(); | ||
1287 | if (!bprm->cred) | ||
1288 | goto out_unlock; | ||
1289 | 1305 | ||
1290 | retval = check_unsafe_exec(bprm); | 1306 | retval = check_unsafe_exec(bprm); |
1291 | if (retval < 0) | 1307 | if (retval < 0) |
1292 | goto out_unlock; | 1308 | goto out_free; |
1293 | clear_in_exec = retval; | 1309 | clear_in_exec = retval; |
1310 | current->in_execve = 1; | ||
1294 | 1311 | ||
1295 | file = open_exec(filename); | 1312 | file = open_exec(filename); |
1296 | retval = PTR_ERR(file); | 1313 | retval = PTR_ERR(file); |
@@ -1340,7 +1357,6 @@ int do_execve(char * filename, | |||
1340 | /* execve succeeded */ | 1357 | /* execve succeeded */ |
1341 | current->fs->in_exec = 0; | 1358 | current->fs->in_exec = 0; |
1342 | current->in_execve = 0; | 1359 | current->in_execve = 0; |
1343 | mutex_unlock(&current->cred_guard_mutex); | ||
1344 | acct_update_integrals(current); | 1360 | acct_update_integrals(current); |
1345 | free_bprm(bprm); | 1361 | free_bprm(bprm); |
1346 | if (displaced) | 1362 | if (displaced) |
@@ -1360,10 +1376,7 @@ out_file: | |||
1360 | out_unmark: | 1376 | out_unmark: |
1361 | if (clear_in_exec) | 1377 | if (clear_in_exec) |
1362 | current->fs->in_exec = 0; | 1378 | current->fs->in_exec = 0; |
1363 | |||
1364 | out_unlock: | ||
1365 | current->in_execve = 0; | 1379 | current->in_execve = 0; |
1366 | mutex_unlock(&current->cred_guard_mutex); | ||
1367 | 1380 | ||
1368 | out_free: | 1381 | out_free: |
1369 | free_bprm(bprm); | 1382 | free_bprm(bprm); |
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index d636e1297cad..a63d44256a70 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -230,7 +230,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
230 | return error; | 230 | return error; |
231 | } | 231 | } |
232 | 232 | ||
233 | static int | 233 | int |
234 | ext2_check_acl(struct inode *inode, int mask) | 234 | ext2_check_acl(struct inode *inode, int mask) |
235 | { | 235 | { |
236 | struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); | 236 | struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); |
@@ -246,12 +246,6 @@ ext2_check_acl(struct inode *inode, int mask) | |||
246 | return -EAGAIN; | 246 | return -EAGAIN; |
247 | } | 247 | } |
248 | 248 | ||
249 | int | ||
250 | ext2_permission(struct inode *inode, int mask) | ||
251 | { | ||
252 | return generic_permission(inode, mask, ext2_check_acl); | ||
253 | } | ||
254 | |||
255 | /* | 249 | /* |
256 | * Initialize the ACLs of a new inode. Called from ext2_new_inode. | 250 | * Initialize the ACLs of a new inode. Called from ext2_new_inode. |
257 | * | 251 | * |
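This is the first of several identical conversions in this merge (ext3 and ext4 follow below): the per-filesystem ->permission wrapper disappears, and the VFS instead runs generic_permission() itself, reaching back into the filesystem through a new ->check_acl hook. The old and new wiring side by side, sketched for ext2:

    /* Before: every filesystem carried this boilerplate wrapper. */
    int ext2_permission(struct inode *inode, int mask)
    {
            return generic_permission(inode, mask, ext2_check_acl);
    }

    /* After: no wrapper; with .permission unset, the VFS falls back to
     * generic_permission() and consults .check_acl for the ACL part. */
    const struct inode_operations ext2_ops_sketch = {
            .check_acl = ext2_check_acl,
    };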
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index ecefe478898f..3ff6cbb9ac44 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h | |||
@@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT2_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext2_permission (struct inode *, int); | 57 | extern int ext2_check_acl (struct inode *, int); |
58 | extern int ext2_acl_chmod (struct inode *); | 58 | extern int ext2_acl_chmod (struct inode *); |
59 | extern int ext2_init_acl (struct inode *, struct inode *); | 59 | extern int ext2_init_acl (struct inode *, struct inode *); |
60 | 60 | ||
61 | #else | 61 | #else |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext2_permission NULL | 63 | #define ext2_check_acl NULL |
64 | #define ext2_get_acl NULL | 64 | #define ext2_get_acl NULL |
65 | #define ext2_set_acl NULL | 65 | #define ext2_set_acl NULL |
66 | 66 | ||
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 2b9e47dc9222..a2f3afd1a1c1 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -85,6 +85,6 @@ const struct inode_operations ext2_file_inode_operations = { | |||
85 | .removexattr = generic_removexattr, | 85 | .removexattr = generic_removexattr, |
86 | #endif | 86 | #endif |
87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
88 | .permission = ext2_permission, | 88 | .check_acl = ext2_check_acl, |
89 | .fiemap = ext2_fiemap, | 89 | .fiemap = ext2_fiemap, |
90 | }; | 90 | }; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e27130341d4f..1c1638f873a4 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode, | |||
482 | unlock_buffer(bh); | 482 | unlock_buffer(bh); |
483 | mark_buffer_dirty_inode(bh, inode); | 483 | mark_buffer_dirty_inode(bh, inode); |
484 | /* We used to sync bh here if IS_SYNC(inode). | 484 | /* We used to sync bh here if IS_SYNC(inode). |
485 | * But we now rely upon generic_osync_inode() | 485 | * But we now rely upon generic_write_sync() |
486 | * and b_inode_buffers. But not for directories. | 486 | * and b_inode_buffers. But not for directories. |
487 | */ | 487 | */ |
488 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) | 488 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e1dedb0f7873..23701f289e98 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, | |||
362 | if (dir_de) { | 362 | if (dir_de) { |
363 | if (old_dir != new_dir) | 363 | if (old_dir != new_dir) |
364 | ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); | 364 | ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); |
365 | else { | ||
366 | kunmap(dir_page); | ||
367 | page_cache_release(dir_page); | ||
368 | } | ||
365 | inode_dec_link_count(old_dir); | 369 | inode_dec_link_count(old_dir); |
366 | } | 370 | } |
367 | return 0; | 371 | return 0; |
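The ext2_rename() hunk fixes a leak on the same-directory case: dir_page was mapped to find the ".." entry, and ext2_set_link() releases it on the cross-directory path, but nothing released it when old_dir == new_dir. The balancing rule the fix restores, in sketch form:

    if (dir_de) {
            if (old_dir != new_dir)
                    /* ext2_set_link() kunmaps and releases dir_page itself */
                    ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
            else {
                    kunmap(dir_page);               /* balance the kmap() ... */
                    page_cache_release(dir_page);   /* ... and the page ref */
            }
            inode_dec_link_count(old_dir);
    }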
@@ -396,7 +400,7 @@ const struct inode_operations ext2_dir_inode_operations = { | |||
396 | .removexattr = generic_removexattr, | 400 | .removexattr = generic_removexattr, |
397 | #endif | 401 | #endif |
398 | .setattr = ext2_setattr, | 402 | .setattr = ext2_setattr, |
399 | .permission = ext2_permission, | 403 | .check_acl = ext2_check_acl, |
400 | }; | 404 | }; |
401 | 405 | ||
402 | const struct inode_operations ext2_special_inode_operations = { | 406 | const struct inode_operations ext2_special_inode_operations = { |
@@ -407,5 +411,5 @@ const struct inode_operations ext2_special_inode_operations = { | |||
407 | .removexattr = generic_removexattr, | 411 | .removexattr = generic_removexattr, |
408 | #endif | 412 | #endif |
409 | .setattr = ext2_setattr, | 413 | .setattr = ext2_setattr, |
410 | .permission = ext2_permission, | 414 | .check_acl = ext2_check_acl, |
411 | }; | 415 | }; |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index fb3c1a21b135..522b15498f45 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -29,23 +29,25 @@ config EXT3_FS | |||
29 | module will be called ext3. | 29 | module will be called ext3. |
30 | 30 | ||
31 | config EXT3_DEFAULTS_TO_ORDERED | 31 | config EXT3_DEFAULTS_TO_ORDERED |
32 | bool "Default to 'data=ordered' in ext3 (legacy option)" | 32 | bool "Default to 'data=ordered' in ext3" |
33 | depends on EXT3_FS | 33 | depends on EXT3_FS |
34 | help | 34 | help |
35 | If a filesystem does not explicitly specify a data ordering | 35 | The journal mode options for ext3 have different tradeoffs |
36 | mode, and the journal capability allowed it, ext3 used to | 36 | between when data is guaranteed to be on disk and |
37 | historically default to 'data=ordered'. | 37 | performance. The use of "data=writeback" can cause |
38 | 38 | unwritten data to appear in files after a system crash or |
39 | That was a rather unfortunate choice, because it leads to all | 39 | power failure, which can be a security issue. However, |
40 | kinds of latency problems, and the 'data=writeback' mode is more | 40 | "data=ordered" mode can also result in major performance |
41 | appropriate these days. | 41 | problems, including seconds-long delays before an fsync() |
42 | 42 | call returns. For details, see: | |
43 | You should probably always answer 'n' here, and if you really | 43 | |
44 | want to use 'data=ordered' mode, set it in the filesystem itself | 44 | http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs |
45 | with 'tune2fs -o journal_data_ordered'. | 45 | |
46 | 46 | If you have been historically happy with ext3's performance, | |
47 | But if you really want to enable the legacy default, you can do | 47 | data=ordered mode will be a safe choice and you should |
48 | so by answering 'y' to this question. | 48 | answer 'y' here. If you understand the reliability and data |
49 | privacy issues of data=writeback and are willing to make | ||
50 | that trade off, answer 'n'. | ||
49 | 51 | ||
50 | config EXT3_FS_XATTR | 52 | config EXT3_FS_XATTR |
51 | bool "Ext3 extended attributes" | 53 | bool "Ext3 extended attributes" |
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e167bae37ef0..c9b0df376b5f 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, | |||
238 | return error; | 238 | return error; |
239 | } | 239 | } |
240 | 240 | ||
241 | static int | 241 | int |
242 | ext3_check_acl(struct inode *inode, int mask) | 242 | ext3_check_acl(struct inode *inode, int mask) |
243 | { | 243 | { |
244 | struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); | 244 | struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); |
@@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask) | |||
254 | return -EAGAIN; | 254 | return -EAGAIN; |
255 | } | 255 | } |
256 | 256 | ||
257 | int | ||
258 | ext3_permission(struct inode *inode, int mask) | ||
259 | { | ||
260 | return generic_permission(inode, mask, ext3_check_acl); | ||
261 | } | ||
262 | |||
263 | /* | 257 | /* |
264 | * Initialize the ACLs of a new inode. Called from ext3_new_inode. | 258 | * Initialize the ACLs of a new inode. Called from ext3_new_inode. |
265 | * | 259 | * |
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 07d15a3a5969..597334626de9 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h | |||
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT3_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT3_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext3_permission (struct inode *, int); | 57 | extern int ext3_check_acl (struct inode *, int); |
58 | extern int ext3_acl_chmod (struct inode *); | 58 | extern int ext3_acl_chmod (struct inode *); |
59 | extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); | 59 | extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); |
60 | 60 | ||
61 | #else /* CONFIG_EXT3_FS_POSIX_ACL */ | 61 | #else /* CONFIG_EXT3_FS_POSIX_ACL */ |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext3_permission NULL | 63 | #define ext3_check_acl NULL |
64 | 64 | ||
65 | static inline int | 65 | static inline int |
66 | ext3_acl_chmod(struct inode *inode) | 66 | ext3_acl_chmod(struct inode *inode) |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 3d724a95882f..373fa90c796a 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp, | |||
130 | struct buffer_head *bh = NULL; | 130 | struct buffer_head *bh = NULL; |
131 | 131 | ||
132 | map_bh.b_state = 0; | 132 | map_bh.b_state = 0; |
133 | err = ext3_get_blocks_handle(NULL, inode, blk, 1, | 133 | err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0); |
134 | &map_bh, 0, 0); | ||
135 | if (err > 0) { | 134 | if (err > 0) { |
136 | pgoff_t index = map_bh.b_blocknr >> | 135 | pgoff_t index = map_bh.b_blocknr >> |
137 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 136 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5b49704b231b..388bbdfa0b4e 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp) | |||
51 | return 0; | 51 | return 0; |
52 | } | 52 | } |
53 | 53 | ||
54 | static ssize_t | ||
55 | ext3_file_write(struct kiocb *iocb, const struct iovec *iov, | ||
56 | unsigned long nr_segs, loff_t pos) | ||
57 | { | ||
58 | struct file *file = iocb->ki_filp; | ||
59 | struct inode *inode = file->f_path.dentry->d_inode; | ||
60 | ssize_t ret; | ||
61 | int err; | ||
62 | |||
63 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
64 | |||
65 | /* | ||
66 | * Skip flushing if there was an error, or if nothing was written. | ||
67 | */ | ||
68 | if (ret <= 0) | ||
69 | return ret; | ||
70 | |||
71 | /* | ||
72 | * If the inode is IS_SYNC, or is O_SYNC and we are doing data | ||
73 | * journalling then we need to make sure that we force the transaction | ||
74 | * to disk to keep all metadata uptodate synchronously. | ||
75 | */ | ||
76 | if (file->f_flags & O_SYNC) { | ||
77 | /* | ||
78 | * If we are non-data-journaled, then the dirty data has | ||
79 | * already been flushed to backing store by generic_osync_inode, | ||
80 | * and the inode has been flushed too if there have been any | ||
81 | * modifications other than mere timestamp updates. | ||
82 | * | ||
83 | * Open question --- do we care about flushing timestamps too | ||
84 | * if the inode is IS_SYNC? | ||
85 | */ | ||
86 | if (!ext3_should_journal_data(inode)) | ||
87 | return ret; | ||
88 | |||
89 | goto force_commit; | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * So we know that there has been no forced data flush. If the inode | ||
94 | * is marked IS_SYNC, we need to force one ourselves. | ||
95 | */ | ||
96 | if (!IS_SYNC(inode)) | ||
97 | return ret; | ||
98 | |||
99 | /* | ||
100 | * Open question #2 --- should we force data to disk here too? If we | ||
101 | * don't, the only impact is that data=writeback filesystems won't | ||
102 | * flush data to disk automatically on IS_SYNC, only metadata (but | ||
103 | * historically, that is what ext2 has done.) | ||
104 | */ | ||
105 | |||
106 | force_commit: | ||
107 | err = ext3_force_commit(inode->i_sb); | ||
108 | if (err) | ||
109 | return err; | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | const struct file_operations ext3_file_operations = { | 54 | const struct file_operations ext3_file_operations = { |
114 | .llseek = generic_file_llseek, | 55 | .llseek = generic_file_llseek, |
115 | .read = do_sync_read, | 56 | .read = do_sync_read, |
116 | .write = do_sync_write, | 57 | .write = do_sync_write, |
117 | .aio_read = generic_file_aio_read, | 58 | .aio_read = generic_file_aio_read, |
118 | .aio_write = ext3_file_write, | 59 | .aio_write = generic_file_aio_write, |
119 | .unlocked_ioctl = ext3_ioctl, | 60 | .unlocked_ioctl = ext3_ioctl, |
120 | #ifdef CONFIG_COMPAT | 61 | #ifdef CONFIG_COMPAT |
121 | .compat_ioctl = ext3_compat_ioctl, | 62 | .compat_ioctl = ext3_compat_ioctl, |
@@ -137,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = { | |||
137 | .listxattr = ext3_listxattr, | 78 | .listxattr = ext3_listxattr, |
138 | .removexattr = generic_removexattr, | 79 | .removexattr = generic_removexattr, |
139 | #endif | 80 | #endif |
140 | .permission = ext3_permission, | 81 | .check_acl = ext3_check_acl, |
141 | .fiemap = ext3_fiemap, | 82 | .fiemap = ext3_fiemap, |
142 | }; | 83 | }; |
143 | 84 | ||
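ext3_file_write() existed only to force a journal commit for O_SYNC and IS_SYNC writes; now that the generic write path ends in generic_write_sync(), the wrapper is dead weight and ->aio_write can point straight at generic_file_aio_write(). A rough sketch of the ordering the generic path provides; do_buffered_write() and flush_range() are stand-ins, not real mm/filemap.c names:

    ssize_t do_buffered_write(struct file *f, const char __user *b,
                              size_t l, loff_t p);          /* stand-in */
    ssize_t flush_range(struct file *f, loff_t p, size_t l); /* stand-in */

    static ssize_t sync_aware_write(struct file *file, const char __user *buf,
                                    size_t len, loff_t pos)
    {
            ssize_t written = do_buffered_write(file, buf, len, pos);

            if (written > 0 &&
                ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host))) {
                    /* generic_write_sync() performs this step; for ext3 the
                     * resulting fsync forces the journal commit that the
                     * removed wrapper used to issue by hand. */
                    ssize_t err = flush_range(file, pos, written);
                    if (err < 0)
                            written = err;
            }
            return written;
    }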
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d33634119e17..451d166bbe93 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -23,6 +23,7 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <linux/blkdev.h> | ||
26 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
27 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
@@ -73,7 +74,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
73 | } | 74 | } |
74 | 75 | ||
75 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 76 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
76 | goto out; | 77 | goto flush; |
77 | 78 | ||
78 | /* | 79 | /* |
79 | * The VFS has written the file data. If the inode is unaltered | 80 | * The VFS has written the file data. If the inode is unaltered |
@@ -85,7 +86,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
85 | .nr_to_write = 0, /* sys_fsync did this */ | 86 | .nr_to_write = 0, /* sys_fsync did this */ |
86 | }; | 87 | }; |
87 | ret = sync_inode(inode, &wbc); | 88 | ret = sync_inode(inode, &wbc); |
89 | goto out; | ||
88 | } | 90 | } |
91 | flush: | ||
92 | /* | ||
93 | * In case we didn't commit a transaction, we have to flush | ||
94 | * disk caches manually so that data really is on persistent | ||
95 | * storage | ||
96 | */ | ||
97 | if (test_opt(inode->i_sb, BARRIER)) | ||
98 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
89 | out: | 99 | out: |
90 | return ret; | 100 | return ret; |
91 | } | 101 | } |
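The fsync change closes a cache hole: when no transaction commit happens (a datasync with clean metadata, or nothing to commit at all), the pages the VFS wrote may still sit in the drive's volatile write cache. The fix issues an explicit flush in that case, gated on the barrier mount option. The fallback in isolation:

    #include <linux/blkdev.h>

    /* Sketch: 'committed' says whether this fsync triggered a journal
     * commit, which with barriers already implies a device flush. */
    static void fsync_flush_fallback(struct super_block *sb, int committed,
                                     int barriers_enabled)
    {
            if (!committed && barriers_enabled)
                    blkdev_issue_flush(sb->s_bdev, NULL); /* drain disk cache */
    }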
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5f51fed5c750..cd098a7b77fc 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -172,10 +172,21 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
172 | * so before we call here everything must be consistently dirtied against | 172 | * so before we call here everything must be consistently dirtied against |
173 | * this transaction. | 173 | * this transaction. |
174 | */ | 174 | */ |
175 | static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) | 175 | static int truncate_restart_transaction(handle_t *handle, struct inode *inode) |
176 | { | 176 | { |
177 | int ret; | ||
178 | |||
177 | jbd_debug(2, "restarting handle %p\n", handle); | 179 | jbd_debug(2, "restarting handle %p\n", handle); |
178 | return ext3_journal_restart(handle, blocks_for_truncate(inode)); | 180 | /* |
181 | * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle | ||
182 | * At this moment, get_block can be called only for blocks inside | ||
183 | * i_size since page cache has been already dropped and writes are | ||
184 | * blocked by i_mutex. So we can safely drop the truncate_mutex. | ||
185 | */ | ||
186 | mutex_unlock(&EXT3_I(inode)->truncate_mutex); | ||
187 | ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); | ||
188 | mutex_lock(&EXT3_I(inode)->truncate_mutex); | ||
189 | return ret; | ||
179 | } | 190 | } |
180 | 191 | ||
181 | /* | 192 | /* |
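Renaming ext3_journal_test_restart() to truncate_restart_transaction() carries a real fix: the journal restart can block on a commit, and the commit path may need truncate_mutex through ext3_get_blocks_handle(), so holding the mutex across the restart could deadlock. Dropping it is safe here because the page cache is already gone and i_mutex keeps writers out. The unlock-restart-relock shape:

    static int restart_handle_sketch(handle_t *handle, struct inode *inode)
    {
            int ret;

            /* Safe: pages are dropped and i_mutex blocks new writes, so no
             * get_block inside i_size can race with us while we sleep. */
            mutex_unlock(&EXT3_I(inode)->truncate_mutex);
            ret = ext3_journal_restart(handle, blocks_for_truncate(inode));
            mutex_lock(&EXT3_I(inode)->truncate_mutex);
            return ret;
    }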
@@ -788,7 +799,7 @@ err_out: | |||
788 | int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | 799 | int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, |
789 | sector_t iblock, unsigned long maxblocks, | 800 | sector_t iblock, unsigned long maxblocks, |
790 | struct buffer_head *bh_result, | 801 | struct buffer_head *bh_result, |
791 | int create, int extend_disksize) | 802 | int create) |
792 | { | 803 | { |
793 | int err = -EIO; | 804 | int err = -EIO; |
794 | int offsets[4]; | 805 | int offsets[4]; |
@@ -911,13 +922,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
911 | if (!err) | 922 | if (!err) |
912 | err = ext3_splice_branch(handle, inode, iblock, | 923 | err = ext3_splice_branch(handle, inode, iblock, |
913 | partial, indirect_blks, count); | 924 | partial, indirect_blks, count); |
914 | /* | ||
915 | * i_disksize growing is protected by truncate_mutex. Don't forget to | ||
916 | * protect it if you're about to implement concurrent | ||
917 | * ext3_get_block() -bzzz | ||
918 | */ | ||
919 | if (!err && extend_disksize && inode->i_size > ei->i_disksize) | ||
920 | ei->i_disksize = inode->i_size; | ||
921 | mutex_unlock(&ei->truncate_mutex); | 925 | mutex_unlock(&ei->truncate_mutex); |
922 | if (err) | 926 | if (err) |
923 | goto cleanup; | 927 | goto cleanup; |
@@ -972,7 +976,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, | |||
972 | } | 976 | } |
973 | 977 | ||
974 | ret = ext3_get_blocks_handle(handle, inode, iblock, | 978 | ret = ext3_get_blocks_handle(handle, inode, iblock, |
975 | max_blocks, bh_result, create, 0); | 979 | max_blocks, bh_result, create); |
976 | if (ret > 0) { | 980 | if (ret > 0) { |
977 | bh_result->b_size = (ret << inode->i_blkbits); | 981 | bh_result->b_size = (ret << inode->i_blkbits); |
978 | ret = 0; | 982 | ret = 0; |
@@ -1005,7 +1009,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, | |||
1005 | dummy.b_blocknr = -1000; | 1009 | dummy.b_blocknr = -1000; |
1006 | buffer_trace_init(&dummy.b_history); | 1010 | buffer_trace_init(&dummy.b_history); |
1007 | err = ext3_get_blocks_handle(handle, inode, block, 1, | 1011 | err = ext3_get_blocks_handle(handle, inode, block, 1, |
1008 | &dummy, create, 1); | 1012 | &dummy, create); |
1009 | /* | 1013 | /* |
1010 | * ext3_get_blocks_handle() returns number of blocks | 1014 | * ext3_get_blocks_handle() returns number of blocks |
1011 | * mapped. 0 in case of a HOLE. | 1015 | * mapped. 0 in case of a HOLE. |
@@ -1193,15 +1197,16 @@ write_begin_failed: | |||
1193 | * i_size_read because we hold i_mutex. | 1197 | * i_size_read because we hold i_mutex. |
1194 | * | 1198 | * |
1195 | * Add inode to orphan list in case we crash before truncate | 1199 | * Add inode to orphan list in case we crash before truncate |
1196 | * finishes. | 1200 | * finishes. Do this only if ext3_can_truncate() agrees so |
1201 | * that orphan processing code is happy. | ||
1197 | */ | 1202 | */ |
1198 | if (pos + len > inode->i_size) | 1203 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1199 | ext3_orphan_add(handle, inode); | 1204 | ext3_orphan_add(handle, inode); |
1200 | ext3_journal_stop(handle); | 1205 | ext3_journal_stop(handle); |
1201 | unlock_page(page); | 1206 | unlock_page(page); |
1202 | page_cache_release(page); | 1207 | page_cache_release(page); |
1203 | if (pos + len > inode->i_size) | 1208 | if (pos + len > inode->i_size) |
1204 | vmtruncate(inode, inode->i_size); | 1209 | ext3_truncate(inode); |
1205 | } | 1210 | } |
1206 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1211 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
1207 | goto retry; | 1212 | goto retry; |
@@ -1287,7 +1292,7 @@ static int ext3_ordered_write_end(struct file *file, | |||
1287 | * There may be allocated blocks outside of i_size because | 1292 | * There may be allocated blocks outside of i_size because |
1288 | * we failed to copy some data. Prepare for truncate. | 1293 | * we failed to copy some data. Prepare for truncate. |
1289 | */ | 1294 | */ |
1290 | if (pos + len > inode->i_size) | 1295 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1291 | ext3_orphan_add(handle, inode); | 1296 | ext3_orphan_add(handle, inode); |
1292 | ret2 = ext3_journal_stop(handle); | 1297 | ret2 = ext3_journal_stop(handle); |
1293 | if (!ret) | 1298 | if (!ret) |
@@ -1296,7 +1301,7 @@ static int ext3_ordered_write_end(struct file *file, | |||
1296 | page_cache_release(page); | 1301 | page_cache_release(page); |
1297 | 1302 | ||
1298 | if (pos + len > inode->i_size) | 1303 | if (pos + len > inode->i_size) |
1299 | vmtruncate(inode, inode->i_size); | 1304 | ext3_truncate(inode); |
1300 | return ret ? ret : copied; | 1305 | return ret ? ret : copied; |
1301 | } | 1306 | } |
1302 | 1307 | ||
@@ -1315,14 +1320,14 @@ static int ext3_writeback_write_end(struct file *file, | |||
1315 | * There may be allocated blocks outside of i_size because | 1320 | * There may be allocated blocks outside of i_size because |
1316 | * we failed to copy some data. Prepare for truncate. | 1321 | * we failed to copy some data. Prepare for truncate. |
1317 | */ | 1322 | */ |
1318 | if (pos + len > inode->i_size) | 1323 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1319 | ext3_orphan_add(handle, inode); | 1324 | ext3_orphan_add(handle, inode); |
1320 | ret = ext3_journal_stop(handle); | 1325 | ret = ext3_journal_stop(handle); |
1321 | unlock_page(page); | 1326 | unlock_page(page); |
1322 | page_cache_release(page); | 1327 | page_cache_release(page); |
1323 | 1328 | ||
1324 | if (pos + len > inode->i_size) | 1329 | if (pos + len > inode->i_size) |
1325 | vmtruncate(inode, inode->i_size); | 1330 | ext3_truncate(inode); |
1326 | return ret ? ret : copied; | 1331 | return ret ? ret : copied; |
1327 | } | 1332 | } |
1328 | 1333 | ||
@@ -1358,7 +1363,7 @@ static int ext3_journalled_write_end(struct file *file, | |||
1358 | * There may be allocated blocks outside of i_size because | 1363 | * There may be allocated blocks outside of i_size because |
1359 | * we failed to copy some data. Prepare for truncate. | 1364 | * we failed to copy some data. Prepare for truncate. |
1360 | */ | 1365 | */ |
1361 | if (pos + len > inode->i_size) | 1366 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1362 | ext3_orphan_add(handle, inode); | 1367 | ext3_orphan_add(handle, inode); |
1363 | EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; | 1368 | EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; |
1364 | if (inode->i_size > EXT3_I(inode)->i_disksize) { | 1369 | if (inode->i_size > EXT3_I(inode)->i_disksize) { |
@@ -1375,7 +1380,7 @@ static int ext3_journalled_write_end(struct file *file, | |||
1375 | page_cache_release(page); | 1380 | page_cache_release(page); |
1376 | 1381 | ||
1377 | if (pos + len > inode->i_size) | 1382 | if (pos + len > inode->i_size) |
1378 | vmtruncate(inode, inode->i_size); | 1383 | ext3_truncate(inode); |
1379 | return ret ? ret : copied; | 1384 | return ret ? ret : copied; |
1380 | } | 1385 | } |
1381 | 1386 | ||
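All three write_end variants and write_begin's failure path now follow one rule for short copies: add the inode to the orphan list only when ext3_can_truncate() agrees, stop the handle, and then call ext3_truncate() directly rather than going through vmtruncate(), since only blocks instantiated past i_size need trimming. The shared error-path skeleton:

    if (pos + len > inode->i_size && ext3_can_truncate(inode))
            ext3_orphan_add(handle, inode); /* crash-safe: orphan scan trims */
    ext3_journal_stop(handle);
    if (pos + len > inode->i_size)
            ext3_truncate(inode);           /* drop blocks beyond i_size now */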
@@ -2078,7 +2083,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, | |||
2078 | ext3_journal_dirty_metadata(handle, bh); | 2083 | ext3_journal_dirty_metadata(handle, bh); |
2079 | } | 2084 | } |
2080 | ext3_mark_inode_dirty(handle, inode); | 2085 | ext3_mark_inode_dirty(handle, inode); |
2081 | ext3_journal_test_restart(handle, inode); | 2086 | truncate_restart_transaction(handle, inode); |
2082 | if (bh) { | 2087 | if (bh) { |
2083 | BUFFER_TRACE(bh, "retaking write access"); | 2088 | BUFFER_TRACE(bh, "retaking write access"); |
2084 | ext3_journal_get_write_access(handle, bh); | 2089 | ext3_journal_get_write_access(handle, bh); |
@@ -2288,7 +2293,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2288 | return; | 2293 | return; |
2289 | if (try_to_extend_transaction(handle, inode)) { | 2294 | if (try_to_extend_transaction(handle, inode)) { |
2290 | ext3_mark_inode_dirty(handle, inode); | 2295 | ext3_mark_inode_dirty(handle, inode); |
2291 | ext3_journal_test_restart(handle, inode); | 2296 | truncate_restart_transaction(handle, inode); |
2292 | } | 2297 | } |
2293 | 2298 | ||
2294 | ext3_free_blocks(handle, inode, nr, 1); | 2299 | ext3_free_blocks(handle, inode, nr, 1); |
@@ -2898,6 +2903,10 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2898 | struct buffer_head *bh = iloc->bh; | 2903 | struct buffer_head *bh = iloc->bh; |
2899 | int err = 0, rc, block; | 2904 | int err = 0, rc, block; |
2900 | 2905 | ||
2906 | again: | ||
2907 | /* we can't allow multiple procs in here at once, it's a bit racy */ | ||
2908 | lock_buffer(bh); | ||
2909 | |||
2901 | /* For fields not tracked in the in-memory inode, | 2910 | /* For fields not tracked in the in-memory inode, |
2902 | * initialise them to zero for new inodes. */ | 2911 | * initialise them to zero for new inodes. */ |
2903 | if (ei->i_state & EXT3_STATE_NEW) | 2912 | if (ei->i_state & EXT3_STATE_NEW) |
@@ -2957,16 +2966,20 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2957 | /* If this is the first large file | 2966 | /* If this is the first large file |
2958 | * created, add a flag to the superblock. | 2967 | * created, add a flag to the superblock. |
2959 | */ | 2968 | */ |
2969 | unlock_buffer(bh); | ||
2960 | err = ext3_journal_get_write_access(handle, | 2970 | err = ext3_journal_get_write_access(handle, |
2961 | EXT3_SB(sb)->s_sbh); | 2971 | EXT3_SB(sb)->s_sbh); |
2962 | if (err) | 2972 | if (err) |
2963 | goto out_brelse; | 2973 | goto out_brelse; |
2974 | |||
2964 | ext3_update_dynamic_rev(sb); | 2975 | ext3_update_dynamic_rev(sb); |
2965 | EXT3_SET_RO_COMPAT_FEATURE(sb, | 2976 | EXT3_SET_RO_COMPAT_FEATURE(sb, |
2966 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); | 2977 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); |
2967 | handle->h_sync = 1; | 2978 | handle->h_sync = 1; |
2968 | err = ext3_journal_dirty_metadata(handle, | 2979 | err = ext3_journal_dirty_metadata(handle, |
2969 | EXT3_SB(sb)->s_sbh); | 2980 | EXT3_SB(sb)->s_sbh); |
2981 | /* get our lock and start over */ | ||
2982 | goto again; | ||
2970 | } | 2983 | } |
2971 | } | 2984 | } |
2972 | } | 2985 | } |
@@ -2989,6 +3002,7 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2989 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 3002 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
2990 | 3003 | ||
2991 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); | 3004 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); |
3005 | unlock_buffer(bh); | ||
2992 | rc = ext3_journal_dirty_metadata(handle, bh); | 3006 | rc = ext3_journal_dirty_metadata(handle, bh); |
2993 | if (!err) | 3007 | if (!err) |
2994 | err = rc; | 3008 | err = rc; |
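ext3_do_update_inode() gains lock_buffer() so two tasks flushing the same inode can no longer interleave their copies into the shared raw-inode buffer. Because journal calls may sleep, the large-file superblock update first drops the buffer lock and then retries from the top. The retry skeleton, with stand-in helpers for the parts elided here:

    void fill_raw_inode(struct buffer_head *bh);       /* stand-in */
    int need_superblock_update(void);                  /* stand-in */
    void update_superblock(handle_t *handle);          /* stand-in */

    static int update_inode_sketch(handle_t *handle, struct buffer_head *bh)
    {
    again:
            lock_buffer(bh);        /* serialize writers of this buffer */
            fill_raw_inode(bh);     /* the field copy-out */

            if (need_superblock_update()) { /* the LARGE_FILE feature case */
                    unlock_buffer(bh);      /* journal ops below may sleep */
                    update_superblock(handle);
                    goto again;             /* re-take the lock, redo copy */
            }

            unlock_buffer(bh);
            return ext3_journal_dirty_metadata(handle, bh);
    }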
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 6ff7b9730234..aad6400c9b77 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = { | |||
2445 | .listxattr = ext3_listxattr, | 2445 | .listxattr = ext3_listxattr, |
2446 | .removexattr = generic_removexattr, | 2446 | .removexattr = generic_removexattr, |
2447 | #endif | 2447 | #endif |
2448 | .permission = ext3_permission, | 2448 | .check_acl = ext3_check_acl, |
2449 | }; | 2449 | }; |
2450 | 2450 | ||
2451 | const struct inode_operations ext3_special_inode_operations = { | 2451 | const struct inode_operations ext3_special_inode_operations = { |
@@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = { | |||
2456 | .listxattr = ext3_listxattr, | 2456 | .listxattr = ext3_listxattr, |
2457 | .removexattr = generic_removexattr, | 2457 | .removexattr = generic_removexattr, |
2458 | #endif | 2458 | #endif |
2459 | .permission = ext3_permission, | 2459 | .check_acl = ext3_check_acl, |
2460 | }; | 2460 | }; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 524b349c6299..a8d80a7f1105 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl | |||
543 | #endif | 543 | #endif |
544 | } | 544 | } |
545 | 545 | ||
546 | static char *data_mode_string(unsigned long mode) | ||
547 | { | ||
548 | switch (mode) { | ||
549 | case EXT3_MOUNT_JOURNAL_DATA: | ||
550 | return "journal"; | ||
551 | case EXT3_MOUNT_ORDERED_DATA: | ||
552 | return "ordered"; | ||
553 | case EXT3_MOUNT_WRITEBACK_DATA: | ||
554 | return "writeback"; | ||
555 | } | ||
556 | return "unknown"; | ||
557 | } | ||
558 | |||
546 | /* | 559 | /* |
547 | * Show an option if | 560 | * Show an option if |
548 | * - it's set to a non-default value OR | 561 | * - it's set to a non-default value OR |
@@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
616 | if (test_opt(sb, NOBH)) | 629 | if (test_opt(sb, NOBH)) |
617 | seq_puts(seq, ",nobh"); | 630 | seq_puts(seq, ",nobh"); |
618 | 631 | ||
619 | if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) | 632 | seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt & |
620 | seq_puts(seq, ",data=journal"); | 633 | EXT3_MOUNT_DATA_FLAGS)); |
621 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) | ||
622 | seq_puts(seq, ",data=ordered"); | ||
623 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) | ||
624 | seq_puts(seq, ",data=writeback"); | ||
625 | |||
626 | if (test_opt(sb, DATA_ERR_ABORT)) | 634 | if (test_opt(sb, DATA_ERR_ABORT)) |
627 | seq_puts(seq, ",data_err=abort"); | 635 | seq_puts(seq, ",data_err=abort"); |
628 | 636 | ||
@@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb, | |||
1024 | datacheck: | 1032 | datacheck: |
1025 | if (is_remount) { | 1033 | if (is_remount) { |
1026 | if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) | 1034 | if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) |
1027 | != data_opt) { | 1035 | == data_opt) |
1028 | printk(KERN_ERR | 1036 | break; |
1029 | "EXT3-fs: cannot change data " | 1037 | printk(KERN_ERR |
1030 | "mode on remount\n"); | 1038 | "EXT3-fs (device %s): Cannot change " |
1031 | return 0; | 1039 | "data mode on remount. The filesystem " |
1032 | } | 1040 | "is mounted in data=%s mode and you " |
1041 | "try to remount it in data=%s mode.\n", | ||
1042 | sb->s_id, | ||
1043 | data_mode_string(sbi->s_mount_opt & | ||
1044 | EXT3_MOUNT_DATA_FLAGS), | ||
1045 | data_mode_string(data_opt)); | ||
1046 | return 0; | ||
1033 | } else { | 1047 | } else { |
1034 | sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; | 1048 | sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; |
1035 | sbi->s_mount_opt |= data_opt; | 1049 | sbi->s_mount_opt |= data_opt; |
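Folding the mode names into data_mode_string() lets two call sites share them: the /proc/mounts output and the new, more talkative remount error. Usage, condensed from the hunks above:

    /* show_options path */
    seq_printf(seq, ",data=%s",
               data_mode_string(sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS));

    /* remount path: name both the active and the requested mode */
    printk(KERN_ERR "EXT3-fs (device %s): Cannot change data mode on "
           "remount (mounted data=%s, requested data=%s)\n", sb->s_id,
           data_mode_string(sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS),
           data_mode_string(data_opt));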
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 418b6f3b0ae8..d5c0ea2e8f2d 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -37,7 +37,7 @@ config EXT4DEV_COMPAT | |||
37 | 37 | ||
38 | To enable backwards compatibility so that systems that are | 38 | To enable backwards compatibility so that systems that are |
39 | still expecting to mount ext4 filesystems using ext4dev, | 39 | still expecting to mount ext4 filesystems using ext4dev, |
40 | chose Y here. This feature will go away by 2.6.31, so | 40 | choose Y here. This feature will go away by 2.6.31, so |
41 | please arrange to get your userspace programs fixed! | 41 | please arrange to get your userspace programs fixed! |
42 | 42 | ||
43 | config EXT4_FS_XATTR | 43 | config EXT4_FS_XATTR |
@@ -77,3 +77,12 @@ config EXT4_FS_SECURITY | |||
77 | 77 | ||
78 | If you are not using a security module that requires using | 78 | If you are not using a security module that requires using |
79 | extended attributes for file security labels, say N. | 79 | extended attributes for file security labels, say N. |
80 | |||
81 | config EXT4_DEBUG | ||
82 | bool "EXT4 debugging support" | ||
83 | depends on EXT4_FS | ||
84 | help | ||
85 | Enables run-time debugging support for the ext4 filesystem. | ||
86 | |||
87 | If you select Y here, then you will be able to turn on debugging | ||
88 | with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" | ||
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index f6d8967149ca..0df88b2a69b0 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -236,7 +236,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, | |||
236 | return error; | 236 | return error; |
237 | } | 237 | } |
238 | 238 | ||
239 | static int | 239 | int |
240 | ext4_check_acl(struct inode *inode, int mask) | 240 | ext4_check_acl(struct inode *inode, int mask) |
241 | { | 241 | { |
242 | struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); | 242 | struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); |
@@ -252,12 +252,6 @@ ext4_check_acl(struct inode *inode, int mask) | |||
252 | return -EAGAIN; | 252 | return -EAGAIN; |
253 | } | 253 | } |
254 | 254 | ||
255 | int | ||
256 | ext4_permission(struct inode *inode, int mask) | ||
257 | { | ||
258 | return generic_permission(inode, mask, ext4_check_acl); | ||
259 | } | ||
260 | |||
261 | /* | 255 | /* |
262 | * Initialize the ACLs of a new inode. Called from ext4_new_inode. | 256 | * Initialize the ACLs of a new inode. Called from ext4_new_inode. |
263 | * | 257 | * |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 949789d2bba6..9d843d5deac4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext4_permission(struct inode *, int); | 57 | extern int ext4_check_acl(struct inode *, int); |
58 | extern int ext4_acl_chmod(struct inode *); | 58 | extern int ext4_acl_chmod(struct inode *); |
59 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); | 59 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
60 | 60 | ||
61 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ | 61 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext4_permission NULL | 63 | #define ext4_check_acl NULL |
64 | 64 | ||
65 | static inline int | 65 | static inline int |
66 | ext4_acl_chmod(struct inode *inode) | 66 | ext4_acl_chmod(struct inode *inode) |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e2126d70dff5..1d0418980f8d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
478 | * new bitmap information | 478 | * new bitmap information |
479 | */ | 479 | */ |
480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
481 | ext4_mb_update_group_info(grp, blocks_freed); | 481 | grp->bb_free += blocks_freed; |
482 | up_write(&grp->alloc_sem); | 482 | up_write(&grp->alloc_sem); |
483 | 483 | ||
484 | /* We dirtied the bitmap block */ | 484 | /* We dirtied the bitmap block */ |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9714db393efe..e227eea23f05 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t; | |||
67 | 67 | ||
68 | 68 | ||
69 | /* prefer goal again. length */ | 69 | /* prefer goal again. length */ |
70 | #define EXT4_MB_HINT_MERGE 1 | 70 | #define EXT4_MB_HINT_MERGE 0x0001 |
71 | /* blocks already reserved */ | 71 | /* blocks already reserved */ |
72 | #define EXT4_MB_HINT_RESERVED 2 | 72 | #define EXT4_MB_HINT_RESERVED 0x0002 |
73 | /* metadata is being allocated */ | 73 | /* metadata is being allocated */ |
74 | #define EXT4_MB_HINT_METADATA 4 | 74 | #define EXT4_MB_HINT_METADATA 0x0004 |
75 | /* first blocks in the file */ | 75 | /* first blocks in the file */ |
76 | #define EXT4_MB_HINT_FIRST 8 | 76 | #define EXT4_MB_HINT_FIRST 0x0008 |
77 | /* search for the best chunk */ | 77 | /* search for the best chunk */ |
78 | #define EXT4_MB_HINT_BEST 16 | 78 | #define EXT4_MB_HINT_BEST 0x0010 |
79 | /* data is being allocated */ | 79 | /* data is being allocated */ |
80 | #define EXT4_MB_HINT_DATA 32 | 80 | #define EXT4_MB_HINT_DATA 0x0020 |
81 | /* don't preallocate (for tails) */ | 81 | /* don't preallocate (for tails) */ |
82 | #define EXT4_MB_HINT_NOPREALLOC 64 | 82 | #define EXT4_MB_HINT_NOPREALLOC 0x0040 |
83 | /* allocate for locality group */ | 83 | /* allocate for locality group */ |
84 | #define EXT4_MB_HINT_GROUP_ALLOC 128 | 84 | #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 |
85 | /* allocate goal blocks or none */ | 85 | /* allocate goal blocks or none */ |
86 | #define EXT4_MB_HINT_GOAL_ONLY 256 | 86 | #define EXT4_MB_HINT_GOAL_ONLY 0x0100 |
87 | /* goal is meaningful */ | 87 | /* goal is meaningful */ |
88 | #define EXT4_MB_HINT_TRY_GOAL 512 | 88 | #define EXT4_MB_HINT_TRY_GOAL 0x0200 |
89 | /* blocks already pre-reserved by delayed allocation */ | 89 | /* blocks already pre-reserved by delayed allocation */ |
90 | #define EXT4_MB_DELALLOC_RESERVED 1024 | 90 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 |
91 | /* We are doing stream allocation */ | ||
92 | #define EXT4_MB_STREAM_ALLOC 0x0800 | ||
91 | 93 | ||
92 | 94 | ||
93 | struct ext4_allocation_request { | 95 | struct ext4_allocation_request { |
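Renumbering the allocation hints from decimal to hexadecimal makes the one-bit-per-hint invariant visible at a glance and lets a new flag such as EXT4_MB_STREAM_ALLOC slot in without decimal arithmetic. A trivial stand-alone illustration of how such flags compose and test (values copied from the defines above):

#include <stdio.h>

#define EXT4_MB_HINT_DATA        0x0020
#define EXT4_MB_HINT_NOPREALLOC  0x0040
#define EXT4_MB_STREAM_ALLOC     0x0800

int main(void)
{
    unsigned int flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_NOPREALLOC;

    if (flags & EXT4_MB_HINT_DATA)
        printf("data allocation requested\n");
    if (!(flags & EXT4_MB_STREAM_ALLOC))
        printf("not a stream allocation\n");
    return 0;
}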
@@ -112,6 +114,21 @@ struct ext4_allocation_request { | |||
112 | }; | 114 | }; |
113 | 115 | ||
114 | /* | 116 | /* |
117 | * For delayed allocation tracking | ||
118 | */ | ||
119 | struct mpage_da_data { | ||
120 | struct inode *inode; | ||
121 | sector_t b_blocknr; /* start block number of extent */ | ||
122 | size_t b_size; /* size of extent */ | ||
123 | unsigned long b_state; /* state of the extent */ | ||
124 | unsigned long first_page, next_page; /* extent of pages */ | ||
125 | struct writeback_control *wbc; | ||
126 | int io_done; | ||
127 | int pages_written; | ||
128 | int retval; | ||
129 | }; | ||
130 | |||
131 | /* | ||
115 | * Special inodes numbers | 132 | * Special inodes numbers |
116 | */ | 133 | */ |
117 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ | 134 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ |
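struct mpage_da_data moves from inode.c into ext4.h, presumably so code outside inode.c (such as the ext4_da_write_pages tracepoint added later in this diff) can see its layout. A stand-alone sketch of how the writeback walker uses the page-range fields (simplified types, not kernel code):

#include <stdio.h>

struct mpage_da_model {
    unsigned long first_page, next_page;  /* extent of pages gathered */
    unsigned long b_size;                 /* bytes accumulated so far */
    int io_done;
};

int main(void)
{
    struct mpage_da_model mpd = { .first_page = 10, .next_page = 10 };
    unsigned long page;

    for (page = 10; page < 14; page++) {  /* four contiguous dirty pages */
        mpd.next_page = page + 1;
        mpd.b_size += 4096;
    }
    mpd.io_done = 1;
    printf("pages [%lu, %lu), %lu bytes\n",
           mpd.first_page, mpd.next_page, mpd.b_size);
    return 0;
}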
@@ -251,7 +268,6 @@ struct flex_groups { | |||
251 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 268 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
252 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 269 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
253 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 270 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
254 | #define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ | ||
255 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 271 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
256 | 272 | ||
257 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 273 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
@@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
289 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ | 305 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ |
290 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ | 306 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ |
291 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ | 307 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ |
308 | #define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ | ||
292 | 309 | ||
293 | /* Used to pass group descriptor data when online resize is done */ | 310 | /* Used to pass group descriptor data when online resize is done */ |
294 | struct ext4_new_group_input { | 311 | struct ext4_new_group_input { |
@@ -386,6 +403,9 @@ struct ext4_mount_options { | |||
386 | #endif | 403 | #endif |
387 | }; | 404 | }; |
388 | 405 | ||
406 | /* Max physical block we can address w/o extents */ | ||

407 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF | ||
408 | |||
389 | /* | 409 | /* |
390 | * Structure of an inode on the disk | 410 | * Structure of an inode on the disk |
391 | */ | 411 | */ |
@@ -456,7 +476,6 @@ struct move_extent { | |||
456 | __u64 len; /* block length to be moved */ | 476 | __u64 len; /* block length to be moved */ |
457 | __u64 moved_len; /* moved block length */ | 477 | __u64 moved_len; /* moved block length */ |
458 | }; | 478 | }; |
459 | #define MAX_DEFRAG_SIZE ((1UL<<31) - 1) | ||
460 | 479 | ||
461 | #define EXT4_EPOCH_BITS 2 | 480 | #define EXT4_EPOCH_BITS 2 |
462 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) | 481 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) |
@@ -694,7 +713,6 @@ struct ext4_inode_info { | |||
694 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 713 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
695 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 714 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
696 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 715 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
697 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | ||
698 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 716 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
699 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 717 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
700 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 718 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
@@ -841,6 +859,7 @@ struct ext4_sb_info { | |||
841 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | 859 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ |
842 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | 860 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ |
843 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | 861 | ext4_group_t s_groups_count; /* Number of groups in the fs */ |
862 | ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ | ||
844 | unsigned long s_overhead_last; /* Last calculated overhead */ | 863 | unsigned long s_overhead_last; /* Last calculated overhead */ |
845 | unsigned long s_blocks_last; /* Last seen block count */ | 864 | unsigned long s_blocks_last; /* Last seen block count */ |
846 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 865 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
@@ -950,6 +969,7 @@ struct ext4_sb_info { | |||
950 | atomic_t s_mb_lost_chunks; | 969 | atomic_t s_mb_lost_chunks; |
951 | atomic_t s_mb_preallocated; | 970 | atomic_t s_mb_preallocated; |
952 | atomic_t s_mb_discarded; | 971 | atomic_t s_mb_discarded; |
972 | atomic_t s_lock_busy; | ||
953 | 973 | ||
954 | /* locality groups */ | 974 | /* locality groups */ |
955 | struct ext4_locality_group *s_locality_groups; | 975 | struct ext4_locality_group *s_locality_groups; |
@@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *, | |||
1340 | ext4_fsblk_t, unsigned long, int, unsigned long *); | 1360 | ext4_fsblk_t, unsigned long, int, unsigned long *); |
1341 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1361 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1342 | ext4_group_t i, struct ext4_group_desc *desc); | 1362 | ext4_group_t i, struct ext4_group_desc *desc); |
1343 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
1344 | ext4_grpblk_t add); | ||
1345 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1363 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); |
1346 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1364 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, |
1347 | ext4_group_t, int); | 1365 | ext4_group_t, int); |
@@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
1367 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1385 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1368 | extern int ext4_can_truncate(struct inode *inode); | 1386 | extern int ext4_can_truncate(struct inode *inode); |
1369 | extern void ext4_truncate(struct inode *); | 1387 | extern void ext4_truncate(struct inode *); |
1388 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | ||
1370 | extern void ext4_set_inode_flags(struct inode *); | 1389 | extern void ext4_set_inode_flags(struct inode *); |
1371 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1390 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1372 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1391 | extern int ext4_alloc_da_blocks(struct inode *inode); |
@@ -1575,15 +1594,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | |||
1575 | struct ext4_group_info { | 1594 | struct ext4_group_info { |
1576 | unsigned long bb_state; | 1595 | unsigned long bb_state; |
1577 | struct rb_root bb_free_root; | 1596 | struct rb_root bb_free_root; |
1578 | unsigned short bb_first_free; | 1597 | ext4_grpblk_t bb_first_free; /* first free block */ |
1579 | unsigned short bb_free; | 1598 | ext4_grpblk_t bb_free; /* total free blocks */ |
1580 | unsigned short bb_fragments; | 1599 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
1581 | struct list_head bb_prealloc_list; | 1600 | struct list_head bb_prealloc_list; |
1582 | #ifdef DOUBLE_CHECK | 1601 | #ifdef DOUBLE_CHECK |
1583 | void *bb_bitmap; | 1602 | void *bb_bitmap; |
1584 | #endif | 1603 | #endif |
1585 | struct rw_semaphore alloc_sem; | 1604 | struct rw_semaphore alloc_sem; |
1586 | unsigned short bb_counters[]; | 1605 | ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block |
1606 | * regions, index is order. | ||
1607 | * bb_counters[3] = 5 means | ||
1608 | * 5 free 8-block regions. */ | ||
1587 | }; | 1609 | }; |
1588 | 1610 | ||
1589 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 1611 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
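The widened bb_counters[] comment is worth unpacking: entry `order` counts free buddy regions of 2^order blocks, so a group's free-block total is the weighted sum over orders. A stand-alone check of that arithmetic (plain C, not kernel code):

#include <stdio.h>

int main(void)
{
    /* bb_counters[3] = 5 means five free 8-block regions */
    int bb_counters[] = { 2, 0, 1, 5 };          /* orders 0..3 */
    int order, free_blocks = 0;

    for (order = 0; order < 4; order++)
        free_blocks += bb_counters[order] << order;

    printf("free blocks: %d\n", free_blocks);    /* 2 + 4 + 40 = 46 */
    return 0;
}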
@@ -1591,15 +1613,42 @@ struct ext4_group_info { | |||
1591 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 1613 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
1592 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 1614 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
1593 | 1615 | ||
1616 | #define EXT4_MAX_CONTENTION 8 | ||
1617 | #define EXT4_CONTENTION_THRESHOLD 2 | ||
1618 | |||
1594 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, | 1619 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
1595 | ext4_group_t group) | 1620 | ext4_group_t group) |
1596 | { | 1621 | { |
1597 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); | 1622 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
1598 | } | 1623 | } |
1599 | 1624 | ||
1625 | /* | ||
1626 | * Returns true if the filesystem is busy enough that attempts to | ||
1627 | * access the block group locks have run into contention. | ||
1628 | */ | ||
1629 | static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) | ||
1630 | { | ||
1631 | return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); | ||
1632 | } | ||
1633 | |||
1600 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 1634 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
1601 | { | 1635 | { |
1602 | spin_lock(ext4_group_lock_ptr(sb, group)); | 1636 | spinlock_t *lock = ext4_group_lock_ptr(sb, group); |
1637 | if (spin_trylock(lock)) | ||
1638 | /* | ||
1639 | * We're able to grab the lock right away, so drop the | ||
1640 | * lock contention counter. | ||
1641 | */ | ||
1642 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); | ||
1643 | else { | ||
1644 | /* | ||
1645 | * The lock is busy, so bump the contention counter, | ||
1646 | * and then wait on the spin lock. | ||
1647 | */ | ||
1648 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, | ||
1649 | EXT4_MAX_CONTENTION); | ||
1650 | spin_lock(lock); | ||
1651 | } | ||
1603 | } | 1652 | } |
1604 | 1653 | ||
1605 | static inline void ext4_unlock_group(struct super_block *sb, | 1654 | static inline void ext4_unlock_group(struct super_block *sb, |
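The new ext4_lock_group() is an adaptive contention estimator: an uncontended trylock decays s_lock_busy toward zero, a failed trylock bumps it toward EXT4_MAX_CONTENTION before spinning, and ext4_fs_is_busy() reads the counter to decide when the filesystem is hot. A user-space model of the same pattern, with pthread spinlocks and C11 atomics standing in for the kernel primitives:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_CONTENTION       8
#define CONTENTION_THRESHOLD 2

static atomic_int lock_busy;
static pthread_spinlock_t group_lock;

static void lock_group(void)
{
    if (pthread_spin_trylock(&group_lock) == 0) {
        /* uncontended: decay the counter, but never below zero
         * (mirrors atomic_add_unless(..., -1, 0)) */
        int v = atomic_load(&lock_busy);
        while (v > 0 &&
               !atomic_compare_exchange_weak(&lock_busy, &v, v - 1))
            ;
    } else {
        /* contended: bump the counter up to the cap, then wait */
        int v = atomic_load(&lock_busy);
        while (v < MAX_CONTENTION &&
               !atomic_compare_exchange_weak(&lock_busy, &v, v + 1))
            ;
        pthread_spin_lock(&group_lock);
    }
}

static int fs_is_busy(void)
{
    return atomic_load(&lock_busy) > CONTENTION_THRESHOLD;
}

int main(void)
{
    pthread_spin_init(&group_lock, PTHREAD_PROCESS_PRIVATE);
    lock_group();
    printf("busy: %d\n", fs_is_busy());
    pthread_spin_unlock(&group_lock);
    pthread_spin_destroy(&group_lock);
    return 0;
}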
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 20a84105a10b..61652f1d15e6 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -43,8 +43,7 @@ | |||
43 | #define CHECK_BINSEARCH__ | 43 | #define CHECK_BINSEARCH__ |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * If EXT_DEBUG is defined you can use the 'extdebug' mount option | 46 | * Turn on EXT_DEBUG to get lots of info about extent operations.
47 | * to get lots of info about what's going on. | ||
48 | */ | 47 | */ |
49 | #define EXT_DEBUG__ | 48 | #define EXT_DEBUG__ |
50 | #ifdef EXT_DEBUG | 49 | #ifdef EXT_DEBUG |
@@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
138 | #define EXT_BREAK 1 | 137 | #define EXT_BREAK 1 |
139 | #define EXT_REPEAT 2 | 138 | #define EXT_REPEAT 2 |
140 | 139 | ||
140 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | ||
141 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
142 | 142 | ||
143 | /* | 143 | /* |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index eb27fd0f2ee8..6a9409920dee 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle, | |||
44 | handle, err); | 44 | handle, err); |
45 | } | 45 | } |
46 | else | 46 | else |
47 | brelse(bh); | 47 | bforget(bh); |
48 | return err; | 48 | return err; |
49 | } | 49 | } |
50 | 50 | ||
@@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, | |||
60 | handle, err); | 60 | handle, err); |
61 | } | 61 | } |
62 | else | 62 | else |
63 | brelse(bh); | 63 | bforget(bh); |
64 | return err; | 64 | return err; |
65 | } | 65 | } |
66 | 66 | ||
@@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
89 | ext4_journal_abort_handle(where, __func__, bh, | 89 | ext4_journal_abort_handle(where, __func__, bh, |
90 | handle, err); | 90 | handle, err); |
91 | } else { | 91 | } else { |
92 | mark_buffer_dirty(bh); | 92 | if (inode && bh) |
93 | mark_buffer_dirty_inode(bh, inode); | ||
94 | else | ||
95 | mark_buffer_dirty(bh); | ||
93 | if (inode && inode_needs_sync(inode)) { | 96 | if (inode && inode_needs_sync(inode)) { |
94 | sync_dirty_buffer(bh); | 97 | sync_dirty_buffer(bh); |
95 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 98 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
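Two related fixes in ext4_jbd2.c: on the error-free no-journal path, __ext4_journal_forget() and __ext4_journal_revoke() now drop the buffer with bforget() rather than brelse(), discarding dirty state for a block that is being forgotten; and dirty metadata is filed with mark_buffer_dirty_inode() whenever an owning inode is known, putting the buffer on that inode's private list where a later fsync can find it. A stub-based model of the routing decision (not kernel code):

#include <stdio.h>

static void mark_buffer_dirty_inode(void) { puts("dirtied on inode's private list"); }
static void mark_buffer_dirty(void)       { puts("dirtied globally"); }

static void handle_dirty_metadata(int have_inode, int have_bh)
{
    if (have_inode && have_bh)
        mark_buffer_dirty_inode();   /* fsync can walk the inode's list */
    else
        mark_buffer_dirty();
}

int main(void)
{
    handle_dirty_metadata(1, 1);
    handle_dirty_metadata(0, 1);
    return 0;
}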
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 73ebfb44ad75..7a3832577923 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | |||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
94 | } | 94 | } |
95 | 95 | ||
96 | static int ext4_ext_journal_restart(handle_t *handle, int needed) | 96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | ||
98 | int needed) | ||
97 | { | 99 | { |
98 | int err; | 100 | int err; |
99 | 101 | ||
@@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) | |||
104 | err = ext4_journal_extend(handle, needed); | 106 | err = ext4_journal_extend(handle, needed); |
105 | if (err <= 0) | 107 | if (err <= 0) |
106 | return err; | 108 | return err; |
107 | return ext4_journal_restart(handle, needed); | 109 | err = ext4_truncate_restart_trans(handle, inode, needed); |
110 | /* | ||
111 | * We have dropped i_data_sem so someone might have cached again | ||
112 | * an extent we are going to truncate. | ||
113 | */ | ||
114 | ext4_ext_invalidate_cache(inode); | ||
115 | |||
116 | return err; | ||
108 | } | 117 | } |
109 | 118 | ||
110 | /* | 119 | /* |
@@ -220,57 +229,65 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, | |||
220 | return newblock; | 229 | return newblock; |
221 | } | 230 | } |
222 | 231 | ||
223 | static int ext4_ext_space_block(struct inode *inode) | 232 | static inline int ext4_ext_space_block(struct inode *inode, int check) |
224 | { | 233 | { |
225 | int size; | 234 | int size; |
226 | 235 | ||
227 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 236 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
228 | / sizeof(struct ext4_extent); | 237 | / sizeof(struct ext4_extent); |
238 | if (!check) { | ||
229 | #ifdef AGGRESSIVE_TEST | 239 | #ifdef AGGRESSIVE_TEST |
230 | if (size > 6) | 240 | if (size > 6) |
231 | size = 6; | 241 | size = 6; |
232 | #endif | 242 | #endif |
243 | } | ||
233 | return size; | 244 | return size; |
234 | } | 245 | } |
235 | 246 | ||
236 | static int ext4_ext_space_block_idx(struct inode *inode) | 247 | static inline int ext4_ext_space_block_idx(struct inode *inode, int check) |
237 | { | 248 | { |
238 | int size; | 249 | int size; |
239 | 250 | ||
240 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 251 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
241 | / sizeof(struct ext4_extent_idx); | 252 | / sizeof(struct ext4_extent_idx); |
253 | if (!check) { | ||
242 | #ifdef AGGRESSIVE_TEST | 254 | #ifdef AGGRESSIVE_TEST |
243 | if (size > 5) | 255 | if (size > 5) |
244 | size = 5; | 256 | size = 5; |
245 | #endif | 257 | #endif |
258 | } | ||
246 | return size; | 259 | return size; |
247 | } | 260 | } |
248 | 261 | ||
249 | static int ext4_ext_space_root(struct inode *inode) | 262 | static inline int ext4_ext_space_root(struct inode *inode, int check) |
250 | { | 263 | { |
251 | int size; | 264 | int size; |
252 | 265 | ||
253 | size = sizeof(EXT4_I(inode)->i_data); | 266 | size = sizeof(EXT4_I(inode)->i_data); |
254 | size -= sizeof(struct ext4_extent_header); | 267 | size -= sizeof(struct ext4_extent_header); |
255 | size /= sizeof(struct ext4_extent); | 268 | size /= sizeof(struct ext4_extent); |
269 | if (!check) { | ||
256 | #ifdef AGGRESSIVE_TEST | 270 | #ifdef AGGRESSIVE_TEST |
257 | if (size > 3) | 271 | if (size > 3) |
258 | size = 3; | 272 | size = 3; |
259 | #endif | 273 | #endif |
274 | } | ||
260 | return size; | 275 | return size; |
261 | } | 276 | } |
262 | 277 | ||
263 | static int ext4_ext_space_root_idx(struct inode *inode) | 278 | static inline int ext4_ext_space_root_idx(struct inode *inode, int check) |
264 | { | 279 | { |
265 | int size; | 280 | int size; |
266 | 281 | ||
267 | size = sizeof(EXT4_I(inode)->i_data); | 282 | size = sizeof(EXT4_I(inode)->i_data); |
268 | size -= sizeof(struct ext4_extent_header); | 283 | size -= sizeof(struct ext4_extent_header); |
269 | size /= sizeof(struct ext4_extent_idx); | 284 | size /= sizeof(struct ext4_extent_idx); |
285 | if (!check) { | ||
270 | #ifdef AGGRESSIVE_TEST | 286 | #ifdef AGGRESSIVE_TEST |
271 | if (size > 4) | 287 | if (size > 4) |
272 | size = 4; | 288 | size = 4; |
273 | #endif | 289 | #endif |
290 | } | ||
274 | return size; | 291 | return size; |
275 | } | 292 | } |
276 | 293 | ||
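AGGRESSIVE_TEST deliberately clamps the in-memory eh_max so tree-split paths get exercised even on small filesystems, but the on-disk validity check in ext4_ext_max_entries() must compare against the true capacity; hence the new `check` parameter selecting between the two. A stand-alone sketch of the idea (the header/extent sizes are placeholders):

#include <stdio.h>

#define AGGRESSIVE_TEST   /* pretend the debug clamp is configured in */

static int space_block(int blocksize, int check)
{
    int size = (blocksize - 12) / 12;   /* (block - header) / extent */

    if (!check) {
#ifdef AGGRESSIVE_TEST
        if (size > 6)
            size = 6;                   /* clamp only for allocation */
#endif
    }
    return size;
}

int main(void)
{
    printf("allocation capacity: %d\n", space_block(4096, 0)); /* clamped: 6 */
    printf("validation capacity: %d\n", space_block(4096, 1)); /* real: 340 */
    return 0;
}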
@@ -284,9 +301,9 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | |||
284 | int lcap, icap, rcap, leafs, idxs, num; | 301 | int lcap, icap, rcap, leafs, idxs, num; |
285 | int newextents = blocks; | 302 | int newextents = blocks; |
286 | 303 | ||
287 | rcap = ext4_ext_space_root_idx(inode); | 304 | rcap = ext4_ext_space_root_idx(inode, 0); |
288 | lcap = ext4_ext_space_block(inode); | 305 | lcap = ext4_ext_space_block(inode, 0); |
289 | icap = ext4_ext_space_block_idx(inode); | 306 | icap = ext4_ext_space_block_idx(inode, 0); |
290 | 307 | ||
291 | /* number of new leaf blocks needed */ | 308 | /* number of new leaf blocks needed */ |
292 | num = leafs = (newextents + lcap - 1) / lcap; | 309 | num = leafs = (newextents + lcap - 1) / lcap; |
@@ -311,14 +328,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
311 | 328 | ||
312 | if (depth == ext_depth(inode)) { | 329 | if (depth == ext_depth(inode)) { |
313 | if (depth == 0) | 330 | if (depth == 0) |
314 | max = ext4_ext_space_root(inode); | 331 | max = ext4_ext_space_root(inode, 1); |
315 | else | 332 | else |
316 | max = ext4_ext_space_root_idx(inode); | 333 | max = ext4_ext_space_root_idx(inode, 1); |
317 | } else { | 334 | } else { |
318 | if (depth == 0) | 335 | if (depth == 0) |
319 | max = ext4_ext_space_block(inode); | 336 | max = ext4_ext_space_block(inode, 1); |
320 | else | 337 | else |
321 | max = ext4_ext_space_block_idx(inode); | 338 | max = ext4_ext_space_block_idx(inode, 1); |
322 | } | 339 | } |
323 | 340 | ||
324 | return max; | 341 | return max; |
@@ -437,8 +454,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
437 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 454 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
438 | idx_pblock(path->p_idx)); | 455 | idx_pblock(path->p_idx)); |
439 | } else if (path->p_ext) { | 456 | } else if (path->p_ext) { |
440 | ext_debug(" %d:%d:%llu ", | 457 | ext_debug(" %d:[%d]%d:%llu ", |
441 | le32_to_cpu(path->p_ext->ee_block), | 458 | le32_to_cpu(path->p_ext->ee_block), |
459 | ext4_ext_is_uninitialized(path->p_ext), | ||
442 | ext4_ext_get_actual_len(path->p_ext), | 460 | ext4_ext_get_actual_len(path->p_ext), |
443 | ext_pblock(path->p_ext)); | 461 | ext_pblock(path->p_ext)); |
444 | } else | 462 | } else |
@@ -460,8 +478,11 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
460 | eh = path[depth].p_hdr; | 478 | eh = path[depth].p_hdr; |
461 | ex = EXT_FIRST_EXTENT(eh); | 479 | ex = EXT_FIRST_EXTENT(eh); |
462 | 480 | ||
481 | ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); | ||
482 | |||
463 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 483 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
464 | ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), | 484 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
485 | ext4_ext_is_uninitialized(ex), | ||
465 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 486 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
466 | } | 487 | } |
467 | ext_debug("\n"); | 488 | ext_debug("\n"); |
@@ -580,9 +601,10 @@ ext4_ext_binsearch(struct inode *inode, | |||
580 | } | 601 | } |
581 | 602 | ||
582 | path->p_ext = l - 1; | 603 | path->p_ext = l - 1; |
583 | ext_debug(" -> %d:%llu:%d ", | 604 | ext_debug(" -> %d:%llu:[%d]%d ", |
584 | le32_to_cpu(path->p_ext->ee_block), | 605 | le32_to_cpu(path->p_ext->ee_block), |
585 | ext_pblock(path->p_ext), | 606 | ext_pblock(path->p_ext), |
607 | ext4_ext_is_uninitialized(path->p_ext), | ||
586 | ext4_ext_get_actual_len(path->p_ext)); | 608 | ext4_ext_get_actual_len(path->p_ext)); |
587 | 609 | ||
588 | #ifdef CHECK_BINSEARCH | 610 | #ifdef CHECK_BINSEARCH |
@@ -612,7 +634,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
612 | eh->eh_depth = 0; | 634 | eh->eh_depth = 0; |
613 | eh->eh_entries = 0; | 635 | eh->eh_entries = 0; |
614 | eh->eh_magic = EXT4_EXT_MAGIC; | 636 | eh->eh_magic = EXT4_EXT_MAGIC; |
615 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); | 637 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); |
616 | ext4_mark_inode_dirty(handle, inode); | 638 | ext4_mark_inode_dirty(handle, inode); |
617 | ext4_ext_invalidate_cache(inode); | 639 | ext4_ext_invalidate_cache(inode); |
618 | return 0; | 640 | return 0; |
@@ -837,7 +859,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
837 | 859 | ||
838 | neh = ext_block_hdr(bh); | 860 | neh = ext_block_hdr(bh); |
839 | neh->eh_entries = 0; | 861 | neh->eh_entries = 0; |
840 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 862 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
841 | neh->eh_magic = EXT4_EXT_MAGIC; | 863 | neh->eh_magic = EXT4_EXT_MAGIC; |
842 | neh->eh_depth = 0; | 864 | neh->eh_depth = 0; |
843 | ex = EXT_FIRST_EXTENT(neh); | 865 | ex = EXT_FIRST_EXTENT(neh); |
@@ -850,9 +872,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
850 | path[depth].p_ext++; | 872 | path[depth].p_ext++; |
851 | while (path[depth].p_ext <= | 873 | while (path[depth].p_ext <= |
852 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 874 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
853 | ext_debug("move %d:%llu:%d in new leaf %llu\n", | 875 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
854 | le32_to_cpu(path[depth].p_ext->ee_block), | 876 | le32_to_cpu(path[depth].p_ext->ee_block), |
855 | ext_pblock(path[depth].p_ext), | 877 | ext_pblock(path[depth].p_ext), |
878 | ext4_ext_is_uninitialized(path[depth].p_ext), | ||
856 | ext4_ext_get_actual_len(path[depth].p_ext), | 879 | ext4_ext_get_actual_len(path[depth].p_ext), |
857 | newblock); | 880 | newblock); |
858 | /*memmove(ex++, path[depth].p_ext++, | 881 | /*memmove(ex++, path[depth].p_ext++, |
@@ -912,7 +935,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
912 | neh = ext_block_hdr(bh); | 935 | neh = ext_block_hdr(bh); |
913 | neh->eh_entries = cpu_to_le16(1); | 936 | neh->eh_entries = cpu_to_le16(1); |
914 | neh->eh_magic = EXT4_EXT_MAGIC; | 937 | neh->eh_magic = EXT4_EXT_MAGIC; |
915 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 938 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
916 | neh->eh_depth = cpu_to_le16(depth - i); | 939 | neh->eh_depth = cpu_to_le16(depth - i); |
917 | fidx = EXT_FIRST_INDEX(neh); | 940 | fidx = EXT_FIRST_INDEX(neh); |
918 | fidx->ei_block = border; | 941 | fidx->ei_block = border; |
@@ -1037,9 +1060,9 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1037 | /* old root could have indexes or leaves | 1060 | /* old root could have indexes or leaves |
1038 | * so calculate e_max the right way */ | 1061 | * so calculate e_max the right way */
1039 | if (ext_depth(inode)) | 1062 | if (ext_depth(inode)) |
1040 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 1063 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
1041 | else | 1064 | else |
1042 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 1065 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
1043 | neh->eh_magic = EXT4_EXT_MAGIC; | 1066 | neh->eh_magic = EXT4_EXT_MAGIC; |
1044 | set_buffer_uptodate(bh); | 1067 | set_buffer_uptodate(bh); |
1045 | unlock_buffer(bh); | 1068 | unlock_buffer(bh); |
@@ -1054,7 +1077,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1054 | goto out; | 1077 | goto out; |
1055 | 1078 | ||
1056 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; | 1079 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; |
1057 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); | 1080 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); |
1058 | curp->p_hdr->eh_entries = cpu_to_le16(1); | 1081 | curp->p_hdr->eh_entries = cpu_to_le16(1); |
1059 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); | 1082 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); |
1060 | 1083 | ||
@@ -1580,9 +1603,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1580 | 1603 | ||
1581 | /* try to insert block into found extent and return */ | 1604 | /* try to insert block into found extent and return */ |
1582 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { | 1605 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { |
1583 | ext_debug("append %d block to %d:%d (from %llu)\n", | 1606 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1607 | ext4_ext_is_uninitialized(newext), | ||
1584 | ext4_ext_get_actual_len(newext), | 1608 | ext4_ext_get_actual_len(newext), |
1585 | le32_to_cpu(ex->ee_block), | 1609 | le32_to_cpu(ex->ee_block), |
1610 | ext4_ext_is_uninitialized(ex), | ||
1586 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1611 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
1587 | err = ext4_ext_get_access(handle, inode, path + depth); | 1612 | err = ext4_ext_get_access(handle, inode, path + depth); |
1588 | if (err) | 1613 | if (err) |
@@ -1651,9 +1676,10 @@ has_space: | |||
1651 | 1676 | ||
1652 | if (!nearex) { | 1677 | if (!nearex) { |
1653 | /* there is no extent in this leaf, create first one */ | 1678 | /* there is no extent in this leaf, create first one */ |
1654 | ext_debug("first extent in the leaf: %d:%llu:%d\n", | 1679 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1655 | le32_to_cpu(newext->ee_block), | 1680 | le32_to_cpu(newext->ee_block), |
1656 | ext_pblock(newext), | 1681 | ext_pblock(newext), |
1682 | ext4_ext_is_uninitialized(newext), | ||
1657 | ext4_ext_get_actual_len(newext)); | 1683 | ext4_ext_get_actual_len(newext)); |
1658 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1684 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
1659 | } else if (le32_to_cpu(newext->ee_block) | 1685 | } else if (le32_to_cpu(newext->ee_block) |
@@ -1663,10 +1689,11 @@ has_space: | |||
1663 | len = EXT_MAX_EXTENT(eh) - nearex; | 1689 | len = EXT_MAX_EXTENT(eh) - nearex; |
1664 | len = (len - 1) * sizeof(struct ext4_extent); | 1690 | len = (len - 1) * sizeof(struct ext4_extent); |
1665 | len = len < 0 ? 0 : len; | 1691 | len = len < 0 ? 0 : len; |
1666 | ext_debug("insert %d:%llu:%d after: nearest 0x%p, " | 1692 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1667 | "move %d from 0x%p to 0x%p\n", | 1693 | "move %d from 0x%p to 0x%p\n", |
1668 | le32_to_cpu(newext->ee_block), | 1694 | le32_to_cpu(newext->ee_block), |
1669 | ext_pblock(newext), | 1695 | ext_pblock(newext), |
1696 | ext4_ext_is_uninitialized(newext), | ||
1670 | ext4_ext_get_actual_len(newext), | 1697 | ext4_ext_get_actual_len(newext), |
1671 | nearex, len, nearex + 1, nearex + 2); | 1698 | nearex, len, nearex + 1, nearex + 2); |
1672 | memmove(nearex + 2, nearex + 1, len); | 1699 | memmove(nearex + 2, nearex + 1, len); |
@@ -1676,10 +1703,11 @@ has_space: | |||
1676 | BUG_ON(newext->ee_block == nearex->ee_block); | 1703 | BUG_ON(newext->ee_block == nearex->ee_block); |
1677 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); | 1704 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); |
1678 | len = len < 0 ? 0 : len; | 1705 | len = len < 0 ? 0 : len; |
1679 | ext_debug("insert %d:%llu:%d before: nearest 0x%p, " | 1706 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1680 | "move %d from 0x%p to 0x%p\n", | 1707 | "move %d from 0x%p to 0x%p\n", |
1681 | le32_to_cpu(newext->ee_block), | 1708 | le32_to_cpu(newext->ee_block), |
1682 | ext_pblock(newext), | 1709 | ext_pblock(newext), |
1710 | ext4_ext_is_uninitialized(newext), | ||
1683 | ext4_ext_get_actual_len(newext), | 1711 | ext4_ext_get_actual_len(newext), |
1684 | nearex, len, nearex + 1, nearex + 2); | 1712 | nearex, len, nearex + 1, nearex + 2); |
1685 | memmove(nearex + 1, nearex, len); | 1713 | memmove(nearex + 1, nearex, len); |
@@ -2094,7 +2122,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2094 | else | 2122 | else |
2095 | uninitialized = 0; | 2123 | uninitialized = 0; |
2096 | 2124 | ||
2097 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); | 2125 | ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, |
2126 | uninitialized, ex_ee_len); | ||
2098 | path[depth].p_ext = ex; | 2127 | path[depth].p_ext = ex; |
2099 | 2128 | ||
2100 | a = ex_ee_block > start ? ex_ee_block : start; | 2129 | a = ex_ee_block > start ? ex_ee_block : start; |
@@ -2138,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2138 | } | 2167 | } |
2139 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2168 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
2140 | 2169 | ||
2141 | err = ext4_ext_journal_restart(handle, credits); | 2170 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); |
2142 | if (err) | 2171 | if (err) |
2143 | goto out; | 2172 | goto out; |
2144 | 2173 | ||
@@ -2327,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2327 | if (err == 0) { | 2356 | if (err == 0) { |
2328 | ext_inode_hdr(inode)->eh_depth = 0; | 2357 | ext_inode_hdr(inode)->eh_depth = 0; |
2329 | ext_inode_hdr(inode)->eh_max = | 2358 | ext_inode_hdr(inode)->eh_max = |
2330 | cpu_to_le16(ext4_ext_space_root(inode)); | 2359 | cpu_to_le16(ext4_ext_space_root(inode, 0)); |
2331 | err = ext4_ext_dirty(handle, inode, path); | 2360 | err = ext4_ext_dirty(handle, inode, path); |
2332 | } | 2361 | } |
2333 | } | 2362 | } |
@@ -2743,6 +2772,7 @@ insert: | |||
2743 | } else if (err) | 2772 | } else if (err) |
2744 | goto fix_extent_len; | 2773 | goto fix_extent_len; |
2745 | out: | 2774 | out: |
2775 | ext4_ext_show_leaf(inode, path); | ||
2746 | return err ? err : allocated; | 2776 | return err ? err : allocated; |
2747 | 2777 | ||
2748 | fix_extent_len: | 2778 | fix_extent_len: |
@@ -2786,7 +2816,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2786 | struct ext4_allocation_request ar; | 2816 | struct ext4_allocation_request ar; |
2787 | 2817 | ||
2788 | __clear_bit(BH_New, &bh_result->b_state); | 2818 | __clear_bit(BH_New, &bh_result->b_state); |
2789 | ext_debug("blocks %u/%u requested for inode %u\n", | 2819 | ext_debug("blocks %u/%u requested for inode %lu\n", |
2790 | iblock, max_blocks, inode->i_ino); | 2820 | iblock, max_blocks, inode->i_ino); |
2791 | 2821 | ||
2792 | /* check in cache */ | 2822 | /* check in cache */ |
@@ -2849,7 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2849 | newblock = iblock - ee_block + ee_start; | 2879 | newblock = iblock - ee_block + ee_start; |
2850 | /* number of remaining blocks in the extent */ | 2880 | /* number of remaining blocks in the extent */ |
2851 | allocated = ee_len - (iblock - ee_block); | 2881 | allocated = ee_len - (iblock - ee_block); |
2852 | ext_debug("%u fit into %lu:%d -> %llu\n", iblock, | 2882 | ext_debug("%u fit into %u:%d -> %llu\n", iblock, |
2853 | ee_block, ee_len, newblock); | 2883 | ee_block, ee_len, newblock); |
2854 | 2884 | ||
2855 | /* Do not put uninitialized extent in the cache */ | 2885 | /* Do not put uninitialized extent in the cache */ |
@@ -2950,7 +2980,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2950 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 2980 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
2951 | if (!newblock) | 2981 | if (!newblock) |
2952 | goto out2; | 2982 | goto out2; |
2953 | ext_debug("allocate new block: goal %llu, found %llu/%lu\n", | 2983 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", |
2954 | ar.goal, newblock, allocated); | 2984 | ar.goal, newblock, allocated); |
2955 | 2985 | ||
2956 | /* try to insert new extent into found leaf and return */ | 2986 | /* try to insert new extent into found leaf and return */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3f1873fef1c6..5ca3eca70a1e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -58,10 +58,7 @@ static ssize_t | |||
58 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | 58 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, |
59 | unsigned long nr_segs, loff_t pos) | 59 | unsigned long nr_segs, loff_t pos) |
60 | { | 60 | { |
61 | struct file *file = iocb->ki_filp; | 61 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
62 | struct inode *inode = file->f_path.dentry->d_inode; | ||
63 | ssize_t ret; | ||
64 | int err; | ||
65 | 62 | ||
66 | /* | 63 | /* |
67 | * If we have encountered a bitmap-format file, the size limit | 64 | * If we have encountered a bitmap-format file, the size limit |
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
81 | } | 78 | } |
82 | } | 79 | } |
83 | 80 | ||
84 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 81 | return generic_file_aio_write(iocb, iov, nr_segs, pos); |
85 | /* | ||
86 | * Skip flushing if there was an error, or if nothing was written. | ||
87 | */ | ||
88 | if (ret <= 0) | ||
89 | return ret; | ||
90 | |||
91 | /* | ||
92 | * If the inode is IS_SYNC, or is O_SYNC and we are doing data | ||
93 | * journalling then we need to make sure that we force the transaction | ||
94 | * to disk to keep all metadata uptodate synchronously. | ||
95 | */ | ||
96 | if (file->f_flags & O_SYNC) { | ||
97 | /* | ||
98 | * If we are non-data-journaled, then the dirty data has | ||
99 | * already been flushed to backing store by generic_osync_inode, | ||
100 | * and the inode has been flushed too if there have been any | ||
101 | * modifications other than mere timestamp updates. | ||
102 | * | ||
103 | * Open question --- do we care about flushing timestamps too | ||
104 | * if the inode is IS_SYNC? | ||
105 | */ | ||
106 | if (!ext4_should_journal_data(inode)) | ||
107 | return ret; | ||
108 | |||
109 | goto force_commit; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * So we know that there has been no forced data flush. If the inode | ||
114 | * is marked IS_SYNC, we need to force one ourselves. | ||
115 | */ | ||
116 | if (!IS_SYNC(inode)) | ||
117 | return ret; | ||
118 | |||
119 | /* | ||
120 | * Open question #2 --- should we force data to disk here too? If we | ||
121 | * don't, the only impact is that data=writeback filesystems won't | ||
122 | * flush data to disk automatically on IS_SYNC, only metadata (but | ||
123 | * historically, that is what ext2 has done.) | ||
124 | */ | ||
125 | |||
126 | force_commit: | ||
127 | err = ext4_force_commit(inode->i_sb); | ||
128 | if (err) | ||
129 | return err; | ||
130 | return ret; | ||
131 | } | 82 | } |
132 | 83 | ||
133 | static struct vm_operations_struct ext4_file_vm_ops = { | 84 | static struct vm_operations_struct ext4_file_vm_ops = { |
@@ -207,7 +158,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
207 | .listxattr = ext4_listxattr, | 158 | .listxattr = ext4_listxattr, |
208 | .removexattr = generic_removexattr, | 159 | .removexattr = generic_removexattr, |
209 | #endif | 160 | #endif |
210 | .permission = ext4_permission, | 161 | .check_acl = ext4_check_acl, |
211 | .fallocate = ext4_fallocate, | 162 | .fallocate = ext4_fallocate, |
212 | .fiemap = ext4_fiemap, | 163 | .fiemap = ext4_fiemap, |
213 | }; | 164 | }; |
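The big deletion earlier in this file hand-rolled the O_SYNC/IS_SYNC flush after a successful write; by this point in the release cycle the generic write path issues that sync itself, which is presumably why ext4_file_write() can shrink to the bitmap-file size check plus generic_file_aio_write(). A compressed stub model, not kernel code (-27 stands in for -EFBIG):

#include <stdio.h>

static long long generic_write_stub(long long len)
{
    /* the generic path also syncs on O_SYNC/IS_SYNC after success */
    return len;
}

static long long file_write_model(long long pos, long long len,
                                  long long bitmap_maxbytes, int extent_based)
{
    if (!extent_based && pos + len > bitmap_maxbytes)
        return -27;                      /* bitmap-format size limit */
    return generic_write_stub(len);
}

int main(void)
{
    printf("%lld\n", file_write_model(0, 4096, 1LL << 32, 0));
    return 0;
}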
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 83cf6415f599..07475740b512 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -50,7 +50,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
50 | { | 50 | { |
51 | struct inode *inode = dentry->d_inode; | 51 | struct inode *inode = dentry->d_inode; |
52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
53 | int ret = 0; | 53 | int err, ret = 0; |
54 | 54 | ||
55 | J_ASSERT(ext4_journal_current_handle() == NULL); | 55 | J_ASSERT(ext4_journal_current_handle() == NULL); |
56 | 56 | ||
@@ -79,6 +79,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
79 | goto out; | 79 | goto out; |
80 | } | 80 | } |
81 | 81 | ||
82 | if (!journal) | ||
83 | ret = sync_mapping_buffers(inode->i_mapping); | ||
84 | |||
82 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 85 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
83 | goto out; | 86 | goto out; |
84 | 87 | ||
@@ -91,10 +94,12 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
91 | .sync_mode = WB_SYNC_ALL, | 94 | .sync_mode = WB_SYNC_ALL, |
92 | .nr_to_write = 0, /* sys_fsync did this */ | 95 | .nr_to_write = 0, /* sys_fsync did this */ |
93 | }; | 96 | }; |
94 | ret = sync_inode(inode, &wbc); | 97 | err = sync_inode(inode, &wbc); |
95 | if (journal && (journal->j_flags & JBD2_BARRIER)) | 98 | if (ret == 0) |
96 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 99 | ret = err; |
97 | } | 100 | } |
98 | out: | 101 | out: |
102 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
99 | return ret; | 104 | return ret; |
100 | } | 105 | } |
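Two behavioural points in this fsync hunk are easy to miss: sync_mapping_buffers() now flushes the inode's private buffer list when there is no journal (pairing with the mark_buffer_dirty_inode() change in ext4_jbd2.c above), and the barrier flush moved below the out: label so it is issued on every exit path. A stand-alone model with stub helpers, not kernel code:

#include <stdio.h>

static int sync_mapping_buffers(void) { return 0; }   /* stub */
static int sync_inode(void)           { return 0; }   /* stub */
static void blkdev_issue_flush(void)  { puts("barrier flush"); }

/* Simplified control flow of the patched ext4_sync_file() */
static int fsync_model(int have_journal, int datasync, int dirty_datasync)
{
    int err, ret = 0;

    if (!have_journal)
        ret = sync_mapping_buffers();   /* flush private buffer list */

    if (datasync && !dirty_datasync)
        goto out;

    err = sync_inode();
    if (ret == 0)
        ret = err;                      /* keep the earlier error */
out:
    blkdev_issue_flush();               /* now on every exit path */
    return ret;
}

int main(void)
{
    return fsync_model(0, 1, 0);
}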
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 29e6dc7299b8..f3624ead4f6c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1189 | 1189 | ||
1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); | 1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); |
1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", | 1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", |
1192 | i, ext4_free_inodes_count(sb, gdp), x); | 1192 | (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); |
1193 | bitmap_count += x; | 1193 | bitmap_count += x; |
1194 | } | 1194 | } |
1195 | brelse(bitmap_bh); | 1195 | brelse(bitmap_bh); |
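ext4_group_t is typedef'd to unsigned int (see the ext4.h hunk above), so handing it straight to a %lu conversion is a format/argument mismatch on 64-bit builds; the explicit cast fixes it. The same pattern in miniature:

#include <stdio.h>

typedef unsigned int ext4_group_t;

int main(void)
{
    ext4_group_t group = 42;

    /* printf("group %lu\n", group); would warn: %lu wants unsigned long */
    printf("group %lu\n", (unsigned long) group);
    return 0;
}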
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9c642b22efa..4abd683b963d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
192 | * so before we call here everything must be consistently dirtied against | 192 | * so before we call here everything must be consistently dirtied against |
193 | * this transaction. | 193 | * this transaction. |
194 | */ | 194 | */ |
195 | static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | 195 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, |
196 | int nblocks) | ||
196 | { | 197 | { |
198 | int ret; | ||
199 | |||
200 | /* | ||
201 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks. At this | ||
202 | * moment, get_block can be called only for blocks inside i_size since | ||
203 | * the page cache has already been dropped and writes are blocked by | ||
204 | * i_mutex. So we can safely drop the i_data_sem here. | ||
205 | */ | ||
197 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 206 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
198 | jbd_debug(2, "restarting handle %p\n", handle); | 207 | jbd_debug(2, "restarting handle %p\n", handle); |
199 | return ext4_journal_restart(handle, blocks_for_truncate(inode)); | 208 | up_write(&EXT4_I(inode)->i_data_sem); |
209 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | ||
210 | down_write(&EXT4_I(inode)->i_data_sem); | ||
211 | |||
212 | return ret; | ||
200 | } | 213 | } |
201 | 214 | ||
202 | /* | 215 | /* |
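The renamed helper also changes behaviour: i_data_sem is dropped around the journal restart (which may block) and retaken afterwards, relying on the comment's observation that the dropped page cache plus i_mutex keep get_block out of the truncated range. The extents.c caller above compensates by invalidating the extent cache once the lock is back. A user-space model of the combined drop-and-retake pattern:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;

static int journal_restart(void)    { return 0; }   /* may block */
static void invalidate_cache(void)  { puts("extent cache invalidated"); }

static int restart_trans(void)
{
    int ret;

    pthread_rwlock_unlock(&i_data_sem);  /* drop before blocking */
    ret = journal_restart();
    pthread_rwlock_wrlock(&i_data_sem);  /* retake */
    invalidate_cache();                  /* others may have cached state */
    return ret;
}

int main(void)
{
    pthread_rwlock_wrlock(&i_data_sem);
    printf("restart: %d\n", restart_trans());
    pthread_rwlock_unlock(&i_data_sem);
    return 0;
}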
@@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
341 | int n = 0; | 354 | int n = 0; |
342 | int final = 0; | 355 | int final = 0; |
343 | 356 | ||
344 | if (i_block < 0) { | 357 | if (i_block < direct_blocks) { |
345 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); | ||
346 | } else if (i_block < direct_blocks) { | ||
347 | offsets[n++] = i_block; | 358 | offsets[n++] = i_block; |
348 | final = direct_blocks; | 359 | final = direct_blocks; |
349 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 360 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
@@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
551 | * | 562 | * |
552 | * Normally this function finds the preferred place for block allocation | 563 | * Normally this function finds the preferred place for block allocation
553 | * and returns it. | 564 | * and returns it.
565 | * Because this is only used for non-extent files, we limit the block nr | ||
566 | * to 32 bits. | ||
554 | */ | 567 | */ |
555 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 568 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
556 | Indirect *partial) | 569 | Indirect *partial) |
557 | { | 570 | { |
571 | ext4_fsblk_t goal; | ||
572 | |||
558 | /* | 573 | /* |
559 | * XXX need to get goal block from mballoc's data structures | 574 | * XXX need to get goal block from mballoc's data structures |
560 | */ | 575 | */ |
561 | 576 | ||
562 | return ext4_find_near(inode, partial); | 577 | goal = ext4_find_near(inode, partial); |
578 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
579 | return goal; | ||
563 | } | 580 | } |
564 | 581 | ||
565 | /** | 582 | /** |
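Because ext4_find_goal() is only reached for indirect-mapped files, whose physical block numbers fit in 32 bits, the 64-bit heuristic goal from ext4_find_near() is masked with the new EXT4_MAX_BLOCK_FILE_PHYS before use. The clamp in isolation:

#include <stdio.h>
#include <stdint.h>

#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFFULL

int main(void)
{
    uint64_t goal = 0x1234567890ULL;      /* above the 32-bit range */

    goal &= EXT4_MAX_BLOCK_FILE_PHYS;     /* clamp for non-extent files */
    printf("goal: 0x%llx\n", (unsigned long long) goal);
    return 0;
}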
@@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
640 | if (*err) | 657 | if (*err) |
641 | goto failed_out; | 658 | goto failed_out; |
642 | 659 | ||
660 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | ||
661 | |||
643 | target -= count; | 662 | target -= count; |
644 | /* allocate blocks for indirect blocks */ | 663 | /* allocate blocks for indirect blocks */ |
645 | while (index < indirect_blks && count) { | 664 | while (index < indirect_blks && count) { |
@@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
674 | ar.flags = EXT4_MB_HINT_DATA; | 693 | ar.flags = EXT4_MB_HINT_DATA; |
675 | 694 | ||
676 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 695 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
696 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | ||
677 | 697 | ||
678 | if (*err && (target == blks)) { | 698 | if (*err && (target == blks)) { |
679 | /* | 699 | /* |
@@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
762 | BUFFER_TRACE(bh, "call get_create_access"); | 782 | BUFFER_TRACE(bh, "call get_create_access"); |
763 | err = ext4_journal_get_create_access(handle, bh); | 783 | err = ext4_journal_get_create_access(handle, bh); |
764 | if (err) { | 784 | if (err) { |
785 | /* Don't brelse(bh) here; it's done in | ||
786 | * ext4_journal_forget() below */ | ||
765 | unlock_buffer(bh); | 787 | unlock_buffer(bh); |
766 | brelse(bh); | ||
767 | goto failed; | 788 | goto failed; |
768 | } | 789 | } |
769 | 790 | ||
@@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1109 | ext4_discard_preallocations(inode); | 1130 | ext4_discard_preallocations(inode); |
1110 | } | 1131 | } |
1111 | 1132 | ||
1112 | static int check_block_validity(struct inode *inode, sector_t logical, | 1133 | static int check_block_validity(struct inode *inode, const char *msg, |
1113 | sector_t phys, int len) | 1134 | sector_t logical, sector_t phys, int len) |
1114 | { | 1135 | { |
1115 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1136 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1116 | ext4_error(inode->i_sb, "check_block_validity", | 1137 | ext4_error(inode->i_sb, msg, |
1117 | "inode #%lu logical block %llu mapped to %llu " | 1138 | "inode #%lu logical block %llu mapped to %llu " |
1118 | "(size %d)", inode->i_ino, | 1139 | "(size %d)", inode->i_ino, |
1119 | (unsigned long long) logical, | 1140 | (unsigned long long) logical, |
1120 | (unsigned long long) phys, len); | 1141 | (unsigned long long) phys, len); |
1121 | WARN_ON(1); | ||
1122 | return -EIO; | 1142 | return -EIO; |
1123 | } | 1143 | } |
1124 | return 0; | 1144 | return 0; |
@@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1170 | up_read((&EXT4_I(inode)->i_data_sem)); | 1190 | up_read((&EXT4_I(inode)->i_data_sem)); |
1171 | 1191 | ||
1172 | if (retval > 0 && buffer_mapped(bh)) { | 1192 | if (retval > 0 && buffer_mapped(bh)) { |
1173 | int ret = check_block_validity(inode, block, | 1193 | int ret = check_block_validity(inode, "file system corruption", |
1174 | bh->b_blocknr, retval); | 1194 | block, bh->b_blocknr, retval); |
1175 | if (ret != 0) | 1195 | if (ret != 0) |
1176 | return ret; | 1196 | return ret; |
1177 | } | 1197 | } |
@@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1235 | * i_data's format changing. Force the migrate | 1255 | * i_data's format changing. Force the migrate |
1236 | * to fail by clearing migrate flags | 1256 | * to fail by clearing migrate flags |
1237 | */ | 1257 | */ |
1238 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 1258 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
1239 | ~EXT4_EXT_MIGRATE; | ||
1240 | } | 1259 | } |
1241 | } | 1260 | } |
1242 | 1261 | ||
@@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1252 | 1271 | ||
1253 | up_write((&EXT4_I(inode)->i_data_sem)); | 1272 | up_write((&EXT4_I(inode)->i_data_sem)); |
1254 | if (retval > 0 && buffer_mapped(bh)) { | 1273 | if (retval > 0 && buffer_mapped(bh)) { |
1255 | int ret = check_block_validity(inode, block, | 1274 | int ret = check_block_validity(inode, "file system " |
1256 | bh->b_blocknr, retval); | 1275 | "corruption after allocation", |
1276 | block, bh->b_blocknr, retval); | ||
1257 | if (ret != 0) | 1277 | if (ret != 0) |
1258 | return ret; | 1278 | return ret; |
1259 | } | 1279 | } |
@@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1863 | * Delayed allocation stuff | 1883 | * Delayed allocation stuff |
1864 | */ | 1884 | */ |
1865 | 1885 | ||
1866 | struct mpage_da_data { | ||
1867 | struct inode *inode; | ||
1868 | sector_t b_blocknr; /* start block number of extent */ | ||
1869 | size_t b_size; /* size of extent */ | ||
1870 | unsigned long b_state; /* state of the extent */ | ||
1871 | unsigned long first_page, next_page; /* extent of pages */ | ||
1872 | struct writeback_control *wbc; | ||
1873 | int io_done; | ||
1874 | int pages_written; | ||
1875 | int retval; | ||
1876 | }; | ||
1877 | |||
1878 | /* | 1886 | /* |
1879 | * mpage_da_submit_io - walks through the extent of pages and tries to write | 1887 | * mpage_da_submit_io - walks through the extent of pages and tries to write
1880 | * them with the writepage() callback | 1888 | * them with the writepage() callback
@@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2737 | long pages_skipped; | 2745 | long pages_skipped; |
2738 | int range_cyclic, cycled = 1, io_done = 0; | 2746 | int range_cyclic, cycled = 1, io_done = 0; |
2739 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2747 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2748 | loff_t range_start = wbc->range_start; | ||
2740 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2749 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2741 | 2750 | ||
2742 | trace_ext4_da_writepages(inode, wbc); | 2751 | trace_ext4_da_writepages(inode, wbc); |
@@ -2850,6 +2859,7 @@ retry: | |||
2850 | mpd.io_done = 1; | 2859 | mpd.io_done = 1; |
2851 | ret = MPAGE_DA_EXTENT_TAIL; | 2860 | ret = MPAGE_DA_EXTENT_TAIL; |
2852 | } | 2861 | } |
2862 | trace_ext4_da_write_pages(inode, &mpd); | ||
2853 | wbc->nr_to_write -= mpd.pages_written; | 2863 | wbc->nr_to_write -= mpd.pages_written; |
2854 | 2864 | ||
2855 | ext4_journal_stop(handle); | 2865 | ext4_journal_stop(handle); |
@@ -2905,6 +2915,7 @@ out_writepages: | |||
2905 | if (!no_nrwrite_index_update) | 2915 | if (!no_nrwrite_index_update) |
2906 | wbc->no_nrwrite_index_update = 0; | 2916 | wbc->no_nrwrite_index_update = 0; |
2907 | wbc->nr_to_write -= nr_to_writebump; | 2917 | wbc->nr_to_write -= nr_to_writebump; |
2918 | wbc->range_start = range_start; | ||
2908 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2919 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2909 | return ret; | 2920 | return ret; |
2910 | } | 2921 | } |
@@ -3117,6 +3128,8 @@ out: | |||
3117 | */ | 3128 | */ |
3118 | int ext4_alloc_da_blocks(struct inode *inode) | 3129 | int ext4_alloc_da_blocks(struct inode *inode) |
3119 | { | 3130 | { |
3131 | trace_ext4_alloc_da_blocks(inode); | ||
3132 | |||
3120 | if (!EXT4_I(inode)->i_reserved_data_blocks && | 3133 | if (!EXT4_I(inode)->i_reserved_data_blocks && |
3121 | !EXT4_I(inode)->i_reserved_meta_blocks) | 3134 | !EXT4_I(inode)->i_reserved_meta_blocks) |
3122 | return 0; | 3135 | return 0; |
@@ -3659,7 +3672,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
3659 | ext4_handle_dirty_metadata(handle, inode, bh); | 3672 | ext4_handle_dirty_metadata(handle, inode, bh); |
3660 | } | 3673 | } |
3661 | ext4_mark_inode_dirty(handle, inode); | 3674 | ext4_mark_inode_dirty(handle, inode); |
3662 | ext4_journal_test_restart(handle, inode); | 3675 | ext4_truncate_restart_trans(handle, inode, |
3676 | blocks_for_truncate(inode)); | ||
3663 | if (bh) { | 3677 | if (bh) { |
3664 | BUFFER_TRACE(bh, "retaking write access"); | 3678 | BUFFER_TRACE(bh, "retaking write access"); |
3665 | ext4_journal_get_write_access(handle, bh); | 3679 | ext4_journal_get_write_access(handle, bh); |
@@ -3870,7 +3884,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3870 | return; | 3884 | return; |
3871 | if (try_to_extend_transaction(handle, inode)) { | 3885 | if (try_to_extend_transaction(handle, inode)) { |
3872 | ext4_mark_inode_dirty(handle, inode); | 3886 | ext4_mark_inode_dirty(handle, inode); |
3873 | ext4_journal_test_restart(handle, inode); | 3887 | ext4_truncate_restart_trans(handle, inode, |
3888 | blocks_for_truncate(inode)); | ||
3874 | } | 3889 | } |
3875 | 3890 | ||
3876 | ext4_free_blocks(handle, inode, nr, 1, 1); | 3891 | ext4_free_blocks(handle, inode, nr, 1, 1); |
@@ -3958,8 +3973,7 @@ void ext4_truncate(struct inode *inode) | |||
3958 | if (!ext4_can_truncate(inode)) | 3973 | if (!ext4_can_truncate(inode)) |
3959 | return; | 3974 | return; |
3960 | 3975 | ||
3961 | if (ei->i_disksize && inode->i_size == 0 && | 3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
3962 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3963 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3964 | 3978 | ||
3965 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4533,7 +4547,8 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4533 | */ | 4547 | */ |
4534 | static int ext4_do_update_inode(handle_t *handle, | 4548 | static int ext4_do_update_inode(handle_t *handle, |
4535 | struct inode *inode, | 4549 | struct inode *inode, |
4536 | struct ext4_iloc *iloc) | 4550 | struct ext4_iloc *iloc, |
4551 | int do_sync) | ||
4537 | { | 4552 | { |
4538 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); | 4553 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); |
4539 | struct ext4_inode_info *ei = EXT4_I(inode); | 4554 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -4581,8 +4596,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4581 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 4596 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
4582 | goto out_brelse; | 4597 | goto out_brelse; |
4583 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4598 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
4584 | /* clear the migrate flag in the raw_inode */ | 4599 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
4585 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); | ||
4586 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4600 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
4587 | cpu_to_le32(EXT4_OS_HURD)) | 4601 | cpu_to_le32(EXT4_OS_HURD)) |
4588 | raw_inode->i_file_acl_high = | 4602 | raw_inode->i_file_acl_high = |
@@ -4635,10 +4649,22 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4635 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4649 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
4636 | } | 4650 | } |
4637 | 4651 | ||
4638 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4652 | /* |
4639 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 4653 | * If we're not using a journal and we were called from |
4640 | if (!err) | 4654 | * ext4_write_inode() to sync the inode (making do_sync true), |
4641 | err = rc; | 4655 | * we can just use sync_dirty_buffer() directly to do our dirty |
4656 | * work. Testing s_journal here is a bit redundant but it's | ||
4657 | * worth it to avoid potential future trouble. | ||
4658 | */ | ||
4659 | if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) { | ||
4660 | BUFFER_TRACE(bh, "call sync_dirty_buffer"); | ||
4661 | sync_dirty_buffer(bh); | ||
4662 | } else { | ||
4663 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
4664 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | ||
4665 | if (!err) | ||
4666 | err = rc; | ||
4667 | } | ||
4642 | ei->i_state &= ~EXT4_STATE_NEW; | 4668 | ei->i_state &= ~EXT4_STATE_NEW; |
4643 | 4669 | ||
4644 | out_brelse: | 4670 | out_brelse: |
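The do_sync parameter threaded through ext4_do_update_inode() exists for the no-journal path of ext4_write_inode(), shown in the next hunk. A condensed sketch of how synchronous inode writeout divides after this patch (identifiers from the diff; the wrapper function and compressed control flow are illustrative only):

    /* sketch: synchronous inode writeout with and without a journal */
    static int write_inode_sketch(struct inode *inode, int wait)
    {
            struct ext4_iloc iloc;
            int err;

            if (EXT4_SB(inode->i_sb)->s_journal)
                    /* journalled: committing the journal makes the
                     * inode durable */
                    return wait ? ext4_force_commit(inode->i_sb) : 0;

            /* no journal: write the raw inode buffer directly; with
             * do_sync == wait, ext4_do_update_inode() finishes in
             * sync_dirty_buffer(bh) instead of the journal machinery */
            err = ext4_get_inode_loc(inode, &iloc);
            if (err)
                    return err;
            return ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE, inode,
                                        &iloc, wait);
    }

ext4_mark_iloc_dirty() keeps passing do_sync == 0 (see the later hunk), so ordinary inode updates still go through ext4_handle_dirty_metadata().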
@@ -4684,19 +4710,32 @@ out_brelse: | |||
4684 | */ | 4710 | */ |
4685 | int ext4_write_inode(struct inode *inode, int wait) | 4711 | int ext4_write_inode(struct inode *inode, int wait) |
4686 | { | 4712 | { |
4713 | int err; | ||
4714 | |||
4687 | if (current->flags & PF_MEMALLOC) | 4715 | if (current->flags & PF_MEMALLOC) |
4688 | return 0; | 4716 | return 0; |
4689 | 4717 | ||
4690 | if (ext4_journal_current_handle()) { | 4718 | if (EXT4_SB(inode->i_sb)->s_journal) { |
4691 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 4719 | if (ext4_journal_current_handle()) { |
4692 | dump_stack(); | 4720 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
4693 | return -EIO; | 4721 | dump_stack(); |
4694 | } | 4722 | return -EIO; |
4723 | } | ||
4695 | 4724 | ||
4696 | if (!wait) | 4725 | if (!wait) |
4697 | return 0; | 4726 | return 0; |
4727 | |||
4728 | err = ext4_force_commit(inode->i_sb); | ||
4729 | } else { | ||
4730 | struct ext4_iloc iloc; | ||
4698 | 4731 | ||
4699 | return ext4_force_commit(inode->i_sb); | 4732 | err = ext4_get_inode_loc(inode, &iloc); |
4733 | if (err) | ||
4734 | return err; | ||
4735 | err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE, | ||
4736 | inode, &iloc, wait); | ||
4737 | } | ||
4738 | return err; | ||
4700 | } | 4739 | } |
4701 | 4740 | ||
4702 | /* | 4741 | /* |
@@ -4990,7 +5029,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
4990 | get_bh(iloc->bh); | 5029 | get_bh(iloc->bh); |
4991 | 5030 | ||
4992 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ | 5031 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ |
4993 | err = ext4_do_update_inode(handle, inode, iloc); | 5032 | err = ext4_do_update_inode(handle, inode, iloc, 0); |
4994 | put_bh(iloc->bh); | 5033 | put_bh(iloc->bh); |
4995 | return err; | 5034 | return err; |
4996 | } | 5035 | } |
@@ -5281,12 +5320,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5281 | else | 5320 | else |
5282 | len = PAGE_CACHE_SIZE; | 5321 | len = PAGE_CACHE_SIZE; |
5283 | 5322 | ||
5323 | lock_page(page); | ||
5324 | /* | ||
5325 | * return if we have all the buffers mapped. This avoids | ||
5326 | * the need to call write_begin/write_end, which do a | ||
5327 | * journal_start/journal_stop that can block and take a | ||
5328 | * long time | ||
5329 | */ | ||
5284 | if (page_has_buffers(page)) { | 5330 | if (page_has_buffers(page)) { |
5285 | /* return if we have all the buffers mapped */ | ||
5286 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5331 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5287 | ext4_bh_unmapped)) | 5332 | ext4_bh_unmapped)) { |
5333 | unlock_page(page); | ||
5288 | goto out_unlock; | 5334 | goto out_unlock; |
5335 | } | ||
5289 | } | 5336 | } |
5337 | unlock_page(page); | ||
5290 | /* | 5338 | /* |
5291 | * OK, we need to fill the hole... Do write_begin write_end | 5339 | * OK, we need to fill the hole... Do write_begin write_end |
5292 | * to do block allocation/reservation. We are not holding | 5340 | * to do block allocation/reservation. We are not holding |
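The comment added in this hunk states the motivation; condensed, the fast path now reads roughly:

    lock_page(page);
    if (page_has_buffers(page) &&
        !walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
                           ext4_bh_unmapped)) {
            /* every buffer is already mapped: no block allocation is
             * needed, so skip the write_begin/write_end round trip and
             * its journal_start/journal_stop */
            unlock_page(page);
            goto out_unlock;
    }
    unlock_page(page);
    /* fall through: fill the hole via write_begin/write_end */

(The && collapses the two nested tests from the hunk; behaviour is unchanged, since a page with no buffer list also takes the slow path.)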
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7050a9cd04a4..c1cdf613e725 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -243,10 +243,9 @@ setversion_out: | |||
243 | me.donor_start, me.len, &me.moved_len); | 243 | me.donor_start, me.len, &me.moved_len); |
244 | fput(donor_filp); | 244 | fput(donor_filp); |
245 | 245 | ||
246 | if (!err) | 246 | if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) |
247 | if (copy_to_user((struct move_extent *)arg, | 247 | return -EFAULT; |
248 | &me, sizeof(me))) | 248 | |
249 | return -EFAULT; | ||
250 | return err; | 249 | return err; |
251 | } | 250 | } |
252 | 251 | ||
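This ioctl hunk is a behaviour fix as much as a cleanup: me.moved_len is now copied back to user space even when ext4_move_extents() failed, so the caller learns how much had been moved before the error. The resulting pattern (condensed; the leading arguments of ext4_move_extents() are not visible in this diff and are reconstructed here as an assumption):

    err = ext4_move_extents(filp, donor_filp, me.orig_start,
                            me.donor_start, me.len, &me.moved_len);
    fput(donor_filp);

    /* report partial progress unconditionally; a fault while copying
     * out takes precedence over the original status */
    if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
            return -EFAULT;

    return err;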
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cd258463e2a9..e9c61896d605 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "mballoc.h" | 24 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | ||
25 | #include <trace/events/ext4.h> | 26 | #include <trace/events/ext4.h> |
26 | 27 | ||
27 | /* | 28 | /* |
@@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
622 | 623 | ||
623 | /* FIXME!! need more doc */ | 624 | /* FIXME!! need more doc */ |
624 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 625 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
625 | void *buddy, unsigned first, int len, | 626 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
626 | struct ext4_group_info *grp) | 627 | struct ext4_group_info *grp) |
627 | { | 628 | { |
628 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 629 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
629 | unsigned short min; | 630 | ext4_grpblk_t min; |
630 | unsigned short max; | 631 | ext4_grpblk_t max; |
631 | unsigned short chunk; | 632 | ext4_grpblk_t chunk; |
632 | unsigned short border; | 633 | unsigned short border; |
633 | 634 | ||
634 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); | 635 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); |
@@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
662 | void *buddy, void *bitmap, ext4_group_t group) | 663 | void *buddy, void *bitmap, ext4_group_t group) |
663 | { | 664 | { |
664 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 665 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
665 | unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); | 666 | ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); |
666 | unsigned short i = 0; | 667 | ext4_grpblk_t i = 0; |
667 | unsigned short first; | 668 | ext4_grpblk_t first; |
668 | unsigned short len; | 669 | ext4_grpblk_t len; |
669 | unsigned free = 0; | 670 | unsigned free = 0; |
670 | unsigned fragments = 0; | 671 | unsigned fragments = 0; |
671 | unsigned long long period = get_cycles(); | 672 | unsigned long long period = get_cycles(); |
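These two hunks widen mballoc's per-group block counters from unsigned short to ext4_grpblk_t. The rationale is not spelled out in the diff, but presumably it is overflow: blocks-per-group is bounded by the bits in a single bitmap block, i.e. 8 * blocksize, which no longer fits in 16 bits once the block size reaches 8 KiB. Illustrative arithmetic only:

    blocks_per_group(blocksize) = 8 * blocksize
    blocks_per_group(4096)      =  32768   (fits in unsigned short)
    blocks_per_group(8192)      =  65536   > USHRT_MAX (65535)
    blocks_per_group(65536)     = 524288   > USHRT_MAX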
@@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
743 | char *data; | 744 | char *data; |
744 | char *bitmap; | 745 | char *bitmap; |
745 | 746 | ||
746 | mb_debug("init page %lu\n", page->index); | 747 | mb_debug(1, "init page %lu\n", page->index); |
747 | 748 | ||
748 | inode = page->mapping->host; | 749 | inode = page->mapping->host; |
749 | sb = inode->i_sb; | 750 | sb = inode->i_sb; |
@@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
822 | set_bitmap_uptodate(bh[i]); | 823 | set_bitmap_uptodate(bh[i]); |
823 | bh[i]->b_end_io = end_buffer_read_sync; | 824 | bh[i]->b_end_io = end_buffer_read_sync; |
824 | submit_bh(READ, bh[i]); | 825 | submit_bh(READ, bh[i]); |
825 | mb_debug("read bitmap for group %u\n", first_group + i); | 826 | mb_debug(1, "read bitmap for group %u\n", first_group + i); |
826 | } | 827 | } |
827 | 828 | ||
828 | /* wait for I/O completion */ | 829 | /* wait for I/O completion */ |
@@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
862 | if ((first_block + i) & 1) { | 863 | if ((first_block + i) & 1) { |
863 | /* this is block of buddy */ | 864 | /* this is block of buddy */ |
864 | BUG_ON(incore == NULL); | 865 | BUG_ON(incore == NULL); |
865 | mb_debug("put buddy for group %u in page %lu/%x\n", | 866 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
866 | group, page->index, i * blocksize); | 867 | group, page->index, i * blocksize); |
867 | grinfo = ext4_get_group_info(sb, group); | 868 | grinfo = ext4_get_group_info(sb, group); |
868 | grinfo->bb_fragments = 0; | 869 | grinfo->bb_fragments = 0; |
869 | memset(grinfo->bb_counters, 0, | 870 | memset(grinfo->bb_counters, 0, |
870 | sizeof(unsigned short)*(sb->s_blocksize_bits+2)); | 871 | sizeof(*grinfo->bb_counters) * |
872 | (sb->s_blocksize_bits+2)); | ||
871 | /* | 873 | /* |
872 | * incore got set to the group block bitmap below | 874 | * incore got set to the group block bitmap below |
873 | */ | 875 | */ |
@@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
878 | } else { | 880 | } else { |
879 | /* this is block of bitmap */ | 881 | /* this is block of bitmap */ |
880 | BUG_ON(incore != NULL); | 882 | BUG_ON(incore != NULL); |
881 | mb_debug("put bitmap for group %u in page %lu/%x\n", | 883 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
882 | group, page->index, i * blocksize); | 884 | group, page->index, i * blocksize); |
883 | 885 | ||
884 | /* see comments in ext4_mb_put_pa() */ | 886 | /* see comments in ext4_mb_put_pa() */ |
@@ -908,6 +910,100 @@ out: | |||
908 | return err; | 910 | return err; |
909 | } | 911 | } |
910 | 912 | ||
913 | static noinline_for_stack | ||
914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
915 | { | ||
916 | |||
917 | int ret = 0; | ||
918 | void *bitmap; | ||
919 | int blocks_per_page; | ||
920 | int block, pnum, poff; | ||
921 | int num_grp_locked = 0; | ||
922 | struct ext4_group_info *this_grp; | ||
923 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
924 | struct inode *inode = sbi->s_buddy_cache; | ||
925 | struct page *page = NULL, *bitmap_page = NULL; | ||
926 | |||
927 | mb_debug(1, "init group %u\n", group); | ||
928 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
929 | this_grp = ext4_get_group_info(sb, group); | ||
930 | /* | ||
931 | * This ensures that we don't reinit the buddy cache | ||
932 | * page which maps to the group from which we are already | ||
933 | * allocating. If we are looking at the buddy cache we would | ||
934 | * have taken a reference using ext4_mb_load_buddy and that | ||
935 | * would have taken the alloc_sem lock. | ||
936 | */ | ||
937 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
938 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
939 | /* | ||
940 | * somebody initialized the group | ||
941 | * return without doing anything | ||
942 | */ | ||
943 | ret = 0; | ||
944 | goto err; | ||
945 | } | ||
946 | /* | ||
947 | * the buddy cache inode stores the block bitmap | ||
948 | * and buddy information in consecutive blocks. | ||
949 | * So for each group we need two blocks. | ||
950 | */ | ||
951 | block = group * 2; | ||
952 | pnum = block / blocks_per_page; | ||
953 | poff = block % blocks_per_page; | ||
954 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
955 | if (page) { | ||
956 | BUG_ON(page->mapping != inode->i_mapping); | ||
957 | ret = ext4_mb_init_cache(page, NULL); | ||
958 | if (ret) { | ||
959 | unlock_page(page); | ||
960 | goto err; | ||
961 | } | ||
962 | unlock_page(page); | ||
963 | } | ||
964 | if (page == NULL || !PageUptodate(page)) { | ||
965 | ret = -EIO; | ||
966 | goto err; | ||
967 | } | ||
968 | mark_page_accessed(page); | ||
969 | bitmap_page = page; | ||
970 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
971 | |||
972 | /* init buddy cache */ | ||
973 | block++; | ||
974 | pnum = block / blocks_per_page; | ||
975 | poff = block % blocks_per_page; | ||
976 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
977 | if (page == bitmap_page) { | ||
978 | /* | ||
979 | * If both the bitmap and buddy are in | ||
980 | * the same page we don't need to force | ||
981 | * init the buddy | ||
982 | */ | ||
983 | unlock_page(page); | ||
984 | } else if (page) { | ||
985 | BUG_ON(page->mapping != inode->i_mapping); | ||
986 | ret = ext4_mb_init_cache(page, bitmap); | ||
987 | if (ret) { | ||
988 | unlock_page(page); | ||
989 | goto err; | ||
990 | } | ||
991 | unlock_page(page); | ||
992 | } | ||
993 | if (page == NULL || !PageUptodate(page)) { | ||
994 | ret = -EIO; | ||
995 | goto err; | ||
996 | } | ||
997 | mark_page_accessed(page); | ||
998 | err: | ||
999 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1000 | if (bitmap_page) | ||
1001 | page_cache_release(bitmap_page); | ||
1002 | if (page) | ||
1003 | page_cache_release(page); | ||
1004 | return ret; | ||
1005 | } | ||
1006 | |||
911 | static noinline_for_stack int | 1007 | static noinline_for_stack int |
912 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
913 | struct ext4_buddy *e4b) | 1009 | struct ext4_buddy *e4b) |
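The "two blocks per group" layout described in ext4_mb_init_group()'s comments makes the page lookups plain division and modulo. A worked example with assumed geometry (4 KiB pages, 1 KiB blocks, so blocks_per_page == 4):

    int group = 5;
    int blocks_per_page = 4;  /* PAGE_CACHE_SIZE / sb->s_blocksize */
    int block, pnum, poff;

    block = group * 2;               /* 10: the group's bitmap block  */
    pnum  = block / blocks_per_page; /*  2: page index in buddy cache */
    poff  = block % blocks_per_page; /*  2: block offset in that page */
    /* bitmap = page_address(page) + poff * sb->s_blocksize; the buddy
     * is block 11, which here also lands in page 2 -- the
     * "page == bitmap_page" branch above then skips the second init */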
@@ -922,7 +1018,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
922 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1018 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
923 | struct inode *inode = sbi->s_buddy_cache; | 1019 | struct inode *inode = sbi->s_buddy_cache; |
924 | 1020 | ||
925 | mb_debug("load group %u\n", group); | 1021 | mb_debug(1, "load group %u\n", group); |
926 | 1022 | ||
927 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1023 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
928 | grp = ext4_get_group_info(sb, group); | 1024 | grp = ext4_get_group_info(sb, group); |
@@ -941,8 +1037,26 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
941 | * groups mapped by the page is blocked | 1037 | * groups mapped by the page is blocked |
942 | * till we are done with allocation | 1038 | * till we are done with allocation |
943 | */ | 1039 | */ |
1040 | repeat_load_buddy: | ||
944 | down_read(e4b->alloc_semp); | 1041 | down_read(e4b->alloc_semp); |
945 | 1042 | ||
1043 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
1044 | /* we need to check for group need init flag | ||
1045 | * with alloc_semp held so that we can be sure | ||
1046 | * that new blocks didn't get added to the group | ||
1047 | * when we are loading the buddy cache | ||
1048 | */ | ||
1049 | up_read(e4b->alloc_semp); | ||
1050 | /* | ||
1051 | * we need full data about the group | ||
1052 | * to make a good selection | ||
1053 | */ | ||
1054 | ret = ext4_mb_init_group(sb, group); | ||
1055 | if (ret) | ||
1056 | return ret; | ||
1057 | goto repeat_load_buddy; | ||
1058 | } | ||
1059 | |||
946 | /* | 1060 | /* |
947 | * the buddy cache inode stores the block bitmap | 1061 | * the buddy cache inode stores the block bitmap |
948 | * and buddy information in consecutive blocks. | 1062 | * and buddy information in consecutive blocks. |
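Condensed sketch of the retry loop added to ext4_mb_load_buddy() above: the need-init flag is only meaningful while alloc_semp is held, but ext4_mb_init_group() cannot run under that semaphore, hence drop, initialize, and re-take (structure from the diff, error handling trimmed):

    repeat_load_buddy:
    down_read(e4b->alloc_semp);
    if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
            up_read(e4b->alloc_semp);   /* can't init while holding it */
            ret = ext4_mb_init_group(sb, group);
            if (ret)
                    return ret;
            goto repeat_load_buddy;     /* re-check under the lock */
    }
    /* ... proceed with the buddy pages, NEED_INIT now clear ... */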
@@ -1360,7 +1474,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1360 | ac->alloc_semp = e4b->alloc_semp; | 1474 | ac->alloc_semp = e4b->alloc_semp; |
1361 | e4b->alloc_semp = NULL; | 1475 | e4b->alloc_semp = NULL; |
1362 | /* store last allocated for subsequent stream allocation */ | 1476 | /* store last allocated for subsequent stream allocation */ |
1363 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | 1477 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1364 | spin_lock(&sbi->s_md_lock); | 1478 | spin_lock(&sbi->s_md_lock); |
1365 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; | 1479 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; |
1366 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; | 1480 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; |
@@ -1837,97 +1951,6 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | |||
1837 | 1951 | ||
1838 | } | 1952 | } |
1839 | 1953 | ||
1840 | static noinline_for_stack | ||
1841 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
1842 | { | ||
1843 | |||
1844 | int ret; | ||
1845 | void *bitmap; | ||
1846 | int blocks_per_page; | ||
1847 | int block, pnum, poff; | ||
1848 | int num_grp_locked = 0; | ||
1849 | struct ext4_group_info *this_grp; | ||
1850 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1851 | struct inode *inode = sbi->s_buddy_cache; | ||
1852 | struct page *page = NULL, *bitmap_page = NULL; | ||
1853 | |||
1854 | mb_debug("init group %lu\n", group); | ||
1855 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1856 | this_grp = ext4_get_group_info(sb, group); | ||
1857 | /* | ||
1858 | * This ensures we don't add group | ||
1859 | * to this buddy cache via resize | ||
1860 | */ | ||
1861 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
1862 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
1863 | /* | ||
1864 | * somebody initialized the group | ||
1865 | * return without doing anything | ||
1866 | */ | ||
1867 | ret = 0; | ||
1868 | goto err; | ||
1869 | } | ||
1870 | /* | ||
1871 | * the buddy cache inode stores the block bitmap | ||
1872 | * and buddy information in consecutive blocks. | ||
1873 | * So for each group we need two blocks. | ||
1874 | */ | ||
1875 | block = group * 2; | ||
1876 | pnum = block / blocks_per_page; | ||
1877 | poff = block % blocks_per_page; | ||
1878 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1879 | if (page) { | ||
1880 | BUG_ON(page->mapping != inode->i_mapping); | ||
1881 | ret = ext4_mb_init_cache(page, NULL); | ||
1882 | if (ret) { | ||
1883 | unlock_page(page); | ||
1884 | goto err; | ||
1885 | } | ||
1886 | unlock_page(page); | ||
1887 | } | ||
1888 | if (page == NULL || !PageUptodate(page)) { | ||
1889 | ret = -EIO; | ||
1890 | goto err; | ||
1891 | } | ||
1892 | mark_page_accessed(page); | ||
1893 | bitmap_page = page; | ||
1894 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1895 | |||
1896 | /* init buddy cache */ | ||
1897 | block++; | ||
1898 | pnum = block / blocks_per_page; | ||
1899 | poff = block % blocks_per_page; | ||
1900 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1901 | if (page == bitmap_page) { | ||
1902 | /* | ||
1903 | * If both the bitmap and buddy are in | ||
1904 | * the same page we don't need to force | ||
1905 | * init the buddy | ||
1906 | */ | ||
1907 | unlock_page(page); | ||
1908 | } else if (page) { | ||
1909 | BUG_ON(page->mapping != inode->i_mapping); | ||
1910 | ret = ext4_mb_init_cache(page, bitmap); | ||
1911 | if (ret) { | ||
1912 | unlock_page(page); | ||
1913 | goto err; | ||
1914 | } | ||
1915 | unlock_page(page); | ||
1916 | } | ||
1917 | if (page == NULL || !PageUptodate(page)) { | ||
1918 | ret = -EIO; | ||
1919 | goto err; | ||
1920 | } | ||
1921 | mark_page_accessed(page); | ||
1922 | err: | ||
1923 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1924 | if (bitmap_page) | ||
1925 | page_cache_release(bitmap_page); | ||
1926 | if (page) | ||
1927 | page_cache_release(page); | ||
1928 | return ret; | ||
1929 | } | ||
1930 | |||
1931 | static noinline_for_stack int | 1954 | static noinline_for_stack int |
1932 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1955 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1933 | { | 1956 | { |
@@ -1938,11 +1961,14 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1938 | struct ext4_sb_info *sbi; | 1961 | struct ext4_sb_info *sbi; |
1939 | struct super_block *sb; | 1962 | struct super_block *sb; |
1940 | struct ext4_buddy e4b; | 1963 | struct ext4_buddy e4b; |
1941 | loff_t size, isize; | ||
1942 | 1964 | ||
1943 | sb = ac->ac_sb; | 1965 | sb = ac->ac_sb; |
1944 | sbi = EXT4_SB(sb); | 1966 | sbi = EXT4_SB(sb); |
1945 | ngroups = ext4_get_groups_count(sb); | 1967 | ngroups = ext4_get_groups_count(sb); |
1968 | /* non-extent files are limited to low blocks/groups */ | ||
1969 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) | ||
1970 | ngroups = sbi->s_blockfile_groups; | ||
1971 | |||
1946 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1972 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
1947 | 1973 | ||
1948 | /* first, try the goal */ | 1974 | /* first, try the goal */ |
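The ngroups clamp added above keeps allocations for non-extent (indirect-mapped) files inside the block groups whose physical block numbers fit in 32 bits, since the indirect format stores block addresses as __le32; s_blockfile_groups presumably caches that group count. Effect, with assumed geometry:

    /* with 4 KiB blocks, 2^32 blocks = 16 TiB of addressable space;
     * groups past that boundary are never scanned for such inodes */
    if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
            ngroups = sbi->s_blockfile_groups;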
@@ -1974,20 +2000,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1974 | } | 2000 | } |
1975 | 2001 | ||
1976 | bsbits = ac->ac_sb->s_blocksize_bits; | 2002 | bsbits = ac->ac_sb->s_blocksize_bits; |
1977 | /* if stream allocation is enabled, use global goal */ | ||
1978 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
1979 | isize = i_size_read(ac->ac_inode) >> bsbits; | ||
1980 | if (size < isize) | ||
1981 | size = isize; | ||
1982 | 2003 | ||
1983 | if (size < sbi->s_mb_stream_request && | 2004 | /* if stream allocation is enabled, use global goal */ |
1984 | (ac->ac_flags & EXT4_MB_HINT_DATA)) { | 2005 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1985 | /* TBD: may be hot point */ | 2006 | /* TBD: may be hot point */ |
1986 | spin_lock(&sbi->s_md_lock); | 2007 | spin_lock(&sbi->s_md_lock); |
1987 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; | 2008 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; |
1988 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | 2009 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; |
1989 | spin_unlock(&sbi->s_md_lock); | 2010 | spin_unlock(&sbi->s_md_lock); |
1990 | } | 2011 | } |
2012 | |||
1991 | /* Let's just scan groups to find more or less suitable blocks */ | 2013 | /* Let's just scan groups to find more or less suitable blocks */ |
1992 | cr = ac->ac_2order ? 0 : 1; | 2014 | cr = ac->ac_2order ? 0 : 1; |
1993 | /* | 2015 | /* |
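This hunk is the consumer half of the stream-allocation handoff whose producer half appeared in the ext4_mb_use_best_found() hunk earlier: the last stream allocation's position becomes the next one's goal, keeping consecutive streaming writes physically adjacent. Both halves, condensed (identifiers from the diff; s_md_lock protects the shared goal):

    /* producer, after a successful stream allocation: */
    spin_lock(&sbi->s_md_lock);
    sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
    sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
    spin_unlock(&sbi->s_md_lock);

    /* consumer, at the start of the next EXT4_MB_STREAM_ALLOC request: */
    spin_lock(&sbi->s_md_lock);
    ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
    ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
    spin_unlock(&sbi->s_md_lock);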
@@ -2015,27 +2037,6 @@ repeat: | |||
2015 | if (grp->bb_free == 0) | 2037 | if (grp->bb_free == 0) |
2016 | continue; | 2038 | continue; |
2017 | 2039 | ||
2018 | /* | ||
2019 | * if the group is already init we check whether it is | ||
2020 | * a good group and if not we don't load the buddy | ||
2021 | */ | ||
2022 | if (EXT4_MB_GRP_NEED_INIT(grp)) { | ||
2023 | /* | ||
2024 | * we need full data about the group | ||
2025 | * to make a good selection | ||
2026 | */ | ||
2027 | err = ext4_mb_init_group(sb, group); | ||
2028 | if (err) | ||
2029 | goto out; | ||
2030 | } | ||
2031 | |||
2032 | /* | ||
2033 | * If the particular group doesn't satisfy our | ||
2034 | * criteria we continue with the next group | ||
2035 | */ | ||
2036 | if (!ext4_mb_good_group(ac, group, cr)) | ||
2037 | continue; | ||
2038 | |||
2039 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2040 | err = ext4_mb_load_buddy(sb, group, &e4b); |
2040 | if (err) | 2041 | if (err) |
2041 | goto out; | 2042 | goto out; |
@@ -2156,7 +2157,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
2156 | 2157 | ||
2157 | if (v == SEQ_START_TOKEN) { | 2158 | if (v == SEQ_START_TOKEN) { |
2158 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " | 2159 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " |
2159 | "%-5s %-2s %-5s %-5s %-5s %-6s\n", | 2160 | "%-5s %-2s %-6s %-5s %-5s %-6s\n", |
2160 | "pid", "inode", "original", "goal", "result", "found", | 2161 | "pid", "inode", "original", "goal", "result", "found", |
2161 | "grps", "cr", "flags", "merge", "tail", "broken"); | 2162 | "grps", "cr", "flags", "merge", "tail", "broken"); |
2162 | return 0; | 2163 | return 0; |
@@ -2164,7 +2165,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
2164 | 2165 | ||
2165 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | 2166 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { |
2166 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | 2167 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " |
2167 | "%-5u %-5s %-5u %-6u\n"; | 2168 | "0x%04x %-5s %-5u %-6u\n"; |
2168 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, | 2169 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, |
2169 | hs->result.fe_start, hs->result.fe_len, | 2170 | hs->result.fe_start, hs->result.fe_len, |
2170 | hs->result.fe_logical); | 2171 | hs->result.fe_logical); |
@@ -2205,7 +2206,7 @@ static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) | |||
2205 | { | 2206 | { |
2206 | } | 2207 | } |
2207 | 2208 | ||
2208 | static struct seq_operations ext4_mb_seq_history_ops = { | 2209 | static const struct seq_operations ext4_mb_seq_history_ops = { |
2209 | .start = ext4_mb_seq_history_start, | 2210 | .start = ext4_mb_seq_history_start, |
2210 | .next = ext4_mb_seq_history_next, | 2211 | .next = ext4_mb_seq_history_next, |
2211 | .stop = ext4_mb_seq_history_stop, | 2212 | .stop = ext4_mb_seq_history_stop, |
@@ -2287,7 +2288,7 @@ static ssize_t ext4_mb_seq_history_write(struct file *file, | |||
2287 | return count; | 2288 | return count; |
2288 | } | 2289 | } |
2289 | 2290 | ||
2290 | static struct file_operations ext4_mb_seq_history_fops = { | 2291 | static const struct file_operations ext4_mb_seq_history_fops = { |
2291 | .owner = THIS_MODULE, | 2292 | .owner = THIS_MODULE, |
2292 | .open = ext4_mb_seq_history_open, | 2293 | .open = ext4_mb_seq_history_open, |
2293 | .read = seq_read, | 2294 | .read = seq_read, |
@@ -2328,7 +2329,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2328 | struct ext4_buddy e4b; | 2329 | struct ext4_buddy e4b; |
2329 | struct sg { | 2330 | struct sg { |
2330 | struct ext4_group_info info; | 2331 | struct ext4_group_info info; |
2331 | unsigned short counters[16]; | 2332 | ext4_grpblk_t counters[16]; |
2332 | } sg; | 2333 | } sg; |
2333 | 2334 | ||
2334 | group--; | 2335 | group--; |
@@ -2366,7 +2367,7 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) | |||
2366 | { | 2367 | { |
2367 | } | 2368 | } |
2368 | 2369 | ||
2369 | static struct seq_operations ext4_mb_seq_groups_ops = { | 2370 | static const struct seq_operations ext4_mb_seq_groups_ops = { |
2370 | .start = ext4_mb_seq_groups_start, | 2371 | .start = ext4_mb_seq_groups_start, |
2371 | .next = ext4_mb_seq_groups_next, | 2372 | .next = ext4_mb_seq_groups_next, |
2372 | .stop = ext4_mb_seq_groups_stop, | 2373 | .stop = ext4_mb_seq_groups_stop, |
@@ -2387,7 +2388,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | |||
2387 | 2388 | ||
2388 | } | 2389 | } |
2389 | 2390 | ||
2390 | static struct file_operations ext4_mb_seq_groups_fops = { | 2391 | static const struct file_operations ext4_mb_seq_groups_fops = { |
2391 | .owner = THIS_MODULE, | 2392 | .owner = THIS_MODULE, |
2392 | .open = ext4_mb_seq_groups_open, | 2393 | .open = ext4_mb_seq_groups_open, |
2393 | .read = seq_read, | 2394 | .read = seq_read, |
@@ -2532,7 +2533,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2532 | 2533 | ||
2533 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2534 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2534 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2535 | init_rwsem(&meta_group_info[i]->alloc_sem); |
2535 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | 2536 | meta_group_info[i]->bb_free_root.rb_node = NULL; |
2536 | 2537 | ||
2537 | #ifdef DOUBLE_CHECK | 2538 | #ifdef DOUBLE_CHECK |
2538 | { | 2539 | { |
@@ -2558,26 +2559,15 @@ exit_meta_group_info: | |||
2558 | return -ENOMEM; | 2559 | return -ENOMEM; |
2559 | } /* ext4_mb_add_groupinfo */ | 2560 | } /* ext4_mb_add_groupinfo */ |
2560 | 2561 | ||
2561 | /* | ||
2562 | * Update an existing group. | ||
2563 | * This function is used for online resize | ||
2564 | */ | ||
2565 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
2566 | { | ||
2567 | grp->bb_free += add; | ||
2568 | } | ||
2569 | |||
2570 | static int ext4_mb_init_backend(struct super_block *sb) | 2562 | static int ext4_mb_init_backend(struct super_block *sb) |
2571 | { | 2563 | { |
2572 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 2564 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
2573 | ext4_group_t i; | 2565 | ext4_group_t i; |
2574 | int metalen; | ||
2575 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2566 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2576 | struct ext4_super_block *es = sbi->s_es; | 2567 | struct ext4_super_block *es = sbi->s_es; |
2577 | int num_meta_group_infos; | 2568 | int num_meta_group_infos; |
2578 | int num_meta_group_infos_max; | 2569 | int num_meta_group_infos_max; |
2579 | int array_size; | 2570 | int array_size; |
2580 | struct ext4_group_info **meta_group_info; | ||
2581 | struct ext4_group_desc *desc; | 2571 | struct ext4_group_desc *desc; |
2582 | 2572 | ||
2583 | /* This is the number of blocks used by GDT */ | 2573 | /* This is the number of blocks used by GDT */ |
@@ -2622,22 +2612,6 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2622 | goto err_freesgi; | 2612 | goto err_freesgi; |
2623 | } | 2613 | } |
2624 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2614 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2625 | |||
2626 | metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2627 | for (i = 0; i < num_meta_group_infos; i++) { | ||
2628 | if ((i + 1) == num_meta_group_infos) | ||
2629 | metalen = sizeof(*meta_group_info) * | ||
2630 | (ngroups - | ||
2631 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | ||
2632 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
2633 | if (meta_group_info == NULL) { | ||
2634 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
2635 | "buddy group\n"); | ||
2636 | goto err_freemeta; | ||
2637 | } | ||
2638 | sbi->s_group_info[i] = meta_group_info; | ||
2639 | } | ||
2640 | |||
2641 | for (i = 0; i < ngroups; i++) { | 2615 | for (i = 0; i < ngroups; i++) { |
2642 | desc = ext4_get_group_desc(sb, i, NULL); | 2616 | desc = ext4_get_group_desc(sb, i, NULL); |
2643 | if (desc == NULL) { | 2617 | if (desc == NULL) { |
@@ -2655,7 +2629,6 @@ err_freebuddy: | |||
2655 | while (i-- > 0) | 2629 | while (i-- > 0) |
2656 | kfree(ext4_get_group_info(sb, i)); | 2630 | kfree(ext4_get_group_info(sb, i)); |
2657 | i = num_meta_group_infos; | 2631 | i = num_meta_group_infos; |
2658 | err_freemeta: | ||
2659 | while (i-- > 0) | 2632 | while (i-- > 0) |
2660 | kfree(sbi->s_group_info[i]); | 2633 | kfree(sbi->s_group_info[i]); |
2661 | iput(sbi->s_buddy_cache); | 2634 | iput(sbi->s_buddy_cache); |
@@ -2672,14 +2645,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2672 | unsigned max; | 2645 | unsigned max; |
2673 | int ret; | 2646 | int ret; |
2674 | 2647 | ||
2675 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2648 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2676 | 2649 | ||
2677 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2650 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2678 | if (sbi->s_mb_offsets == NULL) { | 2651 | if (sbi->s_mb_offsets == NULL) { |
2679 | return -ENOMEM; | 2652 | return -ENOMEM; |
2680 | } | 2653 | } |
2681 | 2654 | ||
2682 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); | 2655 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2683 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2656 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2684 | if (sbi->s_mb_maxs == NULL) { | 2657 | if (sbi->s_mb_maxs == NULL) { |
2685 | kfree(sbi->s_mb_offsets); | 2658 | kfree(sbi->s_mb_offsets); |
@@ -2758,7 +2731,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2758 | kmem_cache_free(ext4_pspace_cachep, pa); | 2731 | kmem_cache_free(ext4_pspace_cachep, pa); |
2759 | } | 2732 | } |
2760 | if (count) | 2733 | if (count) |
2761 | mb_debug("mballoc: %u PAs left\n", count); | 2734 | mb_debug(1, "mballoc: %u PAs left\n", count); |
2762 | 2735 | ||
2763 | } | 2736 | } |
2764 | 2737 | ||
@@ -2839,7 +2812,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2839 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2812 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2840 | entry = list_entry(l, struct ext4_free_data, list); | 2813 | entry = list_entry(l, struct ext4_free_data, list); |
2841 | 2814 | ||
2842 | mb_debug("gonna free %u blocks in group %u (0x%p):", | 2815 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2843 | entry->count, entry->group, entry); | 2816 | entry->count, entry->group, entry); |
2844 | 2817 | ||
2845 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2818 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
@@ -2874,9 +2847,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2874 | ext4_mb_release_desc(&e4b); | 2847 | ext4_mb_release_desc(&e4b); |
2875 | } | 2848 | } |
2876 | 2849 | ||
2877 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2850 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
2851 | } | ||
2852 | |||
2853 | #ifdef CONFIG_EXT4_DEBUG | ||
2854 | u8 mb_enable_debug __read_mostly; | ||
2855 | |||
2856 | static struct dentry *debugfs_dir; | ||
2857 | static struct dentry *debugfs_debug; | ||
2858 | |||
2859 | static void __init ext4_create_debugfs_entry(void) | ||
2860 | { | ||
2861 | debugfs_dir = debugfs_create_dir("ext4", NULL); | ||
2862 | if (debugfs_dir) | ||
2863 | debugfs_debug = debugfs_create_u8("mballoc-debug", | ||
2864 | S_IRUGO | S_IWUSR, | ||
2865 | debugfs_dir, | ||
2866 | &mb_enable_debug); | ||
2867 | } | ||
2868 | |||
2869 | static void ext4_remove_debugfs_entry(void) | ||
2870 | { | ||
2871 | debugfs_remove(debugfs_debug); | ||
2872 | debugfs_remove(debugfs_dir); | ||
2878 | } | 2873 | } |
2879 | 2874 | ||
2875 | #else | ||
2876 | |||
2877 | static void __init ext4_create_debugfs_entry(void) | ||
2878 | { | ||
2879 | } | ||
2880 | |||
2881 | static void ext4_remove_debugfs_entry(void) | ||
2882 | { | ||
2883 | } | ||
2884 | |||
2885 | #endif | ||
2886 | |||
2880 | int __init init_ext4_mballoc(void) | 2887 | int __init init_ext4_mballoc(void) |
2881 | { | 2888 | { |
2882 | ext4_pspace_cachep = | 2889 | ext4_pspace_cachep = |
@@ -2904,6 +2911,7 @@ int __init init_ext4_mballoc(void) | |||
2904 | kmem_cache_destroy(ext4_ac_cachep); | 2911 | kmem_cache_destroy(ext4_ac_cachep); |
2905 | return -ENOMEM; | 2912 | return -ENOMEM; |
2906 | } | 2913 | } |
2914 | ext4_create_debugfs_entry(); | ||
2907 | return 0; | 2915 | return 0; |
2908 | } | 2916 | } |
2909 | 2917 | ||
@@ -2917,6 +2925,7 @@ void exit_ext4_mballoc(void) | |||
2917 | kmem_cache_destroy(ext4_pspace_cachep); | 2925 | kmem_cache_destroy(ext4_pspace_cachep); |
2918 | kmem_cache_destroy(ext4_ac_cachep); | 2926 | kmem_cache_destroy(ext4_ac_cachep); |
2919 | kmem_cache_destroy(ext4_free_ext_cachep); | 2927 | kmem_cache_destroy(ext4_free_ext_cachep); |
2928 | ext4_remove_debugfs_entry(); | ||
2920 | } | 2929 | } |
2921 | 2930 | ||
2922 | 2931 | ||
@@ -3061,7 +3070,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | |||
3061 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | 3070 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; |
3062 | else | 3071 | else |
3063 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | 3072 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
3064 | mb_debug("#%u: goal %u blocks for locality group\n", | 3073 | mb_debug(1, "#%u: goal %u blocks for locality group\n", |
3065 | current->pid, ac->ac_g_ex.fe_len); | 3074 | current->pid, ac->ac_g_ex.fe_len); |
3066 | } | 3075 | } |
3067 | 3076 | ||
@@ -3180,23 +3189,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3180 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | 3189 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || |
3181 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); | 3190 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); |
3182 | 3191 | ||
3183 | /* skip PA normalized request doesn't overlap with */ | 3192 | /* skip PAs this normalized request doesn't overlap with */ |
3184 | if (pa->pa_lstart >= end) { | 3193 | if (pa->pa_lstart >= end || pa_end <= start) { |
3185 | spin_unlock(&pa->pa_lock); | ||
3186 | continue; | ||
3187 | } | ||
3188 | if (pa_end <= start) { | ||
3189 | spin_unlock(&pa->pa_lock); | 3194 | spin_unlock(&pa->pa_lock); |
3190 | continue; | 3195 | continue; |
3191 | } | 3196 | } |
3192 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); | 3197 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); |
3193 | 3198 | ||
3199 | /* adjust start or end to be adjacent to this pa */ | ||
3194 | if (pa_end <= ac->ac_o_ex.fe_logical) { | 3200 | if (pa_end <= ac->ac_o_ex.fe_logical) { |
3195 | BUG_ON(pa_end < start); | 3201 | BUG_ON(pa_end < start); |
3196 | start = pa_end; | 3202 | start = pa_end; |
3197 | } | 3203 | } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { |
3198 | |||
3199 | if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { | ||
3200 | BUG_ON(pa->pa_lstart > end); | 3204 | BUG_ON(pa->pa_lstart > end); |
3201 | end = pa->pa_lstart; | 3205 | end = pa->pa_lstart; |
3202 | } | 3206 | } |
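A worked example of the window trimming above, with made-up numbers: let the normalized request span logical blocks [0, 64) with the original allocation at logical block 40, and let two live preallocations cover [8, 16) and [48, 56). The first PA hits the pa_end (16) <= fe_logical (40) case, so start becomes 16; the second hits pa_lstart (48) > fe_logical (40), so end becomes 48. The window shrinks to [16, 48): it still contains the original block but overlaps neither PA, which is the invariant the surrounding BUG_ONs enforce.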
@@ -3251,7 +3255,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3251 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; | 3255 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
3252 | } | 3256 | } |
3253 | 3257 | ||
3254 | mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, | 3258 | mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size, |
3255 | (unsigned) orig_size, (unsigned) start); | 3259 | (unsigned) orig_size, (unsigned) start); |
3256 | } | 3260 | } |
3257 | 3261 | ||
@@ -3300,7 +3304,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | |||
3300 | BUG_ON(pa->pa_free < len); | 3304 | BUG_ON(pa->pa_free < len); |
3301 | pa->pa_free -= len; | 3305 | pa->pa_free -= len; |
3302 | 3306 | ||
3303 | mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); | 3307 | mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); |
3304 | } | 3308 | } |
3305 | 3309 | ||
3306 | /* | 3310 | /* |
@@ -3324,7 +3328,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3324 | * in on-disk bitmap -- see ext4_mb_release_context() | 3328 | * in on-disk bitmap -- see ext4_mb_release_context() |
3325 | * Other CPUs are prevented from allocating from this pa by lg_mutex | 3329 | * Other CPUs are prevented from allocating from this pa by lg_mutex |
3326 | */ | 3330 | */ |
3327 | mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); | 3331 | mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); |
3328 | } | 3332 | } |
3329 | 3333 | ||
3330 | /* | 3334 | /* |
@@ -3382,6 +3386,11 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3382 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | 3386 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) |
3383 | continue; | 3387 | continue; |
3384 | 3388 | ||
3389 | /* non-extent files can't have physical blocks past 2^32 */ | ||
3390 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && | ||
3391 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | ||
3392 | continue; | ||
3393 | |||
3385 | /* found preallocated blocks, use them */ | 3394 | /* found preallocated blocks, use them */ |
3386 | spin_lock(&pa->pa_lock); | 3395 | spin_lock(&pa->pa_lock); |
3387 | if (pa->pa_deleted == 0 && pa->pa_free) { | 3396 | if (pa->pa_deleted == 0 && pa->pa_free) { |
@@ -3503,7 +3512,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3503 | preallocated += len; | 3512 | preallocated += len; |
3504 | count++; | 3513 | count++; |
3505 | } | 3514 | } |
3506 | mb_debug("preallocated %u for group %u\n", preallocated, group); | 3515 | mb_debug(1, "preallocated %u for group %u\n", preallocated, group); |
3507 | } | 3516 | } |
3508 | 3517 | ||
3509 | static void ext4_mb_pa_callback(struct rcu_head *head) | 3518 | static void ext4_mb_pa_callback(struct rcu_head *head) |
@@ -3638,7 +3647,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3638 | pa->pa_deleted = 0; | 3647 | pa->pa_deleted = 0; |
3639 | pa->pa_type = MB_INODE_PA; | 3648 | pa->pa_type = MB_INODE_PA; |
3640 | 3649 | ||
3641 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | 3650 | mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa, |
3642 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3651 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3643 | trace_ext4_mb_new_inode_pa(ac, pa); | 3652 | trace_ext4_mb_new_inode_pa(ac, pa); |
3644 | 3653 | ||
@@ -3698,7 +3707,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
3698 | pa->pa_deleted = 0; | 3707 | pa->pa_deleted = 0; |
3699 | pa->pa_type = MB_GROUP_PA; | 3708 | pa->pa_type = MB_GROUP_PA; |
3700 | 3709 | ||
3701 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | 3710 | mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa, |
3702 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3711 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3703 | trace_ext4_mb_new_group_pa(ac, pa); | 3712 | trace_ext4_mb_new_group_pa(ac, pa); |
3704 | 3713 | ||
@@ -3777,7 +3786,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3777 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3786 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
3778 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3787 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + |
3779 | le32_to_cpu(sbi->s_es->s_first_data_block); | 3788 | le32_to_cpu(sbi->s_es->s_first_data_block); |
3780 | mb_debug(" free preallocated %u/%u in group %u\n", | 3789 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
3781 | (unsigned) start, (unsigned) next - bit, | 3790 | (unsigned) start, (unsigned) next - bit, |
3782 | (unsigned) group); | 3791 | (unsigned) group); |
3783 | free += next - bit; | 3792 | free += next - bit; |
@@ -3868,7 +3877,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3868 | int busy = 0; | 3877 | int busy = 0; |
3869 | int free = 0; | 3878 | int free = 0; |
3870 | 3879 | ||
3871 | mb_debug("discard preallocation for group %u\n", group); | 3880 | mb_debug(1, "discard preallocation for group %u\n", group); |
3872 | 3881 | ||
3873 | if (list_empty(&grp->bb_prealloc_list)) | 3882 | if (list_empty(&grp->bb_prealloc_list)) |
3874 | return 0; | 3883 | return 0; |
@@ -3992,7 +4001,7 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3992 | return; | 4001 | return; |
3993 | } | 4002 | } |
3994 | 4003 | ||
3995 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | 4004 | mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino); |
3996 | trace_ext4_discard_preallocations(inode); | 4005 | trace_ext4_discard_preallocations(inode); |
3997 | 4006 | ||
3998 | INIT_LIST_HEAD(&list); | 4007 | INIT_LIST_HEAD(&list); |
@@ -4097,7 +4106,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
4097 | { | 4106 | { |
4098 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | 4107 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); |
4099 | } | 4108 | } |
4100 | #ifdef MB_DEBUG | 4109 | #ifdef CONFIG_EXT4_DEBUG |
4101 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4110 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
4102 | { | 4111 | { |
4103 | struct super_block *sb = ac->ac_sb; | 4112 | struct super_block *sb = ac->ac_sb; |
@@ -4139,14 +4148,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4139 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, | 4148 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, |
4140 | NULL, &start); | 4149 | NULL, &start); |
4141 | spin_unlock(&pa->pa_lock); | 4150 | spin_unlock(&pa->pa_lock); |
4142 | printk(KERN_ERR "PA:%lu:%d:%u \n", i, | 4151 | printk(KERN_ERR "PA:%u:%d:%u \n", i, |
4143 | start, pa->pa_len); | 4152 | start, pa->pa_len); |
4144 | } | 4153 | } |
4145 | ext4_unlock_group(sb, i); | 4154 | ext4_unlock_group(sb, i); |
4146 | 4155 | ||
4147 | if (grp->bb_free == 0) | 4156 | if (grp->bb_free == 0) |
4148 | continue; | 4157 | continue; |
4149 | printk(KERN_ERR "%lu: %d/%d \n", | 4158 | printk(KERN_ERR "%u: %d/%d \n", |
4150 | i, grp->bb_free, grp->bb_fragments); | 4159 | i, grp->bb_free, grp->bb_fragments); |
4151 | } | 4160 | } |
4152 | printk(KERN_ERR "\n"); | 4161 | printk(KERN_ERR "\n"); |
@@ -4174,16 +4183,26 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4174 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 4183 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
4175 | return; | 4184 | return; |
4176 | 4185 | ||
4186 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
4187 | return; | ||
4188 | |||
4177 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 4189 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; |
4178 | isize = i_size_read(ac->ac_inode) >> bsbits; | 4190 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
4191 | >> bsbits; | ||
4179 | size = max(size, isize); | 4192 | size = max(size, isize); |
4180 | 4193 | ||
4181 | /* don't use group allocation for large files */ | 4194 | if ((size == isize) && |
4182 | if (size >= sbi->s_mb_stream_request) | 4195 | !ext4_fs_is_busy(sbi) && |
4196 | (atomic_read(&ac->ac_inode->i_writecount) == 0)) { | ||
4197 | ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4183 | return; | 4198 | return; |
4199 | } | ||
4184 | 4200 | ||
4185 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | 4201 | /* don't use group allocation for large files */ |
4202 | if (size >= sbi->s_mb_stream_request) { | ||
4203 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | ||
4186 | return; | 4204 | return; |
4205 | } | ||
4187 | 4206 | ||
4188 | BUG_ON(ac->ac_lg != NULL); | 4207 | BUG_ON(ac->ac_lg != NULL); |
4189 | /* | 4208 | /* |
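Reading the reworked ext4_mb_group_or_file() above, the apparent intent of the new first case: a write that ends exactly at the (block-rounded) end of file, on a filesystem that is not busy, with no writer still holding the file open, is most likely the final flush of a closed file, so preallocating for it would only leave unused PAs behind. Decision sketch (identifiers from the diff, bodies trimmed):

    size  = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
    isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
                    >> bsbits;              /* i_size in blocks, rounded up */
    size  = max(size, isize);

    if (size == isize && !ext4_fs_is_busy(sbi) &&
        atomic_read(&ac->ac_inode->i_writecount) == 0)
            ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;  /* closing write */
    else if (size >= sbi->s_mb_stream_request)
            ac->ac_flags |= EXT4_MB_STREAM_ALLOC;     /* large file    */
    /* else: small file -- fall through to the per-CPU locality group */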
@@ -4246,7 +4265,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4246 | * locality group. this is a policy, actually */ | 4265 | * locality group. this is a policy, actually */ |
4247 | ext4_mb_group_or_file(ac); | 4266 | ext4_mb_group_or_file(ac); |
4248 | 4267 | ||
4249 | mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " | 4268 | mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " |
4250 | "left: %u/%u, right %u/%u to %swritable\n", | 4269 | "left: %u/%u, right %u/%u to %swritable\n", |
4251 | (unsigned) ar->len, (unsigned) ar->logical, | 4270 | (unsigned) ar->len, (unsigned) ar->logical, |
4252 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, | 4271 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, |
@@ -4268,7 +4287,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4268 | struct ext4_prealloc_space *pa, *tmp; | 4287 | struct ext4_prealloc_space *pa, *tmp; |
4269 | struct ext4_allocation_context *ac; | 4288 | struct ext4_allocation_context *ac; |
4270 | 4289 | ||
4271 | mb_debug("discard locality group preallocation\n"); | 4290 | mb_debug(1, "discard locality group preallocation\n"); |
4272 | 4291 | ||
4273 | INIT_LIST_HEAD(&discard_list); | 4292 | INIT_LIST_HEAD(&discard_list); |
4274 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4293 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c96bb19f58f9..188d3d709b24 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -37,11 +37,19 @@ | |||
37 | 37 | ||
38 | /* | 38 | /* |
39 | */ | 39 | */ |
40 | #define MB_DEBUG__ | 40 | #ifdef CONFIG_EXT4_DEBUG |
41 | #ifdef MB_DEBUG | 41 | extern u8 mb_enable_debug; |
42 | #define mb_debug(fmt, a...) printk(fmt, ##a) | 42 | |
43 | #define mb_debug(n, fmt, a...) \ | ||
44 | do { \ | ||
45 | if ((n) <= mb_enable_debug) { \ | ||
46 | printk(KERN_DEBUG "(%s, %d): %s: ", \ | ||
47 | __FILE__, __LINE__, __func__); \ | ||
48 | printk(fmt, ## a); \ | ||
49 | } \ | ||
50 | } while (0) | ||
43 | #else | 51 | #else |
44 | #define mb_debug(fmt, a...) | 52 | #define mb_debug(n, fmt, a...) |
45 | #endif | 53 | #endif |
46 | 54 | ||
47 | /* | 55 | /* |
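A usage sketch for the reworked macro: the new first argument is a verbosity level compared against mb_enable_debug, the runtime u8 knob that mballoc.c registers at /sys/kernel/debug/ext4/mballoc-debug when CONFIG_EXT4_DEBUG is set (see the debugfs hunk earlier). A message prints only while its level is at or below the knob's value:

    mb_debug(1, "load group %u\n", group);  /* level-1 call from the diff */
    mb_debug(2, "extra detail\n");          /* hypothetical level-2 call:
                                             * silent until the knob >= 2 */

    /* at runtime (shell, not C):
     *   echo 1 > /sys/kernel/debug/ext4/mballoc-debug
     * enables every level-1 message */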
@@ -128,8 +136,8 @@ struct ext4_prealloc_space { | |||
128 | unsigned pa_deleted; | 136 | unsigned pa_deleted; |
129 | ext4_fsblk_t pa_pstart; /* phys. block */ | 137 | ext4_fsblk_t pa_pstart; /* phys. block */ |
130 | ext4_lblk_t pa_lstart; /* log. block */ | 138 | ext4_lblk_t pa_lstart; /* log. block */ |
131 | unsigned short pa_len; /* len of preallocated chunk */ | 139 | ext4_grpblk_t pa_len; /* len of preallocated chunk */ |
132 | unsigned short pa_free; /* how many blocks are free */ | 140 | ext4_grpblk_t pa_free; /* how many blocks are free */ |
133 | unsigned short pa_type; /* pa type. inode or group */ | 141 | unsigned short pa_type; /* pa type. inode or group */ |
134 | spinlock_t *pa_obj_lock; | 142 | spinlock_t *pa_obj_lock; |
135 | struct inode *pa_inode; /* hack, for history only */ | 143 | struct inode *pa_inode; /* hack, for history only */ |
@@ -144,7 +152,7 @@ struct ext4_free_extent { | |||
144 | ext4_lblk_t fe_logical; | 152 | ext4_lblk_t fe_logical; |
145 | ext4_grpblk_t fe_start; | 153 | ext4_grpblk_t fe_start; |
146 | ext4_group_t fe_group; | 154 | ext4_group_t fe_group; |
147 | int fe_len; | 155 | ext4_grpblk_t fe_len; |
148 | }; | 156 | }; |
149 | 157 | ||
150 | /* | 158 | /* |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 313a50b39741..bf519f239ae6 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -353,17 +353,16 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
353 | 353 | ||
354 | down_write(&EXT4_I(inode)->i_data_sem); | 354 | down_write(&EXT4_I(inode)->i_data_sem); |
355 | /* | 355 | /* |
356 | * if EXT4_EXT_MIGRATE is cleared a block allocation | 356 | * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation |
357 | * happened after we started the migrate. We need to | 357 | * happened after we started the migrate. We need to |
358 | * fail the migrate | 358 | * fail the migrate |
359 | */ | 359 | */ |
360 | if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { | 360 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { |
361 | retval = -EAGAIN; | 361 | retval = -EAGAIN; |
362 | up_write(&EXT4_I(inode)->i_data_sem); | 362 | up_write(&EXT4_I(inode)->i_data_sem); |
363 | goto err_out; | 363 | goto err_out; |
364 | } else | 364 | } else |
365 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 365 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
366 | ~EXT4_EXT_MIGRATE; | ||
367 | /* | 366 | /* |
368 | * We have the extent map built with the tmp inode. | 367 | * We have the extent map built with the tmp inode. |
369 | * Now copy the i_data across | 368 | * Now copy the i_data across |
@@ -517,14 +516,15 @@ int ext4_ext_migrate(struct inode *inode) | |||
517 | * when we add extents we extend the journal | 516 | * when we add extents we extend the journal |
518 | */ | 517 | */ |
519 | /* | 518 | /* |
520 | * Even though we take i_mutex we can still cause block allocation | 519 | * Even though we take i_mutex we can still cause block |
521 | * via mmap write to holes. If we have allocated new blocks we fail | 520 | * allocation via mmap write to holes. If we have allocated |
522 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 521 | * new blocks we fail migrate. New block allocation will |
523 | * The flag is updated with i_data_sem held to prevent racing with | 522 | * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated |
524 | * block allocation. | 523 | * with i_data_sem held to prevent racing with block |
524 | * allocation. | ||
525 | */ | 525 | */ |
526 | down_read((&EXT4_I(inode)->i_data_sem)); | 526 | down_read((&EXT4_I(inode)->i_data_sem)); |
527 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; | 527 | EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; |
528 | up_read((&EXT4_I(inode)->i_data_sem)); | 528 | up_read((&EXT4_I(inode)->i_data_sem)); |
529 | 529 | ||
530 | handle = ext4_journal_start(inode, 1); | 530 | handle = ext4_journal_start(inode, 1); |
@@ -618,7 +618,7 @@ err_out: | |||
618 | tmp_inode->i_nlink = 0; | 618 | tmp_inode->i_nlink = 0; |
619 | 619 | ||
620 | ext4_journal_stop(handle); | 620 | ext4_journal_stop(handle); |
621 | 621 | unlock_new_inode(tmp_inode); | |
622 | iput(tmp_inode); | 622 | iput(tmp_inode); |
623 | 623 | ||
624 | return retval; | 624 | return retval; |
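Together with the earlier inode.c hunk that stopped masking EXT4_EXT_MIGRATE out of raw_inode->i_flags, these migrate.c changes move the migrate marker from the persisted i_flags word into the purely in-memory i_state word, so it can never reach disk. Condensed protocol (identifiers from the diff, structure compressed):

    down_read(&EXT4_I(inode)->i_data_sem);
    EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE;  /* migrate begins */
    up_read(&EXT4_I(inode)->i_data_sem);

    /* ... any block allocation in the meantime clears the bit ... */

    down_write(&EXT4_I(inode)->i_data_sem);
    if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE))
            retval = -EAGAIN;   /* raced with an allocation: fail */
    else
            EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
    up_write(&EXT4_I(inode)->i_data_sem);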
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index bbf2dd9404dc..c07a2915e40b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -19,14 +19,31 @@ | |||
19 | #include "ext4_extents.h" | 19 | #include "ext4_extents.h" |
20 | #include "ext4.h" | 20 | #include "ext4.h" |
21 | 21 | ||
22 | #define get_ext_path(path, inode, block, ret) \ | 22 | /** |
23 | do { \ | 23 | * get_ext_path - Find an extent path for designated logical block number. |
24 | path = ext4_ext_find_extent(inode, block, path); \ | 24 | * |
25 | if (IS_ERR(path)) { \ | 25 | * @inode: an inode which is searched |
26 | ret = PTR_ERR(path); \ | 26 | * @lblock: logical block number to find an extent path |
27 | path = NULL; \ | 27 | * @path: pointer to an extent path pointer (for output) |
28 | } \ | 28 | * |
29 | } while (0) | 29 | * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value |
30 | * on failure. | ||
31 | */ | ||
32 | static inline int | ||
33 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | ||
34 | struct ext4_ext_path **path) | ||
35 | { | ||
36 | int ret = 0; | ||
37 | |||
38 | *path = ext4_ext_find_extent(inode, lblock, *path); | ||
39 | if (IS_ERR(*path)) { | ||
40 | ret = PTR_ERR(*path); | ||
41 | *path = NULL; | ||
42 | } else if ((*path)[ext_depth(inode)].p_ext == NULL) | ||
43 | ret = -ENODATA; | ||
44 | |||
45 | return ret; | ||
46 | } | ||
30 | 47 | ||
31 | /** | 48 | /** |
32 | * copy_extent_status - Copy the extent's initialization status | 49 | * copy_extent_status - Copy the extent's initialization status |
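The macro-to-function conversion above also tightens the caller contract: status comes back as a return value rather than through a captured variable, and a leaf with no extent now fails early with -ENODATA inside the helper. Callers later in this diff change from

    get_ext_path(orig_path, orig_inode, eblock, err);
    if (orig_path == NULL)
            goto out;

to

    err = get_ext_path(orig_inode, eblock, &orig_path);
    if (err)
            goto out;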
@@ -113,6 +130,31 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
113 | } | 130 | } |
114 | 131 | ||
115 | /** | 132 | /** |
133 | * mext_check_null_inode - NULL check for two inodes | ||
134 | * | ||
135 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
136 | */ | ||
137 | static int | ||
138 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | ||
139 | const char *function) | ||
140 | { | ||
141 | int ret = 0; | ||
142 | |||
143 | if (inode1 == NULL) { | ||
144 | ext4_error(inode2->i_sb, function, | ||
145 | "Both inodes should not be NULL: " | ||
146 | "inode1 NULL inode2 %lu", inode2->i_ino); | ||
147 | ret = -EIO; | ||
148 | } else if (inode2 == NULL) { | ||
149 | ext4_error(inode1->i_sb, function, | ||
150 | "Both inodes should not be NULL: " | ||
151 | "inode1 %lu inode2 NULL", inode1->i_ino); | ||
152 | ret = -EIO; | ||
153 | } | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | /** | ||
116 | * mext_double_down_read - Acquire two inodes' read semaphore | 158 | * mext_double_down_read - Acquire two inodes' read semaphore |
117 | * | 159 | * |
118 | * @orig_inode: original inode structure | 160 | * @orig_inode: original inode structure |
@@ -124,8 +166,6 @@ mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | |||
124 | { | 166 | { |
125 | struct inode *first = orig_inode, *second = donor_inode; | 167 | struct inode *first = orig_inode, *second = donor_inode; |
126 | 168 | ||
127 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
128 | |||
129 | /* | 169 | /* |
130 | * Use the inode number to provide the stable locking order instead | 170 | * Use the inode number to provide the stable locking order instead |
131 | * of its address, because the C language doesn't guarantee you can | 171 | * of its address, because the C language doesn't guarantee you can |
@@ -152,8 +192,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
152 | { | 192 | { |
153 | struct inode *first = orig_inode, *second = donor_inode; | 193 | struct inode *first = orig_inode, *second = donor_inode; |
154 | 194 | ||
155 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
156 | |||
157 | /* | 195 | /* |
158 | * Use the inode number to provide the stable locking order instead | 196 | * Use the inode number to provide the stable locking order instead |
159 | * of its address, because the C language doesn't guarantee you can | 197 | * of its address, because the C language doesn't guarantee you can |
@@ -178,8 +216,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
178 | static void | 216 | static void |
179 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | 217 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) |
180 | { | 218 | { |
181 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
182 | |||
183 | up_read(&EXT4_I(orig_inode)->i_data_sem); | 219 | up_read(&EXT4_I(orig_inode)->i_data_sem); |
184 | up_read(&EXT4_I(donor_inode)->i_data_sem); | 220 | up_read(&EXT4_I(donor_inode)->i_data_sem); |
185 | } | 221 | } |
@@ -194,8 +230,6 @@ mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | |||
194 | static void | 230 | static void |
195 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | 231 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) |
196 | { | 232 | { |
197 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
198 | |||
199 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 233 | up_write(&EXT4_I(orig_inode)->i_data_sem); |
200 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 234 | up_write(&EXT4_I(donor_inode)->i_data_sem); |
201 | } | 235 | } |
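
The BUG_ON() lines removed above sat in helpers whose real job is taking two semaphores in a stable order keyed by inode number: comparing i_ino rather than pointer addresses yields a total order the C language actually guarantees, so two tasks locking the same pair in opposite argument order cannot deadlock. A compilable userspace sketch of that discipline, with pthread rwlocks standing in for i_data_sem (hypothetical names, not the ext4 code):

    #include <pthread.h>

    struct res {
            unsigned long id;             /* plays the role of i_ino */
            pthread_rwlock_t sem;         /* plays the role of i_data_sem */
    };

    static void double_down_write(struct res *a, struct res *b)
    {
            struct res *first = a, *second = b;

            /* Always lock the smaller id first, whatever order the
             * caller passed the two resources in. */
            if (first->id > second->id) {
                    first = b;
                    second = a;
            }
            pthread_rwlock_wrlock(&first->sem);
            pthread_rwlock_wrlock(&second->sem);
    }

    static void double_up_write(struct res *a, struct res *b)
    {
            /* Unlock order does not matter for deadlock avoidance. */
            pthread_rwlock_unlock(&a->sem);
            pthread_rwlock_unlock(&b->sem);
    }
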
@@ -283,8 +317,8 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
283 | } | 317 | } |
284 | 318 | ||
285 | if (new_flag) { | 319 | if (new_flag) { |
286 | get_ext_path(orig_path, orig_inode, eblock, err); | 320 | err = get_ext_path(orig_inode, eblock, &orig_path); |
287 | if (orig_path == NULL) | 321 | if (err) |
288 | goto out; | 322 | goto out; |
289 | 323 | ||
290 | if (ext4_ext_insert_extent(handle, orig_inode, | 324 | if (ext4_ext_insert_extent(handle, orig_inode, |
@@ -293,9 +327,9 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
293 | } | 327 | } |
294 | 328 | ||
295 | if (end_flag) { | 329 | if (end_flag) { |
296 | get_ext_path(orig_path, orig_inode, | 330 | err = get_ext_path(orig_inode, |
297 | le32_to_cpu(end_ext->ee_block) - 1, err); | 331 | le32_to_cpu(end_ext->ee_block) - 1, &orig_path); |
298 | if (orig_path == NULL) | 332 | if (err) |
299 | goto out; | 333 | goto out; |
300 | 334 | ||
301 | if (ext4_ext_insert_extent(handle, orig_inode, | 335 | if (ext4_ext_insert_extent(handle, orig_inode, |
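
These hunks convert call sites from the old get_ext_path() macro, which reported every failure the same way by leaving the path NULL, to a function that returns 0 or a negative errno and hands the path back through an out pointer (its tail is visible at the very top of this section). A userspace sketch of that convention with illustrative names, not the kernel API; callers then follow the "err = ...; if (err) goto out;" shape used throughout the diff:

    #include <errno.h>
    #include <stdlib.h>

    struct path_stub { int depth; };

    /* Return 0 or a negative errno; *path is handed back separately, so
     * "the lookup machinery failed" (-ENOMEM) and "nothing lives at that
     * offset" (-ENODATA) become distinguishable outcomes. */
    static int get_path(unsigned long block, struct path_stub **path)
    {
            *path = malloc(sizeof(**path));
            if (*path == NULL)
                    return -ENOMEM;      /* hard failure, nothing to free */
            (*path)->depth = 0;
            if (block > 1000)            /* stand-in for "no extent here" */
                    return -ENODATA;     /* caller still owns and frees *path */
            return 0;
    }
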
@@ -519,7 +553,15 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
519 | * oext |-----------| | 553 | * oext |-----------| |
520 | * new_ext |-------| | 554 | * new_ext |-------| |
521 | */ | 555 | */ |
522 | BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); | 556 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
557 | ext4_error(orig_inode->i_sb, __func__, | ||
558 | "new_ext_end(%u) should be less than or equal to " | ||
559 | "oext->ee_block(%u) + oext_alen(%d) - 1", | ||
560 | new_ext_end, le32_to_cpu(oext->ee_block), | ||
561 | oext_alen); | ||
562 | ret = -EIO; | ||
563 | goto out; | ||
564 | } | ||
523 | 565 | ||
524 | /* | 566 | /* |
525 | * Case: new_ext is smaller than original extent | 567 | * Case: new_ext is smaller than original extent |
@@ -543,6 +585,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
543 | 585 | ||
544 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | 586 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, |
545 | o_end, &start_ext, &new_ext, &end_ext); | 587 | o_end, &start_ext, &new_ext, &end_ext); |
588 | out: | ||
546 | return ret; | 589 | return ret; |
547 | } | 590 | } |
548 | 591 | ||
@@ -554,8 +597,10 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
554 | * @orig_off: block offset of original inode | 597 | * @orig_off: block offset of original inode |
555 | * @donor_off: block offset of donor inode | 598 | * @donor_off: block offset of donor inode |
556 | * @max_count: the maximum length of extents | 599 | * @max_count: the maximum length of extents |
600 | * | ||
601 | * Return 0 on success, or a negative error value on failure. | ||
557 | */ | 602 | */ |
558 | static void | 603 | static int |
559 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | 604 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, |
560 | struct ext4_extent *tmp_oext, | 605 | struct ext4_extent *tmp_oext, |
561 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | 606 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, |
@@ -564,6 +609,19 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
564 | ext4_lblk_t diff, orig_diff; | 609 | ext4_lblk_t diff, orig_diff; |
565 | struct ext4_extent dext_old, oext_old; | 610 | struct ext4_extent dext_old, oext_old; |
566 | 611 | ||
612 | BUG_ON(orig_off != donor_off); | ||
613 | |||
614 | /* original and donor extents have to cover the same block offset */ | ||
615 | if (orig_off < le32_to_cpu(tmp_oext->ee_block) || | ||
616 | le32_to_cpu(tmp_oext->ee_block) + | ||
617 | ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) | ||
618 | return -ENODATA; | ||
619 | |||
620 | if (orig_off < le32_to_cpu(tmp_dext->ee_block) || | ||
621 | le32_to_cpu(tmp_dext->ee_block) + | ||
622 | ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) | ||
623 | return -ENODATA; | ||
624 | |||
567 | dext_old = *tmp_dext; | 625 | dext_old = *tmp_dext; |
568 | oext_old = *tmp_oext; | 626 | oext_old = *tmp_oext; |
569 | 627 | ||
@@ -591,6 +649,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
591 | 649 | ||
592 | copy_extent_status(&oext_old, tmp_dext); | 650 | copy_extent_status(&oext_old, tmp_dext); |
593 | copy_extent_status(&dext_old, tmp_oext); | 651 | copy_extent_status(&dext_old, tmp_oext); |
652 | |||
653 | return 0; | ||
594 | } | 654 | } |
595 | 655 | ||
596 | /** | 656 | /** |
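
mext_calc_swap_extents() now refuses, with -ENODATA, offsets that fall outside either extent instead of hitting a BUG_ON later: an offset is covered by an extent exactly when start <= off <= start + len - 1. A small compilable sketch of that containment test, with plain integers standing in for the little-endian on-disk fields:

    #include <errno.h>

    struct ext { unsigned int start, len; };   /* len >= 1 assumed */

    static int covers(const struct ext *e, unsigned int off)
    {
            return off >= e->start && off <= e->start + e->len - 1;
    }

    /* Mirror of the two added range checks: both the original and the
     * donor extent must cover the offset being swapped. */
    static int calc_swap_check(const struct ext *oext, const struct ext *dext,
                               unsigned int orig_off)
    {
            if (!covers(oext, orig_off) || !covers(dext, orig_off))
                    return -ENODATA;      /* the offset fell into a hole */
            return 0;
    }
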
@@ -631,13 +691,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
631 | mext_double_down_write(orig_inode, donor_inode); | 691 | mext_double_down_write(orig_inode, donor_inode); |
632 | 692 | ||
633 | /* Get the original extent for the block "orig_off" */ | 693 | /* Get the original extent for the block "orig_off" */ |
634 | get_ext_path(orig_path, orig_inode, orig_off, err); | 694 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
635 | if (orig_path == NULL) | 695 | if (err) |
636 | goto out; | 696 | goto out; |
637 | 697 | ||
638 | /* Get the donor extent for the head */ | 698 | /* Get the donor extent for the head */ |
639 | get_ext_path(donor_path, donor_inode, donor_off, err); | 699 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
640 | if (donor_path == NULL) | 700 | if (err) |
641 | goto out; | 701 | goto out; |
642 | depth = ext_depth(orig_inode); | 702 | depth = ext_depth(orig_inode); |
643 | oext = orig_path[depth].p_ext; | 703 | oext = orig_path[depth].p_ext; |
@@ -647,13 +707,28 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
647 | dext = donor_path[depth].p_ext; | 707 | dext = donor_path[depth].p_ext; |
648 | tmp_dext = *dext; | 708 | tmp_dext = *dext; |
649 | 709 | ||
650 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 710 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
651 | donor_off, count); | 711 | donor_off, count); |
712 | if (err) | ||
713 | goto out; | ||
652 | 714 | ||
653 | /* Loop for the donor extents */ | 715 | /* Loop for the donor extents */ |
654 | while (1) { | 716 | while (1) { |
655 | /* The extent for donor must be found. */ | 717 | /* The extent for donor must be found. */ |
656 | BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); | 718 | if (!dext) { |
719 | ext4_error(donor_inode->i_sb, __func__, | ||
720 | "The extent for donor must be found"); | ||
721 | err = -EIO; | ||
722 | goto out; | ||
723 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | ||
724 | ext4_error(donor_inode->i_sb, __func__, | ||
725 | "Donor offset(%u) and the first block of donor " | ||
726 | "extent(%u) should be equal", | ||
727 | donor_off, | ||
728 | le32_to_cpu(tmp_dext.ee_block)); | ||
729 | err = -EIO; | ||
730 | goto out; | ||
731 | } | ||
657 | 732 | ||
658 | /* Set donor extent to orig extent */ | 733 | /* Set donor extent to orig extent */ |
659 | err = mext_leaf_block(handle, orig_inode, | 734 | err = mext_leaf_block(handle, orig_inode, |
@@ -678,8 +753,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
678 | 753 | ||
679 | if (orig_path) | 754 | if (orig_path) |
680 | ext4_ext_drop_refs(orig_path); | 755 | ext4_ext_drop_refs(orig_path); |
681 | get_ext_path(orig_path, orig_inode, orig_off, err); | 756 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
682 | if (orig_path == NULL) | 757 | if (err) |
683 | goto out; | 758 | goto out; |
684 | depth = ext_depth(orig_inode); | 759 | depth = ext_depth(orig_inode); |
685 | oext = orig_path[depth].p_ext; | 760 | oext = orig_path[depth].p_ext; |
@@ -692,9 +767,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
692 | 767 | ||
693 | if (donor_path) | 768 | if (donor_path) |
694 | ext4_ext_drop_refs(donor_path); | 769 | ext4_ext_drop_refs(donor_path); |
695 | get_ext_path(donor_path, donor_inode, | 770 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
696 | donor_off, err); | 771 | if (err) |
697 | if (donor_path == NULL) | ||
698 | goto out; | 772 | goto out; |
699 | depth = ext_depth(donor_inode); | 773 | depth = ext_depth(donor_inode); |
700 | dext = donor_path[depth].p_ext; | 774 | dext = donor_path[depth].p_ext; |
@@ -705,9 +779,10 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
705 | } | 779 | } |
706 | tmp_dext = *dext; | 780 | tmp_dext = *dext; |
707 | 781 | ||
708 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 782 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
709 | donor_off, | 783 | donor_off, count - replaced_count); |
710 | count - replaced_count); | 784 | if (err) |
785 | goto out; | ||
711 | } | 786 | } |
712 | 787 | ||
713 | out: | 788 | out: |
@@ -740,7 +815,7 @@ out: | |||
740 | * on success, or a negative error value on failure. | 815 | * on success, or a negative error value on failure. |
741 | */ | 816 | */ |
742 | static int | 817 | static int |
743 | move_extent_par_page(struct file *o_filp, struct inode *donor_inode, | 818 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
744 | pgoff_t orig_page_offset, int data_offset_in_page, | 819 | pgoff_t orig_page_offset, int data_offset_in_page, |
745 | int block_len_in_page, int uninit) | 820 | int block_len_in_page, int uninit) |
746 | { | 821 | { |
@@ -871,6 +946,7 @@ out: | |||
871 | if (PageLocked(page)) | 946 | if (PageLocked(page)) |
872 | unlock_page(page); | 947 | unlock_page(page); |
873 | page_cache_release(page); | 948 | page_cache_release(page); |
949 | ext4_journal_stop(handle); | ||
874 | } | 950 | } |
875 | out2: | 951 | out2: |
876 | ext4_journal_stop(handle); | 952 | ext4_journal_stop(handle); |
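
The single added line above plugs a journal-handle leak in move_extent_per_page(): the error path that falls through this "out:" block released the page but previously returned without stopping the handle, which only the separate "out2:" exit did. A compilable sketch of the shape of the fix, with malloc()/free() standing in for starting and stopping a handle (stand-in helpers, not the ext4 API):

    #include <errno.h>
    #include <stdlib.h>

    static void *start_handle(void) { return malloc(1); }
    static void stop_handle(void *h) { free(h); }

    static int move_one_page(int simulate_error)
    {
            int err = 0;
            void *handle = start_handle();

            if (!handle)
                    return -ENOMEM;

            if (simulate_error) {
                    err = -EIO;
                    goto out;     /* used to leak the handle */
            }

            /* ... block replacement work would happen here ... */
    out:
            stop_handle(handle);  /* the call this hunk adds to this path */
            return err;
    }
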
@@ -897,6 +973,10 @@ mext_check_arguments(struct inode *orig_inode, | |||
897 | struct inode *donor_inode, __u64 orig_start, | 973 | struct inode *donor_inode, __u64 orig_start, |
898 | __u64 donor_start, __u64 *len, __u64 moved_len) | 974 | __u64 donor_start, __u64 *len, __u64 moved_len) |
899 | { | 975 | { |
976 | ext4_lblk_t orig_blocks, donor_blocks; | ||
977 | unsigned int blkbits = orig_inode->i_blkbits; | ||
978 | unsigned int blocksize = 1 << blkbits; | ||
979 | |||
900 | /* Regular file check */ | 980 | /* Regular file check */ |
901 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | 981 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { |
902 | ext4_debug("ext4 move extent: The argument files should be " | 982 | ext4_debug("ext4 move extent: The argument files should be " |
@@ -960,54 +1040,58 @@ mext_check_arguments(struct inode *orig_inode, | |||
960 | return -EINVAL; | 1040 | return -EINVAL; |
961 | } | 1041 | } |
962 | 1042 | ||
963 | if ((orig_start > MAX_DEFRAG_SIZE) || | 1043 | if ((orig_start > EXT_MAX_BLOCK) || |
964 | (donor_start > MAX_DEFRAG_SIZE) || | 1044 | (donor_start > EXT_MAX_BLOCK) || |
965 | (*len > MAX_DEFRAG_SIZE) || | 1045 | (*len > EXT_MAX_BLOCK) || |
966 | (orig_start + *len > MAX_DEFRAG_SIZE)) { | 1046 | (orig_start + *len > EXT_MAX_BLOCK)) { |
967 | ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " | 1047 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
968 | "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, | 1048 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, |
969 | orig_inode->i_ino, donor_inode->i_ino); | 1049 | orig_inode->i_ino, donor_inode->i_ino); |
970 | return -EINVAL; | 1050 | return -EINVAL; |
971 | } | 1051 | } |
972 | 1052 | ||
973 | if (orig_inode->i_size > donor_inode->i_size) { | 1053 | if (orig_inode->i_size > donor_inode->i_size) { |
974 | if (orig_start >= donor_inode->i_size) { | 1054 | donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; |
1055 | /* TODO: eliminate this artificial restriction */ | ||
1056 | if (orig_start >= donor_blocks) { | ||
975 | ext4_debug("ext4 move extent: orig start offset " | 1057 | ext4_debug("ext4 move extent: orig start offset " |
976 | "[%llu] should be less than donor file size " | 1058 | "[%llu] should be less than donor file blocks " |
977 | "[%lld] [ino:orig %lu, donor_inode %lu]\n", | 1059 | "[%u] [ino:orig %lu, donor %lu]\n", |
978 | orig_start, donor_inode->i_size, | 1060 | orig_start, donor_blocks, |
979 | orig_inode->i_ino, donor_inode->i_ino); | 1061 | orig_inode->i_ino, donor_inode->i_ino); |
980 | return -EINVAL; | 1062 | return -EINVAL; |
981 | } | 1063 | } |
982 | 1064 | ||
983 | if (orig_start + *len > donor_inode->i_size) { | 1065 | /* TODO: eliminate this artificial restriction */ |
1066 | if (orig_start + *len > donor_blocks) { | ||
984 | ext4_debug("ext4 move extent: End offset [%llu] should " | 1067 | ext4_debug("ext4 move extent: End offset [%llu] should " |
985 | "be less than donor file size [%lld]." | 1068 | "be less than donor file blocks [%u]." |
986 | "So adjust length from %llu to %lld " | 1069 | "So adjust length from %llu to %llu " |
987 | "[ino:orig %lu, donor %lu]\n", | 1070 | "[ino:orig %lu, donor %lu]\n", |
988 | orig_start + *len, donor_inode->i_size, | 1071 | orig_start + *len, donor_blocks, |
989 | *len, donor_inode->i_size - orig_start, | 1072 | *len, donor_blocks - orig_start, |
990 | orig_inode->i_ino, donor_inode->i_ino); | 1073 | orig_inode->i_ino, donor_inode->i_ino); |
991 | *len = donor_inode->i_size - orig_start; | 1074 | *len = donor_blocks - orig_start; |
992 | } | 1075 | } |
993 | } else { | 1076 | } else { |
994 | if (orig_start >= orig_inode->i_size) { | 1077 | orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; |
1078 | if (orig_start >= orig_blocks) { | ||
995 | ext4_debug("ext4 move extent: start offset [%llu] " | 1079 | ext4_debug("ext4 move extent: start offset [%llu] " |
996 | "should be less than original file size " | 1080 | "should be less than original file blocks " |
997 | "[%lld] [inode:orig %lu, donor %lu]\n", | 1081 | "[%u] [ino:orig %lu, donor %lu]\n", |
998 | orig_start, orig_inode->i_size, | 1082 | orig_start, orig_blocks, |
999 | orig_inode->i_ino, donor_inode->i_ino); | 1083 | orig_inode->i_ino, donor_inode->i_ino); |
1000 | return -EINVAL; | 1084 | return -EINVAL; |
1001 | } | 1085 | } |
1002 | 1086 | ||
1003 | if (orig_start + *len > orig_inode->i_size) { | 1087 | if (orig_start + *len > orig_blocks) { |
1004 | ext4_debug("ext4 move extent: Adjust length " | 1088 | ext4_debug("ext4 move extent: Adjust length " |
1005 | "from %llu to %lld. Because it should be " | 1089 | "from %llu to %llu. Because it should be " |
1006 | "less than original file size " | 1090 | "less than original file blocks " |
1007 | "[ino:orig %lu, donor %lu]\n", | 1091 | "[ino:orig %lu, donor %lu]\n", |
1008 | *len, orig_inode->i_size - orig_start, | 1092 | *len, orig_blocks - orig_start, |
1009 | orig_inode->i_ino, donor_inode->i_ino); | 1093 | orig_inode->i_ino, donor_inode->i_ino); |
1010 | *len = orig_inode->i_size - orig_start; | 1094 | *len = orig_blocks - orig_start; |
1011 | } | 1095 | } |
1012 | } | 1096 | } |
1013 | 1097 | ||
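
The bounds checks above now compare block offsets against a file's block count rather than its raw byte size, because a partial trailing block still occupies a whole block. The conversion is a ceiling division done with shifts, where blocksize == 1 << blkbits; a runnable sketch:

    #include <assert.h>

    static unsigned long long size_to_blocks(unsigned long long size,
                                             unsigned int blkbits)
    {
            unsigned long long blocksize = 1ULL << blkbits;

            return (size + blocksize - 1) >> blkbits;
    }

    int main(void)
    {
            /* With 4 KiB blocks (blkbits == 12): 1 byte occupies 1 block,
             * 4096 bytes exactly 1 block, 4097 bytes spill into a 2nd. */
            assert(size_to_blocks(1, 12) == 1);
            assert(size_to_blocks(4096, 12) == 1);
            assert(size_to_blocks(4097, 12) == 2);
            return 0;
    }
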
@@ -1027,18 +1111,23 @@ mext_check_arguments(struct inode *orig_inode, | |||
1027 | * @inode1: the inode structure | 1111 | * @inode1: the inode structure |
1028 | * @inode2: the inode structure | 1112 | * @inode2: the inode structure |
1029 | * | 1113 | * |
1030 | * Lock two inodes' i_mutex by i_ino order. This function is moved from | 1114 | * Lock two inodes' i_mutex by i_ino order. |
1031 | * fs/inode.c. | 1115 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
1032 | */ | 1116 | */ |
1033 | static void | 1117 | static int |
1034 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | 1118 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) |
1035 | { | 1119 | { |
1036 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | 1120 | int ret = 0; |
1037 | if (inode1) | 1121 | |
1038 | mutex_lock(&inode1->i_mutex); | 1122 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1039 | else if (inode2) | 1123 | |
1040 | mutex_lock(&inode2->i_mutex); | 1124 | ret = mext_check_null_inode(inode1, inode2, __func__); |
1041 | return; | 1125 | if (ret < 0) |
1126 | goto out; | ||
1127 | |||
1128 | if (inode1 == inode2) { | ||
1129 | mutex_lock(&inode1->i_mutex); | ||
1130 | goto out; | ||
1042 | } | 1131 | } |
1043 | 1132 | ||
1044 | if (inode1->i_ino < inode2->i_ino) { | 1133 | if (inode1->i_ino < inode2->i_ino) { |
@@ -1048,6 +1137,9 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1048 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | 1137 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); |
1049 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | 1138 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); |
1050 | } | 1139 | } |
1140 | |||
1141 | out: | ||
1142 | return ret; | ||
1051 | } | 1143 | } |
1052 | 1144 | ||
1053 | /** | 1145 | /** |
@@ -1056,17 +1148,28 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1056 | * @inode1: the inode that is released first | 1148 | * @inode1: the inode that is released first |
1057 | * @inode2: the inode that is released second | 1149 | * @inode2: the inode that is released second |
1058 | * | 1150 | * |
1059 | * This function is moved from fs/inode.c. | 1151 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
1060 | */ | 1152 | */ |
1061 | 1153 | ||
1062 | static void | 1154 | static int |
1063 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | 1155 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) |
1064 | { | 1156 | { |
1157 | int ret = 0; | ||
1158 | |||
1159 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1160 | |||
1161 | ret = mext_check_null_inode(inode1, inode2, __func__); | ||
1162 | if (ret < 0) | ||
1163 | goto out; | ||
1164 | |||
1065 | if (inode1) | 1165 | if (inode1) |
1066 | mutex_unlock(&inode1->i_mutex); | 1166 | mutex_unlock(&inode1->i_mutex); |
1067 | 1167 | ||
1068 | if (inode2 && inode2 != inode1) | 1168 | if (inode2 && inode2 != inode1) |
1069 | mutex_unlock(&inode2->i_mutex); | 1169 | mutex_unlock(&inode2->i_mutex); |
1170 | |||
1171 | out: | ||
1172 | return ret; | ||
1070 | } | 1173 | } |
1071 | 1174 | ||
1072 | /** | 1175 | /** |
@@ -1123,70 +1226,76 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1123 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | 1226 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; |
1124 | ext4_lblk_t rest_blocks; | 1227 | ext4_lblk_t rest_blocks; |
1125 | pgoff_t orig_page_offset = 0, seq_end_page; | 1228 | pgoff_t orig_page_offset = 0, seq_end_page; |
1126 | int ret, depth, last_extent = 0; | 1229 | int ret1, ret2, depth, last_extent = 0; |
1127 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 1230 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
1128 | int data_offset_in_page; | 1231 | int data_offset_in_page; |
1129 | int block_len_in_page; | 1232 | int block_len_in_page; |
1130 | int uninit; | 1233 | int uninit; |
1131 | 1234 | ||
1132 | /* protect orig and donor against a truncate */ | 1235 | /* protect orig and donor against a truncate */ |
1133 | mext_inode_double_lock(orig_inode, donor_inode); | 1236 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); |
1237 | if (ret1 < 0) | ||
1238 | return ret1; | ||
1134 | 1239 | ||
1135 | mext_double_down_read(orig_inode, donor_inode); | 1240 | mext_double_down_read(orig_inode, donor_inode); |
1136 | /* Check the filesystem environment whether move_extent can be done */ | 1241 | /* Check the filesystem environment whether move_extent can be done */ |
1137 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1242 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1138 | donor_start, &len, *moved_len); | 1243 | donor_start, &len, *moved_len); |
1139 | mext_double_up_read(orig_inode, donor_inode); | 1244 | mext_double_up_read(orig_inode, donor_inode); |
1140 | if (ret) | 1245 | if (ret1) |
1141 | goto out2; | 1246 | goto out; |
1142 | 1247 | ||
1143 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 1248 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; |
1144 | block_end = block_start + len - 1; | 1249 | block_end = block_start + len - 1; |
1145 | if (file_end < block_end) | 1250 | if (file_end < block_end) |
1146 | len -= block_end - file_end; | 1251 | len -= block_end - file_end; |
1147 | 1252 | ||
1148 | get_ext_path(orig_path, orig_inode, block_start, ret); | 1253 | ret1 = get_ext_path(orig_inode, block_start, &orig_path); |
1149 | if (orig_path == NULL) | 1254 | if (ret1) |
1150 | goto out2; | 1255 | goto out; |
1151 | 1256 | ||
1152 | /* Get path structure to check the hole */ | 1257 | /* Get path structure to check the hole */ |
1153 | get_ext_path(holecheck_path, orig_inode, block_start, ret); | 1258 | ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); |
1154 | if (holecheck_path == NULL) | 1259 | if (ret1) |
1155 | goto out; | 1260 | goto out; |
1156 | 1261 | ||
1157 | depth = ext_depth(orig_inode); | 1262 | depth = ext_depth(orig_inode); |
1158 | ext_cur = holecheck_path[depth].p_ext; | 1263 | ext_cur = holecheck_path[depth].p_ext; |
1159 | if (ext_cur == NULL) { | ||
1160 | ret = -EINVAL; | ||
1161 | goto out; | ||
1162 | } | ||
1163 | 1264 | ||
1164 | /* | 1265 | /* |
1165 | * Get proper extent whose ee_block is beyond block_start | 1266 | * Get proper starting location of block replacement if block_start was |
1166 | * if block_start was within the hole. | 1267 | * within the hole. |
1167 | */ | 1268 | */ |
1168 | if (le32_to_cpu(ext_cur->ee_block) + | 1269 | if (le32_to_cpu(ext_cur->ee_block) + |
1169 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | 1270 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { |
1271 | /* | ||
1272 | * The hole exists between extents or the tail of | ||
1273 | * original file. | ||
1274 | */ | ||
1170 | last_extent = mext_next_extent(orig_inode, | 1275 | last_extent = mext_next_extent(orig_inode, |
1171 | holecheck_path, &ext_cur); | 1276 | holecheck_path, &ext_cur); |
1172 | if (last_extent < 0) { | 1277 | if (last_extent < 0) { |
1173 | ret = last_extent; | 1278 | ret1 = last_extent; |
1174 | goto out; | 1279 | goto out; |
1175 | } | 1280 | } |
1176 | last_extent = mext_next_extent(orig_inode, orig_path, | 1281 | last_extent = mext_next_extent(orig_inode, orig_path, |
1177 | &ext_dummy); | 1282 | &ext_dummy); |
1178 | if (last_extent < 0) { | 1283 | if (last_extent < 0) { |
1179 | ret = last_extent; | 1284 | ret1 = last_extent; |
1180 | goto out; | 1285 | goto out; |
1181 | } | 1286 | } |
1182 | } | 1287 | seq_start = le32_to_cpu(ext_cur->ee_block); |
1183 | seq_start = block_start; | 1288 | } else if (le32_to_cpu(ext_cur->ee_block) > block_start) |
1289 | /* The hole exists at the beginning of original file. */ | ||
1290 | seq_start = le32_to_cpu(ext_cur->ee_block); | ||
1291 | else | ||
1292 | seq_start = block_start; | ||
1184 | 1293 | ||
1185 | /* No blocks within the specified range. */ | 1294 | /* No blocks within the specified range. */ |
1186 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 1295 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { |
1187 | ext4_debug("ext4 move extent: The specified range of file " | 1296 | ext4_debug("ext4 move extent: The specified range of file " |
1188 | "may be the hole\n"); | 1297 | "may be the hole\n"); |
1189 | ret = -EINVAL; | 1298 | ret1 = -EINVAL; |
1190 | goto out; | 1299 | goto out; |
1191 | } | 1300 | } |
1192 | 1301 | ||
@@ -1206,7 +1315,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1206 | last_extent = mext_next_extent(orig_inode, holecheck_path, | 1315 | last_extent = mext_next_extent(orig_inode, holecheck_path, |
1207 | &ext_cur); | 1316 | &ext_cur); |
1208 | if (last_extent < 0) { | 1317 | if (last_extent < 0) { |
1209 | ret = last_extent; | 1318 | ret1 = last_extent; |
1210 | break; | 1319 | break; |
1211 | } | 1320 | } |
1212 | add_blocks = ext4_ext_get_actual_len(ext_cur); | 1321 | add_blocks = ext4_ext_get_actual_len(ext_cur); |
@@ -1258,16 +1367,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1258 | while (orig_page_offset <= seq_end_page) { | 1367 | while (orig_page_offset <= seq_end_page) { |
1259 | 1368 | ||
1260 | /* Swap original branches with new branches */ | 1369 | /* Swap original branches with new branches */ |
1261 | ret = move_extent_par_page(o_filp, donor_inode, | 1370 | ret1 = move_extent_per_page(o_filp, donor_inode, |
1262 | orig_page_offset, | 1371 | orig_page_offset, |
1263 | data_offset_in_page, | 1372 | data_offset_in_page, |
1264 | block_len_in_page, uninit); | 1373 | block_len_in_page, uninit); |
1265 | if (ret < 0) | 1374 | if (ret1 < 0) |
1266 | goto out; | 1375 | goto out; |
1267 | orig_page_offset++; | 1376 | orig_page_offset++; |
1268 | /* Count how many blocks we have exchanged */ | 1377 | /* Count how many blocks we have exchanged */ |
1269 | *moved_len += block_len_in_page; | 1378 | *moved_len += block_len_in_page; |
1270 | BUG_ON(*moved_len > len); | 1379 | if (*moved_len > len) { |
1380 | ext4_error(orig_inode->i_sb, __func__, | ||
1381 | "We replaced blocks too much! " | ||
1382 | "sum of replaced: %llu requested: %llu", | ||
1383 | *moved_len, len); | ||
1384 | ret1 = -EIO; | ||
1385 | goto out; | ||
1386 | } | ||
1271 | 1387 | ||
1272 | data_offset_in_page = 0; | 1388 | data_offset_in_page = 0; |
1273 | rest_blocks -= block_len_in_page; | 1389 | rest_blocks -= block_len_in_page; |
@@ -1280,17 +1396,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1280 | /* Decrease buffer counter */ | 1396 | /* Decrease buffer counter */ |
1281 | if (holecheck_path) | 1397 | if (holecheck_path) |
1282 | ext4_ext_drop_refs(holecheck_path); | 1398 | ext4_ext_drop_refs(holecheck_path); |
1283 | get_ext_path(holecheck_path, orig_inode, | 1399 | ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); |
1284 | seq_start, ret); | 1400 | if (ret1) |
1285 | if (holecheck_path == NULL) | ||
1286 | break; | 1401 | break; |
1287 | depth = holecheck_path->p_depth; | 1402 | depth = holecheck_path->p_depth; |
1288 | 1403 | ||
1289 | /* Decrease buffer counter */ | 1404 | /* Decrease buffer counter */ |
1290 | if (orig_path) | 1405 | if (orig_path) |
1291 | ext4_ext_drop_refs(orig_path); | 1406 | ext4_ext_drop_refs(orig_path); |
1292 | get_ext_path(orig_path, orig_inode, seq_start, ret); | 1407 | ret1 = get_ext_path(orig_inode, seq_start, &orig_path); |
1293 | if (orig_path == NULL) | 1408 | if (ret1) |
1294 | break; | 1409 | break; |
1295 | 1410 | ||
1296 | ext_cur = holecheck_path[depth].p_ext; | 1411 | ext_cur = holecheck_path[depth].p_ext; |
@@ -1307,14 +1422,13 @@ out: | |||
1307 | ext4_ext_drop_refs(holecheck_path); | 1422 | ext4_ext_drop_refs(holecheck_path); |
1308 | kfree(holecheck_path); | 1423 | kfree(holecheck_path); |
1309 | } | 1424 | } |
1310 | out2: | ||
1311 | mext_inode_double_unlock(orig_inode, donor_inode); | ||
1312 | 1425 | ||
1313 | if (ret) | 1426 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); |
1314 | return ret; | ||
1315 | 1427 | ||
1316 | /* All of the specified blocks must be exchanged in succeed */ | 1428 | if (ret1) |
1317 | BUG_ON(*moved_len != len); | 1429 | return ret1; |
1430 | else if (ret2) | ||
1431 | return ret2; | ||
1318 | 1432 | ||
1319 | return 0; | 1433 | return 0; |
1320 | } | 1434 | } |
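
ext4_move_extents() now tracks two statuses: ret1 for the main work and ret2 for the final unlock. The unlock is attempted unconditionally, and its status is surfaced only when the main path succeeded, so the primary error is never masked. A compilable sketch of that propagation shape, with stand-in helpers:

    #include <errno.h>

    static int do_work(void)   { return 0; }   /* stands in for the move */
    static int undo_lock(void) { return 0; }   /* stands in for the unlock */

    static int move(void)
    {
            int ret1, ret2;

            ret1 = do_work();
            ret2 = undo_lock();    /* always runs, even after failure */

            if (ret1)
                    return ret1;   /* the primary failure wins */
            else if (ret2)
                    return ret2;   /* otherwise surface the unlock failure */

            return 0;
    }
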
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index de04013d16ff..42f81d285cd5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1518 | return retval; | 1518 | return retval; |
1519 | 1519 | ||
1520 | if (blocks == 1 && !dx_fallback && | 1520 | if (blocks == 1 && !dx_fallback && |
1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) | 1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { |
1522 | return make_indexed_dir(handle, dentry, inode, bh); | 1522 | retval = make_indexed_dir(handle, dentry, inode, bh); |
1523 | if (retval == -ENOSPC) | ||
1524 | brelse(bh); | ||
1525 | return retval; | ||
1526 | } | ||
1523 | brelse(bh); | 1527 | brelse(bh); |
1524 | } | 1528 | } |
1525 | bh = ext4_append(handle, dir, &block, &retval); | 1529 | bh = ext4_append(handle, dir, &block, &retval); |
@@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1528 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1532 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1529 | de->inode = 0; | 1533 | de->inode = 0; |
1530 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1534 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
1531 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1535 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1536 | if (retval == -ENOSPC) | ||
1537 | brelse(bh); | ||
1538 | return retval; | ||
1532 | } | 1539 | } |
1533 | 1540 | ||
1534 | /* | 1541 | /* |
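
The brelse() calls added above plug a buffer_head leak: as these hunks treat it, add_dirent_to_buf() consumes the buffer reference on every outcome except -ENOSPC, so on that one error the caller must drop its own reference — which is also why the ext4_dx_add_entry hunk below clears bh only when the return is not -ENOSPC. A userspace sketch of that ownership rule, with a malloc'ed object standing in for the buffer_head (stand-in names throughout):

    #include <errno.h>
    #include <stdlib.h>

    struct buf { char data[32]; };

    static void brelse_stub(struct buf *bh) { free(bh); }

    /* Consumes bh on success; on -ENOSPC the caller keeps ownership. */
    static int add_entry_to_buf(struct buf *bh, int full)
    {
            if (full)
                    return -ENOSPC;
            brelse_stub(bh);
            return 0;
    }

    static int add_entry(struct buf *bh, int full)
    {
            int retval = add_entry_to_buf(bh, full);

            if (retval == -ENOSPC)
                    brelse_stub(bh);   /* the release the fix adds */
            return retval;
    }
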
@@ -1590,9 +1597,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1590 | goto cleanup; | 1597 | goto cleanup; |
1591 | node2 = (struct dx_node *)(bh2->b_data); | 1598 | node2 = (struct dx_node *)(bh2->b_data); |
1592 | entries2 = node2->entries; | 1599 | entries2 = node2->entries; |
1600 | memset(&node2->fake, 0, sizeof(struct fake_dirent)); | ||
1593 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, | 1601 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, |
1594 | sb->s_blocksize); | 1602 | sb->s_blocksize); |
1595 | node2->fake.inode = 0; | ||
1596 | BUFFER_TRACE(frame->bh, "get_write_access"); | 1603 | BUFFER_TRACE(frame->bh, "get_write_access"); |
1597 | err = ext4_journal_get_write_access(handle, frame->bh); | 1604 | err = ext4_journal_get_write_access(handle, frame->bh); |
1598 | if (err) | 1605 | if (err) |
@@ -1657,7 +1664,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1657 | if (!de) | 1664 | if (!de) |
1658 | goto cleanup; | 1665 | goto cleanup; |
1659 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1666 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1660 | bh = NULL; | 1667 | if (err != -ENOSPC) |
1668 | bh = NULL; | ||
1661 | goto cleanup; | 1669 | goto cleanup; |
1662 | 1670 | ||
1663 | journal_error: | 1671 | journal_error: |
@@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
2310 | struct inode *inode = old_dentry->d_inode; | 2318 | struct inode *inode = old_dentry->d_inode; |
2311 | int err, retries = 0; | 2319 | int err, retries = 0; |
2312 | 2320 | ||
2313 | if (EXT4_DIR_LINK_MAX(inode)) | 2321 | if (inode->i_nlink >= EXT4_LINK_MAX) |
2314 | return -EMLINK; | 2322 | return -EMLINK; |
2315 | 2323 | ||
2316 | /* | 2324 | /* |
@@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2413 | goto end_rename; | 2421 | goto end_rename; |
2414 | retval = -EMLINK; | 2422 | retval = -EMLINK; |
2415 | if (!new_inode && new_dir != old_dir && | 2423 | if (!new_inode && new_dir != old_dir && |
2416 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2424 | EXT4_DIR_LINK_MAX(new_dir)) |
2417 | goto end_rename; | 2425 | goto end_rename; |
2418 | } | 2426 | } |
2419 | if (!new_bh) { | 2427 | if (!new_bh) { |
@@ -2536,7 +2544,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2536 | .listxattr = ext4_listxattr, | 2544 | .listxattr = ext4_listxattr, |
2537 | .removexattr = generic_removexattr, | 2545 | .removexattr = generic_removexattr, |
2538 | #endif | 2546 | #endif |
2539 | .permission = ext4_permission, | 2547 | .check_acl = ext4_check_acl, |
2540 | .fiemap = ext4_fiemap, | 2548 | .fiemap = ext4_fiemap, |
2541 | }; | 2549 | }; |
2542 | 2550 | ||
@@ -2548,5 +2556,5 @@ const struct inode_operations ext4_special_inode_operations = { | |||
2548 | .listxattr = ext4_listxattr, | 2556 | .listxattr = ext4_listxattr, |
2549 | .removexattr = generic_removexattr, | 2557 | .removexattr = generic_removexattr, |
2550 | #endif | 2558 | #endif |
2551 | .permission = ext4_permission, | 2559 | .check_acl = ext4_check_acl, |
2552 | }; | 2560 | }; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 68b0351fc647..3cfc343c41b5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -746,7 +746,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
746 | struct inode *inode = NULL; | 746 | struct inode *inode = NULL; |
747 | handle_t *handle; | 747 | handle_t *handle; |
748 | int gdb_off, gdb_num; | 748 | int gdb_off, gdb_num; |
749 | int num_grp_locked = 0; | ||
750 | int err, err2; | 749 | int err, err2; |
751 | 750 | ||
752 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 751 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
@@ -856,7 +855,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
856 | * using the new disk blocks. | 855 | * using the new disk blocks. |
857 | */ | 856 | */ |
858 | 857 | ||
859 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); | ||
860 | /* Update group descriptor block for new group */ | 858 | /* Update group descriptor block for new group */ |
861 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | 859 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + |
862 | gdb_off * EXT4_DESC_SIZE(sb)); | 860 | gdb_off * EXT4_DESC_SIZE(sb)); |
@@ -875,10 +873,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
875 | * descriptor | 873 | * descriptor |
876 | */ | 874 | */ |
877 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | 875 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); |
878 | if (err) { | 876 | if (err) |
879 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
880 | goto exit_journal; | 877 | goto exit_journal; |
881 | } | ||
882 | 878 | ||
883 | /* | 879 | /* |
884 | * Make the new blocks and inodes valid next. We do this before | 880 | * Make the new blocks and inodes valid next. We do this before |
@@ -920,7 +916,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
920 | 916 | ||
921 | /* Update the global fs size fields */ | 917 | /* Update the global fs size fields */ |
922 | sbi->s_groups_count++; | 918 | sbi->s_groups_count++; |
923 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
924 | 919 | ||
925 | ext4_handle_dirty_metadata(handle, NULL, primary); | 920 | ext4_handle_dirty_metadata(handle, NULL, primary); |
926 | 921 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f4f079e6b9a..a6b1ab734728 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "ext4_jbd2.h" | 45 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 46 | #include "xattr.h" |
47 | #include "acl.h" | 47 | #include "acl.h" |
48 | #include "mballoc.h" | ||
48 | 49 | ||
49 | #define CREATE_TRACE_POINTS | 50 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/ext4.h> | 51 | #include <trace/events/ext4.h> |
@@ -344,7 +345,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, | |||
344 | errstr = "Out of memory"; | 345 | errstr = "Out of memory"; |
345 | break; | 346 | break; |
346 | case -EROFS: | 347 | case -EROFS: |
347 | if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) | 348 | if (!sb || (EXT4_SB(sb)->s_journal && |
349 | EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) | ||
348 | errstr = "Journal has aborted"; | 350 | errstr = "Journal has aborted"; |
349 | else | 351 | else |
350 | errstr = "Readonly filesystem"; | 352 | errstr = "Readonly filesystem"; |
@@ -1279,11 +1281,9 @@ static int parse_options(char *options, struct super_block *sb, | |||
1279 | *journal_devnum = option; | 1281 | *journal_devnum = option; |
1280 | break; | 1282 | break; |
1281 | case Opt_journal_checksum: | 1283 | case Opt_journal_checksum: |
1282 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | 1284 | break; /* Kept for backwards compatibility */ |
1283 | break; | ||
1284 | case Opt_journal_async_commit: | 1285 | case Opt_journal_async_commit: |
1285 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | 1286 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); |
1286 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1287 | break; | 1287 | break; |
1288 | case Opt_noload: | 1288 | case Opt_noload: |
1289 | set_opt(sbi->s_mount_opt, NOLOAD); | 1289 | set_opt(sbi->s_mount_opt, NOLOAD); |
@@ -1695,12 +1695,12 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1695 | gdp = ext4_get_group_desc(sb, i, NULL); | 1695 | gdp = ext4_get_group_desc(sb, i, NULL); |
1696 | 1696 | ||
1697 | flex_group = ext4_flex_group(sbi, i); | 1697 | flex_group = ext4_flex_group(sbi, i); |
1698 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, | 1698 | atomic_add(ext4_free_inodes_count(sb, gdp), |
1699 | ext4_free_inodes_count(sb, gdp)); | 1699 | &sbi->s_flex_groups[flex_group].free_inodes); |
1700 | atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, | 1700 | atomic_add(ext4_free_blks_count(sb, gdp), |
1701 | ext4_free_blks_count(sb, gdp)); | 1701 | &sbi->s_flex_groups[flex_group].free_blocks); |
1702 | atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, | 1702 | atomic_add(ext4_used_dirs_count(sb, gdp), |
1703 | ext4_used_dirs_count(sb, gdp)); | 1703 | &sbi->s_flex_groups[flex_group].used_dirs); |
1704 | } | 1704 | } |
1705 | 1705 | ||
1706 | return 1; | 1706 | return 1; |
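
The switch from atomic_set() to atomic_add() above matters because several block groups fold into one flex group: each loop iteration must accumulate into the shared flex-group counter, whereas a plain set would overwrite the contributions of all earlier groups. A runnable toy version (plain ints stand in for atomic_t since nothing races here, and the group counts are made up):

    #include <assert.h>

    int main(void)
    {
            int free_inodes_per_group[4] = { 10, 20, 30, 40 };
            int flex_free_inodes[2] = { 0, 0 };
            int groups_per_flex = 2;
            int i;

            for (i = 0; i < 4; i++) {
                    int flex = i / groups_per_flex;  /* ext4_flex_group() */

                    /* atomic_add semantics: accumulate, don't overwrite */
                    flex_free_inodes[flex] += free_inodes_per_group[i];
            }

            assert(flex_free_inodes[0] == 30);   /* 10 + 20, not just 20 */
            assert(flex_free_inodes[1] == 70);   /* 30 + 40 */
            return 0;
    }
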
@@ -2253,6 +2253,49 @@ static struct kobj_type ext4_ktype = { | |||
2253 | .release = ext4_sb_release, | 2253 | .release = ext4_sb_release, |
2254 | }; | 2254 | }; |
2255 | 2255 | ||
2256 | /* | ||
2257 | * Check whether this filesystem can be mounted based on | ||
2258 | * the features present and the RDONLY/RDWR mount requested. | ||
2259 | * Returns 1 if this filesystem can be mounted as requested, | ||
2260 | * 0 if it cannot be. | ||
2261 | */ | ||
2262 | static int ext4_feature_set_ok(struct super_block *sb, int readonly) | ||
2263 | { | ||
2264 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { | ||
2265 | ext4_msg(sb, KERN_ERR, | ||
2266 | "Couldn't mount because of " | ||
2267 | "unsupported optional features (%x)", | ||
2268 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
2269 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
2270 | return 0; | ||
2271 | } | ||
2272 | |||
2273 | if (readonly) | ||
2274 | return 1; | ||
2275 | |||
2276 | /* Check that feature set is OK for a read-write mount */ | ||
2277 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { | ||
2278 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " | ||
2279 | "unsupported optional features (%x)", | ||
2280 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
2281 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
2282 | return 0; | ||
2283 | } | ||
2284 | /* | ||
2285 | * Large file size enabled file system can only be mounted | ||
2286 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF | ||
2287 | */ | ||
2288 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | ||
2289 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
2290 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " | ||
2291 | "cannot be mounted RDWR without " | ||
2292 | "CONFIG_LBDAF"); | ||
2293 | return 0; | ||
2294 | } | ||
2295 | } | ||
2296 | return 1; | ||
2297 | } | ||
2298 | |||
2256 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2299 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2257 | __releases(kernel_lock) | 2300 | __releases(kernel_lock) |
2258 | __acquires(kernel_lock) | 2301 | __acquires(kernel_lock) |
@@ -2274,7 +2317,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2274 | unsigned int db_count; | 2317 | unsigned int db_count; |
2275 | unsigned int i; | 2318 | unsigned int i; |
2276 | int needs_recovery, has_huge_files; | 2319 | int needs_recovery, has_huge_files; |
2277 | int features; | ||
2278 | __u64 blocks_count; | 2320 | __u64 blocks_count; |
2279 | int err; | 2321 | int err; |
2280 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 2322 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
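
ext4_feature_set_ok() centralizes a mask test used at both mount and remount time: a filesystem is mountable only if it sets no incompat bits outside the kernel's supported set, while ro-compat bits are enforced only for read-write mounts. A compilable sketch of the test; the bit values are made up, not the real ext4 feature masks:

    #include <stdio.h>

    #define SUPP_INCOMPAT   0x0007u   /* illustrative masks */
    #define SUPP_RO_COMPAT  0x0003u

    static int feature_set_ok(unsigned int incompat, unsigned int ro_compat,
                              int readonly)
    {
            if (incompat & ~SUPP_INCOMPAT) {
                    fprintf(stderr, "unsupported incompat features (%x)\n",
                            incompat & ~SUPP_INCOMPAT);
                    return 0;
            }
            if (readonly)
                    return 1;     /* unknown ro-compat bits are fine ro */
            if (ro_compat & ~SUPP_RO_COMPAT) {
                    fprintf(stderr, "unsupported ro-compat features (%x)\n",
                            ro_compat & ~SUPP_RO_COMPAT);
                    return 0;
            }
            return 1;
    }
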
@@ -2401,39 +2443,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2401 | * previously didn't change the revision level when setting the flags, | 2443 | * previously didn't change the revision level when setting the flags, |
2402 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2444 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
2403 | */ | 2445 | */ |
2404 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); | 2446 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
2405 | if (features) { | ||
2406 | ext4_msg(sb, KERN_ERR, | ||
2407 | "Couldn't mount because of " | ||
2408 | "unsupported optional features (%x)", | ||
2409 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
2410 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
2411 | goto failed_mount; | ||
2412 | } | ||
2413 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); | ||
2414 | if (!(sb->s_flags & MS_RDONLY) && features) { | ||
2415 | ext4_msg(sb, KERN_ERR, | ||
2416 | "Couldn't mount RDWR because of " | ||
2417 | "unsupported optional features (%x)", | ||
2418 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
2419 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
2420 | goto failed_mount; | 2447 | goto failed_mount; |
2421 | } | 2448 | |
2422 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2423 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2424 | if (has_huge_files) { | ||
2425 | /* | ||
2426 | * Large file size enabled file system can only be | ||
2427 | * mount if kernel is build with CONFIG_LBDAF | ||
2428 | */ | ||
2429 | if (sizeof(root->i_blocks) < sizeof(u64) && | ||
2430 | !(sb->s_flags & MS_RDONLY)) { | ||
2431 | ext4_msg(sb, KERN_ERR, "Filesystem with huge " | ||
2432 | "files cannot be mounted read-write " | ||
2433 | "without CONFIG_LBDAF"); | ||
2434 | goto failed_mount; | ||
2435 | } | ||
2436 | } | ||
2437 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | 2449 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); |
2438 | 2450 | ||
2439 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 2451 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
@@ -2469,6 +2481,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2469 | } | 2481 | } |
2470 | } | 2482 | } |
2471 | 2483 | ||
2484 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2485 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2472 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, | 2486 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
2473 | has_huge_files); | 2487 | has_huge_files); |
2474 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | 2488 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); |
@@ -2549,12 +2563,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2549 | goto failed_mount; | 2563 | goto failed_mount; |
2550 | } | 2564 | } |
2551 | 2565 | ||
2552 | if (ext4_blocks_count(es) > | 2566 | /* |
2553 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 2567 | * Test whether we have more sectors than will fit in sector_t, |
2568 | * and whether the max offset is addressable by the page cache. | ||
2569 | */ | ||
2570 | if ((ext4_blocks_count(es) > | ||
2571 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | ||
2572 | (ext4_blocks_count(es) > | ||
2573 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2554 | ext4_msg(sb, KERN_ERR, "filesystem" | 2574 | ext4_msg(sb, KERN_ERR, "filesystem" |
2555 | " too large to mount safely"); | 2575 | " too large to mount safely on this system"); |
2556 | if (sizeof(sector_t) < 8) | 2576 | if (sizeof(sector_t) < 8) |
2557 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2577 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2578 | ret = -EFBIG; | ||
2558 | goto failed_mount; | 2579 | goto failed_mount; |
2559 | } | 2580 | } |
2560 | 2581 | ||
@@ -2595,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2595 | goto failed_mount; | 2616 | goto failed_mount; |
2596 | } | 2617 | } |
2597 | sbi->s_groups_count = blocks_count; | 2618 | sbi->s_groups_count = blocks_count; |
2619 | sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, | ||
2620 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | ||
2598 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | 2621 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
2599 | EXT4_DESC_PER_BLOCK(sb); | 2622 | EXT4_DESC_PER_BLOCK(sb); |
2600 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 2623 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), |
@@ -2729,20 +2752,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2729 | goto failed_mount4; | 2752 | goto failed_mount4; |
2730 | } | 2753 | } |
2731 | 2754 | ||
2732 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 2755 | jbd2_journal_set_features(sbi->s_journal, |
2733 | jbd2_journal_set_features(sbi->s_journal, | 2756 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); |
2734 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 2757 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) |
2758 | jbd2_journal_set_features(sbi->s_journal, 0, 0, | ||
2735 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2759 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
2736 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | 2760 | else |
2737 | jbd2_journal_set_features(sbi->s_journal, | ||
2738 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
2739 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 2761 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
2740 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2762 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
2741 | } else { | ||
2742 | jbd2_journal_clear_features(sbi->s_journal, | ||
2743 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2744 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2745 | } | ||
2746 | 2763 | ||
2747 | /* We have now updated the journal if required, so we can | 2764 | /* We have now updated the journal if required, so we can |
2748 | * validate the data journaling mode. */ | 2765 | * validate the data journaling mode. */ |
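
The journal setup above collapses three branches into one: the checksum compat feature is now set unconditionally, and only the async-commit incompat bit still follows the mount option. A runnable sketch with plain bit flags standing in for the jbd2 feature words:

    #include <assert.h>

    #define FEAT_CHECKSUM      0x1u
    #define FEAT_ASYNC_COMMIT  0x2u

    static unsigned int set_journal_features(unsigned int feat, int async_opt)
    {
            feat |= FEAT_CHECKSUM;               /* now set unconditionally */
            if (async_opt)
                    feat |= FEAT_ASYNC_COMMIT;   /* ..._set_features() */
            else
                    feat &= ~FEAT_ASYNC_COMMIT;  /* ..._clear_features() */
            return feat;
    }

    int main(void)
    {
            assert(set_journal_features(0, 0) == FEAT_CHECKSUM);
            assert(set_journal_features(FEAT_ASYNC_COMMIT, 0) == FEAT_CHECKSUM);
            assert(set_journal_features(0, 1) ==
                   (FEAT_CHECKSUM | FEAT_ASYNC_COMMIT));
            return 0;
    }
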
@@ -3208,7 +3225,18 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3208 | clear_buffer_write_io_error(sbh); | 3225 | clear_buffer_write_io_error(sbh); |
3209 | set_buffer_uptodate(sbh); | 3226 | set_buffer_uptodate(sbh); |
3210 | } | 3227 | } |
3211 | es->s_wtime = cpu_to_le32(get_seconds()); | 3228 | /* |
3229 | * If the file system is mounted read-only, don't update the | ||
3230 | * superblock write time. This avoids updating the superblock | ||
3231 | * write time when we are mounting the root file system | ||
3232 | * read/only but we need to replay the journal; at that point, | ||
3233 | * for people who are east of GMT and who make their clock | ||
3234 | * tick in localtime for Windows bug-for-bug compatibility, | ||
3235 | * the clock is set in the future, and this will cause e2fsck | ||
3236 | * to complain and force a full file system check. | ||
3237 | */ | ||
3238 | if (!(sb->s_flags & MS_RDONLY)) | ||
3239 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
3212 | es->s_kbytes_written = | 3240 | es->s_kbytes_written = |
3213 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3241 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
3214 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3242 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
@@ -3477,18 +3505,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3477 | if (sbi->s_journal) | 3505 | if (sbi->s_journal) |
3478 | ext4_mark_recovery_complete(sb, es); | 3506 | ext4_mark_recovery_complete(sb, es); |
3479 | } else { | 3507 | } else { |
3480 | int ret; | 3508 | /* Make sure we can mount this feature set readwrite */ |
3481 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3509 | if (!ext4_feature_set_ok(sb, 0)) { |
3482 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { | ||
3483 | ext4_msg(sb, KERN_WARNING, "couldn't " | ||
3484 | "remount RDWR because of unsupported " | ||
3485 | "optional features (%x)", | ||
3486 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & | ||
3487 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
3488 | err = -EROFS; | 3510 | err = -EROFS; |
3489 | goto restore_opts; | 3511 | goto restore_opts; |
3490 | } | 3512 | } |
3491 | |||
3492 | /* | 3513 | /* |
3493 | * Make sure the group descriptor checksums | 3514 | * Make sure the group descriptor checksums |
3494 | * are sane. If they aren't, refuse to remount r/w. | 3515 | * are sane. If they aren't, refuse to remount r/w. |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 62b31c246994..fed5b01d7a8d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -810,12 +810,23 @@ inserted: | |||
810 | get_bh(new_bh); | 810 | get_bh(new_bh); |
811 | } else { | 811 | } else { |
812 | /* We need to allocate a new block */ | 812 | /* We need to allocate a new block */ |
813 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, | 813 | ext4_fsblk_t goal, block; |
814 | |||
815 | goal = ext4_group_first_block_no(sb, | ||
814 | EXT4_I(inode)->i_block_group); | 816 | EXT4_I(inode)->i_block_group); |
815 | ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode, | 817 | |
818 | /* non-extent files can't have physical blocks past 2^32 */ | ||
819 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
820 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
821 | |||
822 | block = ext4_new_meta_blocks(handle, inode, | ||
816 | goal, NULL, &error); | 823 | goal, NULL, &error); |
817 | if (error) | 824 | if (error) |
818 | goto cleanup; | 825 | goto cleanup; |
826 | |||
827 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
828 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | ||
829 | |||
819 | ea_idebug(inode, "creating block %d", block); | 830 | ea_idebug(inode, "creating block %d", block); |
820 | 831 | ||
821 | new_bh = sb_getblk(sb, block); | 832 | new_bh = sb_getblk(sb, block); |
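
The xattr hunk above clamps the allocation goal for non-extent files, whose indirect-block format stores physical block numbers in 32 bits: masking the hint keeps the allocator from being pointed at an unaddressable block, and the BUG_ON afterwards asserts the block actually returned fits. A runnable sketch of the clamp; MAX_FILE_PHYS mirrors the idea of EXT4_MAX_BLOCK_FILE_PHYS and its value here is illustrative:

    #include <assert.h>
    #include <stdint.h>

    #define MAX_FILE_PHYS 0xffffffffULL   /* 2^32 - 1, illustrative */

    static uint64_t pick_goal(uint64_t goal, int extent_based)
    {
            if (!extent_based)
                    goal &= MAX_FILE_PHYS;   /* keep the hint addressable */
            return goal;
    }

    int main(void)
    {
            assert(pick_goal(0x1ffffffffULL, 0) <= MAX_FILE_PHYS);
            assert(pick_goal(0x1ffffffffULL, 1) == 0x1ffffffffULL);
            return 0;
    }
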
diff --git a/fs/fat/file.c b/fs/fat/file.c index f042b965c95c..e8c159de236b 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size) | |||
176 | 176 | ||
177 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | 177 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; |
178 | mark_inode_dirty(inode); | 178 | mark_inode_dirty(inode); |
179 | if (IS_SYNC(inode)) | 179 | if (IS_SYNC(inode)) { |
180 | err = sync_page_range_nolock(inode, mapping, start, count); | 180 | int err2; |
181 | |||
182 | /* | ||
183 | * Opencode syncing since we don't have a file open to use | ||
184 | * standard fsync path. | ||
185 | */ | ||
186 | err = filemap_fdatawrite_range(mapping, start, | ||
187 | start + count - 1); | ||
188 | err2 = sync_mapping_buffers(mapping); | ||
189 | if (!err) | ||
190 | err = err2; | ||
191 | err2 = write_inode_now(inode, 1); | ||
192 | if (!err) | ||
193 | err = err2; | ||
194 | if (!err) { | ||
195 | err = filemap_fdatawait_range(mapping, start, | ||
196 | start + count - 1); | ||
197 | } | ||
198 | } | ||
181 | out: | 199 | out: |
182 | return err; | 200 | return err; |
183 | } | 201 | } |
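
The fat_cont_expand() change open-codes what a file-based fsync would do, running each step and remembering only the first failure: write out the data range, flush the mapping's metadata buffers, push the inode itself, and wait on the range only if nothing has failed yet. A compilable sketch of that sequence; the step functions are stand-ins for filemap_fdatawrite_range(), sync_mapping_buffers(), write_inode_now() and filemap_fdatawait_range():

    static int write_data(void)   { return 0; }
    static int sync_buffers(void) { return 0; }
    static int write_inode(void)  { return 0; }
    static int wait_data(void)    { return 0; }

    static int sync_expanded_range(void)
    {
            int err, err2;

            err = write_data();          /* kick off data writeback */
            err2 = sync_buffers();       /* flush associated metadata */
            if (!err)
                    err = err2;          /* keep the first error only */
            err2 = write_inode();        /* push the inode itself */
            if (!err)
                    err = err2;
            if (!err)
                    err = wait_data();   /* finally wait on the range */
            return err;
    }
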
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index a6c20473dfd7..4e35be873e09 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) | |||
119 | MSDOS_I(inode)->i_start = new_dclus; | 119 | MSDOS_I(inode)->i_start = new_dclus; |
120 | MSDOS_I(inode)->i_logstart = new_dclus; | 120 | MSDOS_I(inode)->i_logstart = new_dclus; |
121 | /* | 121 | /* |
122 | * Since generic_osync_inode() synchronize later if | 122 | * Since generic_write_sync() synchronizes regular files later, |
123 | * this is not directory, we don't here. | 123 | * we sync here only directories. |
124 | */ | 124 | */ |
125 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { | 125 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { |
126 | ret = fat_sync_inode(inode); | 126 | ret = fat_sync_inode(inode); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c54226be5294..8e1e5e19d21e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -19,171 +19,245 @@ | |||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/kthread.h> | ||
23 | #include <linux/freezer.h> | ||
22 | #include <linux/writeback.h> | 24 | #include <linux/writeback.h> |
23 | #include <linux/blkdev.h> | 25 | #include <linux/blkdev.h> |
24 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
25 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
26 | #include "internal.h" | 28 | #include "internal.h" |
27 | 29 | ||
30 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
28 | 31 | ||
29 | /** | 32 | /* |
30 | * writeback_acquire - attempt to get exclusive writeback access to a device | 33 | * We don't actually have pdflush, but this one is exported though /proc... |
31 | * @bdi: the device's backing_dev_info structure | 34 | */ |
32 | * | 35 | int nr_pdflush_threads; |
33 | * It is a waste of resources to have more than one pdflush thread blocked on | 36 | |
34 | * a single request queue. Exclusion at the request_queue level is obtained | 37 | /* |
35 | * via a flag in the request_queue's backing_dev_info.state. | 38 | * Passed into wb_writeback(), essentially a subset of writeback_control |
36 | * | 39 | */ |
37 | * Non-request_queue-backed address_spaces will share default_backing_dev_info, | 40 | struct wb_writeback_args { |
38 | * unless they implement their own. Which is somewhat inefficient, as this | 41 | long nr_pages; |
39 | * may prevent concurrent writeback against multiple devices. | 42 | struct super_block *sb; |
43 | enum writeback_sync_modes sync_mode; | ||
44 | int for_kupdate; | ||
45 | int range_cyclic; | ||
46 | }; | ||
47 | |||
48 | /* | ||
49 | * Work items for the bdi_writeback threads | ||
40 | */ | 50 | */ |
41 | static int writeback_acquire(struct backing_dev_info *bdi) | 51 | struct bdi_work { |
52 | struct list_head list; /* pending work list */ | ||
53 | struct rcu_head rcu_head; /* for RCU free/clear of work */ | ||
54 | |||
55 | unsigned long seen; /* threads that have seen this work */ | ||
56 | atomic_t pending; /* number of threads still to do work */ | ||
57 | |||
58 | struct wb_writeback_args args; /* writeback arguments */ | ||
59 | |||
60 | unsigned long state; /* flag bits, see WS_* */ | ||
61 | }; | ||
62 | |||
63 | enum { | ||
64 | WS_USED_B = 0, | ||
65 | WS_ONSTACK_B, | ||
66 | }; | ||
67 | |||
68 | #define WS_USED (1 << WS_USED_B) | ||
69 | #define WS_ONSTACK (1 << WS_ONSTACK_B) | ||
70 | |||
71 | static inline bool bdi_work_on_stack(struct bdi_work *work) | ||
72 | { | ||
73 | return test_bit(WS_ONSTACK_B, &work->state); | ||
74 | } | ||
75 | |||
76 | static inline void bdi_work_init(struct bdi_work *work, | ||
77 | struct wb_writeback_args *args) | ||
42 | { | 78 | { |
43 | return !test_and_set_bit(BDI_pdflush, &bdi->state); | 79 | INIT_RCU_HEAD(&work->rcu_head); |
80 | work->args = *args; | ||
81 | work->state = WS_USED; | ||
44 | } | 82 | } |
45 | 83 | ||
46 | /** | 84 | /** |
47 | * writeback_in_progress - determine whether there is writeback in progress | 85 | * writeback_in_progress - determine whether there is writeback in progress |
48 | * @bdi: the device's backing_dev_info structure. | 86 | * @bdi: the device's backing_dev_info structure. |
49 | * | 87 | * |
50 | * Determine whether there is writeback in progress against a backing device. | 88 | * Determine whether there is writeback waiting to be handled against a |
89 | * backing device. | ||
51 | */ | 90 | */ |
52 | int writeback_in_progress(struct backing_dev_info *bdi) | 91 | int writeback_in_progress(struct backing_dev_info *bdi) |
53 | { | 92 | { |
54 | return test_bit(BDI_pdflush, &bdi->state); | 93 | return !list_empty(&bdi->work_list); |
55 | } | 94 | } |
56 | 95 | ||
57 | /** | 96 | static void bdi_work_clear(struct bdi_work *work) |
58 | * writeback_release - relinquish exclusive writeback access against a device. | ||
59 | * @bdi: the device's backing_dev_info structure | ||
60 | */ | ||
61 | static void writeback_release(struct backing_dev_info *bdi) | ||
62 | { | 97 | { |
63 | BUG_ON(!writeback_in_progress(bdi)); | 98 | clear_bit(WS_USED_B, &work->state); |
64 | clear_bit(BDI_pdflush, &bdi->state); | 99 | smp_mb__after_clear_bit(); |
100 | /* | ||
101 | * work can have disappeared at this point. bit waitq functions | ||
102 | * should be able to tolerate this, provided bdi_sched_wait does | ||
103 | * not dereference its pointer argument. | ||
104 | */ | ||
105 | wake_up_bit(&work->state, WS_USED_B); | ||
65 | } | 106 | } |
66 | 107 | ||
67 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 108 | static void bdi_work_free(struct rcu_head *head) |
68 | { | 109 | { |
69 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 110 | struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); |
70 | struct dentry *dentry; | ||
71 | const char *name = "?"; | ||
72 | 111 | ||
73 | dentry = d_find_alias(inode); | 112 | if (!bdi_work_on_stack(work)) |
74 | if (dentry) { | 113 | kfree(work); |
75 | spin_lock(&dentry->d_lock); | 114 | else |
76 | name = (const char *) dentry->d_name.name; | 115 | bdi_work_clear(work); |
77 | } | ||
78 | printk(KERN_DEBUG | ||
79 | "%s(%d): dirtied inode %lu (%s) on %s\n", | ||
80 | current->comm, task_pid_nr(current), inode->i_ino, | ||
81 | name, inode->i_sb->s_id); | ||
82 | if (dentry) { | ||
83 | spin_unlock(&dentry->d_lock); | ||
84 | dput(dentry); | ||
85 | } | ||
86 | } | ||
87 | } | 116 | } |
88 | 117 | ||
89 | /** | 118 | static void wb_work_complete(struct bdi_work *work) |
90 | * __mark_inode_dirty - internal function | ||
91 | * @inode: inode to mark | ||
92 | * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) | ||
93 | * Mark an inode as dirty. Callers should use mark_inode_dirty or | ||
94 | * mark_inode_dirty_sync. | ||
95 | * | ||
96 | * Put the inode on the super block's dirty list. | ||
97 | * | ||
98 | * CAREFUL! We mark it dirty unconditionally, but move it onto the | ||
99 | * dirty list only if it is hashed or if it refers to a blockdev. | ||
100 | * If it was not hashed, it will never be added to the dirty list | ||
101 | * even if it is later hashed, as it will have been marked dirty already. | ||
102 | * | ||
103 | * In short, make sure you hash any inodes _before_ you start marking | ||
104 | * them dirty. | ||
105 | * | ||
106 | * This function *must* be atomic for the I_DIRTY_PAGES case - | ||
107 | * set_page_dirty() is called under spinlock in several places. | ||
108 | * | ||
109 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of | ||
110 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of | ||
111 | * the kernel-internal blockdev inode represents the dirtying time of the | ||
112 | * blockdev's pages. This is why for I_DIRTY_PAGES we always use | ||
113 | * page->mapping->host, so the page-dirtying time is recorded in the internal | ||
114 | * blockdev inode. | ||
115 | */ | ||
116 | void __mark_inode_dirty(struct inode *inode, int flags) | ||
117 | { | 119 | { |
118 | struct super_block *sb = inode->i_sb; | 120 | const enum writeback_sync_modes sync_mode = work->args.sync_mode; |
121 | int onstack = bdi_work_on_stack(work); | ||
119 | 122 | ||
120 | /* | 123 | /* |
121 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 124 | * For allocated work, we can clear the done/seen bit right here. |
122 | * dirty the inode itself | 125 | * For on-stack work, we need to postpone both the clear and free |
126 | * to after the RCU grace period, since the stack could be invalidated | ||
127 | * as soon as bdi_work_clear() has done the wakeup. | ||
123 | */ | 128 | */ |
124 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 129 | if (!onstack) |
125 | if (sb->s_op->dirty_inode) | 130 | bdi_work_clear(work); |
126 | sb->s_op->dirty_inode(inode); | 131 | if (sync_mode == WB_SYNC_NONE || onstack) |
127 | } | 132 | call_rcu(&work->rcu_head, bdi_work_free); |
133 | } | ||
128 | 134 | ||
135 | static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | ||
136 | { | ||
129 | /* | 137 | /* |
130 | * make sure that changes are seen by all cpus before we test i_state | 138 | * so drop our reference. If this is the last ref, delete and free it |
131 | * -- mikulas | 139 | * drop our reference. If this is the last ref, delete and free it |
132 | */ | 140 | */ |
133 | smp_mb(); | 141 | if (atomic_dec_and_test(&work->pending)) { |
142 | struct backing_dev_info *bdi = wb->bdi; | ||
134 | 143 | ||
135 | /* avoid the locking if we can */ | 144 | spin_lock(&bdi->wb_lock); |
136 | if ((inode->i_state & flags) == flags) | 145 | list_del_rcu(&work->list); |
137 | return; | 146 | spin_unlock(&bdi->wb_lock); |
138 | 147 | ||
139 | if (unlikely(block_dump)) | 148 | wb_work_complete(work); |
140 | block_dump___mark_inode_dirty(inode); | 149 | } |
150 | } | ||
141 | 151 | ||
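wb_clear_pending() above relies on the classic "last reference cleans up" idiom: every writeback thread decrements ->pending, and only the thread that drops it to zero unlinks and completes the work. A userspace sketch of that idiom with C11 atomics (struct shared and put_ref are hypothetical names; the kernel uses atomic_dec_and_test plus its own locking):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct shared { atomic_int pending; };

static void put_ref(struct shared *s)
{
        /* atomic_fetch_sub() returns the old value: old == 1 means this
         * caller dropped the final reference and may free the object. */
        if (atomic_fetch_sub(&s->pending, 1) == 1) {
                printf("last reference dropped, freeing\n");
                free(s);
        }
}

int main(void)
{
        struct shared *s = malloc(sizeof(*s));

        if (!s)
                return 1;
        atomic_init(&s->pending, 3);    /* three threads will ack this work */
        put_ref(s);
        put_ref(s);
        put_ref(s);                     /* this call frees */
        return 0;
}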
142 | spin_lock(&inode_lock); | 152 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) |
143 | if ((inode->i_state & flags) != flags) { | 153 | { |
144 | const int was_dirty = inode->i_state & I_DIRTY; | 154 | work->seen = bdi->wb_mask; |
155 | BUG_ON(!work->seen); | ||
156 | atomic_set(&work->pending, bdi->wb_cnt); | ||
157 | BUG_ON(!bdi->wb_cnt); | ||
145 | 158 | ||
146 | inode->i_state |= flags; | 159 | /* |
160 | * list_add_tail_rcu() contains the necessary barriers to | ||
161 | * make sure the above stores are seen before the item is | ||
162 | * noticed on the list | ||
163 | */ | ||
164 | spin_lock(&bdi->wb_lock); | ||
165 | list_add_tail_rcu(&work->list, &bdi->work_list); | ||
166 | spin_unlock(&bdi->wb_lock); | ||
147 | 167 | ||
148 | /* | 168 | /* |
149 | * If the inode is being synced, just update its dirty state. | 169 | * If the default thread isn't there, make sure we add it. When |
150 | * The unlocker will place the inode on the appropriate | 170 | * it gets created and wakes up, we'll run this work. |
151 | * superblock list, based upon its state. | 171 | */ |
152 | */ | 172 | if (unlikely(list_empty_careful(&bdi->wb_list))) |
153 | if (inode->i_state & I_SYNC) | 173 | wake_up_process(default_backing_dev_info.wb.task); |
154 | goto out; | 174 | else { |
175 | struct bdi_writeback *wb = &bdi->wb; | ||
155 | 176 | ||
156 | /* | 177 | if (wb->task) |
157 | * Only add valid (hashed) inodes to the superblock's | 178 | wake_up_process(wb->task); |
158 | * dirty list. Add blockdev inodes as well. | 179 | } |
159 | */ | 180 | } |
160 | if (!S_ISBLK(inode->i_mode)) { | ||
161 | if (hlist_unhashed(&inode->i_hash)) | ||
162 | goto out; | ||
163 | } | ||
164 | if (inode->i_state & (I_FREEING|I_CLEAR)) | ||
165 | goto out; | ||
166 | 181 | ||
167 | /* | 182 | /* |
168 | * If the inode was already on s_dirty/s_io/s_more_io, don't | 183 | * Used for on-stack allocated work items. The caller needs to wait until |
169 | * reposition it (that would break s_dirty time-ordering). | 184 | * the wb threads have acked the work before it's safe to continue. |
170 | */ | 185 | */ |
171 | if (!was_dirty) { | 186 | static void bdi_wait_on_work_clear(struct bdi_work *work) |
172 | inode->dirtied_when = jiffies; | 187 | { |
173 | list_move(&inode->i_list, &sb->s_dirty); | 188 | wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, |
174 | } | 189 | TASK_UNINTERRUPTIBLE); |
190 | } | ||
191 | |||
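bdi_work_clear() and bdi_wait_on_work_clear() form a clear-and-wake handshake: the worker clears WS_USED and wakes any sleeper, and only then may the submitter's on-stack work be reused. A rough userspace model using a mutex and condition variable (hypothetical names; the kernel's bit-waitqueues avoid a per-object condvar, but the observable ordering is the same):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int used = 1;                    /* WS_USED analogue */

static void *worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        used = 0;                       /* bdi_work_clear() analogue */
        pthread_cond_broadcast(&cond);  /* wake_up_bit() analogue */
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        pthread_mutex_lock(&lock);
        while (used)                    /* bdi_wait_on_work_clear() analogue */
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        printf("work acked; on-stack memory may now be reused\n");
        return 0;
}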
192 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | ||
193 | struct wb_writeback_args *args) | ||
194 | { | ||
195 | struct bdi_work *work; | ||
196 | |||
197 | /* | ||
198 | * This is WB_SYNC_NONE writeback, so if allocation fails just | ||
199 | * wake up the thread for old dirty data writeback | ||
200 | */ | ||
201 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | ||
202 | if (work) { | ||
203 | bdi_work_init(work, args); | ||
204 | bdi_queue_work(bdi, work); | ||
205 | } else { | ||
206 | struct bdi_writeback *wb = &bdi->wb; | ||
207 | |||
208 | if (wb->task) | ||
209 | wake_up_process(wb->task); | ||
175 | } | 210 | } |
176 | out: | ||
177 | spin_unlock(&inode_lock); | ||
178 | } | 211 | } |
179 | 212 | ||
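bdi_alloc_queue_work() degrades gracefully: the work item is allocated with GFP_ATOMIC because this path may not sleep, and if even that fails the code falls back to simply waking the flusher thread, which will still write out old data. A userspace sketch of the same best-effort pattern (submit, queue_job and poke_worker are invented names for illustration):

#include <stdio.h>
#include <stdlib.h>

struct job { long nr_pages; };

static void queue_job(struct job *j)
{
        printf("queued work for %ld pages\n", j->nr_pages);
        free(j);                /* a real queue would free after processing */
}

static void poke_worker(void)
{
        printf("allocation failed: waking the worker anyway\n");
}

static void submit(long nr_pages)
{
        struct job *j = malloc(sizeof(*j));

        if (j) {
                j->nr_pages = nr_pages;
                queue_job(j);
        } else {
                poke_worker();  /* old-data writeback still happens */
        }
}

int main(void)
{
        submit(1024);
        return 0;
}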
180 | EXPORT_SYMBOL(__mark_inode_dirty); | 213 | /** |
214 | * bdi_sync_writeback - start and wait for writeback | ||
215 | * @bdi: the backing device to write from | ||
216 | * @sb: write inodes from this super_block | ||
217 | * | ||
218 | * Description: | ||
219 | * This does WB_SYNC_ALL data integrity writeback and waits for the | ||
220 | * IO to complete. Callers must hold the sb s_umount semaphore for | ||
221 | * reading, to avoid having the super disappear before we are done. | ||
222 | */ | ||
223 | static void bdi_sync_writeback(struct backing_dev_info *bdi, | ||
224 | struct super_block *sb) | ||
225 | { | ||
226 | struct wb_writeback_args args = { | ||
227 | .sb = sb, | ||
228 | .sync_mode = WB_SYNC_ALL, | ||
229 | .nr_pages = LONG_MAX, | ||
230 | .range_cyclic = 0, | ||
231 | }; | ||
232 | struct bdi_work work; | ||
181 | 233 | ||
182 | static int write_inode(struct inode *inode, int sync) | 234 | bdi_work_init(&work, &args); |
235 | work.state |= WS_ONSTACK; | ||
236 | |||
237 | bdi_queue_work(bdi, &work); | ||
238 | bdi_wait_on_work_clear(&work); | ||
239 | } | ||
240 | |||
241 | /** | ||
242 | * bdi_start_writeback - start writeback | ||
243 | * @bdi: the backing device to write from | ||
244 | * @nr_pages: the number of pages to write | ||
245 | * | ||
246 | * Description: | ||
247 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | ||
248 | * started when this function returns; we make no guarantees on | ||
249 | * completion. Caller need not hold sb s_umount semaphore. | ||
250 | * | ||
251 | */ | ||
252 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | ||
183 | { | 253 | { |
184 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) | 254 | struct wb_writeback_args args = { |
185 | return inode->i_sb->s_op->write_inode(inode, sync); | 255 | .sync_mode = WB_SYNC_NONE, |
186 | return 0; | 256 | .nr_pages = nr_pages, |
257 | .range_cyclic = 1, | ||
258 | }; | ||
259 | |||
260 | bdi_alloc_queue_work(bdi, &args); | ||
187 | } | 261 | } |
188 | 262 | ||
189 | /* | 263 | /* |
@@ -191,31 +265,32 @@ static int write_inode(struct inode *inode, int sync) | |||
191 | * furthest end of its superblock's dirty-inode list. | 265 | * furthest end of its superblock's dirty-inode list. |
192 | * | 266 | * |
193 | * Before stamping the inode's ->dirtied_when, we check to see whether it is | 267 | * Before stamping the inode's ->dirtied_when, we check to see whether it is |
194 | * already the most-recently-dirtied inode on the s_dirty list. If that is | 268 | * already the most-recently-dirtied inode on the b_dirty list. If that is |
195 | * the case then the inode must have been redirtied while it was being written | 269 | * the case then the inode must have been redirtied while it was being written |
196 | * out and we don't reset its dirtied_when. | 270 | * out and we don't reset its dirtied_when. |
197 | */ | 271 | */ |
198 | static void redirty_tail(struct inode *inode) | 272 | static void redirty_tail(struct inode *inode) |
199 | { | 273 | { |
200 | struct super_block *sb = inode->i_sb; | 274 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
201 | 275 | ||
202 | if (!list_empty(&sb->s_dirty)) { | 276 | if (!list_empty(&wb->b_dirty)) { |
203 | struct inode *tail_inode; | 277 | struct inode *tail; |
204 | 278 | ||
205 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); | 279 | tail = list_entry(wb->b_dirty.next, struct inode, i_list); |
206 | if (time_before(inode->dirtied_when, | 280 | if (time_before(inode->dirtied_when, tail->dirtied_when)) |
207 | tail_inode->dirtied_when)) | ||
208 | inode->dirtied_when = jiffies; | 281 | inode->dirtied_when = jiffies; |
209 | } | 282 | } |
210 | list_move(&inode->i_list, &sb->s_dirty); | 283 | list_move(&inode->i_list, &wb->b_dirty); |
211 | } | 284 | } |
212 | 285 | ||
213 | /* | 286 | /* |
214 | * requeue inode for re-scanning after sb->s_io list is exhausted. | 287 | * requeue inode for re-scanning after bdi->b_io list is exhausted. |
215 | */ | 288 | */ |
216 | static void requeue_io(struct inode *inode) | 289 | static void requeue_io(struct inode *inode) |
217 | { | 290 | { |
218 | list_move(&inode->i_list, &inode->i_sb->s_more_io); | 291 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
292 | |||
293 | list_move(&inode->i_list, &wb->b_more_io); | ||
219 | } | 294 | } |
220 | 295 | ||
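redirty_tail() above compares timestamps with time_before(), which stays correct when jiffies wraps around. A self-contained illustration of the signed-subtraction trick behind it (mock_time_before is a hypothetical stand-in for the kernel macro):

#include <stdio.h>

static int mock_time_before(unsigned long a, unsigned long b)
{
        /* Signed subtraction keeps the comparison correct across a wrap
         * of the unsigned counter, as long as the two stamps are less
         * than half the counter range apart. */
        return (long)(a - b) < 0;
}

int main(void)
{
        unsigned long near_wrap = (unsigned long)-10;   /* just before wrap */
        unsigned long wrapped   = 5;                    /* just after wrap */

        printf("naive compare:  %d\n", near_wrap < wrapped);           /* 0: wrong */
        printf("signed compare: %d\n", mock_time_before(near_wrap, wrapped)); /* 1 */
        return 0;
}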
221 | static void inode_sync_complete(struct inode *inode) | 296 | static void inode_sync_complete(struct inode *inode) |
@@ -262,20 +337,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
262 | /* | 337 | /* |
263 | * Queue all expired dirty inodes for io, eldest first. | 338 | * Queue all expired dirty inodes for io, eldest first. |
264 | */ | 339 | */ |
265 | static void queue_io(struct super_block *sb, | 340 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
266 | unsigned long *older_than_this) | ||
267 | { | 341 | { |
268 | list_splice_init(&sb->s_more_io, sb->s_io.prev); | 342 | list_splice_init(&wb->b_more_io, wb->b_io.prev); |
269 | move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); | 343 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
270 | } | 344 | } |
271 | 345 | ||
272 | int sb_has_dirty_inodes(struct super_block *sb) | 346 | static int write_inode(struct inode *inode, int sync) |
273 | { | 347 | { |
274 | return !list_empty(&sb->s_dirty) || | 348 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) |
275 | !list_empty(&sb->s_io) || | 349 | return inode->i_sb->s_op->write_inode(inode, sync); |
276 | !list_empty(&sb->s_more_io); | 350 | return 0; |
277 | } | 351 | } |
278 | EXPORT_SYMBOL(sb_has_dirty_inodes); | ||
279 | 352 | ||
280 | /* | 353 | /* |
281 | * Wait for writeback on an inode to complete. | 354 | * Wait for writeback on an inode to complete. |
@@ -322,11 +395,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
322 | if (inode->i_state & I_SYNC) { | 395 | if (inode->i_state & I_SYNC) { |
323 | /* | 396 | /* |
324 | * If this inode is locked for writeback and we are not doing | 397 | * If this inode is locked for writeback and we are not doing |
325 | * writeback-for-data-integrity, move it to s_more_io so that | 398 | * writeback-for-data-integrity, move it to b_more_io so that |
326 | * writeback can proceed with the other inodes on s_io. | 399 | * writeback can proceed with the other inodes on b_io. |
327 | * | 400 | * |
328 | * We'll have another go at writing back this inode when we | 401 | * We'll have another go at writing back this inode when we |
329 | * completed a full scan of s_io. | 402 | * completed a full scan of b_io. |
330 | */ | 403 | */ |
331 | if (!wait) { | 404 | if (!wait) { |
332 | requeue_io(inode); | 405 | requeue_io(inode); |
@@ -371,11 +444,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
371 | /* | 444 | /* |
372 | * We didn't write back all the pages. nfs_writepages() | 445 | * We didn't write back all the pages. nfs_writepages() |
373 | * sometimes bales out without doing anything. Redirty | 446 | * sometimes bales out without doing anything. Redirty |
374 | * the inode; Move it from s_io onto s_more_io/s_dirty. | 447 | * the inode; Move it from b_io onto b_more_io/b_dirty. |
375 | */ | 448 | */ |
376 | /* | 449 | /* |
377 | * akpm: if the caller was the kupdate function we put | 450 | * akpm: if the caller was the kupdate function we put |
378 | * this inode at the head of s_dirty so it gets first | 451 | * this inode at the head of b_dirty so it gets first |
379 | * consideration. Otherwise, move it to the tail, for | 452 | * consideration. Otherwise, move it to the tail, for |
380 | * the reasons described there. I'm not really sure | 453 | * the reasons described there. I'm not really sure |
381 | * how much sense this makes. Presumably I had a good | 454 | * how much sense this makes. Presumably I had a good |
@@ -385,7 +458,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
385 | if (wbc->for_kupdate) { | 458 | if (wbc->for_kupdate) { |
386 | /* | 459 | /* |
387 | * For the kupdate function we move the inode | 460 | * For the kupdate function we move the inode |
388 | * to s_more_io so it will get more writeout as | 461 | * to b_more_io so it will get more writeout as |
389 | * soon as the queue becomes uncongested. | 462 | * soon as the queue becomes uncongested. |
390 | */ | 463 | */ |
391 | inode->i_state |= I_DIRTY_PAGES; | 464 | inode->i_state |= I_DIRTY_PAGES; |
@@ -434,50 +507,84 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
434 | } | 507 | } |
435 | 508 | ||
436 | /* | 509 | /* |
437 | * Write out a superblock's list of dirty inodes. A wait will be performed | 510 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned |
438 | * upon no inodes, all inodes or the final one, depending upon sync_mode. | 511 | * before calling writeback. So make sure that we do pin it, so it doesn't |
439 | * | 512 | * go away while we are writing inodes from it. |
440 | * If older_than_this is non-NULL, then only write out inodes which | ||
441 | * had their first dirtying at a time earlier than *older_than_this. | ||
442 | * | ||
443 | * If we're a pdflush thread, then implement pdflush collision avoidance | ||
444 | * against the entire list. | ||
445 | * | 513 | * |
446 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | 514 | * Returns 0 if the super was successfully pinned (or pinning wasn't needed), |
447 | * This function assumes that the blockdev superblock's inodes are backed by | 515 | * 1 if we failed. |
448 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
449 | * assume that all inodes are backed by the same queue. | ||
450 | * | ||
451 | * FIXME: this linear search could get expensive with many filesystems. But | ||
452 | * how to fix? We need to go from an address_space to all inodes which share | ||
453 | * a queue with that address_space. (Easy: have a global "dirty superblocks" | ||
454 | * list). | ||
455 | * | ||
456 | * The inodes to be written are parked on sb->s_io. They are moved back onto | ||
457 | * sb->s_dirty as they are selected for writing. This way, none can be missed | ||
458 | * on the writer throttling path, and we get decent balancing between many | ||
459 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
460 | */ | 516 | */ |
461 | void generic_sync_sb_inodes(struct super_block *sb, | 517 | static int pin_sb_for_writeback(struct writeback_control *wbc, |
518 | struct inode *inode) | ||
519 | { | ||
520 | struct super_block *sb = inode->i_sb; | ||
521 | |||
522 | /* | ||
523 | * Caller must already hold the ref for this | ||
524 | */ | ||
525 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
526 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
527 | return 0; | ||
528 | } | ||
529 | |||
530 | spin_lock(&sb_lock); | ||
531 | sb->s_count++; | ||
532 | if (down_read_trylock(&sb->s_umount)) { | ||
533 | if (sb->s_root) { | ||
534 | spin_unlock(&sb_lock); | ||
535 | return 0; | ||
536 | } | ||
537 | /* | ||
538 | * umounted, drop rwsem again and fall through to failure | ||
539 | */ | ||
540 | up_read(&sb->s_umount); | ||
541 | } | ||
542 | |||
543 | sb->s_count--; | ||
544 | spin_unlock(&sb_lock); | ||
545 | return 1; | ||
546 | } | ||
547 | |||
548 | static void unpin_sb_for_writeback(struct writeback_control *wbc, | ||
549 | struct inode *inode) | ||
550 | { | ||
551 | struct super_block *sb = inode->i_sb; | ||
552 | |||
553 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
554 | return; | ||
555 | |||
556 | up_read(&sb->s_umount); | ||
557 | put_super(sb); | ||
558 | } | ||
559 | |||
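pin_sb_for_writeback() combines a reference count with a trylock: bump s_count, try to take s_umount shared, and re-check s_root to catch a concurrent umount; any failure backs out cleanly and the caller just requeues the inode. A loose userspace model of that shape (mock_sb, pin and unpin are hypothetical, and the refcount is assumed to be guarded elsewhere, as s_count is by sb_lock):

#include <pthread.h>
#include <stdio.h>

struct mock_sb {
        pthread_rwlock_t umount_lock;   /* s_umount analogue */
        int refcount;                   /* s_count analogue */
        int alive;                      /* s_root != NULL analogue */
};

static int pin(struct mock_sb *sb)
{
        sb->refcount++;
        if (pthread_rwlock_tryrdlock(&sb->umount_lock) == 0) {
                if (sb->alive)
                        return 0;       /* pinned */
                pthread_rwlock_unlock(&sb->umount_lock);
        }
        sb->refcount--;                 /* back out */
        return 1;                       /* failed, caller requeues */
}

static void unpin(struct mock_sb *sb)
{
        pthread_rwlock_unlock(&sb->umount_lock);
        sb->refcount--;
}

int main(void)
{
        struct mock_sb sb = { PTHREAD_RWLOCK_INITIALIZER, 0, 1 };

        if (pin(&sb) == 0) {
                printf("pinned, safe to write inodes\n");
                unpin(&sb);
        }
        return 0;
}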
560 | static void writeback_inodes_wb(struct bdi_writeback *wb, | ||
462 | struct writeback_control *wbc) | 561 | struct writeback_control *wbc) |
463 | { | 562 | { |
563 | struct super_block *sb = wbc->sb; | ||
564 | const int is_blkdev_sb = sb_is_blkdev_sb(sb); | ||
464 | const unsigned long start = jiffies; /* livelock avoidance */ | 565 | const unsigned long start = jiffies; /* livelock avoidance */ |
465 | int sync = wbc->sync_mode == WB_SYNC_ALL; | ||
466 | 566 | ||
467 | spin_lock(&inode_lock); | 567 | spin_lock(&inode_lock); |
468 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) | ||
469 | queue_io(sb, wbc->older_than_this); | ||
470 | 568 | ||
471 | while (!list_empty(&sb->s_io)) { | 569 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
472 | struct inode *inode = list_entry(sb->s_io.prev, | 570 | queue_io(wb, wbc->older_than_this); |
571 | |||
572 | while (!list_empty(&wb->b_io)) { | ||
573 | struct inode *inode = list_entry(wb->b_io.prev, | ||
473 | struct inode, i_list); | 574 | struct inode, i_list); |
474 | struct address_space *mapping = inode->i_mapping; | ||
475 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
476 | long pages_skipped; | 575 | long pages_skipped; |
477 | 576 | ||
478 | if (!bdi_cap_writeback_dirty(bdi)) { | 577 | /* |
578 | * super block given and doesn't match, skip this inode | ||
579 | */ | ||
580 | if (sb && sb != inode->i_sb) { | ||
581 | redirty_tail(inode); | ||
582 | continue; | ||
583 | } | ||
584 | |||
585 | if (!bdi_cap_writeback_dirty(wb->bdi)) { | ||
479 | redirty_tail(inode); | 586 | redirty_tail(inode); |
480 | if (sb_is_blkdev_sb(sb)) { | 587 | if (is_blkdev_sb) { |
481 | /* | 588 | /* |
482 | * Dirty memory-backed blockdev: the ramdisk | 589 | * Dirty memory-backed blockdev: the ramdisk |
483 | * driver does this. Skip just this inode | 590 | * driver does this. Skip just this inode |
@@ -497,21 +604,14 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
497 | continue; | 604 | continue; |
498 | } | 605 | } |
499 | 606 | ||
500 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 607 | if (wbc->nonblocking && bdi_write_congested(wb->bdi)) { |
501 | wbc->encountered_congestion = 1; | 608 | wbc->encountered_congestion = 1; |
502 | if (!sb_is_blkdev_sb(sb)) | 609 | if (!is_blkdev_sb) |
503 | break; /* Skip a congested fs */ | 610 | break; /* Skip a congested fs */ |
504 | requeue_io(inode); | 611 | requeue_io(inode); |
505 | continue; /* Skip a congested blockdev */ | 612 | continue; /* Skip a congested blockdev */ |
506 | } | 613 | } |
507 | 614 | ||
508 | if (wbc->bdi && bdi != wbc->bdi) { | ||
509 | if (!sb_is_blkdev_sb(sb)) | ||
510 | break; /* fs has the wrong queue */ | ||
511 | requeue_io(inode); | ||
512 | continue; /* blockdev has wrong queue */ | ||
513 | } | ||
514 | |||
515 | /* | 615 | /* |
516 | * Was this inode dirtied after sync_sb_inodes was called? | 616 | * Was this inode dirtied after sync_sb_inodes was called? |
517 | * This keeps sync from extra jobs and livelock. | 617 | * This keeps sync from extra jobs and livelock. |
@@ -519,16 +619,16 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
519 | if (inode_dirtied_after(inode, start)) | 619 | if (inode_dirtied_after(inode, start)) |
520 | break; | 620 | break; |
521 | 621 | ||
522 | /* Is another pdflush already flushing this queue? */ | 622 | if (pin_sb_for_writeback(wbc, inode)) { |
523 | if (current_is_pdflush() && !writeback_acquire(bdi)) | 623 | requeue_io(inode); |
524 | break; | 624 | continue; |
625 | } | ||
525 | 626 | ||
526 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 627 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); |
527 | __iget(inode); | 628 | __iget(inode); |
528 | pages_skipped = wbc->pages_skipped; | 629 | pages_skipped = wbc->pages_skipped; |
529 | writeback_single_inode(inode, wbc); | 630 | writeback_single_inode(inode, wbc); |
530 | if (current_is_pdflush()) | 631 | unpin_sb_for_writeback(wbc, inode); |
531 | writeback_release(bdi); | ||
532 | if (wbc->pages_skipped != pages_skipped) { | 632 | if (wbc->pages_skipped != pages_skipped) { |
533 | /* | 633 | /* |
534 | * writeback is not making progress due to locked | 634 | * writeback is not making progress due to locked |
@@ -544,144 +644,520 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
544 | wbc->more_io = 1; | 644 | wbc->more_io = 1; |
545 | break; | 645 | break; |
546 | } | 646 | } |
547 | if (!list_empty(&sb->s_more_io)) | 647 | if (!list_empty(&wb->b_more_io)) |
548 | wbc->more_io = 1; | 648 | wbc->more_io = 1; |
549 | } | 649 | } |
550 | 650 | ||
551 | if (sync) { | 651 | spin_unlock(&inode_lock); |
552 | struct inode *inode, *old_inode = NULL; | 652 | /* Leave any unwritten inodes on b_io */ |
653 | } | ||
654 | |||
655 | void writeback_inodes_wbc(struct writeback_control *wbc) | ||
656 | { | ||
657 | struct backing_dev_info *bdi = wbc->bdi; | ||
553 | 658 | ||
659 | writeback_inodes_wb(&bdi->wb, wbc); | ||
660 | } | ||
661 | |||
662 | /* | ||
663 | * The maximum number of pages to writeout in a single bdi flush/kupdate | ||
664 | * operation. We do this so we don't hold I_SYNC against an inode for | ||
665 | * enormous amounts of time, which would block a userspace task which has | ||
666 | * been forced to throttle against that inode. Also, the code reevaluates | ||
667 | * the dirty state each time it has written this many pages. | ||
668 | */ | ||
669 | #define MAX_WRITEBACK_PAGES 1024 | ||
670 | |||
671 | static inline bool over_bground_thresh(void) | ||
672 | { | ||
673 | unsigned long background_thresh, dirty_thresh; | ||
674 | |||
675 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | ||
676 | |||
677 | return (global_page_state(NR_FILE_DIRTY) + | ||
678 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * Explicit flushing or periodic writeback of "old" data. | ||
683 | * | ||
684 | * Define "old": the first time one of an inode's pages is dirtied, we mark the | ||
685 | * dirtying-time in the inode's address_space. So this periodic writeback code | ||
686 | * just walks the superblock inode list, writing back any inodes which are | ||
687 | * older than a specific point in time. | ||
688 | * | ||
689 | * Try to run once per dirty_writeback_interval. But if a writeback event | ||
690 | * takes longer than one dirty_writeback_interval, then leave a | ||
691 | * one-second gap. | ||
692 | * | ||
693 | * older_than_this takes precedence over nr_to_write. So we'll only write back | ||
694 | * all dirty pages if they are all attached to "old" mappings. | ||
695 | */ | ||
696 | static long wb_writeback(struct bdi_writeback *wb, | ||
697 | struct wb_writeback_args *args) | ||
698 | { | ||
699 | struct writeback_control wbc = { | ||
700 | .bdi = wb->bdi, | ||
701 | .sb = args->sb, | ||
702 | .sync_mode = args->sync_mode, | ||
703 | .older_than_this = NULL, | ||
704 | .for_kupdate = args->for_kupdate, | ||
705 | .range_cyclic = args->range_cyclic, | ||
706 | }; | ||
707 | unsigned long oldest_jif; | ||
708 | long wrote = 0; | ||
709 | |||
710 | if (wbc.for_kupdate) { | ||
711 | wbc.older_than_this = &oldest_jif; | ||
712 | oldest_jif = jiffies - | ||
713 | msecs_to_jiffies(dirty_expire_interval * 10); | ||
714 | } | ||
715 | if (!wbc.range_cyclic) { | ||
716 | wbc.range_start = 0; | ||
717 | wbc.range_end = LLONG_MAX; | ||
718 | } | ||
719 | |||
720 | for (;;) { | ||
554 | /* | 721 | /* |
555 | * Data integrity sync. Must wait for all pages under writeback, | 722 | * Don't flush anything for non-integrity writeback where |
556 | * because there may have been pages dirtied before our sync | 723 | * no nr_pages was given |
557 | * call, but which had writeout started before we write it out. | ||
558 | * In which case, the inode may not be on the dirty list, but | ||
559 | * we still have to wait for that writeout. | ||
560 | */ | 724 | */ |
561 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 725 | if (!args->for_kupdate && args->nr_pages <= 0 && |
562 | struct address_space *mapping; | 726 | args->sync_mode == WB_SYNC_NONE) |
727 | break; | ||
563 | 728 | ||
564 | if (inode->i_state & | 729 | /* |
565 | (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 730 | * If no specific pages were given and this is just a |
566 | continue; | 731 | * periodic background writeout and we are below the |
567 | mapping = inode->i_mapping; | 732 | * background dirty threshold, don't do anything |
568 | if (mapping->nrpages == 0) | 733 | */ |
734 | if (args->for_kupdate && args->nr_pages <= 0 && | ||
735 | !over_bground_thresh()) | ||
736 | break; | ||
737 | |||
738 | wbc.more_io = 0; | ||
739 | wbc.encountered_congestion = 0; | ||
740 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | ||
741 | wbc.pages_skipped = 0; | ||
742 | writeback_inodes_wb(wb, &wbc); | ||
743 | args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
744 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
745 | |||
746 | /* | ||
747 | * If we ran out of stuff to write, bail unless more_io got set | ||
748 | */ | ||
749 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | ||
750 | if (wbc.more_io && !wbc.for_kupdate) | ||
569 | continue; | 751 | continue; |
570 | __iget(inode); | 752 | break; |
571 | spin_unlock(&inode_lock); | 753 | } |
572 | /* | 754 | } |
573 | * We hold a reference to 'inode' so it couldn't have | 755 | |
574 | * been removed from s_inodes list while we dropped the | 756 | return wrote; |
575 | * inode_lock. We cannot iput the inode now as we can | 757 | } |
576 | * be holding the last reference and we cannot iput it | 758 | |
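The loop in wb_writeback() above works in MAX_WRITEBACK_PAGES-sized slices so that no single pass holds I_SYNC for too long, re-checking between slices whether any progress was made. A toy userspace rendering of that chunking structure (flush_chunk and the counters are invented for illustration):

#include <stdio.h>

#define MAX_WRITEBACK_PAGES 1024

/* pretend to write back up to 'budget' of the remaining dirty pages */
static long flush_chunk(long budget, long *dirty)
{
        long done = budget < *dirty ? budget : *dirty;

        *dirty -= done;
        return done;
}

int main(void)
{
        long dirty = 2500, wrote = 0, nr_pages = 10000;

        while (nr_pages > 0) {
                long done = flush_chunk(MAX_WRITEBACK_PAGES, &dirty);

                if (done == 0)          /* ran out of work: bail */
                        break;
                wrote += done;
                nr_pages -= MAX_WRITEBACK_PAGES;
        }
        printf("wrote %ld pages in chunks of %d\n", wrote, MAX_WRITEBACK_PAGES);
        return 0;
}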
577 | * under inode_lock. So we keep the reference and iput | 759 | /* |
578 | * it later. | 760 | * Return the next bdi_work struct that hasn't been processed by this |
579 | */ | 761 | * wb thread yet. ->seen is initially set for each thread that exists |
580 | iput(old_inode); | 762 | * for this device; when a thread first notices a piece of work it |
581 | old_inode = inode; | 763 | * clears its bit. Depending on writeback type, the thread will notify |
764 | * completion on either receiving the work (WB_SYNC_NONE) or after | ||
765 | * it is done (WB_SYNC_ALL). | ||
766 | */ | ||
767 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | ||
768 | struct bdi_writeback *wb) | ||
769 | { | ||
770 | struct bdi_work *work, *ret = NULL; | ||
771 | |||
772 | rcu_read_lock(); | ||
773 | |||
774 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | ||
775 | if (!test_bit(wb->nr, &work->seen)) | ||
776 | continue; | ||
777 | clear_bit(wb->nr, &work->seen); | ||
778 | |||
779 | ret = work; | ||
780 | break; | ||
781 | } | ||
782 | |||
783 | rcu_read_unlock(); | ||
784 | return ret; | ||
785 | } | ||
786 | |||
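get_next_work_item() distributes one work item to many threads through the ->seen bitmask: each thread owns one bit and claims its share of the work by clearing that bit. A compact C11 model of the claim step (claim is a hypothetical name; the kernel pairs this with RCU list traversal):

#include <stdatomic.h>
#include <stdio.h>

static int claim(atomic_ulong *seen, int worker_nr)
{
        unsigned long bit = 1UL << worker_nr;

        /* fetch_and returns the old mask; if our bit was set there, this
         * call is the one that cleared it, so we own this work once. */
        return (atomic_fetch_and(seen, ~bit) & bit) != 0;
}

int main(void)
{
        atomic_ulong seen = 0x3;        /* workers 0 and 1 must see it */

        printf("worker 0 first  claim: %d\n", claim(&seen, 0));    /* 1 */
        printf("worker 0 second claim: %d\n", claim(&seen, 0));    /* 0 */
        printf("worker 1 first  claim: %d\n", claim(&seen, 1));    /* 1 */
        return 0;
}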
787 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | ||
788 | { | ||
789 | unsigned long expired; | ||
790 | long nr_pages; | ||
791 | |||
792 | expired = wb->last_old_flush + | ||
793 | msecs_to_jiffies(dirty_writeback_interval * 10); | ||
794 | if (time_before(jiffies, expired)) | ||
795 | return 0; | ||
796 | |||
797 | wb->last_old_flush = jiffies; | ||
798 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
799 | global_page_state(NR_UNSTABLE_NFS) + | ||
800 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
801 | |||
802 | if (nr_pages) { | ||
803 | struct wb_writeback_args args = { | ||
804 | .nr_pages = nr_pages, | ||
805 | .sync_mode = WB_SYNC_NONE, | ||
806 | .for_kupdate = 1, | ||
807 | .range_cyclic = 1, | ||
808 | }; | ||
809 | |||
810 | return wb_writeback(wb, &args); | ||
811 | } | ||
812 | |||
813 | return 0; | ||
814 | } | ||
815 | |||
816 | /* | ||
817 | * Retrieve work items and do the writeback they describe | ||
818 | */ | ||
819 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | ||
820 | { | ||
821 | struct backing_dev_info *bdi = wb->bdi; | ||
822 | struct bdi_work *work; | ||
823 | long wrote = 0; | ||
582 | 824 | ||
583 | filemap_fdatawait(mapping); | 825 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
826 | struct wb_writeback_args args = work->args; | ||
584 | 827 | ||
585 | cond_resched(); | 828 | /* |
829 | * Override sync mode, in case we must wait for completion | ||
830 | */ | ||
831 | if (force_wait) | ||
832 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | ||
586 | 833 | ||
587 | spin_lock(&inode_lock); | 834 | /* |
835 | * If this isn't a data integrity operation, just notify | ||
836 | * that we have seen this work and we are now starting it. | ||
837 | */ | ||
838 | if (args.sync_mode == WB_SYNC_NONE) | ||
839 | wb_clear_pending(wb, work); | ||
840 | |||
841 | wrote += wb_writeback(wb, &args); | ||
842 | |||
843 | /* | ||
844 | * This is a data integrity writeback, so only do the | ||
845 | * notification when we have completed the work. | ||
846 | */ | ||
847 | if (args.sync_mode == WB_SYNC_ALL) | ||
848 | wb_clear_pending(wb, work); | ||
849 | } | ||
850 | |||
851 | /* | ||
852 | * Check for periodic writeback, kupdated() style | ||
853 | */ | ||
854 | wrote += wb_check_old_data_flush(wb); | ||
855 | |||
856 | return wrote; | ||
857 | } | ||
858 | |||
859 | /* | ||
860 | * Handle writeback of dirty data for the device backed by this bdi. Also | ||
861 | * wakes up periodically and does kupdated style flushing. | ||
862 | */ | ||
863 | int bdi_writeback_task(struct bdi_writeback *wb) | ||
864 | { | ||
865 | unsigned long last_active = jiffies; | ||
866 | unsigned long wait_jiffies = -1UL; | ||
867 | long pages_written; | ||
868 | |||
869 | while (!kthread_should_stop()) { | ||
870 | pages_written = wb_do_writeback(wb, 0); | ||
871 | |||
872 | if (pages_written) | ||
873 | last_active = jiffies; | ||
874 | else if (wait_jiffies != -1UL) { | ||
875 | unsigned long max_idle; | ||
876 | |||
877 | /* | ||
878 | * Longest period of inactivity that we tolerate. If we | ||
879 | * see dirty data again later, the task will get | ||
880 | * recreated automatically. | ||
881 | */ | ||
882 | max_idle = max(5UL * 60 * HZ, wait_jiffies); | ||
883 | if (time_after(jiffies, max_idle + last_active)) | ||
884 | break; | ||
588 | } | 885 | } |
589 | spin_unlock(&inode_lock); | ||
590 | iput(old_inode); | ||
591 | } else | ||
592 | spin_unlock(&inode_lock); | ||
593 | 886 | ||
594 | return; /* Leave any unwritten inodes on s_io */ | 887 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); |
888 | schedule_timeout_interruptible(wait_jiffies); | ||
889 | try_to_freeze(); | ||
890 | } | ||
891 | |||
892 | return 0; | ||
595 | } | 893 | } |
596 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); | ||
597 | 894 | ||
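bdi_writeback_task() implements a lazy exit policy: remember when the thread last wrote anything and quit after max(5 minutes, one writeback interval) of idleness, trusting that the thread gets recreated when dirty data reappears. A skeletal userspace version of that loop, with seconds standing in for jiffies and the demo cut short after a few passes:

#include <stdio.h>
#include <time.h>

#define MAX_IDLE_SECS (5 * 60)

int main(void)
{
        time_t last_active = time(NULL);
        int iterations = 0;

        for (;;) {
                int pages_written = 0;          /* pretend: no work found */

                if (pages_written)
                        last_active = time(NULL);
                else if (time(NULL) - last_active > MAX_IDLE_SECS)
                        break;                  /* idle too long: exit */

                if (++iterations == 3)          /* keep the demo finite */
                        break;
                /* a real loop would sleep one writeback interval here */
        }
        printf("worker exiting after %d idle passes\n", iterations);
        return 0;
}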
598 | static void sync_sb_inodes(struct super_block *sb, | 895 | /* |
599 | struct writeback_control *wbc) | 896 | * Schedule writeback for all backing devices. This does WB_SYNC_NONE |
897 | * writeback; for integrity writeback see bdi_sync_writeback(). | ||
898 | */ | ||
899 | static void bdi_writeback_all(struct super_block *sb, long nr_pages) | ||
600 | { | 900 | { |
601 | generic_sync_sb_inodes(sb, wbc); | 901 | struct wb_writeback_args args = { |
902 | .sb = sb, | ||
903 | .nr_pages = nr_pages, | ||
904 | .sync_mode = WB_SYNC_NONE, | ||
905 | }; | ||
906 | struct backing_dev_info *bdi; | ||
907 | |||
908 | rcu_read_lock(); | ||
909 | |||
910 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | ||
911 | if (!bdi_has_dirty_io(bdi)) | ||
912 | continue; | ||
913 | |||
914 | bdi_alloc_queue_work(bdi, &args); | ||
915 | } | ||
916 | |||
917 | rcu_read_unlock(); | ||
602 | } | 918 | } |
603 | 919 | ||
604 | /* | 920 | /* |
605 | * Start writeback of dirty pagecache data against all unlocked inodes. | 921 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
922 | * the whole world. | ||
923 | */ | ||
924 | void wakeup_flusher_threads(long nr_pages) | ||
925 | { | ||
926 | if (nr_pages == 0) | ||
927 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
928 | global_page_state(NR_UNSTABLE_NFS); | ||
929 | bdi_writeback_all(NULL, nr_pages); | ||
930 | } | ||
931 | |||
932 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | ||
933 | { | ||
934 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | ||
935 | struct dentry *dentry; | ||
936 | const char *name = "?"; | ||
937 | |||
938 | dentry = d_find_alias(inode); | ||
939 | if (dentry) { | ||
940 | spin_lock(&dentry->d_lock); | ||
941 | name = (const char *) dentry->d_name.name; | ||
942 | } | ||
943 | printk(KERN_DEBUG | ||
944 | "%s(%d): dirtied inode %lu (%s) on %s\n", | ||
945 | current->comm, task_pid_nr(current), inode->i_ino, | ||
946 | name, inode->i_sb->s_id); | ||
947 | if (dentry) { | ||
948 | spin_unlock(&dentry->d_lock); | ||
949 | dput(dentry); | ||
950 | } | ||
951 | } | ||
952 | } | ||
953 | |||
954 | /** | ||
955 | * __mark_inode_dirty - internal function | ||
956 | * @inode: inode to mark | ||
957 | * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) | ||
958 | * Mark an inode as dirty. Callers should use mark_inode_dirty or | ||
959 | * mark_inode_dirty_sync. | ||
960 | * | ||
961 | * Put the inode on the super block's dirty list. | ||
606 | * | 962 | * |
607 | * Note: | 963 | * CAREFUL! We mark it dirty unconditionally, but move it onto the |
608 | * We don't need to grab a reference to superblock here. If it has non-empty | 964 | * dirty list only if it is hashed or if it refers to a blockdev. |
609 | * ->s_dirty it hadn't been killed yet and kill_super() won't proceed | 965 | * If it was not hashed, it will never be added to the dirty list |
610 | * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all | 966 | * even if it is later hashed, as it will have been marked dirty already. |
611 | * empty. Since __sync_single_inode() regains inode_lock before it finally moves | ||
612 | * inode from superblock lists we are OK. | ||
613 | * | 967 | * |
614 | * If `older_than_this' is non-zero then only flush inodes which have a | 968 | * In short, make sure you hash any inodes _before_ you start marking |
615 | * flushtime older than *older_than_this. | 969 | * them dirty. |
616 | * | 970 | * |
617 | * If `bdi' is non-zero then we will scan the first inode against each | 971 | * This function *must* be atomic for the I_DIRTY_PAGES case - |
618 | * superblock until we find the matching ones. One group will be the dirty | 972 | * set_page_dirty() is called under spinlock in several places. |
619 | * inodes against a filesystem. Then when we hit the dummy blockdev superblock, | 973 | * |
620 | * sync_sb_inodes will seek out the blockdev which matches `bdi'. Maybe not | 974 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of |
621 | * super-efficient but we're about to do a ton of I/O... | 975 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of |
976 | * the kernel-internal blockdev inode represents the dirtying time of the | ||
977 | * blockdev's pages. This is why for I_DIRTY_PAGES we always use | ||
978 | * page->mapping->host, so the page-dirtying time is recorded in the internal | ||
979 | * blockdev inode. | ||
622 | */ | 980 | */ |
623 | void | 981 | void __mark_inode_dirty(struct inode *inode, int flags) |
624 | writeback_inodes(struct writeback_control *wbc) | ||
625 | { | 982 | { |
626 | struct super_block *sb; | 983 | struct super_block *sb = inode->i_sb; |
627 | 984 | ||
628 | might_sleep(); | 985 | /* |
629 | spin_lock(&sb_lock); | 986 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
630 | restart: | 987 | * dirty the inode itself |
631 | list_for_each_entry_reverse(sb, &super_blocks, s_list) { | 988 | */ |
632 | if (sb_has_dirty_inodes(sb)) { | 989 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
633 | /* we're making our own get_super here */ | 990 | if (sb->s_op->dirty_inode) |
634 | sb->s_count++; | 991 | sb->s_op->dirty_inode(inode); |
635 | spin_unlock(&sb_lock); | 992 | } |
636 | /* | 993 | |
637 | * If we can't get the readlock, there's no sense in | 994 | /* |
638 | * waiting around, most of the time the FS is going to | 995 | * make sure that changes are seen by all cpus before we test i_state |
639 | * be unmounted by the time it is released. | 996 | * -- mikulas |
640 | */ | 997 | */ |
641 | if (down_read_trylock(&sb->s_umount)) { | 998 | smp_mb(); |
642 | if (sb->s_root) | 999 | |
643 | sync_sb_inodes(sb, wbc); | 1000 | /* avoid the locking if we can */ |
644 | up_read(&sb->s_umount); | 1001 | if ((inode->i_state & flags) == flags) |
1002 | return; | ||
1003 | |||
1004 | if (unlikely(block_dump)) | ||
1005 | block_dump___mark_inode_dirty(inode); | ||
1006 | |||
1007 | spin_lock(&inode_lock); | ||
1008 | if ((inode->i_state & flags) != flags) { | ||
1009 | const int was_dirty = inode->i_state & I_DIRTY; | ||
1010 | |||
1011 | inode->i_state |= flags; | ||
1012 | |||
1013 | /* | ||
1014 | * If the inode is being synced, just update its dirty state. | ||
1015 | * The unlocker will place the inode on the appropriate | ||
1016 | * superblock list, based upon its state. | ||
1017 | */ | ||
1018 | if (inode->i_state & I_SYNC) | ||
1019 | goto out; | ||
1020 | |||
1021 | /* | ||
1022 | * Only add valid (hashed) inodes to the superblock's | ||
1023 | * dirty list. Add blockdev inodes as well. | ||
1024 | */ | ||
1025 | if (!S_ISBLK(inode->i_mode)) { | ||
1026 | if (hlist_unhashed(&inode->i_hash)) | ||
1027 | goto out; | ||
1028 | } | ||
1029 | if (inode->i_state & (I_FREEING|I_CLEAR)) | ||
1030 | goto out; | ||
1031 | |||
1032 | /* | ||
1033 | * If the inode was already on b_dirty/b_io/b_more_io, don't | ||
1034 | * reposition it (that would break b_dirty time-ordering). | ||
1035 | */ | ||
1036 | if (!was_dirty) { | ||
1037 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | ||
1038 | struct backing_dev_info *bdi = wb->bdi; | ||
1039 | |||
1040 | if (bdi_cap_writeback_dirty(bdi) && | ||
1041 | !test_bit(BDI_registered, &bdi->state)) { | ||
1042 | WARN_ON(1); | ||
1043 | printk(KERN_ERR "bdi-%s not registered\n", | ||
1044 | bdi->name); | ||
645 | } | 1045 | } |
646 | spin_lock(&sb_lock); | 1046 | |
647 | if (__put_super_and_need_restart(sb)) | 1047 | inode->dirtied_when = jiffies; |
648 | goto restart; | 1048 | list_move(&inode->i_list, &wb->b_dirty); |
649 | } | 1049 | } |
650 | if (wbc->nr_to_write <= 0) | ||
651 | break; | ||
652 | } | 1050 | } |
653 | spin_unlock(&sb_lock); | 1051 | out: |
1052 | spin_unlock(&inode_lock); | ||
654 | } | 1053 | } |
1054 | EXPORT_SYMBOL(__mark_inode_dirty); | ||
655 | 1055 | ||
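__mark_inode_dirty() is built around a check-lock-recheck fast path: test i_state without inode_lock first (after smp_mb() orders the page-dirty stores), and only take the lock when the cheap test says the flags may need updating. A userspace sketch of that pattern using C11 atomics and a mutex in place of smp_mb() and inode_lock (mark_dirty is hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int state;

static void mark_dirty(int flags)
{
        /* fast path: already fully marked, avoid the lock entirely */
        if ((atomic_load(&state) & flags) == flags)
                return;

        pthread_mutex_lock(&lock);
        if ((atomic_load(&state) & flags) != flags) {   /* re-test */
                atomic_fetch_or(&state, flags);
                printf("marked 0x%x under lock\n", flags);
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        mark_dirty(0x1);        /* slow path, takes the lock */
        mark_dirty(0x1);        /* fast path, returns immediately */
        return 0;
}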
656 | /* | 1056 | /* |
657 | * writeback and wait upon the filesystem's dirty inodes. The caller will | 1057 | * Write out a superblock's list of dirty inodes. A wait will be performed |
658 | * do this in two passes - one to write, and one to wait. | 1058 | * upon no inodes, all inodes or the final one, depending upon sync_mode. |
1059 | * | ||
1060 | * If older_than_this is non-NULL, then only write out inodes which | ||
1061 | * had their first dirtying at a time earlier than *older_than_this. | ||
1062 | * | ||
1063 | * If we're a pdflush thread, then implement pdflush collision avoidance | ||
1064 | * against the entire list. | ||
659 | * | 1065 | * |
660 | * A finite limit is set on the number of pages which will be written. | 1066 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. |
661 | * To prevent infinite livelock of sys_sync(). | 1067 | * This function assumes that the blockdev superblock's inodes are backed by |
1068 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
1069 | * assume that all inodes are backed by the same queue. | ||
662 | * | 1070 | * |
663 | * We add in the number of potentially dirty inodes, because each inode write | 1071 | * The inodes to be written are parked on bdi->b_io. They are moved back onto |
664 | * can dirty pagecache in the underlying blockdev. | 1072 | * bdi->b_dirty as they are selected for writing. This way, none can be missed |
1073 | * on the writer throttling path, and we get decent balancing between many | ||
1074 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
665 | */ | 1075 | */ |
666 | void sync_inodes_sb(struct super_block *sb, int wait) | 1076 | static void wait_sb_inodes(struct super_block *sb) |
667 | { | 1077 | { |
668 | struct writeback_control wbc = { | 1078 | struct inode *inode, *old_inode = NULL; |
669 | .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, | 1079 | |
670 | .range_start = 0, | 1080 | /* |
671 | .range_end = LLONG_MAX, | 1081 | * We need to be protected against the filesystem going from |
672 | }; | 1082 | * r/o to r/w or vice versa. |
1083 | */ | ||
1084 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
1085 | |||
1086 | spin_lock(&inode_lock); | ||
1087 | |||
1088 | /* | ||
1089 | * Data integrity sync. Must wait for all pages under writeback, | ||
1090 | * because there may have been pages dirtied before our sync | ||
1091 | * call whose writeout had already started before we got to them. | ||
1092 | * In that case the inode may not be on the dirty list, but | ||
1093 | * we still have to wait for that writeout. | ||
1094 | */ | ||
1095 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | ||
1096 | struct address_space *mapping; | ||
1097 | |||
1098 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | ||
1099 | continue; | ||
1100 | mapping = inode->i_mapping; | ||
1101 | if (mapping->nrpages == 0) | ||
1102 | continue; | ||
1103 | __iget(inode); | ||
1104 | spin_unlock(&inode_lock); | ||
1105 | /* | ||
1106 | * We hold a reference to 'inode' so it couldn't have | ||
1107 | * been removed from s_inodes list while we dropped the | ||
1108 | * inode_lock. We cannot iput the inode now as we can | ||
1109 | * be holding the last reference and we cannot iput it | ||
1110 | * under inode_lock. So we keep the reference and iput | ||
1111 | * it later. | ||
1112 | */ | ||
1113 | iput(old_inode); | ||
1114 | old_inode = inode; | ||
1115 | |||
1116 | filemap_fdatawait(mapping); | ||
1117 | |||
1118 | cond_resched(); | ||
673 | 1119 | ||
674 | if (!wait) { | 1120 | spin_lock(&inode_lock); |
675 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1121 | } |
676 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 1122 | spin_unlock(&inode_lock); |
1123 | iput(old_inode); | ||
1124 | } | ||
677 | 1125 | ||
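wait_sb_inodes() uses a deferred-release trick: iput() may drop the last reference and must not run under inode_lock, so each inode is held one iteration longer and released only after the lock has been dropped again. A simplified userspace model of holding the previous object across iterations (all names hypothetical; refs counts only the walker's own reference):

#include <stdio.h>

struct obj { int id; int refs; };

static void get_obj(struct obj *o) { o->refs++; }

static void put_obj(struct obj *o)
{
        if (--o->refs == 0)
                printf("obj %d released outside the lock\n", o->id);
}

int main(void)
{
        struct obj objs[3] = { {0, 0}, {1, 0}, {2, 0} };
        struct obj *old = NULL;
        int i;

        for (i = 0; i < 3; i++) {
                get_obj(&objs[i]);      /* __iget() analogue, under "lock" */
                /* ...drop the lock, wait for this object's IO, relock... */
                if (old)
                        put_obj(old);   /* safe: the lock is not held here */
                old = &objs[i];
        }
        if (old)
                put_obj(old);           /* release the final object */
        return 0;
}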
678 | wbc.nr_to_write = nr_dirty + nr_unstable + | 1126 | /** |
1127 | * writeback_inodes_sb - writeback dirty inodes from given super_block | ||
1128 | * @sb: the superblock | ||
1129 | * | ||
1130 | * Start writeback on some inodes on this super_block. No guarantees are made | ||
1131 | * on how many (if any) will be written, and this function does not wait | ||
1132 | * for IO completion of the submitted IO; it kicks the flusher threads | ||
1133 | * and returns immediately. | ||
1134 | */ | ||
1135 | void writeback_inodes_sb(struct super_block *sb) | ||
1136 | { | ||
1137 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1138 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1139 | long nr_to_write; | ||
1140 | |||
1141 | nr_to_write = nr_dirty + nr_unstable + | ||
679 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1142 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
680 | } else | ||
681 | wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ | ||
682 | 1143 | ||
683 | sync_sb_inodes(sb, &wbc); | 1144 | bdi_writeback_all(sb, nr_to_write); |
1145 | } | ||
1146 | EXPORT_SYMBOL(writeback_inodes_sb); | ||
1147 | |||
1148 | /** | ||
1149 | * sync_inodes_sb - sync sb inode pages | ||
1150 | * @sb: the superblock | ||
1151 | * | ||
1152 | * This function writes and waits on any dirty inode belonging to this | ||
1153 | * super_block, and it does not return until that IO has completed. | ||
1154 | */ | ||
1155 | void sync_inodes_sb(struct super_block *sb) | ||
1156 | { | ||
1157 | bdi_sync_writeback(sb->s_bdi, sb); | ||
1158 | wait_sb_inodes(sb); | ||
684 | } | 1159 | } |
1160 | EXPORT_SYMBOL(sync_inodes_sb); | ||
685 | 1161 | ||
686 | /** | 1162 | /** |
687 | * write_inode_now - write an inode to disk | 1163 | * write_inode_now - write an inode to disk |
@@ -737,57 +1213,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
737 | return ret; | 1213 | return ret; |
738 | } | 1214 | } |
739 | EXPORT_SYMBOL(sync_inode); | 1215 | EXPORT_SYMBOL(sync_inode); |
740 | |||
741 | /** | ||
742 | * generic_osync_inode - flush all dirty data for a given inode to disk | ||
743 | * @inode: inode to write | ||
744 | * @mapping: the address_space that should be flushed | ||
745 | * @what: what to write and wait upon | ||
746 | * | ||
747 | * This can be called by file_write functions for files which have the | ||
748 | * O_SYNC flag set, to flush dirty writes to disk. | ||
749 | * | ||
750 | * @what is a bitmask, specifying which part of the inode's data should be | ||
751 | * written and waited upon. | ||
752 | * | ||
753 | * OSYNC_DATA: i_mapping's dirty data | ||
754 | * OSYNC_METADATA: the buffers at i_mapping->private_list | ||
755 | * OSYNC_INODE: the inode itself | ||
756 | */ | ||
757 | |||
758 | int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what) | ||
759 | { | ||
760 | int err = 0; | ||
761 | int need_write_inode_now = 0; | ||
762 | int err2; | ||
763 | |||
764 | if (what & OSYNC_DATA) | ||
765 | err = filemap_fdatawrite(mapping); | ||
766 | if (what & (OSYNC_METADATA|OSYNC_DATA)) { | ||
767 | err2 = sync_mapping_buffers(mapping); | ||
768 | if (!err) | ||
769 | err = err2; | ||
770 | } | ||
771 | if (what & OSYNC_DATA) { | ||
772 | err2 = filemap_fdatawait(mapping); | ||
773 | if (!err) | ||
774 | err = err2; | ||
775 | } | ||
776 | |||
777 | spin_lock(&inode_lock); | ||
778 | if ((inode->i_state & I_DIRTY) && | ||
779 | ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) | ||
780 | need_write_inode_now = 1; | ||
781 | spin_unlock(&inode_lock); | ||
782 | |||
783 | if (need_write_inode_now) { | ||
784 | err2 = write_inode_now(inode, 1); | ||
785 | if (!err) | ||
786 | err = err2; | ||
787 | } | ||
788 | else | ||
789 | inode_sync_wait(inode); | ||
790 | |||
791 | return err; | ||
792 | } | ||
793 | EXPORT_SYMBOL(generic_osync_inode); | ||
diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 99c99dfb0373..3773fd63d2f9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c | |||
@@ -61,6 +61,121 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, | |||
61 | return simple_read_from_buffer(buf, len, ppos, tmp, size); | 61 | return simple_read_from_buffer(buf, len, ppos, tmp, size); |
62 | } | 62 | } |
63 | 63 | ||
64 | static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf, | ||
65 | size_t len, loff_t *ppos, unsigned val) | ||
66 | { | ||
67 | char tmp[32]; | ||
68 | size_t size = sprintf(tmp, "%u\n", val); | ||
69 | |||
70 | return simple_read_from_buffer(buf, len, ppos, tmp, size); | ||
71 | } | ||
72 | |||
73 | static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, | ||
74 | size_t count, loff_t *ppos, unsigned *val, | ||
75 | unsigned global_limit) | ||
76 | { | ||
77 | unsigned long t; | ||
78 | char tmp[32]; | ||
79 | unsigned limit = (1 << 16) - 1; | ||
80 | int err; | ||
81 | |||
82 | if (*ppos || count >= sizeof(tmp) - 1) | ||
83 | return -EINVAL; | ||
84 | |||
85 | if (copy_from_user(tmp, buf, count)) | ||
86 | return -EINVAL; | ||
87 | |||
88 | tmp[count] = '\0'; | ||
89 | |||
90 | err = strict_strtoul(tmp, 0, &t); | ||
91 | if (err) | ||
92 | return err; | ||
93 | |||
94 | if (!capable(CAP_SYS_ADMIN)) | ||
95 | limit = min(limit, global_limit); | ||
96 | |||
97 | if (t > limit) | ||
98 | return -EINVAL; | ||
99 | |||
100 | *val = t; | ||
101 | |||
102 | return count; | ||
103 | } | ||
104 | |||
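fuse_conn_limit_write() above is a careful sysfs-style parser: copy at most a small buffer, NUL-terminate it, parse strictly, then clamp the accepted range harder for unprivileged writers. A userspace approximation of the same validation flow (parse_limit is hypothetical; strtoul stands in for strict_strtoul, and the privileged flag for capable(CAP_SYS_ADMIN)):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_limit(const char *buf, unsigned *val,
                       int privileged, unsigned long global_limit)
{
        unsigned long limit = (1UL << 16) - 1;
        unsigned long t;
        char *end;

        errno = 0;
        t = strtoul(buf, &end, 0);
        if (errno || end == buf || (*end && *end != '\n'))
                return -EINVAL;         /* reject trailing garbage */

        if (!privileged && global_limit < limit)
                limit = global_limit;   /* unprivileged writers get capped */
        if (t > limit)
                return -EINVAL;

        *val = (unsigned)t;
        return 0;
}

int main(void)
{
        unsigned val;

        printf("root, 100: %d\n", parse_limit("100", &val, 1, 12));   /*   0 */
        printf("user, 100: %d\n", parse_limit("100", &val, 0, 12));   /* -22 */
        printf("user, 10:  %d\n", parse_limit("10", &val, 0, 12));    /*   0 */
        return 0;
}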
105 | static ssize_t fuse_conn_max_background_read(struct file *file, | ||
106 | char __user *buf, size_t len, | ||
107 | loff_t *ppos) | ||
108 | { | ||
109 | struct fuse_conn *fc; | ||
110 | unsigned val; | ||
111 | |||
112 | fc = fuse_ctl_file_conn_get(file); | ||
113 | if (!fc) | ||
114 | return 0; | ||
115 | |||
116 | val = fc->max_background; | ||
117 | fuse_conn_put(fc); | ||
118 | |||
119 | return fuse_conn_limit_read(file, buf, len, ppos, val); | ||
120 | } | ||
121 | |||
122 | static ssize_t fuse_conn_max_background_write(struct file *file, | ||
123 | const char __user *buf, | ||
124 | size_t count, loff_t *ppos) | ||
125 | { | ||
126 | unsigned val; | ||
127 | ssize_t ret; | ||
128 | |||
129 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, | ||
130 | max_user_bgreq); | ||
131 | if (ret > 0) { | ||
132 | struct fuse_conn *fc = fuse_ctl_file_conn_get(file); | ||
133 | if (fc) { | ||
134 | fc->max_background = val; | ||
135 | fuse_conn_put(fc); | ||
136 | } | ||
137 | } | ||
138 | |||
139 | return ret; | ||
140 | } | ||
141 | |||
142 | static ssize_t fuse_conn_congestion_threshold_read(struct file *file, | ||
143 | char __user *buf, size_t len, | ||
144 | loff_t *ppos) | ||
145 | { | ||
146 | struct fuse_conn *fc; | ||
147 | unsigned val; | ||
148 | |||
149 | fc = fuse_ctl_file_conn_get(file); | ||
150 | if (!fc) | ||
151 | return 0; | ||
152 | |||
153 | val = fc->congestion_threshold; | ||
154 | fuse_conn_put(fc); | ||
155 | |||
156 | return fuse_conn_limit_read(file, buf, len, ppos, val); | ||
157 | } | ||
158 | |||
159 | static ssize_t fuse_conn_congestion_threshold_write(struct file *file, | ||
160 | const char __user *buf, | ||
161 | size_t count, loff_t *ppos) | ||
162 | { | ||
163 | unsigned val; | ||
164 | ssize_t ret; | ||
165 | |||
166 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, | ||
167 | max_user_congthresh); | ||
168 | if (ret > 0) { | ||
169 | struct fuse_conn *fc = fuse_ctl_file_conn_get(file); | ||
170 | if (fc) { | ||
171 | fc->congestion_threshold = val; | ||
172 | fuse_conn_put(fc); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | return ret; | ||
177 | } | ||
178 | |||
64 | static const struct file_operations fuse_ctl_abort_ops = { | 179 | static const struct file_operations fuse_ctl_abort_ops = { |
65 | .open = nonseekable_open, | 180 | .open = nonseekable_open, |
66 | .write = fuse_conn_abort_write, | 181 | .write = fuse_conn_abort_write, |
@@ -71,6 +186,18 @@ static const struct file_operations fuse_ctl_waiting_ops = { | |||
71 | .read = fuse_conn_waiting_read, | 186 | .read = fuse_conn_waiting_read, |
72 | }; | 187 | }; |
73 | 188 | ||
189 | static const struct file_operations fuse_conn_max_background_ops = { | ||
190 | .open = nonseekable_open, | ||
191 | .read = fuse_conn_max_background_read, | ||
192 | .write = fuse_conn_max_background_write, | ||
193 | }; | ||
194 | |||
195 | static const struct file_operations fuse_conn_congestion_threshold_ops = { | ||
196 | .open = nonseekable_open, | ||
197 | .read = fuse_conn_congestion_threshold_read, | ||
198 | .write = fuse_conn_congestion_threshold_write, | ||
199 | }; | ||
200 | |||
74 | static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, | 201 | static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, |
75 | struct fuse_conn *fc, | 202 | struct fuse_conn *fc, |
76 | const char *name, | 203 | const char *name, |
@@ -127,9 +254,14 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) | |||
127 | goto err; | 254 | goto err; |
128 | 255 | ||
129 | if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, | 256 | if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, |
130 | NULL, &fuse_ctl_waiting_ops) || | 257 | NULL, &fuse_ctl_waiting_ops) || |
131 | !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, | 258 | !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, |
132 | NULL, &fuse_ctl_abort_ops)) | 259 | NULL, &fuse_ctl_abort_ops) || |
260 | !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, | ||
261 | 1, NULL, &fuse_conn_max_background_ops) || | ||
262 | !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", | ||
263 | S_IFREG | 0600, 1, NULL, | ||
264 | &fuse_conn_congestion_threshold_ops)) | ||
133 | goto err; | 265 | goto err; |
134 | 266 | ||
135 | return 0; | 267 | return 0; |
@@ -156,7 +288,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) | |||
156 | d_drop(dentry); | 288 | d_drop(dentry); |
157 | dput(dentry); | 289 | dput(dentry); |
158 | } | 290 | } |
159 | fuse_control_sb->s_root->d_inode->i_nlink--; | 291 | drop_nlink(fuse_control_sb->s_root->d_inode); |
160 | } | 292 | } |
161 | 293 | ||
162 | static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) | 294 | static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6484eb75acd6..51d9e33d634f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) | |||
250 | 250 | ||
251 | static void flush_bg_queue(struct fuse_conn *fc) | 251 | static void flush_bg_queue(struct fuse_conn *fc) |
252 | { | 252 | { |
253 | while (fc->active_background < FUSE_MAX_BACKGROUND && | 253 | while (fc->active_background < fc->max_background && |
254 | !list_empty(&fc->bg_queue)) { | 254 | !list_empty(&fc->bg_queue)) { |
255 | struct fuse_req *req; | 255 | struct fuse_req *req; |
256 | 256 | ||
@@ -280,11 +280,11 @@ __releases(&fc->lock) | |||
280 | list_del(&req->intr_entry); | 280 | list_del(&req->intr_entry); |
281 | req->state = FUSE_REQ_FINISHED; | 281 | req->state = FUSE_REQ_FINISHED; |
282 | if (req->background) { | 282 | if (req->background) { |
283 | if (fc->num_background == FUSE_MAX_BACKGROUND) { | 283 | if (fc->num_background == fc->max_background) { |
284 | fc->blocked = 0; | 284 | fc->blocked = 0; |
285 | wake_up_all(&fc->blocked_waitq); | 285 | wake_up_all(&fc->blocked_waitq); |
286 | } | 286 | } |
287 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && | 287 | if (fc->num_background == fc->congestion_threshold && |
288 | fc->connected && fc->bdi_initialized) { | 288 | fc->connected && fc->bdi_initialized) { |
289 | clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); | 289 | clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); |
290 | clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); | 290 | clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); |
@@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, | |||
410 | { | 410 | { |
411 | req->background = 1; | 411 | req->background = 1; |
412 | fc->num_background++; | 412 | fc->num_background++; |
413 | if (fc->num_background == FUSE_MAX_BACKGROUND) | 413 | if (fc->num_background == fc->max_background) |
414 | fc->blocked = 1; | 414 | fc->blocked = 1; |
415 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && | 415 | if (fc->num_background == fc->congestion_threshold && |
416 | fc->bdi_initialized) { | 416 | fc->bdi_initialized) { |
417 | set_bdi_congested(&fc->bdi, BLK_RW_SYNC); | 417 | set_bdi_congested(&fc->bdi, BLK_RW_SYNC); |
418 | set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); | 418 | set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); |
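The dev.c hunks above swap the old FUSE_MAX_BACKGROUND / FUSE_CONGESTION_THRESHOLD constants for the new per-connection fields. A stand-alone model of the accounting, with stub types in place of struct fuse_conn (a sketch, not kernel code):

    #include <stdio.h>

    struct conn {
        unsigned max_background;        /* was FUSE_MAX_BACKGROUND (12) */
        unsigned congestion_threshold;  /* was 75% of the maximum */
        unsigned num_background;
        int blocked, congested;
    };

    /* fuse_request_send_nowait_locked(): count first, then compare */
    static void bg_start(struct conn *c)
    {
        c->num_background++;
        if (c->num_background == c->max_background)
            c->blocked = 1;             /* new bg requests must wait */
        if (c->num_background == c->congestion_threshold)
            c->congested = 1;           /* set_bdi_congested() in the kernel */
    }

    /* request_end(): compare first, then count down */
    static void bg_end(struct conn *c)
    {
        if (c->num_background == c->max_background)
            c->blocked = 0;             /* wake_up_all(&fc->blocked_waitq) */
        if (c->num_background == c->congestion_threshold)
            c->congested = 0;           /* clear_bdi_congested() */
        c->num_background--;
    }

    int main(void)
    {
        struct conn c = { .max_background = 12, .congestion_threshold = 9 };
        int i;

        for (i = 0; i < 12; i++)
            bg_start(&c);
        printf("blocked=%d congested=%d\n", c.blocked, c.congested); /* 1 1 */
        bg_end(&c);
        printf("blocked=%d congested=%d\n", c.blocked, c.congested); /* 0 1 */
        return 0;
    }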
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..fc9c79feb5f7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -25,12 +25,6 @@ | |||
25 | /** Max number of pages that can be used in a single read request */ | 25 | /** Max number of pages that can be used in a single read request */ |
26 | #define FUSE_MAX_PAGES_PER_REQ 32 | 26 | #define FUSE_MAX_PAGES_PER_REQ 32 |
27 | 27 | ||
28 | /** Maximum number of outstanding background requests */ | ||
29 | #define FUSE_MAX_BACKGROUND 12 | ||
30 | |||
31 | /** Congestion starts at 75% of maximum */ | ||
32 | #define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) | ||
33 | |||
34 | /** Bias for fi->writectr, meaning new writepages must not be sent */ | 28 | /** Bias for fi->writectr, meaning new writepages must not be sent */ |
35 | #define FUSE_NOWRITE INT_MIN | 29 | #define FUSE_NOWRITE INT_MIN |
36 | 30 | ||
@@ -38,7 +32,7 @@ | |||
38 | #define FUSE_NAME_MAX 1024 | 32 | #define FUSE_NAME_MAX 1024 |
39 | 33 | ||
40 | /** Number of dentries for each connection in the control filesystem */ | 34 | /** Number of dentries for each connection in the control filesystem */ |
41 | #define FUSE_CTL_NUM_DENTRIES 3 | 35 | #define FUSE_CTL_NUM_DENTRIES 5 |
42 | 36 | ||
43 | /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem | 37 | /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem |
44 | module will check permissions based on the file mode. Otherwise no | 38 | module will check permissions based on the file mode. Otherwise no |
@@ -55,6 +49,10 @@ extern struct list_head fuse_conn_list; | |||
55 | /** Global mutex protecting fuse_conn_list and the control filesystem */ | 49 | /** Global mutex protecting fuse_conn_list and the control filesystem */ |
56 | extern struct mutex fuse_mutex; | 50 | extern struct mutex fuse_mutex; |
57 | 51 | ||
52 | /** Module parameters */ | ||
53 | extern unsigned max_user_bgreq; | ||
54 | extern unsigned max_user_congthresh; | ||
55 | |||
58 | /** FUSE inode */ | 56 | /** FUSE inode */ |
59 | struct fuse_inode { | 57 | struct fuse_inode { |
60 | /** Inode data */ | 58 | /** Inode data */ |
@@ -349,6 +347,12 @@ struct fuse_conn { | |||
349 | /** rbtree of fuse_files waiting for poll events indexed by ph */ | 347 | /** rbtree of fuse_files waiting for poll events indexed by ph */ |
350 | struct rb_root polled_files; | 348 | struct rb_root polled_files; |
351 | 349 | ||
350 | /** Maximum number of outstanding background requests */ | ||
351 | unsigned max_background; | ||
352 | |||
353 | /** Number of background requests at which congestion starts */ | ||
354 | unsigned congestion_threshold; | ||
355 | |||
352 | /** Number of requests currently in the background */ | 356 | /** Number of requests currently in the background */ |
353 | unsigned num_background; | 357 | unsigned num_background; |
354 | 358 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..6da947daabda 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/seq_file.h> | 14 | #include <linux/seq_file.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/moduleparam.h> | ||
17 | #include <linux/parser.h> | 18 | #include <linux/parser.h> |
18 | #include <linux/statfs.h> | 19 | #include <linux/statfs.h> |
19 | #include <linux/random.h> | 20 | #include <linux/random.h> |
@@ -28,10 +29,34 @@ static struct kmem_cache *fuse_inode_cachep; | |||
28 | struct list_head fuse_conn_list; | 29 | struct list_head fuse_conn_list; |
29 | DEFINE_MUTEX(fuse_mutex); | 30 | DEFINE_MUTEX(fuse_mutex); |
30 | 31 | ||
32 | static int set_global_limit(const char *val, struct kernel_param *kp); | ||
33 | |||
34 | unsigned max_user_bgreq; | ||
35 | module_param_call(max_user_bgreq, set_global_limit, param_get_uint, | ||
36 | &max_user_bgreq, 0644); | ||
37 | __MODULE_PARM_TYPE(max_user_bgreq, "uint"); | ||
38 | MODULE_PARM_DESC(max_user_bgreq, | ||
39 | "Global limit for the maximum number of backgrounded requests an " | ||
40 | "unprivileged user can set"); | ||
41 | |||
42 | unsigned max_user_congthresh; | ||
43 | module_param_call(max_user_congthresh, set_global_limit, param_get_uint, | ||
44 | &max_user_congthresh, 0644); | ||
45 | __MODULE_PARM_TYPE(max_user_congthresh, "uint"); | ||
46 | MODULE_PARM_DESC(max_user_congthresh, | ||
47 | "Global limit for the maximum congestion threshold an " | ||
48 | "unprivileged user can set"); | ||
49 | |||
31 | #define FUSE_SUPER_MAGIC 0x65735546 | 50 | #define FUSE_SUPER_MAGIC 0x65735546 |
32 | 51 | ||
33 | #define FUSE_DEFAULT_BLKSIZE 512 | 52 | #define FUSE_DEFAULT_BLKSIZE 512 |
34 | 53 | ||
54 | /** Maximum number of outstanding background requests */ | ||
55 | #define FUSE_DEFAULT_MAX_BACKGROUND 12 | ||
56 | |||
57 | /** Congestion starts at 75% of maximum */ | ||
58 | #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) | ||
59 | |||
35 | struct fuse_mount_data { | 60 | struct fuse_mount_data { |
36 | int fd; | 61 | int fd; |
37 | unsigned rootmode; | 62 | unsigned rootmode; |
@@ -517,6 +542,8 @@ void fuse_conn_init(struct fuse_conn *fc) | |||
517 | INIT_LIST_HEAD(&fc->bg_queue); | 542 | INIT_LIST_HEAD(&fc->bg_queue); |
518 | INIT_LIST_HEAD(&fc->entry); | 543 | INIT_LIST_HEAD(&fc->entry); |
519 | atomic_set(&fc->num_waiting, 0); | 544 | atomic_set(&fc->num_waiting, 0); |
545 | fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; | ||
546 | fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; | ||
520 | fc->khctr = 0; | 547 | fc->khctr = 0; |
521 | fc->polled_files = RB_ROOT; | 548 | fc->polled_files = RB_ROOT; |
522 | fc->reqctr = 0; | 549 | fc->reqctr = 0; |
@@ -727,6 +754,54 @@ static const struct super_operations fuse_super_operations = { | |||
727 | .show_options = fuse_show_options, | 754 | .show_options = fuse_show_options, |
728 | }; | 755 | }; |
729 | 756 | ||
757 | static void sanitize_global_limit(unsigned *limit) | ||
758 | { | ||
759 | if (*limit == 0) | ||
760 | *limit = ((num_physpages << PAGE_SHIFT) >> 13) / | ||
761 | sizeof(struct fuse_req); | ||
762 | |||
763 | if (*limit >= 1 << 16) | ||
764 | *limit = (1 << 16) - 1; | ||
765 | } | ||
766 | |||
767 | static int set_global_limit(const char *val, struct kernel_param *kp) | ||
768 | { | ||
769 | int rv; | ||
770 | |||
771 | rv = param_set_uint(val, kp); | ||
772 | if (rv) | ||
773 | return rv; | ||
774 | |||
775 | sanitize_global_limit((unsigned *)kp->arg); | ||
776 | |||
777 | return 0; | ||
778 | } | ||
779 | |||
780 | static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) | ||
781 | { | ||
782 | int cap_sys_admin = capable(CAP_SYS_ADMIN); | ||
783 | |||
784 | if (arg->minor < 13) | ||
785 | return; | ||
786 | |||
787 | sanitize_global_limit(&max_user_bgreq); | ||
788 | sanitize_global_limit(&max_user_congthresh); | ||
789 | |||
790 | if (arg->max_background) { | ||
791 | fc->max_background = arg->max_background; | ||
792 | |||
793 | if (!cap_sys_admin && fc->max_background > max_user_bgreq) | ||
794 | fc->max_background = max_user_bgreq; | ||
795 | } | ||
796 | if (arg->congestion_threshold) { | ||
797 | fc->congestion_threshold = arg->congestion_threshold; | ||
798 | |||
799 | if (!cap_sys_admin && | ||
800 | fc->congestion_threshold > max_user_congthresh) | ||
801 | fc->congestion_threshold = max_user_congthresh; | ||
802 | } | ||
803 | } | ||
804 | |||
730 | static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | 805 | static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) |
731 | { | 806 | { |
732 | struct fuse_init_out *arg = &req->misc.init_out; | 807 | struct fuse_init_out *arg = &req->misc.init_out; |
@@ -736,6 +811,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
736 | else { | 811 | else { |
737 | unsigned long ra_pages; | 812 | unsigned long ra_pages; |
738 | 813 | ||
814 | process_init_limits(fc, arg); | ||
815 | |||
739 | if (arg->minor >= 6) { | 816 | if (arg->minor >= 6) { |
740 | ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; | 817 | ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; |
741 | if (arg->flags & FUSE_ASYNC_READ) | 818 | if (arg->flags & FUSE_ASYNC_READ) |
@@ -801,6 +878,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | |||
801 | { | 878 | { |
802 | int err; | 879 | int err; |
803 | 880 | ||
881 | fc->bdi.name = "fuse"; | ||
804 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 882 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
805 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | 883 | fc->bdi.unplug_io_fn = default_unplug_io_fn; |
806 | /* fuse does its own writeback accounting */ | 884 | /* fuse does its own writeback accounting */ |
@@ -893,6 +971,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
893 | if (err) | 971 | if (err) |
894 | goto err_put_conn; | 972 | goto err_put_conn; |
895 | 973 | ||
974 | sb->s_bdi = &fc->bdi; | ||
975 | |||
896 | /* Handle umasking inside the fuse code */ | 976 | /* Handle umasking inside the fuse code */ |
897 | if (sb->s_flags & MS_POSIXACL) | 977 | if (sb->s_flags & MS_POSIXACL) |
898 | fc->dont_mask = 1; | 978 | fc->dont_mask = 1; |
@@ -1147,6 +1227,9 @@ static int __init fuse_init(void) | |||
1147 | if (res) | 1227 | if (res) |
1148 | goto err_sysfs_cleanup; | 1228 | goto err_sysfs_cleanup; |
1149 | 1229 | ||
1230 | sanitize_global_limit(&max_user_bgreq); | ||
1231 | sanitize_global_limit(&max_user_congthresh); | ||
1232 | |||
1150 | return 0; | 1233 | return 0; |
1151 | 1234 | ||
1152 | err_sysfs_cleanup: | 1235 | err_sysfs_cleanup: |
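Taken together, sanitize_global_limit() and process_init_limits() above set the policy: a zero module parameter defaults to letting background requests consume roughly 1/2^13 of physical memory, any value is capped just below 2^16, and a daemon without CAP_SYS_ADMIN has its FUSE_INIT values clamped to the global ceilings. A user-space model of the arithmetic (the 392-byte stand-in for sizeof(struct fuse_req) is an assumption for illustration):

    #include <stdio.h>

    #define FUSE_REQ_SIZE 392ULL    /* stand-in for sizeof(struct fuse_req) */

    static unsigned sanitize_limit(unsigned limit, unsigned long long mem_bytes)
    {
        if (limit == 0)
            limit = (unsigned)((mem_bytes >> 13) / FUSE_REQ_SIZE);
        if (limit >= 1u << 16)
            limit = (1u << 16) - 1;
        return limit;
    }

    /* process_init_limits(): honor the INIT reply, but clamp an
     * unprivileged daemon's request to the module-parameter ceiling */
    static unsigned clamp_limit(unsigned requested, unsigned user_max,
                                int cap_sys_admin)
    {
        if (!cap_sys_admin && requested > user_max)
            return user_max;
        return requested;
    }

    int main(void)
    {
        unsigned long long mem = 2ULL << 30;    /* a 2 GiB machine */
        unsigned ceiling = sanitize_limit(0, mem);

        printf("default max_user_bgreq: %u\n", ceiling);        /* 668 */
        printf("granted: %u\n", clamp_limit(1000, ceiling, 0)); /* 668 */
        return 0;
    }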
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 3da2f1f4f738..21f7e46da4c0 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | EXTRA_CFLAGS := -I$(src) | 1 | EXTRA_CFLAGS := -I$(src) |
2 | obj-$(CONFIG_GFS2_FS) += gfs2.o | 2 | obj-$(CONFIG_GFS2_FS) += gfs2.o |
3 | gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ | 3 | gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ |
4 | glops.o inode.o log.o lops.o main.o meta_io.o \ | 4 | glops.o inode.o log.o lops.o main.o meta_io.o \ |
5 | aops.o dentry.o export.o file.o \ | 5 | aops.o dentry.o export.o file.o \ |
6 | ops_fstype.o ops_inode.o quota.o \ | 6 | ops_fstype.o ops_inode.o quota.o \ |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index fa881bdc3d85..3fc4e3ac7d84 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -19,8 +19,7 @@ | |||
19 | #include "gfs2.h" | 19 | #include "gfs2.h" |
20 | #include "incore.h" | 20 | #include "incore.h" |
21 | #include "acl.h" | 21 | #include "acl.h" |
22 | #include "eaops.h" | 22 | #include "xattr.h" |
23 | #include "eattr.h" | ||
24 | #include "glock.h" | 23 | #include "glock.h" |
25 | #include "inode.h" | 24 | #include "inode.h" |
26 | #include "meta_io.h" | 25 | #include "meta_io.h" |
@@ -31,8 +30,7 @@ | |||
31 | #define ACL_DEFAULT 0 | 30 | #define ACL_DEFAULT 0 |
32 | 31 | ||
33 | int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, | 32 | int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, |
34 | struct gfs2_ea_request *er, | 33 | struct gfs2_ea_request *er, int *remove, mode_t *mode) |
35 | int *remove, mode_t *mode) | ||
36 | { | 34 | { |
37 | struct posix_acl *acl; | 35 | struct posix_acl *acl; |
38 | int error; | 36 | int error; |
@@ -83,30 +81,20 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) | |||
83 | return 0; | 81 | return 0; |
84 | } | 82 | } |
85 | 83 | ||
86 | static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, | 84 | static int acl_get(struct gfs2_inode *ip, const char *name, |
87 | struct gfs2_ea_location *el, char **data, unsigned int *len) | 85 | struct posix_acl **acl, struct gfs2_ea_location *el, |
86 | char **datap, unsigned int *lenp) | ||
88 | { | 87 | { |
89 | struct gfs2_ea_request er; | 88 | char *data; |
90 | struct gfs2_ea_location el_this; | 89 | unsigned int len; |
91 | int error; | 90 | int error; |
92 | 91 | ||
92 | el->el_bh = NULL; | ||
93 | |||
93 | if (!ip->i_eattr) | 94 | if (!ip->i_eattr) |
94 | return 0; | 95 | return 0; |
95 | 96 | ||
96 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | 97 | error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el); |
97 | if (access) { | ||
98 | er.er_name = GFS2_POSIX_ACL_ACCESS; | ||
99 | er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN; | ||
100 | } else { | ||
101 | er.er_name = GFS2_POSIX_ACL_DEFAULT; | ||
102 | er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; | ||
103 | } | ||
104 | er.er_type = GFS2_EATYPE_SYS; | ||
105 | |||
106 | if (!el) | ||
107 | el = &el_this; | ||
108 | |||
109 | error = gfs2_ea_find(ip, &er, el); | ||
110 | if (error) | 98 | if (error) |
111 | return error; | 99 | return error; |
112 | if (!el->el_ea) | 100 | if (!el->el_ea) |
@@ -114,32 +102,31 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, | |||
114 | if (!GFS2_EA_DATA_LEN(el->el_ea)) | 102 | if (!GFS2_EA_DATA_LEN(el->el_ea)) |
115 | goto out; | 103 | goto out; |
116 | 104 | ||
117 | er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); | 105 | len = GFS2_EA_DATA_LEN(el->el_ea); |
118 | er.er_data = kmalloc(er.er_data_len, GFP_NOFS); | 106 | data = kmalloc(len, GFP_NOFS); |
119 | error = -ENOMEM; | 107 | error = -ENOMEM; |
120 | if (!er.er_data) | 108 | if (!data) |
121 | goto out; | 109 | goto out; |
122 | 110 | ||
123 | error = gfs2_ea_get_copy(ip, el, er.er_data); | 111 | error = gfs2_ea_get_copy(ip, el, data, len); |
124 | if (error) | 112 | if (error < 0) |
125 | goto out_kfree; | 113 | goto out_kfree; |
114 | error = 0; | ||
126 | 115 | ||
127 | if (acl) { | 116 | if (acl) { |
128 | *acl = posix_acl_from_xattr(er.er_data, er.er_data_len); | 117 | *acl = posix_acl_from_xattr(data, len); |
129 | if (IS_ERR(*acl)) | 118 | if (IS_ERR(*acl)) |
130 | error = PTR_ERR(*acl); | 119 | error = PTR_ERR(*acl); |
131 | } | 120 | } |
132 | 121 | ||
133 | out_kfree: | 122 | out_kfree: |
134 | if (error || !data) | 123 | if (error || !datap) { |
135 | kfree(er.er_data); | 124 | kfree(data); |
136 | else { | 125 | } else { |
137 | *data = er.er_data; | 126 | *datap = data; |
138 | *len = er.er_data_len; | 127 | *lenp = len; |
139 | } | 128 | } |
140 | out: | 129 | out: |
141 | if (error || el == &el_this) | ||
142 | brelse(el->el_bh); | ||
143 | return error; | 130 | return error; |
144 | } | 131 | } |
145 | 132 | ||
@@ -153,10 +140,12 @@ out: | |||
153 | 140 | ||
154 | int gfs2_check_acl(struct inode *inode, int mask) | 141 | int gfs2_check_acl(struct inode *inode, int mask) |
155 | { | 142 | { |
143 | struct gfs2_ea_location el; | ||
156 | struct posix_acl *acl = NULL; | 144 | struct posix_acl *acl = NULL; |
157 | int error; | 145 | int error; |
158 | 146 | ||
159 | error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL); | 147 | error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL); |
148 | brelse(el.el_bh); | ||
160 | if (error) | 149 | if (error) |
161 | return error; | 150 | return error; |
162 | 151 | ||
@@ -196,10 +185,12 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode) | |||
196 | 185 | ||
197 | int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | 186 | int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) |
198 | { | 187 | { |
188 | struct gfs2_ea_location el; | ||
199 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 189 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
200 | struct posix_acl *acl = NULL, *clone; | 190 | struct posix_acl *acl = NULL, *clone; |
201 | struct gfs2_ea_request er; | ||
202 | mode_t mode = ip->i_inode.i_mode; | 191 | mode_t mode = ip->i_inode.i_mode; |
192 | char *data = NULL; | ||
193 | unsigned int len; | ||
203 | int error; | 194 | int error; |
204 | 195 | ||
205 | if (!sdp->sd_args.ar_posix_acl) | 196 | if (!sdp->sd_args.ar_posix_acl) |
@@ -207,11 +198,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
207 | if (S_ISLNK(ip->i_inode.i_mode)) | 198 | if (S_ISLNK(ip->i_inode.i_mode)) |
208 | return 0; | 199 | return 0; |
209 | 200 | ||
210 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | 201 | error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len); |
211 | er.er_type = GFS2_EATYPE_SYS; | 202 | brelse(el.el_bh); |
212 | |||
213 | error = acl_get(dip, ACL_DEFAULT, &acl, NULL, | ||
214 | &er.er_data, &er.er_data_len); | ||
215 | if (error) | 203 | if (error) |
216 | return error; | 204 | return error; |
217 | if (!acl) { | 205 | if (!acl) { |
@@ -229,9 +217,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
229 | acl = clone; | 217 | acl = clone; |
230 | 218 | ||
231 | if (S_ISDIR(ip->i_inode.i_mode)) { | 219 | if (S_ISDIR(ip->i_inode.i_mode)) { |
232 | er.er_name = GFS2_POSIX_ACL_DEFAULT; | 220 | error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, |
233 | er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; | 221 | GFS2_POSIX_ACL_DEFAULT, data, len, 0); |
234 | error = gfs2_system_eaops.eo_set(ip, &er); | ||
235 | if (error) | 222 | if (error) |
236 | goto out; | 223 | goto out; |
237 | } | 224 | } |
@@ -239,21 +226,19 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
239 | error = posix_acl_create_masq(acl, &mode); | 226 | error = posix_acl_create_masq(acl, &mode); |
240 | if (error < 0) | 227 | if (error < 0) |
241 | goto out; | 228 | goto out; |
242 | if (error > 0) { | 229 | if (error == 0) |
243 | er.er_name = GFS2_POSIX_ACL_ACCESS; | 230 | goto munge; |
244 | er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN; | ||
245 | posix_acl_to_xattr(acl, er.er_data, er.er_data_len); | ||
246 | er.er_mode = mode; | ||
247 | er.er_flags = GFS2_ERF_MODE; | ||
248 | error = gfs2_system_eaops.eo_set(ip, &er); | ||
249 | if (error) | ||
250 | goto out; | ||
251 | } else | ||
252 | munge_mode(ip, mode); | ||
253 | 231 | ||
232 | posix_acl_to_xattr(acl, data, len); | ||
233 | error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, | ||
234 | GFS2_POSIX_ACL_ACCESS, data, len, 0); | ||
235 | if (error) | ||
236 | goto out; | ||
237 | munge: | ||
238 | error = munge_mode(ip, mode); | ||
254 | out: | 239 | out: |
255 | posix_acl_release(acl); | 240 | posix_acl_release(acl); |
256 | kfree(er.er_data); | 241 | kfree(data); |
257 | return error; | 242 | return error; |
258 | } | 243 | } |
259 | 244 | ||
@@ -265,9 +250,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | |||
265 | unsigned int len; | 250 | unsigned int len; |
266 | int error; | 251 | int error; |
267 | 252 | ||
268 | error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len); | 253 | error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len); |
269 | if (error) | 254 | if (error) |
270 | return error; | 255 | goto out_brelse; |
271 | if (!acl) | 256 | if (!acl) |
272 | return gfs2_setattr_simple(ip, attr); | 257 | return gfs2_setattr_simple(ip, attr); |
273 | 258 | ||
@@ -286,8 +271,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | |||
286 | 271 | ||
287 | out: | 272 | out: |
288 | posix_acl_release(acl); | 273 | posix_acl_release(acl); |
289 | brelse(el.el_bh); | ||
290 | kfree(data); | 274 | kfree(data); |
275 | out_brelse: | ||
276 | brelse(el.el_bh); | ||
291 | return error; | 277 | return error; |
292 | } | 278 | } |
293 | 279 | ||
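The acl.c rework above retires struct gfs2_ea_request in favor of calling the extended-attribute layer directly (gfs2_ea_find() by name, gfs2_xattr_set() with an explicit type). From user space that path is reached through the ordinary xattr syscalls; a minimal sketch, assuming a file ./f on a gfs2 mount with POSIX ACLs enabled:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    int main(void)
    {
        char buf[256];
        /* "system.posix_acl_access" is split into GFS2_EATYPE_SYS plus
         * the bare name before reaching the code rewritten above */
        ssize_t n = getxattr("./f", "system.posix_acl_access",
                             buf, sizeof(buf));

        if (n < 0)
            perror("getxattr");
        else
            printf("access ACL xattr is %zd bytes\n", n);
        return 0;
    }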
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 03ebb439ace0..7ebae9a4ecc0 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -624,6 +624,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
624 | { | 624 | { |
625 | struct gfs2_inode *ip = GFS2_I(mapping->host); | 625 | struct gfs2_inode *ip = GFS2_I(mapping->host); |
626 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); | 626 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); |
627 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
627 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 628 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
628 | int alloc_required; | 629 | int alloc_required; |
629 | int error = 0; | 630 | int error = 0; |
@@ -637,6 +638,14 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
637 | error = gfs2_glock_nq(&ip->i_gh); | 638 | error = gfs2_glock_nq(&ip->i_gh); |
638 | if (unlikely(error)) | 639 | if (unlikely(error)) |
639 | goto out_uninit; | 640 | goto out_uninit; |
641 | if (&ip->i_inode == sdp->sd_rindex) { | ||
642 | error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, | ||
643 | GL_NOCACHE, &m_ip->i_gh); | ||
644 | if (unlikely(error)) { | ||
645 | gfs2_glock_dq(&ip->i_gh); | ||
646 | goto out_uninit; | ||
647 | } | ||
648 | } | ||
640 | 649 | ||
641 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); | 650 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); |
642 | if (error) | 651 | if (error) |
@@ -667,6 +676,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
667 | rblocks += data_blocks ? data_blocks : 1; | 676 | rblocks += data_blocks ? data_blocks : 1; |
668 | if (ind_blocks || data_blocks) | 677 | if (ind_blocks || data_blocks) |
669 | rblocks += RES_STATFS + RES_QUOTA; | 678 | rblocks += RES_STATFS + RES_QUOTA; |
679 | if (&ip->i_inode == sdp->sd_rindex) | ||
680 | rblocks += 2 * RES_STATFS; | ||
670 | 681 | ||
671 | error = gfs2_trans_begin(sdp, rblocks, | 682 | error = gfs2_trans_begin(sdp, rblocks, |
672 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 683 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
@@ -712,6 +723,10 @@ out_alloc_put: | |||
712 | gfs2_alloc_put(ip); | 723 | gfs2_alloc_put(ip); |
713 | } | 724 | } |
714 | out_unlock: | 725 | out_unlock: |
726 | if (&ip->i_inode == sdp->sd_rindex) { | ||
727 | gfs2_glock_dq(&m_ip->i_gh); | ||
728 | gfs2_holder_uninit(&m_ip->i_gh); | ||
729 | } | ||
715 | gfs2_glock_dq(&ip->i_gh); | 730 | gfs2_glock_dq(&ip->i_gh); |
716 | out_uninit: | 731 | out_uninit: |
717 | gfs2_holder_uninit(&ip->i_gh); | 732 | gfs2_holder_uninit(&ip->i_gh); |
@@ -725,14 +740,21 @@ out_uninit: | |||
725 | static void adjust_fs_space(struct inode *inode) | 740 | static void adjust_fs_space(struct inode *inode) |
726 | { | 741 | { |
727 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 742 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
743 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
744 | struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); | ||
728 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; | 745 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; |
729 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; | 746 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
747 | struct buffer_head *m_bh, *l_bh; | ||
730 | u64 fs_total, new_free; | 748 | u64 fs_total, new_free; |
731 | 749 | ||
732 | /* Total up the file system space, according to the latest rindex. */ | 750 | /* Total up the file system space, according to the latest rindex. */ |
733 | fs_total = gfs2_ri_total(sdp); | 751 | fs_total = gfs2_ri_total(sdp); |
752 | if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0) | ||
753 | return; | ||
734 | 754 | ||
735 | spin_lock(&sdp->sd_statfs_spin); | 755 | spin_lock(&sdp->sd_statfs_spin); |
756 | gfs2_statfs_change_in(m_sc, m_bh->b_data + | ||
757 | sizeof(struct gfs2_dinode)); | ||
736 | if (fs_total > (m_sc->sc_total + l_sc->sc_total)) | 758 | if (fs_total > (m_sc->sc_total + l_sc->sc_total)) |
737 | new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); | 759 | new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); |
738 | else | 760 | else |
@@ -741,6 +763,13 @@ static void adjust_fs_space(struct inode *inode) | |||
741 | fs_warn(sdp, "File system extended by %llu blocks.\n", | 763 | fs_warn(sdp, "File system extended by %llu blocks.\n", |
742 | (unsigned long long)new_free); | 764 | (unsigned long long)new_free); |
743 | gfs2_statfs_change(sdp, new_free, new_free, 0); | 765 | gfs2_statfs_change(sdp, new_free, new_free, 0); |
766 | |||
767 | if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0) | ||
768 | goto out; | ||
769 | update_statfs(sdp, m_bh, l_bh); | ||
770 | brelse(l_bh); | ||
771 | out: | ||
772 | brelse(m_bh); | ||
744 | } | 773 | } |
745 | 774 | ||
746 | /** | 775 | /** |
@@ -763,6 +792,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
763 | { | 792 | { |
764 | struct gfs2_inode *ip = GFS2_I(inode); | 793 | struct gfs2_inode *ip = GFS2_I(inode); |
765 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 794 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
795 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
766 | u64 to = pos + copied; | 796 | u64 to = pos + copied; |
767 | void *kaddr; | 797 | void *kaddr; |
768 | unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); | 798 | unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); |
@@ -794,6 +824,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
794 | 824 | ||
795 | brelse(dibh); | 825 | brelse(dibh); |
796 | gfs2_trans_end(sdp); | 826 | gfs2_trans_end(sdp); |
827 | if (inode == sdp->sd_rindex) { | ||
828 | gfs2_glock_dq(&m_ip->i_gh); | ||
829 | gfs2_holder_uninit(&m_ip->i_gh); | ||
830 | } | ||
797 | gfs2_glock_dq(&ip->i_gh); | 831 | gfs2_glock_dq(&ip->i_gh); |
798 | gfs2_holder_uninit(&ip->i_gh); | 832 | gfs2_holder_uninit(&ip->i_gh); |
799 | return copied; | 833 | return copied; |
@@ -823,6 +857,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
823 | struct inode *inode = page->mapping->host; | 857 | struct inode *inode = page->mapping->host; |
824 | struct gfs2_inode *ip = GFS2_I(inode); | 858 | struct gfs2_inode *ip = GFS2_I(inode); |
825 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 859 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
860 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
826 | struct buffer_head *dibh; | 861 | struct buffer_head *dibh; |
827 | struct gfs2_alloc *al = ip->i_alloc; | 862 | struct gfs2_alloc *al = ip->i_alloc; |
828 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); | 863 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); |
@@ -865,6 +900,10 @@ failed: | |||
865 | gfs2_quota_unlock(ip); | 900 | gfs2_quota_unlock(ip); |
866 | gfs2_alloc_put(ip); | 901 | gfs2_alloc_put(ip); |
867 | } | 902 | } |
903 | if (inode == sdp->sd_rindex) { | ||
904 | gfs2_glock_dq(&m_ip->i_gh); | ||
905 | gfs2_holder_uninit(&m_ip->i_gh); | ||
906 | } | ||
868 | gfs2_glock_dq(&ip->i_gh); | 907 | gfs2_glock_dq(&ip->i_gh); |
869 | gfs2_holder_uninit(&ip->i_gh); | 908 | gfs2_holder_uninit(&ip->i_gh); |
870 | return ret; | 909 | return ret; |
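adjust_fs_space() above now re-reads the master statfs record under sd_statfs_spin before totalling, then writes the result back via update_statfs(). The growth computation itself is plain arithmetic; a stand-alone model:

    #include <stdio.h>
    #include <inttypes.h>

    /* new_free = fs_total - (master + local), never negative */
    static uint64_t new_free_blocks(uint64_t fs_total, uint64_t master_total,
                                    uint64_t local_total)
    {
        uint64_t have = master_total + local_total;

        return fs_total > have ? fs_total - have : 0;
    }

    int main(void)
    {
        /* rindex totals 1000 blocks; statfs records account for only 900 */
        printf("extended by %" PRIu64 " blocks\n",
               new_free_blocks(1000, 800, 100));   /* 100 */
        return 0;
    }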
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 022c66cd5606..91beddadd388 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
@@ -107,8 +107,26 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str) | |||
107 | return 0; | 107 | return 0; |
108 | } | 108 | } |
109 | 109 | ||
110 | static int gfs2_dentry_delete(struct dentry *dentry) | ||
111 | { | ||
112 | struct gfs2_inode *ginode; | ||
113 | |||
114 | if (!dentry->d_inode) | ||
115 | return 0; | ||
116 | |||
117 | ginode = GFS2_I(dentry->d_inode); | ||
118 | if (!ginode->i_iopen_gh.gh_gl) | ||
119 | return 0; | ||
120 | |||
121 | if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags)) | ||
122 | return 1; | ||
123 | |||
124 | return 0; | ||
125 | } | ||
126 | |||
110 | const struct dentry_operations gfs2_dops = { | 127 | const struct dentry_operations gfs2_dops = { |
111 | .d_revalidate = gfs2_drevalidate, | 128 | .d_revalidate = gfs2_drevalidate, |
112 | .d_hash = gfs2_dhash, | 129 | .d_hash = gfs2_dhash, |
130 | .d_delete = gfs2_dentry_delete, | ||
113 | }; | 131 | }; |
114 | 132 | ||
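gfs2_dentry_delete() above unhashes a dentry as soon as the inode's iopen glock carries a pending demote, so a delete requested by another node is not stalled by the local dcache. A stub model of the decision (the GLF_DEMOTE bit position is illustrative):

    #include <stdio.h>

    #define GLF_DEMOTE 1    /* bit index; illustrative, see glock.h */

    struct iopen_glock {
        unsigned long flags;
    };

    /* return 1 to delete the dentry immediately: keeping it cached
     * would pin the glock another node is waiting for */
    static int should_delete_dentry(const struct iopen_glock *gl)
    {
        return (gl->flags >> GLF_DEMOTE) & 1;
    }

    int main(void)
    {
        struct iopen_glock gl = { .flags = 1UL << GLF_DEMOTE };

        printf("%d\n", should_delete_dentry(&gl));  /* 1 */
        return 0;
    }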
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c deleted file mode 100644 index dee9b03e5b37..000000000000 --- a/fs/gfs2/eaops.c +++ /dev/null | |||
@@ -1,157 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/slab.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/completion.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include <linux/capability.h> | ||
15 | #include <linux/xattr.h> | ||
16 | #include <linux/gfs2_ondisk.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | #include "gfs2.h" | ||
20 | #include "incore.h" | ||
21 | #include "acl.h" | ||
22 | #include "eaops.h" | ||
23 | #include "eattr.h" | ||
24 | #include "util.h" | ||
25 | |||
26 | /** | ||
27 | * gfs2_ea_name2type - get the type of the ea, and truncate type from the name | ||
28 | * @namep: ea name, possibly with type appended | ||
29 | * | ||
30 | * Returns: GFS2_EATYPE_XXX | ||
31 | */ | ||
32 | |||
33 | unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) | ||
34 | { | ||
35 | unsigned int type; | ||
36 | |||
37 | if (strncmp(name, "system.", 7) == 0) { | ||
38 | type = GFS2_EATYPE_SYS; | ||
39 | if (truncated_name) | ||
40 | *truncated_name = name + sizeof("system.") - 1; | ||
41 | } else if (strncmp(name, "user.", 5) == 0) { | ||
42 | type = GFS2_EATYPE_USR; | ||
43 | if (truncated_name) | ||
44 | *truncated_name = name + sizeof("user.") - 1; | ||
45 | } else if (strncmp(name, "security.", 9) == 0) { | ||
46 | type = GFS2_EATYPE_SECURITY; | ||
47 | if (truncated_name) | ||
48 | *truncated_name = name + sizeof("security.") - 1; | ||
49 | } else { | ||
50 | type = GFS2_EATYPE_UNUSED; | ||
51 | if (truncated_name) | ||
52 | *truncated_name = NULL; | ||
53 | } | ||
54 | |||
55 | return type; | ||
56 | } | ||
57 | |||
58 | static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
59 | { | ||
60 | if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && | ||
61 | !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) && | ||
62 | !capable(CAP_SYS_ADMIN)) | ||
63 | return -EPERM; | ||
64 | |||
65 | if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 && | ||
66 | (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) || | ||
67 | GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) | ||
68 | return -EOPNOTSUPP; | ||
69 | |||
70 | return gfs2_ea_get_i(ip, er); | ||
71 | } | ||
72 | |||
73 | static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
74 | { | ||
75 | int remove = 0; | ||
76 | int error; | ||
77 | |||
78 | if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) { | ||
79 | if (!(er->er_flags & GFS2_ERF_MODE)) { | ||
80 | er->er_mode = ip->i_inode.i_mode; | ||
81 | er->er_flags |= GFS2_ERF_MODE; | ||
82 | } | ||
83 | error = gfs2_acl_validate_set(ip, 1, er, | ||
84 | &remove, &er->er_mode); | ||
85 | if (error) | ||
86 | return error; | ||
87 | error = gfs2_ea_set_i(ip, er); | ||
88 | if (error) | ||
89 | return error; | ||
90 | if (remove) | ||
91 | gfs2_ea_remove_i(ip, er); | ||
92 | return 0; | ||
93 | |||
94 | } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) { | ||
95 | error = gfs2_acl_validate_set(ip, 0, er, | ||
96 | &remove, NULL); | ||
97 | if (error) | ||
98 | return error; | ||
99 | if (!remove) | ||
100 | error = gfs2_ea_set_i(ip, er); | ||
101 | else { | ||
102 | error = gfs2_ea_remove_i(ip, er); | ||
103 | if (error == -ENODATA) | ||
104 | error = 0; | ||
105 | } | ||
106 | return error; | ||
107 | } | ||
108 | |||
109 | return -EPERM; | ||
110 | } | ||
111 | |||
112 | static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
113 | { | ||
114 | if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) { | ||
115 | int error = gfs2_acl_validate_remove(ip, 1); | ||
116 | if (error) | ||
117 | return error; | ||
118 | |||
119 | } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) { | ||
120 | int error = gfs2_acl_validate_remove(ip, 0); | ||
121 | if (error) | ||
122 | return error; | ||
123 | |||
124 | } else | ||
125 | return -EPERM; | ||
126 | |||
127 | return gfs2_ea_remove_i(ip, er); | ||
128 | } | ||
129 | |||
130 | static const struct gfs2_eattr_operations gfs2_user_eaops = { | ||
131 | .eo_get = gfs2_ea_get_i, | ||
132 | .eo_set = gfs2_ea_set_i, | ||
133 | .eo_remove = gfs2_ea_remove_i, | ||
134 | .eo_name = "user", | ||
135 | }; | ||
136 | |||
137 | const struct gfs2_eattr_operations gfs2_system_eaops = { | ||
138 | .eo_get = system_eo_get, | ||
139 | .eo_set = system_eo_set, | ||
140 | .eo_remove = system_eo_remove, | ||
141 | .eo_name = "system", | ||
142 | }; | ||
143 | |||
144 | static const struct gfs2_eattr_operations gfs2_security_eaops = { | ||
145 | .eo_get = gfs2_ea_get_i, | ||
146 | .eo_set = gfs2_ea_set_i, | ||
147 | .eo_remove = gfs2_ea_remove_i, | ||
148 | .eo_name = "security", | ||
149 | }; | ||
150 | |||
151 | const struct gfs2_eattr_operations *gfs2_ea_ops[] = { | ||
152 | NULL, | ||
153 | &gfs2_user_eaops, | ||
154 | &gfs2_system_eaops, | ||
155 | &gfs2_security_eaops, | ||
156 | }; | ||
157 | |||
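gfs2_ea_name2type() from the deleted file above survives the consolidation: the Makefile hunk earlier replaces eaops.o and eattr.o with a single xattr.o, where the name-to-type split now lives. A user-space model of the prefix mapping (the type values are illustrative):

    #include <stdio.h>
    #include <string.h>

    enum { EATYPE_UNUSED, EATYPE_USR, EATYPE_SYS, EATYPE_SECURITY };

    static unsigned ea_name2type(const char *name, const char **truncated)
    {
        unsigned type;

        if (strncmp(name, "system.", 7) == 0) {
            type = EATYPE_SYS;
            if (truncated)
                *truncated = name + 7;
        } else if (strncmp(name, "user.", 5) == 0) {
            type = EATYPE_USR;
            if (truncated)
                *truncated = name + 5;
        } else if (strncmp(name, "security.", 9) == 0) {
            type = EATYPE_SECURITY;
            if (truncated)
                *truncated = name + 9;
        } else {
            type = EATYPE_UNUSED;
            if (truncated)
                *truncated = NULL;
        }
        return type;
    }

    int main(void)
    {
        const char *rest;

        printf("%u %s\n", ea_name2type("system.posix_acl_access", &rest),
               rest);   /* 2 posix_acl_access */
        return 0;
    }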
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h deleted file mode 100644 index da2f7fbbb40d..000000000000 --- a/fs/gfs2/eaops.h +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __EAOPS_DOT_H__ | ||
11 | #define __EAOPS_DOT_H__ | ||
12 | |||
13 | struct gfs2_ea_request; | ||
14 | struct gfs2_inode; | ||
15 | |||
16 | struct gfs2_eattr_operations { | ||
17 | int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
18 | int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
19 | int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
20 | char *eo_name; | ||
21 | }; | ||
22 | |||
23 | unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name); | ||
24 | |||
25 | extern const struct gfs2_eattr_operations gfs2_system_eaops; | ||
26 | |||
27 | extern const struct gfs2_eattr_operations *gfs2_ea_ops[]; | ||
28 | |||
29 | #endif /* __EAOPS_DOT_H__ */ | ||
30 | |||
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9200ef221716..d15876e9aa26 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -143,17 +143,14 @@ static struct dentry *gfs2_get_parent(struct dentry *child) | |||
143 | } | 143 | } |
144 | 144 | ||
145 | static struct dentry *gfs2_get_dentry(struct super_block *sb, | 145 | static struct dentry *gfs2_get_dentry(struct super_block *sb, |
146 | struct gfs2_inum_host *inum) | 146 | struct gfs2_inum_host *inum) |
147 | { | 147 | { |
148 | struct gfs2_sbd *sdp = sb->s_fs_info; | 148 | struct gfs2_sbd *sdp = sb->s_fs_info; |
149 | struct gfs2_holder i_gh, ri_gh, rgd_gh; | 149 | struct gfs2_holder i_gh; |
150 | struct gfs2_rgrpd *rgd; | ||
151 | struct inode *inode; | 150 | struct inode *inode; |
152 | struct dentry *dentry; | 151 | struct dentry *dentry; |
153 | int error; | 152 | int error; |
154 | 153 | ||
155 | /* System files? */ | ||
156 | |||
157 | inode = gfs2_ilookup(sb, inum->no_addr); | 154 | inode = gfs2_ilookup(sb, inum->no_addr); |
158 | if (inode) { | 155 | if (inode) { |
159 | if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { | 156 | if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { |
@@ -168,29 +165,11 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, | |||
168 | if (error) | 165 | if (error) |
169 | return ERR_PTR(error); | 166 | return ERR_PTR(error); |
170 | 167 | ||
171 | error = gfs2_rindex_hold(sdp, &ri_gh); | 168 | error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE); |
172 | if (error) | 169 | if (error) |
173 | goto fail; | 170 | goto fail; |
174 | 171 | ||
175 | error = -EINVAL; | 172 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0, 0); |
176 | rgd = gfs2_blk2rgrpd(sdp, inum->no_addr); | ||
177 | if (!rgd) | ||
178 | goto fail_rindex; | ||
179 | |||
180 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); | ||
181 | if (error) | ||
182 | goto fail_rindex; | ||
183 | |||
184 | error = -ESTALE; | ||
185 | if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE) | ||
186 | goto fail_rgd; | ||
187 | |||
188 | gfs2_glock_dq_uninit(&rgd_gh); | ||
189 | gfs2_glock_dq_uninit(&ri_gh); | ||
190 | |||
191 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, | ||
192 | inum->no_addr, | ||
193 | 0, 0); | ||
194 | if (IS_ERR(inode)) { | 173 | if (IS_ERR(inode)) { |
195 | error = PTR_ERR(inode); | 174 | error = PTR_ERR(inode); |
196 | goto fail; | 175 | goto fail; |
@@ -224,13 +203,6 @@ out_inode: | |||
224 | if (!IS_ERR(dentry)) | 203 | if (!IS_ERR(dentry)) |
225 | dentry->d_op = &gfs2_dops; | 204 | dentry->d_op = &gfs2_dops; |
226 | return dentry; | 205 | return dentry; |
227 | |||
228 | fail_rgd: | ||
229 | gfs2_glock_dq_uninit(&rgd_gh); | ||
230 | |||
231 | fail_rindex: | ||
232 | gfs2_glock_dq_uninit(&ri_gh); | ||
233 | |||
234 | fail: | 206 | fail: |
235 | gfs2_glock_dq_uninit(&i_gh); | 207 | gfs2_glock_dq_uninit(&i_gh); |
236 | return ERR_PTR(error); | 208 | return ERR_PTR(error); |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 73318a3ce6f1..166f38fbd246 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include "rgrp.h" | 38 | #include "rgrp.h" |
39 | #include "trans.h" | 39 | #include "trans.h" |
40 | #include "util.h" | 40 | #include "util.h" |
41 | #include "eaops.h" | ||
42 | 41 | ||
43 | /** | 42 | /** |
44 | * gfs2_llseek - seek to a location in a file | 43 | * gfs2_llseek - seek to a location in a file |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 297421c0427a..8b674b1f3a55 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -63,6 +63,7 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int | |||
63 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | 63 | static DECLARE_RWSEM(gfs2_umount_flush_sem); |
64 | static struct dentry *gfs2_root; | 64 | static struct dentry *gfs2_root; |
65 | static struct workqueue_struct *glock_workqueue; | 65 | static struct workqueue_struct *glock_workqueue; |
66 | struct workqueue_struct *gfs2_delete_workqueue; | ||
66 | static LIST_HEAD(lru_list); | 67 | static LIST_HEAD(lru_list); |
67 | static atomic_t lru_count = ATOMIC_INIT(0); | 68 | static atomic_t lru_count = ATOMIC_INIT(0); |
68 | static DEFINE_SPINLOCK(lru_lock); | 69 | static DEFINE_SPINLOCK(lru_lock); |
@@ -167,13 +168,33 @@ static void glock_free(struct gfs2_glock *gl) | |||
167 | * | 168 | * |
168 | */ | 169 | */ |
169 | 170 | ||
170 | static void gfs2_glock_hold(struct gfs2_glock *gl) | 171 | void gfs2_glock_hold(struct gfs2_glock *gl) |
171 | { | 172 | { |
172 | GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); | 173 | GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); |
173 | atomic_inc(&gl->gl_ref); | 174 | atomic_inc(&gl->gl_ref); |
174 | } | 175 | } |
175 | 176 | ||
176 | /** | 177 | /** |
178 | * demote_ok - Check to see if it's ok to unlock a glock | ||
179 | * @gl: the glock | ||
180 | * | ||
181 | * Returns: 1 if it's ok | ||
182 | */ | ||
183 | |||
184 | static int demote_ok(const struct gfs2_glock *gl) | ||
185 | { | ||
186 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
187 | |||
188 | if (gl->gl_state == LM_ST_UNLOCKED) | ||
189 | return 0; | ||
190 | if (!list_empty(&gl->gl_holders)) | ||
191 | return 0; | ||
192 | if (glops->go_demote_ok) | ||
193 | return glops->go_demote_ok(gl); | ||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | /** | ||
177 | * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list | 198 | * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list |
178 | * @gl: the glock | 199 | * @gl: the glock |
179 | * | 200 | * |
@@ -181,8 +202,13 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) | |||
181 | 202 | ||
182 | static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | 203 | static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) |
183 | { | 204 | { |
205 | int may_reclaim; | ||
206 | may_reclaim = (demote_ok(gl) && | ||
207 | (atomic_read(&gl->gl_ref) == 1 || | ||
208 | (gl->gl_name.ln_type == LM_TYPE_INODE && | ||
209 | atomic_read(&gl->gl_ref) <= 2))); | ||
184 | spin_lock(&lru_lock); | 210 | spin_lock(&lru_lock); |
185 | if (list_empty(&gl->gl_lru) && gl->gl_state != LM_ST_UNLOCKED) { | 211 | if (list_empty(&gl->gl_lru) && may_reclaim) { |
186 | list_add_tail(&gl->gl_lru, &lru_list); | 212 | list_add_tail(&gl->gl_lru, &lru_list); |
187 | atomic_inc(&lru_count); | 213 | atomic_inc(&lru_count); |
188 | } | 214 | } |
@@ -190,6 +216,21 @@ static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | |||
190 | } | 216 | } |
191 | 217 | ||
192 | /** | 218 | /** |
219 | * gfs2_glock_put_nolock() - Decrement reference count on glock | ||
220 | * @gl: The glock to put | ||
221 | * | ||
222 | * This function should only be used if the caller has its own reference | ||
223 | * to the glock, in addition to the one it is dropping. | ||
224 | */ | ||
225 | |||
226 | void gfs2_glock_put_nolock(struct gfs2_glock *gl) | ||
227 | { | ||
228 | if (atomic_dec_and_test(&gl->gl_ref)) | ||
229 | GLOCK_BUG_ON(gl, 1); | ||
230 | gfs2_glock_schedule_for_reclaim(gl); | ||
231 | } | ||
232 | |||
233 | /** | ||
193 | * gfs2_glock_put() - Decrement reference count on glock | 234 | * gfs2_glock_put() - Decrement reference count on glock |
194 | * @gl: The glock to put | 235 | * @gl: The glock to put |
195 | * | 236 | * |
@@ -214,9 +255,9 @@ int gfs2_glock_put(struct gfs2_glock *gl) | |||
214 | rv = 1; | 255 | rv = 1; |
215 | goto out; | 256 | goto out; |
216 | } | 257 | } |
217 | /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */ | 258 | spin_lock(&gl->gl_spin); |
218 | if (atomic_read(&gl->gl_ref) == 2) | 259 | gfs2_glock_schedule_for_reclaim(gl); |
219 | gfs2_glock_schedule_for_reclaim(gl); | 260 | spin_unlock(&gl->gl_spin); |
220 | write_unlock(gl_lock_addr(gl->gl_hash)); | 261 | write_unlock(gl_lock_addr(gl->gl_hash)); |
221 | out: | 262 | out: |
222 | return rv; | 263 | return rv; |
@@ -398,7 +439,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
398 | if (held2) | 439 | if (held2) |
399 | gfs2_glock_hold(gl); | 440 | gfs2_glock_hold(gl); |
400 | else | 441 | else |
401 | gfs2_glock_put(gl); | 442 | gfs2_glock_put_nolock(gl); |
402 | } | 443 | } |
403 | 444 | ||
404 | gl->gl_state = new_state; | 445 | gl->gl_state = new_state; |
@@ -633,12 +674,35 @@ out: | |||
633 | out_sched: | 674 | out_sched: |
634 | gfs2_glock_hold(gl); | 675 | gfs2_glock_hold(gl); |
635 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 676 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
636 | gfs2_glock_put(gl); | 677 | gfs2_glock_put_nolock(gl); |
637 | out_unlock: | 678 | out_unlock: |
638 | clear_bit(GLF_LOCK, &gl->gl_flags); | 679 | clear_bit(GLF_LOCK, &gl->gl_flags); |
639 | goto out; | 680 | goto out; |
640 | } | 681 | } |
641 | 682 | ||
683 | static void delete_work_func(struct work_struct *work) | ||
684 | { | ||
685 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); | ||
686 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
687 | struct gfs2_inode *ip = NULL; | ||
688 | struct inode *inode; | ||
689 | u64 no_addr = 0; | ||
690 | |||
691 | spin_lock(&gl->gl_spin); | ||
692 | ip = (struct gfs2_inode *)gl->gl_object; | ||
693 | if (ip) | ||
694 | no_addr = ip->i_no_addr; | ||
695 | spin_unlock(&gl->gl_spin); | ||
696 | if (ip) { | ||
697 | inode = gfs2_ilookup(sdp->sd_vfs, no_addr); | ||
698 | if (inode) { | ||
699 | d_prune_aliases(inode); | ||
700 | iput(inode); | ||
701 | } | ||
702 | } | ||
703 | gfs2_glock_put(gl); | ||
704 | } | ||
705 | |||
642 | static void glock_work_func(struct work_struct *work) | 706 | static void glock_work_func(struct work_struct *work) |
643 | { | 707 | { |
644 | unsigned long delay = 0; | 708 | unsigned long delay = 0; |
@@ -717,6 +781,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
717 | gl->gl_sbd = sdp; | 781 | gl->gl_sbd = sdp; |
718 | gl->gl_aspace = NULL; | 782 | gl->gl_aspace = NULL; |
719 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); | 783 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); |
784 | INIT_WORK(&gl->gl_delete, delete_work_func); | ||
720 | 785 | ||
721 | /* If this glock protects actual on-disk data or metadata blocks, | 786 | /* If this glock protects actual on-disk data or metadata blocks, |
722 | create a VFS inode to manage the pages/buffers holding them. */ | 787 | create a VFS inode to manage the pages/buffers holding them. */ |
@@ -858,6 +923,8 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
858 | gl->gl_demote_state != state) { | 923 | gl->gl_demote_state != state) { |
859 | gl->gl_demote_state = LM_ST_UNLOCKED; | 924 | gl->gl_demote_state = LM_ST_UNLOCKED; |
860 | } | 925 | } |
926 | if (gl->gl_ops->go_callback) | ||
927 | gl->gl_ops->go_callback(gl); | ||
861 | trace_gfs2_demote_rq(gl); | 928 | trace_gfs2_demote_rq(gl); |
862 | } | 929 | } |
863 | 930 | ||
@@ -1274,33 +1341,12 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) | |||
1274 | gfs2_glock_put(gl); | 1341 | gfs2_glock_put(gl); |
1275 | } | 1342 | } |
1276 | 1343 | ||
1277 | /** | ||
1278 | * demote_ok - Check to see if it's ok to unlock a glock | ||
1279 | * @gl: the glock | ||
1280 | * | ||
1281 | * Returns: 1 if it's ok | ||
1282 | */ | ||
1283 | |||
1284 | static int demote_ok(const struct gfs2_glock *gl) | ||
1285 | { | ||
1286 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
1287 | |||
1288 | if (gl->gl_state == LM_ST_UNLOCKED) | ||
1289 | return 0; | ||
1290 | if (!list_empty(&gl->gl_holders)) | ||
1291 | return 0; | ||
1292 | if (glops->go_demote_ok) | ||
1293 | return glops->go_demote_ok(gl); | ||
1294 | return 1; | ||
1295 | } | ||
1296 | |||
1297 | 1344 | ||
1298 | static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) | 1345 | static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) |
1299 | { | 1346 | { |
1300 | struct gfs2_glock *gl; | 1347 | struct gfs2_glock *gl; |
1301 | int may_demote; | 1348 | int may_demote; |
1302 | int nr_skipped = 0; | 1349 | int nr_skipped = 0; |
1303 | int got_ref = 0; | ||
1304 | LIST_HEAD(skipped); | 1350 | LIST_HEAD(skipped); |
1305 | 1351 | ||
1306 | if (nr == 0) | 1352 | if (nr == 0) |
@@ -1315,37 +1361,29 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) | |||
1315 | list_del_init(&gl->gl_lru); | 1361 | list_del_init(&gl->gl_lru); |
1316 | atomic_dec(&lru_count); | 1362 | atomic_dec(&lru_count); |
1317 | 1363 | ||
1364 | /* Check if glock is about to be freed */ | ||
1365 | if (atomic_read(&gl->gl_ref) == 0) | ||
1366 | continue; | ||
1367 | |||
1318 | /* Test for being demotable */ | 1368 | /* Test for being demotable */ |
1319 | if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 1369 | if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
1320 | gfs2_glock_hold(gl); | 1370 | gfs2_glock_hold(gl); |
1321 | got_ref = 1; | ||
1322 | spin_unlock(&lru_lock); | 1371 | spin_unlock(&lru_lock); |
1323 | spin_lock(&gl->gl_spin); | 1372 | spin_lock(&gl->gl_spin); |
1324 | may_demote = demote_ok(gl); | 1373 | may_demote = demote_ok(gl); |
1325 | spin_unlock(&gl->gl_spin); | ||
1326 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
1327 | if (may_demote) { | 1374 | if (may_demote) { |
1328 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1375 | handle_callback(gl, LM_ST_UNLOCKED, 0); |
1329 | nr--; | 1376 | nr--; |
1330 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
1331 | gfs2_glock_put(gl); | ||
1332 | got_ref = 0; | ||
1333 | } | 1377 | } |
1378 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
1379 | gfs2_glock_put_nolock(gl); | ||
1380 | spin_unlock(&gl->gl_spin); | ||
1381 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
1334 | spin_lock(&lru_lock); | 1382 | spin_lock(&lru_lock); |
1335 | if (may_demote) | 1383 | continue; |
1336 | continue; | ||
1337 | } | ||
1338 | if (list_empty(&gl->gl_lru) && | ||
1339 | (atomic_read(&gl->gl_ref) <= (2 + got_ref))) { | ||
1340 | nr_skipped++; | ||
1341 | list_add(&gl->gl_lru, &skipped); | ||
1342 | } | ||
1343 | if (got_ref) { | ||
1344 | spin_unlock(&lru_lock); | ||
1345 | gfs2_glock_put(gl); | ||
1346 | spin_lock(&lru_lock); | ||
1347 | got_ref = 0; | ||
1348 | } | 1384 | } |
1385 | nr_skipped++; | ||
1386 | list_add(&gl->gl_lru, &skipped); | ||
1349 | } | 1387 | } |
1350 | list_splice(&skipped, &lru_list); | 1388 | list_splice(&skipped, &lru_list); |
1351 | atomic_add(nr_skipped, &lru_count); | 1389 | atomic_add(nr_skipped, &lru_count); |
@@ -1727,6 +1765,11 @@ int __init gfs2_glock_init(void) | |||
1727 | glock_workqueue = create_workqueue("glock_workqueue"); | 1765 | glock_workqueue = create_workqueue("glock_workqueue"); |
1728 | if (IS_ERR(glock_workqueue)) | 1766 | if (IS_ERR(glock_workqueue)) |
1729 | return PTR_ERR(glock_workqueue); | 1767 | return PTR_ERR(glock_workqueue); |
1768 | gfs2_delete_workqueue = create_workqueue("delete_workqueue"); | ||
1769 | if (IS_ERR(gfs2_delete_workqueue)) { | ||
1770 | destroy_workqueue(glock_workqueue); | ||
1771 | return PTR_ERR(gfs2_delete_workqueue); | ||
1772 | } | ||
1730 | 1773 | ||
1731 | register_shrinker(&glock_shrinker); | 1774 | register_shrinker(&glock_shrinker); |
1732 | 1775 | ||
@@ -1737,6 +1780,7 @@ void gfs2_glock_exit(void) | |||
1737 | { | 1780 | { |
1738 | unregister_shrinker(&glock_shrinker); | 1781 | unregister_shrinker(&glock_shrinker); |
1739 | destroy_workqueue(glock_workqueue); | 1782 | destroy_workqueue(glock_workqueue); |
1783 | destroy_workqueue(gfs2_delete_workqueue); | ||
1740 | } | 1784 | } |
1741 | 1785 | ||
1742 | static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) | 1786 | static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) |
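The glock.c changes above hoist demote_ok() ahead of gfs2_glock_schedule_for_reclaim() so that a glock only joins the LRU when it is both demotable and down to its last references. A stub model of the new may_reclaim test, reduced to the fields the test actually reads:

    #include <stdio.h>

    enum { LM_TYPE_INODE, LM_TYPE_OTHER };

    struct glock {
        int type;
        int refs;          /* atomic_read(&gl->gl_ref) */
        int unlocked;      /* gl_state == LM_ST_UNLOCKED */
        int has_holders;   /* !list_empty(&gl->gl_holders) */
    };

    /* demote_ok(); the per-type go_demote_ok() hook is elided */
    static int demote_ok(const struct glock *gl)
    {
        return !gl->unlocked && !gl->has_holders;
    }

    /* reclaim only when demotable and (nearly) unreferenced; inode
     * glocks tolerate one extra reference held by the inode itself */
    static int may_reclaim(const struct glock *gl)
    {
        return demote_ok(gl) &&
               (gl->refs == 1 ||
                (gl->type == LM_TYPE_INODE && gl->refs <= 2));
    }

    int main(void)
    {
        struct glock gl = { .type = LM_TYPE_INODE, .refs = 2 };

        printf("%d\n", may_reclaim(&gl));   /* 1: goes on the LRU */
        return 0;
    }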
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index a602a28f6f08..c609894ec0d0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -143,6 +143,7 @@ struct lm_lockops { | |||
143 | 143 | ||
144 | #define GLR_TRYFAILED 13 | 144 | #define GLR_TRYFAILED 13 |
145 | 145 | ||
146 | extern struct workqueue_struct *gfs2_delete_workqueue; | ||
146 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | 147 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) |
147 | { | 148 | { |
148 | struct gfs2_holder *gh; | 149 | struct gfs2_holder *gh; |
@@ -191,6 +192,8 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) | |||
191 | int gfs2_glock_get(struct gfs2_sbd *sdp, | 192 | int gfs2_glock_get(struct gfs2_sbd *sdp, |
192 | u64 number, const struct gfs2_glock_operations *glops, | 193 | u64 number, const struct gfs2_glock_operations *glops, |
193 | int create, struct gfs2_glock **glp); | 194 | int create, struct gfs2_glock **glp); |
195 | void gfs2_glock_hold(struct gfs2_glock *gl); | ||
196 | void gfs2_glock_put_nolock(struct gfs2_glock *gl); | ||
194 | int gfs2_glock_put(struct gfs2_glock *gl); | 197 | int gfs2_glock_put(struct gfs2_glock *gl); |
195 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | 198 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, |
196 | struct gfs2_holder *gh); | 199 | struct gfs2_holder *gh); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d5e4ab155ca0..6985eef06c39 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -323,6 +323,7 @@ static void trans_go_sync(struct gfs2_glock *gl) | |||
323 | 323 | ||
324 | if (gl->gl_state != LM_ST_UNLOCKED && | 324 | if (gl->gl_state != LM_ST_UNLOCKED && |
325 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | 325 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
326 | flush_workqueue(gfs2_delete_workqueue); | ||
326 | gfs2_meta_syncfs(sdp); | 327 | gfs2_meta_syncfs(sdp); |
327 | gfs2_log_shutdown(sdp); | 328 | gfs2_log_shutdown(sdp); |
328 | } | 329 | } |
@@ -372,6 +373,25 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl) | |||
372 | return 0; | 373 | return 0; |
373 | } | 374 | } |
374 | 375 | ||
376 | /** | ||
377 | * iopen_go_callback - schedule the dcache entry for the inode to be deleted | ||
378 | * @gl: the glock | ||
379 | * | ||
380 | * gl_spin lock is held while calling this | ||
381 | */ | ||
382 | static void iopen_go_callback(struct gfs2_glock *gl) | ||
383 | { | ||
384 | struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; | ||
385 | |||
386 | if (gl->gl_demote_state == LM_ST_UNLOCKED && | ||
387 | gl->gl_state == LM_ST_SHARED && | ||
388 | ip && test_bit(GIF_USER, &ip->i_flags)) { | ||
389 | gfs2_glock_hold(gl); | ||
390 | if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) | ||
391 | gfs2_glock_put_nolock(gl); | ||
392 | } | ||
393 | } | ||
394 | |||
375 | const struct gfs2_glock_operations gfs2_meta_glops = { | 395 | const struct gfs2_glock_operations gfs2_meta_glops = { |
376 | .go_type = LM_TYPE_META, | 396 | .go_type = LM_TYPE_META, |
377 | }; | 397 | }; |
@@ -406,6 +426,7 @@ const struct gfs2_glock_operations gfs2_trans_glops = { | |||
406 | 426 | ||
407 | const struct gfs2_glock_operations gfs2_iopen_glops = { | 427 | const struct gfs2_glock_operations gfs2_iopen_glops = { |
408 | .go_type = LM_TYPE_IOPEN, | 428 | .go_type = LM_TYPE_IOPEN, |
429 | .go_callback = iopen_go_callback, | ||
409 | }; | 430 | }; |
410 | 431 | ||
411 | const struct gfs2_glock_operations gfs2_flock_glops = { | 432 | const struct gfs2_glock_operations gfs2_flock_glops = { |
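iopen_go_callback() above schedules delete_work_func() only when a remote node asks a SHARED iopen glock on a user inode to drop to UNLOCKED, i.e. when the inode was deleted elsewhere in the cluster. A stub model of the trigger condition:

    #include <stdio.h>

    enum { LM_ST_UNLOCKED, LM_ST_SHARED, LM_ST_EXCLUSIVE };

    struct iopen_glock {
        int demote_state;    /* state a remote node asked for */
        int state;           /* state currently held */
        int has_user_inode;  /* gl_object set with GIF_USER */
    };

    /* queue delete work (prune dcache aliases, iput the inode) only
     * for a remote "please unlock" on a shared iopen glock */
    static int should_queue_delete(const struct iopen_glock *gl)
    {
        return gl->demote_state == LM_ST_UNLOCKED &&
               gl->state == LM_ST_SHARED &&
               gl->has_user_inode;
    }

    int main(void)
    {
        struct iopen_glock gl = { LM_ST_UNLOCKED, LM_ST_SHARED, 1 };

        printf("%d\n", should_queue_delete(&gl));   /* 1 */
        return 0;
    }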
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 225347fbff3c..6edb423f90b3 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -159,6 +159,7 @@ struct gfs2_glock_operations { | |||
159 | int (*go_lock) (struct gfs2_holder *gh); | 159 | int (*go_lock) (struct gfs2_holder *gh); |
160 | void (*go_unlock) (struct gfs2_holder *gh); | 160 | void (*go_unlock) (struct gfs2_holder *gh); |
161 | int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); | 161 | int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); |
162 | void (*go_callback) (struct gfs2_glock *gl); | ||
162 | const int go_type; | 163 | const int go_type; |
163 | const unsigned long go_min_hold_time; | 164 | const unsigned long go_min_hold_time; |
164 | }; | 165 | }; |
@@ -228,6 +229,7 @@ struct gfs2_glock { | |||
228 | struct list_head gl_ail_list; | 229 | struct list_head gl_ail_list; |
229 | atomic_t gl_ail_count; | 230 | atomic_t gl_ail_count; |
230 | struct delayed_work gl_work; | 231 | struct delayed_work gl_work; |
232 | struct work_struct gl_delete; | ||
231 | }; | 233 | }; |
232 | 234 | ||
233 | #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ | 235 | #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ |
@@ -404,6 +406,12 @@ struct gfs2_statfs_change_host { | |||
404 | #define GFS2_DATA_WRITEBACK 1 | 406 | #define GFS2_DATA_WRITEBACK 1 |
405 | #define GFS2_DATA_ORDERED 2 | 407 | #define GFS2_DATA_ORDERED 2 |
406 | 408 | ||
409 | #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW | ||
410 | #define GFS2_ERRORS_WITHDRAW 0 | ||
411 | #define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ | ||
412 | #define GFS2_ERRORS_RO 2 /* place holder for future feature */ | ||
413 | #define GFS2_ERRORS_PANIC 3 | ||
414 | |||
407 | struct gfs2_args { | 415 | struct gfs2_args { |
408 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ | 416 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ |
409 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 417 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
@@ -420,6 +428,7 @@ struct gfs2_args { | |||
420 | unsigned int ar_data:2; /* ordered/writeback */ | 428 | unsigned int ar_data:2; /* ordered/writeback */ |
421 | unsigned int ar_meta:1; /* mount metafs */ | 429 | unsigned int ar_meta:1; /* mount metafs */ |
422 | unsigned int ar_discard:1; /* discard requests */ | 430 | unsigned int ar_discard:1; /* discard requests */ |
431 | unsigned int ar_errors:2; /* errors=withdraw | panic */ | ||
423 | int ar_commit; /* Commit interval */ | 432 | int ar_commit; /* Commit interval */ |
424 | }; | 433 | }; |
425 | 434 | ||
@@ -487,7 +496,6 @@ struct gfs2_sb_host { | |||
487 | */ | 496 | */ |
488 | 497 | ||
489 | struct lm_lockstruct { | 498 | struct lm_lockstruct { |
490 | u32 ls_id; | ||
491 | unsigned int ls_jid; | 499 | unsigned int ls_jid; |
492 | unsigned int ls_first; | 500 | unsigned int ls_first; |
493 | unsigned int ls_first_done; | 501 | unsigned int ls_first_done; |
@@ -539,18 +547,12 @@ struct gfs2_sbd { | |||
539 | struct dentry *sd_root_dir; | 547 | struct dentry *sd_root_dir; |
540 | 548 | ||
541 | struct inode *sd_jindex; | 549 | struct inode *sd_jindex; |
542 | struct inode *sd_inum_inode; | ||
543 | struct inode *sd_statfs_inode; | 550 | struct inode *sd_statfs_inode; |
544 | struct inode *sd_ir_inode; | ||
545 | struct inode *sd_sc_inode; | 551 | struct inode *sd_sc_inode; |
546 | struct inode *sd_qc_inode; | 552 | struct inode *sd_qc_inode; |
547 | struct inode *sd_rindex; | 553 | struct inode *sd_rindex; |
548 | struct inode *sd_quota_inode; | 554 | struct inode *sd_quota_inode; |
549 | 555 | ||
550 | /* Inum stuff */ | ||
551 | |||
552 | struct mutex sd_inum_mutex; | ||
553 | |||
554 | /* StatFS stuff */ | 556 | /* StatFS stuff */ |
555 | 557 | ||
556 | spinlock_t sd_statfs_spin; | 558 | spinlock_t sd_statfs_spin; |
@@ -578,7 +580,6 @@ struct gfs2_sbd { | |||
578 | struct gfs2_holder sd_journal_gh; | 580 | struct gfs2_holder sd_journal_gh; |
579 | struct gfs2_holder sd_jinode_gh; | 581 | struct gfs2_holder sd_jinode_gh; |
580 | 582 | ||
581 | struct gfs2_holder sd_ir_gh; | ||
582 | struct gfs2_holder sd_sc_gh; | 583 | struct gfs2_holder sd_sc_gh; |
583 | struct gfs2_holder sd_qc_gh; | 584 | struct gfs2_holder sd_qc_gh; |
584 | 585 | ||
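
The four GFS2_ERRORS_* values above fit exactly into the new two-bit ar_errors field (0..3); CONTINUE and RO are declared but not yet accepted. The real parsing is match_table based (see the super.c hunks later in this diff); a hypothetical string-to-value helper just to make the mapping concrete:

#include <linux/string.h>
#include <linux/errno.h>

static int gfs2_errors_from_string(const char *s)
{
        if (strcmp(s, "withdraw") == 0)
                return GFS2_ERRORS_WITHDRAW;    /* 0, and the default */
        if (strcmp(s, "panic") == 0)
                return GFS2_ERRORS_PANIC;       /* 3 */
        return -EINVAL;         /* "continue" and "ro" are placeholders */
}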
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2f94bd723698..fb15d3b1f409 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include "acl.h" | 24 | #include "acl.h" |
25 | #include "bmap.h" | 25 | #include "bmap.h" |
26 | #include "dir.h" | 26 | #include "dir.h" |
27 | #include "eattr.h" | 27 | #include "xattr.h" |
28 | #include "glock.h" | 28 | #include "glock.h" |
29 | #include "glops.h" | 29 | #include "glops.h" |
30 | #include "inode.h" | 30 | #include "inode.h" |
@@ -519,139 +519,6 @@ out: | |||
519 | return inode ? inode : ERR_PTR(error); | 519 | return inode ? inode : ERR_PTR(error); |
520 | } | 520 | } |
521 | 521 | ||
522 | static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) | ||
523 | { | ||
524 | const struct gfs2_inum_range *str = buf; | ||
525 | |||
526 | ir->ir_start = be64_to_cpu(str->ir_start); | ||
527 | ir->ir_length = be64_to_cpu(str->ir_length); | ||
528 | } | ||
529 | |||
530 | static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) | ||
531 | { | ||
532 | struct gfs2_inum_range *str = buf; | ||
533 | |||
534 | str->ir_start = cpu_to_be64(ir->ir_start); | ||
535 | str->ir_length = cpu_to_be64(ir->ir_length); | ||
536 | } | ||
537 | |||
538 | static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) | ||
539 | { | ||
540 | struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); | ||
541 | struct buffer_head *bh; | ||
542 | struct gfs2_inum_range_host ir; | ||
543 | int error; | ||
544 | |||
545 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
546 | if (error) | ||
547 | return error; | ||
548 | mutex_lock(&sdp->sd_inum_mutex); | ||
549 | |||
550 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
551 | if (error) { | ||
552 | mutex_unlock(&sdp->sd_inum_mutex); | ||
553 | gfs2_trans_end(sdp); | ||
554 | return error; | ||
555 | } | ||
556 | |||
557 | gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); | ||
558 | |||
559 | if (ir.ir_length) { | ||
560 | *formal_ino = ir.ir_start++; | ||
561 | ir.ir_length--; | ||
562 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
563 | gfs2_inum_range_out(&ir, | ||
564 | bh->b_data + sizeof(struct gfs2_dinode)); | ||
565 | brelse(bh); | ||
566 | mutex_unlock(&sdp->sd_inum_mutex); | ||
567 | gfs2_trans_end(sdp); | ||
568 | return 0; | ||
569 | } | ||
570 | |||
571 | brelse(bh); | ||
572 | |||
573 | mutex_unlock(&sdp->sd_inum_mutex); | ||
574 | gfs2_trans_end(sdp); | ||
575 | |||
576 | return 1; | ||
577 | } | ||
578 | |||
579 | static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) | ||
580 | { | ||
581 | struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); | ||
582 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); | ||
583 | struct gfs2_holder gh; | ||
584 | struct buffer_head *bh; | ||
585 | struct gfs2_inum_range_host ir; | ||
586 | int error; | ||
587 | |||
588 | error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
589 | if (error) | ||
590 | return error; | ||
591 | |||
592 | error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); | ||
593 | if (error) | ||
594 | goto out; | ||
595 | mutex_lock(&sdp->sd_inum_mutex); | ||
596 | |||
597 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
598 | if (error) | ||
599 | goto out_end_trans; | ||
600 | |||
601 | gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); | ||
602 | |||
603 | if (!ir.ir_length) { | ||
604 | struct buffer_head *m_bh; | ||
605 | u64 x, y; | ||
606 | __be64 z; | ||
607 | |||
608 | error = gfs2_meta_inode_buffer(m_ip, &m_bh); | ||
609 | if (error) | ||
610 | goto out_brelse; | ||
611 | |||
612 | z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); | ||
613 | x = y = be64_to_cpu(z); | ||
614 | ir.ir_start = x; | ||
615 | ir.ir_length = GFS2_INUM_QUANTUM; | ||
616 | x += GFS2_INUM_QUANTUM; | ||
617 | if (x < y) | ||
618 | gfs2_consist_inode(m_ip); | ||
619 | z = cpu_to_be64(x); | ||
620 | gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); | ||
621 | *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; | ||
622 | |||
623 | brelse(m_bh); | ||
624 | } | ||
625 | |||
626 | *formal_ino = ir.ir_start++; | ||
627 | ir.ir_length--; | ||
628 | |||
629 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
630 | gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); | ||
631 | |||
632 | out_brelse: | ||
633 | brelse(bh); | ||
634 | out_end_trans: | ||
635 | mutex_unlock(&sdp->sd_inum_mutex); | ||
636 | gfs2_trans_end(sdp); | ||
637 | out: | ||
638 | gfs2_glock_dq_uninit(&gh); | ||
639 | return error; | ||
640 | } | ||
641 | |||
642 | static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) | ||
643 | { | ||
644 | int error; | ||
645 | |||
646 | error = pick_formal_ino_1(sdp, inum); | ||
647 | if (error <= 0) | ||
648 | return error; | ||
649 | |||
650 | error = pick_formal_ino_2(sdp, inum); | ||
651 | |||
652 | return error; | ||
653 | } | ||
654 | |||
655 | /** | 522 | /** |
656 | * create_ok - OK to create a new on-disk inode here? | 523 | * create_ok - OK to create a new on-disk inode here? |
657 | * @dip: Directory in which dinode is to be created | 524 | * @dip: Directory in which dinode is to be created |
@@ -731,7 +598,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) | |||
731 | if (error) | 598 | if (error) |
732 | goto out_ipreserv; | 599 | goto out_ipreserv; |
733 | 600 | ||
734 | *no_addr = gfs2_alloc_di(dip, generation); | 601 | error = gfs2_alloc_di(dip, no_addr, generation); |
735 | 602 | ||
736 | gfs2_trans_end(sdp); | 603 | gfs2_trans_end(sdp); |
737 | 604 | ||
@@ -924,7 +791,6 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
924 | size_t len; | 791 | size_t len; |
925 | void *value; | 792 | void *value; |
926 | char *name; | 793 | char *name; |
927 | struct gfs2_ea_request er; | ||
928 | 794 | ||
929 | err = security_inode_init_security(&ip->i_inode, &dip->i_inode, | 795 | err = security_inode_init_security(&ip->i_inode, &dip->i_inode, |
930 | &name, &value, &len); | 796 | &name, &value, &len); |
@@ -935,16 +801,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
935 | return err; | 801 | return err; |
936 | } | 802 | } |
937 | 803 | ||
938 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | 804 | err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0); |
939 | |||
940 | er.er_type = GFS2_EATYPE_SECURITY; | ||
941 | er.er_name = name; | ||
942 | er.er_data = value; | ||
943 | er.er_name_len = strlen(name); | ||
944 | er.er_data_len = len; | ||
945 | |||
946 | err = gfs2_ea_set_i(ip, &er); | ||
947 | |||
948 | kfree(value); | 805 | kfree(value); |
949 | kfree(name); | 806 | kfree(name); |
950 | 807 | ||
@@ -991,13 +848,10 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
991 | if (error) | 848 | if (error) |
992 | goto fail_gunlock; | 849 | goto fail_gunlock; |
993 | 850 | ||
994 | error = pick_formal_ino(sdp, &inum.no_formal_ino); | ||
995 | if (error) | ||
996 | goto fail_gunlock; | ||
997 | |||
998 | error = alloc_dinode(dip, &inum.no_addr, &generation); | 851 | error = alloc_dinode(dip, &inum.no_addr, &generation); |
999 | if (error) | 852 | if (error) |
1000 | goto fail_gunlock; | 853 | goto fail_gunlock; |
854 | inum.no_formal_ino = generation; | ||
1001 | 855 | ||
1002 | error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, | 856 | error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, |
1003 | LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); | 857 | LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); |
@@ -1008,9 +862,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1008 | if (error) | 862 | if (error) |
1009 | goto fail_gunlock2; | 863 | goto fail_gunlock2; |
1010 | 864 | ||
1011 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), | 865 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, |
1012 | inum.no_addr, | 866 | inum.no_formal_ino, 0); |
1013 | inum.no_formal_ino, 0); | ||
1014 | if (IS_ERR(inode)) | 867 | if (IS_ERR(inode)) |
1015 | goto fail_gunlock2; | 868 | goto fail_gunlock2; |
1016 | 869 | ||
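
With pick_formal_ino() and the per-node inum_range files gone, the formal inode number is simply the generation number returned by alloc_dinode(). The generation comes from the per-rgrp rd_igeneration counter, as the rgrp.c hunk later in this diff shows; its zero-skip behaviour, lifted into a hypothetical helper for clarity:

/* hypothetical helper; gfs2_alloc_di() open-codes this */
static u64 next_formal_ino(struct gfs2_rgrpd *rgd)
{
        u64 gen = rgd->rd_igeneration++;

        if (gen == 0)           /* zero is reserved as "no formal ino" */
                gen = rgd->rd_igeneration++;
        return gen;
}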
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7bc3c45cd676..52fb6c048981 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -84,7 +84,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
84 | 84 | ||
85 | gfs2_tune_init(&sdp->sd_tune); | 85 | gfs2_tune_init(&sdp->sd_tune); |
86 | 86 | ||
87 | mutex_init(&sdp->sd_inum_mutex); | ||
88 | spin_lock_init(&sdp->sd_statfs_spin); | 87 | spin_lock_init(&sdp->sd_statfs_spin); |
89 | 88 | ||
90 | spin_lock_init(&sdp->sd_rindex_spin); | 89 | spin_lock_init(&sdp->sd_rindex_spin); |
@@ -833,21 +832,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
833 | if (error) | 832 | if (error) |
834 | goto fail; | 833 | goto fail; |
835 | 834 | ||
836 | /* Read in the master inode number inode */ | ||
837 | sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); | ||
838 | if (IS_ERR(sdp->sd_inum_inode)) { | ||
839 | error = PTR_ERR(sdp->sd_inum_inode); | ||
840 | fs_err(sdp, "can't read in inum inode: %d\n", error); | ||
841 | goto fail_journal; | ||
842 | } | ||
843 | |||
844 | |||
845 | /* Read in the master statfs inode */ | 835 | /* Read in the master statfs inode */ |
846 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); | 836 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); |
847 | if (IS_ERR(sdp->sd_statfs_inode)) { | 837 | if (IS_ERR(sdp->sd_statfs_inode)) { |
848 | error = PTR_ERR(sdp->sd_statfs_inode); | 838 | error = PTR_ERR(sdp->sd_statfs_inode); |
849 | fs_err(sdp, "can't read in statfs inode: %d\n", error); | 839 | fs_err(sdp, "can't read in statfs inode: %d\n", error); |
850 | goto fail_inum; | 840 | goto fail_journal; |
851 | } | 841 | } |
852 | 842 | ||
853 | /* Read in the resource index inode */ | 843 | /* Read in the resource index inode */ |
@@ -876,8 +866,6 @@ fail_rindex: | |||
876 | iput(sdp->sd_rindex); | 866 | iput(sdp->sd_rindex); |
877 | fail_statfs: | 867 | fail_statfs: |
878 | iput(sdp->sd_statfs_inode); | 868 | iput(sdp->sd_statfs_inode); |
879 | fail_inum: | ||
880 | iput(sdp->sd_inum_inode); | ||
881 | fail_journal: | 869 | fail_journal: |
882 | init_journal(sdp, UNDO); | 870 | init_journal(sdp, UNDO); |
883 | fail: | 871 | fail: |
@@ -905,20 +893,12 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
905 | return error; | 893 | return error; |
906 | } | 894 | } |
907 | 895 | ||
908 | sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid); | ||
909 | sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf); | ||
910 | if (IS_ERR(sdp->sd_ir_inode)) { | ||
911 | error = PTR_ERR(sdp->sd_ir_inode); | ||
912 | fs_err(sdp, "can't find local \"ir\" file: %d\n", error); | ||
913 | goto fail; | ||
914 | } | ||
915 | |||
916 | sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid); | 896 | sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid); |
917 | sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf); | 897 | sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf); |
918 | if (IS_ERR(sdp->sd_sc_inode)) { | 898 | if (IS_ERR(sdp->sd_sc_inode)) { |
919 | error = PTR_ERR(sdp->sd_sc_inode); | 899 | error = PTR_ERR(sdp->sd_sc_inode); |
920 | fs_err(sdp, "can't find local \"sc\" file: %d\n", error); | 900 | fs_err(sdp, "can't find local \"sc\" file: %d\n", error); |
921 | goto fail_ir_i; | 901 | goto fail; |
922 | } | 902 | } |
923 | 903 | ||
924 | sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); | 904 | sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); |
@@ -932,27 +912,16 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
932 | iput(pn); | 912 | iput(pn); |
933 | pn = NULL; | 913 | pn = NULL; |
934 | 914 | ||
935 | ip = GFS2_I(sdp->sd_ir_inode); | ||
936 | error = gfs2_glock_nq_init(ip->i_gl, | ||
937 | LM_ST_EXCLUSIVE, 0, | ||
938 | &sdp->sd_ir_gh); | ||
939 | if (error) { | ||
940 | fs_err(sdp, "can't lock local \"ir\" file: %d\n", error); | ||
941 | goto fail_qc_i; | ||
942 | } | ||
943 | |||
944 | ip = GFS2_I(sdp->sd_sc_inode); | 915 | ip = GFS2_I(sdp->sd_sc_inode); |
945 | error = gfs2_glock_nq_init(ip->i_gl, | 916 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, |
946 | LM_ST_EXCLUSIVE, 0, | ||
947 | &sdp->sd_sc_gh); | 917 | &sdp->sd_sc_gh); |
948 | if (error) { | 918 | if (error) { |
949 | fs_err(sdp, "can't lock local \"sc\" file: %d\n", error); | 919 | fs_err(sdp, "can't lock local \"sc\" file: %d\n", error); |
950 | goto fail_ir_gh; | 920 | goto fail_qc_i; |
951 | } | 921 | } |
952 | 922 | ||
953 | ip = GFS2_I(sdp->sd_qc_inode); | 923 | ip = GFS2_I(sdp->sd_qc_inode); |
954 | error = gfs2_glock_nq_init(ip->i_gl, | 924 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, |
955 | LM_ST_EXCLUSIVE, 0, | ||
956 | &sdp->sd_qc_gh); | 925 | &sdp->sd_qc_gh); |
957 | if (error) { | 926 | if (error) { |
958 | fs_err(sdp, "can't lock local \"qc\" file: %d\n", error); | 927 | fs_err(sdp, "can't lock local \"qc\" file: %d\n", error); |
@@ -965,14 +934,10 @@ fail_qc_gh: | |||
965 | gfs2_glock_dq_uninit(&sdp->sd_qc_gh); | 934 | gfs2_glock_dq_uninit(&sdp->sd_qc_gh); |
966 | fail_ut_gh: | 935 | fail_ut_gh: |
967 | gfs2_glock_dq_uninit(&sdp->sd_sc_gh); | 936 | gfs2_glock_dq_uninit(&sdp->sd_sc_gh); |
968 | fail_ir_gh: | ||
969 | gfs2_glock_dq_uninit(&sdp->sd_ir_gh); | ||
970 | fail_qc_i: | 937 | fail_qc_i: |
971 | iput(sdp->sd_qc_inode); | 938 | iput(sdp->sd_qc_inode); |
972 | fail_ut_i: | 939 | fail_ut_i: |
973 | iput(sdp->sd_sc_inode); | 940 | iput(sdp->sd_sc_inode); |
974 | fail_ir_i: | ||
975 | iput(sdp->sd_ir_inode); | ||
976 | fail: | 941 | fail: |
977 | if (pn) | 942 | if (pn) |
978 | iput(pn); | 943 | iput(pn); |
@@ -1063,7 +1028,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
1063 | 1028 | ||
1064 | ls->ls_ops = lm; | 1029 | ls->ls_ops = lm; |
1065 | ls->ls_first = 1; | 1030 | ls->ls_first = 1; |
1066 | ls->ls_id = 0; | ||
1067 | 1031 | ||
1068 | for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) { | 1032 | for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) { |
1069 | substring_t tmp[MAX_OPT_ARGS]; | 1033 | substring_t tmp[MAX_OPT_ARGS]; |
@@ -1081,10 +1045,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
1081 | ls->ls_jid = option; | 1045 | ls->ls_jid = option; |
1082 | break; | 1046 | break; |
1083 | case Opt_id: | 1047 | case Opt_id: |
1084 | ret = match_int(&tmp[0], &option); | 1048 | /* Obsolete, but left for backward compat purposes */ |
1085 | if (ret) | ||
1086 | goto hostdata_error; | ||
1087 | ls->ls_id = option; | ||
1088 | break; | 1049 | break; |
1089 | case Opt_first: | 1050 | case Opt_first: |
1090 | ret = match_int(&tmp[0], &option); | 1051 | ret = match_int(&tmp[0], &option); |
@@ -1133,6 +1094,17 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp) | |||
1133 | lm->lm_unmount(sdp); | 1094 | lm->lm_unmount(sdp); |
1134 | } | 1095 | } |
1135 | 1096 | ||
1097 | void gfs2_online_uevent(struct gfs2_sbd *sdp) | ||
1098 | { | ||
1099 | struct super_block *sb = sdp->sd_vfs; | ||
1100 | char ro[20]; | ||
1101 | char spectator[20]; | ||
1102 | char *envp[] = { ro, spectator, NULL }; | ||
1103 | sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); | ||
1104 | sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); | ||
1105 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp); | ||
1106 | } | ||
1107 | |||
1136 | /** | 1108 | /** |
1137 | * fill_super - Read in superblock | 1109 | * fill_super - Read in superblock |
1138 | * @sb: The VFS superblock | 1110 | * @sb: The VFS superblock |
@@ -1157,6 +1129,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
1157 | sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; | 1129 | sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; |
1158 | sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; | 1130 | sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; |
1159 | sdp->sd_args.ar_commit = 60; | 1131 | sdp->sd_args.ar_commit = 60; |
1132 | sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT; | ||
1160 | 1133 | ||
1161 | error = gfs2_mount_args(sdp, &sdp->sd_args, data); | 1134 | error = gfs2_mount_args(sdp, &sdp->sd_args, data); |
1162 | if (error) { | 1135 | if (error) { |
@@ -1174,6 +1147,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
1174 | sb->s_magic = GFS2_MAGIC; | 1147 | sb->s_magic = GFS2_MAGIC; |
1175 | sb->s_op = &gfs2_super_ops; | 1148 | sb->s_op = &gfs2_super_ops; |
1176 | sb->s_export_op = &gfs2_export_ops; | 1149 | sb->s_export_op = &gfs2_export_ops; |
1150 | sb->s_xattr = gfs2_xattr_handlers; | ||
1177 | sb->s_time_gran = 1; | 1151 | sb->s_time_gran = 1; |
1178 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 1152 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
1179 | 1153 | ||
@@ -1236,7 +1210,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
1236 | } | 1210 | } |
1237 | 1211 | ||
1238 | gfs2_glock_dq_uninit(&mount_gh); | 1212 | gfs2_glock_dq_uninit(&mount_gh); |
1239 | 1213 | gfs2_online_uevent(sdp); | |
1240 | return 0; | 1214 | return 0; |
1241 | 1215 | ||
1242 | fail_threads: | 1216 | fail_threads: |
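
gfs2_online_uevent() above fills two fixed 20-byte buffers with sprintf(); "RDONLY=0" and "SPECTATOR=0" fit with room to spare, so this is safe, but the bounds-checked spelling of the same uevent-with-environment pattern is worth seeing (a sketch, not a proposed change):

static void fs_online_uevent(struct kobject *kobj, bool rdonly, bool spectator)
{
        char ro[20], spec[20];
        char *envp[] = { ro, spec, NULL };

        snprintf(ro, sizeof(ro), "RDONLY=%d", rdonly ? 1 : 0);
        snprintf(spec, sizeof(spec), "SPECTATOR=%d", spectator ? 1 : 0);
        kobject_uevent_env(kobj, KOBJ_ONLINE, envp);
}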
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index f8bd20baf99c..c3ac18054057 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -26,8 +26,7 @@ | |||
26 | #include "acl.h" | 26 | #include "acl.h" |
27 | #include "bmap.h" | 27 | #include "bmap.h" |
28 | #include "dir.h" | 28 | #include "dir.h" |
29 | #include "eaops.h" | 29 | #include "xattr.h" |
30 | #include "eattr.h" | ||
31 | #include "glock.h" | 30 | #include "glock.h" |
32 | #include "inode.h" | 31 | #include "inode.h" |
33 | #include "meta_io.h" | 32 | #include "meta_io.h" |
@@ -349,7 +348,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
349 | 348 | ||
350 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); | 349 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); |
351 | if (error) | 350 | if (error) |
352 | goto out_rgrp; | 351 | goto out_gunlock; |
353 | 352 | ||
354 | error = gfs2_dir_del(dip, &dentry->d_name); | 353 | error = gfs2_dir_del(dip, &dentry->d_name); |
355 | if (error) | 354 | if (error) |
@@ -1302,60 +1301,53 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, | |||
1302 | const void *data, size_t size, int flags) | 1301 | const void *data, size_t size, int flags) |
1303 | { | 1302 | { |
1304 | struct inode *inode = dentry->d_inode; | 1303 | struct inode *inode = dentry->d_inode; |
1305 | struct gfs2_ea_request er; | 1304 | struct gfs2_inode *ip = GFS2_I(inode); |
1306 | 1305 | struct gfs2_holder gh; | |
1307 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | 1306 | int ret; |
1308 | er.er_type = gfs2_ea_name2type(name, &er.er_name); | ||
1309 | if (er.er_type == GFS2_EATYPE_UNUSED) | ||
1310 | return -EOPNOTSUPP; | ||
1311 | er.er_data = (char *)data; | ||
1312 | er.er_name_len = strlen(er.er_name); | ||
1313 | er.er_data_len = size; | ||
1314 | er.er_flags = flags; | ||
1315 | |||
1316 | gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE)); | ||
1317 | 1307 | ||
1318 | return gfs2_ea_set(GFS2_I(inode), &er); | 1308 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
1309 | ret = gfs2_glock_nq(&gh); | ||
1310 | if (ret == 0) { | ||
1311 | ret = generic_setxattr(dentry, name, data, size, flags); | ||
1312 | gfs2_glock_dq(&gh); | ||
1313 | } | ||
1314 | gfs2_holder_uninit(&gh); | ||
1315 | return ret; | ||
1319 | } | 1316 | } |
1320 | 1317 | ||
1321 | static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, | 1318 | static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, |
1322 | void *data, size_t size) | 1319 | void *data, size_t size) |
1323 | { | 1320 | { |
1324 | struct gfs2_ea_request er; | 1321 | struct inode *inode = dentry->d_inode; |
1325 | 1322 | struct gfs2_inode *ip = GFS2_I(inode); | |
1326 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | 1323 | struct gfs2_holder gh; |
1327 | er.er_type = gfs2_ea_name2type(name, &er.er_name); | 1324 | int ret; |
1328 | if (er.er_type == GFS2_EATYPE_UNUSED) | ||
1329 | return -EOPNOTSUPP; | ||
1330 | er.er_data = data; | ||
1331 | er.er_name_len = strlen(er.er_name); | ||
1332 | er.er_data_len = size; | ||
1333 | |||
1334 | return gfs2_ea_get(GFS2_I(dentry->d_inode), &er); | ||
1335 | } | ||
1336 | |||
1337 | static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
1338 | { | ||
1339 | struct gfs2_ea_request er; | ||
1340 | |||
1341 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | ||
1342 | er.er_data = (size) ? buffer : NULL; | ||
1343 | er.er_data_len = size; | ||
1344 | 1325 | ||
1345 | return gfs2_ea_list(GFS2_I(dentry->d_inode), &er); | 1326 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); |
1327 | ret = gfs2_glock_nq(&gh); | ||
1328 | if (ret == 0) { | ||
1329 | ret = generic_getxattr(dentry, name, data, size); | ||
1330 | gfs2_glock_dq(&gh); | ||
1331 | } | ||
1332 | gfs2_holder_uninit(&gh); | ||
1333 | return ret; | ||
1346 | } | 1334 | } |
1347 | 1335 | ||
1348 | static int gfs2_removexattr(struct dentry *dentry, const char *name) | 1336 | static int gfs2_removexattr(struct dentry *dentry, const char *name) |
1349 | { | 1337 | { |
1350 | struct gfs2_ea_request er; | 1338 | struct inode *inode = dentry->d_inode; |
1351 | 1339 | struct gfs2_inode *ip = GFS2_I(inode); | |
1352 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | 1340 | struct gfs2_holder gh; |
1353 | er.er_type = gfs2_ea_name2type(name, &er.er_name); | 1341 | int ret; |
1354 | if (er.er_type == GFS2_EATYPE_UNUSED) | ||
1355 | return -EOPNOTSUPP; | ||
1356 | er.er_name_len = strlen(er.er_name); | ||
1357 | 1342 | ||
1358 | return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er); | 1343 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
1344 | ret = gfs2_glock_nq(&gh); | ||
1345 | if (ret == 0) { | ||
1346 | ret = generic_removexattr(dentry, name); | ||
1347 | gfs2_glock_dq(&gh); | ||
1348 | } | ||
1349 | gfs2_holder_uninit(&gh); | ||
1350 | return ret; | ||
1359 | } | 1351 | } |
1360 | 1352 | ||
1361 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1353 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
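
All three xattr entry points rewritten above share one shape: initialise a holder (EXCLUSIVE for set/remove, SHARED plus LM_FLAG_ANY for get), enqueue the glock, delegate to the generic_*xattr helper -- which now dispatches through the sb->s_xattr handler table installed in fill_super() -- then drop the lock. That shape, factored into a wrapper purely for illustration (the three generics take different arguments, so the patch rightly does not share code this way):

typedef int (*xattr_op_t)(struct dentry *dentry);       /* hypothetical */

static int gfs2_xattr_locked(struct dentry *dentry, unsigned int state,
                             int flags, xattr_op_t op)
{
        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
        struct gfs2_holder gh;
        int ret;

        gfs2_holder_init(ip->i_gl, state, flags, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret == 0) {
                ret = op(dentry);
                gfs2_glock_dq(&gh);
        }
        gfs2_holder_uninit(&gh);
        return ret;
}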
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index daa4ae341a29..28c590b7c9da 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -285,27 +285,19 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) | |||
285 | } | 285 | } |
286 | 286 | ||
287 | tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; | 287 | tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; |
288 | if (count[1] + count[2] != tmp) { | 288 | if (count[1] != tmp) { |
289 | if (gfs2_consist_rgrpd(rgd)) | 289 | if (gfs2_consist_rgrpd(rgd)) |
290 | fs_err(sdp, "used data mismatch: %u != %u\n", | 290 | fs_err(sdp, "used data mismatch: %u != %u\n", |
291 | count[1], tmp); | 291 | count[1], tmp); |
292 | return; | 292 | return; |
293 | } | 293 | } |
294 | 294 | ||
295 | if (count[3] != rgd->rd_dinodes) { | 295 | if (count[2] + count[3] != rgd->rd_dinodes) { |
296 | if (gfs2_consist_rgrpd(rgd)) | 296 | if (gfs2_consist_rgrpd(rgd)) |
297 | fs_err(sdp, "used metadata mismatch: %u != %u\n", | 297 | fs_err(sdp, "used metadata mismatch: %u != %u\n", |
298 | count[3], rgd->rd_dinodes); | 298 | count[2] + count[3], rgd->rd_dinodes); |
299 | return; | 299 | return; |
300 | } | 300 | } |
301 | |||
302 | if (count[2] > count[3]) { | ||
303 | if (gfs2_consist_rgrpd(rgd)) | ||
304 | fs_err(sdp, "unlinked inodes > inodes: %u\n", | ||
305 | count[2]); | ||
306 | return; | ||
307 | } | ||
308 | |||
309 | } | 301 | } |
310 | 302 | ||
311 | static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) | 303 | static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) |
@@ -865,7 +857,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
865 | goto start_new_extent; | 857 | goto start_new_extent; |
866 | if ((start + nr_sects) != blk) { | 858 | if ((start + nr_sects) != blk) { |
867 | rv = blkdev_issue_discard(bdev, start, | 859 | rv = blkdev_issue_discard(bdev, start, |
868 | nr_sects, GFP_NOFS); | 860 | nr_sects, GFP_NOFS, |
861 | DISCARD_FL_BARRIER); | ||
869 | if (rv) | 862 | if (rv) |
870 | goto fail; | 863 | goto fail; |
871 | nr_sects = 0; | 864 | nr_sects = 0; |
@@ -879,7 +872,8 @@ start_new_extent: | |||
879 | } | 872 | } |
880 | } | 873 | } |
881 | if (nr_sects) { | 874 | if (nr_sects) { |
882 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS); | 875 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, |
876 | DISCARD_FL_BARRIER); | ||
883 | if (rv) | 877 | if (rv) |
884 | goto fail; | 878 | goto fail; |
885 | } | 879 | } |
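
The two call sites above only gain the DISCARD_FL_BARRIER flag, but the surrounding logic is the classic extent-batching loop: contiguous free sectors accumulate into a (start, nr_sects) pair, one discard is issued whenever a discontinuity appears, and a final discard flushes the trailing extent. Stripped down to a sketch with hypothetical inputs:

static int discard_free_extents(struct block_device *bdev,
                                const sector_t *free, unsigned int n)
{
        sector_t start = 0, nr_sects = 0;
        unsigned int i;
        int rv;

        for (i = 0; i < n; i++) {
                if (nr_sects && free[i] != start + nr_sects) {
                        rv = blkdev_issue_discard(bdev, start, nr_sects,
                                                  GFP_NOFS, DISCARD_FL_BARRIER);
                        if (rv)
                                return rv;
                        nr_sects = 0;
                }
                if (nr_sects == 0)
                        start = free[i];
                nr_sects++;
        }
        if (nr_sects)
                return blkdev_issue_discard(bdev, start, nr_sects,
                                            GFP_NOFS, DISCARD_FL_BARRIER);
        return 0;
}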
@@ -961,7 +955,8 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | |||
961 | * Returns: The inode, if one has been found | 955 | * Returns: The inode, if one has been found |
962 | */ | 956 | */ |
963 | 957 | ||
964 | static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | 958 | static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, |
959 | u64 skip) | ||
965 | { | 960 | { |
966 | struct inode *inode; | 961 | struct inode *inode; |
967 | u32 goal = 0, block; | 962 | u32 goal = 0, block; |
@@ -985,6 +980,8 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
985 | goal++; | 980 | goal++; |
986 | if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) | 981 | if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) |
987 | continue; | 982 | continue; |
983 | if (no_addr == skip) | ||
984 | continue; | ||
988 | *last_unlinked = no_addr; | 985 | *last_unlinked = no_addr; |
989 | inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, | 986 | inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, |
990 | no_addr, -1, 1); | 987 | no_addr, -1, 1); |
@@ -1104,7 +1101,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1104 | if (try_rgrp_fit(rgd, al)) | 1101 | if (try_rgrp_fit(rgd, al)) |
1105 | goto out; | 1102 | goto out; |
1106 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1103 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1107 | inode = try_rgrp_unlink(rgd, last_unlinked); | 1104 | inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); |
1108 | if (!rg_locked) | 1105 | if (!rg_locked) |
1109 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1106 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1110 | if (inode) | 1107 | if (inode) |
@@ -1138,7 +1135,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1138 | if (try_rgrp_fit(rgd, al)) | 1135 | if (try_rgrp_fit(rgd, al)) |
1139 | goto out; | 1136 | goto out; |
1140 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1137 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1141 | inode = try_rgrp_unlink(rgd, last_unlinked); | 1138 | inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); |
1142 | if (!rg_locked) | 1139 | if (!rg_locked) |
1143 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1140 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1144 | if (inode) | 1141 | if (inode) |
@@ -1261,7 +1258,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1261 | * Returns: The block type (GFS2_BLKST_*) | 1258 | * Returns: The block type (GFS2_BLKST_*) |
1262 | */ | 1259 | */ |
1263 | 1260 | ||
1264 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | 1261 | static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) |
1265 | { | 1262 | { |
1266 | struct gfs2_bitmap *bi = NULL; | 1263 | struct gfs2_bitmap *bi = NULL; |
1267 | u32 length, rgrp_block, buf_block; | 1264 | u32 length, rgrp_block, buf_block; |
@@ -1464,6 +1461,16 @@ int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
1464 | return 0; | 1461 | return 0; |
1465 | } | 1462 | } |
1466 | 1463 | ||
1464 | static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) | ||
1465 | { | ||
1466 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
1467 | fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", | ||
1468 | (unsigned long long)rgd->rd_addr); | ||
1469 | fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); | ||
1470 | gfs2_rgrp_dump(NULL, rgd->rd_gl); | ||
1471 | rgd->rd_flags |= GFS2_RDF_ERROR; | ||
1472 | } | ||
1473 | |||
1467 | /** | 1474 | /** |
1468 | * gfs2_alloc_block - Allocate one or more blocks | 1475 | * gfs2_alloc_block - Allocate one or more blocks |
1469 | * @ip: the inode to allocate the block for | 1476 | * @ip: the inode to allocate the block for |
@@ -1525,22 +1532,20 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
1525 | return 0; | 1532 | return 0; |
1526 | 1533 | ||
1527 | rgrp_error: | 1534 | rgrp_error: |
1528 | fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", | 1535 | gfs2_rgrp_error(rgd); |
1529 | (unsigned long long)rgd->rd_addr); | ||
1530 | fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); | ||
1531 | gfs2_rgrp_dump(NULL, rgd->rd_gl); | ||
1532 | rgd->rd_flags |= GFS2_RDF_ERROR; | ||
1533 | return -EIO; | 1536 | return -EIO; |
1534 | } | 1537 | } |
1535 | 1538 | ||
1536 | /** | 1539 | /** |
1537 | * gfs2_alloc_di - Allocate a dinode | 1540 | * gfs2_alloc_di - Allocate a dinode |
1538 | * @dip: the directory that the inode is going in | 1541 | * @dip: the directory that the inode is going in |
1542 | * @bn: the block number which is allocated | ||
1543 | * @generation: the generation number of the inode | ||
1539 | * | 1544 | * |
1540 | * Returns: the block allocated | 1545 | * Returns: 0 on success or error |
1541 | */ | 1546 | */ |
1542 | 1547 | ||
1543 | u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | 1548 | int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) |
1544 | { | 1549 | { |
1545 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 1550 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
1546 | struct gfs2_alloc *al = dip->i_alloc; | 1551 | struct gfs2_alloc *al = dip->i_alloc; |
@@ -1551,16 +1556,21 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
1551 | 1556 | ||
1552 | blk = rgblk_search(rgd, rgd->rd_last_alloc, | 1557 | blk = rgblk_search(rgd, rgd->rd_last_alloc, |
1553 | GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); | 1558 | GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); |
1554 | BUG_ON(blk == BFITNOENT); | ||
1555 | 1559 | ||
1556 | rgd->rd_last_alloc = blk; | 1560 | /* Since all blocks are reserved in advance, this shouldn't happen */ |
1561 | if (blk == BFITNOENT) | ||
1562 | goto rgrp_error; | ||
1557 | 1563 | ||
1564 | rgd->rd_last_alloc = blk; | ||
1558 | block = rgd->rd_data0 + blk; | 1565 | block = rgd->rd_data0 + blk; |
1566 | if (rgd->rd_free == 0) | ||
1567 | goto rgrp_error; | ||
1559 | 1568 | ||
1560 | gfs2_assert_withdraw(sdp, rgd->rd_free); | ||
1561 | rgd->rd_free--; | 1569 | rgd->rd_free--; |
1562 | rgd->rd_dinodes++; | 1570 | rgd->rd_dinodes++; |
1563 | *generation = rgd->rd_igeneration++; | 1571 | *generation = rgd->rd_igeneration++; |
1572 | if (*generation == 0) | ||
1573 | *generation = rgd->rd_igeneration++; | ||
1564 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1574 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1565 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1575 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1566 | 1576 | ||
@@ -1573,7 +1583,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
1573 | rgd->rd_free_clone--; | 1583 | rgd->rd_free_clone--; |
1574 | spin_unlock(&sdp->sd_rindex_spin); | 1584 | spin_unlock(&sdp->sd_rindex_spin); |
1575 | trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); | 1585 | trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); |
1576 | return block; | 1586 | *bn = block; |
1587 | return 0; | ||
1588 | |||
1589 | rgrp_error: | ||
1590 | gfs2_rgrp_error(rgd); | ||
1591 | return -EIO; | ||
1577 | } | 1592 | } |
1578 | 1593 | ||
1579 | /** | 1594 | /** |
@@ -1681,6 +1696,46 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | |||
1681 | } | 1696 | } |
1682 | 1697 | ||
1683 | /** | 1698 | /** |
1699 | * gfs2_check_blk_type - Check the type of a block | ||
1700 | * @sdp: The superblock | ||
1701 | * @no_addr: The block number to check | ||
1702 | * @type: The block type we are looking for | ||
1703 | * | ||
1704 | * Returns: 0 if the block type matches the expected type | ||
1705 | * -ESTALE if it doesn't match | ||
1706 | * or -ve errno if something went wrong while checking | ||
1707 | */ | ||
1708 | |||
1709 | int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) | ||
1710 | { | ||
1711 | struct gfs2_rgrpd *rgd; | ||
1712 | struct gfs2_holder ri_gh, rgd_gh; | ||
1713 | int error; | ||
1714 | |||
1715 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
1716 | if (error) | ||
1717 | goto fail; | ||
1718 | |||
1719 | error = -EINVAL; | ||
1720 | rgd = gfs2_blk2rgrpd(sdp, no_addr); | ||
1721 | if (!rgd) | ||
1722 | goto fail_rindex; | ||
1723 | |||
1724 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); | ||
1725 | if (error) | ||
1726 | goto fail_rindex; | ||
1727 | |||
1728 | if (gfs2_get_block_type(rgd, no_addr) != type) | ||
1729 | error = -ESTALE; | ||
1730 | |||
1731 | gfs2_glock_dq_uninit(&rgd_gh); | ||
1732 | fail_rindex: | ||
1733 | gfs2_glock_dq_uninit(&ri_gh); | ||
1734 | fail: | ||
1735 | return error; | ||
1736 | } | ||
1737 | |||
1738 | /** | ||
1684 | * gfs2_rlist_add - add a RG to a list of RGs | 1739 | * gfs2_rlist_add - add a RG to a list of RGs |
1685 | * @sdp: the filesystem | 1740 | * @sdp: the filesystem |
1686 | * @rlist: the list of resource groups | 1741 | * @rlist: the list of resource groups |
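
gfs2_check_blk_type() above exists for the delete path in super.c (later in this diff): before deallocating a supposedly unlinked inode, the node re-reads the bitmap under a shared rgrp glock, and -ESTALE signals that another node already reclaimed the block. A sketch of the caller's decision, wrapped in a hypothetical predicate:

/* hypothetical wrapper around the check gfs2_delete_inode() performs */
static bool gfs2_still_unlinked(struct gfs2_sbd *sdp, struct gfs2_inode *ip)
{
        int error = gfs2_check_blk_type(sdp, ip->i_no_addr,
                                        GFS2_BLKST_UNLINKED);
        /* -ESTALE: reclaimed elsewhere; other errors: err on the safe side */
        return error == 0;
}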
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 1e76ff0f3e00..b4106ddaaa98 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -44,15 +44,15 @@ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) | |||
44 | 44 | ||
45 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 45 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
46 | 46 | ||
47 | extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); | ||
48 | |||
49 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); | 47 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); |
50 | extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); | 48 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); |
51 | 49 | ||
52 | extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); | 50 | extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); |
53 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); | 51 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); |
54 | extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); | 52 | extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); |
55 | extern void gfs2_unlink_di(struct inode *inode); | 53 | extern void gfs2_unlink_di(struct inode *inode); |
54 | extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, | ||
55 | unsigned int type); | ||
56 | 56 | ||
57 | struct gfs2_rgrp_list { | 57 | struct gfs2_rgrp_list { |
58 | unsigned int rl_rgrps; | 58 | unsigned int rl_rgrps; |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0a6801336470..0ec3ec672de1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -38,7 +38,7 @@ | |||
38 | #include "trans.h" | 38 | #include "trans.h" |
39 | #include "util.h" | 39 | #include "util.h" |
40 | #include "sys.h" | 40 | #include "sys.h" |
41 | #include "eattr.h" | 41 | #include "xattr.h" |
42 | 42 | ||
43 | #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) | 43 | #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) |
44 | 44 | ||
@@ -68,6 +68,8 @@ enum { | |||
68 | Opt_discard, | 68 | Opt_discard, |
69 | Opt_nodiscard, | 69 | Opt_nodiscard, |
70 | Opt_commit, | 70 | Opt_commit, |
71 | Opt_err_withdraw, | ||
72 | Opt_err_panic, | ||
71 | Opt_error, | 73 | Opt_error, |
72 | }; | 74 | }; |
73 | 75 | ||
@@ -97,6 +99,8 @@ static const match_table_t tokens = { | |||
97 | {Opt_discard, "discard"}, | 99 | {Opt_discard, "discard"}, |
98 | {Opt_nodiscard, "nodiscard"}, | 100 | {Opt_nodiscard, "nodiscard"}, |
99 | {Opt_commit, "commit=%d"}, | 101 | {Opt_commit, "commit=%d"}, |
102 | {Opt_err_withdraw, "errors=withdraw"}, | ||
103 | {Opt_err_panic, "errors=panic"}, | ||
100 | {Opt_error, NULL} | 104 | {Opt_error, NULL} |
101 | }; | 105 | }; |
102 | 106 | ||
@@ -152,6 +156,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) | |||
152 | args->ar_localcaching = 1; | 156 | args->ar_localcaching = 1; |
153 | break; | 157 | break; |
154 | case Opt_debug: | 158 | case Opt_debug: |
159 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | ||
160 | fs_info(sdp, "-o debug and -o errors=panic " | ||
161 | "are mutually exclusive.\n"); | ||
162 | return -EINVAL; | ||
163 | } | ||
155 | args->ar_debug = 1; | 164 | args->ar_debug = 1; |
156 | break; | 165 | break; |
157 | case Opt_nodebug: | 166 | case Opt_nodebug: |
@@ -205,6 +214,17 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) | |||
205 | return rv ? rv : -EINVAL; | 214 | return rv ? rv : -EINVAL; |
206 | } | 215 | } |
207 | break; | 216 | break; |
217 | case Opt_err_withdraw: | ||
218 | args->ar_errors = GFS2_ERRORS_WITHDRAW; | ||
219 | break; | ||
220 | case Opt_err_panic: | ||
221 | if (args->ar_debug) { | ||
222 | fs_info(sdp, "-o debug and -o errors=panic " | ||
223 | "are mutually exclusive.\n"); | ||
224 | return -EINVAL; | ||
225 | } | ||
226 | args->ar_errors = GFS2_ERRORS_PANIC; | ||
227 | break; | ||
208 | case Opt_error: | 228 | case Opt_error: |
209 | default: | 229 | default: |
210 | fs_info(sdp, "invalid mount option: %s\n", o); | 230 | fs_info(sdp, "invalid mount option: %s\n", o); |
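
Because mount options parse left to right, the debug/errors=panic conflict has to be rejected from whichever option arrives second -- hence the mirrored checks in Opt_debug and Opt_err_panic above. A single post-parse validator would state the rule once; a sketch of that alternative (not what the patch does):

static int gfs2_validate_args(struct gfs2_sbd *sdp,
                              const struct gfs2_args *args)
{
        if (args->ar_debug && args->ar_errors == GFS2_ERRORS_PANIC) {
                fs_info(sdp, "-o debug and -o errors=panic "
                        "are mutually exclusive.\n");
                return -EINVAL;
        }
        return 0;
}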
@@ -353,7 +373,7 @@ fail: | |||
353 | return error; | 373 | return error; |
354 | } | 374 | } |
355 | 375 | ||
356 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) | 376 | void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
357 | { | 377 | { |
358 | const struct gfs2_statfs_change *str = buf; | 378 | const struct gfs2_statfs_change *str = buf; |
359 | 379 | ||
@@ -441,6 +461,29 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, | |||
441 | brelse(l_bh); | 461 | brelse(l_bh); |
442 | } | 462 | } |
443 | 463 | ||
464 | void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, | ||
465 | struct buffer_head *l_bh) | ||
466 | { | ||
467 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
468 | struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); | ||
469 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; | ||
470 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; | ||
471 | |||
472 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | ||
473 | |||
474 | spin_lock(&sdp->sd_statfs_spin); | ||
475 | m_sc->sc_total += l_sc->sc_total; | ||
476 | m_sc->sc_free += l_sc->sc_free; | ||
477 | m_sc->sc_dinodes += l_sc->sc_dinodes; | ||
478 | memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); | ||
479 | memset(l_bh->b_data + sizeof(struct gfs2_dinode), | ||
480 | 0, sizeof(struct gfs2_statfs_change)); | ||
481 | spin_unlock(&sdp->sd_statfs_spin); | ||
482 | |||
483 | gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); | ||
484 | gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); | ||
485 | } | ||
486 | |||
444 | int gfs2_statfs_sync(struct gfs2_sbd *sdp) | 487 | int gfs2_statfs_sync(struct gfs2_sbd *sdp) |
445 | { | 488 | { |
446 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | 489 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); |
@@ -477,19 +520,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) | |||
477 | if (error) | 520 | if (error) |
478 | goto out_bh2; | 521 | goto out_bh2; |
479 | 522 | ||
480 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | 523 | update_statfs(sdp, m_bh, l_bh); |
481 | |||
482 | spin_lock(&sdp->sd_statfs_spin); | ||
483 | m_sc->sc_total += l_sc->sc_total; | ||
484 | m_sc->sc_free += l_sc->sc_free; | ||
485 | m_sc->sc_dinodes += l_sc->sc_dinodes; | ||
486 | memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); | ||
487 | memset(l_bh->b_data + sizeof(struct gfs2_dinode), | ||
488 | 0, sizeof(struct gfs2_statfs_change)); | ||
489 | spin_unlock(&sdp->sd_statfs_spin); | ||
490 | |||
491 | gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); | ||
492 | gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); | ||
493 | 524 | ||
494 | gfs2_trans_end(sdp); | 525 | gfs2_trans_end(sdp); |
495 | 526 | ||
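
update_statfs(), split out above so that journal recovery can reuse it, performs the whole local-to-master fold under sd_statfs_spin: add the per-node deltas into the master counters, zero both the in-core local copy and its on-disk image, then log the master inode buffer. The core of the fold as a sketch (sizeof(*l_sc) is used for the in-core clear here, where the patch zeroes the on-disk-sized region):

static void fold_local_statfs(struct gfs2_sbd *sdp,
                              struct gfs2_statfs_change_host *m_sc,
                              struct gfs2_statfs_change_host *l_sc,
                              struct buffer_head *l_bh)
{
        spin_lock(&sdp->sd_statfs_spin);
        m_sc->sc_total   += l_sc->sc_total;
        m_sc->sc_free    += l_sc->sc_free;
        m_sc->sc_dinodes += l_sc->sc_dinodes;
        memset(l_sc, 0, sizeof(*l_sc));                 /* in-core copy */
        memset(l_bh->b_data + sizeof(struct gfs2_dinode), 0,
               sizeof(struct gfs2_statfs_change));      /* on-disk copy */
        spin_unlock(&sdp->sd_statfs_spin);
}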
@@ -680,6 +711,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | |||
680 | struct gfs2_holder t_gh; | 711 | struct gfs2_holder t_gh; |
681 | int error; | 712 | int error; |
682 | 713 | ||
714 | flush_workqueue(gfs2_delete_workqueue); | ||
683 | gfs2_quota_sync(sdp); | 715 | gfs2_quota_sync(sdp); |
684 | gfs2_statfs_sync(sdp); | 716 | gfs2_statfs_sync(sdp); |
685 | 717 | ||
@@ -756,7 +788,6 @@ restart: | |||
756 | /* Release stuff */ | 788 | /* Release stuff */ |
757 | 789 | ||
758 | iput(sdp->sd_jindex); | 790 | iput(sdp->sd_jindex); |
759 | iput(sdp->sd_inum_inode); | ||
760 | iput(sdp->sd_statfs_inode); | 791 | iput(sdp->sd_statfs_inode); |
761 | iput(sdp->sd_rindex); | 792 | iput(sdp->sd_rindex); |
762 | iput(sdp->sd_quota_inode); | 793 | iput(sdp->sd_quota_inode); |
@@ -767,10 +798,8 @@ restart: | |||
767 | if (!sdp->sd_args.ar_spectator) { | 798 | if (!sdp->sd_args.ar_spectator) { |
768 | gfs2_glock_dq_uninit(&sdp->sd_journal_gh); | 799 | gfs2_glock_dq_uninit(&sdp->sd_journal_gh); |
769 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); | 800 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); |
770 | gfs2_glock_dq_uninit(&sdp->sd_ir_gh); | ||
771 | gfs2_glock_dq_uninit(&sdp->sd_sc_gh); | 801 | gfs2_glock_dq_uninit(&sdp->sd_sc_gh); |
772 | gfs2_glock_dq_uninit(&sdp->sd_qc_gh); | 802 | gfs2_glock_dq_uninit(&sdp->sd_qc_gh); |
773 | iput(sdp->sd_ir_inode); | ||
774 | iput(sdp->sd_sc_inode); | 803 | iput(sdp->sd_sc_inode); |
775 | iput(sdp->sd_qc_inode); | 804 | iput(sdp->sd_qc_inode); |
776 | } | 805 | } |
@@ -1072,6 +1101,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1072 | gt->gt_log_flush_secs = args.ar_commit; | 1101 | gt->gt_log_flush_secs = args.ar_commit; |
1073 | spin_unlock(&gt->gt_spin); | 1102 | 
1074 | 1103 | ||
1104 | gfs2_online_uevent(sdp); | ||
1075 | return 0; | 1105 | return 0; |
1076 | } | 1106 | } |
1077 | 1107 | ||
@@ -1213,6 +1243,22 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1213 | lfsecs = sdp->sd_tune.gt_log_flush_secs; | 1243 | lfsecs = sdp->sd_tune.gt_log_flush_secs; |
1214 | if (lfsecs != 60) | 1244 | if (lfsecs != 60) |
1215 | seq_printf(s, ",commit=%d", lfsecs); | 1245 | seq_printf(s, ",commit=%d", lfsecs); |
1246 | if (args->ar_errors != GFS2_ERRORS_DEFAULT) { | ||
1247 | const char *state; | ||
1248 | |||
1249 | switch (args->ar_errors) { | ||
1250 | case GFS2_ERRORS_WITHDRAW: | ||
1251 | state = "withdraw"; | ||
1252 | break; | ||
1253 | case GFS2_ERRORS_PANIC: | ||
1254 | state = "panic"; | ||
1255 | break; | ||
1256 | default: | ||
1257 | state = "unknown"; | ||
1258 | break; | ||
1259 | } | ||
1260 | seq_printf(s, ",errors=%s", state); | ||
1261 | } | ||
1216 | return 0; | 1262 | return 0; |
1217 | } | 1263 | } |
1218 | 1264 | ||
@@ -1240,6 +1286,10 @@ static void gfs2_delete_inode(struct inode *inode) | |||
1240 | goto out; | 1286 | goto out; |
1241 | } | 1287 | } |
1242 | 1288 | ||
1289 | error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); | ||
1290 | if (error) | ||
1291 | goto out_truncate; | ||
1292 | |||
1243 | gfs2_glock_dq_wait(&ip->i_iopen_gh); | 1293 | gfs2_glock_dq_wait(&ip->i_iopen_gh); |
1244 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); | 1294 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); |
1245 | error = gfs2_glock_nq(&ip->i_iopen_gh); | 1295 | error = gfs2_glock_nq(&ip->i_iopen_gh); |
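
The gfs2_show_options() hunk above follows the file's existing convention: print an option only when it differs from the default, so the /proc/mounts line stays minimal and round-trips as a remount string. Reduced to a sketch:

static void show_errors_opt(struct seq_file *s, unsigned int errors)
{
        if (errors == GFS2_ERRORS_DEFAULT)
                return;                 /* default is implied */
        seq_printf(s, ",errors=%s",
                   errors == GFS2_ERRORS_PANIC ? "panic" : "withdraw");
}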
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b56413e3e40d..235db3682885 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -25,7 +25,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | |||
25 | return x; | 25 | return x; |
26 | } | 26 | } |
27 | 27 | ||
28 | void gfs2_jindex_free(struct gfs2_sbd *sdp); | 28 | extern void gfs2_jindex_free(struct gfs2_sbd *sdp); |
29 | 29 | ||
30 | extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); | 30 | extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); |
31 | 31 | ||
@@ -36,10 +36,14 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, | |||
36 | struct gfs2_inode **ipp); | 36 | struct gfs2_inode **ipp); |
37 | 37 | ||
38 | extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); | 38 | extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); |
39 | 39 | extern void gfs2_online_uevent(struct gfs2_sbd *sdp); | |
40 | extern int gfs2_statfs_init(struct gfs2_sbd *sdp); | 40 | extern int gfs2_statfs_init(struct gfs2_sbd *sdp); |
41 | extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, | 41 | extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, |
42 | s64 dinodes); | 42 | s64 dinodes); |
43 | extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, | ||
44 | const void *buf); | ||
45 | extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, | ||
46 | struct buffer_head *l_bh); | ||
43 | extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); | 47 | extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); |
44 | 48 | ||
45 | extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); | 49 | extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); |
@@ -50,6 +54,7 @@ extern struct file_system_type gfs2meta_fs_type; | |||
50 | extern const struct export_operations gfs2_export_ops; | 54 | extern const struct export_operations gfs2_export_ops; |
51 | extern const struct super_operations gfs2_super_ops; | 55 | extern const struct super_operations gfs2_super_ops; |
52 | extern const struct dentry_operations gfs2_dops; | 56 | extern const struct dentry_operations gfs2_dops; |
57 | extern struct xattr_handler *gfs2_xattr_handlers[]; | ||
53 | 58 | ||
54 | #endif /* __SUPER_DOT_H__ */ | 59 | #endif /* __SUPER_DOT_H__ */ |
55 | 60 | ||
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 23419dc3027b..446329728d52 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kobject.h> | 16 | #include <linux/kobject.h> |
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
19 | #include <linux/genhd.h> | ||
19 | 20 | ||
20 | #include "gfs2.h" | 21 | #include "gfs2.h" |
21 | #include "incore.h" | 22 | #include "incore.h" |
@@ -319,12 +320,6 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
319 | return ret; | 320 | return ret; |
320 | } | 321 | } |
321 | 322 | ||
322 | static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf) | ||
323 | { | ||
324 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
325 | return sprintf(buf, "%u\n", ls->ls_id); | ||
326 | } | ||
327 | |||
328 | static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) | 323 | static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) |
329 | { | 324 | { |
330 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 325 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
@@ -386,22 +381,20 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | |||
386 | #define GDLM_ATTR(_name,_mode,_show,_store) \ | 381 | #define GDLM_ATTR(_name,_mode,_show,_store) \ |
387 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) | 382 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) |
388 | 383 | ||
389 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); | 384 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); |
390 | GDLM_ATTR(block, 0644, block_show, block_store); | 385 | GDLM_ATTR(block, 0644, block_show, block_store); |
391 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); | 386 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); |
392 | GDLM_ATTR(id, 0444, lkid_show, NULL); | 387 | GDLM_ATTR(jid, 0444, jid_show, NULL); |
393 | GDLM_ATTR(jid, 0444, jid_show, NULL); | 388 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); |
394 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); | 389 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); |
395 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); | 390 | GDLM_ATTR(recover, 0600, NULL, recover_store); |
396 | GDLM_ATTR(recover, 0200, NULL, recover_store); | 391 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
397 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 392 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
398 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | ||
399 | 393 | ||
400 | static struct attribute *lock_module_attrs[] = { | 394 | static struct attribute *lock_module_attrs[] = { |
401 | &gdlm_attr_proto_name.attr, | 395 | &gdlm_attr_proto_name.attr, |
402 | &gdlm_attr_block.attr, | 396 | &gdlm_attr_block.attr, |
403 | &gdlm_attr_withdraw.attr, | 397 | &gdlm_attr_withdraw.attr, |
404 | &gdlm_attr_id.attr, | ||
405 | &gdlm_attr_jid.attr, | 398 | &gdlm_attr_jid.attr, |
406 | &gdlm_attr_first.attr, | 399 | &gdlm_attr_first.attr, |
407 | &gdlm_attr_first_done.attr, | 400 | &gdlm_attr_first_done.attr, |
@@ -519,7 +512,14 @@ static struct attribute_group lock_module_group = { | |||
519 | 512 | ||
520 | int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | 513 | int gfs2_sys_fs_add(struct gfs2_sbd *sdp) |
521 | { | 514 | { |
515 | struct super_block *sb = sdp->sd_vfs; | ||
522 | int error; | 516 | int error; |
517 | char ro[20]; | ||
518 | char spectator[20]; | ||
519 | char *envp[] = { ro, spectator, NULL }; | ||
520 | |||
521 | sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); | ||
522 | sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); | ||
523 | 523 | ||
524 | sdp->sd_kobj.kset = gfs2_kset; | 524 | sdp->sd_kobj.kset = gfs2_kset; |
525 | error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, | 525 | error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, |
@@ -535,9 +535,17 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
535 | if (error) | 535 | if (error) |
536 | goto fail_tune; | 536 | goto fail_tune; |
537 | 537 | ||
538 | kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); | 538 | error = sysfs_create_link(&sdp->sd_kobj, |
539 | &disk_to_dev(sb->s_bdev->bd_disk)->kobj, | ||
540 | "device"); | ||
541 | if (error) | ||
542 | goto fail_lock_module; | ||
543 | |||
544 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_ADD, envp); | ||
539 | return 0; | 545 | return 0; |
540 | 546 | ||
547 | fail_lock_module: | ||
548 | sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); | ||
541 | fail_tune: | 549 | fail_tune: |
542 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); | 550 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); |
543 | fail_reg: | 551 | fail_reg: |
@@ -549,12 +557,12 @@ fail: | |||
549 | 557 | ||
550 | void gfs2_sys_fs_del(struct gfs2_sbd *sdp) | 558 | void gfs2_sys_fs_del(struct gfs2_sbd *sdp) |
551 | { | 559 | { |
560 | sysfs_remove_link(&sdp->sd_kobj, "device"); | ||
552 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); | 561 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); |
553 | sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); | 562 | sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); |
554 | kobject_put(&sdp->sd_kobj); | 563 | kobject_put(&sdp->sd_kobj); |
555 | } | 564 | } |
556 | 565 | ||
557 | |||
558 | static int gfs2_uevent(struct kset *kset, struct kobject *kobj, | 566 | static int gfs2_uevent(struct kset *kset, struct kobject *kobj, |
559 | struct kobj_uevent_env *env) | 567 | struct kobj_uevent_env *env) |
560 | { | 568 | { |
@@ -563,6 +571,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, | |||
563 | 571 | ||
564 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); | 572 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); |
565 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); | 573 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); |
574 | if (!sdp->sd_args.ar_spectator) | ||
575 | add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); | ||
566 | if (gfs2_uuid_valid(uuid)) { | 576 | if (gfs2_uuid_valid(uuid)) { |
567 | add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-" | 577 | add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-" |
568 | "%02X%02X-%02X%02X%02X%02X%02X%02X", | 578 | "%02X%02X-%02X%02X%02X%02X%02X%02X", |
@@ -578,7 +588,6 @@ static struct kset_uevent_ops gfs2_uevent_ops = { | |||
578 | .uevent = gfs2_uevent, | 588 | .uevent = gfs2_uevent, |
579 | }; | 589 | }; |
580 | 590 | ||
581 | |||
582 | int gfs2_sys_init(void) | 591 | int gfs2_sys_init(void) |
583 | { | 592 | { |
584 | gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj); | 593 | gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj); |
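The gfs2_sys_fs_add() hunk above replaces the plain kobject_uevent() call with kobject_uevent_env(), so the ADD event now carries RDONLY= and SPECTATOR= variables that udev rules can match on. A minimal userspace sketch of the same env-string construction follows; the buffer sizes and variable names come from the patch, everything else is illustrative:

#include <stdio.h>

int main(void)
{
	int rdonly = 1, spectator = 0;
	char ro[20];		/* "RDONLY=" + digit + NUL fits with room to spare */
	char spect[20];		/* likewise for "SPECTATOR=" */
	char *envp[] = { ro, spect, NULL };

	snprintf(ro, sizeof(ro), "RDONLY=%d", rdonly ? 1 : 0);
	snprintf(spect, sizeof(spect), "SPECTATOR=%d", spectator ? 1 : 0);

	/* kobject_uevent_env() would append these to the event payload;
	 * here we just print what a udev rule would get to match on. */
	for (char **p = envp; *p; p++)
		puts(*p);
	return 0;
}

Passing the strings through a NULL-terminated envp array mirrors the kernel helper's calling convention.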
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 9d12b1118ba0..f6a7efa34eb9 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c | |||
@@ -38,24 +38,30 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | |||
38 | const struct lm_lockops *lm = ls->ls_ops; | 38 | const struct lm_lockops *lm = ls->ls_ops; |
39 | va_list args; | 39 | va_list args; |
40 | 40 | ||
41 | if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | 41 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && |
42 | test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
42 | return 0; | 43 | return 0; |
43 | 44 | ||
44 | va_start(args, fmt); | 45 | va_start(args, fmt); |
45 | vprintk(fmt, args); | 46 | vprintk(fmt, args); |
46 | va_end(args); | 47 | va_end(args); |
47 | 48 | ||
48 | fs_err(sdp, "about to withdraw this file system\n"); | 49 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { |
49 | BUG_ON(sdp->sd_args.ar_debug); | 50 | fs_err(sdp, "about to withdraw this file system\n"); |
51 | BUG_ON(sdp->sd_args.ar_debug); | ||
50 | 52 | ||
51 | kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); | 53 | kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); |
52 | 54 | ||
53 | if (lm->lm_unmount) { | 55 | if (lm->lm_unmount) { |
54 | fs_err(sdp, "telling LM to unmount\n"); | 56 | fs_err(sdp, "telling LM to unmount\n"); |
55 | lm->lm_unmount(sdp); | 57 | lm->lm_unmount(sdp); |
58 | } | ||
59 | fs_err(sdp, "withdrawn\n"); | ||
60 | dump_stack(); | ||
56 | } | 61 | } |
57 | fs_err(sdp, "withdrawn\n"); | 62 | |
58 | dump_stack(); | 63 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) |
64 | panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname); | ||
59 | 65 | ||
60 | return -1; | 66 | return -1; |
61 | } | 67 | } |
@@ -93,17 +99,24 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, | |||
93 | gfs2_tune_get(sdp, gt_complain_secs) * HZ)) | 99 | gfs2_tune_get(sdp, gt_complain_secs) * HZ)) |
94 | return -2; | 100 | return -2; |
95 | 101 | ||
96 | printk(KERN_WARNING | 102 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) |
97 | "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" | 103 | printk(KERN_WARNING |
98 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 104 | "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" |
99 | sdp->sd_fsname, assertion, | 105 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", |
100 | sdp->sd_fsname, function, file, line); | 106 | sdp->sd_fsname, assertion, |
107 | sdp->sd_fsname, function, file, line); | ||
101 | 108 | ||
102 | if (sdp->sd_args.ar_debug) | 109 | if (sdp->sd_args.ar_debug) |
103 | BUG(); | 110 | BUG(); |
104 | else | 111 | else |
105 | dump_stack(); | 112 | dump_stack(); |
106 | 113 | ||
114 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) | ||
115 | panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n" | ||
116 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
117 | sdp->sd_fsname, assertion, | ||
118 | sdp->sd_fsname, function, file, line); | ||
119 | |||
107 | sdp->sd_last_warning = jiffies; | 120 | sdp->sd_last_warning = jiffies; |
108 | 121 | ||
109 | return -1; | 122 | return -1; |
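Both util.c hunks above make the error response conditional on the errors= mount option: GFS2_ERRORS_WITHDRAW keeps the existing withdraw-and-unmount behaviour, while GFS2_ERRORS_PANIC escalates to panic(). A hedged userspace model of that policy dispatch; the enum names echo the patch, but the values and helpers are stand-ins:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the GFS2_ERRORS_* mount-option values. */
enum errors_policy { ERRORS_WITHDRAW, ERRORS_PANIC };

static void on_fs_error(enum errors_policy policy, const char *fsid)
{
	if (policy == ERRORS_WITHDRAW) {
		/* old behaviour: mark the fs withdrawn, tell the lock
		 * manager to unmount, keep the machine running */
		fprintf(stderr, "GFS2: fsid=%s: withdrawn\n", fsid);
		return;
	}
	if (policy == ERRORS_PANIC) {
		/* the kernel calls panic(); abort() is the closest
		 * userspace analogue */
		fprintf(stderr, "GFS2: fsid=%s: panic requested.\n", fsid);
		abort();
	}
}

int main(void)
{
	on_fs_error(ERRORS_WITHDRAW, "demo:fs0");
	return 0;
}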
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/xattr.c index 07ea9529adda..8a0f8ef6ee27 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -18,8 +18,7 @@ | |||
18 | #include "gfs2.h" | 18 | #include "gfs2.h" |
19 | #include "incore.h" | 19 | #include "incore.h" |
20 | #include "acl.h" | 20 | #include "acl.h" |
21 | #include "eaops.h" | 21 | #include "xattr.h" |
22 | #include "eattr.h" | ||
23 | #include "glock.h" | 22 | #include "glock.h" |
24 | #include "inode.h" | 23 | #include "inode.h" |
25 | #include "meta_io.h" | 24 | #include "meta_io.h" |
@@ -38,26 +37,32 @@ | |||
38 | * Returns: 1 if the EA should be stuffed | 37 | * Returns: 1 if the EA should be stuffed |
39 | */ | 38 | */ |
40 | 39 | ||
41 | static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er, | 40 | static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize, |
42 | unsigned int *size) | 41 | unsigned int *size) |
43 | { | 42 | { |
44 | *size = GFS2_EAREQ_SIZE_STUFFED(er); | 43 | unsigned int jbsize = sdp->sd_jbsize; |
45 | if (*size <= sdp->sd_jbsize) | 44 | |
45 | /* Stuffed */ | ||
46 | *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8); | ||
47 | |||
48 | if (*size <= jbsize) | ||
46 | return 1; | 49 | return 1; |
47 | 50 | ||
48 | *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er); | 51 | /* Unstuffed */ |
52 | *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + | ||
53 | (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8); | ||
49 | 54 | ||
50 | return 0; | 55 | return 0; |
51 | } | 56 | } |
52 | 57 | ||
53 | static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er) | 58 | static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize) |
54 | { | 59 | { |
55 | unsigned int size; | 60 | unsigned int size; |
56 | 61 | ||
57 | if (er->er_data_len > GFS2_EA_MAX_DATA_LEN) | 62 | if (dsize > GFS2_EA_MAX_DATA_LEN) |
58 | return -ERANGE; | 63 | return -ERANGE; |
59 | 64 | ||
60 | ea_calc_size(sdp, er, &size); | 65 | ea_calc_size(sdp, nsize, dsize, &size); |
61 | 66 | ||
62 | /* This can only happen with 512 byte blocks */ | 67 | /* This can only happen with 512 byte blocks */ |
63 | if (size > sdp->sd_jbsize) | 68 | if (size > sdp->sd_jbsize) |
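The rewritten ea_calc_size() above computes the footprint from the raw name and data sizes instead of a request struct: stuffed means header, name and data fit in one journaled block; unstuffed stores one big-endian 64-bit block pointer per data block. A runnable reconstruction of that arithmetic, with ALIGN() and DIV_ROUND_UP() expanded as the kernel defines them; the 8-byte header size and the sample jbsize are assumptions made only for this demo:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Kernel-style helpers; ALIGN() assumes a power-of-two alignment. */
#define ALIGN(x, a)        (((x) + (a) - 1) & ~((size_t)(a) - 1))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Assumed header size, for the demo only; the real struct gfs2_ea_header
 * is defined in the GFS2 on-disk format headers. */
#define EA_HEADER_SIZE 8

/* Returns 1 if the EA is stuffed, 0 if unstuffed; *size gets the footprint. */
static int ea_calc_size(size_t jbsize, size_t nsize, size_t dsize,
			size_t *size)
{
	/* Stuffed: header + name + data, packed into one journaled block. */
	*size = ALIGN(EA_HEADER_SIZE + nsize + dsize, 8);
	if (*size <= jbsize)
		return 1;

	/* Unstuffed: header + name + one 64-bit pointer per data block. */
	*size = ALIGN(EA_HEADER_SIZE + nsize +
		      sizeof(uint64_t) * DIV_ROUND_UP(dsize, jbsize), 8);
	return 0;
}

int main(void)
{
	size_t sz;
	int stuffed = ea_calc_size(3968, 10, 8000, &sz);

	printf("%s, %zu bytes\n", stuffed ? "stuffed" : "unstuffed", sz);
	return 0;
}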
@@ -151,7 +156,9 @@ out: | |||
151 | } | 156 | } |
152 | 157 | ||
153 | struct ea_find { | 158 | struct ea_find { |
154 | struct gfs2_ea_request *ef_er; | 159 | int type; |
160 | const char *name; | ||
161 | size_t namel; | ||
155 | struct gfs2_ea_location *ef_el; | 162 | struct gfs2_ea_location *ef_el; |
156 | }; | 163 | }; |
157 | 164 | ||
@@ -160,14 +167,13 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, | |||
160 | void *private) | 167 | void *private) |
161 | { | 168 | { |
162 | struct ea_find *ef = private; | 169 | struct ea_find *ef = private; |
163 | struct gfs2_ea_request *er = ef->ef_er; | ||
164 | 170 | ||
165 | if (ea->ea_type == GFS2_EATYPE_UNUSED) | 171 | if (ea->ea_type == GFS2_EATYPE_UNUSED) |
166 | return 0; | 172 | return 0; |
167 | 173 | ||
168 | if (ea->ea_type == er->er_type) { | 174 | if (ea->ea_type == ef->type) { |
169 | if (ea->ea_name_len == er->er_name_len && | 175 | if (ea->ea_name_len == ef->namel && |
170 | !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) { | 176 | !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) { |
171 | struct gfs2_ea_location *el = ef->ef_el; | 177 | struct gfs2_ea_location *el = ef->ef_el; |
172 | get_bh(bh); | 178 | get_bh(bh); |
173 | el->el_bh = bh; | 179 | el->el_bh = bh; |
@@ -180,13 +186,15 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, | |||
180 | return 0; | 186 | return 0; |
181 | } | 187 | } |
182 | 188 | ||
183 | int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er, | 189 | int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, |
184 | struct gfs2_ea_location *el) | 190 | struct gfs2_ea_location *el) |
185 | { | 191 | { |
186 | struct ea_find ef; | 192 | struct ea_find ef; |
187 | int error; | 193 | int error; |
188 | 194 | ||
189 | ef.ef_er = er; | 195 | ef.type = type; |
196 | ef.name = name; | ||
197 | ef.namel = strlen(name); | ||
190 | ef.ef_el = el; | 198 | ef.ef_el = el; |
191 | 199 | ||
192 | memset(el, 0, sizeof(struct gfs2_ea_location)); | 200 | memset(el, 0, sizeof(struct gfs2_ea_location)); |
@@ -344,6 +352,20 @@ struct ea_list { | |||
344 | unsigned int ei_size; | 352 | unsigned int ei_size; |
345 | }; | 353 | }; |
346 | 354 | ||
355 | static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea) | ||
356 | { | ||
357 | switch (ea->ea_type) { | ||
358 | case GFS2_EATYPE_USR: | ||
359 | return 5 + ea->ea_name_len + 1; | ||
360 | case GFS2_EATYPE_SYS: | ||
361 | return 7 + ea->ea_name_len + 1; | ||
362 | case GFS2_EATYPE_SECURITY: | ||
363 | return 9 + ea->ea_name_len + 1; | ||
364 | default: | ||
365 | return 0; | ||
366 | } | ||
367 | } | ||
368 | |||
347 | static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, | 369 | static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, |
348 | struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, | 370 | struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, |
349 | void *private) | 371 | void *private) |
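gfs2_ea_strlen(), moved from the old eattr.h into xattr.c above, sizes one listxattr entry: the namespace prefix, the stored name, and a trailing NUL. The constants 5, 7 and 9 are simply strlen("user."), strlen("system.") and strlen("security."), which the short check below confirms:

#include <stdio.h>
#include <string.h>

/* prefix + stored name + terminating NUL, matching the 5/7/9 constants */
static size_t ea_entry_len(const char *prefix, const char *name)
{
	return strlen(prefix) + strlen(name) + 1;
}

int main(void)
{
	printf("user.     -> %zu\n", strlen("user."));		/* 5 */
	printf("system.   -> %zu\n", strlen("system."));	/* 7 */
	printf("security. -> %zu\n", strlen("security."));	/* 9 */
	printf("\"user.foo\" needs %zu bytes in the listing\n",
	       ea_entry_len("user.", "foo"));
	return 0;
}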
@@ -392,21 +414,25 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, | |||
392 | } | 414 | } |
393 | 415 | ||
394 | /** | 416 | /** |
395 | * gfs2_ea_list - | 417 | * gfs2_listxattr - List gfs2 extended attributes |
396 | * @ip: | 418 | * @dentry: The dentry whose inode we are interested in |
397 | * @er: | 419 | * @buffer: The buffer to write the results |
420 | * @size: The size of the buffer | ||
398 | * | 421 | * |
399 | * Returns: actual size of data on success, -errno on error | 422 | * Returns: actual size of data on success, -errno on error |
400 | */ | 423 | */ |
401 | 424 | ||
402 | int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 425 | ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) |
403 | { | 426 | { |
427 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | ||
428 | struct gfs2_ea_request er; | ||
404 | struct gfs2_holder i_gh; | 429 | struct gfs2_holder i_gh; |
405 | int error; | 430 | int error; |
406 | 431 | ||
407 | if (!er->er_data || !er->er_data_len) { | 432 | memset(&er, 0, sizeof(struct gfs2_ea_request)); |
408 | er->er_data = NULL; | 433 | if (size) { |
409 | er->er_data_len = 0; | 434 | er.er_data = buffer; |
435 | er.er_data_len = size; | ||
410 | } | 436 | } |
411 | 437 | ||
412 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | 438 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
@@ -414,7 +440,7 @@ int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
414 | return error; | 440 | return error; |
415 | 441 | ||
416 | if (ip->i_eattr) { | 442 | if (ip->i_eattr) { |
417 | struct ea_list ei = { .ei_er = er, .ei_size = 0 }; | 443 | struct ea_list ei = { .ei_er = &er, .ei_size = 0 }; |
418 | 444 | ||
419 | error = ea_foreach(ip, ea_list_i, &ei); | 445 | error = ea_foreach(ip, ea_list_i, &ei); |
420 | if (!error) | 446 | if (!error) |
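gfs2_listxattr() above follows the usual xattr contract: called with size == 0 it only reports how much space the listing needs, and with a real buffer it either fills it or the lower layers fail with -ERANGE. From userspace that shows up as the familiar two-call pattern, sketched here with minimal error handling:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";

	/* First call: NULL buffer, size 0, asks only for the space needed. */
	ssize_t len = listxattr(path, NULL, 0);
	if (len <= 0)
		return 1;	/* error, or no xattrs at all */

	char *buf = malloc(len);
	if (!buf)
		return 1;

	/* Second call: fill the buffer; entries are NUL-separated names. */
	len = listxattr(path, buf, len);
	for (ssize_t off = 0; off < len; off += strlen(buf + off) + 1)
		puts(buf + off);

	free(buf);
	return 0;
}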
@@ -491,84 +517,61 @@ out: | |||
491 | } | 517 | } |
492 | 518 | ||
493 | int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, | 519 | int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, |
494 | char *data) | 520 | char *data, size_t size) |
495 | { | 521 | { |
522 | int ret; | ||
523 | size_t len = GFS2_EA_DATA_LEN(el->el_ea); | ||
524 | if (len > size) | ||
525 | return -ERANGE; | ||
526 | |||
496 | if (GFS2_EA_IS_STUFFED(el->el_ea)) { | 527 | if (GFS2_EA_IS_STUFFED(el->el_ea)) { |
497 | memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea)); | 528 | memcpy(data, GFS2_EA2DATA(el->el_ea), len); |
498 | return 0; | 529 | return len; |
499 | } else | 530 | } |
500 | return ea_get_unstuffed(ip, el->el_ea, data); | 531 | ret = ea_get_unstuffed(ip, el->el_ea, data); |
532 | if (ret < 0) | ||
533 | return ret; | ||
534 | return len; | ||
501 | } | 535 | } |
502 | 536 | ||
503 | /** | 537 | /** |
504 | * gfs2_ea_get_i - | 538 | * gfs2_xattr_get - Get a GFS2 extended attribute |
505 | * @ip: The GFS2 inode | 539 | * @inode: The inode |
506 | * @er: The request structure | 540 | * @type: The type of extended attribute |
541 | * @name: The name of the extended attribute | ||
542 | * @buffer: The buffer to write the result into | ||
543 | * @size: The size of the buffer | ||
507 | * | 544 | * |
508 | * Returns: actual size of data on success, -errno on error | 545 | * Returns: actual size of data on success, -errno on error |
509 | */ | 546 | */ |
510 | 547 | ||
511 | int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 548 | int gfs2_xattr_get(struct inode *inode, int type, const char *name, |
549 | void *buffer, size_t size) | ||
512 | { | 550 | { |
551 | struct gfs2_inode *ip = GFS2_I(inode); | ||
513 | struct gfs2_ea_location el; | 552 | struct gfs2_ea_location el; |
514 | int error; | 553 | int error; |
515 | 554 | ||
516 | if (!ip->i_eattr) | 555 | if (!ip->i_eattr) |
517 | return -ENODATA; | 556 | return -ENODATA; |
557 | if (strlen(name) > GFS2_EA_MAX_NAME_LEN) | ||
558 | return -EINVAL; | ||
518 | 559 | ||
519 | error = gfs2_ea_find(ip, er, &el); | 560 | error = gfs2_ea_find(ip, type, name, &el); |
520 | if (error) | 561 | if (error) |
521 | return error; | 562 | return error; |
522 | if (!el.el_ea) | 563 | if (!el.el_ea) |
523 | return -ENODATA; | 564 | return -ENODATA; |
524 | 565 | if (size) | |
525 | if (er->er_data_len) { | 566 | error = gfs2_ea_get_copy(ip, &el, buffer, size); |
526 | if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len) | 567 | else |
527 | error = -ERANGE; | ||
528 | else | ||
529 | error = gfs2_ea_get_copy(ip, &el, er->er_data); | ||
530 | } | ||
531 | if (!error) | ||
532 | error = GFS2_EA_DATA_LEN(el.el_ea); | 568 | error = GFS2_EA_DATA_LEN(el.el_ea); |
533 | |||
534 | brelse(el.el_bh); | 569 | brelse(el.el_bh); |
535 | 570 | ||
536 | return error; | 571 | return error; |
537 | } | 572 | } |
538 | 573 | ||
539 | /** | 574 | /** |
540 | * gfs2_ea_get - | ||
541 | * @ip: The GFS2 inode | ||
542 | * @er: The request structure | ||
543 | * | ||
544 | * Returns: actual size of data on success, -errno on error | ||
545 | */ | ||
546 | |||
547 | int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
548 | { | ||
549 | struct gfs2_holder i_gh; | ||
550 | int error; | ||
551 | |||
552 | if (!er->er_name_len || | ||
553 | er->er_name_len > GFS2_EA_MAX_NAME_LEN) | ||
554 | return -EINVAL; | ||
555 | if (!er->er_data || !er->er_data_len) { | ||
556 | er->er_data = NULL; | ||
557 | er->er_data_len = 0; | ||
558 | } | ||
559 | |||
560 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | ||
561 | if (error) | ||
562 | return error; | ||
563 | |||
564 | error = gfs2_ea_ops[er->er_type]->eo_get(ip, er); | ||
565 | |||
566 | gfs2_glock_dq_uninit(&i_gh); | ||
567 | |||
568 | return error; | ||
569 | } | ||
570 | |||
571 | /** | ||
572 | * ea_alloc_blk - allocates a new block for extended attributes. | 575 | * ea_alloc_blk - allocates a new block for extended attributes. |
573 | * @ip: A pointer to the inode that's getting extended attributes | 576 | * @ip: A pointer to the inode that's getting extended attributes |
574 | * @bhp: Pointer to pointer to a struct buffer_head | 577 | * @bhp: Pointer to pointer to a struct buffer_head |
@@ -713,12 +716,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
713 | 716 | ||
714 | error = gfs2_meta_inode_buffer(ip, &dibh); | 717 | error = gfs2_meta_inode_buffer(ip, &dibh); |
715 | if (!error) { | 718 | if (!error) { |
716 | if (er->er_flags & GFS2_ERF_MODE) { | ||
717 | gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), | ||
718 | (ip->i_inode.i_mode & S_IFMT) == | ||
719 | (er->er_mode & S_IFMT)); | ||
720 | ip->i_inode.i_mode = er->er_mode; | ||
721 | } | ||
722 | ip->i_inode.i_ctime = CURRENT_TIME; | 719 | ip->i_inode.i_ctime = CURRENT_TIME; |
723 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 720 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
724 | gfs2_dinode_out(ip, dibh->b_data); | 721 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -762,15 +759,23 @@ static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
762 | * Returns: errno | 759 | * Returns: errno |
763 | */ | 760 | */ |
764 | 761 | ||
765 | static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 762 | static int ea_init(struct gfs2_inode *ip, int type, const char *name, |
763 | const void *data, size_t size) | ||
766 | { | 764 | { |
765 | struct gfs2_ea_request er; | ||
767 | unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; | 766 | unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; |
768 | unsigned int blks = 1; | 767 | unsigned int blks = 1; |
769 | 768 | ||
770 | if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize) | 769 | er.er_type = type; |
771 | blks += DIV_ROUND_UP(er->er_data_len, jbsize); | 770 | er.er_name = name; |
771 | er.er_name_len = strlen(name); | ||
772 | er.er_data = (void *)data; | ||
773 | er.er_data_len = size; | ||
774 | |||
775 | if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize) | ||
776 | blks += DIV_ROUND_UP(er.er_data_len, jbsize); | ||
772 | 777 | ||
773 | return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL); | 778 | return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL); |
774 | } | 779 | } |
775 | 780 | ||
776 | static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) | 781 | static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) |
@@ -848,12 +853,6 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, | |||
848 | error = gfs2_meta_inode_buffer(ip, &dibh); | 853 | error = gfs2_meta_inode_buffer(ip, &dibh); |
849 | if (error) | 854 | if (error) |
850 | goto out; | 855 | goto out; |
851 | |||
852 | if (er->er_flags & GFS2_ERF_MODE) { | ||
853 | gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), | ||
854 | (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); | ||
855 | ip->i_inode.i_mode = er->er_mode; | ||
856 | } | ||
857 | ip->i_inode.i_ctime = CURRENT_TIME; | 856 | ip->i_inode.i_ctime = CURRENT_TIME; |
858 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 857 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
859 | gfs2_dinode_out(ip, dibh->b_data); | 858 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -894,7 +893,8 @@ static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh, | |||
894 | int stuffed; | 893 | int stuffed; |
895 | int error; | 894 | int error; |
896 | 895 | ||
897 | stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size); | 896 | stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er->er_name_len, |
897 | es->es_er->er_data_len, &size); | ||
898 | 898 | ||
899 | if (ea->ea_type == GFS2_EATYPE_UNUSED) { | 899 | if (ea->ea_type == GFS2_EATYPE_UNUSED) { |
900 | if (GFS2_EA_REC_LEN(ea) < size) | 900 | if (GFS2_EA_REC_LEN(ea) < size) |
@@ -1005,15 +1005,22 @@ out: | |||
1005 | return error; | 1005 | return error; |
1006 | } | 1006 | } |
1007 | 1007 | ||
1008 | static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, | 1008 | static int ea_set_i(struct gfs2_inode *ip, int type, const char *name, |
1009 | struct gfs2_ea_location *el) | 1009 | const void *value, size_t size, struct gfs2_ea_location *el) |
1010 | { | 1010 | { |
1011 | struct gfs2_ea_request er; | ||
1011 | struct ea_set es; | 1012 | struct ea_set es; |
1012 | unsigned int blks = 2; | 1013 | unsigned int blks = 2; |
1013 | int error; | 1014 | int error; |
1014 | 1015 | ||
1016 | er.er_type = type; | ||
1017 | er.er_name = name; | ||
1018 | er.er_data = (void *)value; | ||
1019 | er.er_name_len = strlen(name); | ||
1020 | er.er_data_len = size; | ||
1021 | |||
1015 | memset(&es, 0, sizeof(struct ea_set)); | 1022 | memset(&es, 0, sizeof(struct ea_set)); |
1016 | es.es_er = er; | 1023 | es.es_er = &er; |
1017 | es.es_el = el; | 1024 | es.es_el = el; |
1018 | 1025 | ||
1019 | error = ea_foreach(ip, ea_set_simple, &es); | 1026 | error = ea_foreach(ip, ea_set_simple, &es); |
@@ -1024,10 +1031,10 @@ static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
1024 | 1031 | ||
1025 | if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) | 1032 | if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) |
1026 | blks++; | 1033 | blks++; |
1027 | if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize) | 1034 | if (GFS2_EAREQ_SIZE_STUFFED(&er) > GFS2_SB(&ip->i_inode)->sd_jbsize) |
1028 | blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); | 1035 | blks += DIV_ROUND_UP(er.er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); |
1029 | 1036 | ||
1030 | return ea_alloc_skeleton(ip, er, blks, ea_set_block, el); | 1037 | return ea_alloc_skeleton(ip, &er, blks, ea_set_block, el); |
1031 | } | 1038 | } |
1032 | 1039 | ||
1033 | static int ea_set_remove_unstuffed(struct gfs2_inode *ip, | 1040 | static int ea_set_remove_unstuffed(struct gfs2_inode *ip, |
@@ -1039,75 +1046,7 @@ static int ea_set_remove_unstuffed(struct gfs2_inode *ip, | |||
1039 | GFS2_EA2NEXT(el->el_prev) == el->el_ea); | 1046 | GFS2_EA2NEXT(el->el_prev) == el->el_ea); |
1040 | } | 1047 | } |
1041 | 1048 | ||
1042 | return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0); | 1049 | return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0); |
1043 | } | ||
1044 | |||
1045 | int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
1046 | { | ||
1047 | struct gfs2_ea_location el; | ||
1048 | int error; | ||
1049 | |||
1050 | if (!ip->i_eattr) { | ||
1051 | if (er->er_flags & XATTR_REPLACE) | ||
1052 | return -ENODATA; | ||
1053 | return ea_init(ip, er); | ||
1054 | } | ||
1055 | |||
1056 | error = gfs2_ea_find(ip, er, &el); | ||
1057 | if (error) | ||
1058 | return error; | ||
1059 | |||
1060 | if (el.el_ea) { | ||
1061 | if (ip->i_diskflags & GFS2_DIF_APPENDONLY) { | ||
1062 | brelse(el.el_bh); | ||
1063 | return -EPERM; | ||
1064 | } | ||
1065 | |||
1066 | error = -EEXIST; | ||
1067 | if (!(er->er_flags & XATTR_CREATE)) { | ||
1068 | int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea); | ||
1069 | error = ea_set_i(ip, er, &el); | ||
1070 | if (!error && unstuffed) | ||
1071 | ea_set_remove_unstuffed(ip, &el); | ||
1072 | } | ||
1073 | |||
1074 | brelse(el.el_bh); | ||
1075 | } else { | ||
1076 | error = -ENODATA; | ||
1077 | if (!(er->er_flags & XATTR_REPLACE)) | ||
1078 | error = ea_set_i(ip, er, NULL); | ||
1079 | } | ||
1080 | |||
1081 | return error; | ||
1082 | } | ||
1083 | |||
1084 | int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
1085 | { | ||
1086 | struct gfs2_holder i_gh; | ||
1087 | int error; | ||
1088 | |||
1089 | if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN) | ||
1090 | return -EINVAL; | ||
1091 | if (!er->er_data || !er->er_data_len) { | ||
1092 | er->er_data = NULL; | ||
1093 | er->er_data_len = 0; | ||
1094 | } | ||
1095 | error = ea_check_size(GFS2_SB(&ip->i_inode), er); | ||
1096 | if (error) | ||
1097 | return error; | ||
1098 | |||
1099 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | ||
1100 | if (error) | ||
1101 | return error; | ||
1102 | |||
1103 | if (IS_IMMUTABLE(&ip->i_inode)) | ||
1104 | error = -EPERM; | ||
1105 | else | ||
1106 | error = gfs2_ea_ops[er->er_type]->eo_set(ip, er); | ||
1107 | |||
1108 | gfs2_glock_dq_uninit(&i_gh); | ||
1109 | |||
1110 | return error; | ||
1111 | } | 1050 | } |
1112 | 1051 | ||
1113 | static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) | 1052 | static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) |
@@ -1131,8 +1070,9 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) | |||
1131 | 1070 | ||
1132 | if (GFS2_EA_IS_LAST(ea)) | 1071 | if (GFS2_EA_IS_LAST(ea)) |
1133 | prev->ea_flags |= GFS2_EAFLAG_LAST; | 1072 | prev->ea_flags |= GFS2_EAFLAG_LAST; |
1134 | } else | 1073 | } else { |
1135 | ea->ea_type = GFS2_EATYPE_UNUSED; | 1074 | ea->ea_type = GFS2_EATYPE_UNUSED; |
1075 | } | ||
1136 | 1076 | ||
1137 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1077 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1138 | if (!error) { | 1078 | if (!error) { |
@@ -1147,15 +1087,29 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) | |||
1147 | return error; | 1087 | return error; |
1148 | } | 1088 | } |
1149 | 1089 | ||
1150 | int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 1090 | /** |
1091 | * gfs2_xattr_remove - Remove a GFS2 extended attribute | ||
1092 | * @inode: The inode | ||
1093 | * @type: The type of the extended attribute | ||
1094 | * @name: The name of the extended attribute | ||
1095 | * | ||
1096 | * This is not called directly by the VFS since we use the (common) | ||
1097 | * scheme of making a "set with NULL data" mean a remove request. Note | ||
1098 | * that this is different from a set with zero length data. | ||
1099 | * | ||
1100 | * Returns: 0, or errno on failure | ||
1101 | */ | ||
1102 | |||
1103 | static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) | ||
1151 | { | 1104 | { |
1105 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1152 | struct gfs2_ea_location el; | 1106 | struct gfs2_ea_location el; |
1153 | int error; | 1107 | int error; |
1154 | 1108 | ||
1155 | if (!ip->i_eattr) | 1109 | if (!ip->i_eattr) |
1156 | return -ENODATA; | 1110 | return -ENODATA; |
1157 | 1111 | ||
1158 | error = gfs2_ea_find(ip, er, &el); | 1112 | error = gfs2_ea_find(ip, type, name, &el); |
1159 | if (error) | 1113 | if (error) |
1160 | return error; | 1114 | return error; |
1161 | if (!el.el_ea) | 1115 | if (!el.el_ea) |
@@ -1164,8 +1118,7 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
1164 | if (GFS2_EA_IS_STUFFED(el.el_ea)) | 1118 | if (GFS2_EA_IS_STUFFED(el.el_ea)) |
1165 | error = ea_remove_stuffed(ip, &el); | 1119 | error = ea_remove_stuffed(ip, &el); |
1166 | else | 1120 | else |
1167 | error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, | 1121 | error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 0); |
1168 | 0); | ||
1169 | 1122 | ||
1170 | brelse(el.el_bh); | 1123 | brelse(el.el_bh); |
1171 | 1124 | ||
@@ -1173,31 +1126,70 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
1173 | } | 1126 | } |
1174 | 1127 | ||
1175 | /** | 1128 | /** |
1176 | * gfs2_ea_remove - sets (or creates or replaces) an extended attribute | 1129 | * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute |
1177 | * @ip: pointer to the inode of the target file | 1130 | * @inode: The inode |
1178 | * @er: request information | 1131 | * @type: The type of the extended attribute |
1132 | * @name: The name of the extended attribute | ||
1133 | * @value: The value of the extended attribute (NULL for remove) | ||
1134 | * @size: The size of the @value argument | ||
1135 | * @flags: Create or Replace | ||
1179 | * | 1136 | * |
1180 | * Returns: errno | 1137 | * See gfs2_xattr_remove() for details of the removal of xattrs. |
1138 | * | ||
1139 | * Returns: 0 or errno on failure | ||
1181 | */ | 1140 | */ |
1182 | 1141 | ||
1183 | int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 1142 | int gfs2_xattr_set(struct inode *inode, int type, const char *name, |
1143 | const void *value, size_t size, int flags) | ||
1184 | { | 1144 | { |
1185 | struct gfs2_holder i_gh; | 1145 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1146 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1147 | struct gfs2_ea_location el; | ||
1148 | unsigned int namel = strlen(name); | ||
1186 | int error; | 1149 | int error; |
1187 | 1150 | ||
1188 | if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN) | 1151 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
1189 | return -EINVAL; | 1152 | return -EPERM; |
1153 | if (namel > GFS2_EA_MAX_NAME_LEN) | ||
1154 | return -ERANGE; | ||
1190 | 1155 | ||
1191 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | 1156 | if (value == NULL) |
1157 | return gfs2_xattr_remove(inode, type, name); | ||
1158 | |||
1159 | if (ea_check_size(sdp, namel, size)) | ||
1160 | return -ERANGE; | ||
1161 | |||
1162 | if (!ip->i_eattr) { | ||
1163 | if (flags & XATTR_REPLACE) | ||
1164 | return -ENODATA; | ||
1165 | return ea_init(ip, type, name, value, size); | ||
1166 | } | ||
1167 | |||
1168 | error = gfs2_ea_find(ip, type, name, &el); | ||
1192 | if (error) | 1169 | if (error) |
1193 | return error; | 1170 | return error; |
1194 | 1171 | ||
1195 | if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) | 1172 | if (el.el_ea) { |
1196 | error = -EPERM; | 1173 | if (ip->i_diskflags & GFS2_DIF_APPENDONLY) { |
1197 | else | 1174 | brelse(el.el_bh); |
1198 | error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er); | 1175 | return -EPERM; |
1176 | } | ||
1199 | 1177 | ||
1200 | gfs2_glock_dq_uninit(&i_gh); | 1178 | error = -EEXIST; |
1179 | if (!(flags & XATTR_CREATE)) { | ||
1180 | int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea); | ||
1181 | error = ea_set_i(ip, type, name, value, size, &el); | ||
1182 | if (!error && unstuffed) | ||
1183 | ea_set_remove_unstuffed(ip, &el); | ||
1184 | } | ||
1185 | |||
1186 | brelse(el.el_bh); | ||
1187 | return error; | ||
1188 | } | ||
1189 | |||
1190 | error = -ENODATA; | ||
1191 | if (!(flags & XATTR_REPLACE)) | ||
1192 | error = ea_set_i(ip, type, name, value, size, NULL); | ||
1201 | 1193 | ||
1202 | return error; | 1194 | return error; |
1203 | } | 1195 | } |
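The consolidated gfs2_xattr_set() above implements the standard flag semantics: XATTR_CREATE fails with EEXIST when the attribute already exists, XATTR_REPLACE fails with ENODATA when it does not, and a NULL value is routed to gfs2_xattr_remove(). The same contract, exercised through the syscall interface; the path and attribute name here are illustrative:

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "demo.txt";
	const char *val = "hello";

	/* XATTR_CREATE: succeeds only if the attribute is absent. */
	if (setxattr(path, "user.demo", val, strlen(val), XATTR_CREATE) < 0)
		perror("create");	/* EEXIST on a second run */

	/* XATTR_REPLACE: succeeds only if the attribute is present. */
	if (setxattr(path, "user.demo", val, strlen(val), XATTR_REPLACE) < 0)
		perror("replace");	/* ENODATA if it never existed */

	/* removexattr() is the userspace face of the NULL-value remove path. */
	if (removexattr(path, "user.demo") < 0)
		perror("remove");
	return 0;
}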
@@ -1503,3 +1495,64 @@ out_alloc: | |||
1503 | return error; | 1495 | return error; |
1504 | } | 1496 | } |
1505 | 1497 | ||
1498 | static int gfs2_xattr_user_get(struct inode *inode, const char *name, | ||
1499 | void *buffer, size_t size) | ||
1500 | { | ||
1501 | return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size); | ||
1502 | } | ||
1503 | |||
1504 | static int gfs2_xattr_user_set(struct inode *inode, const char *name, | ||
1505 | const void *value, size_t size, int flags) | ||
1506 | { | ||
1507 | return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags); | ||
1508 | } | ||
1509 | |||
1510 | static int gfs2_xattr_system_get(struct inode *inode, const char *name, | ||
1511 | void *buffer, size_t size) | ||
1512 | { | ||
1513 | return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); | ||
1514 | } | ||
1515 | |||
1516 | static int gfs2_xattr_system_set(struct inode *inode, const char *name, | ||
1517 | const void *value, size_t size, int flags) | ||
1518 | { | ||
1519 | return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags); | ||
1520 | } | ||
1521 | |||
1522 | static int gfs2_xattr_security_get(struct inode *inode, const char *name, | ||
1523 | void *buffer, size_t size) | ||
1524 | { | ||
1525 | return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size); | ||
1526 | } | ||
1527 | |||
1528 | static int gfs2_xattr_security_set(struct inode *inode, const char *name, | ||
1529 | const void *value, size_t size, int flags) | ||
1530 | { | ||
1531 | return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags); | ||
1532 | } | ||
1533 | |||
1534 | static struct xattr_handler gfs2_xattr_user_handler = { | ||
1535 | .prefix = XATTR_USER_PREFIX, | ||
1536 | .get = gfs2_xattr_user_get, | ||
1537 | .set = gfs2_xattr_user_set, | ||
1538 | }; | ||
1539 | |||
1540 | static struct xattr_handler gfs2_xattr_security_handler = { | ||
1541 | .prefix = XATTR_SECURITY_PREFIX, | ||
1542 | .get = gfs2_xattr_security_get, | ||
1543 | .set = gfs2_xattr_security_set, | ||
1544 | }; | ||
1545 | |||
1546 | static struct xattr_handler gfs2_xattr_system_handler = { | ||
1547 | .prefix = XATTR_SYSTEM_PREFIX, | ||
1548 | .get = gfs2_xattr_system_get, | ||
1549 | .set = gfs2_xattr_system_set, | ||
1550 | }; | ||
1551 | |||
1552 | struct xattr_handler *gfs2_xattr_handlers[] = { | ||
1553 | &gfs2_xattr_user_handler, | ||
1554 | &gfs2_xattr_security_handler, | ||
1555 | &gfs2_xattr_system_handler, | ||
1556 | NULL, | ||
1557 | }; | ||
1558 | |||
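The gfs2_xattr_handlers[] table registered above hands prefix matching to the generic VFS xattr code: each handler claims one namespace prefix and forwards to gfs2_xattr_get()/gfs2_xattr_set() with the matching GFS2_EATYPE_* value, which is what lets the old gfs2_ea_ops indirection go away. A simplified model of that dispatch; the struct layout and type numbers below are stand-ins, not the kernel's definitions:

#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct xattr_handler; not the kernel layout. */
struct handler {
	const char *prefix;
	int type;	/* stand-in for a GFS2_EATYPE_* value */
};

static const struct handler handlers[] = {
	{ "user.",     1 },
	{ "security.", 3 },
	{ "system.",   2 },
	{ NULL,        0 },
};

/* Mimics the generic VFS step: match a prefix, strip it, pick a handler. */
static const struct handler *dispatch(const char *name, const char **rest)
{
	for (const struct handler *h = handlers; h->prefix; h++) {
		size_t n = strlen(h->prefix);
		if (!strncmp(name, h->prefix, n)) {
			*rest = name + n;
			return h;
		}
	}
	return NULL;	/* no namespace claimed this name */
}

int main(void)
{
	const char *rest;
	const struct handler *h = dispatch("user.demo", &rest);

	if (h)
		printf("handler type %d gets name \"%s\"\n", h->type, rest);
	return 0;
}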
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/xattr.h index c82dbe01d713..cbdfd7743733 100644 --- a/fs/gfs2/eattr.h +++ b/fs/gfs2/xattr.h | |||
@@ -19,7 +19,7 @@ struct iattr; | |||
19 | #define GFS2_EA_SIZE(ea) \ | 19 | #define GFS2_EA_SIZE(ea) \ |
20 | ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \ | 20 | ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \ |
21 | ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \ | 21 | ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \ |
22 | (sizeof(__be64) * (ea)->ea_num_ptrs)), 8) | 22 | (sizeof(__be64) * (ea)->ea_num_ptrs)), 8) |
23 | 23 | ||
24 | #define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs) | 24 | #define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs) |
25 | #define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST) | 25 | #define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST) |
@@ -27,10 +27,6 @@ ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \ | |||
27 | #define GFS2_EAREQ_SIZE_STUFFED(er) \ | 27 | #define GFS2_EAREQ_SIZE_STUFFED(er) \ |
28 | ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8) | 28 | ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8) |
29 | 29 | ||
30 | #define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \ | ||
31 | ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \ | ||
32 | sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8) | ||
33 | |||
34 | #define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1)) | 30 | #define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1)) |
35 | #define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len) | 31 | #define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len) |
36 | 32 | ||
@@ -43,16 +39,12 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \ | |||
43 | #define GFS2_EA_BH2FIRST(bh) \ | 39 | #define GFS2_EA_BH2FIRST(bh) \ |
44 | ((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header))) | 40 | ((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header))) |
45 | 41 | ||
46 | #define GFS2_ERF_MODE 0x80000000 | ||
47 | |||
48 | struct gfs2_ea_request { | 42 | struct gfs2_ea_request { |
49 | const char *er_name; | 43 | const char *er_name; |
50 | char *er_data; | 44 | char *er_data; |
51 | unsigned int er_name_len; | 45 | unsigned int er_name_len; |
52 | unsigned int er_data_len; | 46 | unsigned int er_data_len; |
53 | unsigned int er_type; /* GFS2_EATYPE_... */ | 47 | unsigned int er_type; /* GFS2_EATYPE_... */ |
54 | int er_flags; | ||
55 | mode_t er_mode; | ||
56 | }; | 48 | }; |
57 | 49 | ||
58 | struct gfs2_ea_location { | 50 | struct gfs2_ea_location { |
@@ -61,40 +53,20 @@ struct gfs2_ea_location { | |||
61 | struct gfs2_ea_header *el_prev; | 53 | struct gfs2_ea_header *el_prev; |
62 | }; | 54 | }; |
63 | 55 | ||
64 | int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); | 56 | extern int gfs2_xattr_get(struct inode *inode, int type, const char *name, |
65 | int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); | 57 | void *buffer, size_t size); |
66 | int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); | 58 | extern int gfs2_xattr_set(struct inode *inode, int type, const char *name, |
67 | 59 | const void *value, size_t size, int flags); | |
68 | int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er); | 60 | extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); |
69 | int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er); | 61 | extern int gfs2_ea_dealloc(struct gfs2_inode *ip); |
70 | int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
71 | int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
72 | |||
73 | int gfs2_ea_dealloc(struct gfs2_inode *ip); | ||
74 | 62 | ||
75 | /* Exported to acl.c */ | 63 | /* Exported to acl.c */ |
76 | 64 | ||
77 | int gfs2_ea_find(struct gfs2_inode *ip, | 65 | extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, |
78 | struct gfs2_ea_request *er, | 66 | struct gfs2_ea_location *el); |
79 | struct gfs2_ea_location *el); | 67 | extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, |
80 | int gfs2_ea_get_copy(struct gfs2_inode *ip, | 68 | char *data, size_t size); |
81 | struct gfs2_ea_location *el, | 69 | extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, |
82 | char *data); | 70 | struct iattr *attr, char *data); |
83 | int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, | ||
84 | struct iattr *attr, char *data); | ||
85 | |||
86 | static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea) | ||
87 | { | ||
88 | switch (ea->ea_type) { | ||
89 | case GFS2_EATYPE_USR: | ||
90 | return 5 + ea->ea_name_len + 1; | ||
91 | case GFS2_EATYPE_SYS: | ||
92 | return 7 + ea->ea_name_len + 1; | ||
93 | case GFS2_EATYPE_SECURITY: | ||
94 | return 9 + ea->ea_name_len + 1; | ||
95 | default: | ||
96 | return 0; | ||
97 | } | ||
98 | } | ||
99 | 71 | ||
100 | #endif /* __EATTR_DOT_H__ */ | 72 | #endif /* __EATTR_DOT_H__ */ |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 941c8425c10b..a93b885311d8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -44,6 +44,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations; | |||
44 | static const struct inode_operations hugetlbfs_inode_operations; | 44 | static const struct inode_operations hugetlbfs_inode_operations; |
45 | 45 | ||
46 | static struct backing_dev_info hugetlbfs_backing_dev_info = { | 46 | static struct backing_dev_info hugetlbfs_backing_dev_info = { |
47 | .name = "hugetlbfs", | ||
47 | .ra_pages = 0, /* No readahead */ | 48 | .ra_pages = 0, /* No readahead */ |
48 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 49 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
49 | }; | 50 | }; |
@@ -935,26 +936,28 @@ static int can_do_hugetlb_shm(void) | |||
935 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); | 936 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); |
936 | } | 937 | } |
937 | 938 | ||
938 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) | 939 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, |
940 | struct user_struct **user) | ||
939 | { | 941 | { |
940 | int error = -ENOMEM; | 942 | int error = -ENOMEM; |
941 | int unlock_shm = 0; | ||
942 | struct file *file; | 943 | struct file *file; |
943 | struct inode *inode; | 944 | struct inode *inode; |
944 | struct dentry *dentry, *root; | 945 | struct dentry *dentry, *root; |
945 | struct qstr quick_string; | 946 | struct qstr quick_string; |
946 | struct user_struct *user = current_user(); | ||
947 | 947 | ||
948 | *user = NULL; | ||
948 | if (!hugetlbfs_vfsmount) | 949 | if (!hugetlbfs_vfsmount) |
949 | return ERR_PTR(-ENOENT); | 950 | return ERR_PTR(-ENOENT); |
950 | 951 | ||
951 | if (!can_do_hugetlb_shm()) { | 952 | if (!can_do_hugetlb_shm()) { |
952 | if (user_shm_lock(size, user)) { | 953 | *user = current_user(); |
953 | unlock_shm = 1; | 954 | if (user_shm_lock(size, *user)) { |
954 | WARN_ONCE(1, | 955 | WARN_ONCE(1, |
955 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); | 956 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); |
956 | } else | 957 | } else { |
958 | *user = NULL; | ||
957 | return ERR_PTR(-EPERM); | 959 | return ERR_PTR(-EPERM); |
960 | } | ||
958 | } | 961 | } |
959 | 962 | ||
960 | root = hugetlbfs_vfsmount->mnt_root; | 963 | root = hugetlbfs_vfsmount->mnt_root; |
@@ -996,8 +999,10 @@ out_inode: | |||
996 | out_dentry: | 999 | out_dentry: |
997 | dput(dentry); | 1000 | dput(dentry); |
998 | out_shm_unlock: | 1001 | out_shm_unlock: |
999 | if (unlock_shm) | 1002 | if (*user) { |
1000 | user_shm_unlock(size, user); | 1003 | user_shm_unlock(size, *user); |
1004 | *user = NULL; | ||
1005 | } | ||
1001 | return ERR_PTR(error); | 1006 | return ERR_PTR(error); |
1002 | } | 1007 | } |
1003 | 1008 | ||
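The hugetlb_file_setup() change above threads the accounting owner back to the caller through a struct user_struct ** out parameter: *user is non-NULL exactly while a user_shm_lock() charge is outstanding, so both the internal error path and the eventual caller know whether an unlock is still owed. A hedged model of the idiom; the types and the failure trigger are invented for the demo:

#include <stdio.h>
#include <stddef.h>

struct user_acct { long locked_kb; };	/* stand-in for struct user_struct */

/*
 * Mirrors the new hugetlb_file_setup() contract: *user is set exactly
 * while a charge is outstanding, so every exit path (and the caller)
 * knows whether an unlock is owed.
 */
static int file_setup(size_t kb, int will_fail, struct user_acct **user)
{
	static struct user_acct current_user;

	*user = &current_user;
	(*user)->locked_kb += kb;		/* user_shm_lock() analogue */

	if (will_fail) {			/* e.g. inode allocation failed */
		(*user)->locked_kb -= kb;	/* user_shm_unlock() analogue */
		*user = NULL;			/* caller must not unlock again */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct user_acct *user;

	if (file_setup(64, 1, &user) < 0 && user == NULL)
		puts("failure path already dropped the charge");
	return 0;
}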
diff --git a/fs/inode.c b/fs/inode.c index 901bad1e5f12..b2ba83d2c4e1 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode) | |||
120 | * These are initializations that need to be done on every inode | 120 | * These are initializations that need to be done on every inode |
121 | * allocation as the fields are not initialised by slab allocation. | 121 | * allocation as the fields are not initialised by slab allocation. |
122 | */ | 122 | */ |
123 | struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | 123 | int inode_init_always(struct super_block *sb, struct inode *inode) |
124 | { | 124 | { |
125 | static const struct address_space_operations empty_aops; | 125 | static const struct address_space_operations empty_aops; |
126 | static struct inode_operations empty_iops; | 126 | static struct inode_operations empty_iops; |
127 | static const struct file_operations empty_fops; | 127 | static const struct file_operations empty_fops; |
128 | |||
129 | struct address_space *const mapping = &inode->i_data; | 128 | struct address_space *const mapping = &inode->i_data; |
130 | 129 | ||
131 | inode->i_sb = sb; | 130 | inode->i_sb = sb; |
@@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | |||
152 | inode->dirtied_when = 0; | 151 | inode->dirtied_when = 0; |
153 | 152 | ||
154 | if (security_inode_alloc(inode)) | 153 | if (security_inode_alloc(inode)) |
155 | goto out_free_inode; | 154 | goto out; |
156 | 155 | ||
157 | /* allocate and initialize an i_integrity */ | 156 | /* allocate and initialize an i_integrity */ |
158 | if (ima_inode_alloc(inode)) | 157 | if (ima_inode_alloc(inode)) |
@@ -183,9 +182,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | |||
183 | if (sb->s_bdev) { | 182 | if (sb->s_bdev) { |
184 | struct backing_dev_info *bdi; | 183 | struct backing_dev_info *bdi; |
185 | 184 | ||
186 | bdi = sb->s_bdev->bd_inode_backing_dev_info; | 185 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; |
187 | if (!bdi) | ||
188 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | ||
189 | mapping->backing_dev_info = bdi; | 186 | mapping->backing_dev_info = bdi; |
190 | } | 187 | } |
191 | inode->i_private = NULL; | 188 | inode->i_private = NULL; |
@@ -198,16 +195,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | |||
198 | inode->i_fsnotify_mask = 0; | 195 | inode->i_fsnotify_mask = 0; |
199 | #endif | 196 | #endif |
200 | 197 | ||
201 | return inode; | 198 | return 0; |
202 | 199 | ||
203 | out_free_security: | 200 | out_free_security: |
204 | security_inode_free(inode); | 201 | security_inode_free(inode); |
205 | out_free_inode: | 202 | out: |
206 | if (inode->i_sb->s_op->destroy_inode) | 203 | return -ENOMEM; |
207 | inode->i_sb->s_op->destroy_inode(inode); | ||
208 | else | ||
209 | kmem_cache_free(inode_cachep, (inode)); | ||
210 | return NULL; | ||
211 | } | 204 | } |
212 | EXPORT_SYMBOL(inode_init_always); | 205 | EXPORT_SYMBOL(inode_init_always); |
213 | 206 | ||
@@ -220,12 +213,21 @@ static struct inode *alloc_inode(struct super_block *sb) | |||
220 | else | 213 | else |
221 | inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); | 214 | inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); |
222 | 215 | ||
223 | if (inode) | 216 | if (!inode) |
224 | return inode_init_always(sb, inode); | 217 | return NULL; |
225 | return NULL; | 218 | |
219 | if (unlikely(inode_init_always(sb, inode))) { | ||
220 | if (inode->i_sb->s_op->destroy_inode) | ||
221 | inode->i_sb->s_op->destroy_inode(inode); | ||
222 | else | ||
223 | kmem_cache_free(inode_cachep, inode); | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | return inode; | ||
226 | } | 228 | } |
227 | 229 | ||
228 | void destroy_inode(struct inode *inode) | 230 | void __destroy_inode(struct inode *inode) |
229 | { | 231 | { |
230 | BUG_ON(inode_has_buffers(inode)); | 232 | BUG_ON(inode_has_buffers(inode)); |
231 | ima_inode_free(inode); | 233 | ima_inode_free(inode); |
@@ -237,13 +239,17 @@ void destroy_inode(struct inode *inode) | |||
237 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) | 239 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) |
238 | posix_acl_release(inode->i_default_acl); | 240 | posix_acl_release(inode->i_default_acl); |
239 | #endif | 241 | #endif |
242 | } | ||
243 | EXPORT_SYMBOL(__destroy_inode); | ||
244 | |||
245 | void destroy_inode(struct inode *inode) | ||
246 | { | ||
247 | __destroy_inode(inode); | ||
240 | if (inode->i_sb->s_op->destroy_inode) | 248 | if (inode->i_sb->s_op->destroy_inode) |
241 | inode->i_sb->s_op->destroy_inode(inode); | 249 | inode->i_sb->s_op->destroy_inode(inode); |
242 | else | 250 | else |
243 | kmem_cache_free(inode_cachep, (inode)); | 251 | kmem_cache_free(inode_cachep, (inode)); |
244 | } | 252 | } |
245 | EXPORT_SYMBOL(destroy_inode); | ||
246 | |||
247 | 253 | ||
248 | /* | 254 | /* |
249 | * These are initializations that only need to be done | 255 | * These are initializations that only need to be done |
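The fs/inode.c hunks above separate allocation from initialization: inode_init_always() now only returns 0 or -ENOMEM and never frees the inode it was handed, alloc_inode() owns the error-path cleanup through ->destroy_inode or kmem_cache_free(), and __destroy_inode() exposes the teardown half for filesystems that must undo a partially set up inode. The shape of that refactor, reduced to plain C; the names are analogues, not the kernel API:

#include <stdio.h>
#include <stdlib.h>

struct obj { char *buf; };

/* Init never frees the object it was handed; it only reports failure. */
static int obj_init_always(struct obj *o)
{
	o->buf = malloc(64);
	return o->buf ? 0 : -1;		/* the -ENOMEM analogue */
}

/* The __destroy_inode() role: tear down contents, not the allocation. */
static void obj_destroy_contents(struct obj *o)
{
	free(o->buf);
}

/* The allocator owns both the memory and the error-path cleanup. */
static struct obj *obj_alloc(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return NULL;
	if (obj_init_always(o)) {
		free(o);	/* the destroy_inode()/kmem_cache_free() role */
		return NULL;
	}
	return o;
}

int main(void)
{
	struct obj *o = obj_alloc();

	if (o) {
		obj_destroy_contents(o);
		free(o);
	}
	return 0;
}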
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 61f32f3868cd..b0435dd0654d 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -456,7 +456,7 @@ int cleanup_journal_tail(journal_t *journal) | |||
456 | { | 456 | { |
457 | transaction_t * transaction; | 457 | transaction_t * transaction; |
458 | tid_t first_tid; | 458 | tid_t first_tid; |
459 | unsigned long blocknr, freed; | 459 | unsigned int blocknr, freed; |
460 | 460 | ||
461 | if (is_journal_aborted(journal)) | 461 | if (is_journal_aborted(journal)) |
462 | return 1; | 462 | return 1; |
@@ -502,8 +502,8 @@ int cleanup_journal_tail(journal_t *journal) | |||
502 | freed = freed + journal->j_last - journal->j_first; | 502 | freed = freed + journal->j_last - journal->j_first; |
503 | 503 | ||
504 | jbd_debug(1, | 504 | jbd_debug(1, |
505 | "Cleaning journal tail from %d to %d (offset %lu), " | 505 | "Cleaning journal tail from %d to %d (offset %u), " |
506 | "freeing %lu\n", | 506 | "freeing %u\n", |
507 | journal->j_tail_sequence, first_tid, blocknr, freed); | 507 | journal->j_tail_sequence, first_tid, blocknr, freed); |
508 | 508 | ||
509 | journal->j_free += freed; | 509 | journal->j_free += freed; |
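This checkpoint.c hunk, and the journal.c, commit.c and recovery.c hunks that follow, narrow JBD's log block numbers from unsigned long to unsigned int: the on-disk journal stores block numbers as 32-bit big-endian fields, so be32_to_cpu() can never produce more than a u32, and the wider type only bought mismatched %lu format strings on 64-bit builds. A small compile-and-run illustration of the width argument, assuming Linux's 32-bit int:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* A journal block number as stored on disk: a 32-bit big-endian field. */
	uint32_t on_disk = UINT32_MAX;

	unsigned int  narrow = on_disk;	/* exact on Linux: int is 32-bit */
	unsigned long wide   = on_disk;	/* 64-bit on LP64, range never used */

	printf("sizeof(unsigned int)=%zu, sizeof(unsigned long)=%zu\n",
	       sizeof narrow, sizeof wide);
	printf("largest possible journal block: %u\n", narrow);	/* %u matches */
	return 0;
}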
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 618e21c0b7a3..4bd882548c45 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -308,7 +308,7 @@ void journal_commit_transaction(journal_t *journal) | |||
308 | int bufs; | 308 | int bufs; |
309 | int flags; | 309 | int flags; |
310 | int err; | 310 | int err; |
311 | unsigned long blocknr; | 311 | unsigned int blocknr; |
312 | ktime_t start_time; | 312 | ktime_t start_time; |
313 | u64 commit_time; | 313 | u64 commit_time; |
314 | char *tagp = NULL; | 314 | char *tagp = NULL; |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 737f7246a4b5..bd3c073b485d 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -276,7 +276,7 @@ static void journal_kill_thread(journal_t *journal) | |||
276 | int journal_write_metadata_buffer(transaction_t *transaction, | 276 | int journal_write_metadata_buffer(transaction_t *transaction, |
277 | struct journal_head *jh_in, | 277 | struct journal_head *jh_in, |
278 | struct journal_head **jh_out, | 278 | struct journal_head **jh_out, |
279 | unsigned long blocknr) | 279 | unsigned int blocknr) |
280 | { | 280 | { |
281 | int need_copy_out = 0; | 281 | int need_copy_out = 0; |
282 | int done_copy_out = 0; | 282 | int done_copy_out = 0; |
@@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction, | |||
287 | struct page *new_page; | 287 | struct page *new_page; |
288 | unsigned int new_offset; | 288 | unsigned int new_offset; |
289 | struct buffer_head *bh_in = jh2bh(jh_in); | 289 | struct buffer_head *bh_in = jh2bh(jh_in); |
290 | journal_t *journal = transaction->t_journal; | ||
290 | 291 | ||
291 | /* | 292 | /* |
292 | * The buffer really shouldn't be locked: only the current committing | 293 | * The buffer really shouldn't be locked: only the current committing |
@@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction, | |||
300 | J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); | 301 | J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); |
301 | 302 | ||
302 | new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); | 303 | new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); |
304 | /* keep subsequent assertions sane */ | ||
305 | new_bh->b_state = 0; | ||
306 | init_buffer(new_bh, NULL, NULL); | ||
307 | atomic_set(&new_bh->b_count, 1); | ||
308 | new_jh = journal_add_journal_head(new_bh); /* This sleeps */ | ||
303 | 309 | ||
304 | /* | 310 | /* |
305 | * If a new transaction has already done a buffer copy-out, then | 311 | * If a new transaction has already done a buffer copy-out, then |
@@ -361,14 +367,6 @@ repeat: | |||
361 | kunmap_atomic(mapped_data, KM_USER0); | 367 | kunmap_atomic(mapped_data, KM_USER0); |
362 | } | 368 | } |
363 | 369 | ||
364 | /* keep subsequent assertions sane */ | ||
365 | new_bh->b_state = 0; | ||
366 | init_buffer(new_bh, NULL, NULL); | ||
367 | atomic_set(&new_bh->b_count, 1); | ||
368 | jbd_unlock_bh_state(bh_in); | ||
369 | |||
370 | new_jh = journal_add_journal_head(new_bh); /* This sleeps */ | ||
371 | |||
372 | set_bh_page(new_bh, new_page, new_offset); | 370 | set_bh_page(new_bh, new_page, new_offset); |
373 | new_jh->b_transaction = NULL; | 371 | new_jh->b_transaction = NULL; |
374 | new_bh->b_size = jh2bh(jh_in)->b_size; | 372 | new_bh->b_size = jh2bh(jh_in)->b_size; |
@@ -385,7 +383,11 @@ repeat: | |||
385 | * copying is moved to the transaction's shadow queue. | 383 | * copying is moved to the transaction's shadow queue. |
386 | */ | 384 | */ |
387 | JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); | 385 | JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); |
388 | journal_file_buffer(jh_in, transaction, BJ_Shadow); | 386 | spin_lock(&journal->j_list_lock); |
387 | __journal_file_buffer(jh_in, transaction, BJ_Shadow); | ||
388 | spin_unlock(&journal->j_list_lock); | ||
389 | jbd_unlock_bh_state(bh_in); | ||
390 | |||
389 | JBUFFER_TRACE(new_jh, "file as BJ_IO"); | 391 | JBUFFER_TRACE(new_jh, "file as BJ_IO"); |
390 | journal_file_buffer(new_jh, transaction, BJ_IO); | 392 | journal_file_buffer(new_jh, transaction, BJ_IO); |
391 | 393 | ||
@@ -565,9 +567,9 @@ int log_wait_commit(journal_t *journal, tid_t tid) | |||
565 | * Log buffer allocation routines: | 567 | * Log buffer allocation routines: |
566 | */ | 568 | */ |
567 | 569 | ||
568 | int journal_next_log_block(journal_t *journal, unsigned long *retp) | 570 | int journal_next_log_block(journal_t *journal, unsigned int *retp) |
569 | { | 571 | { |
570 | unsigned long blocknr; | 572 | unsigned int blocknr; |
571 | 573 | ||
572 | spin_lock(&journal->j_state_lock); | 574 | spin_lock(&journal->j_state_lock); |
573 | J_ASSERT(journal->j_free > 1); | 575 | J_ASSERT(journal->j_free > 1); |
@@ -588,11 +590,11 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) | |||
588 | * this is a no-op. If needed, we can use j_blk_offset - everything is | 590 | * this is a no-op. If needed, we can use j_blk_offset - everything is |
589 | * ready. | 591 | * ready. |
590 | */ | 592 | */ |
591 | int journal_bmap(journal_t *journal, unsigned long blocknr, | 593 | int journal_bmap(journal_t *journal, unsigned int blocknr, |
592 | unsigned long *retp) | 594 | unsigned int *retp) |
593 | { | 595 | { |
594 | int err = 0; | 596 | int err = 0; |
595 | unsigned long ret; | 597 | unsigned int ret; |
596 | 598 | ||
597 | if (journal->j_inode) { | 599 | if (journal->j_inode) { |
598 | ret = bmap(journal->j_inode, blocknr); | 600 | ret = bmap(journal->j_inode, blocknr); |
@@ -602,7 +604,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, | |||
602 | char b[BDEVNAME_SIZE]; | 604 | char b[BDEVNAME_SIZE]; |
603 | 605 | ||
604 | printk(KERN_ALERT "%s: journal block not found " | 606 | printk(KERN_ALERT "%s: journal block not found " |
605 | "at offset %lu on %s\n", | 607 | "at offset %u on %s\n", |
606 | __func__, | 608 | __func__, |
607 | blocknr, | 609 | blocknr, |
608 | bdevname(journal->j_dev, b)); | 610 | bdevname(journal->j_dev, b)); |
@@ -628,7 +630,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, | |||
628 | struct journal_head *journal_get_descriptor_buffer(journal_t *journal) | 630 | struct journal_head *journal_get_descriptor_buffer(journal_t *journal) |
629 | { | 631 | { |
630 | struct buffer_head *bh; | 632 | struct buffer_head *bh; |
631 | unsigned long blocknr; | 633 | unsigned int blocknr; |
632 | int err; | 634 | int err; |
633 | 635 | ||
634 | err = journal_next_log_block(journal, &blocknr); | 636 | err = journal_next_log_block(journal, &blocknr); |
@@ -772,7 +774,7 @@ journal_t * journal_init_inode (struct inode *inode) | |||
772 | journal_t *journal = journal_init_common(); | 774 | journal_t *journal = journal_init_common(); |
773 | int err; | 775 | int err; |
774 | int n; | 776 | int n; |
775 | unsigned long blocknr; | 777 | unsigned int blocknr; |
776 | 778 | ||
777 | if (!journal) | 779 | if (!journal) |
778 | return NULL; | 780 | return NULL; |
@@ -844,10 +846,16 @@ static void journal_fail_superblock (journal_t *journal) | |||
844 | static int journal_reset(journal_t *journal) | 846 | static int journal_reset(journal_t *journal) |
845 | { | 847 | { |
846 | journal_superblock_t *sb = journal->j_superblock; | 848 | journal_superblock_t *sb = journal->j_superblock; |
847 | unsigned long first, last; | 849 | unsigned int first, last; |
848 | 850 | ||
849 | first = be32_to_cpu(sb->s_first); | 851 | first = be32_to_cpu(sb->s_first); |
850 | last = be32_to_cpu(sb->s_maxlen); | 852 | last = be32_to_cpu(sb->s_maxlen); |
853 | if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { | ||
854 | printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n", | ||
855 | first, last); | ||
856 | journal_fail_superblock(journal); | ||
857 | return -EINVAL; | ||
858 | } | ||
851 | 859 | ||
852 | journal->j_first = first; | 860 | journal->j_first = first; |
853 | journal->j_last = last; | 861 | journal->j_last = last; |
@@ -877,7 +885,7 @@ static int journal_reset(journal_t *journal) | |||
877 | **/ | 885 | **/ |
878 | int journal_create(journal_t *journal) | 886 | int journal_create(journal_t *journal) |
879 | { | 887 | { |
880 | unsigned long blocknr; | 888 | unsigned int blocknr; |
881 | struct buffer_head *bh; | 889 | struct buffer_head *bh; |
882 | journal_superblock_t *sb; | 890 | journal_superblock_t *sb; |
883 | int i, err; | 891 | int i, err; |
@@ -961,14 +969,14 @@ void journal_update_superblock(journal_t *journal, int wait) | |||
961 | if (sb->s_start == 0 && journal->j_tail_sequence == | 969 | if (sb->s_start == 0 && journal->j_tail_sequence == |
962 | journal->j_transaction_sequence) { | 970 | journal->j_transaction_sequence) { |
963 | jbd_debug(1,"JBD: Skipping superblock update on recovered sb " | 971 | jbd_debug(1,"JBD: Skipping superblock update on recovered sb " |
964 | "(start %ld, seq %d, errno %d)\n", | 972 | "(start %u, seq %d, errno %d)\n", |
965 | journal->j_tail, journal->j_tail_sequence, | 973 | journal->j_tail, journal->j_tail_sequence, |
966 | journal->j_errno); | 974 | journal->j_errno); |
967 | goto out; | 975 | goto out; |
968 | } | 976 | } |
969 | 977 | ||
970 | spin_lock(&journal->j_state_lock); | 978 | spin_lock(&journal->j_state_lock); |
971 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 979 | jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", |
972 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 980 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
973 | 981 | ||
974 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 982 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
@@ -1363,7 +1371,7 @@ int journal_flush(journal_t *journal) | |||
1363 | { | 1371 | { |
1364 | int err = 0; | 1372 | int err = 0; |
1365 | transaction_t *transaction = NULL; | 1373 | transaction_t *transaction = NULL; |
1366 | unsigned long old_tail; | 1374 | unsigned int old_tail; |
1367 | 1375 | ||
1368 | spin_lock(&journal->j_state_lock); | 1376 | spin_lock(&journal->j_state_lock); |
1369 | 1377 | ||
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index db5e982c5ddf..cb1a49ae605e 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c | |||
@@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start) | |||
70 | { | 70 | { |
71 | int err; | 71 | int err; |
72 | unsigned int max, nbufs, next; | 72 | unsigned int max, nbufs, next; |
73 | unsigned long blocknr; | 73 | unsigned int blocknr; |
74 | struct buffer_head *bh; | 74 | struct buffer_head *bh; |
75 | 75 | ||
76 | struct buffer_head * bufs[MAXBUF]; | 76 | struct buffer_head * bufs[MAXBUF]; |
@@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
132 | unsigned int offset) | 132 | unsigned int offset) |
133 | { | 133 | { |
134 | int err; | 134 | int err; |
135 | unsigned long blocknr; | 135 | unsigned int blocknr; |
136 | struct buffer_head *bh; | 136 | struct buffer_head *bh; |
137 | 137 | ||
138 | *bhp = NULL; | 138 | *bhp = NULL; |
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal, | |||
314 | struct recovery_info *info, enum passtype pass) | 314 | struct recovery_info *info, enum passtype pass) |
315 | { | 315 | { |
316 | unsigned int first_commit_ID, next_commit_ID; | 316 | unsigned int first_commit_ID, next_commit_ID; |
317 | unsigned long next_log_block; | 317 | unsigned int next_log_block; |
318 | int err, success = 0; | 318 | int err, success = 0; |
319 | journal_superblock_t * sb; | 319 | journal_superblock_t * sb; |
320 | journal_header_t * tmp; | 320 | journal_header_t * tmp; |
@@ -367,14 +367,14 @@ static int do_one_pass(journal_t *journal, | |||
367 | if (tid_geq(next_commit_ID, info->end_transaction)) | 367 | if (tid_geq(next_commit_ID, info->end_transaction)) |
368 | break; | 368 | break; |
369 | 369 | ||
370 | jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", | 370 | jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n", |
371 | next_commit_ID, next_log_block, journal->j_last); | 371 | next_commit_ID, next_log_block, journal->j_last); |
372 | 372 | ||
373 | /* Skip over each chunk of the transaction looking for | 373 | /* Skip over each chunk of the transaction looking for |
374 | * either the next descriptor block or the final commit | 374 | * either the next descriptor block or the final commit |
375 | * record. */ | 375 | * record. */ |
376 | 376 | ||
377 | jbd_debug(3, "JBD: checking block %ld\n", next_log_block); | 377 | jbd_debug(3, "JBD: checking block %u\n", next_log_block); |
378 | err = jread(&bh, journal, next_log_block); | 378 | err = jread(&bh, journal, next_log_block); |
379 | if (err) | 379 | if (err) |
380 | goto failed; | 380 | goto failed; |
@@ -429,7 +429,7 @@ static int do_one_pass(journal_t *journal, | |||
429 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 429 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
430 | while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) | 430 | while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) |
431 | <= journal->j_blocksize) { | 431 | <= journal->j_blocksize) { |
432 | unsigned long io_block; | 432 | unsigned int io_block; |
433 | 433 | ||
434 | tag = (journal_block_tag_t *) tagp; | 434 | tag = (journal_block_tag_t *) tagp; |
435 | flags = be32_to_cpu(tag->t_flags); | 435 | flags = be32_to_cpu(tag->t_flags); |
@@ -443,10 +443,10 @@ static int do_one_pass(journal_t *journal, | |||
443 | success = err; | 443 | success = err; |
444 | printk (KERN_ERR | 444 | printk (KERN_ERR |
445 | "JBD: IO error %d recovering " | 445 | "JBD: IO error %d recovering " |
446 | "block %ld in log\n", | 446 | "block %u in log\n", |
447 | err, io_block); | 447 | err, io_block); |
448 | } else { | 448 | } else { |
449 | unsigned long blocknr; | 449 | unsigned int blocknr; |
450 | 450 | ||
451 | J_ASSERT(obh != NULL); | 451 | J_ASSERT(obh != NULL); |
452 | blocknr = be32_to_cpu(tag->t_blocknr); | 452 | blocknr = be32_to_cpu(tag->t_blocknr); |
@@ -581,7 +581,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, | |||
581 | max = be32_to_cpu(header->r_count); | 581 | max = be32_to_cpu(header->r_count); |
582 | 582 | ||
583 | while (offset < max) { | 583 | while (offset < max) { |
584 | unsigned long blocknr; | 584 | unsigned int blocknr; |
585 | int err; | 585 | int err; |
586 | 586 | ||
587 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); | 587 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index da6cd9bdaabc..ad717328343a 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -101,7 +101,7 @@ struct jbd_revoke_record_s | |||
101 | { | 101 | { |
102 | struct list_head hash; | 102 | struct list_head hash; |
103 | tid_t sequence; /* Used for recovery only */ | 103 | tid_t sequence; /* Used for recovery only */ |
104 | unsigned long blocknr; | 104 | unsigned int blocknr; |
105 | }; | 105 | }; |
106 | 106 | ||
107 | 107 | ||
@@ -126,7 +126,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int); | |||
126 | /* Utility functions to maintain the revoke table */ | 126 | /* Utility functions to maintain the revoke table */ |
127 | 127 | ||
128 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ | 128 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ |
129 | static inline int hash(journal_t *journal, unsigned long block) | 129 | static inline int hash(journal_t *journal, unsigned int block) |
130 | { | 130 | { |
131 | struct jbd_revoke_table_s *table = journal->j_revoke; | 131 | struct jbd_revoke_table_s *table = journal->j_revoke; |
132 | int hash_shift = table->hash_shift; | 132 | int hash_shift = table->hash_shift; |
@@ -136,7 +136,7 @@ static inline int hash(journal_t *journal, unsigned long block) | |||
136 | (block << (hash_shift - 12))) & (table->hash_size - 1); | 136 | (block << (hash_shift - 12))) & (table->hash_size - 1); |
137 | } | 137 | } |
138 | 138 | ||
139 | static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, | 139 | static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, |
140 | tid_t seq) | 140 | tid_t seq) |
141 | { | 141 | { |
142 | struct list_head *hash_list; | 142 | struct list_head *hash_list; |
@@ -166,7 +166,7 @@ oom: | |||
166 | /* Find a revoke record in the journal's hash table. */ | 166 | /* Find a revoke record in the journal's hash table. */ |
167 | 167 | ||
168 | static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, | 168 | static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, |
169 | unsigned long blocknr) | 169 | unsigned int blocknr) |
170 | { | 170 | { |
171 | struct list_head *hash_list; | 171 | struct list_head *hash_list; |
172 | struct jbd_revoke_record_s *record; | 172 | struct jbd_revoke_record_s *record; |
@@ -332,7 +332,7 @@ void journal_destroy_revoke(journal_t *journal) | |||
332 | * by one. | 332 | * by one. |
333 | */ | 333 | */ |
334 | 334 | ||
335 | int journal_revoke(handle_t *handle, unsigned long blocknr, | 335 | int journal_revoke(handle_t *handle, unsigned int blocknr, |
336 | struct buffer_head *bh_in) | 336 | struct buffer_head *bh_in) |
337 | { | 337 | { |
338 | struct buffer_head *bh = NULL; | 338 | struct buffer_head *bh = NULL; |
@@ -401,7 +401,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, | |||
401 | } | 401 | } |
402 | } | 402 | } |
403 | 403 | ||
404 | jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); | 404 | jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in); |
405 | err = insert_revoke_hash(journal, blocknr, | 405 | err = insert_revoke_hash(journal, blocknr, |
406 | handle->h_transaction->t_tid); | 406 | handle->h_transaction->t_tid); |
407 | BUFFER_TRACE(bh_in, "exit"); | 407 | BUFFER_TRACE(bh_in, "exit"); |
@@ -644,7 +644,7 @@ static void flush_descriptor(journal_t *journal, | |||
644 | */ | 644 | */ |
645 | 645 | ||
646 | int journal_set_revoke(journal_t *journal, | 646 | int journal_set_revoke(journal_t *journal, |
647 | unsigned long blocknr, | 647 | unsigned int blocknr, |
648 | tid_t sequence) | 648 | tid_t sequence) |
649 | { | 649 | { |
650 | struct jbd_revoke_record_s *record; | 650 | struct jbd_revoke_record_s *record; |
@@ -668,7 +668,7 @@ int journal_set_revoke(journal_t *journal, | |||
668 | */ | 668 | */ |
669 | 669 | ||
670 | int journal_test_revoke(journal_t *journal, | 670 | int journal_test_revoke(journal_t *journal, |
671 | unsigned long blocknr, | 671 | unsigned int blocknr, |
672 | tid_t sequence) | 672 | tid_t sequence) |
673 | { | 673 | { |
674 | struct jbd_revoke_record_s *record; | 674 | struct jbd_revoke_record_s *record; |
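For context on the signatures changing above: the revoke table maps a block number to the last transaction that revoked it, and recovery consults it so stale logged copies of a revoked block are not replayed. A condensed usage sketch against the new unsigned int interface (the wrapper name is hypothetical):

	/* Sketch: during replay, a logged block is skipped if some
	 * transaction at or after this commit ID revoked it. */
	static int may_replay_block(journal_t *journal, unsigned int blocknr,
				    tid_t this_commit)
	{
		return !journal_test_revoke(journal, blocknr, this_commit);
	}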
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 73242ba7c7b1..006f9ad838a2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -56,7 +56,8 @@ get_transaction(journal_t *journal, transaction_t *transaction) | |||
56 | spin_lock_init(&transaction->t_handle_lock); | 56 | spin_lock_init(&transaction->t_handle_lock); |
57 | 57 | ||
58 | /* Set up the commit timer for the new transaction. */ | 58 | /* Set up the commit timer for the new transaction. */ |
59 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 59 | journal->j_commit_timer.expires = |
60 | round_jiffies_up(transaction->t_expires); | ||
60 | add_timer(&journal->j_commit_timer); | 61 | add_timer(&journal->j_commit_timer); |
61 | 62 | ||
62 | J_ASSERT(journal->j_running_transaction == NULL); | 63 | J_ASSERT(journal->j_running_transaction == NULL); |
@@ -228,6 +229,8 @@ repeat_locked: | |||
228 | __log_space_left(journal)); | 229 | __log_space_left(journal)); |
229 | spin_unlock(&transaction->t_handle_lock); | 230 | spin_unlock(&transaction->t_handle_lock); |
230 | spin_unlock(&journal->j_state_lock); | 231 | spin_unlock(&journal->j_state_lock); |
232 | |||
233 | lock_map_acquire(&handle->h_lockdep_map); | ||
231 | out: | 234 | out: |
232 | if (unlikely(new_transaction)) /* It's usually NULL */ | 235 | if (unlikely(new_transaction)) /* It's usually NULL */ |
233 | kfree(new_transaction); | 236 | kfree(new_transaction); |
@@ -292,9 +295,6 @@ handle_t *journal_start(journal_t *journal, int nblocks) | |||
292 | handle = ERR_PTR(err); | 295 | handle = ERR_PTR(err); |
293 | goto out; | 296 | goto out; |
294 | } | 297 | } |
295 | |||
296 | lock_map_acquire(&handle->h_lockdep_map); | ||
297 | |||
298 | out: | 298 | out: |
299 | return handle; | 299 | return handle; |
300 | } | 300 | } |
@@ -416,6 +416,7 @@ int journal_restart(handle_t *handle, int nblocks) | |||
416 | __log_start_commit(journal, transaction->t_tid); | 416 | __log_start_commit(journal, transaction->t_tid); |
417 | spin_unlock(&journal->j_state_lock); | 417 | spin_unlock(&journal->j_state_lock); |
418 | 418 | ||
419 | lock_map_release(&handle->h_lockdep_map); | ||
419 | handle->h_buffer_credits = nblocks; | 420 | handle->h_buffer_credits = nblocks; |
420 | ret = start_this_handle(journal, handle); | 421 | ret = start_this_handle(journal, handle); |
421 | return ret; | 422 | return ret; |
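Moving lock_map_acquire() out of journal_start() and into start_this_handle(), paired with the lock_map_release() added above, keeps the lockdep annotation balanced across a restart: the handle's map is dropped before start_this_handle() re-acquires it. The resulting pairing, sketched as comments:

	/*
	 * journal_start()
	 *   start_this_handle()  -> lock_map_acquire(&handle->h_lockdep_map)
	 * journal_restart()
	 *   lock_map_release(&handle->h_lockdep_map)
	 *   start_this_handle()  -> lock_map_acquire() again
	 * journal_stop()         -> lock_map_release() (unchanged, not shown)
	 */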
@@ -489,34 +490,15 @@ void journal_unlock_updates (journal_t *journal) | |||
489 | wake_up(&journal->j_wait_transaction_locked); | 490 | wake_up(&journal->j_wait_transaction_locked); |
490 | } | 491 | } |
491 | 492 | ||
492 | /* | 493 | static void warn_dirty_buffer(struct buffer_head *bh) |
493 | * Report any unexpected dirty buffers which turn up. Normally those | ||
494 | * indicate an error, but they can occur if the user is running (say) | ||
495 | * tune2fs to modify the live filesystem, so we need the option of | ||
496 | * continuing as gracefully as possible. # | ||
497 | * | ||
498 | * The caller should already hold the journal lock and | ||
499 | * j_list_lock spinlock: most callers will need those anyway | ||
500 | * in order to probe the buffer's journaling state safely. | ||
501 | */ | ||
502 | static void jbd_unexpected_dirty_buffer(struct journal_head *jh) | ||
503 | { | 494 | { |
504 | int jlist; | 495 | char b[BDEVNAME_SIZE]; |
505 | |||
506 | /* If this buffer is one which might reasonably be dirty | ||
507 | * --- ie. data, or not part of this journal --- then | ||
508 | * we're OK to leave it alone, but otherwise we need to | ||
509 | * move the dirty bit to the journal's own internal | ||
510 | * JBDDirty bit. */ | ||
511 | jlist = jh->b_jlist; | ||
512 | |||
513 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || | ||
514 | jlist == BJ_Shadow || jlist == BJ_Forget) { | ||
515 | struct buffer_head *bh = jh2bh(jh); | ||
516 | 496 | ||
517 | if (test_clear_buffer_dirty(bh)) | 497 | printk(KERN_WARNING |
518 | set_buffer_jbddirty(bh); | 498 | "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " |
519 | } | 499 | "There's a risk of filesystem corruption in case of system " |
500 | "crash.\n", | ||
501 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); | ||
520 | } | 502 | } |
521 | 503 | ||
522 | /* | 504 | /* |
@@ -583,14 +565,16 @@ repeat: | |||
583 | if (jh->b_next_transaction) | 565 | if (jh->b_next_transaction) |
584 | J_ASSERT_JH(jh, jh->b_next_transaction == | 566 | J_ASSERT_JH(jh, jh->b_next_transaction == |
585 | transaction); | 567 | transaction); |
568 | warn_dirty_buffer(bh); | ||
586 | } | 569 | } |
587 | /* | 570 | /* |
588 | * In any case we need to clean the dirty flag and we must | 571 | * In any case we need to clean the dirty flag and we must |
589 | * do it under the buffer lock to be sure we don't race | 572 | * do it under the buffer lock to be sure we don't race |
590 | * with running write-out. | 573 | * with running write-out. |
591 | */ | 574 | */ |
592 | JBUFFER_TRACE(jh, "Unexpected dirty buffer"); | 575 | JBUFFER_TRACE(jh, "Journalling dirty buffer"); |
593 | jbd_unexpected_dirty_buffer(jh); | 576 | clear_buffer_dirty(bh); |
577 | set_buffer_jbddirty(bh); | ||
594 | } | 578 | } |
595 | 579 | ||
596 | unlock_buffer(bh); | 580 | unlock_buffer(bh); |
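The net effect of this hunk plus the warn_dirty_buffer() helper above: a buffer entering the journal has its dirty bit handed over to JBD's private jbddirty bit under the buffer lock, with a warning when the dirtiness is unexpected (for instance tune2fs writing to a mounted filesystem). A simplified sketch of the handoff, not the exact control flow:

	/* Must run under lock_buffer() so it cannot race with
	 * running write-out. */
	if (buffer_dirty(bh))
		warn_dirty_buffer(bh);	/* unexpected: complain first */
	clear_buffer_dirty(bh);
	set_buffer_jbddirty(bh);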
@@ -826,6 +810,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
826 | J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); | 810 | J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); |
827 | 811 | ||
828 | if (jh->b_transaction == NULL) { | 812 | if (jh->b_transaction == NULL) { |
813 | /* | ||
814 | * A previous journal_forget() could have left the buffer ||
815 | * with the jbddirty bit set because it was being committed. When ||
816 | * the commit finished, we filed the buffer for ||
817 | * checkpointing and marked it dirty. Now we are reallocating | ||
818 | * the buffer so the transaction freeing it must have | ||
819 | * committed and so it's safe to clear the dirty bit. | ||
820 | */ | ||
821 | clear_buffer_dirty(jh2bh(jh)); | ||
829 | jh->b_transaction = transaction; | 822 | jh->b_transaction = transaction; |
830 | 823 | ||
831 | /* first access by this transaction */ | 824 | /* first access by this transaction */ |
@@ -1782,8 +1775,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1782 | 1775 | ||
1783 | if (jh->b_cp_transaction) { | 1776 | if (jh->b_cp_transaction) { |
1784 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1777 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1778 | /* | ||
1779 | * We don't want to write the buffer anymore, so clear the ||
1780 | * dirty bit to avoid confusing the checks in ||
1781 | * __journal_file_buffer(). ||
1782 | */ | ||
1783 | clear_buffer_dirty(bh); | ||
1785 | __journal_file_buffer(jh, transaction, BJ_Forget); | 1784 | __journal_file_buffer(jh, transaction, BJ_Forget); |
1786 | clear_buffer_jbddirty(bh); | ||
1787 | may_free = 0; | 1785 | may_free = 0; |
1788 | } else { | 1786 | } else { |
1789 | JBUFFER_TRACE(jh, "on running transaction"); | 1787 | JBUFFER_TRACE(jh, "on running transaction"); |
@@ -2041,12 +2039,17 @@ void __journal_file_buffer(struct journal_head *jh, | |||
2041 | if (jh->b_transaction && jh->b_jlist == jlist) | 2039 | if (jh->b_transaction && jh->b_jlist == jlist) |
2042 | return; | 2040 | return; |
2043 | 2041 | ||
2044 | /* The following list of buffer states needs to be consistent | ||
2045 | * with __jbd_unexpected_dirty_buffer()'s handling of dirty | ||
2046 | * state. */ | ||
2047 | |||
2048 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || | 2042 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || |
2049 | jlist == BJ_Shadow || jlist == BJ_Forget) { | 2043 | jlist == BJ_Shadow || jlist == BJ_Forget) { |
2044 | /* | ||
2045 | * For metadata buffers, we track dirty bit in buffer_jbddirty | ||
2046 | * instead of buffer_dirty. We should not see a dirty bit set | ||
2047 | * here because we clear it in do_get_write_access but e.g. | ||
2048 | * tune2fs can modify the sb and set the dirty bit at any time | ||
2049 | * so we try to gracefully handle that. | ||
2050 | */ | ||
2051 | if (buffer_dirty(bh)) | ||
2052 | warn_dirty_buffer(bh); | ||
2050 | if (test_clear_buffer_dirty(bh) || | 2053 | if (test_clear_buffer_dirty(bh) || |
2051 | test_clear_buffer_jbddirty(bh)) | 2054 | test_clear_buffer_jbddirty(bh)) |
2052 | was_dirty = 1; | 2055 | was_dirty = 1; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7b4088b2364d..26d991ddc1e6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/writeback.h> | 25 | #include <linux/writeback.h> |
26 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
27 | #include <linux/bio.h> | 27 | #include <linux/bio.h> |
28 | #include <linux/blkdev.h> | ||
28 | #include <trace/events/jbd2.h> | 29 | #include <trace/events/jbd2.h> |
29 | 30 | ||
30 | /* | 31 | /* |
@@ -133,8 +134,8 @@ static int journal_submit_commit_record(journal_t *journal, | |||
133 | bh->b_end_io = journal_end_buffer_io_sync; | 134 | bh->b_end_io = journal_end_buffer_io_sync; |
134 | 135 | ||
135 | if (journal->j_flags & JBD2_BARRIER && | 136 | if (journal->j_flags & JBD2_BARRIER && |
136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 137 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 138 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
138 | set_buffer_ordered(bh); | 139 | set_buffer_ordered(bh); |
139 | barrier_done = 1; | 140 | barrier_done = 1; |
140 | } | 141 | } |
@@ -220,7 +221,6 @@ static int journal_submit_inode_data_buffers(struct address_space *mapping) | |||
220 | .nr_to_write = mapping->nrpages * 2, | 221 | .nr_to_write = mapping->nrpages * 2, |
221 | .range_start = 0, | 222 | .range_start = 0, |
222 | .range_end = i_size_read(mapping->host), | 223 | .range_end = i_size_read(mapping->host), |
223 | .for_writepages = 1, | ||
224 | }; | 224 | }; |
225 | 225 | ||
226 | ret = generic_writepages(mapping, &wbc); | 226 | ret = generic_writepages(mapping, &wbc); |
@@ -707,11 +707,13 @@ start_journal_io: | |||
707 | /* Done it all: now write the commit record asynchronously. */ | 707 | /* Done it all: now write the commit record asynchronously. */ |
708 | 708 | ||
709 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 709 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
710 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 710 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
711 | err = journal_submit_commit_record(journal, commit_transaction, | 711 | err = journal_submit_commit_record(journal, commit_transaction, |
712 | &cbh, crc32_sum); | 712 | &cbh, crc32_sum); |
713 | if (err) | 713 | if (err) |
714 | __jbd2_journal_abort_hard(journal); | 714 | __jbd2_journal_abort_hard(journal); |
715 | if (journal->j_flags & JBD2_BARRIER) | ||
716 | blkdev_issue_flush(journal->j_dev, NULL); | ||
715 | } | 717 | } |
716 | 718 | ||
717 | /* | 719 | /* |
@@ -834,7 +836,7 @@ wait_for_iobuf: | |||
834 | jbd_debug(3, "JBD: commit phase 5\n"); | 836 | jbd_debug(3, "JBD: commit phase 5\n"); |
835 | 837 | ||
836 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 838 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
837 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 839 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
838 | err = journal_submit_commit_record(journal, commit_transaction, | 840 | err = journal_submit_commit_record(journal, commit_transaction, |
839 | &cbh, crc32_sum); | 841 | &cbh, crc32_sum); |
840 | if (err) | 842 | if (err) |
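The added blkdev_issue_flush() covers a gap in the async-commit path: with JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT the commit record is not written as a barrier, so on barrier-enabled journals an explicit cache flush is needed to stop the record reaching media ahead of the blocks it commits. For reference, the helper's contract in this era (signature inferred from the call site and the new blkdev.h include):

	/* blkdev_issue_flush() submits an empty barrier request and
	 * waits for it, draining the device's volatile write cache:
	 *
	 *	int blkdev_issue_flush(struct block_device *bdev,
	 *			       sector_t *error_sector);
	 *
	 * Passing NULL, as this hunk does, discards the failed-sector
	 * report. */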
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e378cb383979..a8a358bc0f21 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1187,6 +1187,12 @@ static int journal_reset(journal_t *journal) | |||
1187 | 1187 | ||
1188 | first = be32_to_cpu(sb->s_first); | 1188 | first = be32_to_cpu(sb->s_first); |
1189 | last = be32_to_cpu(sb->s_maxlen); | 1189 | last = be32_to_cpu(sb->s_maxlen); |
1190 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { | ||
1191 | printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", | ||
1192 | first, last); | ||
1193 | journal_fail_superblock(journal); | ||
1194 | return -EINVAL; | ||
1195 | } | ||
1190 | 1196 | ||
1191 | journal->j_first = first; | 1197 | journal->j_first = first; |
1192 | journal->j_last = last; | 1198 | journal->j_last = last; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6213ac728f30..a0512700542f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
57 | INIT_LIST_HEAD(&transaction->t_private_list); | 57 | INIT_LIST_HEAD(&transaction->t_private_list); |
58 | 58 | ||
59 | /* Set up the commit timer for the new transaction. */ | 59 | /* Set up the commit timer for the new transaction. */ |
60 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 60 | journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires); |
61 | add_timer(&journal->j_commit_timer); | 61 | add_timer(&journal->j_commit_timer); |
62 | 62 | ||
63 | J_ASSERT(journal->j_running_transaction == NULL); | 63 | J_ASSERT(journal->j_running_transaction == NULL); |
@@ -238,6 +238,8 @@ repeat_locked: | |||
238 | __jbd2_log_space_left(journal)); | 238 | __jbd2_log_space_left(journal)); |
239 | spin_unlock(&transaction->t_handle_lock); | 239 | spin_unlock(&transaction->t_handle_lock); |
240 | spin_unlock(&journal->j_state_lock); | 240 | spin_unlock(&journal->j_state_lock); |
241 | |||
242 | lock_map_acquire(&handle->h_lockdep_map); | ||
241 | out: | 243 | out: |
242 | if (unlikely(new_transaction)) /* It's usually NULL */ | 244 | if (unlikely(new_transaction)) /* It's usually NULL */ |
243 | kfree(new_transaction); | 245 | kfree(new_transaction); |
@@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
303 | handle = ERR_PTR(err); | 305 | handle = ERR_PTR(err); |
304 | goto out; | 306 | goto out; |
305 | } | 307 | } |
306 | |||
307 | lock_map_acquire(&handle->h_lockdep_map); | ||
308 | out: | 308 | out: |
309 | return handle; | 309 | return handle; |
310 | } | 310 | } |
@@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) | |||
426 | __jbd2_log_start_commit(journal, transaction->t_tid); | 426 | __jbd2_log_start_commit(journal, transaction->t_tid); |
427 | spin_unlock(&journal->j_state_lock); | 427 | spin_unlock(&journal->j_state_lock); |
428 | 428 | ||
429 | lock_map_release(&handle->h_lockdep_map); | ||
429 | handle->h_buffer_credits = nblocks; | 430 | handle->h_buffer_credits = nblocks; |
430 | ret = start_this_handle(journal, handle); | 431 | ret = start_this_handle(journal, handle); |
431 | return ret; | 432 | return ret; |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 8fcb6239218e..7edb62e97419 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -258,7 +258,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
258 | return rc; | 258 | return rc; |
259 | } | 259 | } |
260 | 260 | ||
261 | static int jffs2_check_acl(struct inode *inode, int mask) | 261 | int jffs2_check_acl(struct inode *inode, int mask) |
262 | { | 262 | { |
263 | struct posix_acl *acl; | 263 | struct posix_acl *acl; |
264 | int rc; | 264 | int rc; |
@@ -274,11 +274,6 @@ static int jffs2_check_acl(struct inode *inode, int mask) | |||
274 | return -EAGAIN; | 274 | return -EAGAIN; |
275 | } | 275 | } |
276 | 276 | ||
277 | int jffs2_permission(struct inode *inode, int mask) | ||
278 | { | ||
279 | return generic_permission(inode, mask, jffs2_check_acl); | ||
280 | } | ||
281 | |||
282 | int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) | 277 | int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) |
283 | { | 278 | { |
284 | struct posix_acl *acl, *clone; | 279 | struct posix_acl *acl, *clone; |
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index fc929f2a14f6..f0ba63e3c36b 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h | |||
@@ -26,7 +26,7 @@ struct jffs2_acl_header { | |||
26 | 26 | ||
27 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL | 27 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL |
28 | 28 | ||
29 | extern int jffs2_permission(struct inode *, int); | 29 | extern int jffs2_check_acl(struct inode *, int); |
30 | extern int jffs2_acl_chmod(struct inode *); | 30 | extern int jffs2_acl_chmod(struct inode *); |
31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); | 31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); |
32 | extern int jffs2_init_acl_post(struct inode *); | 32 | extern int jffs2_init_acl_post(struct inode *); |
@@ -36,7 +36,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; | |||
36 | 36 | ||
37 | #else | 37 | #else |
38 | 38 | ||
39 | #define jffs2_permission (NULL) | 39 | #define jffs2_check_acl (NULL) |
40 | #define jffs2_acl_chmod(inode) (0) | 40 | #define jffs2_acl_chmod(inode) (0) |
41 | #define jffs2_init_acl_pre(dir_i,inode,mode) (0) | 41 | #define jffs2_init_acl_pre(dir_i,inode,mode) (0) |
42 | #define jffs2_init_acl_post(inode) (0) | 42 | #define jffs2_init_acl_post(inode) (0) |
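The jffs2 hunks here, the jfs hunks below, and the namei.c refactor at the end of this section are one coordinated conversion: filesystems stop wrapping generic_permission() in a private .permission method and instead publish only their ACL callback through the new i_op->check_acl hook, which the VFS hands to generic_permission() itself. The pattern, using the jffs2 code from these hunks:

	/* Before: one boilerplate wrapper per filesystem. */
	int jffs2_permission(struct inode *inode, int mask)
	{
		return generic_permission(inode, mask, jffs2_check_acl);
	}

	/* After: the fs only sets
	 *	.check_acl = jffs2_check_acl,
	 * and the VFS core does (see the namei.c hunk below):
	 *	generic_permission(inode, mask, inode->i_op->check_acl);
	 */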
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 6f60cc910f4c..7aa4417e085f 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -55,7 +55,7 @@ const struct inode_operations jffs2_dir_inode_operations = | |||
55 | .rmdir = jffs2_rmdir, | 55 | .rmdir = jffs2_rmdir, |
56 | .mknod = jffs2_mknod, | 56 | .mknod = jffs2_mknod, |
57 | .rename = jffs2_rename, | 57 | .rename = jffs2_rename, |
58 | .permission = jffs2_permission, | 58 | .check_acl = jffs2_check_acl, |
59 | .setattr = jffs2_setattr, | 59 | .setattr = jffs2_setattr, |
60 | .setxattr = jffs2_setxattr, | 60 | .setxattr = jffs2_setxattr, |
61 | .getxattr = jffs2_getxattr, | 61 | .getxattr = jffs2_getxattr, |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 5edc2bf20581..b7b74e299142 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations = | |||
56 | 56 | ||
57 | const struct inode_operations jffs2_file_inode_operations = | 57 | const struct inode_operations jffs2_file_inode_operations = |
58 | { | 58 | { |
59 | .permission = jffs2_permission, | 59 | .check_acl = jffs2_check_acl, |
60 | .setattr = jffs2_setattr, | 60 | .setattr = jffs2_setattr, |
61 | .setxattr = jffs2_setxattr, | 61 | .setxattr = jffs2_setxattr, |
62 | .getxattr = jffs2_getxattr, | 62 | .getxattr = jffs2_getxattr, |
@@ -99,7 +99,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) | |||
99 | kunmap(pg); | 99 | kunmap(pg); |
100 | 100 | ||
101 | D2(printk(KERN_DEBUG "readpage finished\n")); | 101 | D2(printk(KERN_DEBUG "readpage finished\n")); |
102 | return 0; | 102 | return ret; |
103 | } | 103 | } |
104 | 104 | ||
105 | int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) | 105 | int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) |
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index b7339c3b6ad9..4ec11e8bda8c 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c | |||
@@ -21,7 +21,7 @@ const struct inode_operations jffs2_symlink_inode_operations = | |||
21 | { | 21 | { |
22 | .readlink = generic_readlink, | 22 | .readlink = generic_readlink, |
23 | .follow_link = jffs2_follow_link, | 23 | .follow_link = jffs2_follow_link, |
24 | .permission = jffs2_permission, | 24 | .check_acl = jffs2_check_acl, |
25 | .setattr = jffs2_setattr, | 25 | .setattr = jffs2_setattr, |
26 | .setxattr = jffs2_setxattr, | 26 | .setxattr = jffs2_setxattr, |
27 | .getxattr = jffs2_getxattr, | 27 | .getxattr = jffs2_getxattr, |
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index d9a721e6db70..5ef7bac265e5 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
@@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { | |||
1268 | if (!c->wbuf) | 1268 | if (!c->wbuf) |
1269 | return -ENOMEM; | 1269 | return -ENOMEM; |
1270 | 1270 | ||
1271 | #ifdef CONFIG_JFFS2_FS_WBUF_VERIFY | ||
1272 | c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL); | ||
1273 | if (!c->wbuf_verify) { | ||
1274 | kfree(c->wbuf); | ||
1275 | return -ENOMEM; | ||
1276 | } | ||
1277 | #endif | ||
1271 | return 0; | 1278 | return 0; |
1272 | } | 1279 | } |
1273 | 1280 | ||
1274 | void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { | 1281 | void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { |
1282 | #ifdef CONFIG_JFFS2_FS_WBUF_VERIFY | ||
1283 | kfree(c->wbuf_verify); | ||
1284 | #endif | ||
1275 | kfree(c->wbuf); | 1285 | kfree(c->wbuf); |
1276 | } | 1286 | } |
1277 | 1287 | ||
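This hunk completes an allocate/free pairing on the NOR write-buffer path: with CONFIG_JFFS2_FS_WBUF_VERIFY the verify code reads each written page back into c->wbuf_verify, so leaving it unallocated here (the NAND setup path is presumably already covered; it is not shown in this section) would dereference NULL on the first verified write. Note the error path, which frees the main wbuf rather than leaking it:

	#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
		c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
		if (!c->wbuf_verify) {
			kfree(c->wbuf);	/* don't leak the main buffer */
			return -ENOMEM;
		}
	#endif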
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 91fa3ad6e8c2..d66477c34306 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c | |||
@@ -67,10 +67,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) | |||
67 | acl = posix_acl_from_xattr(value, size); | 67 | acl = posix_acl_from_xattr(value, size); |
68 | } | 68 | } |
69 | kfree(value); | 69 | kfree(value); |
70 | if (!IS_ERR(acl)) { | 70 | if (!IS_ERR(acl)) |
71 | set_cached_acl(inode, type, acl); | 71 | set_cached_acl(inode, type, acl); |
72 | posix_acl_release(acl); | ||
73 | } | ||
74 | return acl; | 72 | return acl; |
75 | } | 73 | } |
76 | 74 | ||
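The deletion above is a reference-count fix: the old code released the ACL immediately after caching it yet still returned the pointer, so the caller's own posix_acl_release() later dropped a reference that was no longer held. The ownership rule after the fix, sketched:

	/* Reference flow in jfs_get_acl() after this change:
	 *
	 *	acl = posix_acl_from_xattr(value, size);  // one ref, ours
	 *	set_cached_acl(inode, type, acl);         // cache takes its own
	 *	return acl;                               // caller releases ours
	 *
	 * The deleted posix_acl_release() gave the caller's reference
	 * away early, opening a use-after-free window. */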
@@ -116,7 +114,7 @@ out: | |||
116 | return rc; | 114 | return rc; |
117 | } | 115 | } |
118 | 116 | ||
119 | static int jfs_check_acl(struct inode *inode, int mask) | 117 | int jfs_check_acl(struct inode *inode, int mask) |
120 | { | 118 | { |
121 | struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); | 119 | struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); |
122 | 120 | ||
@@ -131,11 +129,6 @@ static int jfs_check_acl(struct inode *inode, int mask) | |||
131 | return -EAGAIN; | 129 | return -EAGAIN; |
132 | } | 130 | } |
133 | 131 | ||
134 | int jfs_permission(struct inode *inode, int mask) | ||
135 | { | ||
136 | return generic_permission(inode, mask, jfs_check_acl); | ||
137 | } | ||
138 | |||
139 | int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) | 132 | int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) |
140 | { | 133 | { |
141 | struct posix_acl *acl = NULL; | 134 | struct posix_acl *acl = NULL; |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 7f6063acaa3b..2b70fa78e4a7 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -96,7 +96,7 @@ const struct inode_operations jfs_file_inode_operations = { | |||
96 | .removexattr = jfs_removexattr, | 96 | .removexattr = jfs_removexattr, |
97 | #ifdef CONFIG_JFS_POSIX_ACL | 97 | #ifdef CONFIG_JFS_POSIX_ACL |
98 | .setattr = jfs_setattr, | 98 | .setattr = jfs_setattr, |
99 | .permission = jfs_permission, | 99 | .check_acl = jfs_check_acl, |
100 | #endif | 100 | #endif |
101 | }; | 101 | }; |
102 | 102 | ||
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 88475f10a389..b07bd417ef85 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h | |||
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | #ifdef CONFIG_JFS_POSIX_ACL | 21 | #ifdef CONFIG_JFS_POSIX_ACL |
22 | 22 | ||
23 | int jfs_permission(struct inode *, int); | 23 | int jfs_check_acl(struct inode *, int); |
24 | int jfs_init_acl(tid_t, struct inode *, struct inode *); | 24 | int jfs_init_acl(tid_t, struct inode *, struct inode *); |
25 | int jfs_setattr(struct dentry *, struct iattr *); | 25 | int jfs_setattr(struct dentry *, struct iattr *); |
26 | 26 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 514ee2edb92a..c79a4270f083 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1543,7 +1543,7 @@ const struct inode_operations jfs_dir_inode_operations = { | |||
1543 | .removexattr = jfs_removexattr, | 1543 | .removexattr = jfs_removexattr, |
1544 | #ifdef CONFIG_JFS_POSIX_ACL | 1544 | #ifdef CONFIG_JFS_POSIX_ACL |
1545 | .setattr = jfs_setattr, | 1545 | .setattr = jfs_setattr, |
1546 | .permission = jfs_permission, | 1546 | .check_acl = jfs_check_acl, |
1547 | #endif | 1547 | #endif |
1548 | }; | 1548 | }; |
1549 | 1549 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index ddfa89948c3f..dcec3d3ea64f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, | |||
217 | return PTR_ERR(s); | 217 | return PTR_ERR(s); |
218 | 218 | ||
219 | s->s_flags = MS_NOUSER; | 219 | s->s_flags = MS_NOUSER; |
220 | s->s_maxbytes = ~0ULL; | 220 | s->s_maxbytes = MAX_LFS_FILESIZE; |
221 | s->s_blocksize = PAGE_SIZE; | 221 | s->s_blocksize = PAGE_SIZE; |
222 | s->s_blocksize_bits = PAGE_SHIFT; | 222 | s->s_blocksize_bits = PAGE_SHIFT; |
223 | s->s_magic = magic; | 223 | s->s_magic = magic; |
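The get_sb_pseudo() one-liner guards a signedness trap: s_maxbytes is an loff_t, a signed 64-bit type, so storing ~0ULL there becomes -1 and silently defeats size-limit checks. A sketch of the failure mode avoided:

	/* With s_maxbytes = ~0ULL the stored value reads back as -1,
	 * so a standard limit test can never fire:
	 *
	 *	if (pos > inode->i_sb->s_maxbytes)	// never true vs. -1
	 *		return -EFBIG;
	 *
	 * MAX_LFS_FILESIZE is the largest positive offset the page
	 * cache can address, keeping the comparison meaningful. */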
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 99d737bd4325..7cb076ac6b45 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -87,18 +87,6 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap) | |||
87 | return hash & (NLM_HOST_NRHASH - 1); | 87 | return hash & (NLM_HOST_NRHASH - 1); |
88 | } | 88 | } |
89 | 89 | ||
90 | static void nlm_clear_port(struct sockaddr *sap) | ||
91 | { | ||
92 | switch (sap->sa_family) { | ||
93 | case AF_INET: | ||
94 | ((struct sockaddr_in *)sap)->sin_port = 0; | ||
95 | break; | ||
96 | case AF_INET6: | ||
97 | ((struct sockaddr_in6 *)sap)->sin6_port = 0; | ||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | /* | 90 | /* |
103 | * Common host lookup routine for server & client | 91 | * Common host lookup routine for server & client |
104 | */ | 92 | */ |
@@ -177,7 +165,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) | |||
177 | host->h_addrbuf = nsm->sm_addrbuf; | 165 | host->h_addrbuf = nsm->sm_addrbuf; |
178 | memcpy(nlm_addr(host), ni->sap, ni->salen); | 166 | memcpy(nlm_addr(host), ni->sap, ni->salen); |
179 | host->h_addrlen = ni->salen; | 167 | host->h_addrlen = ni->salen; |
180 | nlm_clear_port(nlm_addr(host)); | 168 | rpc_set_port(nlm_addr(host), 0); |
181 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); | 169 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); |
182 | host->h_version = ni->version; | 170 | host->h_version = ni->version; |
183 | host->h_proto = ni->protocol; | 171 | host->h_proto = ni->protocol; |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 7fce1b525849..30c933188dd7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -61,43 +61,6 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) | |||
61 | return (struct sockaddr *)&nsm->sm_addr; | 61 | return (struct sockaddr *)&nsm->sm_addr; |
62 | } | 62 | } |
63 | 63 | ||
64 | static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf, | ||
65 | const size_t len) | ||
66 | { | ||
67 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
68 | snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr); | ||
69 | } | ||
70 | |||
71 | static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf, | ||
72 | const size_t len) | ||
73 | { | ||
74 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
75 | |||
76 | if (ipv6_addr_v4mapped(&sin6->sin6_addr)) | ||
77 | snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]); | ||
78 | else if (sin6->sin6_scope_id != 0) | ||
79 | snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr, | ||
80 | sin6->sin6_scope_id); | ||
81 | else | ||
82 | snprintf(buf, len, "%pI6", &sin6->sin6_addr); | ||
83 | } | ||
84 | |||
85 | static void nsm_display_address(const struct sockaddr *sap, | ||
86 | char *buf, const size_t len) | ||
87 | { | ||
88 | switch (sap->sa_family) { | ||
89 | case AF_INET: | ||
90 | nsm_display_ipv4_address(sap, buf, len); | ||
91 | break; | ||
92 | case AF_INET6: | ||
93 | nsm_display_ipv6_address(sap, buf, len); | ||
94 | break; | ||
95 | default: | ||
96 | snprintf(buf, len, "unsupported address family"); | ||
97 | break; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | static struct rpc_clnt *nsm_create(void) | 64 | static struct rpc_clnt *nsm_create(void) |
102 | { | 65 | { |
103 | struct sockaddr_in sin = { | 66 | struct sockaddr_in sin = { |
@@ -307,8 +270,11 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, | |||
307 | memcpy(nsm_addr(new), sap, salen); | 270 | memcpy(nsm_addr(new), sap, salen); |
308 | new->sm_addrlen = salen; | 271 | new->sm_addrlen = salen; |
309 | nsm_init_private(new); | 272 | nsm_init_private(new); |
310 | nsm_display_address((const struct sockaddr *)&new->sm_addr, | 273 | |
311 | new->sm_addrbuf, sizeof(new->sm_addrbuf)); | 274 | if (rpc_ntop(nsm_addr(new), new->sm_addrbuf, |
275 | sizeof(new->sm_addrbuf)) == 0) | ||
276 | (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf), | ||
277 | "unsupported address family"); | ||
312 | memcpy(new->sm_name, hostname, hostname_len); | 278 | memcpy(new->sm_name, hostname, hostname_len); |
313 | new->sm_name[hostname_len] = '\0'; | 279 | new->sm_name[hostname_len] = '\0'; |
314 | 280 | ||
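Both lockd hunks swap private address plumbing for sunrpc helpers: rpc_set_port() replaces the open-coded per-family port switch deleted from host.c, and rpc_ntop() replaces the hand-rolled nsm_display_*() printers, including their v4-mapped and scope-id handling. The resulting call pattern, as it appears in nsm_create_handle() above:

	/* rpc_ntop() renders an AF_INET/AF_INET6 sockaddr into the
	 * buffer and returns the length, or 0 for an unsupported
	 * family: */
	if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
		     sizeof(new->sm_addrbuf)) == 0)
		(void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf),
			       "unsupported address family");

	/* rpc_set_port() pokes a port into either family: */
	rpc_set_port(nlm_addr(host), 0);	/* clear the port */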
diff --git a/fs/locks.c b/fs/locks.c index b6440f52178f..19ee18a6829b 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
768 | * give it the opportunity to lock the file. | 768 | * give it the opportunity to lock the file. |
769 | */ | 769 | */ |
770 | if (found) | 770 | if (found) |
771 | cond_resched_bkl(); | 771 | cond_resched(); |
772 | 772 | ||
773 | find_conflict: | 773 | find_conflict: |
774 | for_each_lock(inode, before) { | 774 | for_each_lock(inode, before) { |
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
1591 | if (can_sleep) | 1591 | if (can_sleep) |
1592 | lock->fl_flags |= FL_SLEEP; | 1592 | lock->fl_flags |= FL_SLEEP; |
1593 | 1593 | ||
1594 | error = security_file_lock(filp, cmd); | 1594 | error = security_file_lock(filp, lock->fl_type); |
1595 | if (error) | 1595 | if (error) |
1596 | goto out_free; | 1596 | goto out_free; |
1597 | 1597 | ||
diff --git a/fs/namei.c b/fs/namei.c index f3c5b278895a..d11f404667e9 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -169,19 +169,10 @@ void putname(const char *name) | |||
169 | EXPORT_SYMBOL(putname); | 169 | EXPORT_SYMBOL(putname); |
170 | #endif | 170 | #endif |
171 | 171 | ||
172 | 172 | /* | |
173 | /** | 173 | * This does basic POSIX ACL permission checking |
174 | * generic_permission - check for access rights on a Posix-like filesystem | ||
175 | * @inode: inode to check access rights for | ||
176 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | ||
177 | * @check_acl: optional callback to check for Posix ACLs | ||
178 | * | ||
179 | * Used to check for read/write/execute permissions on a file. | ||
180 | * We use "fsuid" for this, letting us set arbitrary permissions | ||
181 | * for filesystem access without changing the "normal" uids which | ||
182 | * are used for other things.. | ||
183 | */ | 174 | */ |
184 | int generic_permission(struct inode *inode, int mask, | 175 | static int acl_permission_check(struct inode *inode, int mask, |
185 | int (*check_acl)(struct inode *inode, int mask)) | 176 | int (*check_acl)(struct inode *inode, int mask)) |
186 | { | 177 | { |
187 | umode_t mode = inode->i_mode; | 178 | umode_t mode = inode->i_mode; |
@@ -193,9 +184,7 @@ int generic_permission(struct inode *inode, int mask, | |||
193 | else { | 184 | else { |
194 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { | 185 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { |
195 | int error = check_acl(inode, mask); | 186 | int error = check_acl(inode, mask); |
196 | if (error == -EACCES) | 187 | if (error != -EAGAIN) |
197 | goto check_capabilities; | ||
198 | else if (error != -EAGAIN) | ||
199 | return error; | 188 | return error; |
200 | } | 189 | } |
201 | 190 | ||
@@ -208,8 +197,32 @@ int generic_permission(struct inode *inode, int mask, | |||
208 | */ | 197 | */ |
209 | if ((mask & ~mode) == 0) | 198 | if ((mask & ~mode) == 0) |
210 | return 0; | 199 | return 0; |
200 | return -EACCES; | ||
201 | } | ||
202 | |||
203 | /** | ||
204 | * generic_permission - check for access rights on a Posix-like filesystem | ||
205 | * @inode: inode to check access rights for | ||
206 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | ||
207 | * @check_acl: optional callback to check for Posix ACLs | ||
208 | * | ||
209 | * Used to check for read/write/execute permissions on a file. | ||
210 | * We use "fsuid" for this, letting us set arbitrary permissions | ||
211 | * for filesystem access without changing the "normal" uids which | ||
212 | * are used for other things.. | ||
213 | */ | ||
214 | int generic_permission(struct inode *inode, int mask, | ||
215 | int (*check_acl)(struct inode *inode, int mask)) | ||
216 | { | ||
217 | int ret; | ||
218 | |||
219 | /* | ||
220 | * Do the basic POSIX ACL permission checks. | ||
221 | */ | ||
222 | ret = acl_permission_check(inode, mask, check_acl); | ||
223 | if (ret != -EACCES) | ||
224 | return ret; | ||
211 | 225 | ||
212 | check_capabilities: | ||
213 | /* | 226 | /* |
214 | * Read/write DACs are always overridable. | 227 | * Read/write DACs are always overridable. |
215 | * Executable DACs are overridable if at least one exec bit is set. | 228 | * Executable DACs are overridable if at least one exec bit is set. |
@@ -262,7 +275,7 @@ int inode_permission(struct inode *inode, int mask) | |||
262 | if (inode->i_op->permission) | 275 | if (inode->i_op->permission) |
263 | retval = inode->i_op->permission(inode, mask); | 276 | retval = inode->i_op->permission(inode, mask); |
264 | else | 277 | else |
265 | retval = generic_permission(inode, mask, NULL); | 278 | retval = generic_permission(inode, mask, inode->i_op->check_acl); |
266 | 279 | ||
267 | if (retval) | 280 | if (retval) |
268 | return retval; | 281 | return retval; |
@@ -432,29 +445,22 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, | |||
432 | */ | 445 | */ |
433 | static int exec_permission_lite(struct inode *inode) | 446 | static int exec_permission_lite(struct inode *inode) |
434 | { | 447 | { |
435 | umode_t mode = inode->i_mode; | 448 | int ret; |
436 | 449 | ||
437 | if (inode->i_op->permission) | 450 | if (inode->i_op->permission) { |
438 | return -EAGAIN; | 451 | ret = inode->i_op->permission(inode, MAY_EXEC); |
439 | 452 | if (!ret) | |
440 | if (current_fsuid() == inode->i_uid) | 453 | goto ok; |
441 | mode >>= 6; | 454 | return ret; |
442 | else if (in_group_p(inode->i_gid)) | 455 | } |
443 | mode >>= 3; | 456 | ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); |
444 | 457 | if (!ret) | |
445 | if (mode & MAY_EXEC) | ||
446 | goto ok; | ||
447 | |||
448 | if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) | ||
449 | goto ok; | ||
450 | |||
451 | if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) | ||
452 | goto ok; | 458 | goto ok; |
453 | 459 | ||
454 | if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) | 460 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) |
455 | goto ok; | 461 | goto ok; |
456 | 462 | ||
457 | return -EACCES; | 463 | return ret; |
458 | ok: | 464 | ok: |
459 | return security_inode_permission(inode, MAY_EXEC); | 465 | return security_inode_permission(inode, MAY_EXEC); |
460 | } | 466 | } |
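Taken together, the namei.c hunks above reshape permission checking around the new acl_permission_check() helper; exec_permission_lite() now calls the fs ->permission method directly instead of returning -EAGAIN, and shares the mode-bit logic rather than duplicating it. The resulting flow, sketched as comments:

	/*
	 * inode_permission(inode, mask)
	 *   -> inode->i_op->permission(inode, mask)        if the fs has one
	 *   -> generic_permission(inode, mask,
	 *                         inode->i_op->check_acl)  otherwise
	 *
	 * generic_permission()
	 *   ret = acl_permission_check(inode, mask, check_acl);
	 *   if (ret != -EACCES)
	 *           return ret;        // granted, or a hard ACL error
	 *   // otherwise fall through to the CAP_DAC_OVERRIDE /
	 *   // CAP_DAC_READ_SEARCH capability overrides
	 */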
@@ -853,12 +859,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd) | |||
853 | 859 | ||
854 | nd->flags |= LOOKUP_CONTINUE; | 860 | nd->flags |= LOOKUP_CONTINUE; |
855 | err = exec_permission_lite(inode); | 861 | err = exec_permission_lite(inode); |
856 | if (err == -EAGAIN) | ||
857 | err = inode_permission(nd->path.dentry->d_inode, | ||
858 | MAY_EXEC); | ||
859 | if (!err) | ||
860 | err = ima_path_check(&nd->path, MAY_EXEC, | ||
861 | IMA_COUNT_UPDATE); | ||
862 | if (err) | 862 | if (err) |
863 | break; | 863 | break; |
864 | 864 | ||
@@ -1533,37 +1533,42 @@ int may_open(struct path *path, int acc_mode, int flag) | |||
1533 | if (error) | 1533 | if (error) |
1534 | return error; | 1534 | return error; |
1535 | 1535 | ||
1536 | error = ima_path_check(path, | 1536 | error = ima_path_check(path, acc_mode ? |
1537 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), | 1537 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : |
1538 | ACC_MODE(flag) & (MAY_READ | MAY_WRITE), | ||
1538 | IMA_COUNT_UPDATE); | 1539 | IMA_COUNT_UPDATE); |
1540 | |||
1539 | if (error) | 1541 | if (error) |
1540 | return error; | 1542 | return error; |
1541 | /* | 1543 | /* |
1542 | * An append-only file must be opened in append mode for writing. | 1544 | * An append-only file must be opened in append mode for writing. |
1543 | */ | 1545 | */ |
1544 | if (IS_APPEND(inode)) { | 1546 | if (IS_APPEND(inode)) { |
1547 | error = -EPERM; | ||
1545 | if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) | 1548 | if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) |
1546 | return -EPERM; | 1549 | goto err_out; |
1547 | if (flag & O_TRUNC) | 1550 | if (flag & O_TRUNC) |
1548 | return -EPERM; | 1551 | goto err_out; |
1549 | } | 1552 | } |
1550 | 1553 | ||
1551 | /* O_NOATIME can only be set by the owner or superuser */ | 1554 | /* O_NOATIME can only be set by the owner or superuser */ |
1552 | if (flag & O_NOATIME) | 1555 | if (flag & O_NOATIME) |
1553 | if (!is_owner_or_cap(inode)) | 1556 | if (!is_owner_or_cap(inode)) { |
1554 | return -EPERM; | 1557 | error = -EPERM; |
1558 | goto err_out; | ||
1559 | } | ||
1555 | 1560 | ||
1556 | /* | 1561 | /* |
1557 | * Ensure there are no outstanding leases on the file. | 1562 | * Ensure there are no outstanding leases on the file. |
1558 | */ | 1563 | */ |
1559 | error = break_lease(inode, flag); | 1564 | error = break_lease(inode, flag); |
1560 | if (error) | 1565 | if (error) |
1561 | return error; | 1566 | goto err_out; |
1562 | 1567 | ||
1563 | if (flag & O_TRUNC) { | 1568 | if (flag & O_TRUNC) { |
1564 | error = get_write_access(inode); | 1569 | error = get_write_access(inode); |
1565 | if (error) | 1570 | if (error) |
1566 | return error; | 1571 | goto err_out; |
1567 | 1572 | ||
1568 | /* | 1573 | /* |
1569 | * Refuse to truncate files with mandatory locks held on them. | 1574 | * Refuse to truncate files with mandatory locks held on them. |
@@ -1581,12 +1586,17 @@ int may_open(struct path *path, int acc_mode, int flag) | |||
1581 | } | 1586 | } |
1582 | put_write_access(inode); | 1587 | put_write_access(inode); |
1583 | if (error) | 1588 | if (error) |
1584 | return error; | 1589 | goto err_out; |
1585 | } else | 1590 | } else |
1586 | if (flag & FMODE_WRITE) | 1591 | if (flag & FMODE_WRITE) |
1587 | vfs_dq_init(inode); | 1592 | vfs_dq_init(inode); |
1588 | 1593 | ||
1589 | return 0; | 1594 | return 0; |
1595 | err_out: | ||
1596 | ima_counts_put(path, acc_mode ? | ||
1597 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : | ||
1598 | ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); | ||
1599 | return error; | ||
1590 | } | 1600 | } |
1591 | 1601 | ||
1592 | /* | 1602 | /* |
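The may_open() rework above exists because ima_path_check(..., IMA_COUNT_UPDATE) bumps IMA's open counters; once that call has succeeded, every later failure exit must drop them again, which the new err_out label centralizes. The balancing rule, condensed as comments ("mask" abbreviates the acc_mode ? ... : ACC_MODE(flag) ... expression from the hunk):

	/*
	 *	error = ima_path_check(path, mask, IMA_COUNT_UPDATE); // counts++
	 *	if (error)
	 *		return error;        // nothing to undo yet
	 *	...
	 * err_out:
	 *	ima_counts_put(path, mask);  // counts-- on any later failure
	 *	return error;
	 */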
diff --git a/fs/namespace.c b/fs/namespace.c index 277c28a63ead..7230787d18b0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -316,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); | |||
316 | */ | 316 | */ |
317 | int mnt_want_write_file(struct file *file) | 317 | int mnt_want_write_file(struct file *file) |
318 | { | 318 | { |
319 | if (!(file->f_mode & FMODE_WRITE)) | 319 | struct inode *inode = file->f_dentry->d_inode; |
320 | if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) | ||
320 | return mnt_want_write(file->f_path.mnt); | 321 | return mnt_want_write(file->f_path.mnt); |
321 | else | 322 | else |
322 | return mnt_clone_write(file->f_path.mnt); | 323 | return mnt_clone_write(file->f_path.mnt); |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 845159814de2..da7fda639eac 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o | |||
6 | 6 | ||
7 | nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ | 7 | nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ |
8 | direct.o pagelist.o proc.o read.o symlink.o unlink.o \ | 8 | direct.o pagelist.o proc.o read.o symlink.o unlink.o \ |
9 | write.o namespace.o mount_clnt.o | 9 | write.o namespace.o mount_clnt.o \ |
10 | dns_resolve.o cache_lib.o | ||
10 | nfs-$(CONFIG_ROOT_NFS) += nfsroot.o | 11 | nfs-$(CONFIG_ROOT_NFS) += nfsroot.o |
11 | nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o | 12 | nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o |
12 | nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o | 13 | nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o |
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c new file mode 100644 index 000000000000..b4ffd0146ea6 --- /dev/null +++ b/fs/nfs/cache_lib.c | |||
@@ -0,0 +1,140 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/cache_lib.c | ||
3 | * | ||
4 | * Helper routines for the NFS client caches | ||
5 | * | ||
6 | * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com> | ||
7 | */ | ||
8 | #include <linux/kmod.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/moduleparam.h> | ||
11 | #include <linux/mount.h> | ||
12 | #include <linux/namei.h> | ||
13 | #include <linux/sunrpc/cache.h> | ||
14 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
15 | |||
16 | #include "cache_lib.h" | ||
17 | |||
18 | #define NFS_CACHE_UPCALL_PATHLEN 256 | ||
19 | #define NFS_CACHE_UPCALL_TIMEOUT 15 | ||
20 | |||
21 | static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] = | ||
22 | "/sbin/nfs_cache_getent"; | ||
23 | static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT; | ||
24 | |||
25 | module_param_string(cache_getent, nfs_cache_getent_prog, | ||
26 | sizeof(nfs_cache_getent_prog), 0600); | ||
27 | MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program"); | ||
28 | module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600); | ||
29 | MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which " | ||
30 | "the cache upcall is assumed to have failed"); | ||
31 | |||
32 | int nfs_cache_upcall(struct cache_detail *cd, char *entry_name) | ||
33 | { | ||
34 | static char *envp[] = { "HOME=/", | ||
35 | "TERM=linux", | ||
36 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
37 | NULL | ||
38 | }; | ||
39 | char *argv[] = { | ||
40 | nfs_cache_getent_prog, | ||
41 | cd->name, | ||
42 | entry_name, | ||
43 | NULL | ||
44 | }; | ||
45 | int ret = -EACCES; | ||
46 | |||
47 | if (nfs_cache_getent_prog[0] == '\0') | ||
48 | goto out; | ||
49 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); | ||
50 | /* | ||
51 | * Disable the upcall mechanism if we're getting an ENOENT or | ||
52 | * EACCES error. The admin can re-enable it on the fly by using | ||
53 | * sysfs to set the 'cache_getent' parameter once the problem | ||
54 | * has been fixed. | ||
55 | */ | ||
56 | if (ret == -ENOENT || ret == -EACCES) | ||
57 | nfs_cache_getent_prog[0] = '\0'; | ||
58 | out: | ||
59 | return ret > 0 ? 0 : ret; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Deferred request handling | ||
64 | */ | ||
65 | void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq) | ||
66 | { | ||
67 | if (atomic_dec_and_test(&dreq->count)) | ||
68 | kfree(dreq); | ||
69 | } | ||
70 | |||
71 | static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany) | ||
72 | { | ||
73 | struct nfs_cache_defer_req *dreq; | ||
74 | |||
75 | dreq = container_of(d, struct nfs_cache_defer_req, deferred_req); | ||
76 | |||
77 | complete_all(&dreq->completion); | ||
78 | nfs_cache_defer_req_put(dreq); | ||
79 | } | ||
80 | |||
81 | static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req) | ||
82 | { | ||
83 | struct nfs_cache_defer_req *dreq; | ||
84 | |||
85 | dreq = container_of(req, struct nfs_cache_defer_req, req); | ||
86 | dreq->deferred_req.revisit = nfs_dns_cache_revisit; | ||
87 | atomic_inc(&dreq->count); | ||
88 | |||
89 | return &dreq->deferred_req; | ||
90 | } | ||
91 | |||
92 | struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void) | ||
93 | { | ||
94 | struct nfs_cache_defer_req *dreq; | ||
95 | |||
96 | dreq = kzalloc(sizeof(*dreq), GFP_KERNEL); | ||
97 | if (dreq) { | ||
98 | init_completion(&dreq->completion); | ||
99 | atomic_set(&dreq->count, 1); | ||
100 | dreq->req.defer = nfs_dns_cache_defer; | ||
101 | } | ||
102 | return dreq; | ||
103 | } | ||
104 | |||
105 | int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) | ||
106 | { | ||
107 | if (wait_for_completion_timeout(&dreq->completion, | ||
108 | nfs_cache_getent_timeout * HZ) == 0) | ||
109 | return -ETIMEDOUT; | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | int nfs_cache_register(struct cache_detail *cd) | ||
114 | { | ||
115 | struct nameidata nd; | ||
116 | struct vfsmount *mnt; | ||
117 | int ret; | ||
118 | |||
119 | mnt = rpc_get_mount(); | ||
120 | if (IS_ERR(mnt)) | ||
121 | return PTR_ERR(mnt); | ||
122 | ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd); | ||
123 | if (ret) | ||
124 | goto err; | ||
125 | ret = sunrpc_cache_register_pipefs(nd.path.dentry, | ||
126 | cd->name, 0600, cd); | ||
127 | path_put(&nd.path); | ||
128 | if (!ret) | ||
129 | return ret; | ||
130 | err: | ||
131 | rpc_put_mount(); | ||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | void nfs_cache_unregister(struct cache_detail *cd) | ||
136 | { | ||
137 | sunrpc_cache_unregister_pipefs(cd); | ||
138 | rpc_put_mount(); | ||
139 | } | ||
140 | |||
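The refcounting in the new cache_lib.c deserves a trace: nfs_cache_defer_req_alloc() starts the count at 1 for the caller, nfs_dns_cache_defer() takes a second reference when the cache core defers the request, and nfs_dns_cache_revisit() completes the waiter and drops that reference. A hedged caller-side sketch; the real consumer is the new dns_resolve.c, which is outside this section:

	/* Assumed usage, based only on the API above: */
	struct nfs_cache_defer_req *dreq;
	int ret;

	dreq = nfs_cache_defer_req_alloc();	/* count = 1, ours */
	if (dreq == NULL)
		return -ENOMEM;
	/* Hand &dreq->req to the sunrpc cache lookup; if the entry is
	 * not ready, the core defers via dreq->req.defer() (count = 2)
	 * and we block until the upcall completes it or we time out: */
	ret = nfs_cache_wait_for_upcall(dreq);
	nfs_cache_defer_req_put(dreq);		/* drop our reference */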
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h new file mode 100644 index 000000000000..76f856e284e4 --- /dev/null +++ b/fs/nfs/cache_lib.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Helper routines for the NFS client caches | ||
3 | * | ||
4 | * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/completion.h> | ||
8 | #include <linux/sunrpc/cache.h> | ||
9 | #include <asm/atomic.h> | ||
10 | |||
11 | /* | ||
12 | * Deferred request handling | ||
13 | */ | ||
14 | struct nfs_cache_defer_req { | ||
15 | struct cache_req req; | ||
16 | struct cache_deferred_req deferred_req; | ||
17 | struct completion completion; | ||
18 | atomic_t count; | ||
19 | }; | ||
20 | |||
21 | extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name); | ||
22 | extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); | ||
23 | extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); | ||
24 | extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); | ||
25 | |||
26 | extern int nfs_cache_register(struct cache_detail *cd); | ||
27 | extern void nfs_cache_unregister(struct cache_detail *cd); | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 7f604c7941fb..293fa0528a6e 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -43,21 +43,29 @@ static struct svc_program nfs4_callback_program; | |||
43 | unsigned int nfs_callback_set_tcpport; | 43 | unsigned int nfs_callback_set_tcpport; |
44 | unsigned short nfs_callback_tcpport; | 44 | unsigned short nfs_callback_tcpport; |
45 | unsigned short nfs_callback_tcpport6; | 45 | unsigned short nfs_callback_tcpport6; |
46 | static const int nfs_set_port_min = 0; | 46 | #define NFS_CALLBACK_MAXPORTNR (65535U) |
47 | static const int nfs_set_port_max = 65535; | ||
48 | 47 | ||
49 | static int param_set_port(const char *val, struct kernel_param *kp) | 48 | static int param_set_portnr(const char *val, struct kernel_param *kp) |
50 | { | 49 | { |
51 | char *endp; | 50 | unsigned long num; |
52 | int num = simple_strtol(val, &endp, 0); | 51 | int ret; |
53 | if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) | 52 | |
53 | if (!val) | ||
54 | return -EINVAL; | ||
55 | ret = strict_strtoul(val, 0, &num); | ||
56 | if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) | ||
54 | return -EINVAL; | 57 | return -EINVAL; |
55 | *((int *)kp->arg) = num; | 58 | *((unsigned int *)kp->arg) = num; |
56 | return 0; | 59 | return 0; |
57 | } | 60 | } |
58 | 61 | ||
59 | module_param_call(callback_tcpport, param_set_port, param_get_int, | 62 | static int param_get_portnr(char *buffer, struct kernel_param *kp) |
60 | &nfs_callback_set_tcpport, 0644); | 63 | { |
64 | return param_get_uint(buffer, kp); | ||
65 | } | ||
66 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); | ||
67 | |||
68 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); | ||
61 | 69 | ||
62 | /* | 70 | /* |
63 | * This is the NFSv4 callback kernel thread. | 71 | * This is the NFSv4 callback kernel thread. |
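The rewritten parameter parser is stricter on two counts: strict_strtoul() rejects strings with trailing junk that simple_strtol() would half-parse, and an unsigned parse with a single upper bound replaces the signed min/max pair. The difference, illustrated (strict_strtoul was the name in this era; it was later renamed kstrtoul):

	/* simple_strtol("2049abc", &endp, 0) -> 2049, endp left at "abc",
	 *	so the old code had to inspect *endp itself;
	 * strict_strtoul("2049abc", 0, &num) -> -EINVAL: the whole
	 *	string must parse.
	 * The new code then only range-checks num against
	 * NFS_CALLBACK_MAXPORTNR (65535). */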
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8d25ccb2d51d..e350bd6a2334 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server, | |||
809 | /* Initialise the client representation from the mount data */ | 809 | /* Initialise the client representation from the mount data */ |
810 | server->flags = data->flags; | 810 | server->flags = data->flags; |
811 | server->options = data->options; | 811 | server->options = data->options; |
812 | server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| | ||
813 | NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| | ||
814 | NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; | ||
812 | 815 | ||
813 | if (data->rsize) | 816 | if (data->rsize) |
814 | server->rsize = nfs_block_size(data->rsize, NULL); | 817 | server->rsize = nfs_block_size(data->rsize, NULL); |
@@ -879,6 +882,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
879 | server->rsize = NFS_MAX_FILE_IO_SIZE; | 882 | server->rsize = NFS_MAX_FILE_IO_SIZE; |
880 | server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 883 | server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
881 | 884 | ||
885 | server->backing_dev_info.name = "nfs"; | ||
882 | server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; | 886 | server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; |
883 | 887 | ||
884 | if (server->wsize > max_rpc_payload) | 888 | if (server->wsize > max_rpc_payload) |
@@ -1074,10 +1078,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, | |||
1074 | (unsigned long long) server->fsid.major, | 1078 | (unsigned long long) server->fsid.major, |
1075 | (unsigned long long) server->fsid.minor); | 1079 | (unsigned long long) server->fsid.minor); |
1076 | 1080 | ||
1077 | BUG_ON(!server->nfs_client); | ||
1078 | BUG_ON(!server->nfs_client->rpc_ops); | ||
1079 | BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); | ||
1080 | |||
1081 | spin_lock(&nfs_client_lock); | 1081 | spin_lock(&nfs_client_lock); |
1082 | list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); | 1082 | list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); |
1083 | list_add_tail(&server->master_link, &nfs_volume_list); | 1083 | list_add_tail(&server->master_link, &nfs_volume_list); |
@@ -1274,7 +1274,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1274 | 1274 | ||
1275 | /* Initialise the client representation from the mount data */ | 1275 | /* Initialise the client representation from the mount data */ |
1276 | server->flags = data->flags; | 1276 | server->flags = data->flags; |
1277 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1277 | server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; |
1278 | server->options = data->options; | 1278 | server->options = data->options; |
1279 | 1279 | ||
1280 | /* Get a client record */ | 1280 | /* Get a client record */ |
@@ -1359,10 +1359,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, | |||
1359 | if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) | 1359 | if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) |
1360 | server->namelen = NFS4_MAXNAMLEN; | 1360 | server->namelen = NFS4_MAXNAMLEN; |
1361 | 1361 | ||
1362 | BUG_ON(!server->nfs_client); | ||
1363 | BUG_ON(!server->nfs_client->rpc_ops); | ||
1364 | BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); | ||
1365 | |||
1366 | spin_lock(&nfs_client_lock); | 1362 | spin_lock(&nfs_client_lock); |
1367 | list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); | 1363 | list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); |
1368 | list_add_tail(&server->master_link, &nfs_volume_list); | 1364 | list_add_tail(&server->master_link, &nfs_volume_list); |
@@ -1400,7 +1396,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, | |||
1400 | 1396 | ||
1401 | /* Initialise the client representation from the parent server */ | 1397 | /* Initialise the client representation from the parent server */ |
1402 | nfs_server_copy_userdata(server, parent_server); | 1398 | nfs_server_copy_userdata(server, parent_server); |
1403 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1399 | server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; |
1404 | 1400 | ||
1405 | /* Get a client representation. | 1401 | /* Get a client representation. |
1406 | * Note: NFSv4 always uses TCP, */ | 1402 | * Note: NFSv4 always uses TCP, */ |
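nfs_init_server() now seeds every v2/v3 server with the full attribute-capability mask up front, while the v4 paths add NFS_CAP_CHANGE_ATTR and later trim the mask against the server's advertised attr_bitmask (see _nfs4_server_capabilities() further down). These bits exist to feed the nfs_server_capable() test used throughout fs/nfs/inode.c; a sketch of how a caller reads them, assuming the usual helper from include/linux/nfs_fs.h:

static inline int nfs_server_capable(struct inode *inode, int cap)
{
	return NFS_SERVER(inode)->caps & cap;
}

/* ... so an attribute is distrusted only when it is missing from a
 * reply *and* this server is supposed to be able to report it: */
if (!(fattr->valid & NFS_ATTR_FATTR_MTIME) &&
    nfs_server_capable(inode, NFS_CAP_MTIME))
	nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA;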
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 489fc01a3204..6c3210099d51 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata) | |||
255 | 255 | ||
256 | if (put_dreq(dreq)) | 256 | if (put_dreq(dreq)) |
257 | nfs_direct_complete(dreq); | 257 | nfs_direct_complete(dreq); |
258 | nfs_readdata_release(calldata); | 258 | nfs_readdata_free(data); |
259 | } | 259 | } |
260 | 260 | ||
261 | static const struct rpc_call_ops nfs_read_direct_ops = { | 261 | static const struct rpc_call_ops nfs_read_direct_ops = { |
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
314 | data->npages, 1, 0, data->pagevec, NULL); | 314 | data->npages, 1, 0, data->pagevec, NULL); |
315 | up_read(¤t->mm->mmap_sem); | 315 | up_read(¤t->mm->mmap_sem); |
316 | if (result < 0) { | 316 | if (result < 0) { |
317 | nfs_readdata_release(data); | 317 | nfs_readdata_free(data); |
318 | break; | 318 | break; |
319 | } | 319 | } |
320 | if ((unsigned)result < data->npages) { | 320 | if ((unsigned)result < data->npages) { |
321 | bytes = result * PAGE_SIZE; | 321 | bytes = result * PAGE_SIZE; |
322 | if (bytes <= pgbase) { | 322 | if (bytes <= pgbase) { |
323 | nfs_direct_release_pages(data->pagevec, result); | 323 | nfs_direct_release_pages(data->pagevec, result); |
324 | nfs_readdata_release(data); | 324 | nfs_readdata_free(data); |
325 | break; | 325 | break; |
326 | } | 326 | } |
327 | bytes -= pgbase; | 327 | bytes -= pgbase; |
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
334 | data->inode = inode; | 334 | data->inode = inode; |
335 | data->cred = msg.rpc_cred; | 335 | data->cred = msg.rpc_cred; |
336 | data->args.fh = NFS_FH(inode); | 336 | data->args.fh = NFS_FH(inode); |
337 | data->args.context = get_nfs_open_context(ctx); | 337 | data->args.context = ctx; |
338 | data->args.offset = pos; | 338 | data->args.offset = pos; |
339 | data->args.pgbase = pgbase; | 339 | data->args.pgbase = pgbase; |
340 | data->args.pages = data->pagevec; | 340 | data->args.pages = data->pagevec; |
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) | |||
441 | struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); | 441 | struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); |
442 | list_del(&data->pages); | 442 | list_del(&data->pages); |
443 | nfs_direct_release_pages(data->pagevec, data->npages); | 443 | nfs_direct_release_pages(data->pagevec, data->npages); |
444 | nfs_writedata_release(data); | 444 | nfs_writedata_free(data); |
445 | } | 445 | } |
446 | } | 446 | } |
447 | 447 | ||
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata) | |||
534 | 534 | ||
535 | dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); | 535 | dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); |
536 | nfs_direct_write_complete(dreq, data->inode); | 536 | nfs_direct_write_complete(dreq, data->inode); |
537 | nfs_commitdata_release(calldata); | 537 | nfs_commit_free(data); |
538 | } | 538 | } |
539 | 539 | ||
540 | static const struct rpc_call_ops nfs_commit_direct_ops = { | 540 | static const struct rpc_call_ops nfs_commit_direct_ops = { |
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | |||
570 | data->args.fh = NFS_FH(data->inode); | 570 | data->args.fh = NFS_FH(data->inode); |
571 | data->args.offset = 0; | 571 | data->args.offset = 0; |
572 | data->args.count = 0; | 572 | data->args.count = 0; |
573 | data->args.context = get_nfs_open_context(dreq->ctx); | 573 | data->args.context = dreq->ctx; |
574 | data->res.count = 0; | 574 | data->res.count = 0; |
575 | data->res.fattr = &data->fattr; | 575 | data->res.fattr = &data->fattr; |
576 | data->res.verf = &data->verf; | 576 | data->res.verf = &data->verf; |
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, | |||
734 | data->npages, 0, 0, data->pagevec, NULL); | 734 | data->npages, 0, 0, data->pagevec, NULL); |
735 | up_read(¤t->mm->mmap_sem); | 735 | up_read(¤t->mm->mmap_sem); |
736 | if (result < 0) { | 736 | if (result < 0) { |
737 | nfs_writedata_release(data); | 737 | nfs_writedata_free(data); |
738 | break; | 738 | break; |
739 | } | 739 | } |
740 | if ((unsigned)result < data->npages) { | 740 | if ((unsigned)result < data->npages) { |
741 | bytes = result * PAGE_SIZE; | 741 | bytes = result * PAGE_SIZE; |
742 | if (bytes <= pgbase) { | 742 | if (bytes <= pgbase) { |
743 | nfs_direct_release_pages(data->pagevec, result); | 743 | nfs_direct_release_pages(data->pagevec, result); |
744 | nfs_writedata_release(data); | 744 | nfs_writedata_free(data); |
745 | break; | 745 | break; |
746 | } | 746 | } |
747 | bytes -= pgbase; | 747 | bytes -= pgbase; |
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, | |||
756 | data->inode = inode; | 756 | data->inode = inode; |
757 | data->cred = msg.rpc_cred; | 757 | data->cred = msg.rpc_cred; |
758 | data->args.fh = NFS_FH(inode); | 758 | data->args.fh = NFS_FH(inode); |
759 | data->args.context = get_nfs_open_context(ctx); | 759 | data->args.context = ctx; |
760 | data->args.offset = pos; | 760 | data->args.offset = pos; |
761 | data->args.pgbase = pgbase; | 761 | data->args.pgbase = pgbase; |
762 | data->args.pages = data->pagevec; | 762 | data->args.pages = data->pagevec; |
@@ -934,9 +934,6 @@ out: | |||
934 | * back into its cache. We let the server do generic write | 934 | * back into its cache. We let the server do generic write |
935 | * parameter checking and report problems. | 935 | * parameter checking and report problems. |
936 | * | 936 | * |
937 | * We also avoid an unnecessary invocation of generic_osync_inode(), | ||
938 | * as it is fairly meaningless to sync the metadata of an NFS file. | ||
939 | * | ||
940 | * We eliminate local atime updates, see direct read above. | 937 | * We eliminate local atime updates, see direct read above. |
941 | * | 938 | * |
942 | * We avoid unnecessary page cache invalidations for normal cached | 939 | * We avoid unnecessary page cache invalidations for normal cached |
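All of the *_release() to *_free() swaps in this file follow from the two context assignments above: the direct-I/O request (dreq) already holds a reference to the open context for its entire lifetime, so taking an extra reference per RPC and dropping it in the *_release() helpers was redundant. Condensed, as a reading of these hunks rather than new behaviour:

/*
 * Before: each RPC pinned the context itself,
 *	data->args.context = get_nfs_open_context(ctx);
 * and nfs_readdata_release() paired the put_nfs_open_context()
 * with the free.
 *
 * After: the dreq's long-lived reference is simply borrowed,
 *	data->args.context = ctx;
 * and nfs_readdata_free() (likewise nfs_writedata_free() and
 * nfs_commit_free()) only frees, because there is no per-RPC
 * reference left to drop.
 */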
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c new file mode 100644 index 000000000000..f4d54ba97cc6 --- /dev/null +++ b/fs/nfs/dns_resolve.c | |||
@@ -0,0 +1,335 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/dns_resolve.c | ||
3 | * | ||
4 | * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com> | ||
5 | * | ||
6 | * Resolves DNS hostnames into valid IP addresses | ||
7 | */ | ||
8 | |||
9 | #include <linux/hash.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/kmod.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/socket.h> | ||
14 | #include <linux/seq_file.h> | ||
15 | #include <linux/inet.h> | ||
16 | #include <linux/sunrpc/clnt.h> | ||
17 | #include <linux/sunrpc/cache.h> | ||
18 | #include <linux/sunrpc/svcauth.h> | ||
19 | |||
20 | #include "dns_resolve.h" | ||
21 | #include "cache_lib.h" | ||
22 | |||
23 | #define NFS_DNS_HASHBITS 4 | ||
24 | #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) | ||
25 | |||
26 | static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE]; | ||
27 | |||
28 | struct nfs_dns_ent { | ||
29 | struct cache_head h; | ||
30 | |||
31 | char *hostname; | ||
32 | size_t namelen; | ||
33 | |||
34 | struct sockaddr_storage addr; | ||
35 | size_t addrlen; | ||
36 | }; | ||
37 | |||
38 | |||
39 | static void nfs_dns_ent_init(struct cache_head *cnew, | ||
40 | struct cache_head *ckey) | ||
41 | { | ||
42 | struct nfs_dns_ent *new; | ||
43 | struct nfs_dns_ent *key; | ||
44 | |||
45 | new = container_of(cnew, struct nfs_dns_ent, h); | ||
46 | key = container_of(ckey, struct nfs_dns_ent, h); | ||
47 | |||
48 | kfree(new->hostname); | ||
49 | new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); | ||
50 | if (new->hostname) { | ||
51 | new->namelen = key->namelen; | ||
52 | memcpy(&new->addr, &key->addr, key->addrlen); | ||
53 | new->addrlen = key->addrlen; | ||
54 | } else { | ||
55 | new->namelen = 0; | ||
56 | new->addrlen = 0; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | static void nfs_dns_ent_put(struct kref *ref) | ||
61 | { | ||
62 | struct nfs_dns_ent *item; | ||
63 | |||
64 | item = container_of(ref, struct nfs_dns_ent, h.ref); | ||
65 | kfree(item->hostname); | ||
66 | kfree(item); | ||
67 | } | ||
68 | |||
69 | static struct cache_head *nfs_dns_ent_alloc(void) | ||
70 | { | ||
71 | struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL); | ||
72 | |||
73 | if (item != NULL) { | ||
74 | item->hostname = NULL; | ||
75 | item->namelen = 0; | ||
76 | item->addrlen = 0; | ||
77 | return &item->h; | ||
78 | } | ||
79 | return NULL; | ||
80 | }; | ||
81 | |||
82 | static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key) | ||
83 | { | ||
84 | return hash_str(key->hostname, NFS_DNS_HASHBITS); | ||
85 | } | ||
86 | |||
87 | static void nfs_dns_request(struct cache_detail *cd, | ||
88 | struct cache_head *ch, | ||
89 | char **bpp, int *blen) | ||
90 | { | ||
91 | struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); | ||
92 | |||
93 | qword_add(bpp, blen, key->hostname); | ||
94 | (*bpp)[-1] = '\n'; | ||
95 | } | ||
96 | |||
97 | static int nfs_dns_upcall(struct cache_detail *cd, | ||
98 | struct cache_head *ch) | ||
99 | { | ||
100 | struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); | ||
101 | int ret; | ||
102 | |||
103 | ret = nfs_cache_upcall(cd, key->hostname); | ||
104 | if (ret) | ||
105 | ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); | ||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | static int nfs_dns_match(struct cache_head *ca, | ||
110 | struct cache_head *cb) | ||
111 | { | ||
112 | struct nfs_dns_ent *a; | ||
113 | struct nfs_dns_ent *b; | ||
114 | |||
115 | a = container_of(ca, struct nfs_dns_ent, h); | ||
116 | b = container_of(cb, struct nfs_dns_ent, h); | ||
117 | |||
118 | if (a->namelen == 0 || a->namelen != b->namelen) | ||
119 | return 0; | ||
120 | return memcmp(a->hostname, b->hostname, a->namelen) == 0; | ||
121 | } | ||
122 | |||
123 | static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, | ||
124 | struct cache_head *h) | ||
125 | { | ||
126 | struct nfs_dns_ent *item; | ||
127 | long ttl; | ||
128 | |||
129 | if (h == NULL) { | ||
130 | seq_puts(m, "# ip address hostname ttl\n"); | ||
131 | return 0; | ||
132 | } | ||
133 | item = container_of(h, struct nfs_dns_ent, h); | ||
134 | ttl = (long)item->h.expiry_time - (long)get_seconds(); | ||
135 | if (ttl < 0) | ||
136 | ttl = 0; | ||
137 | |||
138 | if (!test_bit(CACHE_NEGATIVE, &h->flags)) { | ||
139 | char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1]; | ||
140 | |||
141 | rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf)); | ||
142 | seq_printf(m, "%15s ", buf); | ||
143 | } else | ||
144 | seq_puts(m, "<none> "); | ||
145 | seq_printf(m, "%15s %ld\n", item->hostname, ttl); | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd, | ||
150 | struct nfs_dns_ent *key) | ||
151 | { | ||
152 | struct cache_head *ch; | ||
153 | |||
154 | ch = sunrpc_cache_lookup(cd, | ||
155 | &key->h, | ||
156 | nfs_dns_hash(key)); | ||
157 | if (!ch) | ||
158 | return NULL; | ||
159 | return container_of(ch, struct nfs_dns_ent, h); | ||
160 | } | ||
161 | |||
162 | struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd, | ||
163 | struct nfs_dns_ent *new, | ||
164 | struct nfs_dns_ent *key) | ||
165 | { | ||
166 | struct cache_head *ch; | ||
167 | |||
168 | ch = sunrpc_cache_update(cd, | ||
169 | &new->h, &key->h, | ||
170 | nfs_dns_hash(key)); | ||
171 | if (!ch) | ||
172 | return NULL; | ||
173 | return container_of(ch, struct nfs_dns_ent, h); | ||
174 | } | ||
175 | |||
176 | static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) | ||
177 | { | ||
178 | char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; | ||
179 | struct nfs_dns_ent key, *item; | ||
180 | unsigned long ttl; | ||
181 | ssize_t len; | ||
182 | int ret = -EINVAL; | ||
183 | |||
184 | if (buf[buflen-1] != '\n') | ||
185 | goto out; | ||
186 | buf[buflen-1] = '\0'; | ||
187 | |||
188 | len = qword_get(&buf, buf1, sizeof(buf1)); | ||
189 | if (len <= 0) | ||
190 | goto out; | ||
191 | key.addrlen = rpc_pton(buf1, len, | ||
192 | (struct sockaddr *)&key.addr, | ||
193 | sizeof(key.addr)); | ||
194 | |||
195 | len = qword_get(&buf, buf1, sizeof(buf1)); | ||
196 | if (len <= 0) | ||
197 | goto out; | ||
198 | |||
199 | key.hostname = buf1; | ||
200 | key.namelen = len; | ||
201 | memset(&key.h, 0, sizeof(key.h)); | ||
202 | |||
203 | ttl = get_expiry(&buf); | ||
204 | if (ttl == 0) | ||
205 | goto out; | ||
206 | key.h.expiry_time = ttl + get_seconds(); | ||
207 | |||
208 | ret = -ENOMEM; | ||
209 | item = nfs_dns_lookup(cd, &key); | ||
210 | if (item == NULL) | ||
211 | goto out; | ||
212 | |||
213 | if (key.addrlen == 0) | ||
214 | set_bit(CACHE_NEGATIVE, &key.h.flags); | ||
215 | |||
216 | item = nfs_dns_update(cd, &key, item); | ||
217 | if (item == NULL) | ||
218 | goto out; | ||
219 | |||
220 | ret = 0; | ||
221 | cache_put(&item->h, cd); | ||
222 | out: | ||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | static struct cache_detail nfs_dns_resolve = { | ||
227 | .owner = THIS_MODULE, | ||
228 | .hash_size = NFS_DNS_HASHTBL_SIZE, | ||
229 | .hash_table = nfs_dns_table, | ||
230 | .name = "dns_resolve", | ||
231 | .cache_put = nfs_dns_ent_put, | ||
232 | .cache_upcall = nfs_dns_upcall, | ||
233 | .cache_parse = nfs_dns_parse, | ||
234 | .cache_show = nfs_dns_show, | ||
235 | .match = nfs_dns_match, | ||
236 | .init = nfs_dns_ent_init, | ||
237 | .update = nfs_dns_ent_init, | ||
238 | .alloc = nfs_dns_ent_alloc, | ||
239 | }; | ||
240 | |||
241 | static int do_cache_lookup(struct cache_detail *cd, | ||
242 | struct nfs_dns_ent *key, | ||
243 | struct nfs_dns_ent **item, | ||
244 | struct nfs_cache_defer_req *dreq) | ||
245 | { | ||
246 | int ret = -ENOMEM; | ||
247 | |||
248 | *item = nfs_dns_lookup(cd, key); | ||
249 | if (*item) { | ||
250 | ret = cache_check(cd, &(*item)->h, &dreq->req); | ||
251 | if (ret) | ||
252 | *item = NULL; | ||
253 | } | ||
254 | return ret; | ||
255 | } | ||
256 | |||
257 | static int do_cache_lookup_nowait(struct cache_detail *cd, | ||
258 | struct nfs_dns_ent *key, | ||
259 | struct nfs_dns_ent **item) | ||
260 | { | ||
261 | int ret = -ENOMEM; | ||
262 | |||
263 | *item = nfs_dns_lookup(cd, key); | ||
264 | if (!*item) | ||
265 | goto out_err; | ||
266 | ret = -ETIMEDOUT; | ||
267 | if (!test_bit(CACHE_VALID, &(*item)->h.flags) | ||
268 | || (*item)->h.expiry_time < get_seconds() | ||
269 | || cd->flush_time > (*item)->h.last_refresh) | ||
270 | goto out_put; | ||
271 | ret = -ENOENT; | ||
272 | if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) | ||
273 | goto out_put; | ||
274 | return 0; | ||
275 | out_put: | ||
276 | cache_put(&(*item)->h, cd); | ||
277 | out_err: | ||
278 | *item = NULL; | ||
279 | return ret; | ||
280 | } | ||
281 | |||
282 | static int do_cache_lookup_wait(struct cache_detail *cd, | ||
283 | struct nfs_dns_ent *key, | ||
284 | struct nfs_dns_ent **item) | ||
285 | { | ||
286 | struct nfs_cache_defer_req *dreq; | ||
287 | int ret = -ENOMEM; | ||
288 | |||
289 | dreq = nfs_cache_defer_req_alloc(); | ||
290 | if (!dreq) | ||
291 | goto out; | ||
292 | ret = do_cache_lookup(cd, key, item, dreq); | ||
293 | if (ret == -EAGAIN) { | ||
294 | ret = nfs_cache_wait_for_upcall(dreq); | ||
295 | if (!ret) | ||
296 | ret = do_cache_lookup_nowait(cd, key, item); | ||
297 | } | ||
298 | nfs_cache_defer_req_put(dreq); | ||
299 | out: | ||
300 | return ret; | ||
301 | } | ||
302 | |||
303 | ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | ||
304 | struct sockaddr *sa, size_t salen) | ||
305 | { | ||
306 | struct nfs_dns_ent key = { | ||
307 | .hostname = name, | ||
308 | .namelen = namelen, | ||
309 | }; | ||
310 | struct nfs_dns_ent *item = NULL; | ||
311 | ssize_t ret; | ||
312 | |||
313 | ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); | ||
314 | if (ret == 0) { | ||
315 | if (salen >= item->addrlen) { | ||
316 | memcpy(sa, &item->addr, item->addrlen); | ||
317 | ret = item->addrlen; | ||
318 | } else | ||
319 | ret = -EOVERFLOW; | ||
320 | cache_put(&item->h, &nfs_dns_resolve); | ||
321 | } else if (ret == -ENOENT) | ||
322 | ret = -ESRCH; | ||
323 | return ret; | ||
324 | } | ||
325 | |||
326 | int nfs_dns_resolver_init(void) | ||
327 | { | ||
328 | return nfs_cache_register(&nfs_dns_resolve); | ||
329 | } | ||
330 | |||
331 | void nfs_dns_resolver_destroy(void) | ||
332 | { | ||
333 | nfs_cache_unregister(&nfs_dns_resolve); | ||
334 | } | ||
335 | |||
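The exported entry point is nfs_dns_resolve_name(). A hedged usage sketch, with illustrative variable names (the only in-tree caller added by this merge is nfs_parse_server_name() in fs/nfs/nfs4namespace.c below):

struct sockaddr_storage ss;
ssize_t salen;

salen = nfs_dns_resolve_name(hostname, strlen(hostname),
			     (struct sockaddr *)&ss, sizeof(ss));
if (salen > 0)
	rpc_set_port((struct sockaddr *)&ss, NFS_PORT);	/* usable address */
else if (salen == -ESRCH)
	;	/* userspace answered negatively: no such host */
else
	;	/* -ETIMEDOUT, -ENOMEM, ...: the upcall machinery failed */

Note the downcall format implied by nfs_dns_parse(): one newline-terminated line of "<ip-address> <hostname> <ttl>", where an address that rpc_pton() cannot parse marks the entry negative.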
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h new file mode 100644 index 000000000000..a3f0938babf7 --- /dev/null +++ b/fs/nfs/dns_resolve.h | |||
@@ -0,0 +1,14 @@ | |||
1 | /* | ||
2 | * Resolve DNS hostnames into valid IP addresses | ||
3 | */ | ||
4 | #ifndef __LINUX_FS_NFS_DNS_RESOLVE_H | ||
5 | #define __LINUX_FS_NFS_DNS_RESOLVE_H | ||
6 | |||
7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) | ||
8 | |||
9 | extern int nfs_dns_resolver_init(void); | ||
10 | extern void nfs_dns_resolver_destroy(void); | ||
11 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | ||
12 | struct sockaddr *sa, size_t salen); | ||
13 | |||
14 | #endif | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 05062329b678..5021b75d2d1e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -328,6 +328,42 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
328 | } | 328 | } |
329 | 329 | ||
330 | /* | 330 | /* |
331 | * Decide whether a read/modify/write cycle may be more efficient | ||
332 | * than a modify/write/read cycle when writing to a page in the | ||
333 | * page cache. | ||
334 | * | ||
335 | * The modify/write/read cycle may occur if a page is read before | ||
336 | * being completely filled by the writer. In this situation, the | ||
337 | * page must be completely written to stable storage on the server | ||
338 | * before it can be refilled by reading in the page from the server. | ||
339 | * This can lead to expensive, small, FILE_SYNC mode writes being | ||
340 | * done. | ||
341 | * | ||
342 | * It may be more efficient to read the page first if the file is | ||
343 | * open for reading in addition to writing, the page is not marked | ||
344 | * as Uptodate, it is not dirty or waiting to be committed (either | ||
345 | * of which would indicate that the page was previously allocated | ||
346 | * and then modified), there are valid bytes of data in that range | ||
347 | * of the file, and the new data won't completely replace the old | ||
348 | * data in that range of the file. | ||
349 | */ | ||
350 | static int nfs_want_read_modify_write(struct file *file, struct page *page, | ||
351 | loff_t pos, unsigned len) | ||
352 | { | ||
353 | unsigned int pglen = nfs_page_length(page); | ||
354 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); | ||
355 | unsigned int end = offset + len; | ||
356 | |||
357 | if ((file->f_mode & FMODE_READ) &&	/* open for read? */ | ||
358 | !PageUptodate(page) &&		/* not already Uptodate? */ | ||
359 | !PagePrivate(page) &&		/* no i/o request attached? */ | ||
360 | pglen &&			/* valid bytes of file? */ | ||
361 | (end < pglen || offset))	/* won't replace all valid bytes? */ | ||
362 | return 1; | ||
363 | return 0; | ||
364 | } | ||
365 | |||
366 | /* | ||
331 | * This does the "real" work of the write. We must allocate and lock the | 367 | * This does the "real" work of the write. We must allocate and lock the |
332 | * page to be sent back to the generic routine, which then copies the | 368 | * page to be sent back to the generic routine, which then copies the |
333 | * data from user space. | 369 | * data from user space. |
@@ -340,15 +376,16 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, | |||
340 | struct page **pagep, void **fsdata) | 376 | struct page **pagep, void **fsdata) |
341 | { | 377 | { |
342 | int ret; | 378 | int ret; |
343 | pgoff_t index; | 379 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
344 | struct page *page; | 380 | struct page *page; |
345 | index = pos >> PAGE_CACHE_SHIFT; | 381 | int once_thru = 0; |
346 | 382 | ||
347 | dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", | 383 | dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", |
348 | file->f_path.dentry->d_parent->d_name.name, | 384 | file->f_path.dentry->d_parent->d_name.name, |
349 | file->f_path.dentry->d_name.name, | 385 | file->f_path.dentry->d_name.name, |
350 | mapping->host->i_ino, len, (long long) pos); | 386 | mapping->host->i_ino, len, (long long) pos); |
351 | 387 | ||
388 | start: | ||
352 | /* | 389 | /* |
353 | * Prevent starvation issues if someone is doing a consistency | 390 | * Prevent starvation issues if someone is doing a consistency |
354 | * sync-to-disk | 391 | * sync-to-disk |
@@ -367,6 +404,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, | |||
367 | if (ret) { | 404 | if (ret) { |
368 | unlock_page(page); | 405 | unlock_page(page); |
369 | page_cache_release(page); | 406 | page_cache_release(page); |
407 | } else if (!once_thru && | ||
408 | nfs_want_read_modify_write(file, page, pos, len)) { | ||
409 | once_thru = 1; | ||
410 | ret = nfs_readpage(file, page); | ||
411 | page_cache_release(page); | ||
412 | if (!ret) | ||
413 | goto start; | ||
370 | } | 414 | } |
371 | return ret; | 415 | return ret; |
372 | } | 416 | } |
@@ -479,6 +523,7 @@ const struct address_space_operations nfs_file_aops = { | |||
479 | .invalidatepage = nfs_invalidate_page, | 523 | .invalidatepage = nfs_invalidate_page, |
480 | .releasepage = nfs_release_page, | 524 | .releasepage = nfs_release_page, |
481 | .direct_IO = nfs_direct_IO, | 525 | .direct_IO = nfs_direct_IO, |
526 | .migratepage = nfs_migrate_page, | ||
482 | .launder_page = nfs_launder_page, | 527 | .launder_page = nfs_launder_page, |
483 | }; | 528 | }; |
484 | 529 | ||
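A quick worked case of the heuristic above, with assumed numbers (PAGE_CACHE_SIZE = 4096):

/*
 * Write of len = 200 at pos = 10 * 4096 + 100, file open O_RDWR,
 * interior page of a large file:
 *	offset = pos & 4095 = 100, end = 300, pglen = 4096
 * Page not Uptodate, no request attached (!PagePrivate), so
 *	(end < pglen || offset) -> (300 < 4096 || 100) -> true
 * and nfs_write_begin() reads the page once; the eventual flush can
 * then be a single whole-page write instead of a small FILE_SYNC
 * write followed by a read back from the server.
 */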
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 86147b0ab2cf..21a84d45916f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | |||
101 | 101 | ||
102 | static unsigned int fnvhash32(const void *, size_t); | 102 | static unsigned int fnvhash32(const void *, size_t); |
103 | 103 | ||
104 | static struct rpc_pipe_ops idmap_upcall_ops = { | 104 | static const struct rpc_pipe_ops idmap_upcall_ops = { |
105 | .upcall = idmap_pipe_upcall, | 105 | .upcall = idmap_pipe_upcall, |
106 | .downcall = idmap_pipe_downcall, | 106 | .downcall = idmap_pipe_downcall, |
107 | .destroy_msg = idmap_pipe_destroy_msg, | 107 | .destroy_msg = idmap_pipe_destroy_msg, |
@@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp) | |||
119 | if (idmap == NULL) | 119 | if (idmap == NULL) |
120 | return -ENOMEM; | 120 | return -ENOMEM; |
121 | 121 | ||
122 | idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", | 122 | idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, |
123 | idmap, &idmap_upcall_ops, 0); | 123 | "idmap", idmap, &idmap_upcall_ops, 0); |
124 | if (IS_ERR(idmap->idmap_dentry)) { | 124 | if (IS_ERR(idmap->idmap_dentry)) { |
125 | error = PTR_ERR(idmap->idmap_dentry); | 125 | error = PTR_ERR(idmap->idmap_dentry); |
126 | kfree(idmap); | 126 | kfree(idmap); |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd7938eda6a8..060022b4651c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "iostat.h" | 46 | #include "iostat.h" |
47 | #include "internal.h" | 47 | #include "internal.h" |
48 | #include "fscache.h" | 48 | #include "fscache.h" |
49 | #include "dns_resolve.h" | ||
49 | 50 | ||
50 | #define NFSDBG_FACILITY NFSDBG_VFS | 51 | #define NFSDBG_FACILITY NFSDBG_VFS |
51 | 52 | ||
@@ -286,6 +287,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
286 | /* We can't support update_atime(), since the server will reset it */ | 287 | /* We can't support update_atime(), since the server will reset it */ |
287 | inode->i_flags |= S_NOATIME|S_NOCMTIME; | 288 | inode->i_flags |= S_NOATIME|S_NOCMTIME; |
288 | inode->i_mode = fattr->mode; | 289 | inode->i_mode = fattr->mode; |
290 | if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 | ||
291 | && nfs_server_capable(inode, NFS_CAP_MODE)) | ||
292 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
293 | | NFS_INO_INVALID_ACCESS | ||
294 | | NFS_INO_INVALID_ACL; | ||
289 | /* Why so? Because we want revalidate for devices/FIFOs, and | 295 | /* Why so? Because we want revalidate for devices/FIFOs, and |
290 | * that's precisely what we have in nfs_file_inode_operations. | 296 | * that's precisely what we have in nfs_file_inode_operations. |
291 | */ | 297 | */ |
@@ -330,20 +336,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
330 | nfsi->attr_gencount = fattr->gencount; | 336 | nfsi->attr_gencount = fattr->gencount; |
331 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) | 337 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) |
332 | inode->i_atime = fattr->atime; | 338 | inode->i_atime = fattr->atime; |
339 | else if (nfs_server_capable(inode, NFS_CAP_ATIME)) | ||
340 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | ||
333 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) | 341 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) |
334 | inode->i_mtime = fattr->mtime; | 342 | inode->i_mtime = fattr->mtime; |
343 | else if (nfs_server_capable(inode, NFS_CAP_MTIME)) | ||
344 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
345 | | NFS_INO_INVALID_DATA; | ||
335 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) | 346 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) |
336 | inode->i_ctime = fattr->ctime; | 347 | inode->i_ctime = fattr->ctime; |
348 | else if (nfs_server_capable(inode, NFS_CAP_CTIME)) | ||
349 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
350 | | NFS_INO_INVALID_ACCESS | ||
351 | | NFS_INO_INVALID_ACL; | ||
337 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) | 352 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) |
338 | nfsi->change_attr = fattr->change_attr; | 353 | nfsi->change_attr = fattr->change_attr; |
354 | else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) | ||
355 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
356 | | NFS_INO_INVALID_DATA; | ||
339 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) | 357 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) |
340 | inode->i_size = nfs_size_to_loff_t(fattr->size); | 358 | inode->i_size = nfs_size_to_loff_t(fattr->size); |
359 | else | ||
360 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
361 | | NFS_INO_INVALID_DATA | ||
362 | | NFS_INO_REVAL_PAGECACHE; | ||
341 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) | 363 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) |
342 | inode->i_nlink = fattr->nlink; | 364 | inode->i_nlink = fattr->nlink; |
365 | else if (nfs_server_capable(inode, NFS_CAP_NLINK)) | ||
366 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | ||
343 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) | 367 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) |
344 | inode->i_uid = fattr->uid; | 368 | inode->i_uid = fattr->uid; |
369 | else if (nfs_server_capable(inode, NFS_CAP_OWNER)) | ||
370 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
371 | | NFS_INO_INVALID_ACCESS | ||
372 | | NFS_INO_INVALID_ACL; | ||
345 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) | 373 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) |
346 | inode->i_gid = fattr->gid; | 374 | inode->i_gid = fattr->gid; |
375 | else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) | ||
376 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
377 | | NFS_INO_INVALID_ACCESS | ||
378 | | NFS_INO_INVALID_ACL; | ||
347 | if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) | 379 | if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) |
348 | inode->i_blocks = fattr->du.nfs2.blocks; | 380 | inode->i_blocks = fattr->du.nfs2.blocks; |
349 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { | 381 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { |
@@ -1145,6 +1177,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1145 | loff_t cur_isize, new_isize; | 1177 | loff_t cur_isize, new_isize; |
1146 | unsigned long invalid = 0; | 1178 | unsigned long invalid = 0; |
1147 | unsigned long now = jiffies; | 1179 | unsigned long now = jiffies; |
1180 | unsigned long save_cache_validity; | ||
1148 | 1181 | ||
1149 | dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", | 1182 | dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", |
1150 | __func__, inode->i_sb->s_id, inode->i_ino, | 1183 | __func__, inode->i_sb->s_id, inode->i_ino, |
@@ -1171,10 +1204,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1171 | */ | 1204 | */ |
1172 | nfsi->read_cache_jiffies = fattr->time_start; | 1205 | nfsi->read_cache_jiffies = fattr->time_start; |
1173 | 1206 | ||
1174 | if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) | 1207 | save_cache_validity = nfsi->cache_validity; |
1175 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | 1208 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR |
1176 | | NFS_INO_INVALID_ATIME | 1209 | | NFS_INO_INVALID_ATIME |
1177 | | NFS_INO_REVAL_PAGECACHE); | 1210 | | NFS_INO_REVAL_FORCED |
1211 | | NFS_INO_REVAL_PAGECACHE); | ||
1178 | 1212 | ||
1179 | /* Do atomic weak cache consistency updates */ | 1213 | /* Do atomic weak cache consistency updates */ |
1180 | nfs_wcc_update_inode(inode, fattr); | 1214 | nfs_wcc_update_inode(inode, fattr); |
@@ -1189,7 +1223,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1189 | nfs_force_lookup_revalidate(inode); | 1223 | nfs_force_lookup_revalidate(inode); |
1190 | nfsi->change_attr = fattr->change_attr; | 1224 | nfsi->change_attr = fattr->change_attr; |
1191 | } | 1225 | } |
1192 | } | 1226 | } else if (server->caps & NFS_CAP_CHANGE_ATTR) |
1227 | invalid |= save_cache_validity; | ||
1193 | 1228 | ||
1194 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) { | 1229 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) { |
1195 | /* NFSv2/v3: Check if the mtime agrees */ | 1230 | /* NFSv2/v3: Check if the mtime agrees */ |
@@ -1201,7 +1236,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1201 | nfs_force_lookup_revalidate(inode); | 1236 | nfs_force_lookup_revalidate(inode); |
1202 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | 1237 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
1203 | } | 1238 | } |
1204 | } | 1239 | } else if (server->caps & NFS_CAP_MTIME) |
1240 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1241 | | NFS_INO_INVALID_DATA | ||
1242 | | NFS_INO_REVAL_PAGECACHE | ||
1243 | | NFS_INO_REVAL_FORCED); | ||
1244 | |||
1205 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) { | 1245 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) { |
1206 | /* If ctime has changed we should definitely clear access+acl caches */ | 1246 | /* If ctime has changed we should definitely clear access+acl caches */ |
1207 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { | 1247 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { |
@@ -1215,7 +1255,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1215 | } | 1255 | } |
1216 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); | 1256 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); |
1217 | } | 1257 | } |
1218 | } | 1258 | } else if (server->caps & NFS_CAP_CTIME) |
1259 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1260 | | NFS_INO_INVALID_ACCESS | ||
1261 | | NFS_INO_INVALID_ACL | ||
1262 | | NFS_INO_REVAL_FORCED); | ||
1219 | 1263 | ||
1220 | /* Check if our cached file size is stale */ | 1264 | /* Check if our cached file size is stale */ |
1221 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) { | 1265 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) { |
@@ -1231,30 +1275,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1231 | dprintk("NFS: isize change on server for file %s/%ld\n", | 1275 | dprintk("NFS: isize change on server for file %s/%ld\n", |
1232 | inode->i_sb->s_id, inode->i_ino); | 1276 | inode->i_sb->s_id, inode->i_ino); |
1233 | } | 1277 | } |
1234 | } | 1278 | } else |
1279 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1280 | | NFS_INO_REVAL_PAGECACHE | ||
1281 | | NFS_INO_REVAL_FORCED); | ||
1235 | 1282 | ||
1236 | 1283 | ||
1237 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) | 1284 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) |
1238 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); | 1285 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); |
1286 | else if (server->caps & NFS_CAP_ATIME) | ||
1287 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME | ||
1288 | | NFS_INO_REVAL_FORCED); | ||
1239 | 1289 | ||
1240 | if (fattr->valid & NFS_ATTR_FATTR_MODE) { | 1290 | if (fattr->valid & NFS_ATTR_FATTR_MODE) { |
1241 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { | 1291 | if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { |
1242 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1292 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1243 | inode->i_mode = fattr->mode; | 1293 | inode->i_mode = fattr->mode; |
1244 | } | 1294 | } |
1245 | } | 1295 | } else if (server->caps & NFS_CAP_MODE) |
1296 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1297 | | NFS_INO_INVALID_ACCESS | ||
1298 | | NFS_INO_INVALID_ACL | ||
1299 | | NFS_INO_REVAL_FORCED); | ||
1300 | |||
1246 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) { | 1301 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) { |
1247 | if (inode->i_uid != fattr->uid) { | 1302 | if (inode->i_uid != fattr->uid) { |
1248 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1303 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1249 | inode->i_uid = fattr->uid; | 1304 | inode->i_uid = fattr->uid; |
1250 | } | 1305 | } |
1251 | } | 1306 | } else if (server->caps & NFS_CAP_OWNER) |
1307 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1308 | | NFS_INO_INVALID_ACCESS | ||
1309 | | NFS_INO_INVALID_ACL | ||
1310 | | NFS_INO_REVAL_FORCED); | ||
1311 | |||
1252 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) { | 1312 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) { |
1253 | if (inode->i_gid != fattr->gid) { | 1313 | if (inode->i_gid != fattr->gid) { |
1254 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1314 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1255 | inode->i_gid = fattr->gid; | 1315 | inode->i_gid = fattr->gid; |
1256 | } | 1316 | } |
1257 | } | 1317 | } else if (server->caps & NFS_CAP_OWNER_GROUP) |
1318 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1319 | | NFS_INO_INVALID_ACCESS | ||
1320 | | NFS_INO_INVALID_ACL | ||
1321 | | NFS_INO_REVAL_FORCED); | ||
1258 | 1322 | ||
1259 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) { | 1323 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) { |
1260 | if (inode->i_nlink != fattr->nlink) { | 1324 | if (inode->i_nlink != fattr->nlink) { |
@@ -1263,7 +1327,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1263 | invalid |= NFS_INO_INVALID_DATA; | 1327 | invalid |= NFS_INO_INVALID_DATA; |
1264 | inode->i_nlink = fattr->nlink; | 1328 | inode->i_nlink = fattr->nlink; |
1265 | } | 1329 | } |
1266 | } | 1330 | } else if (server->caps & NFS_CAP_NLINK) |
1331 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | ||
1332 | | NFS_INO_REVAL_FORCED); | ||
1267 | 1333 | ||
1268 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { | 1334 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { |
1269 | /* | 1335 | /* |
@@ -1293,9 +1359,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1293 | || S_ISLNK(inode->i_mode))) | 1359 | || S_ISLNK(inode->i_mode))) |
1294 | invalid &= ~NFS_INO_INVALID_DATA; | 1360 | invalid &= ~NFS_INO_INVALID_DATA; |
1295 | if (!nfs_have_delegation(inode, FMODE_READ) || | 1361 | if (!nfs_have_delegation(inode, FMODE_READ) || |
1296 | (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) | 1362 | (save_cache_validity & NFS_INO_REVAL_FORCED)) |
1297 | nfsi->cache_validity |= invalid; | 1363 | nfsi->cache_validity |= invalid; |
1298 | nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; | ||
1299 | 1364 | ||
1300 | return 0; | 1365 | return 0; |
1301 | out_changed: | 1366 | out_changed: |
@@ -1442,6 +1507,10 @@ static int __init init_nfs_fs(void) | |||
1442 | { | 1507 | { |
1443 | int err; | 1508 | int err; |
1444 | 1509 | ||
1510 | err = nfs_dns_resolver_init(); | ||
1511 | if (err < 0) | ||
1512 | goto out8; | ||
1513 | |||
1445 | err = nfs_fscache_register(); | 1514 | err = nfs_fscache_register(); |
1446 | if (err < 0) | 1515 | if (err < 0) |
1447 | goto out7; | 1516 | goto out7; |
@@ -1500,6 +1569,8 @@ out5: | |||
1500 | out6: | 1569 | out6: |
1501 | nfs_fscache_unregister(); | 1570 | nfs_fscache_unregister(); |
1502 | out7: | 1571 | out7: |
1572 | nfs_dns_resolver_destroy(); | ||
1573 | out8: | ||
1503 | return err; | 1574 | return err; |
1504 | } | 1575 | } |
1505 | 1576 | ||
@@ -1511,6 +1582,7 @@ static void __exit exit_nfs_fs(void) | |||
1511 | nfs_destroy_inodecache(); | 1582 | nfs_destroy_inodecache(); |
1512 | nfs_destroy_nfspagecache(); | 1583 | nfs_destroy_nfspagecache(); |
1513 | nfs_fscache_unregister(); | 1584 | nfs_fscache_unregister(); |
1585 | nfs_dns_resolver_destroy(); | ||
1514 | #ifdef CONFIG_PROC_FS | 1586 | #ifdef CONFIG_PROC_FS |
1515 | rpc_proc_unregister("nfs"); | 1587 | rpc_proc_unregister("nfs"); |
1516 | #endif | 1588 | #endif |
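Every attribute in nfs_update_inode() now follows the same three-way shape; condensed to its skeleton with mtime as the example (a summary of the hunks above, not new code):

save_cache_validity = nfsi->cache_validity;
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
		| NFS_INO_REVAL_FORCED | NFS_INO_REVAL_PAGECACHE);

if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
	/* the server reported the attribute: apply it */
	memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME) {
	/* the server can report it but did not this time: re-arm the
	 * validity bits cleared above, instead of pretending the
	 * cached value was just revalidated */
	invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
			| NFS_INO_INVALID_DATA
			| NFS_INO_REVAL_PAGECACHE
			| NFS_INO_REVAL_FORCED);
}
/* else: the server never reports mtime, so there is nothing to distrust */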
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7dd90a6769d0..e21b1bb9972f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -49,6 +49,11 @@ struct nfs_clone_mount { | |||
49 | #define NFS_MAX_SECFLAVORS (12) | 49 | #define NFS_MAX_SECFLAVORS (12) |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Value used if the user did not specify a port value. | ||
53 | */ | ||
54 | #define NFS_UNSPEC_PORT (-1) | ||
55 | |||
56 | /* | ||
52 | * In-kernel mount arguments | 57 | * In-kernel mount arguments |
53 | */ | 58 | */ |
54 | struct nfs_parsed_mount_data { | 59 | struct nfs_parsed_mount_data { |
@@ -63,6 +68,7 @@ struct nfs_parsed_mount_data { | |||
63 | unsigned int auth_flavor_len; | 68 | unsigned int auth_flavor_len; |
64 | rpc_authflavor_t auth_flavors[1]; | 69 | rpc_authflavor_t auth_flavors[1]; |
65 | char *client_address; | 70 | char *client_address; |
71 | unsigned int version; | ||
66 | unsigned int minorversion; | 72 | unsigned int minorversion; |
67 | char *fscache_uniq; | 73 | char *fscache_uniq; |
68 | 74 | ||
@@ -71,7 +77,7 @@ struct nfs_parsed_mount_data { | |||
71 | size_t addrlen; | 77 | size_t addrlen; |
72 | char *hostname; | 78 | char *hostname; |
73 | u32 version; | 79 | u32 version; |
74 | unsigned short port; | 80 | int port; |
75 | unsigned short protocol; | 81 | unsigned short protocol; |
76 | } mount_server; | 82 | } mount_server; |
77 | 83 | ||
@@ -80,7 +86,7 @@ struct nfs_parsed_mount_data { | |||
80 | size_t addrlen; | 86 | size_t addrlen; |
81 | char *hostname; | 87 | char *hostname; |
82 | char *export_path; | 88 | char *export_path; |
83 | unsigned short port; | 89 | int port; |
84 | unsigned short protocol; | 90 | unsigned short protocol; |
85 | } nfs_server; | 91 | } nfs_server; |
86 | 92 | ||
@@ -102,6 +108,7 @@ struct nfs_mount_request { | |||
102 | }; | 108 | }; |
103 | 109 | ||
104 | extern int nfs_mount(struct nfs_mount_request *info); | 110 | extern int nfs_mount(struct nfs_mount_request *info); |
111 | extern void nfs_umount(const struct nfs_mount_request *info); | ||
105 | 112 | ||
106 | /* client.c */ | 113 | /* client.c */ |
107 | extern struct rpc_program nfs_program; | 114 | extern struct rpc_program nfs_program; |
@@ -213,7 +220,6 @@ void nfs_zap_acl_cache(struct inode *inode); | |||
213 | extern int nfs_wait_bit_killable(void *word); | 220 | extern int nfs_wait_bit_killable(void *word); |
214 | 221 | ||
215 | /* super.c */ | 222 | /* super.c */ |
216 | void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); | ||
217 | extern struct file_system_type nfs_xdev_fs_type; | 223 | extern struct file_system_type nfs_xdev_fs_type; |
218 | #ifdef CONFIG_NFS_V4 | 224 | #ifdef CONFIG_NFS_V4 |
219 | extern struct file_system_type nfs4_xdev_fs_type; | 225 | extern struct file_system_type nfs4_xdev_fs_type; |
@@ -248,6 +254,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | |||
248 | 254 | ||
249 | /* write.c */ | 255 | /* write.c */ |
250 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | 256 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); |
257 | #ifdef CONFIG_MIGRATION | ||
258 | extern int nfs_migrate_page(struct address_space *, | ||
259 | struct page *, struct page *); | ||
260 | #else | ||
261 | #define nfs_migrate_page NULL | ||
262 | #endif | ||
251 | 263 | ||
252 | /* nfs4proc.c */ | 264 | /* nfs4proc.c */ |
253 | extern int _nfs4_call_sync(struct nfs_server *server, | 265 | extern int _nfs4_call_sync(struct nfs_server *server, |
@@ -368,24 +380,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
368 | return ((unsigned long)len + (unsigned long)base + | 380 | return ((unsigned long)len + (unsigned long)base + |
369 | PAGE_SIZE - 1) >> PAGE_SHIFT; | 381 | PAGE_SIZE - 1) >> PAGE_SHIFT; |
370 | } | 382 | } |
371 | |||
372 | #define IPV6_SCOPE_DELIMITER '%' | ||
373 | |||
374 | /* | ||
375 | * Set the port number in an address. Be agnostic about the address | ||
376 | * family. | ||
377 | */ | ||
378 | static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
379 | { | ||
380 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
381 | struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; | ||
382 | |||
383 | switch (sap->sa_family) { | ||
384 | case AF_INET: | ||
385 | ap->sin_port = htons(port); | ||
386 | break; | ||
387 | case AF_INET6: | ||
388 | ap6->sin6_port = htons(port); | ||
389 | break; | ||
390 | } | ||
391 | } | ||
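nfs_set_port() is deleted here rather than fixed; the callers touched by this merge (see fs/nfs/nfs4namespace.c above) switch to rpc_set_port() from the sunrpc headers, so the helper now lives at the layer that owns the sockaddr handling. Presumably the shared version keeps the same address-family switch as the removed code; a hedged reconstruction:

static inline void rpc_set_port(struct sockaddr *sap, unsigned short port)
{
	switch (sap->sa_family) {
	case AF_INET:
		((struct sockaddr_in *)sap)->sin_port = htons(port);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
		break;
	}
}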
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 38ef9eaec407..0adefc40cc89 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
@@ -209,6 +209,71 @@ out_mnt_err: | |||
209 | goto out; | 209 | goto out; |
210 | } | 210 | } |
211 | 211 | ||
212 | /** | ||
213 | * nfs_umount - Notify a server that we have unmounted this export | ||
214 | * @info: pointer to umount request arguments | ||
215 | * | ||
216 | * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always | ||
217 | * use UDP. | ||
218 | */ | ||
219 | void nfs_umount(const struct nfs_mount_request *info) | ||
220 | { | ||
221 | static const struct rpc_timeout nfs_umnt_timeout = { | ||
222 | .to_initval = 1 * HZ, | ||
223 | .to_maxval = 3 * HZ, | ||
224 | .to_retries = 2, | ||
225 | }; | ||
226 | struct rpc_create_args args = { | ||
227 | .protocol = IPPROTO_UDP, | ||
228 | .address = info->sap, | ||
229 | .addrsize = info->salen, | ||
230 | .timeout = &nfs_umnt_timeout, | ||
231 | .servername = info->hostname, | ||
232 | .program = &mnt_program, | ||
233 | .version = info->version, | ||
234 | .authflavor = RPC_AUTH_UNIX, | ||
235 | .flags = RPC_CLNT_CREATE_NOPING, | ||
236 | }; | ||
237 | struct mountres result; | ||
238 | struct rpc_message msg = { | ||
239 | .rpc_argp = info->dirpath, | ||
240 | .rpc_resp = &result, | ||
241 | }; | ||
242 | struct rpc_clnt *clnt; | ||
243 | int status; | ||
244 | |||
245 | if (info->noresvport) | ||
246 | args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; | ||
247 | |||
248 | clnt = rpc_create(&args); | ||
249 | if (unlikely(IS_ERR(clnt))) | ||
250 | goto out_clnt_err; | ||
251 | |||
252 | dprintk("NFS: sending UMNT request for %s:%s\n", | ||
253 | (info->hostname ? info->hostname : "server"), info->dirpath); | ||
254 | |||
255 | if (info->version == NFS_MNT3_VERSION) | ||
256 | msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT]; | ||
257 | else | ||
258 | msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT]; | ||
259 | |||
260 | status = rpc_call_sync(clnt, &msg, 0); | ||
261 | rpc_shutdown_client(clnt); | ||
262 | |||
263 | if (unlikely(status < 0)) | ||
264 | goto out_call_err; | ||
265 | |||
266 | return; | ||
267 | |||
268 | out_clnt_err: | ||
269 | dprintk("NFS: failed to create UMNT RPC client, status=%ld\n", | ||
270 | PTR_ERR(clnt)); | ||
271 | return; | ||
272 | |||
273 | out_call_err: | ||
274 | dprintk("NFS: UMNT request failed, status=%d\n", status); | ||
275 | } | ||
276 | |||
212 | /* | 277 | /* |
213 | * XDR encode/decode functions for MOUNT | 278 | * XDR encode/decode functions for MOUNT |
214 | */ | 279 | */ |
@@ -258,7 +323,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res) | |||
258 | return -EIO; | 323 | return -EIO; |
259 | status = ntohl(*p); | 324 | status = ntohl(*p); |
260 | 325 | ||
261 | for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { | 326 | for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { |
262 | if (mnt_errtbl[i].status == status) { | 327 | if (mnt_errtbl[i].status == status) { |
263 | res->errno = mnt_errtbl[i].errno; | 328 | res->errno = mnt_errtbl[i].errno; |
264 | return 0; | 329 | return 0; |
@@ -309,7 +374,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) | |||
309 | return -EIO; | 374 | return -EIO; |
310 | status = ntohl(*p); | 375 | status = ntohl(*p); |
311 | 376 | ||
312 | for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { | 377 | for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { |
313 | if (mnt3_errtbl[i].status == status) { | 378 | if (mnt3_errtbl[i].status == status) { |
314 | res->errno = mnt3_errtbl[i].errno; | 379 | res->errno = mnt3_errtbl[i].errno; |
315 | return 0; | 380 | return 0; |
@@ -407,6 +472,13 @@ static struct rpc_procinfo mnt_procedures[] = { | |||
407 | .p_statidx = MOUNTPROC_MNT, | 472 | .p_statidx = MOUNTPROC_MNT, |
408 | .p_name = "MOUNT", | 473 | .p_name = "MOUNT", |
409 | }, | 474 | }, |
475 | [MOUNTPROC_UMNT] = { | ||
476 | .p_proc = MOUNTPROC_UMNT, | ||
477 | .p_encode = (kxdrproc_t)mnt_enc_dirpath, | ||
478 | .p_arglen = MNT_enc_dirpath_sz, | ||
479 | .p_statidx = MOUNTPROC_UMNT, | ||
480 | .p_name = "UMOUNT", | ||
481 | }, | ||
410 | }; | 482 | }; |
411 | 483 | ||
412 | static struct rpc_procinfo mnt3_procedures[] = { | 484 | static struct rpc_procinfo mnt3_procedures[] = { |
@@ -419,6 +491,13 @@ static struct rpc_procinfo mnt3_procedures[] = { | |||
419 | .p_statidx = MOUNTPROC3_MNT, | 491 | .p_statidx = MOUNTPROC3_MNT, |
420 | .p_name = "MOUNT", | 492 | .p_name = "MOUNT", |
421 | }, | 493 | }, |
494 | [MOUNTPROC3_UMNT] = { | ||
495 | .p_proc = MOUNTPROC3_UMNT, | ||
496 | .p_encode = (kxdrproc_t)mnt_enc_dirpath, | ||
497 | .p_arglen = MNT_enc_dirpath_sz, | ||
498 | .p_statidx = MOUNTPROC3_UMNT, | ||
499 | .p_name = "UMOUNT", | ||
500 | }, | ||
422 | }; | 501 | }; |
423 | 502 | ||
424 | 503 | ||
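Since UMNT is advisory, nfs_umount() swallows its own errors, and a caller just fills in an nfs_mount_request and fires. A hedged sketch of the caller side, using only the fields the function body above actually reads; the values are illustrative:

struct nfs_mount_request request = {
	.sap	    = (struct sockaddr *)&mountd_addr,
	.salen	    = mountd_addrlen,
	.dirpath    = export_path,	/* the same path sent at MNT time */
	.version    = NFS_MNT3_VERSION,	/* selects MOUNTPROC3_UMNT */
	.hostname   = hostname,
	.noresvport = 0,		/* keep the privileged source port */
};

nfs_umount(&request);	/* void: failures surface only via dprintk() */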
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d0cc5ce0edfe..ee6a13f05443 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -299,7 +299,6 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) | |||
299 | 299 | ||
300 | /* | 300 | /* |
301 | * Create a regular file. | 301 | * Create a regular file. |
302 | * For now, we don't implement O_EXCL. | ||
303 | */ | 302 | */ |
304 | static int | 303 | static int |
305 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 304 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2a2a0a7143ad..2636c26d56fa 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/inet.h> | 17 | #include <linux/inet.h> |
18 | #include "internal.h" | 18 | #include "internal.h" |
19 | #include "nfs4_fs.h" | 19 | #include "nfs4_fs.h" |
20 | #include "dns_resolve.h" | ||
20 | 21 | ||
21 | #define NFSDBG_FACILITY NFSDBG_VFS | 22 | #define NFSDBG_FACILITY NFSDBG_VFS |
22 | 23 | ||
@@ -95,6 +96,20 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, | |||
95 | return 0; | 96 | return 0; |
96 | } | 97 | } |
97 | 98 | ||
99 | static size_t nfs_parse_server_name(char *string, size_t len, | ||
100 | struct sockaddr *sa, size_t salen) | ||
101 | { | ||
102 | ssize_t ret; | ||
103 | |||
104 | ret = rpc_pton(string, len, sa, salen); | ||
105 | if (ret == 0) { | ||
106 | ret = nfs_dns_resolve_name(string, len, sa, salen); | ||
107 | if (ret < 0) | ||
108 | ret = 0; | ||
109 | } | ||
110 | return ret; | ||
111 | } | ||
112 | |||
98 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, | 113 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
99 | char *page, char *page2, | 114 | char *page, char *page2, |
100 | const struct nfs4_fs_location *location) | 115 | const struct nfs4_fs_location *location) |
@@ -121,11 +136,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, | |||
121 | 136 | ||
122 | if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) | 137 | if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) |
123 | continue; | 138 | continue; |
124 | nfs_parse_ip_address(buf->data, buf->len, | 139 | mountdata->addrlen = nfs_parse_server_name(buf->data, |
125 | mountdata->addr, &mountdata->addrlen); | 140 | buf->len, |
126 | if (mountdata->addr->sa_family == AF_UNSPEC) | 141 | mountdata->addr, mountdata->addrlen); |
142 | if (mountdata->addrlen == 0) | ||
127 | continue; | 143 | continue; |
128 | nfs_set_port(mountdata->addr, NFS_PORT); | 144 | rpc_set_port(mountdata->addr, NFS_PORT); |
129 | 145 | ||
130 | memcpy(page2, buf->data, buf->len); | 146 | memcpy(page2, buf->data, buf->len); |
131 | page2[buf->len] = '\0'; | 147 | page2[buf->len] = '\0'; |
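nfs_parse_server_name() makes v4 referral handling tolerant of non-numeric fs_locations entries: an address literal is resolved locally by rpc_pton(), anything else goes through the new DNS cache, and every failure mode collapses to a length of 0 so try_location() can simply skip the entry. Illustrative inputs (the host names are hypothetical):

/*
 *   "192.0.2.1"      -> rpc_pton() succeeds; no upcall needed
 *   "fs.example.net" -> rpc_pton() returns 0, so nfs_dns_resolve_name()
 *                       asks userspace; a negative answer (-ESRCH) or
 *                       any other error is flattened to 0
 *   anything with an IPv6 scope delimiter is rejected earlier by the
 *   memchr(IPV6_SCOPE_DELIMITER) check in try_location()
 */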
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6917311f201c..be6544aef41f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -61,6 +61,8 @@ | |||
61 | #define NFS4_POLL_RETRY_MIN (HZ/10) | 61 | #define NFS4_POLL_RETRY_MIN (HZ/10) |
62 | #define NFS4_POLL_RETRY_MAX (15*HZ) | 62 | #define NFS4_POLL_RETRY_MAX (15*HZ) |
63 | 63 | ||
64 | #define NFS4_MAX_LOOP_ON_RECOVER (10) | ||
65 | |||
64 | struct nfs4_opendata; | 66 | struct nfs4_opendata; |
65 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 67 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
66 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 68 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
@@ -426,17 +428,19 @@ out: | |||
426 | static int nfs4_recover_session(struct nfs4_session *session) | 428 | static int nfs4_recover_session(struct nfs4_session *session) |
427 | { | 429 | { |
428 | struct nfs_client *clp = session->clp; | 430 | struct nfs_client *clp = session->clp; |
431 | unsigned int loop; | ||
429 | int ret; | 432 | int ret; |
430 | 433 | ||
431 | for (;;) { | 434 | for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { |
432 | ret = nfs4_wait_clnt_recover(clp); | 435 | ret = nfs4_wait_clnt_recover(clp); |
433 | if (ret != 0) | 436 | if (ret != 0) |
434 | return ret; | 437 | break; |
435 | if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) | 438 | if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) |
436 | break; | 439 | break; |
437 | nfs4_schedule_state_manager(clp); | 440 | nfs4_schedule_state_manager(clp); |
441 | ret = -EIO; | ||
438 | } | 442 | } |
439 | return 0; | 443 | return ret; |
440 | } | 444 | } |
441 | 445 | ||
442 | static int nfs41_setup_sequence(struct nfs4_session *session, | 446 | static int nfs41_setup_sequence(struct nfs4_session *session, |
@@ -1444,18 +1448,20 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
1444 | static int nfs4_recover_expired_lease(struct nfs_server *server) | 1448 | static int nfs4_recover_expired_lease(struct nfs_server *server) |
1445 | { | 1449 | { |
1446 | struct nfs_client *clp = server->nfs_client; | 1450 | struct nfs_client *clp = server->nfs_client; |
1451 | unsigned int loop; | ||
1447 | int ret; | 1452 | int ret; |
1448 | 1453 | ||
1449 | for (;;) { | 1454 | for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { |
1450 | ret = nfs4_wait_clnt_recover(clp); | 1455 | ret = nfs4_wait_clnt_recover(clp); |
1451 | if (ret != 0) | 1456 | if (ret != 0) |
1452 | return ret; | 1457 | break; |
1453 | if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && | 1458 | if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && |
1454 | !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) | 1459 | !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) |
1455 | break; | 1460 | break; |
1456 | nfs4_schedule_state_recovery(clp); | 1461 | nfs4_schedule_state_recovery(clp); |
1462 | ret = -EIO; | ||
1457 | } | 1463 | } |
1458 | return 0; | 1464 | return ret; |
1459 | } | 1465 | } |
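
Both recovery loops trade an unbounded for (;;) for a countdown of NFS4_MAX_LOOP_ON_RECOVER iterations and now report -EIO instead of returning 0 when the state bit refuses to clear, so a wedged state manager can no longer spin the caller forever. A self-contained sketch of the same control flow, with stub predicates standing in for nfs4_wait_clnt_recover() and the cl_state test:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_LOOP_ON_RECOVER 10

    /* Stubs: pretend recovery succeeds after three passes. */
    static int wait_for_recovery(void) { return 0; }
    static int still_needs_recovery(void) { static int n = 3; return n-- > 0; }
    static void kick_state_manager(void) { }

    static int recover(void)
    {
        unsigned int loop;
        int ret = 0;

        for (loop = MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
            ret = wait_for_recovery();
            if (ret != 0)
                break;            /* hard error from the wait */
            if (!still_needs_recovery())
                break;            /* recovered: ret stays 0 */
            kick_state_manager();
            ret = -EIO;           /* value left if the loop runs dry */
        }
        return ret;
    }

    int main(void)
    {
        printf("recover() = %d\n", recover());
        return 0;
    }
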
1460 | 1466 | ||
1461 | /* | 1467 | /* |
@@ -1997,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f | |||
1997 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2003 | status = nfs4_call_sync(server, &msg, &args, &res, 0); |
1998 | if (status == 0) { | 2004 | if (status == 0) { |
1999 | memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); | 2005 | memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); |
2006 | server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| | ||
2007 | NFS_CAP_SYMLINKS|NFS_CAP_FILEID| | ||
2008 | NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| | ||
2009 | NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| | ||
2010 | NFS_CAP_CTIME|NFS_CAP_MTIME); | ||
2000 | if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) | 2011 | if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) |
2001 | server->caps |= NFS_CAP_ACLS; | 2012 | server->caps |= NFS_CAP_ACLS; |
2002 | if (res.has_links != 0) | 2013 | if (res.has_links != 0) |
2003 | server->caps |= NFS_CAP_HARDLINKS; | 2014 | server->caps |= NFS_CAP_HARDLINKS; |
2004 | if (res.has_symlinks != 0) | 2015 | if (res.has_symlinks != 0) |
2005 | server->caps |= NFS_CAP_SYMLINKS; | 2016 | server->caps |= NFS_CAP_SYMLINKS; |
2017 | if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) | ||
2018 | server->caps |= NFS_CAP_FILEID; | ||
2019 | if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) | ||
2020 | server->caps |= NFS_CAP_MODE; | ||
2021 | if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) | ||
2022 | server->caps |= NFS_CAP_NLINK; | ||
2023 | if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) | ||
2024 | server->caps |= NFS_CAP_OWNER; | ||
2025 | if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) | ||
2026 | server->caps |= NFS_CAP_OWNER_GROUP; | ||
2027 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) | ||
2028 | server->caps |= NFS_CAP_ATIME; | ||
2029 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) | ||
2030 | server->caps |= NFS_CAP_CTIME; | ||
2031 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) | ||
2032 | server->caps |= NFS_CAP_MTIME; | ||
2033 | |||
2006 | memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); | 2034 | memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); |
2007 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; | 2035 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; |
2008 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; | 2036 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; |
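
_nfs4_server_capabilities() now clears every derived NFS_CAP_* bit before re-deriving each one from the attribute bitmask the server returned, so a capability cached from an earlier probe cannot outlive the attribute that justified it. A sketch of the clear-then-set idiom — the flag and bitmask names here are invented for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical capability flags and bitmask bits, for illustration only. */
    #define CAP_MODE          (1u << 0)
    #define CAP_OWNER         (1u << 1)
    #define CAP_MTIME         (1u << 2)

    #define WORD1_MODE        (1u << 4)
    #define WORD1_OWNER       (1u << 5)
    #define WORD1_TIME_MODIFY (1u << 6)

    static uint32_t update_caps(uint32_t caps, const uint32_t bitmask[2])
    {
        /* Drop every derived bit first; stale capabilities must not survive. */
        caps &= ~(CAP_MODE | CAP_OWNER | CAP_MTIME);

        if (bitmask[1] & WORD1_MODE)
            caps |= CAP_MODE;
        if (bitmask[1] & WORD1_OWNER)
            caps |= CAP_OWNER;
        if (bitmask[1] & WORD1_TIME_MODIFY)
            caps |= CAP_MTIME;
        return caps;
    }

    int main(void)
    {
        uint32_t bm[2] = { 0, WORD1_MODE | WORD1_TIME_MODIFY };
        printf("caps = %#x\n", (unsigned)update_caps(0xffffffffu, bm));
        return 0;
    }
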
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 65ca8c18476f..1434080aefeb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1250,8 +1250,8 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1250 | continue; | 1250 | continue; |
1251 | } | 1251 | } |
1252 | /* Initialize or reset the session */ | 1252 | /* Initialize or reset the session */ |
1253 | if (nfs4_has_session(clp) && | 1253 | if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) |
1254 | test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { | 1254 | && nfs4_has_session(clp)) { |
1255 | if (clp->cl_cons_state == NFS_CS_SESSION_INITING) | 1255 | if (clp->cl_cons_state == NFS_CS_SESSION_INITING) |
1256 | status = nfs4_initialize_session(clp); | 1256 | status = nfs4_initialize_session(clp); |
1257 | else | 1257 | else |
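
The operand swap above is load-bearing: && short-circuits, so with nfs4_has_session() first a v4.0 client would never execute test_and_clear_bit(), leaving NFS4CLNT_SESSION_SETUP set indefinitely. With the side-effecting test first, the flag is consumed whether or not the mount has a session. A two-branch illustration of the pitfall:

    #include <stdbool.h>
    #include <stdio.h>

    static bool cleared;
    static bool test_and_clear(void) { cleared = true; return true; }
    static bool has_session(void) { return false; }    /* e.g. a v4.0 mount */

    int main(void)
    {
        /* Wrong order: has_session() is false, so the bit is never cleared. */
        if (has_session() && test_and_clear())
            ;
        printf("bit cleared (wrong order): %d\n", cleared);

        /* Patched order: the flag is consumed regardless of session support. */
        if (test_and_clear() && has_session())
            ;
        printf("bit cleared (patched order): %d\n", cleared);
        return 0;
    }
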
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 617273e7d47f..cfc30d362f94 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -702,29 +702,12 @@ struct compound_hdr { | |||
702 | u32 minorversion; | 702 | u32 minorversion; |
703 | }; | 703 | }; |
704 | 704 | ||
705 | /* | 705 | static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) |
706 | * START OF "GENERIC" ENCODE ROUTINES. | 706 | { |
707 | * These may look a little ugly since they are imported from a "generic" | 707 | __be32 *p = xdr_reserve_space(xdr, nbytes); |
708 | * set of XDR encode/decode routines which are intended to be shared by | 708 | BUG_ON(!p); |
709 | * all of our NFSv4 implementations (OpenBSD, MacOS X...). | 709 | return p; |
710 | * | 710 | } |
711 | * If the pain of reading these is too great, it should be a straightforward | ||
712 | * task to translate them into Linux-specific versions which are more | ||
713 | * consistent with the style used in NFSv2/v3... | ||
714 | */ | ||
715 | #define WRITE32(n) *p++ = htonl(n) | ||
716 | #define WRITE64(n) do { \ | ||
717 | *p++ = htonl((uint32_t)((n) >> 32)); \ | ||
718 | *p++ = htonl((uint32_t)(n)); \ | ||
719 | } while (0) | ||
720 | #define WRITEMEM(ptr,nbytes) do { \ | ||
721 | p = xdr_encode_opaque_fixed(p, ptr, nbytes); \ | ||
722 | } while (0) | ||
723 | |||
724 | #define RESERVE_SPACE(nbytes) do { \ | ||
725 | p = xdr_reserve_space(xdr, nbytes); \ | ||
726 | BUG_ON(!p); \ | ||
727 | } while (0) | ||
728 | 711 | ||
729 | static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) | 712 | static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) |
730 | { | 713 | { |
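
This is the heart of the nfs4xdr.c rework: the RESERVE_SPACE/WRITE32/WRITE64/WRITEMEM macros, which grew and mutated a hidden local p behind the caller's back, become an explicit p = reserve_space(xdr, n) followed by visible cpu_to_be32() stores, xdr_encode_hyper() for 64-bit values, and xdr_encode_opaque*() for byte strings. A userspace sketch of the pattern, with htonl() and a flat buffer standing in for cpu_to_be32() and the xdr_stream:

    #include <arpa/inet.h>
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct buf { uint32_t words[64]; size_t used; };

    /* Analogue of reserve_space(): hand back a pointer to nbytes of buffer. */
    static uint32_t *reserve_space(struct buf *b, size_t nbytes)
    {
        uint32_t *p = b->words + b->used;
        assert((nbytes & 3) == 0 && b->used + nbytes / 4 <= 64);
        b->used += nbytes / 4;
        return p;
    }

    /* Analogue of xdr_encode_hyper(): big-endian 64-bit, returns advanced p. */
    static uint32_t *encode_hyper(uint32_t *p, uint64_t v)
    {
        *p++ = htonl((uint32_t)(v >> 32));
        *p++ = htonl((uint32_t)v);
        return p;
    }

    int main(void)
    {
        struct buf b = { .used = 0 };
        uint32_t *p;

        /* Same shape as the patched encode_commit(): opcode, offset, count. */
        p = reserve_space(&b, 16);
        *p++ = htonl(5);           /* stand-in opcode */
        p = encode_hyper(p, 4096); /* 64-bit offset */
        *p = htonl(512);           /* 32-bit count */
        printf("encoded %zu words\n", b.used);
        return 0;
    }
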
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr, | |||
749 | 732 | ||
750 | dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); | 733 | dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); |
751 | BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); | 734 | BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); |
752 | RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); | 735 | p = reserve_space(xdr, 4 + hdr->taglen + 8); |
753 | WRITE32(hdr->taglen); | 736 | p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); |
754 | WRITEMEM(hdr->tag, hdr->taglen); | 737 | *p++ = cpu_to_be32(hdr->minorversion); |
755 | WRITE32(hdr->minorversion); | ||
756 | hdr->nops_p = p; | 738 | hdr->nops_p = p; |
757 | WRITE32(hdr->nops); | 739 | *p = cpu_to_be32(hdr->nops); |
758 | } | 740 | } |
759 | 741 | ||
760 | static void encode_nops(struct compound_hdr *hdr) | 742 | static void encode_nops(struct compound_hdr *hdr) |
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
829 | len += 16; | 811 | len += 16; |
830 | else if (iap->ia_valid & ATTR_MTIME) | 812 | else if (iap->ia_valid & ATTR_MTIME) |
831 | len += 4; | 813 | len += 4; |
832 | RESERVE_SPACE(len); | 814 | p = reserve_space(xdr, len); |
833 | 815 | ||
834 | /* | 816 | /* |
835 | * We write the bitmap length now, but leave the bitmap and the attribute | 817 | * We write the bitmap length now, but leave the bitmap and the attribute |
836 | * buffer length to be backfilled at the end of this routine. | 818 | * buffer length to be backfilled at the end of this routine. |
837 | */ | 819 | */ |
838 | WRITE32(2); | 820 | *p++ = cpu_to_be32(2); |
839 | q = p; | 821 | q = p; |
840 | p += 3; | 822 | p += 3; |
841 | 823 | ||
842 | if (iap->ia_valid & ATTR_SIZE) { | 824 | if (iap->ia_valid & ATTR_SIZE) { |
843 | bmval0 |= FATTR4_WORD0_SIZE; | 825 | bmval0 |= FATTR4_WORD0_SIZE; |
844 | WRITE64(iap->ia_size); | 826 | p = xdr_encode_hyper(p, iap->ia_size); |
845 | } | 827 | } |
846 | if (iap->ia_valid & ATTR_MODE) { | 828 | if (iap->ia_valid & ATTR_MODE) { |
847 | bmval1 |= FATTR4_WORD1_MODE; | 829 | bmval1 |= FATTR4_WORD1_MODE; |
848 | WRITE32(iap->ia_mode & S_IALLUGO); | 830 | *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO); |
849 | } | 831 | } |
850 | if (iap->ia_valid & ATTR_UID) { | 832 | if (iap->ia_valid & ATTR_UID) { |
851 | bmval1 |= FATTR4_WORD1_OWNER; | 833 | bmval1 |= FATTR4_WORD1_OWNER; |
852 | WRITE32(owner_namelen); | 834 | p = xdr_encode_opaque(p, owner_name, owner_namelen); |
853 | WRITEMEM(owner_name, owner_namelen); | ||
854 | } | 835 | } |
855 | if (iap->ia_valid & ATTR_GID) { | 836 | if (iap->ia_valid & ATTR_GID) { |
856 | bmval1 |= FATTR4_WORD1_OWNER_GROUP; | 837 | bmval1 |= FATTR4_WORD1_OWNER_GROUP; |
857 | WRITE32(owner_grouplen); | 838 | p = xdr_encode_opaque(p, owner_group, owner_grouplen); |
858 | WRITEMEM(owner_group, owner_grouplen); | ||
859 | } | 839 | } |
860 | if (iap->ia_valid & ATTR_ATIME_SET) { | 840 | if (iap->ia_valid & ATTR_ATIME_SET) { |
861 | bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; | 841 | bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; |
862 | WRITE32(NFS4_SET_TO_CLIENT_TIME); | 842 | *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); |
863 | WRITE32(0); | 843 | *p++ = cpu_to_be32(0); |
864 | WRITE32(iap->ia_mtime.tv_sec); | 844 | *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); |
865 | WRITE32(iap->ia_mtime.tv_nsec); | 845 | *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); |
866 | } | 846 | } |
867 | else if (iap->ia_valid & ATTR_ATIME) { | 847 | else if (iap->ia_valid & ATTR_ATIME) { |
868 | bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; | 848 | bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; |
869 | WRITE32(NFS4_SET_TO_SERVER_TIME); | 849 | *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); |
870 | } | 850 | } |
871 | if (iap->ia_valid & ATTR_MTIME_SET) { | 851 | if (iap->ia_valid & ATTR_MTIME_SET) { |
872 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; | 852 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; |
873 | WRITE32(NFS4_SET_TO_CLIENT_TIME); | 853 | *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); |
874 | WRITE32(0); | 854 | *p++ = cpu_to_be32(0); |
875 | WRITE32(iap->ia_mtime.tv_sec); | 855 | *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); |
876 | WRITE32(iap->ia_mtime.tv_nsec); | 856 | *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); |
877 | } | 857 | } |
878 | else if (iap->ia_valid & ATTR_MTIME) { | 858 | else if (iap->ia_valid & ATTR_MTIME) { |
879 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; | 859 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; |
880 | WRITE32(NFS4_SET_TO_SERVER_TIME); | 860 | *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); |
881 | } | 861 | } |
882 | 862 | ||
883 | /* | 863 | /* |
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
891 | len = (char *)p - (char *)q - 12; | 871 | len = (char *)p - (char *)q - 12; |
892 | *q++ = htonl(bmval0); | 872 | *q++ = htonl(bmval0); |
893 | *q++ = htonl(bmval1); | 873 | *q++ = htonl(bmval1); |
894 | *q++ = htonl(len); | 874 | *q = htonl(len); |
895 | 875 | ||
896 | /* out: */ | 876 | /* out: */ |
897 | } | 877 | } |
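
encode_attrs() keeps its backfill trick through the conversion: reserve worst-case space, skip three words (q = p; p += 3;), emit only the attributes present in ia_valid while accumulating bmval0/bmval1, then write the bitmap words and the byte length once the distance p - q is known; the final hunk also drops a stray ++ on the last store, since q is dead afterwards. A sketch of the backfill idiom:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t buf[16], *p = buf, *q;
        uint32_t bm0 = 0, bm1 = 0;
        int have_size = 1, have_mode = 1;    /* pretend ia_valid bits */

        *p++ = htonl(2);    /* bitmap word count, known up front */
        q = p;              /* remember where the backfill goes */
        p += 3;             /* bmval0, bmval1, attr byte length */

        if (have_size) {    /* each present attribute sets its bit */
            bm0 |= 1u << 4;
            *p++ = htonl(0); *p++ = htonl(4096);    /* 64-bit size */
        }
        if (have_mode) {
            bm1 |= 1u << 1;
            *p++ = htonl(0644);
        }

        *q++ = htonl(bm0);    /* backfill now that the length is known */
        *q++ = htonl(bm1);
        *q = htonl((uint32_t)((char *)p - (char *)q) - 4);

        printf("attr payload = %u bytes\n", ntohl(q[0]));
        return 0;
    }
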
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd | |||
900 | { | 880 | { |
901 | __be32 *p; | 881 | __be32 *p; |
902 | 882 | ||
903 | RESERVE_SPACE(8); | 883 | p = reserve_space(xdr, 8); |
904 | WRITE32(OP_ACCESS); | 884 | *p++ = cpu_to_be32(OP_ACCESS); |
905 | WRITE32(access); | 885 | *p = cpu_to_be32(access); |
906 | hdr->nops++; | 886 | hdr->nops++; |
907 | hdr->replen += decode_access_maxsz; | 887 | hdr->replen += decode_access_maxsz; |
908 | } | 888 | } |
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg | |||
911 | { | 891 | { |
912 | __be32 *p; | 892 | __be32 *p; |
913 | 893 | ||
914 | RESERVE_SPACE(8+NFS4_STATEID_SIZE); | 894 | p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); |
915 | WRITE32(OP_CLOSE); | 895 | *p++ = cpu_to_be32(OP_CLOSE); |
916 | WRITE32(arg->seqid->sequence->counter); | 896 | *p++ = cpu_to_be32(arg->seqid->sequence->counter); |
917 | WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); | 897 | xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); |
918 | hdr->nops++; | 898 | hdr->nops++; |
919 | hdr->replen += decode_close_maxsz; | 899 | hdr->replen += decode_close_maxsz; |
920 | } | 900 | } |
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar | |||
923 | { | 903 | { |
924 | __be32 *p; | 904 | __be32 *p; |
925 | 905 | ||
926 | RESERVE_SPACE(16); | 906 | p = reserve_space(xdr, 16); |
927 | WRITE32(OP_COMMIT); | 907 | *p++ = cpu_to_be32(OP_COMMIT); |
928 | WRITE64(args->offset); | 908 | p = xdr_encode_hyper(p, args->offset); |
929 | WRITE32(args->count); | 909 | *p = cpu_to_be32(args->count); |
930 | hdr->nops++; | 910 | hdr->nops++; |
931 | hdr->replen += decode_commit_maxsz; | 911 | hdr->replen += decode_commit_maxsz; |
932 | } | 912 | } |
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * | |||
935 | { | 915 | { |
936 | __be32 *p; | 916 | __be32 *p; |
937 | 917 | ||
938 | RESERVE_SPACE(8); | 918 | p = reserve_space(xdr, 8); |
939 | WRITE32(OP_CREATE); | 919 | *p++ = cpu_to_be32(OP_CREATE); |
940 | WRITE32(create->ftype); | 920 | *p = cpu_to_be32(create->ftype); |
941 | 921 | ||
942 | switch (create->ftype) { | 922 | switch (create->ftype) { |
943 | case NF4LNK: | 923 | case NF4LNK: |
944 | RESERVE_SPACE(4); | 924 | p = reserve_space(xdr, 4); |
945 | WRITE32(create->u.symlink.len); | 925 | *p = cpu_to_be32(create->u.symlink.len); |
946 | xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); | 926 | xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); |
947 | break; | 927 | break; |
948 | 928 | ||
949 | case NF4BLK: case NF4CHR: | 929 | case NF4BLK: case NF4CHR: |
950 | RESERVE_SPACE(8); | 930 | p = reserve_space(xdr, 8); |
951 | WRITE32(create->u.device.specdata1); | 931 | *p++ = cpu_to_be32(create->u.device.specdata1); |
952 | WRITE32(create->u.device.specdata2); | 932 | *p = cpu_to_be32(create->u.device.specdata2); |
953 | break; | 933 | break; |
954 | 934 | ||
955 | default: | 935 | default: |
956 | break; | 936 | break; |
957 | } | 937 | } |
958 | 938 | ||
959 | RESERVE_SPACE(4 + create->name->len); | 939 | encode_string(xdr, create->name->len, create->name->name); |
960 | WRITE32(create->name->len); | ||
961 | WRITEMEM(create->name->name, create->name->len); | ||
962 | hdr->nops++; | 940 | hdr->nops++; |
963 | hdr->replen += decode_create_maxsz; | 941 | hdr->replen += decode_create_maxsz; |
964 | 942 | ||
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c | |||
969 | { | 947 | { |
970 | __be32 *p; | 948 | __be32 *p; |
971 | 949 | ||
972 | RESERVE_SPACE(12); | 950 | p = reserve_space(xdr, 12); |
973 | WRITE32(OP_GETATTR); | 951 | *p++ = cpu_to_be32(OP_GETATTR); |
974 | WRITE32(1); | 952 | *p++ = cpu_to_be32(1); |
975 | WRITE32(bitmap); | 953 | *p = cpu_to_be32(bitmap); |
976 | hdr->nops++; | 954 | hdr->nops++; |
977 | hdr->replen += decode_getattr_maxsz; | 955 | hdr->replen += decode_getattr_maxsz; |
978 | } | 956 | } |
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm | |||
981 | { | 959 | { |
982 | __be32 *p; | 960 | __be32 *p; |
983 | 961 | ||
984 | RESERVE_SPACE(16); | 962 | p = reserve_space(xdr, 16); |
985 | WRITE32(OP_GETATTR); | 963 | *p++ = cpu_to_be32(OP_GETATTR); |
986 | WRITE32(2); | 964 | *p++ = cpu_to_be32(2); |
987 | WRITE32(bm0); | 965 | *p++ = cpu_to_be32(bm0); |
988 | WRITE32(bm1); | 966 | *p = cpu_to_be32(bm1); |
989 | hdr->nops++; | 967 | hdr->nops++; |
990 | hdr->replen += decode_getattr_maxsz; | 968 | hdr->replen += decode_getattr_maxsz; |
991 | } | 969 | } |
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
1012 | { | 990 | { |
1013 | __be32 *p; | 991 | __be32 *p; |
1014 | 992 | ||
1015 | RESERVE_SPACE(4); | 993 | p = reserve_space(xdr, 4); |
1016 | WRITE32(OP_GETFH); | 994 | *p = cpu_to_be32(OP_GETFH); |
1017 | hdr->nops++; | 995 | hdr->nops++; |
1018 | hdr->replen += decode_getfh_maxsz; | 996 | hdr->replen += decode_getfh_maxsz; |
1019 | } | 997 | } |
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct | |||
1022 | { | 1000 | { |
1023 | __be32 *p; | 1001 | __be32 *p; |
1024 | 1002 | ||
1025 | RESERVE_SPACE(8 + name->len); | 1003 | p = reserve_space(xdr, 8 + name->len); |
1026 | WRITE32(OP_LINK); | 1004 | *p++ = cpu_to_be32(OP_LINK); |
1027 | WRITE32(name->len); | 1005 | xdr_encode_opaque(p, name->name, name->len); |
1028 | WRITEMEM(name->name, name->len); | ||
1029 | hdr->nops++; | 1006 | hdr->nops++; |
1030 | hdr->replen += decode_link_maxsz; | 1007 | hdr->replen += decode_link_maxsz; |
1031 | } | 1008 | } |
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args | |||
1052 | { | 1029 | { |
1053 | __be32 *p; | 1030 | __be32 *p; |
1054 | 1031 | ||
1055 | RESERVE_SPACE(32); | 1032 | p = reserve_space(xdr, 32); |
1056 | WRITE32(OP_LOCK); | 1033 | *p++ = cpu_to_be32(OP_LOCK); |
1057 | WRITE32(nfs4_lock_type(args->fl, args->block)); | 1034 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); |
1058 | WRITE32(args->reclaim); | 1035 | *p++ = cpu_to_be32(args->reclaim); |
1059 | WRITE64(args->fl->fl_start); | 1036 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1060 | WRITE64(nfs4_lock_length(args->fl)); | 1037 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1061 | WRITE32(args->new_lock_owner); | 1038 | *p = cpu_to_be32(args->new_lock_owner); |
1062 | if (args->new_lock_owner){ | 1039 | if (args->new_lock_owner){ |
1063 | RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); | 1040 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32); |
1064 | WRITE32(args->open_seqid->sequence->counter); | 1041 | *p++ = cpu_to_be32(args->open_seqid->sequence->counter); |
1065 | WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); | 1042 | p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); |
1066 | WRITE32(args->lock_seqid->sequence->counter); | 1043 | *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); |
1067 | WRITE64(args->lock_owner.clientid); | 1044 | p = xdr_encode_hyper(p, args->lock_owner.clientid); |
1068 | WRITE32(16); | 1045 | *p++ = cpu_to_be32(16); |
1069 | WRITEMEM("lock id:", 8); | 1046 | p = xdr_encode_opaque_fixed(p, "lock id:", 8); |
1070 | WRITE64(args->lock_owner.id); | 1047 | xdr_encode_hyper(p, args->lock_owner.id); |
1071 | } | 1048 | } |
1072 | else { | 1049 | else { |
1073 | RESERVE_SPACE(NFS4_STATEID_SIZE+4); | 1050 | p = reserve_space(xdr, NFS4_STATEID_SIZE+4); |
1074 | WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); | 1051 | p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); |
1075 | WRITE32(args->lock_seqid->sequence->counter); | 1052 | *p = cpu_to_be32(args->lock_seqid->sequence->counter); |
1076 | } | 1053 | } |
1077 | hdr->nops++; | 1054 | hdr->nops++; |
1078 | hdr->replen += decode_lock_maxsz; | 1055 | hdr->replen += decode_lock_maxsz; |
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar | |||
1082 | { | 1059 | { |
1083 | __be32 *p; | 1060 | __be32 *p; |
1084 | 1061 | ||
1085 | RESERVE_SPACE(52); | 1062 | p = reserve_space(xdr, 52); |
1086 | WRITE32(OP_LOCKT); | 1063 | *p++ = cpu_to_be32(OP_LOCKT); |
1087 | WRITE32(nfs4_lock_type(args->fl, 0)); | 1064 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); |
1088 | WRITE64(args->fl->fl_start); | 1065 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1089 | WRITE64(nfs4_lock_length(args->fl)); | 1066 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1090 | WRITE64(args->lock_owner.clientid); | 1067 | p = xdr_encode_hyper(p, args->lock_owner.clientid); |
1091 | WRITE32(16); | 1068 | *p++ = cpu_to_be32(16); |
1092 | WRITEMEM("lock id:", 8); | 1069 | p = xdr_encode_opaque_fixed(p, "lock id:", 8); |
1093 | WRITE64(args->lock_owner.id); | 1070 | xdr_encode_hyper(p, args->lock_owner.id); |
1094 | hdr->nops++; | 1071 | hdr->nops++; |
1095 | hdr->replen += decode_lockt_maxsz; | 1072 | hdr->replen += decode_lockt_maxsz; |
1096 | } | 1073 | } |
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar | |||
1099 | { | 1076 | { |
1100 | __be32 *p; | 1077 | __be32 *p; |
1101 | 1078 | ||
1102 | RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); | 1079 | p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); |
1103 | WRITE32(OP_LOCKU); | 1080 | *p++ = cpu_to_be32(OP_LOCKU); |
1104 | WRITE32(nfs4_lock_type(args->fl, 0)); | 1081 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); |
1105 | WRITE32(args->seqid->sequence->counter); | 1082 | *p++ = cpu_to_be32(args->seqid->sequence->counter); |
1106 | WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); | 1083 | p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); |
1107 | WRITE64(args->fl->fl_start); | 1084 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1108 | WRITE64(nfs4_lock_length(args->fl)); | 1085 | xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1109 | hdr->nops++; | 1086 | hdr->nops++; |
1110 | hdr->replen += decode_locku_maxsz; | 1087 | hdr->replen += decode_locku_maxsz; |
1111 | } | 1088 | } |
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc | |||
1115 | int len = name->len; | 1092 | int len = name->len; |
1116 | __be32 *p; | 1093 | __be32 *p; |
1117 | 1094 | ||
1118 | RESERVE_SPACE(8 + len); | 1095 | p = reserve_space(xdr, 8 + len); |
1119 | WRITE32(OP_LOOKUP); | 1096 | *p++ = cpu_to_be32(OP_LOOKUP); |
1120 | WRITE32(len); | 1097 | xdr_encode_opaque(p, name->name, len); |
1121 | WRITEMEM(name->name, len); | ||
1122 | hdr->nops++; | 1098 | hdr->nops++; |
1123 | hdr->replen += decode_lookup_maxsz; | 1099 | hdr->replen += decode_lookup_maxsz; |
1124 | } | 1100 | } |
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) | |||
1127 | { | 1103 | { |
1128 | __be32 *p; | 1104 | __be32 *p; |
1129 | 1105 | ||
1130 | RESERVE_SPACE(8); | 1106 | p = reserve_space(xdr, 8); |
1131 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { | 1107 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { |
1132 | case FMODE_READ: | 1108 | case FMODE_READ: |
1133 | WRITE32(NFS4_SHARE_ACCESS_READ); | 1109 | *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ); |
1134 | break; | 1110 | break; |
1135 | case FMODE_WRITE: | 1111 | case FMODE_WRITE: |
1136 | WRITE32(NFS4_SHARE_ACCESS_WRITE); | 1112 | *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE); |
1137 | break; | 1113 | break; |
1138 | case FMODE_READ|FMODE_WRITE: | 1114 | case FMODE_READ|FMODE_WRITE: |
1139 | WRITE32(NFS4_SHARE_ACCESS_BOTH); | 1115 | *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH); |
1140 | break; | 1116 | break; |
1141 | default: | 1117 | default: |
1142 | WRITE32(0); | 1118 | *p++ = cpu_to_be32(0); |
1143 | } | 1119 | } |
1144 | WRITE32(0); /* for linux, share_deny = 0 always */ | 1120 | *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */ |
1145 | } | 1121 | } |
1146 | 1122 | ||
1147 | static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) | 1123 | static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) |
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena | |||
1151 | * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, | 1127 | * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, |
1152 | * owner 4 = 32 | 1128 | * owner 4 = 32 |
1153 | */ | 1129 | */ |
1154 | RESERVE_SPACE(8); | 1130 | p = reserve_space(xdr, 8); |
1155 | WRITE32(OP_OPEN); | 1131 | *p++ = cpu_to_be32(OP_OPEN); |
1156 | WRITE32(arg->seqid->sequence->counter); | 1132 | *p = cpu_to_be32(arg->seqid->sequence->counter); |
1157 | encode_share_access(xdr, arg->fmode); | 1133 | encode_share_access(xdr, arg->fmode); |
1158 | RESERVE_SPACE(28); | 1134 | p = reserve_space(xdr, 28); |
1159 | WRITE64(arg->clientid); | 1135 | p = xdr_encode_hyper(p, arg->clientid); |
1160 | WRITE32(16); | 1136 | *p++ = cpu_to_be32(16); |
1161 | WRITEMEM("open id:", 8); | 1137 | p = xdr_encode_opaque_fixed(p, "open id:", 8); |
1162 | WRITE64(arg->id); | 1138 | xdr_encode_hyper(p, arg->id); |
1163 | } | 1139 | } |
1164 | 1140 | ||
1165 | static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) | 1141 | static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) |
1166 | { | 1142 | { |
1167 | __be32 *p; | 1143 | __be32 *p; |
1168 | 1144 | ||
1169 | RESERVE_SPACE(4); | 1145 | p = reserve_space(xdr, 4); |
1170 | switch(arg->open_flags & O_EXCL) { | 1146 | switch(arg->open_flags & O_EXCL) { |
1171 | case 0: | 1147 | case 0: |
1172 | WRITE32(NFS4_CREATE_UNCHECKED); | 1148 | *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); |
1173 | encode_attrs(xdr, arg->u.attrs, arg->server); | 1149 | encode_attrs(xdr, arg->u.attrs, arg->server); |
1174 | break; | 1150 | break; |
1175 | default: | 1151 | default: |
1176 | WRITE32(NFS4_CREATE_EXCLUSIVE); | 1152 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); |
1177 | encode_nfs4_verifier(xdr, &arg->u.verifier); | 1153 | encode_nfs4_verifier(xdr, &arg->u.verifier); |
1178 | } | 1154 | } |
1179 | } | 1155 | } |
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a | |||
1182 | { | 1158 | { |
1183 | __be32 *p; | 1159 | __be32 *p; |
1184 | 1160 | ||
1185 | RESERVE_SPACE(4); | 1161 | p = reserve_space(xdr, 4); |
1186 | switch (arg->open_flags & O_CREAT) { | 1162 | switch (arg->open_flags & O_CREAT) { |
1187 | case 0: | 1163 | case 0: |
1188 | WRITE32(NFS4_OPEN_NOCREATE); | 1164 | *p = cpu_to_be32(NFS4_OPEN_NOCREATE); |
1189 | break; | 1165 | break; |
1190 | default: | 1166 | default: |
1191 | BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); | 1167 | BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); |
1192 | WRITE32(NFS4_OPEN_CREATE); | 1168 | *p = cpu_to_be32(NFS4_OPEN_CREATE); |
1193 | encode_createmode(xdr, arg); | 1169 | encode_createmode(xdr, arg); |
1194 | } | 1170 | } |
1195 | } | 1171 | } |
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega | |||
1198 | { | 1174 | { |
1199 | __be32 *p; | 1175 | __be32 *p; |
1200 | 1176 | ||
1201 | RESERVE_SPACE(4); | 1177 | p = reserve_space(xdr, 4); |
1202 | switch (delegation_type) { | 1178 | switch (delegation_type) { |
1203 | case 0: | 1179 | case 0: |
1204 | WRITE32(NFS4_OPEN_DELEGATE_NONE); | 1180 | *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE); |
1205 | break; | 1181 | break; |
1206 | case FMODE_READ: | 1182 | case FMODE_READ: |
1207 | WRITE32(NFS4_OPEN_DELEGATE_READ); | 1183 | *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ); |
1208 | break; | 1184 | break; |
1209 | case FMODE_WRITE|FMODE_READ: | 1185 | case FMODE_WRITE|FMODE_READ: |
1210 | WRITE32(NFS4_OPEN_DELEGATE_WRITE); | 1186 | *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE); |
1211 | break; | 1187 | break; |
1212 | default: | 1188 | default: |
1213 | BUG(); | 1189 | BUG(); |
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr * | |||
1218 | { | 1194 | { |
1219 | __be32 *p; | 1195 | __be32 *p; |
1220 | 1196 | ||
1221 | RESERVE_SPACE(4); | 1197 | p = reserve_space(xdr, 4); |
1222 | WRITE32(NFS4_OPEN_CLAIM_NULL); | 1198 | *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL); |
1223 | encode_string(xdr, name->len, name->name); | 1199 | encode_string(xdr, name->len, name->name); |
1224 | } | 1200 | } |
1225 | 1201 | ||
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) | |||
1227 | { | 1203 | { |
1228 | __be32 *p; | 1204 | __be32 *p; |
1229 | 1205 | ||
1230 | RESERVE_SPACE(4); | 1206 | p = reserve_space(xdr, 4); |
1231 | WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); | 1207 | *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS); |
1232 | encode_delegation_type(xdr, type); | 1208 | encode_delegation_type(xdr, type); |
1233 | } | 1209 | } |
1234 | 1210 | ||
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc | |||
1236 | { | 1212 | { |
1237 | __be32 *p; | 1213 | __be32 *p; |
1238 | 1214 | ||
1239 | RESERVE_SPACE(4+NFS4_STATEID_SIZE); | 1215 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); |
1240 | WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); | 1216 | *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); |
1241 | WRITEMEM(stateid->data, NFS4_STATEID_SIZE); | 1217 | xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); |
1242 | encode_string(xdr, name->len, name->name); | 1218 | encode_string(xdr, name->len, name->name); |
1243 | } | 1219 | } |
1244 | 1220 | ||
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co | |||
1267 | { | 1243 | { |
1268 | __be32 *p; | 1244 | __be32 *p; |
1269 | 1245 | ||
1270 | RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); | 1246 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); |
1271 | WRITE32(OP_OPEN_CONFIRM); | 1247 | *p++ = cpu_to_be32(OP_OPEN_CONFIRM); |
1272 | WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); | 1248 | p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); |
1273 | WRITE32(arg->seqid->sequence->counter); | 1249 | *p = cpu_to_be32(arg->seqid->sequence->counter); |
1274 | hdr->nops++; | 1250 | hdr->nops++; |
1275 | hdr->replen += decode_open_confirm_maxsz; | 1251 | hdr->replen += decode_open_confirm_maxsz; |
1276 | } | 1252 | } |
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close | |||
1279 | { | 1255 | { |
1280 | __be32 *p; | 1256 | __be32 *p; |
1281 | 1257 | ||
1282 | RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); | 1258 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); |
1283 | WRITE32(OP_OPEN_DOWNGRADE); | 1259 | *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE); |
1284 | WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); | 1260 | p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); |
1285 | WRITE32(arg->seqid->sequence->counter); | 1261 | *p = cpu_to_be32(arg->seqid->sequence->counter); |
1286 | encode_share_access(xdr, arg->fmode); | 1262 | encode_share_access(xdr, arg->fmode); |
1287 | hdr->nops++; | 1263 | hdr->nops++; |
1288 | hdr->replen += decode_open_downgrade_maxsz; | 1264 | hdr->replen += decode_open_downgrade_maxsz; |
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd | |||
1294 | int len = fh->size; | 1270 | int len = fh->size; |
1295 | __be32 *p; | 1271 | __be32 *p; |
1296 | 1272 | ||
1297 | RESERVE_SPACE(8 + len); | 1273 | p = reserve_space(xdr, 8 + len); |
1298 | WRITE32(OP_PUTFH); | 1274 | *p++ = cpu_to_be32(OP_PUTFH); |
1299 | WRITE32(len); | 1275 | xdr_encode_opaque(p, fh->data, len); |
1300 | WRITEMEM(fh->data, len); | ||
1301 | hdr->nops++; | 1276 | hdr->nops++; |
1302 | hdr->replen += decode_putfh_maxsz; | 1277 | hdr->replen += decode_putfh_maxsz; |
1303 | } | 1278 | } |
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
1306 | { | 1281 | { |
1307 | __be32 *p; | 1282 | __be32 *p; |
1308 | 1283 | ||
1309 | RESERVE_SPACE(4); | 1284 | p = reserve_space(xdr, 4); |
1310 | WRITE32(OP_PUTROOTFH); | 1285 | *p = cpu_to_be32(OP_PUTROOTFH); |
1311 | hdr->nops++; | 1286 | hdr->nops++; |
1312 | hdr->replen += decode_putrootfh_maxsz; | 1287 | hdr->replen += decode_putrootfh_maxsz; |
1313 | } | 1288 | } |
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context | |||
1317 | nfs4_stateid stateid; | 1292 | nfs4_stateid stateid; |
1318 | __be32 *p; | 1293 | __be32 *p; |
1319 | 1294 | ||
1320 | RESERVE_SPACE(NFS4_STATEID_SIZE); | 1295 | p = reserve_space(xdr, NFS4_STATEID_SIZE); |
1321 | if (ctx->state != NULL) { | 1296 | if (ctx->state != NULL) { |
1322 | nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); | 1297 | nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); |
1323 | WRITEMEM(stateid.data, NFS4_STATEID_SIZE); | 1298 | xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); |
1324 | } else | 1299 | } else |
1325 | WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); | 1300 | xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); |
1326 | } | 1301 | } |
1327 | 1302 | ||
1328 | static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) | 1303 | static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) |
1329 | { | 1304 | { |
1330 | __be32 *p; | 1305 | __be32 *p; |
1331 | 1306 | ||
1332 | RESERVE_SPACE(4); | 1307 | p = reserve_space(xdr, 4); |
1333 | WRITE32(OP_READ); | 1308 | *p = cpu_to_be32(OP_READ); |
1334 | 1309 | ||
1335 | encode_stateid(xdr, args->context); | 1310 | encode_stateid(xdr, args->context); |
1336 | 1311 | ||
1337 | RESERVE_SPACE(12); | 1312 | p = reserve_space(xdr, 12); |
1338 | WRITE64(args->offset); | 1313 | p = xdr_encode_hyper(p, args->offset); |
1339 | WRITE32(args->count); | 1314 | *p = cpu_to_be32(args->count); |
1340 | hdr->nops++; | 1315 | hdr->nops++; |
1341 | hdr->replen += decode_read_maxsz; | 1316 | hdr->replen += decode_read_maxsz; |
1342 | } | 1317 | } |
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | |||
1349 | }; | 1324 | }; |
1350 | __be32 *p; | 1325 | __be32 *p; |
1351 | 1326 | ||
1352 | RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); | 1327 | p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); |
1353 | WRITE32(OP_READDIR); | 1328 | *p++ = cpu_to_be32(OP_READDIR); |
1354 | WRITE64(readdir->cookie); | 1329 | p = xdr_encode_hyper(p, readdir->cookie); |
1355 | WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); | 1330 | p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); |
1356 | WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ | 1331 | *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ |
1357 | WRITE32(readdir->count); | 1332 | *p++ = cpu_to_be32(readdir->count); |
1358 | WRITE32(2); | 1333 | *p++ = cpu_to_be32(2); |
1359 | /* Switch to mounted_on_fileid if the server supports it */ | 1334 | /* Switch to mounted_on_fileid if the server supports it */ |
1360 | if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) | 1335 | if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) |
1361 | attrs[0] &= ~FATTR4_WORD0_FILEID; | 1336 | attrs[0] &= ~FATTR4_WORD0_FILEID; |
1362 | else | 1337 | else |
1363 | attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; | 1338 | attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; |
1364 | WRITE32(attrs[0] & readdir->bitmask[0]); | 1339 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); |
1365 | WRITE32(attrs[1] & readdir->bitmask[1]); | 1340 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); |
1366 | hdr->nops++; | 1341 | hdr->nops++; |
1367 | hdr->replen += decode_readdir_maxsz; | 1342 | hdr->replen += decode_readdir_maxsz; |
1368 | dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", | 1343 | dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", |
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink * | |||
1378 | { | 1353 | { |
1379 | __be32 *p; | 1354 | __be32 *p; |
1380 | 1355 | ||
1381 | RESERVE_SPACE(4); | 1356 | p = reserve_space(xdr, 4); |
1382 | WRITE32(OP_READLINK); | 1357 | *p = cpu_to_be32(OP_READLINK); |
1383 | hdr->nops++; | 1358 | hdr->nops++; |
1384 | hdr->replen += decode_readlink_maxsz; | 1359 | hdr->replen += decode_readlink_maxsz; |
1385 | } | 1360 | } |
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc | |||
1388 | { | 1363 | { |
1389 | __be32 *p; | 1364 | __be32 *p; |
1390 | 1365 | ||
1391 | RESERVE_SPACE(8 + name->len); | 1366 | p = reserve_space(xdr, 8 + name->len); |
1392 | WRITE32(OP_REMOVE); | 1367 | *p++ = cpu_to_be32(OP_REMOVE); |
1393 | WRITE32(name->len); | 1368 | xdr_encode_opaque(p, name->name, name->len); |
1394 | WRITEMEM(name->name, name->len); | ||
1395 | hdr->nops++; | 1369 | hdr->nops++; |
1396 | hdr->replen += decode_remove_maxsz; | 1370 | hdr->replen += decode_remove_maxsz; |
1397 | } | 1371 | } |
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co | |||
1400 | { | 1374 | { |
1401 | __be32 *p; | 1375 | __be32 *p; |
1402 | 1376 | ||
1403 | RESERVE_SPACE(8 + oldname->len); | 1377 | p = reserve_space(xdr, 4); |
1404 | WRITE32(OP_RENAME); | 1378 | *p = cpu_to_be32(OP_RENAME); |
1405 | WRITE32(oldname->len); | 1379 | encode_string(xdr, oldname->len, oldname->name); |
1406 | WRITEMEM(oldname->name, oldname->len); | 1380 | encode_string(xdr, newname->len, newname->name); |
1407 | |||
1408 | RESERVE_SPACE(4 + newname->len); | ||
1409 | WRITE32(newname->len); | ||
1410 | WRITEMEM(newname->name, newname->len); | ||
1411 | hdr->nops++; | 1381 | hdr->nops++; |
1412 | hdr->replen += decode_rename_maxsz; | 1382 | hdr->replen += decode_rename_maxsz; |
1413 | } | 1383 | } |
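
encode_rename() shrinks to two encode_string() calls because an XDR string is simply a 32-bit length followed by the bytes, zero-padded to a 4-byte boundary — the same layout xdr_encode_opaque() produces in the hunks above. A sketch of that wire format:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Sketch of xdr_encode_opaque(): length word, data, zero pad to 4 bytes. */
    static uint32_t *encode_opaque(uint32_t *p, const void *data, uint32_t len)
    {
        *p++ = htonl(len);
        memcpy(p, data, len);
        if (len & 3)
            memset((char *)p + len, 0, 4 - (len & 3));
        return p + ((len + 3) >> 2);
    }

    int main(void)
    {
        uint32_t buf[8], *p = buf;

        p = encode_opaque(p, "old", 3);        /* oldname: 1 + 1 words */
        p = encode_opaque(p, "newname", 7);    /* newname: 1 + 2 words */
        printf("encoded %td words\n", p - buf);
        return 0;
    }
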
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client | |||
1416 | { | 1386 | { |
1417 | __be32 *p; | 1387 | __be32 *p; |
1418 | 1388 | ||
1419 | RESERVE_SPACE(12); | 1389 | p = reserve_space(xdr, 12); |
1420 | WRITE32(OP_RENEW); | 1390 | *p++ = cpu_to_be32(OP_RENEW); |
1421 | WRITE64(client_stateid->cl_clientid); | 1391 | xdr_encode_hyper(p, client_stateid->cl_clientid); |
1422 | hdr->nops++; | 1392 | hdr->nops++; |
1423 | hdr->replen += decode_renew_maxsz; | 1393 | hdr->replen += decode_renew_maxsz; |
1424 | } | 1394 | } |
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
1428 | { | 1398 | { |
1429 | __be32 *p; | 1399 | __be32 *p; |
1430 | 1400 | ||
1431 | RESERVE_SPACE(4); | 1401 | p = reserve_space(xdr, 4); |
1432 | WRITE32(OP_RESTOREFH); | 1402 | *p = cpu_to_be32(OP_RESTOREFH); |
1433 | hdr->nops++; | 1403 | hdr->nops++; |
1434 | hdr->replen += decode_restorefh_maxsz; | 1404 | hdr->replen += decode_restorefh_maxsz; |
1435 | } | 1405 | } |
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun | |||
1439 | { | 1409 | { |
1440 | __be32 *p; | 1410 | __be32 *p; |
1441 | 1411 | ||
1442 | RESERVE_SPACE(4+NFS4_STATEID_SIZE); | 1412 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); |
1443 | WRITE32(OP_SETATTR); | 1413 | *p++ = cpu_to_be32(OP_SETATTR); |
1444 | WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); | 1414 | xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); |
1445 | RESERVE_SPACE(2*4); | 1415 | p = reserve_space(xdr, 2*4); |
1446 | WRITE32(1); | 1416 | *p++ = cpu_to_be32(1); |
1447 | WRITE32(FATTR4_WORD0_ACL); | 1417 | *p = cpu_to_be32(FATTR4_WORD0_ACL); |
1448 | if (arg->acl_len % 4) | 1418 | if (arg->acl_len % 4) |
1449 | return -EINVAL; | 1419 | return -EINVAL; |
1450 | RESERVE_SPACE(4); | 1420 | p = reserve_space(xdr, 4); |
1451 | WRITE32(arg->acl_len); | 1421 | *p = cpu_to_be32(arg->acl_len); |
1452 | xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); | 1422 | xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); |
1453 | hdr->nops++; | 1423 | hdr->nops++; |
1454 | hdr->replen += decode_setacl_maxsz; | 1424 | hdr->replen += decode_setacl_maxsz; |
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
1460 | { | 1430 | { |
1461 | __be32 *p; | 1431 | __be32 *p; |
1462 | 1432 | ||
1463 | RESERVE_SPACE(4); | 1433 | p = reserve_space(xdr, 4); |
1464 | WRITE32(OP_SAVEFH); | 1434 | *p = cpu_to_be32(OP_SAVEFH); |
1465 | hdr->nops++; | 1435 | hdr->nops++; |
1466 | hdr->replen += decode_savefh_maxsz; | 1436 | hdr->replen += decode_savefh_maxsz; |
1467 | } | 1437 | } |
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs | |||
1470 | { | 1440 | { |
1471 | __be32 *p; | 1441 | __be32 *p; |
1472 | 1442 | ||
1473 | RESERVE_SPACE(4+NFS4_STATEID_SIZE); | 1443 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); |
1474 | WRITE32(OP_SETATTR); | 1444 | *p++ = cpu_to_be32(OP_SETATTR); |
1475 | WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); | 1445 | xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE); |
1476 | hdr->nops++; | 1446 | hdr->nops++; |
1477 | hdr->replen += decode_setattr_maxsz; | 1447 | hdr->replen += decode_setattr_maxsz; |
1478 | encode_attrs(xdr, arg->iap, server); | 1448 | encode_attrs(xdr, arg->iap, server); |
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie | |||
1482 | { | 1452 | { |
1483 | __be32 *p; | 1453 | __be32 *p; |
1484 | 1454 | ||
1485 | RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); | 1455 | p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); |
1486 | WRITE32(OP_SETCLIENTID); | 1456 | *p++ = cpu_to_be32(OP_SETCLIENTID); |
1487 | WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); | 1457 | xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); |
1488 | 1458 | ||
1489 | encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); | 1459 | encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); |
1490 | RESERVE_SPACE(4); | 1460 | p = reserve_space(xdr, 4); |
1491 | WRITE32(setclientid->sc_prog); | 1461 | *p = cpu_to_be32(setclientid->sc_prog); |
1492 | encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); | 1462 | encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); |
1493 | encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); | 1463 | encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); |
1494 | RESERVE_SPACE(4); | 1464 | p = reserve_space(xdr, 4); |
1495 | WRITE32(setclientid->sc_cb_ident); | 1465 | *p = cpu_to_be32(setclientid->sc_cb_ident); |
1496 | hdr->nops++; | 1466 | hdr->nops++; |
1497 | hdr->replen += decode_setclientid_maxsz; | 1467 | hdr->replen += decode_setclientid_maxsz; |
1498 | } | 1468 | } |
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_ | |||
1501 | { | 1471 | { |
1502 | __be32 *p; | 1472 | __be32 *p; |
1503 | 1473 | ||
1504 | RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); | 1474 | p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); |
1505 | WRITE32(OP_SETCLIENTID_CONFIRM); | 1475 | *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); |
1506 | WRITE64(client_state->cl_clientid); | 1476 | p = xdr_encode_hyper(p, client_state->cl_clientid); |
1507 | WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); | 1477 | xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); |
1508 | hdr->nops++; | 1478 | hdr->nops++; |
1509 | hdr->replen += decode_setclientid_confirm_maxsz; | 1479 | hdr->replen += decode_setclientid_confirm_maxsz; |
1510 | } | 1480 | } |
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg | |||
1513 | { | 1483 | { |
1514 | __be32 *p; | 1484 | __be32 *p; |
1515 | 1485 | ||
1516 | RESERVE_SPACE(4); | 1486 | p = reserve_space(xdr, 4); |
1517 | WRITE32(OP_WRITE); | 1487 | *p = cpu_to_be32(OP_WRITE); |
1518 | 1488 | ||
1519 | encode_stateid(xdr, args->context); | 1489 | encode_stateid(xdr, args->context); |
1520 | 1490 | ||
1521 | RESERVE_SPACE(16); | 1491 | p = reserve_space(xdr, 16); |
1522 | WRITE64(args->offset); | 1492 | p = xdr_encode_hyper(p, args->offset); |
1523 | WRITE32(args->stable); | 1493 | *p++ = cpu_to_be32(args->stable); |
1524 | WRITE32(args->count); | 1494 | *p = cpu_to_be32(args->count); |
1525 | 1495 | ||
1526 | xdr_write_pages(xdr, args->pages, args->pgbase, args->count); | 1496 | xdr_write_pages(xdr, args->pages, args->pgbase, args->count); |
1527 | hdr->nops++; | 1497 | hdr->nops++; |
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state | |||
1532 | { | 1502 | { |
1533 | __be32 *p; | 1503 | __be32 *p; |
1534 | 1504 | ||
1535 | RESERVE_SPACE(4+NFS4_STATEID_SIZE); | 1505 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); |
1536 | 1506 | ||
1537 | WRITE32(OP_DELEGRETURN); | 1507 | *p++ = cpu_to_be32(OP_DELEGRETURN); |
1538 | WRITEMEM(stateid->data, NFS4_STATEID_SIZE); | 1508 | xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); |
1539 | hdr->nops++; | 1509 | hdr->nops++; |
1540 | hdr->replen += decode_delegreturn_maxsz; | 1510 | hdr->replen += decode_delegreturn_maxsz; |
1541 | } | 1511 | } |
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr, | |||
1548 | { | 1518 | { |
1549 | __be32 *p; | 1519 | __be32 *p; |
1550 | 1520 | ||
1551 | RESERVE_SPACE(4 + sizeof(args->verifier->data)); | 1521 | p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); |
1552 | WRITE32(OP_EXCHANGE_ID); | 1522 | *p++ = cpu_to_be32(OP_EXCHANGE_ID); |
1553 | WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); | 1523 | xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data)); |
1554 | 1524 | ||
1555 | encode_string(xdr, args->id_len, args->id); | 1525 | encode_string(xdr, args->id_len, args->id); |
1556 | 1526 | ||
1557 | RESERVE_SPACE(12); | 1527 | p = reserve_space(xdr, 12); |
1558 | WRITE32(args->flags); | 1528 | *p++ = cpu_to_be32(args->flags); |
1559 | WRITE32(0); /* zero length state_protect4_a */ | 1529 | *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ |
1560 | WRITE32(0); /* zero length implementation id array */ | 1530 | *p = cpu_to_be32(0); /* zero length implementation id array */ |
1561 | hdr->nops++; | 1531 | hdr->nops++; |
1562 | hdr->replen += decode_exchange_id_maxsz; | 1532 | hdr->replen += decode_exchange_id_maxsz; |
1563 | } | 1533 | } |
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1571 | uint32_t len; | 1541 | uint32_t len; |
1572 | struct nfs_client *clp = args->client; | 1542 | struct nfs_client *clp = args->client; |
1573 | 1543 | ||
1574 | RESERVE_SPACE(4); | 1544 | len = scnprintf(machine_name, sizeof(machine_name), "%s", |
1575 | WRITE32(OP_CREATE_SESSION); | 1545 | clp->cl_ipaddr); |
1576 | |||
1577 | RESERVE_SPACE(8); | ||
1578 | WRITE64(clp->cl_ex_clid); | ||
1579 | 1546 | ||
1580 | RESERVE_SPACE(8); | 1547 | p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); |
1581 | WRITE32(clp->cl_seqid); /*Sequence id */ | 1548 | *p++ = cpu_to_be32(OP_CREATE_SESSION); |
1582 | WRITE32(args->flags); /*flags */ | 1549 | p = xdr_encode_hyper(p, clp->cl_ex_clid); |
1550 | *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ | ||
1551 | *p++ = cpu_to_be32(args->flags); /*flags */ | ||
1583 | 1552 | ||
1584 | RESERVE_SPACE(2*28); /* 2 channel_attrs */ | ||
1585 | /* Fore Channel */ | 1553 | /* Fore Channel */ |
1586 | WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ | 1554 | *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ |
1587 | WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ | 1555 | *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ |
1588 | WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ | 1556 | *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ |
1589 | WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ | 1557 | *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ |
1590 | WRITE32(args->fc_attrs.max_ops); /* max operations */ | 1558 | *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */ |
1591 | WRITE32(args->fc_attrs.max_reqs); /* max requests */ | 1559 | *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */ |
1592 | WRITE32(0); /* rdmachannel_attrs */ | 1560 | *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ |
1593 | 1561 | ||
1594 | /* Back Channel */ | 1562 | /* Back Channel */ |
1595 | WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ | 1563 | *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ |
1596 | WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ | 1564 | *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ |
1597 | WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ | 1565 | *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ |
1598 | WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ | 1566 | *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ |
1599 | WRITE32(args->bc_attrs.max_ops); /* max operations */ | 1567 | *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */ |
1600 | WRITE32(args->bc_attrs.max_reqs); /* max requests */ | 1568 | *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */ |
1601 | WRITE32(0); /* rdmachannel_attrs */ | 1569 | *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ |
1602 | 1570 | ||
1603 | RESERVE_SPACE(4); | 1571 | *p++ = cpu_to_be32(args->cb_program); /* cb_program */ |
1604 | WRITE32(args->cb_program); /* cb_program */ | 1572 | *p++ = cpu_to_be32(1); |
1605 | 1573 | *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ | |
1606 | RESERVE_SPACE(4); /* # of security flavors */ | ||
1607 | WRITE32(1); | ||
1608 | |||
1609 | RESERVE_SPACE(4); | ||
1610 | WRITE32(RPC_AUTH_UNIX); /* auth_sys */ | ||
1611 | 1574 | ||
1612 | /* authsys_parms rfc1831 */ | 1575 | /* authsys_parms rfc1831 */ |
1613 | RESERVE_SPACE(4); | 1576 | *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ |
1614 | WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ | 1577 | p = xdr_encode_opaque(p, machine_name, len); |
1615 | len = scnprintf(machine_name, sizeof(machine_name), "%s", | 1578 | *p++ = cpu_to_be32(0); /* UID */ |
1616 | clp->cl_ipaddr); | 1579 | *p++ = cpu_to_be32(0); /* GID */ |
1617 | RESERVE_SPACE(16 + len); | 1580 | *p = cpu_to_be32(0); /* No more gids */ |
1618 | WRITE32(len); | ||
1619 | WRITEMEM(machine_name, len); | ||
1620 | WRITE32(0); /* UID */ | ||
1621 | WRITE32(0); /* GID */ | ||
1622 | WRITE32(0); /* No more gids */ | ||
1623 | hdr->nops++; | 1581 | hdr->nops++; |
1624 | hdr->replen += decode_create_session_maxsz; | 1582 | hdr->replen += decode_create_session_maxsz; |
1625 | } | 1583 | } |
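
encode_create_session() used to interleave eight RESERVE_SPACE calls with its stores; the rewrite measures the one variable-length field (the machine name) with scnprintf() first, so a single reservation of 20 + 2*28 + 20 + len + 12 bytes covers the whole operation. A compressed sketch of the measure-then-reserve order (opcode and field values are placeholders):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char name[64];
        uint32_t buf[64], *p = buf;

        /* Measure the only variable-length field first... */
        uint32_t len = (uint32_t)snprintf(name, sizeof(name), "%s", "10.0.0.2");
        /* ...so one reservation can cover the whole operation:
         * fixed words (8 bytes here) + length word + padded name. */
        uint32_t total = 8 + 4 + ((len + 3) & ~3u);

        *p++ = htonl(43);    /* stand-in opcode */
        *p++ = htonl(1);     /* stand-in sequence id */
        *p++ = htonl(len);   /* machine name, encoded inline */
        memcpy(p, name, len);
        p += (len + 3) >> 2;
        printf("reserved %u bytes, wrote %td words\n", total, p - buf);
        return 0;
    }
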
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr, | |||
1629 | struct compound_hdr *hdr) | 1587 | struct compound_hdr *hdr) |
1630 | { | 1588 | { |
1631 | __be32 *p; | 1589 | __be32 *p; |
1632 | RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); | 1590 | p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); |
1633 | WRITE32(OP_DESTROY_SESSION); | 1591 | *p++ = cpu_to_be32(OP_DESTROY_SESSION); |
1634 | WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); | 1592 | xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); |
1635 | hdr->nops++; | 1593 | hdr->nops++; |
1636 | hdr->replen += decode_destroy_session_maxsz; | 1594 | hdr->replen += decode_destroy_session_maxsz; |
1637 | } | 1595 | } |
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1655 | WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); | 1613 | WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); |
1656 | slot = tp->slots + args->sa_slotid; | 1614 | slot = tp->slots + args->sa_slotid; |
1657 | 1615 | ||
1658 | RESERVE_SPACE(4); | 1616 | p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); |
1659 | WRITE32(OP_SEQUENCE); | 1617 | *p++ = cpu_to_be32(OP_SEQUENCE); |
1660 | 1618 | ||
1661 | /* | 1619 | /* |
1662 | * Sessionid + seqid + slotid + max slotid + cache_this | 1620 | * Sessionid + seqid + slotid + max slotid + cache_this |
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1670 | ((u32 *)session->sess_id.data)[3], | 1628 | ((u32 *)session->sess_id.data)[3], |
1671 | slot->seq_nr, args->sa_slotid, | 1629 | slot->seq_nr, args->sa_slotid, |
1672 | tp->highest_used_slotid, args->sa_cache_this); | 1630 | tp->highest_used_slotid, args->sa_cache_this); |
1673 | RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); | 1631 | p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); |
1674 | WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); | 1632 | *p++ = cpu_to_be32(slot->seq_nr); |
1675 | WRITE32(slot->seq_nr); | 1633 | *p++ = cpu_to_be32(args->sa_slotid); |
1676 | WRITE32(args->sa_slotid); | 1634 | *p++ = cpu_to_be32(tp->highest_used_slotid); |
1677 | WRITE32(tp->highest_used_slotid); | 1635 | *p = cpu_to_be32(args->sa_cache_this); |
1678 | WRITE32(args->sa_cache_this); | ||
1679 | hdr->nops++; | 1636 | hdr->nops++; |
1680 | hdr->replen += decode_sequence_maxsz; | 1637 | hdr->replen += decode_sequence_maxsz; |
1681 | #endif /* CONFIG_NFS_V4_1 */ | 1638 | #endif /* CONFIG_NFS_V4_1 */ |
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, | |||
2466 | } | 2423 | } |
2467 | #endif /* CONFIG_NFS_V4_1 */ | 2424 | #endif /* CONFIG_NFS_V4_1 */ |
2468 | 2425 | ||
2469 | /* | 2426 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
2470 | * START OF "GENERIC" DECODE ROUTINES. | 2427 | { |
2471 | * These may look a little ugly since they are imported from a "generic" | 2428 | dprintk("nfs: %s: prematurely hit end of receive buffer. " |
2472 | * set of XDR encode/decode routines which are intended to be shared by | 2429 | "Remaining buffer length is %tu words.\n", |
2473 | * all of our NFSv4 implementations (OpenBSD, MacOS X...). | 2430 | func, xdr->end - xdr->p); |
2474 | * | 2431 | } |
2475 | * If the pain of reading these is too great, it should be a straightforward | ||
2476 | * task to translate them into Linux-specific versions which are more | ||
2477 | * consistent with the style used in NFSv2/v3... | ||
2478 | */ | ||
2479 | #define READ32(x) (x) = ntohl(*p++) | ||
2480 | #define READ64(x) do { \ | ||
2481 | (x) = (u64)ntohl(*p++) << 32; \ | ||
2482 | (x) |= ntohl(*p++); \ | ||
2483 | } while (0) | ||
2484 | #define READTIME(x) do { \ | ||
2485 | p++; \ | ||
2486 | (x.tv_sec) = ntohl(*p++); \ | ||
2487 | (x.tv_nsec) = ntohl(*p++); \ | ||
2488 | } while (0) | ||
2489 | #define COPYMEM(x,nbytes) do { \ | ||
2490 | memcpy((x), p, nbytes); \ | ||
2491 | p += XDR_QUADLEN(nbytes); \ | ||
2492 | } while (0) | ||
2493 | |||
2494 | #define READ_BUF(nbytes) do { \ | ||
2495 | p = xdr_inline_decode(xdr, nbytes); \ | ||
2496 | if (unlikely(!p)) { \ | ||
2497 | dprintk("nfs: %s: prematurely hit end of receive" \ | ||
2498 | " buffer\n", __func__); \ | ||
2499 | dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ | ||
2500 | __func__, xdr->p, nbytes, xdr->end); \ | ||
2501 | return -EIO; \ | ||
2502 | } \ | ||
2503 | } while (0) | ||
2504 | 2432 | ||
2505 | static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) | 2433 | static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) |
2506 | { | 2434 | { |
2507 | __be32 *p; | 2435 | __be32 *p; |
2508 | 2436 | ||
2509 | READ_BUF(4); | 2437 | p = xdr_inline_decode(xdr, 4); |
2510 | READ32(*len); | 2438 | if (unlikely(!p)) |
2511 | READ_BUF(*len); | 2439 | goto out_overflow; |
2440 | *len = be32_to_cpup(p); | ||
2441 | p = xdr_inline_decode(xdr, *len); | ||
2442 | if (unlikely(!p)) | ||
2443 | goto out_overflow; | ||
2512 | *string = (char *)p; | 2444 | *string = (char *)p; |
2513 | return 0; | 2445 | return 0; |
2446 | out_overflow: | ||
2447 | print_overflow_msg(__func__, xdr); | ||
2448 | return -EIO; | ||
2514 | } | 2449 | } |
2515 | 2450 | ||
2516 | static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) | 2451 | static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) |
2517 | { | 2452 | { |
2518 | __be32 *p; | 2453 | __be32 *p; |
2519 | 2454 | ||
2520 | READ_BUF(8); | 2455 | p = xdr_inline_decode(xdr, 8); |
2521 | READ32(hdr->status); | 2456 | if (unlikely(!p)) |
2522 | READ32(hdr->taglen); | 2457 | goto out_overflow; |
2458 | hdr->status = be32_to_cpup(p++); | ||
2459 | hdr->taglen = be32_to_cpup(p); | ||
2523 | 2460 | ||
2524 | READ_BUF(hdr->taglen + 4); | 2461 | p = xdr_inline_decode(xdr, hdr->taglen + 4); |
2462 | if (unlikely(!p)) | ||
2463 | goto out_overflow; | ||
2525 | hdr->tag = (char *)p; | 2464 | hdr->tag = (char *)p; |
2526 | p += XDR_QUADLEN(hdr->taglen); | 2465 | p += XDR_QUADLEN(hdr->taglen); |
2527 | READ32(hdr->nops); | 2466 | hdr->nops = be32_to_cpup(p); |
2528 | if (unlikely(hdr->nops < 1)) | 2467 | if (unlikely(hdr->nops < 1)) |
2529 | return nfs4_stat_to_errno(hdr->status); | 2468 | return nfs4_stat_to_errno(hdr->status); |
2530 | return 0; | 2469 | return 0; |
2470 | out_overflow: | ||
2471 | print_overflow_msg(__func__, xdr); | ||
2472 | return -EIO; | ||
2531 | } | 2473 | } |
2532 | 2474 | ||
2533 | static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) | 2475 | static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) |
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) | |||
2536 | uint32_t opnum; | 2478 | uint32_t opnum; |
2537 | int32_t nfserr; | 2479 | int32_t nfserr; |
2538 | 2480 | ||
2539 | READ_BUF(8); | 2481 | p = xdr_inline_decode(xdr, 8); |
2540 | READ32(opnum); | 2482 | if (unlikely(!p)) |
2483 | goto out_overflow; | ||
2484 | opnum = be32_to_cpup(p++); | ||
2541 | if (opnum != expected) { | 2485 | if (opnum != expected) { |
2542 | dprintk("nfs: Server returned operation" | 2486 | dprintk("nfs: Server returned operation" |
2543 | " %d but we issued a request for %d\n", | 2487 | " %d but we issued a request for %d\n", |
2544 | opnum, expected); | 2488 | opnum, expected); |
2545 | return -EIO; | 2489 | return -EIO; |
2546 | } | 2490 | } |
2547 | READ32(nfserr); | 2491 | nfserr = be32_to_cpup(p); |
2548 | if (nfserr != NFS_OK) | 2492 | if (nfserr != NFS_OK) |
2549 | return nfs4_stat_to_errno(nfserr); | 2493 | return nfs4_stat_to_errno(nfserr); |
2550 | return 0; | 2494 | return 0; |
2495 | out_overflow: | ||
2496 | print_overflow_msg(__func__, xdr); | ||
2497 | return -EIO; | ||
2551 | } | 2498 | } |
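decode_op_hdr() keeps two failure classes apart: a wrong opcode or a truncated reply is a transport-level -EIO, while a well-formed NFS error status is translated to a local errno via nfs4_stat_to_errno(). A toy illustration of that split (the values and the mapping table are made up for the example):

#include <stdio.h>

enum { TOY_NFS_OK = 0, TOY_NFS4ERR_NOENT = 2 };

/* Stand-in for nfs4_stat_to_errno(): NFS status -> local errno. */
static int toy_stat_to_errno(int nfserr)
{
	switch (nfserr) {
	case TOY_NFS4ERR_NOENT: return -2;	/* -ENOENT */
	default:		 return -5;	/* -EIO    */
	}
}

static int toy_decode_op_hdr(int opnum, int expected, int nfserr)
{
	if (opnum != expected)
		return -5;		/* -EIO: malformed reply */
	if (nfserr != TOY_NFS_OK)
		return toy_stat_to_errno(nfserr);
	return 0;
}

int main(void)
{
	printf("%d\n", toy_decode_op_hdr(3, 3, TOY_NFS4ERR_NOENT)); /* -2 */
	printf("%d\n", toy_decode_op_hdr(4, 3, TOY_NFS_OK));        /* -5 */
	return 0;
}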
2552 | 2499 | ||
2553 | /* Dummy routine */ | 2500 | /* Dummy routine */ |
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) | |||
2557 | unsigned int strlen; | 2504 | unsigned int strlen; |
2558 | char *str; | 2505 | char *str; |
2559 | 2506 | ||
2560 | READ_BUF(12); | 2507 | p = xdr_inline_decode(xdr, 12); |
2561 | return decode_opaque_inline(xdr, &strlen, &str); | 2508 | if (likely(p)) |
2509 | return decode_opaque_inline(xdr, &strlen, &str); | ||
2510 | print_overflow_msg(__func__, xdr); | ||
2511 | return -EIO; | ||
2562 | } | 2512 | } |
2563 | 2513 | ||
2564 | static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) | 2514 | static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) |
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) | |||
2566 | uint32_t bmlen; | 2516 | uint32_t bmlen; |
2567 | __be32 *p; | 2517 | __be32 *p; |
2568 | 2518 | ||
2569 | READ_BUF(4); | 2519 | p = xdr_inline_decode(xdr, 4); |
2570 | READ32(bmlen); | 2520 | if (unlikely(!p)) |
2521 | goto out_overflow; | ||
2522 | bmlen = be32_to_cpup(p); | ||
2571 | 2523 | ||
2572 | bitmap[0] = bitmap[1] = 0; | 2524 | bitmap[0] = bitmap[1] = 0; |
2573 | READ_BUF((bmlen << 2)); | 2525 | p = xdr_inline_decode(xdr, (bmlen << 2)); |
2526 | if (unlikely(!p)) | ||
2527 | goto out_overflow; | ||
2574 | if (bmlen > 0) { | 2528 | if (bmlen > 0) { |
2575 | READ32(bitmap[0]); | 2529 | bitmap[0] = be32_to_cpup(p++); |
2576 | if (bmlen > 1) | 2530 | if (bmlen > 1) |
2577 | READ32(bitmap[1]); | 2531 | bitmap[1] = be32_to_cpup(p); |
2578 | } | 2532 | } |
2579 | return 0; | 2533 | return 0; |
2534 | out_overflow: | ||
2535 | print_overflow_msg(__func__, xdr); | ||
2536 | return -EIO; | ||
2580 | } | 2537 | } |
2581 | 2538 | ||
2582 | static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) | 2539 | static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) |
2583 | { | 2540 | { |
2584 | __be32 *p; | 2541 | __be32 *p; |
2585 | 2542 | ||
2586 | READ_BUF(4); | 2543 | p = xdr_inline_decode(xdr, 4); |
2587 | READ32(*attrlen); | 2544 | if (unlikely(!p)) |
2545 | goto out_overflow; | ||
2546 | *attrlen = be32_to_cpup(p); | ||
2588 | *savep = xdr->p; | 2547 | *savep = xdr->p; |
2589 | return 0; | 2548 | return 0; |
2549 | out_overflow: | ||
2550 | print_overflow_msg(__func__, xdr); | ||
2551 | return -EIO; | ||
2590 | } | 2552 | } |
2591 | 2553 | ||
2592 | static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) | 2554 | static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) |
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * | |||
2609 | if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) | 2571 | if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) |
2610 | return -EIO; | 2572 | return -EIO; |
2611 | if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { | 2573 | if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { |
2612 | READ_BUF(4); | 2574 | p = xdr_inline_decode(xdr, 4); |
2613 | READ32(*type); | 2575 | if (unlikely(!p)) |
2576 | goto out_overflow; | ||
2577 | *type = be32_to_cpup(p); | ||
2614 | if (*type < NF4REG || *type > NF4NAMEDATTR) { | 2578 | if (*type < NF4REG || *type > NF4NAMEDATTR) { |
2615 | dprintk("%s: bad type %d\n", __func__, *type); | 2579 | dprintk("%s: bad type %d\n", __func__, *type); |
2616 | return -EIO; | 2580 | return -EIO; |
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * | |||
2620 | } | 2584 | } |
2621 | dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); | 2585 | dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); |
2622 | return ret; | 2586 | return ret; |
2587 | out_overflow: | ||
2588 | print_overflow_msg(__func__, xdr); | ||
2589 | return -EIO; | ||
2623 | } | 2590 | } |
2624 | 2591 | ||
2625 | static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) | 2592 | static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) |
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t | |||
2631 | if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) | 2598 | if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) |
2632 | return -EIO; | 2599 | return -EIO; |
2633 | if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { | 2600 | if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { |
2634 | READ_BUF(8); | 2601 | p = xdr_inline_decode(xdr, 8); |
2635 | READ64(*change); | 2602 | if (unlikely(!p)) |
2603 | goto out_overflow; | ||
2604 | xdr_decode_hyper(p, change); | ||
2636 | bitmap[0] &= ~FATTR4_WORD0_CHANGE; | 2605 | bitmap[0] &= ~FATTR4_WORD0_CHANGE; |
2637 | ret = NFS_ATTR_FATTR_CHANGE; | 2606 | ret = NFS_ATTR_FATTR_CHANGE; |
2638 | } | 2607 | } |
2639 | dprintk("%s: change attribute=%Lu\n", __func__, | 2608 | dprintk("%s: change attribute=%Lu\n", __func__, |
2640 | (unsigned long long)*change); | 2609 | (unsigned long long)*change); |
2641 | return ret; | 2610 | return ret; |
2611 | out_overflow: | ||
2612 | print_overflow_msg(__func__, xdr); | ||
2613 | return -EIO; | ||
2642 | } | 2614 | } |
2643 | 2615 | ||
2644 | static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) | 2616 | static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) |
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t * | |||
2650 | if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) | 2622 | if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) |
2651 | return -EIO; | 2623 | return -EIO; |
2652 | if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { | 2624 | if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { |
2653 | READ_BUF(8); | 2625 | p = xdr_inline_decode(xdr, 8); |
2654 | READ64(*size); | 2626 | if (unlikely(!p)) |
2627 | goto out_overflow; | ||
2628 | xdr_decode_hyper(p, size); | ||
2655 | bitmap[0] &= ~FATTR4_WORD0_SIZE; | 2629 | bitmap[0] &= ~FATTR4_WORD0_SIZE; |
2656 | ret = NFS_ATTR_FATTR_SIZE; | 2630 | ret = NFS_ATTR_FATTR_SIZE; |
2657 | } | 2631 | } |
2658 | dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); | 2632 | dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); |
2659 | return ret; | 2633 | return ret; |
2634 | out_overflow: | ||
2635 | print_overflow_msg(__func__, xdr); | ||
2636 | return -EIO; | ||
2660 | } | 2637 | } |
2661 | 2638 | ||
2662 | static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 2639 | static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui | |||
2667 | if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) | 2644 | if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) |
2668 | return -EIO; | 2645 | return -EIO; |
2669 | if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { | 2646 | if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { |
2670 | READ_BUF(4); | 2647 | p = xdr_inline_decode(xdr, 4); |
2671 | READ32(*res); | 2648 | if (unlikely(!p)) |
2649 | goto out_overflow; | ||
2650 | *res = be32_to_cpup(p); | ||
2672 | bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; | 2651 | bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; |
2673 | } | 2652 | } |
2674 | dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); | 2653 | dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); |
2675 | return 0; | 2654 | return 0; |
2655 | out_overflow: | ||
2656 | print_overflow_msg(__func__, xdr); | ||
2657 | return -EIO; | ||
2676 | } | 2658 | } |
2677 | 2659 | ||
2678 | static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 2660 | static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, | |||
2683 | if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) | 2665 | if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) |
2684 | return -EIO; | 2666 | return -EIO; |
2685 | if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { | 2667 | if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { |
2686 | READ_BUF(4); | 2668 | p = xdr_inline_decode(xdr, 4); |
2687 | READ32(*res); | 2669 | if (unlikely(!p)) |
2670 | goto out_overflow; | ||
2671 | *res = be32_to_cpup(p); | ||
2688 | bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; | 2672 | bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; |
2689 | } | 2673 | } |
2690 | dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); | 2674 | dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); |
2691 | return 0; | 2675 | return 0; |
2676 | out_overflow: | ||
2677 | print_overflow_msg(__func__, xdr); | ||
2678 | return -EIO; | ||
2692 | } | 2679 | } |
2693 | 2680 | ||
2694 | static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) | 2681 | static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) |
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs | |||
2701 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) | 2688 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) |
2702 | return -EIO; | 2689 | return -EIO; |
2703 | if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { | 2690 | if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { |
2704 | READ_BUF(16); | 2691 | p = xdr_inline_decode(xdr, 16); |
2705 | READ64(fsid->major); | 2692 | if (unlikely(!p)) |
2706 | READ64(fsid->minor); | 2693 | goto out_overflow; |
2694 | p = xdr_decode_hyper(p, &fsid->major); | ||
2695 | xdr_decode_hyper(p, &fsid->minor); | ||
2707 | bitmap[0] &= ~FATTR4_WORD0_FSID; | 2696 | bitmap[0] &= ~FATTR4_WORD0_FSID; |
2708 | ret = NFS_ATTR_FATTR_FSID; | 2697 | ret = NFS_ATTR_FATTR_FSID; |
2709 | } | 2698 | } |
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs | |||
2711 | (unsigned long long)fsid->major, | 2700 | (unsigned long long)fsid->major, |
2712 | (unsigned long long)fsid->minor); | 2701 | (unsigned long long)fsid->minor); |
2713 | return ret; | 2702 | return ret; |
2703 | out_overflow: | ||
2704 | print_overflow_msg(__func__, xdr); | ||
2705 | return -EIO; | ||
2714 | } | 2706 | } |
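The 64-bit attributes switch from the READ64 macro to xdr_decode_hyper(), which assembles a big-endian XDR hyper from two 32-bit words and returns the advanced pointer, so consecutive fields chain naturally, as in decode_attr_fsid() just above. A sketch of the idea (toy_decode_hyper models the sunrpc helper; it is not the kernel function):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* An XDR "hyper" is two big-endian 32-bit words, high word first.
 * Returning the advanced pointer lets callers chain decodes. */
static uint32_t *toy_decode_hyper(uint32_t *p, uint64_t *v)
{
	*v = (uint64_t)ntohl(*p++) << 32;
	*v |= ntohl(*p++);
	return p;
}

int main(void)
{
	uint32_t buf[4] = { htonl(0x1), htonl(0x2), htonl(0x3), htonl(0x4) };
	uint64_t major, minor;
	uint32_t *p = buf;

	p = toy_decode_hyper(p, &major);	/* 0x100000002 */
	toy_decode_hyper(p, &minor);		/* 0x300000004 */
	printf("%llx %llx\n", (unsigned long long)major,
	       (unsigned long long)minor);
	return 0;
}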
2715 | 2707 | ||
2716 | static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 2708 | static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint | |||
2721 | if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) | 2713 | if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) |
2722 | return -EIO; | 2714 | return -EIO; |
2723 | if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { | 2715 | if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { |
2724 | READ_BUF(4); | 2716 | p = xdr_inline_decode(xdr, 4); |
2725 | READ32(*res); | 2717 | if (unlikely(!p)) |
2718 | goto out_overflow; | ||
2719 | *res = be32_to_cpup(p); | ||
2726 | bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; | 2720 | bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; |
2727 | } | 2721 | } |
2728 | dprintk("%s: lease time=%u\n", __func__, (unsigned int)*res); | 2722 | dprintk("%s: lease time=%u\n", __func__, (unsigned int)*res); |
2729 | return 0; | 2723 | return 0; |
2724 | out_overflow: | ||
2725 | print_overflow_msg(__func__, xdr); | ||
2726 | return -EIO; | ||
2730 | } | 2727 | } |
2731 | 2728 | ||
2732 | static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 2729 | static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint | |||
2737 | if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) | 2734 | if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) |
2738 | return -EIO; | 2735 | return -EIO; |
2739 | if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { | 2736 | if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { |
2740 | READ_BUF(4); | 2737 | p = xdr_inline_decode(xdr, 4); |
2741 | READ32(*res); | 2738 | if (unlikely(!p)) |
2739 | goto out_overflow; | ||
2740 | *res = be32_to_cpup(p); | ||
2742 | bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; | 2741 | bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; |
2743 | } | 2742 | } |
2744 | dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); | 2743 | dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); |
2745 | return 0; | 2744 | return 0; |
2745 | out_overflow: | ||
2746 | print_overflow_msg(__func__, xdr); | ||
2747 | return -EIO; | ||
2746 | } | 2748 | } |
2747 | 2749 | ||
2748 | static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) | 2750 | static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) |
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t | |||
2754 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) | 2756 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) |
2755 | return -EIO; | 2757 | return -EIO; |
2756 | if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { | 2758 | if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { |
2757 | READ_BUF(8); | 2759 | p = xdr_inline_decode(xdr, 8); |
2758 | READ64(*fileid); | 2760 | if (unlikely(!p)) |
2761 | goto out_overflow; | ||
2762 | xdr_decode_hyper(p, fileid); | ||
2759 | bitmap[0] &= ~FATTR4_WORD0_FILEID; | 2763 | bitmap[0] &= ~FATTR4_WORD0_FILEID; |
2760 | ret = NFS_ATTR_FATTR_FILEID; | 2764 | ret = NFS_ATTR_FATTR_FILEID; |
2761 | } | 2765 | } |
2762 | dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); | 2766 | dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); |
2763 | return ret; | 2767 | return ret; |
2768 | out_overflow: | ||
2769 | print_overflow_msg(__func__, xdr); | ||
2770 | return -EIO; | ||
2764 | } | 2771 | } |
2765 | 2772 | ||
2766 | static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) | 2773 | static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) |
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma | |||
2772 | if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) | 2779 | if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) |
2773 | return -EIO; | 2780 | return -EIO; |
2774 | if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { | 2781 | if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { |
2775 | READ_BUF(8); | 2782 | p = xdr_inline_decode(xdr, 8); |
2776 | READ64(*fileid); | 2783 | if (unlikely(!p)) |
2784 | goto out_overflow; | ||
2785 | xdr_decode_hyper(p, fileid); | ||
2777 | bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; | 2786 | bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; |
2778 | ret = NFS_ATTR_FATTR_FILEID; | 2787 | ret = NFS_ATTR_FATTR_FILEID; |
2779 | } | 2788 | } |
2780 | dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); | 2789 | dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); |
2781 | return ret; | 2790 | return ret; |
2791 | out_overflow: | ||
2792 | print_overflow_msg(__func__, xdr); | ||
2793 | return -EIO; | ||
2782 | } | 2794 | } |
2783 | 2795 | ||
2784 | static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) | 2796 | static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) |
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin | |||
2790 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) | 2802 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) |
2791 | return -EIO; | 2803 | return -EIO; |
2792 | if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { | 2804 | if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { |
2793 | READ_BUF(8); | 2805 | p = xdr_inline_decode(xdr, 8); |
2794 | READ64(*res); | 2806 | if (unlikely(!p)) |
2807 | goto out_overflow; | ||
2808 | xdr_decode_hyper(p, res); | ||
2795 | bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; | 2809 | bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; |
2796 | } | 2810 | } |
2797 | dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); | 2811 | dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); |
2798 | return status; | 2812 | return status; |
2813 | out_overflow: | ||
2814 | print_overflow_msg(__func__, xdr); | ||
2815 | return -EIO; | ||
2799 | } | 2816 | } |
2800 | 2817 | ||
2801 | static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) | 2818 | static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) |
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint | |||
2807 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) | 2824 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) |
2808 | return -EIO; | 2825 | return -EIO; |
2809 | if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { | 2826 | if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { |
2810 | READ_BUF(8); | 2827 | p = xdr_inline_decode(xdr, 8); |
2811 | READ64(*res); | 2828 | if (unlikely(!p)) |
2829 | goto out_overflow; | ||
2830 | xdr_decode_hyper(p, res); | ||
2812 | bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; | 2831 | bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; |
2813 | } | 2832 | } |
2814 | dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); | 2833 | dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); |
2815 | return status; | 2834 | return status; |
2835 | out_overflow: | ||
2836 | print_overflow_msg(__func__, xdr); | ||
2837 | return -EIO; | ||
2816 | } | 2838 | } |
2817 | 2839 | ||
2818 | static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) | 2840 | static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) |
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin | |||
2824 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) | 2846 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) |
2825 | return -EIO; | 2847 | return -EIO; |
2826 | if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { | 2848 | if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { |
2827 | READ_BUF(8); | 2849 | p = xdr_inline_decode(xdr, 8); |
2828 | READ64(*res); | 2850 | if (unlikely(!p)) |
2851 | goto out_overflow; | ||
2852 | xdr_decode_hyper(p, res); | ||
2829 | bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; | 2853 | bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; |
2830 | } | 2854 | } |
2831 | dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); | 2855 | dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); |
2832 | return status; | 2856 | return status; |
2857 | out_overflow: | ||
2858 | print_overflow_msg(__func__, xdr); | ||
2859 | return -EIO; | ||
2833 | } | 2860 | } |
2834 | 2861 | ||
2835 | static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) | 2862 | static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) |
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) | |||
2838 | __be32 *p; | 2865 | __be32 *p; |
2839 | int status = 0; | 2866 | int status = 0; |
2840 | 2867 | ||
2841 | READ_BUF(4); | 2868 | p = xdr_inline_decode(xdr, 4); |
2842 | READ32(n); | 2869 | if (unlikely(!p)) |
2870 | goto out_overflow; | ||
2871 | n = be32_to_cpup(p); | ||
2843 | if (n == 0) | 2872 | if (n == 0) |
2844 | goto root_path; | 2873 | goto root_path; |
2845 | dprintk("path "); | 2874 | dprintk("path "); |
@@ -2873,6 +2902,9 @@ out_eio: | |||
2873 | dprintk(" status %d", status); | 2902 | dprintk(" status %d", status); |
2874 | status = -EIO; | 2903 | status = -EIO; |
2875 | goto out; | 2904 | goto out; |
2905 | out_overflow: | ||
2906 | print_overflow_msg(__func__, xdr); | ||
2907 | return -EIO; | ||
2876 | } | 2908 | } |
2877 | 2909 | ||
2878 | static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) | 2910 | static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) |
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
2890 | status = decode_pathname(xdr, &res->fs_path); | 2922 | status = decode_pathname(xdr, &res->fs_path); |
2891 | if (unlikely(status != 0)) | 2923 | if (unlikely(status != 0)) |
2892 | goto out; | 2924 | goto out; |
2893 | READ_BUF(4); | 2925 | p = xdr_inline_decode(xdr, 4); |
2894 | READ32(n); | 2926 | if (unlikely(!p)) |
2927 | goto out_overflow; | ||
2928 | n = be32_to_cpup(p); | ||
2895 | if (n <= 0) | 2929 | if (n <= 0) |
2896 | goto out_eio; | 2930 | goto out_eio; |
2897 | res->nlocations = 0; | 2931 | res->nlocations = 0; |
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
2899 | u32 m; | 2933 | u32 m; |
2900 | struct nfs4_fs_location *loc = &res->locations[res->nlocations]; | 2934 | struct nfs4_fs_location *loc = &res->locations[res->nlocations]; |
2901 | 2935 | ||
2902 | READ_BUF(4); | 2936 | p = xdr_inline_decode(xdr, 4); |
2903 | READ32(m); | 2937 | if (unlikely(!p)) |
2938 | goto out_overflow; | ||
2939 | m = be32_to_cpup(p); | ||
2904 | 2940 | ||
2905 | loc->nservers = 0; | 2941 | loc->nservers = 0; |
2906 | dprintk("%s: servers ", __func__); | 2942 | dprintk("%s: servers ", __func__); |
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
2939 | out: | 2975 | out: |
2940 | dprintk("%s: fs_locations done, error = %d\n", __func__, status); | 2976 | dprintk("%s: fs_locations done, error = %d\n", __func__, status); |
2941 | return status; | 2977 | return status; |
2978 | out_overflow: | ||
2979 | print_overflow_msg(__func__, xdr); | ||
2942 | out_eio: | 2980 | out_eio: |
2943 | status = -EIO; | 2981 | status = -EIO; |
2944 | goto out; | 2982 | goto out; |
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin | |||
2953 | if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) | 2991 | if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) |
2954 | return -EIO; | 2992 | return -EIO; |
2955 | if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { | 2993 | if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { |
2956 | READ_BUF(8); | 2994 | p = xdr_inline_decode(xdr, 8); |
2957 | READ64(*res); | 2995 | if (unlikely(!p)) |
2996 | goto out_overflow; | ||
2997 | xdr_decode_hyper(p, res); | ||
2958 | bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; | 2998 | bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; |
2959 | } | 2999 | } |
2960 | dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); | 3000 | dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); |
2961 | return status; | 3001 | return status; |
3002 | out_overflow: | ||
3003 | print_overflow_msg(__func__, xdr); | ||
3004 | return -EIO; | ||
2962 | } | 3005 | } |
2963 | 3006 | ||
2964 | static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) | 3007 | static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) |
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ | |||
2970 | if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) | 3013 | if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) |
2971 | return -EIO; | 3014 | return -EIO; |
2972 | if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { | 3015 | if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { |
2973 | READ_BUF(4); | 3016 | p = xdr_inline_decode(xdr, 4); |
2974 | READ32(*maxlink); | 3017 | if (unlikely(!p)) |
3018 | goto out_overflow; | ||
3019 | *maxlink = be32_to_cpup(p); | ||
2975 | bitmap[0] &= ~FATTR4_WORD0_MAXLINK; | 3020 | bitmap[0] &= ~FATTR4_WORD0_MAXLINK; |
2976 | } | 3021 | } |
2977 | dprintk("%s: maxlink=%u\n", __func__, *maxlink); | 3022 | dprintk("%s: maxlink=%u\n", __func__, *maxlink); |
2978 | return status; | 3023 | return status; |
3024 | out_overflow: | ||
3025 | print_overflow_msg(__func__, xdr); | ||
3026 | return -EIO; | ||
2979 | } | 3027 | } |
2980 | 3028 | ||
2981 | static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) | 3029 | static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) |
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ | |||
2987 | if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) | 3035 | if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) |
2988 | return -EIO; | 3036 | return -EIO; |
2989 | if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { | 3037 | if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { |
2990 | READ_BUF(4); | 3038 | p = xdr_inline_decode(xdr, 4); |
2991 | READ32(*maxname); | 3039 | if (unlikely(!p)) |
3040 | goto out_overflow; | ||
3041 | *maxname = be32_to_cpup(p); | ||
2992 | bitmap[0] &= ~FATTR4_WORD0_MAXNAME; | 3042 | bitmap[0] &= ~FATTR4_WORD0_MAXNAME; |
2993 | } | 3043 | } |
2994 | dprintk("%s: maxname=%u\n", __func__, *maxname); | 3044 | dprintk("%s: maxname=%u\n", __func__, *maxname); |
2995 | return status; | 3045 | return status; |
3046 | out_overflow: | ||
3047 | print_overflow_msg(__func__, xdr); | ||
3048 | return -EIO; | ||
2996 | } | 3049 | } |
2997 | 3050 | ||
2998 | static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 3051 | static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ | |||
3005 | return -EIO; | 3058 | return -EIO; |
3006 | if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { | 3059 | if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { |
3007 | uint64_t maxread; | 3060 | uint64_t maxread; |
3008 | READ_BUF(8); | 3061 | p = xdr_inline_decode(xdr, 8); |
3009 | READ64(maxread); | 3062 | if (unlikely(!p)) |
3063 | goto out_overflow; | ||
3064 | xdr_decode_hyper(p, &maxread); | ||
3010 | if (maxread > 0x7FFFFFFF) | 3065 | if (maxread > 0x7FFFFFFF) |
3011 | maxread = 0x7FFFFFFF; | 3066 | maxread = 0x7FFFFFFF; |
3012 | *res = (uint32_t)maxread; | 3067 | *res = (uint32_t)maxread; |
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ | |||
3014 | } | 3069 | } |
3015 | dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); | 3070 | dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); |
3016 | return status; | 3071 | return status; |
3072 | out_overflow: | ||
3073 | print_overflow_msg(__func__, xdr); | ||
3074 | return -EIO; | ||
3017 | } | 3075 | } |
3018 | 3076 | ||
3019 | static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 3077 | static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 | |||
3026 | return -EIO; | 3084 | return -EIO; |
3027 | if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { | 3085 | if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { |
3028 | uint64_t maxwrite; | 3086 | uint64_t maxwrite; |
3029 | READ_BUF(8); | 3087 | p = xdr_inline_decode(xdr, 8); |
3030 | READ64(maxwrite); | 3088 | if (unlikely(!p)) |
3089 | goto out_overflow; | ||
3090 | xdr_decode_hyper(p, &maxwrite); | ||
3031 | if (maxwrite > 0x7FFFFFFF) | 3091 | if (maxwrite > 0x7FFFFFFF) |
3032 | maxwrite = 0x7FFFFFFF; | 3092 | maxwrite = 0x7FFFFFFF; |
3033 | *res = (uint32_t)maxwrite; | 3093 | *res = (uint32_t)maxwrite; |
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 | |||
3035 | } | 3095 | } |
3036 | dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); | 3096 | dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); |
3037 | return status; | 3097 | return status; |
3098 | out_overflow: | ||
3099 | print_overflow_msg(__func__, xdr); | ||
3100 | return -EIO; | ||
3038 | } | 3101 | } |
3039 | 3102 | ||
3040 | static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) | 3103 | static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) |
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m | |||
3047 | if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) | 3110 | if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) |
3048 | return -EIO; | 3111 | return -EIO; |
3049 | if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { | 3112 | if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { |
3050 | READ_BUF(4); | 3113 | p = xdr_inline_decode(xdr, 4); |
3051 | READ32(tmp); | 3114 | if (unlikely(!p)) |
3115 | goto out_overflow; | ||
3116 | tmp = be32_to_cpup(p); | ||
3052 | *mode = tmp & ~S_IFMT; | 3117 | *mode = tmp & ~S_IFMT; |
3053 | bitmap[1] &= ~FATTR4_WORD1_MODE; | 3118 | bitmap[1] &= ~FATTR4_WORD1_MODE; |
3054 | ret = NFS_ATTR_FATTR_MODE; | 3119 | ret = NFS_ATTR_FATTR_MODE; |
3055 | } | 3120 | } |
3056 | dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); | 3121 | dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); |
3057 | return ret; | 3122 | return ret; |
3123 | out_overflow: | ||
3124 | print_overflow_msg(__func__, xdr); | ||
3125 | return -EIO; | ||
3058 | } | 3126 | } |
3059 | 3127 | ||
3060 | static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) | 3128 | static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) |
@@ -3066,16 +3134,22 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t | |||
3066 | if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) | 3134 | if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) |
3067 | return -EIO; | 3135 | return -EIO; |
3068 | if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { | 3136 | if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { |
3069 | READ_BUF(4); | 3137 | p = xdr_inline_decode(xdr, 4); |
3070 | READ32(*nlink); | 3138 | if (unlikely(!p)) |
3139 | goto out_overflow; | ||
3140 | *nlink = be32_to_cpup(p); | ||
3071 | bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; | 3141 | bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; |
3072 | ret = NFS_ATTR_FATTR_NLINK; | 3142 | ret = NFS_ATTR_FATTR_NLINK; |
3073 | } | 3143 | } |
3074 | dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); | 3144 | dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); |
3075 | return ret; | 3145 | return ret; |
3146 | out_overflow: | ||
3147 | print_overflow_msg(__func__, xdr); | ||
3148 | return -EIO; | ||
3076 | } | 3149 | } |
3077 | 3150 | ||
3078 | static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) | 3151 | static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, |
3152 | struct nfs_client *clp, uint32_t *uid, int may_sleep) | ||
3079 | { | 3153 | { |
3080 | uint32_t len; | 3154 | uint32_t len; |
3081 | __be32 *p; | 3155 | __be32 *p; |
@@ -3085,10 +3159,16 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf | |||
3085 | if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) | 3159 | if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) |
3086 | return -EIO; | 3160 | return -EIO; |
3087 | if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { | 3161 | if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { |
3088 | READ_BUF(4); | 3162 | p = xdr_inline_decode(xdr, 4); |
3089 | READ32(len); | 3163 | if (unlikely(!p)) |
3090 | READ_BUF(len); | 3164 | goto out_overflow; |
3091 | if (len < XDR_MAX_NETOBJ) { | 3165 | len = be32_to_cpup(p); |
3166 | p = xdr_inline_decode(xdr, len); | ||
3167 | if (unlikely(!p)) | ||
3168 | goto out_overflow; | ||
3169 | if (!may_sleep) { | ||
3170 | /* do nothing */ | ||
3171 | } else if (len < XDR_MAX_NETOBJ) { | ||
3092 | if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) | 3172 | if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) |
3093 | ret = NFS_ATTR_FATTR_OWNER; | 3173 | ret = NFS_ATTR_FATTR_OWNER; |
3094 | else | 3174 | else |
@@ -3101,9 +3181,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf | |||
3101 | } | 3181 | } |
3102 | dprintk("%s: uid=%d\n", __func__, (int)*uid); | 3182 | dprintk("%s: uid=%d\n", __func__, (int)*uid); |
3103 | return ret; | 3183 | return ret; |
3184 | out_overflow: | ||
3185 | print_overflow_msg(__func__, xdr); | ||
3186 | return -EIO; | ||
3104 | } | 3187 | } |
3105 | 3188 | ||
3106 | static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) | 3189 | static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, |
3190 | struct nfs_client *clp, uint32_t *gid, int may_sleep) | ||
3107 | { | 3191 | { |
3108 | uint32_t len; | 3192 | uint32_t len; |
3109 | __be32 *p; | 3193 | __be32 *p; |
@@ -3113,10 +3197,16 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf | |||
3113 | if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) | 3197 | if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) |
3114 | return -EIO; | 3198 | return -EIO; |
3115 | if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { | 3199 | if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { |
3116 | READ_BUF(4); | 3200 | p = xdr_inline_decode(xdr, 4); |
3117 | READ32(len); | 3201 | if (unlikely(!p)) |
3118 | READ_BUF(len); | 3202 | goto out_overflow; |
3119 | if (len < XDR_MAX_NETOBJ) { | 3203 | len = be32_to_cpup(p); |
3204 | p = xdr_inline_decode(xdr, len); | ||
3205 | if (unlikely(!p)) | ||
3206 | goto out_overflow; | ||
3207 | if (!may_sleep) { | ||
3208 | /* do nothing */ | ||
3209 | } else if (len < XDR_MAX_NETOBJ) { | ||
3120 | if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) | 3210 | if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) |
3121 | ret = NFS_ATTR_FATTR_GROUP; | 3211 | ret = NFS_ATTR_FATTR_GROUP; |
3122 | else | 3212 | else |
@@ -3129,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf | |||
3129 | } | 3219 | } |
3130 | dprintk("%s: gid=%d\n", __func__, (int)*gid); | 3220 | dprintk("%s: gid=%d\n", __func__, (int)*gid); |
3131 | return ret; | 3221 | return ret; |
3222 | out_overflow: | ||
3223 | print_overflow_msg(__func__, xdr); | ||
3224 | return -EIO; | ||
3132 | } | 3225 | } |
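decode_attr_owner() and decode_attr_group() grow a may_sleep argument because mapping a name to a uid/gid can mean an upcall to the userspace idmapper, which may block. The owner string is still consumed from the stream either way, so the attributes that follow stay aligned; only the lookup is skipped. A toy model of that contract (the lookup function is a stub standing in for nfs_map_name_to_uid):

#include <stdio.h>
#include <string.h>

/* Stands in for the idmapper upcall, which can sleep. */
static int toy_name_to_uid(const char *name, unsigned int *uid)
{
	if (strcmp(name, "alice") == 0) {
		*uid = 1000;
		return 0;
	}
	return -1;
}

static int toy_decode_owner(const char *name, unsigned int *uid,
			    int may_sleep)
{
	int valid = 0;

	/* The name was already pulled off the wire by this point;
	 * only the potentially blocking lookup is conditional. */
	if (may_sleep && toy_name_to_uid(name, uid) == 0)
		valid = 1;	/* NFS_ATTR_FATTR_OWNER in the real code */
	return valid;
}

int main(void)
{
	unsigned int uid = 0;

	printf("%d uid=%u\n", toy_decode_owner("alice", &uid, 1), uid);
	printf("%d uid=%u\n", toy_decode_owner("alice", &uid, 0), uid);
	return 0;
}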
3133 | 3226 | ||
3134 | static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) | 3227 | static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) |
@@ -3143,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde | |||
3143 | if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { | 3236 | if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { |
3144 | dev_t tmp; | 3237 | dev_t tmp; |
3145 | 3238 | ||
3146 | READ_BUF(8); | 3239 | p = xdr_inline_decode(xdr, 8); |
3147 | READ32(major); | 3240 | if (unlikely(!p)) |
3148 | READ32(minor); | 3241 | goto out_overflow; |
3242 | major = be32_to_cpup(p++); | ||
3243 | minor = be32_to_cpup(p); | ||
3149 | tmp = MKDEV(major, minor); | 3244 | tmp = MKDEV(major, minor); |
3150 | if (MAJOR(tmp) == major && MINOR(tmp) == minor) | 3245 | if (MAJOR(tmp) == major && MINOR(tmp) == minor) |
3151 | *rdev = tmp; | 3246 | *rdev = tmp; |
@@ -3154,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde | |||
3154 | } | 3249 | } |
3155 | dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); | 3250 | dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); |
3156 | return ret; | 3251 | return ret; |
3252 | out_overflow: | ||
3253 | print_overflow_msg(__func__, xdr); | ||
3254 | return -EIO; | ||
3157 | } | 3255 | } |
3158 | 3256 | ||
3159 | static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) | 3257 | static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) |
@@ -3165,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin | |||
3165 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) | 3263 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) |
3166 | return -EIO; | 3264 | return -EIO; |
3167 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { | 3265 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { |
3168 | READ_BUF(8); | 3266 | p = xdr_inline_decode(xdr, 8); |
3169 | READ64(*res); | 3267 | if (unlikely(!p)) |
3268 | goto out_overflow; | ||
3269 | xdr_decode_hyper(p, res); | ||
3170 | bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; | 3270 | bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; |
3171 | } | 3271 | } |
3172 | dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); | 3272 | dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); |
3173 | return status; | 3273 | return status; |
3274 | out_overflow: | ||
3275 | print_overflow_msg(__func__, xdr); | ||
3276 | return -EIO; | ||
3174 | } | 3277 | } |
3175 | 3278 | ||
3176 | static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) | 3279 | static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) |
@@ -3182,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint | |||
3182 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) | 3285 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) |
3183 | return -EIO; | 3286 | return -EIO; |
3184 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { | 3287 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { |
3185 | READ_BUF(8); | 3288 | p = xdr_inline_decode(xdr, 8); |
3186 | READ64(*res); | 3289 | if (unlikely(!p)) |
3290 | goto out_overflow; | ||
3291 | xdr_decode_hyper(p, res); | ||
3187 | bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; | 3292 | bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; |
3188 | } | 3293 | } |
3189 | dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); | 3294 | dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); |
3190 | return status; | 3295 | return status; |
3296 | out_overflow: | ||
3297 | print_overflow_msg(__func__, xdr); | ||
3298 | return -EIO; | ||
3191 | } | 3299 | } |
3192 | 3300 | ||
3193 | static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) | 3301 | static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) |
@@ -3199,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin | |||
3199 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) | 3307 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) |
3200 | return -EIO; | 3308 | return -EIO; |
3201 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { | 3309 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { |
3202 | READ_BUF(8); | 3310 | p = xdr_inline_decode(xdr, 8); |
3203 | READ64(*res); | 3311 | if (unlikely(!p)) |
3312 | goto out_overflow; | ||
3313 | xdr_decode_hyper(p, res); | ||
3204 | bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; | 3314 | bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; |
3205 | } | 3315 | } |
3206 | dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); | 3316 | dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); |
3207 | return status; | 3317 | return status; |
3318 | out_overflow: | ||
3319 | print_overflow_msg(__func__, xdr); | ||
3320 | return -EIO; | ||
3208 | } | 3321 | } |
3209 | 3322 | ||
3210 | static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) | 3323 | static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) |
@@ -3216,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint | |||
3216 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) | 3329 | if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) |
3217 | return -EIO; | 3330 | return -EIO; |
3218 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { | 3331 | if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { |
3219 | READ_BUF(8); | 3332 | p = xdr_inline_decode(xdr, 8); |
3220 | READ64(*used); | 3333 | if (unlikely(!p)) |
3334 | goto out_overflow; | ||
3335 | xdr_decode_hyper(p, used); | ||
3221 | bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; | 3336 | bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; |
3222 | ret = NFS_ATTR_FATTR_SPACE_USED; | 3337 | ret = NFS_ATTR_FATTR_SPACE_USED; |
3223 | } | 3338 | } |
3224 | dprintk("%s: space used=%Lu\n", __func__, | 3339 | dprintk("%s: space used=%Lu\n", __func__, |
3225 | (unsigned long long)*used); | 3340 | (unsigned long long)*used); |
3226 | return ret; | 3341 | return ret; |
3342 | out_overflow: | ||
3343 | print_overflow_msg(__func__, xdr); | ||
3344 | return -EIO; | ||
3227 | } | 3345 | } |
3228 | 3346 | ||
3229 | static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) | 3347 | static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) |
@@ -3232,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) | |||
3232 | uint64_t sec; | 3350 | uint64_t sec; |
3233 | uint32_t nsec; | 3351 | uint32_t nsec; |
3234 | 3352 | ||
3235 | READ_BUF(12); | 3353 | p = xdr_inline_decode(xdr, 12); |
3236 | READ64(sec); | 3354 | if (unlikely(!p)) |
3237 | READ32(nsec); | 3355 | goto out_overflow; |
3356 | p = xdr_decode_hyper(p, &sec); | ||
3357 | nsec = be32_to_cpup(p); | ||
3238 | time->tv_sec = (time_t)sec; | 3358 | time->tv_sec = (time_t)sec; |
3239 | time->tv_nsec = (long)nsec; | 3359 | time->tv_nsec = (long)nsec; |
3240 | return 0; | 3360 | return 0; |
3361 | out_overflow: | ||
3362 | print_overflow_msg(__func__, xdr); | ||
3363 | return -EIO; | ||
3241 | } | 3364 | } |
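decode_attr_time() reads 12 bytes because an nfstime4 on the wire is a 64-bit seconds field followed by a 32-bit nanoseconds field, both big-endian. A small standalone decode of that layout (the struct and function names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

struct toy_timespec { long long tv_sec; long tv_nsec; };

/* 12 wire bytes: 8-byte seconds (hyper), then 4-byte nseconds. */
static void toy_decode_time(const uint32_t *p, struct toy_timespec *ts)
{
	uint64_t sec = (uint64_t)ntohl(p[0]) << 32 | ntohl(p[1]);

	ts->tv_sec = (long long)sec;
	ts->tv_nsec = (long)ntohl(p[2]);
}

int main(void)
{
	uint32_t wire[3] = { htonl(0), htonl(1257000000), htonl(500) };
	struct toy_timespec ts;

	toy_decode_time(wire, &ts);
	printf("%lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
	return 0;
}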
3242 | 3365 | ||
3243 | static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) | 3366 | static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) |
@@ -3315,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c | |||
3315 | { | 3438 | { |
3316 | __be32 *p; | 3439 | __be32 *p; |
3317 | 3440 | ||
3318 | READ_BUF(20); | 3441 | p = xdr_inline_decode(xdr, 20); |
3319 | READ32(cinfo->atomic); | 3442 | if (unlikely(!p)) |
3320 | READ64(cinfo->before); | 3443 | goto out_overflow; |
3321 | READ64(cinfo->after); | 3444 | cinfo->atomic = be32_to_cpup(p++); |
3445 | p = xdr_decode_hyper(p, &cinfo->before); | ||
3446 | xdr_decode_hyper(p, &cinfo->after); | ||
3322 | return 0; | 3447 | return 0; |
3448 | out_overflow: | ||
3449 | print_overflow_msg(__func__, xdr); | ||
3450 | return -EIO; | ||
3323 | } | 3451 | } |
3324 | 3452 | ||
3325 | static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) | 3453 | static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) |
@@ -3331,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) | |||
3331 | status = decode_op_hdr(xdr, OP_ACCESS); | 3459 | status = decode_op_hdr(xdr, OP_ACCESS); |
3332 | if (status) | 3460 | if (status) |
3333 | return status; | 3461 | return status; |
3334 | READ_BUF(8); | 3462 | p = xdr_inline_decode(xdr, 8); |
3335 | READ32(supp); | 3463 | if (unlikely(!p)) |
3336 | READ32(acc); | 3464 | goto out_overflow; |
3465 | supp = be32_to_cpup(p++); | ||
3466 | acc = be32_to_cpup(p); | ||
3337 | access->supported = supp; | 3467 | access->supported = supp; |
3338 | access->access = acc; | 3468 | access->access = acc; |
3339 | return 0; | 3469 | return 0; |
3470 | out_overflow: | ||
3471 | print_overflow_msg(__func__, xdr); | ||
3472 | return -EIO; | ||
3340 | } | 3473 | } |
3341 | 3474 | ||
3342 | static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) | 3475 | static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) |
3343 | { | 3476 | { |
3344 | __be32 *p; | 3477 | __be32 *p; |
3478 | |||
3479 | p = xdr_inline_decode(xdr, len); | ||
3480 | if (likely(p)) { | ||
3481 | memcpy(buf, p, len); | ||
3482 | return 0; | ||
3483 | } | ||
3484 | print_overflow_msg(__func__, xdr); | ||
3485 | return -EIO; | ||
3486 | } | ||
3487 | |||
3488 | static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
3489 | { | ||
3490 | return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); | ||
3491 | } | ||
3492 | |||
3493 | static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) | ||
3494 | { | ||
3345 | int status; | 3495 | int status; |
3346 | 3496 | ||
3347 | status = decode_op_hdr(xdr, OP_CLOSE); | 3497 | status = decode_op_hdr(xdr, OP_CLOSE); |
3348 | if (status != -EIO) | 3498 | if (status != -EIO) |
3349 | nfs_increment_open_seqid(status, res->seqid); | 3499 | nfs_increment_open_seqid(status, res->seqid); |
3350 | if (status) | 3500 | if (!status) |
3351 | return status; | 3501 | status = decode_stateid(xdr, &res->stateid); |
3352 | READ_BUF(NFS4_STATEID_SIZE); | 3502 | return status; |
3353 | COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); | 3503 | } |
3354 | return 0; | 3504 | |
3505 | static int decode_verifier(struct xdr_stream *xdr, void *verifier) | ||
3506 | { | ||
3507 | return decode_opaque_fixed(xdr, verifier, 8); | ||
3355 | } | 3508 | } |
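The new decode_opaque_fixed() helper replaces the repeated READ_BUF/COPYMEM pairs with a single bounds-checked copy, and decode_stateid() and decode_verifier() become one-liners on top of it. A userspace sketch of the same layering, assuming byte-oriented toy types rather than the kernel's struct xdr_stream and nfs4_stateid:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_xdr {
	uint8_t *p;
	uint8_t *end;
};

/* One bounds-checked copy shared by every fixed-size blob. Sizes
 * below mirror NFS4_STATEID_SIZE (16) and the 8-byte verifier. */
static int toy_decode_opaque_fixed(struct toy_xdr *xdr, void *buf, size_t len)
{
	if ((size_t)(xdr->end - xdr->p) < len)
		return -5;			/* stands in for -EIO */
	memcpy(buf, xdr->p, len);
	xdr->p += (len + 3) & ~(size_t)3;	/* keep 4-byte alignment */
	return 0;
}

static int toy_decode_stateid(struct toy_xdr *xdr, uint8_t stateid[16])
{
	return toy_decode_opaque_fixed(xdr, stateid, 16);
}

static int toy_decode_verifier(struct toy_xdr *xdr, uint8_t verf[8])
{
	return toy_decode_opaque_fixed(xdr, verf, 8);
}

int main(void)
{
	uint8_t wire[24] = { 1, 2, 3 }, stateid[16], verf[8];
	struct toy_xdr xdr = { wire, wire + sizeof(wire) };

	printf("%d %d %u\n", toy_decode_stateid(&xdr, stateid),
	       toy_decode_verifier(&xdr, verf), stateid[0]);
	return 0;
}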
3356 | 3509 | ||
3357 | static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) | 3510 | static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) |
3358 | { | 3511 | { |
3359 | __be32 *p; | ||
3360 | int status; | 3512 | int status; |
3361 | 3513 | ||
3362 | status = decode_op_hdr(xdr, OP_COMMIT); | 3514 | status = decode_op_hdr(xdr, OP_COMMIT); |
3363 | if (status) | 3515 | if (!status) |
3364 | return status; | 3516 | status = decode_verifier(xdr, res->verf->verifier); |
3365 | READ_BUF(8); | 3517 | return status; |
3366 | COPYMEM(res->verf->verifier, 8); | ||
3367 | return 0; | ||
3368 | } | 3518 | } |
3369 | 3519 | ||
3370 | static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) | 3520 | static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) |
@@ -3378,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) | |||
3378 | return status; | 3528 | return status; |
3379 | if ((status = decode_change_info(xdr, cinfo))) | 3529 | if ((status = decode_change_info(xdr, cinfo))) |
3380 | return status; | 3530 | return status; |
3381 | READ_BUF(4); | 3531 | p = xdr_inline_decode(xdr, 4); |
3382 | READ32(bmlen); | 3532 | if (unlikely(!p)) |
3383 | READ_BUF(bmlen << 2); | 3533 | goto out_overflow; |
3384 | return 0; | 3534 | bmlen = be32_to_cpup(p); |
3535 | p = xdr_inline_decode(xdr, bmlen << 2); | ||
3536 | if (likely(p)) | ||
3537 | return 0; | ||
3538 | out_overflow: | ||
3539 | print_overflow_msg(__func__, xdr); | ||
3540 | return -EIO; | ||
3385 | } | 3541 | } |
3386 | 3542 | ||
3387 | static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) | 3543 | static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) |
@@ -3466,7 +3622,8 @@ xdr_error: | |||
3466 | return status; | 3622 | return status; |
3467 | } | 3623 | } |
3468 | 3624 | ||
3469 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) | 3625 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
3626 | const struct nfs_server *server, int may_sleep) | ||
3470 | { | 3627 | { |
3471 | __be32 *savep; | 3628 | __be32 *savep; |
3472 | uint32_t attrlen, | 3629 | uint32_t attrlen, |
@@ -3538,12 +3695,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons | |||
3538 | goto xdr_error; | 3695 | goto xdr_error; |
3539 | fattr->valid |= status; | 3696 | fattr->valid |= status; |
3540 | 3697 | ||
3541 | status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); | 3698 | status = decode_attr_owner(xdr, bitmap, server->nfs_client, |
3699 | &fattr->uid, may_sleep); | ||
3542 | if (status < 0) | 3700 | if (status < 0) |
3543 | goto xdr_error; | 3701 | goto xdr_error; |
3544 | fattr->valid |= status; | 3702 | fattr->valid |= status; |
3545 | 3703 | ||
3546 | status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); | 3704 | status = decode_attr_group(xdr, bitmap, server->nfs_client, |
3705 | &fattr->gid, may_sleep); | ||
3547 | if (status < 0) | 3706 | if (status < 0) |
3548 | goto xdr_error; | 3707 | goto xdr_error; |
3549 | fattr->valid |= status; | 3708 | fattr->valid |= status; |
@@ -3633,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) | |||
3633 | if (status) | 3792 | if (status) |
3634 | return status; | 3793 | return status; |
3635 | 3794 | ||
3636 | READ_BUF(4); | 3795 | p = xdr_inline_decode(xdr, 4); |
3637 | READ32(len); | 3796 | if (unlikely(!p)) |
3797 | goto out_overflow; | ||
3798 | len = be32_to_cpup(p); | ||
3638 | if (len > NFS4_FHSIZE) | 3799 | if (len > NFS4_FHSIZE) |
3639 | return -EIO; | 3800 | return -EIO; |
3640 | fh->size = len; | 3801 | fh->size = len; |
3641 | READ_BUF(len); | 3802 | p = xdr_inline_decode(xdr, len); |
3642 | COPYMEM(fh->data, len); | 3803 | if (unlikely(!p)) |
3804 | goto out_overflow; | ||
3805 | memcpy(fh->data, p, len); | ||
3643 | return 0; | 3806 | return 0; |
3807 | out_overflow: | ||
3808 | print_overflow_msg(__func__, xdr); | ||
3809 | return -EIO; | ||
3644 | } | 3810 | } |
3645 | 3811 | ||
3646 | static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) | 3812 | static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) |
@@ -3662,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) | |||
3662 | __be32 *p; | 3828 | __be32 *p; |
3663 | uint32_t namelen, type; | 3829 | uint32_t namelen, type; |
3664 | 3830 | ||
3665 | READ_BUF(32); | 3831 | p = xdr_inline_decode(xdr, 32); |
3666 | READ64(offset); | 3832 | if (unlikely(!p)) |
3667 | READ64(length); | 3833 | goto out_overflow; |
3668 | READ32(type); | 3834 | p = xdr_decode_hyper(p, &offset); |
3835 | p = xdr_decode_hyper(p, &length); | ||
3836 | type = be32_to_cpup(p++); | ||
3669 | if (fl != NULL) { | 3837 | if (fl != NULL) { |
3670 | fl->fl_start = (loff_t)offset; | 3838 | fl->fl_start = (loff_t)offset; |
3671 | fl->fl_end = fl->fl_start + (loff_t)length - 1; | 3839 | fl->fl_end = fl->fl_start + (loff_t)length - 1; |
@@ -3676,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) | |||
3676 | fl->fl_type = F_RDLCK; | 3844 | fl->fl_type = F_RDLCK; |
3677 | fl->fl_pid = 0; | 3845 | fl->fl_pid = 0; |
3678 | } | 3846 | } |
3679 | READ64(clientid); | 3847 | p = xdr_decode_hyper(p, &clientid); |
3680 | READ32(namelen); | 3848 | namelen = be32_to_cpup(p); |
3681 | READ_BUF(namelen); | 3849 | p = xdr_inline_decode(xdr, namelen); |
3682 | return -NFS4ERR_DENIED; | 3850 | if (likely(p)) |
3851 | return -NFS4ERR_DENIED; | ||
3852 | out_overflow: | ||
3853 | print_overflow_msg(__func__, xdr); | ||
3854 | return -EIO; | ||
3683 | } | 3855 | } |
3684 | 3856 | ||
3685 | static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) | 3857 | static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) |
3686 | { | 3858 | { |
3687 | __be32 *p; | ||
3688 | int status; | 3859 | int status; |
3689 | 3860 | ||
3690 | status = decode_op_hdr(xdr, OP_LOCK); | 3861 | status = decode_op_hdr(xdr, OP_LOCK); |
3691 | if (status == -EIO) | 3862 | if (status == -EIO) |
3692 | goto out; | 3863 | goto out; |
3693 | if (status == 0) { | 3864 | if (status == 0) { |
3694 | READ_BUF(NFS4_STATEID_SIZE); | 3865 | status = decode_stateid(xdr, &res->stateid); |
3695 | COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); | 3866 | if (unlikely(status)) |
3867 | goto out; | ||
3696 | } else if (status == -NFS4ERR_DENIED) | 3868 | } else if (status == -NFS4ERR_DENIED) |
3697 | status = decode_lock_denied(xdr, NULL); | 3869 | status = decode_lock_denied(xdr, NULL); |
3698 | if (res->open_seqid != NULL) | 3870 | if (res->open_seqid != NULL) |
@@ -3713,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res) | |||
3713 | 3885 | ||
3714 | static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) | 3886 | static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) |
3715 | { | 3887 | { |
3716 | __be32 *p; | ||
3717 | int status; | 3888 | int status; |
3718 | 3889 | ||
3719 | status = decode_op_hdr(xdr, OP_LOCKU); | 3890 | status = decode_op_hdr(xdr, OP_LOCKU); |
3720 | if (status != -EIO) | 3891 | if (status != -EIO) |
3721 | nfs_increment_lock_seqid(status, res->seqid); | 3892 | nfs_increment_lock_seqid(status, res->seqid); |
3722 | if (status == 0) { | 3893 | if (status == 0) |
3723 | READ_BUF(NFS4_STATEID_SIZE); | 3894 | status = decode_stateid(xdr, &res->stateid); |
3724 | COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); | ||
3725 | } | ||
3726 | return status; | 3895 | return status; |
3727 | } | 3896 | } |
3728 | 3897 | ||
@@ -3737,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) | |||
3737 | __be32 *p; | 3906 | __be32 *p; |
3738 | uint32_t limit_type, nblocks, blocksize; | 3907 | uint32_t limit_type, nblocks, blocksize; |
3739 | 3908 | ||
3740 | READ_BUF(12); | 3909 | p = xdr_inline_decode(xdr, 12); |
3741 | READ32(limit_type); | 3910 | if (unlikely(!p)) |
3911 | goto out_overflow; | ||
3912 | limit_type = be32_to_cpup(p++); | ||
3742 | switch (limit_type) { | 3913 | switch (limit_type) { |
3743 | case 1: | 3914 | case 1: |
3744 | READ64(*maxsize); | 3915 | xdr_decode_hyper(p, maxsize); |
3745 | break; | 3916 | break; |
3746 | case 2: | 3917 | case 2: |
3747 | READ32(nblocks); | 3918 | nblocks = be32_to_cpup(p++); |
3748 | READ32(blocksize); | 3919 | blocksize = be32_to_cpup(p); |
3749 | *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; | 3920 | *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; |
3750 | } | 3921 | } |
3751 | return 0; | 3922 | return 0; |
3923 | out_overflow: | ||
3924 | print_overflow_msg(__func__, xdr); | ||
3925 | return -EIO; | ||
3752 | } | 3926 | } |
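In decode_space_limit(), the casts in `(uint64_t)nblocks * (uint64_t)blocksize` matter: both operands are 32-bit, so without widening first the product would be computed modulo 2^32 before the assignment to the 64-bit maxsize. A short demonstration with values chosen to overflow 32 bits:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t nblocks = 0x100000, blocksize = 0x10000; /* 2^20 * 2^16 */
	uint64_t wrong = nblocks * blocksize;	/* 32-bit multiply wraps to 0 */
	uint64_t right = (uint64_t)nblocks * (uint64_t)blocksize;

	printf("%llu %llu\n", (unsigned long long)wrong,
	       (unsigned long long)right);	/* 0 68719476736 */
	return 0;
}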
3753 | 3927 | ||
3754 | static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) | 3928 | static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) |
3755 | { | 3929 | { |
3756 | __be32 *p; | 3930 | __be32 *p; |
3757 | uint32_t delegation_type; | 3931 | uint32_t delegation_type; |
3932 | int status; | ||
3758 | 3933 | ||
3759 | READ_BUF(4); | 3934 | p = xdr_inline_decode(xdr, 4); |
3760 | READ32(delegation_type); | 3935 | if (unlikely(!p)) |
3936 | goto out_overflow; | ||
3937 | delegation_type = be32_to_cpup(p); | ||
3761 | if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { | 3938 | if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { |
3762 | res->delegation_type = 0; | 3939 | res->delegation_type = 0; |
3763 | return 0; | 3940 | return 0; |
3764 | } | 3941 | } |
3765 | READ_BUF(NFS4_STATEID_SIZE+4); | 3942 | status = decode_stateid(xdr, &res->delegation); |
3766 | COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); | 3943 | if (unlikely(status)) |
3767 | READ32(res->do_recall); | 3944 | return status; |
3945 | p = xdr_inline_decode(xdr, 4); | ||
3946 | if (unlikely(!p)) | ||
3947 | goto out_overflow; | ||
3948 | res->do_recall = be32_to_cpup(p); | ||
3768 | 3949 | ||
3769 | switch (delegation_type) { | 3950 | switch (delegation_type) { |
3770 | case NFS4_OPEN_DELEGATE_READ: | 3951 | case NFS4_OPEN_DELEGATE_READ: |
@@ -3776,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) | |||
3776 | return -EIO; | 3957 | return -EIO; |
3777 | } | 3958 | } |
3778 | return decode_ace(xdr, NULL, res->server->nfs_client); | 3959 | return decode_ace(xdr, NULL, res->server->nfs_client); |
3960 | out_overflow: | ||
3961 | print_overflow_msg(__func__, xdr); | ||
3962 | return -EIO; | ||
3779 | } | 3963 | } |
3780 | 3964 | ||
3781 | static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) | 3965 | static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) |
@@ -3787,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) | |||
3787 | status = decode_op_hdr(xdr, OP_OPEN); | 3971 | status = decode_op_hdr(xdr, OP_OPEN); |
3788 | if (status != -EIO) | 3972 | if (status != -EIO) |
3789 | nfs_increment_open_seqid(status, res->seqid); | 3973 | nfs_increment_open_seqid(status, res->seqid); |
3790 | if (status) | 3974 | if (!status) |
3975 | status = decode_stateid(xdr, &res->stateid); | ||
3976 | if (unlikely(status)) | ||
3791 | return status; | 3977 | return status; |
3792 | READ_BUF(NFS4_STATEID_SIZE); | ||
3793 | COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); | ||
3794 | 3978 | ||
3795 | decode_change_info(xdr, &res->cinfo); | 3979 | decode_change_info(xdr, &res->cinfo); |
3796 | 3980 | ||
3797 | READ_BUF(8); | 3981 | p = xdr_inline_decode(xdr, 8); |
3798 | READ32(res->rflags); | 3982 | if (unlikely(!p)) |
3799 | READ32(bmlen); | 3983 | goto out_overflow; |
3984 | res->rflags = be32_to_cpup(p++); | ||
3985 | bmlen = be32_to_cpup(p); | ||
3800 | if (bmlen > 10) | 3986 | if (bmlen > 10) |
3801 | goto xdr_error; | 3987 | goto xdr_error; |
3802 | 3988 | ||
3803 | READ_BUF(bmlen << 2); | 3989 | p = xdr_inline_decode(xdr, bmlen << 2); |
3990 | if (unlikely(!p)) | ||
3991 | goto out_overflow; | ||
3804 | savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); | 3992 | savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); |
3805 | for (i = 0; i < savewords; ++i) | 3993 | for (i = 0; i < savewords; ++i) |
3806 | READ32(res->attrset[i]); | 3994 | res->attrset[i] = be32_to_cpup(p++); |
3807 | for (; i < NFS4_BITMAP_SIZE; i++) | 3995 | for (; i < NFS4_BITMAP_SIZE; i++) |
3808 | res->attrset[i] = 0; | 3996 | res->attrset[i] = 0; |
3809 | 3997 | ||
@@ -3811,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) | |||
3811 | xdr_error: | 3999 | xdr_error: |
3812 | dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); | 4000 | dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); |
3813 | return -EIO; | 4001 | return -EIO; |
4002 | out_overflow: | ||
4003 | print_overflow_msg(__func__, xdr); | ||
4004 | return -EIO; | ||
3814 | } | 4005 | } |
3815 | 4006 | ||
3816 | static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) | 4007 | static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) |
3817 | { | 4008 | { |
3818 | __be32 *p; | ||
3819 | int status; | 4009 | int status; |
3820 | 4010 | ||
3821 | status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); | 4011 | status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); |
3822 | if (status != -EIO) | 4012 | if (status != -EIO) |
3823 | nfs_increment_open_seqid(status, res->seqid); | 4013 | nfs_increment_open_seqid(status, res->seqid); |
3824 | if (status) | 4014 | if (!status) |
3825 | return status; | 4015 | status = decode_stateid(xdr, &res->stateid); |
3826 | READ_BUF(NFS4_STATEID_SIZE); | 4016 | return status; |
3827 | COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); | ||
3828 | return 0; | ||
3829 | } | 4017 | } |
3830 | 4018 | ||
3831 | static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) | 4019 | static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) |
3832 | { | 4020 | { |
3833 | __be32 *p; | ||
3834 | int status; | 4021 | int status; |
3835 | 4022 | ||
3836 | status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); | 4023 | status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); |
3837 | if (status != -EIO) | 4024 | if (status != -EIO) |
3838 | nfs_increment_open_seqid(status, res->seqid); | 4025 | nfs_increment_open_seqid(status, res->seqid); |
3839 | if (status) | 4026 | if (!status) |
3840 | return status; | 4027 | status = decode_stateid(xdr, &res->stateid); |
3841 | READ_BUF(NFS4_STATEID_SIZE); | 4028 | return status; |
3842 | COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); | ||
3843 | return 0; | ||
3844 | } | 4029 | } |
3845 | 4030 | ||
3846 | static int decode_putfh(struct xdr_stream *xdr) | 4031 | static int decode_putfh(struct xdr_stream *xdr) |
@@ -3863,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ | |||
3863 | status = decode_op_hdr(xdr, OP_READ); | 4048 | status = decode_op_hdr(xdr, OP_READ); |
3864 | if (status) | 4049 | if (status) |
3865 | return status; | 4050 | return status; |
3866 | READ_BUF(8); | 4051 | p = xdr_inline_decode(xdr, 8); |
3867 | READ32(eof); | 4052 | if (unlikely(!p)) |
3868 | READ32(count); | 4053 | goto out_overflow; |
4054 | eof = be32_to_cpup(p++); | ||
4055 | count = be32_to_cpup(p); | ||
3869 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 4056 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
3870 | recvd = req->rq_rcv_buf.len - hdrlen; | 4057 | recvd = req->rq_rcv_buf.len - hdrlen; |
3871 | if (count > recvd) { | 4058 | if (count > recvd) { |
@@ -3878,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ | |||
3878 | res->eof = eof; | 4065 | res->eof = eof; |
3879 | res->count = count; | 4066 | res->count = count; |
3880 | return 0; | 4067 | return 0; |
4068 | out_overflow: | ||
4069 | print_overflow_msg(__func__, xdr); | ||
4070 | return -EIO; | ||
3881 | } | 4071 | } |
3882 | 4072 | ||
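
The length check following the two decoded words deserves a gloss: the count the server reports is trusted only up to what was actually received. In effect (the full branch in this function also logs and clears the EOF flag, elided here):

    hdrlen = (u8 *)p - (u8 *)iov->iov_base;	/* reply header bytes consumed */
    recvd  = req->rq_rcv_buf.len - hdrlen;	/* payload bytes on the wire */
    if (count > recvd)
    	count = recvd;				/* clamp a short or lying reply */
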
3883 | static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) | 4073 | static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) |
@@ -3892,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3892 | int status; | 4082 | int status; |
3893 | 4083 | ||
3894 | status = decode_op_hdr(xdr, OP_READDIR); | 4084 | status = decode_op_hdr(xdr, OP_READDIR); |
3895 | if (status) | 4085 | if (!status) |
4086 | status = decode_verifier(xdr, readdir->verifier.data); | ||
4087 | if (unlikely(status)) | ||
3896 | return status; | 4088 | return status; |
3897 | READ_BUF(8); | ||
3898 | COPYMEM(readdir->verifier.data, 8); | ||
3899 | dprintk("%s: verifier = %08x:%08x\n", | 4089 | dprintk("%s: verifier = %08x:%08x\n", |
3900 | __func__, | 4090 | __func__, |
3901 | ((u32 *)readdir->verifier.data)[0], | 4091 | ((u32 *)readdir->verifier.data)[0], |
3902 | ((u32 *)readdir->verifier.data)[1]); | 4092 | ((u32 *)readdir->verifier.data)[1]); |
3903 | 4093 | ||
3904 | 4094 | ||
3905 | hdrlen = (char *) p - (char *) iov->iov_base; | 4095 | hdrlen = (char *) xdr->p - (char *) iov->iov_base; |
3906 | recvd = rcvbuf->len - hdrlen; | 4096 | recvd = rcvbuf->len - hdrlen; |
3907 | if (pglen > recvd) | 4097 | if (pglen > recvd) |
3908 | pglen = recvd; | 4098 | pglen = recvd; |
@@ -3990,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | |||
3990 | return status; | 4180 | return status; |
3991 | 4181 | ||
3992 | /* Convert length of symlink */ | 4182 | /* Convert length of symlink */ |
3993 | READ_BUF(4); | 4183 | p = xdr_inline_decode(xdr, 4); |
3994 | READ32(len); | 4184 | if (unlikely(!p)) |
4185 | goto out_overflow; | ||
4186 | len = be32_to_cpup(p); | ||
3995 | if (len >= rcvbuf->page_len || len <= 0) { | 4187 | if (len >= rcvbuf->page_len || len <= 0) { |
3996 | dprintk("nfs: server returned giant symlink!\n"); | 4188 | dprintk("nfs: server returned giant symlink!\n"); |
3997 | return -ENAMETOOLONG; | 4189 | return -ENAMETOOLONG; |
@@ -4015,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | |||
4015 | kaddr[len+rcvbuf->page_base] = '\0'; | 4207 | kaddr[len+rcvbuf->page_base] = '\0'; |
4016 | kunmap_atomic(kaddr, KM_USER0); | 4208 | kunmap_atomic(kaddr, KM_USER0); |
4017 | return 0; | 4209 | return 0; |
4210 | out_overflow: | ||
4211 | print_overflow_msg(__func__, xdr); | ||
4212 | return -EIO; | ||
4018 | } | 4213 | } |
4019 | 4214 | ||
4020 | static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) | 4215 | static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) |
@@ -4112,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr) | |||
4112 | status = decode_op_hdr(xdr, OP_SETATTR); | 4307 | status = decode_op_hdr(xdr, OP_SETATTR); |
4113 | if (status) | 4308 | if (status) |
4114 | return status; | 4309 | return status; |
4115 | READ_BUF(4); | 4310 | p = xdr_inline_decode(xdr, 4); |
4116 | READ32(bmlen); | 4311 | if (unlikely(!p)) |
4117 | READ_BUF(bmlen << 2); | 4312 | goto out_overflow; |
4118 | return 0; | 4313 | bmlen = be32_to_cpup(p); |
4314 | p = xdr_inline_decode(xdr, bmlen << 2); | ||
4315 | if (likely(p)) | ||
4316 | return 0; | ||
4317 | out_overflow: | ||
4318 | print_overflow_msg(__func__, xdr); | ||
4319 | return -EIO; | ||
4119 | } | 4320 | } |
4120 | 4321 | ||
4121 | static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) | 4322 | static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) |
@@ -4124,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) | |||
4124 | uint32_t opnum; | 4325 | uint32_t opnum; |
4125 | int32_t nfserr; | 4326 | int32_t nfserr; |
4126 | 4327 | ||
4127 | READ_BUF(8); | 4328 | p = xdr_inline_decode(xdr, 8); |
4128 | READ32(opnum); | 4329 | if (unlikely(!p)) |
4330 | goto out_overflow; | ||
4331 | opnum = be32_to_cpup(p++); | ||
4129 | if (opnum != OP_SETCLIENTID) { | 4332 | if (opnum != OP_SETCLIENTID) { |
4130 | dprintk("nfs: decode_setclientid: Server returned operation" | 4333 | dprintk("nfs: decode_setclientid: Server returned operation" |
4131 | " %d\n", opnum); | 4334 | " %d\n", opnum); |
4132 | return -EIO; | 4335 | return -EIO; |
4133 | } | 4336 | } |
4134 | READ32(nfserr); | 4337 | nfserr = be32_to_cpup(p); |
4135 | if (nfserr == NFS_OK) { | 4338 | if (nfserr == NFS_OK) { |
4136 | READ_BUF(8 + NFS4_VERIFIER_SIZE); | 4339 | p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE); |
4137 | READ64(clp->cl_clientid); | 4340 | if (unlikely(!p)) |
4138 | COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); | 4341 | goto out_overflow; |
4342 | p = xdr_decode_hyper(p, &clp->cl_clientid); | ||
4343 | memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE); | ||
4139 | } else if (nfserr == NFSERR_CLID_INUSE) { | 4344 | } else if (nfserr == NFSERR_CLID_INUSE) { |
4140 | uint32_t len; | 4345 | uint32_t len; |
4141 | 4346 | ||
4142 | /* skip netid string */ | 4347 | /* skip netid string */ |
4143 | READ_BUF(4); | 4348 | p = xdr_inline_decode(xdr, 4); |
4144 | READ32(len); | 4349 | if (unlikely(!p)) |
4145 | READ_BUF(len); | 4350 | goto out_overflow; |
4351 | len = be32_to_cpup(p); | ||
4352 | p = xdr_inline_decode(xdr, len); | ||
4353 | if (unlikely(!p)) | ||
4354 | goto out_overflow; | ||
4146 | 4355 | ||
4147 | /* skip uaddr string */ | 4356 | /* skip uaddr string */ |
4148 | READ_BUF(4); | 4357 | p = xdr_inline_decode(xdr, 4); |
4149 | READ32(len); | 4358 | if (unlikely(!p)) |
4150 | READ_BUF(len); | 4359 | goto out_overflow; |
4360 | len = be32_to_cpup(p); | ||
4361 | p = xdr_inline_decode(xdr, len); | ||
4362 | if (unlikely(!p)) | ||
4363 | goto out_overflow; | ||
4151 | return -NFSERR_CLID_INUSE; | 4364 | return -NFSERR_CLID_INUSE; |
4152 | } else | 4365 | } else |
4153 | return nfs4_stat_to_errno(nfserr); | 4366 | return nfs4_stat_to_errno(nfserr); |
4154 | 4367 | ||
4155 | return 0; | 4368 | return 0; |
4369 | out_overflow: | ||
4370 | print_overflow_msg(__func__, xdr); | ||
4371 | return -EIO; | ||
4156 | } | 4372 | } |
4157 | 4373 | ||
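
The NFSERR_CLID_INUSE branch above skips the netid and uaddr strings by hand, twice. The pattern generalizes; a hypothetical helper (xdr_skip_opaque() does not exist in this file) would read:

    static int xdr_skip_opaque(struct xdr_stream *xdr)
    {
    	__be32 *p;
    	u32 len;

    	p = xdr_inline_decode(xdr, 4);		/* opaque length word */
    	if (unlikely(!p))
    		return -EIO;
    	len = be32_to_cpup(p);
    	/* consume the payload without copying (padding elided, as above) */
    	return xdr_inline_decode(xdr, len) ? 0 : -EIO;
    }
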
4158 | static int decode_setclientid_confirm(struct xdr_stream *xdr) | 4374 | static int decode_setclientid_confirm(struct xdr_stream *xdr) |
@@ -4169,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) | |||
4169 | if (status) | 4385 | if (status) |
4170 | return status; | 4386 | return status; |
4171 | 4387 | ||
4172 | READ_BUF(16); | 4388 | p = xdr_inline_decode(xdr, 16); |
4173 | READ32(res->count); | 4389 | if (unlikely(!p)) |
4174 | READ32(res->verf->committed); | 4390 | goto out_overflow; |
4175 | COPYMEM(res->verf->verifier, 8); | 4391 | res->count = be32_to_cpup(p++); |
4392 | res->verf->committed = be32_to_cpup(p++); | ||
4393 | memcpy(res->verf->verifier, p, 8); | ||
4176 | return 0; | 4394 | return 0; |
4395 | out_overflow: | ||
4396 | print_overflow_msg(__func__, xdr); | ||
4397 | return -EIO; | ||
4177 | } | 4398 | } |
4178 | 4399 | ||
4179 | static int decode_delegreturn(struct xdr_stream *xdr) | 4400 | static int decode_delegreturn(struct xdr_stream *xdr) |
@@ -4187,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, | |||
4187 | { | 4408 | { |
4188 | __be32 *p; | 4409 | __be32 *p; |
4189 | uint32_t dummy; | 4410 | uint32_t dummy; |
4411 | char *dummy_str; | ||
4190 | int status; | 4412 | int status; |
4191 | struct nfs_client *clp = res->client; | 4413 | struct nfs_client *clp = res->client; |
4192 | 4414 | ||
@@ -4194,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr, | |||
4194 | if (status) | 4416 | if (status) |
4195 | return status; | 4417 | return status; |
4196 | 4418 | ||
4197 | READ_BUF(8); | 4419 | p = xdr_inline_decode(xdr, 8); |
4198 | READ64(clp->cl_ex_clid); | 4420 | if (unlikely(!p)) |
4199 | READ_BUF(12); | 4421 | goto out_overflow; |
4200 | READ32(clp->cl_seqid); | 4422 | xdr_decode_hyper(p, &clp->cl_ex_clid); |
4201 | READ32(clp->cl_exchange_flags); | 4423 | p = xdr_inline_decode(xdr, 12); |
4424 | if (unlikely(!p)) | ||
4425 | goto out_overflow; | ||
4426 | clp->cl_seqid = be32_to_cpup(p++); | ||
4427 | clp->cl_exchange_flags = be32_to_cpup(p++); | ||
4202 | 4428 | ||
4203 | /* We ask for SP4_NONE */ | 4429 | /* We ask for SP4_NONE */ |
4204 | READ32(dummy); | 4430 | dummy = be32_to_cpup(p); |
4205 | if (dummy != SP4_NONE) | 4431 | if (dummy != SP4_NONE) |
4206 | return -EIO; | 4432 | return -EIO; |
4207 | 4433 | ||
4208 | /* Throw away minor_id */ | 4434 | /* Throw away minor_id */ |
4209 | READ_BUF(8); | 4435 | p = xdr_inline_decode(xdr, 8); |
4436 | if (unlikely(!p)) | ||
4437 | goto out_overflow; | ||
4210 | 4438 | ||
4211 | /* Throw away Major id */ | 4439 | /* Throw away Major id */ |
4212 | READ_BUF(4); | 4440 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4213 | READ32(dummy); | 4441 | if (unlikely(status)) |
4214 | READ_BUF(dummy); | 4442 | return status; |
4215 | 4443 | ||
4216 | /* Throw away server_scope */ | 4444 | /* Throw away server_scope */ |
4217 | READ_BUF(4); | 4445 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4218 | READ32(dummy); | 4446 | if (unlikely(status)) |
4219 | READ_BUF(dummy); | 4447 | return status; |
4220 | 4448 | ||
4221 | /* Throw away Implementation id array */ | 4449 | /* Throw away Implementation id array */ |
4222 | READ_BUF(4); | 4450 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4223 | READ32(dummy); | 4451 | if (unlikely(status)) |
4224 | READ_BUF(dummy); | 4452 | return status; |
4225 | 4453 | ||
4226 | return 0; | 4454 | return 0; |
4455 | out_overflow: | ||
4456 | print_overflow_msg(__func__, xdr); | ||
4457 | return -EIO; | ||
4227 | } | 4458 | } |
4228 | 4459 | ||
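
decode_exchange_id() now discards the major id, server scope and implementation-id array via decode_opaque_inline(), defined near the top of this file. Roughly — a sketch consistent with these call sites, not the verbatim body:

    static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len,
    				char **string)
    {
    	__be32 *p;

    	p = xdr_inline_decode(xdr, 4);
    	if (unlikely(!p))
    		goto out_overflow;
    	*len = be32_to_cpup(p);
    	p = xdr_inline_decode(xdr, *len);
    	if (unlikely(!p))
    		goto out_overflow;
    	/* no copy: the caller gets a pointer into the receive buffer */
    	*string = (char *)p;
    	return 0;
    out_overflow:
    	print_overflow_msg(__func__, xdr);
    	return -EIO;
    }
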
4229 | static int decode_chan_attrs(struct xdr_stream *xdr, | 4460 | static int decode_chan_attrs(struct xdr_stream *xdr, |
@@ -4232,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr, | |||
4232 | __be32 *p; | 4463 | __be32 *p; |
4233 | u32 nr_attrs; | 4464 | u32 nr_attrs; |
4234 | 4465 | ||
4235 | READ_BUF(28); | 4466 | p = xdr_inline_decode(xdr, 28); |
4236 | READ32(attrs->headerpadsz); | 4467 | if (unlikely(!p)) |
4237 | READ32(attrs->max_rqst_sz); | 4468 | goto out_overflow; |
4238 | READ32(attrs->max_resp_sz); | 4469 | attrs->headerpadsz = be32_to_cpup(p++); |
4239 | READ32(attrs->max_resp_sz_cached); | 4470 | attrs->max_rqst_sz = be32_to_cpup(p++); |
4240 | READ32(attrs->max_ops); | 4471 | attrs->max_resp_sz = be32_to_cpup(p++); |
4241 | READ32(attrs->max_reqs); | 4472 | attrs->max_resp_sz_cached = be32_to_cpup(p++); |
4242 | READ32(nr_attrs); | 4473 | attrs->max_ops = be32_to_cpup(p++); |
4474 | attrs->max_reqs = be32_to_cpup(p++); | ||
4475 | nr_attrs = be32_to_cpup(p); | ||
4243 | if (unlikely(nr_attrs > 1)) { | 4476 | if (unlikely(nr_attrs > 1)) { |
4244 | printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", | 4477 | printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", |
4245 | __func__, nr_attrs); | 4478 | __func__, nr_attrs); |
4246 | return -EINVAL; | 4479 | return -EINVAL; |
4247 | } | 4480 | } |
4248 | if (nr_attrs == 1) | 4481 | if (nr_attrs == 1) { |
4249 | READ_BUF(4); /* skip rdma_attrs */ | 4482 | p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */ |
4483 | if (unlikely(!p)) | ||
4484 | goto out_overflow; | ||
4485 | } | ||
4250 | return 0; | 4486 | return 0; |
4487 | out_overflow: | ||
4488 | print_overflow_msg(__func__, xdr); | ||
4489 | return -EIO; | ||
4490 | } | ||
4491 | |||
4492 | static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) | ||
4493 | { | ||
4494 | return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN); | ||
4251 | } | 4495 | } |
4252 | 4496 | ||
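
decode_sessionid() is deliberately thin. The fixed-size copy underneath it — shared with decode_stateid() and the verifier decoding — amounts to the following sketch:

    static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
    {
    	__be32 *p;

    	p = xdr_inline_decode(xdr, len);
    	if (likely(p)) {
    		memcpy(buf, p, len);	/* len is caller-fixed, never wire-supplied */
    		return 0;
    	}
    	print_overflow_msg(__func__, xdr);
    	return -EIO;
    }
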
4253 | static int decode_create_session(struct xdr_stream *xdr, | 4497 | static int decode_create_session(struct xdr_stream *xdr, |
@@ -4259,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr, | |||
4259 | struct nfs4_session *session = clp->cl_session; | 4503 | struct nfs4_session *session = clp->cl_session; |
4260 | 4504 | ||
4261 | status = decode_op_hdr(xdr, OP_CREATE_SESSION); | 4505 | status = decode_op_hdr(xdr, OP_CREATE_SESSION); |
4262 | 4506 | if (!status) | |
4263 | if (status) | 4507 | status = decode_sessionid(xdr, &session->sess_id); |
4508 | if (unlikely(status)) | ||
4264 | return status; | 4509 | return status; |
4265 | 4510 | ||
4266 | /* sessionid */ | ||
4267 | READ_BUF(NFS4_MAX_SESSIONID_LEN); | ||
4268 | COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN); | ||
4269 | |||
4270 | /* seqid, flags */ | 4511 | /* seqid, flags */ |
4271 | READ_BUF(8); | 4512 | p = xdr_inline_decode(xdr, 8); |
4272 | READ32(clp->cl_seqid); | 4513 | if (unlikely(!p)) |
4273 | READ32(session->flags); | 4514 | goto out_overflow; |
4515 | clp->cl_seqid = be32_to_cpup(p++); | ||
4516 | session->flags = be32_to_cpup(p); | ||
4274 | 4517 | ||
4275 | /* Channel attributes */ | 4518 | /* Channel attributes */ |
4276 | status = decode_chan_attrs(xdr, &session->fc_attrs); | 4519 | status = decode_chan_attrs(xdr, &session->fc_attrs); |
4277 | if (!status) | 4520 | if (!status) |
4278 | status = decode_chan_attrs(xdr, &session->bc_attrs); | 4521 | status = decode_chan_attrs(xdr, &session->bc_attrs); |
4279 | return status; | 4522 | return status; |
4523 | out_overflow: | ||
4524 | print_overflow_msg(__func__, xdr); | ||
4525 | return -EIO; | ||
4280 | } | 4526 | } |
4281 | 4527 | ||
4282 | static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) | 4528 | static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) |
@@ -4300,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr, | |||
4300 | return 0; | 4546 | return 0; |
4301 | 4547 | ||
4302 | status = decode_op_hdr(xdr, OP_SEQUENCE); | 4548 | status = decode_op_hdr(xdr, OP_SEQUENCE); |
4303 | if (status) | 4549 | if (!status) |
4550 | status = decode_sessionid(xdr, &id); | ||
4551 | if (unlikely(status)) | ||
4304 | goto out_err; | 4552 | goto out_err; |
4305 | 4553 | ||
4306 | /* | 4554 | /* |
@@ -4309,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr, | |||
4309 | */ | 4557 | */ |
4310 | status = -ESERVERFAULT; | 4558 | status = -ESERVERFAULT; |
4311 | 4559 | ||
4312 | slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; | ||
4313 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 20); | ||
4314 | COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN); | ||
4315 | if (memcmp(id.data, res->sr_session->sess_id.data, | 4560 | if (memcmp(id.data, res->sr_session->sess_id.data, |
4316 | NFS4_MAX_SESSIONID_LEN)) { | 4561 | NFS4_MAX_SESSIONID_LEN)) { |
4317 | dprintk("%s Invalid session id\n", __func__); | 4562 | dprintk("%s Invalid session id\n", __func__); |
4318 | goto out_err; | 4563 | goto out_err; |
4319 | } | 4564 | } |
4565 | |||
4566 | p = xdr_inline_decode(xdr, 20); | ||
4567 | if (unlikely(!p)) | ||
4568 | goto out_overflow; | ||
4569 | |||
4320 | /* seqid */ | 4570 | /* seqid */ |
4321 | READ32(dummy); | 4571 | slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; |
4572 | dummy = be32_to_cpup(p++); | ||
4322 | if (dummy != slot->seq_nr) { | 4573 | if (dummy != slot->seq_nr) { |
4323 | dprintk("%s Invalid sequence number\n", __func__); | 4574 | dprintk("%s Invalid sequence number\n", __func__); |
4324 | goto out_err; | 4575 | goto out_err; |
4325 | } | 4576 | } |
4326 | /* slot id */ | 4577 | /* slot id */ |
4327 | READ32(dummy); | 4578 | dummy = be32_to_cpup(p++); |
4328 | if (dummy != res->sr_slotid) { | 4579 | if (dummy != res->sr_slotid) { |
4329 | dprintk("%s Invalid slot id\n", __func__); | 4580 | dprintk("%s Invalid slot id\n", __func__); |
4330 | goto out_err; | 4581 | goto out_err; |
4331 | } | 4582 | } |
4332 | /* highest slot id - currently not processed */ | 4583 | /* highest slot id - currently not processed */ |
4333 | READ32(dummy); | 4584 | dummy = be32_to_cpup(p++); |
4334 | /* target highest slot id - currently not processed */ | 4585 | /* target highest slot id - currently not processed */ |
4335 | READ32(dummy); | 4586 | dummy = be32_to_cpup(p++); |
4336 | /* result flags - currently not processed */ | 4587 | /* result flags - currently not processed */ |
4337 | READ32(dummy); | 4588 | dummy = be32_to_cpup(p); |
4338 | status = 0; | 4589 | status = 0; |
4339 | out_err: | 4590 | out_err: |
4340 | res->sr_status = status; | 4591 | res->sr_status = status; |
4341 | return status; | 4592 | return status; |
4593 | out_overflow: | ||
4594 | print_overflow_msg(__func__, xdr); | ||
4595 | status = -EIO; | ||
4596 | goto out_err; | ||
4342 | #else /* CONFIG_NFS_V4_1 */ | 4597 | #else /* CONFIG_NFS_V4_1 */ |
4343 | return 0; | 4598 | return 0; |
4344 | #endif /* CONFIG_NFS_V4_1 */ | 4599 | #endif /* CONFIG_NFS_V4_1 */ |
@@ -4370,7 +4625,8 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct | |||
4370 | status = decode_open_downgrade(&xdr, res); | 4625 | status = decode_open_downgrade(&xdr, res); |
4371 | if (status != 0) | 4626 | if (status != 0) |
4372 | goto out; | 4627 | goto out; |
4373 | decode_getfattr(&xdr, res->fattr, res->server); | 4628 | decode_getfattr(&xdr, res->fattr, res->server, |
4629 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4374 | out: | 4630 | out: |
4375 | return status; | 4631 | return status; |
4376 | } | 4632 | } |
@@ -4397,7 +4653,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac | |||
4397 | status = decode_access(&xdr, res); | 4653 | status = decode_access(&xdr, res); |
4398 | if (status != 0) | 4654 | if (status != 0) |
4399 | goto out; | 4655 | goto out; |
4400 | decode_getfattr(&xdr, res->fattr, res->server); | 4656 | decode_getfattr(&xdr, res->fattr, res->server, |
4657 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4401 | out: | 4658 | out: |
4402 | return status; | 4659 | return status; |
4403 | } | 4660 | } |
@@ -4424,7 +4681,8 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo | |||
4424 | goto out; | 4681 | goto out; |
4425 | if ((status = decode_getfh(&xdr, res->fh)) != 0) | 4682 | if ((status = decode_getfh(&xdr, res->fh)) != 0) |
4426 | goto out; | 4683 | goto out; |
4427 | status = decode_getfattr(&xdr, res->fattr, res->server | 4684 | status = decode_getfattr(&xdr, res->fattr, res->server, |
4685 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4428 | out: | 4686 | out: |
4429 | return status; | 4687 | return status; |
4430 | } | 4688 | } |
@@ -4448,7 +4706,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf | |||
4448 | if ((status = decode_putrootfh(&xdr)) != 0) | 4706 | if ((status = decode_putrootfh(&xdr)) != 0) |
4449 | goto out; | 4707 | goto out; |
4450 | if ((status = decode_getfh(&xdr, res->fh)) == 0) | 4708 | if ((status = decode_getfh(&xdr, res->fh)) == 0) |
4451 | status = decode_getfattr(&xdr, res->fattr, res->server); | 4709 | status = decode_getfattr(&xdr, res->fattr, res->server, |
4710 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4452 | out: | 4711 | out: |
4453 | return status; | 4712 | return status; |
4454 | } | 4713 | } |
@@ -4473,7 +4732,8 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem | |||
4473 | goto out; | 4732 | goto out; |
4474 | if ((status = decode_remove(&xdr, &res->cinfo)) != 0) | 4733 | if ((status = decode_remove(&xdr, &res->cinfo)) != 0) |
4475 | goto out; | 4734 | goto out; |
4476 | decode_getfattr(&xdr, &res->dir_attr, res->server); | 4735 | decode_getfattr(&xdr, &res->dir_attr, res->server, |
4736 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4477 | out: | 4737 | out: |
4478 | return status; | 4738 | return status; |
4479 | } | 4739 | } |
@@ -4503,11 +4763,13 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re | |||
4503 | if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) | 4763 | if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) |
4504 | goto out; | 4764 | goto out; |
4505 | /* Current FH is target directory */ | 4765 | /* Current FH is target directory */ |
4506 | if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) | 4766 | if (decode_getfattr(&xdr, res->new_fattr, res->server, |
4767 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
4507 | goto out; | 4768 | goto out; |
4508 | if ((status = decode_restorefh(&xdr)) != 0) | 4769 | if ((status = decode_restorefh(&xdr)) != 0) |
4509 | goto out; | 4770 | goto out; |
4510 | decode_getfattr(&xdr, res->old_fattr, res->server); | 4771 | decode_getfattr(&xdr, res->old_fattr, res->server, |
4772 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4511 | out: | 4773 | out: |
4512 | return status; | 4774 | return status; |
4513 | } | 4775 | } |
@@ -4540,11 +4802,13 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link | |||
4540 | * Note order: OP_LINK leaves the directory as the current | 4802 | * Note order: OP_LINK leaves the directory as the current |
4541 | * filehandle. | 4803 | * filehandle. |
4542 | */ | 4804 | */ |
4543 | if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) | 4805 | if (decode_getfattr(&xdr, res->dir_attr, res->server, |
4806 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
4544 | goto out; | 4807 | goto out; |
4545 | if ((status = decode_restorefh(&xdr)) != 0) | 4808 | if ((status = decode_restorefh(&xdr)) != 0) |
4546 | goto out; | 4809 | goto out; |
4547 | decode_getfattr(&xdr, res->fattr, res->server); | 4810 | decode_getfattr(&xdr, res->fattr, res->server, |
4811 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4548 | out: | 4812 | out: |
4549 | return status; | 4813 | return status; |
4550 | } | 4814 | } |
@@ -4573,11 +4837,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr | |||
4573 | goto out; | 4837 | goto out; |
4574 | if ((status = decode_getfh(&xdr, res->fh)) != 0) | 4838 | if ((status = decode_getfh(&xdr, res->fh)) != 0) |
4575 | goto out; | 4839 | goto out; |
4576 | if (decode_getfattr(&xdr, res->fattr, res->server) != 0) | 4840 | if (decode_getfattr(&xdr, res->fattr, res->server, |
4841 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
4577 | goto out; | 4842 | goto out; |
4578 | if ((status = decode_restorefh(&xdr)) != 0) | 4843 | if ((status = decode_restorefh(&xdr)) != 0) |
4579 | goto out; | 4844 | goto out; |
4580 | decode_getfattr(&xdr, res->dir_fattr, res->server); | 4845 | decode_getfattr(&xdr, res->dir_fattr, res->server, |
4846 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4581 | out: | 4847 | out: |
4582 | return status; | 4848 | return status; |
4583 | } | 4849 | } |
@@ -4609,7 +4875,8 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g | |||
4609 | status = decode_putfh(&xdr); | 4875 | status = decode_putfh(&xdr); |
4610 | if (status) | 4876 | if (status) |
4611 | goto out; | 4877 | goto out; |
4612 | status = decode_getfattr(&xdr, res->fattr, res->server); | 4878 | status = decode_getfattr(&xdr, res->fattr, res->server, |
4879 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4613 | out: | 4880 | out: |
4614 | return status; | 4881 | return status; |
4615 | } | 4882 | } |
@@ -4716,7 +4983,8 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos | |||
4716 | * an ESTALE error. Shouldn't be a problem, | 4983 | * an ESTALE error. Shouldn't be a problem, |
4717 | * though, since fattr->valid will remain unset. | 4984 | * though, since fattr->valid will remain unset. |
4718 | */ | 4985 | */ |
4719 | decode_getfattr(&xdr, res->fattr, res->server); | 4986 | decode_getfattr(&xdr, res->fattr, res->server, |
4987 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4720 | out: | 4988 | out: |
4721 | return status; | 4989 | return status; |
4722 | } | 4990 | } |
@@ -4748,11 +5016,13 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr | |||
4748 | goto out; | 5016 | goto out; |
4749 | if (decode_getfh(&xdr, &res->fh) != 0) | 5017 | if (decode_getfh(&xdr, &res->fh) != 0) |
4750 | goto out; | 5018 | goto out; |
4751 | if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) | 5019 | if (decode_getfattr(&xdr, res->f_attr, res->server, |
5020 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
4752 | goto out; | 5021 | goto out; |
4753 | if (decode_restorefh(&xdr) != 0) | 5022 | if (decode_restorefh(&xdr) != 0) |
4754 | goto out; | 5023 | goto out; |
4755 | decode_getfattr(&xdr, res->dir_attr, res->server); | 5024 | decode_getfattr(&xdr, res->dir_attr, res->server, |
5025 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4756 | out: | 5026 | out: |
4757 | return status; | 5027 | return status; |
4758 | } | 5028 | } |
@@ -4800,7 +5070,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf | |||
4800 | status = decode_open(&xdr, res); | 5070 | status = decode_open(&xdr, res); |
4801 | if (status) | 5071 | if (status) |
4802 | goto out; | 5072 | goto out; |
4803 | decode_getfattr(&xdr, res->f_attr, res->server); | 5073 | decode_getfattr(&xdr, res->f_attr, res->server, |
5074 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4804 | out: | 5075 | out: |
4805 | return status; | 5076 | return status; |
4806 | } | 5077 | } |
@@ -4827,7 +5098,8 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se | |||
4827 | status = decode_setattr(&xdr); | 5098 | status = decode_setattr(&xdr); |
4828 | if (status) | 5099 | if (status) |
4829 | goto out; | 5100 | goto out; |
4830 | decode_getfattr(&xdr, res->fattr, res->server); | 5101 | decode_getfattr(&xdr, res->fattr, res->server, |
5102 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
4831 | out: | 5103 | out: |
4832 | return status; | 5104 | return status; |
4833 | } | 5105 | } |
@@ -5001,7 +5273,8 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ | |||
5001 | status = decode_write(&xdr, res); | 5273 | status = decode_write(&xdr, res); |
5002 | if (status) | 5274 | if (status) |
5003 | goto out; | 5275 | goto out; |
5004 | decode_getfattr(&xdr, res->fattr, res->server); | 5276 | decode_getfattr(&xdr, res->fattr, res->server, |
5277 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5005 | if (!status) | 5278 | if (!status) |
5006 | status = res->count; | 5279 | status = res->count; |
5007 | out: | 5280 | out: |
@@ -5030,7 +5303,8 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri | |||
5030 | status = decode_commit(&xdr, res); | 5303 | status = decode_commit(&xdr, res); |
5031 | if (status) | 5304 | if (status) |
5032 | goto out; | 5305 | goto out; |
5033 | decode_getfattr(&xdr, res->fattr, res->server); | 5306 | decode_getfattr(&xdr, res->fattr, res->server, |
5307 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5034 | out: | 5308 | out: |
5035 | return status; | 5309 | return status; |
5036 | } | 5310 | } |
@@ -5194,7 +5468,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf | |||
5194 | if (status != 0) | 5468 | if (status != 0) |
5195 | goto out; | 5469 | goto out; |
5196 | status = decode_delegreturn(&xdr); | 5470 | status = decode_delegreturn(&xdr); |
5197 | decode_getfattr(&xdr, res->fattr, res->server); | 5471 | decode_getfattr(&xdr, res->fattr, res->server, |
5472 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5198 | out: | 5473 | out: |
5199 | return status; | 5474 | return status; |
5200 | } | 5475 | } |
@@ -5222,7 +5497,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, | |||
5222 | goto out; | 5497 | goto out; |
5223 | xdr_enter_page(&xdr, PAGE_SIZE); | 5498 | xdr_enter_page(&xdr, PAGE_SIZE); |
5224 | status = decode_getfattr(&xdr, &res->fs_locations->fattr, | 5499 | status = decode_getfattr(&xdr, &res->fs_locations->fattr, |
5225 | res->fs_locations->server); | 5500 | res->fs_locations->server, |
5501 | !RPC_IS_ASYNC(req->rq_task)); | ||
5226 | out: | 5502 | out: |
5227 | return status; | 5503 | return status; |
5228 | } | 5504 | } |
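
Every decode_getfattr() call site in the hunks above gains the same fourth argument. The reason for !RPC_IS_ASYNC(rqstp->rq_task): attribute decoding can reach the uid/gid idmapper, whose upcall may sleep, and an asynchronous rpc_task runs in rpciod context where sleeping is forbidden. The new shape, sketched from these call sites (the function body itself is in an earlier part of this patch):

    static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
    			const struct nfs_server *server, int may_sleep);

    /* typical caller: only a synchronous task may sleep in the decoder */
    status = decode_getfattr(&xdr, res->fattr, res->server,
    			!RPC_IS_ASYNC(rqstp->rq_task));
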
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 73ea5e8d66ce..12c9e66d3f1d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) | |||
60 | return p; | 60 | return p; |
61 | } | 61 | } |
62 | 62 | ||
63 | static void nfs_readdata_free(struct nfs_read_data *p) | 63 | void nfs_readdata_free(struct nfs_read_data *p) |
64 | { | 64 | { |
65 | if (p && (p->pagevec != &p->page_array[0])) | 65 | if (p && (p->pagevec != &p->page_array[0])) |
66 | kfree(p->pagevec); | 66 | kfree(p->pagevec); |
67 | mempool_free(p, nfs_rdata_mempool); | 67 | mempool_free(p, nfs_rdata_mempool); |
68 | } | 68 | } |
69 | 69 | ||
70 | void nfs_readdata_release(void *data) | 70 | static void nfs_readdata_release(struct nfs_read_data *rdata) |
71 | { | 71 | { |
72 | struct nfs_read_data *rdata = data; | ||
73 | |||
74 | put_nfs_open_context(rdata->args.context); | 72 | put_nfs_open_context(rdata->args.context); |
75 | nfs_readdata_free(rdata); | 73 | nfs_readdata_free(rdata); |
76 | } | 74 | } |
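
The visibility swap matters to callers outside read.c: nfs_readdata_free() becomes the public counterpart of nfs_readdata_alloc(), while nfs_readdata_release(), which additionally drops the open context, stays file-local. A hypothetical external caller would pair them like this (npages is a caller-chosen count):

    struct nfs_read_data *data;

    data = nfs_readdata_alloc(npages);
    if (data == NULL)
    	return -ENOMEM;
    /* ... on any setup failure before an open context is attached ... */
    nfs_readdata_free(data);	/* safe: no context to put yet */
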
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0b4cbdc60abd..de935692d40d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -73,7 +73,7 @@ enum { | |||
73 | Opt_cto, Opt_nocto, | 73 | Opt_cto, Opt_nocto, |
74 | Opt_ac, Opt_noac, | 74 | Opt_ac, Opt_noac, |
75 | Opt_lock, Opt_nolock, | 75 | Opt_lock, Opt_nolock, |
76 | Opt_v2, Opt_v3, | 76 | Opt_v2, Opt_v3, Opt_v4, |
77 | Opt_udp, Opt_tcp, Opt_rdma, | 77 | Opt_udp, Opt_tcp, Opt_rdma, |
78 | Opt_acl, Opt_noacl, | 78 | Opt_acl, Opt_noacl, |
79 | Opt_rdirplus, Opt_nordirplus, | 79 | Opt_rdirplus, Opt_nordirplus, |
@@ -127,6 +127,7 @@ static const match_table_t nfs_mount_option_tokens = { | |||
127 | { Opt_nolock, "nolock" }, | 127 | { Opt_nolock, "nolock" }, |
128 | { Opt_v2, "v2" }, | 128 | { Opt_v2, "v2" }, |
129 | { Opt_v3, "v3" }, | 129 | { Opt_v3, "v3" }, |
130 | { Opt_v4, "v4" }, | ||
130 | { Opt_udp, "udp" }, | 131 | { Opt_udp, "udp" }, |
131 | { Opt_tcp, "tcp" }, | 132 | { Opt_tcp, "tcp" }, |
132 | { Opt_rdma, "rdma" }, | 133 | { Opt_rdma, "rdma" }, |
@@ -158,7 +159,7 @@ static const match_table_t nfs_mount_option_tokens = { | |||
158 | { Opt_mountvers, "mountvers=%s" }, | 159 | { Opt_mountvers, "mountvers=%s" }, |
159 | { Opt_nfsvers, "nfsvers=%s" }, | 160 | { Opt_nfsvers, "nfsvers=%s" }, |
160 | { Opt_nfsvers, "vers=%s" }, | 161 | { Opt_nfsvers, "vers=%s" }, |
161 | { Opt_minorversion, "minorversion=%u" }, | 162 | { Opt_minorversion, "minorversion=%s" }, |
162 | 163 | ||
163 | { Opt_sec, "sec=%s" }, | 164 | { Opt_sec, "sec=%s" }, |
164 | { Opt_proto, "proto=%s" }, | 165 | { Opt_proto, "proto=%s" }, |
@@ -272,6 +273,10 @@ static const struct super_operations nfs_sops = { | |||
272 | }; | 273 | }; |
273 | 274 | ||
274 | #ifdef CONFIG_NFS_V4 | 275 | #ifdef CONFIG_NFS_V4 |
276 | static int nfs4_validate_text_mount_data(void *options, | ||
277 | struct nfs_parsed_mount_data *args, const char *dev_name); | ||
278 | static int nfs4_try_mount(int flags, const char *dev_name, | ||
279 | struct nfs_parsed_mount_data *data, struct vfsmount *mnt); | ||
275 | static int nfs4_get_sb(struct file_system_type *fs_type, | 280 | static int nfs4_get_sb(struct file_system_type *fs_type, |
276 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 281 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
277 | static int nfs4_remote_get_sb(struct file_system_type *fs_type, | 282 | static int nfs4_remote_get_sb(struct file_system_type *fs_type, |
@@ -742,127 +747,23 @@ static int nfs_verify_server_address(struct sockaddr *addr) | |||
742 | } | 747 | } |
743 | } | 748 | } |
744 | 749 | ||
750 | dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); | ||
745 | return 0; | 751 | return 0; |
746 | } | 752 | } |
747 | 753 | ||
748 | static void nfs_parse_ipv4_address(char *string, size_t str_len, | ||
749 | struct sockaddr *sap, size_t *addr_len) | ||
750 | { | ||
751 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
752 | u8 *addr = (u8 *)&sin->sin_addr.s_addr; | ||
753 | |||
754 | if (str_len <= INET_ADDRSTRLEN) { | ||
755 | dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", | ||
756 | (int)str_len, string); | ||
757 | |||
758 | sin->sin_family = AF_INET; | ||
759 | *addr_len = sizeof(*sin); | ||
760 | if (in4_pton(string, str_len, addr, '\0', NULL)) | ||
761 | return; | ||
762 | } | ||
763 | |||
764 | sap->sa_family = AF_UNSPEC; | ||
765 | *addr_len = 0; | ||
766 | } | ||
767 | |||
768 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
769 | static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | ||
770 | const char *delim, | ||
771 | struct sockaddr_in6 *sin6) | ||
772 | { | ||
773 | char *p; | ||
774 | size_t len; | ||
775 | |||
776 | if ((string + str_len) == delim) | ||
777 | return 1; | ||
778 | |||
779 | if (*delim != IPV6_SCOPE_DELIMITER) | ||
780 | return 0; | ||
781 | |||
782 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | ||
783 | return 0; | ||
784 | |||
785 | len = (string + str_len) - delim - 1; | ||
786 | p = kstrndup(delim + 1, len, GFP_KERNEL); | ||
787 | if (p) { | ||
788 | unsigned long scope_id = 0; | ||
789 | struct net_device *dev; | ||
790 | |||
791 | dev = dev_get_by_name(&init_net, p); | ||
792 | if (dev != NULL) { | ||
793 | scope_id = dev->ifindex; | ||
794 | dev_put(dev); | ||
795 | } else { | ||
796 | if (strict_strtoul(p, 10, &scope_id) == 0) { | ||
797 | kfree(p); | ||
798 | return 0; | ||
799 | } | ||
800 | } | ||
801 | |||
802 | kfree(p); | ||
803 | |||
804 | sin6->sin6_scope_id = scope_id; | ||
805 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); | ||
806 | return 1; | ||
807 | } | ||
808 | |||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | ||
813 | struct sockaddr *sap, size_t *addr_len) | ||
814 | { | ||
815 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
816 | u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; | ||
817 | const char *delim; | ||
818 | |||
819 | if (str_len <= INET6_ADDRSTRLEN) { | ||
820 | dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", | ||
821 | (int)str_len, string); | ||
822 | |||
823 | sin6->sin6_family = AF_INET6; | ||
824 | *addr_len = sizeof(*sin6); | ||
825 | if (in6_pton(string, str_len, addr, | ||
826 | IPV6_SCOPE_DELIMITER, &delim) != 0) { | ||
827 | if (nfs_parse_ipv6_scope_id(string, str_len, | ||
828 | delim, sin6) != 0) | ||
829 | return; | ||
830 | } | ||
831 | } | ||
832 | |||
833 | sap->sa_family = AF_UNSPEC; | ||
834 | *addr_len = 0; | ||
835 | } | ||
836 | #else | ||
837 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | ||
838 | struct sockaddr *sap, size_t *addr_len) | ||
839 | { | ||
840 | sap->sa_family = AF_UNSPEC; | ||
841 | *addr_len = 0; | ||
842 | } | ||
843 | #endif | ||
844 | |||
845 | /* | 754 | /* |
846 | * Construct a sockaddr based on the contents of a string that contains | 755 | * Select between a default port value and a user-specified port value. |
847 | * an IP address in presentation format. | 756 | * If a zero value is set, then autobind will be used. |
848 | * | ||
849 | * If there is a problem constructing the new sockaddr, set the address | ||
850 | * family to AF_UNSPEC. | ||
851 | */ | 757 | */ |
852 | void nfs_parse_ip_address(char *string, size_t str_len, | 758 | static void nfs_set_default_port(struct sockaddr *sap, const int parsed_port, |
853 | struct sockaddr *sap, size_t *addr_len) | 759 | const unsigned short default_port) |
854 | { | 760 | { |
855 | unsigned int i, colons; | 761 | unsigned short port = default_port; |
856 | 762 | ||
857 | colons = 0; | 763 | if (parsed_port != NFS_UNSPEC_PORT) |
858 | for (i = 0; i < str_len; i++) | 764 | port = parsed_port; |
859 | if (string[i] == ':') | ||
860 | colons++; | ||
861 | 765 | ||
862 | if (colons >= 2) | 766 | rpc_set_port(sap, port); |
863 | nfs_parse_ipv6_address(string, str_len, sap, addr_len); | ||
864 | else | ||
865 | nfs_parse_ipv4_address(string, str_len, sap, addr_len); | ||
866 | } | 767 | } |
867 | 768 | ||
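
Two call sites show the intent of the replacement helper: an explicitly parsed port survives as-is — including 0, which rpc_set_port() passes through and the transport treats as autobind — and only the NFS_UNSPEC_PORT sentinel falls back to the default. A sketch, assuming the usual NFS_PORT constant (2049):

    /* mount daemon: autobind unless the user gave mountport= */
    nfs_set_default_port(request.sap, args->mount_server.port, 0);

    /* the NFS service itself: default to 2049 unless port= was given */
    nfs_set_default_port(sap, args->nfs_server.port, NFS_PORT);
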
868 | /* | 769 | /* |
@@ -904,8 +805,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) | |||
904 | 805 | ||
905 | /* | 806 | /* |
906 | * Parse the value of the 'sec=' option. | 807 | * Parse the value of the 'sec=' option. |
907 | * | ||
908 | * The flavor_len setting is for v4 mounts. | ||
909 | */ | 808 | */ |
910 | static int nfs_parse_security_flavors(char *value, | 809 | static int nfs_parse_security_flavors(char *value, |
911 | struct nfs_parsed_mount_data *mnt) | 810 | struct nfs_parsed_mount_data *mnt) |
@@ -916,53 +815,43 @@ static int nfs_parse_security_flavors(char *value, | |||
916 | 815 | ||
917 | switch (match_token(value, nfs_secflavor_tokens, args)) { | 816 | switch (match_token(value, nfs_secflavor_tokens, args)) { |
918 | case Opt_sec_none: | 817 | case Opt_sec_none: |
919 | mnt->auth_flavor_len = 0; | ||
920 | mnt->auth_flavors[0] = RPC_AUTH_NULL; | 818 | mnt->auth_flavors[0] = RPC_AUTH_NULL; |
921 | break; | 819 | break; |
922 | case Opt_sec_sys: | 820 | case Opt_sec_sys: |
923 | mnt->auth_flavor_len = 0; | ||
924 | mnt->auth_flavors[0] = RPC_AUTH_UNIX; | 821 | mnt->auth_flavors[0] = RPC_AUTH_UNIX; |
925 | break; | 822 | break; |
926 | case Opt_sec_krb5: | 823 | case Opt_sec_krb5: |
927 | mnt->auth_flavor_len = 1; | ||
928 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; | 824 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; |
929 | break; | 825 | break; |
930 | case Opt_sec_krb5i: | 826 | case Opt_sec_krb5i: |
931 | mnt->auth_flavor_len = 1; | ||
932 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; | 827 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; |
933 | break; | 828 | break; |
934 | case Opt_sec_krb5p: | 829 | case Opt_sec_krb5p: |
935 | mnt->auth_flavor_len = 1; | ||
936 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; | 830 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; |
937 | break; | 831 | break; |
938 | case Opt_sec_lkey: | 832 | case Opt_sec_lkey: |
939 | mnt->auth_flavor_len = 1; | ||
940 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; | 833 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; |
941 | break; | 834 | break; |
942 | case Opt_sec_lkeyi: | 835 | case Opt_sec_lkeyi: |
943 | mnt->auth_flavor_len = 1; | ||
944 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; | 836 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; |
945 | break; | 837 | break; |
946 | case Opt_sec_lkeyp: | 838 | case Opt_sec_lkeyp: |
947 | mnt->auth_flavor_len = 1; | ||
948 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; | 839 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; |
949 | break; | 840 | break; |
950 | case Opt_sec_spkm: | 841 | case Opt_sec_spkm: |
951 | mnt->auth_flavor_len = 1; | ||
952 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; | 842 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; |
953 | break; | 843 | break; |
954 | case Opt_sec_spkmi: | 844 | case Opt_sec_spkmi: |
955 | mnt->auth_flavor_len = 1; | ||
956 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; | 845 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; |
957 | break; | 846 | break; |
958 | case Opt_sec_spkmp: | 847 | case Opt_sec_spkmp: |
959 | mnt->auth_flavor_len = 1; | ||
960 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; | 848 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; |
961 | break; | 849 | break; |
962 | default: | 850 | default: |
963 | return 0; | 851 | return 0; |
964 | } | 852 | } |
965 | 853 | ||
854 | mnt->auth_flavor_len = 1; | ||
966 | return 1; | 855 | return 1; |
967 | } | 856 | } |
968 | 857 | ||
@@ -1001,7 +890,6 @@ static int nfs_parse_mount_options(char *raw, | |||
1001 | while ((p = strsep(&raw, ",")) != NULL) { | 890 | while ((p = strsep(&raw, ",")) != NULL) { |
1002 | substring_t args[MAX_OPT_ARGS]; | 891 | substring_t args[MAX_OPT_ARGS]; |
1003 | unsigned long option; | 892 | unsigned long option; |
1004 | int int_option; | ||
1005 | int token; | 893 | int token; |
1006 | 894 | ||
1007 | if (!*p) | 895 | if (!*p) |
@@ -1047,10 +935,18 @@ static int nfs_parse_mount_options(char *raw, | |||
1047 | break; | 935 | break; |
1048 | case Opt_v2: | 936 | case Opt_v2: |
1049 | mnt->flags &= ~NFS_MOUNT_VER3; | 937 | mnt->flags &= ~NFS_MOUNT_VER3; |
938 | mnt->version = 2; | ||
1050 | break; | 939 | break; |
1051 | case Opt_v3: | 940 | case Opt_v3: |
1052 | mnt->flags |= NFS_MOUNT_VER3; | 941 | mnt->flags |= NFS_MOUNT_VER3; |
942 | mnt->version = 3; | ||
1053 | break; | 943 | break; |
944 | #ifdef CONFIG_NFS_V4 | ||
945 | case Opt_v4: | ||
946 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
947 | mnt->version = 4; | ||
948 | break; | ||
949 | #endif | ||
1054 | case Opt_udp: | 950 | case Opt_udp: |
1055 | mnt->flags &= ~NFS_MOUNT_TCP; | 951 | mnt->flags &= ~NFS_MOUNT_TCP; |
1056 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; | 952 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
@@ -1264,20 +1160,33 @@ static int nfs_parse_mount_options(char *raw, | |||
1264 | switch (option) { | 1160 | switch (option) { |
1265 | case NFS2_VERSION: | 1161 | case NFS2_VERSION: |
1266 | mnt->flags &= ~NFS_MOUNT_VER3; | 1162 | mnt->flags &= ~NFS_MOUNT_VER3; |
1163 | mnt->version = 2; | ||
1267 | break; | 1164 | break; |
1268 | case NFS3_VERSION: | 1165 | case NFS3_VERSION: |
1269 | mnt->flags |= NFS_MOUNT_VER3; | 1166 | mnt->flags |= NFS_MOUNT_VER3; |
1167 | mnt->version = 3; | ||
1270 | break; | 1168 | break; |
1169 | #ifdef CONFIG_NFS_V4 | ||
1170 | case NFS4_VERSION: | ||
1171 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
1172 | mnt->version = 4; | ||
1173 | break; | ||
1174 | #endif | ||
1271 | default: | 1175 | default: |
1272 | goto out_invalid_value; | 1176 | goto out_invalid_value; |
1273 | } | 1177 | } |
1274 | break; | 1178 | break; |
1275 | case Opt_minorversion: | 1179 | case Opt_minorversion: |
1276 | if (match_int(args, &int_option)) | 1180 | string = match_strdup(args); |
1277 | return 0; | 1181 | if (string == NULL) |
1278 | if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) | 1182 | goto out_nomem; |
1279 | return 0; | 1183 | rc = strict_strtoul(string, 10, &option); |
1280 | mnt->minorversion = int_option; | 1184 | kfree(string); |
1185 | if (rc != 0) | ||
1186 | goto out_invalid_value; | ||
1187 | if (option > NFS4_MAX_MINOR_VERSION) | ||
1188 | goto out_invalid_value; | ||
1189 | mnt->minorversion = option; | ||
1281 | break; | 1190 | break; |
1282 | 1191 | ||
1283 | /* | 1192 | /* |
@@ -1352,11 +1261,14 @@ static int nfs_parse_mount_options(char *raw, | |||
1352 | string = match_strdup(args); | 1261 | string = match_strdup(args); |
1353 | if (string == NULL) | 1262 | if (string == NULL) |
1354 | goto out_nomem; | 1263 | goto out_nomem; |
1355 | nfs_parse_ip_address(string, strlen(string), | 1264 | mnt->nfs_server.addrlen = |
1356 | (struct sockaddr *) | 1265 | rpc_pton(string, strlen(string), |
1357 | &mnt->nfs_server.address, | 1266 | (struct sockaddr *) |
1358 | &mnt->nfs_server.addrlen); | 1267 | &mnt->nfs_server.address, |
1268 | sizeof(mnt->nfs_server.address)); | ||
1359 | kfree(string); | 1269 | kfree(string); |
1270 | if (mnt->nfs_server.addrlen == 0) | ||
1271 | goto out_invalid_address; | ||
1360 | break; | 1272 | break; |
1361 | case Opt_clientaddr: | 1273 | case Opt_clientaddr: |
1362 | string = match_strdup(args); | 1274 | string = match_strdup(args); |
@@ -1376,11 +1288,14 @@ static int nfs_parse_mount_options(char *raw, | |||
1376 | string = match_strdup(args); | 1288 | string = match_strdup(args); |
1377 | if (string == NULL) | 1289 | if (string == NULL) |
1378 | goto out_nomem; | 1290 | goto out_nomem; |
1379 | nfs_parse_ip_address(string, strlen(string), | 1291 | mnt->mount_server.addrlen = |
1380 | (struct sockaddr *) | 1292 | rpc_pton(string, strlen(string), |
1381 | &mnt->mount_server.address, | 1293 | (struct sockaddr *) |
1382 | &mnt->mount_server.addrlen); | 1294 | &mnt->mount_server.address, |
1295 | sizeof(mnt->mount_server.address)); | ||
1383 | kfree(string); | 1296 | kfree(string); |
1297 | if (mnt->mount_server.addrlen == 0) | ||
1298 | goto out_invalid_address; | ||
1384 | break; | 1299 | break; |
1385 | case Opt_lookupcache: | 1300 | case Opt_lookupcache: |
1386 | string = match_strdup(args); | 1301 | string = match_strdup(args); |
@@ -1432,8 +1347,11 @@ static int nfs_parse_mount_options(char *raw, | |||
1432 | 1347 | ||
1433 | return 1; | 1348 | return 1; |
1434 | 1349 | ||
1350 | out_invalid_address: | ||
1351 | printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); | ||
1352 | return 0; | ||
1435 | out_invalid_value: | 1353 | out_invalid_value: |
1436 | printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); | 1354 | printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); |
1437 | return 0; | 1355 | return 0; |
1438 | out_nomem: | 1356 | out_nomem: |
1439 | printk(KERN_INFO "NFS: not enough memory to parse option\n"); | 1357 | printk(KERN_INFO "NFS: not enough memory to parse option\n"); |
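
All of the hand-rolled IPv4/IPv6 parsing removed above collapses into the shared sunrpc helper rpc_pton(), which fills in a sockaddr (including IPv6 link-local scope ids) and returns the address length, or 0 on failure. Standalone usage with a made-up address:

    struct sockaddr_storage ss;
    size_t salen;

    salen = rpc_pton("192.168.1.10", 12,	/* string and its length */
    		(struct sockaddr *)&ss, sizeof(ss));
    if (salen == 0)
    	return -EINVAL;		/* not a parseable IPv4/IPv6 address */
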
@@ -1445,13 +1363,60 @@ out_security_failure: | |||
1445 | } | 1363 | } |
1446 | 1364 | ||
1447 | /* | 1365 | /* |
1366 | * Match the requested auth flavors with the list returned by | ||
1367 | * the server. Returns zero and sets the mount's authentication | ||
1368 | * flavor on success; returns -EACCES if server does not support | ||
1369 | * the requested flavor. | ||
1370 | */ | ||
1371 | static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, | ||
1372 | struct nfs_mount_request *request) | ||
1373 | { | ||
1374 | unsigned int i, j, server_authlist_len = *(request->auth_flav_len); | ||
1375 | |||
1376 | /* | ||
1377 | * Certain releases of Linux's mountd return an empty | ||
1378 | * flavor list. To prevent behavioral regression with | ||
1379 | * these servers (i.e. rejecting mounts that used to | ||
1380 | * succeed), revert to pre-2.6.32 behavior (no checking) | ||
1381 | * if the returned flavor list is empty. | ||
1382 | */ | ||
1383 | if (server_authlist_len == 0) | ||
1384 | return 0; | ||
1385 | |||
1386 | /* | ||
1387 | * We avoid sophisticated negotiating here, as there are | ||
1388 | * plenty of cases where we can get it wrong, providing | ||
1389 | * either too little or too much security. | ||
1390 | * | ||
1391 | * RFC 2623, section 2.7 suggests we SHOULD prefer the | ||
1392 | * flavor listed first. However, some servers list | ||
1393 | * AUTH_NULL first. Our caller plants AUTH_SYS, the | ||
1394 | * preferred default, in args->auth_flavors[0] if the | ||
1395 | * user didn't specify a sec= mount option. | ||
1396 | */ | ||
1397 | for (i = 0; i < args->auth_flavor_len; i++) | ||
1398 | for (j = 0; j < server_authlist_len; j++) | ||
1399 | if (args->auth_flavors[i] == request->auth_flavs[j]) { | ||
1400 | dfprintk(MOUNT, "NFS: using auth flavor %d\n", | ||
1401 | request->auth_flavs[j]); | ||
1402 | args->auth_flavors[0] = request->auth_flavs[j]; | ||
1403 | return 0; | ||
1404 | } | ||
1405 | |||
1406 | dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n"); | ||
1407 | nfs_umount(request); | ||
1408 | return -EACCES; | ||
1409 | } | ||
1410 | |||
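
The caller's half of this negotiation appears in the nfs_try_mount() hunk below: args->auth_flavors[0] already holds either the user's sec= choice or the AUTH_SYS default, so index 0 is the preference the loop honours, and the walk only runs for MNTv3 because MNTv1 replies carry no flavor list. In outline:

    status = nfs_mount(&request);
    if (status != 0)
    	return status;
    if (args->mount_server.version != NFS_MNT3_VERSION)
    	return 0;	/* MNTv1 (NFSv2): nothing to negotiate */
    return nfs_walk_authlist(args, &request);
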
1411 | /* | ||
1448 | * Use the remote server's MOUNT service to request the NFS file handle | 1412 | * Use the remote server's MOUNT service to request the NFS file handle |
1449 | * corresponding to the provided path. | 1413 | * corresponding to the provided path. |
1450 | */ | 1414 | */ |
1451 | static int nfs_try_mount(struct nfs_parsed_mount_data *args, | 1415 | static int nfs_try_mount(struct nfs_parsed_mount_data *args, |
1452 | struct nfs_fh *root_fh) | 1416 | struct nfs_fh *root_fh) |
1453 | { | 1417 | { |
1454 | unsigned int auth_flavor_len = 0; | 1418 | rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; |
1419 | unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); | ||
1455 | struct nfs_mount_request request = { | 1420 | struct nfs_mount_request request = { |
1456 | .sap = (struct sockaddr *) | 1421 | .sap = (struct sockaddr *) |
1457 | &args->mount_server.address, | 1422 | &args->mount_server.address, |
@@ -1459,7 +1424,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1459 | .protocol = args->mount_server.protocol, | 1424 | .protocol = args->mount_server.protocol, |
1460 | .fh = root_fh, | 1425 | .fh = root_fh, |
1461 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, | 1426 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, |
1462 | .auth_flav_len = &auth_flavor_len, | 1427 | .auth_flav_len = &server_authlist_len, |
1428 | .auth_flavs = server_authlist, | ||
1463 | }; | 1429 | }; |
1464 | int status; | 1430 | int status; |
1465 | 1431 | ||
@@ -1485,23 +1451,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1485 | args->mount_server.addrlen = args->nfs_server.addrlen; | 1451 | args->mount_server.addrlen = args->nfs_server.addrlen; |
1486 | } | 1452 | } |
1487 | request.salen = args->mount_server.addrlen; | 1453 | request.salen = args->mount_server.addrlen; |
1488 | 1454 | nfs_set_default_port(request.sap, args->mount_server.port, 0); | |
1489 | /* | ||
1490 | * autobind will be used if mount_server.port == 0 | ||
1491 | */ | ||
1492 | nfs_set_port(request.sap, args->mount_server.port); | ||
1493 | 1455 | ||
1494 | /* | 1456 | /* |
1495 | * Now ask the mount server to map our export path | 1457 | * Now ask the mount server to map our export path |
1496 | * to a file handle. | 1458 | * to a file handle. |
1497 | */ | 1459 | */ |
1498 | status = nfs_mount(&request); | 1460 | status = nfs_mount(&request); |
1499 | if (status == 0) | 1461 | if (status != 0) { |
1500 | return 0; | 1462 | dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", |
1463 | request.hostname, status); | ||
1464 | return status; | ||
1465 | } | ||
1501 | 1466 | ||
1502 | dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", | 1467 | /* |
1503 | request.hostname, status); | 1468 | * MNTv1 (NFSv2) does not support auth flavor negotiation. |
1504 | return status; | 1469 | */ |
1470 | if (args->mount_server.version != NFS_MNT3_VERSION) | ||
1471 | return 0; | ||
1472 | return nfs_walk_authlist(args, &request); | ||
1505 | } | 1473 | } |
1506 | 1474 | ||
1507 | static int nfs_parse_simple_hostname(const char *dev_name, | 1475 | static int nfs_parse_simple_hostname(const char *dev_name, |
@@ -1661,6 +1629,7 @@ static int nfs_validate_mount_data(void *options, | |||
1661 | const char *dev_name) | 1629 | const char *dev_name) |
1662 | { | 1630 | { |
1663 | struct nfs_mount_data *data = (struct nfs_mount_data *)options; | 1631 | struct nfs_mount_data *data = (struct nfs_mount_data *)options; |
1632 | struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; | ||
1664 | 1633 | ||
1665 | if (data == NULL) | 1634 | if (data == NULL) |
1666 | goto out_no_data; | 1635 | goto out_no_data; |
@@ -1672,10 +1641,12 @@ static int nfs_validate_mount_data(void *options, | |||
1672 | args->acregmax = NFS_DEF_ACREGMAX; | 1641 | args->acregmax = NFS_DEF_ACREGMAX; |
1673 | args->acdirmin = NFS_DEF_ACDIRMIN; | 1642 | args->acdirmin = NFS_DEF_ACDIRMIN; |
1674 | args->acdirmax = NFS_DEF_ACDIRMAX; | 1643 | args->acdirmax = NFS_DEF_ACDIRMAX; |
1675 | args->mount_server.port = 0; /* autobind unless user sets port */ | 1644 | args->mount_server.port = NFS_UNSPEC_PORT; |
1676 | args->nfs_server.port = 0; /* autobind unless user sets port */ | 1645 | args->nfs_server.port = NFS_UNSPEC_PORT; |
1677 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 1646 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
1678 | args->auth_flavors[0] = RPC_AUTH_UNIX; | 1647 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
1648 | args->auth_flavor_len = 1; | ||
1649 | args->minorversion = 0; | ||
1679 | 1650 | ||
1680 | switch (data->version) { | 1651 | switch (data->version) { |
1681 | case 1: | 1652 | case 1: |
@@ -1697,8 +1668,11 @@ static int nfs_validate_mount_data(void *options, | |||
1697 | if (data->root.size > NFS3_FHSIZE || data->root.size == 0) | 1668 | if (data->root.size > NFS3_FHSIZE || data->root.size == 0) |
1698 | goto out_invalid_fh; | 1669 | goto out_invalid_fh; |
1699 | mntfh->size = data->root.size; | 1670 | mntfh->size = data->root.size; |
1700 | } else | 1671 | args->version = 3; |
1672 | } else { | ||
1701 | mntfh->size = NFS2_FHSIZE; | 1673 | mntfh->size = NFS2_FHSIZE; |
1674 | args->version = 2; | ||
1675 | } | ||
1702 | 1676 | ||
1703 | 1677 | ||
1704 | memcpy(mntfh->data, data->root.data, mntfh->size); | 1678 | memcpy(mntfh->data, data->root.data, mntfh->size); |
@@ -1720,11 +1694,9 @@ static int nfs_validate_mount_data(void *options, | |||
1720 | args->acdirmin = data->acdirmin; | 1694 | args->acdirmin = data->acdirmin; |
1721 | args->acdirmax = data->acdirmax; | 1695 | args->acdirmax = data->acdirmax; |
1722 | 1696 | ||
1723 | memcpy(&args->nfs_server.address, &data->addr, | 1697 | memcpy(sap, &data->addr, sizeof(data->addr)); |
1724 | sizeof(data->addr)); | ||
1725 | args->nfs_server.addrlen = sizeof(data->addr); | 1698 | args->nfs_server.addrlen = sizeof(data->addr); |
1726 | if (!nfs_verify_server_address((struct sockaddr *) | 1699 | if (!nfs_verify_server_address(sap)) |
1727 | &args->nfs_server.address)) | ||
1728 | goto out_no_address; | 1700 | goto out_no_address; |
1729 | 1701 | ||
1730 | if (!(data->flags & NFS_MOUNT_TCP)) | 1702 | if (!(data->flags & NFS_MOUNT_TCP)) |
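[Editor's note] Several hunks above replace repeated casts of &args->nfs_server.address with a single "sap" alias declared once at the top of the function. sockaddr_storage is sized and aligned for any address family, so one sockaddr-typed view keeps every call site short. A self-contained sketch of the shape (server_args and verify_address are stand-ins):

    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    struct server_args {
        struct sockaddr_storage address;   /* room for v4 or v6 */
        socklen_t addrlen;
    };

    static int verify_address(const struct sockaddr *sap)
    {
        return sap->sa_family == AF_INET || sap->sa_family == AF_INET6;
    }

    int main(void)
    {
        struct server_args args;
        struct sockaddr *sap = (struct sockaddr *)&args.address;
        struct sockaddr_in v4 = { .sin_family = AF_INET };

        /* binary mount data carries a fixed sockaddr_in: copy, then verify */
        memset(&args, 0, sizeof(args));
        memcpy(sap, &v4, sizeof(v4));
        args.addrlen = sizeof(v4);
        printf("address %s\n", verify_address(sap) ? "ok" : "rejected");
        return 0;
    }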
@@ -1772,12 +1744,18 @@ static int nfs_validate_mount_data(void *options, | |||
1772 | if (nfs_parse_mount_options((char *)options, args) == 0) | 1744 | if (nfs_parse_mount_options((char *)options, args) == 0) |
1773 | return -EINVAL; | 1745 | return -EINVAL; |
1774 | 1746 | ||
1775 | if (!nfs_verify_server_address((struct sockaddr *) | 1747 | if (!nfs_verify_server_address(sap)) |
1776 | &args->nfs_server.address)) | ||
1777 | goto out_no_address; | 1748 | goto out_no_address; |
1778 | 1749 | ||
1779 | nfs_set_port((struct sockaddr *)&args->nfs_server.address, | 1750 | if (args->version == 4) |
1780 | args->nfs_server.port); | 1751 | #ifdef CONFIG_NFS_V4 |
1752 | return nfs4_validate_text_mount_data(options, | ||
1753 | args, dev_name); | ||
1754 | #else | ||
1755 | goto out_v4_not_compiled; | ||
1756 | #endif | ||
1757 | |||
1758 | nfs_set_default_port(sap, args->nfs_server.port, 0); | ||
1781 | 1759 | ||
1782 | nfs_set_mount_transport_protocol(args); | 1760 | nfs_set_mount_transport_protocol(args); |
1783 | 1761 | ||
@@ -1825,6 +1803,12 @@ out_v3_not_compiled: | |||
1825 | return -EPROTONOSUPPORT; | 1803 | return -EPROTONOSUPPORT; |
1826 | #endif /* !CONFIG_NFS_V3 */ | 1804 | #endif /* !CONFIG_NFS_V3 */ |
1827 | 1805 | ||
1806 | #ifndef CONFIG_NFS_V4 | ||
1807 | out_v4_not_compiled: | ||
1808 | dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); | ||
1809 | return -EPROTONOSUPPORT; | ||
1810 | #endif /* !CONFIG_NFS_V4 */ | ||
1811 | |||
1828 | out_nomem: | 1812 | out_nomem: |
1829 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); | 1813 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); |
1830 | return -ENOMEM; | 1814 | return -ENOMEM; |
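[Editor's note] The out_v4_not_compiled label added above only has a goto reaching it in the !CONFIG_NFS_V4 preprocessor branch, so the label itself is wrapped in the complementary #ifndef; otherwise the compiler would flag an unused label. A compilable demonstration of the pattern (HAVE_V4 is an invented stand-in for the config symbol):

    #include <stdio.h>

    /* #define HAVE_V4 1 */

    static int validate(int version)
    {
    #ifndef HAVE_V4
        if (version == 4)
            goto out_v4_not_compiled;
    #endif
        return 0;

    #ifndef HAVE_V4
    out_v4_not_compiled:
        fprintf(stderr, "v4 support is not compiled in\n");
        return -1;
    #endif
    }

    int main(void)
    {
        return validate(4) ? 1 : 0;
    }

Toggling HAVE_V4 shows both shapes compile cleanly: with it defined, both the goto and the label vanish together.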
@@ -1934,6 +1918,8 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
1934 | if (server->flags & NFS_MOUNT_NOAC) | 1918 | if (server->flags & NFS_MOUNT_NOAC) |
1935 | sb->s_flags |= MS_SYNCHRONOUS; | 1919 | sb->s_flags |= MS_SYNCHRONOUS; |
1936 | 1920 | ||
1921 | sb->s_bdi = &server->backing_dev_info; | ||
1922 | |||
1937 | nfs_super_set_maxbytes(sb, server->maxfilesize); | 1923 | nfs_super_set_maxbytes(sb, server->maxfilesize); |
1938 | } | 1924 | } |
1939 | 1925 | ||
@@ -2120,6 +2106,14 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2120 | if (error < 0) | 2106 | if (error < 0) |
2121 | goto out; | 2107 | goto out; |
2122 | 2108 | ||
2109 | #ifdef CONFIG_NFS_V4 | ||
2110 | if (data->version == 4) { | ||
2111 | error = nfs4_try_mount(flags, dev_name, data, mnt); | ||
2112 | kfree(data->client_address); | ||
2113 | goto out; | ||
2114 | } | ||
2115 | #endif /* CONFIG_NFS_V4 */ | ||
2116 | |||
2123 | /* Get a volume representation */ | 2117 | /* Get a volume representation */ |
2124 | server = nfs_create_server(data, mntfh); | 2118 | server = nfs_create_server(data, mntfh); |
2125 | if (IS_ERR(server)) { | 2119 | if (IS_ERR(server)) { |
@@ -2317,6 +2311,43 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) | |||
2317 | args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); | 2311 | args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); |
2318 | } | 2312 | } |
2319 | 2313 | ||
2314 | static int nfs4_validate_text_mount_data(void *options, | ||
2315 | struct nfs_parsed_mount_data *args, | ||
2316 | const char *dev_name) | ||
2317 | { | ||
2318 | struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; | ||
2319 | |||
2320 | nfs_set_default_port(sap, args->nfs_server.port, NFS_PORT); | ||
2321 | |||
2322 | nfs_validate_transport_protocol(args); | ||
2323 | |||
2324 | nfs4_validate_mount_flags(args); | ||
2325 | |||
2326 | if (args->version != 4) { | ||
2327 | dfprintk(MOUNT, | ||
2328 | "NFS4: Illegal mount version\n"); | ||
2329 | return -EINVAL; | ||
2330 | } | ||
2331 | |||
2332 | if (args->auth_flavor_len > 1) { | ||
2333 | dfprintk(MOUNT, | ||
2334 | "NFS4: Too many RPC auth flavours specified\n"); | ||
2335 | return -EINVAL; | ||
2336 | } | ||
2337 | |||
2338 | if (args->client_address == NULL) { | ||
2339 | dfprintk(MOUNT, | ||
2340 | "NFS4: mount program didn't pass callback address\n"); | ||
2341 | return -EINVAL; | ||
2342 | } | ||
2343 | |||
2344 | return nfs_parse_devname(dev_name, | ||
2345 | &args->nfs_server.hostname, | ||
2346 | NFS4_MAXNAMLEN, | ||
2347 | &args->nfs_server.export_path, | ||
2348 | NFS4_MAXPATHLEN); | ||
2349 | } | ||
2350 | |||
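[Editor's note] The last step of nfs4_validate_text_mount_data() above is nfs_parse_devname(), which splits a "server:/export/path" device string under NFS4_MAXNAMLEN/NFS4_MAXPATHLEN limits. A rough user-space equivalent is sketched below; it deliberately ignores bracketed IPv6 literals, which the real parser must also handle, and the limits passed in main() are assumptions.

    #define _POSIX_C_SOURCE 200809L
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int parse_devname(const char *dev, char **host, size_t maxhost,
                             char **path, size_t maxpath)
    {
        const char *colon = strchr(dev, ':');

        if (colon == NULL || colon == dev)
            return -EINVAL;                 /* no "host:" prefix at all */
        if ((size_t)(colon - dev) > maxhost || strlen(colon + 1) > maxpath)
            return -ENAMETOOLONG;

        *host = strndup(dev, colon - dev);
        *path = strdup(colon + 1);
        return (*host && *path) ? 0 : -ENOMEM;
    }

    int main(void)
    {
        char *host, *path;

        if (parse_devname("server.example.net:/export/home",
                          &host, 255, &path, 1024) == 0) {
            printf("host=%s path=%s\n", host, path);
            free(host);
            free(path);
        }
        return 0;
    }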
2320 | /* | 2351 | /* |
2321 | * Validate NFSv4 mount options | 2352 | * Validate NFSv4 mount options |
2322 | */ | 2353 | */ |
@@ -2324,7 +2355,7 @@ static int nfs4_validate_mount_data(void *options, | |||
2324 | struct nfs_parsed_mount_data *args, | 2355 | struct nfs_parsed_mount_data *args, |
2325 | const char *dev_name) | 2356 | const char *dev_name) |
2326 | { | 2357 | { |
2327 | struct sockaddr_in *ap; | 2358 | struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; |
2328 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; | 2359 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; |
2329 | char *c; | 2360 | char *c; |
2330 | 2361 | ||
@@ -2337,23 +2368,22 @@ static int nfs4_validate_mount_data(void *options, | |||
2337 | args->acregmax = NFS_DEF_ACREGMAX; | 2368 | args->acregmax = NFS_DEF_ACREGMAX; |
2338 | args->acdirmin = NFS_DEF_ACDIRMIN; | 2369 | args->acdirmin = NFS_DEF_ACDIRMIN; |
2339 | args->acdirmax = NFS_DEF_ACDIRMAX; | 2370 | args->acdirmax = NFS_DEF_ACDIRMAX; |
2340 | args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ | 2371 | args->nfs_server.port = NFS_UNSPEC_PORT; |
2341 | args->auth_flavors[0] = RPC_AUTH_UNIX; | 2372 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
2342 | args->auth_flavor_len = 0; | 2373 | args->auth_flavor_len = 1; |
2374 | args->version = 4; | ||
2343 | args->minorversion = 0; | 2375 | args->minorversion = 0; |
2344 | 2376 | ||
2345 | switch (data->version) { | 2377 | switch (data->version) { |
2346 | case 1: | 2378 | case 1: |
2347 | ap = (struct sockaddr_in *)&args->nfs_server.address; | ||
2348 | if (data->host_addrlen > sizeof(args->nfs_server.address)) | 2379 | if (data->host_addrlen > sizeof(args->nfs_server.address)) |
2349 | goto out_no_address; | 2380 | goto out_no_address; |
2350 | if (data->host_addrlen == 0) | 2381 | if (data->host_addrlen == 0) |
2351 | goto out_no_address; | 2382 | goto out_no_address; |
2352 | args->nfs_server.addrlen = data->host_addrlen; | 2383 | args->nfs_server.addrlen = data->host_addrlen; |
2353 | if (copy_from_user(ap, data->host_addr, data->host_addrlen)) | 2384 | if (copy_from_user(sap, data->host_addr, data->host_addrlen)) |
2354 | return -EFAULT; | 2385 | return -EFAULT; |
2355 | if (!nfs_verify_server_address((struct sockaddr *) | 2386 | if (!nfs_verify_server_address(sap)) |
2356 | &args->nfs_server.address)) | ||
2357 | goto out_no_address; | 2387 | goto out_no_address; |
2358 | 2388 | ||
2359 | if (data->auth_flavourlen) { | 2389 | if (data->auth_flavourlen) { |
@@ -2399,39 +2429,14 @@ static int nfs4_validate_mount_data(void *options, | |||
2399 | nfs_validate_transport_protocol(args); | 2429 | nfs_validate_transport_protocol(args); |
2400 | 2430 | ||
2401 | break; | 2431 | break; |
2402 | default: { | 2432 | default: |
2403 | int status; | ||
2404 | |||
2405 | if (nfs_parse_mount_options((char *)options, args) == 0) | 2433 | if (nfs_parse_mount_options((char *)options, args) == 0) |
2406 | return -EINVAL; | 2434 | return -EINVAL; |
2407 | 2435 | ||
2408 | if (!nfs_verify_server_address((struct sockaddr *) | 2436 | if (!nfs_verify_server_address(sap)) |
2409 | &args->nfs_server.address)) | ||
2410 | return -EINVAL; | 2437 | return -EINVAL; |
2411 | 2438 | ||
2412 | nfs_set_port((struct sockaddr *)&args->nfs_server.address, | 2439 | return nfs4_validate_text_mount_data(options, args, dev_name); |
2413 | args->nfs_server.port); | ||
2414 | |||
2415 | nfs_validate_transport_protocol(args); | ||
2416 | |||
2417 | nfs4_validate_mount_flags(args); | ||
2418 | |||
2419 | if (args->auth_flavor_len > 1) | ||
2420 | goto out_inval_auth; | ||
2421 | |||
2422 | if (args->client_address == NULL) | ||
2423 | goto out_no_client_address; | ||
2424 | |||
2425 | status = nfs_parse_devname(dev_name, | ||
2426 | &args->nfs_server.hostname, | ||
2427 | NFS4_MAXNAMLEN, | ||
2428 | &args->nfs_server.export_path, | ||
2429 | NFS4_MAXPATHLEN); | ||
2430 | if (status < 0) | ||
2431 | return status; | ||
2432 | |||
2433 | break; | ||
2434 | } | ||
2435 | } | 2440 | } |
2436 | 2441 | ||
2437 | return 0; | 2442 | return 0; |
@@ -2448,10 +2453,6 @@ out_inval_auth: | |||
2448 | out_no_address: | 2453 | out_no_address: |
2449 | dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); | 2454 | dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); |
2450 | return -EINVAL; | 2455 | return -EINVAL; |
2451 | |||
2452 | out_no_client_address: | ||
2453 | dfprintk(MOUNT, "NFS4: mount program didn't pass callback address\n"); | ||
2454 | return -EINVAL; | ||
2455 | } | 2456 | } |
2456 | 2457 | ||
2457 | /* | 2458 | /* |
@@ -2618,6 +2619,34 @@ out_err: | |||
2618 | return ret; | 2619 | return ret; |
2619 | } | 2620 | } |
2620 | 2621 | ||
2622 | static int nfs4_try_mount(int flags, const char *dev_name, | ||
2623 | struct nfs_parsed_mount_data *data, | ||
2624 | struct vfsmount *mnt) | ||
2625 | { | ||
2626 | char *export_path; | ||
2627 | struct vfsmount *root_mnt; | ||
2628 | int error; | ||
2629 | |||
2630 | dfprintk(MOUNT, "--> nfs4_try_mount()\n"); | ||
2631 | |||
2632 | export_path = data->nfs_server.export_path; | ||
2633 | data->nfs_server.export_path = "/"; | ||
2634 | root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, | ||
2635 | data->nfs_server.hostname); | ||
2636 | data->nfs_server.export_path = export_path; | ||
2637 | |||
2638 | error = PTR_ERR(root_mnt); | ||
2639 | if (IS_ERR(root_mnt)) | ||
2640 | goto out; | ||
2641 | |||
2642 | error = nfs_follow_remote_path(root_mnt, export_path, mnt); | ||
2643 | |||
2644 | out: | ||
2645 | dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error, | ||
2646 | error != 0 ? " [error]" : ""); | ||
2647 | return error; | ||
2648 | } | ||
2649 | |||
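[Editor's note] The factored-out nfs4_try_mount() above mounts the server root first and only then follows the real export path, parking export_path and restoring it around the root mount. A generic sketch of that save/override/restore shape (all names here are invented):

    #include <stdio.h>

    struct mount_args { const char *export_path; };

    static int do_root_mount(struct mount_args *a)
    {
        printf("mounting %s\n", a->export_path);
        return 0;
    }

    static int try_mount(struct mount_args *a)
    {
        const char *saved = a->export_path;   /* park the caller's value */
        int err;

        a->export_path = "/";                 /* mount the server root instead */
        err = do_root_mount(a);
        a->export_path = saved;               /* restore before anyone reads it */
        if (err)
            return err;

        printf("walking down to %s\n", a->export_path);
        return 0;
    }

    int main(void)
    {
        struct mount_args a = { .export_path = "/export/home" };

        return try_mount(&a);
    }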
2621 | /* | 2650 | /* |
2622 | * Get the superblock for an NFS4 mountpoint | 2651 | * Get the superblock for an NFS4 mountpoint |
2623 | */ | 2652 | */ |
@@ -2625,8 +2654,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
2625 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) | 2654 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) |
2626 | { | 2655 | { |
2627 | struct nfs_parsed_mount_data *data; | 2656 | struct nfs_parsed_mount_data *data; |
2628 | char *export_path; | ||
2629 | struct vfsmount *root_mnt; | ||
2630 | int error = -ENOMEM; | 2657 | int error = -ENOMEM; |
2631 | 2658 | ||
2632 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 2659 | data = kzalloc(sizeof(*data), GFP_KERNEL); |
@@ -2638,17 +2665,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
2638 | if (error < 0) | 2665 | if (error < 0) |
2639 | goto out; | 2666 | goto out; |
2640 | 2667 | ||
2641 | export_path = data->nfs_server.export_path; | 2668 | error = nfs4_try_mount(flags, dev_name, data, mnt); |
2642 | data->nfs_server.export_path = "/"; | ||
2643 | root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, | ||
2644 | data->nfs_server.hostname); | ||
2645 | data->nfs_server.export_path = export_path; | ||
2646 | |||
2647 | error = PTR_ERR(root_mnt); | ||
2648 | if (IS_ERR(root_mnt)) | ||
2649 | goto out; | ||
2650 | |||
2651 | error = nfs_follow_remote_path(root_mnt, export_path, mnt); | ||
2652 | 2669 | ||
2653 | out: | 2670 | out: |
2654 | kfree(data->client_address); | 2671 | kfree(data->client_address); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0a0a2ff767c3..53eb26c16b50 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/file.h> | 13 | #include <linux/file.h> |
14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
15 | #include <linux/swap.h> | 15 | #include <linux/swap.h> |
16 | #include <linux/migrate.h> | ||
16 | 17 | ||
17 | #include <linux/sunrpc/clnt.h> | 18 | #include <linux/sunrpc/clnt.h> |
18 | #include <linux/nfs_fs.h> | 19 | #include <linux/nfs_fs.h> |
@@ -26,6 +27,7 @@ | |||
26 | #include "internal.h" | 27 | #include "internal.h" |
27 | #include "iostat.h" | 28 | #include "iostat.h" |
28 | #include "nfs4_fs.h" | 29 | #include "nfs4_fs.h" |
30 | #include "fscache.h" | ||
29 | 31 | ||
30 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 32 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
31 | 33 | ||
@@ -87,17 +89,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | |||
87 | return p; | 89 | return p; |
88 | } | 90 | } |
89 | 91 | ||
90 | static void nfs_writedata_free(struct nfs_write_data *p) | 92 | void nfs_writedata_free(struct nfs_write_data *p) |
91 | { | 93 | { |
92 | if (p && (p->pagevec != &p->page_array[0])) | 94 | if (p && (p->pagevec != &p->page_array[0])) |
93 | kfree(p->pagevec); | 95 | kfree(p->pagevec); |
94 | mempool_free(p, nfs_wdata_mempool); | 96 | mempool_free(p, nfs_wdata_mempool); |
95 | } | 97 | } |
96 | 98 | ||
97 | void nfs_writedata_release(void *data) | 99 | static void nfs_writedata_release(struct nfs_write_data *wdata) |
98 | { | 100 | { |
99 | struct nfs_write_data *wdata = data; | ||
100 | |||
101 | put_nfs_open_context(wdata->args.context); | 101 | put_nfs_open_context(wdata->args.context); |
102 | nfs_writedata_free(wdata); | 102 | nfs_writedata_free(wdata); |
103 | } | 103 | } |
@@ -220,24 +220,17 @@ static void nfs_end_page_writeback(struct page *page) | |||
220 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 220 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
221 | } | 221 | } |
222 | 222 | ||
223 | /* | 223 | static struct nfs_page *nfs_find_and_lock_request(struct page *page) |
224 | * Find an associated nfs write request, and prepare to flush it out | ||
225 | * May return an error if the user signalled nfs_wait_on_request(). | ||
226 | */ | ||
227 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | ||
228 | struct page *page) | ||
229 | { | 224 | { |
230 | struct inode *inode = page->mapping->host; | 225 | struct inode *inode = page->mapping->host; |
231 | struct nfs_page *req; | 226 | struct nfs_page *req; |
232 | int ret; | 227 | int ret; |
233 | 228 | ||
234 | spin_lock(&inode->i_lock); | 229 | spin_lock(&inode->i_lock); |
235 | for(;;) { | 230 | for (;;) { |
236 | req = nfs_page_find_request_locked(page); | 231 | req = nfs_page_find_request_locked(page); |
237 | if (req == NULL) { | 232 | if (req == NULL) |
238 | spin_unlock(&inode->i_lock); | 233 | break; |
239 | return 0; | ||
240 | } | ||
241 | if (nfs_set_page_tag_locked(req)) | 234 | if (nfs_set_page_tag_locked(req)) |
242 | break; | 235 | break; |
243 | /* Note: If we hold the page lock, as is the case in nfs_writepage, | 236 | /* Note: If we hold the page lock, as is the case in nfs_writepage, |
@@ -249,23 +242,40 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
249 | ret = nfs_wait_on_request(req); | 242 | ret = nfs_wait_on_request(req); |
250 | nfs_release_request(req); | 243 | nfs_release_request(req); |
251 | if (ret != 0) | 244 | if (ret != 0) |
252 | return ret; | 245 | return ERR_PTR(ret); |
253 | spin_lock(&inode->i_lock); | 246 | spin_lock(&inode->i_lock); |
254 | } | 247 | } |
255 | if (test_bit(PG_CLEAN, &req->wb_flags)) { | ||
256 | spin_unlock(&inode->i_lock); | ||
257 | BUG(); | ||
258 | } | ||
259 | if (nfs_set_page_writeback(page) != 0) { | ||
260 | spin_unlock(&inode->i_lock); | ||
261 | BUG(); | ||
262 | } | ||
263 | spin_unlock(&inode->i_lock); | 248 | spin_unlock(&inode->i_lock); |
249 | return req; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Find an associated nfs write request, and prepare to flush it out | ||
254 | * May return an error if the user signalled nfs_wait_on_request(). | ||
255 | */ | ||
256 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | ||
257 | struct page *page) | ||
258 | { | ||
259 | struct nfs_page *req; | ||
260 | int ret = 0; | ||
261 | |||
262 | req = nfs_find_and_lock_request(page); | ||
263 | if (!req) | ||
264 | goto out; | ||
265 | ret = PTR_ERR(req); | ||
266 | if (IS_ERR(req)) | ||
267 | goto out; | ||
268 | |||
269 | ret = nfs_set_page_writeback(page); | ||
270 | BUG_ON(ret != 0); | ||
271 | BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); | ||
272 | |||
264 | if (!nfs_pageio_add_request(pgio, req)) { | 273 | if (!nfs_pageio_add_request(pgio, req)) { |
265 | nfs_redirty_request(req); | 274 | nfs_redirty_request(req); |
266 | return pgio->pg_error; | 275 | ret = pgio->pg_error; |
267 | } | 276 | } |
268 | return 0; | 277 | out: |
278 | return ret; | ||
269 | } | 279 | } |
270 | 280 | ||
271 | static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) | 281 | static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) |
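[Editor's note] The refactor above gives nfs_find_and_lock_request() a tri-state return: NULL (no request on the page, nothing to flush), an ERR_PTR-encoded errno, or a valid locked request. This works because IS_ERR(NULL) is false in the kernel's encoding. A freestanding sketch with the ERR_PTR machinery re-implemented the way the kernel defines it:

    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        /* errnos live in the top 4095 addresses; NULL is not an error */
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    struct request { int id; };

    static struct request *find_request(int key)
    {
        static struct request req = { .id = 42 };

        if (key < 0)
            return ERR_PTR(-5);   /* -EIO: hard failure */
        if (key == 0)
            return NULL;          /* nothing to do: not an error */
        return &req;
    }

    int main(void)
    {
        for (int key = -1; key <= 1; key++) {
            struct request *req = find_request(key);

            if (!req)
                printf("key %d: nothing to do\n", key);
            else if (IS_ERR(req))
                printf("key %d: error %ld\n", key, PTR_ERR(req));
            else
                printf("key %d: got request %d\n", key, req->id);
        }
        return 0;
    }

That is exactly the distinction nfs_page_async_flush() draws above: the NULL branch falls out with ret = 0, the IS_ERR branch propagates the errno, and only a real pointer proceeds to writeback.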
@@ -1480,7 +1490,6 @@ static int nfs_write_mapping(struct address_space *mapping, int how) | |||
1480 | .nr_to_write = LONG_MAX, | 1490 | .nr_to_write = LONG_MAX, |
1481 | .range_start = 0, | 1491 | .range_start = 0, |
1482 | .range_end = LLONG_MAX, | 1492 | .range_end = LLONG_MAX, |
1483 | .for_writepages = 1, | ||
1484 | }; | 1493 | }; |
1485 | 1494 | ||
1486 | return __nfs_write_mapping(mapping, &wbc, how); | 1495 | return __nfs_write_mapping(mapping, &wbc, how); |
@@ -1582,6 +1591,41 @@ int nfs_wb_page(struct inode *inode, struct page* page) | |||
1582 | return nfs_wb_page_priority(inode, page, FLUSH_STABLE); | 1591 | return nfs_wb_page_priority(inode, page, FLUSH_STABLE); |
1583 | } | 1592 | } |
1584 | 1593 | ||
1594 | #ifdef CONFIG_MIGRATION | ||
1595 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | ||
1596 | struct page *page) | ||
1597 | { | ||
1598 | struct nfs_page *req; | ||
1599 | int ret; | ||
1600 | |||
1601 | if (PageFsCache(page)) | ||
1602 | nfs_fscache_release_page(page, GFP_KERNEL); | ||
1603 | |||
1604 | req = nfs_find_and_lock_request(page); | ||
1605 | ret = PTR_ERR(req); | ||
1606 | if (IS_ERR(req)) | ||
1607 | goto out; | ||
1608 | |||
1609 | ret = migrate_page(mapping, newpage, page); | ||
1610 | if (!req) | ||
1611 | goto out; | ||
1612 | if (ret) | ||
1613 | goto out_unlock; | ||
1614 | page_cache_get(newpage); | ||
1615 | req->wb_page = newpage; | ||
1616 | SetPagePrivate(newpage); | ||
1617 | set_page_private(newpage, page_private(page)); | ||
1618 | ClearPagePrivate(page); | ||
1619 | set_page_private(page, 0); | ||
1620 | page_cache_release(page); | ||
1621 | out_unlock: | ||
1622 | nfs_clear_page_tag_locked(req); | ||
1623 | nfs_release_request(req); | ||
1624 | out: | ||
1625 | return ret; | ||
1626 | } | ||
1627 | #endif | ||
1628 | |||
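[Editor's note] The new nfs_migrate_page() above re-points a dirty write request at the migrated page: pin the new page, copy page_private across, set PagePrivate on the new page, clear it on the old one, then drop the old page's reference. The ordering rationale sketched below is an inference from that sequence (the payload's new home is pinned before the old pin is released), with a toy refcounted object standing in for struct page:

    #include <assert.h>
    #include <stdio.h>

    struct page { int refcount; unsigned long private; int has_private; };

    static void get_page(struct page *p) { p->refcount++; }
    static void put_page(struct page *p) { p->refcount--; }

    static void transfer_private(struct page *oldp, struct page *newp)
    {
        get_page(newp);            /* the payload pins its new home first */
        newp->private = oldp->private;
        newp->has_private = 1;     /* ~ SetPagePrivate(newpage) */
        oldp->has_private = 0;     /* ~ ClearPagePrivate(page) */
        oldp->private = 0;
        put_page(oldp);            /* only now may the old page go away */
    }

    int main(void)
    {
        struct page a = { .refcount = 2, .private = 0xdead, .has_private = 1 };
        struct page b = { .refcount = 1 };

        transfer_private(&a, &b);
        assert(b.has_private && b.private == 0xdead && !a.has_private);
        printf("old ref=%d new ref=%d\n", a.refcount, b.refcount);
        return 0;
    }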
1585 | int __init nfs_init_writepagecache(void) | 1629 | int __init nfs_init_writepagecache(void) |
1586 | { | 1630 | { |
1587 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", | 1631 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", |
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 5573508f707f..36fcabbf5186 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
34 | int flags = nfsexp_flags(rqstp, exp); | 34 | int flags = nfsexp_flags(rqstp, exp); |
35 | int ret; | 35 | int ret; |
36 | 36 | ||
37 | validate_process_creds(); | ||
38 | |||
37 | /* discard any old override before preparing the new set */ | 39 | /* discard any old override before preparing the new set */ |
38 | revert_creds(get_cred(current->real_cred)); | 40 | revert_creds(get_cred(current->real_cred)); |
39 | new = prepare_creds(); | 41 | new = prepare_creds(); |
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
86 | else | 88 | else |
87 | new->cap_effective = cap_raise_nfsd_set(new->cap_effective, | 89 | new->cap_effective = cap_raise_nfsd_set(new->cap_effective, |
88 | new->cap_permitted); | 90 | new->cap_permitted); |
91 | validate_process_creds(); | ||
89 | put_cred(override_creds(new)); | 92 | put_cred(override_creds(new)); |
90 | put_cred(new); | 93 | put_cred(new); |
94 | validate_process_creds(); | ||
91 | return 0; | 95 | return 0; |
92 | 96 | ||
93 | oom: | 97 | oom: |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b92a27629fb7..d9462643155c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd, | |||
85 | (*bpp)[-1] = '\n'; | 85 | (*bpp)[-1] = '\n'; |
86 | } | 86 | } |
87 | 87 | ||
88 | static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) | ||
89 | { | ||
90 | return sunrpc_cache_pipe_upcall(cd, h, expkey_request); | ||
91 | } | ||
92 | |||
88 | static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); | 93 | static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); |
89 | static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); | 94 | static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); |
90 | static struct cache_detail svc_expkey_cache; | 95 | static struct cache_detail svc_expkey_cache; |
@@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = { | |||
259 | .hash_table = expkey_table, | 264 | .hash_table = expkey_table, |
260 | .name = "nfsd.fh", | 265 | .name = "nfsd.fh", |
261 | .cache_put = expkey_put, | 266 | .cache_put = expkey_put, |
262 | .cache_request = expkey_request, | 267 | .cache_upcall = expkey_upcall, |
263 | .cache_parse = expkey_parse, | 268 | .cache_parse = expkey_parse, |
264 | .cache_show = expkey_show, | 269 | .cache_show = expkey_show, |
265 | .match = expkey_match, | 270 | .match = expkey_match, |
@@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd, | |||
355 | (*bpp)[-1] = '\n'; | 360 | (*bpp)[-1] = '\n'; |
356 | } | 361 | } |
357 | 362 | ||
363 | static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) | ||
364 | { | ||
365 | return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); | ||
366 | } | ||
367 | |||
358 | static struct svc_export *svc_export_update(struct svc_export *new, | 368 | static struct svc_export *svc_export_update(struct svc_export *new, |
359 | struct svc_export *old); | 369 | struct svc_export *old); |
360 | static struct svc_export *svc_export_lookup(struct svc_export *); | 370 | static struct svc_export *svc_export_lookup(struct svc_export *); |
@@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = { | |||
724 | .hash_table = export_table, | 734 | .hash_table = export_table, |
725 | .name = "nfsd.export", | 735 | .name = "nfsd.export", |
726 | .cache_put = svc_export_put, | 736 | .cache_put = svc_export_put, |
727 | .cache_request = svc_export_request, | 737 | .cache_upcall = svc_export_upcall, |
728 | .cache_parse = svc_export_parse, | 738 | .cache_parse = svc_export_parse, |
729 | .cache_show = svc_export_show, | 739 | .cache_show = svc_export_show, |
730 | .match = svc_export_match, | 740 | .match = svc_export_match, |
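[Editor's note] Both nfsd caches above switch from a ".cache_request" formatter slot to a ".cache_upcall" slot whose wrapper hands the formatter to the shared sunrpc_cache_pipe_upcall() helper. A compact sketch of that extra level of indirection; every name below is a stand-in, not the sunrpc API:

    #include <stdio.h>

    struct cache_head { const char *key; };
    struct cache_detail;

    typedef void (*format_fn)(struct cache_detail *, struct cache_head *);
    typedef int (*upcall_fn)(struct cache_detail *, struct cache_head *);

    struct cache_detail {
        const char *name;
        upcall_fn cache_upcall;   /* replaces the old .cache_request slot */
    };

    /* shared transport: sends whatever the formatter produced */
    static int generic_pipe_upcall(struct cache_detail *cd,
                                   struct cache_head *h, format_fn fmt)
    {
        fmt(cd, h);   /* let the cache render its request line */
        return 0;     /* a real helper would now write it to the pipe */
    }

    static void export_request(struct cache_detail *cd, struct cache_head *h)
    {
        printf("%s: %s\n", cd->name, h->key);
    }

    static int export_upcall(struct cache_detail *cd, struct cache_head *h)
    {
        return generic_pipe_upcall(cd, h, export_request);
    }

    static struct cache_detail export_cache = {
        .name = "nfsd.export",
        .cache_upcall = export_upcall,
    };

    int main(void)
    {
        struct cache_head h = { .key = "/export" };

        return export_cache.cache_upcall(&export_cache, &h);
    }

The payoff is that a cache may now supply a completely different upcall transport while keeping its request formatter private.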
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5b398421b051..cdfa86fa1471 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -146,6 +146,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, | |||
146 | } | 146 | } |
147 | 147 | ||
148 | static int | 148 | static int |
149 | idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) | ||
150 | { | ||
151 | return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); | ||
152 | } | ||
153 | |||
154 | static int | ||
149 | idtoname_match(struct cache_head *ca, struct cache_head *cb) | 155 | idtoname_match(struct cache_head *ca, struct cache_head *cb) |
150 | { | 156 | { |
151 | struct ent *a = container_of(ca, struct ent, h); | 157 | struct ent *a = container_of(ca, struct ent, h); |
@@ -175,10 +181,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) | |||
175 | } | 181 | } |
176 | 182 | ||
177 | static void | 183 | static void |
178 | warn_no_idmapd(struct cache_detail *detail) | 184 | warn_no_idmapd(struct cache_detail *detail, int has_died) |
179 | { | 185 | { |
180 | printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", | 186 | printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", |
181 | detail->last_close? "died" : "not been started"); | 187 | has_died ? "died" : "not been started"); |
182 | } | 188 | } |
183 | 189 | ||
184 | 190 | ||
@@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = { | |||
192 | .hash_table = idtoname_table, | 198 | .hash_table = idtoname_table, |
193 | .name = "nfs4.idtoname", | 199 | .name = "nfs4.idtoname", |
194 | .cache_put = ent_put, | 200 | .cache_put = ent_put, |
195 | .cache_request = idtoname_request, | 201 | .cache_upcall = idtoname_upcall, |
196 | .cache_parse = idtoname_parse, | 202 | .cache_parse = idtoname_parse, |
197 | .cache_show = idtoname_show, | 203 | .cache_show = idtoname_show, |
198 | .warn_no_listener = warn_no_idmapd, | 204 | .warn_no_listener = warn_no_idmapd, |
@@ -325,6 +331,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, | |||
325 | } | 331 | } |
326 | 332 | ||
327 | static int | 333 | static int |
334 | nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) | ||
335 | { | ||
336 | return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); | ||
337 | } | ||
338 | |||
339 | static int | ||
328 | nametoid_match(struct cache_head *ca, struct cache_head *cb) | 340 | nametoid_match(struct cache_head *ca, struct cache_head *cb) |
329 | { | 341 | { |
330 | struct ent *a = container_of(ca, struct ent, h); | 342 | struct ent *a = container_of(ca, struct ent, h); |
@@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = { | |||
363 | .hash_table = nametoid_table, | 375 | .hash_table = nametoid_table, |
364 | .name = "nfs4.nametoid", | 376 | .name = "nfs4.nametoid", |
365 | .cache_put = ent_put, | 377 | .cache_put = ent_put, |
366 | .cache_request = nametoid_request, | 378 | .cache_upcall = nametoid_upcall, |
367 | .cache_parse = nametoid_parse, | 379 | .cache_parse = nametoid_parse, |
368 | .cache_show = nametoid_show, | 380 | .cache_show = nametoid_show, |
369 | .warn_no_listener = warn_no_idmapd, | 381 | .warn_no_listener = warn_no_idmapd, |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6d0847562d87..7e906c5b7671 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/nfsd/xdr.h> | 37 | #include <linux/nfsd/xdr.h> |
38 | #include <linux/nfsd/syscall.h> | 38 | #include <linux/nfsd/syscall.h> |
39 | #include <linux/lockd/lockd.h> | 39 | #include <linux/lockd/lockd.h> |
40 | #include <linux/sunrpc/clnt.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <net/ipv6.h> | 43 | #include <net/ipv6.h> |
@@ -490,22 +491,18 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) | |||
490 | * | 491 | * |
491 | * Input: | 492 | * Input: |
492 | * buf: '\n'-terminated C string containing a | 493 | * buf: '\n'-terminated C string containing a |
493 | * presentation format IPv4 address | 494 | * presentation format IP address |
494 | * size: length of C string in @buf | 495 | * size: length of C string in @buf |
495 | * Output: | 496 | * Output: |
496 | * On success: returns zero if all specified locks were released; | 497 | * On success: returns zero if all specified locks were released; |
497 | * returns one if one or more locks were not released | 498 | * returns one if one or more locks were not released |
498 | * On error: return code is negative errno value | 499 | * On error: return code is negative errno value |
499 | * | ||
500 | * Note: Only AF_INET client addresses are passed in | ||
501 | */ | 500 | */ |
502 | static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) | 501 | static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) |
503 | { | 502 | { |
504 | struct sockaddr_in sin = { | 503 | struct sockaddr_storage address; |
505 | .sin_family = AF_INET, | 504 | struct sockaddr *sap = (struct sockaddr *)&address; |
506 | }; | 505 | size_t salen = sizeof(address); |
507 | int b1, b2, b3, b4; | ||
508 | char c; | ||
509 | char *fo_path; | 506 | char *fo_path; |
510 | 507 | ||
511 | /* sanity check */ | 508 | /* sanity check */ |
@@ -519,14 +516,10 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) | |||
519 | if (qword_get(&buf, fo_path, size) < 0) | 516 | if (qword_get(&buf, fo_path, size) < 0) |
520 | return -EINVAL; | 517 | return -EINVAL; |
521 | 518 | ||
522 | /* get ipv4 address */ | 519 | if (rpc_pton(fo_path, size, sap, salen) == 0) |
523 | if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) | ||
524 | return -EINVAL; | ||
525 | if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255) | ||
526 | return -EINVAL; | 520 | return -EINVAL; |
527 | sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4); | ||
528 | 521 | ||
529 | return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); | 522 | return nlmsvc_unlock_all_by_ip(sap); |
530 | } | 523 | } |
531 | 524 | ||
532 | /** | 525 | /** |
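[Editor's note] write_unlock_ip() above replaces a hand-rolled "%u.%u.%u.%u" sscanf with rpc_pton() into a sockaddr_storage, so the interface now accepts IPv6 as well as IPv4, matching the relaxed comment. rpc_pton is sunrpc-internal; the user-space approximation below uses inet_pton and only mimics the family probing:

    #include <arpa/inet.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    static int pton_any(const char *s, struct sockaddr_storage *ss)
    {
        memset(ss, 0, sizeof(*ss));

        if (inet_pton(AF_INET, s, &((struct sockaddr_in *)ss)->sin_addr) == 1) {
            ss->ss_family = AF_INET;
            return 1;
        }
        if (inet_pton(AF_INET6, s, &((struct sockaddr_in6 *)ss)->sin6_addr) == 1) {
            ss->ss_family = AF_INET6;
            return 1;
        }
        return 0;   /* 0 = parse failure, as with rpc_pton above */
    }

    int main(void)
    {
        struct sockaddr_storage ss;
        const char *tests[] = { "192.0.2.7", "2001:db8::1", "not-an-ip" };

        for (int i = 0; i < 3; i++)
            printf("%-12s -> %s\n", tests[i],
                   pton_any(tests[i], &ss) ? "ok" : "EINVAL");
        return 0;
    }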
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 492c79b7800b..24d58adfe5fd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -496,7 +496,9 @@ nfsd(void *vrqstp) | |||
496 | /* Lock the export hash tables for reading. */ | 496 | /* Lock the export hash tables for reading. */ |
497 | exp_readlock(); | 497 | exp_readlock(); |
498 | 498 | ||
499 | validate_process_creds(); | ||
499 | svc_process(rqstp); | 500 | svc_process(rqstp); |
501 | validate_process_creds(); | ||
500 | 502 | ||
501 | /* Unlock export hash tables */ | 503 | /* Unlock export hash tables */ |
502 | exp_readunlock(); | 504 | exp_readunlock(); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23341c1063bc..8fa09bfbcba7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
684 | __be32 err; | 684 | __be32 err; |
685 | int host_err; | 685 | int host_err; |
686 | 686 | ||
687 | validate_process_creds(); | ||
688 | |||
687 | /* | 689 | /* |
688 | * If we get here, then the client has already done an "open", | 690 | * If we get here, then the client has already done an "open", |
689 | * and (hopefully) checked permission - so allow OWNER_OVERRIDE | 691 | * and (hopefully) checked permission - so allow OWNER_OVERRIDE |
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
740 | out_nfserr: | 742 | out_nfserr: |
741 | err = nfserrno(host_err); | 743 | err = nfserrno(host_err); |
742 | out: | 744 | out: |
745 | validate_process_creds(); | ||
743 | return err; | 746 | return err; |
744 | } | 747 | } |
745 | 748 | ||
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig index 72da095d4009..251da07b2a1d 100644 --- a/fs/nilfs2/Kconfig +++ b/fs/nilfs2/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config NILFS2_FS | 1 | config NILFS2_FS |
2 | tristate "NILFS2 file system support (EXPERIMENTAL)" | 2 | tristate "NILFS2 file system support (EXPERIMENTAL)" |
3 | depends on BLOCK && EXPERIMENTAL | 3 | depends on EXPERIMENTAL |
4 | select CRC32 | 4 | select CRC32 |
5 | help | 5 | help |
6 | NILFS2 is a log-structured file system (LFS) supporting continuous | 6 | NILFS2 is a log-structured file system (LFS) supporting continuous |
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 99d58a028b94..08834df6ec68 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c | |||
@@ -36,6 +36,26 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) | |||
36 | return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); | 36 | return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); |
37 | } | 37 | } |
38 | 38 | ||
39 | /** | ||
40 | * nilfs_bmap_lookup_at_level - find a data block or node block | ||
41 | * @bmap: bmap | ||
42 | * @key: key | ||
43 | * @level: level | ||
44 | * @ptrp: place to store the value associated to @key | ||
45 | * | ||
46 | * Description: nilfs_bmap_lookup_at_level() finds a record whose key | ||
47 | * matches @key in the block at @level of the bmap. | ||
48 | * | ||
49 | * Return Value: On success, 0 is returned and the record associated with @key | ||
50 | * is stored in the place pointed by @ptrp. On error, one of the following | ||
51 | * negative error codes is returned. | ||
52 | * | ||
53 | * %-EIO - I/O error. | ||
54 | * | ||
55 | * %-ENOMEM - Insufficient amount of memory available. | ||
56 | * | ||
57 | * %-ENOENT - A record associated with @key does not exist. | ||
58 | */ | ||
39 | int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, | 59 | int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, |
40 | __u64 *ptrp) | 60 | __u64 *ptrp) |
41 | { | 61 | { |
@@ -69,39 +89,6 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, | |||
69 | return ret; | 89 | return ret; |
70 | } | 90 | } |
71 | 91 | ||
72 | /** | ||
73 | * nilfs_bmap_lookup - find a record | ||
74 | * @bmap: bmap | ||
75 | * @key: key | ||
76 | * @recp: pointer to record | ||
77 | * | ||
78 | * Description: nilfs_bmap_lookup() finds a record whose key matches @key in | ||
79 | * @bmap. | ||
80 | * | ||
81 | * Return Value: On success, 0 is returned and the record associated with @key | ||
82 | * is stored in the place pointed by @recp. On error, one of the following | ||
83 | * negative error codes is returned. | ||
84 | * | ||
85 | * %-EIO - I/O error. | ||
86 | * | ||
87 | * %-ENOMEM - Insufficient amount of memory available. | ||
88 | * | ||
89 | * %-ENOENT - A record associated with @key does not exist. | ||
90 | */ | ||
91 | int nilfs_bmap_lookup(struct nilfs_bmap *bmap, | ||
92 | unsigned long key, | ||
93 | unsigned long *recp) | ||
94 | { | ||
95 | __u64 ptr; | ||
96 | int ret; | ||
97 | |||
98 | /* XXX: use macro for level 1 */ | ||
99 | ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr); | ||
100 | if (recp != NULL) | ||
101 | *recp = ptr; | ||
102 | return ret; | ||
103 | } | ||
104 | |||
105 | static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | 92 | static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) |
106 | { | 93 | { |
107 | __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; | 94 | __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; |
@@ -469,104 +456,6 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) | |||
469 | (entries_per_group / NILFS_BMAP_GROUP_DIV); | 456 | (entries_per_group / NILFS_BMAP_GROUP_DIV); |
470 | } | 457 | } |
471 | 458 | ||
472 | int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, | ||
473 | union nilfs_bmap_ptr_req *req) | ||
474 | { | ||
475 | return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
476 | } | ||
477 | |||
478 | void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, | ||
479 | union nilfs_bmap_ptr_req *req) | ||
480 | { | ||
481 | nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
482 | } | ||
483 | |||
484 | void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, | ||
485 | union nilfs_bmap_ptr_req *req) | ||
486 | { | ||
487 | nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
488 | } | ||
489 | |||
490 | int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req, | ||
491 | sector_t blocknr) | ||
492 | { | ||
493 | struct inode *dat = nilfs_bmap_get_dat(bmap); | ||
494 | int ret; | ||
495 | |||
496 | ret = nilfs_dat_prepare_start(dat, &req->bpr_req); | ||
497 | if (likely(!ret)) | ||
498 | nilfs_dat_commit_start(dat, &req->bpr_req, blocknr); | ||
499 | return ret; | ||
500 | } | ||
501 | |||
502 | int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, | ||
503 | union nilfs_bmap_ptr_req *req) | ||
504 | { | ||
505 | return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
506 | } | ||
507 | |||
508 | void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, | ||
509 | union nilfs_bmap_ptr_req *req) | ||
510 | { | ||
511 | nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, | ||
512 | bmap->b_ptr_type == NILFS_BMAP_PTR_VS); | ||
513 | } | ||
514 | |||
515 | void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, | ||
516 | union nilfs_bmap_ptr_req *req) | ||
517 | { | ||
518 | nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
519 | } | ||
520 | |||
521 | int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr, | ||
522 | sector_t blocknr) | ||
523 | { | ||
524 | return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr); | ||
525 | } | ||
526 | |||
527 | int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr) | ||
528 | { | ||
529 | return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); | ||
530 | } | ||
531 | |||
532 | int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap, | ||
533 | union nilfs_bmap_ptr_req *oldreq, | ||
534 | union nilfs_bmap_ptr_req *newreq) | ||
535 | { | ||
536 | struct inode *dat = nilfs_bmap_get_dat(bmap); | ||
537 | int ret; | ||
538 | |||
539 | ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req); | ||
540 | if (ret < 0) | ||
541 | return ret; | ||
542 | ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req); | ||
543 | if (ret < 0) | ||
544 | nilfs_dat_abort_end(dat, &oldreq->bpr_req); | ||
545 | |||
546 | return ret; | ||
547 | } | ||
548 | |||
549 | void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap, | ||
550 | union nilfs_bmap_ptr_req *oldreq, | ||
551 | union nilfs_bmap_ptr_req *newreq) | ||
552 | { | ||
553 | struct inode *dat = nilfs_bmap_get_dat(bmap); | ||
554 | |||
555 | nilfs_dat_commit_end(dat, &oldreq->bpr_req, | ||
556 | bmap->b_ptr_type == NILFS_BMAP_PTR_VS); | ||
557 | nilfs_dat_commit_alloc(dat, &newreq->bpr_req); | ||
558 | } | ||
559 | |||
560 | void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap, | ||
561 | union nilfs_bmap_ptr_req *oldreq, | ||
562 | union nilfs_bmap_ptr_req *newreq) | ||
563 | { | ||
564 | struct inode *dat = nilfs_bmap_get_dat(bmap); | ||
565 | |||
566 | nilfs_dat_abort_end(dat, &oldreq->bpr_req); | ||
567 | nilfs_dat_abort_alloc(dat, &newreq->bpr_req); | ||
568 | } | ||
569 | |||
570 | static struct lock_class_key nilfs_bmap_dat_lock_key; | 459 | static struct lock_class_key nilfs_bmap_dat_lock_key; |
571 | static struct lock_class_key nilfs_bmap_mdt_lock_key; | 460 | static struct lock_class_key nilfs_bmap_mdt_lock_key; |
572 | 461 | ||
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index b2890cdcef12..9980d7dbab91 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/nilfs2_fs.h> | 29 | #include <linux/nilfs2_fs.h> |
30 | #include "alloc.h" | 30 | #include "alloc.h" |
31 | #include "dat.h" | ||
31 | 32 | ||
32 | #define NILFS_BMAP_INVALID_PTR 0 | 33 | #define NILFS_BMAP_INVALID_PTR 0 |
33 | 34 | ||
@@ -141,7 +142,6 @@ struct nilfs_bmap { | |||
141 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); | 142 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); |
142 | int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); | 143 | int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); |
143 | void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); | 144 | void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); |
144 | int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); | ||
145 | int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); | 145 | int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); |
146 | int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); | 146 | int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); |
147 | int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); | 147 | int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); |
@@ -160,90 +160,76 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | |||
160 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | 160 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); |
161 | 161 | ||
162 | 162 | ||
163 | static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key, | ||
164 | __u64 *ptr) | ||
165 | { | ||
166 | return nilfs_bmap_lookup_at_level(bmap, key, 1, ptr); | ||
167 | } | ||
168 | |||
163 | /* | 169 | /* |
164 | * Internal use only | 170 | * Internal use only |
165 | */ | 171 | */ |
166 | struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *); | 172 | struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *); |
167 | int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *, | ||
168 | union nilfs_bmap_ptr_req *); | ||
169 | void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *, | ||
170 | union nilfs_bmap_ptr_req *); | ||
171 | void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *, | ||
172 | union nilfs_bmap_ptr_req *); | ||
173 | 173 | ||
174 | static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap, | 174 | static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap, |
175 | union nilfs_bmap_ptr_req *req) | 175 | union nilfs_bmap_ptr_req *req, |
176 | struct inode *dat) | ||
176 | { | 177 | { |
177 | if (NILFS_BMAP_USE_VBN(bmap)) | 178 | if (dat) |
178 | return nilfs_bmap_prepare_alloc_v(bmap, req); | 179 | return nilfs_dat_prepare_alloc(dat, &req->bpr_req); |
179 | /* ignore target ptr */ | 180 | /* ignore target ptr */ |
180 | req->bpr_ptr = bmap->b_last_allocated_ptr++; | 181 | req->bpr_ptr = bmap->b_last_allocated_ptr++; |
181 | return 0; | 182 | return 0; |
182 | } | 183 | } |
183 | 184 | ||
184 | static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap, | 185 | static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap, |
185 | union nilfs_bmap_ptr_req *req) | 186 | union nilfs_bmap_ptr_req *req, |
187 | struct inode *dat) | ||
186 | { | 188 | { |
187 | if (NILFS_BMAP_USE_VBN(bmap)) | 189 | if (dat) |
188 | nilfs_bmap_commit_alloc_v(bmap, req); | 190 | nilfs_dat_commit_alloc(dat, &req->bpr_req); |
189 | } | 191 | } |
190 | 192 | ||
191 | static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap, | 193 | static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap, |
192 | union nilfs_bmap_ptr_req *req) | 194 | union nilfs_bmap_ptr_req *req, |
195 | struct inode *dat) | ||
193 | { | 196 | { |
194 | if (NILFS_BMAP_USE_VBN(bmap)) | 197 | if (dat) |
195 | nilfs_bmap_abort_alloc_v(bmap, req); | 198 | nilfs_dat_abort_alloc(dat, &req->bpr_req); |
196 | else | 199 | else |
197 | bmap->b_last_allocated_ptr--; | 200 | bmap->b_last_allocated_ptr--; |
198 | } | 201 | } |
199 | 202 | ||
200 | int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); | ||
201 | void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); | ||
202 | void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); | ||
203 | |||
204 | static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap, | 203 | static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap, |
205 | union nilfs_bmap_ptr_req *req) | 204 | union nilfs_bmap_ptr_req *req, |
205 | struct inode *dat) | ||
206 | { | 206 | { |
207 | return NILFS_BMAP_USE_VBN(bmap) ? | 207 | return dat ? nilfs_dat_prepare_end(dat, &req->bpr_req) : 0; |
208 | nilfs_bmap_prepare_end_v(bmap, req) : 0; | ||
209 | } | 208 | } |
210 | 209 | ||
211 | static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap, | 210 | static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap, |
212 | union nilfs_bmap_ptr_req *req) | 211 | union nilfs_bmap_ptr_req *req, |
212 | struct inode *dat) | ||
213 | { | 213 | { |
214 | if (NILFS_BMAP_USE_VBN(bmap)) | 214 | if (dat) |
215 | nilfs_bmap_commit_end_v(bmap, req); | 215 | nilfs_dat_commit_end(dat, &req->bpr_req, |
216 | bmap->b_ptr_type == NILFS_BMAP_PTR_VS); | ||
216 | } | 217 | } |
217 | 218 | ||
218 | static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, | 219 | static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, |
219 | union nilfs_bmap_ptr_req *req) | 220 | union nilfs_bmap_ptr_req *req, |
221 | struct inode *dat) | ||
220 | { | 222 | { |
221 | if (NILFS_BMAP_USE_VBN(bmap)) | 223 | if (dat) |
222 | nilfs_bmap_abort_end_v(bmap, req); | 224 | nilfs_dat_abort_end(dat, &req->bpr_req); |
223 | } | 225 | } |
224 | 226 | ||
225 | int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *, | ||
226 | sector_t); | ||
227 | int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); | ||
228 | int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); | ||
229 | |||
230 | |||
231 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, | 227 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, |
232 | const struct buffer_head *); | 228 | const struct buffer_head *); |
233 | 229 | ||
234 | __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); | 230 | __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); |
235 | __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); | 231 | __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); |
236 | 232 | ||
237 | int nilfs_bmap_prepare_update_v(struct nilfs_bmap *, | ||
238 | union nilfs_bmap_ptr_req *, | ||
239 | union nilfs_bmap_ptr_req *); | ||
240 | void nilfs_bmap_commit_update_v(struct nilfs_bmap *, | ||
241 | union nilfs_bmap_ptr_req *, | ||
242 | union nilfs_bmap_ptr_req *); | ||
243 | void nilfs_bmap_abort_update_v(struct nilfs_bmap *, | ||
244 | union nilfs_bmap_ptr_req *, | ||
245 | union nilfs_bmap_ptr_req *); | ||
246 | |||
247 | void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); | 233 | void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); |
248 | void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); | 234 | void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); |
249 | 235 | ||
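[Editor's note] The bmap.h rework above removes the NILFS_BMAP_USE_VBN() test from every prepare/commit/abort helper: the caller resolves the DAT inode once (or passes NULL for bmaps that store direct block numbers) and the helpers just branch on the handle. That deletes eight exported *_v wrapper functions from bmap.c in one stroke. A toy version of the "nullable backend handle" shape:

    #include <stdio.h>

    struct dat { unsigned long next_vblock; };

    static int prepare_alloc(struct dat *dat, unsigned long *ptr,
                             unsigned long *last_direct)
    {
        if (dat) {                    /* virtual block numbers go via the DAT */
            *ptr = dat->next_vblock++;
            return 0;
        }
        *ptr = (*last_direct)++;      /* direct bmaps skip the translation */
        return 0;
    }

    int main(void)
    {
        struct dat dat = { .next_vblock = 100 };
        unsigned long last = 7, p;

        prepare_alloc(&dat, &p, &last);   /* DAT-backed bmap */
        printf("virtual ptr = %lu\n", p);
        prepare_alloc(NULL, &p, &last);   /* direct bmap passes NULL */
        printf("direct ptr  = %lu\n", p);
        return 0;
    }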
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 7e0b61be212e..c668bca579c1 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, | |||
209 | * We cannot call radix_tree_preload for the kernels older | 209 | * We cannot call radix_tree_preload for the kernels older |
210 | * than 2.6.23, because it is not exported for modules. | 210 | * than 2.6.23, because it is not exported for modules. |
211 | */ | 211 | */ |
212 | retry: | ||
212 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 213 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
213 | if (err) | 214 | if (err) |
214 | goto failed_unlock; | 215 | goto failed_unlock; |
@@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, | |||
219 | (unsigned long long)oldkey, | 220 | (unsigned long long)oldkey, |
220 | (unsigned long long)newkey); | 221 | (unsigned long long)newkey); |
221 | 222 | ||
222 | retry: | ||
223 | spin_lock_irq(&btnc->tree_lock); | 223 | spin_lock_irq(&btnc->tree_lock); |
224 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); | 224 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); |
225 | spin_unlock_irq(&btnc->tree_lock); | 225 | spin_unlock_irq(&btnc->tree_lock); |
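[Editor's note] The btnode.c change above moves the "retry:" label from below radix_tree_preload() to above it, so the preload runs again on every retry, presumably because each insertion attempt consumes or releases the preload reservation and jumping back past it would retry with no preallocated nodes. The generic shape, with toy stand-ins for the preload and insert steps:

    #include <stdio.h>

    static int preload(void)            { puts("preload");  return 0; }
    static void preload_end(void)       { puts("preload_end"); }
    static int try_insert(int *left)    { return (*left)-- > 0 ? -17 : 0; }
    static void resolve_collision(void) { puts("flush colliding entry"); }

    static int insert_with_retry(void)
    {
        int collisions = 2;
        int err;

    retry:                  /* the reservation lives inside the loop */
        err = preload();
        if (err)
            return err;
        err = try_insert(&collisions);
        preload_end();
        if (err == -17) {   /* -EEXIST: old entry still in the tree */
            resolve_collision();
            goto retry;     /* must re-preload before the next try */
        }
        return err;
    }

    int main(void)
    {
        return insert_with_retry();
    }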
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index aa412724b64e..e25b507a474f 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c | |||
@@ -71,21 +71,17 @@ void nilfs_btree_path_cache_destroy(void) | |||
71 | kmem_cache_destroy(nilfs_btree_path_cache); | 71 | kmem_cache_destroy(nilfs_btree_path_cache); |
72 | } | 72 | } |
73 | 73 | ||
74 | static inline struct nilfs_btree_path * | 74 | static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void) |
75 | nilfs_btree_alloc_path(const struct nilfs_btree *btree) | ||
76 | { | 75 | { |
77 | return (struct nilfs_btree_path *) | 76 | return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); |
78 | kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); | ||
79 | } | 77 | } |
80 | 78 | ||
81 | static inline void nilfs_btree_free_path(const struct nilfs_btree *btree, | 79 | static inline void nilfs_btree_free_path(struct nilfs_btree_path *path) |
82 | struct nilfs_btree_path *path) | ||
83 | { | 80 | { |
84 | kmem_cache_free(nilfs_btree_path_cache, path); | 81 | kmem_cache_free(nilfs_btree_path_cache, path); |
85 | } | 82 | } |
86 | 83 | ||
87 | static void nilfs_btree_init_path(const struct nilfs_btree *btree, | 84 | static void nilfs_btree_init_path(struct nilfs_btree_path *path) |
88 | struct nilfs_btree_path *path) | ||
89 | { | 85 | { |
90 | int level; | 86 | int level; |
91 | 87 | ||
@@ -101,26 +97,13 @@ static void nilfs_btree_init_path(const struct nilfs_btree *btree, | |||
101 | } | 97 | } |
102 | } | 98 | } |
103 | 99 | ||
104 | static void nilfs_btree_clear_path(const struct nilfs_btree *btree, | 100 | static void nilfs_btree_release_path(struct nilfs_btree_path *path) |
105 | struct nilfs_btree_path *path) | ||
106 | { | 101 | { |
107 | int level; | 102 | int level; |
108 | 103 | ||
109 | for (level = NILFS_BTREE_LEVEL_DATA; | 104 | for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX; |
110 | level < NILFS_BTREE_LEVEL_MAX; | 105 | level++) |
111 | level++) { | 106 | brelse(path[level].bp_bh); |
112 | if (path[level].bp_bh != NULL) { | ||
113 | brelse(path[level].bp_bh); | ||
114 | path[level].bp_bh = NULL; | ||
115 | } | ||
116 | /* sib_bh is released or deleted by prepare or commit | ||
117 | * operations. */ | ||
118 | path[level].bp_sib_bh = NULL; | ||
119 | path[level].bp_index = 0; | ||
120 | path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
121 | path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
122 | path[level].bp_op = NULL; | ||
123 | } | ||
124 | } | 107 | } |
125 | 108 | ||
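[Editor's note] The nilfs_btree_release_path() rewrite above works because brelse() is a no-op on a NULL buffer_head, so each level can be released unconditionally; the per-field re-initialisation goes away too, presumably because a path is now freed right after release rather than reused. free() gives the same NULL-tolerant idiom in user space:

    #include <stdlib.h>

    #define LEVEL_MAX 14   /* toy constant, not NILFS_BTREE_LEVEL_MAX */

    struct path_level { char *bh; };

    static void release_path(struct path_level *path)
    {
        /* free(NULL) is defined to do nothing, so no per-level check */
        for (int level = 0; level < LEVEL_MAX; level++)
            free(path[level].bh);
    }

    int main(void)
    {
        struct path_level path[LEVEL_MAX] = { 0 };

        path[3].bh = malloc(16);   /* only some levels hold a buffer */
        release_path(path);
        return 0;
    }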
126 | /* | 109 | /* |
@@ -148,129 +131,110 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, | |||
148 | } | 131 | } |
149 | 132 | ||
150 | static inline int | 133 | static inline int |
151 | nilfs_btree_node_get_flags(const struct nilfs_btree *btree, | 134 | nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) |
152 | const struct nilfs_btree_node *node) | ||
153 | { | 135 | { |
154 | return node->bn_flags; | 136 | return node->bn_flags; |
155 | } | 137 | } |
156 | 138 | ||
157 | static inline void | 139 | static inline void |
158 | nilfs_btree_node_set_flags(struct nilfs_btree *btree, | 140 | nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) |
159 | struct nilfs_btree_node *node, | ||
160 | int flags) | ||
161 | { | 141 | { |
162 | node->bn_flags = flags; | 142 | node->bn_flags = flags; |
163 | } | 143 | } |
164 | 144 | ||
165 | static inline int nilfs_btree_node_root(const struct nilfs_btree *btree, | 145 | static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node) |
166 | const struct nilfs_btree_node *node) | ||
167 | { | 146 | { |
168 | return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT; | 147 | return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; |
169 | } | 148 | } |
170 | 149 | ||
171 | static inline int | 150 | static inline int |
172 | nilfs_btree_node_get_level(const struct nilfs_btree *btree, | 151 | nilfs_btree_node_get_level(const struct nilfs_btree_node *node) |
173 | const struct nilfs_btree_node *node) | ||
174 | { | 152 | { |
175 | return node->bn_level; | 153 | return node->bn_level; |
176 | } | 154 | } |
177 | 155 | ||
178 | static inline void | 156 | static inline void |
179 | nilfs_btree_node_set_level(struct nilfs_btree *btree, | 157 | nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) |
180 | struct nilfs_btree_node *node, | ||
181 | int level) | ||
182 | { | 158 | { |
183 | node->bn_level = level; | 159 | node->bn_level = level; |
184 | } | 160 | } |
185 | 161 | ||
186 | static inline int | 162 | static inline int |
187 | nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree, | 163 | nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) |
188 | const struct nilfs_btree_node *node) | ||
189 | { | 164 | { |
190 | return le16_to_cpu(node->bn_nchildren); | 165 | return le16_to_cpu(node->bn_nchildren); |
191 | } | 166 | } |
192 | 167 | ||
193 | static inline void | 168 | static inline void |
194 | nilfs_btree_node_set_nchildren(struct nilfs_btree *btree, | 169 | nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) |
195 | struct nilfs_btree_node *node, | ||
196 | int nchildren) | ||
197 | { | 170 | { |
198 | node->bn_nchildren = cpu_to_le16(nchildren); | 171 | node->bn_nchildren = cpu_to_le16(nchildren); |
199 | } | 172 | } |
200 | 173 | ||
201 | static inline int | 174 | static inline int nilfs_btree_node_size(const struct nilfs_btree *btree) |
202 | nilfs_btree_node_size(const struct nilfs_btree *btree) | ||
203 | { | 175 | { |
204 | return 1 << btree->bt_bmap.b_inode->i_blkbits; | 176 | return 1 << btree->bt_bmap.b_inode->i_blkbits; |
205 | } | 177 | } |
206 | 178 | ||
207 | static inline int | 179 | static inline int |
208 | nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree, | 180 | nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node, |
209 | const struct nilfs_btree_node *node) | 181 | const struct nilfs_btree *btree) |
210 | { | 182 | { |
211 | return nilfs_btree_node_root(btree, node) ? | 183 | return nilfs_btree_node_root(node) ? |
212 | NILFS_BTREE_ROOT_NCHILDREN_MIN : | 184 | NILFS_BTREE_ROOT_NCHILDREN_MIN : |
213 | NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); | 185 | NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); |
214 | } | 186 | } |
215 | 187 | ||
216 | static inline int | 188 | static inline int |
217 | nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree, | 189 | nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node, |
218 | const struct nilfs_btree_node *node) | 190 | const struct nilfs_btree *btree) |
219 | { | 191 | { |
220 | return nilfs_btree_node_root(btree, node) ? | 192 | return nilfs_btree_node_root(node) ? |
221 | NILFS_BTREE_ROOT_NCHILDREN_MAX : | 193 | NILFS_BTREE_ROOT_NCHILDREN_MAX : |
222 | NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); | 194 | NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); |
223 | } | 195 | } |
224 | 196 | ||
225 | static inline __le64 * | 197 | static inline __le64 * |
226 | nilfs_btree_node_dkeys(const struct nilfs_btree *btree, | 198 | nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) |
227 | const struct nilfs_btree_node *node) | ||
228 | { | 199 | { |
229 | return (__le64 *)((char *)(node + 1) + | 200 | return (__le64 *)((char *)(node + 1) + |
230 | (nilfs_btree_node_root(btree, node) ? | 201 | (nilfs_btree_node_root(node) ? |
231 | 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); | 202 | 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); |
232 | } | 203 | } |
233 | 204 | ||
234 | static inline __le64 * | 205 | static inline __le64 * |
235 | nilfs_btree_node_dptrs(const struct nilfs_btree *btree, | 206 | nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, |
236 | const struct nilfs_btree_node *node) | 207 | const struct nilfs_btree *btree) |
237 | { | 208 | { |
238 | return (__le64 *)(nilfs_btree_node_dkeys(btree, node) + | 209 | return (__le64 *)(nilfs_btree_node_dkeys(node) + |
239 | nilfs_btree_node_nchildren_max(btree, node)); | 210 | nilfs_btree_node_nchildren_max(node, btree)); |
240 | } | 211 | } |
241 | 212 | ||
242 | static inline __u64 | 213 | static inline __u64 |
243 | nilfs_btree_node_get_key(const struct nilfs_btree *btree, | 214 | nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) |
244 | const struct nilfs_btree_node *node, int index) | ||
245 | { | 215 | { |
246 | return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) + | 216 | return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index)); |
247 | index)); | ||
248 | } | 217 | } |
249 | 218 | ||
250 | static inline void | 219 | static inline void |
251 | nilfs_btree_node_set_key(struct nilfs_btree *btree, | 220 | nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) |
252 | struct nilfs_btree_node *node, int index, __u64 key) | ||
253 | { | 221 | { |
254 | *(nilfs_btree_node_dkeys(btree, node) + index) = | 222 | *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key); |
255 | nilfs_bmap_key_to_dkey(key); | ||
256 | } | 223 | } |
257 | 224 | ||
258 | static inline __u64 | 225 | static inline __u64 |
259 | nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, | 226 | nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, |
260 | const struct nilfs_btree_node *node, | 227 | const struct nilfs_btree_node *node, int index) |
261 | int index) | ||
262 | { | 228 | { |
263 | return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) + | 229 | return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) + |
264 | index)); | 230 | index)); |
265 | } | 231 | } |
266 | 232 | ||
267 | static inline void | 233 | static inline void |
268 | nilfs_btree_node_set_ptr(struct nilfs_btree *btree, | 234 | nilfs_btree_node_set_ptr(struct nilfs_btree *btree, |
269 | struct nilfs_btree_node *node, | 235 | struct nilfs_btree_node *node, int index, __u64 ptr) |
270 | int index, | ||
271 | __u64 ptr) | ||
272 | { | 236 | { |
273 | *(nilfs_btree_node_dptrs(btree, node) + index) = | 237 | *(nilfs_btree_node_dptrs(node, btree) + index) = |
274 | nilfs_bmap_ptr_to_dptr(ptr); | 238 | nilfs_bmap_ptr_to_dptr(ptr); |
275 | } | 239 | } |
276 | 240 | ||
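The hunks above strip the now-redundant btree argument from the node accessors: helpers that touch only the on-disk node header or its key array take the node alone, while nilfs_btree_node_dptrs() and the get/set-ptr helpers keep the btree because the pointer array's offset depends on the block size. A minimal sketch of the resulting calling convention, using only signatures visible in this diff (the demo helper itself is hypothetical):

	/* Hypothetical helper illustrating the new accessor split: most
	 * calls need only the node; the btree is passed solely where the
	 * block-size-dependent pointer offset is required. */
	static int demo_first_entry(const struct nilfs_btree *btree,
				    const struct nilfs_btree_node *node,
				    __u64 *keyp, __u64 *ptrp)
	{
		if (nilfs_btree_node_get_nchildren(node) == 0)
			return -ENOENT;
		*keyp = nilfs_btree_node_get_key(node, 0);	  /* node only */
		*ptrp = nilfs_btree_node_get_ptr(btree, node, 0); /* needs btree */
		return 0;
	}
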
@@ -283,12 +247,12 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree, | |||
283 | __le64 *dptrs; | 247 | __le64 *dptrs; |
284 | int i; | 248 | int i; |
285 | 249 | ||
286 | nilfs_btree_node_set_flags(btree, node, flags); | 250 | nilfs_btree_node_set_flags(node, flags); |
287 | nilfs_btree_node_set_level(btree, node, level); | 251 | nilfs_btree_node_set_level(node, level); |
288 | nilfs_btree_node_set_nchildren(btree, node, nchildren); | 252 | nilfs_btree_node_set_nchildren(node, nchildren); |
289 | 253 | ||
290 | dkeys = nilfs_btree_node_dkeys(btree, node); | 254 | dkeys = nilfs_btree_node_dkeys(node); |
291 | dptrs = nilfs_btree_node_dptrs(btree, node); | 255 | dptrs = nilfs_btree_node_dptrs(node, btree); |
292 | for (i = 0; i < nchildren; i++) { | 256 | for (i = 0; i < nchildren; i++) { |
293 | dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); | 257 | dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); |
294 | dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); | 258 | dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); |
@@ -305,13 +269,13 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree, | |||
305 | __le64 *ldptrs, *rdptrs; | 269 | __le64 *ldptrs, *rdptrs; |
306 | int lnchildren, rnchildren; | 270 | int lnchildren, rnchildren; |
307 | 271 | ||
308 | ldkeys = nilfs_btree_node_dkeys(btree, left); | 272 | ldkeys = nilfs_btree_node_dkeys(left); |
309 | ldptrs = nilfs_btree_node_dptrs(btree, left); | 273 | ldptrs = nilfs_btree_node_dptrs(left, btree); |
310 | lnchildren = nilfs_btree_node_get_nchildren(btree, left); | 274 | lnchildren = nilfs_btree_node_get_nchildren(left); |
311 | 275 | ||
312 | rdkeys = nilfs_btree_node_dkeys(btree, right); | 276 | rdkeys = nilfs_btree_node_dkeys(right); |
313 | rdptrs = nilfs_btree_node_dptrs(btree, right); | 277 | rdptrs = nilfs_btree_node_dptrs(right, btree); |
314 | rnchildren = nilfs_btree_node_get_nchildren(btree, right); | 278 | rnchildren = nilfs_btree_node_get_nchildren(right); |
315 | 279 | ||
316 | memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); | 280 | memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); |
317 | memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); | 281 | memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); |
@@ -320,8 +284,8 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree, | |||
320 | 284 | ||
321 | lnchildren += n; | 285 | lnchildren += n; |
322 | rnchildren -= n; | 286 | rnchildren -= n; |
323 | nilfs_btree_node_set_nchildren(btree, left, lnchildren); | 287 | nilfs_btree_node_set_nchildren(left, lnchildren); |
324 | nilfs_btree_node_set_nchildren(btree, right, rnchildren); | 288 | nilfs_btree_node_set_nchildren(right, rnchildren); |
325 | } | 289 | } |
326 | 290 | ||
327 | /* Assume that the buffer heads corresponding to left and right are locked. */ | 291 | /* Assume that the buffer heads corresponding to left and right are locked. */ |
@@ -334,13 +298,13 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree, | |||
334 | __le64 *ldptrs, *rdptrs; | 298 | __le64 *ldptrs, *rdptrs; |
335 | int lnchildren, rnchildren; | 299 | int lnchildren, rnchildren; |
336 | 300 | ||
337 | ldkeys = nilfs_btree_node_dkeys(btree, left); | 301 | ldkeys = nilfs_btree_node_dkeys(left); |
338 | ldptrs = nilfs_btree_node_dptrs(btree, left); | 302 | ldptrs = nilfs_btree_node_dptrs(left, btree); |
339 | lnchildren = nilfs_btree_node_get_nchildren(btree, left); | 303 | lnchildren = nilfs_btree_node_get_nchildren(left); |
340 | 304 | ||
341 | rdkeys = nilfs_btree_node_dkeys(btree, right); | 305 | rdkeys = nilfs_btree_node_dkeys(right); |
342 | rdptrs = nilfs_btree_node_dptrs(btree, right); | 306 | rdptrs = nilfs_btree_node_dptrs(right, btree); |
343 | rnchildren = nilfs_btree_node_get_nchildren(btree, right); | 307 | rnchildren = nilfs_btree_node_get_nchildren(right); |
344 | 308 | ||
345 | memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); | 309 | memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); |
346 | memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); | 310 | memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); |
@@ -349,8 +313,8 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree, | |||
349 | 313 | ||
350 | lnchildren -= n; | 314 | lnchildren -= n; |
351 | rnchildren += n; | 315 | rnchildren += n; |
352 | nilfs_btree_node_set_nchildren(btree, left, lnchildren); | 316 | nilfs_btree_node_set_nchildren(left, lnchildren); |
353 | nilfs_btree_node_set_nchildren(btree, right, rnchildren); | 317 | nilfs_btree_node_set_nchildren(right, rnchildren); |
354 | } | 318 | } |
355 | 319 | ||
356 | /* Assume that the buffer head corresponding to node is locked. */ | 320 | /* Assume that the buffer head corresponding to node is locked. */ |
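Both move helpers treat a node as two parallel arrays (keys and pointers) and rebalance by block-copying n entries between siblings, then fixing the two child counts. Below is a standalone sketch of the left shift on bare arrays; the gap-closing copies fall outside the hunk shown above, so their exact form here is an assumption:

	#include <string.h>

	/* Sketch: move the first n entries of the right node onto the end
	 * of the left node, as nilfs_btree_node_move_left() does. */
	static void demo_move_left(__u64 *lkeys, __u64 *lptrs, int *lnp,
				   __u64 *rkeys, __u64 *rptrs, int *rnp, int n)
	{
		memcpy(lkeys + *lnp, rkeys, n * sizeof(*rkeys));
		memcpy(lptrs + *lnp, rptrs, n * sizeof(*rptrs));
		/* close the gap left in the right node (assumed memmove) */
		memmove(rkeys, rkeys + n, (*rnp - n) * sizeof(*rkeys));
		memmove(rptrs, rptrs + n, (*rnp - n) * sizeof(*rptrs));
		*lnp += n;
		*rnp -= n;
	}
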
@@ -362,9 +326,9 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree, | |||
362 | __le64 *dptrs; | 326 | __le64 *dptrs; |
363 | int nchildren; | 327 | int nchildren; |
364 | 328 | ||
365 | dkeys = nilfs_btree_node_dkeys(btree, node); | 329 | dkeys = nilfs_btree_node_dkeys(node); |
366 | dptrs = nilfs_btree_node_dptrs(btree, node); | 330 | dptrs = nilfs_btree_node_dptrs(node, btree); |
367 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 331 | nchildren = nilfs_btree_node_get_nchildren(node); |
368 | if (index < nchildren) { | 332 | if (index < nchildren) { |
369 | memmove(dkeys + index + 1, dkeys + index, | 333 | memmove(dkeys + index + 1, dkeys + index, |
370 | (nchildren - index) * sizeof(*dkeys)); | 334 | (nchildren - index) * sizeof(*dkeys)); |
@@ -374,7 +338,7 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree, | |||
374 | dkeys[index] = nilfs_bmap_key_to_dkey(key); | 338 | dkeys[index] = nilfs_bmap_key_to_dkey(key); |
375 | dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); | 339 | dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); |
376 | nchildren++; | 340 | nchildren++; |
377 | nilfs_btree_node_set_nchildren(btree, node, nchildren); | 341 | nilfs_btree_node_set_nchildren(node, nchildren); |
378 | } | 342 | } |
379 | 343 | ||
380 | /* Assume that the buffer head corresponding to node is locked. */ | 344 | /* Assume that the buffer head corresponding to node is locked. */ |
@@ -388,11 +352,11 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree, | |||
388 | __le64 *dptrs; | 352 | __le64 *dptrs; |
389 | int nchildren; | 353 | int nchildren; |
390 | 354 | ||
391 | dkeys = nilfs_btree_node_dkeys(btree, node); | 355 | dkeys = nilfs_btree_node_dkeys(node); |
392 | dptrs = nilfs_btree_node_dptrs(btree, node); | 356 | dptrs = nilfs_btree_node_dptrs(node, btree); |
393 | key = nilfs_bmap_dkey_to_key(dkeys[index]); | 357 | key = nilfs_bmap_dkey_to_key(dkeys[index]); |
394 | ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); | 358 | ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); |
395 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 359 | nchildren = nilfs_btree_node_get_nchildren(node); |
396 | if (keyp != NULL) | 360 | if (keyp != NULL) |
397 | *keyp = key; | 361 | *keyp = key; |
398 | if (ptrp != NULL) | 362 | if (ptrp != NULL) |
@@ -405,11 +369,10 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree, | |||
405 | (nchildren - index - 1) * sizeof(*dptrs)); | 369 | (nchildren - index - 1) * sizeof(*dptrs)); |
406 | } | 370 | } |
407 | nchildren--; | 371 | nchildren--; |
408 | nilfs_btree_node_set_nchildren(btree, node, nchildren); | 372 | nilfs_btree_node_set_nchildren(node, nchildren); |
409 | } | 373 | } |
410 | 374 | ||
411 | static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, | 375 | static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, |
412 | const struct nilfs_btree_node *node, | ||
413 | __u64 key, int *indexp) | 376 | __u64 key, int *indexp) |
414 | { | 377 | { |
415 | __u64 nkey; | 378 | __u64 nkey; |
@@ -417,12 +380,12 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, | |||
417 | 380 | ||
418 | /* binary search */ | 381 | /* binary search */ |
419 | low = 0; | 382 | low = 0; |
420 | high = nilfs_btree_node_get_nchildren(btree, node) - 1; | 383 | high = nilfs_btree_node_get_nchildren(node) - 1; |
421 | index = 0; | 384 | index = 0; |
422 | s = 0; | 385 | s = 0; |
423 | while (low <= high) { | 386 | while (low <= high) { |
424 | index = (low + high) / 2; | 387 | index = (low + high) / 2; |
425 | nkey = nilfs_btree_node_get_key(btree, node, index); | 388 | nkey = nilfs_btree_node_get_key(node, index); |
426 | if (nkey == key) { | 389 | if (nkey == key) { |
427 | s = 0; | 390 | s = 0; |
428 | goto out; | 391 | goto out; |
@@ -436,9 +399,8 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, | |||
436 | } | 399 | } |
437 | 400 | ||
438 | /* adjust index */ | 401 | /* adjust index */ |
439 | if (nilfs_btree_node_get_level(btree, node) > | 402 | if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { |
440 | NILFS_BTREE_LEVEL_NODE_MIN) { | 403 | if (s > 0 && index > 0) |
441 | if ((s > 0) && (index > 0)) | ||
442 | index--; | 404 | index--; |
443 | } else if (s < 0) | 405 | } else if (s < 0) |
444 | index++; | 406 | index++; |
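nilfs_btree_node_lookup() is a plain binary search over the node's sorted key array, followed by an index fix-up: interior nodes step back to the child whose range covers the key, while the leaf level steps forward to the insertion point. A self-contained sketch of the same logic on a bare array, where the interior flag stands in for the level test against NILFS_BTREE_LEVEL_NODE_MIN (the elided else branches of the search are reconstructed here and are an assumption):

	/* Sketch of the search-and-adjust logic above; returns nonzero on
	 * an exact match and leaves the chosen index in *indexp. */
	static int demo_node_lookup(const __u64 *keys, int nchildren,
				    int interior, __u64 key, int *indexp)
	{
		int low = 0, high = nchildren - 1, index = 0, s = 0;

		while (low <= high) {
			index = (low + high) / 2;
			if (keys[index] == key) {
				*indexp = index;
				return 1;
			} else if (keys[index] < key) {
				s = -1;			/* probe below key */
				low = index + 1;
			} else {
				s = 1;			/* probe above key */
				high = index - 1;
			}
		}
		if (interior) {
			if (s > 0 && index > 0)
				index--;	/* descend covering child */
		} else if (s < 0) {
			index++;		/* leaf insertion point */
		}
		*indexp = index;
		return 0;
	}
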
@@ -456,25 +418,20 @@ nilfs_btree_get_root(const struct nilfs_btree *btree) | |||
456 | } | 418 | } |
457 | 419 | ||
458 | static inline struct nilfs_btree_node * | 420 | static inline struct nilfs_btree_node * |
459 | nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree, | 421 | nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) |
460 | const struct nilfs_btree_path *path, | ||
461 | int level) | ||
462 | { | 422 | { |
463 | return (struct nilfs_btree_node *)path[level].bp_bh->b_data; | 423 | return (struct nilfs_btree_node *)path[level].bp_bh->b_data; |
464 | } | 424 | } |
465 | 425 | ||
466 | static inline struct nilfs_btree_node * | 426 | static inline struct nilfs_btree_node * |
467 | nilfs_btree_get_sib_node(const struct nilfs_btree *btree, | 427 | nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) |
468 | const struct nilfs_btree_path *path, | ||
469 | int level) | ||
470 | { | 428 | { |
471 | return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; | 429 | return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; |
472 | } | 430 | } |
473 | 431 | ||
474 | static inline int nilfs_btree_height(const struct nilfs_btree *btree) | 432 | static inline int nilfs_btree_height(const struct nilfs_btree *btree) |
475 | { | 433 | { |
476 | return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree)) | 434 | return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; |
477 | + 1; | ||
478 | } | 435 | } |
479 | 436 | ||
480 | static inline struct nilfs_btree_node * | 437 | static inline struct nilfs_btree_node * |
@@ -484,7 +441,7 @@ nilfs_btree_get_node(const struct nilfs_btree *btree, | |||
484 | { | 441 | { |
485 | return (level == nilfs_btree_height(btree) - 1) ? | 442 | return (level == nilfs_btree_height(btree) - 1) ? |
486 | nilfs_btree_get_root(btree) : | 443 | nilfs_btree_get_root(btree) : |
487 | nilfs_btree_get_nonroot_node(btree, path, level); | 444 | nilfs_btree_get_nonroot_node(path, level); |
488 | } | 445 | } |
489 | 446 | ||
490 | static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | 447 | static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, |
@@ -496,12 +453,11 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | |||
496 | int level, index, found, ret; | 453 | int level, index, found, ret; |
497 | 454 | ||
498 | node = nilfs_btree_get_root(btree); | 455 | node = nilfs_btree_get_root(btree); |
499 | level = nilfs_btree_node_get_level(btree, node); | 456 | level = nilfs_btree_node_get_level(node); |
500 | if ((level < minlevel) || | 457 | if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) |
501 | (nilfs_btree_node_get_nchildren(btree, node) <= 0)) | ||
502 | return -ENOENT; | 458 | return -ENOENT; |
503 | 459 | ||
504 | found = nilfs_btree_node_lookup(btree, node, key, &index); | 460 | found = nilfs_btree_node_lookup(node, key, &index); |
505 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 461 | ptr = nilfs_btree_node_get_ptr(btree, node, index); |
506 | path[level].bp_bh = NULL; | 462 | path[level].bp_bh = NULL; |
507 | path[level].bp_index = index; | 463 | path[level].bp_index = index; |
@@ -510,14 +466,13 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | |||
510 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); | 466 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); |
511 | if (ret < 0) | 467 | if (ret < 0) |
512 | return ret; | 468 | return ret; |
513 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 469 | node = nilfs_btree_get_nonroot_node(path, level); |
514 | BUG_ON(level != nilfs_btree_node_get_level(btree, node)); | 470 | BUG_ON(level != nilfs_btree_node_get_level(node)); |
515 | if (!found) | 471 | if (!found) |
516 | found = nilfs_btree_node_lookup(btree, node, key, | 472 | found = nilfs_btree_node_lookup(node, key, &index); |
517 | &index); | ||
518 | else | 473 | else |
519 | index = 0; | 474 | index = 0; |
520 | if (index < nilfs_btree_node_nchildren_max(btree, node)) | 475 | if (index < nilfs_btree_node_nchildren_max(node, btree)) |
521 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 476 | ptr = nilfs_btree_node_get_ptr(btree, node, index); |
522 | else { | 477 | else { |
523 | WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); | 478 | WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); |
@@ -544,10 +499,10 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, | |||
544 | int index, level, ret; | 499 | int index, level, ret; |
545 | 500 | ||
546 | node = nilfs_btree_get_root(btree); | 501 | node = nilfs_btree_get_root(btree); |
547 | index = nilfs_btree_node_get_nchildren(btree, node) - 1; | 502 | index = nilfs_btree_node_get_nchildren(node) - 1; |
548 | if (index < 0) | 503 | if (index < 0) |
549 | return -ENOENT; | 504 | return -ENOENT; |
550 | level = nilfs_btree_node_get_level(btree, node); | 505 | level = nilfs_btree_node_get_level(node); |
551 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 506 | ptr = nilfs_btree_node_get_ptr(btree, node, index); |
552 | path[level].bp_bh = NULL; | 507 | path[level].bp_bh = NULL; |
553 | path[level].bp_index = index; | 508 | path[level].bp_index = index; |
@@ -556,15 +511,15 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, | |||
556 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); | 511 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); |
557 | if (ret < 0) | 512 | if (ret < 0) |
558 | return ret; | 513 | return ret; |
559 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 514 | node = nilfs_btree_get_nonroot_node(path, level); |
560 | BUG_ON(level != nilfs_btree_node_get_level(btree, node)); | 515 | BUG_ON(level != nilfs_btree_node_get_level(node)); |
561 | index = nilfs_btree_node_get_nchildren(btree, node) - 1; | 516 | index = nilfs_btree_node_get_nchildren(node) - 1; |
562 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 517 | ptr = nilfs_btree_node_get_ptr(btree, node, index); |
563 | path[level].bp_index = index; | 518 | path[level].bp_index = index; |
564 | } | 519 | } |
565 | 520 | ||
566 | if (keyp != NULL) | 521 | if (keyp != NULL) |
567 | *keyp = nilfs_btree_node_get_key(btree, node, index); | 522 | *keyp = nilfs_btree_node_get_key(node, index); |
568 | if (ptrp != NULL) | 523 | if (ptrp != NULL) |
569 | *ptrp = ptr; | 524 | *ptrp = ptr; |
570 | 525 | ||
@@ -580,18 +535,18 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, | |||
580 | int ret; | 535 | int ret; |
581 | 536 | ||
582 | btree = (struct nilfs_btree *)bmap; | 537 | btree = (struct nilfs_btree *)bmap; |
583 | path = nilfs_btree_alloc_path(btree); | 538 | path = nilfs_btree_alloc_path(); |
584 | if (path == NULL) | 539 | if (path == NULL) |
585 | return -ENOMEM; | 540 | return -ENOMEM; |
586 | nilfs_btree_init_path(btree, path); | 541 | nilfs_btree_init_path(path); |
587 | 542 | ||
588 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); | 543 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); |
589 | 544 | ||
590 | if (ptrp != NULL) | 545 | if (ptrp != NULL) |
591 | *ptrp = ptr; | 546 | *ptrp = ptr; |
592 | 547 | ||
593 | nilfs_btree_clear_path(btree, path); | 548 | nilfs_btree_release_path(path); |
594 | nilfs_btree_free_path(btree, path); | 549 | nilfs_btree_free_path(path); |
595 | 550 | ||
596 | return ret; | 551 | return ret; |
597 | } | 552 | } |
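The path helpers lose their btree argument as well: nilfs_btree_alloc_path() and nilfs_btree_init_path() now take no tree, and the old nilfs_btree_clear_path()/nilfs_btree_free_path() pair becomes nilfs_btree_release_path() plus nilfs_btree_free_path(). Every entry point in this file follows the same lifecycle, sketched below with a hypothetical caller:

	/* Hypothetical caller showing the path lifecycle used by the
	 * lookup/insert/delete entry points in this diff. */
	static int demo_with_path(struct nilfs_btree *btree, __u64 key)
	{
		struct nilfs_btree_path *path;
		int ret;

		path = nilfs_btree_alloc_path();
		if (path == NULL)
			return -ENOMEM;
		nilfs_btree_init_path(path);

		ret = nilfs_btree_do_lookup(btree, path, key, NULL,
					    NILFS_BTREE_LEVEL_NODE_MIN);

		nilfs_btree_release_path(path);
		nilfs_btree_free_path(path);
		return ret;
	}
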
@@ -608,10 +563,10 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
608 | int level = NILFS_BTREE_LEVEL_NODE_MIN; | 563 | int level = NILFS_BTREE_LEVEL_NODE_MIN; |
609 | int ret, cnt, index, maxlevel; | 564 | int ret, cnt, index, maxlevel; |
610 | 565 | ||
611 | path = nilfs_btree_alloc_path(btree); | 566 | path = nilfs_btree_alloc_path(); |
612 | if (path == NULL) | 567 | if (path == NULL) |
613 | return -ENOMEM; | 568 | return -ENOMEM; |
614 | nilfs_btree_init_path(btree, path); | 569 | nilfs_btree_init_path(path); |
615 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); | 570 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); |
616 | if (ret < 0) | 571 | if (ret < 0) |
617 | goto out; | 572 | goto out; |
@@ -631,8 +586,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
631 | node = nilfs_btree_get_node(btree, path, level); | 586 | node = nilfs_btree_get_node(btree, path, level); |
632 | index = path[level].bp_index + 1; | 587 | index = path[level].bp_index + 1; |
633 | for (;;) { | 588 | for (;;) { |
634 | while (index < nilfs_btree_node_get_nchildren(btree, node)) { | 589 | while (index < nilfs_btree_node_get_nchildren(node)) { |
635 | if (nilfs_btree_node_get_key(btree, node, index) != | 590 | if (nilfs_btree_node_get_key(node, index) != |
636 | key + cnt) | 591 | key + cnt) |
637 | goto end; | 592 | goto end; |
638 | ptr2 = nilfs_btree_node_get_ptr(btree, node, index); | 593 | ptr2 = nilfs_btree_node_get_ptr(btree, node, index); |
@@ -653,8 +608,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
653 | /* look-up right sibling node */ | 608 | /* look-up right sibling node */ |
654 | node = nilfs_btree_get_node(btree, path, level + 1); | 609 | node = nilfs_btree_get_node(btree, path, level + 1); |
655 | index = path[level + 1].bp_index + 1; | 610 | index = path[level + 1].bp_index + 1; |
656 | if (index >= nilfs_btree_node_get_nchildren(btree, node) || | 611 | if (index >= nilfs_btree_node_get_nchildren(node) || |
657 | nilfs_btree_node_get_key(btree, node, index) != key + cnt) | 612 | nilfs_btree_node_get_key(node, index) != key + cnt) |
658 | break; | 613 | break; |
659 | ptr2 = nilfs_btree_node_get_ptr(btree, node, index); | 614 | ptr2 = nilfs_btree_node_get_ptr(btree, node, index); |
660 | path[level + 1].bp_index = index; | 615 | path[level + 1].bp_index = index; |
@@ -664,7 +619,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
664 | ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); | 619 | ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); |
665 | if (ret < 0) | 620 | if (ret < 0) |
666 | goto out; | 621 | goto out; |
667 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 622 | node = nilfs_btree_get_nonroot_node(path, level); |
668 | index = 0; | 623 | index = 0; |
669 | path[level].bp_index = index; | 624 | path[level].bp_index = index; |
670 | } | 625 | } |
@@ -672,8 +627,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
672 | *ptrp = ptr; | 627 | *ptrp = ptr; |
673 | ret = cnt; | 628 | ret = cnt; |
674 | out: | 629 | out: |
675 | nilfs_btree_clear_path(btree, path); | 630 | nilfs_btree_release_path(path); |
676 | nilfs_btree_free_path(btree, path); | 631 | nilfs_btree_free_path(path); |
677 | return ret; | 632 | return ret; |
678 | } | 633 | } |
679 | 634 | ||
@@ -685,9 +640,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree, | |||
685 | do { | 640 | do { |
686 | lock_buffer(path[level].bp_bh); | 641 | lock_buffer(path[level].bp_bh); |
687 | nilfs_btree_node_set_key( | 642 | nilfs_btree_node_set_key( |
688 | btree, | 643 | nilfs_btree_get_nonroot_node(path, level), |
689 | nilfs_btree_get_nonroot_node( | ||
690 | btree, path, level), | ||
691 | path[level].bp_index, key); | 644 | path[level].bp_index, key); |
692 | if (!buffer_dirty(path[level].bp_bh)) | 645 | if (!buffer_dirty(path[level].bp_bh)) |
693 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 646 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -698,8 +651,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree, | |||
698 | 651 | ||
699 | /* root */ | 652 | /* root */ |
700 | if (level == nilfs_btree_height(btree) - 1) { | 653 | if (level == nilfs_btree_height(btree) - 1) { |
701 | nilfs_btree_node_set_key(btree, | 654 | nilfs_btree_node_set_key(nilfs_btree_get_root(btree), |
702 | nilfs_btree_get_root(btree), | ||
703 | path[level].bp_index, key); | 655 | path[level].bp_index, key); |
704 | } | 656 | } |
705 | } | 657 | } |
@@ -712,7 +664,7 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree, | |||
712 | 664 | ||
713 | if (level < nilfs_btree_height(btree) - 1) { | 665 | if (level < nilfs_btree_height(btree) - 1) { |
714 | lock_buffer(path[level].bp_bh); | 666 | lock_buffer(path[level].bp_bh); |
715 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 667 | node = nilfs_btree_get_nonroot_node(path, level); |
716 | nilfs_btree_node_insert(btree, node, *keyp, *ptrp, | 668 | nilfs_btree_node_insert(btree, node, *keyp, *ptrp, |
717 | path[level].bp_index); | 669 | path[level].bp_index); |
718 | if (!buffer_dirty(path[level].bp_bh)) | 670 | if (!buffer_dirty(path[level].bp_bh)) |
@@ -721,8 +673,8 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree, | |||
721 | 673 | ||
722 | if (path[level].bp_index == 0) | 674 | if (path[level].bp_index == 0) |
723 | nilfs_btree_promote_key(btree, path, level + 1, | 675 | nilfs_btree_promote_key(btree, path, level + 1, |
724 | nilfs_btree_node_get_key( | 676 | nilfs_btree_node_get_key(node, |
725 | btree, node, 0)); | 677 | 0)); |
726 | } else { | 678 | } else { |
727 | node = nilfs_btree_get_root(btree); | 679 | node = nilfs_btree_get_root(btree); |
728 | nilfs_btree_node_insert(btree, node, *keyp, *ptrp, | 680 | nilfs_btree_node_insert(btree, node, *keyp, *ptrp, |
@@ -740,10 +692,10 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, | |||
740 | lock_buffer(path[level].bp_bh); | 692 | lock_buffer(path[level].bp_bh); |
741 | lock_buffer(path[level].bp_sib_bh); | 693 | lock_buffer(path[level].bp_sib_bh); |
742 | 694 | ||
743 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 695 | node = nilfs_btree_get_nonroot_node(path, level); |
744 | left = nilfs_btree_get_sib_node(btree, path, level); | 696 | left = nilfs_btree_get_sib_node(path, level); |
745 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 697 | nchildren = nilfs_btree_node_get_nchildren(node); |
746 | lnchildren = nilfs_btree_node_get_nchildren(btree, left); | 698 | lnchildren = nilfs_btree_node_get_nchildren(left); |
747 | move = 0; | 699 | move = 0; |
748 | 700 | ||
749 | n = (nchildren + lnchildren + 1) / 2 - lnchildren; | 701 | n = (nchildren + lnchildren + 1) / 2 - lnchildren; |
@@ -764,7 +716,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, | |||
764 | unlock_buffer(path[level].bp_sib_bh); | 716 | unlock_buffer(path[level].bp_sib_bh); |
765 | 717 | ||
766 | nilfs_btree_promote_key(btree, path, level + 1, | 718 | nilfs_btree_promote_key(btree, path, level + 1, |
767 | nilfs_btree_node_get_key(btree, node, 0)); | 719 | nilfs_btree_node_get_key(node, 0)); |
768 | 720 | ||
769 | if (move) { | 721 | if (move) { |
770 | brelse(path[level].bp_bh); | 722 | brelse(path[level].bp_bh); |
@@ -791,10 +743,10 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, | |||
791 | lock_buffer(path[level].bp_bh); | 743 | lock_buffer(path[level].bp_bh); |
792 | lock_buffer(path[level].bp_sib_bh); | 744 | lock_buffer(path[level].bp_sib_bh); |
793 | 745 | ||
794 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 746 | node = nilfs_btree_get_nonroot_node(path, level); |
795 | right = nilfs_btree_get_sib_node(btree, path, level); | 747 | right = nilfs_btree_get_sib_node(path, level); |
796 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 748 | nchildren = nilfs_btree_node_get_nchildren(node); |
797 | rnchildren = nilfs_btree_node_get_nchildren(btree, right); | 749 | rnchildren = nilfs_btree_node_get_nchildren(right); |
798 | move = 0; | 750 | move = 0; |
799 | 751 | ||
800 | n = (nchildren + rnchildren + 1) / 2 - rnchildren; | 752 | n = (nchildren + rnchildren + 1) / 2 - rnchildren; |
@@ -816,15 +768,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, | |||
816 | 768 | ||
817 | path[level + 1].bp_index++; | 769 | path[level + 1].bp_index++; |
818 | nilfs_btree_promote_key(btree, path, level + 1, | 770 | nilfs_btree_promote_key(btree, path, level + 1, |
819 | nilfs_btree_node_get_key(btree, right, 0)); | 771 | nilfs_btree_node_get_key(right, 0)); |
820 | path[level + 1].bp_index--; | 772 | path[level + 1].bp_index--; |
821 | 773 | ||
822 | if (move) { | 774 | if (move) { |
823 | brelse(path[level].bp_bh); | 775 | brelse(path[level].bp_bh); |
824 | path[level].bp_bh = path[level].bp_sib_bh; | 776 | path[level].bp_bh = path[level].bp_sib_bh; |
825 | path[level].bp_sib_bh = NULL; | 777 | path[level].bp_sib_bh = NULL; |
826 | path[level].bp_index -= | 778 | path[level].bp_index -= nilfs_btree_node_get_nchildren(node); |
827 | nilfs_btree_node_get_nchildren(btree, node); | ||
828 | path[level + 1].bp_index++; | 779 | path[level + 1].bp_index++; |
829 | } else { | 780 | } else { |
830 | brelse(path[level].bp_sib_bh); | 781 | brelse(path[level].bp_sib_bh); |
@@ -846,9 +797,9 @@ static void nilfs_btree_split(struct nilfs_btree *btree, | |||
846 | lock_buffer(path[level].bp_bh); | 797 | lock_buffer(path[level].bp_bh); |
847 | lock_buffer(path[level].bp_sib_bh); | 798 | lock_buffer(path[level].bp_sib_bh); |
848 | 799 | ||
849 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 800 | node = nilfs_btree_get_nonroot_node(path, level); |
850 | right = nilfs_btree_get_sib_node(btree, path, level); | 801 | right = nilfs_btree_get_sib_node(path, level); |
851 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 802 | nchildren = nilfs_btree_node_get_nchildren(node); |
852 | move = 0; | 803 | move = 0; |
853 | 804 | ||
854 | n = (nchildren + 1) / 2; | 805 | n = (nchildren + 1) / 2; |
@@ -867,16 +818,15 @@ static void nilfs_btree_split(struct nilfs_btree *btree, | |||
867 | unlock_buffer(path[level].bp_bh); | 818 | unlock_buffer(path[level].bp_bh); |
868 | unlock_buffer(path[level].bp_sib_bh); | 819 | unlock_buffer(path[level].bp_sib_bh); |
869 | 820 | ||
870 | newkey = nilfs_btree_node_get_key(btree, right, 0); | 821 | newkey = nilfs_btree_node_get_key(right, 0); |
871 | newptr = path[level].bp_newreq.bpr_ptr; | 822 | newptr = path[level].bp_newreq.bpr_ptr; |
872 | 823 | ||
873 | if (move) { | 824 | if (move) { |
874 | path[level].bp_index -= | 825 | path[level].bp_index -= nilfs_btree_node_get_nchildren(node); |
875 | nilfs_btree_node_get_nchildren(btree, node); | ||
876 | nilfs_btree_node_insert(btree, right, *keyp, *ptrp, | 826 | nilfs_btree_node_insert(btree, right, *keyp, *ptrp, |
877 | path[level].bp_index); | 827 | path[level].bp_index); |
878 | 828 | ||
879 | *keyp = nilfs_btree_node_get_key(btree, right, 0); | 829 | *keyp = nilfs_btree_node_get_key(right, 0); |
880 | *ptrp = path[level].bp_newreq.bpr_ptr; | 830 | *ptrp = path[level].bp_newreq.bpr_ptr; |
881 | 831 | ||
882 | brelse(path[level].bp_bh); | 832 | brelse(path[level].bp_bh); |
@@ -885,7 +835,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, | |||
885 | } else { | 835 | } else { |
886 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); | 836 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); |
887 | 837 | ||
888 | *keyp = nilfs_btree_node_get_key(btree, right, 0); | 838 | *keyp = nilfs_btree_node_get_key(right, 0); |
889 | *ptrp = path[level].bp_newreq.bpr_ptr; | 839 | *ptrp = path[level].bp_newreq.bpr_ptr; |
890 | 840 | ||
891 | brelse(path[level].bp_sib_bh); | 841 | brelse(path[level].bp_sib_bh); |
@@ -905,12 +855,12 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, | |||
905 | lock_buffer(path[level].bp_sib_bh); | 855 | lock_buffer(path[level].bp_sib_bh); |
906 | 856 | ||
907 | root = nilfs_btree_get_root(btree); | 857 | root = nilfs_btree_get_root(btree); |
908 | child = nilfs_btree_get_sib_node(btree, path, level); | 858 | child = nilfs_btree_get_sib_node(path, level); |
909 | 859 | ||
910 | n = nilfs_btree_node_get_nchildren(btree, root); | 860 | n = nilfs_btree_node_get_nchildren(root); |
911 | 861 | ||
912 | nilfs_btree_node_move_right(btree, root, child, n); | 862 | nilfs_btree_node_move_right(btree, root, child, n); |
913 | nilfs_btree_node_set_level(btree, root, level + 1); | 863 | nilfs_btree_node_set_level(root, level + 1); |
914 | 864 | ||
915 | if (!buffer_dirty(path[level].bp_sib_bh)) | 865 | if (!buffer_dirty(path[level].bp_sib_bh)) |
916 | nilfs_btnode_mark_dirty(path[level].bp_sib_bh); | 866 | nilfs_btnode_mark_dirty(path[level].bp_sib_bh); |
@@ -922,7 +872,7 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, | |||
922 | 872 | ||
923 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); | 873 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); |
924 | 874 | ||
925 | *keyp = nilfs_btree_node_get_key(btree, child, 0); | 875 | *keyp = nilfs_btree_node_get_key(child, 0); |
926 | *ptrp = path[level].bp_newreq.bpr_ptr; | 876 | *ptrp = path[level].bp_newreq.bpr_ptr; |
927 | } | 877 | } |
928 | 878 | ||
@@ -990,26 +940,29 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
990 | struct nilfs_btree_node *node, *parent, *sib; | 940 | struct nilfs_btree_node *node, *parent, *sib; |
991 | __u64 sibptr; | 941 | __u64 sibptr; |
992 | int pindex, level, ret; | 942 | int pindex, level, ret; |
943 | struct inode *dat = NULL; | ||
993 | 944 | ||
994 | stats->bs_nblocks = 0; | 945 | stats->bs_nblocks = 0; |
995 | level = NILFS_BTREE_LEVEL_DATA; | 946 | level = NILFS_BTREE_LEVEL_DATA; |
996 | 947 | ||
997 | /* allocate a new ptr for data block */ | 948 | /* allocate a new ptr for data block */ |
998 | if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) | 949 | if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { |
999 | path[level].bp_newreq.bpr_ptr = | 950 | path[level].bp_newreq.bpr_ptr = |
1000 | nilfs_btree_find_target_v(btree, path, key); | 951 | nilfs_btree_find_target_v(btree, path, key); |
952 | dat = nilfs_bmap_get_dat(&btree->bt_bmap); | ||
953 | } | ||
1001 | 954 | ||
1002 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, | 955 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, |
1003 | &path[level].bp_newreq); | 956 | &path[level].bp_newreq, dat); |
1004 | if (ret < 0) | 957 | if (ret < 0) |
1005 | goto err_out_data; | 958 | goto err_out_data; |
1006 | 959 | ||
1007 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; | 960 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; |
1008 | level < nilfs_btree_height(btree) - 1; | 961 | level < nilfs_btree_height(btree) - 1; |
1009 | level++) { | 962 | level++) { |
1010 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 963 | node = nilfs_btree_get_nonroot_node(path, level); |
1011 | if (nilfs_btree_node_get_nchildren(btree, node) < | 964 | if (nilfs_btree_node_get_nchildren(node) < |
1012 | nilfs_btree_node_nchildren_max(btree, node)) { | 965 | nilfs_btree_node_nchildren_max(node, btree)) { |
1013 | path[level].bp_op = nilfs_btree_do_insert; | 966 | path[level].bp_op = nilfs_btree_do_insert; |
1014 | stats->bs_nblocks++; | 967 | stats->bs_nblocks++; |
1015 | goto out; | 968 | goto out; |
@@ -1026,8 +979,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1026 | if (ret < 0) | 979 | if (ret < 0) |
1027 | goto err_out_child_node; | 980 | goto err_out_child_node; |
1028 | sib = (struct nilfs_btree_node *)bh->b_data; | 981 | sib = (struct nilfs_btree_node *)bh->b_data; |
1029 | if (nilfs_btree_node_get_nchildren(btree, sib) < | 982 | if (nilfs_btree_node_get_nchildren(sib) < |
1030 | nilfs_btree_node_nchildren_max(btree, sib)) { | 983 | nilfs_btree_node_nchildren_max(sib, btree)) { |
1031 | path[level].bp_sib_bh = bh; | 984 | path[level].bp_sib_bh = bh; |
1032 | path[level].bp_op = nilfs_btree_carry_left; | 985 | path[level].bp_op = nilfs_btree_carry_left; |
1033 | stats->bs_nblocks++; | 986 | stats->bs_nblocks++; |
@@ -1038,15 +991,15 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1038 | 991 | ||
1039 | /* right sibling */ | 992 | /* right sibling */ |
1040 | if (pindex < | 993 | if (pindex < |
1041 | nilfs_btree_node_get_nchildren(btree, parent) - 1) { | 994 | nilfs_btree_node_get_nchildren(parent) - 1) { |
1042 | sibptr = nilfs_btree_node_get_ptr(btree, parent, | 995 | sibptr = nilfs_btree_node_get_ptr(btree, parent, |
1043 | pindex + 1); | 996 | pindex + 1); |
1044 | ret = nilfs_btree_get_block(btree, sibptr, &bh); | 997 | ret = nilfs_btree_get_block(btree, sibptr, &bh); |
1045 | if (ret < 0) | 998 | if (ret < 0) |
1046 | goto err_out_child_node; | 999 | goto err_out_child_node; |
1047 | sib = (struct nilfs_btree_node *)bh->b_data; | 1000 | sib = (struct nilfs_btree_node *)bh->b_data; |
1048 | if (nilfs_btree_node_get_nchildren(btree, sib) < | 1001 | if (nilfs_btree_node_get_nchildren(sib) < |
1049 | nilfs_btree_node_nchildren_max(btree, sib)) { | 1002 | nilfs_btree_node_nchildren_max(sib, btree)) { |
1050 | path[level].bp_sib_bh = bh; | 1003 | path[level].bp_sib_bh = bh; |
1051 | path[level].bp_op = nilfs_btree_carry_right; | 1004 | path[level].bp_op = nilfs_btree_carry_right; |
1052 | stats->bs_nblocks++; | 1005 | stats->bs_nblocks++; |
@@ -1059,7 +1012,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1059 | path[level].bp_newreq.bpr_ptr = | 1012 | path[level].bp_newreq.bpr_ptr = |
1060 | path[level - 1].bp_newreq.bpr_ptr + 1; | 1013 | path[level - 1].bp_newreq.bpr_ptr + 1; |
1061 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, | 1014 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, |
1062 | &path[level].bp_newreq); | 1015 | &path[level].bp_newreq, dat); |
1063 | if (ret < 0) | 1016 | if (ret < 0) |
1064 | goto err_out_child_node; | 1017 | goto err_out_child_node; |
1065 | ret = nilfs_btree_get_new_block(btree, | 1018 | ret = nilfs_btree_get_new_block(btree, |
@@ -1081,8 +1034,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1081 | 1034 | ||
1082 | /* root */ | 1035 | /* root */ |
1083 | node = nilfs_btree_get_root(btree); | 1036 | node = nilfs_btree_get_root(btree); |
1084 | if (nilfs_btree_node_get_nchildren(btree, node) < | 1037 | if (nilfs_btree_node_get_nchildren(node) < |
1085 | nilfs_btree_node_nchildren_max(btree, node)) { | 1038 | nilfs_btree_node_nchildren_max(node, btree)) { |
1086 | path[level].bp_op = nilfs_btree_do_insert; | 1039 | path[level].bp_op = nilfs_btree_do_insert; |
1087 | stats->bs_nblocks++; | 1040 | stats->bs_nblocks++; |
1088 | goto out; | 1041 | goto out; |
@@ -1091,7 +1044,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1091 | /* grow */ | 1044 | /* grow */ |
1092 | path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; | 1045 | path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; |
1093 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, | 1046 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, |
1094 | &path[level].bp_newreq); | 1047 | &path[level].bp_newreq, dat); |
1095 | if (ret < 0) | 1048 | if (ret < 0) |
1096 | goto err_out_child_node; | 1049 | goto err_out_child_node; |
1097 | ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, | 1050 | ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, |
@@ -1119,16 +1072,18 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1119 | 1072 | ||
1120 | /* error */ | 1073 | /* error */ |
1121 | err_out_curr_node: | 1074 | err_out_curr_node: |
1122 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); | 1075 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, |
1076 | dat); | ||
1123 | err_out_child_node: | 1077 | err_out_child_node: |
1124 | for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { | 1078 | for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { |
1125 | nilfs_btnode_delete(path[level].bp_sib_bh); | 1079 | nilfs_btnode_delete(path[level].bp_sib_bh); |
1126 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, | 1080 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, |
1127 | &path[level].bp_newreq); | 1081 | &path[level].bp_newreq, dat); |
1128 | 1082 | ||
1129 | } | 1083 | } |
1130 | 1084 | ||
1131 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); | 1085 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, |
1086 | dat); | ||
1132 | err_out_data: | 1087 | err_out_data: |
1133 | *levelp = level; | 1088 | *levelp = level; |
1134 | stats->bs_nblocks = 0; | 1089 | stats->bs_nblocks = 0; |
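The insert path now resolves the DAT inode once, up front, and only when the bmap uses virtual block numbers, then threads the same pointer through every prepare/commit/abort call instead of letting each helper re-derive it. A condensed sketch of the pattern with a hypothetical single-level helper (the bmap calls and their dat parameter are as shown in this diff):

	/* Hypothetical helper condensing the dat-threading pattern above:
	 * resolve dat once, hand the same pointer to prepare and to the
	 * matching commit or abort. */
	static int demo_alloc_ptr(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path, int level,
				  int commit)
	{
		struct inode *dat = NULL;
		int ret;

		if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
			dat = nilfs_bmap_get_dat(&btree->bt_bmap);

		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
						   &path[level].bp_newreq, dat);
		if (ret < 0)
			return ret;

		if (commit)
			nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
						    &path[level].bp_newreq, dat);
		else
			nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
						   &path[level].bp_newreq, dat);
		return 0;
	}
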
@@ -1139,16 +1094,19 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree, | |||
1139 | struct nilfs_btree_path *path, | 1094 | struct nilfs_btree_path *path, |
1140 | int maxlevel, __u64 key, __u64 ptr) | 1095 | int maxlevel, __u64 key, __u64 ptr) |
1141 | { | 1096 | { |
1097 | struct inode *dat = NULL; | ||
1142 | int level; | 1098 | int level; |
1143 | 1099 | ||
1144 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); | 1100 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); |
1145 | ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; | 1101 | ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; |
1146 | if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) | 1102 | if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { |
1147 | nilfs_btree_set_target_v(btree, key, ptr); | 1103 | nilfs_btree_set_target_v(btree, key, ptr); |
1104 | dat = nilfs_bmap_get_dat(&btree->bt_bmap); | ||
1105 | } | ||
1148 | 1106 | ||
1149 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { | 1107 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { |
1150 | nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, | 1108 | nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, |
1151 | &path[level - 1].bp_newreq); | 1109 | &path[level - 1].bp_newreq, dat); |
1152 | path[level].bp_op(btree, path, level, &key, &ptr); | 1110 | path[level].bp_op(btree, path, level, &key, &ptr); |
1153 | } | 1111 | } |
1154 | 1112 | ||
@@ -1164,10 +1122,10 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | |||
1164 | int level, ret; | 1122 | int level, ret; |
1165 | 1123 | ||
1166 | btree = (struct nilfs_btree *)bmap; | 1124 | btree = (struct nilfs_btree *)bmap; |
1167 | path = nilfs_btree_alloc_path(btree); | 1125 | path = nilfs_btree_alloc_path(); |
1168 | if (path == NULL) | 1126 | if (path == NULL) |
1169 | return -ENOMEM; | 1127 | return -ENOMEM; |
1170 | nilfs_btree_init_path(btree, path); | 1128 | nilfs_btree_init_path(path); |
1171 | 1129 | ||
1172 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | 1130 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, |
1173 | NILFS_BTREE_LEVEL_NODE_MIN); | 1131 | NILFS_BTREE_LEVEL_NODE_MIN); |
@@ -1184,8 +1142,8 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | |||
1184 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | 1142 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); |
1185 | 1143 | ||
1186 | out: | 1144 | out: |
1187 | nilfs_btree_clear_path(btree, path); | 1145 | nilfs_btree_release_path(path); |
1188 | nilfs_btree_free_path(btree, path); | 1146 | nilfs_btree_free_path(path); |
1189 | return ret; | 1147 | return ret; |
1190 | } | 1148 | } |
1191 | 1149 | ||
@@ -1197,7 +1155,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree, | |||
1197 | 1155 | ||
1198 | if (level < nilfs_btree_height(btree) - 1) { | 1156 | if (level < nilfs_btree_height(btree) - 1) { |
1199 | lock_buffer(path[level].bp_bh); | 1157 | lock_buffer(path[level].bp_bh); |
1200 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 1158 | node = nilfs_btree_get_nonroot_node(path, level); |
1201 | nilfs_btree_node_delete(btree, node, keyp, ptrp, | 1159 | nilfs_btree_node_delete(btree, node, keyp, ptrp, |
1202 | path[level].bp_index); | 1160 | path[level].bp_index); |
1203 | if (!buffer_dirty(path[level].bp_bh)) | 1161 | if (!buffer_dirty(path[level].bp_bh)) |
@@ -1205,7 +1163,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree, | |||
1205 | unlock_buffer(path[level].bp_bh); | 1163 | unlock_buffer(path[level].bp_bh); |
1206 | if (path[level].bp_index == 0) | 1164 | if (path[level].bp_index == 0) |
1207 | nilfs_btree_promote_key(btree, path, level + 1, | 1165 | nilfs_btree_promote_key(btree, path, level + 1, |
1208 | nilfs_btree_node_get_key(btree, node, 0)); | 1166 | nilfs_btree_node_get_key(node, 0)); |
1209 | } else { | 1167 | } else { |
1210 | node = nilfs_btree_get_root(btree); | 1168 | node = nilfs_btree_get_root(btree); |
1211 | nilfs_btree_node_delete(btree, node, keyp, ptrp, | 1169 | nilfs_btree_node_delete(btree, node, keyp, ptrp, |
@@ -1225,10 +1183,10 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, | |||
1225 | lock_buffer(path[level].bp_bh); | 1183 | lock_buffer(path[level].bp_bh); |
1226 | lock_buffer(path[level].bp_sib_bh); | 1184 | lock_buffer(path[level].bp_sib_bh); |
1227 | 1185 | ||
1228 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 1186 | node = nilfs_btree_get_nonroot_node(path, level); |
1229 | left = nilfs_btree_get_sib_node(btree, path, level); | 1187 | left = nilfs_btree_get_sib_node(path, level); |
1230 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 1188 | nchildren = nilfs_btree_node_get_nchildren(node); |
1231 | lnchildren = nilfs_btree_node_get_nchildren(btree, left); | 1189 | lnchildren = nilfs_btree_node_get_nchildren(left); |
1232 | 1190 | ||
1233 | n = (nchildren + lnchildren) / 2 - nchildren; | 1191 | n = (nchildren + lnchildren) / 2 - nchildren; |
1234 | 1192 | ||
@@ -1243,7 +1201,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, | |||
1243 | unlock_buffer(path[level].bp_sib_bh); | 1201 | unlock_buffer(path[level].bp_sib_bh); |
1244 | 1202 | ||
1245 | nilfs_btree_promote_key(btree, path, level + 1, | 1203 | nilfs_btree_promote_key(btree, path, level + 1, |
1246 | nilfs_btree_node_get_key(btree, node, 0)); | 1204 | nilfs_btree_node_get_key(node, 0)); |
1247 | 1205 | ||
1248 | brelse(path[level].bp_sib_bh); | 1206 | brelse(path[level].bp_sib_bh); |
1249 | path[level].bp_sib_bh = NULL; | 1207 | path[level].bp_sib_bh = NULL; |
@@ -1262,10 +1220,10 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, | |||
1262 | lock_buffer(path[level].bp_bh); | 1220 | lock_buffer(path[level].bp_bh); |
1263 | lock_buffer(path[level].bp_sib_bh); | 1221 | lock_buffer(path[level].bp_sib_bh); |
1264 | 1222 | ||
1265 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 1223 | node = nilfs_btree_get_nonroot_node(path, level); |
1266 | right = nilfs_btree_get_sib_node(btree, path, level); | 1224 | right = nilfs_btree_get_sib_node(path, level); |
1267 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 1225 | nchildren = nilfs_btree_node_get_nchildren(node); |
1268 | rnchildren = nilfs_btree_node_get_nchildren(btree, right); | 1226 | rnchildren = nilfs_btree_node_get_nchildren(right); |
1269 | 1227 | ||
1270 | n = (nchildren + rnchildren) / 2 - nchildren; | 1228 | n = (nchildren + rnchildren) / 2 - nchildren; |
1271 | 1229 | ||
@@ -1281,7 +1239,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, | |||
1281 | 1239 | ||
1282 | path[level + 1].bp_index++; | 1240 | path[level + 1].bp_index++; |
1283 | nilfs_btree_promote_key(btree, path, level + 1, | 1241 | nilfs_btree_promote_key(btree, path, level + 1, |
1284 | nilfs_btree_node_get_key(btree, right, 0)); | 1242 | nilfs_btree_node_get_key(right, 0)); |
1285 | path[level + 1].bp_index--; | 1243 | path[level + 1].bp_index--; |
1286 | 1244 | ||
1287 | brelse(path[level].bp_sib_bh); | 1245 | brelse(path[level].bp_sib_bh); |
@@ -1300,10 +1258,10 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, | |||
1300 | lock_buffer(path[level].bp_bh); | 1258 | lock_buffer(path[level].bp_bh); |
1301 | lock_buffer(path[level].bp_sib_bh); | 1259 | lock_buffer(path[level].bp_sib_bh); |
1302 | 1260 | ||
1303 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 1261 | node = nilfs_btree_get_nonroot_node(path, level); |
1304 | left = nilfs_btree_get_sib_node(btree, path, level); | 1262 | left = nilfs_btree_get_sib_node(path, level); |
1305 | 1263 | ||
1306 | n = nilfs_btree_node_get_nchildren(btree, node); | 1264 | n = nilfs_btree_node_get_nchildren(node); |
1307 | 1265 | ||
1308 | nilfs_btree_node_move_left(btree, left, node, n); | 1266 | nilfs_btree_node_move_left(btree, left, node, n); |
1309 | 1267 | ||
@@ -1316,7 +1274,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, | |||
1316 | nilfs_btnode_delete(path[level].bp_bh); | 1274 | nilfs_btnode_delete(path[level].bp_bh); |
1317 | path[level].bp_bh = path[level].bp_sib_bh; | 1275 | path[level].bp_bh = path[level].bp_sib_bh; |
1318 | path[level].bp_sib_bh = NULL; | 1276 | path[level].bp_sib_bh = NULL; |
1319 | path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); | 1277 | path[level].bp_index += nilfs_btree_node_get_nchildren(left); |
1320 | } | 1278 | } |
1321 | 1279 | ||
1322 | static void nilfs_btree_concat_right(struct nilfs_btree *btree, | 1280 | static void nilfs_btree_concat_right(struct nilfs_btree *btree, |
@@ -1331,10 +1289,10 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, | |||
1331 | lock_buffer(path[level].bp_bh); | 1289 | lock_buffer(path[level].bp_bh); |
1332 | lock_buffer(path[level].bp_sib_bh); | 1290 | lock_buffer(path[level].bp_sib_bh); |
1333 | 1291 | ||
1334 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 1292 | node = nilfs_btree_get_nonroot_node(path, level); |
1335 | right = nilfs_btree_get_sib_node(btree, path, level); | 1293 | right = nilfs_btree_get_sib_node(path, level); |
1336 | 1294 | ||
1337 | n = nilfs_btree_node_get_nchildren(btree, right); | 1295 | n = nilfs_btree_node_get_nchildren(right); |
1338 | 1296 | ||
1339 | nilfs_btree_node_move_left(btree, node, right, n); | 1297 | nilfs_btree_node_move_left(btree, node, right, n); |
1340 | 1298 | ||
@@ -1360,11 +1318,11 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, | |||
1360 | 1318 | ||
1361 | lock_buffer(path[level].bp_bh); | 1319 | lock_buffer(path[level].bp_bh); |
1362 | root = nilfs_btree_get_root(btree); | 1320 | root = nilfs_btree_get_root(btree); |
1363 | child = nilfs_btree_get_nonroot_node(btree, path, level); | 1321 | child = nilfs_btree_get_nonroot_node(path, level); |
1364 | 1322 | ||
1365 | nilfs_btree_node_delete(btree, root, NULL, NULL, 0); | 1323 | nilfs_btree_node_delete(btree, root, NULL, NULL, 0); |
1366 | nilfs_btree_node_set_level(btree, root, level); | 1324 | nilfs_btree_node_set_level(root, level); |
1367 | n = nilfs_btree_node_get_nchildren(btree, child); | 1325 | n = nilfs_btree_node_get_nchildren(child); |
1368 | nilfs_btree_node_move_left(btree, root, child, n); | 1326 | nilfs_btree_node_move_left(btree, root, child, n); |
1369 | unlock_buffer(path[level].bp_bh); | 1327 | unlock_buffer(path[level].bp_bh); |
1370 | 1328 | ||
@@ -1376,7 +1334,8 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, | |||
1376 | static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | 1334 | static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, |
1377 | struct nilfs_btree_path *path, | 1335 | struct nilfs_btree_path *path, |
1378 | int *levelp, | 1336 | int *levelp, |
1379 | struct nilfs_bmap_stats *stats) | 1337 | struct nilfs_bmap_stats *stats, |
1338 | struct inode *dat) | ||
1380 | { | 1339 | { |
1381 | struct buffer_head *bh; | 1340 | struct buffer_head *bh; |
1382 | struct nilfs_btree_node *node, *parent, *sib; | 1341 | struct nilfs_btree_node *node, *parent, *sib; |
@@ -1388,17 +1347,17 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1388 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; | 1347 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; |
1389 | level < nilfs_btree_height(btree) - 1; | 1348 | level < nilfs_btree_height(btree) - 1; |
1390 | level++) { | 1349 | level++) { |
1391 | node = nilfs_btree_get_nonroot_node(btree, path, level); | 1350 | node = nilfs_btree_get_nonroot_node(path, level); |
1392 | path[level].bp_oldreq.bpr_ptr = | 1351 | path[level].bp_oldreq.bpr_ptr = |
1393 | nilfs_btree_node_get_ptr(btree, node, | 1352 | nilfs_btree_node_get_ptr(btree, node, |
1394 | path[level].bp_index); | 1353 | path[level].bp_index); |
1395 | ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, | 1354 | ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, |
1396 | &path[level].bp_oldreq); | 1355 | &path[level].bp_oldreq, dat); |
1397 | if (ret < 0) | 1356 | if (ret < 0) |
1398 | goto err_out_child_node; | 1357 | goto err_out_child_node; |
1399 | 1358 | ||
1400 | if (nilfs_btree_node_get_nchildren(btree, node) > | 1359 | if (nilfs_btree_node_get_nchildren(node) > |
1401 | nilfs_btree_node_nchildren_min(btree, node)) { | 1360 | nilfs_btree_node_nchildren_min(node, btree)) { |
1402 | path[level].bp_op = nilfs_btree_do_delete; | 1361 | path[level].bp_op = nilfs_btree_do_delete; |
1403 | stats->bs_nblocks++; | 1362 | stats->bs_nblocks++; |
1404 | goto out; | 1363 | goto out; |
@@ -1415,8 +1374,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1415 | if (ret < 0) | 1374 | if (ret < 0) |
1416 | goto err_out_curr_node; | 1375 | goto err_out_curr_node; |
1417 | sib = (struct nilfs_btree_node *)bh->b_data; | 1376 | sib = (struct nilfs_btree_node *)bh->b_data; |
1418 | if (nilfs_btree_node_get_nchildren(btree, sib) > | 1377 | if (nilfs_btree_node_get_nchildren(sib) > |
1419 | nilfs_btree_node_nchildren_min(btree, sib)) { | 1378 | nilfs_btree_node_nchildren_min(sib, btree)) { |
1420 | path[level].bp_sib_bh = bh; | 1379 | path[level].bp_sib_bh = bh; |
1421 | path[level].bp_op = nilfs_btree_borrow_left; | 1380 | path[level].bp_op = nilfs_btree_borrow_left; |
1422 | stats->bs_nblocks++; | 1381 | stats->bs_nblocks++; |
@@ -1428,7 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1428 | /* continue; */ | 1387 | /* continue; */ |
1429 | } | 1388 | } |
1430 | } else if (pindex < | 1389 | } else if (pindex < |
1431 | nilfs_btree_node_get_nchildren(btree, parent) - 1) { | 1390 | nilfs_btree_node_get_nchildren(parent) - 1) { |
1432 | /* right sibling */ | 1391 | /* right sibling */ |
1433 | sibptr = nilfs_btree_node_get_ptr(btree, parent, | 1392 | sibptr = nilfs_btree_node_get_ptr(btree, parent, |
1434 | pindex + 1); | 1393 | pindex + 1); |
@@ -1436,8 +1395,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1436 | if (ret < 0) | 1395 | if (ret < 0) |
1437 | goto err_out_curr_node; | 1396 | goto err_out_curr_node; |
1438 | sib = (struct nilfs_btree_node *)bh->b_data; | 1397 | sib = (struct nilfs_btree_node *)bh->b_data; |
1439 | if (nilfs_btree_node_get_nchildren(btree, sib) > | 1398 | if (nilfs_btree_node_get_nchildren(sib) > |
1440 | nilfs_btree_node_nchildren_min(btree, sib)) { | 1399 | nilfs_btree_node_nchildren_min(sib, btree)) { |
1441 | path[level].bp_sib_bh = bh; | 1400 | path[level].bp_sib_bh = bh; |
1442 | path[level].bp_op = nilfs_btree_borrow_right; | 1401 | path[level].bp_op = nilfs_btree_borrow_right; |
1443 | stats->bs_nblocks++; | 1402 | stats->bs_nblocks++; |
@@ -1452,7 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1452 | /* no siblings */ | 1411 | /* no siblings */ |
1453 | /* the only child of the root node */ | 1412 | /* the only child of the root node */ |
1454 | WARN_ON(level != nilfs_btree_height(btree) - 2); | 1413 | WARN_ON(level != nilfs_btree_height(btree) - 2); |
1455 | if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= | 1414 | if (nilfs_btree_node_get_nchildren(node) - 1 <= |
1456 | NILFS_BTREE_ROOT_NCHILDREN_MAX) { | 1415 | NILFS_BTREE_ROOT_NCHILDREN_MAX) { |
1457 | path[level].bp_op = nilfs_btree_shrink; | 1416 | path[level].bp_op = nilfs_btree_shrink; |
1458 | stats->bs_nblocks += 2; | 1417 | stats->bs_nblocks += 2; |
@@ -1471,7 +1430,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1471 | nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); | 1430 | nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); |
1472 | 1431 | ||
1473 | ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, | 1432 | ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, |
1474 | &path[level].bp_oldreq); | 1433 | &path[level].bp_oldreq, dat); |
1475 | if (ret < 0) | 1434 | if (ret < 0) |
1476 | goto err_out_child_node; | 1435 | goto err_out_child_node; |
1477 | 1436 | ||
@@ -1486,12 +1445,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1486 | 1445 | ||
1487 | /* error */ | 1446 | /* error */ |
1488 | err_out_curr_node: | 1447 | err_out_curr_node: |
1489 | nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq); | 1448 | nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat); |
1490 | err_out_child_node: | 1449 | err_out_child_node: |
1491 | for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { | 1450 | for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { |
1492 | brelse(path[level].bp_sib_bh); | 1451 | brelse(path[level].bp_sib_bh); |
1493 | nilfs_bmap_abort_end_ptr(&btree->bt_bmap, | 1452 | nilfs_bmap_abort_end_ptr(&btree->bt_bmap, |
1494 | &path[level].bp_oldreq); | 1453 | &path[level].bp_oldreq, dat); |
1495 | } | 1454 | } |
1496 | *levelp = level; | 1455 | *levelp = level; |
1497 | stats->bs_nblocks = 0; | 1456 | stats->bs_nblocks = 0; |
@@ -1500,13 +1459,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1500 | 1459 | ||
1501 | static void nilfs_btree_commit_delete(struct nilfs_btree *btree, | 1460 | static void nilfs_btree_commit_delete(struct nilfs_btree *btree, |
1502 | struct nilfs_btree_path *path, | 1461 | struct nilfs_btree_path *path, |
1503 | int maxlevel) | 1462 | int maxlevel, struct inode *dat) |
1504 | { | 1463 | { |
1505 | int level; | 1464 | int level; |
1506 | 1465 | ||
1507 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { | 1466 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { |
1508 | nilfs_bmap_commit_end_ptr(&btree->bt_bmap, | 1467 | nilfs_bmap_commit_end_ptr(&btree->bt_bmap, |
1509 | &path[level].bp_oldreq); | 1468 | &path[level].bp_oldreq, dat); |
1510 | path[level].bp_op(btree, path, level, NULL, NULL); | 1469 | path[level].bp_op(btree, path, level, NULL, NULL); |
1511 | } | 1470 | } |
1512 | 1471 | ||
@@ -1520,27 +1479,32 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) | |||
1520 | struct nilfs_btree *btree; | 1479 | struct nilfs_btree *btree; |
1521 | struct nilfs_btree_path *path; | 1480 | struct nilfs_btree_path *path; |
1522 | struct nilfs_bmap_stats stats; | 1481 | struct nilfs_bmap_stats stats; |
1482 | struct inode *dat; | ||
1523 | int level, ret; | 1483 | int level, ret; |
1524 | 1484 | ||
1525 | btree = (struct nilfs_btree *)bmap; | 1485 | btree = (struct nilfs_btree *)bmap; |
1526 | path = nilfs_btree_alloc_path(btree); | 1486 | path = nilfs_btree_alloc_path(); |
1527 | if (path == NULL) | 1487 | if (path == NULL) |
1528 | return -ENOMEM; | 1488 | return -ENOMEM; |
1529 | nilfs_btree_init_path(btree, path); | 1489 | nilfs_btree_init_path(path); |
1530 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | 1490 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, |
1531 | NILFS_BTREE_LEVEL_NODE_MIN); | 1491 | NILFS_BTREE_LEVEL_NODE_MIN); |
1532 | if (ret < 0) | 1492 | if (ret < 0) |
1533 | goto out; | 1493 | goto out; |
1534 | 1494 | ||
1535 | ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); | 1495 | |
1496 | dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ? | ||
1497 | nilfs_bmap_get_dat(&btree->bt_bmap) : NULL; | ||
1498 | |||
1499 | ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); | ||
1536 | if (ret < 0) | 1500 | if (ret < 0) |
1537 | goto out; | 1501 | goto out; |
1538 | nilfs_btree_commit_delete(btree, path, level); | 1502 | nilfs_btree_commit_delete(btree, path, level, dat); |
1539 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | 1503 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); |
1540 | 1504 | ||
1541 | out: | 1505 | out: |
1542 | nilfs_btree_clear_path(btree, path); | 1506 | nilfs_btree_release_path(path); |
1543 | nilfs_btree_free_path(btree, path); | 1507 | nilfs_btree_free_path(path); |
1544 | return ret; | 1508 | return ret; |
1545 | } | 1509 | } |
1546 | 1510 | ||
@@ -1551,15 +1515,15 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | |||
1551 | int ret; | 1515 | int ret; |
1552 | 1516 | ||
1553 | btree = (struct nilfs_btree *)bmap; | 1517 | btree = (struct nilfs_btree *)bmap; |
1554 | path = nilfs_btree_alloc_path(btree); | 1518 | path = nilfs_btree_alloc_path(); |
1555 | if (path == NULL) | 1519 | if (path == NULL) |
1556 | return -ENOMEM; | 1520 | return -ENOMEM; |
1557 | nilfs_btree_init_path(btree, path); | 1521 | nilfs_btree_init_path(path); |
1558 | 1522 | ||
1559 | ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); | 1523 | ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); |
1560 | 1524 | ||
1561 | nilfs_btree_clear_path(btree, path); | 1525 | nilfs_btree_release_path(path); |
1562 | nilfs_btree_free_path(btree, path); | 1526 | nilfs_btree_free_path(path); |
1563 | 1527 | ||
1564 | return ret; | 1528 | return ret; |
1565 | } | 1529 | } |
@@ -1581,7 +1545,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) | |||
1581 | node = root; | 1545 | node = root; |
1582 | break; | 1546 | break; |
1583 | case 3: | 1547 | case 3: |
1584 | nchildren = nilfs_btree_node_get_nchildren(btree, root); | 1548 | nchildren = nilfs_btree_node_get_nchildren(root); |
1585 | if (nchildren > 1) | 1549 | if (nchildren > 1) |
1586 | return 0; | 1550 | return 0; |
1587 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); | 1551 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); |
@@ -1594,10 +1558,10 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) | |||
1594 | return 0; | 1558 | return 0; |
1595 | } | 1559 | } |
1596 | 1560 | ||
1597 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 1561 | nchildren = nilfs_btree_node_get_nchildren(node); |
1598 | maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1); | 1562 | maxkey = nilfs_btree_node_get_key(node, nchildren - 1); |
1599 | nextmaxkey = (nchildren > 1) ? | 1563 | nextmaxkey = (nchildren > 1) ? |
1600 | nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; | 1564 | nilfs_btree_node_get_key(node, nchildren - 2) : 0; |
1601 | if (bh != NULL) | 1565 | if (bh != NULL) |
1602 | brelse(bh); | 1566 | brelse(bh); |
1603 | 1567 | ||
@@ -1623,7 +1587,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, | |||
1623 | node = root; | 1587 | node = root; |
1624 | break; | 1588 | break; |
1625 | case 3: | 1589 | case 3: |
1626 | nchildren = nilfs_btree_node_get_nchildren(btree, root); | 1590 | nchildren = nilfs_btree_node_get_nchildren(root); |
1627 | WARN_ON(nchildren > 1); | 1591 | WARN_ON(nchildren > 1); |
1628 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); | 1592 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); |
1629 | ret = nilfs_btree_get_block(btree, ptr, &bh); | 1593 | ret = nilfs_btree_get_block(btree, ptr, &bh); |
@@ -1636,11 +1600,11 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, | |||
1636 | return -EINVAL; | 1600 | return -EINVAL; |
1637 | } | 1601 | } |
1638 | 1602 | ||
1639 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | 1603 | nchildren = nilfs_btree_node_get_nchildren(node); |
1640 | if (nchildren < nitems) | 1604 | if (nchildren < nitems) |
1641 | nitems = nchildren; | 1605 | nitems = nchildren; |
1642 | dkeys = nilfs_btree_node_dkeys(btree, node); | 1606 | dkeys = nilfs_btree_node_dkeys(node); |
1643 | dptrs = nilfs_btree_node_dptrs(btree, node); | 1607 | dptrs = nilfs_btree_node_dptrs(node, btree); |
1644 | for (i = 0; i < nitems; i++) { | 1608 | for (i = 0; i < nitems; i++) { |
1645 | keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); | 1609 | keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); |
1646 | ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); | 1610 | ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); |
@@ -1660,18 +1624,20 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | |||
1660 | struct nilfs_bmap_stats *stats) | 1624 | struct nilfs_bmap_stats *stats) |
1661 | { | 1625 | { |
1662 | struct buffer_head *bh; | 1626 | struct buffer_head *bh; |
1663 | struct nilfs_btree *btree; | 1627 | struct nilfs_btree *btree = (struct nilfs_btree *)bmap; |
1628 | struct inode *dat = NULL; | ||
1664 | int ret; | 1629 | int ret; |
1665 | 1630 | ||
1666 | btree = (struct nilfs_btree *)bmap; | ||
1667 | stats->bs_nblocks = 0; | 1631 | stats->bs_nblocks = 0; |
1668 | 1632 | ||
1669 | /* for data */ | 1633 | /* for data */ |
1670 | /* cannot find near ptr */ | 1634 | /* cannot find near ptr */ |
1671 | if (NILFS_BMAP_USE_VBN(bmap)) | 1635 | if (NILFS_BMAP_USE_VBN(bmap)) { |
1672 | dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); | 1636 | dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); |
1637 | dat = nilfs_bmap_get_dat(bmap); | ||
1638 | } | ||
1673 | 1639 | ||
1674 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq); | 1640 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat); |
1675 | if (ret < 0) | 1641 | if (ret < 0) |
1676 | return ret; | 1642 | return ret; |
1677 | 1643 | ||
@@ -1679,7 +1645,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | |||
1679 | stats->bs_nblocks++; | 1645 | stats->bs_nblocks++; |
1680 | if (nreq != NULL) { | 1646 | if (nreq != NULL) { |
1681 | nreq->bpr_ptr = dreq->bpr_ptr + 1; | 1647 | nreq->bpr_ptr = dreq->bpr_ptr + 1; |
1682 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq); | 1648 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat); |
1683 | if (ret < 0) | 1649 | if (ret < 0) |
1684 | goto err_out_dreq; | 1650 | goto err_out_dreq; |
1685 | 1651 | ||
@@ -1696,9 +1662,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | |||
1696 | 1662 | ||
1697 | /* error */ | 1663 | /* error */ |
1698 | err_out_nreq: | 1664 | err_out_nreq: |
1699 | nilfs_bmap_abort_alloc_ptr(bmap, nreq); | 1665 | nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat); |
1700 | err_out_dreq: | 1666 | err_out_dreq: |
1701 | nilfs_bmap_abort_alloc_ptr(bmap, dreq); | 1667 | nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat); |
1702 | stats->bs_nblocks = 0; | 1668 | stats->bs_nblocks = 0; |
1703 | return ret; | 1669 | return ret; |
1704 | 1670 | ||
@@ -1713,8 +1679,9 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, | |||
1713 | union nilfs_bmap_ptr_req *nreq, | 1679 | union nilfs_bmap_ptr_req *nreq, |
1714 | struct buffer_head *bh) | 1680 | struct buffer_head *bh) |
1715 | { | 1681 | { |
1716 | struct nilfs_btree *btree; | 1682 | struct nilfs_btree *btree = (struct nilfs_btree *)bmap; |
1717 | struct nilfs_btree_node *node; | 1683 | struct nilfs_btree_node *node; |
1684 | struct inode *dat; | ||
1718 | __u64 tmpptr; | 1685 | __u64 tmpptr; |
1719 | 1686 | ||
1720 | /* free resources */ | 1687 | /* free resources */ |
@@ -1725,11 +1692,11 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, | |||
1725 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); | 1692 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); |
1726 | 1693 | ||
1727 | /* convert and insert */ | 1694 | /* convert and insert */ |
1728 | btree = (struct nilfs_btree *)bmap; | 1695 | dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; |
1729 | nilfs_btree_init(bmap); | 1696 | nilfs_btree_init(bmap); |
1730 | if (nreq != NULL) { | 1697 | if (nreq != NULL) { |
1731 | nilfs_bmap_commit_alloc_ptr(bmap, dreq); | 1698 | nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); |
1732 | nilfs_bmap_commit_alloc_ptr(bmap, nreq); | 1699 | nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); |
1733 | 1700 | ||
1734 | /* create child node at level 1 */ | 1701 | /* create child node at level 1 */ |
1735 | lock_buffer(bh); | 1702 | lock_buffer(bh); |
@@ -1751,7 +1718,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, | |||
1751 | nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, | 1718 | nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, |
1752 | 2, 1, &keys[0], &tmpptr); | 1719 | 2, 1, &keys[0], &tmpptr); |
1753 | } else { | 1720 | } else { |
1754 | nilfs_bmap_commit_alloc_ptr(bmap, dreq); | 1721 | nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); |
1755 | 1722 | ||
1756 | /* create root node at level 1 */ | 1723 | /* create root node at level 1 */ |
1757 | node = nilfs_btree_get_root(btree); | 1724 | node = nilfs_btree_get_root(btree); |
@@ -1822,7 +1789,7 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree, | |||
1822 | 1789 | ||
1823 | static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | 1790 | static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, |
1824 | struct nilfs_btree_path *path, | 1791 | struct nilfs_btree_path *path, |
1825 | int level) | 1792 | int level, struct inode *dat) |
1826 | { | 1793 | { |
1827 | struct nilfs_btree_node *parent; | 1794 | struct nilfs_btree_node *parent; |
1828 | int ret; | 1795 | int ret; |
@@ -1832,9 +1799,8 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | |||
1832 | nilfs_btree_node_get_ptr(btree, parent, | 1799 | nilfs_btree_node_get_ptr(btree, parent, |
1833 | path[level + 1].bp_index); | 1800 | path[level + 1].bp_index); |
1834 | path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; | 1801 | path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; |
1835 | ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap, | 1802 | ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, |
1836 | &path[level].bp_oldreq, | 1803 | &path[level].bp_newreq.bpr_req); |
1837 | &path[level].bp_newreq); | ||
1838 | if (ret < 0) | 1804 | if (ret < 0) |
1839 | return ret; | 1805 | return ret; |
1840 | 1806 | ||
@@ -1846,9 +1812,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | |||
1846 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 1812 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, |
1847 | &path[level].bp_ctxt); | 1813 | &path[level].bp_ctxt); |
1848 | if (ret < 0) { | 1814 | if (ret < 0) { |
1849 | nilfs_bmap_abort_update_v(&btree->bt_bmap, | 1815 | nilfs_dat_abort_update(dat, |
1850 | &path[level].bp_oldreq, | 1816 | &path[level].bp_oldreq.bpr_req, |
1851 | &path[level].bp_newreq); | 1817 | &path[level].bp_newreq.bpr_req); |
1852 | return ret; | 1818 | return ret; |
1853 | } | 1819 | } |
1854 | } | 1820 | } |
@@ -1858,13 +1824,13 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | |||
1858 | 1824 | ||
1859 | static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, | 1825 | static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, |
1860 | struct nilfs_btree_path *path, | 1826 | struct nilfs_btree_path *path, |
1861 | int level) | 1827 | int level, struct inode *dat) |
1862 | { | 1828 | { |
1863 | struct nilfs_btree_node *parent; | 1829 | struct nilfs_btree_node *parent; |
1864 | 1830 | ||
1865 | nilfs_bmap_commit_update_v(&btree->bt_bmap, | 1831 | nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, |
1866 | &path[level].bp_oldreq, | 1832 | &path[level].bp_newreq.bpr_req, |
1867 | &path[level].bp_newreq); | 1833 | btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS); |
1868 | 1834 | ||
1869 | if (buffer_nilfs_node(path[level].bp_bh)) { | 1835 | if (buffer_nilfs_node(path[level].bp_bh)) { |
1870 | nilfs_btnode_commit_change_key( | 1836 | nilfs_btnode_commit_change_key( |
@@ -1881,11 +1847,10 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, | |||
1881 | 1847 | ||
1882 | static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, | 1848 | static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, |
1883 | struct nilfs_btree_path *path, | 1849 | struct nilfs_btree_path *path, |
1884 | int level) | 1850 | int level, struct inode *dat) |
1885 | { | 1851 | { |
1886 | nilfs_bmap_abort_update_v(&btree->bt_bmap, | 1852 | nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, |
1887 | &path[level].bp_oldreq, | 1853 | &path[level].bp_newreq.bpr_req); |
1888 | &path[level].bp_newreq); | ||
1889 | if (buffer_nilfs_node(path[level].bp_bh)) | 1854 | if (buffer_nilfs_node(path[level].bp_bh)) |
1890 | nilfs_btnode_abort_change_key( | 1855 | nilfs_btnode_abort_change_key( |
1891 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 1856 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, |
@@ -1894,14 +1859,14 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, | |||
1894 | 1859 | ||
1895 | static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, | 1860 | static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, |
1896 | struct nilfs_btree_path *path, | 1861 | struct nilfs_btree_path *path, |
1897 | int minlevel, | 1862 | int minlevel, int *maxlevelp, |
1898 | int *maxlevelp) | 1863 | struct inode *dat) |
1899 | { | 1864 | { |
1900 | int level, ret; | 1865 | int level, ret; |
1901 | 1866 | ||
1902 | level = minlevel; | 1867 | level = minlevel; |
1903 | if (!buffer_nilfs_volatile(path[level].bp_bh)) { | 1868 | if (!buffer_nilfs_volatile(path[level].bp_bh)) { |
1904 | ret = nilfs_btree_prepare_update_v(btree, path, level); | 1869 | ret = nilfs_btree_prepare_update_v(btree, path, level, dat); |
1905 | if (ret < 0) | 1870 | if (ret < 0) |
1906 | return ret; | 1871 | return ret; |
1907 | } | 1872 | } |
@@ -1909,7 +1874,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, | |||
1909 | !buffer_dirty(path[level].bp_bh)) { | 1874 | !buffer_dirty(path[level].bp_bh)) { |
1910 | 1875 | ||
1911 | WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); | 1876 | WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); |
1912 | ret = nilfs_btree_prepare_update_v(btree, path, level); | 1877 | ret = nilfs_btree_prepare_update_v(btree, path, level, dat); |
1913 | if (ret < 0) | 1878 | if (ret < 0) |
1914 | goto out; | 1879 | goto out; |
1915 | } | 1880 | } |
@@ -1921,39 +1886,40 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, | |||
1921 | /* error */ | 1886 | /* error */ |
1922 | out: | 1887 | out: |
1923 | while (--level > minlevel) | 1888 | while (--level > minlevel) |
1924 | nilfs_btree_abort_update_v(btree, path, level); | 1889 | nilfs_btree_abort_update_v(btree, path, level, dat); |
1925 | if (!buffer_nilfs_volatile(path[level].bp_bh)) | 1890 | if (!buffer_nilfs_volatile(path[level].bp_bh)) |
1926 | nilfs_btree_abort_update_v(btree, path, level); | 1891 | nilfs_btree_abort_update_v(btree, path, level, dat); |
1927 | return ret; | 1892 | return ret; |
1928 | } | 1893 | } |
1929 | 1894 | ||
1930 | static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, | 1895 | static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, |
1931 | struct nilfs_btree_path *path, | 1896 | struct nilfs_btree_path *path, |
1932 | int minlevel, | 1897 | int minlevel, int maxlevel, |
1933 | int maxlevel, | 1898 | struct buffer_head *bh, |
1934 | struct buffer_head *bh) | 1899 | struct inode *dat) |
1935 | { | 1900 | { |
1936 | int level; | 1901 | int level; |
1937 | 1902 | ||
1938 | if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) | 1903 | if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) |
1939 | nilfs_btree_commit_update_v(btree, path, minlevel); | 1904 | nilfs_btree_commit_update_v(btree, path, minlevel, dat); |
1940 | 1905 | ||
1941 | for (level = minlevel + 1; level <= maxlevel; level++) | 1906 | for (level = minlevel + 1; level <= maxlevel; level++) |
1942 | nilfs_btree_commit_update_v(btree, path, level); | 1907 | nilfs_btree_commit_update_v(btree, path, level, dat); |
1943 | } | 1908 | } |
1944 | 1909 | ||
1945 | static int nilfs_btree_propagate_v(struct nilfs_btree *btree, | 1910 | static int nilfs_btree_propagate_v(struct nilfs_btree *btree, |
1946 | struct nilfs_btree_path *path, | 1911 | struct nilfs_btree_path *path, |
1947 | int level, | 1912 | int level, struct buffer_head *bh) |
1948 | struct buffer_head *bh) | ||
1949 | { | 1913 | { |
1950 | int maxlevel, ret; | 1914 | int maxlevel, ret; |
1951 | struct nilfs_btree_node *parent; | 1915 | struct nilfs_btree_node *parent; |
1916 | struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); | ||
1952 | __u64 ptr; | 1917 | __u64 ptr; |
1953 | 1918 | ||
1954 | get_bh(bh); | 1919 | get_bh(bh); |
1955 | path[level].bp_bh = bh; | 1920 | path[level].bp_bh = bh; |
1956 | ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel); | 1921 | ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, |
1922 | dat); | ||
1957 | if (ret < 0) | 1923 | if (ret < 0) |
1958 | goto out; | 1924 | goto out; |
1959 | 1925 | ||
@@ -1961,12 +1927,12 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree, | |||
1961 | parent = nilfs_btree_get_node(btree, path, level + 1); | 1927 | parent = nilfs_btree_get_node(btree, path, level + 1); |
1962 | ptr = nilfs_btree_node_get_ptr(btree, parent, | 1928 | ptr = nilfs_btree_node_get_ptr(btree, parent, |
1963 | path[level + 1].bp_index); | 1929 | path[level + 1].bp_index); |
1964 | ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr); | 1930 | ret = nilfs_dat_mark_dirty(dat, ptr); |
1965 | if (ret < 0) | 1931 | if (ret < 0) |
1966 | goto out; | 1932 | goto out; |
1967 | } | 1933 | } |
1968 | 1934 | ||
1969 | nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh); | 1935 | nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); |
1970 | 1936 | ||
1971 | out: | 1937 | out: |
1972 | brelse(path[level].bp_bh); | 1938 | brelse(path[level].bp_bh); |
@@ -1986,15 +1952,15 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
1986 | WARN_ON(!buffer_dirty(bh)); | 1952 | WARN_ON(!buffer_dirty(bh)); |
1987 | 1953 | ||
1988 | btree = (struct nilfs_btree *)bmap; | 1954 | btree = (struct nilfs_btree *)bmap; |
1989 | path = nilfs_btree_alloc_path(btree); | 1955 | path = nilfs_btree_alloc_path(); |
1990 | if (path == NULL) | 1956 | if (path == NULL) |
1991 | return -ENOMEM; | 1957 | return -ENOMEM; |
1992 | nilfs_btree_init_path(btree, path); | 1958 | nilfs_btree_init_path(path); |
1993 | 1959 | ||
1994 | if (buffer_nilfs_node(bh)) { | 1960 | if (buffer_nilfs_node(bh)) { |
1995 | node = (struct nilfs_btree_node *)bh->b_data; | 1961 | node = (struct nilfs_btree_node *)bh->b_data; |
1996 | key = nilfs_btree_node_get_key(btree, node, 0); | 1962 | key = nilfs_btree_node_get_key(node, 0); |
1997 | level = nilfs_btree_node_get_level(btree, node); | 1963 | level = nilfs_btree_node_get_level(node); |
1998 | } else { | 1964 | } else { |
1999 | key = nilfs_bmap_data_get_key(bmap, bh); | 1965 | key = nilfs_bmap_data_get_key(bmap, bh); |
2000 | level = NILFS_BTREE_LEVEL_DATA; | 1966 | level = NILFS_BTREE_LEVEL_DATA; |
@@ -2013,8 +1979,8 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
2013 | nilfs_btree_propagate_p(btree, path, level, bh); | 1979 | nilfs_btree_propagate_p(btree, path, level, bh); |
2014 | 1980 | ||
2015 | out: | 1981 | out: |
2016 | nilfs_btree_clear_path(btree, path); | 1982 | nilfs_btree_release_path(path); |
2017 | nilfs_btree_free_path(btree, path); | 1983 | nilfs_btree_free_path(path); |
2018 | 1984 | ||
2019 | return ret; | 1985 | return ret; |
2020 | } | 1986 | } |
@@ -2022,7 +1988,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
2022 | static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, | 1988 | static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, |
2023 | struct buffer_head *bh) | 1989 | struct buffer_head *bh) |
2024 | { | 1990 | { |
2025 | return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr); | 1991 | return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr); |
2026 | } | 1992 | } |
2027 | 1993 | ||
2028 | static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | 1994 | static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, |
@@ -2037,12 +2003,12 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | |||
2037 | 2003 | ||
2038 | get_bh(bh); | 2004 | get_bh(bh); |
2039 | node = (struct nilfs_btree_node *)bh->b_data; | 2005 | node = (struct nilfs_btree_node *)bh->b_data; |
2040 | key = nilfs_btree_node_get_key(btree, node, 0); | 2006 | key = nilfs_btree_node_get_key(node, 0); |
2041 | level = nilfs_btree_node_get_level(btree, node); | 2007 | level = nilfs_btree_node_get_level(node); |
2042 | list_for_each(head, &lists[level]) { | 2008 | list_for_each(head, &lists[level]) { |
2043 | cbh = list_entry(head, struct buffer_head, b_assoc_buffers); | 2009 | cbh = list_entry(head, struct buffer_head, b_assoc_buffers); |
2044 | cnode = (struct nilfs_btree_node *)cbh->b_data; | 2010 | cnode = (struct nilfs_btree_node *)cbh->b_data; |
2045 | ckey = nilfs_btree_node_get_key(btree, cnode, 0); | 2011 | ckey = nilfs_btree_node_get_key(cnode, 0); |
2046 | if (key < ckey) | 2012 | if (key < ckey) |
2047 | break; | 2013 | break; |
2048 | } | 2014 | } |
@@ -2120,8 +2086,7 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree, | |||
2120 | nilfs_btree_node_set_ptr(btree, parent, | 2086 | nilfs_btree_node_set_ptr(btree, parent, |
2121 | path[level + 1].bp_index, blocknr); | 2087 | path[level + 1].bp_index, blocknr); |
2122 | 2088 | ||
2123 | key = nilfs_btree_node_get_key(btree, parent, | 2089 | key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); |
2124 | path[level + 1].bp_index); | ||
2125 | /* on-disk format */ | 2090 | /* on-disk format */ |
2126 | binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 2091 | binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); |
2127 | binfo->bi_dat.bi_level = level; | 2092 | binfo->bi_dat.bi_level = level; |
@@ -2137,6 +2102,7 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, | |||
2137 | union nilfs_binfo *binfo) | 2102 | union nilfs_binfo *binfo) |
2138 | { | 2103 | { |
2139 | struct nilfs_btree_node *parent; | 2104 | struct nilfs_btree_node *parent; |
2105 | struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); | ||
2140 | __u64 key; | 2106 | __u64 key; |
2141 | __u64 ptr; | 2107 | __u64 ptr; |
2142 | union nilfs_bmap_ptr_req req; | 2108 | union nilfs_bmap_ptr_req req; |
@@ -2146,12 +2112,12 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, | |||
2146 | ptr = nilfs_btree_node_get_ptr(btree, parent, | 2112 | ptr = nilfs_btree_node_get_ptr(btree, parent, |
2147 | path[level + 1].bp_index); | 2113 | path[level + 1].bp_index); |
2148 | req.bpr_ptr = ptr; | 2114 | req.bpr_ptr = ptr; |
2149 | ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr); | 2115 | ret = nilfs_dat_prepare_start(dat, &req.bpr_req); |
2150 | if (unlikely(ret < 0)) | 2116 | if (ret < 0) |
2151 | return ret; | 2117 | return ret; |
2118 | nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); | ||
2152 | 2119 | ||
2153 | key = nilfs_btree_node_get_key(btree, parent, | 2120 | key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); |
2154 | path[level + 1].bp_index); | ||
2155 | /* on-disk format */ | 2121 | /* on-disk format */ |
2156 | binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); | 2122 | binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); |
2157 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 2123 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); |
@@ -2171,15 +2137,15 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, | |||
2171 | int level, ret; | 2137 | int level, ret; |
2172 | 2138 | ||
2173 | btree = (struct nilfs_btree *)bmap; | 2139 | btree = (struct nilfs_btree *)bmap; |
2174 | path = nilfs_btree_alloc_path(btree); | 2140 | path = nilfs_btree_alloc_path(); |
2175 | if (path == NULL) | 2141 | if (path == NULL) |
2176 | return -ENOMEM; | 2142 | return -ENOMEM; |
2177 | nilfs_btree_init_path(btree, path); | 2143 | nilfs_btree_init_path(path); |
2178 | 2144 | ||
2179 | if (buffer_nilfs_node(*bh)) { | 2145 | if (buffer_nilfs_node(*bh)) { |
2180 | node = (struct nilfs_btree_node *)(*bh)->b_data; | 2146 | node = (struct nilfs_btree_node *)(*bh)->b_data; |
2181 | key = nilfs_btree_node_get_key(btree, node, 0); | 2147 | key = nilfs_btree_node_get_key(node, 0); |
2182 | level = nilfs_btree_node_get_level(btree, node); | 2148 | level = nilfs_btree_node_get_level(node); |
2183 | } else { | 2149 | } else { |
2184 | key = nilfs_bmap_data_get_key(bmap, *bh); | 2150 | key = nilfs_bmap_data_get_key(bmap, *bh); |
2185 | level = NILFS_BTREE_LEVEL_DATA; | 2151 | level = NILFS_BTREE_LEVEL_DATA; |
@@ -2196,8 +2162,8 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, | |||
2196 | nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); | 2162 | nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); |
2197 | 2163 | ||
2198 | out: | 2164 | out: |
2199 | nilfs_btree_clear_path(btree, path); | 2165 | nilfs_btree_release_path(path); |
2200 | nilfs_btree_free_path(btree, path); | 2166 | nilfs_btree_free_path(path); |
2201 | 2167 | ||
2202 | return ret; | 2168 | return ret; |
2203 | } | 2169 | } |
@@ -2207,19 +2173,18 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, | |||
2207 | sector_t blocknr, | 2173 | sector_t blocknr, |
2208 | union nilfs_binfo *binfo) | 2174 | union nilfs_binfo *binfo) |
2209 | { | 2175 | { |
2210 | struct nilfs_btree *btree; | ||
2211 | struct nilfs_btree_node *node; | 2176 | struct nilfs_btree_node *node; |
2212 | __u64 key; | 2177 | __u64 key; |
2213 | int ret; | 2178 | int ret; |
2214 | 2179 | ||
2215 | btree = (struct nilfs_btree *)bmap; | 2180 | ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr, |
2216 | ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr); | 2181 | blocknr); |
2217 | if (ret < 0) | 2182 | if (ret < 0) |
2218 | return ret; | 2183 | return ret; |
2219 | 2184 | ||
2220 | if (buffer_nilfs_node(*bh)) { | 2185 | if (buffer_nilfs_node(*bh)) { |
2221 | node = (struct nilfs_btree_node *)(*bh)->b_data; | 2186 | node = (struct nilfs_btree_node *)(*bh)->b_data; |
2222 | key = nilfs_btree_node_get_key(btree, node, 0); | 2187 | key = nilfs_btree_node_get_key(node, 0); |
2223 | } else | 2188 | } else |
2224 | key = nilfs_bmap_data_get_key(bmap, *bh); | 2189 | key = nilfs_bmap_data_get_key(bmap, *bh); |
2225 | 2190 | ||
@@ -2239,10 +2204,10 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) | |||
2239 | int ret; | 2204 | int ret; |
2240 | 2205 | ||
2241 | btree = (struct nilfs_btree *)bmap; | 2206 | btree = (struct nilfs_btree *)bmap; |
2242 | path = nilfs_btree_alloc_path(btree); | 2207 | path = nilfs_btree_alloc_path(); |
2243 | if (path == NULL) | 2208 | if (path == NULL) |
2244 | return -ENOMEM; | 2209 | return -ENOMEM; |
2245 | nilfs_btree_init_path(btree, path); | 2210 | nilfs_btree_init_path(path); |
2246 | 2211 | ||
2247 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); | 2212 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); |
2248 | if (ret < 0) { | 2213 | if (ret < 0) { |
@@ -2262,8 +2227,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) | |||
2262 | nilfs_bmap_set_dirty(&btree->bt_bmap); | 2227 | nilfs_bmap_set_dirty(&btree->bt_bmap); |
2263 | 2228 | ||
2264 | out: | 2229 | out: |
2265 | nilfs_btree_clear_path(btree, path); | 2230 | nilfs_btree_release_path(path); |
2266 | nilfs_btree_free_path(btree, path); | 2231 | nilfs_btree_free_path(path); |
2267 | return ret; | 2232 | return ret; |
2268 | } | 2233 | } |
2269 | 2234 | ||
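The btree.c changes above follow one pattern: node accessors drop the now-unused btree argument, and the prepare/commit/abort helpers take the DAT inode explicitly instead of resolving it internally on every call. A minimal sketch of the resulting calling convention for the delete path (function names from the patch; the lookup step is omitted and the function itself is illustrative, not part of the diff):

    static int delete_sketch(struct nilfs_bmap *bmap, __u64 key)
    {
            struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
            struct nilfs_btree_path *path;
            struct nilfs_bmap_stats stats;
            struct inode *dat;
            int level, ret;

            path = nilfs_btree_alloc_path();
            if (path == NULL)
                    return -ENOMEM;
            nilfs_btree_init_path(path);

            /* Resolve the DAT once per operation; NULL for bmaps that
               store raw block numbers, such as the DAT file itself. */
            dat = NILFS_BMAP_USE_VBN(bmap) ?
                    nilfs_bmap_get_dat(bmap) : NULL;

            ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
            if (!ret)
                    nilfs_btree_commit_delete(btree, path, level, dat);

            nilfs_btree_release_path(path);
            nilfs_btree_free_path(path);
            return ret;
    }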
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index aec942cf79e3..1c6cfb59128d 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c | |||
@@ -815,8 +815,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) | |||
815 | void *kaddr; | 815 | void *kaddr; |
816 | int ret; | 816 | int ret; |
817 | 817 | ||
818 | if (cno == 0) | 818 | /* CP number is invalid if it's zero or larger than the |
819 | return -ENOENT; /* checkpoint number 0 is invalid */ | 819 | largest existing one. */ |
820 | if (cno == 0 || cno >= nilfs_mdt_cno(cpfile)) | ||
821 | return -ENOENT; | ||
820 | down_read(&NILFS_MDT(cpfile)->mi_sem); | 822 | down_read(&NILFS_MDT(cpfile)->mi_sem); |
821 | 823 | ||
822 | ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); | 824 | ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); |
@@ -824,7 +826,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) | |||
824 | goto out; | 826 | goto out; |
825 | kaddr = kmap_atomic(bh->b_page, KM_USER0); | 827 | kaddr = kmap_atomic(bh->b_page, KM_USER0); |
826 | cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); | 828 | cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); |
827 | ret = nilfs_checkpoint_snapshot(cp); | 829 | if (nilfs_checkpoint_invalid(cp)) |
830 | ret = -ENOENT; | ||
831 | else | ||
832 | ret = nilfs_checkpoint_snapshot(cp); | ||
828 | kunmap_atomic(kaddr, KM_USER0); | 833 | kunmap_atomic(kaddr, KM_USER0); |
829 | brelse(bh); | 834 | brelse(bh); |
830 | 835 | ||
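Condensed, the two extra rejections nilfs_cpfile_is_snapshot() now performs look like this (locking and the checkpoint-block lookup elided; names from the patch):

    if (cno == 0 || cno >= nilfs_mdt_cno(cpfile))
            return -ENOENT;         /* outside the range of existing CPs */
    ...
    if (nilfs_checkpoint_invalid(cp))
            ret = -ENOENT;          /* slot exists but holds no live CP */
    else
            ret = nilfs_checkpoint_snapshot(cp);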
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 788a45950197..debea896e701 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h | |||
@@ -27,8 +27,6 @@ | |||
27 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
28 | #include <linux/nilfs2_fs.h> | 28 | #include <linux/nilfs2_fs.h> |
29 | 29 | ||
30 | #define NILFS_CPFILE_GFP NILFS_MDT_GFP | ||
31 | |||
32 | 30 | ||
33 | int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, | 31 | int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, |
34 | struct nilfs_checkpoint **, | 32 | struct nilfs_checkpoint **, |
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 8927ca27e6f7..1ff8e15bd36b 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c | |||
@@ -109,12 +109,6 @@ void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) | |||
109 | nilfs_palloc_commit_free_entry(dat, req); | 109 | nilfs_palloc_commit_free_entry(dat, req); |
110 | } | 110 | } |
111 | 111 | ||
112 | void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req) | ||
113 | { | ||
114 | nilfs_dat_abort_entry(dat, req); | ||
115 | nilfs_palloc_abort_free_entry(dat, req); | ||
116 | } | ||
117 | |||
118 | int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) | 112 | int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) |
119 | { | 113 | { |
120 | int ret; | 114 | int ret; |
@@ -140,11 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, | |||
140 | nilfs_dat_commit_entry(dat, req); | 134 | nilfs_dat_commit_entry(dat, req); |
141 | } | 135 | } |
142 | 136 | ||
143 | void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req) | ||
144 | { | ||
145 | nilfs_dat_abort_entry(dat, req); | ||
146 | } | ||
147 | |||
148 | int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) | 137 | int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) |
149 | { | 138 | { |
150 | struct nilfs_dat_entry *entry; | 139 | struct nilfs_dat_entry *entry; |
@@ -222,6 +211,37 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) | |||
222 | nilfs_dat_abort_entry(dat, req); | 211 | nilfs_dat_abort_entry(dat, req); |
223 | } | 212 | } |
224 | 213 | ||
214 | int nilfs_dat_prepare_update(struct inode *dat, | ||
215 | struct nilfs_palloc_req *oldreq, | ||
216 | struct nilfs_palloc_req *newreq) | ||
217 | { | ||
218 | int ret; | ||
219 | |||
220 | ret = nilfs_dat_prepare_end(dat, oldreq); | ||
221 | if (!ret) { | ||
222 | ret = nilfs_dat_prepare_alloc(dat, newreq); | ||
223 | if (ret < 0) | ||
224 | nilfs_dat_abort_end(dat, oldreq); | ||
225 | } | ||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | void nilfs_dat_commit_update(struct inode *dat, | ||
230 | struct nilfs_palloc_req *oldreq, | ||
231 | struct nilfs_palloc_req *newreq, int dead) | ||
232 | { | ||
233 | nilfs_dat_commit_end(dat, oldreq, dead); | ||
234 | nilfs_dat_commit_alloc(dat, newreq); | ||
235 | } | ||
236 | |||
237 | void nilfs_dat_abort_update(struct inode *dat, | ||
238 | struct nilfs_palloc_req *oldreq, | ||
239 | struct nilfs_palloc_req *newreq) | ||
240 | { | ||
241 | nilfs_dat_abort_end(dat, oldreq); | ||
242 | nilfs_dat_abort_alloc(dat, newreq); | ||
243 | } | ||
244 | |||
225 | /** | 245 | /** |
226 | * nilfs_dat_mark_dirty - | 246 | * nilfs_dat_mark_dirty - |
227 | * @dat: DAT file inode | 247 | * @dat: DAT file inode |
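The new prepare/commit/abort triple packages virtual-block relocation as a single two-phase operation. A hypothetical caller showing how the three compose — the real users are the btree and direct bmap update paths in this patch, and stage_block_move() is a placeholder for whatever work happens between the phases:

    static int relocate_sketch(struct inode *dat,
                               struct nilfs_palloc_req *oldreq,
                               struct nilfs_palloc_req *newreq, int dead)
    {
            int ret;

            ret = nilfs_dat_prepare_update(dat, oldreq, newreq);
            if (ret < 0)
                    return ret;     /* prepare already undid its own work */

            if (stage_block_move() < 0) {   /* placeholder step */
                    nilfs_dat_abort_update(dat, oldreq, newreq);
                    return -EIO;
            }

            nilfs_dat_commit_update(dat, oldreq, newreq, dead);
            return 0;
    }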
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d328b81eead4..406070d3ff49 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
28 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
29 | 29 | ||
30 | #define NILFS_DAT_GFP NILFS_MDT_GFP | ||
31 | 30 | ||
32 | struct nilfs_palloc_req; | 31 | struct nilfs_palloc_req; |
33 | 32 | ||
@@ -39,10 +38,15 @@ void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *); | |||
39 | int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); | 38 | int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); |
40 | void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, | 39 | void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, |
41 | sector_t); | 40 | sector_t); |
42 | void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *); | ||
43 | int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); | 41 | int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); |
44 | void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); | 42 | void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); |
45 | void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); | 43 | void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); |
44 | int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *, | ||
45 | struct nilfs_palloc_req *); | ||
46 | void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *, | ||
47 | struct nilfs_palloc_req *, int); | ||
48 | void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *, | ||
49 | struct nilfs_palloc_req *); | ||
46 | 50 | ||
47 | int nilfs_dat_mark_dirty(struct inode *, __u64); | 51 | int nilfs_dat_mark_dirty(struct inode *, __u64); |
48 | int nilfs_dat_freev(struct inode *, __u64 *, size_t); | 52 | int nilfs_dat_freev(struct inode *, __u64 *, size_t); |
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 342d9765df8d..d369ac718277 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c | |||
@@ -125,106 +125,64 @@ static void nilfs_direct_set_target_v(struct nilfs_direct *direct, | |||
125 | direct->d_bmap.b_last_allocated_ptr = ptr; | 125 | direct->d_bmap.b_last_allocated_ptr = ptr; |
126 | } | 126 | } |
127 | 127 | ||
128 | static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, | ||
129 | __u64 key, | ||
130 | union nilfs_bmap_ptr_req *req, | ||
131 | struct nilfs_bmap_stats *stats) | ||
132 | { | ||
133 | int ret; | ||
134 | |||
135 | if (NILFS_BMAP_USE_VBN(&direct->d_bmap)) | ||
136 | req->bpr_ptr = nilfs_direct_find_target_v(direct, key); | ||
137 | ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req); | ||
138 | if (ret < 0) | ||
139 | return ret; | ||
140 | |||
141 | stats->bs_nblocks = 1; | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static void nilfs_direct_commit_insert(struct nilfs_direct *direct, | ||
146 | union nilfs_bmap_ptr_req *req, | ||
147 | __u64 key, __u64 ptr) | ||
148 | { | ||
149 | struct buffer_head *bh; | ||
150 | |||
151 | /* ptr must be a pointer to a buffer head. */ | ||
152 | bh = (struct buffer_head *)((unsigned long)ptr); | ||
153 | set_buffer_nilfs_volatile(bh); | ||
154 | |||
155 | nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req); | ||
156 | nilfs_direct_set_ptr(direct, key, req->bpr_ptr); | ||
157 | |||
158 | if (!nilfs_bmap_dirty(&direct->d_bmap)) | ||
159 | nilfs_bmap_set_dirty(&direct->d_bmap); | ||
160 | |||
161 | if (NILFS_BMAP_USE_VBN(&direct->d_bmap)) | ||
162 | nilfs_direct_set_target_v(direct, key, req->bpr_ptr); | ||
163 | } | ||
164 | |||
165 | static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | 128 | static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) |
166 | { | 129 | { |
167 | struct nilfs_direct *direct; | 130 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; |
168 | union nilfs_bmap_ptr_req req; | 131 | union nilfs_bmap_ptr_req req; |
169 | struct nilfs_bmap_stats stats; | 132 | struct inode *dat = NULL; |
133 | struct buffer_head *bh; | ||
170 | int ret; | 134 | int ret; |
171 | 135 | ||
172 | direct = (struct nilfs_direct *)bmap; | ||
173 | if (key > NILFS_DIRECT_KEY_MAX) | 136 | if (key > NILFS_DIRECT_KEY_MAX) |
174 | return -ENOENT; | 137 | return -ENOENT; |
175 | if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) | 138 | if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) |
176 | return -EEXIST; | 139 | return -EEXIST; |
177 | 140 | ||
178 | ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); | 141 | if (NILFS_BMAP_USE_VBN(bmap)) { |
179 | if (ret < 0) | 142 | req.bpr_ptr = nilfs_direct_find_target_v(direct, key); |
180 | return ret; | 143 | dat = nilfs_bmap_get_dat(bmap); |
181 | nilfs_direct_commit_insert(direct, &req, key, ptr); | 144 | } |
182 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | 145 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); |
146 | if (!ret) { | ||
147 | /* ptr must be a pointer to a buffer head. */ | ||
148 | bh = (struct buffer_head *)((unsigned long)ptr); | ||
149 | set_buffer_nilfs_volatile(bh); | ||
183 | 150 | ||
184 | return 0; | 151 | nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); |
185 | } | 152 | nilfs_direct_set_ptr(direct, key, req.bpr_ptr); |
186 | 153 | ||
187 | static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, | 154 | if (!nilfs_bmap_dirty(bmap)) |
188 | union nilfs_bmap_ptr_req *req, | 155 | nilfs_bmap_set_dirty(bmap); |
189 | __u64 key, | ||
190 | struct nilfs_bmap_stats *stats) | ||
191 | { | ||
192 | int ret; | ||
193 | 156 | ||
194 | req->bpr_ptr = nilfs_direct_get_ptr(direct, key); | 157 | if (NILFS_BMAP_USE_VBN(bmap)) |
195 | ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req); | 158 | nilfs_direct_set_target_v(direct, key, req.bpr_ptr); |
196 | if (!ret) | ||
197 | stats->bs_nblocks = 1; | ||
198 | return ret; | ||
199 | } | ||
200 | 159 | ||
201 | static void nilfs_direct_commit_delete(struct nilfs_direct *direct, | 160 | nilfs_bmap_add_blocks(bmap, 1); |
202 | union nilfs_bmap_ptr_req *req, | 161 | } |
203 | __u64 key) | 162 | return ret; |
204 | { | ||
205 | nilfs_bmap_commit_end_ptr(&direct->d_bmap, req); | ||
206 | nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); | ||
207 | } | 163 | } |
208 | 164 | ||
209 | static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) | 165 | static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) |
210 | { | 166 | { |
211 | struct nilfs_direct *direct; | 167 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; |
212 | union nilfs_bmap_ptr_req req; | 168 | union nilfs_bmap_ptr_req req; |
213 | struct nilfs_bmap_stats stats; | 169 | struct inode *dat; |
214 | int ret; | 170 | int ret; |
215 | 171 | ||
216 | direct = (struct nilfs_direct *)bmap; | 172 | if (key > NILFS_DIRECT_KEY_MAX || |
217 | if ((key > NILFS_DIRECT_KEY_MAX) || | ||
218 | nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) | 173 | nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) |
219 | return -ENOENT; | 174 | return -ENOENT; |
220 | 175 | ||
221 | ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); | 176 | dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; |
222 | if (ret < 0) | 177 | req.bpr_ptr = nilfs_direct_get_ptr(direct, key); |
223 | return ret; | ||
224 | nilfs_direct_commit_delete(direct, &req, key); | ||
225 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | ||
226 | 178 | ||
227 | return 0; | 179 | ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); |
180 | if (!ret) { | ||
181 | nilfs_bmap_commit_end_ptr(bmap, &req, dat); | ||
182 | nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); | ||
183 | nilfs_bmap_sub_blocks(bmap, 1); | ||
184 | } | ||
185 | return ret; | ||
228 | } | 186 | } |
229 | 187 | ||
230 | static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | 188 | static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) |
@@ -310,59 +268,56 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, | |||
310 | return 0; | 268 | return 0; |
311 | } | 269 | } |
312 | 270 | ||
313 | static int nilfs_direct_propagate_v(struct nilfs_direct *direct, | 271 | static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, |
314 | struct buffer_head *bh) | 272 | struct buffer_head *bh) |
315 | { | 273 | { |
316 | union nilfs_bmap_ptr_req oldreq, newreq; | 274 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; |
275 | struct nilfs_palloc_req oldreq, newreq; | ||
276 | struct inode *dat; | ||
317 | __u64 key; | 277 | __u64 key; |
318 | __u64 ptr; | 278 | __u64 ptr; |
319 | int ret; | 279 | int ret; |
320 | 280 | ||
321 | key = nilfs_bmap_data_get_key(&direct->d_bmap, bh); | 281 | if (!NILFS_BMAP_USE_VBN(bmap)) |
282 | return 0; | ||
283 | |||
284 | dat = nilfs_bmap_get_dat(bmap); | ||
285 | key = nilfs_bmap_data_get_key(bmap, bh); | ||
322 | ptr = nilfs_direct_get_ptr(direct, key); | 286 | ptr = nilfs_direct_get_ptr(direct, key); |
323 | if (!buffer_nilfs_volatile(bh)) { | 287 | if (!buffer_nilfs_volatile(bh)) { |
324 | oldreq.bpr_ptr = ptr; | 288 | oldreq.pr_entry_nr = ptr; |
325 | newreq.bpr_ptr = ptr; | 289 | newreq.pr_entry_nr = ptr; |
326 | ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq, | 290 | ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq); |
327 | &newreq); | ||
328 | if (ret < 0) | 291 | if (ret < 0) |
329 | return ret; | 292 | return ret; |
330 | nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq); | 293 | nilfs_dat_commit_update(dat, &oldreq, &newreq, |
294 | bmap->b_ptr_type == NILFS_BMAP_PTR_VS); | ||
331 | set_buffer_nilfs_volatile(bh); | 295 | set_buffer_nilfs_volatile(bh); |
332 | nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); | 296 | nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr); |
333 | } else | 297 | } else |
334 | ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr); | 298 | ret = nilfs_dat_mark_dirty(dat, ptr); |
335 | 299 | ||
336 | return ret; | 300 | return ret; |
337 | } | 301 | } |
338 | 302 | ||
339 | static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, | ||
340 | struct buffer_head *bh) | ||
341 | { | ||
342 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; | ||
343 | |||
344 | return NILFS_BMAP_USE_VBN(bmap) ? | ||
345 | nilfs_direct_propagate_v(direct, bh) : 0; | ||
346 | } | ||
347 | |||
348 | static int nilfs_direct_assign_v(struct nilfs_direct *direct, | 303 | static int nilfs_direct_assign_v(struct nilfs_direct *direct, |
349 | __u64 key, __u64 ptr, | 304 | __u64 key, __u64 ptr, |
350 | struct buffer_head **bh, | 305 | struct buffer_head **bh, |
351 | sector_t blocknr, | 306 | sector_t blocknr, |
352 | union nilfs_binfo *binfo) | 307 | union nilfs_binfo *binfo) |
353 | { | 308 | { |
309 | struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap); | ||
354 | union nilfs_bmap_ptr_req req; | 310 | union nilfs_bmap_ptr_req req; |
355 | int ret; | 311 | int ret; |
356 | 312 | ||
357 | req.bpr_ptr = ptr; | 313 | req.bpr_ptr = ptr; |
358 | ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr); | 314 | ret = nilfs_dat_prepare_start(dat, &req.bpr_req); |
359 | if (unlikely(ret < 0)) | 315 | if (!ret) { |
360 | return ret; | 316 | nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); |
361 | 317 | binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); | |
362 | binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); | 318 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); |
363 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 319 | } |
364 | 320 | return ret; | |
365 | return 0; | ||
366 | } | 321 | } |
367 | 322 | ||
368 | static int nilfs_direct_assign_p(struct nilfs_direct *direct, | 323 | static int nilfs_direct_assign_p(struct nilfs_direct *direct, |
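The switch in nilfs_direct_propagate() from union nilfs_bmap_ptr_req to a bare struct nilfs_palloc_req works because the union is only a thin overlay; it has roughly the following shape (paraphrased from bmap.h — check the header for the authoritative definition), so code that now talks to the DAT directly can use the embedded request and its pr_entry_nr field without the wrapper:

    union nilfs_bmap_ptr_req {
            __u64 bpr_ptr;                  /* aliases pr_entry_nr below */
            struct nilfs_palloc_req bpr_req;
    };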
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 5d30a35679b5..ecc3ba76db47 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h | |||
@@ -31,7 +31,6 @@ | |||
31 | #include "mdt.h" | 31 | #include "mdt.h" |
32 | #include "alloc.h" | 32 | #include "alloc.h" |
33 | 33 | ||
34 | #define NILFS_IFILE_GFP NILFS_MDT_GFP | ||
35 | 34 | ||
36 | static inline struct nilfs_inode * | 35 | static inline struct nilfs_inode * |
37 | nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) | 36 | nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index fe9d8f2a13f8..807e584b163d 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -430,7 +430,8 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | |||
430 | 430 | ||
431 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); | 431 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); |
432 | 432 | ||
433 | if (nilfs_read_inode_common(inode, raw_inode)) | 433 | err = nilfs_read_inode_common(inode, raw_inode); |
434 | if (err) | ||
434 | goto failed_unmap; | 435 | goto failed_unmap; |
435 | 436 | ||
436 | if (S_ISREG(inode->i_mode)) { | 437 | if (S_ISREG(inode->i_mode)) { |
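The __nilfs_read_inode() hunk fixes a common bug shape: testing a helper's return value in the if-condition, then jumping to an error label that returns a separate err variable, leaves err stale (often 0, i.e. success). Capturing the return value first makes the error path honest. In isolation, with a hypothetical helper:

    int err;

    err = setup_step();     /* any int-returning helper */
    if (err)
            goto failed;    /* "failed:" now returns the real error code,
                               not whatever err happened to hold before */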
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 6ea5f872e2de..6572ea4bc4df 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -442,12 +442,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, | |||
442 | const char *msg; | 442 | const char *msg; |
443 | int ret; | 443 | int ret; |
444 | 444 | ||
445 | ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); | ||
446 | if (ret < 0) { | ||
447 | msg = "cannot read source blocks"; | ||
448 | goto failed; | ||
449 | } | ||
450 | |||
451 | ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); | 445 | ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); |
452 | if (ret < 0) { | 446 | if (ret < 0) { |
453 | /* | 447 | /* |
@@ -548,7 +542,25 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | |||
548 | } | 542 | } |
549 | } | 543 | } |
550 | 544 | ||
551 | ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); | 545 | /* |
546 | * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(), | ||
547 | * which operates on an inode list without blocking. | ||
548 | * To protect the list from concurrent operations, | ||
549 | * nilfs_ioctl_move_blocks should be an atomic operation. | ||
550 | */ | ||
551 | if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags)) { | ||
552 | ret = -EBUSY; | ||
553 | goto out_free; | ||
554 | } | ||
555 | |||
556 | ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); | ||
557 | if (ret < 0) | ||
558 | printk(KERN_ERR "NILFS: GC failed during preparation: " | ||
559 | "cannot read source blocks: err=%d\n", ret); | ||
560 | else | ||
561 | ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); | ||
562 | |||
563 | clear_nilfs_gc_running(nilfs); | ||
552 | 564 | ||
553 | out_free: | 565 | out_free: |
554 | while (--n >= 0) | 566 | while (--n >= 0) |
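The bit acts as a single-runner lock around GC preparation, since nilfs_gc_iget() walks the GC inode list without taking a blocking lock. A trimmed sketch of the guard (names from the patch; the EBUSY path in the real code jumps to out_free rather than returning directly):

    if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags))
            return -EBUSY;          /* another cleaner owns the GC inode list */

    ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
    if (ret >= 0)
            ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);

    clear_nilfs_gc_running(nilfs);  /* dropped on every path, even on error */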
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 3d3ddb3f5177..156bf6091a96 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c | |||
@@ -103,15 +103,12 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, | |||
103 | goto failed_unlock; | 103 | goto failed_unlock; |
104 | 104 | ||
105 | err = -EEXIST; | 105 | err = -EEXIST; |
106 | if (buffer_uptodate(bh) || buffer_mapped(bh)) | 106 | if (buffer_uptodate(bh)) |
107 | goto failed_bh; | 107 | goto failed_bh; |
108 | #if 0 | 108 | |
109 | /* The uptodate flag is not protected by the page lock, but | ||
110 | the mapped flag is. Thus, we don't have to wait the buffer. */ | ||
111 | wait_on_buffer(bh); | 109 | wait_on_buffer(bh); |
112 | if (buffer_uptodate(bh)) | 110 | if (buffer_uptodate(bh)) |
113 | goto failed_bh; | 111 | goto failed_bh; |
114 | #endif | ||
115 | 112 | ||
116 | bh->b_bdev = nilfs->ns_bdev; | 113 | bh->b_bdev = nilfs->ns_bdev; |
117 | err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); | 114 | err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); |
@@ -139,7 +136,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, | |||
139 | int mode, struct buffer_head **out_bh) | 136 | int mode, struct buffer_head **out_bh) |
140 | { | 137 | { |
141 | struct buffer_head *bh; | 138 | struct buffer_head *bh; |
142 | unsigned long blknum = 0; | 139 | __u64 blknum = 0; |
143 | int ret = -ENOMEM; | 140 | int ret = -ENOMEM; |
144 | 141 | ||
145 | bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); | 142 | bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); |
@@ -162,17 +159,15 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, | |||
162 | unlock_buffer(bh); | 159 | unlock_buffer(bh); |
163 | goto out; | 160 | goto out; |
164 | } | 161 | } |
165 | if (!buffer_mapped(bh)) { /* unused buffer */ | 162 | |
166 | ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, | 163 | ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum); |
167 | &blknum); | 164 | if (unlikely(ret)) { |
168 | if (unlikely(ret)) { | 165 | unlock_buffer(bh); |
169 | unlock_buffer(bh); | 166 | goto failed_bh; |
170 | goto failed_bh; | ||
171 | } | ||
172 | bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; | ||
173 | bh->b_blocknr = blknum; | ||
174 | set_buffer_mapped(bh); | ||
175 | } | 167 | } |
168 | bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; | ||
169 | bh->b_blocknr = (sector_t)blknum; | ||
170 | set_buffer_mapped(bh); | ||
176 | 171 | ||
177 | bh->b_end_io = end_buffer_read_sync; | 172 | bh->b_end_io = end_buffer_read_sync; |
178 | get_bh(bh); | 173 | get_bh(bh); |
@@ -402,6 +397,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) | |||
402 | struct inode *inode = container_of(page->mapping, | 397 | struct inode *inode = container_of(page->mapping, |
403 | struct inode, i_data); | 398 | struct inode, i_data); |
404 | struct super_block *sb = inode->i_sb; | 399 | struct super_block *sb = inode->i_sb; |
400 | struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; | ||
405 | struct nilfs_sb_info *writer = NULL; | 401 | struct nilfs_sb_info *writer = NULL; |
406 | int err = 0; | 402 | int err = 0; |
407 | 403 | ||
@@ -411,9 +407,12 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) | |||
411 | if (page->mapping->assoc_mapping) | 407 | if (page->mapping->assoc_mapping) |
412 | return 0; /* Do not request flush for shadow page cache */ | 408 | return 0; /* Do not request flush for shadow page cache */ |
413 | if (!sb) { | 409 | if (!sb) { |
414 | writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); | 410 | down_read(&nilfs->ns_writer_sem); |
415 | if (!writer) | 411 | writer = nilfs->ns_writer; |
412 | if (!writer) { | ||
413 | up_read(&nilfs->ns_writer_sem); | ||
416 | return -EROFS; | 414 | return -EROFS; |
415 | } | ||
417 | sb = writer->s_super; | 416 | sb = writer->s_super; |
418 | } | 417 | } |
419 | 418 | ||
@@ -423,7 +422,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) | |||
423 | nilfs_flush_segment(sb, inode->i_ino); | 422 | nilfs_flush_segment(sb, inode->i_ino); |
424 | 423 | ||
425 | if (writer) | 424 | if (writer) |
426 | nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); | 425 | up_read(&nilfs->ns_writer_sem); |
427 | return err; | 426 | return err; |
428 | } | 427 | } |
429 | 428 | ||
@@ -514,9 +513,10 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, | |||
514 | } | 513 | } |
515 | 514 | ||
516 | struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, | 515 | struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, |
517 | ino_t ino, gfp_t gfp_mask) | 516 | ino_t ino) |
518 | { | 517 | { |
519 | struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); | 518 | struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, |
519 | NILFS_MDT_GFP); | ||
520 | 520 | ||
521 | if (!inode) | 521 | if (!inode) |
522 | return NULL; | 522 | return NULL; |
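With the gfp_mask parameter gone from nilfs_mdt_new(), every metadata file is created with the common NILFS_MDT_GFP mask, which is why the per-file NILFS_CPFILE_GFP, NILFS_DAT_GFP and NILFS_IFILE_GFP defines are deleted elsewhere in this patch. Creation sites shrink accordingly (sketch; call-site code paraphrased, not quoted from the patch):

    /* before */  dat = nilfs_mdt_new(nilfs, sb, NILFS_DAT_INO, NILFS_DAT_GFP);
    /* after  */  dat = nilfs_mdt_new(nilfs, sb, NILFS_DAT_INO);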
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index df683e0bca6a..431599733c9b 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h | |||
@@ -74,8 +74,7 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); | |||
74 | int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); | 74 | int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); |
75 | int nilfs_mdt_fetch_dirty(struct inode *); | 75 | int nilfs_mdt_fetch_dirty(struct inode *); |
76 | 76 | ||
77 | struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, | 77 | struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t); |
78 | gfp_t); | ||
79 | struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, | 78 | struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, |
80 | ino_t, gfp_t); | 79 | ino_t, gfp_t); |
81 | void nilfs_mdt_destroy(struct inode *); | 80 | void nilfs_mdt_destroy(struct inode *); |
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index d80cc71be749..6dc83591d118 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c | |||
@@ -552,7 +552,8 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, | |||
552 | printk(KERN_WARNING | 552 | printk(KERN_WARNING |
553 | "NILFS warning: error recovering data block " | 553 | "NILFS warning: error recovering data block " |
554 | "(err=%d, ino=%lu, block-offset=%llu)\n", | 554 | "(err=%d, ino=%lu, block-offset=%llu)\n", |
555 | err, rb->ino, (unsigned long long)rb->blkoff); | 555 | err, (unsigned long)rb->ino, |
556 | (unsigned long long)rb->blkoff); | ||
556 | if (!err2) | 557 | if (!err2) |
557 | err2 = err; | 558 | err2 = err; |
558 | next: | 559 | next: |
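The recovery.c change is the standard printk portability idiom: ino_t does not have the same width on every architecture, so it is cast to a type that matches the format specifier exactly. The same idiom in a tiny standalone form (function name and message are illustrative):

#include <stdio.h>
#include <sys/types.h>

/* ino_t may be 32 or 64 bits depending on the platform; casting to a
 * fixed type keeps the format string correct everywhere. */
static void report_block_error(int err, ino_t ino, unsigned long long blkoff)
{
	printf("error recovering data block (err=%d, ino=%lu, "
	       "block-offset=%llu)\n",
	       err, (unsigned long)ino, blkoff);
}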
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 9e3fe17bb96b..e6d9e37fa241 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c | |||
@@ -316,10 +316,10 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, | |||
316 | { | 316 | { |
317 | struct bio *bio; | 317 | struct bio *bio; |
318 | 318 | ||
319 | bio = bio_alloc(GFP_NOWAIT, nr_vecs); | 319 | bio = bio_alloc(GFP_NOIO, nr_vecs); |
320 | if (bio == NULL) { | 320 | if (bio == NULL) { |
321 | while (!bio && (nr_vecs >>= 1)) | 321 | while (!bio && (nr_vecs >>= 1)) |
322 | bio = bio_alloc(GFP_NOWAIT, nr_vecs); | 322 | bio = bio_alloc(GFP_NOIO, nr_vecs); |
323 | } | 323 | } |
324 | if (likely(bio)) { | 324 | if (likely(bio)) { |
325 | bio->bi_bdev = sb->s_bdev; | 325 | bio->bi_bdev = sb->s_bdev; |
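GFP_NOWAIT becomes GFP_NOIO in the segment-buffer path: this code runs during writeback, where sleeping is acceptable but recursing into the block layer for reclaim is not, and GFP_NOIO permits the former while forbidding the latter. The surrounding fallback, which halves the vector count until some bio can be allocated, is worth reading as one unit (same calls as the patch):

	struct bio *bio = bio_alloc(GFP_NOIO, nr_vecs);

	/* Even a sleeping allocation can fail; retry with progressively
	 * smaller bios instead of failing the segment write outright. */
	while (!bio && (nr_vecs >>= 1))
		bio = bio_alloc(GFP_NOIO, nr_vecs);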
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 8b5e4778cf28..683df89dbae5 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -1859,12 +1859,26 @@ static void nilfs_end_page_io(struct page *page, int err) | |||
1859 | if (!page) | 1859 | if (!page) |
1860 | return; | 1860 | return; |
1861 | 1861 | ||
1862 | if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) | 1862 | if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) { |
1863 | /* | 1863 | /* |
1864 | * For b-tree node pages, this function may be called twice | 1864 | * For b-tree node pages, this function may be called twice |
1865 | * or more because they might be split in a segment. | 1865 | * or more because they might be split in a segment. |
1866 | */ | 1866 | */ |
1867 | if (PageDirty(page)) { | ||
1868 | /* | ||
1869 | * For pages holding split b-tree node buffers, dirty | ||
1870 | * flag on the buffers may be cleared discretely. | ||
1871 | * In that case, the page is once redirtied for | ||
1872 | * remaining buffers, and it must be cancelled if | ||
1873 | * all the buffers get cleaned later. | ||
1874 | */ | ||
1875 | lock_page(page); | ||
1876 | if (nilfs_page_buffers_clean(page)) | ||
1877 | __nilfs_clear_page_dirty(page); | ||
1878 | unlock_page(page); | ||
1879 | } | ||
1867 | return; | 1880 | return; |
1881 | } | ||
1868 | 1882 | ||
1869 | __nilfs_end_page_io(page, err); | 1883 | __nilfs_end_page_io(page, err); |
1870 | } | 1884 | } |
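The new branch covers b-tree node pages whose buffers are cleaned piecemeal across segments: the page gets redirtied while some buffers are still dirty, and that page-level dirty flag must be cancelled once the last buffer is cleaned, or the page would be written back again needlessly. Condensed, the cancellation is:

	if (PageDirty(page)) {
		lock_page(page);
		if (nilfs_page_buffers_clean(page))	/* every buffer clean? */
			__nilfs_clear_page_dirty(page);
		unlock_page(page);
	}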
@@ -2487,7 +2501,8 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, | |||
2487 | if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && | 2501 | if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && |
2488 | nilfs_discontinued(nilfs)) { | 2502 | nilfs_discontinued(nilfs)) { |
2489 | down_write(&nilfs->ns_sem); | 2503 | down_write(&nilfs->ns_sem); |
2490 | req->sb_err = nilfs_commit_super(sbi, 0); | 2504 | req->sb_err = nilfs_commit_super(sbi, |
2505 | nilfs_altsb_need_update(nilfs)); | ||
2491 | up_write(&nilfs->ns_sem); | 2506 | up_write(&nilfs->ns_sem); |
2492 | } | 2507 | } |
2493 | } | 2508 | } |
@@ -2675,6 +2690,7 @@ static int nilfs_segctor_thread(void *arg) | |||
2675 | } else { | 2690 | } else { |
2676 | DEFINE_WAIT(wait); | 2691 | DEFINE_WAIT(wait); |
2677 | int should_sleep = 1; | 2692 | int should_sleep = 1; |
2693 | struct the_nilfs *nilfs; | ||
2678 | 2694 | ||
2679 | prepare_to_wait(&sci->sc_wait_daemon, &wait, | 2695 | prepare_to_wait(&sci->sc_wait_daemon, &wait, |
2680 | TASK_INTERRUPTIBLE); | 2696 | TASK_INTERRUPTIBLE); |
@@ -2695,6 +2711,9 @@ static int nilfs_segctor_thread(void *arg) | |||
2695 | finish_wait(&sci->sc_wait_daemon, &wait); | 2711 | finish_wait(&sci->sc_wait_daemon, &wait); |
2696 | timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && | 2712 | timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && |
2697 | time_after_eq(jiffies, sci->sc_timer->expires)); | 2713 | time_after_eq(jiffies, sci->sc_timer->expires)); |
2714 | nilfs = sci->sc_sbi->s_nilfs; | ||
2715 | if (sci->sc_super->s_dirt && nilfs_sb_need_update(nilfs)) | ||
2716 | set_nilfs_discontinued(nilfs); | ||
2698 | } | 2717 | } |
2699 | goto loop; | 2718 | goto loop; |
2700 | 2719 | ||
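With write_super gone (see the super.c hunks below), the segment-constructor thread takes over the periodic staleness check: if the superblock is dirty and its on-disk copy has outlived NILFS_SB_FREQ, the "discontinued" flag is raised so the next construction writes a super root and recommits the superblock. The added check, annotated:

	nilfs = sci->sc_sbi->s_nilfs;
	if (sci->sc_super->s_dirt && nilfs_sb_need_update(nilfs))
		set_nilfs_discontinued(nilfs);	/* force a superblock commit */

nilfs_sb_need_update() is the helper introduced in the the_nilfs.h hunk further down.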
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a2c4d76c3366..0e99e5c0bd0f 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/nilfs2_fs.h> | 28 | #include <linux/nilfs2_fs.h> |
29 | #include "mdt.h" | 29 | #include "mdt.h" |
30 | 30 | ||
31 | #define NILFS_SUFILE_GFP NILFS_MDT_GFP | ||
32 | 31 | ||
33 | static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) | 32 | static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) |
34 | { | 33 | { |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8e2ec43b18f4..55f3d6b60732 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -50,6 +50,8 @@ | |||
50 | #include <linux/writeback.h> | 50 | #include <linux/writeback.h> |
51 | #include <linux/kobject.h> | 51 | #include <linux/kobject.h> |
52 | #include <linux/exportfs.h> | 52 | #include <linux/exportfs.h> |
53 | #include <linux/seq_file.h> | ||
54 | #include <linux/mount.h> | ||
53 | #include "nilfs.h" | 55 | #include "nilfs.h" |
54 | #include "mdt.h" | 56 | #include "mdt.h" |
55 | #include "alloc.h" | 57 | #include "alloc.h" |
@@ -65,7 +67,6 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " | |||
65 | "(NILFS)"); | 67 | "(NILFS)"); |
66 | MODULE_LICENSE("GPL"); | 68 | MODULE_LICENSE("GPL"); |
67 | 69 | ||
68 | static void nilfs_write_super(struct super_block *sb); | ||
69 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | 70 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); |
70 | 71 | ||
71 | /** | 72 | /** |
@@ -311,9 +312,6 @@ static void nilfs_put_super(struct super_block *sb) | |||
311 | 312 | ||
312 | lock_kernel(); | 313 | lock_kernel(); |
313 | 314 | ||
314 | if (sb->s_dirt) | ||
315 | nilfs_write_super(sb); | ||
316 | |||
317 | nilfs_detach_segment_constructor(sbi); | 315 | nilfs_detach_segment_constructor(sbi); |
318 | 316 | ||
319 | if (!(sb->s_flags & MS_RDONLY)) { | 317 | if (!(sb->s_flags & MS_RDONLY)) { |
@@ -336,63 +334,21 @@ static void nilfs_put_super(struct super_block *sb) | |||
336 | unlock_kernel(); | 334 | unlock_kernel(); |
337 | } | 335 | } |
338 | 336 | ||
339 | /** | 337 | static int nilfs_sync_fs(struct super_block *sb, int wait) |
340 | * nilfs_write_super - write super block(s) of NILFS | ||
341 | * @sb: super_block | ||
342 | * | ||
343 | * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and | ||
344 | * clears s_dirt. This function is called in the section protected by | ||
345 | * lock_super(). | ||
346 | * | ||
347 | * The s_dirt flag is managed by each filesystem and we protect it by ns_sem | ||
348 | * of the struct the_nilfs. Lock order must be as follows: | ||
349 | * | ||
350 | * 1. lock_super() | ||
351 | * 2. down_write(&nilfs->ns_sem) | ||
352 | * | ||
353 | * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer | ||
354 | * of the super block (nilfs->ns_sbp[]). | ||
355 | * | ||
356 | * In most cases, VFS functions call lock_super() before calling these | ||
357 | * methods. So we must be careful not to bring on deadlocks when using | ||
358 | * lock_super(); see generic_shutdown_super(), write_super(), and so on. | ||
359 | * | ||
360 | * Note that order of lock_kernel() and lock_super() depends on contexts | ||
361 | * of VFS. We should also note that lock_kernel() can be used in its | ||
362 | * protective section and only the outermost one has an effect. | ||
363 | */ | ||
364 | static void nilfs_write_super(struct super_block *sb) | ||
365 | { | 338 | { |
366 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 339 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
367 | struct the_nilfs *nilfs = sbi->s_nilfs; | 340 | struct the_nilfs *nilfs = sbi->s_nilfs; |
368 | |||
369 | down_write(&nilfs->ns_sem); | ||
370 | if (!(sb->s_flags & MS_RDONLY)) { | ||
371 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | ||
372 | u64 t = get_seconds(); | ||
373 | int dupsb; | ||
374 | |||
375 | if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] && | ||
376 | t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) { | ||
377 | up_write(&nilfs->ns_sem); | ||
378 | return; | ||
379 | } | ||
380 | dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; | ||
381 | nilfs_commit_super(sbi, dupsb); | ||
382 | } | ||
383 | sb->s_dirt = 0; | ||
384 | up_write(&nilfs->ns_sem); | ||
385 | } | ||
386 | |||
387 | static int nilfs_sync_fs(struct super_block *sb, int wait) | ||
388 | { | ||
389 | int err = 0; | 341 | int err = 0; |
390 | 342 | ||
391 | nilfs_write_super(sb); | ||
392 | |||
393 | /* This function is called when super block should be written back */ | 343 | /* This function is called when super block should be written back */ |
394 | if (wait) | 344 | if (wait) |
395 | err = nilfs_construct_segment(sb); | 345 | err = nilfs_construct_segment(sb); |
346 | |||
347 | down_write(&nilfs->ns_sem); | ||
348 | if (sb->s_dirt) | ||
349 | nilfs_commit_super(sbi, 1); | ||
350 | up_write(&nilfs->ns_sem); | ||
351 | |||
396 | return err; | 352 | return err; |
397 | } | 353 | } |
398 | 354 | ||
@@ -407,8 +363,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
407 | list_add(&sbi->s_list, &nilfs->ns_supers); | 363 | list_add(&sbi->s_list, &nilfs->ns_supers); |
408 | up_write(&nilfs->ns_super_sem); | 364 | up_write(&nilfs->ns_super_sem); |
409 | 365 | ||
410 | sbi->s_ifile = nilfs_mdt_new( | 366 | sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO); |
411 | nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); | ||
412 | if (!sbi->s_ifile) | 367 | if (!sbi->s_ifile) |
413 | return -ENOMEM; | 368 | return -ENOMEM; |
414 | 369 | ||
@@ -416,8 +371,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
416 | if (unlikely(err)) | 371 | if (unlikely(err)) |
417 | goto failed; | 372 | goto failed; |
418 | 373 | ||
374 | down_read(&nilfs->ns_segctor_sem); | ||
419 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, | 375 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, |
420 | &bh_cp); | 376 | &bh_cp); |
377 | up_read(&nilfs->ns_segctor_sem); | ||
421 | if (unlikely(err)) { | 378 | if (unlikely(err)) { |
422 | if (err == -ENOENT || err == -EINVAL) { | 379 | if (err == -ENOENT || err == -EINVAL) { |
423 | printk(KERN_ERR | 380 | printk(KERN_ERR |
@@ -527,6 +484,26 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
527 | return 0; | 484 | return 0; |
528 | } | 485 | } |
529 | 486 | ||
487 | static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | ||
488 | { | ||
489 | struct super_block *sb = vfs->mnt_sb; | ||
490 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
491 | |||
492 | if (!nilfs_test_opt(sbi, BARRIER)) | ||
493 | seq_printf(seq, ",barrier=off"); | ||
494 | if (nilfs_test_opt(sbi, SNAPSHOT)) | ||
495 | seq_printf(seq, ",cp=%llu", | ||
496 | (unsigned long long int)sbi->s_snapshot_cno); | ||
497 | if (nilfs_test_opt(sbi, ERRORS_RO)) | ||
498 | seq_printf(seq, ",errors=remount-ro"); | ||
499 | if (nilfs_test_opt(sbi, ERRORS_PANIC)) | ||
500 | seq_printf(seq, ",errors=panic"); | ||
501 | if (nilfs_test_opt(sbi, STRICT_ORDER)) | ||
502 | seq_printf(seq, ",order=strict"); | ||
503 | |||
504 | return 0; | ||
505 | } | ||
506 | |||
530 | static struct super_operations nilfs_sops = { | 507 | static struct super_operations nilfs_sops = { |
531 | .alloc_inode = nilfs_alloc_inode, | 508 | .alloc_inode = nilfs_alloc_inode, |
532 | .destroy_inode = nilfs_destroy_inode, | 509 | .destroy_inode = nilfs_destroy_inode, |
@@ -536,7 +513,7 @@ static struct super_operations nilfs_sops = { | |||
536 | /* .drop_inode = nilfs_drop_inode, */ | 513 | /* .drop_inode = nilfs_drop_inode, */ |
537 | .delete_inode = nilfs_delete_inode, | 514 | .delete_inode = nilfs_delete_inode, |
538 | .put_super = nilfs_put_super, | 515 | .put_super = nilfs_put_super, |
539 | .write_super = nilfs_write_super, | 516 | /* .write_super = nilfs_write_super, */ |
540 | .sync_fs = nilfs_sync_fs, | 517 | .sync_fs = nilfs_sync_fs, |
541 | /* .write_super_lockfs */ | 518 | /* .write_super_lockfs */ |
542 | /* .unlockfs */ | 519 | /* .unlockfs */ |
@@ -544,7 +521,7 @@ static struct super_operations nilfs_sops = { | |||
544 | .remount_fs = nilfs_remount, | 521 | .remount_fs = nilfs_remount, |
545 | .clear_inode = nilfs_clear_inode, | 522 | .clear_inode = nilfs_clear_inode, |
546 | /* .umount_begin */ | 523 | /* .umount_begin */ |
547 | /* .show_options */ | 524 | .show_options = nilfs_show_options |
548 | }; | 525 | }; |
549 | 526 | ||
550 | static struct inode * | 527 | static struct inode * |
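With .show_options wired into nilfs_sops, the mount options reported by nilfs_show_options() survive into /proc/mounts instead of being silently dropped. For a hypothetical read-only snapshot mount with barriers disabled (device, mountpoint and checkpoint number are made up), the entry would read roughly:

	/dev/sdb1 /mnt/snap nilfs2 ro,barrier=off,cp=12 0 0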
@@ -814,10 +791,15 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
814 | 791 | ||
815 | if (sb->s_flags & MS_RDONLY) { | 792 | if (sb->s_flags & MS_RDONLY) { |
816 | if (nilfs_test_opt(sbi, SNAPSHOT)) { | 793 | if (nilfs_test_opt(sbi, SNAPSHOT)) { |
794 | down_read(&nilfs->ns_segctor_sem); | ||
817 | err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, | 795 | err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, |
818 | sbi->s_snapshot_cno); | 796 | sbi->s_snapshot_cno); |
819 | if (err < 0) | 797 | up_read(&nilfs->ns_segctor_sem); |
798 | if (err < 0) { | ||
799 | if (err == -ENOENT) | ||
800 | err = -EINVAL; | ||
820 | goto failed_sbi; | 801 | goto failed_sbi; |
802 | } | ||
821 | if (!err) { | 803 | if (!err) { |
822 | printk(KERN_ERR | 804 | printk(KERN_ERR |
823 | "NILFS: The specified checkpoint is " | 805 | "NILFS: The specified checkpoint is " |
@@ -1125,10 +1107,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1125 | */ | 1107 | */ |
1126 | sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); | 1108 | sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); |
1127 | 1109 | ||
1128 | if (!sd.cno) | ||
1129 | /* trying to get the latest checkpoint. */ | ||
1130 | sd.cno = nilfs_last_cno(nilfs); | ||
1131 | |||
1132 | /* | 1110 | /* |
1133 | * Get super block instance holding the nilfs_sb_info struct. | 1111 | * Get super block instance holding the nilfs_sb_info struct. |
1134 | * A new instance is allocated if no existing mount is present or | 1112 | * A new instance is allocated if no existing mount is present or |
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 8b8889825716..ad391a8c3e7e 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -68,12 +68,11 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev) | |||
68 | 68 | ||
69 | nilfs->ns_bdev = bdev; | 69 | nilfs->ns_bdev = bdev; |
70 | atomic_set(&nilfs->ns_count, 1); | 70 | atomic_set(&nilfs->ns_count, 1); |
71 | atomic_set(&nilfs->ns_writer_refcount, -1); | ||
72 | atomic_set(&nilfs->ns_ndirtyblks, 0); | 71 | atomic_set(&nilfs->ns_ndirtyblks, 0); |
73 | init_rwsem(&nilfs->ns_sem); | 72 | init_rwsem(&nilfs->ns_sem); |
74 | init_rwsem(&nilfs->ns_super_sem); | 73 | init_rwsem(&nilfs->ns_super_sem); |
75 | mutex_init(&nilfs->ns_mount_mutex); | 74 | mutex_init(&nilfs->ns_mount_mutex); |
76 | mutex_init(&nilfs->ns_writer_mutex); | 75 | init_rwsem(&nilfs->ns_writer_sem); |
77 | INIT_LIST_HEAD(&nilfs->ns_list); | 76 | INIT_LIST_HEAD(&nilfs->ns_list); |
78 | INIT_LIST_HEAD(&nilfs->ns_supers); | 77 | INIT_LIST_HEAD(&nilfs->ns_supers); |
79 | spin_lock_init(&nilfs->ns_last_segment_lock); | 78 | spin_lock_init(&nilfs->ns_last_segment_lock); |
@@ -188,23 +187,19 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, | |||
188 | inode_size = nilfs->ns_inode_size; | 187 | inode_size = nilfs->ns_inode_size; |
189 | 188 | ||
190 | err = -ENOMEM; | 189 | err = -ENOMEM; |
191 | nilfs->ns_dat = nilfs_mdt_new( | 190 | nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); |
192 | nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); | ||
193 | if (unlikely(!nilfs->ns_dat)) | 191 | if (unlikely(!nilfs->ns_dat)) |
194 | goto failed; | 192 | goto failed; |
195 | 193 | ||
196 | nilfs->ns_gc_dat = nilfs_mdt_new( | 194 | nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); |
197 | nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); | ||
198 | if (unlikely(!nilfs->ns_gc_dat)) | 195 | if (unlikely(!nilfs->ns_gc_dat)) |
199 | goto failed_dat; | 196 | goto failed_dat; |
200 | 197 | ||
201 | nilfs->ns_cpfile = nilfs_mdt_new( | 198 | nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO); |
202 | nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP); | ||
203 | if (unlikely(!nilfs->ns_cpfile)) | 199 | if (unlikely(!nilfs->ns_cpfile)) |
204 | goto failed_gc_dat; | 200 | goto failed_gc_dat; |
205 | 201 | ||
206 | nilfs->ns_sufile = nilfs_mdt_new( | 202 | nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO); |
207 | nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP); | ||
208 | if (unlikely(!nilfs->ns_sufile)) | 203 | if (unlikely(!nilfs->ns_sufile)) |
209 | goto failed_cpfile; | 204 | goto failed_cpfile; |
210 | 205 | ||
@@ -596,9 +591,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
596 | 591 | ||
597 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); | 592 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); |
598 | 593 | ||
599 | bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; | 594 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; |
600 | if (!bdi) | ||
601 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; | ||
602 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; | 595 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; |
603 | 596 | ||
604 | /* Finding last segment */ | 597 | /* Finding last segment */ |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index e8adbffc626f..20abd55881e0 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -37,6 +37,7 @@ enum { | |||
37 | THE_NILFS_LOADED, /* Roll-back/roll-forward has done and | 37 | THE_NILFS_LOADED, /* Roll-back/roll-forward has done and |
38 | the latest checkpoint was loaded */ | 38 | the latest checkpoint was loaded */ |
39 | THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ | 39 | THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ |
40 | THE_NILFS_GC_RUNNING, /* gc process is running */ | ||
40 | }; | 41 | }; |
41 | 42 | ||
42 | /** | 43 | /** |
@@ -50,8 +51,7 @@ enum { | |||
50 | * @ns_sem: semaphore for shared states | 51 | * @ns_sem: semaphore for shared states |
51 | * @ns_super_sem: semaphore for global operations across super block instances | 52 | * @ns_super_sem: semaphore for global operations across super block instances |
52 | * @ns_mount_mutex: mutex protecting mount process of nilfs | 53 | * @ns_mount_mutex: mutex protecting mount process of nilfs |
53 | * @ns_writer_mutex: mutex protecting ns_writer attach/detach | 54 | * @ns_writer_sem: semaphore protecting ns_writer attach/detach |
54 | * @ns_writer_refcount: number of referrers on ns_writer | ||
55 | * @ns_current: back pointer to current mount | 55 | * @ns_current: back pointer to current mount |
56 | * @ns_sbh: buffer heads of on-disk super blocks | 56 | * @ns_sbh: buffer heads of on-disk super blocks |
57 | * @ns_sbp: pointers to super block data | 57 | * @ns_sbp: pointers to super block data |
@@ -100,8 +100,7 @@ struct the_nilfs { | |||
100 | struct rw_semaphore ns_sem; | 100 | struct rw_semaphore ns_sem; |
101 | struct rw_semaphore ns_super_sem; | 101 | struct rw_semaphore ns_super_sem; |
102 | struct mutex ns_mount_mutex; | 102 | struct mutex ns_mount_mutex; |
103 | struct mutex ns_writer_mutex; | 103 | struct rw_semaphore ns_writer_sem; |
104 | atomic_t ns_writer_refcount; | ||
105 | 104 | ||
106 | /* | 105 | /* |
107 | * components protected by ns_super_sem | 106 | * components protected by ns_super_sem |
@@ -197,11 +196,26 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \ | |||
197 | THE_NILFS_FNS(INIT, init) | 196 | THE_NILFS_FNS(INIT, init) |
198 | THE_NILFS_FNS(LOADED, loaded) | 197 | THE_NILFS_FNS(LOADED, loaded) |
199 | THE_NILFS_FNS(DISCONTINUED, discontinued) | 198 | THE_NILFS_FNS(DISCONTINUED, discontinued) |
199 | THE_NILFS_FNS(GC_RUNNING, gc_running) | ||
200 | 200 | ||
201 | /* Minimum interval of periodical update of superblocks (in seconds) */ | 201 | /* Minimum interval of periodical update of superblocks (in seconds) */ |
202 | #define NILFS_SB_FREQ 10 | 202 | #define NILFS_SB_FREQ 10 |
203 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ | 203 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ |
204 | 204 | ||
205 | static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) | ||
206 | { | ||
207 | u64 t = get_seconds(); | ||
208 | return t < nilfs->ns_sbwtime[0] || | ||
209 | t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ; | ||
210 | } | ||
211 | |||
212 | static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs) | ||
213 | { | ||
214 | u64 t = get_seconds(); | ||
215 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | ||
216 | return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; | ||
217 | } | ||
218 | |||
205 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); | 219 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); |
206 | struct the_nilfs *find_or_create_nilfs(struct block_device *); | 220 | struct the_nilfs *find_or_create_nilfs(struct block_device *); |
207 | void put_nilfs(struct the_nilfs *); | 221 | void put_nilfs(struct the_nilfs *); |
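nilfs_sb_need_update() and nilfs_altsb_need_update() lift the time-window arithmetic out of the deleted nilfs_write_super(), so sync_fs and the segctor thread can share one definition of "stale". The flag accessors used alongside them (nilfs_discontinued(), set_nilfs_gc_running(), ...) are generated by the THE_NILFS_FNS() macro; expanded for the new GC_RUNNING bit, and assuming the usual ns_flags word the macro operates on, it yields approximately:

static inline void set_nilfs_gc_running(struct the_nilfs *nilfs)
{
	set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags);
}

static inline void clear_nilfs_gc_running(struct the_nilfs *nilfs)
{
	clear_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags);
}

static inline int nilfs_gc_running(struct the_nilfs *nilfs)
{
	return test_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags);
}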
@@ -221,39 +235,26 @@ static inline void get_nilfs(struct the_nilfs *nilfs) | |||
221 | atomic_inc(&nilfs->ns_count); | 235 | atomic_inc(&nilfs->ns_count); |
222 | } | 236 | } |
223 | 237 | ||
224 | static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs) | ||
225 | { | ||
226 | if (atomic_inc_and_test(&nilfs->ns_writer_refcount)) | ||
227 | mutex_lock(&nilfs->ns_writer_mutex); | ||
228 | return nilfs->ns_writer; | ||
229 | } | ||
230 | |||
231 | static inline void nilfs_put_writer(struct the_nilfs *nilfs) | ||
232 | { | ||
233 | if (atomic_add_negative(-1, &nilfs->ns_writer_refcount)) | ||
234 | mutex_unlock(&nilfs->ns_writer_mutex); | ||
235 | } | ||
236 | |||
237 | static inline void | 238 | static inline void |
238 | nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | 239 | nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) |
239 | { | 240 | { |
240 | mutex_lock(&nilfs->ns_writer_mutex); | 241 | down_write(&nilfs->ns_writer_sem); |
241 | nilfs->ns_writer = sbi; | 242 | nilfs->ns_writer = sbi; |
242 | mutex_unlock(&nilfs->ns_writer_mutex); | 243 | up_write(&nilfs->ns_writer_sem); |
243 | } | 244 | } |
244 | 245 | ||
245 | static inline void | 246 | static inline void |
246 | nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | 247 | nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) |
247 | { | 248 | { |
248 | mutex_lock(&nilfs->ns_writer_mutex); | 249 | down_write(&nilfs->ns_writer_sem); |
249 | if (sbi == nilfs->ns_writer) | 250 | if (sbi == nilfs->ns_writer) |
250 | nilfs->ns_writer = NULL; | 251 | nilfs->ns_writer = NULL; |
251 | mutex_unlock(&nilfs->ns_writer_mutex); | 252 | up_write(&nilfs->ns_writer_sem); |
252 | } | 253 | } |
253 | 254 | ||
254 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) | 255 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) |
255 | { | 256 | { |
256 | if (!atomic_dec_and_test(&sbi->s_count)) | 257 | if (atomic_dec_and_test(&sbi->s_count)) |
257 | kfree(sbi); | 258 | kfree(sbi); |
258 | } | 259 | } |
259 | 260 | ||
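Two separate fixes land in this last the_nilfs.h hunk. First, attach/detach take the new ns_writer_sem exclusively, so they serialize against readers such as nilfs_mdt_write_page() above. Second, nilfs_put_sbinfo() drops an inverted test: atomic_dec_and_test() returns true only when the counter reaches zero, so the old "!" freed sbi while references were still outstanding. The corrected drop-reference idiom:

static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
{
	/* true exactly when we were the last reference holder */
	if (atomic_dec_and_test(&sbi->s_count))
		kfree(sbi);
}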
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 31dac7e3b0f1..dffbb0911d02 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig | |||
@@ -1,15 +1,5 @@ | |||
1 | config FSNOTIFY | 1 | config FSNOTIFY |
2 | bool "Filesystem notification backend" | 2 | def_bool n |
3 | default y | ||
4 | ---help--- | ||
5 | fsnotify is a backend for filesystem notification. fsnotify does | ||
6 | not provide any userspace interface but does provide the basis | ||
7 | needed for other notification schemes such as dnotify, inotify, | ||
8 | and fanotify. | ||
9 | |||
10 | Say Y here to enable fsnotify support. | ||
11 | |||
12 | If unsure, say Y. | ||
13 | 3 | ||
14 | source "fs/notify/dnotify/Kconfig" | 4 | source "fs/notify/dnotify/Kconfig" |
15 | source "fs/notify/inotify/Kconfig" | 5 | source "fs/notify/inotify/Kconfig" |
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig index 904ff8d5405a..f9c1ca139d8f 100644 --- a/fs/notify/dnotify/Kconfig +++ b/fs/notify/dnotify/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config DNOTIFY | 1 | config DNOTIFY |
2 | bool "Dnotify support" | 2 | bool "Dnotify support" |
3 | depends on FSNOTIFY | 3 | select FSNOTIFY |
4 | default y | 4 | default y |
5 | help | 5 | help |
6 | Dnotify is a directory-based per-fd file change notification system | 6 | Dnotify is a directory-based per-fd file change notification system |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ec2f7bd76818..037e878e03fc 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -159,7 +159,9 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const | |||
159 | if (!group->ops->should_send_event(group, to_tell, mask)) | 159 | if (!group->ops->should_send_event(group, to_tell, mask)) |
160 | continue; | 160 | continue; |
161 | if (!event) { | 161 | if (!event) { |
162 | event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie); | 162 | event = fsnotify_create_event(to_tell, mask, data, |
163 | data_is, file_name, cookie, | ||
164 | GFP_KERNEL); | ||
163 | /* shit, we OOM'd and now we can't tell, maybe | 165 | /* shit, we OOM'd and now we can't tell, maybe |
164 | * someday someone else will want to do something | 166 | * someday someone else will want to do something |
165 | * here */ | 167 | * here */ |
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 5356884289a1..3e56dbffe729 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig | |||
@@ -15,7 +15,7 @@ config INOTIFY | |||
15 | 15 | ||
16 | config INOTIFY_USER | 16 | config INOTIFY_USER |
17 | bool "Inotify support for userspace" | 17 | bool "Inotify support for userspace" |
18 | depends on FSNOTIFY | 18 | select FSNOTIFY |
19 | default y | 19 | default y |
20 | ---help--- | 20 | ---help--- |
21 | Say Y here to enable inotify support for userspace, including the | 21 | Say Y here to enable inotify support for userspace, including the |
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 47cd258fd24d..c9ee67b442e1 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev | |||
62 | event_priv->wd = wd; | 62 | event_priv->wd = wd; |
63 | 63 | ||
64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); | 64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); |
65 | /* EEXIST is not an error */ | 65 | if (ret) { |
66 | if (ret == -EEXIST) | ||
67 | ret = 0; | ||
68 | |||
69 | /* did event_priv get attached? */ | ||
70 | if (list_empty(&fsn_event_priv->event_list)) | ||
71 | inotify_free_event_priv(fsn_event_priv); | 66 | inotify_free_event_priv(fsn_event_priv); |
67 | /* EEXIST says we tail matched, EOVERFLOW isn't something | ||
68 | * to report up the stack. */ | ||
69 | if ((ret == -EEXIST) || | ||
70 | (ret == -EOVERFLOW)) | ||
71 | ret = 0; | ||
72 | } | ||
72 | 73 | ||
73 | /* | 74 | /* |
74 | * If we hold the entry until after the event is on the queue | 75 | * If we hold the entry until after the event is on the queue |
@@ -104,16 +105,45 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
104 | return send; | 105 | return send; |
105 | } | 106 | } |
106 | 107 | ||
108 | /* | ||
109 | * This is NEVER supposed to be called. Inotify marks should either have been | ||
110 | * removed from the idr when the watch was removed or in the | ||
111 | * fsnotify_destroy_mark_by_group() call when the inotify instance was being | ||
112 | * torn down. This is only called if the idr is about to be freed but there | ||
113 | * are still marks in it. | ||
114 | */ | ||
107 | static int idr_callback(int id, void *p, void *data) | 115 | static int idr_callback(int id, void *p, void *data) |
108 | { | 116 | { |
109 | BUG(); | 117 | struct fsnotify_mark_entry *entry; |
118 | struct inotify_inode_mark_entry *ientry; | ||
119 | static bool warned = false; | ||
120 | |||
121 | if (warned) | ||
122 | return 0; | ||
123 | |||
124 | warned = true; | ||
125 | entry = p; | ||
126 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
127 | |||
128 | WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in " | ||
129 | "idr. Probably leaking memory\n", id, p, data); | ||
130 | |||
131 | /* | ||
132 | * I'm taking the liberty of assuming that the mark in question is a | ||
133 | * valid address and I'm dereferencing it. This might help to figure | ||
134 | * out why we got here and the panic is no worse than the original | ||
135 | * BUG() that was here. | ||
136 | */ | ||
137 | if (entry) | ||
138 | printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n", | ||
139 | entry->group, entry->inode, ientry->wd); | ||
110 | return 0; | 140 | return 0; |
111 | } | 141 | } |
112 | 142 | ||
113 | static void inotify_free_group_priv(struct fsnotify_group *group) | 143 | static void inotify_free_group_priv(struct fsnotify_group *group) |
114 | { | 144 | { |
115 | /* ideally the idr is empty and we won't hit the BUG in the callback */ | 145 | /* ideally the idr is empty and we won't hit the BUG in the callback */
116 | idr_for_each(&group->inotify_data.idr, idr_callback, NULL); | 146 | idr_for_each(&group->inotify_data.idr, idr_callback, group); |
117 | idr_remove_all(&group->inotify_data.idr); | 147 | idr_remove_all(&group->inotify_data.idr); |
118 | idr_destroy(&group->inotify_data.idr); | 148 | idr_destroy(&group->inotify_data.idr); |
119 | } | 149 | } |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ff27a2965844..dcd2040d330c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -47,9 +47,6 @@ | |||
47 | 47 | ||
48 | static struct vfsmount *inotify_mnt __read_mostly; | 48 | static struct vfsmount *inotify_mnt __read_mostly; |
49 | 49 | ||
50 | /* this just sits here and wastes global memory. used to just pad userspace messages with zeros */ | ||
51 | static struct inotify_event nul_inotify_event; | ||
52 | |||
53 | /* these are configurable via /proc/sys/fs/inotify/ */ | 50 | /* these are configurable via /proc/sys/fs/inotify/ */ |
54 | static int inotify_max_user_instances __read_mostly; | 51 | static int inotify_max_user_instances __read_mostly; |
55 | static int inotify_max_queued_events __read_mostly; | 52 | static int inotify_max_queued_events __read_mostly; |
@@ -57,7 +54,6 @@ int inotify_max_user_watches __read_mostly; | |||
57 | 54 | ||
58 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; | 55 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; |
59 | struct kmem_cache *event_priv_cachep __read_mostly; | 56 | struct kmem_cache *event_priv_cachep __read_mostly; |
60 | static struct fsnotify_event *inotify_ignored_event; | ||
61 | 57 | ||
62 | /* | 58 | /* |
63 | * When inotify registers a new group it increments this and uses that | 59 | * When inotify registers a new group it increments this and uses that |
@@ -158,7 +154,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
158 | 154 | ||
159 | event = fsnotify_peek_notify_event(group); | 155 | event = fsnotify_peek_notify_event(group); |
160 | 156 | ||
161 | event_size += roundup(event->name_len, event_size); | 157 | if (event->name_len) |
158 | event_size += roundup(event->name_len + 1, event_size); | ||
162 | 159 | ||
163 | if (event_size > count) | 160 | if (event_size > count) |
164 | return ERR_PTR(-EINVAL); | 161 | return ERR_PTR(-EINVAL); |
@@ -184,7 +181,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
184 | struct fsnotify_event_private_data *fsn_priv; | 181 | struct fsnotify_event_private_data *fsn_priv; |
185 | struct inotify_event_private_data *priv; | 182 | struct inotify_event_private_data *priv; |
186 | size_t event_size = sizeof(struct inotify_event); | 183 | size_t event_size = sizeof(struct inotify_event); |
187 | size_t name_len; | 184 | size_t name_len = 0; |
188 | 185 | ||
189 | /* we get the inotify watch descriptor from the event private data */ | 186 | /* we get the inotify watch descriptor from the event private data */ |
190 | spin_lock(&event->lock); | 187 | spin_lock(&event->lock); |
@@ -200,8 +197,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
200 | inotify_free_event_priv(fsn_priv); | 197 | inotify_free_event_priv(fsn_priv); |
201 | } | 198 | } |
202 | 199 | ||
203 | /* round up event->name_len so it is a multiple of event_size */ | 200 | /* |
204 | name_len = roundup(event->name_len, event_size); | 201 | * round up event->name_len so it is a multiple of event_size |
202 | * plus an extra byte for the terminating '\0'. | ||
203 | */ | ||
204 | if (event->name_len) | ||
205 | name_len = roundup(event->name_len + 1, event_size); | ||
205 | inotify_event.len = name_len; | 206 | inotify_event.len = name_len; |
206 | 207 | ||
207 | inotify_event.mask = inotify_mask_to_arg(event->mask); | 208 | inotify_event.mask = inotify_mask_to_arg(event->mask); |
@@ -225,8 +226,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
225 | return -EFAULT; | 226 | return -EFAULT; |
226 | buf += event->name_len; | 227 | buf += event->name_len; |
227 | 228 | ||
228 | /* fill userspace with 0's from nul_inotify_event */ | 229 | /* fill userspace with 0's */ |
229 | if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) | 230 | if (clear_user(buf, len_to_zero)) |
230 | return -EFAULT; | 231 | return -EFAULT; |
231 | buf += len_to_zero; | 232 | buf += len_to_zero; |
232 | event_size += name_len; | 233 | event_size += name_len; |
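These length changes are visible from userspace: for a non-empty name, event->len is now rounded up from name_len + 1, so the name is always NUL-terminated and zero-padded (via clear_user()) out to a multiple of sizeof(struct inotify_event). A minimal reader that leans on exactly that contract (the watched path is made up):

#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(void)
{
	char buf[4096];
	char *p;
	ssize_t n;
	int fd = inotify_init();

	if (fd < 0)
		return 1;
	inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);

	n = read(fd, buf, sizeof(buf));		/* blocks for one batch */
	for (p = buf; n > 0 && p < buf + n; ) {
		struct inotify_event *ev = (struct inotify_event *)p;

		/* ev->len covers the padded, NUL-terminated name (or is 0) */
		if (ev->len)
			printf("wd=%d mask=%#x name=%s\n",
			       ev->wd, (unsigned)ev->mask, ev->name);
		p += sizeof(*ev) + ev->len;
	}
	return 0;
}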
@@ -327,8 +328,9 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, | |||
327 | list_for_each_entry(holder, &group->notification_list, event_list) { | 328 | list_for_each_entry(holder, &group->notification_list, event_list) { |
328 | event = holder->event; | 329 | event = holder->event; |
329 | send_len += sizeof(struct inotify_event); | 330 | send_len += sizeof(struct inotify_event); |
330 | send_len += roundup(event->name_len, | 331 | if (event->name_len) |
331 | sizeof(struct inotify_event)); | 332 | send_len += roundup(event->name_len + 1, |
333 | sizeof(struct inotify_event)); | ||
332 | } | 334 | } |
333 | mutex_unlock(&group->notification_mutex); | 335 | mutex_unlock(&group->notification_mutex); |
334 | ret = put_user(send_len, (int __user *) p); | 336 | ret = put_user(send_len, (int __user *) p); |
@@ -366,20 +368,71 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns | |||
366 | } | 368 | } |
367 | 369 | ||
368 | /* | 370 | /* |
369 | * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the | 371 | * Remove the mark from the idr (if present) and drop the reference |
370 | * internal reference held on the mark because it is in the idr. | 372 | * on the mark because it was in the idr.
373 | */ | ||
374 | static void inotify_remove_from_idr(struct fsnotify_group *group, | ||
375 | struct inotify_inode_mark_entry *ientry) | ||
376 | { | ||
377 | struct idr *idr; | ||
378 | struct fsnotify_mark_entry *entry; | ||
379 | struct inotify_inode_mark_entry *found_ientry; | ||
380 | int wd; | ||
381 | |||
382 | spin_lock(&group->inotify_data.idr_lock); | ||
383 | idr = &group->inotify_data.idr; | ||
384 | wd = ientry->wd; | ||
385 | |||
386 | if (wd == -1) | ||
387 | goto out; | ||
388 | |||
389 | entry = idr_find(&group->inotify_data.idr, wd); | ||
390 | if (unlikely(!entry)) | ||
391 | goto out; | ||
392 | |||
393 | found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
394 | if (unlikely(found_ientry != ientry)) { | ||
395 | /* We found an entry in the idr with the right wd, but it's | ||
396 | * not the entry we were told to remove. eparis seriously | ||
397 | * fucked up somewhere. */ | ||
398 | WARN_ON(1); | ||
399 | ientry->wd = -1; | ||
400 | goto out; | ||
401 | } | ||
402 | |||
403 | /* One ref for being in the idr, one ref held by the caller */ | ||
404 | BUG_ON(atomic_read(&entry->refcnt) < 2); | ||
405 | |||
406 | idr_remove(idr, wd); | ||
407 | ientry->wd = -1; | ||
408 | |||
409 | /* removed from the idr, drop that ref */ | ||
410 | fsnotify_put_mark(entry); | ||
411 | out: | ||
412 | spin_unlock(&group->inotify_data.idr_lock); | ||
413 | } | ||
414 | |||
415 | /* | ||
416 | * Send IN_IGNORED for this wd, remove this wd from the idr. | ||
371 | */ | 417 | */ |
372 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | 418 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, |
373 | struct fsnotify_group *group) | 419 | struct fsnotify_group *group) |
374 | { | 420 | { |
375 | struct inotify_inode_mark_entry *ientry; | 421 | struct inotify_inode_mark_entry *ientry; |
422 | struct fsnotify_event *ignored_event; | ||
376 | struct inotify_event_private_data *event_priv; | 423 | struct inotify_event_private_data *event_priv; |
377 | struct fsnotify_event_private_data *fsn_event_priv; | 424 | struct fsnotify_event_private_data *fsn_event_priv; |
378 | struct idr *idr; | 425 | int ret; |
426 | |||
427 | ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, | ||
428 | FSNOTIFY_EVENT_NONE, NULL, 0, | ||
429 | GFP_NOFS); | ||
430 | if (!ignored_event) | ||
431 | return; | ||
379 | 432 | ||
380 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 433 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); |
381 | 434 | ||
382 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); | 435 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); |
383 | if (unlikely(!event_priv)) | 436 | if (unlikely(!event_priv)) |
384 | goto skip_send_ignore; | 437 | goto skip_send_ignore; |
385 | 438 | ||
@@ -388,22 +441,19 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
388 | fsn_event_priv->group = group; | 441 | fsn_event_priv->group = group; |
389 | event_priv->wd = ientry->wd; | 442 | event_priv->wd = ientry->wd; |
390 | 443 | ||
391 | fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv); | 444 | ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); |
392 | 445 | if (ret) | |
393 | /* did the private data get added? */ | ||
394 | if (list_empty(&fsn_event_priv->event_list)) | ||
395 | inotify_free_event_priv(fsn_event_priv); | 446 | inotify_free_event_priv(fsn_event_priv); |
396 | 447 | ||
397 | skip_send_ignore: | 448 | skip_send_ignore: |
398 | 449 | ||
450 | /* matches the reference taken when the event was created */ | ||
451 | fsnotify_put_event(ignored_event); | ||
452 | |||
399 | /* remove this entry from the idr */ | 453 | /* remove this entry from the idr */ |
400 | spin_lock(&group->inotify_data.idr_lock); | 454 | inotify_remove_from_idr(group, ientry); |
401 | idr = &group->inotify_data.idr; | ||
402 | idr_remove(idr, ientry->wd); | ||
403 | spin_unlock(&group->inotify_data.idr_lock); | ||
404 | 455 | ||
405 | /* removed from idr, drop that reference */ | 456 | atomic_dec(&group->inotify_data.user->inotify_watches); |
406 | fsnotify_put_mark(entry); | ||
407 | } | 457 | } |
408 | 458 | ||
409 | /* ding dong the mark is dead */ | 459 | /* ding dong the mark is dead */ |
@@ -414,67 +464,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry) | |||
414 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | 464 | kmem_cache_free(inotify_inode_mark_cachep, ientry); |
415 | } | 465 | } |
416 | 466 | ||
417 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | 467 | static int inotify_update_existing_watch(struct fsnotify_group *group, |
468 | struct inode *inode, | ||
469 | u32 arg) | ||
418 | { | 470 | { |
419 | struct fsnotify_mark_entry *entry = NULL; | 471 | struct fsnotify_mark_entry *entry; |
420 | struct inotify_inode_mark_entry *ientry; | 472 | struct inotify_inode_mark_entry *ientry; |
421 | int ret = 0; | ||
422 | int add = (arg & IN_MASK_ADD); | ||
423 | __u32 mask; | ||
424 | __u32 old_mask, new_mask; | 473 | __u32 old_mask, new_mask; |
474 | __u32 mask; | ||
475 | int add = (arg & IN_MASK_ADD); | ||
476 | int ret; | ||
425 | 477 | ||
426 | /* don't allow invalid bits: we don't want flags set */ | 478 | /* don't allow invalid bits: we don't want flags set */ |
427 | mask = inotify_arg_to_mask(arg); | 479 | mask = inotify_arg_to_mask(arg); |
428 | if (unlikely(!mask)) | 480 | if (unlikely(!mask)) |
429 | return -EINVAL; | 481 | return -EINVAL; |
430 | 482 | ||
431 | ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
432 | if (unlikely(!ientry)) | ||
433 | return -ENOMEM; | ||
434 | /* we set the mask at the end after attaching it */ | ||
435 | fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark); | ||
436 | ientry->wd = 0; | ||
437 | |||
438 | find_entry: | ||
439 | spin_lock(&inode->i_lock); | 483 | spin_lock(&inode->i_lock); |
440 | entry = fsnotify_find_mark_entry(group, inode); | 484 | entry = fsnotify_find_mark_entry(group, inode); |
441 | spin_unlock(&inode->i_lock); | 485 | spin_unlock(&inode->i_lock); |
442 | if (entry) { | 486 | if (!entry) |
443 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | 487 | return -ENOENT; |
444 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
445 | } else { | ||
446 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) { | ||
447 | ret = -ENOSPC; | ||
448 | goto out_err; | ||
449 | } | ||
450 | |||
451 | ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode); | ||
452 | if (ret == -EEXIST) | ||
453 | goto find_entry; | ||
454 | else if (ret) | ||
455 | goto out_err; | ||
456 | 488 | ||
457 | entry = &ientry->fsn_entry; | 489 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); |
458 | retry: | ||
459 | ret = -ENOMEM; | ||
460 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
461 | goto out_err; | ||
462 | |||
463 | spin_lock(&group->inotify_data.idr_lock); | ||
464 | /* if entry is added to the idr we keep the reference obtained | ||
465 | * through fsnotify_mark_add. remember to drop this reference | ||
466 | * when entry is removed from idr */ | ||
467 | ret = idr_get_new_above(&group->inotify_data.idr, entry, | ||
468 | ++group->inotify_data.last_wd, | ||
469 | &ientry->wd); | ||
470 | spin_unlock(&group->inotify_data.idr_lock); | ||
471 | if (ret) { | ||
472 | if (ret == -EAGAIN) | ||
473 | goto retry; | ||
474 | goto out_err; | ||
475 | } | ||
476 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
477 | } | ||
478 | 490 | ||
479 | spin_lock(&entry->lock); | 491 | spin_lock(&entry->lock); |
480 | 492 | ||
@@ -506,14 +518,108 @@ retry: | |||
506 | fsnotify_recalc_group_mask(group); | 518 | fsnotify_recalc_group_mask(group); |
507 | } | 519 | } |
508 | 520 | ||
509 | return ientry->wd; | 521 | /* return the wd */ |
522 | ret = ientry->wd; | ||
510 | 523 | ||
511 | out_err: | 524 | /* match the get from fsnotify_find_mark_entry() */ |
512 | /* see this isn't supposed to happen, just kill the watch */ | 525 | fsnotify_put_mark(entry); |
513 | if (entry) { | 526 | |
514 | fsnotify_destroy_mark_by_entry(entry); | 527 | return ret; |
515 | fsnotify_put_mark(entry); | 528 | } |
529 | |||
530 | static int inotify_new_watch(struct fsnotify_group *group, | ||
531 | struct inode *inode, | ||
532 | u32 arg) | ||
533 | { | ||
534 | struct inotify_inode_mark_entry *tmp_ientry; | ||
535 | __u32 mask; | ||
536 | int ret; | ||
537 | |||
538 | /* don't allow invalid bits: we don't want flags set */ | ||
539 | mask = inotify_arg_to_mask(arg); | ||
540 | if (unlikely(!mask)) | ||
541 | return -EINVAL; | ||
542 | |||
543 | tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
544 | if (unlikely(!tmp_ientry)) | ||
545 | return -ENOMEM; | ||
546 | |||
547 | fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); | ||
548 | tmp_ientry->fsn_entry.mask = mask; | ||
549 | tmp_ientry->wd = -1; | ||
550 | |||
551 | ret = -ENOSPC; | ||
552 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) | ||
553 | goto out_err; | ||
554 | retry: | ||
555 | ret = -ENOMEM; | ||
556 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
557 | goto out_err; | ||
558 | |||
559 | spin_lock(&group->inotify_data.idr_lock); | ||
560 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | ||
561 | group->inotify_data.last_wd, | ||
562 | &tmp_ientry->wd); | ||
563 | spin_unlock(&group->inotify_data.idr_lock); | ||
564 | if (ret) { | ||
565 | /* idr was out of memory allocate and try again */ | ||
566 | if (ret == -EAGAIN) | ||
567 | goto retry; | ||
568 | goto out_err; | ||
569 | } | ||
570 | |||
571 | /* we put the mark on the idr, take a reference */ | ||
572 | fsnotify_get_mark(&tmp_ientry->fsn_entry); | ||
573 | |||
574 | /* we are on the idr, now get on the inode */ | ||
575 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | ||
576 | if (ret) { | ||
577 | /* we failed to get on the inode, get off the idr */ | ||
578 | inotify_remove_from_idr(group, tmp_ientry); | ||
579 | goto out_err; | ||
516 | } | 580 | } |
581 | |||
582 | /* update the idr hint, who cares about races, it's just a hint */ | ||
583 | group->inotify_data.last_wd = tmp_ientry->wd; | ||
584 | |||
585 | /* increment the number of watches the user has */ | ||
586 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
587 | |||
588 | /* return the watch descriptor for this new entry */ | ||
589 | ret = tmp_ientry->wd; | ||
590 | |||
591 | /* match the ref from fsnotify_init_mark() */ | ||
592 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | ||
593 | |||
594 | /* if this mark added a new event update the group mask */ | ||
595 | if (mask & ~group->mask) | ||
596 | fsnotify_recalc_group_mask(group); | ||
597 | |||
598 | out_err: | ||
599 | if (ret < 0) | ||
600 | kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); | ||
601 | |||
602 | return ret; | ||
603 | } | ||
604 | |||
605 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | ||
606 | { | ||
607 | int ret = 0; | ||
608 | |||
609 | retry: | ||
610 | /* try to update an existing watch with the new arg */ | ||
611 | ret = inotify_update_existing_watch(group, inode, arg); | ||
612 | /* no mark present, try to add a new one */ | ||
613 | if (ret == -ENOENT) | ||
614 | ret = inotify_new_watch(group, inode, arg); | ||
615 | /* | ||
616 | * inotify_new_watch could race with another thread which did an | ||
617 | * inotify_new_watch between the update_existing and the add watch | ||
618 | * here; go back and try to update the existing mark again. | ||
619 | */ | ||
620 | if (ret == -EEXIST) | ||
621 | goto retry; | ||
622 | |||
517 | return ret; | 623 | return ret; |
518 | } | 624 | } |
519 | 625 | ||
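The split into inotify_update_existing_watch()/inotify_new_watch() resolves a classic check-then-create race, and the retry loop is the glue: try the update; on -ENOENT create a fresh mark; if another thread won the creation race in between, fsnotify_add_mark() reports -EEXIST and the loop falls back to updating. The same optimistic pattern, reduced to a skeleton with illustrative function names:

	int ret;

	do {
		ret = update_existing(group, inode, arg);
		if (ret == -ENOENT)		/* nothing there yet: create */
			ret = create_new(group, inode, arg);
	} while (ret == -EEXIST);		/* lost the race: update instead */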
@@ -532,7 +638,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign | |||
532 | 638 | ||
533 | spin_lock_init(&group->inotify_data.idr_lock); | 639 | spin_lock_init(&group->inotify_data.idr_lock); |
534 | idr_init(&group->inotify_data.idr); | 640 | idr_init(&group->inotify_data.idr); |
535 | group->inotify_data.last_wd = 0; | 641 | group->inotify_data.last_wd = 1; |
536 | group->inotify_data.user = user; | 642 | group->inotify_data.user = user; |
537 | group->inotify_data.fa = NULL; | 643 | group->inotify_data.fa = NULL; |
538 | 644 | ||
@@ -721,9 +827,6 @@ static int __init inotify_user_setup(void) | |||
721 | 827 | ||
722 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); | 828 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); |
723 | event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); | 829 | event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); |
724 | inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0); | ||
725 | if (!inotify_ignored_event) | ||
726 | panic("unable to allocate the inotify ignored event\n"); | ||
727 | 830 | ||
728 | inotify_max_queued_events = 16384; | 831 | inotify_max_queued_events = 16384; |
729 | inotify_max_user_instances = 128; | 832 | inotify_max_user_instances = 128; |
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 959b73e756fd..3816d5750dd5 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -136,18 +136,28 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new | |||
136 | { | 136 | { |
137 | if ((old->mask == new->mask) && | 137 | if ((old->mask == new->mask) && |
138 | (old->to_tell == new->to_tell) && | 138 | (old->to_tell == new->to_tell) && |
139 | (old->data_type == new->data_type)) { | 139 | (old->data_type == new->data_type) && |
140 | (old->name_len == new->name_len)) { | ||
140 | switch (old->data_type) { | 141 | switch (old->data_type) { |
141 | case (FSNOTIFY_EVENT_INODE): | 142 | case (FSNOTIFY_EVENT_INODE): |
142 | if (old->inode == new->inode) | 143 | /* remember, after old was put on the wait_q we aren't |
144 | * allowed to look at the inode any more, only thing | ||
145 | * left to check was if the file_name is the same */ | ||
146 | if (old->name_len && | ||
147 | !strcmp(old->file_name, new->file_name)) | ||
143 | return true; | 148 | return true; |
144 | break; | 149 | break; |
145 | case (FSNOTIFY_EVENT_PATH): | 150 | case (FSNOTIFY_EVENT_PATH): |
146 | if ((old->path.mnt == new->path.mnt) && | 151 | if ((old->path.mnt == new->path.mnt) && |
147 | (old->path.dentry == new->path.dentry)) | 152 | (old->path.dentry == new->path.dentry)) |
148 | return true; | 153 | return true; |
154 | break; | ||
149 | case (FSNOTIFY_EVENT_NONE): | 155 | case (FSNOTIFY_EVENT_NONE): |
150 | return true; | 156 | if (old->mask & FS_Q_OVERFLOW) |
157 | return true; | ||
158 | else if (old->mask & FS_IN_IGNORED) | ||
159 | return false; | ||
160 | return false; | ||
151 | }; | 161 | }; |
152 | } | 162 | } |
153 | return false; | 163 | return false; |
@@ -165,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even | |||
165 | struct list_head *list = &group->notification_list; | 175 | struct list_head *list = &group->notification_list; |
166 | struct fsnotify_event_holder *last_holder; | 176 | struct fsnotify_event_holder *last_holder; |
167 | struct fsnotify_event *last_event; | 177 | struct fsnotify_event *last_event; |
168 | 178 | int ret = 0; | |
169 | /* easy to tell if priv was attached to the event */ | ||
170 | INIT_LIST_HEAD(&priv->event_list); | ||
171 | 179 | ||
172 | /* | 180 | /* |
173 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. | 181 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. |
@@ -188,6 +196,7 @@ alloc_holder: | |||
188 | 196 | ||
189 | if (group->q_len >= group->max_events) { | 197 | if (group->q_len >= group->max_events) { |
190 | event = &q_overflow_event; | 198 | event = &q_overflow_event; |
199 | ret = -EOVERFLOW; | ||
191 | /* sorry, no private data on the overflow event */ | 200 | /* sorry, no private data on the overflow event */ |
192 | priv = NULL; | 201 | priv = NULL; |
193 | } | 202 | } |
@@ -229,7 +238,7 @@ alloc_holder: | |||
229 | mutex_unlock(&group->notification_mutex); | 238 | mutex_unlock(&group->notification_mutex); |
230 | 239 | ||
231 | wake_up(&group->notification_waitq); | 240 | wake_up(&group->notification_waitq); |
232 | return 0; | 241 | return ret; |
233 | } | 242 | } |
234 | 243 | ||
235 | /* | 244 | /* |
@@ -339,18 +348,19 @@ static void initialize_event(struct fsnotify_event *event) | |||
339 | * @name the filename, if available | 348 | * @name the filename, if available |
340 | */ | 349 | */ |
341 | struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, | 350 | struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, |
342 | int data_type, const char *name, u32 cookie) | 351 | int data_type, const char *name, u32 cookie, |
352 | gfp_t gfp) | ||
343 | { | 353 | { |
344 | struct fsnotify_event *event; | 354 | struct fsnotify_event *event; |
345 | 355 | ||
346 | event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); | 356 | event = kmem_cache_alloc(fsnotify_event_cachep, gfp); |
347 | if (!event) | 357 | if (!event) |
348 | return NULL; | 358 | return NULL; |
349 | 359 | ||
350 | initialize_event(event); | 360 | initialize_event(event); |
351 | 361 | ||
352 | if (name) { | 362 | if (name) { |
353 | event->file_name = kstrdup(name, GFP_KERNEL); | 363 | event->file_name = kstrdup(name, gfp); |
354 | if (!event->file_name) { | 364 | if (!event->file_name) { |
355 | kmem_cache_free(fsnotify_event_cachep, event); | 365 | kmem_cache_free(fsnotify_event_cachep, event); |
356 | return NULL; | 366 | return NULL; |
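fsnotify_create_event() now takes its allocation mask from the caller instead of hard-coding GFP_KERNEL. The two call sites in this series show the intended split: the ordinary delivery path can block freely, while inotify's mark-teardown path may run in filesystem-reclaim context and must pass GFP_NOFS. Side by side (condensed from the hunks above):

	/* normal event delivery: a blocking allocation is fine */
	event = fsnotify_create_event(to_tell, mask, data, data_is,
				      file_name, cookie, GFP_KERNEL);

	/* teardown can run under fs reclaim: avoid recursing into the fs */
	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
					      FSNOTIFY_EVENT_NONE, NULL, 0,
					      GFP_NOFS);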
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 3140a4429af1..4350d4993b18 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2076,14 +2076,6 @@ err_out: | |||
2076 | *ppos = pos; | 2076 | *ppos = pos; |
2077 | if (cached_page) | 2077 | if (cached_page) |
2078 | page_cache_release(cached_page); | 2078 | page_cache_release(cached_page); |
2079 | /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */ | ||
2080 | if (likely(!status)) { | ||
2081 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) { | ||
2082 | if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb)) | ||
2083 | status = generic_osync_inode(vi, mapping, | ||
2084 | OSYNC_METADATA|OSYNC_DATA); | ||
2085 | } | ||
2086 | } | ||
2087 | pagevec_lru_add_file(&lru_pvec); | 2079 | pagevec_lru_add_file(&lru_pvec); |
2088 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 2080 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
2089 | written ? "written" : "status", (unsigned long)written, | 2081 | written ? "written" : "status", (unsigned long)written, |
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2145 | mutex_lock(&inode->i_mutex); | 2137 | mutex_lock(&inode->i_mutex); |
2146 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); | 2138 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); |
2147 | mutex_unlock(&inode->i_mutex); | 2139 | mutex_unlock(&inode->i_mutex); |
2148 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2140 | if (ret > 0) { |
2149 | int err = sync_page_range(inode, mapping, pos, ret); | 2141 | int err = generic_write_sync(file, pos, ret); |
2150 | if (err < 0) | 2142 | if (err < 0) |
2151 | ret = err; | 2143 | ret = err; |
2152 | } | 2144 | } |
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, | |||
2173 | if (ret == -EIOCBQUEUED) | 2165 | if (ret == -EIOCBQUEUED) |
2174 | ret = wait_on_sync_kiocb(&kiocb); | 2166 | ret = wait_on_sync_kiocb(&kiocb); |
2175 | mutex_unlock(&inode->i_mutex); | 2167 | mutex_unlock(&inode->i_mutex); |
2176 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2168 | if (ret > 0) { |
2177 | int err = sync_page_range(inode, mapping, *ppos - ret, ret); | 2169 | int err = generic_write_sync(file, *ppos - ret, ret); |
2178 | if (err < 0) | 2170 | if (err < 0) |
2179 | ret = err; | 2171 | ret = err; |
2180 | } | 2172 | } |
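Both call sites above can drop their open-coded O_SYNC/IS_SYNC tests because generic_write_sync() performs the check itself and syncs only the byte range that was just written. A hedged paraphrase of the helper these hunks rely on, close to but not guaranteed byte-for-byte identical to the merged implementation:

    int generic_write_sync(struct file *file, loff_t pos, loff_t count)
    {
            /* nothing to do unless the write must be synchronous */
            if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
                    return 0;
            /* flush and wait on just [pos, pos + count - 1]; the final
             * argument requests fdatasync semantics */
            return vfs_fsync_range(file, file->f_path.dentry, pos,
                                   pos + count - 1, 1);
    }
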
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 23bf68453d7d..1caa0ef0b2bb 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c | |||
@@ -384,13 +384,12 @@ unm_err_out: | |||
384 | * it is dirty in the inode meta data rather than the data page cache of the | 384 | * it is dirty in the inode meta data rather than the data page cache of the |
385 | * inode, and thus there are no data pages that need writing out. Therefore, a | 385 | * inode, and thus there are no data pages that need writing out. Therefore, a |
386 | * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the | 386 | * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the |
387 | * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to | 387 | * other hand, is not sufficient, because ->write_inode needs to be called even |
388 | * ensure ->write_inode is called from generic_osync_inode() and this needs to | 388 | * in case of fdatasync. This needs to happen or the file data would not |
389 | * happen or the file data would not necessarily hit the device synchronously, | 389 | * necessarily hit the device synchronously, even though the vfs inode has the |
390 | * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC | 390 | * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just |
391 | * simply "feels" better than just I_DIRTY_SYNC, since the file data has not | 391 | * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, |
392 | * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own | 392 | * which is not what I_DIRTY_SYNC on its own would suggest. |
393 | * would suggest. | ||
394 | */ | 393 | */ |
395 | void __mark_mft_record_dirty(ntfs_inode *ni) | 394 | void __mark_mft_record_dirty(ntfs_inode *ni) |
396 | { | 395 | { |
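Concretely, the behavior the rewritten comment defends is the dirty-flag choice in the function body that follows; a sketch of that existing call (the exact inode argument is assumed from context):

    /* I_DIRTY_DATASYNC guarantees ->write_inode is invoked even for
     * fdatasync-style writeback; I_DIRTY_SYNC alone would not */
    __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC);
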
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 9edcde4974aa..ab513ddaeff2 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, | |||
1914 | * immediately to their right. | 1914 | * immediately to their right. |
1915 | */ | 1915 | */ |
1916 | left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); | 1916 | left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); |
1917 | if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { | 1917 | if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) { |
1918 | BUG_ON(right_child_el->l_tree_depth); | ||
1918 | BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); | 1919 | BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); |
1919 | left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); | 1920 | left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); |
1920 | } | 1921 | } |
@@ -2476,15 +2477,37 @@ out_ret_path: | |||
2476 | return ret; | 2477 | return ret; |
2477 | } | 2478 | } |
2478 | 2479 | ||
2479 | static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, | 2480 | static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, |
2480 | struct ocfs2_path *path) | 2481 | int subtree_index, struct ocfs2_path *path) |
2481 | { | 2482 | { |
2482 | int i, idx; | 2483 | int i, idx, ret; |
2483 | struct ocfs2_extent_rec *rec; | 2484 | struct ocfs2_extent_rec *rec; |
2484 | struct ocfs2_extent_list *el; | 2485 | struct ocfs2_extent_list *el; |
2485 | struct ocfs2_extent_block *eb; | 2486 | struct ocfs2_extent_block *eb; |
2486 | u32 range; | 2487 | u32 range; |
2487 | 2488 | ||
2489 | /* | ||
2490 | * In normal tree rotation process, we will never touch the | ||
2491 | * tree branch above subtree_index and ocfs2_extend_rotate_transaction | ||
2492 | * doesn't reserve the credits for them either. | ||
2493 | * | ||
2494 | * But we do have a special case here which will update the rightmost | ||
2495 | * records for all the bh in the path. | ||
2496 | * So we have to allocate extra credits and access them. | ||
2497 | */ | ||
2498 | ret = ocfs2_extend_trans(handle, | ||
2499 | handle->h_buffer_credits + subtree_index); | ||
2500 | if (ret) { | ||
2501 | mlog_errno(ret); | ||
2502 | goto out; | ||
2503 | } | ||
2504 | |||
2505 | ret = ocfs2_journal_access_path(inode, handle, path); | ||
2506 | if (ret) { | ||
2507 | mlog_errno(ret); | ||
2508 | goto out; | ||
2509 | } | ||
2510 | |||
2488 | /* Path should always be rightmost. */ | 2511 | /* Path should always be rightmost. */ |
2489 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | 2512 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; |
2490 | BUG_ON(eb->h_next_leaf_blk != 0ULL); | 2513 | BUG_ON(eb->h_next_leaf_blk != 0ULL); |
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, | |||
2505 | 2528 | ||
2506 | ocfs2_journal_dirty(handle, path->p_node[i].bh); | 2529 | ocfs2_journal_dirty(handle, path->p_node[i].bh); |
2507 | } | 2530 | } |
2531 | out: | ||
2532 | return ret; | ||
2508 | } | 2533 | } |
2509 | 2534 | ||
2510 | static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, | 2535 | static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, |
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2717 | if (del_right_subtree) { | 2742 | if (del_right_subtree) { |
2718 | ocfs2_unlink_subtree(inode, handle, left_path, right_path, | 2743 | ocfs2_unlink_subtree(inode, handle, left_path, right_path, |
2719 | subtree_index, dealloc); | 2744 | subtree_index, dealloc); |
2720 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2745 | ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, |
2746 | left_path); | ||
2747 | if (ret) { | ||
2748 | mlog_errno(ret); | ||
2749 | goto out; | ||
2750 | } | ||
2721 | 2751 | ||
2722 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2752 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
2723 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); | 2753 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
3034 | 3064 | ||
3035 | ocfs2_unlink_subtree(inode, handle, left_path, path, | 3065 | ocfs2_unlink_subtree(inode, handle, left_path, path, |
3036 | subtree_index, dealloc); | 3066 | subtree_index, dealloc); |
3037 | ocfs2_update_edge_lengths(inode, handle, left_path); | 3067 | ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, |
3068 | left_path); | ||
3069 | if (ret) { | ||
3070 | mlog_errno(ret); | ||
3071 | goto out; | ||
3072 | } | ||
3038 | 3073 | ||
3039 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 3074 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
3040 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); | 3075 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
@@ -6816,7 +6851,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6816 | } | 6851 | } |
6817 | status = 0; | 6852 | status = 0; |
6818 | bail: | 6853 | bail: |
6819 | 6854 | brelse(last_eb_bh); | |
6820 | mlog_exit(status); | 6855 | mlog_exit(status); |
6821 | return status; | 6856 | return status; |
6822 | } | 6857 | } |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index b2c52b3a1484..8a1e61545f41 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
193 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 193 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
194 | mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); | 194 | mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); |
195 | dump_stack(); | 195 | dump_stack(); |
196 | goto bail; | ||
196 | } | 197 | } |
197 | 198 | ||
198 | past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | 199 | past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); |
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc { | |||
894 | */ | 895 | */ |
895 | unsigned c_new; | 896 | unsigned c_new; |
896 | unsigned c_unwritten; | 897 | unsigned c_unwritten; |
898 | unsigned c_needs_zero; | ||
897 | }; | 899 | }; |
898 | 900 | ||
899 | static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) | ||
900 | { | ||
901 | return d->c_new || d->c_unwritten; | ||
902 | } | ||
903 | |||
904 | struct ocfs2_write_ctxt { | 901 | struct ocfs2_write_ctxt { |
905 | /* Logical cluster position / len of write */ | 902 | /* Logical cluster position / len of write */ |
906 | u32 w_cpos; | 903 | u32 w_cpos; |
907 | u32 w_clen; | 904 | u32 w_clen; |
908 | 905 | ||
906 | /* First cluster allocated in a nonsparse extend */ | ||
907 | u32 w_first_new_cpos; | ||
908 | |||
909 | struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; | 909 | struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; |
910 | 910 | ||
911 | /* | 911 | /* |
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, | |||
983 | return -ENOMEM; | 983 | return -ENOMEM; |
984 | 984 | ||
985 | wc->w_cpos = pos >> osb->s_clustersize_bits; | 985 | wc->w_cpos = pos >> osb->s_clustersize_bits; |
986 | wc->w_first_new_cpos = UINT_MAX; | ||
986 | cend = (pos + len - 1) >> osb->s_clustersize_bits; | 987 | cend = (pos + len - 1) >> osb->s_clustersize_bits; |
987 | wc->w_clen = cend - wc->w_cpos + 1; | 988 | wc->w_clen = cend - wc->w_cpos + 1; |
988 | get_bh(di_bh); | 989 | get_bh(di_bh); |
@@ -1217,20 +1218,18 @@ out: | |||
1217 | */ | 1218 | */ |
1218 | static int ocfs2_write_cluster(struct address_space *mapping, | 1219 | static int ocfs2_write_cluster(struct address_space *mapping, |
1219 | u32 phys, unsigned int unwritten, | 1220 | u32 phys, unsigned int unwritten, |
1221 | unsigned int should_zero, | ||
1220 | struct ocfs2_alloc_context *data_ac, | 1222 | struct ocfs2_alloc_context *data_ac, |
1221 | struct ocfs2_alloc_context *meta_ac, | 1223 | struct ocfs2_alloc_context *meta_ac, |
1222 | struct ocfs2_write_ctxt *wc, u32 cpos, | 1224 | struct ocfs2_write_ctxt *wc, u32 cpos, |
1223 | loff_t user_pos, unsigned user_len) | 1225 | loff_t user_pos, unsigned user_len) |
1224 | { | 1226 | { |
1225 | int ret, i, new, should_zero = 0; | 1227 | int ret, i, new; |
1226 | u64 v_blkno, p_blkno; | 1228 | u64 v_blkno, p_blkno; |
1227 | struct inode *inode = mapping->host; | 1229 | struct inode *inode = mapping->host; |
1228 | struct ocfs2_extent_tree et; | 1230 | struct ocfs2_extent_tree et; |
1229 | 1231 | ||
1230 | new = phys == 0 ? 1 : 0; | 1232 | new = phys == 0 ? 1 : 0; |
1231 | if (new || unwritten) | ||
1232 | should_zero = 1; | ||
1233 | |||
1234 | if (new) { | 1233 | if (new) { |
1235 | u32 tmp_pos; | 1234 | u32 tmp_pos; |
1236 | 1235 | ||
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1301 | if (tmpret) { | 1300 | if (tmpret) { |
1302 | mlog_errno(tmpret); | 1301 | mlog_errno(tmpret); |
1303 | if (ret == 0) | 1302 | if (ret == 0) |
1304 | tmpret = ret; | 1303 | ret = tmpret; |
1305 | } | 1304 | } |
1306 | } | 1305 | } |
1307 | 1306 | ||
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, | |||
1341 | local_len = osb->s_clustersize - cluster_off; | 1340 | local_len = osb->s_clustersize - cluster_off; |
1342 | 1341 | ||
1343 | ret = ocfs2_write_cluster(mapping, desc->c_phys, | 1342 | ret = ocfs2_write_cluster(mapping, desc->c_phys, |
1344 | desc->c_unwritten, data_ac, meta_ac, | 1343 | desc->c_unwritten, |
1344 | desc->c_needs_zero, | ||
1345 | data_ac, meta_ac, | ||
1345 | wc, desc->c_cpos, pos, local_len); | 1346 | wc, desc->c_cpos, pos, local_len); |
1346 | if (ret) { | 1347 | if (ret) { |
1347 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, | |||
1391 | * newly allocated cluster. | 1392 | * newly allocated cluster. |
1392 | */ | 1393 | */ |
1393 | desc = &wc->w_desc[0]; | 1394 | desc = &wc->w_desc[0]; |
1394 | if (ocfs2_should_zero_cluster(desc)) | 1395 | if (desc->c_needs_zero) |
1395 | ocfs2_figure_cluster_boundaries(osb, | 1396 | ocfs2_figure_cluster_boundaries(osb, |
1396 | desc->c_cpos, | 1397 | desc->c_cpos, |
1397 | &wc->w_target_from, | 1398 | &wc->w_target_from, |
1398 | NULL); | 1399 | NULL); |
1399 | 1400 | ||
1400 | desc = &wc->w_desc[wc->w_clen - 1]; | 1401 | desc = &wc->w_desc[wc->w_clen - 1]; |
1401 | if (ocfs2_should_zero_cluster(desc)) | 1402 | if (desc->c_needs_zero) |
1402 | ocfs2_figure_cluster_boundaries(osb, | 1403 | ocfs2_figure_cluster_boundaries(osb, |
1403 | desc->c_cpos, | 1404 | desc->c_cpos, |
1404 | NULL, | 1405 | NULL, |
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode, | |||
1466 | phys++; | 1467 | phys++; |
1467 | } | 1468 | } |
1468 | 1469 | ||
1470 | /* | ||
1471 | * If w_first_new_cpos is < UINT_MAX, we have a non-sparse | ||
1472 | * file that got extended. w_first_new_cpos tells us | ||
1473 | * where the newly allocated clusters are so we can | ||
1474 | * zero them. | ||
1475 | */ | ||
1476 | if (desc->c_cpos >= wc->w_first_new_cpos) { | ||
1477 | BUG_ON(phys == 0); | ||
1478 | desc->c_needs_zero = 1; | ||
1479 | } | ||
1480 | |||
1469 | desc->c_phys = phys; | 1481 | desc->c_phys = phys; |
1470 | if (phys == 0) { | 1482 | if (phys == 0) { |
1471 | desc->c_new = 1; | 1483 | desc->c_new = 1; |
1484 | desc->c_needs_zero = 1; | ||
1472 | *clusters_to_alloc = *clusters_to_alloc + 1; | 1485 | *clusters_to_alloc = *clusters_to_alloc + 1; |
1473 | } | 1486 | } |
1474 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | 1487 | |
1488 | if (ext_flags & OCFS2_EXT_UNWRITTEN) { | ||
1475 | desc->c_unwritten = 1; | 1489 | desc->c_unwritten = 1; |
1490 | desc->c_needs_zero = 1; | ||
1491 | } | ||
1476 | 1492 | ||
1477 | num_clusters--; | 1493 | num_clusters--; |
1478 | } | 1494 | } |
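With ocfs2_should_zero_cluster() gone, the zeroing decision is computed once while the descriptors are built rather than re-derived later. Collapsed into a single hedged expression (a restatement of the three branches above, not verbatim kernel code), a cluster needs zeroing when:

    desc->c_needs_zero = (phys == 0)                          /* fresh allocation */
            || (ext_flags & OCFS2_EXT_UNWRITTEN)              /* unwritten extent */
            || (desc->c_cpos >= wc->w_first_new_cpos);        /* nonsparse extend */
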
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, | |||
1632 | if (newsize <= i_size_read(inode)) | 1648 | if (newsize <= i_size_read(inode)) |
1633 | return 0; | 1649 | return 0; |
1634 | 1650 | ||
1635 | ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); | 1651 | ret = ocfs2_extend_no_holes(inode, newsize, pos); |
1636 | if (ret) | 1652 | if (ret) |
1637 | mlog_errno(ret); | 1653 | mlog_errno(ret); |
1638 | 1654 | ||
1655 | wc->w_first_new_cpos = | ||
1656 | ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); | ||
1657 | |||
1639 | return ret; | 1658 | return ret; |
1640 | } | 1659 | } |
1641 | 1660 | ||
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1644 | struct page **pagep, void **fsdata, | 1663 | struct page **pagep, void **fsdata, |
1645 | struct buffer_head *di_bh, struct page *mmap_page) | 1664 | struct buffer_head *di_bh, struct page *mmap_page) |
1646 | { | 1665 | { |
1647 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; | 1666 | int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; |
1648 | unsigned int clusters_to_alloc, extents_to_split; | 1667 | unsigned int clusters_to_alloc, extents_to_split; |
1649 | struct ocfs2_write_ctxt *wc; | 1668 | struct ocfs2_write_ctxt *wc; |
1650 | struct inode *inode = mapping->host; | 1669 | struct inode *inode = mapping->host; |
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1722 | 1741 | ||
1723 | } | 1742 | } |
1724 | 1743 | ||
1725 | ocfs2_set_target_boundaries(osb, wc, pos, len, | 1744 | /* |
1726 | clusters_to_alloc + extents_to_split); | 1745 | * We have to zero sparse allocated clusters, unwritten extent clusters, |
1746 | * and non-sparse clusters we just extended. For non-sparse writes, | ||
1747 | * we know zeros will only be needed in the first and/or last cluster. | ||
1748 | */ | ||
1749 | if (clusters_to_alloc || extents_to_split || | ||
1750 | (wc->w_clen && (wc->w_desc[0].c_needs_zero || | ||
1751 | wc->w_desc[wc->w_clen - 1].c_needs_zero))) | ||
1752 | cluster_of_pages = 1; | ||
1753 | else | ||
1754 | cluster_of_pages = 0; | ||
1755 | |||
1756 | ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages); | ||
1727 | 1757 | ||
1728 | handle = ocfs2_start_trans(osb, credits); | 1758 | handle = ocfs2_start_trans(osb, credits); |
1729 | if (IS_ERR(handle)) { | 1759 | if (IS_ERR(handle)) { |
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1756 | * extent. | 1786 | * extent. |
1757 | */ | 1787 | */ |
1758 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, | 1788 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, |
1759 | clusters_to_alloc + extents_to_split, | 1789 | cluster_of_pages, mmap_page); |
1760 | mmap_page); | ||
1761 | if (ret) { | 1790 | if (ret) { |
1762 | mlog_errno(ret); | 1791 | mlog_errno(ret); |
1763 | goto out_quota; | 1792 | goto out_quota; |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b574431a031d..b4957c7d9fe2 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
85 | goto bail; | 85 | goto bail; |
86 | } | 86 | } |
87 | 87 | ||
88 | /* | ||
89 | * If the last lookup failed to create dentry lock, let us | ||
90 | * redo it. | ||
91 | */ | ||
92 | if (!dentry->d_fsdata) { | ||
93 | mlog(0, "Inode %llu doesn't have dentry lock, " | ||
94 | "returning false\n", | ||
95 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
96 | goto bail; | ||
97 | } | ||
98 | |||
88 | ret = 1; | 99 | ret = 1; |
89 | 100 | ||
90 | bail: | 101 | bail: |
@@ -310,22 +321,19 @@ out_attach: | |||
310 | return ret; | 321 | return ret; |
311 | } | 322 | } |
312 | 323 | ||
313 | static DEFINE_SPINLOCK(dentry_list_lock); | 324 | DEFINE_SPINLOCK(dentry_list_lock); |
314 | 325 | ||
315 | /* We limit the number of dentry locks to drop in one go. We have | 326 | /* We limit the number of dentry locks to drop in one go. We have |
316 | * this limit so that we don't starve other users of ocfs2_wq. */ | 327 | * this limit so that we don't starve other users of ocfs2_wq. */ |
317 | #define DL_INODE_DROP_COUNT 64 | 328 | #define DL_INODE_DROP_COUNT 64 |
318 | 329 | ||
319 | /* Drop inode references from dentry locks */ | 330 | /* Drop inode references from dentry locks */ |
320 | void ocfs2_drop_dl_inodes(struct work_struct *work) | 331 | static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count) |
321 | { | 332 | { |
322 | struct ocfs2_super *osb = container_of(work, struct ocfs2_super, | ||
323 | dentry_lock_work); | ||
324 | struct ocfs2_dentry_lock *dl; | 333 | struct ocfs2_dentry_lock *dl; |
325 | int drop_count = DL_INODE_DROP_COUNT; | ||
326 | 334 | ||
327 | spin_lock(&dentry_list_lock); | 335 | spin_lock(&dentry_list_lock); |
328 | while (osb->dentry_lock_list && drop_count--) { | 336 | while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) { |
329 | dl = osb->dentry_lock_list; | 337 | dl = osb->dentry_lock_list; |
330 | osb->dentry_lock_list = dl->dl_next; | 338 | osb->dentry_lock_list = dl->dl_next; |
331 | spin_unlock(&dentry_list_lock); | 339 | spin_unlock(&dentry_list_lock); |
@@ -333,11 +341,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work) | |||
333 | kfree(dl); | 341 | kfree(dl); |
334 | spin_lock(&dentry_list_lock); | 342 | spin_lock(&dentry_list_lock); |
335 | } | 343 | } |
336 | if (osb->dentry_lock_list) | 344 | spin_unlock(&dentry_list_lock); |
345 | } | ||
346 | |||
347 | void ocfs2_drop_dl_inodes(struct work_struct *work) | ||
348 | { | ||
349 | struct ocfs2_super *osb = container_of(work, struct ocfs2_super, | ||
350 | dentry_lock_work); | ||
351 | |||
352 | __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT); | ||
353 | /* | ||
354 | * Don't queue dropping if umount is in progress. We flush the | ||
355 | * list in ocfs2_dismount_volume | ||
356 | */ | ||
357 | spin_lock(&dentry_list_lock); | ||
358 | if (osb->dentry_lock_list && | ||
359 | !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) | ||
337 | queue_work(ocfs2_wq, &osb->dentry_lock_work); | 360 | queue_work(ocfs2_wq, &osb->dentry_lock_work); |
338 | spin_unlock(&dentry_list_lock); | 361 | spin_unlock(&dentry_list_lock); |
339 | } | 362 | } |
340 | 363 | ||
364 | /* Flush the whole work queue */ | ||
365 | void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) | ||
366 | { | ||
367 | __ocfs2_drop_dl_inodes(osb, -1); | ||
368 | } | ||
369 | |||
341 | /* | 370 | /* |
342 | * ocfs2_dentry_iput() and friends. | 371 | * ocfs2_dentry_iput() and friends. |
343 | * | 372 | * |
@@ -368,7 +397,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, | |||
368 | /* We leave dropping of inode reference to ocfs2_wq as that can | 397 | /* We leave dropping of inode reference to ocfs2_wq as that can |
369 | * possibly lead to inode deletion which gets tricky */ | 398 | * possibly lead to inode deletion which gets tricky */ |
370 | spin_lock(&dentry_list_lock); | 399 | spin_lock(&dentry_list_lock); |
371 | if (!osb->dentry_lock_list) | 400 | if (!osb->dentry_lock_list && |
401 | !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) | ||
372 | queue_work(ocfs2_wq, &osb->dentry_lock_work); | 402 | queue_work(ocfs2_wq, &osb->dentry_lock_work); |
373 | dl->dl_next = osb->dentry_lock_list; | 403 | dl->dl_next = osb->dentry_lock_list; |
374 | osb->dentry_lock_list = dl; | 404 | osb->dentry_lock_list = dl; |
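The two OCFS2_OSB_DROP_DENTRY_LOCK_IMMED checks only close the race together with an unmount-side counterpart that this excerpt does not show. A hedged sketch of how ocfs2_dismount_volume() is presumably expected to pair with them; the exact calls and their order here are assumptions:

    /* stop the work item from re-queueing itself, then drain the
     * remaining dentry locks synchronously */
    ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
    cancel_work_sync(&osb->dentry_lock_work);
    ocfs2_drop_all_dl_inodes(osb);      /* drop_count == -1: flush everything */
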
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index faa12e75f98d..f5dd1789acf1 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock { | |||
49 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, | 49 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, |
50 | u64 parent_blkno); | 50 | u64 parent_blkno); |
51 | 51 | ||
52 | extern spinlock_t dentry_list_lock; | ||
53 | |||
52 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | 54 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, |
53 | struct ocfs2_dentry_lock *dl); | 55 | struct ocfs2_dentry_lock *dl); |
54 | 56 | ||
55 | void ocfs2_drop_dl_inodes(struct work_struct *work); | 57 | void ocfs2_drop_dl_inodes(struct work_struct *work); |
58 | void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb); | ||
56 | 59 | ||
57 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, | 60 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, |
58 | int skip_unhashed); | 61 | int skip_unhashed); |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index d07ddbe4b283..81eff8e58322 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) | |||
103 | lock->ast_pending, lock->ml.type); | 103 | lock->ast_pending, lock->ml.type); |
104 | BUG(); | 104 | BUG(); |
105 | } | 105 | } |
106 | BUG_ON(!list_empty(&lock->ast_list)); | ||
107 | if (lock->ast_pending) | 106 | if (lock->ast_pending) |
108 | mlog(0, "lock has an ast getting flushed right now\n"); | 107 | mlog(0, "lock has an ast getting flushed right now\n"); |
109 | 108 | ||
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 1c9efb406a96..02bf17808bdc 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c | |||
@@ -325,6 +325,7 @@ clear_fields: | |||
325 | } | 325 | } |
326 | 326 | ||
327 | static struct backing_dev_info dlmfs_backing_dev_info = { | 327 | static struct backing_dev_info dlmfs_backing_dev_info = { |
328 | .name = "ocfs2-dlmfs", | ||
328 | .ra_pages = 0, /* No readahead */ | 329 | .ra_pages = 0, /* No readahead */ |
329 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 330 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
330 | }; | 331 | }; |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index bcb9260c3735..43e6e3280569 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, | |||
1118 | 1118 | ||
1119 | mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", | 1119 | mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", |
1120 | dlm->name, res->lockname.len, res->lockname.name, | 1120 | dlm->name, res->lockname.len, res->lockname.name, |
1121 | orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", | 1121 | orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery", |
1122 | send_to); | 1122 | send_to); |
1123 | 1123 | ||
1124 | /* send it */ | 1124 | /* send it */ |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index fcf879ed6930..756f5b0998e0 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -122,7 +122,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
122 | * that still has AST's pending... */ | 122 | * that still has AST's pending... */ |
123 | in_use = !list_empty(&lock->ast_list); | 123 | in_use = !list_empty(&lock->ast_list); |
124 | spin_unlock(&dlm->ast_lock); | 124 | spin_unlock(&dlm->ast_lock); |
125 | if (in_use) { | 125 | if (in_use && !(flags & LKM_CANCEL)) { |
126 | mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " | 126 | mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " |
127 | "while waiting for an ast!", res->lockname.len, | 127 | "while waiting for an ast!", res->lockname.len, |
128 | res->lockname.name); | 128 | res->lockname.name); |
@@ -131,7 +131,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
131 | 131 | ||
132 | spin_lock(&res->spinlock); | 132 | spin_lock(&res->spinlock); |
133 | if (res->state & DLM_LOCK_RES_IN_PROGRESS) { | 133 | if (res->state & DLM_LOCK_RES_IN_PROGRESS) { |
134 | if (master_node) { | 134 | if (master_node && !(flags & LKM_CANCEL)) { |
135 | mlog(ML_ERROR, "lockres in progress!\n"); | 135 | mlog(ML_ERROR, "lockres in progress!\n"); |
136 | spin_unlock(&res->spinlock); | 136 | spin_unlock(&res->spinlock); |
137 | return DLM_FORWARD; | 137 | return DLM_FORWARD; |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 62442e413a00..221c5e98957b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1851,6 +1851,7 @@ relock: | |||
1851 | if (ret) | 1851 | if (ret) |
1852 | goto out_dio; | 1852 | goto out_dio; |
1853 | 1853 | ||
1854 | count = ocount; | ||
1854 | ret = generic_write_checks(file, ppos, &count, | 1855 | ret = generic_write_checks(file, ppos, &count, |
1855 | S_ISBLK(inode->i_mode)); | 1856 | S_ISBLK(inode->i_mode)); |
1856 | if (ret) | 1857 | if (ret) |
@@ -1870,8 +1871,7 @@ relock: | |||
1870 | goto out_dio; | 1871 | goto out_dio; |
1871 | } | 1872 | } |
1872 | } else { | 1873 | } else { |
1873 | written = generic_file_aio_write_nolock(iocb, iov, nr_segs, | 1874 | written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); |
1874 | *ppos); | ||
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | out_dio: | 1877 | out_dio: |
@@ -1879,18 +1879,21 @@ out_dio: | |||
1879 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 1879 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
1880 | 1880 | ||
1881 | if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { | 1881 | if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { |
1882 | /* | 1882 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
1883 | * The generic write paths have handled getting data | 1883 | pos + count - 1); |
1884 | * to disk, but since we don't make use of the dirty | 1884 | if (ret < 0) |
1885 | * inode list, a manual journal commit is necessary | 1885 | written = ret; |
1886 | * here. | 1886 | |
1887 | */ | 1887 | if (!ret && (old_size != i_size_read(inode) || |
1888 | if (old_size != i_size_read(inode) || | 1888 | old_clusters != OCFS2_I(inode)->ip_clusters)) { |
1889 | old_clusters != OCFS2_I(inode)->ip_clusters) { | ||
1890 | ret = jbd2_journal_force_commit(osb->journal->j_journal); | 1889 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
1891 | if (ret < 0) | 1890 | if (ret < 0) |
1892 | written = ret; | 1891 | written = ret; |
1893 | } | 1892 | } |
1893 | |||
1894 | if (!ret) | ||
1895 | ret = filemap_fdatawait_range(file->f_mapping, pos, | ||
1896 | pos + count - 1); | ||
1894 | } | 1897 | } |
1895 | 1898 | ||
1896 | /* | 1899 | /* |
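The rewritten O_SYNC tail separates starting the data write-out, forcing the journal commit, and waiting on the data, so the commit can overlap in-flight page I/O instead of serializing behind it. Stripped of the ocfs2 plumbing, the ordering is (hedged restatement; metadata_changed stands in for the i_size/ip_clusters comparison above):

    ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
    if (!ret && metadata_changed)       /* i_size or ip_clusters moved */
            ret = jbd2_journal_force_commit(journal);
    if (!ret)
            ret = filemap_fdatawait_range(mapping, pos, pos + count - 1);
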
@@ -1918,8 +1921,10 @@ out_sems: | |||
1918 | 1921 | ||
1919 | mutex_unlock(&inode->i_mutex); | 1922 | mutex_unlock(&inode->i_mutex); |
1920 | 1923 | ||
1924 | if (written) | ||
1925 | ret = written; | ||
1921 | mlog_exit(ret); | 1926 | mlog_exit(ret); |
1922 | return written ? written : ret; | 1927 | return ret; |
1923 | } | 1928 | } |
1924 | 1929 | ||
1925 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | 1930 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, |
@@ -1988,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1988 | 1993 | ||
1989 | if (ret > 0) { | 1994 | if (ret > 0) { |
1990 | unsigned long nr_pages; | 1995 | unsigned long nr_pages; |
1996 | int err; | ||
1991 | 1997 | ||
1992 | *ppos += ret; | ||
1993 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1998 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1994 | 1999 | ||
1995 | /* | 2000 | err = generic_write_sync(out, *ppos, ret); |
1996 | * If file or inode is SYNC and we actually wrote some data, | 2001 | if (err) |
1997 | * sync it. | 2002 | ret = err; |
1998 | */ | 2003 | else |
1999 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2004 | *ppos += ret; |
2000 | int err; | ||
2001 | |||
2002 | mutex_lock(&inode->i_mutex); | ||
2003 | err = ocfs2_rw_lock(inode, 1); | ||
2004 | if (err < 0) { | ||
2005 | mlog_errno(err); | ||
2006 | } else { | ||
2007 | err = generic_osync_inode(inode, mapping, | ||
2008 | OSYNC_METADATA|OSYNC_DATA); | ||
2009 | ocfs2_rw_unlock(inode, 1); | ||
2010 | } | ||
2011 | mutex_unlock(&inode->i_mutex); | ||
2012 | 2005 | ||
2013 | if (err) | ||
2014 | ret = err; | ||
2015 | } | ||
2016 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 2006 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
2017 | } | 2007 | } |
2018 | 2008 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f033760ecbea..c48b93ac6b65 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb) | |||
1954 | os->os_osb = osb; | 1954 | os->os_osb = osb; |
1955 | os->os_count = 0; | 1955 | os->os_count = 0; |
1956 | os->os_seqno = 0; | 1956 | os->os_seqno = 0; |
1957 | os->os_scantime = CURRENT_TIME; | ||
1958 | mutex_init(&os->os_lock); | 1957 | mutex_init(&os->os_lock); |
1959 | INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); | 1958 | INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); |
1959 | } | ||
1960 | 1960 | ||
1961 | void ocfs2_orphan_scan_start(struct ocfs2_super *osb) | ||
1962 | { | ||
1963 | struct ocfs2_orphan_scan *os; | ||
1964 | |||
1965 | os = &osb->osb_orphan_scan; | ||
1966 | os->os_scantime = CURRENT_TIME; | ||
1961 | if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) | 1967 | if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) |
1962 | atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); | 1968 | atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); |
1963 | else { | 1969 | else { |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 5432c7f79cc6..2c3222aec622 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, | |||
145 | 145 | ||
146 | /* Exported only for the journal struct init code in super.c. Do not call. */ | 146 | /* Exported only for the journal struct init code in super.c. Do not call. */ |
147 | void ocfs2_orphan_scan_init(struct ocfs2_super *osb); | 147 | void ocfs2_orphan_scan_init(struct ocfs2_super *osb); |
148 | void ocfs2_orphan_scan_start(struct ocfs2_super *osb); | ||
148 | void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); | 149 | void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); |
149 | void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); | 150 | void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); |
150 | 151 | ||
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
329 | /* extended attribute block update */ | 330 | /* extended attribute block update */ |
330 | #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 | 331 | #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 |
331 | 332 | ||
333 | /* Update of a single quota block */ | ||
334 | #define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1 | ||
335 | |||
332 | /* global quotafile inode update, data block */ | 336 | /* global quotafile inode update, data block */ |
333 | #define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | 337 | #define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \ |
338 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS) | ||
334 | 339 | ||
340 | #define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS | ||
335 | /* | 341 | /* |
336 | * The two writes below can accidentally see global info dirty due | 342 | * The two writes below can accidentally see global info dirty due |
337 | * to set_info() quotactl so make them prepared for the writes. | 343 | * to set_info() quotactl so make them prepared for the writes. |
338 | */ | 344 | */ |
339 | /* quota data block, global info */ | 345 | /* quota data block, global info */ |
340 | /* Write to local quota file */ | 346 | /* Write to local quota file */ |
341 | #define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) | 347 | #define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \ |
348 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS) | ||
342 | 349 | ||
343 | /* global quota data block, local quota data block, global quota inode, | 350 | /* global quota data block, local quota data block, global quota inode, |
344 | * global quota info */ | 351 | * global quota info */ |
345 | #define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) | 352 | #define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \ |
353 | 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS) | ||
346 | 354 | ||
347 | static inline int ocfs2_quota_trans_credits(struct super_block *sb) | 355 | static inline int ocfs2_quota_trans_credits(struct super_block *sb) |
348 | { | 356 | { |
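Expanding the rewritten macros with OCFS2_QUOTA_BLOCK_UPDATE_CREDITS == 1 shows the totals are unchanged; the rewrite only derives them explicitly from quota-block updates:

    QINFO_WRITE = INODE_UPDATE + 1                    (before: INODE_UPDATE + 1)
    QWRITE      = QINFO_WRITE + 1 = INODE_UPDATE + 2  (before: the same formula)
    QSYNC       = QINFO_WRITE + 2 = INODE_UPDATE + 3  (before: INODE_UPDATE + 3)
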
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb) | |||
355 | return credits; | 363 | return credits; |
356 | } | 364 | } |
357 | 365 | ||
358 | /* Number of credits needed for removing quota structure from file */ | ||
359 | int ocfs2_calc_qdel_credits(struct super_block *sb, int type); | ||
360 | /* Number of credits needed for initialization of new quota structure */ | ||
361 | int ocfs2_calc_qinit_credits(struct super_block *sb, int type); | ||
362 | |||
363 | /* group extend. inode update and last group update. */ | 366 | /* group extend. inode update and last group update. */ |
364 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | 367 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) |
365 | 368 | ||
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c9345ebb8493..39e1d5a39505 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -224,10 +224,12 @@ enum ocfs2_mount_options | |||
224 | OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ | 224 | OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ |
225 | }; | 225 | }; |
226 | 226 | ||
227 | #define OCFS2_OSB_SOFT_RO 0x0001 | 227 | #define OCFS2_OSB_SOFT_RO 0x0001 |
228 | #define OCFS2_OSB_HARD_RO 0x0002 | 228 | #define OCFS2_OSB_HARD_RO 0x0002 |
229 | #define OCFS2_OSB_ERROR_FS 0x0004 | 229 | #define OCFS2_OSB_ERROR_FS 0x0004 |
230 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | 230 | #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 |
231 | |||
232 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | ||
231 | 233 | ||
232 | struct ocfs2_journal; | 234 | struct ocfs2_journal; |
233 | struct ocfs2_slot_info; | 235 | struct ocfs2_slot_info; |
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, | |||
490 | spin_unlock(&osb->osb_lock); | 492 | spin_unlock(&osb->osb_lock); |
491 | } | 493 | } |
492 | 494 | ||
495 | |||
496 | static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, | ||
497 | unsigned long flag) | ||
498 | { | ||
499 | unsigned long ret; | ||
500 | |||
501 | spin_lock(&osb->osb_lock); | ||
502 | ret = osb->osb_flags & flag; | ||
503 | spin_unlock(&osb->osb_lock); | ||
504 | return ret; | ||
505 | } | ||
506 | |||
493 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, | 507 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, |
494 | int hard) | 508 | int hard) |
495 | { | 509 | { |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index fcdba091af3d..c212cf5a2bdf 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -108,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
108 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 108 | [OCFS2_LOCK_TYPE_OPEN] = "Open", |
109 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", | 109 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", |
110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", | 110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", |
111 | [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", | ||
111 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", | 112 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", |
112 | }; | 113 | }; |
113 | 114 | ||
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 7365e2e08706..3fb96fcd4c81 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo { | |||
50 | unsigned int dqi_chunks; /* Number of chunks in local quota file */ | 50 | unsigned int dqi_chunks; /* Number of chunks in local quota file */ |
51 | unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ | 51 | unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ |
52 | unsigned int dqi_syncms; /* How often should we sync with other nodes */ | 52 | unsigned int dqi_syncms; /* How often should we sync with other nodes */ |
53 | unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */ | ||
54 | struct list_head dqi_chunk; /* List of chunks */ | 53 | struct list_head dqi_chunk; /* List of chunks */ |
55 | struct inode *dqi_gqinode; /* Global quota file inode */ | 54 | struct inode *dqi_gqinode; /* Global quota file inode */ |
56 | struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ | 55 | struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index edfa60cd155c..44f2a5e1d042 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "sysfile.h" | 23 | #include "sysfile.h" |
24 | #include "dlmglue.h" | 24 | #include "dlmglue.h" |
25 | #include "uptodate.h" | 25 | #include "uptodate.h" |
26 | #include "super.h" | ||
26 | #include "quota.h" | 27 | #include "quota.h" |
27 | 28 | ||
28 | static struct workqueue_struct *ocfs2_quota_wq = NULL; | 29 | static struct workqueue_struct *ocfs2_quota_wq = NULL; |
@@ -69,6 +70,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) | |||
69 | d->dqb_curspace = cpu_to_le64(m->dqb_curspace); | 70 | d->dqb_curspace = cpu_to_le64(m->dqb_curspace); |
70 | d->dqb_btime = cpu_to_le64(m->dqb_btime); | 71 | d->dqb_btime = cpu_to_le64(m->dqb_btime); |
71 | d->dqb_itime = cpu_to_le64(m->dqb_itime); | 72 | d->dqb_itime = cpu_to_le64(m->dqb_itime); |
73 | d->dqb_pad1 = d->dqb_pad2 = 0; | ||
72 | } | 74 | } |
73 | 75 | ||
74 | static int ocfs2_global_is_id(void *dp, struct dquot *dquot) | 76 | static int ocfs2_global_is_id(void *dp, struct dquot *dquot) |
@@ -113,6 +115,15 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | |||
113 | int rc = 0; | 115 | int rc = 0; |
114 | struct buffer_head *tmp = *bh; | 116 | struct buffer_head *tmp = *bh; |
115 | 117 | ||
118 | if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { | ||
119 | ocfs2_error(inode->i_sb, | ||
120 | "Quota file %llu is probably corrupted! Requested " | ||
121 | "to read block %Lu but file has size only %Lu\n", | ||
122 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
123 | (unsigned long long)v_block, | ||
124 | (unsigned long long)i_size_read(inode)); | ||
125 | return -EIO; | ||
126 | } | ||
116 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, | 127 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, |
117 | ocfs2_validate_quota_block); | 128 | ocfs2_validate_quota_block); |
118 | if (rc) | 129 | if (rc) |
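The new guard rejects reads past the quota file's end before the block layer ever sees them: a virtual block index is valid only if it lies below i_size expressed in blocks. A worked instance of the check:

    /* e.g. 4 KiB blocks and i_size == 40 KiB: valid v_block is 0..9,
     * so a request for v_block 10 now fails with -EIO instead of
     * walking off the end of a corrupted quota tree */
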
@@ -211,14 +222,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
211 | 222 | ||
212 | mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); | 223 | mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); |
213 | if (gqinode->i_size < off + len) { | 224 | if (gqinode->i_size < off + len) { |
214 | down_write(&OCFS2_I(gqinode)->ip_alloc_sem); | 225 | loff_t rounded_end = |
215 | err = ocfs2_extend_no_holes(gqinode, off + len, off); | 226 | ocfs2_align_bytes_to_blocks(sb, off + len); |
216 | up_write(&OCFS2_I(gqinode)->ip_alloc_sem); | 227 | |
217 | if (err < 0) | 228 | /* Space is already allocated in ocfs2_global_read_dquot() */ |
218 | goto out; | ||
219 | err = ocfs2_simple_size_update(gqinode, | 229 | err = ocfs2_simple_size_update(gqinode, |
220 | oinfo->dqi_gqi_bh, | 230 | oinfo->dqi_gqi_bh, |
221 | off + len); | 231 | rounded_end); |
222 | if (err < 0) | 232 | if (err < 0) |
223 | goto out; | 233 | goto out; |
224 | new = 1; | 234 | new = 1; |
@@ -234,7 +244,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
234 | } | 244 | } |
235 | if (err) { | 245 | if (err) { |
236 | mlog_errno(err); | 246 | mlog_errno(err); |
237 | return err; | 247 | goto out; |
238 | } | 248 | } |
239 | lock_buffer(bh); | 249 | lock_buffer(bh); |
240 | if (new) | 250 | if (new) |
@@ -342,7 +352,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type) | |||
342 | info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); | 352 | info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); |
343 | info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); | 353 | info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); |
344 | oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); | 354 | oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); |
345 | oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms); | ||
346 | oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); | 355 | oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); |
347 | oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); | 356 | oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); |
348 | oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); | 357 | oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); |
@@ -352,7 +361,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type) | |||
352 | oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); | 361 | oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); |
353 | INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); | 362 | INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); |
354 | queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, | 363 | queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, |
355 | oinfo->dqi_syncjiff); | 364 | msecs_to_jiffies(oinfo->dqi_syncms)); |
356 | 365 | ||
357 | out_err: | 366 | out_err: |
358 | mlog_exit(status); | 367 | mlog_exit(status); |
@@ -402,13 +411,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type) | |||
402 | return err; | 411 | return err; |
403 | } | 412 | } |
404 | 413 | ||
414 | static int ocfs2_global_qinit_alloc(struct super_block *sb, int type) | ||
415 | { | ||
416 | struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; | ||
417 | |||
418 | /* | ||
419 | * We may need to allocate tree blocks and a leaf block but not the | ||
420 | * root block | ||
421 | */ | ||
422 | return oinfo->dqi_gi.dqi_qtree_depth; | ||
423 | } | ||
424 | |||
425 | static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) | ||
426 | { | ||
427 | /* We modify all the allocated blocks, tree root, and info block */ | ||
428 | return (ocfs2_global_qinit_alloc(sb, type) + 2) * | ||
429 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; | ||
430 | } | ||
431 | |||
405 | /* Read in information from global quota file and acquire a reference to it. | 432 | /* Read in information from global quota file and acquire a reference to it. |
406 | * dquot_acquire() has already started the transaction and locked quota file */ | 433 | * dquot_acquire() has already started the transaction and locked quota file */ |
407 | int ocfs2_global_read_dquot(struct dquot *dquot) | 434 | int ocfs2_global_read_dquot(struct dquot *dquot) |
408 | { | 435 | { |
409 | int err, err2, ex = 0; | 436 | int err, err2, ex = 0; |
410 | struct ocfs2_mem_dqinfo *info = | 437 | struct super_block *sb = dquot->dq_sb; |
411 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 438 | int type = dquot->dq_type; |
439 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; | ||
440 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
441 | struct inode *gqinode = info->dqi_gqinode; | ||
442 | int need_alloc = ocfs2_global_qinit_alloc(sb, type); | ||
443 | handle_t *handle = NULL; | ||
412 | 444 | ||
413 | err = ocfs2_qinfo_lock(info, 0); | 445 | err = ocfs2_qinfo_lock(info, 0); |
414 | if (err < 0) | 446 | if (err < 0) |
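The two helpers split "how many blocks may be appended" from "how many block updates the handle must cover". A worked instance, hedged, using OCFS2_QUOTA_BLOCK_UPDATE_CREDITS == 1: for a global quota tree of depth 1, ocfs2_global_qinit_alloc() returns 1 (a possible new leaf; the root already exists), and ocfs2_calc_global_qinit_credits() returns (1 + 2) * 1 = 3, covering the new leaf, the tree root it gets linked into, and the info block.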
@@ -419,14 +451,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot) | |||
419 | OCFS2_DQUOT(dquot)->dq_use_count++; | 451 | OCFS2_DQUOT(dquot)->dq_use_count++; |
420 | OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; | 452 | OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; |
421 | OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; | 453 | OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; |
454 | ocfs2_qinfo_unlock(info, 0); | ||
455 | |||
422 | if (!dquot->dq_off) { /* No real quota entry? */ | 456 | if (!dquot->dq_off) { /* No real quota entry? */ |
423 | /* Upgrade to exclusive lock for allocation */ | ||
424 | ocfs2_qinfo_unlock(info, 0); | ||
425 | err = ocfs2_qinfo_lock(info, 1); | ||
426 | if (err < 0) | ||
427 | goto out_qlock; | ||
428 | ex = 1; | 457 | ex = 1; |
458 | /* | ||
459 | * Add blocks to quota file before we start a transaction since | ||
460 | * locking allocators ranks above a transaction start | ||
461 | */ | ||
462 | WARN_ON(journal_current_handle()); | ||
463 | down_write(&OCFS2_I(gqinode)->ip_alloc_sem); | ||
464 | err = ocfs2_extend_no_holes(gqinode, | ||
465 | gqinode->i_size + (need_alloc << sb->s_blocksize_bits), | ||
466 | gqinode->i_size); | ||
467 | up_write(&OCFS2_I(gqinode)->ip_alloc_sem); | ||
468 | if (err < 0) | ||
469 | goto out; | ||
429 | } | 470 | } |
471 | |||
472 | handle = ocfs2_start_trans(osb, | ||
473 | ocfs2_calc_global_qinit_credits(sb, type)); | ||
474 | if (IS_ERR(handle)) { | ||
475 | err = PTR_ERR(handle); | ||
476 | goto out; | ||
477 | } | ||
478 | err = ocfs2_qinfo_lock(info, ex); | ||
479 | if (err < 0) | ||
480 | goto out_trans; | ||
430 | err = qtree_write_dquot(&info->dqi_gi, dquot); | 481 | err = qtree_write_dquot(&info->dqi_gi, dquot); |
431 | if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { | 482 | if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { |
432 | err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); | 483 | err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); |
@@ -438,6 +489,9 @@ out_qlock: | |||
438 | ocfs2_qinfo_unlock(info, 1); | 489 | ocfs2_qinfo_unlock(info, 1); |
439 | else | 490 | else |
440 | ocfs2_qinfo_unlock(info, 0); | 491 | ocfs2_qinfo_unlock(info, 0); |
492 | out_trans: | ||
493 | if (handle) | ||
494 | ocfs2_commit_trans(osb, handle); | ||
441 | out: | 495 | out: |
442 | if (err < 0) | 496 | if (err < 0) |
443 | mlog_errno(err); | 497 | mlog_errno(err); |
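Taken together, the restructured ocfs2_global_read_dquot() now observes the ordering that the WARN_ON documents: allocator locking ranks above a running transaction. A hedged outline of the sequence in the hunks above:

    1. take the qinfo lock shared, read the dquot, drop the lock;
    2. if the dquot has no on-disk entry yet, extend the global quota
       file under ip_alloc_sem, which is where allocator locks are taken;
    3. only then start the transaction, sized by
       ocfs2_calc_global_qinit_credits();
    4. retake the qinfo lock (exclusive when allocating) and write the
       dquot under the open handle.
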
@@ -607,7 +661,7 @@ static void qsync_work_fn(struct work_struct *work) | |||
607 | 661 | ||
608 | dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); | 662 | dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); |
609 | queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, | 663 | queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, |
610 | oinfo->dqi_syncjiff); | 664 | msecs_to_jiffies(oinfo->dqi_syncms)); |
611 | } | 665 | } |
612 | 666 | ||
613 | /* | 667 | /* |
@@ -635,20 +689,18 @@ out: | |||
635 | return status; | 689 | return status; |
636 | } | 690 | } |
637 | 691 | ||
638 | int ocfs2_calc_qdel_credits(struct super_block *sb, int type) | 692 | static int ocfs2_calc_qdel_credits(struct super_block *sb, int type) |
639 | { | 693 | { |
640 | struct ocfs2_mem_dqinfo *oinfo; | 694 | struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; |
641 | int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 695 | /* |
642 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; | 696 | * We modify tree, leaf block, global info, local chunk header, |
643 | 697 | * global and local inode; OCFS2_QINFO_WRITE_CREDITS already | |
644 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) | 698 | * accounts for inode update |
645 | return 0; | 699 | */ |
646 | 700 | return (oinfo->dqi_gi.dqi_qtree_depth + 2) * | |
647 | oinfo = sb_dqinfo(sb, type)->dqi_priv; | 701 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + |
648 | /* We modify tree, leaf block, global info, local chunk header, | 702 | OCFS2_QINFO_WRITE_CREDITS + |
649 | * global and local inode */ | 703 | OCFS2_INODE_UPDATE_CREDITS; |
650 | return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 + | ||
651 | 2 * OCFS2_INODE_UPDATE_CREDITS; | ||
652 | } | 704 | } |
653 | 705 | ||
654 | static int ocfs2_release_dquot(struct dquot *dquot) | 706 | static int ocfs2_release_dquot(struct dquot *dquot) |
@@ -680,33 +732,10 @@ out: | |||
680 | return status; | 732 | return status; |
681 | } | 733 | } |
682 | 734 | ||
683 | int ocfs2_calc_qinit_credits(struct super_block *sb, int type) | ||
684 | { | ||
685 | struct ocfs2_mem_dqinfo *oinfo; | ||
686 | int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | ||
687 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; | ||
688 | struct ocfs2_dinode *lfe, *gfe; | ||
689 | |||
690 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) | ||
691 | return 0; | ||
692 | |||
693 | oinfo = sb_dqinfo(sb, type)->dqi_priv; | ||
694 | gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data; | ||
695 | lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data; | ||
696 | /* We can extend local file + global file. In local file we | ||
697 | * can modify info, chunk header block and dquot block. In | ||
698 | * global file we can modify info, tree and leaf block */ | ||
699 | return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) + | ||
700 | ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) + | ||
701 | 3 + oinfo->dqi_gi.dqi_qtree_depth + 2; | ||
702 | } | ||
703 | |||
704 | static int ocfs2_acquire_dquot(struct dquot *dquot) | 735 | static int ocfs2_acquire_dquot(struct dquot *dquot) |
705 | { | 736 | { |
706 | handle_t *handle; | ||
707 | struct ocfs2_mem_dqinfo *oinfo = | 737 | struct ocfs2_mem_dqinfo *oinfo = |
708 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 738 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; |
709 | struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); | ||
710 | int status = 0; | 739 | int status = 0; |
711 | 740 | ||
712 | mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); | 741 | mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); |
@@ -715,16 +744,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) | |||
715 | status = ocfs2_lock_global_qf(oinfo, 1); | 744 | status = ocfs2_lock_global_qf(oinfo, 1); |
716 | if (status < 0) | 745 | if (status < 0) |
717 | goto out; | 746 | goto out; |
718 | handle = ocfs2_start_trans(osb, | ||
719 | ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type)); | ||
720 | if (IS_ERR(handle)) { | ||
721 | status = PTR_ERR(handle); | ||
722 | mlog_errno(status); | ||
723 | goto out_ilock; | ||
724 | } | ||
725 | status = dquot_acquire(dquot); | 747 | status = dquot_acquire(dquot); |
726 | ocfs2_commit_trans(osb, handle); | ||
727 | out_ilock: | ||
728 | ocfs2_unlock_global_qf(oinfo, 1); | 748 | ocfs2_unlock_global_qf(oinfo, 1); |
729 | out: | 749 | out: |
730 | mlog_exit(status); | 750 | mlog_exit(status); |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 5a460fa82553..bdb09cb6e1fe 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include "sysfile.h" | 20 | #include "sysfile.h" |
21 | #include "dlmglue.h" | 21 | #include "dlmglue.h" |
22 | #include "quota.h" | 22 | #include "quota.h" |
23 | #include "uptodate.h" | ||
23 | 24 | ||
24 | /* Number of local quota structures per block */ | 25 | /* Number of local quota structures per block */ |
25 | static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) | 26 | static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) |
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, | |||
100 | handle_t *handle; | 101 | handle_t *handle; |
101 | int status; | 102 | int status; |
102 | 103 | ||
103 | handle = ocfs2_start_trans(OCFS2_SB(sb), 1); | 104 | handle = ocfs2_start_trans(OCFS2_SB(sb), |
105 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS); | ||
104 | if (IS_ERR(handle)) { | 106 | if (IS_ERR(handle)) { |
105 | status = PTR_ERR(handle); | 107 | status = PTR_ERR(handle); |
106 | mlog_errno(status); | 108 | mlog_errno(status); |
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, | |||
610 | goto out_bh; | 612 | goto out_bh; |
611 | /* Mark quota file as clean if we are recovering quota file of | 613 | /* Mark quota file as clean if we are recovering quota file of |
612 | * some other node. */ | 614 | * some other node. */ |
613 | handle = ocfs2_start_trans(osb, 1); | 615 | handle = ocfs2_start_trans(osb, |
616 | OCFS2_LOCAL_QINFO_WRITE_CREDITS); | ||
614 | if (IS_ERR(handle)) { | 617 | if (IS_ERR(handle)) { |
615 | status = PTR_ERR(handle); | 618 | status = PTR_ERR(handle); |
616 | mlog_errno(status); | 619 | mlog_errno(status); |
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
940 | struct ocfs2_local_disk_chunk *dchunk; | 943 | struct ocfs2_local_disk_chunk *dchunk; |
941 | int status; | 944 | int status; |
942 | handle_t *handle; | 945 | handle_t *handle; |
943 | struct buffer_head *bh = NULL; | 946 | struct buffer_head *bh = NULL, *dbh = NULL; |
944 | u64 p_blkno; | 947 | u64 p_blkno; |
945 | 948 | ||
946 | /* We are protected by dqio_sem so no locking needed */ | 949 | /* We are protected by dqio_sem so no locking needed */ |
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
964 | mlog_errno(status); | 967 | mlog_errno(status); |
965 | goto out; | 968 | goto out; |
966 | } | 969 | } |
970 | /* Local quota info and two new blocks we initialize */ | ||
971 | handle = ocfs2_start_trans(OCFS2_SB(sb), | ||
972 | OCFS2_LOCAL_QINFO_WRITE_CREDITS + | ||
973 | 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS); | ||
974 | if (IS_ERR(handle)) { | ||
975 | status = PTR_ERR(handle); | ||
976 | mlog_errno(status); | ||
977 | goto out; | ||
978 | } | ||
967 | 979 | ||
980 | /* Initialize chunk header */ | ||
968 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); | 981 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); |
969 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, | 982 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, |
970 | &p_blkno, NULL, NULL); | 983 | &p_blkno, NULL, NULL); |
971 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); | 984 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); |
972 | if (status < 0) { | 985 | if (status < 0) { |
973 | mlog_errno(status); | 986 | mlog_errno(status); |
974 | goto out; | 987 | goto out_trans; |
975 | } | 988 | } |
976 | bh = sb_getblk(sb, p_blkno); | 989 | bh = sb_getblk(sb, p_blkno); |
977 | if (!bh) { | 990 | if (!bh) { |
978 | status = -ENOMEM; | 991 | status = -ENOMEM; |
979 | mlog_errno(status); | 992 | mlog_errno(status); |
980 | goto out; | 993 | goto out_trans; |
981 | } | 994 | } |
982 | dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; | 995 | dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; |
983 | 996 | ocfs2_set_new_buffer_uptodate(lqinode, bh); | |
984 | handle = ocfs2_start_trans(OCFS2_SB(sb), 2); | ||
985 | if (IS_ERR(handle)) { | ||
986 | status = PTR_ERR(handle); | ||
987 | mlog_errno(status); | ||
988 | goto out; | ||
989 | } | ||
990 | |||
991 | status = ocfs2_journal_access_dq(handle, lqinode, bh, | 997 | status = ocfs2_journal_access_dq(handle, lqinode, bh, |
992 | OCFS2_JOURNAL_ACCESS_WRITE); | 998 | OCFS2_JOURNAL_ACCESS_CREATE); |
993 | if (status < 0) { | 999 | if (status < 0) { |
994 | mlog_errno(status); | 1000 | mlog_errno(status); |
995 | goto out_trans; | 1001 | goto out_trans; |
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
999 | memset(dchunk->dqc_bitmap, 0, | 1005 | memset(dchunk->dqc_bitmap, 0, |
1000 | sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - | 1006 | sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - |
1001 | OCFS2_QBLK_RESERVED_SPACE); | 1007 | OCFS2_QBLK_RESERVED_SPACE); |
1002 | set_buffer_uptodate(bh); | ||
1003 | unlock_buffer(bh); | 1008 | unlock_buffer(bh); |
1004 | status = ocfs2_journal_dirty(handle, bh); | 1009 | status = ocfs2_journal_dirty(handle, bh); |
1005 | if (status < 0) { | 1010 | if (status < 0) { |
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
1007 | goto out_trans; | 1012 | goto out_trans; |
1008 | } | 1013 | } |
1009 | 1014 | ||
1015 | /* Initialize new block with structures */ | ||
1016 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1017 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, | ||
1018 | &p_blkno, NULL, NULL); | ||
1019 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1020 | if (status < 0) { | ||
1021 | mlog_errno(status); | ||
1022 | goto out_trans; | ||
1023 | } | ||
1024 | dbh = sb_getblk(sb, p_blkno); | ||
1025 | if (!dbh) { | ||
1026 | status = -ENOMEM; | ||
1027 | mlog_errno(status); | ||
1028 | goto out_trans; | ||
1029 | } | ||
1030 | ocfs2_set_new_buffer_uptodate(lqinode, dbh); | ||
1031 | status = ocfs2_journal_access_dq(handle, lqinode, dbh, | ||
1032 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
1033 | if (status < 0) { | ||
1034 | mlog_errno(status); | ||
1035 | goto out_trans; | ||
1036 | } | ||
1037 | lock_buffer(dbh); | ||
1038 | memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE); | ||
1039 | unlock_buffer(dbh); | ||
1040 | status = ocfs2_journal_dirty(handle, dbh); | ||
1041 | if (status < 0) { | ||
1042 | mlog_errno(status); | ||
1043 | goto out_trans; | ||
1044 | } | ||
1045 | |||
1046 | /* Update local quotafile info */ | ||
1010 | oinfo->dqi_blocks += 2; | 1047 | oinfo->dqi_blocks += 2; |
1011 | oinfo->dqi_chunks++; | 1048 | oinfo->dqi_chunks++; |
1012 | status = ocfs2_local_write_info(sb, type); | 1049 | status = ocfs2_local_write_info(sb, type); |
@@ -1031,6 +1068,7 @@ out_trans: | |||
1031 | ocfs2_commit_trans(OCFS2_SB(sb), handle); | 1068 | ocfs2_commit_trans(OCFS2_SB(sb), handle); |
1032 | out: | 1069 | out: |
1033 | brelse(bh); | 1070 | brelse(bh); |
1071 | brelse(dbh); | ||
1034 | kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); | 1072 | kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); |
1035 | return ERR_PTR(status); | 1073 | return ERR_PTR(status); |
1036 | } | 1074 | } |
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1048 | struct ocfs2_local_disk_chunk *dchunk; | 1086 | struct ocfs2_local_disk_chunk *dchunk; |
1049 | int epb = ol_quota_entries_per_block(sb); | 1087 | int epb = ol_quota_entries_per_block(sb); |
1050 | unsigned int chunk_blocks; | 1088 | unsigned int chunk_blocks; |
1089 | struct buffer_head *bh; | ||
1090 | u64 p_blkno; | ||
1051 | int status; | 1091 | int status; |
1052 | handle_t *handle; | 1092 | handle_t *handle; |
1053 | 1093 | ||
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1075 | mlog_errno(status); | 1115 | mlog_errno(status); |
1076 | goto out; | 1116 | goto out; |
1077 | } | 1117 | } |
1078 | handle = ocfs2_start_trans(OCFS2_SB(sb), 2); | 1118 | |
1119 | /* Get buffer from the just added block */ | ||
1120 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1121 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, | ||
1122 | &p_blkno, NULL, NULL); | ||
1123 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1124 | if (status < 0) { | ||
1125 | mlog_errno(status); | ||
1126 | goto out; | ||
1127 | } | ||
1128 | bh = sb_getblk(sb, p_blkno); | ||
1129 | if (!bh) { | ||
1130 | status = -ENOMEM; | ||
1131 | mlog_errno(status); | ||
1132 | goto out; | ||
1133 | } | ||
1134 | ocfs2_set_new_buffer_uptodate(lqinode, bh); | ||
1135 | |||
1136 | /* Local quota info, chunk header and the new block we initialize */ | ||
1137 | handle = ocfs2_start_trans(OCFS2_SB(sb), | ||
1138 | OCFS2_LOCAL_QINFO_WRITE_CREDITS + | ||
1139 | 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS); | ||
1079 | if (IS_ERR(handle)) { | 1140 | if (IS_ERR(handle)) { |
1080 | status = PTR_ERR(handle); | 1141 | status = PTR_ERR(handle); |
1081 | mlog_errno(status); | 1142 | mlog_errno(status); |
1082 | goto out; | 1143 | goto out; |
1083 | } | 1144 | } |
1145 | /* Zero created block */ | ||
1146 | status = ocfs2_journal_access_dq(handle, lqinode, bh, | ||
1147 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
1148 | if (status < 0) { | ||
1149 | mlog_errno(status); | ||
1150 | goto out_trans; | ||
1151 | } | ||
1152 | lock_buffer(bh); | ||
1153 | memset(bh->b_data, 0, sb->s_blocksize); | ||
1154 | unlock_buffer(bh); | ||
1155 | status = ocfs2_journal_dirty(handle, bh); | ||
1156 | if (status < 0) { | ||
1157 | mlog_errno(status); | ||
1158 | goto out_trans; | ||
1159 | } | ||
1160 | /* Update chunk header */ | ||
1084 | status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, | 1161 | status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, |
1085 | OCFS2_JOURNAL_ACCESS_WRITE); | 1162 | OCFS2_JOURNAL_ACCESS_WRITE); |
1086 | if (status < 0) { | 1163 | if (status < 0) { |
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1097 | mlog_errno(status); | 1174 | mlog_errno(status); |
1098 | goto out_trans; | 1175 | goto out_trans; |
1099 | } | 1176 | } |
1177 | /* Update file header */ | ||
1100 | oinfo->dqi_blocks++; | 1178 | oinfo->dqi_blocks++; |
1101 | status = ocfs2_local_write_info(sb, type); | 1179 | status = ocfs2_local_write_info(sb, type); |
1102 | if (status < 0) { | 1180 | if (status < 0) { |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 3f661376a2de..e49c41050264 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -17,6 +17,7 @@ | |||
17 | * General Public License for more details. | 17 | * General Public License for more details. |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/kernel.h> | ||
20 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
21 | #include <linux/module.h> | 22 | #include <linux/module.h> |
22 | 23 | ||
@@ -153,7 +154,7 @@ static int status_map[] = { | |||
153 | 154 | ||
154 | static int dlm_status_to_errno(enum dlm_status status) | 155 | static int dlm_status_to_errno(enum dlm_status status) |
155 | { | 156 | { |
156 | BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); | 157 | BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map)); |
157 | 158 | ||
158 | return status_map[status]; | 159 | return status_map[status]; |
159 | } | 160 | } |
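The fixed check in dlm_status_to_errno() is the standard ARRAY_SIZE idiom (hence the new linux/kernel.h include above); the old expression compared with `>` against the element count, which is off by one, and never rejected negative values. A small runnable illustration of the corrected bounds check:

    #include <assert.h>
    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const int status_map[] = { 0, -1, -2, -3 };

    static int status_to_errno(int status)
    {
        /* both ends guarded: no negative index, no index == count */
        assert(status >= 0 && status < (int)ARRAY_SIZE(status_map));
        return status_map[status];
    }

    int main(void)
    {
        printf("%d\n", status_to_errno(3));   /* last valid index */
        return 0;
    }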
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7efb349fb9bd..a3f8871d21fd 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb, | |||
777 | } | 777 | } |
778 | di = (struct ocfs2_dinode *) (*bh)->b_data; | 778 | di = (struct ocfs2_dinode *) (*bh)->b_data; |
779 | memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); | 779 | memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); |
780 | spin_lock_init(&stats->b_lock); | ||
780 | status = ocfs2_verify_volume(di, *bh, blksize, stats); | 781 | status = ocfs2_verify_volume(di, *bh, blksize, stats); |
781 | if (status >= 0) | 782 | if (status >= 0) |
782 | goto bail; | 783 | goto bail; |
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1182 | wake_up(&osb->osb_mount_event); | 1183 | wake_up(&osb->osb_mount_event); |
1183 | 1184 | ||
1184 | /* Start this when the mount is almost sure of being successful */ | 1185 | /* Start this when the mount is almost sure of being successful */ |
1185 | ocfs2_orphan_scan_init(osb); | 1186 | ocfs2_orphan_scan_start(osb); |
1186 | 1187 | ||
1187 | mlog_exit(status); | 1188 | mlog_exit(status); |
1188 | return status; | 1189 | return status; |
@@ -1213,14 +1214,31 @@ static int ocfs2_get_sb(struct file_system_type *fs_type, | |||
1213 | mnt); | 1214 | mnt); |
1214 | } | 1215 | } |
1215 | 1216 | ||
1217 | static void ocfs2_kill_sb(struct super_block *sb) | ||
1218 | { | ||
1219 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
1220 | |||
1221 | /* Failed mount? */ | ||
1222 | if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) | ||
1223 | goto out; | ||
1224 | |||
1225 | /* Prevent further queueing of inode drop events */ | ||
1226 | spin_lock(&dentry_list_lock); | ||
1227 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); | ||
1228 | spin_unlock(&dentry_list_lock); | ||
1229 | /* Wait for work to finish and/or remove it */ | ||
1230 | cancel_work_sync(&osb->dentry_lock_work); | ||
1231 | out: | ||
1232 | kill_block_super(sb); | ||
1233 | } | ||
1234 | |||
1216 | static struct file_system_type ocfs2_fs_type = { | 1235 | static struct file_system_type ocfs2_fs_type = { |
1217 | .owner = THIS_MODULE, | 1236 | .owner = THIS_MODULE, |
1218 | .name = "ocfs2", | 1237 | .name = "ocfs2", |
1219 | .get_sb = ocfs2_get_sb, /* is this called when we mount | 1238 | .get_sb = ocfs2_get_sb, /* is this called when we mount |
1220 | * the fs? */ | 1239 | * the fs? */ |
1221 | .kill_sb = kill_block_super, /* set to the generic one | 1240 | .kill_sb = ocfs2_kill_sb, |
1222 | * right now, but do we | 1241 | |
1223 | * need to change that? */ | ||
1224 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, | 1242 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, |
1225 | .next = NULL | 1243 | .next = NULL |
1226 | }; | 1244 | }; |
@@ -1819,6 +1837,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1819 | 1837 | ||
1820 | debugfs_remove(osb->osb_ctxt); | 1838 | debugfs_remove(osb->osb_ctxt); |
1821 | 1839 | ||
1840 | /* | ||
1841 | * Flush inode dropping work queue so that deletes are | ||
1842 | * performed while the filesystem is still working | ||
1843 | */ | ||
1844 | ocfs2_drop_all_dl_inodes(osb); | ||
1845 | |||
1822 | /* Orphan scan should be stopped as early as possible */ | 1846 | /* Orphan scan should be stopped as early as possible */ |
1823 | ocfs2_orphan_scan_stop(osb); | 1847 | ocfs2_orphan_scan_stop(osb); |
1824 | 1848 | ||
@@ -1981,6 +2005,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1981 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 2005 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
1982 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 2006 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
1983 | 2007 | ||
2008 | ocfs2_orphan_scan_init(osb); | ||
2009 | |||
1984 | status = ocfs2_recovery_init(osb); | 2010 | status = ocfs2_recovery_init(osb); |
1985 | if (status) { | 2011 | if (status) { |
1986 | mlog(ML_ERROR, "Unable to initialize recovery state\n"); | 2012 | mlog(ML_ERROR, "Unable to initialize recovery state\n"); |
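ocfs2_kill_sb() above follows a common teardown order: flag the superblock so no further dentry-lock drop work can be queued, drain what is already queued with cancel_work_sync(), then fall through to the generic kill_block_super(). A toy, compilable sketch of just that ordering (stub types and functions, not the kernel API):

    #include <stdio.h>

    struct osb { int drop_immed; int work_queued; };

    static void set_drop_flag(struct osb *osb)    { osb->drop_immed = 1; }
    static void cancel_work_sync(struct osb *osb) { osb->work_queued = 0; }
    static void kill_block_super(void)            { puts("generic teardown"); }

    static void kill_sb(struct osb *osb)
    {
        if (!osb)                 /* failed mount: nothing to drain */
            goto out;
        set_drop_flag(osb);       /* 1. forbid further queueing */
        cancel_work_sync(osb);    /* 2. wait for / remove queued work */
    out:
        kill_block_super();       /* 3. generic unmount */
    }

    int main(void)
    {
        struct osb o = { 0, 1 };
        kill_sb(&o);
        return 0;
    }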
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ba320e250747..d1a27cda984f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode, | |||
1052 | struct ocfs2_xattr_block *xb; | 1052 | struct ocfs2_xattr_block *xb; |
1053 | struct ocfs2_xattr_value_root *xv; | 1053 | struct ocfs2_xattr_value_root *xv; |
1054 | size_t size; | 1054 | size_t size; |
1055 | int ret = -ENODATA, name_offset, name_len, block_off, i; | 1055 | int ret = -ENODATA, name_offset, name_len, i; |
1056 | int uninitialized_var(block_off); | ||
1056 | 1057 | ||
1057 | xs->bucket = ocfs2_xattr_bucket_new(inode); | 1058 | xs->bucket = ocfs2_xattr_bucket_new(inode); |
1058 | if (!xs->bucket) { | 1059 | if (!xs->bucket) { |
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
@@ -199,7 +199,7 @@ out: | |||
199 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | 199 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, |
200 | struct file *filp) | 200 | struct file *filp) |
201 | { | 201 | { |
202 | int err; | 202 | int ret; |
203 | struct iattr newattrs; | 203 | struct iattr newattrs; |
204 | 204 | ||
205 | /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ | 205 | /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ |
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | |||
214 | } | 214 | } |
215 | 215 | ||
216 | /* Remove suid/sgid on truncate too */ | 216 | /* Remove suid/sgid on truncate too */ |
217 | newattrs.ia_valid |= should_remove_suid(dentry); | 217 | ret = should_remove_suid(dentry); |
218 | if (ret) | ||
219 | newattrs.ia_valid |= ret | ATTR_FORCE; | ||
218 | 220 | ||
219 | mutex_lock(&dentry->d_inode->i_mutex); | 221 | mutex_lock(&dentry->d_inode->i_mutex); |
220 | err = notify_change(dentry, &newattrs); | 222 | ret = notify_change(dentry, &newattrs); |
221 | mutex_unlock(&dentry->d_inode->i_mutex); | 223 | mutex_unlock(&dentry->d_inode->i_mutex); |
222 | return err; | 224 | return ret; |
223 | } | 225 | } |
224 | 226 | ||
225 | static long do_sys_truncate(const char __user *pathname, loff_t length) | 227 | static long do_sys_truncate(const char __user *pathname, loff_t length) |
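The do_truncate() hunk above reuses should_remove_suid()'s return value and only ORs in ATTR_FORCE when there is actually a suid/sgid bit to kill, so notify_change() applies the mode change even when the caller is not the owner. A userland probe of the visible behaviour — whether the bit is really cleared depends on the filesystem and your privileges, so treat this as a smoke test, not a proof:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "suid_probe.tmp";
        int fd = open(path, O_CREAT | O_RDWR, 0755);
        if (fd < 0) { perror("open"); return 1; }
        fchmod(fd, 04755);          /* set suid on our own file */
        ftruncate(fd, 4096);        /* truncation should kill suid */
        struct stat st;
        fstat(fd, &st);
        printf("mode after truncate: %o\n", (unsigned)(st.st_mode & 07777));
        close(fd);
        unlink(path);
        return 0;
    }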
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, | |||
957 | int error; | 959 | int error; |
958 | struct file *f; | 960 | struct file *f; |
959 | 961 | ||
962 | validate_creds(cred); | ||
963 | |||
960 | /* | 964 | /* |
961 | * We must always pass in a valid mount pointer. Historically | 965 | * We must always pass in a valid mount pointer. Historically |
962 | * callers got away with not passing it, but we must enforce this at | 966 | * callers got away with not passing it, but we must enforce this at |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index ea4e6cb29e13..fbeaddf595d3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev, | |||
248 | part_stat_read(p, merges[WRITE]), | 248 | part_stat_read(p, merges[WRITE]), |
249 | (unsigned long long)part_stat_read(p, sectors[WRITE]), | 249 | (unsigned long long)part_stat_read(p, sectors[WRITE]), |
250 | jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), | 250 | jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), |
251 | p->in_flight, | 251 | part_in_flight(p), |
252 | jiffies_to_msecs(part_stat_read(p, io_ticks)), | 252 | jiffies_to_msecs(part_stat_read(p, io_ticks)), |
253 | jiffies_to_msecs(part_stat_read(p, time_in_queue))); | 253 | jiffies_to_msecs(part_stat_read(p, time_in_queue))); |
254 | } | 254 | } |
255 | 255 | ||
256 | ssize_t part_inflight_show(struct device *dev, | ||
257 | struct device_attribute *attr, char *buf) | ||
258 | { | ||
259 | struct hd_struct *p = dev_to_part(dev); | ||
260 | |||
261 | return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); | ||
262 | } | ||
263 | |||
256 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 264 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
257 | ssize_t part_fail_show(struct device *dev, | 265 | ssize_t part_fail_show(struct device *dev, |
258 | struct device_attribute *attr, char *buf) | 266 | struct device_attribute *attr, char *buf) |
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | |||
281 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | 289 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
282 | static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); | 290 | static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); |
283 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | 291 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
292 | static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); | ||
284 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 293 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
285 | static struct device_attribute dev_attr_fail = | 294 | static struct device_attribute dev_attr_fail = |
286 | __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); | 295 | __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); |
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = { | |||
292 | &dev_attr_size.attr, | 301 | &dev_attr_size.attr, |
293 | &dev_attr_alignment_offset.attr, | 302 | &dev_attr_alignment_offset.attr, |
294 | &dev_attr_stat.attr, | 303 | &dev_attr_stat.attr, |
304 | &dev_attr_inflight.attr, | ||
295 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 305 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
296 | &dev_attr_fail.attr, | 306 | &dev_attr_fail.attr, |
297 | #endif | 307 | #endif |
@@ -302,7 +312,7 @@ static struct attribute_group part_attr_group = { | |||
302 | .attrs = part_attrs, | 312 | .attrs = part_attrs, |
303 | }; | 313 | }; |
304 | 314 | ||
305 | static struct attribute_group *part_attr_groups[] = { | 315 | static const struct attribute_group *part_attr_groups[] = { |
306 | &part_attr_group, | 316 | &part_attr_group, |
307 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 317 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
308 | &blk_trace_attr_group, | 318 | &blk_trace_attr_group, |
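The new per-partition `inflight` attribute prints two space-padded counters, reads then writes currently in flight. A minimal reader; the device name here is an assumption, substitute one that exists on your system:

    #include <stdio.h>

    int main(void)
    {
        /* assumed device path; adjust for your block device */
        FILE *f = fopen("/sys/block/sda/inflight", "r");
        if (!f) { perror("open inflight"); return 1; }
        unsigned int r, w;
        if (fscanf(f, "%u %u", &r, &w) != 2) { fclose(f); return 1; }
        printf("in-flight reads: %u, writes: %u\n", r, w);
        fclose(f);
        return 0;
    }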
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3ce5ae9e3d2d..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task) | |||
234 | 234 | ||
235 | struct mm_struct *mm_for_maps(struct task_struct *task) | 235 | struct mm_struct *mm_for_maps(struct task_struct *task) |
236 | { | 236 | { |
237 | struct mm_struct *mm = get_task_mm(task); | 237 | struct mm_struct *mm; |
238 | if (!mm) | 238 | |
239 | if (mutex_lock_killable(&task->cred_guard_mutex)) | ||
239 | return NULL; | 240 | return NULL; |
240 | down_read(&mm->mmap_sem); | 241 | |
241 | task_lock(task); | 242 | mm = get_task_mm(task); |
242 | if (task->mm != mm) | 243 | if (mm && mm != current->mm && |
243 | goto out; | 244 | !ptrace_may_access(task, PTRACE_MODE_READ)) { |
244 | if (task->mm != current->mm && | 245 | mmput(mm); |
245 | __ptrace_may_access(task, PTRACE_MODE_READ) < 0) | 246 | mm = NULL; |
246 | goto out; | 247 | } |
247 | task_unlock(task); | 248 | mutex_unlock(&task->cred_guard_mutex); |
249 | |||
248 | return mm; | 250 | return mm; |
249 | out: | ||
250 | task_unlock(task); | ||
251 | up_read(&mm->mmap_sem); | ||
252 | mmput(mm); | ||
253 | return NULL; | ||
254 | } | 251 | } |
255 | 252 | ||
256 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) | 253 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) |
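mm_for_maps() now serializes against exec via cred_guard_mutex and gates access with ptrace_may_access() instead of open-coded mm comparisons; taking mmap_sem moves out to the m_start() callers in task_mmu.c/task_nommu.c below. From userspace the contract is unchanged: /proc/<pid>/maps is readable only if you could ptrace the target. A quick probe (the failure may surface at open or at first read, depending on kernel version):

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/1/maps", "r");  /* init: usually not ours */
        if (!f) {
            perror("fopen /proc/1/maps");      /* expect EACCES unprivileged */
            return 0;
        }
        char line[256];
        if (fgets(line, sizeof(line), f))      /* may fail here instead */
            fputs(line, stdout);
        fclose(f);
        return 0;
    }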
@@ -1006,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
1006 | 1003 | ||
1007 | if (!task) | 1004 | if (!task) |
1008 | return -ESRCH; | 1005 | return -ESRCH; |
1009 | task_lock(task); | 1006 | oom_adjust = task->oomkilladj; |
1010 | if (task->mm) | ||
1011 | oom_adjust = task->mm->oom_adj; | ||
1012 | else | ||
1013 | oom_adjust = OOM_DISABLE; | ||
1014 | task_unlock(task); | ||
1015 | put_task_struct(task); | 1007 | put_task_struct(task); |
1016 | 1008 | ||
1017 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1009 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
@@ -1040,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1040 | task = get_proc_task(file->f_path.dentry->d_inode); | 1032 | task = get_proc_task(file->f_path.dentry->d_inode); |
1041 | if (!task) | 1033 | if (!task) |
1042 | return -ESRCH; | 1034 | return -ESRCH; |
1043 | task_lock(task); | 1035 | if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { |
1044 | if (!task->mm) { | ||
1045 | task_unlock(task); | ||
1046 | put_task_struct(task); | ||
1047 | return -EINVAL; | ||
1048 | } | ||
1049 | if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
1050 | task_unlock(task); | ||
1051 | put_task_struct(task); | 1036 | put_task_struct(task); |
1052 | return -EACCES; | 1037 | return -EACCES; |
1053 | } | 1038 | } |
1054 | task->mm->oom_adj = oom_adjust; | 1039 | task->oomkilladj = oom_adjust; |
1055 | task_unlock(task); | ||
1056 | put_task_struct(task); | 1040 | put_task_struct(task); |
1057 | if (end - buffer == 0) | 1041 | if (end - buffer == 0) |
1058 | return -EIO; | 1042 | return -EIO; |
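With oom_adj moved back from mm_struct to the task itself (task->oomkilladj), /proc/<pid>/oom_adj works again for tasks without an mm, and lowering the value still requires CAP_SYS_RESOURCE. A short exercise of this legacy interface (later kernels replaced it with oom_score_adj):

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/self/oom_adj", "r+");
        if (!f) { perror("oom_adj"); return 1; }
        int v;
        if (fscanf(f, "%d", &v) == 1)
            printf("current oom_adj: %d\n", v);
        /* Raising the value is always allowed; lowering it below the
         * current setting needs CAP_SYS_RESOURCE, per the hunk above. */
        rewind(f);
        fprintf(f, "%d\n", 0);
        fclose(f);
        return 0;
    }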
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6f61b7cc32e0..9bd8be1d235c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
119 | mm = mm_for_maps(priv->task); | 119 | mm = mm_for_maps(priv->task); |
120 | if (!mm) | 120 | if (!mm) |
121 | return NULL; | 121 | return NULL; |
122 | down_read(&mm->mmap_sem); | ||
122 | 123 | ||
123 | tail_vma = get_gate_vma(priv->task); | 124 | tail_vma = get_gate_vma(priv->task); |
124 | priv->tail_vma = tail_vma; | 125 | priv->tail_vma = tail_vma; |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 64a72e2e7650..8f5c05d3dbd3 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
189 | priv->task = NULL; | 189 | priv->task = NULL; |
190 | return NULL; | 190 | return NULL; |
191 | } | 191 | } |
192 | down_read(&mm->mmap_sem); | ||
192 | 193 | ||
193 | /* start from the Nth VMA */ | 194 | /* start from the Nth VMA */ |
194 | for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) | 195 | for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 70f36c043d62..38f7bd559f35 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -2043,7 +2043,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, | |||
2043 | invalidate_bdev(sb->s_bdev); | 2043 | invalidate_bdev(sb->s_bdev); |
2044 | } | 2044 | } |
2045 | mutex_lock(&dqopt->dqonoff_mutex); | 2045 | mutex_lock(&dqopt->dqonoff_mutex); |
2046 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | ||
2047 | if (sb_has_quota_loaded(sb, type)) { | 2046 | if (sb_has_quota_loaded(sb, type)) { |
2048 | error = -EBUSY; | 2047 | error = -EBUSY; |
2049 | goto out_lock; | 2048 | goto out_lock; |
@@ -2054,9 +2053,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, | |||
2054 | * possible) Also nobody should write to the file - we use | 2053 | * possible) Also nobody should write to the file - we use |
2055 | * special IO operations which ignore the immutable bit. */ | 2054 | * special IO operations which ignore the immutable bit. */ |
2056 | down_write(&dqopt->dqptr_sem); | 2055 | down_write(&dqopt->dqptr_sem); |
2056 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | ||
2057 | oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | | 2057 | oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | |
2058 | S_NOQUOTA); | 2058 | S_NOQUOTA); |
2059 | inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; | 2059 | inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; |
2060 | mutex_unlock(&inode->i_mutex); | ||
2060 | up_write(&dqopt->dqptr_sem); | 2061 | up_write(&dqopt->dqptr_sem); |
2061 | sb->dq_op->drop(inode); | 2062 | sb->dq_op->drop(inode); |
2062 | } | 2063 | } |
@@ -2080,7 +2081,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, | |||
2080 | goto out_file_init; | 2081 | goto out_file_init; |
2081 | } | 2082 | } |
2082 | mutex_unlock(&dqopt->dqio_mutex); | 2083 | mutex_unlock(&dqopt->dqio_mutex); |
2083 | mutex_unlock(&inode->i_mutex); | ||
2084 | spin_lock(&dq_state_lock); | 2084 | spin_lock(&dq_state_lock); |
2085 | dqopt->flags |= dquot_state_flag(flags, type); | 2085 | dqopt->flags |= dquot_state_flag(flags, type); |
2086 | spin_unlock(&dq_state_lock); | 2086 | spin_unlock(&dq_state_lock); |
@@ -2096,13 +2096,14 @@ out_file_init: | |||
2096 | out_lock: | 2096 | out_lock: |
2097 | if (oldflags != -1) { | 2097 | if (oldflags != -1) { |
2098 | down_write(&dqopt->dqptr_sem); | 2098 | down_write(&dqopt->dqptr_sem); |
2099 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | ||
2099 | /* Set the flags back (in the case of accidental quotaon() | 2100 | /* Set the flags back (in the case of accidental quotaon() |
2100 | * on a wrong file we don't want to mess up the flags) */ | 2101 | * on a wrong file we don't want to mess up the flags) */ |
2101 | inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); | 2102 | inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); |
2102 | inode->i_flags |= oldflags; | 2103 | inode->i_flags |= oldflags; |
2104 | mutex_unlock(&inode->i_mutex); | ||
2103 | up_write(&dqopt->dqptr_sem); | 2105 | up_write(&dqopt->dqptr_sem); |
2104 | } | 2106 | } |
2105 | mutex_unlock(&inode->i_mutex); | ||
2106 | mutex_unlock(&dqopt->dqonoff_mutex); | 2107 | mutex_unlock(&dqopt->dqonoff_mutex); |
2107 | out_fmt: | 2108 | out_fmt: |
2108 | put_quota_format(fmt); | 2109 | put_quota_format(fmt); |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index ebb2c417912c..11f0c06316de 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/ramfs.h> | 20 | #include <linux/ramfs.h> |
21 | #include <linux/pagevec.h> | 21 | #include <linux/pagevec.h> |
22 | #include <linux/mman.h> | 22 | #include <linux/mman.h> |
23 | #include <linux/sched.h> | ||
23 | 24 | ||
24 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
25 | #include "internal.h" | 26 | #include "internal.h" |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 0ff7566c767c..a7f0110fca4c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -46,6 +46,7 @@ static const struct super_operations ramfs_ops; | |||
46 | static const struct inode_operations ramfs_dir_inode_operations; | 46 | static const struct inode_operations ramfs_dir_inode_operations; |
47 | 47 | ||
48 | static struct backing_dev_info ramfs_backing_dev_info = { | 48 | static struct backing_dev_info ramfs_backing_dev_info = { |
49 | .name = "ramfs", | ||
49 | .ra_pages = 0, /* No readahead */ | 50 | .ra_pages = 0, /* No readahead */ |
50 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | | 51 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | |
51 | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | | 52 | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | |
diff --git a/fs/select.c b/fs/select.c index d870237e42c7..8084834e123e 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq) | |||
110 | { | 110 | { |
111 | init_poll_funcptr(&pwq->pt, __pollwait); | 111 | init_poll_funcptr(&pwq->pt, __pollwait); |
112 | pwq->polling_task = current; | 112 | pwq->polling_task = current; |
113 | pwq->triggered = 0; | ||
113 | pwq->error = 0; | 114 | pwq->error = 0; |
114 | pwq->table = NULL; | 115 | pwq->table = NULL; |
115 | pwq->inline_index = 0; | 116 | pwq->inline_index = 0; |
diff --git a/fs/splice.c b/fs/splice.c index 73766d24f97b..7394e9e17534 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | |||
502 | len = left; | 502 | len = left; |
503 | 503 | ||
504 | ret = __generic_file_splice_read(in, ppos, pipe, len, flags); | 504 | ret = __generic_file_splice_read(in, ppos, pipe, len, flags); |
505 | if (ret > 0) | 505 | if (ret > 0) { |
506 | *ppos += ret; | 506 | *ppos += ret; |
507 | file_accessed(in); | ||
508 | } | ||
507 | 509 | ||
508 | return ret; | 510 | return ret; |
509 | } | 511 | } |
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
963 | 965 | ||
964 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | 966 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
965 | ret = file_remove_suid(out); | 967 | ret = file_remove_suid(out); |
966 | if (!ret) | 968 | if (!ret) { |
969 | file_update_time(out); | ||
967 | ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); | 970 | ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); |
971 | } | ||
968 | mutex_unlock(&inode->i_mutex); | 972 | mutex_unlock(&inode->i_mutex); |
969 | } while (ret > 0); | 973 | } while (ret > 0); |
970 | splice_from_pipe_end(pipe, &sd); | 974 | splice_from_pipe_end(pipe, &sd); |
@@ -976,25 +980,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
976 | 980 | ||
977 | if (ret > 0) { | 981 | if (ret > 0) { |
978 | unsigned long nr_pages; | 982 | unsigned long nr_pages; |
983 | int err; | ||
979 | 984 | ||
980 | *ppos += ret; | ||
981 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 985 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
982 | 986 | ||
983 | /* | 987 | err = generic_write_sync(out, *ppos, ret); |
984 | * If file or inode is SYNC and we actually wrote some data, | 988 | if (err) |
985 | * sync it. | 989 | ret = err; |
986 | */ | 990 | else |
987 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | 991 | *ppos += ret; |
988 | int err; | ||
989 | |||
990 | mutex_lock(&inode->i_mutex); | ||
991 | err = generic_osync_inode(inode, mapping, | ||
992 | OSYNC_METADATA|OSYNC_DATA); | ||
993 | mutex_unlock(&inode->i_mutex); | ||
994 | |||
995 | if (err) | ||
996 | ret = err; | ||
997 | } | ||
998 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 992 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
999 | } | 993 | } |
1000 | 994 | ||
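The splice.c hunks make generic_file_splice_read() update the source file's atime via file_accessed(), and the write side now bumps file timestamps and defers O_SYNC handling to the new generic_write_sync() helper instead of open-coding generic_osync_inode(). So splice now matches read(2)/write(2) semantics. A runnable consumer exercising the read path that now touches atime (splice(2) requires a pipe on one side):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        if (argc < 2) { fprintf(stderr, "usage: %s file\n", argv[0]); return 1; }
        int in = open(argv[1], O_RDONLY);
        if (in < 0) { perror("open"); return 1; }
        int p[2];
        if (pipe(p) < 0) { perror("pipe"); return 1; }
        loff_t off = 0;
        /* file -> pipe: this is the generic_file_splice_read() path */
        ssize_t n = splice(in, &off, p[1], NULL, 4096, SPLICE_F_MOVE);
        if (n > 0)
            splice(p[0], NULL, STDOUT_FILENO, NULL, n, SPLICE_F_MOVE);
        close(in);
        return 0;
    }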
diff --git a/fs/super.c b/fs/super.c index 2761d3e22ed9..b03fea8fbfb6 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
62 | s = NULL; | 62 | s = NULL; |
63 | goto out; | 63 | goto out; |
64 | } | 64 | } |
65 | INIT_LIST_HEAD(&s->s_dirty); | ||
66 | INIT_LIST_HEAD(&s->s_io); | ||
67 | INIT_LIST_HEAD(&s->s_more_io); | ||
68 | INIT_LIST_HEAD(&s->s_files); | 65 | INIT_LIST_HEAD(&s->s_files); |
69 | INIT_LIST_HEAD(&s->s_instances); | 66 | INIT_LIST_HEAD(&s->s_instances); |
70 | INIT_HLIST_HEAD(&s->s_anon); | 67 | INIT_HLIST_HEAD(&s->s_anon); |
@@ -171,7 +168,7 @@ int __put_super_and_need_restart(struct super_block *sb) | |||
171 | * Drops a temporary reference, frees superblock if there's no | 168 | * Drops a temporary reference, frees superblock if there's no |
172 | * references left. | 169 | * references left. |
173 | */ | 170 | */ |
174 | static void put_super(struct super_block *sb) | 171 | void put_super(struct super_block *sb) |
175 | { | 172 | { |
176 | spin_lock(&sb_lock); | 173 | spin_lock(&sb_lock); |
177 | __put_super(sb); | 174 | __put_super(sb); |
@@ -710,6 +707,12 @@ static int set_bdev_super(struct super_block *s, void *data) | |||
710 | { | 707 | { |
711 | s->s_bdev = data; | 708 | s->s_bdev = data; |
712 | s->s_dev = s->s_bdev->bd_dev; | 709 | s->s_dev = s->s_bdev->bd_dev; |
710 | |||
711 | /* | ||
712 | * We set the bdi here to the queue backing, file systems can | ||
713 | * overwrite this in ->fill_super() | ||
714 | */ | ||
715 | s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; | ||
713 | return 0; | 716 | return 0; |
714 | } | 717 | } |
715 | 718 | ||
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -19,20 +19,29 @@ | |||
19 | SYNC_FILE_RANGE_WAIT_AFTER) | 19 | SYNC_FILE_RANGE_WAIT_AFTER) |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) | 22 | * Do the filesystem syncing work. For simple filesystems |
23 | * just dirties buffers with inodes so we have to submit IO for these buffers | 23 | * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to |
24 | * via __sync_blockdev(). This also speeds up the wait == 1 case since in that | 24 | * submit IO for these buffers via __sync_blockdev(). This also speeds up the |
25 | * case write_inode() functions do sync_dirty_buffer() and thus effectively | 25 | * wait == 1 case since in that case write_inode() functions do |
26 | * write one block at a time. | 26 | * sync_dirty_buffer() and thus effectively write one block at a time. |
27 | */ | 27 | */ |
28 | static int __sync_filesystem(struct super_block *sb, int wait) | 28 | static int __sync_filesystem(struct super_block *sb, int wait) |
29 | { | 29 | { |
30 | /* | ||
31 | * This should be safe, as we require bdi backing to actually | ||
32 | * write out data in the first place | ||
33 | */ | ||
34 | if (!sb->s_bdi) | ||
35 | return 0; | ||
36 | |||
30 | /* Avoid doing twice syncing and cache pruning for quota sync */ | 37 | /* Avoid doing twice syncing and cache pruning for quota sync */ |
31 | if (!wait) | 38 | if (!wait) { |
32 | writeout_quota_sb(sb, -1); | 39 | writeout_quota_sb(sb, -1); |
33 | else | 40 | writeback_inodes_sb(sb); |
41 | } else { | ||
34 | sync_quota_sb(sb, -1); | 42 | sync_quota_sb(sb, -1); |
35 | sync_inodes_sb(sb, wait); | 43 | sync_inodes_sb(sb); |
44 | } | ||
36 | if (sb->s_op->sync_fs) | 45 | if (sb->s_op->sync_fs) |
37 | sb->s_op->sync_fs(sb, wait); | 46 | sb->s_op->sync_fs(sb, wait); |
38 | return __sync_blockdev(sb->s_bdev, wait); | 47 | return __sync_blockdev(sb->s_bdev, wait); |
@@ -99,7 +108,7 @@ restart: | |||
99 | spin_unlock(&sb_lock); | 108 | spin_unlock(&sb_lock); |
100 | 109 | ||
101 | down_read(&sb->s_umount); | 110 | down_read(&sb->s_umount); |
102 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root) | 111 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi) |
103 | __sync_filesystem(sb, wait); | 112 | __sync_filesystem(sb, wait); |
104 | up_read(&sb->s_umount); | 113 | up_read(&sb->s_umount); |
105 | 114 | ||
@@ -118,7 +127,7 @@ restart: | |||
118 | */ | 127 | */ |
119 | SYSCALL_DEFINE0(sync) | 128 | SYSCALL_DEFINE0(sync) |
120 | { | 129 | { |
121 | wakeup_pdflush(0); | 130 | wakeup_flusher_threads(0); |
122 | sync_filesystems(0); | 131 | sync_filesystems(0); |
123 | sync_filesystems(1); | 132 | sync_filesystems(1); |
124 | if (unlikely(laptop_mode)) | 133 | if (unlikely(laptop_mode)) |
@@ -176,19 +185,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) | |||
176 | } | 185 | } |
177 | 186 | ||
178 | /** | 187 | /** |
179 | * vfs_fsync - perform a fsync or fdatasync on a file | 188 | * vfs_fsync_range - helper to sync a range of data & metadata to disk |
180 | * @file: file to sync | 189 | * @file: file to sync |
181 | * @dentry: dentry of @file | 190 | * @dentry: dentry of @file |
182 | * @data: only perform a fdatasync operation | 191 | * @start: offset in bytes of the beginning of data range to sync |
192 | * @end: offset in bytes of the end of data range (inclusive) | ||
193 | * @datasync: perform only datasync | ||
183 | * | 194 | * |
184 | * Write back data and metadata for @file to disk. If @datasync is | 195 | * Write back data in range @start..@end and metadata for @file to disk. If |
185 | * set only metadata needed to access modified file data is written. | 196 | * @datasync is set only metadata needed to access modified file data is |
197 | * written. | ||
186 | * | 198 | * |
187 | * In case this function is called from nfsd @file may be %NULL and | 199 | * In case this function is called from nfsd @file may be %NULL and |
188 | * only @dentry is set. This can only happen when the filesystem | 200 | * only @dentry is set. This can only happen when the filesystem |
189 | * implements the export_operations API. | 201 | * implements the export_operations API. |
190 | */ | 202 | */ |
191 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 203 | int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, |
204 | loff_t end, int datasync) | ||
192 | { | 205 | { |
193 | const struct file_operations *fop; | 206 | const struct file_operations *fop; |
194 | struct address_space *mapping; | 207 | struct address_space *mapping; |
@@ -212,7 +225,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
212 | goto out; | 225 | goto out; |
213 | } | 226 | } |
214 | 227 | ||
215 | ret = filemap_fdatawrite(mapping); | 228 | ret = filemap_write_and_wait_range(mapping, start, end); |
216 | 229 | ||
217 | /* | 230 | /* |
218 | * We need to protect against concurrent writers, which could cause | 231 | * We need to protect against concurrent writers, which could cause |
@@ -223,12 +236,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
223 | if (!ret) | 236 | if (!ret) |
224 | ret = err; | 237 | ret = err; |
225 | mutex_unlock(&mapping->host->i_mutex); | 238 | mutex_unlock(&mapping->host->i_mutex); |
226 | err = filemap_fdatawait(mapping); | 239 | |
227 | if (!ret) | ||
228 | ret = err; | ||
229 | out: | 240 | out: |
230 | return ret; | 241 | return ret; |
231 | } | 242 | } |
243 | EXPORT_SYMBOL(vfs_fsync_range); | ||
244 | |||
245 | /** | ||
246 | * vfs_fsync - perform a fsync or fdatasync on a file | ||
247 | * @file: file to sync | ||
248 | * @dentry: dentry of @file | ||
249 | * @datasync: only perform a fdatasync operation | ||
250 | * | ||
251 | * Write back data and metadata for @file to disk. If @datasync is | ||
252 | * set only metadata needed to access modified file data is written. | ||
253 | * | ||
254 | * In case this function is called from nfsd @file may be %NULL and | ||
255 | * only @dentry is set. This can only happen when the filesystem | ||
256 | * implements the export_operations API. | ||
257 | */ | ||
258 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
259 | { | ||
260 | return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync); | ||
261 | } | ||
232 | EXPORT_SYMBOL(vfs_fsync); | 262 | EXPORT_SYMBOL(vfs_fsync); |
233 | 263 | ||
234 | static int do_fsync(unsigned int fd, int datasync) | 264 | static int do_fsync(unsigned int fd, int datasync) |
@@ -254,6 +284,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) | |||
254 | return do_fsync(fd, 1); | 284 | return do_fsync(fd, 1); |
255 | } | 285 | } |
256 | 286 | ||
287 | /** | ||
288 | * generic_write_sync - perform syncing after a write if file / inode is sync | ||
289 | * @file: file to which the write happened | ||
290 | * @pos: offset where the write started | ||
291 | * @count: length of the write | ||
292 | * | ||
293 | * This is just a simple wrapper about our general syncing function. | ||
294 | */ | ||
295 | int generic_write_sync(struct file *file, loff_t pos, loff_t count) | ||
296 | { | ||
297 | if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) | ||
298 | return 0; | ||
299 | return vfs_fsync_range(file, file->f_path.dentry, pos, | ||
300 | pos + count - 1, 1); | ||
301 | } | ||
302 | EXPORT_SYMBOL(generic_write_sync); | ||
303 | |||
257 | /* | 304 | /* |
258 | * sys_sync_file_range() permits finely controlled syncing over a segment of | 305 | * sys_sync_file_range() permits finely controlled syncing over a segment of |
259 | * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is | 306 | * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is |
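generic_write_sync() above is a no-op unless O_SYNC or IS_SYNC applies, and then flushes exactly the byte range [pos, pos+count-1] through vfs_fsync_range(). The closest userland analogue is sync_file_range(2), already referenced in this file's comments; note that it flushes data only, whereas the kernel helper also writes the metadata a datasync needs, so this is an approximation:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("sync_demo.tmp", O_CREAT | O_WRONLY, 0644);
        if (fd < 0) { perror("open"); return 1; }
        const char buf[] = "hello";
        off_t pos = 0;
        ssize_t n = pwrite(fd, buf, sizeof(buf) - 1, pos);
        if (n > 0) {
            /* mirror of generic_write_sync(): flush only [pos, pos+n-1] */
            if (sync_file_range(fd, pos, n,
                                SYNC_FILE_RANGE_WRITE |
                                SYNC_FILE_RANGE_WAIT_AFTER) < 0)
                perror("sync_file_range");
        }
        close(fd);
        unlink("sync_demo.tmp");
        return 0;
    }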
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index d88d0fac9fa5..0050fc40e8c9 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, | |||
760 | const struct inode_operations sysfs_dir_inode_operations = { | 760 | const struct inode_operations sysfs_dir_inode_operations = { |
761 | .lookup = sysfs_lookup, | 761 | .lookup = sysfs_lookup, |
762 | .setattr = sysfs_setattr, | 762 | .setattr = sysfs_setattr, |
763 | .setxattr = sysfs_setxattr, | ||
763 | }; | 764 | }; |
764 | 765 | ||
765 | static void remove_dir(struct sysfs_dirent *sd) | 766 | static void remove_dir(struct sysfs_dirent *sd) |
@@ -939,8 +940,10 @@ again: | |||
939 | /* Remove from old parent's list and insert into new parent's list. */ | 940 | /* Remove from old parent's list and insert into new parent's list. */ |
940 | sysfs_unlink_sibling(sd); | 941 | sysfs_unlink_sibling(sd); |
941 | sysfs_get(new_parent_sd); | 942 | sysfs_get(new_parent_sd); |
943 | drop_nlink(old_parent->d_inode); | ||
942 | sysfs_put(sd->s_parent); | 944 | sysfs_put(sd->s_parent); |
943 | sd->s_parent = new_parent_sd; | 945 | sd->s_parent = new_parent_sd; |
946 | inc_nlink(new_parent->d_inode); | ||
944 | sysfs_link_sibling(sd); | 947 | sysfs_link_sibling(sd); |
945 | 948 | ||
946 | out_unlock: | 949 | out_unlock: |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 555f0ff988df..e28cecf179f5 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/capability.h> | 18 | #include <linux/capability.h> |
19 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/xattr.h> | ||
22 | #include <linux/security.h> | ||
21 | #include "sysfs.h" | 23 | #include "sysfs.h" |
22 | 24 | ||
23 | extern struct super_block * sysfs_sb; | 25 | extern struct super_block * sysfs_sb; |
@@ -29,12 +31,14 @@ static const struct address_space_operations sysfs_aops = { | |||
29 | }; | 31 | }; |
30 | 32 | ||
31 | static struct backing_dev_info sysfs_backing_dev_info = { | 33 | static struct backing_dev_info sysfs_backing_dev_info = { |
34 | .name = "sysfs", | ||
32 | .ra_pages = 0, /* No readahead */ | 35 | .ra_pages = 0, /* No readahead */ |
33 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 36 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
34 | }; | 37 | }; |
35 | 38 | ||
36 | static const struct inode_operations sysfs_inode_operations ={ | 39 | static const struct inode_operations sysfs_inode_operations ={ |
37 | .setattr = sysfs_setattr, | 40 | .setattr = sysfs_setattr, |
41 | .setxattr = sysfs_setxattr, | ||
38 | }; | 42 | }; |
39 | 43 | ||
40 | int __init sysfs_inode_init(void) | 44 | int __init sysfs_inode_init(void) |
@@ -42,18 +46,37 @@ int __init sysfs_inode_init(void) | |||
42 | return bdi_init(&sysfs_backing_dev_info); | 46 | return bdi_init(&sysfs_backing_dev_info); |
43 | } | 47 | } |
44 | 48 | ||
49 | struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) | ||
50 | { | ||
51 | struct sysfs_inode_attrs *attrs; | ||
52 | struct iattr *iattrs; | ||
53 | |||
54 | attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); | ||
55 | if (!attrs) | ||
56 | return NULL; | ||
57 | iattrs = &attrs->ia_iattr; | ||
58 | |||
59 | /* assign default attributes */ | ||
60 | iattrs->ia_mode = sd->s_mode; | ||
61 | iattrs->ia_uid = 0; | ||
62 | iattrs->ia_gid = 0; | ||
63 | iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; | ||
64 | |||
65 | return attrs; | ||
66 | } | ||
45 | int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) | 67 | int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) |
46 | { | 68 | { |
47 | struct inode * inode = dentry->d_inode; | 69 | struct inode * inode = dentry->d_inode; |
48 | struct sysfs_dirent * sd = dentry->d_fsdata; | 70 | struct sysfs_dirent * sd = dentry->d_fsdata; |
49 | struct iattr * sd_iattr; | 71 | struct sysfs_inode_attrs *sd_attrs; |
72 | struct iattr *iattrs; | ||
50 | unsigned int ia_valid = iattr->ia_valid; | 73 | unsigned int ia_valid = iattr->ia_valid; |
51 | int error; | 74 | int error; |
52 | 75 | ||
53 | if (!sd) | 76 | if (!sd) |
54 | return -EINVAL; | 77 | return -EINVAL; |
55 | 78 | ||
56 | sd_iattr = sd->s_iattr; | 79 | sd_attrs = sd->s_iattr; |
57 | 80 | ||
58 | error = inode_change_ok(inode, iattr); | 81 | error = inode_change_ok(inode, iattr); |
59 | if (error) | 82 | if (error) |
@@ -65,42 +88,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) | |||
65 | if (error) | 88 | if (error) |
66 | return error; | 89 | return error; |
67 | 90 | ||
68 | if (!sd_iattr) { | 91 | if (!sd_attrs) { |
69 | /* setting attributes for the first time, allocate now */ | 92 | /* setting attributes for the first time, allocate now */ |
70 | sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); | 93 | sd_attrs = sysfs_init_inode_attrs(sd); |
71 | if (!sd_iattr) | 94 | if (!sd_attrs) |
72 | return -ENOMEM; | 95 | return -ENOMEM; |
73 | /* assign default attributes */ | 96 | sd->s_iattr = sd_attrs; |
74 | sd_iattr->ia_mode = sd->s_mode; | 97 | } else { |
75 | sd_iattr->ia_uid = 0; | 98 | /* attributes were changed at least once in past */ |
76 | sd_iattr->ia_gid = 0; | 99 | iattrs = &sd_attrs->ia_iattr; |
77 | sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; | 100 | |
78 | sd->s_iattr = sd_iattr; | 101 | if (ia_valid & ATTR_UID) |
102 | iattrs->ia_uid = iattr->ia_uid; | ||
103 | if (ia_valid & ATTR_GID) | ||
104 | iattrs->ia_gid = iattr->ia_gid; | ||
105 | if (ia_valid & ATTR_ATIME) | ||
106 | iattrs->ia_atime = timespec_trunc(iattr->ia_atime, | ||
107 | inode->i_sb->s_time_gran); | ||
108 | if (ia_valid & ATTR_MTIME) | ||
109 | iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, | ||
110 | inode->i_sb->s_time_gran); | ||
111 | if (ia_valid & ATTR_CTIME) | ||
112 | iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, | ||
113 | inode->i_sb->s_time_gran); | ||
114 | if (ia_valid & ATTR_MODE) { | ||
115 | umode_t mode = iattr->ia_mode; | ||
116 | |||
117 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | ||
118 | mode &= ~S_ISGID; | ||
119 | iattrs->ia_mode = sd->s_mode = mode; | ||
120 | } | ||
79 | } | 121 | } |
122 | return error; | ||
123 | } | ||
80 | 124 | ||
81 | /* attributes were changed atleast once in past */ | 125 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
82 | 126 | size_t size, int flags) | |
83 | if (ia_valid & ATTR_UID) | 127 | { |
84 | sd_iattr->ia_uid = iattr->ia_uid; | 128 | struct sysfs_dirent *sd = dentry->d_fsdata; |
85 | if (ia_valid & ATTR_GID) | 129 | struct sysfs_inode_attrs *iattrs; |
86 | sd_iattr->ia_gid = iattr->ia_gid; | 130 | void *secdata; |
87 | if (ia_valid & ATTR_ATIME) | 131 | int error; |
88 | sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, | 132 | u32 secdata_len = 0; |
89 | inode->i_sb->s_time_gran); | 133 | |
90 | if (ia_valid & ATTR_MTIME) | 134 | if (!sd) |
91 | sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, | 135 | return -EINVAL; |
92 | inode->i_sb->s_time_gran); | 136 | if (!sd->s_iattr) |
93 | if (ia_valid & ATTR_CTIME) | 137 | sd->s_iattr = sysfs_init_inode_attrs(sd); |
94 | sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, | 138 | if (!sd->s_iattr) |
95 | inode->i_sb->s_time_gran); | 139 | return -ENOMEM; |
96 | if (ia_valid & ATTR_MODE) { | 140 | |
97 | umode_t mode = iattr->ia_mode; | 141 | iattrs = sd->s_iattr; |
98 | 142 | ||
99 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | 143 | if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { |
100 | mode &= ~S_ISGID; | 144 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; |
101 | sd_iattr->ia_mode = sd->s_mode = mode; | 145 | error = security_inode_setsecurity(dentry->d_inode, suffix, |
102 | } | 146 | value, size, flags); |
147 | if (error) | ||
148 | goto out; | ||
149 | error = security_inode_getsecctx(dentry->d_inode, | ||
150 | &secdata, &secdata_len); | ||
151 | if (error) | ||
152 | goto out; | ||
153 | if (iattrs->ia_secdata) | ||
154 | security_release_secctx(iattrs->ia_secdata, | ||
155 | iattrs->ia_secdata_len); | ||
156 | iattrs->ia_secdata = secdata; | ||
157 | iattrs->ia_secdata_len = secdata_len; | ||
103 | 158 | ||
159 | } else | ||
160 | return -EINVAL; | ||
161 | out: | ||
104 | return error; | 162 | return error; |
105 | } | 163 | } |
106 | 164 | ||
@@ -146,6 +204,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd) | |||
146 | static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) | 204 | static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) |
147 | { | 205 | { |
148 | struct bin_attribute *bin_attr; | 206 | struct bin_attribute *bin_attr; |
207 | struct sysfs_inode_attrs *iattrs; | ||
149 | 208 | ||
150 | inode->i_private = sysfs_get(sd); | 209 | inode->i_private = sysfs_get(sd); |
151 | inode->i_mapping->a_ops = &sysfs_aops; | 210 | inode->i_mapping->a_ops = &sysfs_aops; |
@@ -154,16 +213,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) | |||
154 | inode->i_ino = sd->s_ino; | 213 | inode->i_ino = sd->s_ino; |
155 | lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); | 214 | lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); |
156 | 215 | ||
157 | if (sd->s_iattr) { | 216 | iattrs = sd->s_iattr; |
217 | if (iattrs) { | ||
158 | /* sysfs_dirent has non-default attributes | 218 | /* sysfs_dirent has non-default attributes |
159 | * get them for the new inode from persistent copy | 219 | * get them for the new inode from persistent copy |
160 | * in sysfs_dirent | 220 | * in sysfs_dirent |
161 | */ | 221 | */ |
162 | set_inode_attr(inode, sd->s_iattr); | 222 | set_inode_attr(inode, &iattrs->ia_iattr); |
223 | if (iattrs->ia_secdata) | ||
224 | security_inode_notifysecctx(inode, | ||
225 | iattrs->ia_secdata, | ||
226 | iattrs->ia_secdata_len); | ||
163 | } else | 227 | } else |
164 | set_default_inode_attr(inode, sd->s_mode); | 228 | set_default_inode_attr(inode, sd->s_mode); |
165 | 229 | ||
166 | |||
167 | /* initialize inode according to type */ | 230 | /* initialize inode according to type */ |
168 | switch (sysfs_type(sd)) { | 231 | switch (sysfs_type(sd)) { |
169 | case SYSFS_DIR: | 232 | case SYSFS_DIR: |
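With sysfs_setxattr() wired into the directory, plain inode, and symlink operations, security.* attributes can now be stored on sysfs nodes (in the new sysfs_inode_attrs secdata fields) and replayed onto fresh inodes via security_inode_notifysecctx(). An illustrative call from userspace — the SELinux label is an assumed example, and the call only succeeds with an LSM loaded and sufficient privilege:

    #include <stdio.h>
    #include <string.h>
    #include <sys/xattr.h>

    int main(void)
    {
        const char *ctx = "system_u:object_r:sysfs_t:s0"; /* assumed label */
        if (setxattr("/sys/kernel", "security.selinux", ctx,
                     strlen(ctx) + 1, 0) < 0)
            perror("setxattr");        /* needs SELinux + privilege */
        return 0;
    }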
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 1d897ad808e0..c5081ad77026 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kobject.h> | 16 | #include <linux/kobject.h> |
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
19 | #include <linux/security.h> | ||
19 | 20 | ||
20 | #include "sysfs.h" | 21 | #include "sysfs.h" |
21 | 22 | ||
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co | |||
209 | } | 210 | } |
210 | 211 | ||
211 | const struct inode_operations sysfs_symlink_inode_operations = { | 212 | const struct inode_operations sysfs_symlink_inode_operations = { |
213 | .setxattr = sysfs_setxattr, | ||
212 | .readlink = generic_readlink, | 214 | .readlink = generic_readlink, |
213 | .follow_link = sysfs_follow_link, | 215 | .follow_link = sysfs_follow_link, |
214 | .put_link = sysfs_put_link, | 216 | .put_link = sysfs_put_link, |
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3fa0d98481e2..af4c4e7482ac 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -8,6 +8,8 @@ | |||
8 | * This file is released under the GPLv2. | 8 | * This file is released under the GPLv2. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/fs.h> | ||
12 | |||
11 | struct sysfs_open_dirent; | 13 | struct sysfs_open_dirent; |
12 | 14 | ||
13 | /* type-specific structures for sysfs_dirent->s_* union members */ | 15 | /* type-specific structures for sysfs_dirent->s_* union members */ |
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr { | |||
31 | struct hlist_head buffers; | 33 | struct hlist_head buffers; |
32 | }; | 34 | }; |
33 | 35 | ||
36 | struct sysfs_inode_attrs { | ||
37 | struct iattr ia_iattr; | ||
38 | void *ia_secdata; | ||
39 | u32 ia_secdata_len; | ||
40 | }; | ||
41 | |||
34 | /* | 42 | /* |
35 | * sysfs_dirent - the building block of sysfs hierarchy. Each and | 43 | * sysfs_dirent - the building block of sysfs hierarchy. Each and |
36 | * every sysfs node is represented by single sysfs_dirent. | 44 | * every sysfs node is represented by single sysfs_dirent. |
@@ -56,7 +64,7 @@ struct sysfs_dirent { | |||
56 | unsigned int s_flags; | 64 | unsigned int s_flags; |
57 | ino_t s_ino; | 65 | ino_t s_ino; |
58 | umode_t s_mode; | 66 | umode_t s_mode; |
59 | struct iattr *s_iattr; | 67 | struct sysfs_inode_attrs *s_iattr; |
60 | }; | 68 | }; |
61 | 69 | ||
62 | #define SD_DEACTIVATED_BIAS INT_MIN | 70 | #define SD_DEACTIVATED_BIAS INT_MIN |
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) | |||
148 | struct inode *sysfs_get_inode(struct sysfs_dirent *sd); | 156 | struct inode *sysfs_get_inode(struct sysfs_dirent *sd); |
149 | void sysfs_delete_inode(struct inode *inode); | 157 | void sysfs_delete_inode(struct inode *inode); |
150 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); | 158 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); |
159 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
160 | size_t size, int flags); | ||
151 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); | 161 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); |
152 | int sysfs_inode_init(void); | 162 | int sysfs_inode_init(void); |
153 | 163 | ||
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index c1f3f99b2939..076ca50e9933 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -54,41 +54,15 @@ | |||
54 | * @nr_to_write: how many dirty pages to write-back | 54 | * @nr_to_write: how many dirty pages to write-back |
55 | * | 55 | * |
56 | * This function shrinks UBIFS liability by means of writing back some amount | 56 | * This function shrinks UBIFS liability by means of writing back some amount |
57 | * of dirty inodes and their pages. Returns the amount of pages which were | 57 | * of dirty inodes and their pages. |
58 | * written back. The returned value does not include dirty inodes which were | ||
59 | * synchronized. | ||
60 | * | 58 | * |
61 | * Note, this function synchronizes even VFS inodes which are locked | 59 | * Note, this function synchronizes even VFS inodes which are locked |
62 | * (@i_mutex) by the caller of the budgeting function, because write-back does | 60 | * (@i_mutex) by the caller of the budgeting function, because write-back does |
63 | * not touch @i_mutex. | 61 | * not touch @i_mutex. |
64 | */ | 62 | */ |
65 | static int shrink_liability(struct ubifs_info *c, int nr_to_write) | 63 | static void shrink_liability(struct ubifs_info *c, int nr_to_write) |
66 | { | 64 | { |
67 | int nr_written; | 65 | writeback_inodes_sb(c->vfs_sb); |
68 | struct writeback_control wbc = { | ||
69 | .sync_mode = WB_SYNC_NONE, | ||
70 | .range_end = LLONG_MAX, | ||
71 | .nr_to_write = nr_to_write, | ||
72 | }; | ||
73 | |||
74 | generic_sync_sb_inodes(c->vfs_sb, &wbc); | ||
75 | nr_written = nr_to_write - wbc.nr_to_write; | ||
76 | |||
77 | if (!nr_written) { | ||
78 | /* | ||
79 | * Re-try again but wait on pages/inodes which are being | ||
80 | * written-back concurrently (e.g., by pdflush). | ||
81 | */ | ||
82 | memset(&wbc, 0, sizeof(struct writeback_control)); | ||
83 | wbc.sync_mode = WB_SYNC_ALL; | ||
84 | wbc.range_end = LLONG_MAX; | ||
85 | wbc.nr_to_write = nr_to_write; | ||
86 | generic_sync_sb_inodes(c->vfs_sb, &wbc); | ||
87 | nr_written = nr_to_write - wbc.nr_to_write; | ||
88 | } | ||
89 | |||
90 | dbg_budg("%d pages were written back", nr_written); | ||
91 | return nr_written; | ||
92 | } | 66 | } |
93 | 67 | ||
94 | /** | 68 | /** |
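shrink_liability() above now just kicks the per-bdi flusher with writeback_inodes_sb() and no longer counts written pages, since its caller only needs writeback started, not a tally. The nearest userland analogue is syncfs(2), which flushes (and waits on) the filesystem containing a descriptor — coarser than the kernel call, but the same per-filesystem scope:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open(".", O_RDONLY);  /* any file on the target fs */
        if (fd < 0) { perror("open"); return 1; }
        if (syncfs(fd) < 0)
            perror("syncfs");
        close(fd);
        return 0;
    }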
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7e2b3d4d487a..333e181ee987 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -1952,6 +1952,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
1952 | * | 1952 | * |
1953 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. | 1953 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. |
1954 | */ | 1954 | */ |
1955 | c->bdi.name = "ubifs", | ||
1955 | c->bdi.capabilities = BDI_CAP_MAP_COPY; | 1956 | c->bdi.capabilities = BDI_CAP_MAP_COPY; |
1956 | c->bdi.unplug_io_fn = default_unplug_io_fn; | 1957 | c->bdi.unplug_io_fn = default_unplug_io_fn; |
1957 | err = bdi_init(&c->bdi); | 1958 | err = bdi_init(&c->bdi); |
@@ -1966,6 +1967,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
1966 | if (err) | 1967 | if (err) |
1967 | goto out_bdi; | 1968 | goto out_bdi; |
1968 | 1969 | ||
1970 | sb->s_bdi = &c->bdi; | ||
1969 | sb->s_fs_info = c; | 1971 | sb->s_fs_info = c; |
1970 | sb->s_magic = UBIFS_SUPER_MAGIC; | 1972 | sb->s_magic = UBIFS_SUPER_MAGIC; |
1971 | sb->s_blocksize = UBIFS_BLOCK_SIZE; | 1973 | sb->s_blocksize = UBIFS_BLOCK_SIZE; |
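These two hunks wire the filesystem's private backing_dev_info into the per-bdi writeback machinery: the bdi gets a name and the superblock is pointed at it. A condensed sketch of the wiring, with the name string purely illustrative:

#include <linux/backing-dev.h>
#include <linux/fs.h>

static int example_wire_bdi(struct super_block *sb,
			    struct backing_dev_info *bdi)
{
	int err;

	bdi->name = "examplefs";	/* flusher threads carry this name */
	bdi->capabilities = BDI_CAP_MAP_COPY;
	err = bdi_init(bdi);
	if (err)
		return err;

	/* Without this assignment the per-bdi flusher cannot find the
	 * device, and dirty data on the filesystem is never written. */
	sb->s_bdi = bdi;
	return 0;
}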
diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 1d2c570704c8..2ffdb6733af1 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c | |||
@@ -18,59 +18,6 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/buffer_head.h> | 19 | #include <linux/buffer_head.h> |
20 | 20 | ||
21 | #if 0 | ||
22 | static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad, | ||
23 | uint8_t ad_size, struct kernel_lb_addr fe_loc, | ||
24 | int *pos, int *offset, struct buffer_head **bh, | ||
25 | int *error) | ||
26 | { | ||
27 | int loffset = *offset; | ||
28 | int block; | ||
29 | uint8_t *ad; | ||
30 | int remainder; | ||
31 | |||
32 | *error = 0; | ||
33 | |||
34 | ad = (uint8_t *)(*bh)->b_data + *offset; | ||
35 | *offset += ad_size; | ||
36 | |||
37 | if (!ad) { | ||
38 | brelse(*bh); | ||
39 | *error = 1; | ||
40 | return NULL; | ||
41 | } | ||
42 | |||
43 | if (*offset == dir->i_sb->s_blocksize) { | ||
44 | brelse(*bh); | ||
45 | block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); | ||
46 | if (!block) | ||
47 | return NULL; | ||
48 | *bh = udf_tread(dir->i_sb, block); | ||
49 | if (!*bh) | ||
50 | return NULL; | ||
51 | } else if (*offset > dir->i_sb->s_blocksize) { | ||
52 | ad = tmpad; | ||
53 | |||
54 | remainder = dir->i_sb->s_blocksize - loffset; | ||
55 | memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder); | ||
56 | |||
57 | brelse(*bh); | ||
58 | block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); | ||
59 | if (!block) | ||
60 | return NULL; | ||
61 | (*bh) = udf_tread(dir->i_sb, block); | ||
62 | if (!*bh) | ||
63 | return NULL; | ||
64 | |||
65 | memcpy((uint8_t *)ad + remainder, (*bh)->b_data, | ||
66 | ad_size - remainder); | ||
67 | *offset = ad_size - remainder; | ||
68 | } | ||
69 | |||
70 | return ad; | ||
71 | } | ||
72 | #endif | ||
73 | |||
74 | struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, | 21 | struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, |
75 | struct udf_fileident_bh *fibh, | 22 | struct udf_fileident_bh *fibh, |
76 | struct fileIdentDesc *cfi, | 23 | struct fileIdentDesc *cfi, |
@@ -248,39 +195,6 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset) | |||
248 | return fi; | 195 | return fi; |
249 | } | 196 | } |
250 | 197 | ||
251 | #if 0 | ||
252 | static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) | ||
253 | { | ||
254 | struct extent_ad *ext; | ||
255 | struct fileEntry *fe; | ||
256 | uint8_t *ptr; | ||
257 | |||
258 | if ((!buffer) || (!offset)) { | ||
259 | printk(KERN_ERR "udf: udf_get_fileextent() invalidparms\n"); | ||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | fe = (struct fileEntry *)buffer; | ||
264 | |||
265 | if (fe->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FE)) { | ||
266 | udf_debug("0x%x != TAG_IDENT_FE\n", | ||
267 | le16_to_cpu(fe->descTag.tagIdent)); | ||
268 | return NULL; | ||
269 | } | ||
270 | |||
271 | ptr = (uint8_t *)(fe->extendedAttr) + | ||
272 | le32_to_cpu(fe->lengthExtendedAttr); | ||
273 | |||
274 | if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) | ||
275 | ptr += *offset; | ||
276 | |||
277 | ext = (struct extent_ad *)ptr; | ||
278 | |||
279 | *offset = *offset + sizeof(struct extent_ad); | ||
280 | return ext; | ||
281 | } | ||
282 | #endif | ||
283 | |||
284 | struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, | 198 | struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, |
285 | int inc) | 199 | int inc) |
286 | { | 200 | { |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 7464305382b5..b80cbd78833c 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -193,9 +193,11 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
193 | static int udf_release_file(struct inode *inode, struct file *filp) | 193 | static int udf_release_file(struct inode *inode, struct file *filp) |
194 | { | 194 | { |
195 | if (filp->f_mode & FMODE_WRITE) { | 195 | if (filp->f_mode & FMODE_WRITE) { |
196 | mutex_lock(&inode->i_mutex); | ||
196 | lock_kernel(); | 197 | lock_kernel(); |
197 | udf_discard_prealloc(inode); | 198 | udf_discard_prealloc(inode); |
198 | unlock_kernel(); | 199 | unlock_kernel(); |
200 | mutex_unlock(&inode->i_mutex); | ||
199 | } | 201 | } |
200 | return 0; | 202 | return 0; |
201 | } | 203 | } |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index e7533f785636..6d24c2c63f93 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -90,19 +90,16 @@ no_delete: | |||
90 | } | 90 | } |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * If we are going to release inode from memory, we discard preallocation and | 93 | * If we are going to release inode from memory, we truncate last inode extent |
94 | * truncate last inode extent to proper length. We could use drop_inode() but | 94 | * to proper length. We could use drop_inode() but it's called under inode_lock |
95 | * it's called under inode_lock and thus we cannot mark inode dirty there. We | 95 | * and thus we cannot mark inode dirty there. We use clear_inode() but we have |
96 | * use clear_inode() but we have to make sure to write inode as it's not written | 96 | * to make sure to write inode as it's not written automatically. |
97 | * automatically. | ||
98 | */ | 97 | */ |
99 | void udf_clear_inode(struct inode *inode) | 98 | void udf_clear_inode(struct inode *inode) |
100 | { | 99 | { |
101 | struct udf_inode_info *iinfo; | 100 | struct udf_inode_info *iinfo; |
102 | if (!(inode->i_sb->s_flags & MS_RDONLY)) { | 101 | if (!(inode->i_sb->s_flags & MS_RDONLY)) { |
103 | lock_kernel(); | 102 | lock_kernel(); |
104 | /* Discard preallocation for directories, symlinks, etc. */ | ||
105 | udf_discard_prealloc(inode); | ||
106 | udf_truncate_tail_extent(inode); | 103 | udf_truncate_tail_extent(inode); |
107 | unlock_kernel(); | 104 | unlock_kernel(); |
108 | write_inode_now(inode, 0); | 105 | write_inode_now(inode, 0); |
@@ -664,8 +661,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
664 | udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); | 661 | udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); |
665 | 662 | ||
666 | #ifdef UDF_PREALLOCATE | 663 | #ifdef UDF_PREALLOCATE |
667 | /* preallocate blocks */ | 664 | /* We preallocate blocks only for regular files. It also makes sense |
668 | udf_prealloc_extents(inode, c, lastblock, laarr, &endnum); | 665 | * for directories but there's a problem when to drop the |
666 | * preallocation. We might use some delayed work for that but I feel | ||
667 | * it's overengineering for a filesystem like UDF. */ | ||
668 | if (S_ISREG(inode->i_mode)) | ||
669 | udf_prealloc_extents(inode, c, lastblock, laarr, &endnum); | ||
669 | #endif | 670 | #endif |
670 | 671 | ||
671 | /* merge any continuous blocks in laarr */ | 672 | /* merge any continuous blocks in laarr */ |
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 1b88fd5df05d..43e24a3b8e10 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c | |||
@@ -36,14 +36,10 @@ unsigned int udf_get_last_session(struct super_block *sb) | |||
36 | ms_info.addr_format = CDROM_LBA; | 36 | ms_info.addr_format = CDROM_LBA; |
37 | i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info); | 37 | i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info); |
38 | 38 | ||
39 | #define WE_OBEY_THE_WRITTEN_STANDARDS 1 | ||
40 | |||
41 | if (i == 0) { | 39 | if (i == 0) { |
42 | udf_debug("XA disk: %s, vol_desc_start=%d\n", | 40 | udf_debug("XA disk: %s, vol_desc_start=%d\n", |
43 | (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); | 41 | (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); |
44 | #if WE_OBEY_THE_WRITTEN_STANDARDS | ||
45 | if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ | 42 | if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ |
46 | #endif | ||
47 | vol_desc_start = ms_info.addr.lba; | 43 | vol_desc_start = ms_info.addr.lba; |
48 | } else { | 44 | } else { |
49 | udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i); | 45 | udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i); |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 6a29fa34c478..21dad8c608f9 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -943,7 +943,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
943 | pc->componentType = 1; | 943 | pc->componentType = 1; |
944 | pc->lengthComponentIdent = 0; | 944 | pc->lengthComponentIdent = 0; |
945 | pc->componentFileVersionNum = 0; | 945 | pc->componentFileVersionNum = 0; |
946 | pc += sizeof(struct pathComponent); | ||
947 | elen += sizeof(struct pathComponent); | 946 | elen += sizeof(struct pathComponent); |
948 | } | 947 | } |
949 | 948 | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index 6832135159b6..9d1b8c2e6c45 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -1087,11 +1087,23 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) | |||
1087 | struct udf_inode_info *vati; | 1087 | struct udf_inode_info *vati; |
1088 | uint32_t pos; | 1088 | uint32_t pos; |
1089 | struct virtualAllocationTable20 *vat20; | 1089 | struct virtualAllocationTable20 *vat20; |
1090 | sector_t blocks = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; | ||
1090 | 1091 | ||
1091 | /* VAT file entry is in the last recorded block */ | 1092 | /* VAT file entry is in the last recorded block */ |
1092 | ino.partitionReferenceNum = type1_index; | 1093 | ino.partitionReferenceNum = type1_index; |
1093 | ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; | 1094 | ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; |
1094 | sbi->s_vat_inode = udf_iget(sb, &ino); | 1095 | sbi->s_vat_inode = udf_iget(sb, &ino); |
1096 | if (!sbi->s_vat_inode && | ||
1097 | sbi->s_last_block != blocks - 1) { | ||
1098 | printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" | ||
1099 | " last recorded block (%lu), retrying with the last " | ||
1100 | "block of the device (%lu).\n", | ||
1101 | (unsigned long)sbi->s_last_block, | ||
1102 | (unsigned long)blocks - 1); | ||
1103 | ino.partitionReferenceNum = type1_index; | ||
1104 | ino.logicalBlockNum = blocks - 1 - map->s_partition_root; | ||
1105 | sbi->s_vat_inode = udf_iget(sb, &ino); | ||
1106 | } | ||
1095 | if (!sbi->s_vat_inode) | 1107 | if (!sbi->s_vat_inode) |
1096 | return 1; | 1108 | return 1; |
1097 | 1109 | ||
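A condensed sketch of the fallback added above, assuming udf_iget() and the kernel_lb_addr layout behave as in the surrounding code: the block count of the medium is derived from the block device inode's size, and the VAT read is retried at the physically last block when the drive-reported last recorded block yields nothing.

static struct inode *example_load_vat(struct super_block *sb,
				      struct kernel_lb_addr *ino,
				      sector_t last_recorded,
				      uint32_t partition_root)
{
	/* total blocks on the medium, from the bdev inode's size */
	sector_t blocks = sb->s_bdev->bd_inode->i_size >>
			  sb->s_blocksize_bits;
	struct inode *vat;

	/* first try: the last recorded block, as reported by the drive */
	ino->logicalBlockNum = last_recorded - partition_root;
	vat = udf_iget(sb, ino);
	if (!vat && last_recorded != blocks - 1) {
		/* some drives misreport the session end; retry at the
		 * last physical block of the device */
		ino->logicalBlockNum = blocks - 1 - partition_root;
		vat = udf_iget(sb, ino);
	}
	return vat;
}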
diff --git a/fs/xattr.c b/fs/xattr.c index 1c3d0af59ddf..6d4f6d3449fb 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask) | |||
66 | return inode_permission(inode, mask); | 66 | return inode_permission(inode, mask); |
67 | } | 67 | } |
68 | 68 | ||
69 | int | 69 | /** |
70 | vfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 70 | * __vfs_setxattr_noperm - perform setxattr operation without performing |
71 | size_t size, int flags) | 71 | * permission checks. |
72 | * | ||
73 | * @dentry - object to perform setxattr on | ||
74 | * @name - xattr name to set | ||
75 | * @value - value to set @name to | ||
76 | * @size - size of @value | ||
77 | * @flags - flags to pass into filesystem operations | ||
78 | * | ||
79 | * returns the result of the internal setxattr or setsecurity operations. | ||
80 | * | ||
81 | * This function requires the caller to lock the inode's i_mutex before it | ||
82 | * is executed. It also assumes that the caller will make the appropriate | ||
83 | * permission checks. | ||
84 | */ | ||
85 | int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, | ||
86 | const void *value, size_t size, int flags) | ||
72 | { | 87 | { |
73 | struct inode *inode = dentry->d_inode; | 88 | struct inode *inode = dentry->d_inode; |
74 | int error; | 89 | int error = -EOPNOTSUPP; |
75 | |||
76 | error = xattr_permission(inode, name, MAY_WRITE); | ||
77 | if (error) | ||
78 | return error; | ||
79 | 90 | ||
80 | mutex_lock(&inode->i_mutex); | ||
81 | error = security_inode_setxattr(dentry, name, value, size, flags); | ||
82 | if (error) | ||
83 | goto out; | ||
84 | error = -EOPNOTSUPP; | ||
85 | if (inode->i_op->setxattr) { | 91 | if (inode->i_op->setxattr) { |
86 | error = inode->i_op->setxattr(dentry, name, value, size, flags); | 92 | error = inode->i_op->setxattr(dentry, name, value, size, flags); |
87 | if (!error) { | 93 | if (!error) { |
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
97 | if (!error) | 103 | if (!error) |
98 | fsnotify_xattr(dentry); | 104 | fsnotify_xattr(dentry); |
99 | } | 105 | } |
106 | |||
107 | return error; | ||
108 | } | ||
109 | |||
110 | |||
111 | int | ||
112 | vfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
113 | size_t size, int flags) | ||
114 | { | ||
115 | struct inode *inode = dentry->d_inode; | ||
116 | int error; | ||
117 | |||
118 | error = xattr_permission(inode, name, MAY_WRITE); | ||
119 | if (error) | ||
120 | return error; | ||
121 | |||
122 | mutex_lock(&inode->i_mutex); | ||
123 | error = security_inode_setxattr(dentry, name, value, size, flags); | ||
124 | if (error) | ||
125 | goto out; | ||
126 | |||
127 | error = __vfs_setxattr_noperm(dentry, name, value, size, flags); | ||
128 | |||
100 | out: | 129 | out: |
101 | mutex_unlock(&inode->i_mutex); | 130 | mutex_unlock(&inode->i_mutex); |
102 | return error; | 131 | return error; |
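The kernel-doc above spells out the contract for the new helper: the caller must hold i_mutex and must have performed its own permission checks. A minimal sketch of such a caller, with the "security.example" attribute name purely illustrative (the intended users are security modules setting their own xattrs):

static int example_set_security_xattr(struct dentry *dentry,
				      const void *ctx, size_t ctxlen)
{
	struct inode *inode = dentry->d_inode;
	int error;

	/* contract: take i_mutex ourselves ... */
	mutex_lock(&inode->i_mutex);
	/* ... and skip xattr_permission()/security_inode_setxattr(),
	 * which this caller is assumed to have authorised already */
	error = __vfs_setxattr_noperm(dentry, "security.example",
				      ctx, ctxlen, 0);
	mutex_unlock(&inode->i_mutex);
	return error;
}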
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7ec89fc05b2b..d5e5559e31db 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -216,7 +216,6 @@ xfs_setfilesize( | |||
216 | if (ip->i_d.di_size < isize) { | 216 | if (ip->i_d.di_size < isize) { |
217 | ip->i_d.di_size = isize; | 217 | ip->i_d.di_size = isize; |
218 | ip->i_update_core = 1; | 218 | ip->i_update_core = 1; |
219 | ip->i_update_size = 1; | ||
220 | xfs_mark_inode_dirty_sync(ip); | 219 | xfs_mark_inode_dirty_sync(ip); |
221 | } | 220 | } |
222 | 221 | ||
@@ -1268,6 +1267,14 @@ xfs_vm_writepage( | |||
1268 | if (!page_has_buffers(page)) | 1267 | if (!page_has_buffers(page)) |
1269 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); | 1268 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); |
1270 | 1269 | ||
1270 | |||
1271 | /* | ||
1272 | * VM calculation for nr_to_write seems off. Bump it way | ||
1273 | * up, this gets simple streaming writes zippy again. | ||
1274 | * To be reviewed again after Jens' writeback changes. | ||
1275 | */ | ||
1276 | wbc->nr_to_write *= 4; | ||
1277 | |||
1271 | /* | 1278 | /* |
1272 | * Convert delayed allocate, unwritten or unmapped space | 1279 | * Convert delayed allocate, unwritten or unmapped space |
1273 | * to real space and flush out to disk. | 1280 | * to real space and flush out to disk. |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0c93c7ef3d18..965df1227d64 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -770,7 +770,7 @@ xfs_buf_associate_memory( | |||
770 | bp->b_pages = NULL; | 770 | bp->b_pages = NULL; |
771 | bp->b_addr = mem; | 771 | bp->b_addr = mem; |
772 | 772 | ||
773 | rval = _xfs_buf_get_pages(bp, page_count, 0); | 773 | rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); |
774 | if (rval) | 774 | if (rval) |
775 | return rval; | 775 | return rval; |
776 | 776 | ||
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 0542fd507649..988d8f87bc0f 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -172,12 +172,21 @@ xfs_file_release( | |||
172 | */ | 172 | */ |
173 | STATIC int | 173 | STATIC int |
174 | xfs_file_fsync( | 174 | xfs_file_fsync( |
175 | struct file *filp, | 175 | struct file *file, |
176 | struct dentry *dentry, | 176 | struct dentry *dentry, |
177 | int datasync) | 177 | int datasync) |
178 | { | 178 | { |
179 | xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); | 179 | struct inode *inode = dentry->d_inode; |
180 | return -xfs_fsync(XFS_I(dentry->d_inode)); | 180 | struct xfs_inode *ip = XFS_I(inode); |
181 | int error; | ||
182 | |||
183 | /* capture size updates in I/O completion before writing the inode. */ | ||
184 | error = filemap_fdatawait(inode->i_mapping); | ||
185 | if (error) | ||
186 | return error; | ||
187 | |||
188 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | ||
189 | return -xfs_fsync(ip); | ||
181 | } | 190 | } |
182 | 191 | ||
183 | STATIC int | 192 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0882d166239a..eafcc7c18706 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -619,7 +619,7 @@ xfs_file_compat_ioctl( | |||
619 | case XFS_IOC_GETVERSION_32: | 619 | case XFS_IOC_GETVERSION_32: |
620 | cmd = _NATIVE_IOC(cmd, long); | 620 | cmd = _NATIVE_IOC(cmd, long); |
621 | return xfs_file_ioctl(filp, cmd, p); | 621 | return xfs_file_ioctl(filp, cmd, p); |
622 | case XFS_IOC_SWAPEXT: { | 622 | case XFS_IOC_SWAPEXT_32: { |
623 | struct xfs_swapext sxp; | 623 | struct xfs_swapext sxp; |
624 | struct compat_xfs_swapext __user *sxu = arg; | 624 | struct compat_xfs_swapext __user *sxu = arg; |
625 | 625 | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 58973bb46038..da0159d99f82 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
44 | #include "xfs_itable.h" | 44 | #include "xfs_itable.h" |
45 | #include "xfs_rw.h" | 45 | #include "xfs_rw.h" |
46 | #include "xfs_acl.h" | ||
47 | #include "xfs_attr.h" | 46 | #include "xfs_attr.h" |
48 | #include "xfs_buf_item.h" | 47 | #include "xfs_buf_item.h" |
49 | #include "xfs_utils.h" | 48 | #include "xfs_utils.h" |
@@ -485,14 +484,6 @@ xfs_vn_put_link( | |||
485 | } | 484 | } |
486 | 485 | ||
487 | STATIC int | 486 | STATIC int |
488 | xfs_vn_permission( | ||
489 | struct inode *inode, | ||
490 | int mask) | ||
491 | { | ||
492 | return generic_permission(inode, mask, xfs_check_acl); | ||
493 | } | ||
494 | |||
495 | STATIC int | ||
496 | xfs_vn_getattr( | 487 | xfs_vn_getattr( |
497 | struct vfsmount *mnt, | 488 | struct vfsmount *mnt, |
498 | struct dentry *dentry, | 489 | struct dentry *dentry, |
@@ -680,8 +671,8 @@ xfs_vn_fiemap( | |||
680 | else | 671 | else |
681 | bm.bmv_length = BTOBB(length); | 672 | bm.bmv_length = BTOBB(length); |
682 | 673 | ||
683 | /* our formatter will tell xfs_getbmap when to stop. */ | 674 | /* We add one because in getbmap world count includes the header */ |
684 | bm.bmv_count = MAXEXTNUM; | 675 | bm.bmv_count = fieinfo->fi_extents_max + 1; |
685 | bm.bmv_iflags = BMV_IF_PREALLOC; | 676 | bm.bmv_iflags = BMV_IF_PREALLOC; |
686 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) | 677 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) |
687 | bm.bmv_iflags |= BMV_IF_ATTRFORK; | 678 | bm.bmv_iflags |= BMV_IF_ATTRFORK; |
@@ -696,7 +687,7 @@ xfs_vn_fiemap( | |||
696 | } | 687 | } |
697 | 688 | ||
698 | static const struct inode_operations xfs_inode_operations = { | 689 | static const struct inode_operations xfs_inode_operations = { |
699 | .permission = xfs_vn_permission, | 690 | .check_acl = xfs_check_acl, |
700 | .truncate = xfs_vn_truncate, | 691 | .truncate = xfs_vn_truncate, |
701 | .getattr = xfs_vn_getattr, | 692 | .getattr = xfs_vn_getattr, |
702 | .setattr = xfs_vn_setattr, | 693 | .setattr = xfs_vn_setattr, |
@@ -724,7 +715,7 @@ static const struct inode_operations xfs_dir_inode_operations = { | |||
724 | .rmdir = xfs_vn_unlink, | 715 | .rmdir = xfs_vn_unlink, |
725 | .mknod = xfs_vn_mknod, | 716 | .mknod = xfs_vn_mknod, |
726 | .rename = xfs_vn_rename, | 717 | .rename = xfs_vn_rename, |
727 | .permission = xfs_vn_permission, | 718 | .check_acl = xfs_check_acl, |
728 | .getattr = xfs_vn_getattr, | 719 | .getattr = xfs_vn_getattr, |
729 | .setattr = xfs_vn_setattr, | 720 | .setattr = xfs_vn_setattr, |
730 | .setxattr = generic_setxattr, | 721 | .setxattr = generic_setxattr, |
@@ -749,7 +740,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { | |||
749 | .rmdir = xfs_vn_unlink, | 740 | .rmdir = xfs_vn_unlink, |
750 | .mknod = xfs_vn_mknod, | 741 | .mknod = xfs_vn_mknod, |
751 | .rename = xfs_vn_rename, | 742 | .rename = xfs_vn_rename, |
752 | .permission = xfs_vn_permission, | 743 | .check_acl = xfs_check_acl, |
753 | .getattr = xfs_vn_getattr, | 744 | .getattr = xfs_vn_getattr, |
754 | .setattr = xfs_vn_setattr, | 745 | .setattr = xfs_vn_setattr, |
755 | .setxattr = generic_setxattr, | 746 | .setxattr = generic_setxattr, |
@@ -762,7 +753,7 @@ static const struct inode_operations xfs_symlink_inode_operations = { | |||
762 | .readlink = generic_readlink, | 753 | .readlink = generic_readlink, |
763 | .follow_link = xfs_vn_follow_link, | 754 | .follow_link = xfs_vn_follow_link, |
764 | .put_link = xfs_vn_put_link, | 755 | .put_link = xfs_vn_put_link, |
765 | .permission = xfs_vn_permission, | 756 | .check_acl = xfs_check_acl, |
766 | .getattr = xfs_vn_getattr, | 757 | .getattr = xfs_vn_getattr, |
767 | .setattr = xfs_vn_setattr, | 758 | .setattr = xfs_vn_setattr, |
768 | .setxattr = generic_setxattr, | 759 | .setxattr = generic_setxattr, |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7078974a6eee..49e4a6aea73c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -812,18 +812,21 @@ write_retry: | |||
812 | 812 | ||
813 | /* Handle various SYNC-type writes */ | 813 | /* Handle various SYNC-type writes */ |
814 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 814 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { |
815 | loff_t end = pos + ret - 1; | ||
815 | int error2; | 816 | int error2; |
816 | 817 | ||
817 | xfs_iunlock(xip, iolock); | 818 | xfs_iunlock(xip, iolock); |
818 | if (need_i_mutex) | 819 | if (need_i_mutex) |
819 | mutex_unlock(&inode->i_mutex); | 820 | mutex_unlock(&inode->i_mutex); |
820 | error2 = sync_page_range(inode, mapping, pos, ret); | 821 | |
822 | error2 = filemap_write_and_wait_range(mapping, pos, end); | ||
821 | if (!error) | 823 | if (!error) |
822 | error = error2; | 824 | error = error2; |
823 | if (need_i_mutex) | 825 | if (need_i_mutex) |
824 | mutex_lock(&inode->i_mutex); | 826 | mutex_lock(&inode->i_mutex); |
825 | xfs_ilock(xip, iolock); | 827 | xfs_ilock(xip, iolock); |
826 | error2 = xfs_write_sync_logforce(mp, xip); | 828 | |
829 | error2 = xfs_fsync(xip); | ||
827 | if (!error) | 830 | if (!error) |
828 | error = error2; | 831 | error = error2; |
829 | } | 832 | } |
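The O_SYNC path above swaps sync_page_range() for a range-limited flush plus an explicit xfs_fsync(). The generic half of that pattern, sketched under the assumption of a write of ret bytes at pos:

static ssize_t example_sync_written_range(struct address_space *mapping,
					  loff_t pos, ssize_t ret)
{
	int error;

	if (ret <= 0)
		return ret;

	/* write back and wait on exactly the bytes just written,
	 * rather than the whole file */
	error = filemap_write_and_wait_range(mapping, pos, pos + ret - 1);
	return error ? error : ret;
}

The filesystem-specific half (forcing the log so size updates captured at I/O completion reach disk) is what the xfs_fsync() call supplies.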
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index c3526d445f6a..76fdc5861932 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c | |||
@@ -20,16 +20,9 @@ | |||
20 | 20 | ||
21 | DEFINE_PER_CPU(struct xfsstats, xfsstats); | 21 | DEFINE_PER_CPU(struct xfsstats, xfsstats); |
22 | 22 | ||
23 | STATIC int | 23 | static int xfs_stat_proc_show(struct seq_file *m, void *v) |
24 | xfs_read_xfsstats( | ||
25 | char *buffer, | ||
26 | char **start, | ||
27 | off_t offset, | ||
28 | int count, | ||
29 | int *eof, | ||
30 | void *data) | ||
31 | { | 24 | { |
32 | int c, i, j, len, val; | 25 | int c, i, j, val; |
33 | __uint64_t xs_xstrat_bytes = 0; | 26 | __uint64_t xs_xstrat_bytes = 0; |
34 | __uint64_t xs_write_bytes = 0; | 27 | __uint64_t xs_write_bytes = 0; |
35 | __uint64_t xs_read_bytes = 0; | 28 | __uint64_t xs_read_bytes = 0; |
@@ -60,18 +53,18 @@ xfs_read_xfsstats( | |||
60 | }; | 53 | }; |
61 | 54 | ||
62 | /* Loop over all stats groups */ | 55 | /* Loop over all stats groups */ |
63 | for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { | 56 | for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { |
64 | len += sprintf(buffer + len, "%s", xstats[i].desc); | 57 | seq_printf(m, "%s", xstats[i].desc); |
65 | /* inner loop does each group */ | 58 | /* inner loop does each group */ |
66 | while (j < xstats[i].endpoint) { | 59 | while (j < xstats[i].endpoint) { |
67 | val = 0; | 60 | val = 0; |
68 | /* sum over all cpus */ | 61 | /* sum over all cpus */ |
69 | for_each_possible_cpu(c) | 62 | for_each_possible_cpu(c) |
70 | val += *(((__u32*)&per_cpu(xfsstats, c) + j)); | 63 | val += *(((__u32*)&per_cpu(xfsstats, c) + j)); |
71 | len += sprintf(buffer + len, " %u", val); | 64 | seq_printf(m, " %u", val); |
72 | j++; | 65 | j++; |
73 | } | 66 | } |
74 | buffer[len++] = '\n'; | 67 | seq_putc(m, '\n'); |
75 | } | 68 | } |
76 | /* extra precision counters */ | 69 | /* extra precision counters */ |
77 | for_each_possible_cpu(i) { | 70 | for_each_possible_cpu(i) { |
@@ -80,36 +73,38 @@ xfs_read_xfsstats( | |||
80 | xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; | 73 | xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; |
81 | } | 74 | } |
82 | 75 | ||
83 | len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", | 76 | seq_printf(m, "xpc %Lu %Lu %Lu\n", |
84 | xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); | 77 | xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); |
85 | len += sprintf(buffer + len, "debug %u\n", | 78 | seq_printf(m, "debug %u\n", |
86 | #if defined(DEBUG) | 79 | #if defined(DEBUG) |
87 | 1); | 80 | 1); |
88 | #else | 81 | #else |
89 | 0); | 82 | 0); |
90 | #endif | 83 | #endif |
84 | return 0; | ||
85 | } | ||
91 | 86 | ||
92 | if (offset >= len) { | 87 | static int xfs_stat_proc_open(struct inode *inode, struct file *file) |
93 | *start = buffer; | 88 | { |
94 | *eof = 1; | 89 | return single_open(file, xfs_stat_proc_show, NULL); |
95 | return 0; | ||
96 | } | ||
97 | *start = buffer + offset; | ||
98 | if ((len -= offset) > count) | ||
99 | return count; | ||
100 | *eof = 1; | ||
101 | |||
102 | return len; | ||
103 | } | 90 | } |
104 | 91 | ||
92 | static const struct file_operations xfs_stat_proc_fops = { | ||
93 | .owner = THIS_MODULE, | ||
94 | .open = xfs_stat_proc_open, | ||
95 | .read = seq_read, | ||
96 | .llseek = seq_lseek, | ||
97 | .release = single_release, | ||
98 | }; | ||
99 | |||
105 | int | 100 | int |
106 | xfs_init_procfs(void) | 101 | xfs_init_procfs(void) |
107 | { | 102 | { |
108 | if (!proc_mkdir("fs/xfs", NULL)) | 103 | if (!proc_mkdir("fs/xfs", NULL)) |
109 | goto out; | 104 | goto out; |
110 | 105 | ||
111 | if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, | 106 | if (!proc_create("fs/xfs/stat", 0, NULL, |
112 | xfs_read_xfsstats, NULL)) | 107 | &xfs_stat_proc_fops)) |
113 | goto out_remove_entry; | 108 | goto out_remove_entry; |
114 | return 0; | 109 | return 0; |
115 | 110 | ||
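The conversion above is the stock read_proc-to-seq_file recipe: a show() callback prints into the seq_file, single_open() binds it, and proc_create() replaces create_proc_read_entry(), eliminating the error-prone offset/count/eof bookkeeping. The recipe in isolation, with all example_* names illustrative:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "stat %u\n", 42);	/* emit the whole file at once */
	return 0;
}

static int example_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_proc_show, NULL);
}

static const struct file_operations example_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = example_proc_open,
	.read	 = seq_read,	/* seq_file handles offsets and partial reads */
	.llseek	 = seq_lseek,
	.release = single_release,
};

/* registration, e.g. from an init function:
 *	proc_create("fs/example/stat", 0, NULL, &example_proc_fops);
 */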
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a220d36f789b..5d7c60ac77b4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -579,15 +579,19 @@ xfs_showargs( | |||
579 | else if (mp->m_qflags & XFS_UQUOTA_ACCT) | 579 | else if (mp->m_qflags & XFS_UQUOTA_ACCT) |
580 | seq_puts(m, "," MNTOPT_UQUOTANOENF); | 580 | seq_puts(m, "," MNTOPT_UQUOTANOENF); |
581 | 581 | ||
582 | if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 582 | /* Either project or group quotas can be active, not both */ |
583 | seq_puts(m, "," MNTOPT_PRJQUOTA); | 583 | |
584 | else if (mp->m_qflags & XFS_PQUOTA_ACCT) | 584 | if (mp->m_qflags & XFS_PQUOTA_ACCT) { |
585 | seq_puts(m, "," MNTOPT_PQUOTANOENF); | 585 | if (mp->m_qflags & XFS_OQUOTA_ENFD) |
586 | 586 | seq_puts(m, "," MNTOPT_PRJQUOTA); | |
587 | if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 587 | else |
588 | seq_puts(m, "," MNTOPT_GRPQUOTA); | 588 | seq_puts(m, "," MNTOPT_PQUOTANOENF); |
589 | else if (mp->m_qflags & XFS_GQUOTA_ACCT) | 589 | } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { |
590 | seq_puts(m, "," MNTOPT_GQUOTANOENF); | 590 | if (mp->m_qflags & XFS_OQUOTA_ENFD) |
591 | seq_puts(m, "," MNTOPT_GRPQUOTA); | ||
592 | else | ||
593 | seq_puts(m, "," MNTOPT_GQUOTANOENF); | ||
594 | } | ||
591 | 595 | ||
592 | if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) | 596 | if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) |
593 | seq_puts(m, "," MNTOPT_NOQUOTA); | 597 | seq_puts(m, "," MNTOPT_NOQUOTA); |
@@ -687,7 +691,7 @@ xfs_barrier_test( | |||
687 | return error; | 691 | return error; |
688 | } | 692 | } |
689 | 693 | ||
690 | void | 694 | STATIC void |
691 | xfs_mountfs_check_barriers(xfs_mount_t *mp) | 695 | xfs_mountfs_check_barriers(xfs_mount_t *mp) |
692 | { | 696 | { |
693 | int error; | 697 | int error; |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca43..320be6aea492 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -708,6 +708,16 @@ xfs_reclaim_inode( | |||
708 | return 0; | 708 | return 0; |
709 | } | 709 | } |
710 | 710 | ||
711 | void | ||
712 | __xfs_inode_set_reclaim_tag( | ||
713 | struct xfs_perag *pag, | ||
714 | struct xfs_inode *ip) | ||
715 | { | ||
716 | radix_tree_tag_set(&pag->pag_ici_root, | ||
717 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
718 | XFS_ICI_RECLAIM_TAG); | ||
719 | } | ||
720 | |||
711 | /* | 721 | /* |
712 | * We set the inode flag atomically with the radix tree tag. | 722 | * We set the inode flag atomically with the radix tree tag. |
713 | * Once we get tag lookups on the radix tree, this inode flag | 723 | * Once we get tag lookups on the radix tree, this inode flag |
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag( | |||
722 | 732 | ||
723 | read_lock(&pag->pag_ici_lock); | 733 | read_lock(&pag->pag_ici_lock); |
724 | spin_lock(&ip->i_flags_lock); | 734 | spin_lock(&ip->i_flags_lock); |
725 | radix_tree_tag_set(&pag->pag_ici_root, | 735 | __xfs_inode_set_reclaim_tag(pag, ip); |
726 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
727 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 736 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
728 | spin_unlock(&ip->i_flags_lock); | 737 | spin_unlock(&ip->i_flags_lock); |
729 | read_unlock(&pag->pag_ici_lock); | 738 | read_unlock(&pag->pag_ici_lock); |
@@ -740,21 +749,6 @@ __xfs_inode_clear_reclaim_tag( | |||
740 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | 749 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); |
741 | } | 750 | } |
742 | 751 | ||
743 | void | ||
744 | xfs_inode_clear_reclaim_tag( | ||
745 | xfs_inode_t *ip) | ||
746 | { | ||
747 | xfs_mount_t *mp = ip->i_mount; | ||
748 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
749 | |||
750 | read_lock(&pag->pag_ici_lock); | ||
751 | spin_lock(&ip->i_flags_lock); | ||
752 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | ||
753 | spin_unlock(&ip->i_flags_lock); | ||
754 | read_unlock(&pag->pag_ici_lock); | ||
755 | xfs_put_perag(mp, pag); | ||
756 | } | ||
757 | |||
758 | STATIC int | 752 | STATIC int |
759 | xfs_reclaim_inode_now( | 753 | xfs_reclaim_inode_now( |
760 | struct xfs_inode *ip, | 754 | struct xfs_inode *ip, |
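The factoring above leaves the locking in xfs_inode_set_reclaim_tag() and reduces the new helper to the bare radix-tree tag set, so callers that already hold pag_ici_lock (per the header change below) can reuse it. Condensed into a sketch, the locked caller does:

static void example_tag_reclaimable(struct xfs_perag *pag,
				    struct xfs_inode *ip)
{
	/* locking order: pag_ici_lock (read) outside i_flags_lock */
	read_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	__xfs_inode_set_reclaim_tag(pag, ip);	/* radix-tree tag only */
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);	/* matching inode flag */
	spin_unlock(&ip->i_flags_lock);
	read_unlock(&pag->pag_ici_lock);
}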
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c7..27920eb7a820 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -48,7 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); | |||
48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
49 | 49 | ||
50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
51 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); | 51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); |
52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
53 | struct xfs_inode *ip); | 53 | struct xfs_inode *ip); |
54 | 54 | ||
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 21b08c0396a1..83e7ea3e25fa 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c | |||
@@ -48,50 +48,34 @@ | |||
48 | 48 | ||
49 | struct xqmstats xqmstats; | 49 | struct xqmstats xqmstats; |
50 | 50 | ||
51 | STATIC int | 51 | static int xqm_proc_show(struct seq_file *m, void *v) |
52 | xfs_qm_read_xfsquota( | ||
53 | char *buffer, | ||
54 | char **start, | ||
55 | off_t offset, | ||
56 | int count, | ||
57 | int *eof, | ||
58 | void *data) | ||
59 | { | 52 | { |
60 | int len; | ||
61 | |||
62 | /* maximum; incore; ratio free to inuse; freelist */ | 53 | /* maximum; incore; ratio free to inuse; freelist */ |
63 | len = sprintf(buffer, "%d\t%d\t%d\t%u\n", | 54 | seq_printf(m, "%d\t%d\t%d\t%u\n", |
64 | ndquot, | 55 | ndquot, |
65 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, | 56 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, |
66 | xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, | 57 | xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, |
67 | xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); | 58 | xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); |
68 | 59 | return 0; | |
69 | if (offset >= len) { | ||
70 | *start = buffer; | ||
71 | *eof = 1; | ||
72 | return 0; | ||
73 | } | ||
74 | *start = buffer + offset; | ||
75 | if ((len -= offset) > count) | ||
76 | return count; | ||
77 | *eof = 1; | ||
78 | |||
79 | return len; | ||
80 | } | 60 | } |
81 | 61 | ||
82 | STATIC int | 62 | static int xqm_proc_open(struct inode *inode, struct file *file) |
83 | xfs_qm_read_stats( | ||
84 | char *buffer, | ||
85 | char **start, | ||
86 | off_t offset, | ||
87 | int count, | ||
88 | int *eof, | ||
89 | void *data) | ||
90 | { | 63 | { |
91 | int len; | 64 | return single_open(file, xqm_proc_show, NULL); |
65 | } | ||
66 | |||
67 | static const struct file_operations xqm_proc_fops = { | ||
68 | .owner = THIS_MODULE, | ||
69 | .open = xqm_proc_open, | ||
70 | .read = seq_read, | ||
71 | .llseek = seq_lseek, | ||
72 | .release = single_release, | ||
73 | }; | ||
92 | 74 | ||
75 | static int xqmstat_proc_show(struct seq_file *m, void *v) | ||
76 | { | ||
93 | /* quota performance statistics */ | 77 | /* quota performance statistics */ |
94 | len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", | 78 | seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", |
95 | xqmstats.xs_qm_dqreclaims, | 79 | xqmstats.xs_qm_dqreclaims, |
96 | xqmstats.xs_qm_dqreclaim_misses, | 80 | xqmstats.xs_qm_dqreclaim_misses, |
97 | xqmstats.xs_qm_dquot_dups, | 81 | xqmstats.xs_qm_dquot_dups, |
@@ -100,25 +84,27 @@ xfs_qm_read_stats( | |||
100 | xqmstats.xs_qm_dqwants, | 84 | xqmstats.xs_qm_dqwants, |
101 | xqmstats.xs_qm_dqshake_reclaims, | 85 | xqmstats.xs_qm_dqshake_reclaims, |
102 | xqmstats.xs_qm_dqinact_reclaims); | 86 | xqmstats.xs_qm_dqinact_reclaims); |
87 | return 0; | ||
88 | } | ||
103 | 89 | ||
104 | if (offset >= len) { | 90 | static int xqmstat_proc_open(struct inode *inode, struct file *file) |
105 | *start = buffer; | 91 | { |
106 | *eof = 1; | 92 | return single_open(file, xqmstat_proc_show, NULL); |
107 | return 0; | ||
108 | } | ||
109 | *start = buffer + offset; | ||
110 | if ((len -= offset) > count) | ||
111 | return count; | ||
112 | *eof = 1; | ||
113 | |||
114 | return len; | ||
115 | } | 93 | } |
116 | 94 | ||
95 | static const struct file_operations xqmstat_proc_fops = { | ||
96 | .owner = THIS_MODULE, | ||
97 | .open = xqmstat_proc_open, | ||
98 | .read = seq_read, | ||
99 | .llseek = seq_lseek, | ||
100 | .release = single_release, | ||
101 | }; | ||
102 | |||
117 | void | 103 | void |
118 | xfs_qm_init_procfs(void) | 104 | xfs_qm_init_procfs(void) |
119 | { | 105 | { |
120 | create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); | 106 | proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); |
121 | create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); | 107 | proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); |
122 | } | 108 | } |
123 | 109 | ||
124 | void | 110 | void |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f24b50b68d03..a5d54bf4931b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -198,6 +198,15 @@ typedef struct xfs_perag | |||
198 | xfs_agino_t pagi_count; /* number of allocated inodes */ | 198 | xfs_agino_t pagi_count; /* number of allocated inodes */ |
199 | int pagb_count; /* pagb slots in use */ | 199 | int pagb_count; /* pagb slots in use */ |
200 | xfs_perag_busy_t *pagb_list; /* unstable blocks */ | 200 | xfs_perag_busy_t *pagb_list; /* unstable blocks */ |
201 | |||
202 | /* | ||
203 | * Inode allocation search lookup optimisation. | ||
204 | * If the pagino matches, the search for new inodes | ||
205 | * doesn't need to search the near ones again straight away | ||
206 | */ | ||
207 | xfs_agino_t pagl_pagino; | ||
208 | xfs_agino_t pagl_leftrec; | ||
209 | xfs_agino_t pagl_rightrec; | ||
201 | #ifdef __KERNEL__ | 210 | #ifdef __KERNEL__ |
202 | spinlock_t pagb_lock; /* lock for pagb_list */ | 211 | spinlock_t pagb_lock; /* lock for pagb_list */ |
203 | 212 | ||
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index db15feb906ff..4ece1906bd41 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) | |||
2010 | dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); | 2010 | dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); |
2011 | blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); | 2011 | blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); |
2012 | error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, | 2012 | error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, |
2013 | blkcnt, XFS_BUF_LOCK, &bp); | 2013 | blkcnt, |
2014 | XFS_BUF_LOCK | XBF_DONT_BLOCK, | ||
2015 | &bp); | ||
2014 | if (error) | 2016 | if (error) |
2015 | return(error); | 2017 | return(error); |
2016 | 2018 | ||
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2141 | dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), | 2143 | dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), |
2142 | blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); | 2144 | blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); |
2143 | 2145 | ||
2144 | bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, | 2146 | bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt, |
2145 | blkcnt, XFS_BUF_LOCK); | 2147 | XFS_BUF_LOCK | XBF_DONT_BLOCK); |
2146 | ASSERT(bp); | 2148 | ASSERT(bp); |
2147 | ASSERT(!XFS_BUF_GETERROR(bp)); | 2149 | ASSERT(!XFS_BUF_GETERROR(bp)); |
2148 | 2150 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7928b9983c1d..8971fb09d387 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -3713,7 +3713,7 @@ done: | |||
3713 | * entry (null if none). Else, *lastxp will be set to the index | 3713 | * entry (null if none). Else, *lastxp will be set to the index |
3714 | * of the found entry; *gotp will contain the entry. | 3714 | * of the found entry; *gotp will contain the entry. |
3715 | */ | 3715 | */ |
3716 | xfs_bmbt_rec_host_t * /* pointer to found extent entry */ | 3716 | STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ |
3717 | xfs_bmap_search_multi_extents( | 3717 | xfs_bmap_search_multi_extents( |
3718 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3718 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3719 | xfs_fileoff_t bno, /* block number searched for */ | 3719 | xfs_fileoff_t bno, /* block number searched for */ |
@@ -6009,7 +6009,7 @@ xfs_getbmap( | |||
6009 | */ | 6009 | */ |
6010 | error = ENOMEM; | 6010 | error = ENOMEM; |
6011 | subnex = 16; | 6011 | subnex = 16; |
6012 | map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); | 6012 | map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); |
6013 | if (!map) | 6013 | if (!map) |
6014 | goto out_unlock_ilock; | 6014 | goto out_unlock_ilock; |
6015 | 6015 | ||
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 1b8ff9256bd0..56f62d2edc35 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -392,17 +392,6 @@ xfs_bmap_count_blocks( | |||
392 | int whichfork, | 392 | int whichfork, |
393 | int *count); | 393 | int *count); |
394 | 394 | ||
395 | /* | ||
396 | * Search the extent records for the entry containing block bno. | ||
397 | * If bno lies in a hole, point to the next entry. If bno lies | ||
398 | * past eof, *eofp will be set, and *prevp will contain the last | ||
399 | * entry (null if none). Else, *lastxp will be set to the index | ||
400 | * of the found entry; *gotp will contain the entry. | ||
401 | */ | ||
402 | xfs_bmbt_rec_host_t * | ||
403 | xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, | ||
404 | xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); | ||
405 | |||
406 | #endif /* __KERNEL__ */ | 395 | #endif /* __KERNEL__ */ |
407 | 396 | ||
408 | #endif /* __XFS_BMAP_H__ */ | 397 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 5c1ade06578e..eb7b702d0690 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -202,16 +202,6 @@ xfs_bmbt_get_state( | |||
202 | ext_flag); | 202 | ext_flag); |
203 | } | 203 | } |
204 | 204 | ||
205 | /* Endian flipping versions of the bmbt extraction functions */ | ||
206 | void | ||
207 | xfs_bmbt_disk_get_all( | ||
208 | xfs_bmbt_rec_t *r, | ||
209 | xfs_bmbt_irec_t *s) | ||
210 | { | ||
211 | __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), | ||
212 | get_unaligned_be64(&r->l1), s); | ||
213 | } | ||
214 | |||
215 | /* | 205 | /* |
216 | * Extract the blockcount field from an on disk bmap extent record. | 206 | * Extract the blockcount field from an on disk bmap extent record. |
217 | */ | 207 | */ |
@@ -816,6 +806,16 @@ xfs_bmbt_trace_key( | |||
816 | *l1 = 0; | 806 | *l1 = 0; |
817 | } | 807 | } |
818 | 808 | ||
809 | /* Endian flipping versions of the bmbt extraction functions */ | ||
810 | STATIC void | ||
811 | xfs_bmbt_disk_get_all( | ||
812 | xfs_bmbt_rec_t *r, | ||
813 | xfs_bmbt_irec_t *s) | ||
814 | { | ||
815 | __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), | ||
816 | get_unaligned_be64(&r->l1), s); | ||
817 | } | ||
818 | |||
819 | STATIC void | 819 | STATIC void |
820 | xfs_bmbt_trace_record( | 820 | xfs_bmbt_trace_record( |
821 | struct xfs_btree_cur *cur, | 821 | struct xfs_btree_cur *cur, |
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0e8df007615e..5549d495947f 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
@@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); | |||
220 | extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); | 220 | extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); |
221 | extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); | 221 | extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); |
222 | 222 | ||
223 | extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); | ||
224 | extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); | 223 | extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); |
225 | extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); | 224 | extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); |
226 | 225 | ||
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e9df99574829..52b5f14d0c32 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -120,8 +120,8 @@ xfs_btree_check_sblock( | |||
120 | XFS_RANDOM_BTREE_CHECK_SBLOCK))) { | 120 | XFS_RANDOM_BTREE_CHECK_SBLOCK))) { |
121 | if (bp) | 121 | if (bp) |
122 | xfs_buftrace("SBTREE ERROR", bp); | 122 | xfs_buftrace("SBTREE ERROR", bp); |
123 | XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, | 123 | XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", |
124 | cur->bc_mp); | 124 | XFS_ERRLEVEL_LOW, cur->bc_mp, block); |
125 | return XFS_ERROR(EFSCORRUPTED); | 125 | return XFS_ERROR(EFSCORRUPTED); |
126 | } | 126 | } |
127 | return 0; | 127 | return 0; |
@@ -646,46 +646,6 @@ xfs_btree_read_bufl( | |||
646 | } | 646 | } |
647 | 647 | ||
648 | /* | 648 | /* |
649 | * Get a buffer for the block, return it read in. | ||
650 | * Short-form addressing. | ||
651 | */ | ||
652 | int /* error */ | ||
653 | xfs_btree_read_bufs( | ||
654 | xfs_mount_t *mp, /* file system mount point */ | ||
655 | xfs_trans_t *tp, /* transaction pointer */ | ||
656 | xfs_agnumber_t agno, /* allocation group number */ | ||
657 | xfs_agblock_t agbno, /* allocation group block number */ | ||
658 | uint lock, /* lock flags for read_buf */ | ||
659 | xfs_buf_t **bpp, /* buffer for agno/agbno */ | ||
660 | int refval) /* ref count value for buffer */ | ||
661 | { | ||
662 | xfs_buf_t *bp; /* return value */ | ||
663 | xfs_daddr_t d; /* real disk block address */ | ||
664 | int error; | ||
665 | |||
666 | ASSERT(agno != NULLAGNUMBER); | ||
667 | ASSERT(agbno != NULLAGBLOCK); | ||
668 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
669 | if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | ||
670 | mp->m_bsize, lock, &bp))) { | ||
671 | return error; | ||
672 | } | ||
673 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | ||
674 | if (bp != NULL) { | ||
675 | switch (refval) { | ||
676 | case XFS_ALLOC_BTREE_REF: | ||
677 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | ||
678 | break; | ||
679 | case XFS_INO_BTREE_REF: | ||
680 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval); | ||
681 | break; | ||
682 | } | ||
683 | } | ||
684 | *bpp = bp; | ||
685 | return 0; | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * Read-ahead the block, don't wait for it, don't return a buffer. | 649 | * Read-ahead the block, don't wait for it, don't return a buffer. |
690 | * Long-form addressing. | 650 | * Long-form addressing. |
691 | */ | 651 | */ |
@@ -2951,7 +2911,7 @@ error0: | |||
2951 | * inode we have to copy the single block it was pointing to into the | 2911 | * inode we have to copy the single block it was pointing to into the |
2952 | * inode. | 2912 | * inode. |
2953 | */ | 2913 | */ |
2954 | int | 2914 | STATIC int |
2955 | xfs_btree_kill_iroot( | 2915 | xfs_btree_kill_iroot( |
2956 | struct xfs_btree_cur *cur) | 2916 | struct xfs_btree_cur *cur) |
2957 | { | 2917 | { |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 4f852b735b96..7fa07062bdda 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -379,20 +379,6 @@ xfs_btree_read_bufl( | |||
379 | int refval);/* ref count value for buffer */ | 379 | int refval);/* ref count value for buffer */ |
380 | 380 | ||
381 | /* | 381 | /* |
382 | * Get a buffer for the block, return it read in. | ||
383 | * Short-form addressing. | ||
384 | */ | ||
385 | int /* error */ | ||
386 | xfs_btree_read_bufs( | ||
387 | struct xfs_mount *mp, /* file system mount point */ | ||
388 | struct xfs_trans *tp, /* transaction pointer */ | ||
389 | xfs_agnumber_t agno, /* allocation group number */ | ||
390 | xfs_agblock_t agbno, /* allocation group block number */ | ||
391 | uint lock, /* lock flags for read_buf */ | ||
392 | struct xfs_buf **bpp, /* buffer for agno/agbno */ | ||
393 | int refval);/* ref count value for buffer */ | ||
394 | |||
395 | /* | ||
396 | * Read-ahead the block, don't wait for it, don't return a buffer. | 382 | * Read-ahead the block, don't wait for it, don't return a buffer. |
397 | * Long-form addressing. | 383 | * Long-form addressing. |
398 | */ | 384 | */ |
@@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); | |||
432 | int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); | 418 | int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); |
433 | int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); | 419 | int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); |
434 | int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); | 420 | int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); |
435 | int xfs_btree_kill_iroot(struct xfs_btree_cur *); | ||
436 | int xfs_btree_insert(struct xfs_btree_cur *, int *); | 421 | int xfs_btree_insert(struct xfs_btree_cur *, int *); |
437 | int xfs_btree_delete(struct xfs_btree_cur *, int *); | 422 | int xfs_btree_delete(struct xfs_btree_cur *, int *); |
438 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); | 423 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9ff6e57a5075..2847bbc1c534 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ | |||
2201 | xfs_da_state_t * | 2201 | xfs_da_state_t * |
2202 | xfs_da_state_alloc(void) | 2202 | xfs_da_state_alloc(void) |
2203 | { | 2203 | { |
2204 | return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); | 2204 | return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); |
2205 | } | 2205 | } |
2206 | 2206 | ||
2207 | /* | 2207 | /* |
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) | |||
2261 | int off; | 2261 | int off; |
2262 | 2262 | ||
2263 | if (nbuf == 1) | 2263 | if (nbuf == 1) |
2264 | dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); | 2264 | dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); |
2265 | else | 2265 | else |
2266 | dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); | 2266 | dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); |
2267 | dabuf->dirty = 0; | 2267 | dabuf->dirty = 0; |
2268 | #ifdef XFS_DABUF_DEBUG | 2268 | #ifdef XFS_DABUF_DEBUG |
2269 | dabuf->ra = ra; | 2269 | dabuf->ra = ra; |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index c657bec6d951..bb1d58eb3982 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result( | |||
256 | !(args->op_flags & XFS_DA_OP_CILOOKUP)) | 256 | !(args->op_flags & XFS_DA_OP_CILOOKUP)) |
257 | return EEXIST; | 257 | return EEXIST; |
258 | 258 | ||
259 | args->value = kmem_alloc(len, KM_MAYFAIL); | 259 | args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); |
260 | if (!args->value) | 260 | if (!args->value) |
261 | return ENOMEM; | 261 | return ENOMEM; |
262 | 262 | ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index cbd451bb4848..2d0b3e1da9e6 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -167,17 +167,25 @@ xfs_growfs_data_private( | |||
167 | new = nb - mp->m_sb.sb_dblocks; | 167 | new = nb - mp->m_sb.sb_dblocks; |
168 | oagcount = mp->m_sb.sb_agcount; | 168 | oagcount = mp->m_sb.sb_agcount; |
169 | if (nagcount > oagcount) { | 169 | if (nagcount > oagcount) { |
170 | void *new_perag, *old_perag; | ||
171 | |||
170 | xfs_filestream_flush(mp); | 172 | xfs_filestream_flush(mp); |
173 | |||
174 | new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount, | ||
175 | KM_MAYFAIL); | ||
176 | if (!new_perag) | ||
177 | return XFS_ERROR(ENOMEM); | ||
178 | |||
171 | down_write(&mp->m_peraglock); | 179 | down_write(&mp->m_peraglock); |
172 | mp->m_perag = kmem_realloc(mp->m_perag, | 180 | memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount); |
173 | sizeof(xfs_perag_t) * nagcount, | 181 | old_perag = mp->m_perag; |
174 | sizeof(xfs_perag_t) * oagcount, | 182 | mp->m_perag = new_perag; |
175 | KM_SLEEP); | 183 | |
176 | memset(&mp->m_perag[oagcount], 0, | ||
177 | (nagcount - oagcount) * sizeof(xfs_perag_t)); | ||
178 | mp->m_flags |= XFS_MOUNT_32BITINODES; | 184 | mp->m_flags |= XFS_MOUNT_32BITINODES; |
179 | nagimax = xfs_initialize_perag(mp, nagcount); | 185 | nagimax = xfs_initialize_perag(mp, nagcount); |
180 | up_write(&mp->m_peraglock); | 186 | up_write(&mp->m_peraglock); |
187 | |||
188 | kmem_free(old_perag); | ||
181 | } | 189 | } |
182 | tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); | 190 | tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); |
183 | tp->t_flags |= XFS_TRANS_RESERVE; | 191 | tp->t_flags |= XFS_TRANS_RESERVE; |
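The growfs hunk above replaces an in-place kmem_realloc() with allocate-copy-swap-free. A sketch of the pattern using the names from the hunk: the allocation happens before taking m_peraglock, so a failed allocation leaves the old per-AG table intact and no sleeping allocation runs under the write lock.

static int example_grow_perag(struct xfs_mount *mp,
			      xfs_agnumber_t oagcount,
			      xfs_agnumber_t nagcount)
{
	xfs_perag_t *new_perag, *old_perag;

	/* allocate outside the lock; failure here is harmless */
	new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount, KM_MAYFAIL);
	if (!new_perag)
		return XFS_ERROR(ENOMEM);

	/* copy the live entries and swap pointers under the write lock */
	down_write(&mp->m_peraglock);
	memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
	old_perag = mp->m_perag;
	mp->m_perag = new_perag;
	up_write(&mp->m_peraglock);

	/* free the superseded table after dropping the lock */
	kmem_free(old_perag);
	return 0;
}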
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3120a3a5e20f..ab64f3efb43b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment( | |||
57 | } | 57 | } |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Lookup the record equal to ino in the btree given by cur. | 60 | * Lookup a record by ino in the btree given by cur. |
61 | */ | ||
62 | STATIC int /* error */ | ||
63 | xfs_inobt_lookup_eq( | ||
64 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
65 | xfs_agino_t ino, /* starting inode of chunk */ | ||
66 | __int32_t fcnt, /* free inode count */ | ||
67 | xfs_inofree_t free, /* free inode mask */ | ||
68 | int *stat) /* success/failure */ | ||
69 | { | ||
70 | cur->bc_rec.i.ir_startino = ino; | ||
71 | cur->bc_rec.i.ir_freecount = fcnt; | ||
72 | cur->bc_rec.i.ir_free = free; | ||
73 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Lookup the first record greater than or equal to ino | ||
78 | * in the btree given by cur. | ||
79 | */ | 61 | */ |
80 | int /* error */ | 62 | int /* error */ |
81 | xfs_inobt_lookup_ge( | 63 | xfs_inobt_lookup( |
82 | struct xfs_btree_cur *cur, /* btree cursor */ | 64 | struct xfs_btree_cur *cur, /* btree cursor */ |
83 | xfs_agino_t ino, /* starting inode of chunk */ | 65 | xfs_agino_t ino, /* starting inode of chunk */ |
84 | __int32_t fcnt, /* free inode count */ | 66 | xfs_lookup_t dir, /* <=, >=, == */ |
85 | xfs_inofree_t free, /* free inode mask */ | ||
86 | int *stat) /* success/failure */ | 67 | int *stat) /* success/failure */ |
87 | { | 68 | { |
88 | cur->bc_rec.i.ir_startino = ino; | 69 | cur->bc_rec.i.ir_startino = ino; |
89 | cur->bc_rec.i.ir_freecount = fcnt; | 70 | cur->bc_rec.i.ir_freecount = 0; |
90 | cur->bc_rec.i.ir_free = free; | 71 | cur->bc_rec.i.ir_free = 0; |
91 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); | 72 | return xfs_btree_lookup(cur, dir, stat); |
92 | } | 73 | } |
93 | 74 | ||
94 | /* | 75 | /* |
95 | * Lookup the first record less than or equal to ino | 76 | * Update the record referred to by cur to the value given. |
96 | * in the btree given by cur. | ||
97 | */ | ||
98 | int /* error */ | ||
99 | xfs_inobt_lookup_le( | ||
100 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
101 | xfs_agino_t ino, /* starting inode of chunk */ | ||
102 | __int32_t fcnt, /* free inode count */ | ||
103 | xfs_inofree_t free, /* free inode mask */ | ||
104 | int *stat) /* success/failure */ | ||
105 | { | ||
106 | cur->bc_rec.i.ir_startino = ino; | ||
107 | cur->bc_rec.i.ir_freecount = fcnt; | ||
108 | cur->bc_rec.i.ir_free = free; | ||
109 | return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Update the record referred to by cur to the value given | ||
114 | * by [ino, fcnt, free]. | ||
115 | * This either works (return 0) or gets an EFSCORRUPTED error. | 77 | * This either works (return 0) or gets an EFSCORRUPTED error. |
116 | */ | 78 | */ |
117 | STATIC int /* error */ | 79 | STATIC int /* error */ |
118 | xfs_inobt_update( | 80 | xfs_inobt_update( |
119 | struct xfs_btree_cur *cur, /* btree cursor */ | 81 | struct xfs_btree_cur *cur, /* btree cursor */ |
120 | xfs_agino_t ino, /* starting inode of chunk */ | 82 | xfs_inobt_rec_incore_t *irec) /* btree record */ |
121 | __int32_t fcnt, /* free inode count */ | ||
122 | xfs_inofree_t free) /* free inode mask */ | ||
123 | { | 83 | { |
124 | union xfs_btree_rec rec; | 84 | union xfs_btree_rec rec; |
125 | 85 | ||
126 | rec.inobt.ir_startino = cpu_to_be32(ino); | 86 | rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); |
127 | rec.inobt.ir_freecount = cpu_to_be32(fcnt); | 87 | rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); |
128 | rec.inobt.ir_free = cpu_to_be64(free); | 88 | rec.inobt.ir_free = cpu_to_be64(irec->ir_free); |
129 | return xfs_btree_update(cur, &rec); | 89 | return xfs_btree_update(cur, &rec); |
130 | } | 90 | } |
131 | 91 | ||
@@ -135,9 +95,7 @@ xfs_inobt_update( | |||
135 | int /* error */ | 95 | int /* error */ |
136 | xfs_inobt_get_rec( | 96 | xfs_inobt_get_rec( |
137 | struct xfs_btree_cur *cur, /* btree cursor */ | 97 | struct xfs_btree_cur *cur, /* btree cursor */ |
138 | xfs_agino_t *ino, /* output: starting inode of chunk */ | 98 | xfs_inobt_rec_incore_t *irec, /* btree record */ |
139 | __int32_t *fcnt, /* output: number of free inodes */ | ||
140 | xfs_inofree_t *free, /* output: free inode mask */ | ||
141 | int *stat) /* output: success/failure */ | 99 | int *stat) /* output: success/failure */ |
142 | { | 100 | { |
143 | union xfs_btree_rec *rec; | 101 | union xfs_btree_rec *rec; |
@@ -145,14 +103,136 @@ xfs_inobt_get_rec( | |||
145 | 103 | ||
146 | error = xfs_btree_get_rec(cur, &rec, stat); | 104 | error = xfs_btree_get_rec(cur, &rec, stat); |
147 | if (!error && *stat == 1) { | 105 | if (!error && *stat == 1) { |
148 | *ino = be32_to_cpu(rec->inobt.ir_startino); | 106 | irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); |
149 | *fcnt = be32_to_cpu(rec->inobt.ir_freecount); | 107 | irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); |
150 | *free = be64_to_cpu(rec->inobt.ir_free); | 108 | irec->ir_free = be64_to_cpu(rec->inobt.ir_free); |
151 | } | 109 | } |
152 | return error; | 110 | return error; |
153 | } | 111 | } |
154 | 112 | ||
155 | /* | 113 | /* |
114 | * Verify that the number of free inodes in the AGI is correct. | ||
115 | */ | ||
116 | #ifdef DEBUG | ||
117 | STATIC int | ||
118 | xfs_check_agi_freecount( | ||
119 | struct xfs_btree_cur *cur, | ||
120 | struct xfs_agi *agi) | ||
121 | { | ||
122 | if (cur->bc_nlevels == 1) { | ||
123 | xfs_inobt_rec_incore_t rec; | ||
124 | int freecount = 0; | ||
125 | int error; | ||
126 | int i; | ||
127 | |||
128 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
129 | if (error) | ||
130 | return error; | ||
131 | |||
132 | do { | ||
133 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
134 | if (error) | ||
135 | return error; | ||
136 | |||
137 | if (i) { | ||
138 | freecount += rec.ir_freecount; | ||
139 | error = xfs_btree_increment(cur, 0, &i); | ||
140 | if (error) | ||
141 | return error; | ||
142 | } | ||
143 | } while (i == 1); | ||
144 | |||
145 | if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) | ||
146 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); | ||
147 | } | ||
148 | return 0; | ||
149 | } | ||
150 | #else | ||
151 | #define xfs_check_agi_freecount(cur, agi) 0 | ||
152 | #endif | ||
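In non-DEBUG builds the stub above expands to the constant 0, so every caller can assign and test the result unconditionally and the compiler discards the dead branch. The pattern as it appears at each call site this patch converts:

	error = xfs_check_agi_freecount(cur, agi);	/* constant 0 unless DEBUG */
	if (error)
		goto error0;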
153 | |||
154 | /* | ||
155 | * Initialise a new set of inodes. | ||
156 | */ | ||
157 | STATIC void | ||
158 | xfs_ialloc_inode_init( | ||
159 | struct xfs_mount *mp, | ||
160 | struct xfs_trans *tp, | ||
161 | xfs_agnumber_t agno, | ||
162 | xfs_agblock_t agbno, | ||
163 | xfs_agblock_t length, | ||
164 | unsigned int gen) | ||
165 | { | ||
166 | struct xfs_buf *fbuf; | ||
167 | struct xfs_dinode *free; | ||
168 | int blks_per_cluster, nbufs, ninodes; | ||
169 | int version; | ||
170 | int i, j; | ||
171 | xfs_daddr_t d; | ||
172 | |||
173 | /* | ||
174 | * Loop over the new block(s), filling in the inodes. | ||
175 | * For small block sizes, manipulate the inodes in buffers | ||
176 | * which are multiples of the block size. | ||
177 | */ | ||
178 | if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { | ||
179 | blks_per_cluster = 1; | ||
180 | nbufs = length; | ||
181 | ninodes = mp->m_sb.sb_inopblock; | ||
182 | } else { | ||
183 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / | ||
184 | mp->m_sb.sb_blocksize; | ||
185 | nbufs = length / blks_per_cluster; | ||
186 | ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Figure out what version number to use in the inodes we create. | ||
191 | * If the superblock version has caught up to the one that supports | ||
192 | * the new inode format, then use the new inode version. Otherwise | ||
193 | * use the old version so that old kernels will continue to be | ||
194 | * able to use the file system. | ||
195 | */ | ||
196 | if (xfs_sb_version_hasnlink(&mp->m_sb)) | ||
197 | version = 2; | ||
198 | else | ||
199 | version = 1; | ||
200 | |||
201 | for (j = 0; j < nbufs; j++) { | ||
202 | /* | ||
203 | * Get the block. | ||
204 | */ | ||
205 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); | ||
206 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | ||
207 | mp->m_bsize * blks_per_cluster, | ||
208 | XFS_BUF_LOCK); | ||
209 | ASSERT(fbuf); | ||
210 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | ||
211 | |||
212 | /* | ||
213 | * Initialize all inodes in this buffer and then log them. | ||
214 | * | ||
215 | * XXX: It would be much better if we had just one transaction | ||
216 | * to log a whole cluster of inodes instead of all the | ||
217 | * individual transactions causing a lot of log traffic. | ||
218 | */ | ||
219 | xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); | ||
220 | for (i = 0; i < ninodes; i++) { | ||
221 | int ioffset = i << mp->m_sb.sb_inodelog; | ||
222 | uint isize = sizeof(struct xfs_dinode); | ||
223 | |||
224 | free = xfs_make_iptr(mp, fbuf, i); | ||
225 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | ||
226 | free->di_version = version; | ||
227 | free->di_gen = cpu_to_be32(gen); | ||
228 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | ||
229 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); | ||
230 | } | ||
231 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
232 | } | ||
233 | } | ||
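To make the sizing logic above concrete, take 4 KiB filesystem blocks, an 8 KiB inode cluster and 256-byte inodes (typical values, assumed here purely for illustration). The small-block branch is taken and each buffer covers one cluster:

	blks_per_cluster = 8192 / 4096;		/* = 2 fs blocks per buffer */
	nbufs = length / blks_per_cluster;	/* e.g. 64 new blocks -> 32 buffers */
	ninodes = blks_per_cluster * 16;	/* sb_inopblock = 4096 / 256 = 16, so 32 inodes per buffer */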
234 | |||
235 | /* | ||
156 | * Allocate new inodes in the allocation group specified by agbp. | 236 | * Allocate new inodes in the allocation group specified by agbp. |
157 | * Return 0 for success, else error code. | 237 | * Return 0 for success, else error code. |
158 | */ | 238 | */ |
@@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc( | |||
164 | { | 244 | { |
165 | xfs_agi_t *agi; /* allocation group header */ | 245 | xfs_agi_t *agi; /* allocation group header */ |
166 | xfs_alloc_arg_t args; /* allocation argument structure */ | 246 | xfs_alloc_arg_t args; /* allocation argument structure */ |
167 | int blks_per_cluster; /* fs blocks per inode cluster */ | ||
168 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 247 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
169 | xfs_daddr_t d; /* disk addr of buffer */ | ||
170 | xfs_agnumber_t agno; | 248 | xfs_agnumber_t agno; |
171 | int error; | 249 | int error; |
172 | xfs_buf_t *fbuf; /* new free inodes' buffer */ | 250 | int i; |
173 | xfs_dinode_t *free; /* new free inode structure */ | ||
174 | int i; /* inode counter */ | ||
175 | int j; /* block counter */ | ||
176 | int nbufs; /* num bufs of new inodes */ | ||
177 | xfs_agino_t newino; /* new first inode's number */ | 251 | xfs_agino_t newino; /* new first inode's number */ |
178 | xfs_agino_t newlen; /* new number of inodes */ | 252 | xfs_agino_t newlen; /* new number of inodes */ |
179 | int ninodes; /* num inodes per buf */ | ||
180 | xfs_agino_t thisino; /* current inode number, for loop */ | 253 | xfs_agino_t thisino; /* current inode number, for loop */ |
181 | int version; /* inode version number to use */ | ||
182 | int isaligned = 0; /* inode allocation at stripe unit */ | 254 | int isaligned = 0; /* inode allocation at stripe unit */ |
183 | /* boundary */ | 255 | /* boundary */ |
184 | unsigned int gen; | ||
185 | 256 | ||
186 | args.tp = tp; | 257 | args.tp = tp; |
187 | args.mp = tp->t_mountp; | 258 | args.mp = tp->t_mountp; |
@@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc( | |||
202 | */ | 273 | */ |
203 | agi = XFS_BUF_TO_AGI(agbp); | 274 | agi = XFS_BUF_TO_AGI(agbp); |
204 | newino = be32_to_cpu(agi->agi_newino); | 275 | newino = be32_to_cpu(agi->agi_newino); |
276 | agno = be32_to_cpu(agi->agi_seqno); | ||
205 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + | 277 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + |
206 | XFS_IALLOC_BLOCKS(args.mp); | 278 | XFS_IALLOC_BLOCKS(args.mp); |
207 | if (likely(newino != NULLAGINO && | 279 | if (likely(newino != NULLAGINO && |
208 | (args.agbno < be32_to_cpu(agi->agi_length)))) { | 280 | (args.agbno < be32_to_cpu(agi->agi_length)))) { |
209 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 281 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
210 | be32_to_cpu(agi->agi_seqno), args.agbno); | ||
211 | args.type = XFS_ALLOCTYPE_THIS_BNO; | 282 | args.type = XFS_ALLOCTYPE_THIS_BNO; |
212 | args.mod = args.total = args.wasdel = args.isfl = | 283 | args.mod = args.total = args.wasdel = args.isfl = |
213 | args.userdata = args.minalignslop = 0; | 284 | args.userdata = args.minalignslop = 0; |
@@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc( | |||
258 | * For now, just allocate blocks up front. | 329 | * For now, just allocate blocks up front. |
259 | */ | 330 | */ |
260 | args.agbno = be32_to_cpu(agi->agi_root); | 331 | args.agbno = be32_to_cpu(agi->agi_root); |
261 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 332 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
262 | be32_to_cpu(agi->agi_seqno), args.agbno); | ||
263 | /* | 333 | /* |
264 | * Allocate a fixed-size extent of inodes. | 334 | * Allocate a fixed-size extent of inodes. |
265 | */ | 335 | */ |
@@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc( | |||
282 | if (isaligned && args.fsbno == NULLFSBLOCK) { | 352 | if (isaligned && args.fsbno == NULLFSBLOCK) { |
283 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 353 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
284 | args.agbno = be32_to_cpu(agi->agi_root); | 354 | args.agbno = be32_to_cpu(agi->agi_root); |
285 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 355 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
286 | be32_to_cpu(agi->agi_seqno), args.agbno); | ||
287 | args.alignment = xfs_ialloc_cluster_alignment(&args); | 356 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
288 | if ((error = xfs_alloc_vextent(&args))) | 357 | if ((error = xfs_alloc_vextent(&args))) |
289 | return error; | 358 | return error; |
@@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc( | |||
294 | return 0; | 363 | return 0; |
295 | } | 364 | } |
296 | ASSERT(args.len == args.minlen); | 365 | ASSERT(args.len == args.minlen); |
297 | /* | ||
298 | * Convert the results. | ||
299 | */ | ||
300 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); | ||
301 | /* | ||
302 | * Loop over the new block(s), filling in the inodes. | ||
303 | * For small block sizes, manipulate the inodes in buffers | ||
304 | * which are multiples of the blocks size. | ||
305 | */ | ||
306 | if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { | ||
307 | blks_per_cluster = 1; | ||
308 | nbufs = (int)args.len; | ||
309 | ninodes = args.mp->m_sb.sb_inopblock; | ||
310 | } else { | ||
311 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / | ||
312 | args.mp->m_sb.sb_blocksize; | ||
313 | nbufs = (int)args.len / blks_per_cluster; | ||
314 | ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; | ||
315 | } | ||
316 | /* | ||
317 | * Figure out what version number to use in the inodes we create. | ||
318 | * If the superblock version has caught up to the one that supports | ||
319 | * the new inode format, then use the new inode version. Otherwise | ||
320 | * use the old version so that old kernels will continue to be | ||
321 | * able to use the file system. | ||
322 | */ | ||
323 | if (xfs_sb_version_hasnlink(&args.mp->m_sb)) | ||
324 | version = 2; | ||
325 | else | ||
326 | version = 1; | ||
327 | 366 | ||
328 | /* | 367 | /* |
368 | * Stamp and write the inode buffers. | ||
369 | * | ||
329 | * Seed the new inode cluster with a random generation number. This | 370 | * Seed the new inode cluster with a random generation number. This |
330 | * prevents short-term reuse of generation numbers if a chunk is | 371 | * prevents short-term reuse of generation numbers if a chunk is |
331 | * freed and then immediately reallocated. We use random numbers | 372 | * freed and then immediately reallocated. We use random numbers |
332 | * rather than a linear progression to prevent the next generation | 373 | * rather than a linear progression to prevent the next generation |
333 | * number from being easily guessable. | 374 | * number from being easily guessable. |
334 | */ | 375 | */ |
335 | gen = random32(); | 376 | xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len, |
336 | for (j = 0; j < nbufs; j++) { | 377 | random32()); |
337 | /* | ||
338 | * Get the block. | ||
339 | */ | ||
340 | d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), | ||
341 | args.agbno + (j * blks_per_cluster)); | ||
342 | fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, | ||
343 | args.mp->m_bsize * blks_per_cluster, | ||
344 | XFS_BUF_LOCK); | ||
345 | ASSERT(fbuf); | ||
346 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | ||
347 | 378 | ||
348 | /* | 379 | /* |
349 | * Initialize all inodes in this buffer and then log them. | 380 | * Convert the results. |
350 | * | 381 | */ |
351 | * XXX: It would be much better if we had just one transaction to | 382 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); |
352 | * log a whole cluster of inodes instead of all the individual | ||
353 | * transactions causing a lot of log traffic. | ||
354 | */ | ||
355 | xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); | ||
356 | for (i = 0; i < ninodes; i++) { | ||
357 | int ioffset = i << args.mp->m_sb.sb_inodelog; | ||
358 | uint isize = sizeof(struct xfs_dinode); | ||
359 | |||
360 | free = xfs_make_iptr(args.mp, fbuf, i); | ||
361 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | ||
362 | free->di_version = version; | ||
363 | free->di_gen = cpu_to_be32(gen); | ||
364 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | ||
365 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); | ||
366 | } | ||
367 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
368 | } | ||
369 | be32_add_cpu(&agi->agi_count, newlen); | 383 | be32_add_cpu(&agi->agi_count, newlen); |
370 | be32_add_cpu(&agi->agi_freecount, newlen); | 384 | be32_add_cpu(&agi->agi_freecount, newlen); |
371 | agno = be32_to_cpu(agi->agi_seqno); | ||
372 | down_read(&args.mp->m_peraglock); | 385 | down_read(&args.mp->m_peraglock); |
373 | args.mp->m_perag[agno].pagi_freecount += newlen; | 386 | args.mp->m_perag[agno].pagi_freecount += newlen; |
374 | up_read(&args.mp->m_peraglock); | 387 | up_read(&args.mp->m_peraglock); |
375 | agi->agi_newino = cpu_to_be32(newino); | 388 | agi->agi_newino = cpu_to_be32(newino); |
389 | |||
376 | /* | 390 | /* |
377 | * Insert records describing the new inode chunk into the btree. | 391 | * Insert records describing the new inode chunk into the btree. |
378 | */ | 392 | */ |
@@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc( | |||
380 | for (thisino = newino; | 394 | for (thisino = newino; |
381 | thisino < newino + newlen; | 395 | thisino < newino + newlen; |
382 | thisino += XFS_INODES_PER_CHUNK) { | 396 | thisino += XFS_INODES_PER_CHUNK) { |
383 | if ((error = xfs_inobt_lookup_eq(cur, thisino, | 397 | cur->bc_rec.i.ir_startino = thisino; |
384 | XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { | 398 | cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; |
399 | cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; | ||
400 | error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); | ||
401 | if (error) { | ||
385 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 402 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
386 | return error; | 403 | return error; |
387 | } | 404 | } |
388 | ASSERT(i == 0); | 405 | ASSERT(i == 0); |
389 | if ((error = xfs_btree_insert(cur, &i))) { | 406 | error = xfs_btree_insert(cur, &i); |
407 | if (error) { | ||
390 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 408 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
391 | return error; | 409 | return error; |
392 | } | 410 | } |
@@ -539,6 +557,62 @@ nextag: | |||
539 | } | 557 | } |
540 | 558 | ||
541 | /* | 559 | /* |
560 | * Try to retrieve the next record to the left/right from the current one. | ||
561 | */ | ||
562 | STATIC int | ||
563 | xfs_ialloc_next_rec( | ||
564 | struct xfs_btree_cur *cur, | ||
565 | xfs_inobt_rec_incore_t *rec, | ||
566 | int *done, | ||
567 | int left) | ||
568 | { | ||
569 | int error; | ||
570 | int i; | ||
571 | |||
572 | if (left) | ||
573 | error = xfs_btree_decrement(cur, 0, &i); | ||
574 | else | ||
575 | error = xfs_btree_increment(cur, 0, &i); | ||
576 | |||
577 | if (error) | ||
578 | return error; | ||
579 | *done = !i; | ||
580 | if (i) { | ||
581 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
582 | if (error) | ||
583 | return error; | ||
584 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
585 | } | ||
586 | |||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | STATIC int | ||
591 | xfs_ialloc_get_rec( | ||
592 | struct xfs_btree_cur *cur, | ||
593 | xfs_agino_t agino, | ||
594 | xfs_inobt_rec_incore_t *rec, | ||
595 | int *done, | ||
596 | int left) | ||
597 | { | ||
598 | int error; | ||
599 | int i; | ||
600 | |||
601 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); | ||
602 | if (error) | ||
603 | return error; | ||
604 | *done = !i; | ||
605 | if (i) { | ||
606 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
607 | if (error) | ||
608 | return error; | ||
609 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
610 | } | ||
611 | |||
612 | return 0; | ||
613 | } | ||
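These two helpers carry the rewritten near-parent search in xfs_dialloc() below: xfs_ialloc_next_rec() steps a cursor one record to the left or right, while xfs_ialloc_get_rec() re-seats a cursor on a remembered chunk so a search can resume where the previous one stopped. Their calling pattern, lifted from the hunk further down for orientation:

	/* resume from the per-AG cache of the last search frontier */
	error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, &trec, &doneleft, 1);

	/* or step outward one chunk at a time */
	error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);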
614 | |||
615 | /* | ||
542 | * Visible inode allocation functions. | 616 | * Visible inode allocation functions. |
543 | */ | 617 | */ |
544 | 618 | ||
@@ -592,8 +666,8 @@ xfs_dialloc( | |||
592 | int j; /* result code */ | 666 | int j; /* result code */ |
593 | xfs_mount_t *mp; /* file system mount structure */ | 667 | xfs_mount_t *mp; /* file system mount structure */ |
594 | int offset; /* index of inode in chunk */ | 668 | int offset; /* index of inode in chunk */ |
595 | xfs_agino_t pagino; /* parent's a.g. relative inode # */ | 669 | xfs_agino_t pagino; /* parent's AG relative inode # */ |
596 | xfs_agnumber_t pagno; /* parent's allocation group number */ | 670 | xfs_agnumber_t pagno; /* parent's AG number */ |
597 | xfs_inobt_rec_incore_t rec; /* inode allocation record */ | 671 | xfs_inobt_rec_incore_t rec; /* inode allocation record */ |
598 | xfs_agnumber_t tagno; /* testing allocation group number */ | 672 | xfs_agnumber_t tagno; /* testing allocation group number */ |
599 | xfs_btree_cur_t *tcur; /* temp cursor */ | 673 | xfs_btree_cur_t *tcur; /* temp cursor */ |
@@ -716,6 +790,8 @@ nextag: | |||
716 | */ | 790 | */ |
717 | agno = tagno; | 791 | agno = tagno; |
718 | *IO_agbp = NULL; | 792 | *IO_agbp = NULL; |
793 | |||
794 | restart_pagno: | ||
719 | cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); | 795 | cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); |
720 | /* | 796 | /* |
721 | * If pagino is 0 (this is the root inode allocation) use newino. | 797 | * If pagino is 0 (this is the root inode allocation) use newino. |
@@ -723,220 +799,199 @@ nextag: | |||
723 | */ | 799 | */ |
724 | if (!pagino) | 800 | if (!pagino) |
725 | pagino = be32_to_cpu(agi->agi_newino); | 801 | pagino = be32_to_cpu(agi->agi_newino); |
726 | #ifdef DEBUG | ||
727 | if (cur->bc_nlevels == 1) { | ||
728 | int freecount = 0; | ||
729 | 802 | ||
730 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 803 | error = xfs_check_agi_freecount(cur, agi); |
731 | goto error0; | 804 | if (error) |
732 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 805 | goto error0; |
733 | do { | ||
734 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | ||
735 | &rec.ir_freecount, &rec.ir_free, &i))) | ||
736 | goto error0; | ||
737 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
738 | freecount += rec.ir_freecount; | ||
739 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
740 | goto error0; | ||
741 | } while (i == 1); | ||
742 | 806 | ||
743 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
744 | XFS_FORCED_SHUTDOWN(mp)); | ||
745 | } | ||
746 | #endif | ||
747 | /* | 807 | /* |
748 | * If in the same a.g. as the parent, try to get near the parent. | 808 | * If in the same AG as the parent, try to get near the parent. |
749 | */ | 809 | */ |
750 | if (pagno == agno) { | 810 | if (pagno == agno) { |
751 | if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) | 811 | xfs_perag_t *pag = &mp->m_perag[agno]; |
812 | int doneleft; /* done, to the left */ | ||
813 | int doneright; /* done, to the right */ | ||
814 | int searchdistance = 10; | ||
815 | |||
816 | error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); | ||
817 | if (error) | ||
818 | goto error0; | ||
819 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
820 | |||
821 | error = xfs_inobt_get_rec(cur, &rec, &j); | ||
822 | if (error) | ||
752 | goto error0; | 823 | goto error0; |
753 | if (i != 0 && | 824 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
754 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 825 | |
755 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && | 826 | if (rec.ir_freecount > 0) { |
756 | j == 1 && | ||
757 | rec.ir_freecount > 0) { | ||
758 | /* | 827 | /* |
759 | * Found a free inode in the same chunk | 828 | * Found a free inode in the same chunk |
760 | * as parent, done. | 829 | * as the parent, done. |
761 | */ | 830 | */ |
831 | goto alloc_inode; | ||
762 | } | 832 | } |
833 | |||
834 | |||
835 | /* | ||
836 | * In the same AG as parent, but parent's chunk is full. | ||
837 | */ | ||
838 | |||
839 | /* duplicate the cursor, search left & right simultaneously */ | ||
840 | error = xfs_btree_dup_cursor(cur, &tcur); | ||
841 | if (error) | ||
842 | goto error0; | ||
843 | |||
763 | /* | 844 | /* |
764 | * In the same a.g. as parent, but parent's chunk is full. | 845 | * Skip to the blocks last looked up if this is the same parent inode. |
765 | */ | 846 | */ |
766 | else { | 847 | if (pagino != NULLAGINO && |
767 | int doneleft; /* done, to the left */ | 848 | pag->pagl_pagino == pagino && |
768 | int doneright; /* done, to the right */ | 849 | pag->pagl_leftrec != NULLAGINO && |
850 | pag->pagl_rightrec != NULLAGINO) { | ||
851 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, | ||
852 | &trec, &doneleft, 1); | ||
853 | if (error) | ||
854 | goto error1; | ||
769 | 855 | ||
856 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, | ||
857 | &rec, &doneright, 0); | ||
770 | if (error) | 858 | if (error) |
771 | goto error0; | ||
772 | ASSERT(i == 1); | ||
773 | ASSERT(j == 1); | ||
774 | /* | ||
775 | * Duplicate the cursor, search left & right | ||
776 | * simultaneously. | ||
777 | */ | ||
778 | if ((error = xfs_btree_dup_cursor(cur, &tcur))) | ||
779 | goto error0; | ||
780 | /* | ||
781 | * Search left with tcur, back up 1 record. | ||
782 | */ | ||
783 | if ((error = xfs_btree_decrement(tcur, 0, &i))) | ||
784 | goto error1; | 859 | goto error1; |
785 | doneleft = !i; | 860 | } else { |
786 | if (!doneleft) { | 861 | /* search left with tcur, back up 1 record */ |
787 | if ((error = xfs_inobt_get_rec(tcur, | 862 | error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); |
788 | &trec.ir_startino, | 863 | if (error) |
789 | &trec.ir_freecount, | ||
790 | &trec.ir_free, &i))) | ||
791 | goto error1; | ||
792 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); | ||
793 | } | ||
794 | /* | ||
795 | * Search right with cur, go forward 1 record. | ||
796 | */ | ||
797 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
798 | goto error1; | 864 | goto error1; |
799 | doneright = !i; | ||
800 | if (!doneright) { | ||
801 | if ((error = xfs_inobt_get_rec(cur, | ||
802 | &rec.ir_startino, | ||
803 | &rec.ir_freecount, | ||
804 | &rec.ir_free, &i))) | ||
805 | goto error1; | ||
806 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); | ||
807 | } | ||
808 | /* | ||
809 | * Loop until we find the closest inode chunk | ||
810 | * with a free one. | ||
811 | */ | ||
812 | while (!doneleft || !doneright) { | ||
813 | int useleft; /* using left inode | ||
814 | chunk this time */ | ||
815 | 865 | ||
866 | /* search right with cur, go forward 1 record. */ | ||
867 | error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); | ||
868 | if (error) | ||
869 | goto error1; | ||
870 | } | ||
871 | |||
872 | /* | ||
873 | * Loop until we find an inode chunk with a free inode. | ||
874 | */ | ||
875 | while (!doneleft || !doneright) { | ||
876 | int useleft; /* using left inode chunk this time */ | ||
877 | |||
878 | if (!--searchdistance) { | ||
816 | /* | 879 | /* |
817 | * Figure out which block is closer, | 880 | * Not in range - save last search |
818 | * if both are valid. | 881 | * location and allocate a new inode |
819 | */ | ||
820 | if (!doneleft && !doneright) | ||
821 | useleft = | ||
822 | pagino - | ||
823 | (trec.ir_startino + | ||
824 | XFS_INODES_PER_CHUNK - 1) < | ||
825 | rec.ir_startino - pagino; | ||
826 | else | ||
827 | useleft = !doneleft; | ||
828 | /* | ||
829 | * If checking the left, does it have | ||
830 | * free inodes? | ||
831 | */ | ||
832 | if (useleft && trec.ir_freecount) { | ||
833 | /* | ||
834 | * Yes, set it up as the chunk to use. | ||
835 | */ | ||
836 | rec = trec; | ||
837 | xfs_btree_del_cursor(cur, | ||
838 | XFS_BTREE_NOERROR); | ||
839 | cur = tcur; | ||
840 | break; | ||
841 | } | ||
842 | /* | ||
843 | * If checking the right, does it have | ||
844 | * free inodes? | ||
845 | */ | ||
846 | if (!useleft && rec.ir_freecount) { | ||
847 | /* | ||
848 | * Yes, it's already set up. | ||
849 | */ | ||
850 | xfs_btree_del_cursor(tcur, | ||
851 | XFS_BTREE_NOERROR); | ||
852 | break; | ||
853 | } | ||
854 | /* | ||
855 | * If used the left, get another one | ||
856 | * further left. | ||
857 | */ | ||
858 | if (useleft) { | ||
859 | if ((error = xfs_btree_decrement(tcur, 0, | ||
860 | &i))) | ||
861 | goto error1; | ||
862 | doneleft = !i; | ||
863 | if (!doneleft) { | ||
864 | if ((error = xfs_inobt_get_rec( | ||
865 | tcur, | ||
866 | &trec.ir_startino, | ||
867 | &trec.ir_freecount, | ||
868 | &trec.ir_free, &i))) | ||
869 | goto error1; | ||
870 | XFS_WANT_CORRUPTED_GOTO(i == 1, | ||
871 | error1); | ||
872 | } | ||
873 | } | ||
874 | /* | ||
875 | * If used the right, get another one | ||
876 | * further right. | ||
877 | */ | 882 | */ |
878 | else { | 883 | pag->pagl_leftrec = trec.ir_startino; |
879 | if ((error = xfs_btree_increment(cur, 0, | 884 | pag->pagl_rightrec = rec.ir_startino; |
880 | &i))) | 885 | pag->pagl_pagino = pagino; |
881 | goto error1; | 886 | goto newino; |
882 | doneright = !i; | 887 | } |
883 | if (!doneright) { | 888 | |
884 | if ((error = xfs_inobt_get_rec( | 889 | /* figure out the closer block if both are valid. */ |
885 | cur, | 890 | if (!doneleft && !doneright) { |
886 | &rec.ir_startino, | 891 | useleft = pagino - |
887 | &rec.ir_freecount, | 892 | (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < |
888 | &rec.ir_free, &i))) | 893 | rec.ir_startino - pagino; |
889 | goto error1; | 894 | } else { |
890 | XFS_WANT_CORRUPTED_GOTO(i == 1, | 895 | useleft = !doneleft; |
891 | error1); | ||
892 | } | ||
893 | } | ||
894 | } | 896 | } |
895 | ASSERT(!doneleft || !doneright); | 897 | |
898 | /* free inodes to the left? */ | ||
899 | if (useleft && trec.ir_freecount) { | ||
900 | rec = trec; | ||
901 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
902 | cur = tcur; | ||
903 | |||
904 | pag->pagl_leftrec = trec.ir_startino; | ||
905 | pag->pagl_rightrec = rec.ir_startino; | ||
906 | pag->pagl_pagino = pagino; | ||
907 | goto alloc_inode; | ||
908 | } | ||
909 | |||
910 | /* free inodes to the right? */ | ||
911 | if (!useleft && rec.ir_freecount) { | ||
912 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
913 | |||
914 | pag->pagl_leftrec = trec.ir_startino; | ||
915 | pag->pagl_rightrec = rec.ir_startino; | ||
916 | pag->pagl_pagino = pagino; | ||
917 | goto alloc_inode; | ||
918 | } | ||
919 | |||
920 | /* get next record to check */ | ||
921 | if (useleft) { | ||
922 | error = xfs_ialloc_next_rec(tcur, &trec, | ||
923 | &doneleft, 1); | ||
924 | } else { | ||
925 | error = xfs_ialloc_next_rec(cur, &rec, | ||
926 | &doneright, 0); | ||
927 | } | ||
928 | if (error) | ||
929 | goto error1; | ||
896 | } | 930 | } |
931 | |||
932 | /* | ||
933 | * We've reached the end of the btree. Because we | ||
934 | * only search a small chunk of the btree on each | ||
935 | * pass, there must be free inodes closer to the | ||
936 | * parent inode than where we are now. Restart the | ||
937 | * search. | ||
938 | */ | ||
939 | pag->pagl_pagino = NULLAGINO; | ||
940 | pag->pagl_leftrec = NULLAGINO; | ||
941 | pag->pagl_rightrec = NULLAGINO; | ||
942 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
943 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
944 | goto restart_pagno; | ||
897 | } | 945 | } |
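The rewritten block above caps the outward scan at ten records per attempt and, when the budget runs out, caches the search frontier in pag->pagl_leftrec, pag->pagl_rightrec and pag->pagl_pagino so the next allocation for the same parent resumes there rather than rescanning. The left/right tie-break itself is just an unsigned distance comparison; restated as self-contained userspace C (XFS_INODES_PER_CHUNK is 64; the function name is illustrative):

	#include <stdint.h>

	#define INODES_PER_CHUNK 64	/* stands in for XFS_INODES_PER_CHUNK */

	/* Nonzero when the left chunk's last inode lies nearer to parent than
	 * the right chunk's first inode; mirrors the useleft computation. */
	static int use_left_chunk(uint32_t left_start, uint32_t right_start,
				  uint32_t parent)
	{
		return parent - (left_start + INODES_PER_CHUNK - 1) <
		       right_start - parent;
	}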
946 | |||
898 | /* | 947 | /* |
899 | * In a different a.g. from the parent. | 948 | * In a different AG from the parent. |
900 | * See if the most recently allocated block has any free. | 949 | * See if the most recently allocated block has any free. |
901 | */ | 950 | */ |
902 | else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { | 951 | newino: |
903 | if ((error = xfs_inobt_lookup_eq(cur, | 952 | if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { |
904 | be32_to_cpu(agi->agi_newino), 0, 0, &i))) | 953 | error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), |
954 | XFS_LOOKUP_EQ, &i); | ||
955 | if (error) | ||
905 | goto error0; | 956 | goto error0; |
906 | if (i == 1 && | 957 | |
907 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 958 | if (i == 1) { |
908 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && | 959 | error = xfs_inobt_get_rec(cur, &rec, &j); |
909 | j == 1 && | ||
910 | rec.ir_freecount > 0) { | ||
911 | /* | ||
912 | * The last chunk allocated in the group still has | ||
913 | * a free inode. | ||
914 | */ | ||
915 | } | ||
916 | /* | ||
917 | * None left in the last group, search the whole a.g. | ||
918 | */ | ||
919 | else { | ||
920 | if (error) | 960 | if (error) |
921 | goto error0; | 961 | goto error0; |
922 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 962 | |
923 | goto error0; | 963 | if (j == 1 && rec.ir_freecount > 0) { |
924 | ASSERT(i == 1); | 964 | /* |
925 | for (;;) { | 965 | * The last chunk allocated in the group |
926 | if ((error = xfs_inobt_get_rec(cur, | 966 | * still has a free inode. |
927 | &rec.ir_startino, | 967 | */ |
928 | &rec.ir_freecount, &rec.ir_free, | 968 | goto alloc_inode; |
929 | &i))) | ||
930 | goto error0; | ||
931 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
932 | if (rec.ir_freecount > 0) | ||
933 | break; | ||
934 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
935 | goto error0; | ||
936 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
937 | } | 969 | } |
938 | } | 970 | } |
939 | } | 971 | } |
972 | |||
973 | /* | ||
974 | * None left in the last group, search the whole AG | ||
975 | */ | ||
976 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
977 | if (error) | ||
978 | goto error0; | ||
979 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
980 | |||
981 | for (;;) { | ||
982 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
983 | if (error) | ||
984 | goto error0; | ||
985 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
986 | if (rec.ir_freecount > 0) | ||
987 | break; | ||
988 | error = xfs_btree_increment(cur, 0, &i); | ||
989 | if (error) | ||
990 | goto error0; | ||
991 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
992 | } | ||
993 | |||
994 | alloc_inode: | ||
940 | offset = xfs_ialloc_find_free(&rec.ir_free); | 995 | offset = xfs_ialloc_find_free(&rec.ir_free); |
941 | ASSERT(offset >= 0); | 996 | ASSERT(offset >= 0); |
942 | ASSERT(offset < XFS_INODES_PER_CHUNK); | 997 | ASSERT(offset < XFS_INODES_PER_CHUNK); |
@@ -945,33 +1000,19 @@ nextag: | |||
945 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | 1000 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); |
946 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | 1001 | rec.ir_free &= ~XFS_INOBT_MASK(offset); |
947 | rec.ir_freecount--; | 1002 | rec.ir_freecount--; |
948 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, | 1003 | error = xfs_inobt_update(cur, &rec); |
949 | rec.ir_free))) | 1004 | if (error) |
950 | goto error0; | 1005 | goto error0; |
951 | be32_add_cpu(&agi->agi_freecount, -1); | 1006 | be32_add_cpu(&agi->agi_freecount, -1); |
952 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | 1007 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); |
953 | down_read(&mp->m_peraglock); | 1008 | down_read(&mp->m_peraglock); |
954 | mp->m_perag[tagno].pagi_freecount--; | 1009 | mp->m_perag[tagno].pagi_freecount--; |
955 | up_read(&mp->m_peraglock); | 1010 | up_read(&mp->m_peraglock); |
956 | #ifdef DEBUG | ||
957 | if (cur->bc_nlevels == 1) { | ||
958 | int freecount = 0; | ||
959 | 1011 | ||
960 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 1012 | error = xfs_check_agi_freecount(cur, agi); |
961 | goto error0; | 1013 | if (error) |
962 | do { | 1014 | goto error0; |
963 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 1015 | |
964 | &rec.ir_freecount, &rec.ir_free, &i))) | ||
965 | goto error0; | ||
966 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
967 | freecount += rec.ir_freecount; | ||
968 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
969 | goto error0; | ||
970 | } while (i == 1); | ||
971 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
972 | XFS_FORCED_SHUTDOWN(mp)); | ||
973 | } | ||
974 | #endif | ||
975 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1016 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
976 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | 1017 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); |
977 | *inop = ino; | 1018 | *inop = ino; |
@@ -1062,38 +1103,23 @@ xfs_difree( | |||
1062 | * Initialize the cursor. | 1103 | * Initialize the cursor. |
1063 | */ | 1104 | */ |
1064 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1105 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1065 | #ifdef DEBUG | ||
1066 | if (cur->bc_nlevels == 1) { | ||
1067 | int freecount = 0; | ||
1068 | 1106 | ||
1069 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 1107 | error = xfs_check_agi_freecount(cur, agi); |
1070 | goto error0; | 1108 | if (error) |
1071 | do { | 1109 | goto error0; |
1072 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 1110 | |
1073 | &rec.ir_freecount, &rec.ir_free, &i))) | ||
1074 | goto error0; | ||
1075 | if (i) { | ||
1076 | freecount += rec.ir_freecount; | ||
1077 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
1078 | goto error0; | ||
1079 | } | ||
1080 | } while (i == 1); | ||
1081 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
1082 | XFS_FORCED_SHUTDOWN(mp)); | ||
1083 | } | ||
1084 | #endif | ||
1085 | /* | 1111 | /* |
1086 | * Look for the entry describing this inode. | 1112 | * Look for the entry describing this inode. |
1087 | */ | 1113 | */ |
1088 | if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { | 1114 | if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { |
1089 | cmn_err(CE_WARN, | 1115 | cmn_err(CE_WARN, |
1090 | "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", | 1116 | "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", |
1091 | error, mp->m_fsname); | 1117 | error, mp->m_fsname); |
1092 | goto error0; | 1118 | goto error0; |
1093 | } | 1119 | } |
1094 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1120 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1095 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, | 1121 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1096 | &rec.ir_free, &i))) { | 1122 | if (error) { |
1097 | cmn_err(CE_WARN, | 1123 | cmn_err(CE_WARN, |
1098 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", | 1124 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", |
1099 | error, mp->m_fsname); | 1125 | error, mp->m_fsname); |
@@ -1148,12 +1174,14 @@ xfs_difree( | |||
1148 | } else { | 1174 | } else { |
1149 | *delete = 0; | 1175 | *delete = 0; |
1150 | 1176 | ||
1151 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { | 1177 | error = xfs_inobt_update(cur, &rec); |
1178 | if (error) { | ||
1152 | cmn_err(CE_WARN, | 1179 | cmn_err(CE_WARN, |
1153 | "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", | 1180 | "xfs_difree: xfs_inobt_update returned an error %d on %s.", |
1154 | error, mp->m_fsname); | 1181 | error, mp->m_fsname); |
1155 | goto error0; | 1182 | goto error0; |
1156 | } | 1183 | } |
1184 | |||
1157 | /* | 1185 | /* |
1158 | * Change the inode free counts and log the ag/sb changes. | 1186 | * Change the inode free counts and log the ag/sb changes. |
1159 | */ | 1187 | */ |
@@ -1165,28 +1193,10 @@ xfs_difree( | |||
1165 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); | 1193 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); |
1166 | } | 1194 | } |
1167 | 1195 | ||
1168 | #ifdef DEBUG | 1196 | error = xfs_check_agi_freecount(cur, agi); |
1169 | if (cur->bc_nlevels == 1) { | 1197 | if (error) |
1170 | int freecount = 0; | 1198 | goto error0; |
1171 | 1199 | ||
1172 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | ||
1173 | goto error0; | ||
1174 | do { | ||
1175 | if ((error = xfs_inobt_get_rec(cur, | ||
1176 | &rec.ir_startino, | ||
1177 | &rec.ir_freecount, | ||
1178 | &rec.ir_free, &i))) | ||
1179 | goto error0; | ||
1180 | if (i) { | ||
1181 | freecount += rec.ir_freecount; | ||
1182 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
1183 | goto error0; | ||
1184 | } | ||
1185 | } while (i == 1); | ||
1186 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
1187 | XFS_FORCED_SHUTDOWN(mp)); | ||
1188 | } | ||
1189 | #endif | ||
1190 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1200 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1191 | return 0; | 1201 | return 0; |
1192 | 1202 | ||
@@ -1297,9 +1307,7 @@ xfs_imap( | |||
1297 | chunk_agbno = agbno - offset_agbno; | 1307 | chunk_agbno = agbno - offset_agbno; |
1298 | } else { | 1308 | } else { |
1299 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 1309 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
1300 | xfs_agino_t chunk_agino; /* first agino in inode chunk */ | 1310 | xfs_inobt_rec_incore_t chunk_rec; |
1301 | __int32_t chunk_cnt; /* count of free inodes in chunk */ | ||
1302 | xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ | ||
1303 | xfs_buf_t *agbp; /* agi buffer */ | 1311 | xfs_buf_t *agbp; /* agi buffer */ |
1304 | int i; /* temp state */ | 1312 | int i; /* temp state */ |
1305 | 1313 | ||
@@ -1315,15 +1323,14 @@ xfs_imap( | |||
1315 | } | 1323 | } |
1316 | 1324 | ||
1317 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1325 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1318 | error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); | 1326 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); |
1319 | if (error) { | 1327 | if (error) { |
1320 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1328 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1321 | "xfs_inobt_lookup_le() failed"); | 1329 | "xfs_inobt_lookup() failed"); |
1322 | goto error0; | 1330 | goto error0; |
1323 | } | 1331 | } |
1324 | 1332 | ||
1325 | error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, | 1333 | error = xfs_inobt_get_rec(cur, &chunk_rec, &i); |
1326 | &chunk_free, &i); | ||
1327 | if (error) { | 1334 | if (error) { |
1328 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1335 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1329 | "xfs_inobt_get_rec() failed"); | 1336 | "xfs_inobt_get_rec() failed"); |
@@ -1341,7 +1348,7 @@ xfs_imap( | |||
1341 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1348 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1342 | if (error) | 1349 | if (error) |
1343 | return error; | 1350 | return error; |
1344 | chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); | 1351 | chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); |
1345 | offset_agbno = agbno - chunk_agbno; | 1352 | offset_agbno = agbno - chunk_agbno; |
1346 | } | 1353 | } |
1347 | 1354 | ||
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index aeee8278f92c..bb5385475e1f 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
@@ -150,23 +150,15 @@ xfs_ialloc_pagi_init( | |||
150 | xfs_agnumber_t agno); /* allocation group number */ | 150 | xfs_agnumber_t agno); /* allocation group number */ |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * Lookup the first record greater than or equal to ino | 153 | * Lookup a record by ino in the btree given by cur. |
154 | * in the btree given by cur. | ||
155 | */ | 154 | */ |
156 | int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, | 155 | int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, |
157 | __int32_t fcnt, xfs_inofree_t free, int *stat); | 156 | xfs_lookup_t dir, int *stat); |
158 | |||
159 | /* | ||
160 | * Lookup the first record less than or equal to ino | ||
161 | * in the btree given by cur. | ||
162 | */ | ||
163 | int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, | ||
164 | __int32_t fcnt, xfs_inofree_t free, int *stat); | ||
165 | 157 | ||
166 | /* | 158 | /* |
167 | * Get the data from the pointed-to record. | 159 | * Get the data from the pointed-to record. |
168 | */ | 160 | */ |
169 | extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, | 161 | extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, |
170 | __int32_t *fcnt, xfs_inofree_t *free, int *stat); | 162 | xfs_inobt_rec_incore_t *rec, int *stat); |
171 | 163 | ||
172 | #endif /* __XFS_IALLOC_H__ */ | 164 | #endif /* __XFS_IALLOC_H__ */ |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a7..80e526489be5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -64,6 +64,10 @@ xfs_inode_alloc( | |||
64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | 64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); |
65 | if (!ip) | 65 | if (!ip) |
66 | return NULL; | 66 | return NULL; |
67 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | ||
68 | kmem_zone_free(xfs_inode_zone, ip); | ||
69 | return NULL; | ||
70 | } | ||
67 | 71 | ||
68 | ASSERT(atomic_read(&ip->i_iocount) == 0); | 72 | ASSERT(atomic_read(&ip->i_iocount) == 0); |
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 73 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
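Calling inode_init_always() at the very top of xfs_inode_alloc() means a failure occurs before any XFS-side state exists, so the unwind is a single kmem_zone_free(); this reordering is also what allows the patch to retire the old xfs_destroy_inode()/make_bad_inode() helper further down. The resulting allocation skeleton:

	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;
	if (inode_init_always(mp->m_super, VFS_I(ip))) {	/* VFS part first */
		kmem_zone_free(xfs_inode_zone, ip);		/* trivial unwind */
		return NULL;
	}
	/* ...XFS-side initialisation follows, as in the hunk above... */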
@@ -78,7 +82,6 @@ xfs_inode_alloc( | |||
78 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | 82 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); |
79 | ip->i_flags = 0; | 83 | ip->i_flags = 0; |
80 | ip->i_update_core = 0; | 84 | ip->i_update_core = 0; |
81 | ip->i_update_size = 0; | ||
82 | ip->i_delayed_blks = 0; | 85 | ip->i_delayed_blks = 0; |
83 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | 86 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); |
84 | ip->i_size = 0; | 87 | ip->i_size = 0; |
@@ -105,17 +108,6 @@ xfs_inode_alloc( | |||
105 | #ifdef XFS_DIR2_TRACE | 108 | #ifdef XFS_DIR2_TRACE |
106 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); | 109 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); |
107 | #endif | 110 | #endif |
108 | /* | ||
109 | * Now initialise the VFS inode. We do this after the xfs_inode | ||
110 | * initialisation as internal failures will result in ->destroy_inode | ||
111 | * being called and that will pass down through the reclaim path and | ||
112 | * free the XFS inode. This path requires the XFS inode to already be | ||
113 | * initialised. Hence if this call fails, the xfs_inode has already | ||
114 | * been freed and we should not reference it at all in the error | ||
115 | * handling. | ||
116 | */ | ||
117 | if (!inode_init_always(mp->m_super, VFS_I(ip))) | ||
118 | return NULL; | ||
119 | 111 | ||
120 | /* prevent anyone from using this yet */ | 112 | /* prevent anyone from using this yet */ |
121 | VFS_I(ip)->i_state = I_NEW|I_LOCK; | 113 | VFS_I(ip)->i_state = I_NEW|I_LOCK; |
@@ -123,6 +115,71 @@ xfs_inode_alloc( | |||
123 | return ip; | 115 | return ip; |
124 | } | 116 | } |
125 | 117 | ||
118 | STATIC void | ||
119 | xfs_inode_free( | ||
120 | struct xfs_inode *ip) | ||
121 | { | ||
122 | switch (ip->i_d.di_mode & S_IFMT) { | ||
123 | case S_IFREG: | ||
124 | case S_IFDIR: | ||
125 | case S_IFLNK: | ||
126 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
127 | break; | ||
128 | } | ||
129 | |||
130 | if (ip->i_afp) | ||
131 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
132 | |||
133 | #ifdef XFS_INODE_TRACE | ||
134 | ktrace_free(ip->i_trace); | ||
135 | #endif | ||
136 | #ifdef XFS_BMAP_TRACE | ||
137 | ktrace_free(ip->i_xtrace); | ||
138 | #endif | ||
139 | #ifdef XFS_BTREE_TRACE | ||
140 | ktrace_free(ip->i_btrace); | ||
141 | #endif | ||
142 | #ifdef XFS_RW_TRACE | ||
143 | ktrace_free(ip->i_rwtrace); | ||
144 | #endif | ||
145 | #ifdef XFS_ILOCK_TRACE | ||
146 | ktrace_free(ip->i_lock_trace); | ||
147 | #endif | ||
148 | #ifdef XFS_DIR2_TRACE | ||
149 | ktrace_free(ip->i_dir_trace); | ||
150 | #endif | ||
151 | |||
152 | if (ip->i_itemp) { | ||
153 | /* | ||
154 | * Only if we are shutting down the fs will we see an | ||
155 | * inode still in the AIL. If it is there, we should remove | ||
156 | * it to prevent a use-after-free from occurring. | ||
157 | */ | ||
158 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
159 | struct xfs_ail *ailp = lip->li_ailp; | ||
160 | |||
161 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
162 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
163 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
164 | spin_lock(&ailp->xa_lock); | ||
165 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
166 | xfs_trans_ail_delete(ailp, lip); | ||
167 | else | ||
168 | spin_unlock(&ailp->xa_lock); | ||
169 | } | ||
170 | xfs_inode_item_destroy(ip); | ||
171 | ip->i_itemp = NULL; | ||
172 | } | ||
173 | |||
174 | /* asserts to verify all state is correct here */ | ||
175 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
176 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
177 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
178 | ASSERT(completion_done(&ip->i_flush)); | ||
179 | |||
180 | kmem_zone_free(xfs_inode_zone, ip); | ||
181 | } | ||
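The AIL removal in the new xfs_inode_free() is a check/lock/re-check: li_flags is tested optimistically, then re-tested under xa_lock because another thread may pull the item off the AIL between the two tests; the asymmetric else-arm unlock reflects that xfs_trans_ail_delete() is entered with xa_lock held and releases it itself. The shape of the pattern in isolation:

	if (lip->li_flags & XFS_LI_IN_AIL) {		/* unlocked hint */
		spin_lock(&ailp->xa_lock);
		if (lip->li_flags & XFS_LI_IN_AIL)	/* re-check under the lock */
			xfs_trans_ail_delete(ailp, lip);	/* drops xa_lock */
		else
			spin_unlock(&ailp->xa_lock);
	}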
182 | |||
126 | /* | 183 | /* |
127 | * Check the validity of the inode we just found in the cache | 184 | * Check the validity of the inode we just found in the cache |
128 | */ | 185 | */ |
@@ -133,80 +190,82 @@ xfs_iget_cache_hit( | |||
133 | int flags, | 190 | int flags, |
134 | int lock_flags) __releases(pag->pag_ici_lock) | 191 | int lock_flags) __releases(pag->pag_ici_lock) |
135 | { | 192 | { |
193 | struct inode *inode = VFS_I(ip); | ||
136 | struct xfs_mount *mp = ip->i_mount; | 194 | struct xfs_mount *mp = ip->i_mount; |
137 | int error = EAGAIN; | 195 | int error; |
196 | |||
197 | spin_lock(&ip->i_flags_lock); | ||
138 | 198 | ||
139 | /* | 199 | /* |
140 | * If INEW is set this inode is being set up | 200 | * If we are racing with another cache hit that is currently |
141 | * If IRECLAIM is set this inode is being torn down | 201 | * instantiating this inode or currently recycling it out of |
142 | * Pause and try again. | 202 | * reclaimable state, wait for the initialisation to complete |
203 | * before continuing. | ||
204 | * | ||
205 | * XXX(hch): eventually we should do something equivalent to | ||
206 | * wait_on_inode to wait for these flags to be cleared | ||
207 | * instead of polling for them. | ||
143 | */ | 208 | */ |
144 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 209 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
145 | XFS_STATS_INC(xs_ig_frecycle); | 210 | XFS_STATS_INC(xs_ig_frecycle); |
211 | error = EAGAIN; | ||
146 | goto out_error; | 212 | goto out_error; |
147 | } | 213 | } |
148 | 214 | ||
149 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 215 | /* |
150 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 216 | * If lookup is racing with unlink return an error immediately. |
151 | 217 | */ | |
152 | /* | 218 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
153 | * If lookup is racing with unlink, then we should return an | 219 | error = ENOENT; |
154 | * error immediately so we don't remove it from the reclaim | 220 | goto out_error; |
155 | * list and potentially leak the inode. | 221 | } |
156 | */ | ||
157 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
158 | error = ENOENT; | ||
159 | goto out_error; | ||
160 | } | ||
161 | 222 | ||
223 | /* | ||
224 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
225 | * Need to carefully get it back into usable state. | ||
226 | */ | ||
227 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
162 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 228 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
163 | 229 | ||
164 | /* | 230 | /* |
165 | * We need to re-initialise the VFS inode as it has been | 231 | * We need to set XFS_INEW atomically with clearing the |
166 | * 'freed' by the VFS. Do this here so we can deal with | 232 | * reclaimable tag so that we do have an indicator of the |
167 | * errors cleanly, then tag it so it can be set up correctly | 233 | * inode still being initialized. |
168 | * later. | ||
169 | */ | 234 | */ |
170 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | 235 | ip->i_flags |= XFS_INEW; |
171 | error = ENOMEM; | 236 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
172 | goto out_error; | 237 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
173 | } | ||
174 | 238 | ||
175 | /* | 239 | spin_unlock(&ip->i_flags_lock); |
176 | * We must set the XFS_INEW flag before clearing the | 240 | read_unlock(&pag->pag_ici_lock); |
177 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
178 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
179 | * below succeed we can safely check XFS_INEW to detect | ||
180 | * that this inode is still being initialised. | ||
181 | */ | ||
182 | xfs_iflags_set(ip, XFS_INEW); | ||
183 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
184 | 241 | ||
185 | /* clear the radix tree reclaim flag as well. */ | 242 | error = -inode_init_always(mp->m_super, inode); |
186 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 243 | if (error) { |
187 | } else if (!igrab(VFS_I(ip))) { | 244 | /* |
245 | * Re-initializing the inode failed, and we are in deep | ||
246 | * trouble. Try to re-add it to the reclaim list. | ||
247 | */ | ||
248 | read_lock(&pag->pag_ici_lock); | ||
249 | spin_lock(&ip->i_flags_lock); | ||
250 | |||
251 | ip->i_flags &= ~XFS_INEW; | ||
252 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
253 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
254 | goto out_error; | ||
255 | } | ||
256 | inode->i_state = I_LOCK|I_NEW; | ||
257 | } else { | ||
188 | /* If the VFS inode is being torn down, pause and try again. */ | 258 | /* If the VFS inode is being torn down, pause and try again. */ |
189 | XFS_STATS_INC(xs_ig_frecycle); | 259 | if (!igrab(inode)) { |
190 | goto out_error; | 260 | error = EAGAIN; |
191 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 261 | goto out_error; |
192 | /* | 262 | } |
193 | * We are racing with another cache hit that is | ||
194 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
195 | * state. Wait for the initialisation to complete before | ||
196 | * continuing. | ||
197 | */ | ||
198 | wait_on_inode(VFS_I(ip)); | ||
199 | } | ||
200 | 263 | ||
201 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 264 | /* We've got a live one. */ |
202 | error = ENOENT; | 265 | spin_unlock(&ip->i_flags_lock); |
203 | iput(VFS_I(ip)); | 266 | read_unlock(&pag->pag_ici_lock); |
204 | goto out_error; | ||
205 | } | 267 | } |
206 | 268 | ||
207 | /* We've got a live one. */ | ||
208 | read_unlock(&pag->pag_ici_lock); | ||
209 | |||
210 | if (lock_flags != 0) | 269 | if (lock_flags != 0) |
211 | xfs_ilock(ip, lock_flags); | 270 | xfs_ilock(ip, lock_flags); |
212 | 271 | ||
@@ -216,6 +275,7 @@ xfs_iget_cache_hit( | |||
216 | return 0; | 275 | return 0; |
217 | 276 | ||
218 | out_error: | 277 | out_error: |
278 | spin_unlock(&ip->i_flags_lock); | ||
219 | read_unlock(&pag->pag_ici_lock); | 279 | read_unlock(&pag->pag_ici_lock); |
220 | return error; | 280 | return error; |
221 | } | 281 | } |
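The recycle path above depends on flag ordering: XFS_INEW is set in the same i_flags_lock critical section that clears XFS_IRECLAIMABLE, so a concurrent lookup always observes at least one of the two flags and backs off with EAGAIN instead of touching a half-initialised inode. Reduced to its essentials (a sketch of the locking discipline, not the full function):

	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= XFS_INEW;		/* racers now back off on INEW... */
	ip->i_flags &= ~XFS_IRECLAIMABLE;	/* ...before the reclaim marker goes away */
	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
	spin_unlock(&ip->i_flags_lock);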
@@ -299,7 +359,8 @@ out_preload_end: | |||
299 | if (lock_flags) | 359 | if (lock_flags) |
300 | xfs_iunlock(ip, lock_flags); | 360 | xfs_iunlock(ip, lock_flags); |
301 | out_destroy: | 361 | out_destroy: |
302 | xfs_destroy_inode(ip); | 362 | __destroy_inode(VFS_I(ip)); |
363 | xfs_inode_free(ip); | ||
303 | return error; | 364 | return error; |
304 | } | 365 | } |
305 | 366 | ||
@@ -394,32 +455,6 @@ out_error_or_again: | |||
394 | return error; | 455 | return error; |
395 | } | 456 | } |
396 | 457 | ||
397 | |||
398 | /* | ||
399 | * Look for the inode corresponding to the given ino in the hash table. | ||
400 | * If it is there and its i_transp pointer matches tp, return it. | ||
401 | * Otherwise, return NULL. | ||
402 | */ | ||
403 | xfs_inode_t * | ||
404 | xfs_inode_incore(xfs_mount_t *mp, | ||
405 | xfs_ino_t ino, | ||
406 | xfs_trans_t *tp) | ||
407 | { | ||
408 | xfs_inode_t *ip; | ||
409 | xfs_perag_t *pag; | ||
410 | |||
411 | pag = xfs_get_perag(mp, ino); | ||
412 | read_lock(&pag->pag_ici_lock); | ||
413 | ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); | ||
414 | read_unlock(&pag->pag_ici_lock); | ||
415 | xfs_put_perag(mp, pag); | ||
416 | |||
417 | /* the returned inode must match the transaction */ | ||
418 | if (ip && (ip->i_transp != tp)) | ||
419 | return NULL; | ||
420 | return ip; | ||
421 | } | ||
422 | |||
423 | /* | 458 | /* |
424 | * Decrement reference count of an inode structure and unlock it. | 459 | * Decrement reference count of an inode structure and unlock it. |
425 | * | 460 | * |
@@ -504,62 +539,7 @@ xfs_ireclaim( | |||
504 | xfs_qm_dqdetach(ip); | 539 | xfs_qm_dqdetach(ip); |
505 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 540 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
506 | 541 | ||
507 | switch (ip->i_d.di_mode & S_IFMT) { | 542 | xfs_inode_free(ip); |
508 | case S_IFREG: | ||
509 | case S_IFDIR: | ||
510 | case S_IFLNK: | ||
511 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
512 | break; | ||
513 | } | ||
514 | |||
515 | if (ip->i_afp) | ||
516 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
517 | |||
518 | #ifdef XFS_INODE_TRACE | ||
519 | ktrace_free(ip->i_trace); | ||
520 | #endif | ||
521 | #ifdef XFS_BMAP_TRACE | ||
522 | ktrace_free(ip->i_xtrace); | ||
523 | #endif | ||
524 | #ifdef XFS_BTREE_TRACE | ||
525 | ktrace_free(ip->i_btrace); | ||
526 | #endif | ||
527 | #ifdef XFS_RW_TRACE | ||
528 | ktrace_free(ip->i_rwtrace); | ||
529 | #endif | ||
530 | #ifdef XFS_ILOCK_TRACE | ||
531 | ktrace_free(ip->i_lock_trace); | ||
532 | #endif | ||
533 | #ifdef XFS_DIR2_TRACE | ||
534 | ktrace_free(ip->i_dir_trace); | ||
535 | #endif | ||
536 | if (ip->i_itemp) { | ||
537 | /* | ||
538 | * Only if we are shutting down the fs will we see an | ||
539 | * inode still in the AIL. If it is there, we should remove | ||
540 | * it to prevent a use-after-free from occurring. | ||
541 | */ | ||
542 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
543 | struct xfs_ail *ailp = lip->li_ailp; | ||
544 | |||
545 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
546 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
547 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
548 | spin_lock(&ailp->xa_lock); | ||
549 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
550 | xfs_trans_ail_delete(ailp, lip); | ||
551 | else | ||
552 | spin_unlock(&ailp->xa_lock); | ||
553 | } | ||
554 | xfs_inode_item_destroy(ip); | ||
555 | ip->i_itemp = NULL; | ||
556 | } | ||
557 | /* asserts to verify all state is correct here */ | ||
558 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
559 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
560 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
561 | ASSERT(completion_done(&ip->i_flush)); | ||
562 | kmem_zone_free(xfs_inode_zone, ip); | ||
563 | } | 543 | } |
564 | 544 | ||
565 | /* | 545 | /* |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1f22d65fed0a..c1dc7ef5a1d8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -343,6 +343,16 @@ xfs_iformat( | |||
343 | return XFS_ERROR(EFSCORRUPTED); | 343 | return XFS_ERROR(EFSCORRUPTED); |
344 | } | 344 | } |
345 | 345 | ||
346 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && | ||
347 | !ip->i_mount->m_rtdev_targp)) { | ||
348 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | ||
349 | "corrupt dinode %Lu, has realtime flag set.", | ||
350 | ip->i_ino); | ||
351 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", | ||
352 | XFS_ERRLEVEL_LOW, ip->i_mount, dip); | ||
353 | return XFS_ERROR(EFSCORRUPTED); | ||
354 | } | ||
355 | |||
346 | switch (ip->i_d.di_mode & S_IFMT) { | 356 | switch (ip->i_d.di_mode & S_IFMT) { |
347 | case S_IFIFO: | 357 | case S_IFIFO: |
348 | case S_IFCHR: | 358 | case S_IFCHR: |
@@ -641,7 +651,7 @@ xfs_iformat_btree( | |||
641 | return 0; | 651 | return 0; |
642 | } | 652 | } |
643 | 653 | ||
644 | void | 654 | STATIC void |
645 | xfs_dinode_from_disk( | 655 | xfs_dinode_from_disk( |
646 | xfs_icdinode_t *to, | 656 | xfs_icdinode_t *to, |
647 | xfs_dinode_t *from) | 657 | xfs_dinode_t *from) |
@@ -1237,7 +1247,7 @@ xfs_isize_check( | |||
1237 | * In that case the pages will still be in memory, but the inode size | 1247 | * In that case the pages will still be in memory, but the inode size |
1238 | * will never have been updated. | 1248 | * will never have been updated. |
1239 | */ | 1249 | */ |
1240 | xfs_fsize_t | 1250 | STATIC xfs_fsize_t |
1241 | xfs_file_last_byte( | 1251 | xfs_file_last_byte( |
1242 | xfs_inode_t *ip) | 1252 | xfs_inode_t *ip) |
1243 | { | 1253 | { |
@@ -3827,7 +3837,7 @@ xfs_iext_inline_to_direct( | |||
3827 | /* | 3837 | /* |
3828 | * Resize an extent indirection array to new_size bytes. | 3838 | * Resize an extent indirection array to new_size bytes. |
3829 | */ | 3839 | */ |
3830 | void | 3840 | STATIC void |
3831 | xfs_iext_realloc_indirect( | 3841 | xfs_iext_realloc_indirect( |
3832 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3842 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3833 | int new_size) /* new indirection array size */ | 3843 | int new_size) /* new indirection array size */ |
@@ -3852,7 +3862,7 @@ xfs_iext_realloc_indirect( | |||
3852 | /* | 3862 | /* |
3853 | * Switch from indirection array to linear (direct) extent allocations. | 3863 | * Switch from indirection array to linear (direct) extent allocations. |
3854 | */ | 3864 | */ |
3855 | void | 3865 | STATIC void |
3856 | xfs_iext_indirect_to_direct( | 3866 | xfs_iext_indirect_to_direct( |
3857 | xfs_ifork_t *ifp) /* inode fork pointer */ | 3867 | xfs_ifork_t *ifp) /* inode fork pointer */ |
3858 | { | 3868 | { |
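The remaining xfs_inode.c hunks only change linkage: with their last external callers gone, the functions become STATIC. STATIC is an XFS-local macro rather than plain static; assuming the xfs_linux.h definition of this vintage (worth verifying in your tree), it is roughly:

        #ifdef DEBUG
        # define STATIC                         /* keep symbols global for debugging */
        #else
        # define STATIC static noinline         /* private, but a distinct symbol in traces */
        #endif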
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1804f866a71d..0b38b9a869ec 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -261,7 +261,6 @@ typedef struct xfs_inode { | |||
261 | /* Miscellaneous state. */ | 261 | /* Miscellaneous state. */ |
262 | unsigned short i_flags; /* see defined flags below */ | 262 | unsigned short i_flags; /* see defined flags below */ |
263 | unsigned char i_update_core; /* timestamps/size is dirty */ | 263 | unsigned char i_update_core; /* timestamps/size is dirty */ |
264 | unsigned char i_update_size; /* di_size field is dirty */ | ||
265 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 264 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
266 | 265 | ||
267 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 266 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
@@ -310,23 +309,6 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) | |||
310 | } | 309 | } |
311 | 310 | ||
312 | /* | 311 | /* |
313 | * Get rid of a partially initialized inode. | ||
314 | * | ||
315 | * We have to go through destroy_inode to make sure allocations | ||
316 | * from init_inode_always like the security data are undone. | ||
317 | * | ||
318 | * We mark the inode bad so that it takes the short cut in | ||
319 | * the reclaim path instead of going through the flush path | ||
320 | * which doesn't make sense for an inode that has never seen the | ||
321 | * light of day. | ||
322 | */ | ||
323 | static inline void xfs_destroy_inode(struct xfs_inode *ip) | ||
324 | { | ||
325 | make_bad_inode(VFS_I(ip)); | ||
326 | return destroy_inode(VFS_I(ip)); | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * i_flags helper functions | 312 | * i_flags helper functions |
331 | */ | 313 | */ |
332 | static inline void | 314 | static inline void |
@@ -485,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
485 | /* | 467 | /* |
486 | * xfs_iget.c prototypes. | 468 | * xfs_iget.c prototypes. |
487 | */ | 469 | */ |
488 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, | ||
489 | struct xfs_trans *); | ||
490 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | 470 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, |
491 | uint, uint, xfs_inode_t **, xfs_daddr_t); | 471 | uint, uint, xfs_inode_t **, xfs_daddr_t); |
492 | void xfs_iput(xfs_inode_t *, uint); | 472 | void xfs_iput(xfs_inode_t *, uint); |
@@ -521,7 +501,6 @@ void xfs_ipin(xfs_inode_t *); | |||
521 | void xfs_iunpin(xfs_inode_t *); | 501 | void xfs_iunpin(xfs_inode_t *); |
522 | int xfs_iflush(xfs_inode_t *, uint); | 502 | int xfs_iflush(xfs_inode_t *, uint); |
523 | void xfs_ichgtime(xfs_inode_t *, int); | 503 | void xfs_ichgtime(xfs_inode_t *, int); |
524 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); | ||
525 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 504 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
526 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 505 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
527 | 506 | ||
@@ -589,8 +568,6 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, | |||
589 | struct xfs_buf **, uint); | 568 | struct xfs_buf **, uint); |
590 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, | 569 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, |
591 | struct xfs_inode *, xfs_daddr_t, uint); | 570 | struct xfs_inode *, xfs_daddr_t, uint); |
592 | void xfs_dinode_from_disk(struct xfs_icdinode *, | ||
593 | struct xfs_dinode *); | ||
594 | void xfs_dinode_to_disk(struct xfs_dinode *, | 571 | void xfs_dinode_to_disk(struct xfs_dinode *, |
595 | struct xfs_icdinode *); | 572 | struct xfs_icdinode *); |
596 | void xfs_idestroy_fork(struct xfs_inode *, int); | 573 | void xfs_idestroy_fork(struct xfs_inode *, int); |
@@ -609,8 +586,6 @@ void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); | |||
609 | void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); | 586 | void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); |
610 | void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); | 587 | void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); |
611 | void xfs_iext_realloc_direct(xfs_ifork_t *, int); | 588 | void xfs_iext_realloc_direct(xfs_ifork_t *, int); |
612 | void xfs_iext_realloc_indirect(xfs_ifork_t *, int); | ||
613 | void xfs_iext_indirect_to_direct(xfs_ifork_t *); | ||
614 | void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); | 589 | void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); |
615 | void xfs_iext_inline_to_direct(xfs_ifork_t *, int); | 590 | void xfs_iext_inline_to_direct(xfs_ifork_t *, int); |
616 | void xfs_iext_destroy(xfs_ifork_t *); | 591 | void xfs_iext_destroy(xfs_ifork_t *); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 977c4aec587e..47d5b663c37e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -263,14 +263,6 @@ xfs_inode_item_format( | |||
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * We don't have to worry about re-ordering here because | ||
267 | * the update_size field is protected by the inode lock | ||
268 | * and we have that held in exclusive mode. | ||
269 | */ | ||
270 | if (ip->i_update_size) | ||
271 | ip->i_update_size = 0; | ||
272 | |||
273 | /* | ||
274 | * Make sure to get the latest atime from the Linux inode. | 266 | * Make sure to get the latest atime from the Linux inode. |
275 | */ | 267 | */ |
276 | xfs_synchronize_atime(ip); | 268 | xfs_synchronize_atime(ip); |
@@ -712,8 +704,6 @@ xfs_inode_item_unlock( | |||
712 | * Clear out the fields of the inode log item particular | 704 | * Clear out the fields of the inode log item particular |
713 | * to the current transaction. | 705 | * to the current transaction. |
714 | */ | 706 | */ |
715 | iip->ili_ilock_recur = 0; | ||
716 | iip->ili_iolock_recur = 0; | ||
717 | iip->ili_flags = 0; | 707 | iip->ili_flags = 0; |
718 | 708 | ||
719 | /* | 709 | /* |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index a52ac125f055..65bae4c9b8bf 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item { | |||
137 | struct xfs_inode *ili_inode; /* inode ptr */ | 137 | struct xfs_inode *ili_inode; /* inode ptr */ |
138 | xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ | 138 | xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ |
139 | xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ | 139 | xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ |
140 | unsigned short ili_ilock_recur; /* lock recursion count */ | ||
141 | unsigned short ili_iolock_recur; /* lock recursion count */ | ||
142 | unsigned short ili_flags; /* misc flags */ | 140 | unsigned short ili_flags; /* misc flags */ |
143 | unsigned short ili_logged; /* flushed logged data */ | 141 | unsigned short ili_logged; /* flushed logged data */ |
144 | unsigned int ili_last_fields; /* fields when flushed */ | 142 | unsigned int ili_last_fields; /* fields when flushed */ |
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index 7a28191cb0de..b8e4ee4e89a4 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h | |||
@@ -72,7 +72,6 @@ struct xfs_mount; | |||
72 | 72 | ||
73 | #if XFS_BIG_INUMS | 73 | #if XFS_BIG_INUMS |
74 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) | 74 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) |
75 | #define XFS_INO64_OFFSET ((xfs_ino_t)(1ULL << 32)) | ||
76 | #else | 75 | #else |
77 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) | 76 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) |
78 | #endif | 77 | #endif |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index aeb2d2221c7d..b68f9107e26c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -39,7 +39,7 @@ | |||
39 | #include "xfs_error.h" | 39 | #include "xfs_error.h" |
40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
41 | 41 | ||
42 | int | 42 | STATIC int |
43 | xfs_internal_inum( | 43 | xfs_internal_inum( |
44 | xfs_mount_t *mp, | 44 | xfs_mount_t *mp, |
45 | xfs_ino_t ino) | 45 | xfs_ino_t ino) |
@@ -353,9 +353,6 @@ xfs_bulkstat( | |||
353 | int end_of_ag; /* set if we've seen the ag end */ | 353 | int end_of_ag; /* set if we've seen the ag end */ |
354 | int error; /* error code */ | 354 | int error; /* error code */ |
355 | int fmterror;/* bulkstat formatter result */ | 355 | int fmterror;/* bulkstat formatter result */ |
356 | __int32_t gcnt; /* current btree rec's count */ | ||
357 | xfs_inofree_t gfree; /* current btree rec's free mask */ | ||
358 | xfs_agino_t gino; /* current btree rec's start inode */ | ||
359 | int i; /* loop index */ | 356 | int i; /* loop index */ |
360 | int icount; /* count of inodes good in irbuf */ | 357 | int icount; /* count of inodes good in irbuf */ |
361 | size_t irbsize; /* size of irec buffer in bytes */ | 358 | size_t irbsize; /* size of irec buffer in bytes */ |
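The three removed g-prefixed locals (gino, gcnt, gfree) are replaced below by a single xfs_inobt_rec_incore_t, the incore form of an inode-btree record. For reference, its layout as declared in xfs_ialloc_btree.h of this era (comments paraphrased):

        typedef struct xfs_inobt_rec_incore {
                xfs_agino_t     ir_startino;    /* first inode number of the chunk */
                __int32_t       ir_freecount;   /* number of free inodes */
                xfs_inofree_t   ir_free;        /* bitmask of free inodes */
        } xfs_inobt_rec_incore_t;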
@@ -442,40 +439,43 @@ xfs_bulkstat( | |||
442 | * we need to get the remainder of the chunk we're in. | 439 | * we need to get the remainder of the chunk we're in. |
443 | */ | 440 | */ |
444 | if (agino > 0) { | 441 | if (agino > 0) { |
442 | xfs_inobt_rec_incore_t r; | ||
443 | |||
445 | /* | 444 | /* |
446 | * Lookup the inode chunk that this inode lives in. | 445 | * Lookup the inode chunk that this inode lives in. |
447 | */ | 446 | */ |
448 | error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp); | 447 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, |
448 | &tmp); | ||
449 | if (!error && /* no I/O error */ | 449 | if (!error && /* no I/O error */ |
450 | tmp && /* lookup succeeded */ | 450 | tmp && /* lookup succeeded */ |
451 | /* got the record, should always work */ | 451 | /* got the record, should always work */ |
452 | !(error = xfs_inobt_get_rec(cur, &gino, &gcnt, | 452 | !(error = xfs_inobt_get_rec(cur, &r, &i)) && |
453 | &gfree, &i)) && | ||
454 | i == 1 && | 453 | i == 1 && |
455 | /* this is the right chunk */ | 454 | /* this is the right chunk */ |
456 | agino < gino + XFS_INODES_PER_CHUNK && | 455 | agino < r.ir_startino + XFS_INODES_PER_CHUNK && |
457 | /* lastino was not last in chunk */ | 456 | /* lastino was not last in chunk */ |
458 | (chunkidx = agino - gino + 1) < | 457 | (chunkidx = agino - r.ir_startino + 1) < |
459 | XFS_INODES_PER_CHUNK && | 458 | XFS_INODES_PER_CHUNK && |
460 | /* there are some left allocated */ | 459 | /* there are some left allocated */ |
461 | xfs_inobt_maskn(chunkidx, | 460 | xfs_inobt_maskn(chunkidx, |
462 | XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) { | 461 | XFS_INODES_PER_CHUNK - chunkidx) & |
462 | ~r.ir_free) { | ||
463 | /* | 463 | /* |
464 | * Grab the chunk record. Mark all the | 464 | * Grab the chunk record. Mark all the |
465 | * uninteresting inodes (because they're | 465 | * uninteresting inodes (because they're |
466 | * before our start point) free. | 466 | * before our start point) free. |
467 | */ | 467 | */ |
468 | for (i = 0; i < chunkidx; i++) { | 468 | for (i = 0; i < chunkidx; i++) { |
469 | if (XFS_INOBT_MASK(i) & ~gfree) | 469 | if (XFS_INOBT_MASK(i) & ~r.ir_free) |
470 | gcnt++; | 470 | r.ir_freecount++; |
471 | } | 471 | } |
472 | gfree |= xfs_inobt_maskn(0, chunkidx); | 472 | r.ir_free |= xfs_inobt_maskn(0, chunkidx); |
473 | irbp->ir_startino = gino; | 473 | irbp->ir_startino = r.ir_startino; |
474 | irbp->ir_freecount = gcnt; | 474 | irbp->ir_freecount = r.ir_freecount; |
475 | irbp->ir_free = gfree; | 475 | irbp->ir_free = r.ir_free; |
476 | irbp++; | 476 | irbp++; |
477 | agino = gino + XFS_INODES_PER_CHUNK; | 477 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; |
478 | icount = XFS_INODES_PER_CHUNK - gcnt; | 478 | icount = XFS_INODES_PER_CHUNK - r.ir_freecount; |
479 | } else { | 479 | } else { |
480 | /* | 480 | /* |
481 | * If any of those tests failed, bump the | 481 | * If any of those tests failed, bump the |
@@ -493,7 +493,7 @@ xfs_bulkstat( | |||
493 | /* | 493 | /* |
494 | * Start of ag. Lookup the first inode chunk. | 494 | * Start of ag. Lookup the first inode chunk. |
495 | */ | 495 | */ |
496 | error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp); | 496 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); |
497 | icount = 0; | 497 | icount = 0; |
498 | } | 498 | } |
499 | /* | 499 | /* |
@@ -501,6 +501,8 @@ xfs_bulkstat( | |||
501 | * until we run out of inodes or space in the buffer. | 501 | * until we run out of inodes or space in the buffer. |
502 | */ | 502 | */ |
503 | while (irbp < irbufend && icount < ubcount) { | 503 | while (irbp < irbufend && icount < ubcount) { |
504 | xfs_inobt_rec_incore_t r; | ||
505 | |||
504 | /* | 506 | /* |
505 | * Loop as long as we're unable to read the | 507 | * Loop as long as we're unable to read the |
506 | * inode btree. | 508 | * inode btree. |
@@ -510,51 +512,55 @@ xfs_bulkstat( | |||
510 | if (XFS_AGINO_TO_AGBNO(mp, agino) >= | 512 | if (XFS_AGINO_TO_AGBNO(mp, agino) >= |
511 | be32_to_cpu(agi->agi_length)) | 513 | be32_to_cpu(agi->agi_length)) |
512 | break; | 514 | break; |
513 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, | 515 | error = xfs_inobt_lookup(cur, agino, |
514 | &tmp); | 516 | XFS_LOOKUP_GE, &tmp); |
515 | cond_resched(); | 517 | cond_resched(); |
516 | } | 518 | } |
517 | /* | 519 | /* |
518 | * If we run off the end of the ag, either with an error | 520 | * If we run off the end of the ag, either with an error |
519 | * or the normal way, set end_of_ag and stop collecting. | 521 | * or the normal way, set end_of_ag and stop collecting. |
520 | */ | 522 | */ |
521 | if (error || | 523 | if (error) { |
522 | (error = xfs_inobt_get_rec(cur, &gino, &gcnt, | ||
523 | &gfree, &i)) || | ||
524 | i == 0) { | ||
525 | end_of_ag = 1; | 524 | end_of_ag = 1; |
526 | break; | 525 | break; |
527 | } | 526 | } |
527 | |||
528 | error = xfs_inobt_get_rec(cur, &r, &i); | ||
529 | if (error || i == 0) { | ||
530 | end_of_ag = 1; | ||
531 | break; | ||
532 | } | ||
533 | |||
528 | /* | 534 | /* |
529 | * If this chunk has any allocated inodes, save it. | 535 | * If this chunk has any allocated inodes, save it. |
530 | * Also start read-ahead now for this chunk. | 536 | * Also start read-ahead now for this chunk. |
531 | */ | 537 | */ |
532 | if (gcnt < XFS_INODES_PER_CHUNK) { | 538 | if (r.ir_freecount < XFS_INODES_PER_CHUNK) { |
533 | /* | 539 | /* |
534 | * Loop over all clusters in the next chunk. | 540 | * Loop over all clusters in the next chunk. |
535 | * Do a readahead if there are any allocated | 541 | * Do a readahead if there are any allocated |
536 | * inodes in that cluster. | 542 | * inodes in that cluster. |
537 | */ | 543 | */ |
538 | for (agbno = XFS_AGINO_TO_AGBNO(mp, gino), | 544 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); |
539 | chunkidx = 0; | 545 | for (chunkidx = 0; |
540 | chunkidx < XFS_INODES_PER_CHUNK; | 546 | chunkidx < XFS_INODES_PER_CHUNK; |
541 | chunkidx += nicluster, | 547 | chunkidx += nicluster, |
542 | agbno += nbcluster) { | 548 | agbno += nbcluster) { |
543 | if (xfs_inobt_maskn(chunkidx, | 549 | if (xfs_inobt_maskn(chunkidx, nicluster) |
544 | nicluster) & ~gfree) | 550 | & ~r.ir_free) |
545 | xfs_btree_reada_bufs(mp, agno, | 551 | xfs_btree_reada_bufs(mp, agno, |
546 | agbno, nbcluster); | 552 | agbno, nbcluster); |
547 | } | 553 | } |
548 | irbp->ir_startino = gino; | 554 | irbp->ir_startino = r.ir_startino; |
549 | irbp->ir_freecount = gcnt; | 555 | irbp->ir_freecount = r.ir_freecount; |
550 | irbp->ir_free = gfree; | 556 | irbp->ir_free = r.ir_free; |
551 | irbp++; | 557 | irbp++; |
552 | icount += XFS_INODES_PER_CHUNK - gcnt; | 558 | icount += XFS_INODES_PER_CHUNK - r.ir_freecount; |
553 | } | 559 | } |
554 | /* | 560 | /* |
555 | * Set agino to after this chunk and bump the cursor. | 561 | * Set agino to after this chunk and bump the cursor. |
556 | */ | 562 | */ |
557 | agino = gino + XFS_INODES_PER_CHUNK; | 563 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; |
558 | error = xfs_btree_increment(cur, 0, &tmp); | 564 | error = xfs_btree_increment(cur, 0, &tmp); |
559 | cond_resched(); | 565 | cond_resched(); |
560 | } | 566 | } |
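Stripped of the bulkstat bookkeeping, the rewritten loop is a plain inode-btree walk: position the cursor with xfs_inobt_lookup(), read each record with xfs_inobt_get_rec(), advance with xfs_btree_increment(). A minimal sketch using only the calls visible in the diff (cursor setup and error paths omitted):

        xfs_inobt_rec_incore_t  r;
        int                     stat;   /* 1 = record returned, 0 = off the end */

        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, &stat);
        while (!error && stat) {
                error = xfs_inobt_get_rec(cur, &r, &stat);
                if (error || !stat)
                        break;
                /* ... consume r.ir_startino, r.ir_freecount, r.ir_free ... */
                error = xfs_btree_increment(cur, 0, &stat);
        }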
@@ -820,9 +826,7 @@ xfs_inumbers( | |||
820 | int bufidx; | 826 | int bufidx; |
821 | xfs_btree_cur_t *cur; | 827 | xfs_btree_cur_t *cur; |
822 | int error; | 828 | int error; |
823 | __int32_t gcnt; | 829 | xfs_inobt_rec_incore_t r; |
824 | xfs_inofree_t gfree; | ||
825 | xfs_agino_t gino; | ||
826 | int i; | 830 | int i; |
827 | xfs_ino_t ino; | 831 | xfs_ino_t ino; |
828 | int left; | 832 | int left; |
@@ -855,7 +859,8 @@ xfs_inumbers( | |||
855 | continue; | 859 | continue; |
856 | } | 860 | } |
857 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); | 861 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); |
858 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); | 862 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, |
863 | &tmp); | ||
859 | if (error) { | 864 | if (error) { |
860 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 865 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
861 | cur = NULL; | 866 | cur = NULL; |
@@ -870,9 +875,8 @@ xfs_inumbers( | |||
870 | continue; | 875 | continue; |
871 | } | 876 | } |
872 | } | 877 | } |
873 | if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, | 878 | error = xfs_inobt_get_rec(cur, &r, &i); |
874 | &i)) || | 879 | if (error || i == 0) { |
875 | i == 0) { | ||
876 | xfs_buf_relse(agbp); | 880 | xfs_buf_relse(agbp); |
877 | agbp = NULL; | 881 | agbp = NULL; |
878 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 882 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
@@ -881,10 +885,12 @@ xfs_inumbers( | |||
881 | agino = 0; | 885 | agino = 0; |
882 | continue; | 886 | continue; |
883 | } | 887 | } |
884 | agino = gino + XFS_INODES_PER_CHUNK - 1; | 888 | agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; |
885 | buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); | 889 | buffer[bufidx].xi_startino = |
886 | buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; | 890 | XFS_AGINO_TO_INO(mp, agno, r.ir_startino); |
887 | buffer[bufidx].xi_allocmask = ~gfree; | 891 | buffer[bufidx].xi_alloccount = |
892 | XFS_INODES_PER_CHUNK - r.ir_freecount; | ||
893 | buffer[bufidx].xi_allocmask = ~r.ir_free; | ||
888 | bufidx++; | 894 | bufidx++; |
889 | left--; | 895 | left--; |
890 | if (bufidx == bcount) { | 896 | if (bufidx == bcount) { |
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 1fb04e7deb61..20792bf45946 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h | |||
@@ -99,11 +99,6 @@ xfs_bulkstat_one( | |||
99 | void *dibuff, | 99 | void *dibuff, |
100 | int *stat); | 100 | int *stat); |
101 | 101 | ||
102 | int | ||
103 | xfs_internal_inum( | ||
104 | xfs_mount_t *mp, | ||
105 | xfs_ino_t ino); | ||
106 | |||
107 | typedef int (*inumbers_fmt_pf)( | 102 | typedef int (*inumbers_fmt_pf)( |
108 | void __user *ubuffer, /* buffer to write to */ | 103 | void __user *ubuffer, /* buffer to write to */ |
109 | const xfs_inogrp_t *buffer, /* buffer to read from */ | 104 | const xfs_inogrp_t *buffer, /* buffer to read from */ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3750f04ede0b..9dbdff3ea484 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -3180,7 +3180,7 @@ try_again: | |||
3180 | STATIC void | 3180 | STATIC void |
3181 | xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) | 3181 | xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) |
3182 | { | 3182 | { |
3183 | ASSERT(spin_is_locked(&log->l_icloglock)); | 3183 | assert_spin_locked(&log->l_icloglock); |
3184 | 3184 | ||
3185 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { | 3185 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { |
3186 | xlog_state_switch_iclogs(log, iclog, 0); | 3186 | xlog_state_switch_iclogs(log, iclog, 0); |
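The one-line xfs_log.c change is a correctness fix as much as a cleanup: on uniprocessor kernels spinlocks can compile away, so spin_is_locked() may report 0 even while the lock is held and the old ASSERT fires spuriously, whereas assert_spin_locked() is the core-kernel primitive that degrades to a no-op when the state cannot be checked. Side by side:

        ASSERT(spin_is_locked(&log->l_icloglock));      /* unreliable on !CONFIG_SMP */
        assert_spin_locked(&log->l_icloglock);          /* no-op when it cannot be verified */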
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index bcad5f4c1fd1..679c7c4926a2 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -451,8 +451,6 @@ extern int xlog_find_tail(xlog_t *log, | |||
451 | extern int xlog_recover(xlog_t *log); | 451 | extern int xlog_recover(xlog_t *log); |
452 | extern int xlog_recover_finish(xlog_t *log); | 452 | extern int xlog_recover_finish(xlog_t *log); |
453 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 453 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
454 | extern void xlog_recover_process_iunlinks(xlog_t *log); | ||
455 | |||
456 | extern struct xfs_buf *xlog_get_bp(xlog_t *, int); | 454 | extern struct xfs_buf *xlog_get_bp(xlog_t *, int); |
457 | extern void xlog_put_bp(struct xfs_buf *); | 455 | extern void xlog_put_bp(struct xfs_buf *); |
458 | 456 | ||
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 47da2fb45377..1099395d7d6c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink( | |||
3263 | * freeing of the inode and its removal from the list must be | 3263 | * freeing of the inode and its removal from the list must be |
3264 | * atomic. | 3264 | * atomic. |
3265 | */ | 3265 | */ |
3266 | void | 3266 | STATIC void |
3267 | xlog_recover_process_iunlinks( | 3267 | xlog_recover_process_iunlinks( |
3268 | xlog_t *log) | 3268 | xlog_t *log) |
3269 | { | 3269 | { |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5c6f092659c1..8b6c9e807efb 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) | |||
1568 | * | 1568 | * |
1569 | * The m_sb_lock must be held when this routine is called. | 1569 | * The m_sb_lock must be held when this routine is called. |
1570 | */ | 1570 | */ |
1571 | int | 1571 | STATIC int |
1572 | xfs_mod_incore_sb_unlocked( | 1572 | xfs_mod_incore_sb_unlocked( |
1573 | xfs_mount_t *mp, | 1573 | xfs_mount_t *mp, |
1574 | xfs_sb_field_t field, | 1574 | xfs_sb_field_t field, |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a5122382afde..a6c023bc0fb2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -414,13 +414,10 @@ typedef struct xfs_mod_sb { | |||
414 | 414 | ||
415 | extern int xfs_log_sbcount(xfs_mount_t *, uint); | 415 | extern int xfs_log_sbcount(xfs_mount_t *, uint); |
416 | extern int xfs_mountfs(xfs_mount_t *mp); | 416 | extern int xfs_mountfs(xfs_mount_t *mp); |
417 | extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); | ||
418 | 417 | ||
419 | extern void xfs_unmountfs(xfs_mount_t *); | 418 | extern void xfs_unmountfs(xfs_mount_t *); |
420 | extern int xfs_unmountfs_writesb(xfs_mount_t *); | 419 | extern int xfs_unmountfs_writesb(xfs_mount_t *); |
421 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 420 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); |
422 | extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, | ||
423 | int64_t, int); | ||
424 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 421 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, |
425 | uint, int); | 422 | uint, int); |
426 | extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); | 423 | extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index afee7eb24323..4b0613d99faa 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -564,35 +564,6 @@ xfs_mru_cache_lookup( | |||
564 | } | 564 | } |
565 | 565 | ||
566 | /* | 566 | /* |
567 | * To look up an element using its key, but leave its location in the internal | ||
568 | * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this | ||
569 | * function returns NULL. | ||
570 | * | ||
571 | * See the comments above the declaration of the xfs_mru_cache_lookup() function | ||
572 | * for important locking information pertaining to this call. | ||
573 | */ | ||
574 | void * | ||
575 | xfs_mru_cache_peek( | ||
576 | xfs_mru_cache_t *mru, | ||
577 | unsigned long key) | ||
578 | { | ||
579 | xfs_mru_cache_elem_t *elem; | ||
580 | |||
581 | ASSERT(mru && mru->lists); | ||
582 | if (!mru || !mru->lists) | ||
583 | return NULL; | ||
584 | |||
585 | spin_lock(&mru->lock); | ||
586 | elem = radix_tree_lookup(&mru->store, key); | ||
587 | if (!elem) | ||
588 | spin_unlock(&mru->lock); | ||
589 | else | ||
590 | __release(mru_lock); /* help sparse not be stupid */ | ||
591 | |||
592 | return elem ? elem->value : NULL; | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * To release the internal data structure spinlock after having performed an | 567 | * To release the internal data structure spinlock after having performed an |
597 | * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() | 568 | * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() |
598 | * with the data store pointer. | 569 | * with the data store pointer. |
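Note that the surviving comment still names xfs_mru_cache_peek() even though this hunk removes it; only xfs_mru_cache_lookup() remains. Its contract, per the comments above, is that the cache's internal lock is held on a successful lookup and must be released with xfs_mru_cache_done(). A minimal sketch of the remaining pattern:

        /* Look up a value; on a hit the MRU's internal lock is held until
         * xfs_mru_cache_done() is called. */
        void *value = xfs_mru_cache_lookup(mru, key);
        if (value) {
                /* ... use value while the entry is pinned ... */
                xfs_mru_cache_done(mru);
        }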
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index dd58ea1bbebe..5d439f34b0c9 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h | |||
@@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, | |||
49 | void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); | 49 | void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); |
50 | void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); | 50 | void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); |
51 | void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); | 51 | void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); |
52 | void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key); | ||
53 | void xfs_mru_cache_done(struct xfs_mru_cache *mru); | 52 | void xfs_mru_cache_done(struct xfs_mru_cache *mru); |
54 | 53 | ||
55 | #endif /* __XFS_MRU_CACHE_H__ */ | 54 | #endif /* __XFS_MRU_CACHE_H__ */ |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index fea68615ed23..3f816ad7ff19 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -88,90 +88,6 @@ xfs_write_clear_setuid( | |||
88 | } | 88 | } |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * Handle logging requirements of various synchronous types of write. | ||
92 | */ | ||
93 | int | ||
94 | xfs_write_sync_logforce( | ||
95 | xfs_mount_t *mp, | ||
96 | xfs_inode_t *ip) | ||
97 | { | ||
98 | int error = 0; | ||
99 | |||
100 | /* | ||
101 | * If we're treating this as O_DSYNC and we have not updated the | ||
102 | * size, force the log. | ||
103 | */ | ||
104 | if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && | ||
105 | !(ip->i_update_size)) { | ||
106 | xfs_inode_log_item_t *iip = ip->i_itemp; | ||
107 | |||
108 | /* | ||
109 | * If an allocation transaction occurred | ||
110 | * without extending the size, then we have to force | ||
111 | * the log up to the proper point to ensure that the | ||
112 | * allocation is permanent. We can't count on | ||
113 | * the fact that buffered writes lock out direct I/O | ||
114 | * writes - the direct I/O write could have extended | ||
115 | * the size nontransactionally, then finished before | ||
116 | * we started. xfs_write_file will think that the file | ||
117 | * didn't grow but the update isn't safe unless the | ||
118 | * size change is logged. | ||
119 | * | ||
120 | * Force the log if we've committed a transaction | ||
121 | * against the inode or if someone else has and | ||
122 | * the commit record hasn't gone to disk (e.g. | ||
123 | * the inode is pinned). This guarantees that | ||
124 | * all changes affecting the inode are permanent | ||
125 | * when we return. | ||
126 | */ | ||
127 | if (iip && iip->ili_last_lsn) { | ||
128 | error = _xfs_log_force(mp, iip->ili_last_lsn, | ||
129 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | ||
130 | } else if (xfs_ipincount(ip) > 0) { | ||
131 | error = _xfs_log_force(mp, (xfs_lsn_t)0, | ||
132 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | ||
133 | } | ||
134 | |||
135 | } else { | ||
136 | xfs_trans_t *tp; | ||
137 | |||
138 | /* | ||
139 | * O_SYNC or O_DSYNC _with_ a size update are handled | ||
140 | * the same way. | ||
141 | * | ||
142 | * If the write was synchronous then we need to make | ||
143 | * sure that the inode modification time is permanent. | ||
144 | * We'll have updated the timestamp above, so here | ||
145 | * we use a synchronous transaction to log the inode. | ||
146 | * It's not fast, but it's necessary. | ||
147 | * | ||
148 | * If this is a dsync write and the size got changed | ||
149 | * non-transactionally, then we need to ensure that | ||
150 | * the size change gets logged in a synchronous | ||
151 | * transaction. | ||
152 | */ | ||
153 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); | ||
154 | if ((error = xfs_trans_reserve(tp, 0, | ||
155 | XFS_SWRITE_LOG_RES(mp), | ||
156 | 0, 0, 0))) { | ||
157 | /* Transaction reserve failed */ | ||
158 | xfs_trans_cancel(tp, 0); | ||
159 | } else { | ||
160 | /* Transaction reserve successful */ | ||
161 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
162 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
163 | xfs_trans_ihold(tp, ip); | ||
164 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
165 | xfs_trans_set_sync(tp); | ||
166 | error = xfs_trans_commit(tp, 0); | ||
167 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | return error; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Force a shutdown of the filesystem instantly while keeping | 91 | * Force a shutdown of the filesystem instantly while keeping |
176 | * the filesystem consistent. We don't do an unmount here; just shutdown | 92 | * the filesystem consistent. We don't do an unmount here; just shutdown |
177 | * the shop, make sure that absolutely nothing persistent happens to | 93 | * the shop, make sure that absolutely nothing persistent happens to |
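The first branch of the removed helper is the standard force-to-LSN idiom, which xfs_fsync() also relies on in this era: if the inode has a committed transaction, force the log up to that LSN; if it is merely pinned by someone else's commit, force the whole log. Condensed from the deleted code above:

        xfs_inode_log_item_t *iip = ip->i_itemp;

        if (iip && iip->ili_last_lsn)           /* committed: force to its LSN */
                error = _xfs_log_force(mp, iip->ili_last_lsn,
                                       XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
        else if (xfs_ipincount(ip) > 0)         /* pinned: force everything */
                error = _xfs_log_force(mp, (xfs_lsn_t)0,
                                       XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);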
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index f76c003ec55d..f5e4874c37d8 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h | |||
@@ -68,7 +68,6 @@ xfs_get_extsz_hint( | |||
68 | * Prototypes for functions in xfs_rw.c. | 68 | * Prototypes for functions in xfs_rw.c. |
69 | */ | 69 | */ |
70 | extern int xfs_write_clear_setuid(struct xfs_inode *ip); | 70 | extern int xfs_write_clear_setuid(struct xfs_inode *ip); |
71 | extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip); | ||
72 | extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); | 71 | extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); |
73 | extern int xfs_bioerror(struct xfs_buf *bp); | 72 | extern int xfs_bioerror(struct xfs_buf *bp); |
74 | extern int xfs_bioerror_relse(struct xfs_buf *bp); | 73 | extern int xfs_bioerror_relse(struct xfs_buf *bp); |
@@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, | |||
78 | extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, | 77 | extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, |
79 | xfs_buf_t *bp, xfs_daddr_t blkno); | 78 | xfs_buf_t *bp, xfs_daddr_t blkno); |
80 | 79 | ||
81 | /* | ||
82 | * Prototypes for functions in xfs_vnodeops.c. | ||
83 | */ | ||
84 | extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, | ||
85 | int flags); | ||
86 | |||
87 | #endif /* __XFS_RW_H__ */ | 80 | #endif /* __XFS_RW_H__ */ |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 775249a54f6f..ed47fc77759c 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -68,7 +68,7 @@ typedef struct xfs_trans_header { | |||
68 | #define XFS_TRANS_GROWFS 14 | 68 | #define XFS_TRANS_GROWFS 14 |
69 | #define XFS_TRANS_STRAT_WRITE 15 | 69 | #define XFS_TRANS_STRAT_WRITE 15 |
70 | #define XFS_TRANS_DIOSTRAT 16 | 70 | #define XFS_TRANS_DIOSTRAT 16 |
71 | #define XFS_TRANS_WRITE_SYNC 17 | 71 | /* 17 was XFS_TRANS_WRITE_SYNC */ |
72 | #define XFS_TRANS_WRITEID 18 | 72 | #define XFS_TRANS_WRITEID 18 |
73 | #define XFS_TRANS_ADDAFORK 19 | 73 | #define XFS_TRANS_ADDAFORK 19 |
74 | #define XFS_TRANS_ATTRINVAL 20 | 74 | #define XFS_TRANS_ATTRINVAL 20 |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8ee2f8c8b0a6..218829e6a152 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -307,7 +307,7 @@ xfs_trans_read_buf( | |||
307 | return (flags & XFS_BUF_TRYLOCK) ? | 307 | return (flags & XFS_BUF_TRYLOCK) ? |
308 | EAGAIN : XFS_ERROR(ENOMEM); | 308 | EAGAIN : XFS_ERROR(ENOMEM); |
309 | 309 | ||
310 | if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { | 310 | if (XFS_BUF_GETERROR(bp) != 0) { |
311 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 311 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
312 | bp, blkno); | 312 | bp, blkno); |
313 | error = XFS_BUF_GETERROR(bp); | 313 | error = XFS_BUF_GETERROR(bp); |
@@ -315,7 +315,7 @@ xfs_trans_read_buf( | |||
315 | return error; | 315 | return error; |
316 | } | 316 | } |
317 | #ifdef DEBUG | 317 | #ifdef DEBUG |
318 | if (xfs_do_error && (bp != NULL)) { | 318 | if (xfs_do_error) { |
319 | if (xfs_error_target == target) { | 319 | if (xfs_error_target == target) { |
320 | if (((xfs_req_num++) % xfs_error_mod) == 0) { | 320 | if (((xfs_req_num++) % xfs_error_mod) == 0) { |
321 | xfs_buf_relse(bp); | 321 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 23d276af2e0c..785ff101da0a 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug( | |||
49 | 49 | ||
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Get and lock the inode for the caller if it is not already | 52 | * Get an inode and join it to the transaction. |
53 | * locked within the given transaction. If it is already locked | ||
54 | * within the transaction, just increment its lock recursion count | ||
55 | * and return a pointer to it. | ||
56 | * | ||
57 | * For an inode to be locked in a transaction, the inode lock, as | ||
58 | * opposed to the io lock, must be taken exclusively. This ensures | ||
59 | * that the inode can be involved in only 1 transaction at a time. | ||
60 | * Lock recursion is handled on the io lock, but only for lock modes | ||
61 | * of equal or lesser strength. That is, you can recur on the io lock | ||
62 | * held EXCL with a SHARED request but not vice versa. Also, if | ||
63 | * the inode is already a part of the transaction then you cannot | ||
64 | * go from not holding the io lock to having it EXCL or SHARED. | ||
65 | * | ||
66 | * Use the inode cache routine xfs_inode_incore() to find the inode | ||
67 | * if it is already owned by this transaction. | ||
68 | * | ||
69 | * If we don't already own the inode, use xfs_iget() to get it. | ||
70 | * Since the inode log item structure is embedded in the incore | ||
71 | * inode structure and is initialized when the inode is brought | ||
72 | * into memory, there is nothing to do with it here. | ||
73 | * | ||
74 | * If the given transaction pointer is NULL, just call xfs_iget(). | ||
75 | * This simplifies code which must handle both cases. | ||
76 | */ | 53 | */ |
77 | int | 54 | int |
78 | xfs_trans_iget( | 55 | xfs_trans_iget( |
@@ -84,62 +61,11 @@ xfs_trans_iget( | |||
84 | xfs_inode_t **ipp) | 61 | xfs_inode_t **ipp) |
85 | { | 62 | { |
86 | int error; | 63 | int error; |
87 | xfs_inode_t *ip; | ||
88 | |||
89 | /* | ||
90 | * If the transaction pointer is NULL, just call the normal | ||
91 | * xfs_iget(). | ||
92 | */ | ||
93 | if (tp == NULL) | ||
94 | return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0); | ||
95 | |||
96 | /* | ||
97 | * If we find the inode in core with this transaction | ||
98 | * pointer in its i_transp field, then we know we already | ||
99 | * have it locked. In this case we just increment the lock | ||
100 | * recursion count and return the inode to the caller. | ||
101 | * Assert that the inode is already locked in the mode requested | ||
102 | * by the caller. We cannot do lock promotions yet, so | ||
103 | * die if someone gets this wrong. | ||
104 | */ | ||
105 | if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) { | ||
106 | /* | ||
107 | * Make sure that the inode lock is held EXCL and | ||
108 | * that the io lock is never upgraded when the inode | ||
109 | * is already a part of the transaction. | ||
110 | */ | ||
111 | ASSERT(ip->i_itemp != NULL); | ||
112 | ASSERT(lock_flags & XFS_ILOCK_EXCL); | ||
113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
114 | ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || | ||
115 | xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
116 | ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || | ||
117 | (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); | ||
118 | ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || | ||
119 | xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); | ||
120 | ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || | ||
121 | (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); | ||
122 | |||
123 | if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { | ||
124 | ip->i_itemp->ili_iolock_recur++; | ||
125 | } | ||
126 | if (lock_flags & XFS_ILOCK_EXCL) { | ||
127 | ip->i_itemp->ili_ilock_recur++; | ||
128 | } | ||
129 | *ipp = ip; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | ASSERT(lock_flags & XFS_ILOCK_EXCL); | ||
134 | error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0); | ||
135 | if (error) { | ||
136 | return error; | ||
137 | } | ||
138 | ASSERT(ip != NULL); | ||
139 | 64 | ||
140 | xfs_trans_ijoin(tp, ip, lock_flags); | 65 | error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); |
141 | *ipp = ip; | 66 | if (!error && tp) |
142 | return 0; | 67 | xfs_trans_ijoin(tp, *ipp, lock_flags); |
68 | return error; | ||
143 | } | 69 | } |
144 | 70 | ||
145 | /* | 71 | /* |
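With the recursion bookkeeping gone, xfs_trans_iget() reduces to xfs_iget() plus an automatic xfs_trans_ijoin() when a transaction is supplied. A hypothetical caller, sketching the simplified contract:

        xfs_inode_t     *ip;
        int             error;

        error = xfs_trans_iget(mp, tp, ino, 0, XFS_ILOCK_EXCL, &ip);
        if (error)
                return error;
        /* ip is returned locked; when tp != NULL it is already joined to tp */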
@@ -163,8 +89,6 @@ xfs_trans_ijoin( | |||
163 | xfs_inode_item_init(ip, ip->i_mount); | 89 | xfs_inode_item_init(ip, ip->i_mount); |
164 | iip = ip->i_itemp; | 90 | iip = ip->i_itemp; |
165 | ASSERT(iip->ili_flags == 0); | 91 | ASSERT(iip->ili_flags == 0); |
166 | ASSERT(iip->ili_ilock_recur == 0); | ||
167 | ASSERT(iip->ili_iolock_recur == 0); | ||
168 | 92 | ||
169 | /* | 93 | /* |
170 | * Get a log_item_desc to point at the new item. | 94 | * Get a log_item_desc to point at the new item. |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c4eca5ed5dab..a434f287962d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -538,7 +538,9 @@ xfs_readlink_bmap( | |||
538 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); | 538 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); |
539 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); | 539 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); |
540 | 540 | ||
541 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); | 541 | bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), |
542 | XBF_LOCK | XBF_MAPPED | | ||
543 | XBF_DONT_BLOCK); | ||
542 | error = XFS_BUF_GETERROR(bp); | 544 | error = XFS_BUF_GETERROR(bp); |
543 | if (error) { | 545 | if (error) { |
544 | xfs_ioerror_alert("xfs_readlink", | 546 | xfs_ioerror_alert("xfs_readlink", |
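The xfs_readlink_bmap() hunk switches to xfs_buf_read_flags() so the buffer flags are explicit. The annotations below are assumptions about the XBF_* meanings in xfs_buf.h of this era, not guarantees; verify against your tree:

        bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
                                XBF_LOCK |              /* return the buffer locked */
                                XBF_MAPPED |            /* mapped, so b_addr is valid */
                                XBF_DONT_BLOCK);        /* don't block in this thread */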
@@ -609,7 +611,7 @@ xfs_fsync( | |||
609 | xfs_inode_t *ip) | 611 | xfs_inode_t *ip) |
610 | { | 612 | { |
611 | xfs_trans_t *tp; | 613 | xfs_trans_t *tp; |
612 | int error; | 614 | int error = 0; |
613 | int log_flushed = 0, changed = 1; | 615 | int log_flushed = 0, changed = 1; |
614 | 616 | ||
615 | xfs_itrace_entry(ip); | 617 | xfs_itrace_entry(ip); |
@@ -617,14 +619,9 @@ xfs_fsync( | |||
617 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 619 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
618 | return XFS_ERROR(EIO); | 620 | return XFS_ERROR(EIO); |
619 | 621 | ||
620 | /* capture size updates in I/O completion before writing the inode. */ | ||
621 | error = xfs_wait_on_pages(ip, 0, -1); | ||
622 | if (error) | ||
623 | return XFS_ERROR(error); | ||
624 | |||
625 | /* | 622 | /* |
626 | * We always need to make sure that the required inode state is safe on | 623 | * We always need to make sure that the required inode state is safe on |
627 | * disk. The vnode might be clean but we still might need to force the | 624 | * disk. The inode might be clean but we still might need to force the |
628 | * log because of committed transactions that haven't hit the disk yet. | 625 | * log because of committed transactions that haven't hit the disk yet. |
629 | * Likewise, there could be unflushed non-transactional changes to the | 626 | * Likewise, there could be unflushed non-transactional changes to the |
630 | * inode core that have to go to disk and this requires us to issue | 627 | * inode core that have to go to disk and this requires us to issue |
@@ -636,7 +633,7 @@ xfs_fsync( | |||
636 | */ | 633 | */ |
637 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 634 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
638 | 635 | ||
639 | if (!(ip->i_update_size || ip->i_update_core)) { | 636 | if (!ip->i_update_core) { |
640 | /* | 637 | /* |
641 | * Timestamps/size haven't changed since last inode flush or | 638 | * Timestamps/size haven't changed since last inode flush or |
642 | * inode transaction commit. That means either nothing got | 639 | * inode transaction commit. That means either nothing got |
@@ -716,7 +713,7 @@ xfs_fsync( | |||
716 | * when the link count isn't zero and by xfs_dm_punch_hole() when | 713 | * when the link count isn't zero and by xfs_dm_punch_hole() when |
717 | * punching a hole to EOF. | 714 | * punching a hole to EOF. |
718 | */ | 715 | */ |
719 | int | 716 | STATIC int |
720 | xfs_free_eofblocks( | 717 | xfs_free_eofblocks( |
721 | xfs_mount_t *mp, | 718 | xfs_mount_t *mp, |
722 | xfs_inode_t *ip, | 719 | xfs_inode_t *ip, |
@@ -1474,8 +1471,8 @@ xfs_create( | |||
1474 | if (error == ENOSPC) { | 1471 | if (error == ENOSPC) { |
1475 | /* flush outstanding delalloc blocks and retry */ | 1472 | /* flush outstanding delalloc blocks and retry */ |
1476 | xfs_flush_inodes(dp); | 1473 | xfs_flush_inodes(dp); |
1477 | error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, | 1474 | error = xfs_trans_reserve(tp, resblks, log_res, 0, |
1478 | XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); | 1475 | XFS_TRANS_PERM_LOG_RES, log_count); |
1479 | } | 1476 | } |
1480 | if (error == ENOSPC) { | 1477 | if (error == ENOSPC) { |
1481 | /* No space at all so try a "no-allocation" reservation */ | 1478 | /* No space at all so try a "no-allocation" reservation */ |