author     Jens Axboe <jaxboe@fusionio.com>  2010-06-01 06:42:12 -0400
committer  Jens Axboe <jaxboe@fusionio.com>  2010-06-01 06:42:12 -0400
commit     b4ca761577535b2b4d153689ee97342797dfff05
tree       29054d55508f1faa22ec32acf7c245751af03348 /fs
parent     28f4197e5d4707311febeec8a0eb97cb5fd93c97
parent     67a3e12b05e055c0415c556a315a3d3eb637e29e

Merge branch 'master' into for-linus

Conflicts:
	fs/pipe.c

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Diffstat (limited to 'fs'): 275 files changed, 11458 insertions(+), 6190 deletions(-)
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index ed835836e0dc..32ef4009d030 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,7 +40,9 @@
 extern struct file_system_type v9fs_fs_type;
 extern const struct address_space_operations v9fs_addr_operations;
 extern const struct file_operations v9fs_file_operations;
+extern const struct file_operations v9fs_file_operations_dotl;
 extern const struct file_operations v9fs_dir_operations;
+extern const struct file_operations v9fs_dir_operations_dotl;
 extern const struct dentry_operations v9fs_dentry_operations;
 extern const struct dentry_operations v9fs_cached_dentry_operations;
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0adfd64dfcee..d61e3b28ce37 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -203,3 +203,11 @@ const struct file_operations v9fs_dir_operations = {
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 };
+
+const struct file_operations v9fs_dir_operations_dotl = {
+	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
+	.readdir = v9fs_dir_readdir,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index df52d488d2a6..2bedc6c94fc2 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,15 +257,13 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	return total;
 }
 
-static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
-			   int datasync)
+static int v9fs_file_fsync(struct file *filp, int datasync)
 {
 	struct p9_fid *fid;
 	struct p9_wstat wstat;
 	int retval;
 
-	P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
-		   dentry, datasync);
+	P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
 	fid = filp->private_data;
 	v9fs_blank_wstat(&wstat);
@@ -296,3 +294,14 @@ const struct file_operations v9fs_file_operations = {
 	.mmap = generic_file_readonly_mmap,
 	.fsync = v9fs_file_fsync,
 };
+
+const struct file_operations v9fs_file_operations_dotl = {
+	.llseek = generic_file_llseek,
+	.read = v9fs_file_read,
+	.write = v9fs_file_write,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+	.lock = v9fs_file_lock,
+	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync,
+};
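
The two hunks above are part of a tree-wide change of the ->fsync() prototype from (file, dentry, datasync) to (file, datasync). Implementations that still need the inode or dentry recover them from the struct file, as the affs and afs conversions further down in this merge do. A minimal sketch of the new shape, with a hypothetical filesystem name:

/* Hedged sketch only; "foofs" is a placeholder, not part of this merge. */
static int foofs_fsync(struct file *filp, int datasync)
{
	/* the inode now comes from the file, not from a dentry argument */
	struct inode *inode = filp->f_mapping->host;

	return write_inode_now(inode, 0);	/* as the affs conversion does */
}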
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6d4d86187c55..4331b3b5ee1c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -44,9 +44,12 @@
 #include "cache.h"
 
 static const struct inode_operations v9fs_dir_inode_operations;
-static const struct inode_operations v9fs_dir_inode_operations_ext;
+static const struct inode_operations v9fs_dir_inode_operations_dotu;
+static const struct inode_operations v9fs_dir_inode_operations_dotl;
 static const struct inode_operations v9fs_file_inode_operations;
+static const struct inode_operations v9fs_file_inode_operations_dotl;
 static const struct inode_operations v9fs_symlink_inode_operations;
+static const struct inode_operations v9fs_symlink_inode_operations_dotl;
 
 /**
  * unixmode2p9mode - convert unix mode bits to plan 9
@@ -273,25 +276,44 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 		break;
 	case S_IFREG:
-		inode->i_op = &v9fs_file_inode_operations;
-		inode->i_fop = &v9fs_file_operations;
+		if (v9fs_proto_dotl(v9ses)) {
+			inode->i_op = &v9fs_file_inode_operations_dotl;
+			inode->i_fop = &v9fs_file_operations_dotl;
+		} else {
+			inode->i_op = &v9fs_file_inode_operations;
+			inode->i_fop = &v9fs_file_operations;
+		}
+
 		break;
+
 	case S_IFLNK:
-		if (!v9fs_proto_dotu(v9ses)) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "extended modes used w/o 9P2000.u\n");
+		if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
+						"legacy protocol.\n");
 			err = -EINVAL;
 			goto error;
 		}
-		inode->i_op = &v9fs_symlink_inode_operations;
+
+		if (v9fs_proto_dotl(v9ses))
+			inode->i_op = &v9fs_symlink_inode_operations_dotl;
+		else
+			inode->i_op = &v9fs_symlink_inode_operations;
+
 		break;
 	case S_IFDIR:
 		inc_nlink(inode);
-		if (v9fs_proto_dotu(v9ses))
-			inode->i_op = &v9fs_dir_inode_operations_ext;
+		if (v9fs_proto_dotl(v9ses))
+			inode->i_op = &v9fs_dir_inode_operations_dotl;
+		else if (v9fs_proto_dotu(v9ses))
+			inode->i_op = &v9fs_dir_inode_operations_dotu;
 		else
 			inode->i_op = &v9fs_dir_inode_operations;
-		inode->i_fop = &v9fs_dir_operations;
+
+		if (v9fs_proto_dotl(v9ses))
+			inode->i_fop = &v9fs_dir_operations_dotl;
+		else
+			inode->i_fop = &v9fs_dir_operations;
+
 		break;
 	default:
 		P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
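
The three-way dispatch above relies on the v9fs_proto_dotl()/v9fs_proto_dotu() predicates, which live in fs/9p/v9fs.h and are not part of this diff; their assumed shape is a plain session-flag test:

/* Assumed shape of the protocol predicates (defined outside this diff). */
static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
{
	return v9ses->flags & V9FS_PROTO_2000U;	/* 9P2000.u extensions */
}

static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
{
	return v9ses->flags & V9FS_PROTO_2000L;	/* 9P2000.L Linux variant */
}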
@@ -432,14 +454,12 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 {
 	int retval;
 	struct inode *file_inode;
-	struct v9fs_session_info *v9ses;
 	struct p9_fid *v9fid;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
 		   rmdir);
 
 	file_inode = file->d_inode;
-	v9ses = v9fs_inode2v9ses(file_inode);
 	v9fid = v9fs_fid_clone(file);
 	if (IS_ERR(v9fid))
 		return PTR_ERR(v9fid);
@@ -482,12 +502,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	ofid = NULL;
 	fid = NULL;
 	name = (char *) dentry->d_name.name;
-	dfid = v9fs_fid_clone(dentry->d_parent);
+	dfid = v9fs_fid_lookup(dentry->d_parent);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
-		P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
-		dfid = NULL;
-		goto error;
+		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+		return ERR_PTR(err);
 	}
 
 	/* clone a fid to use for creation */
@@ -495,8 +514,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
 		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-		ofid = NULL;
-		goto error;
+		return ERR_PTR(err);
 	}
 
 	err = p9_client_fcreate(ofid, name, perm, mode, extension);
@@ -506,14 +524,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	}
 
 	/* now walk from the parent so we can get unopened fid */
-	fid = p9_client_walk(dfid, 1, &name, 0);
+	fid = p9_client_walk(dfid, 1, &name, 1);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
 		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		fid = NULL;
 		goto error;
-	} else
-		dfid = NULL;
+	}
 
 	/* instantiate inode and assign the unopened fid to the dentry */
 	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
@@ -536,9 +553,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	return ofid;
 
 error:
-	if (dfid)
-		p9_client_clunk(dfid);
-
 	if (ofid)
 		p9_client_clunk(ofid);
 
@@ -673,8 +687,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(fid)) {
 		result = PTR_ERR(fid);
 		if (result == -ENOENT) {
-			d_add(dentry, NULL);
-			return NULL;
+			inode = NULL;
+			goto inst_out;
 		}
 
 		return ERR_PTR(result);
@@ -691,7 +705,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	if (result < 0)
 		goto error;
 
-	if ((fid->qid.version) && (v9ses->cache))
+inst_out:
+	if (v9ses->cache)
 		dentry->d_op = &v9fs_cached_dentry_operations;
 	else
 		dentry->d_op = &v9fs_dentry_operations;
@@ -770,6 +785,13 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto clunk_olddir;
 	}
 
+	if (v9fs_proto_dotl(v9ses)) {
+		retval = p9_client_rename(oldfid, newdirfid,
+					(char *) new_dentry->d_name.name);
+		if (retval != -ENOSYS)
+			goto clunk_newdir;
+	}
+
 	/* 9P can only handle file rename in the same directory */
 	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
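
The rename hunk above uses a pattern that recurs whenever a protocol grows a native operation an older server may lack: attempt it first, and fall back to the legacy path only on -ENOSYS so that any other error is reported as-is. A hedged sketch of the idiom with hypothetical names:

/* Sketch of the -ENOSYS fallback idiom; native_op/legacy_op are placeholders. */
static int op_with_fallback(struct p9_fid *fid)
{
	int ret = native_op(fid);	/* e.g. p9_client_rename() above */

	if (ret != -ENOSYS)
		return ret;		/* supported: success or a real error */

	return legacy_op(fid);		/* emulate, as the same-directory path does */
}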
@@ -1195,6 +1217,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 		sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
 	else if (S_ISFIFO(mode))
 		*name = 0;
+	else if (S_ISSOCK(mode))
+		*name = 0;
 	else {
 		__putname(name);
 		return -EINVAL;
@@ -1206,7 +1230,21 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	return retval;
 }
 
-static const struct inode_operations v9fs_dir_inode_operations_ext = {
+static const struct inode_operations v9fs_dir_inode_operations_dotu = {
+	.create = v9fs_vfs_create,
+	.lookup = v9fs_vfs_lookup,
+	.symlink = v9fs_vfs_symlink,
+	.link = v9fs_vfs_link,
+	.unlink = v9fs_vfs_unlink,
+	.mkdir = v9fs_vfs_mkdir,
+	.rmdir = v9fs_vfs_rmdir,
+	.mknod = v9fs_vfs_mknod,
+	.rename = v9fs_vfs_rename,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
 	.symlink = v9fs_vfs_symlink,
@@ -1237,6 +1275,11 @@ static const struct inode_operations v9fs_file_inode_operations = {
 	.setattr = v9fs_vfs_setattr,
 };
 
+static const struct inode_operations v9fs_file_inode_operations_dotl = {
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
 static const struct inode_operations v9fs_symlink_inode_operations = {
 	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link,
@@ -1244,3 +1287,11 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
 };
+
+static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
+	.readlink = generic_readlink,
+	.follow_link = v9fs_vfs_follow_link,
+	.put_link = v9fs_vfs_put_link,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 806da5d3b3a0..be74d020436e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -38,6 +38,7 @@
 #include <linux/idr.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/statfs.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -45,7 +46,7 @@
 #include "v9fs_vfs.h"
 #include "fid.h"
 
-static const struct super_operations v9fs_super_ops;
+static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
 
 /**
  * v9fs_set_super - set the superblock
@@ -76,7 +77,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
 	sb->s_blocksize = 1 << sb->s_blocksize_bits;
 	sb->s_magic = V9FS_MAGIC;
-	sb->s_op = &v9fs_super_ops;
+	if (v9fs_proto_dotl(v9ses))
+		sb->s_op = &v9fs_super_ops_dotl;
+	else
+		sb->s_op = &v9fs_super_ops;
 	sb->s_bdi = &v9ses->bdi;
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
@@ -211,6 +215,42 @@ v9fs_umount_begin(struct super_block *sb)
 	v9fs_session_begin_cancel(v9ses);
 }
 
+static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_rstatfs rs;
+	int res;
+
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid)) {
+		res = PTR_ERR(fid);
+		goto done;
+	}
+
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	if (v9fs_proto_dotl(v9ses)) {
+		res = p9_client_statfs(fid, &rs);
+		if (res == 0) {
+			buf->f_type = rs.type;
+			buf->f_bsize = rs.bsize;
+			buf->f_blocks = rs.blocks;
+			buf->f_bfree = rs.bfree;
+			buf->f_bavail = rs.bavail;
+			buf->f_files = rs.files;
+			buf->f_ffree = rs.ffree;
+			buf->f_fsid.val[0] = rs.fsid & 0xFFFFFFFFUL;
+			buf->f_fsid.val[1] = (rs.fsid >> 32) & 0xFFFFFFFFUL;
+			buf->f_namelen = rs.namelen;
+		}
+		if (res != -ENOSYS)
+			goto done;
+	}
+	res = simple_statfs(dentry, buf);
+done:
+	return res;
+}
+
 static const struct super_operations v9fs_super_ops = {
 #ifdef CONFIG_9P_FSCACHE
 	.alloc_inode = v9fs_alloc_inode,
@@ -222,6 +262,17 @@ static const struct super_operations v9fs_super_ops = {
 	.umount_begin = v9fs_umount_begin,
 };
 
+static const struct super_operations v9fs_super_ops_dotl = {
+#ifdef CONFIG_9P_FSCACHE
+	.alloc_inode = v9fs_alloc_inode,
+	.destroy_inode = v9fs_destroy_inode,
+#endif
+	.statfs = v9fs_statfs,
+	.clear_inode = v9fs_clear_inode,
+	.show_options = generic_show_options,
+	.umount_begin = v9fs_umount_begin,
+};
+
 struct file_system_type v9fs_fs_type = {
 	.name = "9p",
 	.get_sb = v9fs_get_sb,
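
The two f_fsid assignments in v9fs_statfs() above split the server's 64-bit fsid across the two 32-bit val slots of the kernel's fsid type. A quick worked example of the masking and shifting, written as plain user-space C for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t fsid = 0x0123456789abcdefULL;		/* example value */
	uint32_t lo = fsid & 0xFFFFFFFFUL;		/* val[0] = 0x89abcdef */
	uint32_t hi = (fsid >> 32) & 0xFFFFFFFFUL;	/* val[1] = 0x01234567 */

	printf("lo=%08x hi=%08x\n", lo, hi);
	return 0;
}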
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 23aa52f548a0..f4287e4de744 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,7 +197,7 @@ const struct file_operations adfs_dir_operations = {
 	.read = generic_read_dir,
 	.llseek = generic_file_llseek,
 	.readdir = adfs_readdir,
-	.fsync = simple_fsync,
+	.fsync = generic_file_fsync,
 };
 
 static int
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 005ea34d1758..a36da5382b40 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = {
 	.read = do_sync_read,
 	.aio_read = generic_file_aio_read,
 	.mmap = generic_file_mmap,
-	.fsync = simple_fsync,
+	.fsync = generic_file_fsync,
 	.write = do_sync_write,
 	.aio_write = generic_file_aio_write,
 	.splice_read = generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 0f5e30978135..6f850b06ab62 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -322,8 +322,9 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		goto out;
 
+	/* XXX: this is missing some actual on-disk truncation.. */
 	if (ia_valid & ATTR_SIZE)
-		error = vmtruncate(inode, attr->ia_size);
+		error = simple_setsize(inode, attr->ia_size);
 
 	if (error)
 		goto out;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 861dae68ac12..f05b6155ccc8 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -183,7 +183,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
 
 void affs_free_prealloc(struct inode *inode);
 extern void affs_truncate(struct inode *);
-int affs_file_fsync(struct file *, struct dentry *, int);
+int affs_file_fsync(struct file *, int);
 
 /* dir.c */
 
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 184e55c1c9ba..322710c3eedf 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -916,9 +916,9 @@ affs_truncate(struct inode *inode)
 	affs_free_prealloc(inode);
 }
 
-int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int affs_file_fsync(struct file *filp, int datasync)
 {
-	struct inode * inode = dentry->d_inode;
+	struct inode *inode = filp->f_mapping->host;
 	int ret, err;
 
 	ret = write_inode_now(inode, 0);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d70bbbac6b7b..914d1c0bc07a 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -224,7 +224,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		affs_brelse(bh);
 		inode = affs_iget(sb, ino);
 		if (IS_ERR(inode))
-			return ERR_PTR(PTR_ERR(inode));
+			return ERR_CAST(inode);
 	}
 	dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
 	d_add(dentry, inode);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 807f284cc75e..5f679b77ce24 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -740,7 +740,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
 			      unsigned long, loff_t);
 extern int afs_writeback_all(struct afs_vnode *);
-extern int afs_fsync(struct file *, struct dentry *, int);
+extern int afs_fsync(struct file *, int);
 
 
 /*****************************************************************************/
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3bed54a294d4..3dab9e9948d0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -701,8 +701,9 @@ int afs_writeback_all(struct afs_vnode *vnode)
  * - the return status from this call provides a reliable indication of
  *   whether any write errors occurred for this process.
  */
-int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int afs_fsync(struct file *file, int datasync)
 {
+	struct dentry *dentry = file->f_path.dentry;
 	struct afs_writeback *wb, *xwb;
 	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
 	int ret;
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,6 +36,7 @@
 #include <linux/blkdev.h>
 #include <linux/mempool.h>
 #include <linux/hash.h>
+#include <linux/compat.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -526,7 +527,7 @@ static void aio_fput_routine(struct work_struct *data)
 
 	/* Complete the fput(s) */
 	if (req->ki_filp != NULL)
-		__fput(req->ki_filp);
+		fput(req->ki_filp);
 
 	/* Link the iocb into the context's free list */
 	spin_lock_irq(&ctx->ctx_lock);
@@ -559,11 +560,11 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 
 	/*
 	 * Try to optimize the aio and eventfd file* puts, by avoiding to
-	 * schedule work in case it is not __fput() time. In normal cases,
+	 * schedule work in case it is not final fput() time. In normal cases,
 	 * we would not be holding the last reference to the file*, so
 	 * this function will be executed w/out any aio kthread wakeup.
 	 */
-	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(!fput_atomic(req->ki_filp))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
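
fput_atomic() drops a file reference only when it is not the last one, so a zero return above means the final put has to be deferred to process context. A hedged condensation of the pattern, using only the symbols visible in this hunk:

/*
 * Sketch of the deferred-fput pattern from __aio_put_req() above; the
 * workqueue kick at the end of the real function is not shown in this hunk.
 */
static void aio_put_filp(struct kioctx *ctx, struct kiocb *req)
{
	if (fput_atomic(req->ki_filp))
		return;			/* fast path: not the last reference */

	get_ioctx(ctx);			/* pin the context until the worker runs */
	spin_lock(&fput_lock);
	list_add(&req->ki_list, &fput_head);
	spin_unlock(&fput_lock);
	/* ...then wake the aio workqueue so aio_fput_routine() calls fput() */
}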
@@ -1384,13 +1385,22 @@ static ssize_t aio_fsync(struct kiocb *iocb)
 	return ret;
 }
 
-static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb)
+static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
 {
 	ssize_t ret;
 
-	ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf,
-				kiocb->ki_nbytes, 1,
-				&kiocb->ki_inline_vec, &kiocb->ki_iovec);
+#ifdef CONFIG_COMPAT
+	if (compat)
+		ret = compat_rw_copy_check_uvector(type,
+				(struct compat_iovec __user *)kiocb->ki_buf,
+				kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+				&kiocb->ki_iovec);
+	else
+#endif
+		ret = rw_copy_check_uvector(type,
+				(struct iovec __user *)kiocb->ki_buf,
+				kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+				&kiocb->ki_iovec);
 	if (ret < 0)
 		goto out;
 
@@ -1420,7 +1430,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
  * Performs the initial checks and aio retry method
  * setup for the kiocb at the time of io submission.
  */
-static ssize_t aio_setup_iocb(struct kiocb *kiocb)
+static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 {
 	struct file *file = kiocb->ki_filp;
 	ssize_t ret = 0;
@@ -1469,7 +1479,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 		ret = security_file_permission(file, MAY_READ);
 		if (unlikely(ret))
 			break;
-		ret = aio_setup_vectored_rw(READ, kiocb);
+		ret = aio_setup_vectored_rw(READ, kiocb, compat);
 		if (ret)
 			break;
 		ret = -EINVAL;
@@ -1483,7 +1493,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 		ret = security_file_permission(file, MAY_WRITE);
 		if (unlikely(ret))
 			break;
-		ret = aio_setup_vectored_rw(WRITE, kiocb);
+		ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
 		if (ret)
 			break;
 		ret = -EINVAL;
@@ -1548,7 +1558,8 @@ static void aio_batch_free(struct hlist_head *batch_hash)
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, struct hlist_head *batch_hash)
+			 struct iocb *iocb, struct hlist_head *batch_hash,
+			 bool compat)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1609,7 +1620,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
 	req->ki_opcode = iocb->aio_lio_opcode;
 
-	ret = aio_setup_iocb(req);
+	ret = aio_setup_iocb(req, compat);
 
 	if (ret)
 		goto out_put_req;
@@ -1637,20 +1648,8 @@ out_put_req:
 	return ret;
 }
 
-/* sys_io_submit:
- *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
- *	the number of iocbs queued.  May return -EINVAL if the aio_context
- *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
- *	*iocbpp[0] is not properly initialized, if the operation specified
- *	is invalid for the file descriptor in the iocb.  May fail with
- *	-EFAULT if any of the data structures point to invalid data.  May
- *	fail with -EBADF if the file descriptor specified in the first
- *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
- *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
- *	fail with -ENOSYS if not implemented.
- */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
-		struct iocb __user * __user *, iocbpp)
+long do_io_submit(aio_context_t ctx_id, long nr,
+		  struct iocb __user *__user *iocbpp, bool compat)
 {
 	struct kioctx *ctx;
 	long ret = 0;
@@ -1687,7 +1686,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
+		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat);
 		if (ret)
 			break;
 	}
@@ -1697,6 +1696,24 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	return i ? i : ret;
 }
 
+/* sys_io_submit:
+ *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
+ *	the number of iocbs queued.  May return -EINVAL if the aio_context
+ *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
+ *	*iocbpp[0] is not properly initialized, if the operation specified
+ *	is invalid for the file descriptor in the iocb.  May fail with
+ *	-EFAULT if any of the data structures point to invalid data.  May
+ *	fail with -EBADF if the file descriptor specified in the first
+ *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
+ *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
+ *	fail with -ENOSYS if not implemented.
+ */
+SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
+		struct iocb __user * __user *, iocbpp)
+{
+	return do_io_submit(ctx_id, nr, iocbpp, 0);
+}
+
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
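
With submission factored into do_io_submit(), the native syscall passes compat=0 and the 32-bit entry point passes compat=1 so that aio_setup_vectored_rw() parses compat iovecs. The compat wrapper itself lives in fs/compat.c and is not part of this hunk; the following is an assumed sketch of its shape, with the helper names recalled rather than quoted:

asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr,
				     u32 __user *iocb)
{
	struct iocb __user * __user *iocb64;
	long ret;

	if (unlikely(nr < 0))
		return -EINVAL;

	/* widen the 32-bit iocb pointer array, then submit with compat=1 */
	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
	ret = copy_iocb(nr, iocb, iocb64);	/* assumed fs/compat.c helper */
	if (!ret)
		ret = do_io_submit(ctx_id, nr, iocb64, 1);
	return ret;
}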
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 9bd4b3876c99..e4b75d6eda83 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void)
 	 * that it already _is_ on the dirty list.
 	 */
 	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+	inode->i_mode = S_IRUSR | S_IWUSR;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	inode->i_flags |= S_PRIVATE;
diff --git a/fs/attr.c b/fs/attr.c
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,14 +67,14 @@ EXPORT_SYMBOL(inode_change_ok);
  * @offset:	the new size to assign to the inode
  * @Returns:	0 on success, -ve errno on failure
  *
+ * inode_newsize_ok must be called with i_mutex held.
+ *
  * inode_newsize_ok will check filesystem limits and ulimits to check that the
  * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
  * when necessary. Caller must not proceed with inode size change if failure is
  * returned. @inode must be a file (not directory), with appropriate
  * permissions to allow truncate (inode_newsize_ok does NOT check these
  * conditions).
- *
- * inode_newsize_ok must be called with i_mutex held.
  */
 int inode_newsize_ok(const struct inode *inode, loff_t offset)
 {
@@ -104,17 +104,25 @@ out_big:
 }
 EXPORT_SYMBOL(inode_newsize_ok);
 
-int inode_setattr(struct inode * inode, struct iattr * attr)
+/**
+ * generic_setattr - copy simple metadata updates into the generic inode
+ * @inode:	the inode to be updated
+ * @attr:	the new attributes
+ *
+ * generic_setattr must be called with i_mutex held.
+ *
+ * generic_setattr updates the inode's metadata with that specified
+ * in attr. Noticably missing is inode size update, which is more complex
+ * as it requires pagecache updates. See simple_setsize.
+ *
+ * The inode is not marked as dirty after this operation. The rationale is
+ * that for "simple" filesystems, the struct inode is the inode storage.
+ * The caller is free to mark the inode dirty afterwards if needed.
+ */
+void generic_setattr(struct inode *inode, const struct iattr *attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
 
-	if (ia_valid & ATTR_SIZE &&
-	    attr->ia_size != i_size_read(inode)) {
-		int error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
-	}
-
 	if (ia_valid & ATTR_UID)
 		inode->i_uid = attr->ia_uid;
 	if (ia_valid & ATTR_GID)
@@ -135,6 +143,28 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
 		mode &= ~S_ISGID;
 		inode->i_mode = mode;
 	}
+}
+EXPORT_SYMBOL(generic_setattr);
+
+/*
+ * note this function is deprecated, the new truncate sequence should be
+ * used instead -- see eg. simple_setsize, generic_setattr.
+ */
+int inode_setattr(struct inode *inode, const struct iattr *attr)
+{
+	unsigned int ia_valid = attr->ia_valid;
+
+	if (ia_valid & ATTR_SIZE &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	generic_setattr(inode, attr);
+
 	mark_inode_dirty(inode);
 
 	return 0;
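
Together with the simple_setsize() conversions elsewhere in this merge (see the adfs hunk above), the comments describe the intended replacement for inode_setattr() in a simple filesystem's ->setattr. A hedged sketch of that sequence, with a hypothetical filesystem name:

/* Sketch of the new truncate sequence; "foofs" is a placeholder. */
static int foofs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		error = simple_setsize(inode, attr->ia_size);	/* pagecache aware */
		if (error)
			return error;
	}

	generic_setattr(inode, attr);	/* everything except the size */
	mark_inode_dirty(inode);	/* generic_setattr leaves this to us */
	return 0;
}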
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 8713c7cfbc79..9a0520b50663 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -28,6 +28,7 @@ static int autofs_root_mkdir(struct inode *,struct dentry *,int);
 static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 
 const struct file_operations autofs_root_operations = {
+	.llseek = generic_file_llseek,
 	.read = generic_read_dir,
 	.readdir = autofs_root_readdir,
 	.ioctl = autofs_root_ioctl,
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index d29b7f6df862..ba4a38b9c22f 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -95,7 +95,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
  */
 static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
 {
-	struct autofs_dev_ioctl tmp, *ads;
+	struct autofs_dev_ioctl tmp;
 
 	if (copy_from_user(&tmp, in, sizeof(tmp)))
 		return ERR_PTR(-EFAULT);
@@ -103,16 +103,7 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
 	if (tmp.size < sizeof(tmp))
 		return ERR_PTR(-EINVAL);
 
-	ads = kmalloc(tmp.size, GFP_KERNEL);
-	if (!ads)
-		return ERR_PTR(-ENOMEM);
-
-	if (copy_from_user(ads, in, tmp.size)) {
-		kfree(ads);
-		return ERR_PTR(-EFAULT);
-	}
-
-	return ads;
+	return memdup_user(in, tmp.size);
 }
 
 static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
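
memdup_user() collapses the old kmalloc + copy_from_user + kfree sequence into one call that returns either the kernel copy or an ERR_PTR. The shape used above, peeking at a fixed-size header to learn the payload length and then duplicating the whole buffer, generalizes; a hedged sketch with a hypothetical struct:

/* Sketch of the header-then-memdup_user() pattern; struct name is made up. */
struct var_arg {
	u32 size;		/* total size, including this header */
	char data[];
};

static struct var_arg *copy_var_arg(struct var_arg __user *in)
{
	struct var_arg hdr;

	if (copy_from_user(&hdr, in, sizeof(hdr)))
		return ERR_PTR(-EFAULT);
	if (hdr.size < sizeof(hdr))		/* reject undersized claims */
		return ERR_PTR(-EINVAL);

	/* kmallocs hdr.size bytes and copies, or returns an ERR_PTR */
	return memdup_user(in, hdr.size);
}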
@@ -736,11 +727,14 @@ static const struct file_operations _dev_ioctl_fops = {
 };
 
 static struct miscdevice _autofs_dev_ioctl_misc = {
-	.minor = MISC_DYNAMIC_MINOR,
+	.minor = AUTOFS_MINOR,
 	.name = AUTOFS_DEVICE_NAME,
 	.fops = &_dev_ioctl_fops
 };
 
+MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
+MODULE_ALIAS("devname:autofs");
+
 /* Register/deregister misc character device */
 int autofs_dev_ioctl_init(void)
 {
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e8e5e63ac950..db4117ed7803 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,13 +18,14 @@
 #include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
+#include <linux/smp_lock.h>
 #include "autofs_i.h"
 
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
-static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
+static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -38,7 +39,7 @@ const struct file_operations autofs4_root_operations = {
 	.read = generic_read_dir,
 	.readdir = dcache_readdir,
 	.llseek = dcache_dir_lseek,
-	.ioctl = autofs4_root_ioctl,
+	.unlocked_ioctl = autofs4_root_ioctl,
 };
 
 const struct file_operations autofs4_dir_operations = {
@@ -902,8 +903,8 @@ int is_autofs4_dentry(struct dentry *dentry)
 * ioctl()'s on the root directory is the chief method for the daemon to
 * generate kernel reactions
 */
-static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
-			      unsigned int cmd, unsigned long arg)
+static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
+				       unsigned int cmd, unsigned long arg)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb);
 	void __user *p = (void __user *)arg;
@@ -947,3 +948,16 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
 		return -ENOSYS;
 	}
 }
+
+static long autofs4_root_ioctl(struct file *filp,
+			       unsigned int cmd, unsigned long arg)
+{
+	long ret;
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	lock_kernel();
+	ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index a05287a23f62..52e59bf4aa5f 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -93,8 +93,7 @@ static int bad_file_release(struct inode *inode, struct file *filp)
 	return -EIO;
 }
 
-static int bad_file_fsync(struct file *file, struct dentry *dentry,
-			  int datasync)
+static int bad_file_fsync(struct file *file, int datasync)
 {
 	return -EIO;
 }
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 8f73841fc974..d967e052b779 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -78,7 +78,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
 const struct file_operations bfs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = bfs_readdir,
-	.fsync = simple_fsync,
+	.fsync = generic_file_fsync,
 	.llseek = generic_file_llseek,
 };
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 26e5f5026620..7346c96308a5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,8 +172,9 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
-				iov, offset, nr_segs, blkdev_get_blocks, NULL);
+	return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
+				I_BDEV(inode), iov, offset, nr_segs,
+				blkdev_get_blocks, NULL);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,8 +310,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				blkdev_get_block);
+	return block_write_begin_newtrunc(file, mapping, pos, len, flags,
+				pagep, fsdata, blkdev_get_block);
 }
 
 static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -358,12 +359,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 	return retval;
 }
 
-/*
- * Filp is never NULL; the only case when ->fsync() is called with
- * NULL first argument is nfsd_sync_dir() and that's not a directory.
- */
-
-int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, int datasync)
 {
 	struct inode *bd_inode = filp->f_mapping->host;
 	struct block_device *bdev = I_BDEV(bd_inode);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 462859a30141..7ec14097fef1 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -377,6 +377,7 @@ again:
 		if (!list_empty(&worker->pending) ||
 		    !list_empty(&worker->prio_pending)) {
 			spin_unlock_irq(&worker->lock);
+			set_current_state(TASK_RUNNING);
 			goto again;
 		}
 
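
The one-line fix above restores the canonical prepare-to-sleep protocol: the worker sets its task state before re-checking the queues, so when work did arrive it must put itself back to TASK_RUNNING before looping, rather than re-entering the loop half asleep. The general idiom, sketched with hypothetical helpers:

/* Canonical kernel wait loop; have_work()/process_work() are placeholders. */
for (;;) {
	set_current_state(TASK_INTERRUPTIBLE);	/* before the check: closes the race */
	if (have_work(worker)) {
		__set_current_state(TASK_RUNNING);	/* the fix above, in context */
		process_work(worker);
		continue;
	}
	schedule();				/* really sleep until woken */
}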
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7a4dee199832..6ad63f17eca0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -137,8 +137,8 @@ struct btrfs_inode {
 	 * of extent items we've reserved metadata for.
 	 */
 	spinlock_t accounting_lock;
+	atomic_t outstanding_extents;
 	int reserved_extents;
-	int outstanding_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -151,6 +151,7 @@ struct btrfs_inode {
 	 * of these.
 	 */
 	unsigned ordered_data_close:1;
+	unsigned orphan_meta_reserved:1;
 	unsigned dummy_inode:1;
 
 	/*
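
Turning outstanding_extents into an atomic_t lets it be adjusted without taking accounting_lock around every counter update; the call sites live elsewhere in this merge, so the following is only an assumed sketch of the typical adjustment:

/* Hedged sketch of post-conversion call sites (not lines from this diff). */
static void foo_extent_reserved(struct inode *inode)
{
	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
}

static int foo_extent_released(struct inode *inode)
{
	/* returns true when the last outstanding extent is released */
	return atomic_dec_and_test(&BTRFS_I(inode)->outstanding_extents);
}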
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6795a713b205..0d1d966b0fe4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				       struct btrfs_root *root,
 				       struct extent_buffer *buf,
-				       struct extent_buffer *cow)
+				       struct extent_buffer *cow,
+				       int *last_ref)
 {
 	u64 refs;
 	u64 owner;
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 			BUG_ON(ret);
 		}
 		clean_tree_block(trans, root, buf);
+		*last_ref = 1;
 	}
 	return 0;
 }
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *cow;
 	int level;
+	int last_ref = 0;
 	int unlock_orig = 0;
 	u64 parent_start;
 
@@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
-	update_ref_for_cow(trans, root, buf, cow);
+	update_ref_for_cow(trans, root, buf, cow, &last_ref);
+
+	if (root->ref_cows)
+		btrfs_reloc_cow_block(trans, root, buf, cow);
 
 	if (buf == root->node) {
 		WARN_ON(parent && parent != buf);
@@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		extent_buffer_get(cow);
 		spin_unlock(&root->node_lock);
 
-		btrfs_free_tree_block(trans, root, buf->start, buf->len,
-				parent_start, root->root_key.objectid, level);
+		btrfs_free_tree_block(trans, root, buf, parent_start,
+				      last_ref);
 		free_extent_buffer(buf);
 		add_root_to_dirty_list(root);
 	} else {
@@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		btrfs_set_node_ptr_generation(parent, parent_slot,
 					      trans->transid);
 		btrfs_mark_buffer_dirty(parent);
-		btrfs_free_tree_block(trans, root, buf->start, buf->len,
-				parent_start, root->root_key.objectid, level);
+		btrfs_free_tree_block(trans, root, buf, parent_start,
+				      last_ref);
 	}
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
@@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 	return bin_search(eb, key, level, slot);
 }
 
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+	spin_lock(&root->accounting_lock);
+	btrfs_set_root_used(&root->root_item,
+			    btrfs_root_used(&root->root_item) + size);
+	spin_unlock(&root->accounting_lock);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+	spin_lock(&root->accounting_lock);
+	btrfs_set_root_used(&root->root_item,
+			    btrfs_root_used(&root->root_item) - size);
+	spin_unlock(&root->accounting_lock);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
 		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
-		BUG_ON(ret);
+		if (ret) {
+			btrfs_tree_unlock(child);
+			free_extent_buffer(child);
+			goto enospc;
+		}
 
 		spin_lock(&root->node_lock);
 		root->node = child;
@@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_unlock(mid);
 		/* once for the path */
 		free_extent_buffer(mid);
-		ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
-					    0, root->root_key.objectid, level);
+
+		root_sub_used(root, mid->len);
+		btrfs_free_tree_block(trans, root, mid, 0, 1);
 		/* once for the root ptr */
 		free_extent_buffer(mid);
-		return ret;
+		return 0;
 	}
 	if (btrfs_header_nritems(mid) >
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		if (wret < 0 && wret != -ENOSPC)
 			ret = wret;
 		if (btrfs_header_nritems(right) == 0) {
-			u64 bytenr = right->start;
-			u32 blocksize = right->len;
-
 			clean_tree_block(trans, root, right);
 			btrfs_tree_unlock(right);
-			free_extent_buffer(right);
-			right = NULL;
 			wret = del_ptr(trans, root, path, level + 1, pslot +
 				       1);
 			if (wret)
 				ret = wret;
-			wret = btrfs_free_tree_block(trans, root,
-						     bytenr, blocksize, 0,
-						     root->root_key.objectid,
-						     level);
-			if (wret)
-				ret = wret;
+			root_sub_used(root, right->len);
+			btrfs_free_tree_block(trans, root, right, 0, 1);
+			free_extent_buffer(right);
+			right = NULL;
 		} else {
 			struct btrfs_disk_key right_key;
 			btrfs_node_key(right, &right_key, 0);
@@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		BUG_ON(wret == 1);
 	}
 	if (btrfs_header_nritems(mid) == 0) {
-		/* we've managed to empty the middle node, drop it */
-		u64 bytenr = mid->start;
-		u32 blocksize = mid->len;
-
 		clean_tree_block(trans, root, mid);
 		btrfs_tree_unlock(mid);
-		free_extent_buffer(mid);
-		mid = NULL;
 		wret = del_ptr(trans, root, path, level + 1, pslot);
 		if (wret)
 			ret = wret;
-		wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
-					0, root->root_key.objectid, level);
+		root_sub_used(root, mid->len);
+		btrfs_free_tree_block(trans, root, mid, 0, 1);
1152 | if (wret) | 1166 | free_extent_buffer(mid); |
1153 | ret = wret; | 1167 | mid = NULL; |
1154 | } else { | 1168 | } else { |
1155 | /* update the parent key to reflect our changes */ | 1169 | /* update the parent key to reflect our changes */ |
1156 | struct btrfs_disk_key mid_key; | 1170 | struct btrfs_disk_key mid_key; |
@@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1590 | btrfs_release_path(NULL, p); | 1604 | btrfs_release_path(NULL, p); |
1591 | 1605 | ||
1592 | ret = -EAGAIN; | 1606 | ret = -EAGAIN; |
1593 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1607 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
1594 | if (tmp) { | 1608 | if (tmp) { |
1595 | /* | 1609 | /* |
1596 | * If the read above didn't mark this buffer up to date, | 1610 | * If the read above didn't mark this buffer up to date, |
@@ -1740,7 +1754,6 @@ again: | |||
1740 | p->nodes[level + 1], | 1754 | p->nodes[level + 1], |
1741 | p->slots[level + 1], &b); | 1755 | p->slots[level + 1], &b); |
1742 | if (err) { | 1756 | if (err) { |
1743 | free_extent_buffer(b); | ||
1744 | ret = err; | 1757 | ret = err; |
1745 | goto done; | 1758 | goto done; |
1746 | } | 1759 | } |
@@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2076 | if (IS_ERR(c)) | 2089 | if (IS_ERR(c)) |
2077 | return PTR_ERR(c); | 2090 | return PTR_ERR(c); |
2078 | 2091 | ||
2092 | root_add_used(root, root->nodesize); | ||
2093 | |||
2079 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); | 2094 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
2080 | btrfs_set_header_nritems(c, 1); | 2095 | btrfs_set_header_nritems(c, 1); |
2081 | btrfs_set_header_level(c, level); | 2096 | btrfs_set_header_level(c, level); |
@@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2134 | int nritems; | 2149 | int nritems; |
2135 | 2150 | ||
2136 | BUG_ON(!path->nodes[level]); | 2151 | BUG_ON(!path->nodes[level]); |
2152 | btrfs_assert_tree_locked(path->nodes[level]); | ||
2137 | lower = path->nodes[level]; | 2153 | lower = path->nodes[level]; |
2138 | nritems = btrfs_header_nritems(lower); | 2154 | nritems = btrfs_header_nritems(lower); |
2139 | BUG_ON(slot > nritems); | 2155 | BUG_ON(slot > nritems); |
@@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2202 | if (IS_ERR(split)) | 2218 | if (IS_ERR(split)) |
2203 | return PTR_ERR(split); | 2219 | return PTR_ERR(split); |
2204 | 2220 | ||
2221 | root_add_used(root, root->nodesize); | ||
2222 | |||
2205 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); | 2223 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
2206 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2224 | btrfs_set_header_level(split, btrfs_header_level(c)); |
2207 | btrfs_set_header_bytenr(split, split->start); | 2225 | btrfs_set_header_bytenr(split, split->start); |
@@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2415 | 2433 | ||
2416 | if (left_nritems) | 2434 | if (left_nritems) |
2417 | btrfs_mark_buffer_dirty(left); | 2435 | btrfs_mark_buffer_dirty(left); |
2436 | else | ||
2437 | clean_tree_block(trans, root, left); | ||
2438 | |||
2418 | btrfs_mark_buffer_dirty(right); | 2439 | btrfs_mark_buffer_dirty(right); |
2419 | 2440 | ||
2420 | btrfs_item_key(right, &disk_key, 0); | 2441 | btrfs_item_key(right, &disk_key, 0); |
@@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2660 | btrfs_mark_buffer_dirty(left); | 2681 | btrfs_mark_buffer_dirty(left); |
2661 | if (right_nritems) | 2682 | if (right_nritems) |
2662 | btrfs_mark_buffer_dirty(right); | 2683 | btrfs_mark_buffer_dirty(right); |
2684 | else | ||
2685 | clean_tree_block(trans, root, right); | ||
2663 | 2686 | ||
2664 | btrfs_item_key(right, &disk_key, 0); | 2687 | btrfs_item_key(right, &disk_key, 0); |
2665 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2688 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
@@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2669 | /* then fixup the leaf pointer in the path */ | 2692 | /* then fixup the leaf pointer in the path */ |
2670 | if (path->slots[0] < push_items) { | 2693 | if (path->slots[0] < push_items) { |
2671 | path->slots[0] += old_left_nritems; | 2694 | path->slots[0] += old_left_nritems; |
2672 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
2673 | clean_tree_block(trans, root, path->nodes[0]); | ||
2674 | btrfs_tree_unlock(path->nodes[0]); | 2695 | btrfs_tree_unlock(path->nodes[0]); |
2675 | free_extent_buffer(path->nodes[0]); | 2696 | free_extent_buffer(path->nodes[0]); |
2676 | path->nodes[0] = left; | 2697 | path->nodes[0] = left; |
@@ -2932,10 +2953,10 @@ again: | |||
2932 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 2953 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
2933 | root->root_key.objectid, | 2954 | root->root_key.objectid, |
2934 | &disk_key, 0, l->start, 0); | 2955 | &disk_key, 0, l->start, 0); |
2935 | if (IS_ERR(right)) { | 2956 | if (IS_ERR(right)) |
2936 | BUG_ON(1); | ||
2937 | return PTR_ERR(right); | 2957 | return PTR_ERR(right); |
2938 | } | 2958 | |
2959 | root_add_used(root, root->leafsize); | ||
2939 | 2960 | ||
2940 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 2961 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
2941 | btrfs_set_header_bytenr(right, right->start); | 2962 | btrfs_set_header_bytenr(right, right->start); |
@@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
3054 | 3075 | ||
3055 | btrfs_set_path_blocking(path); | 3076 | btrfs_set_path_blocking(path); |
3056 | ret = split_leaf(trans, root, &key, path, ins_len, 1); | 3077 | ret = split_leaf(trans, root, &key, path, ins_len, 1); |
3057 | BUG_ON(ret); | 3078 | if (ret) |
3079 | goto err; | ||
3058 | 3080 | ||
3059 | path->keep_locks = 0; | 3081 | path->keep_locks = 0; |
3060 | btrfs_unlock_up_safe(path, 1); | 3082 | btrfs_unlock_up_safe(path, 1); |
@@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3796 | */ | 3818 | */ |
3797 | btrfs_unlock_up_safe(path, 0); | 3819 | btrfs_unlock_up_safe(path, 0); |
3798 | 3820 | ||
3799 | ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, | 3821 | root_sub_used(root, leaf->len); |
3800 | 0, root->root_key.objectid, 0); | 3822 | |
3801 | return ret; | 3823 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3824 | return 0; | ||
3802 | } | 3825 | } |
3803 | /* | 3826 | /* |
3804 | * delete the item at the leaf level in path. If that empties | 3827 | * delete the item at the leaf level in path. If that empties |
@@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3865 | if (leaf == root->node) { | 3888 | if (leaf == root->node) { |
3866 | btrfs_set_header_level(leaf, 0); | 3889 | btrfs_set_header_level(leaf, 0); |
3867 | } else { | 3890 | } else { |
3891 | btrfs_set_path_blocking(path); | ||
3892 | clean_tree_block(trans, root, leaf); | ||
3868 | ret = btrfs_del_leaf(trans, root, path, leaf); | 3893 | ret = btrfs_del_leaf(trans, root, path, leaf); |
3869 | BUG_ON(ret); | 3894 | BUG_ON(ret); |
3870 | } | 3895 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 746a7248678e..29c20092847e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -34,6 +34,7 @@ | |||
34 | 34 | ||
35 | struct btrfs_trans_handle; | 35 | struct btrfs_trans_handle; |
36 | struct btrfs_transaction; | 36 | struct btrfs_transaction; |
37 | struct btrfs_pending_snapshot; | ||
37 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 38 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
38 | extern struct kmem_cache *btrfs_transaction_cachep; | 39 | extern struct kmem_cache *btrfs_transaction_cachep; |
39 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 40 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
@@ -663,6 +664,7 @@ struct btrfs_csum_item { | |||
663 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 664 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) |
664 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 665 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) |
665 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 666 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) |
667 | #define BTRFS_NR_RAID_TYPES 5 | ||
666 | 668 | ||
667 | struct btrfs_block_group_item { | 669 | struct btrfs_block_group_item { |
668 | __le64 used; | 670 | __le64 used; |
@@ -674,42 +676,46 @@ struct btrfs_space_info { | |||
674 | u64 flags; | 676 | u64 flags; |
675 | 677 | ||
676 | u64 total_bytes; /* total bytes in the space */ | 678 | u64 total_bytes; /* total bytes in the space */ |
677 | u64 bytes_used; /* total bytes used on disk */ | 679 | u64 bytes_used; /* total bytes used, |
680 | this doesn't take mirrors into account */ ||
678 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 681 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
679 | transaction finishes */ | 682 | transaction finishes */ |
680 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 683 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
681 | current allocations */ | 684 | current allocations */ |
682 | u64 bytes_readonly; /* total bytes that are read only */ | 685 | u64 bytes_readonly; /* total bytes that are read only */ |
683 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 686 | |
684 | u64 bytes_root; /* the number of bytes needed to commit a | ||
685 | transaction */ | ||
686 | u64 bytes_may_use; /* number of bytes that may be used for | 687 | u64 bytes_may_use; /* number of bytes that may be used for |
687 | delalloc/allocations */ | 688 | delalloc/allocations */ |
688 | u64 bytes_delalloc; /* number of bytes currently reserved for | 689 | u64 disk_used; /* total bytes used on disk */ |
689 | delayed allocation */ | ||
690 | 690 | ||
691 | int full; /* indicates that we cannot allocate any more | 691 | int full; /* indicates that we cannot allocate any more |
692 | chunks for this space */ | 692 | chunks for this space */ |
693 | int force_alloc; /* set if we need to force a chunk alloc for | 693 | int force_alloc; /* set if we need to force a chunk alloc for |
694 | this space */ | 694 | this space */ |
695 | int force_delalloc; /* make people start doing filemap_flush until | ||
696 | we're under a threshold */ | ||
697 | 695 | ||
698 | struct list_head list; | 696 | struct list_head list; |
699 | 697 | ||
700 | /* for controlling how we free up space for allocations */ | ||
701 | wait_queue_head_t allocate_wait; | ||
702 | wait_queue_head_t flush_wait; | ||
703 | int allocating_chunk; | ||
704 | int flushing; | ||
705 | |||
706 | /* for block groups in our same type */ | 698 | /* for block groups in our same type */ |
707 | struct list_head block_groups; | 699 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
708 | spinlock_t lock; | 700 | spinlock_t lock; |
709 | struct rw_semaphore groups_sem; | 701 | struct rw_semaphore groups_sem; |
710 | atomic_t caching_threads; | 702 | atomic_t caching_threads; |
711 | }; | 703 | }; |
712 | 704 | ||
705 | struct btrfs_block_rsv { | ||
706 | u64 size; | ||
707 | u64 reserved; | ||
708 | u64 freed[2]; | ||
709 | struct btrfs_space_info *space_info; | ||
710 | struct list_head list; | ||
711 | spinlock_t lock; | ||
712 | atomic_t usage; | ||
713 | unsigned int priority:8; | ||
714 | unsigned int durable:1; | ||
715 | unsigned int refill_used:1; | ||
716 | unsigned int full:1; | ||
717 | }; | ||
718 | |||
713 | /* | 719 | /* |
714 | * free clusters are used to claim free space in relatively large chunks, | 720 | * free clusters are used to claim free space in relatively large chunks, |
715 | * allowing us to do fewer seeky writes. They are used for all metadata | 721 | * allowing us to do fewer seeky writes. They are used for all metadata |
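Reading the new struct btrfs_block_rsv, size looks like the target amount the reservation should hold while reserved tracks what has actually been set aside, with full flagged once the two meet; that interpretation is inferred from the field names rather than stated in the patch. A compilable sketch of the relationship:

```c
#include <stdint.h>
#include <stdio.h>

/* demo stand-in; field names mirror btrfs_block_rsv, semantics are inferred */
struct demo_block_rsv {
	uint64_t size;		/* target amount the reservation should hold */
	uint64_t reserved;	/* amount actually set aside so far */
	unsigned int full:1;	/* set once reserved has caught up to size */
};

static void demo_rsv_refill(struct demo_block_rsv *rsv, uint64_t grant)
{
	rsv->reserved += grant;
	if (rsv->reserved >= rsv->size)
		rsv->full = 1;	/* no further refill needed */
}

int main(void)
{
	struct demo_block_rsv rsv = { .size = 8192 };

	demo_rsv_refill(&rsv, 4096);
	demo_rsv_refill(&rsv, 4096);
	printf("reserved=%llu full=%u\n",
	       (unsigned long long)rsv.reserved, rsv.full);	/* 8192, 1 */
	return 0;
}
```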
@@ -760,6 +766,7 @@ struct btrfs_block_group_cache { | |||
760 | spinlock_t lock; | 766 | spinlock_t lock; |
761 | u64 pinned; | 767 | u64 pinned; |
762 | u64 reserved; | 768 | u64 reserved; |
769 | u64 reserved_pinned; | ||
763 | u64 bytes_super; | 770 | u64 bytes_super; |
764 | u64 flags; | 771 | u64 flags; |
765 | u64 sectorsize; | 772 | u64 sectorsize; |
@@ -825,6 +832,22 @@ struct btrfs_fs_info { | |||
825 | /* logical->physical extent mapping */ | 832 | /* logical->physical extent mapping */ |
826 | struct btrfs_mapping_tree mapping_tree; | 833 | struct btrfs_mapping_tree mapping_tree; |
827 | 834 | ||
835 | /* block reservation for extent, checksum and root tree */ | ||
836 | struct btrfs_block_rsv global_block_rsv; | ||
837 | /* block reservation for delayed allocation */ ||
838 | struct btrfs_block_rsv delalloc_block_rsv; | ||
839 | /* block reservation for metadata operations */ | ||
840 | struct btrfs_block_rsv trans_block_rsv; | ||
841 | /* block reservation for chunk tree */ | ||
842 | struct btrfs_block_rsv chunk_block_rsv; | ||
843 | |||
844 | struct btrfs_block_rsv empty_block_rsv; | ||
845 | |||
846 | /* list of block reservations that cross multiple transactions */ | ||
847 | struct list_head durable_block_rsv_list; | ||
848 | |||
849 | struct mutex durable_block_rsv_mutex; | ||
850 | |||
828 | u64 generation; | 851 | u64 generation; |
829 | u64 last_trans_committed; | 852 | u64 last_trans_committed; |
830 | 853 | ||
@@ -927,7 +950,6 @@ struct btrfs_fs_info { | |||
927 | struct btrfs_workers endio_meta_write_workers; | 950 | struct btrfs_workers endio_meta_write_workers; |
928 | struct btrfs_workers endio_write_workers; | 951 | struct btrfs_workers endio_write_workers; |
929 | struct btrfs_workers submit_workers; | 952 | struct btrfs_workers submit_workers; |
930 | struct btrfs_workers enospc_workers; | ||
931 | /* | 953 | /* |
932 | * fixup workers take dirty pages that didn't properly go through | 954 | * fixup workers take dirty pages that didn't properly go through |
933 | * the cow mechanism and make them safe to write. It happens | 955 | * the cow mechanism and make them safe to write. It happens |
@@ -943,6 +965,7 @@ struct btrfs_fs_info { | |||
943 | int do_barriers; | 965 | int do_barriers; |
944 | int closing; | 966 | int closing; |
945 | int log_root_recovering; | 967 | int log_root_recovering; |
968 | int enospc_unlink; | ||
946 | 969 | ||
947 | u64 total_pinned; | 970 | u64 total_pinned; |
948 | 971 | ||
@@ -1012,6 +1035,9 @@ struct btrfs_root { | |||
1012 | struct completion kobj_unregister; | 1035 | struct completion kobj_unregister; |
1013 | struct mutex objectid_mutex; | 1036 | struct mutex objectid_mutex; |
1014 | 1037 | ||
1038 | spinlock_t accounting_lock; | ||
1039 | struct btrfs_block_rsv *block_rsv; | ||
1040 | |||
1015 | struct mutex log_mutex; | 1041 | struct mutex log_mutex; |
1016 | wait_queue_head_t log_writer_wait; | 1042 | wait_queue_head_t log_writer_wait; |
1017 | wait_queue_head_t log_commit_wait[2]; | 1043 | wait_queue_head_t log_commit_wait[2]; |
@@ -1043,7 +1069,6 @@ struct btrfs_root { | |||
1043 | int ref_cows; | 1069 | int ref_cows; |
1044 | int track_dirty; | 1070 | int track_dirty; |
1045 | int in_radix; | 1071 | int in_radix; |
1046 | int clean_orphans; | ||
1047 | 1072 | ||
1048 | u64 defrag_trans_start; | 1073 | u64 defrag_trans_start; |
1049 | struct btrfs_key defrag_progress; | 1074 | struct btrfs_key defrag_progress; |
@@ -1057,8 +1082,11 @@ struct btrfs_root { | |||
1057 | 1082 | ||
1058 | struct list_head root_list; | 1083 | struct list_head root_list; |
1059 | 1084 | ||
1060 | spinlock_t list_lock; | 1085 | spinlock_t orphan_lock; |
1061 | struct list_head orphan_list; | 1086 | struct list_head orphan_list; |
1087 | struct btrfs_block_rsv *orphan_block_rsv; | ||
1088 | int orphan_item_inserted; | ||
1089 | int orphan_cleanup_state; | ||
1062 | 1090 | ||
1063 | spinlock_t inode_lock; | 1091 | spinlock_t inode_lock; |
1064 | /* red-black tree that keeps track of in-memory inodes */ | 1092 | /* red-black tree that keeps track of in-memory inodes */ |
@@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
1965 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1993 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1966 | struct btrfs_root *root, unsigned long count); | 1994 | struct btrfs_root *root, unsigned long count); |
1967 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1995 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1996 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
1997 | struct btrfs_root *root, u64 bytenr, | ||
1998 | u64 num_bytes, u64 *refs, u64 *flags); | ||
1968 | int btrfs_pin_extent(struct btrfs_root *root, | 1999 | int btrfs_pin_extent(struct btrfs_root *root, |
1969 | u64 bytenr, u64 num, int reserved); | 2000 | u64 bytenr, u64 num, int reserved); |
1970 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 2001 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
@@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
1984 | u64 parent, u64 root_objectid, | 2015 | u64 parent, u64 root_objectid, |
1985 | struct btrfs_disk_key *key, int level, | 2016 | struct btrfs_disk_key *key, int level, |
1986 | u64 hint, u64 empty_size); | 2017 | u64 hint, u64 empty_size); |
1987 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2018 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
1988 | struct btrfs_root *root, | 2019 | struct btrfs_root *root, |
1989 | u64 bytenr, u32 blocksize, | 2020 | struct extent_buffer *buf, |
1990 | u64 parent, u64 root_objectid, int level); | 2021 | u64 parent, int last_ref); |
1991 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2022 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
1992 | struct btrfs_root *root, | 2023 | struct btrfs_root *root, |
1993 | u64 bytenr, u32 blocksize, | 2024 | u64 bytenr, u32 blocksize, |
@@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2041 | u64 size); | 2072 | u64 size); |
2042 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2073 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2043 | struct btrfs_root *root, u64 group_start); | 2074 | struct btrfs_root *root, u64 group_start); |
2044 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
2045 | struct btrfs_block_group_cache *group); | ||
2046 | |||
2047 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2075 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2048 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); | 2076 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); |
2049 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2077 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2050 | 2078 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | |
2051 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); | 2079 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2052 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | 2080 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
2053 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | 2081 | struct btrfs_root *root, |
2054 | struct inode *inode, int num_items); | 2082 | int num_items, int *retries); |
2055 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | 2083 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2056 | struct inode *inode, int num_items); | 2084 | struct btrfs_root *root); |
2057 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2085 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
2058 | u64 bytes); | 2086 | struct inode *inode); |
2059 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2087 | void btrfs_orphan_release_metadata(struct inode *inode); |
2060 | struct inode *inode, u64 bytes); | 2088 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, |
2061 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | 2089 | struct btrfs_pending_snapshot *pending); |
2062 | u64 bytes); | 2090 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); |
2063 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2091 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
2064 | u64 bytes); | 2092 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
2093 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | ||
2094 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | ||
2095 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | ||
2096 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
2097 | struct btrfs_block_rsv *rsv); | ||
2098 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
2099 | struct btrfs_block_rsv *rsv); | ||
2100 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
2101 | struct btrfs_root *root, | ||
2102 | struct btrfs_block_rsv *block_rsv, | ||
2103 | u64 num_bytes, int *retries); | ||
2104 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
2105 | struct btrfs_root *root, | ||
2106 | struct btrfs_block_rsv *block_rsv, | ||
2107 | u64 min_reserved, int min_factor); | ||
2108 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
2109 | struct btrfs_block_rsv *dst_rsv, | ||
2110 | u64 num_bytes); | ||
2111 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
2112 | struct btrfs_block_rsv *block_rsv, | ||
2113 | u64 num_bytes); | ||
2114 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
2115 | struct btrfs_block_group_cache *cache); | ||
2116 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
2117 | struct btrfs_block_group_cache *cache); | ||
2065 | /* ctree.c */ | 2118 | /* ctree.c */ |
2066 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2119 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2067 | int level, int *slot); | 2120 | int level, int *slot); |
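Several of the new prototypes (btrfs_trans_reserve_metadata(), btrfs_block_rsv_add()) take an int *retries, which suggests callers loop, letting the allocator flush space between attempts, until the reservation succeeds or the retry budget runs out. Below is a self-contained model of that calling convention; the -EAGAIN retry protocol and the budget of 3 are assumptions for illustration, not the patch's actual contract:

```c
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct demo_rsv {
	uint64_t size;
	uint64_t reserved;
};

/* demo: each failed attempt pretends a flush freed some space */
static int demo_rsv_add(struct demo_rsv *r, uint64_t bytes, int *retries)
{
	if (r->reserved + bytes > r->size) {
		if (++(*retries) > 3)
			return -ENOSPC;	/* budget exhausted, give up */
		r->size += bytes;	/* pretend flushing reclaimed space */
		return -EAGAIN;		/* ask the caller to try again */
	}
	r->reserved += bytes;
	return 0;
}

int main(void)
{
	struct demo_rsv r = { .size = 4096, .reserved = 4096 };	/* starts full */
	int retries = 0, ret;

	while ((ret = demo_rsv_add(&r, 1024, &retries)) == -EAGAIN)
		;	/* retry until satisfied or -ENOSPC */
	printf("ret=%d retries=%d reserved=%llu\n",
	       ret, retries, (unsigned long long)r.reserved);
	return 0;
}
```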
@@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2152 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2205 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2153 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2206 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2154 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2207 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2155 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); | 2208 | int btrfs_drop_snapshot(struct btrfs_root *root, |
2209 | struct btrfs_block_rsv *block_rsv, int update_ref); | ||
2156 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2210 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2157 | struct btrfs_root *root, | 2211 | struct btrfs_root *root, |
2158 | struct extent_buffer *node, | 2212 | struct extent_buffer *node, |
@@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
2245 | struct btrfs_root *root, | 2299 | struct btrfs_root *root, |
2246 | const char *name, int name_len, | 2300 | const char *name, int name_len, |
2247 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 2301 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
2302 | struct btrfs_inode_ref * | ||
2303 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
2304 | struct btrfs_root *root, | ||
2305 | struct btrfs_path *path, | ||
2306 | const char *name, int name_len, | ||
2307 | u64 inode_objectid, u64 ref_objectid, int mod); | ||
2248 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 2308 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
2249 | struct btrfs_root *root, | 2309 | struct btrfs_root *root, |
2250 | struct btrfs_path *path, u64 objectid); | 2310 | struct btrfs_path *path, u64 objectid); |
@@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
2257 | struct btrfs_root *root, u64 bytenr, u64 len); | 2317 | struct btrfs_root *root, u64 bytenr, u64 len); |
2258 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
2259 | struct bio *bio, u32 *dst); | 2319 | struct bio *bio, u32 *dst); |
2320 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
2321 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
2260 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2322 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
2261 | struct btrfs_root *root, | 2323 | struct btrfs_root *root, |
2262 | u64 objectid, u64 pos, | 2324 | u64 objectid, u64 pos, |
@@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2311 | u32 min_type); | 2373 | u32 min_type); |
2312 | 2374 | ||
2313 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2375 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2376 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
2314 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2377 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
2315 | struct extent_state **cached_state); | 2378 | struct extent_state **cached_state); |
2316 | int btrfs_writepages(struct address_space *mapping, | 2379 | int btrfs_writepages(struct address_space *mapping, |
@@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2349 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2412 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2350 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2413 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2351 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2414 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2415 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2416 | struct btrfs_pending_snapshot *pending, | ||
2417 | u64 *bytes_to_reserve); | ||
2418 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2419 | struct btrfs_pending_snapshot *pending); | ||
2420 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2421 | struct btrfs_root *root); | ||
2352 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2422 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2353 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2423 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2354 | void btrfs_add_delayed_iput(struct inode *inode); | 2424 | void btrfs_add_delayed_iput(struct inode *inode); |
2355 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2425 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
2426 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||
2427 | u64 start, u64 num_bytes, u64 min_size, | ||
2428 | loff_t actual_len, u64 *alloc_hint); | ||
2356 | extern const struct dentry_operations btrfs_dentry_operations; | 2429 | extern const struct dentry_operations btrfs_dentry_operations; |
2357 | 2430 | ||
2358 | /* ioctl.c */ | 2431 | /* ioctl.c */ |
@@ -2361,7 +2434,7 @@ void btrfs_update_iflags(struct inode *inode); | |||
2361 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 2434 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
2362 | 2435 | ||
2363 | /* file.c */ | 2436 | /* file.c */ |
2364 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | 2437 | int btrfs_sync_file(struct file *file, int datasync); |
2365 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2438 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
2366 | int skip_pinned); | 2439 | int skip_pinned); |
2367 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2440 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
@@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
2409 | struct btrfs_root *root); | 2482 | struct btrfs_root *root); |
2410 | int btrfs_recover_relocation(struct btrfs_root *root); | 2483 | int btrfs_recover_relocation(struct btrfs_root *root); |
2411 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 2484 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
2485 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
2486 | struct btrfs_root *root, struct extent_buffer *buf, | ||
2487 | struct extent_buffer *cow); | ||
2488 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2489 | struct btrfs_pending_snapshot *pending, | ||
2490 | u64 *bytes_to_reserve); | ||
2491 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
2492 | struct btrfs_pending_snapshot *pending); | ||
2412 | #endif | 2493 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 902ce507c4e3..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -319,107 +319,6 @@ out: | |||
319 | } | 319 | } |
320 | 320 | ||
321 | /* | 321 | /* |
322 | * helper function to lookup reference count and flags of extent. | ||
323 | * | ||
324 | * the head node for delayed ref is used to store the sum of all the | ||
325 | * reference count modifications queued up in the rbtree. the head | ||
326 | * node may also store the extent flags to set. This way you can check | ||
327 | * to see what the reference count and extent flags would be once all | ||
328 | * of the queued delayed refs have been processed. | ||
329 | */ | ||
330 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
331 | struct btrfs_root *root, u64 bytenr, | ||
332 | u64 num_bytes, u64 *refs, u64 *flags) | ||
333 | { | ||
334 | struct btrfs_delayed_ref_node *ref; | ||
335 | struct btrfs_delayed_ref_head *head; | ||
336 | struct btrfs_delayed_ref_root *delayed_refs; | ||
337 | struct btrfs_path *path; | ||
338 | struct btrfs_extent_item *ei; | ||
339 | struct extent_buffer *leaf; | ||
340 | struct btrfs_key key; | ||
341 | u32 item_size; | ||
342 | u64 num_refs; | ||
343 | u64 extent_flags; | ||
344 | int ret; | ||
345 | |||
346 | path = btrfs_alloc_path(); | ||
347 | if (!path) | ||
348 | return -ENOMEM; | ||
349 | |||
350 | key.objectid = bytenr; | ||
351 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
352 | key.offset = num_bytes; | ||
353 | delayed_refs = &trans->transaction->delayed_refs; | ||
354 | again: | ||
355 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
356 | &key, path, 0, 0); | ||
357 | if (ret < 0) | ||
358 | goto out; | ||
359 | |||
360 | if (ret == 0) { | ||
361 | leaf = path->nodes[0]; | ||
362 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
363 | if (item_size >= sizeof(*ei)) { | ||
364 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
365 | struct btrfs_extent_item); | ||
366 | num_refs = btrfs_extent_refs(leaf, ei); | ||
367 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
368 | } else { | ||
369 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
370 | struct btrfs_extent_item_v0 *ei0; | ||
371 | BUG_ON(item_size != sizeof(*ei0)); | ||
372 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
373 | struct btrfs_extent_item_v0); | ||
374 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
375 | /* FIXME: this isn't correct for data */ | ||
376 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
377 | #else | ||
378 | BUG(); | ||
379 | #endif | ||
380 | } | ||
381 | BUG_ON(num_refs == 0); | ||
382 | } else { | ||
383 | num_refs = 0; | ||
384 | extent_flags = 0; | ||
385 | ret = 0; | ||
386 | } | ||
387 | |||
388 | spin_lock(&delayed_refs->lock); | ||
389 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
390 | if (ref) { | ||
391 | head = btrfs_delayed_node_to_head(ref); | ||
392 | if (!mutex_trylock(&head->mutex)) { | ||
393 | atomic_inc(&ref->refs); | ||
394 | spin_unlock(&delayed_refs->lock); | ||
395 | |||
396 | btrfs_release_path(root->fs_info->extent_root, path); | ||
397 | |||
398 | mutex_lock(&head->mutex); | ||
399 | mutex_unlock(&head->mutex); | ||
400 | btrfs_put_delayed_ref(ref); | ||
401 | goto again; | ||
402 | } | ||
403 | if (head->extent_op && head->extent_op->update_flags) | ||
404 | extent_flags |= head->extent_op->flags_to_set; | ||
405 | else | ||
406 | BUG_ON(num_refs == 0); | ||
407 | |||
408 | num_refs += ref->ref_mod; | ||
409 | mutex_unlock(&head->mutex); | ||
410 | } | ||
411 | WARN_ON(num_refs == 0); | ||
412 | if (refs) | ||
413 | *refs = num_refs; | ||
414 | if (flags) | ||
415 | *flags = extent_flags; | ||
416 | out: | ||
417 | spin_unlock(&delayed_refs->lock); | ||
418 | btrfs_free_path(path); | ||
419 | return ret; | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * helper function to update an extent delayed ref in the | 322 | * helper function to update an extent delayed ref in the |
424 | * rbtree. existing and update must both have the same | 323 | * rbtree. existing and update must both have the same |
425 | * bytenr and parent | 324 | * bytenr and parent |
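The removed btrfs_lookup_extent_info() (its prototype is re-added in the ctree.h hunk above, so it apparently moves into extent-tree.c) is built around a trylock-and-retry idiom: it cannot sleep on head->mutex while holding the delayed_refs spinlock, so on contention it pins the ref, drops the spinlock, waits for the mutex holder to finish, and restarts the whole lookup. A userspace sketch of that idiom, with simplified refcounting:

```c
#include <pthread.h>
#include <stdatomic.h>

struct demo_head {
	pthread_mutex_t mutex;
	atomic_int refs;
};

/* drop a pinned reference; real code would free the head at zero */
static void demo_put_head(struct demo_head *h)
{
	atomic_fetch_sub(&h->refs, 1);
}

static void demo_lookup(pthread_spinlock_t *tree_lock, struct demo_head *h)
{
again:
	pthread_spin_lock(tree_lock);
	if (pthread_mutex_trylock(&h->mutex) != 0) {
		/* can't sleep under a spinlock: pin, drop, wait, retry */
		atomic_fetch_add(&h->refs, 1);
		pthread_spin_unlock(tree_lock);
		pthread_mutex_lock(&h->mutex);	/* block until the holder is done */
		pthread_mutex_unlock(&h->mutex);
		demo_put_head(h);
		goto again;			/* state may have changed meanwhile */
	}
	/* ... read the refcount/flags while both locks are held ... */
	pthread_mutex_unlock(&h->mutex);
	pthread_spin_unlock(tree_lock);
}

int main(void)
{
	pthread_spinlock_t tree_lock;
	struct demo_head h = { .mutex = PTHREAD_MUTEX_INITIALIZER, .refs = 1 };

	pthread_spin_init(&tree_lock, PTHREAD_PROCESS_PRIVATE);
	demo_lookup(&tree_lock, &h);	/* uncontended: trylock succeeds at once */
	pthread_spin_destroy(&tree_lock);
	return 0;
}
```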
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
171 | struct btrfs_root *root, u64 bytenr, | ||
172 | u64 num_bytes, u64 *refs, u64 *flags); | ||
173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
174 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 171 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
175 | u64 parent, u64 orig_ref_root, u64 ref_root, | 172 | u64 parent, u64 orig_ref_root, u64 ref_root, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index feca04197d02..f3b287c22caf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -74,6 +74,11 @@ struct async_submit_bio { | |||
74 | int rw; | 74 | int rw; |
75 | int mirror_num; | 75 | int mirror_num; |
76 | unsigned long bio_flags; | 76 | unsigned long bio_flags; |
77 | /* | ||
78 | * bio_offset is optional; it can be used if the pages in the bio | ||
79 | * can't tell us where in the file the bio should go | ||
80 | */ | ||
81 | u64 bio_offset; | ||
77 | struct btrfs_work work; | 82 | struct btrfs_work work; |
78 | }; | 83 | }; |
79 | 84 | ||
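The new bio_offset travels with the request by sitting in async_submit_bio next to the embedded btrfs_work; when the worker fires, container_of() walks back from the work pointer to the wrapper and everything stashed in it, as the run_one_async_start() hunk just below shows. The idiom boiled down to a runnable sketch (all demo names invented):

```c
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_work {
	void (*fn)(struct demo_work *);
};

struct demo_async_submit {
	unsigned long long bio_offset;	/* extra context rides in the wrapper */
	struct demo_work work;		/* embedded piece handed to the queue */
};

static void demo_run_one(struct demo_work *w)
{
	/* walk back from the embedded member to the enclosing struct */
	struct demo_async_submit *a =
		container_of(w, struct demo_async_submit, work);

	printf("bio_offset=%llu\n", a->bio_offset);
}

int main(void)
{
	struct demo_async_submit a = {
		.bio_offset = 4096,
		.work = { .fn = demo_run_one },
	};

	a.work.fn(&a.work);	/* the work queue would make this call */
	return 0;
}
```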
@@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work) | |||
534 | async = container_of(work, struct async_submit_bio, work); | 539 | async = container_of(work, struct async_submit_bio, work); |
535 | fs_info = BTRFS_I(async->inode)->root->fs_info; | 540 | fs_info = BTRFS_I(async->inode)->root->fs_info; |
536 | async->submit_bio_start(async->inode, async->rw, async->bio, | 541 | async->submit_bio_start(async->inode, async->rw, async->bio, |
537 | async->mirror_num, async->bio_flags); | 542 | async->mirror_num, async->bio_flags, |
543 | async->bio_offset); | ||
538 | } | 544 | } |
539 | 545 | ||
540 | static void run_one_async_done(struct btrfs_work *work) | 546 | static void run_one_async_done(struct btrfs_work *work) |
@@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work) | |||
556 | wake_up(&fs_info->async_submit_wait); | 562 | wake_up(&fs_info->async_submit_wait); |
557 | 563 | ||
558 | async->submit_bio_done(async->inode, async->rw, async->bio, | 564 | async->submit_bio_done(async->inode, async->rw, async->bio, |
559 | async->mirror_num, async->bio_flags); | 565 | async->mirror_num, async->bio_flags, |
566 | async->bio_offset); | ||
560 | } | 567 | } |
561 | 568 | ||
562 | static void run_one_async_free(struct btrfs_work *work) | 569 | static void run_one_async_free(struct btrfs_work *work) |
@@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work) | |||
570 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 577 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
571 | int rw, struct bio *bio, int mirror_num, | 578 | int rw, struct bio *bio, int mirror_num, |
572 | unsigned long bio_flags, | 579 | unsigned long bio_flags, |
580 | u64 bio_offset, | ||
573 | extent_submit_bio_hook_t *submit_bio_start, | 581 | extent_submit_bio_hook_t *submit_bio_start, |
574 | extent_submit_bio_hook_t *submit_bio_done) | 582 | extent_submit_bio_hook_t *submit_bio_done) |
575 | { | 583 | { |
@@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
592 | 600 | ||
593 | async->work.flags = 0; | 601 | async->work.flags = 0; |
594 | async->bio_flags = bio_flags; | 602 | async->bio_flags = bio_flags; |
603 | async->bio_offset = bio_offset; | ||
595 | 604 | ||
596 | atomic_inc(&fs_info->nr_async_submits); | 605 | atomic_inc(&fs_info->nr_async_submits); |
597 | 606 | ||
@@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio) | |||
627 | 636 | ||
628 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 637 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
629 | struct bio *bio, int mirror_num, | 638 | struct bio *bio, int mirror_num, |
630 | unsigned long bio_flags) | 639 | unsigned long bio_flags, |
640 | u64 bio_offset) | ||
631 | { | 641 | { |
632 | /* | 642 | /* |
633 | * when we're called for a write, we're already in the async | 643 | * when we're called for a write, we're already in the async |
@@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
638 | } | 648 | } |
639 | 649 | ||
640 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 650 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
641 | int mirror_num, unsigned long bio_flags) | 651 | int mirror_num, unsigned long bio_flags, |
652 | u64 bio_offset) | ||
642 | { | 653 | { |
643 | /* | 654 | /* |
644 | * when we're called for a write, we're already in the async | 655 | * when we're called for a write, we're already in the async |
@@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
648 | } | 659 | } |
649 | 660 | ||
650 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 661 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
651 | int mirror_num, unsigned long bio_flags) | 662 | int mirror_num, unsigned long bio_flags, |
663 | u64 bio_offset) | ||
652 | { | 664 | { |
653 | int ret; | 665 | int ret; |
654 | 666 | ||
@@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
671 | */ | 683 | */ |
672 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 684 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
673 | inode, rw, bio, mirror_num, 0, | 685 | inode, rw, bio, mirror_num, 0, |
686 | bio_offset, | ||
674 | __btree_submit_bio_start, | 687 | __btree_submit_bio_start, |
675 | __btree_submit_bio_done); | 688 | __btree_submit_bio_done); |
676 | } | 689 | } |
@@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
894 | root->ref_cows = 0; | 907 | root->ref_cows = 0; |
895 | root->track_dirty = 0; | 908 | root->track_dirty = 0; |
896 | root->in_radix = 0; | 909 | root->in_radix = 0; |
897 | root->clean_orphans = 0; | 910 | root->orphan_item_inserted = 0; |
911 | root->orphan_cleanup_state = 0; | ||
898 | 912 | ||
899 | root->fs_info = fs_info; | 913 | root->fs_info = fs_info; |
900 | root->objectid = objectid; | 914 | root->objectid = objectid; |
@@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
903 | root->name = NULL; | 917 | root->name = NULL; |
904 | root->in_sysfs = 0; | 918 | root->in_sysfs = 0; |
905 | root->inode_tree = RB_ROOT; | 919 | root->inode_tree = RB_ROOT; |
920 | root->block_rsv = NULL; | ||
921 | root->orphan_block_rsv = NULL; | ||
906 | 922 | ||
907 | INIT_LIST_HEAD(&root->dirty_list); | 923 | INIT_LIST_HEAD(&root->dirty_list); |
908 | INIT_LIST_HEAD(&root->orphan_list); | 924 | INIT_LIST_HEAD(&root->orphan_list); |
909 | INIT_LIST_HEAD(&root->root_list); | 925 | INIT_LIST_HEAD(&root->root_list); |
910 | spin_lock_init(&root->node_lock); | 926 | spin_lock_init(&root->node_lock); |
911 | spin_lock_init(&root->list_lock); | 927 | spin_lock_init(&root->orphan_lock); |
912 | spin_lock_init(&root->inode_lock); | 928 | spin_lock_init(&root->inode_lock); |
929 | spin_lock_init(&root->accounting_lock); | ||
913 | mutex_init(&root->objectid_mutex); | 930 | mutex_init(&root->objectid_mutex); |
914 | mutex_init(&root->log_mutex); | 931 | mutex_init(&root->log_mutex); |
915 | init_waitqueue_head(&root->log_writer_wait); | 932 | init_waitqueue_head(&root->log_writer_wait); |
@@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
968 | return 0; | 985 | return 0; |
969 | } | 986 | } |
970 | 987 | ||
971 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
972 | struct btrfs_fs_info *fs_info) | ||
973 | { | ||
974 | struct extent_buffer *eb; | ||
975 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
976 | u64 start = 0; | ||
977 | u64 end = 0; | ||
978 | int ret; | ||
979 | |||
980 | if (!log_root_tree) | ||
981 | return 0; | ||
982 | |||
983 | while (1) { | ||
984 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
985 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | ||
986 | if (ret) | ||
987 | break; | ||
988 | |||
989 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, | ||
990 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | ||
991 | } | ||
992 | eb = fs_info->log_root_tree->node; | ||
993 | |||
994 | WARN_ON(btrfs_header_level(eb) != 0); | ||
995 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
996 | |||
997 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
998 | eb->start, eb->len); | ||
999 | BUG_ON(ret); | ||
1000 | |||
1001 | free_extent_buffer(eb); | ||
1002 | kfree(fs_info->log_root_tree); | ||
1003 | fs_info->log_root_tree = NULL; | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 988 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
1008 | struct btrfs_fs_info *fs_info) | 989 | struct btrfs_fs_info *fs_info) |
1009 | { | 990 | { |
@@ -1191,19 +1172,23 @@ again: | |||
1191 | if (root) | 1172 | if (root) |
1192 | return root; | 1173 | return root; |
1193 | 1174 | ||
1194 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1195 | if (ret == 0) | ||
1196 | ret = -ENOENT; | ||
1197 | if (ret < 0) | ||
1198 | return ERR_PTR(ret); | ||
1199 | |||
1200 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1175 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
1201 | if (IS_ERR(root)) | 1176 | if (IS_ERR(root)) |
1202 | return root; | 1177 | return root; |
1203 | 1178 | ||
1204 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
1205 | set_anon_super(&root->anon_super, NULL); | 1179 | set_anon_super(&root->anon_super, NULL); |
1206 | 1180 | ||
1181 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
1182 | ret = -ENOENT; | ||
1183 | goto fail; | ||
1184 | } | ||
1185 | |||
1186 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1187 | if (ret < 0) | ||
1188 | goto fail; | ||
1189 | if (ret == 0) | ||
1190 | root->orphan_item_inserted = 1; | ||
1191 | |||
1207 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1192 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
1208 | if (ret) | 1193 | if (ret) |
1209 | goto fail; | 1194 | goto fail; |
@@ -1212,10 +1197,9 @@ again: | |||
1212 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1197 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1213 | (unsigned long)root->root_key.objectid, | 1198 | (unsigned long)root->root_key.objectid, |
1214 | root); | 1199 | root); |
1215 | if (ret == 0) { | 1200 | if (ret == 0) |
1216 | root->in_radix = 1; | 1201 | root->in_radix = 1; |
1217 | root->clean_orphans = 1; | 1202 | |
1218 | } | ||
1219 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1203 | spin_unlock(&fs_info->fs_roots_radix_lock); |
1220 | radix_tree_preload_end(); | 1204 | radix_tree_preload_end(); |
1221 | if (ret) { | 1205 | if (ret) { |
@@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg) | |||
1461 | struct btrfs_root *root = arg; | 1445 | struct btrfs_root *root = arg; |
1462 | 1446 | ||
1463 | do { | 1447 | do { |
1464 | smp_mb(); | ||
1465 | if (root->fs_info->closing) | ||
1466 | break; | ||
1467 | |||
1468 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1448 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1469 | 1449 | ||
1470 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1450 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
@@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg) | |||
1477 | if (freezing(current)) { | 1457 | if (freezing(current)) { |
1478 | refrigerator(); | 1458 | refrigerator(); |
1479 | } else { | 1459 | } else { |
1480 | smp_mb(); | ||
1481 | if (root->fs_info->closing) | ||
1482 | break; | ||
1483 | set_current_state(TASK_INTERRUPTIBLE); | 1460 | set_current_state(TASK_INTERRUPTIBLE); |
1484 | schedule(); | 1461 | if (!kthread_should_stop()) |
1462 | schedule(); | ||
1485 | __set_current_state(TASK_RUNNING); | 1463 | __set_current_state(TASK_RUNNING); |
1486 | } | 1464 | } |
1487 | } while (!kthread_should_stop()); | 1465 | } while (!kthread_should_stop()); |
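The reordering in cleaner_kthread() is the standard kthread stop-race fix: publish TASK_INTERRUPTIBLE first, then test kthread_should_stop(), so a concurrent kthread_stop() either makes the test fail or wakes a task that is already queued to sleep; the stop request can no longer be lost between the check and schedule(). The portable form of the same lost-wakeup discipline is to test the predicate only after arming the wait, sketched here with a condition variable:

```c
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool should_stop;

/* worker side: the analogue of "set state, test the flag, then schedule()" */
static void wait_for_stop(void)
{
	pthread_mutex_lock(&lock);
	while (!should_stop)			/* test only after taking the lock */
		pthread_cond_wait(&cond, &lock);/* atomically releases and sleeps */
	pthread_mutex_unlock(&lock);
}

/* stopper side: the analogue of kthread_stop() */
static void stop_worker(void)
{
	pthread_mutex_lock(&lock);
	should_stop = true;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	stop_worker();		/* flag is set before the wait is armed... */
	wait_for_stop();	/* ...so the while() test returns immediately */
	return 0;
}
```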
@@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg) | |||
1493 | struct btrfs_root *root = arg; | 1471 | struct btrfs_root *root = arg; |
1494 | struct btrfs_trans_handle *trans; | 1472 | struct btrfs_trans_handle *trans; |
1495 | struct btrfs_transaction *cur; | 1473 | struct btrfs_transaction *cur; |
1474 | u64 transid; | ||
1496 | unsigned long now; | 1475 | unsigned long now; |
1497 | unsigned long delay; | 1476 | unsigned long delay; |
1498 | int ret; | 1477 | int ret; |
1499 | 1478 | ||
1500 | do { | 1479 | do { |
1501 | smp_mb(); | ||
1502 | if (root->fs_info->closing) | ||
1503 | break; | ||
1504 | |||
1505 | delay = HZ * 30; | 1480 | delay = HZ * 30; |
1506 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1481 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1507 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1482 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1508 | 1483 | ||
1509 | mutex_lock(&root->fs_info->trans_mutex); | 1484 | spin_lock(&root->fs_info->new_trans_lock); |
1510 | cur = root->fs_info->running_transaction; | 1485 | cur = root->fs_info->running_transaction; |
1511 | if (!cur) { | 1486 | if (!cur) { |
1512 | mutex_unlock(&root->fs_info->trans_mutex); | 1487 | spin_unlock(&root->fs_info->new_trans_lock); |
1513 | goto sleep; | 1488 | goto sleep; |
1514 | } | 1489 | } |
1515 | 1490 | ||
1516 | now = get_seconds(); | 1491 | now = get_seconds(); |
1517 | if (now < cur->start_time || now - cur->start_time < 30) { | 1492 | if (!cur->blocked && |
1518 | mutex_unlock(&root->fs_info->trans_mutex); | 1493 | (now < cur->start_time || now - cur->start_time < 30)) { |
1494 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1519 | delay = HZ * 5; | 1495 | delay = HZ * 5; |
1520 | goto sleep; | 1496 | goto sleep; |
1521 | } | 1497 | } |
1522 | mutex_unlock(&root->fs_info->trans_mutex); | 1498 | transid = cur->transid; |
1523 | trans = btrfs_start_transaction(root, 1); | 1499 | spin_unlock(&root->fs_info->new_trans_lock); |
1524 | ret = btrfs_commit_transaction(trans, root); | ||
1525 | 1500 | ||
1501 | trans = btrfs_join_transaction(root, 1); | ||
1502 | if (transid == trans->transid) { | ||
1503 | ret = btrfs_commit_transaction(trans, root); | ||
1504 | BUG_ON(ret); | ||
1505 | } else { | ||
1506 | btrfs_end_transaction(trans, root); | ||
1507 | } | ||
1526 | sleep: | 1508 | sleep: |
1527 | wake_up_process(root->fs_info->cleaner_kthread); | 1509 | wake_up_process(root->fs_info->cleaner_kthread); |
1528 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1510 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1530,10 +1512,10 @@ sleep: | |||
1530 | if (freezing(current)) { | 1512 | if (freezing(current)) { |
1531 | refrigerator(); | 1513 | refrigerator(); |
1532 | } else { | 1514 | } else { |
1533 | if (root->fs_info->closing) | ||
1534 | break; | ||
1535 | set_current_state(TASK_INTERRUPTIBLE); | 1515 | set_current_state(TASK_INTERRUPTIBLE); |
1536 | schedule_timeout(delay); | 1516 | if (!kthread_should_stop() && |
1517 | !btrfs_transaction_blocked(root->fs_info)) | ||
1518 | schedule_timeout(delay); | ||
1537 | __set_current_state(TASK_RUNNING); | 1519 | __set_current_state(TASK_RUNNING); |
1538 | } | 1520 | } |
1539 | } while (!kthread_should_stop()); | 1521 | } while (!kthread_should_stop()); |
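transaction_kthread now snapshots cur->transid under new_trans_lock, drops the lock to sleep, then joins a transaction and commits only if the running transaction still carries that id; if another committer raced ahead it simply ends its handle rather than forcing a second commit. A sketch of that optimistic generation check (all names here are invented):

```c
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t running_transid = 42;	/* id of the "running transaction" */

static void maybe_commit(void)
{
	uint64_t transid;

	pthread_mutex_lock(&trans_lock);
	transid = running_transid;	/* snapshot the id while locked */
	pthread_mutex_unlock(&trans_lock);

	/* ... sleep or do other work; a commit may race in here ... */

	pthread_mutex_lock(&trans_lock);
	if (transid == running_transid) {
		running_transid++;	/* "commit": still the same transaction */
		printf("committed %llu\n", (unsigned long long)transid);
	} else {
		/* someone else committed first; just "end" our handle */
	}
	pthread_mutex_unlock(&trans_lock);
}

int main(void)
{
	maybe_commit();
	return 0;
}
```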
@@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1620 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1602 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1621 | INIT_LIST_HEAD(&fs_info->space_info); | 1603 | INIT_LIST_HEAD(&fs_info->space_info); |
1622 | btrfs_mapping_init(&fs_info->mapping_tree); | 1604 | btrfs_mapping_init(&fs_info->mapping_tree); |
1605 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | ||
1606 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | ||
1607 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | ||
1608 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | ||
1609 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | ||
1610 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | ||
1611 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
1623 | atomic_set(&fs_info->nr_async_submits, 0); | 1612 | atomic_set(&fs_info->nr_async_submits, 0); |
1624 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1613 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1625 | atomic_set(&fs_info->async_submit_draining, 0); | 1614 | atomic_set(&fs_info->async_submit_draining, 0); |
@@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1759 | min_t(u64, fs_devices->num_devices, | 1748 | min_t(u64, fs_devices->num_devices, |
1760 | fs_info->thread_pool_size), | 1749 | fs_info->thread_pool_size), |
1761 | &fs_info->generic_worker); | 1750 | &fs_info->generic_worker); |
1762 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
1763 | fs_info->thread_pool_size, | ||
1764 | &fs_info->generic_worker); | ||
1765 | 1751 | ||
1766 | /* a higher idle thresh on the submit workers makes it much more | 1752 | /* a higher idle thresh on the submit workers makes it much more |
1767 | * likely that bios will be sent down in a sane order to the | 1753 | * likely that bios will be sent down in a sane order to the |
@@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1809 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1795 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1810 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1796 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1811 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1797 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1812 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
1813 | 1798 | ||
1814 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1799 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1815 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1800 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1912 | 1897 | ||
1913 | csum_root->track_dirty = 1; | 1898 | csum_root->track_dirty = 1; |
1914 | 1899 | ||
1900 | fs_info->generation = generation; | ||
1901 | fs_info->last_trans_committed = generation; | ||
1902 | fs_info->data_alloc_profile = (u64)-1; | ||
1903 | fs_info->metadata_alloc_profile = (u64)-1; | ||
1904 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
1905 | |||
1915 | ret = btrfs_read_block_groups(extent_root); | 1906 | ret = btrfs_read_block_groups(extent_root); |
1916 | if (ret) { | 1907 | if (ret) { |
1917 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 1908 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
1918 | goto fail_block_groups; | 1909 | goto fail_block_groups; |
1919 | } | 1910 | } |
1920 | 1911 | ||
1921 | fs_info->generation = generation; | ||
1922 | fs_info->last_trans_committed = generation; | ||
1923 | fs_info->data_alloc_profile = (u64)-1; | ||
1924 | fs_info->metadata_alloc_profile = (u64)-1; | ||
1925 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
1926 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1912 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
1927 | "btrfs-cleaner"); | 1913 | "btrfs-cleaner"); |
1928 | if (IS_ERR(fs_info->cleaner_kthread)) | 1914 | if (IS_ERR(fs_info->cleaner_kthread)) |
@@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1977 | BUG_ON(ret); | 1963 | BUG_ON(ret); |
1978 | 1964 | ||
1979 | if (!(sb->s_flags & MS_RDONLY)) { | 1965 | if (!(sb->s_flags & MS_RDONLY)) { |
1966 | ret = btrfs_cleanup_fs_roots(fs_info); | ||
1967 | BUG_ON(ret); | ||
1968 | |||
1980 | ret = btrfs_recover_relocation(tree_root); | 1969 | ret = btrfs_recover_relocation(tree_root); |
1981 | if (ret < 0) { | 1970 | if (ret < 0) { |
1982 | printk(KERN_WARNING | 1971 | printk(KERN_WARNING |
@@ -2040,7 +2029,6 @@ fail_sb_buffer: | |||
2040 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2029 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2041 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2030 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2042 | btrfs_stop_workers(&fs_info->submit_workers); | 2031 | btrfs_stop_workers(&fs_info->submit_workers); |
2043 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2044 | fail_iput: | 2032 | fail_iput: |
2045 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2033 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2046 | iput(fs_info->btree_inode); | 2034 | iput(fs_info->btree_inode); |
@@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2405 | down_write(&root->fs_info->cleanup_work_sem); | 2393 | down_write(&root->fs_info->cleanup_work_sem); |
2406 | up_write(&root->fs_info->cleanup_work_sem); | 2394 | up_write(&root->fs_info->cleanup_work_sem); |
2407 | 2395 | ||
2408 | trans = btrfs_start_transaction(root, 1); | 2396 | trans = btrfs_join_transaction(root, 1); |
2409 | ret = btrfs_commit_transaction(trans, root); | 2397 | ret = btrfs_commit_transaction(trans, root); |
2410 | BUG_ON(ret); | 2398 | BUG_ON(ret); |
2411 | /* run commit again to drop the original snapshot */ | 2399 | /* run commit again to drop the original snapshot */ |
2412 | trans = btrfs_start_transaction(root, 1); | 2400 | trans = btrfs_join_transaction(root, 1); |
2413 | btrfs_commit_transaction(trans, root); | 2401 | btrfs_commit_transaction(trans, root); |
2414 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2402 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2415 | BUG_ON(ret); | 2403 | BUG_ON(ret); |
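btrfs_commit_super() now joins the running transaction instead of starting a fresh one. btrfs_start_transaction(root, num_items) may create a new transaction and reserves metadata space for new work, while btrfs_join_transaction() attaches to whatever is already in flight, which is what a flush-everything path on the way to unmount wants. A hedged sketch of the calling pattern (the wrapper name is illustrative; the API calls are the ones used in this patch):

/* Commit whatever transaction is pending without reserving room
 * for new modifications. */
static int commit_pending(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	trans = btrfs_join_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	return btrfs_commit_transaction(trans, root);
}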
@@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root) | |||
2426 | fs_info->closing = 1; | 2414 | fs_info->closing = 1; |
2427 | smp_mb(); | 2415 | smp_mb(); |
2428 | 2416 | ||
2429 | kthread_stop(root->fs_info->transaction_kthread); | ||
2430 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2431 | |||
2432 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2417 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2433 | ret = btrfs_commit_super(root); | 2418 | ret = btrfs_commit_super(root); |
2434 | if (ret) | 2419 | if (ret) |
2435 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2420 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2436 | } | 2421 | } |
2437 | 2422 | ||
2423 | kthread_stop(root->fs_info->transaction_kthread); | ||
2424 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2425 | |||
2438 | fs_info->closing = 2; | 2426 | fs_info->closing = 2; |
2439 | smp_mb(); | 2427 | smp_mb(); |
2440 | 2428 | ||
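The reordering above is the substantive change in close_ctree(): the final commit now runs while the transaction and cleaner kthreads are still alive, and the helpers are only stopped once the commit is done, so the commit path can still hand work to them. Condensed, the shutdown sequence after this hunk is (error handling elided):

/* Sketch of the unmount ordering above. */
fs_info->closing = 1;			/* no new background work */
smp_mb();
if (!(fs_info->sb->s_flags & MS_RDONLY))
	btrfs_commit_super(root);	/* commit with helpers running */
kthread_stop(fs_info->transaction_kthread);
kthread_stop(fs_info->cleaner_kthread);	/* then stop the helpers */
fs_info->closing = 2;			/* fully closing: tear down */
smp_mb();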
@@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root) | |||
2473 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2461 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2474 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2462 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2475 | btrfs_stop_workers(&fs_info->submit_workers); | 2463 | btrfs_stop_workers(&fs_info->submit_workers); |
2476 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2477 | 2464 | ||
2478 | btrfs_close_devices(fs_info->fs_devices); | 2465 | btrfs_close_devices(fs_info->fs_devices); |
2479 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2466 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c958ecbc1916..88e825a0bf21 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
87 | int metadata); | 87 | int metadata); |
88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
89 | int rw, struct bio *bio, int mirror_num, | 89 | int rw, struct bio *bio, int mirror_num, |
90 | unsigned long bio_flags, | 90 | unsigned long bio_flags, u64 bio_offset, |
91 | extent_submit_bio_hook_t *submit_bio_start, | 91 | extent_submit_bio_hook_t *submit_bio_start, |
92 | extent_submit_bio_hook_t *submit_bio_done); | 92 | extent_submit_bio_hook_t *submit_bio_done); |
93 | 93 | ||
@@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | |||
95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 96 | int btrfs_write_tree_block(struct extent_buffer *buf); |
97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
98 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
99 | struct btrfs_fs_info *fs_info); | ||
100 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 98 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
101 | struct btrfs_fs_info *fs_info); | 99 | struct btrfs_fs_info *fs_info); |
102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 100 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c6a4f459ad76..b9080d71991a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -35,10 +35,9 @@ | |||
35 | 35 | ||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc); |
39 | int mark_free); | 39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, |
40 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 40 | u64 num_bytes, int reserve, int sinfo); |
41 | u64 num_bytes, int reserve); | ||
42 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
43 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
44 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
63 | u64 flags, int force); | 62 | u64 flags, int force); |
64 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_root *root, | ||
66 | struct btrfs_path *path, | ||
67 | u64 bytenr, u64 num_bytes, | ||
68 | int is_data, int reserved, | ||
69 | struct extent_buffer **must_clean); | ||
70 | static int find_next_key(struct btrfs_path *path, int level, | 63 | static int find_next_key(struct btrfs_path *path, int level, |
71 | struct btrfs_key *key); | 64 | struct btrfs_key *key); |
72 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 65 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
@@ -91,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | |||
91 | 84 | ||
92 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | 85 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
93 | { | 86 | { |
94 | if (atomic_dec_and_test(&cache->count)) | 87 | if (atomic_dec_and_test(&cache->count)) { |
88 | WARN_ON(cache->pinned > 0); | ||
89 | WARN_ON(cache->reserved > 0); | ||
90 | WARN_ON(cache->reserved_pinned > 0); | ||
95 | kfree(cache); | 91 | kfree(cache); |
92 | } | ||
96 | } | 93 | } |
97 | 94 | ||
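The WARN_ONs added to btrfs_put_block_group() act as a tripwire for the reworked space accounting: by the time the last reference to a block group drops, every pinned and reserved byte must already have been returned. The shape is the usual release-time sanity check, sketched here with a hypothetical object type:

/* Hypothetical illustration: the last dropper of a reference
 * verifies the byte ledger is empty before freeing, so accounting
 * leaks surface as a stack trace instead of staying silent. */
struct counted_obj {
	atomic_t count;
	u64 bytes_outstanding;
};

static void put_counted_obj(struct counted_obj *obj)
{
	if (atomic_dec_and_test(&obj->count)) {
		WARN_ON(obj->bytes_outstanding > 0);
		kfree(obj);
	}
}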
98 | /* | 95 | /* |
@@ -319,7 +316,7 @@ static int caching_kthread(void *data) | |||
319 | 316 | ||
320 | exclude_super_stripes(extent_root, block_group); | 317 | exclude_super_stripes(extent_root, block_group); |
321 | spin_lock(&block_group->space_info->lock); | 318 | spin_lock(&block_group->space_info->lock); |
322 | block_group->space_info->bytes_super += block_group->bytes_super; | 319 | block_group->space_info->bytes_readonly += block_group->bytes_super; |
323 | spin_unlock(&block_group->space_info->lock); | 320 | spin_unlock(&block_group->space_info->lock); |
324 | 321 | ||
325 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
@@ -507,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
507 | struct list_head *head = &info->space_info; | 504 | struct list_head *head = &info->space_info; |
508 | struct btrfs_space_info *found; | 505 | struct btrfs_space_info *found; |
509 | 506 | ||
507 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | ||
508 | BTRFS_BLOCK_GROUP_METADATA; | ||
509 | |||
510 | rcu_read_lock(); | 510 | rcu_read_lock(); |
511 | list_for_each_entry_rcu(found, head, list) { | 511 | list_for_each_entry_rcu(found, head, list) { |
512 | if (found->flags == flags) { | 512 | if (found->flags == flags) { |
@@ -610,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
610 | } | 610 | } |
611 | 611 | ||
612 | /* | 612 | /* |
613 | * helper function to look up the reference count and flags of an extent. | ||
614 | * | ||
615 | * the head node for a delayed ref is used to store the sum of all the | ||
616 | * reference count modifications queued up in the rbtree. the head | ||
617 | * node may also store the extent flags to set. This way you can check | ||
618 | * what the reference count and extent flags will be once all of | ||
619 | * the delayed refs have been processed. | ||
620 | */ | ||
621 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
622 | struct btrfs_root *root, u64 bytenr, | ||
623 | u64 num_bytes, u64 *refs, u64 *flags) | ||
624 | { | ||
625 | struct btrfs_delayed_ref_head *head; | ||
626 | struct btrfs_delayed_ref_root *delayed_refs; | ||
627 | struct btrfs_path *path; | ||
628 | struct btrfs_extent_item *ei; | ||
629 | struct extent_buffer *leaf; | ||
630 | struct btrfs_key key; | ||
631 | u32 item_size; | ||
632 | u64 num_refs; | ||
633 | u64 extent_flags; | ||
634 | int ret; | ||
635 | |||
636 | path = btrfs_alloc_path(); | ||
637 | if (!path) | ||
638 | return -ENOMEM; | ||
639 | |||
640 | key.objectid = bytenr; | ||
641 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
642 | key.offset = num_bytes; | ||
643 | if (!trans) { | ||
644 | path->skip_locking = 1; | ||
645 | path->search_commit_root = 1; | ||
646 | } | ||
647 | again: | ||
648 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
649 | &key, path, 0, 0); | ||
650 | if (ret < 0) | ||
651 | goto out_free; | ||
652 | |||
653 | if (ret == 0) { | ||
654 | leaf = path->nodes[0]; | ||
655 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
656 | if (item_size >= sizeof(*ei)) { | ||
657 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
658 | struct btrfs_extent_item); | ||
659 | num_refs = btrfs_extent_refs(leaf, ei); | ||
660 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
661 | } else { | ||
662 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
663 | struct btrfs_extent_item_v0 *ei0; | ||
664 | BUG_ON(item_size != sizeof(*ei0)); | ||
665 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
666 | struct btrfs_extent_item_v0); | ||
667 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
668 | /* FIXME: this isn't correct for data */ | ||
669 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
670 | #else | ||
671 | BUG(); | ||
672 | #endif | ||
673 | } | ||
674 | BUG_ON(num_refs == 0); | ||
675 | } else { | ||
676 | num_refs = 0; | ||
677 | extent_flags = 0; | ||
678 | ret = 0; | ||
679 | } | ||
680 | |||
681 | if (!trans) | ||
682 | goto out; | ||
683 | |||
684 | delayed_refs = &trans->transaction->delayed_refs; | ||
685 | spin_lock(&delayed_refs->lock); | ||
686 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
687 | if (head) { | ||
688 | if (!mutex_trylock(&head->mutex)) { | ||
689 | atomic_inc(&head->node.refs); | ||
690 | spin_unlock(&delayed_refs->lock); | ||
691 | |||
692 | btrfs_release_path(root->fs_info->extent_root, path); | ||
693 | |||
694 | mutex_lock(&head->mutex); | ||
695 | mutex_unlock(&head->mutex); | ||
696 | btrfs_put_delayed_ref(&head->node); | ||
697 | goto again; | ||
698 | } | ||
699 | if (head->extent_op && head->extent_op->update_flags) | ||
700 | extent_flags |= head->extent_op->flags_to_set; | ||
701 | else | ||
702 | BUG_ON(num_refs == 0); | ||
703 | |||
704 | num_refs += head->node.ref_mod; | ||
705 | mutex_unlock(&head->mutex); | ||
706 | } | ||
707 | spin_unlock(&delayed_refs->lock); | ||
708 | out: | ||
709 | WARN_ON(num_refs == 0); | ||
710 | if (refs) | ||
711 | *refs = num_refs; | ||
712 | if (flags) | ||
713 | *flags = extent_flags; | ||
714 | out_free: | ||
715 | btrfs_free_path(path); | ||
716 | return ret; | ||
717 | } | ||
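A caller-side sketch of the new helper: this mirrors how COW decisions in btrfs consult the effective reference count, though the function name and surrounding context below are stand-ins for a real caller. Passing a NULL trans makes the lookup search the commit root, i.e. the last committed state, with no delayed refs to fold in.

/* Sketch (hypothetical caller): decide whether an extent_buffer is
 * shared, counting delayed refs that have not been run yet. */
static int needs_cow_fixup(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf)
{
	u64 refs = 0;
	u64 flags = 0;
	int ret;

	ret = btrfs_lookup_extent_info(trans, root, buf->start, buf->len,
				       &refs, &flags);
	if (ret)
		return ret;
	/* shared block that still carries per-owner backrefs */
	return refs > 1 && !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF);
}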
718 | |||
719 | /* | ||
613 | * Back reference rules. Back refs have three main goals: | 720 | * Back reference rules. Back refs have three main goals: |
614 | * | 721 | * |
615 | * 1) differentiate between all holders of references to an extent so that | 722 | * 1) differentiate between all holders of references to an extent so that |
@@ -1871,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1871 | return ret; | 1978 | return ret; |
1872 | } | 1979 | } |
1873 | 1980 | ||
1874 | |||
1875 | /* helper function to actually process a single delayed ref entry */ | 1981 | /* helper function to actually process a single delayed ref entry */ |
1876 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 1982 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
1877 | struct btrfs_root *root, | 1983 | struct btrfs_root *root, |
@@ -1891,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1891 | BUG_ON(extent_op); | 1997 | BUG_ON(extent_op); |
1892 | head = btrfs_delayed_node_to_head(node); | 1998 | head = btrfs_delayed_node_to_head(node); |
1893 | if (insert_reserved) { | 1999 | if (insert_reserved) { |
1894 | int mark_free = 0; | 2000 | btrfs_pin_extent(root, node->bytenr, |
1895 | struct extent_buffer *must_clean = NULL; | 2001 | node->num_bytes, 1); |
1896 | |||
1897 | ret = pin_down_bytes(trans, root, NULL, | ||
1898 | node->bytenr, node->num_bytes, | ||
1899 | head->is_data, 1, &must_clean); | ||
1900 | if (ret > 0) | ||
1901 | mark_free = 1; | ||
1902 | |||
1903 | if (must_clean) { | ||
1904 | clean_tree_block(NULL, root, must_clean); | ||
1905 | btrfs_tree_unlock(must_clean); | ||
1906 | free_extent_buffer(must_clean); | ||
1907 | } | ||
1908 | if (head->is_data) { | 2002 | if (head->is_data) { |
1909 | ret = btrfs_del_csums(trans, root, | 2003 | ret = btrfs_del_csums(trans, root, |
1910 | node->bytenr, | 2004 | node->bytenr, |
1911 | node->num_bytes); | 2005 | node->num_bytes); |
1912 | BUG_ON(ret); | 2006 | BUG_ON(ret); |
1913 | } | 2007 | } |
1914 | if (mark_free) { | ||
1915 | ret = btrfs_free_reserved_extent(root, | ||
1916 | node->bytenr, | ||
1917 | node->num_bytes); | ||
1918 | BUG_ON(ret); | ||
1919 | } | ||
1920 | } | 2008 | } |
1921 | mutex_unlock(&head->mutex); | 2009 | mutex_unlock(&head->mutex); |
1922 | return 0; | 2010 | return 0; |
@@ -2347,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
2347 | ret = 0; | 2435 | ret = 0; |
2348 | out: | 2436 | out: |
2349 | btrfs_free_path(path); | 2437 | btrfs_free_path(path); |
2438 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
2439 | WARN_ON(ret > 0); | ||
2350 | return ret; | 2440 | return ret; |
2351 | } | 2441 | } |
2352 | 2442 | ||
@@ -2660,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2660 | struct btrfs_space_info **space_info) | 2750 | struct btrfs_space_info **space_info) |
2661 | { | 2751 | { |
2662 | struct btrfs_space_info *found; | 2752 | struct btrfs_space_info *found; |
2753 | int i; | ||
2754 | int factor; | ||
2755 | |||
2756 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
2757 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2758 | factor = 2; | ||
2759 | else | ||
2760 | factor = 1; | ||
2663 | 2761 | ||
2664 | found = __find_space_info(info, flags); | 2762 | found = __find_space_info(info, flags); |
2665 | if (found) { | 2763 | if (found) { |
2666 | spin_lock(&found->lock); | 2764 | spin_lock(&found->lock); |
2667 | found->total_bytes += total_bytes; | 2765 | found->total_bytes += total_bytes; |
2668 | found->bytes_used += bytes_used; | 2766 | found->bytes_used += bytes_used; |
2767 | found->disk_used += bytes_used * factor; | ||
2669 | found->full = 0; | 2768 | found->full = 0; |
2670 | spin_unlock(&found->lock); | 2769 | spin_unlock(&found->lock); |
2671 | *space_info = found; | 2770 | *space_info = found; |
@@ -2675,18 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2675 | if (!found) | 2774 | if (!found) |
2676 | return -ENOMEM; | 2775 | return -ENOMEM; |
2677 | 2776 | ||
2678 | INIT_LIST_HEAD(&found->block_groups); | 2777 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
2778 | INIT_LIST_HEAD(&found->block_groups[i]); | ||
2679 | init_rwsem(&found->groups_sem); | 2779 | init_rwsem(&found->groups_sem); |
2680 | init_waitqueue_head(&found->flush_wait); | ||
2681 | init_waitqueue_head(&found->allocate_wait); | ||
2682 | spin_lock_init(&found->lock); | 2780 | spin_lock_init(&found->lock); |
2683 | found->flags = flags; | 2781 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
2782 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
2783 | BTRFS_BLOCK_GROUP_METADATA); | ||
2684 | found->total_bytes = total_bytes; | 2784 | found->total_bytes = total_bytes; |
2685 | found->bytes_used = bytes_used; | 2785 | found->bytes_used = bytes_used; |
2786 | found->disk_used = bytes_used * factor; | ||
2686 | found->bytes_pinned = 0; | 2787 | found->bytes_pinned = 0; |
2687 | found->bytes_reserved = 0; | 2788 | found->bytes_reserved = 0; |
2688 | found->bytes_readonly = 0; | 2789 | found->bytes_readonly = 0; |
2689 | found->bytes_delalloc = 0; | 2790 | found->bytes_may_use = 0; |
2690 | found->full = 0; | 2791 | found->full = 0; |
2691 | found->force_alloc = 0; | 2792 | found->force_alloc = 0; |
2692 | *space_info = found; | 2793 | *space_info = found; |
@@ -2711,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
2711 | } | 2812 | } |
2712 | } | 2813 | } |
2713 | 2814 | ||
2714 | static void set_block_group_readonly(struct btrfs_block_group_cache *cache) | ||
2715 | { | ||
2716 | spin_lock(&cache->space_info->lock); | ||
2717 | spin_lock(&cache->lock); | ||
2718 | if (!cache->ro) { | ||
2719 | cache->space_info->bytes_readonly += cache->key.offset - | ||
2720 | btrfs_block_group_used(&cache->item); | ||
2721 | cache->ro = 1; | ||
2722 | } | ||
2723 | spin_unlock(&cache->lock); | ||
2724 | spin_unlock(&cache->space_info->lock); | ||
2725 | } | ||
2726 | |||
2727 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
2728 | { | 2816 | { |
2729 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; |
@@ -2752,491 +2840,50 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
2752 | return flags; | 2840 | return flags; |
2753 | } | 2841 | } |
2754 | 2842 | ||
2755 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | 2843 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
2756 | { | ||
2757 | struct btrfs_fs_info *info = root->fs_info; | ||
2758 | u64 alloc_profile; | ||
2759 | |||
2760 | if (data) { | ||
2761 | alloc_profile = info->avail_data_alloc_bits & | ||
2762 | info->data_alloc_profile; | ||
2763 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
2764 | } else if (root == root->fs_info->chunk_root) { | ||
2765 | alloc_profile = info->avail_system_alloc_bits & | ||
2766 | info->system_alloc_profile; | ||
2767 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
2768 | } else { | ||
2769 | alloc_profile = info->avail_metadata_alloc_bits & | ||
2770 | info->metadata_alloc_profile; | ||
2771 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
2772 | } | ||
2773 | |||
2774 | return btrfs_reduce_alloc_profile(root, data); | ||
2775 | } | ||
2776 | |||
2777 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
2778 | { | ||
2779 | u64 alloc_target; | ||
2780 | |||
2781 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
2782 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
2783 | alloc_target); | ||
2784 | } | ||
2785 | |||
2786 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2787 | { | ||
2788 | u64 num_bytes; | ||
2789 | int level; | ||
2790 | |||
2791 | level = BTRFS_MAX_LEVEL - 2; | ||
2792 | /* | ||
2793 | * NOTE: these calculations are absolutely the worst possible case. | ||
2794 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2795 | * that the tree has grown to its maximum level size. | ||
2796 | */ | ||
2797 | |||
2798 | /* | ||
2799 | * for every item we insert we could insert both an extent item and an | ||
2800 | * extent ref item. Then for every item we insert, we will need to cow | ||
2801 | * both the original leaf, plus the leaf to the left and right of it. | ||
2802 | * | ||
2803 | * Unless we are talking about the extent root, then we just want the | ||
2804 | * number of items * 2, since we just need the extent item plus its ref. | ||
2805 | */ | ||
2806 | if (root == root->fs_info->extent_root) | ||
2807 | num_bytes = num_items * 2; | ||
2808 | else | ||
2809 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2810 | |||
2811 | /* | ||
2812 | * num_bytes is total number of leaves we could need times the leaf | ||
2813 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2814 | * level, down to the leaf level. | ||
2815 | */ | ||
2816 | num_bytes = (num_bytes * root->leafsize) + | ||
2817 | (num_bytes * (level * 2)) * root->nodesize; | ||
2818 | |||
2819 | return num_bytes; | ||
2820 | } | ||
2821 | |||
2822 | /* | ||
2823 | * Unreserve metadata space for delalloc. If we have fewer reserved credits than | ||
2824 | * we have extents, this function does nothing. | ||
2825 | */ | ||
2826 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2827 | struct inode *inode, int num_items) | ||
2828 | { | ||
2829 | struct btrfs_fs_info *info = root->fs_info; | ||
2830 | struct btrfs_space_info *meta_sinfo; | ||
2831 | u64 num_bytes; | ||
2832 | u64 alloc_target; | ||
2833 | bool bug = false; | ||
2834 | |||
2835 | /* get the space info for where the metadata will live */ | ||
2836 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
2837 | meta_sinfo = __find_space_info(info, alloc_target); | ||
2838 | |||
2839 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
2840 | num_items); | ||
2841 | |||
2842 | spin_lock(&meta_sinfo->lock); | ||
2843 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
2844 | if (BTRFS_I(inode)->reserved_extents <= | ||
2845 | BTRFS_I(inode)->outstanding_extents) { | ||
2846 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2847 | spin_unlock(&meta_sinfo->lock); | ||
2848 | return 0; | ||
2849 | } | ||
2850 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2851 | |||
2852 | BTRFS_I(inode)->reserved_extents -= num_items; | ||
2853 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
2854 | |||
2855 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2856 | bug = true; | ||
2857 | meta_sinfo->bytes_delalloc = 0; | ||
2858 | } else { | ||
2859 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2860 | } | ||
2861 | spin_unlock(&meta_sinfo->lock); | ||
2862 | |||
2863 | BUG_ON(bug); | ||
2864 | |||
2865 | return 0; | ||
2866 | } | ||
2867 | |||
2868 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2869 | { | 2844 | { |
2870 | u64 thresh; | 2845 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
2871 | 2846 | flags |= root->fs_info->avail_data_alloc_bits & | |
2872 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2847 | root->fs_info->data_alloc_profile; |
2873 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2848 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
2874 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | 2849 | flags |= root->fs_info->avail_system_alloc_bits & |
2875 | meta_sinfo->bytes_may_use; | 2850 | root->fs_info->system_alloc_profile; |
2876 | 2851 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | |
2877 | thresh = meta_sinfo->total_bytes - thresh; | 2852 | flags |= root->fs_info->avail_metadata_alloc_bits & |
2878 | thresh *= 80; | 2853 | root->fs_info->metadata_alloc_profile; |
2879 | do_div(thresh, 100); | 2854 | return btrfs_reduce_alloc_profile(root, flags); |
2880 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2881 | meta_sinfo->force_delalloc = 1; | ||
2882 | else | ||
2883 | meta_sinfo->force_delalloc = 0; | ||
2884 | } | 2855 | } |
2885 | 2856 | ||
2886 | struct async_flush { | 2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
2887 | struct btrfs_root *root; | ||
2888 | struct btrfs_space_info *info; | ||
2889 | struct btrfs_work work; | ||
2890 | }; | ||
2891 | |||
2892 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
2893 | { | 2858 | { |
2894 | struct async_flush *async; | 2859 | u64 flags; |
2895 | struct btrfs_root *root; | ||
2896 | struct btrfs_space_info *info; | ||
2897 | |||
2898 | async = container_of(work, struct async_flush, work); | ||
2899 | root = async->root; | ||
2900 | info = async->info; | ||
2901 | |||
2902 | btrfs_start_delalloc_inodes(root, 0); | ||
2903 | wake_up(&info->flush_wait); | ||
2904 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2905 | |||
2906 | spin_lock(&info->lock); | ||
2907 | info->flushing = 0; | ||
2908 | spin_unlock(&info->lock); | ||
2909 | wake_up(&info->flush_wait); | ||
2910 | |||
2911 | kfree(async); | ||
2912 | } | ||
2913 | |||
2914 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2915 | { | ||
2916 | DEFINE_WAIT(wait); | ||
2917 | u64 used; | ||
2918 | |||
2919 | while (1) { | ||
2920 | prepare_to_wait(&info->flush_wait, &wait, | ||
2921 | TASK_UNINTERRUPTIBLE); | ||
2922 | spin_lock(&info->lock); | ||
2923 | if (!info->flushing) { | ||
2924 | spin_unlock(&info->lock); | ||
2925 | break; | ||
2926 | } | ||
2927 | |||
2928 | used = info->bytes_used + info->bytes_reserved + | ||
2929 | info->bytes_pinned + info->bytes_readonly + | ||
2930 | info->bytes_super + info->bytes_root + | ||
2931 | info->bytes_may_use + info->bytes_delalloc; | ||
2932 | if (used < info->total_bytes) { | ||
2933 | spin_unlock(&info->lock); | ||
2934 | break; | ||
2935 | } | ||
2936 | spin_unlock(&info->lock); | ||
2937 | schedule(); | ||
2938 | } | ||
2939 | finish_wait(&info->flush_wait, &wait); | ||
2940 | } | ||
2941 | |||
2942 | static void flush_delalloc(struct btrfs_root *root, | ||
2943 | struct btrfs_space_info *info) | ||
2944 | { | ||
2945 | struct async_flush *async; | ||
2946 | bool wait = false; | ||
2947 | |||
2948 | spin_lock(&info->lock); | ||
2949 | 2860 | ||
2950 | if (!info->flushing) | 2861 | if (data) |
2951 | info->flushing = 1; | 2862 | flags = BTRFS_BLOCK_GROUP_DATA; |
2863 | else if (root == root->fs_info->chunk_root) | ||
2864 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
2952 | else | 2865 | else |
2953 | wait = true; | 2866 | flags = BTRFS_BLOCK_GROUP_METADATA; |
2954 | |||
2955 | spin_unlock(&info->lock); | ||
2956 | |||
2957 | if (wait) { | ||
2958 | wait_on_flush(info); | ||
2959 | return; | ||
2960 | } | ||
2961 | |||
2962 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2963 | if (!async) | ||
2964 | goto flush; | ||
2965 | |||
2966 | async->root = root; | ||
2967 | async->info = info; | ||
2968 | async->work.func = flush_delalloc_async; | ||
2969 | 2867 | ||
2970 | btrfs_queue_worker(&root->fs_info->enospc_workers, | 2868 | return get_alloc_profile(root, flags); |
2971 | &async->work); | ||
2972 | wait_on_flush(info); | ||
2973 | return; | ||
2974 | |||
2975 | flush: | ||
2976 | btrfs_start_delalloc_inodes(root, 0); | ||
2977 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2978 | |||
2979 | spin_lock(&info->lock); | ||
2980 | info->flushing = 0; | ||
2981 | spin_unlock(&info->lock); | ||
2982 | wake_up(&info->flush_wait); | ||
2983 | } | 2869 | } |
2984 | 2870 | ||
2985 | static int maybe_allocate_chunk(struct btrfs_root *root, | 2871 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) |
2986 | struct btrfs_space_info *info) | ||
2987 | { | ||
2988 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
2989 | struct btrfs_trans_handle *trans; | ||
2990 | bool wait = false; | ||
2991 | int ret = 0; | ||
2992 | u64 min_metadata; | ||
2993 | u64 free_space; | ||
2994 | |||
2995 | free_space = btrfs_super_total_bytes(disk_super); | ||
2996 | /* | ||
2997 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
2998 | * space in the volume. | ||
2999 | */ | ||
3000 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
3001 | div64_u64(free_space * 5, 100)); | ||
3002 | if (info->total_bytes >= min_metadata) { | ||
3003 | spin_unlock(&info->lock); | ||
3004 | return 0; | ||
3005 | } | ||
3006 | |||
3007 | if (info->full) { | ||
3008 | spin_unlock(&info->lock); | ||
3009 | return 0; | ||
3010 | } | ||
3011 | |||
3012 | if (!info->allocating_chunk) { | ||
3013 | info->force_alloc = 1; | ||
3014 | info->allocating_chunk = 1; | ||
3015 | } else { | ||
3016 | wait = true; | ||
3017 | } | ||
3018 | |||
3019 | spin_unlock(&info->lock); | ||
3020 | |||
3021 | if (wait) { | ||
3022 | wait_event(info->allocate_wait, | ||
3023 | !info->allocating_chunk); | ||
3024 | return 1; | ||
3025 | } | ||
3026 | |||
3027 | trans = btrfs_start_transaction(root, 1); | ||
3028 | if (!trans) { | ||
3029 | ret = -ENOMEM; | ||
3030 | goto out; | ||
3031 | } | ||
3032 | |||
3033 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3034 | 4096 + 2 * 1024 * 1024, | ||
3035 | info->flags, 0); | ||
3036 | btrfs_end_transaction(trans, root); | ||
3037 | if (ret) | ||
3038 | goto out; | ||
3039 | out: | ||
3040 | spin_lock(&info->lock); | ||
3041 | info->allocating_chunk = 0; | ||
3042 | spin_unlock(&info->lock); | ||
3043 | wake_up(&info->allocate_wait); | ||
3044 | |||
3045 | if (ret) | ||
3046 | return 0; | ||
3047 | return 1; | ||
3048 | } | ||
3049 | |||
3050 | /* | ||
3051 | * Reserve metadata space for delalloc. | ||
3052 | */ | ||
3053 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3054 | struct inode *inode, int num_items) | ||
3055 | { | ||
3056 | struct btrfs_fs_info *info = root->fs_info; | ||
3057 | struct btrfs_space_info *meta_sinfo; | ||
3058 | u64 num_bytes; | ||
3059 | u64 used; | ||
3060 | u64 alloc_target; | ||
3061 | int flushed = 0; | ||
3062 | int force_delalloc; | ||
3063 | |||
3064 | /* get the space info for where the metadata will live */ | ||
3065 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3066 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3067 | |||
3068 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
3069 | num_items); | ||
3070 | again: | ||
3071 | spin_lock(&meta_sinfo->lock); | ||
3072 | |||
3073 | force_delalloc = meta_sinfo->force_delalloc; | ||
3074 | |||
3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3077 | |||
3078 | if (!flushed) | ||
3079 | meta_sinfo->bytes_delalloc += num_bytes; | ||
3080 | |||
3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3085 | |||
3086 | if (used > meta_sinfo->total_bytes) { | ||
3087 | flushed++; | ||
3088 | |||
3089 | if (flushed == 1) { | ||
3090 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3091 | goto again; | ||
3092 | flushed++; | ||
3093 | } else { | ||
3094 | spin_unlock(&meta_sinfo->lock); | ||
3095 | } | ||
3096 | |||
3097 | if (flushed == 2) { | ||
3098 | filemap_flush(inode->i_mapping); | ||
3099 | goto again; | ||
3100 | } else if (flushed == 3) { | ||
3101 | flush_delalloc(root, meta_sinfo); | ||
3102 | goto again; | ||
3103 | } | ||
3104 | spin_lock(&meta_sinfo->lock); | ||
3105 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
3106 | spin_unlock(&meta_sinfo->lock); | ||
3107 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3108 | BTRFS_I(inode)->outstanding_extents, | ||
3109 | BTRFS_I(inode)->reserved_extents); | ||
3110 | dump_space_info(meta_sinfo, 0, 0); | ||
3111 | return -ENOSPC; | ||
3112 | } | ||
3113 | |||
3114 | BTRFS_I(inode)->reserved_extents += num_items; | ||
3115 | check_force_delalloc(meta_sinfo); | ||
3116 | spin_unlock(&meta_sinfo->lock); | ||
3117 | |||
3118 | if (!flushed && force_delalloc) | ||
3119 | filemap_flush(inode->i_mapping); | ||
3120 | |||
3121 | return 0; | ||
3122 | } | ||
3123 | |||
3124 | /* | ||
3125 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3126 | * be paired with btrfs_reserve_metadata_space. | ||
3127 | * | ||
3128 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3129 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3130 | * operations which will result in more used metadata, so we want to make sure we | ||
3131 | * can do that without issue. | ||
3132 | */ | ||
3133 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3134 | { | ||
3135 | struct btrfs_fs_info *info = root->fs_info; | ||
3136 | struct btrfs_space_info *meta_sinfo; | ||
3137 | u64 num_bytes; | ||
3138 | u64 alloc_target; | ||
3139 | bool bug = false; | ||
3140 | |||
3141 | /* get the space info for where the metadata will live */ | ||
3142 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3143 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3144 | |||
3145 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3146 | |||
3147 | spin_lock(&meta_sinfo->lock); | ||
3148 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3149 | bug = true; | ||
3150 | meta_sinfo->bytes_may_use = 0; | ||
3151 | } else { | ||
3152 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3153 | } | ||
3154 | spin_unlock(&meta_sinfo->lock); | ||
3155 | |||
3156 | BUG_ON(bug); | ||
3157 | |||
3158 | return 0; | ||
3159 | } | ||
3160 | |||
3161 | /* | ||
3162 | * Reserve some metadata space for use. We'll calculate the worst-case number | ||
3163 | * of bytes that would be needed to modify num_items number of items. If we | ||
3164 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3165 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3166 | * items you reserved, since whatever metadata you needed should have already | ||
3167 | * been allocated. | ||
3168 | * | ||
3169 | * This will commit the transaction to make more space if we don't have enough | ||
3170 | * metadata space. The only time we don't do this is if we're reserving space | ||
3171 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3172 | * caller's responsibility to handle it properly. | ||
3173 | */ | ||
3174 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3175 | { | 2872 | { |
3176 | struct btrfs_fs_info *info = root->fs_info; | 2873 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, |
3177 | struct btrfs_space_info *meta_sinfo; | 2874 | BTRFS_BLOCK_GROUP_DATA); |
3178 | u64 num_bytes; | ||
3179 | u64 used; | ||
3180 | u64 alloc_target; | ||
3181 | int retries = 0; | ||
3182 | |||
3183 | /* get the space info for where the metadata will live */ | ||
3184 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3185 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3186 | |||
3187 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3188 | again: | ||
3189 | spin_lock(&meta_sinfo->lock); | ||
3190 | |||
3191 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3192 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3193 | |||
3194 | if (!retries) | ||
3195 | meta_sinfo->bytes_may_use += num_bytes; | ||
3196 | |||
3197 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3198 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3199 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3200 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3201 | |||
3202 | if (used > meta_sinfo->total_bytes) { | ||
3203 | retries++; | ||
3204 | if (retries == 1) { | ||
3205 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3206 | goto again; | ||
3207 | retries++; | ||
3208 | } else { | ||
3209 | spin_unlock(&meta_sinfo->lock); | ||
3210 | } | ||
3211 | |||
3212 | if (retries == 2) { | ||
3213 | flush_delalloc(root, meta_sinfo); | ||
3214 | goto again; | ||
3215 | } | ||
3216 | spin_lock(&meta_sinfo->lock); | ||
3217 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3218 | spin_unlock(&meta_sinfo->lock); | ||
3219 | |||
3220 | dump_space_info(meta_sinfo, 0, 0); | ||
3221 | return -ENOSPC; | ||
3222 | } | ||
3223 | |||
3224 | check_force_delalloc(meta_sinfo); | ||
3225 | spin_unlock(&meta_sinfo->lock); | ||
3226 | |||
3227 | return 0; | ||
3228 | } | 2875 | } |
3229 | 2876 | ||
3230 | /* | 2877 | /* |
3231 | * This will check the space that the inode allocates from to make sure we have | 2878 | * This will check the space that the inode allocates from to make sure we have
3232 | * enough room for the requested number of bytes. | 2879 | * enough room for the requested number of bytes.
3233 | */ | 2880 | */ |
3234 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2881 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) |
3235 | u64 bytes) | ||
3236 | { | 2882 | { |
3237 | struct btrfs_space_info *data_sinfo; | 2883 | struct btrfs_space_info *data_sinfo; |
2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3238 | u64 used; | 2885 | u64 used; |
3239 | int ret = 0, committed = 0, flushed = 0; | 2886 | int ret = 0, committed = 0; |
3240 | 2887 | ||
3241 | /* make sure bytes are sectorsize aligned */ | 2888 | /* make sure bytes are sectorsize aligned */ |
3242 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
@@ -3248,21 +2895,13 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
3248 | again: | 2895 | again: |
3249 | /* make sure we have enough space to handle the data first */ | 2896 | /* make sure we have enough space to handle the data first */ |
3250 | spin_lock(&data_sinfo->lock); | 2897 | spin_lock(&data_sinfo->lock); |
3251 | used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + | 2898 | used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + |
3252 | data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + | 2899 | data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + |
3253 | data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + | 2900 | data_sinfo->bytes_may_use; |
3254 | data_sinfo->bytes_super; | ||
3255 | 2901 | ||
3256 | if (used + bytes > data_sinfo->total_bytes) { | 2902 | if (used + bytes > data_sinfo->total_bytes) { |
3257 | struct btrfs_trans_handle *trans; | 2903 | struct btrfs_trans_handle *trans; |
3258 | 2904 | ||
3259 | if (!flushed) { | ||
3260 | spin_unlock(&data_sinfo->lock); | ||
3261 | flush_delalloc(root, data_sinfo); | ||
3262 | flushed = 1; | ||
3263 | goto again; | ||
3264 | } | ||
3265 | |||
3266 | /* | 2905 | /* |
3267 | * if we don't have enough free bytes in this space then we need | 2906 | * if we don't have enough free bytes in this space then we need |
3268 | * to alloc a new chunk. | 2907 | * to alloc a new chunk. |
@@ -3274,15 +2913,15 @@ again: | |||
3274 | spin_unlock(&data_sinfo->lock); | 2913 | spin_unlock(&data_sinfo->lock); |
3275 | alloc: | 2914 | alloc: |
3276 | alloc_target = btrfs_get_alloc_profile(root, 1); | 2915 | alloc_target = btrfs_get_alloc_profile(root, 1); |
3277 | trans = btrfs_start_transaction(root, 1); | 2916 | trans = btrfs_join_transaction(root, 1); |
3278 | if (!trans) | 2917 | if (IS_ERR(trans)) |
3279 | return -ENOMEM; | 2918 | return PTR_ERR(trans); |
3280 | 2919 | ||
3281 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3282 | bytes + 2 * 1024 * 1024, | 2921 | bytes + 2 * 1024 * 1024, |
3283 | alloc_target, 0); | 2922 | alloc_target, 0); |
3284 | btrfs_end_transaction(trans, root); | 2923 | btrfs_end_transaction(trans, root); |
3285 | if (ret) | 2924 | if (ret < 0) |
3286 | return ret; | 2925 | return ret; |
3287 | 2926 | ||
3288 | if (!data_sinfo) { | 2927 | if (!data_sinfo) { |
@@ -3297,25 +2936,26 @@ alloc: | |||
3297 | if (!committed && !root->fs_info->open_ioctl_trans) { | 2936 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3298 | committed = 1; | 2937 | committed = 1; |
3299 | trans = btrfs_join_transaction(root, 1); | 2938 | trans = btrfs_join_transaction(root, 1); |
3300 | if (!trans) | 2939 | if (IS_ERR(trans)) |
3301 | return -ENOMEM; | 2940 | return PTR_ERR(trans); |
3302 | ret = btrfs_commit_transaction(trans, root); | 2941 | ret = btrfs_commit_transaction(trans, root); |
3303 | if (ret) | 2942 | if (ret) |
3304 | return ret; | 2943 | return ret; |
3305 | goto again; | 2944 | goto again; |
3306 | } | 2945 | } |
3307 | 2946 | ||
3308 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 2947 | #if 0 /* I hope we never need this code again, just in case */ |
3309 | ", %llu bytes_used, %llu bytes_reserved, " | 2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " |
3310 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " | 2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " |
3311 | "%llu total\n", (unsigned long long)bytes, | 2950 | "%llu bytes_readonly, %llu may use %llu total\n", |
3312 | (unsigned long long)data_sinfo->bytes_delalloc, | 2951 | (unsigned long long)bytes, |
3313 | (unsigned long long)data_sinfo->bytes_used, | 2952 | (unsigned long long)data_sinfo->bytes_used, |
3314 | (unsigned long long)data_sinfo->bytes_reserved, | 2953 | (unsigned long long)data_sinfo->bytes_reserved, |
3315 | (unsigned long long)data_sinfo->bytes_pinned, | 2954 | (unsigned long long)data_sinfo->bytes_pinned, |
3316 | (unsigned long long)data_sinfo->bytes_readonly, | 2955 | (unsigned long long)data_sinfo->bytes_readonly, |
3317 | (unsigned long long)data_sinfo->bytes_may_use, | 2956 | (unsigned long long)data_sinfo->bytes_may_use, |
3318 | (unsigned long long)data_sinfo->total_bytes); | 2957 | (unsigned long long)data_sinfo->total_bytes); |
2958 | #endif | ||
3319 | return -ENOSPC; | 2959 | return -ENOSPC; |
3320 | } | 2960 | } |
3321 | data_sinfo->bytes_may_use += bytes; | 2961 | data_sinfo->bytes_may_use += bytes; |
@@ -3326,12 +2966,13 @@ alloc: | |||
3326 | } | 2966 | } |
3327 | 2967 | ||
3328 | /* | 2968 | /* |
3329 | * if there was an error for whatever reason after calling | 2969 | * called when we are clearing a delalloc extent from the
3330 | * btrfs_check_data_free_space, call this so we can clean up the counters. | 2970 | * inode's io_tree or there was an error for whatever reason
2971 | * after calling btrfs_check_data_free_space | ||
3331 | */ | 2972 | */ |
3332 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2973 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
3333 | struct inode *inode, u64 bytes) | ||
3334 | { | 2974 | { |
2975 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3335 | struct btrfs_space_info *data_sinfo; | 2976 | struct btrfs_space_info *data_sinfo; |
3336 | 2977 | ||
3337 | /* make sure bytes are sectorsize aligned */ | 2978 | /* make sure bytes are sectorsize aligned */ |
@@ -3344,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root, | |||
3344 | spin_unlock(&data_sinfo->lock); | 2985 | spin_unlock(&data_sinfo->lock); |
3345 | } | 2986 | } |
3346 | 2987 | ||
3347 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
3348 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
3349 | u64 bytes) | ||
3350 | { | ||
3351 | struct btrfs_space_info *data_sinfo; | ||
3352 | |||
3353 | /* get the space info for where this inode will be storing its data */ | ||
3354 | data_sinfo = BTRFS_I(inode)->space_info; | ||
3355 | |||
3356 | /* make sure we have enough space to handle the data first */ | ||
3357 | spin_lock(&data_sinfo->lock); | ||
3358 | data_sinfo->bytes_delalloc += bytes; | ||
3359 | |||
3360 | /* | ||
3361 | * we are adding a delalloc extent without calling | ||
3362 | * btrfs_check_data_free_space first. This happens on a weird | ||
3363 | * writepage condition, but shouldn't hurt our accounting | ||
3364 | */ | ||
3365 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
3366 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
3367 | BTRFS_I(inode)->reserved_bytes = 0; | ||
3368 | } else { | ||
3369 | data_sinfo->bytes_may_use -= bytes; | ||
3370 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
3371 | } | ||
3372 | |||
3373 | spin_unlock(&data_sinfo->lock); | ||
3374 | } | ||
3375 | |||
3376 | /* called when we are clearing a delalloc extent from the inode's io_tree */ | ||
3377 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
3378 | u64 bytes) | ||
3379 | { | ||
3380 | struct btrfs_space_info *info; | ||
3381 | |||
3382 | info = BTRFS_I(inode)->space_info; | ||
3383 | |||
3384 | spin_lock(&info->lock); | ||
3385 | info->bytes_delalloc -= bytes; | ||
3386 | spin_unlock(&info->lock); | ||
3387 | } | ||
3388 | |||
3389 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 2988 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
3390 | { | 2989 | { |
3391 | struct list_head *head = &info->space_info; | 2990 | struct list_head *head = &info->space_info; |
@@ -3399,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
3399 | rcu_read_unlock(); | 2998 | rcu_read_unlock(); |
3400 | } | 2999 | } |
3401 | 3000 | ||
3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | ||
3002 | u64 alloc_bytes) | ||
3003 | { | ||
3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | ||
3005 | |||
3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3008 | return 0; | ||
3009 | |||
3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3012 | return 0; | ||
3013 | |||
3014 | return 1; | ||
3015 | } | ||
3016 | |||
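should_alloc_chunk() declines a new chunk while either (a) at least 256MB of writable space would remain free after the allocation, or (b) allocated bytes would stay under 80% of writable space; div_factor(n, 8) in btrfs computes n * 8 / 10. A small userspace model of the same arithmetic, with invented numbers:

#include <stdio.h>
#include <stdint.h>

/* Userspace model of should_alloc_chunk(); div_factor(n, f) is
 * n * f / 10 in btrfs, so factor 8 means 80%. */
static uint64_t div_factor(uint64_t num, int factor)
{
	return num * factor / 10;
}

static int should_alloc_chunk(uint64_t total, uint64_t readonly,
			      uint64_t used, uint64_t reserved,
			      uint64_t alloc_bytes)
{
	uint64_t writable = total - readonly;

	/* plenty of slack would remain: no new chunk */
	if (used + reserved + alloc_bytes + 256ULL * 1024 * 1024 < writable)
		return 0;
	/* still under 80% of writable space: no new chunk */
	if (used + reserved + alloc_bytes < div_factor(writable, 8))
		return 0;
	return 1;
}

int main(void)
{
	uint64_t gib = 1024ULL * 1024 * 1024;

	/* 10GiB writable, 7GiB used, 0.5GiB requested: 7.5GiB still
	 * leaves well over 256MiB free, so this prints 0. */
	printf("%d\n", should_alloc_chunk(10 * gib, 0, 7 * gib, 0, gib / 2));
	/* 9.5GiB used and another 0.5GiB wanted: no slack left and
	 * over 80% of writable space, so this prints 1. */
	printf("%d\n", should_alloc_chunk(10 * gib, 0, 19 * gib / 2,
					  0, gib / 2));
	return 0;
}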
3402 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3017 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
3403 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3018 | struct btrfs_root *extent_root, u64 alloc_bytes, |
3404 | u64 flags, int force) | 3019 | u64 flags, int force) |
3405 | { | 3020 | { |
3406 | struct btrfs_space_info *space_info; | 3021 | struct btrfs_space_info *space_info; |
3407 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3408 | u64 thresh; | ||
3409 | int ret = 0; | 3023 | int ret = 0; |
3410 | 3024 | ||
3411 | mutex_lock(&fs_info->chunk_mutex); | 3025 | mutex_lock(&fs_info->chunk_mutex); |
@@ -3428,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3428 | goto out; | 3042 | goto out; |
3429 | } | 3043 | } |
3430 | 3044 | ||
3431 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { |
3432 | thresh = div_factor(thresh, 8); | ||
3433 | if (!force && | ||
3434 | (space_info->bytes_used + space_info->bytes_pinned + | ||
3435 | space_info->bytes_reserved + alloc_bytes) < thresh) { | ||
3436 | spin_unlock(&space_info->lock); | 3046 | spin_unlock(&space_info->lock); |
3437 | goto out; | 3047 | goto out; |
3438 | } | 3048 | } |
@@ -3454,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3454 | spin_lock(&space_info->lock); | 3064 | spin_lock(&space_info->lock); |
3455 | if (ret) | 3065 | if (ret) |
3456 | space_info->full = 1; | 3066 | space_info->full = 1; |
3067 | else | ||
3068 | ret = 1; | ||
3457 | space_info->force_alloc = 0; | 3069 | space_info->force_alloc = 0; |
3458 | spin_unlock(&space_info->lock); | 3070 | spin_unlock(&space_info->lock); |
3459 | out: | 3071 | out: |
@@ -3461,13 +3073,713 @@ out: | |||
3461 | return ret; | 3073 | return ret; |
3462 | } | 3074 | } |
3463 | 3075 | ||
3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
3077 | struct btrfs_root *root, | ||
3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
3079 | { | ||
3080 | int ret; | ||
3081 | int end_trans = 0; | ||
3082 | |||
3083 | if (sinfo->full) | ||
3084 | return 0; | ||
3085 | |||
3086 | spin_lock(&sinfo->lock); | ||
3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
3088 | spin_unlock(&sinfo->lock); | ||
3089 | if (!ret) | ||
3090 | return 0; | ||
3091 | |||
3092 | if (!trans) { | ||
3093 | trans = btrfs_join_transaction(root, 1); | ||
3094 | BUG_ON(IS_ERR(trans)); | ||
3095 | end_trans = 1; | ||
3096 | } | ||
3097 | |||
3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3099 | num_bytes + 2 * 1024 * 1024, | ||
3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
3101 | |||
3102 | if (end_trans) | ||
3103 | btrfs_end_transaction(trans, root); | ||
3104 | |||
3105 | return ret == 1 ? 1 : 0; | ||
3106 | } | ||
3107 | |||
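One subtlety above: with the `ret = 1` added to do_chunk_alloc() earlier in this patch, that function now returns 1 when a chunk was actually allocated, 0 when it declined, and a negative errno on failure, and maybe_allocate_chunk() collapses this to a did-we-grow boolean. A sketch of the retry shape this enables, condensed from should_retry_reserve() below rather than copied from it:

/* Sketch: retry a metadata reservation only if the space_info
 * actually grew by a fresh chunk. */
static int reserve_with_growth(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes)
{
	int ret;
again:
	ret = reserve_metadata_bytes(block_rsv, num_bytes);
	if (ret == -ENOSPC &&
	    maybe_allocate_chunk(trans, root, block_rsv->space_info,
				 num_bytes))
		goto again;	/* more space exists now; try again */
	return ret;
}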
3108 | /* | ||
3109 | * shrink metadata reservation for delalloc | ||
3110 | */ | ||
3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
3112 | struct btrfs_root *root, u64 to_reclaim) | ||
3113 | { | ||
3114 | struct btrfs_block_rsv *block_rsv; | ||
3115 | u64 reserved; | ||
3116 | u64 max_reclaim; | ||
3117 | u64 reclaimed = 0; | ||
3118 | int pause = 1; | ||
3119 | int ret; | ||
3120 | |||
3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | ||
3122 | spin_lock(&block_rsv->lock); | ||
3123 | reserved = block_rsv->reserved; | ||
3124 | spin_unlock(&block_rsv->lock); | ||
3125 | |||
3126 | if (reserved == 0) | ||
3127 | return 0; | ||
3128 | |||
3129 | max_reclaim = min(reserved, to_reclaim); | ||
3130 | |||
3131 | while (1) { | ||
3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
3133 | if (!ret) { | ||
3134 | __set_current_state(TASK_INTERRUPTIBLE); | ||
3135 | schedule_timeout(pause); | ||
3136 | pause <<= 1; | ||
3137 | if (pause > HZ / 10) | ||
3138 | pause = HZ / 10; | ||
3139 | } else { | ||
3140 | pause = 1; | ||
3141 | } | ||
3142 | |||
3143 | spin_lock(&block_rsv->lock); | ||
3144 | if (reserved > block_rsv->reserved) | ||
3145 | reclaimed = reserved - block_rsv->reserved; | ||
3146 | reserved = block_rsv->reserved; | ||
3147 | spin_unlock(&block_rsv->lock); | ||
3148 | |||
3149 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
3150 | break; | ||
3151 | |||
3152 | if (trans && trans->transaction->blocked) | ||
3153 | return -EAGAIN; | ||
3154 | } | ||
3155 | return reclaimed >= to_reclaim; | ||
3156 | } | ||
3157 | |||
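The pause handling in shrink_delalloc() is a bounded exponential backoff: every pass that fails to kick off writeback on an inode doubles the sleep, capped at HZ/10 (100ms), and any progress resets the pause to a single jiffy. A userspace model of the no-progress growth, assuming HZ=250:

#include <stdio.h>

int main(void)
{
	int hz = 250;	/* assumed; HZ/10 caps the sleep at ~100ms */
	int pause = 1;
	int i;

	for (i = 0; i < 8; i++) {
		printf("sleep %d jiffies\n", pause);
		pause <<= 1;
		if (pause > hz / 10)
			pause = hz / 10;
	}
	return 0;	/* prints 1 2 4 8 16 25 25 25 */
}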
3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | ||
3159 | struct btrfs_root *root, | ||
3160 | struct btrfs_block_rsv *block_rsv, | ||
3161 | u64 num_bytes, int *retries) | ||
3162 | { | ||
3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3164 | int ret; | ||
3165 | |||
3166 | if ((*retries) > 2) | ||
3167 | return -ENOSPC; | ||
3168 | |||
3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | ||
3170 | if (ret) | ||
3171 | return 1; | ||
3172 | |||
3173 | if (trans && trans->transaction->in_commit) | ||
3174 | return -ENOSPC; | ||
3175 | |||
3176 | ret = shrink_delalloc(trans, root, num_bytes); | ||
3177 | if (ret) | ||
3178 | return ret; | ||
3179 | |||
3180 | spin_lock(&space_info->lock); | ||
3181 | if (space_info->bytes_pinned < num_bytes) | ||
3182 | ret = 1; | ||
3183 | spin_unlock(&space_info->lock); | ||
3184 | if (ret) | ||
3185 | return -ENOSPC; | ||
3186 | |||
3187 | (*retries)++; | ||
3188 | |||
3189 | if (trans) | ||
3190 | return -EAGAIN; | ||
3191 | |||
3192 | trans = btrfs_join_transaction(root, 1); | ||
3193 | BUG_ON(IS_ERR(trans)); | ||
3194 | ret = btrfs_commit_transaction(trans, root); | ||
3195 | BUG_ON(ret); | ||
3196 | |||
3197 | return 1; | ||
3198 | } | ||
3199 | |||
3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | ||
3201 | u64 num_bytes) | ||
3202 | { | ||
3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3204 | u64 unused; | ||
3205 | int ret = -ENOSPC; | ||
3206 | |||
3207 | spin_lock(&space_info->lock); | ||
3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | ||
3209 | space_info->bytes_pinned + space_info->bytes_readonly; | ||
3210 | |||
3211 | if (unused < space_info->total_bytes) | ||
3212 | unused = space_info->total_bytes - unused; | ||
3213 | else | ||
3214 | unused = 0; | ||
3215 | |||
3216 | if (unused >= num_bytes) { | ||
3217 | if (block_rsv->priority >= 10) { | ||
3218 | space_info->bytes_reserved += num_bytes; | ||
3219 | ret = 0; | ||
3220 | } else { | ||
3221 | if ((unused + block_rsv->reserved) * | ||
3222 | block_rsv->priority >= | ||
3223 | (num_bytes + block_rsv->reserved) * 10) { | ||
3224 | space_info->bytes_reserved += num_bytes; | ||
3225 | ret = 0; | ||
3226 | } | ||
3227 | } | ||
3228 | } | ||
3229 | spin_unlock(&space_info->lock); | ||
3230 | |||
3231 | return ret; | ||
3232 | } | ||
3233 | |||
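The low-priority branch above is a proportional admission test: a reserve with priority p is granted num_bytes only if (unused + reserved) * p >= (num_bytes + reserved) * 10, i.e. the free space must cover the post-grant demand by a factor of 10/p. With the default priority of 6 set in btrfs_init_block_rsv() below, an empty reserve needs roughly 1.67x its request to be free; priority >= 10 bypasses the test entirely. A runnable userspace check of the arithmetic:

#include <stdio.h>
#include <stdint.h>

/* Model of the grant logic in reserve_metadata_bytes(). */
static int may_reserve(uint64_t unused, uint64_t reserved,
		       uint64_t num_bytes, int priority)
{
	if (unused < num_bytes)
		return 0;
	if (priority >= 10)
		return 1;
	return (unused + reserved) * priority >=
	       (num_bytes + reserved) * 10;
}

int main(void)
{
	uint64_t mb = 1024 * 1024;

	/* 2MB free vs 1MB asked at priority 6: 12 >= 10, granted. */
	printf("%d\n", may_reserve(2 * mb, 0, mb, 6));
	/* 1.5MB free vs 1MB asked: 9 < 10, refused despite fitting. */
	printf("%d\n", may_reserve(3 * mb / 2, 0, mb, 6));
	return 0;
}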
3234 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | ||
3235 | struct btrfs_root *root) | ||
3236 | { | ||
3237 | struct btrfs_block_rsv *block_rsv; | ||
3238 | if (root->ref_cows) | ||
3239 | block_rsv = trans->block_rsv; | ||
3240 | else | ||
3241 | block_rsv = root->block_rsv; | ||
3242 | |||
3243 | if (!block_rsv) | ||
3244 | block_rsv = &root->fs_info->empty_block_rsv; | ||
3245 | |||
3246 | return block_rsv; | ||
3247 | } | ||
3248 | |||
3249 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
3250 | u64 num_bytes) | ||
3251 | { | ||
3252 | int ret = -ENOSPC; | ||
3253 | spin_lock(&block_rsv->lock); | ||
3254 | if (block_rsv->reserved >= num_bytes) { | ||
3255 | block_rsv->reserved -= num_bytes; | ||
3256 | if (block_rsv->reserved < block_rsv->size) | ||
3257 | block_rsv->full = 0; | ||
3258 | ret = 0; | ||
3259 | } | ||
3260 | spin_unlock(&block_rsv->lock); | ||
3261 | return ret; | ||
3262 | } | ||
3263 | |||
3264 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||
3265 | u64 num_bytes, int update_size) | ||
3266 | { | ||
3267 | spin_lock(&block_rsv->lock); | ||
3268 | block_rsv->reserved += num_bytes; | ||
3269 | if (update_size) | ||
3270 | block_rsv->size += num_bytes; | ||
3271 | else if (block_rsv->reserved >= block_rsv->size) | ||
3272 | block_rsv->full = 1; | ||
3273 | spin_unlock(&block_rsv->lock); | ||
3274 | } | ||
3275 | |||
3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | ||
3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | ||
3278 | { | ||
3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3280 | |||
3281 | spin_lock(&block_rsv->lock); | ||
3282 | if (num_bytes == (u64)-1) | ||
3283 | num_bytes = block_rsv->size; | ||
3284 | block_rsv->size -= num_bytes; | ||
3285 | if (block_rsv->reserved >= block_rsv->size) { | ||
3286 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3287 | block_rsv->reserved = block_rsv->size; | ||
3288 | block_rsv->full = 1; | ||
3289 | } else { | ||
3290 | num_bytes = 0; | ||
3291 | } | ||
3292 | spin_unlock(&block_rsv->lock); | ||
3293 | |||
3294 | if (num_bytes > 0) { | ||
3295 | if (dest) { | ||
3296 | block_rsv_add_bytes(dest, num_bytes, 0); | ||
3297 | } else { | ||
3298 | spin_lock(&space_info->lock); | ||
3299 | space_info->bytes_reserved -= num_bytes; | ||
3300 | spin_unlock(&space_info->lock); | ||
3301 | } | ||
3302 | } | ||
3303 | } | ||
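
block_rsv_release_bytes() shrinks the reservation target first, and only the part of reserved that now exceeds the new size is actually handed back, either to dest or to the space_info when there is no destination. A small user-space model of that arithmetic:

#include <stdint.h>
#include <stdio.h>

struct rsv { uint64_t size, reserved; };	/* stand-in for btrfs_block_rsv */

/* returns the bytes released to the destination / space_info */
static uint64_t release(struct rsv *r, uint64_t num_bytes)
{
	if (num_bytes == (uint64_t)-1)
		num_bytes = r->size;		/* release everything */
	r->size -= num_bytes;
	if (r->reserved >= r->size) {
		uint64_t excess = r->reserved - r->size;
		r->reserved = r->size;		/* rsv is now exactly full */
		return excess;
	}
	return 0;				/* still under-reserved */
}

int main(void)
{
	struct rsv r = { .size = 100, .reserved = 80 };
	printf("%llu\n", (unsigned long long)release(&r, 50)); /* 30 */
	printf("%llu\n", (unsigned long long)release(&r, 40)); /* 40 */
	return 0;
}
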
3304 | |||
3305 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | ||
3306 | struct btrfs_block_rsv *dst, u64 num_bytes) | ||
3307 | { | ||
3308 | int ret; | ||
3309 | |||
3310 | ret = block_rsv_use_bytes(src, num_bytes); | ||
3311 | if (ret) | ||
3312 | return ret; | ||
3313 | |||
3314 | block_rsv_add_bytes(dst, num_bytes, 1); | ||
3315 | return 0; | ||
3316 | } | ||
3317 | |||
3318 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | ||
3319 | { | ||
3320 | memset(rsv, 0, sizeof(*rsv)); | ||
3321 | spin_lock_init(&rsv->lock); | ||
3322 | atomic_set(&rsv->usage, 1); | ||
3323 | rsv->priority = 6; | ||
3324 | INIT_LIST_HEAD(&rsv->list); | ||
3325 | } | ||
3326 | |||
3327 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | ||
3328 | { | ||
3329 | struct btrfs_block_rsv *block_rsv; | ||
3330 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3331 | u64 alloc_target; | ||
3332 | |||
3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | ||
3334 | if (!block_rsv) | ||
3335 | return NULL; | ||
3336 | |||
3337 | btrfs_init_block_rsv(block_rsv); | ||
3338 | |||
3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3340 | block_rsv->space_info = __find_space_info(fs_info, | ||
3341 | BTRFS_BLOCK_GROUP_METADATA); | ||
3342 | |||
3343 | return block_rsv; | ||
3344 | } | ||
3345 | |||
3346 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
3347 | struct btrfs_block_rsv *rsv) | ||
3348 | { | ||
3349 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | ||
3350 | btrfs_block_rsv_release(root, rsv, (u64)-1); | ||
3351 | if (!rsv->durable) | ||
3352 | kfree(rsv); | ||
3353 | } | ||
3354 | } | ||
3355 | |||
3356 | /* | ||
3357 | * make the block_rsv struct able to capture freed space. | ||
3358 | * the captured space will be re-added to the block_rsv | ||
3359 | * struct after transaction commit | ||
3360 | */ | ||
3361 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
3362 | struct btrfs_block_rsv *block_rsv) | ||
3363 | { | ||
3364 | block_rsv->durable = 1; | ||
3365 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
3366 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); | ||
3367 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
3368 | } | ||
3369 | |||
3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
3371 | struct btrfs_root *root, | ||
3372 | struct btrfs_block_rsv *block_rsv, | ||
3373 | u64 num_bytes, int *retries) | ||
3374 | { | ||
3375 | int ret; | ||
3376 | |||
3377 | if (num_bytes == 0) | ||
3378 | return 0; | ||
3379 | again: | ||
3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
3381 | if (!ret) { | ||
3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3383 | return 0; | ||
3384 | } | ||
3385 | |||
3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
3387 | if (ret > 0) | ||
3388 | goto again; | ||
3389 | |||
3390 | return ret; | ||
3391 | } | ||
3392 | |||
3393 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
3394 | struct btrfs_root *root, | ||
3395 | struct btrfs_block_rsv *block_rsv, | ||
3396 | u64 min_reserved, int min_factor) | ||
3397 | { | ||
3398 | u64 num_bytes = 0; | ||
3399 | int commit_trans = 0; | ||
3400 | int ret = -ENOSPC; | ||
3401 | |||
3402 | if (!block_rsv) | ||
3403 | return 0; | ||
3404 | |||
3405 | spin_lock(&block_rsv->lock); | ||
3406 | if (min_factor > 0) | ||
3407 | num_bytes = div_factor(block_rsv->size, min_factor); | ||
3408 | if (min_reserved > num_bytes) | ||
3409 | num_bytes = min_reserved; | ||
3410 | |||
3411 | if (block_rsv->reserved >= num_bytes) { | ||
3412 | ret = 0; | ||
3413 | } else { | ||
3414 | num_bytes -= block_rsv->reserved; | ||
3415 | if (block_rsv->durable && | ||
3416 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
3417 | commit_trans = 1; | ||
3418 | } | ||
3419 | spin_unlock(&block_rsv->lock); | ||
3420 | if (!ret) | ||
3421 | return 0; | ||
3422 | |||
3423 | if (block_rsv->refill_used) { | ||
3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
3425 | if (!ret) { | ||
3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
3427 | return 0; | ||
3428 | } | ||
3429 | } | ||
3430 | |||
3431 | if (commit_trans) { | ||
3432 | if (trans) | ||
3433 | return -EAGAIN; | ||
3434 | |||
3435 | trans = btrfs_join_transaction(root, 1); | ||
3436 | BUG_ON(IS_ERR(trans)); | ||
3437 | ret = btrfs_commit_transaction(trans, root); | ||
3438 | return 0; | ||
3439 | } | ||
3440 | |||
3441 | WARN_ON(1); | ||
3442 | printk(KERN_INFO "block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3443 | (unsigned long long)block_rsv->size, (unsigned long long)block_rsv->reserved, | ||
3444 | (unsigned long long)block_rsv->freed[0], (unsigned long long)block_rsv->freed[1]); | ||
3445 | |||
3446 | return -ENOSPC; | ||
3447 | } | ||
3448 | |||
3449 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
3450 | struct btrfs_block_rsv *dst_rsv, | ||
3451 | u64 num_bytes) | ||
3452 | { | ||
3453 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3454 | } | ||
3455 | |||
3456 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
3457 | struct btrfs_block_rsv *block_rsv, | ||
3458 | u64 num_bytes) | ||
3459 | { | ||
3460 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3461 | if (global_rsv->full || global_rsv == block_rsv || | ||
3462 | block_rsv->space_info != global_rsv->space_info) | ||
3463 | global_rsv = NULL; | ||
3464 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | ||
3465 | } | ||
3466 | |||
3467 | /* | ||
3468 | * helper to calculate the size of the global block reservation. | ||
3469 | * the desired value is the sum of space used by the extent tree, | ||
3470 | * checksum tree and root tree | ||
3471 | */ | ||
3472 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | ||
3473 | { | ||
3474 | struct btrfs_space_info *sinfo; | ||
3475 | u64 num_bytes; | ||
3476 | u64 meta_used; | ||
3477 | u64 data_used; | ||
3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
3479 | #if 0 | ||
3480 | /* | ||
3481 | * per tree used space accounting can be inaccurate, so we | ||
3482 | * can't rely on it. | ||
3483 | */ | ||
3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
3487 | |||
3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
3491 | |||
3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
3495 | #endif | ||
3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
3497 | spin_lock(&sinfo->lock); | ||
3498 | data_used = sinfo->bytes_used; | ||
3499 | spin_unlock(&sinfo->lock); | ||
3500 | |||
3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
3502 | spin_lock(&sinfo->lock); | ||
3503 | meta_used = sinfo->bytes_used; | ||
3504 | spin_unlock(&sinfo->lock); | ||
3505 | |||
3506 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | ||
3507 | csum_size * 2; | ||
3508 | num_bytes += div64_u64(data_used + meta_used, 50); | ||
3509 | |||
3510 | if (num_bytes * 3 > meta_used) | ||
3511 | num_bytes = div64_u64(meta_used, 3); | ||
3512 | |||
3513 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | ||
3514 | } | ||
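
Concretely, the sizing above reserves csum_size bytes for every data block, doubled, plus 2% (1/50) of total usage, capped at a third of the metadata in use and rounded up. A worked user-space example with assumed geometry (4 KiB blocks, 4-byte crc32c checksums, 4 KiB leaves):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t data_used = 100ULL << 30;	/* 100 GiB of data in use */
	uint64_t meta_used = 2ULL << 30;	/* 2 GiB of metadata in use */
	int blocksize_bits = 12;		/* 4 KiB blocks (assumed) */
	int csum_size = 4;			/* crc32c (assumed) */
	uint64_t align = 4096ULL << 10;		/* leafsize << 10, as above */

	/* csum bytes for every data block, doubled as in the code above */
	uint64_t n = (data_used >> blocksize_bits) * csum_size * 2;
	n += (data_used + meta_used) / 50;	/* + 2% of total usage */
	if (n * 3 > meta_used)			/* cap at a third of meta */
		n = meta_used / 3;
	n = (n + align - 1) / align * align;	/* ALIGN() */
	printf("global rsv size: %llu bytes\n", (unsigned long long)n);
	return 0;
}
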
3515 | |||
3516 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3517 | { | ||
3518 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||
3519 | struct btrfs_space_info *sinfo = block_rsv->space_info; | ||
3520 | u64 num_bytes; | ||
3521 | |||
3522 | num_bytes = calc_global_metadata_size(fs_info); | ||
3523 | |||
3524 | spin_lock(&block_rsv->lock); | ||
3525 | spin_lock(&sinfo->lock); | ||
3526 | |||
3527 | block_rsv->size = num_bytes; | ||
3528 | |||
3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | ||
3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | ||
3531 | |||
3532 | if (sinfo->total_bytes > num_bytes) { | ||
3533 | num_bytes = sinfo->total_bytes - num_bytes; | ||
3534 | block_rsv->reserved += num_bytes; | ||
3535 | sinfo->bytes_reserved += num_bytes; | ||
3536 | } | ||
3537 | |||
3538 | if (block_rsv->reserved >= block_rsv->size) { | ||
3539 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3540 | sinfo->bytes_reserved -= num_bytes; | ||
3541 | block_rsv->reserved = block_rsv->size; | ||
3542 | block_rsv->full = 1; | ||
3543 | } | ||
3544 | #if 0 | ||
3545 | printk(KERN_INFO "global block rsv size %llu reserved %llu\n", | ||
3546 | (unsigned long long)block_rsv->size, (unsigned long long)block_rsv->reserved); | ||
3547 | #endif | ||
3548 | spin_unlock(&sinfo->lock); | ||
3549 | spin_unlock(&block_rsv->lock); | ||
3550 | } | ||
3551 | |||
3552 | static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3553 | { | ||
3554 | struct btrfs_space_info *space_info; | ||
3555 | |||
3556 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | ||
3557 | fs_info->chunk_block_rsv.space_info = space_info; | ||
3558 | fs_info->chunk_block_rsv.priority = 10; | ||
3559 | |||
3560 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
3561 | fs_info->global_block_rsv.space_info = space_info; | ||
3562 | fs_info->global_block_rsv.priority = 10; | ||
3563 | fs_info->global_block_rsv.refill_used = 1; | ||
3564 | fs_info->delalloc_block_rsv.space_info = space_info; | ||
3565 | fs_info->trans_block_rsv.space_info = space_info; | ||
3566 | fs_info->empty_block_rsv.space_info = space_info; | ||
3567 | fs_info->empty_block_rsv.priority = 10; | ||
3568 | |||
3569 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | ||
3570 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | ||
3571 | fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||
3572 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | ||
3573 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | ||
3574 | |||
3575 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
3576 | |||
3577 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
3578 | |||
3579 | update_global_block_rsv(fs_info); | ||
3580 | } | ||
3581 | |||
3582 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3583 | { | ||
3584 | block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); | ||
3585 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); | ||
3586 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); | ||
3587 | WARN_ON(fs_info->trans_block_rsv.size > 0); | ||
3588 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | ||
3589 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | ||
3590 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | ||
3591 | } | ||
3592 | |||
3593 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | ||
3594 | { | ||
3595 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
3596 | 3 * num_items; | ||
3597 | } | ||
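
With the (assumed) geometry of 4 KiB leaves and nodes and BTRFS_MAX_LEVEL == 8, the estimate above charges one leaf plus a node per remaining level, tripled for CoW slack, which is 96 KiB per item:

#include <stdio.h>

int main(void)
{
	unsigned long leafsize = 4096, nodesize = 4096;	/* assumed geometry */
	int max_level = 8;	/* BTRFS_MAX_LEVEL */
	int num_items = 1;

	unsigned long bytes =
		(leafsize + nodesize * (max_level - 1)) * 3 * num_items;
	printf("%lu bytes per item\n", bytes);	/* 98304 = 96 KiB */
	return 0;
}
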
3598 | |||
3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3600 | struct btrfs_root *root, | ||
3601 | int num_items, int *retries) | ||
3602 | { | ||
3603 | u64 num_bytes; | ||
3604 | int ret; | ||
3605 | |||
3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3607 | return 0; | ||
3608 | |||
3609 | num_bytes = calc_trans_metadata_size(root, num_items); | ||
3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3611 | num_bytes, retries); | ||
3612 | if (!ret) { | ||
3613 | trans->bytes_reserved += num_bytes; | ||
3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3615 | } | ||
3616 | return ret; | ||
3617 | } | ||
3618 | |||
3619 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | ||
3620 | struct btrfs_root *root) | ||
3621 | { | ||
3622 | if (!trans->bytes_reserved) | ||
3623 | return; | ||
3624 | |||
3625 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | ||
3626 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3627 | trans->bytes_reserved); | ||
3628 | trans->bytes_reserved = 0; | ||
3629 | } | ||
3630 | |||
3631 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3632 | struct inode *inode) | ||
3633 | { | ||
3634 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3635 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3636 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | ||
3637 | |||
3638 | /* | ||
3639 | * one for deleting the orphan item, one for updating the inode and | ||
3640 | * two for calling btrfs_truncate_inode_items. | ||
3641 | * | ||
3642 | * btrfs_truncate_inode_items is a delete operation; it frees | ||
3643 | * more space than it uses in most cases. So two units of | ||
3644 | * metadata space should be enough for calling it many times. | ||
3645 | * If all of the metadata space is used, we can commit the | ||
3646 | * transaction and use the space it freed. | ||
3647 | */ | ||
3648 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3649 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3650 | } | ||
3651 | |||
3652 | void btrfs_orphan_release_metadata(struct inode *inode) | ||
3653 | { | ||
3654 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3655 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3656 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | ||
3657 | } | ||
3658 | |||
3659 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3660 | struct btrfs_pending_snapshot *pending) | ||
3661 | { | ||
3662 | struct btrfs_root *root = pending->root; | ||
3663 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3664 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | ||
3665 | /* | ||
3666 | * two for the root's back/forward refs, two for directory entries | ||
3667 | * and one for the root of the snapshot. | ||
3668 | */ | ||
3669 | u64 num_bytes = calc_trans_metadata_size(root, 5); | ||
3670 | dst_rsv->space_info = src_rsv->space_info; | ||
3671 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3672 | } | ||
3673 | |||
3674 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | ||
3675 | { | ||
3676 | return num_bytes >> 3; | ||
3677 | } | ||
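
The helper above sets aside one eighth of the data range for checksum metadata, regardless of the inode; e.g. a 1 MiB delalloc reservation adds 128 KiB:

#include <stdio.h>

int main(void)
{
	unsigned long num_bytes = 1UL << 20;	/* 1 MiB of delalloc */
	printf("%lu\n", num_bytes >> 3);	/* 131072 = 128 KiB */
	return 0;
}
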
3678 | |||
3679 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | ||
3680 | { | ||
3681 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3682 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | ||
3683 | u64 to_reserve; | ||
3684 | int nr_extents; | ||
3685 | int retries = 0; | ||
3686 | int ret; | ||
3687 | |||
3688 | if (btrfs_transaction_in_commit(root->fs_info)) | ||
3689 | schedule_timeout_uninterruptible(1); | ||
3690 | |||
3691 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
3692 | again: | ||
3693 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3694 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | ||
3695 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | ||
3696 | nr_extents -= BTRFS_I(inode)->reserved_extents; | ||
3697 | to_reserve = calc_trans_metadata_size(root, nr_extents); | ||
3698 | } else { | ||
3699 | nr_extents = 0; | ||
3700 | to_reserve = 0; | ||
3701 | } | ||
3702 | |||
3703 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | ||
3704 | ret = reserve_metadata_bytes(block_rsv, to_reserve); | ||
3705 | if (ret) { | ||
3706 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3707 | ret = should_retry_reserve(NULL, root, block_rsv, to_reserve, | ||
3708 | &retries); | ||
3709 | if (ret > 0) | ||
3710 | goto again; | ||
3711 | return ret; | ||
3712 | } | ||
3713 | |||
3714 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3715 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3716 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3717 | |||
3718 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | ||
3719 | |||
3720 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3721 | shrink_delalloc(NULL, root, to_reserve); | ||
3722 | |||
3723 | return 0; | ||
3724 | } | ||
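
The accounting above pays metadata only for outstanding extents that no earlier reservation covered: nr_extents counts the extents the inode's dirty ranges may need (plus the one being added), and only the surplus over reserved_extents is charged. A small model:

#include <stdio.h>

int main(void)
{
	int outstanding = 5;	/* extents the inode's dirty ranges may need */
	int reserved = 3;	/* extents already paid for */

	int nr = outstanding + 1;	/* +1 for this reservation */
	int to_charge = nr > reserved ? nr - reserved : 0;
	printf("charge metadata for %d extent(s)\n", to_charge);	/* 3 */
	return 0;
}
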
3725 | |||
3726 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | ||
3727 | { | ||
3728 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3729 | u64 to_free; | ||
3730 | int nr_extents; | ||
3731 | |||
3732 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
3733 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | ||
3734 | |||
3735 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3736 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
3737 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | ||
3738 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | ||
3739 | BTRFS_I(inode)->reserved_extents -= nr_extents; | ||
3740 | } else { | ||
3741 | nr_extents = 0; | ||
3742 | } | ||
3743 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3744 | |||
3745 | to_free = calc_csum_metadata_size(inode, num_bytes); | ||
3746 | if (nr_extents > 0) | ||
3747 | to_free += calc_trans_metadata_size(root, nr_extents); | ||
3748 | |||
3749 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | ||
3750 | to_free); | ||
3751 | } | ||
3752 | |||
3753 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | ||
3754 | { | ||
3755 | int ret; | ||
3756 | |||
3757 | ret = btrfs_check_data_free_space(inode, num_bytes); | ||
3758 | if (ret) | ||
3759 | return ret; | ||
3760 | |||
3761 | ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); | ||
3762 | if (ret) { | ||
3763 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3764 | return ret; | ||
3765 | } | ||
3766 | |||
3767 | return 0; | ||
3768 | } | ||
3769 | |||
3770 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | ||
3771 | { | ||
3772 | btrfs_delalloc_release_metadata(inode, num_bytes); | ||
3773 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3774 | } | ||
3775 | |||
3464 | static int update_block_group(struct btrfs_trans_handle *trans, | 3776 | static int update_block_group(struct btrfs_trans_handle *trans, |
3465 | struct btrfs_root *root, | 3777 | struct btrfs_root *root, |
3466 | u64 bytenr, u64 num_bytes, int alloc, | 3778 | u64 bytenr, u64 num_bytes, int alloc) |
3467 | int mark_free) | ||
3468 | { | 3779 | { |
3469 | struct btrfs_block_group_cache *cache; | 3780 | struct btrfs_block_group_cache *cache; |
3470 | struct btrfs_fs_info *info = root->fs_info; | 3781 | struct btrfs_fs_info *info = root->fs_info; |
3782 | int factor; | ||
3471 | u64 total = num_bytes; | 3783 | u64 total = num_bytes; |
3472 | u64 old_val; | 3784 | u64 old_val; |
3473 | u64 byte_in_group; | 3785 | u64 byte_in_group; |
@@ -3486,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3486 | cache = btrfs_lookup_block_group(info, bytenr); | 3798 | cache = btrfs_lookup_block_group(info, bytenr); |
3487 | if (!cache) | 3799 | if (!cache) |
3488 | return -1; | 3800 | return -1; |
3801 | if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
3802 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3803 | BTRFS_BLOCK_GROUP_RAID10)) | ||
3804 | factor = 2; | ||
3805 | else | ||
3806 | factor = 1; | ||
3489 | byte_in_group = bytenr - cache->key.objectid; | 3807 | byte_in_group = bytenr - cache->key.objectid; |
3490 | WARN_ON(byte_in_group > cache->key.offset); | 3808 | WARN_ON(byte_in_group > cache->key.offset); |
3491 | 3809 | ||
@@ -3498,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3498 | old_val += num_bytes; | 3816 | old_val += num_bytes; |
3499 | btrfs_set_block_group_used(&cache->item, old_val); | 3817 | btrfs_set_block_group_used(&cache->item, old_val); |
3500 | cache->reserved -= num_bytes; | 3818 | cache->reserved -= num_bytes; |
3501 | cache->space_info->bytes_used += num_bytes; | ||
3502 | cache->space_info->bytes_reserved -= num_bytes; | 3819 | cache->space_info->bytes_reserved -= num_bytes; |
3503 | if (cache->ro) | 3820 | cache->space_info->bytes_used += num_bytes; |
3504 | cache->space_info->bytes_readonly -= num_bytes; | 3821 | cache->space_info->disk_used += num_bytes * factor; |
3505 | spin_unlock(&cache->lock); | 3822 | spin_unlock(&cache->lock); |
3506 | spin_unlock(&cache->space_info->lock); | 3823 | spin_unlock(&cache->space_info->lock); |
3507 | } else { | 3824 | } else { |
3508 | old_val -= num_bytes; | 3825 | old_val -= num_bytes; |
3509 | cache->space_info->bytes_used -= num_bytes; | ||
3510 | if (cache->ro) | ||
3511 | cache->space_info->bytes_readonly += num_bytes; | ||
3512 | btrfs_set_block_group_used(&cache->item, old_val); | 3826 | btrfs_set_block_group_used(&cache->item, old_val); |
3827 | cache->pinned += num_bytes; | ||
3828 | cache->space_info->bytes_pinned += num_bytes; | ||
3829 | cache->space_info->bytes_used -= num_bytes; | ||
3830 | cache->space_info->disk_used -= num_bytes * factor; | ||
3513 | spin_unlock(&cache->lock); | 3831 | spin_unlock(&cache->lock); |
3514 | spin_unlock(&cache->space_info->lock); | 3832 | spin_unlock(&cache->space_info->lock); |
3515 | if (mark_free) { | ||
3516 | int ret; | ||
3517 | 3833 | ||
3518 | ret = btrfs_discard_extent(root, bytenr, | 3834 | set_extent_dirty(info->pinned_extents, |
3519 | num_bytes); | 3835 | bytenr, bytenr + num_bytes - 1, |
3520 | WARN_ON(ret); | 3836 | GFP_NOFS | __GFP_NOFAIL); |
3521 | |||
3522 | ret = btrfs_add_free_space(cache, bytenr, | ||
3523 | num_bytes); | ||
3524 | WARN_ON(ret); | ||
3525 | } | ||
3526 | } | 3837 | } |
3527 | btrfs_put_block_group(cache); | 3838 | btrfs_put_block_group(cache); |
3528 | total -= num_bytes; | 3839 | total -= num_bytes; |
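
The factor introduced above makes disk_used track raw disk consumption: DUP, RAID1 and RAID10 store two copies of every logical byte, so they move disk_used by twice num_bytes. A sketch with stand-in flag values (the real BTRFS_BLOCK_GROUP_* constants live in ctree.h):

#include <stdio.h>

#define BG_DUP		(1u << 5)	/* stand-in bit values, not the */
#define BG_RAID1	(1u << 4)	/* real ctree.h constants       */
#define BG_RAID10	(1u << 6)

static int disk_factor(unsigned int flags)
{
	return (flags & (BG_DUP | BG_RAID1 | BG_RAID10)) ? 2 : 1;
}

int main(void)
{
	/* allocating 1 MiB in a RAID1 group consumes 2 MiB of raw disk */
	printf("%u bytes on disk\n", (1u << 20) * disk_factor(BG_RAID1));
	return 0;
}
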
@@ -3546,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3546 | return bytenr; | 3857 | return bytenr; |
3547 | } | 3858 | } |
3548 | 3859 | ||
3549 | /* | 3860 | static int pin_down_extent(struct btrfs_root *root, |
3550 | * this function must be called within transaction | 3861 | struct btrfs_block_group_cache *cache, |
3551 | */ | 3862 | u64 bytenr, u64 num_bytes, int reserved) |
3552 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3553 | u64 bytenr, u64 num_bytes, int reserved) | ||
3554 | { | 3863 | { |
3555 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3556 | struct btrfs_block_group_cache *cache; | ||
3557 | |||
3558 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3559 | BUG_ON(!cache); | ||
3560 | |||
3561 | spin_lock(&cache->space_info->lock); | 3864 | spin_lock(&cache->space_info->lock); |
3562 | spin_lock(&cache->lock); | 3865 | spin_lock(&cache->lock); |
3563 | cache->pinned += num_bytes; | 3866 | cache->pinned += num_bytes; |
@@ -3569,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3569 | spin_unlock(&cache->lock); | 3872 | spin_unlock(&cache->lock); |
3570 | spin_unlock(&cache->space_info->lock); | 3873 | spin_unlock(&cache->space_info->lock); |
3571 | 3874 | ||
3572 | btrfs_put_block_group(cache); | 3875 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
3876 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | ||
3877 | return 0; | ||
3878 | } | ||
3879 | |||
3880 | /* | ||
3881 | * this function must be called within a transaction | ||
3882 | */ | ||
3883 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3884 | u64 bytenr, u64 num_bytes, int reserved) | ||
3885 | { | ||
3886 | struct btrfs_block_group_cache *cache; | ||
3887 | |||
3888 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
3889 | BUG_ON(!cache); | ||
3890 | |||
3891 | pin_down_extent(root, cache, bytenr, num_bytes, reserved); | ||
3573 | 3892 | ||
3574 | set_extent_dirty(fs_info->pinned_extents, | 3893 | btrfs_put_block_group(cache); |
3575 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); | ||
3576 | return 0; | 3894 | return 0; |
3577 | } | 3895 | } |
3578 | 3896 | ||
3579 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 3897 | /* |
3580 | u64 num_bytes, int reserve) | 3898 | * update size of reserved extents. this function may return -EAGAIN |
3899 | * if the block group has gone read-only ('reserve' true or 'sinfo' false). | ||
3900 | */ | ||
3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
3902 | u64 num_bytes, int reserve, int sinfo) | ||
3581 | { | 3903 | { |
3582 | spin_lock(&cache->space_info->lock); | 3904 | int ret = 0; |
3583 | spin_lock(&cache->lock); | 3905 | if (sinfo) { |
3584 | if (reserve) { | 3906 | struct btrfs_space_info *space_info = cache->space_info; |
3585 | cache->reserved += num_bytes; | 3907 | spin_lock(&space_info->lock); |
3586 | cache->space_info->bytes_reserved += num_bytes; | 3908 | spin_lock(&cache->lock); |
3909 | if (reserve) { | ||
3910 | if (cache->ro) { | ||
3911 | ret = -EAGAIN; | ||
3912 | } else { | ||
3913 | cache->reserved += num_bytes; | ||
3914 | space_info->bytes_reserved += num_bytes; | ||
3915 | } | ||
3916 | } else { | ||
3917 | if (cache->ro) | ||
3918 | space_info->bytes_readonly += num_bytes; | ||
3919 | cache->reserved -= num_bytes; | ||
3920 | space_info->bytes_reserved -= num_bytes; | ||
3921 | } | ||
3922 | spin_unlock(&cache->lock); | ||
3923 | spin_unlock(&space_info->lock); | ||
3587 | } else { | 3924 | } else { |
3588 | cache->reserved -= num_bytes; | 3925 | spin_lock(&cache->lock); |
3589 | cache->space_info->bytes_reserved -= num_bytes; | 3926 | if (cache->ro) { |
3927 | ret = -EAGAIN; | ||
3928 | } else { | ||
3929 | if (reserve) | ||
3930 | cache->reserved += num_bytes; | ||
3931 | else | ||
3932 | cache->reserved -= num_bytes; | ||
3933 | } | ||
3934 | spin_unlock(&cache->lock); | ||
3590 | } | 3935 | } |
3591 | spin_unlock(&cache->lock); | 3936 | return ret; |
3592 | spin_unlock(&cache->space_info->lock); | ||
3593 | return 0; | ||
3594 | } | 3937 | } |
3595 | 3938 | ||
3596 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 3939 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
@@ -3621,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
3621 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 3964 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
3622 | 3965 | ||
3623 | up_write(&fs_info->extent_commit_sem); | 3966 | up_write(&fs_info->extent_commit_sem); |
3967 | |||
3968 | update_global_block_rsv(fs_info); | ||
3624 | return 0; | 3969 | return 0; |
3625 | } | 3970 | } |
3626 | 3971 | ||
@@ -3647,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
3647 | btrfs_add_free_space(cache, start, len); | 3992 | btrfs_add_free_space(cache, start, len); |
3648 | } | 3993 | } |
3649 | 3994 | ||
3995 | start += len; | ||
3996 | |||
3650 | spin_lock(&cache->space_info->lock); | 3997 | spin_lock(&cache->space_info->lock); |
3651 | spin_lock(&cache->lock); | 3998 | spin_lock(&cache->lock); |
3652 | cache->pinned -= len; | 3999 | cache->pinned -= len; |
3653 | cache->space_info->bytes_pinned -= len; | 4000 | cache->space_info->bytes_pinned -= len; |
4001 | if (cache->ro) { | ||
4002 | cache->space_info->bytes_readonly += len; | ||
4003 | } else if (cache->reserved_pinned > 0) { | ||
4004 | len = min(len, cache->reserved_pinned); | ||
4005 | cache->reserved_pinned -= len; | ||
4006 | cache->space_info->bytes_reserved += len; | ||
4007 | } | ||
3654 | spin_unlock(&cache->lock); | 4008 | spin_unlock(&cache->lock); |
3655 | spin_unlock(&cache->space_info->lock); | 4009 | spin_unlock(&cache->space_info->lock); |
3656 | |||
3657 | start += len; | ||
3658 | } | 4010 | } |
3659 | 4011 | ||
3660 | if (cache) | 4012 | if (cache) |
@@ -3667,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3667 | { | 4019 | { |
3668 | struct btrfs_fs_info *fs_info = root->fs_info; | 4020 | struct btrfs_fs_info *fs_info = root->fs_info; |
3669 | struct extent_io_tree *unpin; | 4021 | struct extent_io_tree *unpin; |
4022 | struct btrfs_block_rsv *block_rsv; | ||
4023 | struct btrfs_block_rsv *next_rsv; | ||
3670 | u64 start; | 4024 | u64 start; |
3671 | u64 end; | 4025 | u64 end; |
4026 | int idx; | ||
3672 | int ret; | 4027 | int ret; |
3673 | 4028 | ||
3674 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4029 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
@@ -3689,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3689 | cond_resched(); | 4044 | cond_resched(); |
3690 | } | 4045 | } |
3691 | 4046 | ||
3692 | return ret; | 4047 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
3693 | } | 4048 | list_for_each_entry_safe(block_rsv, next_rsv, |
4049 | &fs_info->durable_block_rsv_list, list) { | ||
3694 | 4050 | ||
3695 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 4051 | idx = trans->transid & 0x1; |
3696 | struct btrfs_root *root, | 4052 | if (block_rsv->freed[idx] > 0) { |
3697 | struct btrfs_path *path, | 4053 | block_rsv_add_bytes(block_rsv, |
3698 | u64 bytenr, u64 num_bytes, | 4054 | block_rsv->freed[idx], 0); |
3699 | int is_data, int reserved, | 4055 | block_rsv->freed[idx] = 0; |
3700 | struct extent_buffer **must_clean) | 4056 | } |
3701 | { | 4057 | if (atomic_read(&block_rsv->usage) == 0) { |
3702 | int err = 0; | 4058 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); |
3703 | struct extent_buffer *buf; | ||
3704 | |||
3705 | if (is_data) | ||
3706 | goto pinit; | ||
3707 | |||
3708 | /* | ||
3709 | * discard is sloooow, and so triggering discards on | ||
3710 | * individual btree blocks isn't a good plan. Just | ||
3711 | * pin everything in discard mode. | ||
3712 | */ | ||
3713 | if (btrfs_test_opt(root, DISCARD)) | ||
3714 | goto pinit; | ||
3715 | |||
3716 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | ||
3717 | if (!buf) | ||
3718 | goto pinit; | ||
3719 | 4059 | ||
3720 | /* we can reuse a block if it hasn't been written | 4060 | if (block_rsv->freed[0] == 0 && |
3721 | * and it is from this transaction. We can't | 4061 | block_rsv->freed[1] == 0) { |
3722 | * reuse anything from the tree log root because | 4062 | list_del_init(&block_rsv->list); |
3723 | * it has tiny sub-transactions. | 4063 | kfree(block_rsv); |
3724 | */ | 4064 | } |
3725 | if (btrfs_buffer_uptodate(buf, 0) && | 4065 | } else { |
3726 | btrfs_try_tree_lock(buf)) { | 4066 | btrfs_block_rsv_release(root, block_rsv, 0); |
3727 | u64 header_owner = btrfs_header_owner(buf); | ||
3728 | u64 header_transid = btrfs_header_generation(buf); | ||
3729 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
3730 | header_transid == trans->transid && | ||
3731 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
3732 | *must_clean = buf; | ||
3733 | return 1; | ||
3734 | } | 4067 | } |
3735 | btrfs_tree_unlock(buf); | ||
3736 | } | 4068 | } |
3737 | free_extent_buffer(buf); | 4069 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
3738 | pinit: | ||
3739 | if (path) | ||
3740 | btrfs_set_path_blocking(path); | ||
3741 | /* unlocks the pinned mutex */ | ||
3742 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); | ||
3743 | 4070 | ||
3744 | BUG_ON(err < 0); | ||
3745 | return 0; | 4071 | return 0; |
3746 | } | 4072 | } |
3747 | 4073 | ||
@@ -3902,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3902 | BUG_ON(ret); | 4228 | BUG_ON(ret); |
3903 | } | 4229 | } |
3904 | } else { | 4230 | } else { |
3905 | int mark_free = 0; | ||
3906 | struct extent_buffer *must_clean = NULL; | ||
3907 | |||
3908 | if (found_extent) { | 4231 | if (found_extent) { |
3909 | BUG_ON(is_data && refs_to_drop != | 4232 | BUG_ON(is_data && refs_to_drop != |
3910 | extent_data_ref_count(root, path, iref)); | 4233 | extent_data_ref_count(root, path, iref)); |
@@ -3917,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3917 | } | 4240 | } |
3918 | } | 4241 | } |
3919 | 4242 | ||
3920 | ret = pin_down_bytes(trans, root, path, bytenr, | ||
3921 | num_bytes, is_data, 0, &must_clean); | ||
3922 | if (ret > 0) | ||
3923 | mark_free = 1; | ||
3924 | BUG_ON(ret < 0); | ||
3925 | /* | ||
3926 | * it is going to be very rare for someone to be waiting | ||
3927 | * on the block we're freeing. del_items might need to | ||
3928 | * schedule, so rather than get fancy, just force it | ||
3929 | * to blocking here | ||
3930 | */ | ||
3931 | if (must_clean) | ||
3932 | btrfs_set_lock_blocking(must_clean); | ||
3933 | |||
3934 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
3935 | num_to_del); | 4244 | num_to_del); |
3936 | BUG_ON(ret); | 4245 | BUG_ON(ret); |
3937 | btrfs_release_path(extent_root, path); | 4246 | btrfs_release_path(extent_root, path); |
3938 | 4247 | ||
3939 | if (must_clean) { | ||
3940 | clean_tree_block(NULL, root, must_clean); | ||
3941 | btrfs_tree_unlock(must_clean); | ||
3942 | free_extent_buffer(must_clean); | ||
3943 | } | ||
3944 | |||
3945 | if (is_data) { | 4248 | if (is_data) { |
3946 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
3947 | BUG_ON(ret); | 4250 | BUG_ON(ret); |
@@ -3951,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3951 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | 4254 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); |
3952 | } | 4255 | } |
3953 | 4256 | ||
3954 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 4257 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
3955 | mark_free); | ||
3956 | BUG_ON(ret); | 4258 | BUG_ON(ret); |
3957 | } | 4259 | } |
3958 | btrfs_free_path(path); | 4260 | btrfs_free_path(path); |
@@ -3960,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3960 | } | 4262 | } |
3961 | 4263 | ||
3962 | /* | 4264 | /* |
3963 | * when we free an extent, it is possible (and likely) that we free the last | 4265 | * when we free a block, it is possible (and likely) that we free the last |
3964 | * delayed ref for that extent as well. This searches the delayed ref tree for | 4266 | * delayed ref for that extent as well. This searches the delayed ref tree for |
3965 | * a given extent, and if there are no other delayed refs to be processed, it | 4267 | * a given extent, and if there are no other delayed refs to be processed, it |
3966 | * removes it from the tree. | 4268 | * removes it from the tree. |
@@ -3972,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
3972 | struct btrfs_delayed_ref_root *delayed_refs; | 4274 | struct btrfs_delayed_ref_root *delayed_refs; |
3973 | struct btrfs_delayed_ref_node *ref; | 4275 | struct btrfs_delayed_ref_node *ref; |
3974 | struct rb_node *node; | 4276 | struct rb_node *node; |
3975 | int ret; | 4277 | int ret = 0; |
3976 | 4278 | ||
3977 | delayed_refs = &trans->transaction->delayed_refs; | 4279 | delayed_refs = &trans->transaction->delayed_refs; |
3978 | spin_lock(&delayed_refs->lock); | 4280 | spin_lock(&delayed_refs->lock); |
@@ -4024,17 +4326,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
4024 | list_del_init(&head->cluster); | 4326 | list_del_init(&head->cluster); |
4025 | spin_unlock(&delayed_refs->lock); | 4327 | spin_unlock(&delayed_refs->lock); |
4026 | 4328 | ||
4027 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 4329 | BUG_ON(head->extent_op); |
4028 | &head->node, head->extent_op, | 4330 | if (head->must_insert_reserved) |
4029 | head->must_insert_reserved); | 4331 | ret = 1; |
4030 | BUG_ON(ret); | 4332 | |
4333 | mutex_unlock(&head->mutex); | ||
4031 | btrfs_put_delayed_ref(&head->node); | 4334 | btrfs_put_delayed_ref(&head->node); |
4032 | return 0; | 4335 | return ret; |
4033 | out: | 4336 | out: |
4034 | spin_unlock(&delayed_refs->lock); | 4337 | spin_unlock(&delayed_refs->lock); |
4035 | return 0; | 4338 | return 0; |
4036 | } | 4339 | } |
4037 | 4340 | ||
4341 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4342 | struct btrfs_root *root, | ||
4343 | struct extent_buffer *buf, | ||
4344 | u64 parent, int last_ref) | ||
4345 | { | ||
4346 | struct btrfs_block_rsv *block_rsv; | ||
4347 | struct btrfs_block_group_cache *cache = NULL; | ||
4348 | int ret; | ||
4349 | |||
4350 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4351 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | ||
4352 | parent, root->root_key.objectid, | ||
4353 | btrfs_header_level(buf), | ||
4354 | BTRFS_DROP_DELAYED_REF, NULL); | ||
4355 | BUG_ON(ret); | ||
4356 | } | ||
4357 | |||
4358 | if (!last_ref) | ||
4359 | return; | ||
4360 | |||
4361 | block_rsv = get_block_rsv(trans, root); | ||
4362 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
4363 | BUG_ON(block_rsv->space_info != cache->space_info); | ||
4364 | |||
4365 | if (btrfs_header_generation(buf) == trans->transid) { | ||
4366 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4367 | ret = check_ref_cleanup(trans, root, buf->start); | ||
4368 | if (!ret) | ||
4369 | goto pin; | ||
4370 | } | ||
4371 | |||
4372 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
4373 | pin_down_extent(root, cache, buf->start, buf->len, 1); | ||
4374 | goto pin; | ||
4375 | } | ||
4376 | |||
4377 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | ||
4378 | |||
4379 | btrfs_add_free_space(cache, buf->start, buf->len); | ||
4380 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | ||
4381 | if (ret == -EAGAIN) { | ||
4382 | /* block group became read-only */ | ||
4383 | update_reserved_bytes(cache, buf->len, 0, 1); | ||
4384 | goto out; | ||
4385 | } | ||
4386 | |||
4387 | ret = 1; | ||
4388 | spin_lock(&block_rsv->lock); | ||
4389 | if (block_rsv->reserved < block_rsv->size) { | ||
4390 | block_rsv->reserved += buf->len; | ||
4391 | ret = 0; | ||
4392 | } | ||
4393 | spin_unlock(&block_rsv->lock); | ||
4394 | |||
4395 | if (ret) { | ||
4396 | spin_lock(&cache->space_info->lock); | ||
4397 | cache->space_info->bytes_reserved -= buf->len; | ||
4398 | spin_unlock(&cache->space_info->lock); | ||
4399 | } | ||
4400 | goto out; | ||
4401 | } | ||
4402 | pin: | ||
4403 | if (block_rsv->durable && !cache->ro) { | ||
4404 | ret = 0; | ||
4405 | spin_lock(&cache->lock); | ||
4406 | if (!cache->ro) { | ||
4407 | cache->reserved_pinned += buf->len; | ||
4408 | ret = 1; | ||
4409 | } | ||
4410 | spin_unlock(&cache->lock); | ||
4411 | |||
4412 | if (ret) { | ||
4413 | spin_lock(&block_rsv->lock); | ||
4414 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
4415 | spin_unlock(&block_rsv->lock); | ||
4416 | } | ||
4417 | } | ||
4418 | out: | ||
4419 | btrfs_put_block_group(cache); | ||
4420 | } | ||
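
The new btrfs_free_tree_block() above picks one of two paths for the last reference: a block born in the current transaction that was never written (and whose delayed-ref head could be cleaned up) is freed immediately, its bytes handed straight back to the reservation; anything else is pinned, with durable reservations remembering the bytes in freed[transid & 1] so they return after commit. A condensed model of that decision:

#include <stdio.h>

enum path { FREE_NOW, PIN_UNTIL_COMMIT };	/* model only */

static enum path free_path(int same_transid, int written, int last_ref_only)
{
	if (same_transid && last_ref_only && !written)
		return FREE_NOW;	/* bytes go straight back to the rsv */
	return PIN_UNTIL_COMMIT;	/* reclaimed via freed[] after commit */
}

int main(void)
{
	printf("%d\n", free_path(1, 0, 1));	/* 0: free immediately */
	printf("%d\n", free_path(1, 1, 1));	/* 1: written, must pin */
	return 0;
}
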
4421 | |||
4038 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 4422 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
4039 | struct btrfs_root *root, | 4423 | struct btrfs_root *root, |
4040 | u64 bytenr, u64 num_bytes, u64 parent, | 4424 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -4056,8 +4440,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4056 | parent, root_objectid, (int)owner, | 4440 | parent, root_objectid, (int)owner, |
4057 | BTRFS_DROP_DELAYED_REF, NULL); | 4441 | BTRFS_DROP_DELAYED_REF, NULL); |
4058 | BUG_ON(ret); | 4442 | BUG_ON(ret); |
4059 | ret = check_ref_cleanup(trans, root, bytenr); | ||
4060 | BUG_ON(ret); | ||
4061 | } else { | 4443 | } else { |
4062 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 4444 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, |
4063 | parent, root_objectid, owner, | 4445 | parent, root_objectid, owner, |
@@ -4067,21 +4449,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4067 | return ret; | 4449 | return ret; |
4068 | } | 4450 | } |
4069 | 4451 | ||
4070 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4071 | struct btrfs_root *root, | ||
4072 | u64 bytenr, u32 blocksize, | ||
4073 | u64 parent, u64 root_objectid, int level) | ||
4074 | { | ||
4075 | u64 used; | ||
4076 | spin_lock(&root->node_lock); | ||
4077 | used = btrfs_root_used(&root->root_item) - blocksize; | ||
4078 | btrfs_set_root_used(&root->root_item, used); | ||
4079 | spin_unlock(&root->node_lock); | ||
4080 | |||
4081 | return btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4082 | parent, root_objectid, level, 0); | ||
4083 | } | ||
4084 | |||
4085 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 4452 | static u64 stripe_align(struct btrfs_root *root, u64 val) |
4086 | { | 4453 | { |
4087 | u64 mask = ((u64)root->stripesize - 1); | 4454 | u64 mask = ((u64)root->stripesize - 1); |
@@ -4134,6 +4501,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
4134 | return 0; | 4501 | return 0; |
4135 | } | 4502 | } |
4136 | 4503 | ||
4504 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | ||
4505 | { | ||
4506 | int index; | ||
4507 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) | ||
4508 | index = 0; | ||
4509 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) | ||
4510 | index = 1; | ||
4511 | else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) | ||
4512 | index = 2; | ||
4513 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) | ||
4514 | index = 3; | ||
4515 | else | ||
4516 | index = 4; | ||
4517 | return index; | ||
4518 | } | ||
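
get_block_group_index() keys the new per-profile lists that space_info->block_groups became: find_free_extent walks the index upward, so the more redundant profiles sort first and plain single-device groups come last. A sketch of that ordering (names only; BTRFS_NR_RAID_TYPES == 5 under this scheme):

#include <stdio.h>

int main(void)
{
	/* list index -> profile, mirroring the if/else ladder above */
	static const char *profile[] = {
		"raid10", "raid1", "dup", "raid0", "single",
	};

	for (int i = 0; i < 5; i++)	/* BTRFS_NR_RAID_TYPES == 5 */
		printf("block_groups[%d]: %s\n", i, profile[i]);
	return 0;
}
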
4519 | |||
4137 | enum btrfs_loop_type { | 4520 | enum btrfs_loop_type { |
4138 | LOOP_FIND_IDEAL = 0, | 4521 | LOOP_FIND_IDEAL = 0, |
4139 | LOOP_CACHING_NOWAIT = 1, | 4522 | LOOP_CACHING_NOWAIT = 1, |
@@ -4155,7 +4538,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4155 | u64 num_bytes, u64 empty_size, | 4538 | u64 num_bytes, u64 empty_size, |
4156 | u64 search_start, u64 search_end, | 4539 | u64 search_start, u64 search_end, |
4157 | u64 hint_byte, struct btrfs_key *ins, | 4540 | u64 hint_byte, struct btrfs_key *ins, |
4158 | u64 exclude_start, u64 exclude_nr, | ||
4159 | int data) | 4541 | int data) |
4160 | { | 4542 | { |
4161 | int ret = 0; | 4543 | int ret = 0; |
@@ -4168,6 +4550,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4168 | struct btrfs_space_info *space_info; | 4550 | struct btrfs_space_info *space_info; |
4169 | int last_ptr_loop = 0; | 4551 | int last_ptr_loop = 0; |
4170 | int loop = 0; | 4552 | int loop = 0; |
4553 | int index = 0; | ||
4171 | bool found_uncached_bg = false; | 4554 | bool found_uncached_bg = false; |
4172 | bool failed_cluster_refill = false; | 4555 | bool failed_cluster_refill = false; |
4173 | bool failed_alloc = false; | 4556 | bool failed_alloc = false; |
@@ -4237,6 +4620,7 @@ ideal_cache: | |||
4237 | btrfs_put_block_group(block_group); | 4620 | btrfs_put_block_group(block_group); |
4238 | up_read(&space_info->groups_sem); | 4621 | up_read(&space_info->groups_sem); |
4239 | } else { | 4622 | } else { |
4623 | index = get_block_group_index(block_group); | ||
4240 | goto have_block_group; | 4624 | goto have_block_group; |
4241 | } | 4625 | } |
4242 | } else if (block_group) { | 4626 | } else if (block_group) { |
@@ -4245,7 +4629,8 @@ ideal_cache: | |||
4245 | } | 4629 | } |
4246 | search: | 4630 | search: |
4247 | down_read(&space_info->groups_sem); | 4631 | down_read(&space_info->groups_sem); |
4248 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4632 | list_for_each_entry(block_group, &space_info->block_groups[index], |
4633 | list) { | ||
4249 | u64 offset; | 4634 | u64 offset; |
4250 | int cached; | 4635 | int cached; |
4251 | 4636 | ||
@@ -4436,23 +4821,22 @@ checks: | |||
4436 | goto loop; | 4821 | goto loop; |
4437 | } | 4822 | } |
4438 | 4823 | ||
4439 | if (exclude_nr > 0 && | 4824 | ins->objectid = search_start; |
4440 | (search_start + num_bytes > exclude_start && | 4825 | ins->offset = num_bytes; |
4441 | search_start < exclude_start + exclude_nr)) { | 4826 | |
4442 | search_start = exclude_start + exclude_nr; | 4827 | if (offset < search_start) |
4828 | btrfs_add_free_space(block_group, offset, | ||
4829 | search_start - offset); | ||
4830 | BUG_ON(offset > search_start); | ||
4443 | 4831 | ||
4832 | ret = update_reserved_bytes(block_group, num_bytes, 1, | ||
4833 | (data & BTRFS_BLOCK_GROUP_DATA)); | ||
4834 | if (ret == -EAGAIN) { | ||
4444 | btrfs_add_free_space(block_group, offset, num_bytes); | 4835 | btrfs_add_free_space(block_group, offset, num_bytes); |
4445 | /* | ||
4446 | * if search_start is still in this block group | ||
4447 | * then we just re-search this block group | ||
4448 | */ | ||
4449 | if (search_start >= block_group->key.objectid && | ||
4450 | search_start < (block_group->key.objectid + | ||
4451 | block_group->key.offset)) | ||
4452 | goto have_block_group; | ||
4453 | goto loop; | 4836 | goto loop; |
4454 | } | 4837 | } |
4455 | 4838 | ||
4839 | /* we are all good, lets return */ | ||
4456 | ins->objectid = search_start; | 4840 | ins->objectid = search_start; |
4457 | ins->offset = num_bytes; | 4841 | ins->offset = num_bytes; |
4458 | 4842 | ||
@@ -4460,18 +4844,18 @@ checks: | |||
4460 | btrfs_add_free_space(block_group, offset, | 4844 | btrfs_add_free_space(block_group, offset, |
4461 | search_start - offset); | 4845 | search_start - offset); |
4462 | BUG_ON(offset > search_start); | 4846 | BUG_ON(offset > search_start); |
4463 | |||
4464 | update_reserved_extents(block_group, num_bytes, 1); | ||
4465 | |||
4466 | /* we are all good, lets return */ | ||
4467 | break; | 4847 | break; |
4468 | loop: | 4848 | loop: |
4469 | failed_cluster_refill = false; | 4849 | failed_cluster_refill = false; |
4470 | failed_alloc = false; | 4850 | failed_alloc = false; |
4851 | BUG_ON(index != get_block_group_index(block_group)); | ||
4471 | btrfs_put_block_group(block_group); | 4852 | btrfs_put_block_group(block_group); |
4472 | } | 4853 | } |
4473 | up_read(&space_info->groups_sem); | 4854 | up_read(&space_info->groups_sem); |
4474 | 4855 | ||
4856 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | ||
4857 | goto search; | ||
4858 | |||
4475 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for | 4859 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
4476 | * for them to make caching progress. Also | 4860 | * for them to make caching progress. Also |
4477 | * determine the best possible bg to cache | 4861 | * determine the best possible bg to cache |
@@ -4485,6 +4869,7 @@ loop: | |||
4485 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4869 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
4486 | (found_uncached_bg || empty_size || empty_cluster || | 4870 | (found_uncached_bg || empty_size || empty_cluster || |
4487 | allowed_chunk_alloc)) { | 4871 | allowed_chunk_alloc)) { |
4872 | index = 0; | ||
4488 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 4873 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4489 | found_uncached_bg = false; | 4874 | found_uncached_bg = false; |
4490 | loop++; | 4875 | loop++; |
@@ -4567,31 +4952,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4567 | int dump_block_groups) | 4952 | int dump_block_groups) |
4568 | { | 4953 | { |
4569 | struct btrfs_block_group_cache *cache; | 4954 | struct btrfs_block_group_cache *cache; |
4955 | int index = 0; | ||
4570 | 4956 | ||
4571 | spin_lock(&info->lock); | 4957 | spin_lock(&info->lock); |
4572 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4958 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
4573 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4959 | (unsigned long long)(info->total_bytes - info->bytes_used - |
4574 | info->bytes_pinned - info->bytes_reserved - | 4960 | info->bytes_pinned - info->bytes_reserved - |
4575 | info->bytes_super), | 4961 | info->bytes_readonly), |
4576 | (info->full) ? "" : "not "); | 4962 | (info->full) ? "" : "not "); |
4577 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4963 | printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " |
4578 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" | 4964 | "reserved=%llu, may_use=%llu, readonly=%llu\n", |
4579 | "\n", | ||
4580 | (unsigned long long)info->total_bytes, | 4965 | (unsigned long long)info->total_bytes, |
4966 | (unsigned long long)info->bytes_used, | ||
4581 | (unsigned long long)info->bytes_pinned, | 4967 | (unsigned long long)info->bytes_pinned, |
4582 | (unsigned long long)info->bytes_delalloc, | 4968 | (unsigned long long)info->bytes_reserved, |
4583 | (unsigned long long)info->bytes_may_use, | 4969 | (unsigned long long)info->bytes_may_use, |
4584 | (unsigned long long)info->bytes_used, | 4970 | (unsigned long long)info->bytes_readonly); |
4585 | (unsigned long long)info->bytes_root, | ||
4586 | (unsigned long long)info->bytes_super, | ||
4587 | (unsigned long long)info->bytes_reserved); | ||
4588 | spin_unlock(&info->lock); | 4971 | spin_unlock(&info->lock); |
4589 | 4972 | ||
4590 | if (!dump_block_groups) | 4973 | if (!dump_block_groups) |
4591 | return; | 4974 | return; |
4592 | 4975 | ||
4593 | down_read(&info->groups_sem); | 4976 | down_read(&info->groups_sem); |
4594 | list_for_each_entry(cache, &info->block_groups, list) { | 4977 | again: |
4978 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
4595 | spin_lock(&cache->lock); | 4979 | spin_lock(&cache->lock); |
4596 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 4980 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
4597 | "%llu pinned %llu reserved\n", | 4981 | "%llu pinned %llu reserved\n", |
@@ -4603,6 +4987,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4603 | btrfs_dump_free_space(cache, bytes); | 4987 | btrfs_dump_free_space(cache, bytes); |
4604 | spin_unlock(&cache->lock); | 4988 | spin_unlock(&cache->lock); |
4605 | } | 4989 | } |
4990 | if (++index < BTRFS_NR_RAID_TYPES) | ||
4991 | goto again; | ||
4606 | up_read(&info->groups_sem); | 4992 | up_read(&info->groups_sem); |
4607 | } | 4993 | } |
4608 | 4994 | ||
@@ -4628,9 +5014,8 @@ again: | |||
4628 | 5014 | ||
4629 | WARN_ON(num_bytes < root->sectorsize); | 5015 | WARN_ON(num_bytes < root->sectorsize); |
4630 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5016 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
4631 | search_start, search_end, hint_byte, ins, | 5017 | search_start, search_end, hint_byte, |
4632 | trans->alloc_exclude_start, | 5018 | ins, data); |
4633 | trans->alloc_exclude_nr, data); | ||
4634 | 5019 | ||
4635 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5020 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
4636 | num_bytes = num_bytes >> 1; | 5021 | num_bytes = num_bytes >> 1; |
@@ -4668,7 +5053,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4668 | ret = btrfs_discard_extent(root, start, len); | 5053 | ret = btrfs_discard_extent(root, start, len); |
4669 | 5054 | ||
4670 | btrfs_add_free_space(cache, start, len); | 5055 | btrfs_add_free_space(cache, start, len); |
4671 | update_reserved_extents(cache, len, 0); | 5056 | update_reserved_bytes(cache, len, 0, 1); |
4672 | btrfs_put_block_group(cache); | 5057 | btrfs_put_block_group(cache); |
4673 | 5058 | ||
4674 | return ret; | 5059 | return ret; |
@@ -4731,8 +5116,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
4731 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5116 | btrfs_mark_buffer_dirty(path->nodes[0]); |
4732 | btrfs_free_path(path); | 5117 | btrfs_free_path(path); |
4733 | 5118 | ||
4734 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5119 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4735 | 1, 0); | ||
4736 | if (ret) { | 5120 | if (ret) { |
4737 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5121 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4738 | "%llu\n", (unsigned long long)ins->objectid, | 5122 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4792,8 +5176,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
4792 | btrfs_mark_buffer_dirty(leaf); | 5176 | btrfs_mark_buffer_dirty(leaf); |
4793 | btrfs_free_path(path); | 5177 | btrfs_free_path(path); |
4794 | 5178 | ||
4795 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5179 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4796 | 1, 0); | ||
4797 | if (ret) { | 5180 | if (ret) { |
4798 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5181 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4799 | "%llu\n", (unsigned long long)ins->objectid, | 5182 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4869,73 +5252,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4869 | put_caching_control(caching_ctl); | 5252 | put_caching_control(caching_ctl); |
4870 | } | 5253 | } |
4871 | 5254 | ||
4872 | update_reserved_extents(block_group, ins->offset, 1); | 5255 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); |
5256 | BUG_ON(ret); | ||
4873 | btrfs_put_block_group(block_group); | 5257 | btrfs_put_block_group(block_group); |
4874 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5258 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4875 | 0, owner, offset, ins, 1); | 5259 | 0, owner, offset, ins, 1); |
4876 | return ret; | 5260 | return ret; |
4877 | } | 5261 | } |
4878 | 5262 | ||
4879 | /* | ||
4880 | * finds a free extent and does all the dirty work required for allocation | ||
4881 | * returns the key for the extent through ins, and a tree buffer for | ||
4882 | * the first block of the extent through buf. | ||
4883 | * | ||
4884 | * returns 0 if everything worked, non-zero otherwise. | ||
4885 | */ | ||
4886 | static int alloc_tree_block(struct btrfs_trans_handle *trans, | ||
4887 | struct btrfs_root *root, | ||
4888 | u64 num_bytes, u64 parent, u64 root_objectid, | ||
4889 | struct btrfs_disk_key *key, int level, | ||
4890 | u64 empty_size, u64 hint_byte, u64 search_end, | ||
4891 | struct btrfs_key *ins) | ||
4892 | { | ||
4893 | int ret; | ||
4894 | u64 flags = 0; | ||
4895 | |||
4896 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | ||
4897 | empty_size, hint_byte, search_end, | ||
4898 | ins, 0); | ||
4899 | if (ret) | ||
4900 | return ret; | ||
4901 | |||
4902 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
4903 | if (parent == 0) | ||
4904 | parent = ins->objectid; | ||
4905 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
4906 | } else | ||
4907 | BUG_ON(parent > 0); | ||
4908 | |||
4909 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4910 | struct btrfs_delayed_extent_op *extent_op; | ||
4911 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
4912 | BUG_ON(!extent_op); | ||
4913 | if (key) | ||
4914 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
4915 | else | ||
4916 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
4917 | extent_op->flags_to_set = flags; | ||
4918 | extent_op->update_key = 1; | ||
4919 | extent_op->update_flags = 1; | ||
4920 | extent_op->is_data = 0; | ||
4921 | |||
4922 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
4923 | ins->offset, parent, root_objectid, | ||
4924 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
4925 | extent_op); | ||
4926 | BUG_ON(ret); | ||
4927 | } | ||
4928 | |||
4929 | if (root_objectid == root->root_key.objectid) { | ||
4930 | u64 used; | ||
4931 | spin_lock(&root->node_lock); | ||
4932 | used = btrfs_root_used(&root->root_item) + num_bytes; | ||
4933 | btrfs_set_root_used(&root->root_item, used); | ||
4934 | spin_unlock(&root->node_lock); | ||
4935 | } | ||
4936 | return ret; | ||
4937 | } | ||
4938 | |||
4939 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 5263 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
4940 | struct btrfs_root *root, | 5264 | struct btrfs_root *root, |
4941 | u64 bytenr, u32 blocksize, | 5265 | u64 bytenr, u32 blocksize, |
@@ -4974,8 +5298,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
4974 | return buf; | 5298 | return buf; |
4975 | } | 5299 | } |
4976 | 5300 | ||
5301 | static struct btrfs_block_rsv * | ||
5302 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
5303 | struct btrfs_root *root, u32 blocksize) | ||
5304 | { | ||
5305 | struct btrfs_block_rsv *block_rsv; | ||
5306 | int ret; | ||
5307 | |||
5308 | block_rsv = get_block_rsv(trans, root); | ||
5309 | |||
5310 | if (block_rsv->size == 0) { | ||
5311 | ret = reserve_metadata_bytes(block_rsv, blocksize); | ||
5312 | if (ret) | ||
5313 | return ERR_PTR(ret); | ||
5314 | return block_rsv; | ||
5315 | } | ||
5316 | |||
5317 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
5318 | if (!ret) | ||
5319 | return block_rsv; | ||
5320 | |||
5321 | WARN_ON(1); | ||
5322 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
5323 | block_rsv->size, block_rsv->reserved, | ||
5324 | block_rsv->freed[0], block_rsv->freed[1]); | ||
5325 | |||
5326 | return ERR_PTR(-ENOSPC); | ||
5327 | } | ||
5328 | |||
5329 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
5330 | { | ||
5331 | block_rsv_add_bytes(block_rsv, blocksize, 0); | ||
5332 | block_rsv_release_bytes(block_rsv, NULL, 0); | ||
5333 | } | ||
5334 | |||
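
In use_block_rsv() above, an allocation is first charged against an already-funded reserve, and only an empty reserve triggers a fresh reserve_metadata_bytes() call; unuse_block_rsv() hands the bytes back when the allocation is abandoned. A standalone sketch of that charge/refund pairing, assuming simplified fields and no locking:

    #include <errno.h>
    #include <stdio.h>

    struct block_rsv { unsigned long long size, reserved; };

    /* Mirrors block_rsv_use_bytes(): charge only if enough is banked. */
    static int rsv_use_bytes(struct block_rsv *rsv, unsigned long long bytes)
    {
        if (rsv->reserved < bytes)
            return -ENOSPC;
        rsv->reserved -= bytes;
        return 0;
    }

    /* Mirrors the unuse path: refund a charge that was never consumed. */
    static void rsv_refund(struct block_rsv *rsv, unsigned long long bytes)
    {
        rsv->reserved += bytes;
    }

    int main(void)
    {
        struct block_rsv rsv = { .size = 8192, .reserved = 8192 };

        if (rsv_use_bytes(&rsv, 4096) == 0) /* about to allocate a block */
            rsv_refund(&rsv, 4096);         /* ...allocation failed, undo */
        printf("reserved=%llu\n", rsv.reserved);
        return 0;
    }

The point of the pairing is visible in the hunk that follows: btrfs_alloc_free_block() calls unuse_block_rsv() on the btrfs_reserve_extent() failure path, so a failed allocation never leaks reserved metadata space.
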
4977 | /* | 5335 | /* |
4978 | * helper function to allocate a block for a given tree | 5336 | * finds a free extent and does all the dirty work required for allocation |
5337 | * returns the key for the extent through ins, and a tree buffer for | ||
5338 | * the first block of the extent through buf. | ||
5339 | * | ||
4979 | * returns the tree buffer or NULL. | 5340 | * returns the tree buffer or NULL. |
4980 | */ | 5341 | */ |
4981 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 5342 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
@@ -4985,18 +5346,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4985 | u64 hint, u64 empty_size) | 5346 | u64 hint, u64 empty_size) |
4986 | { | 5347 | { |
4987 | struct btrfs_key ins; | 5348 | struct btrfs_key ins; |
4988 | int ret; | 5349 | struct btrfs_block_rsv *block_rsv; |
4989 | struct extent_buffer *buf; | 5350 | struct extent_buffer *buf; |
5351 | u64 flags = 0; | ||
5352 | int ret; | ||
5353 | |||
4990 | 5354 | ||
4991 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, | 5355 | block_rsv = use_block_rsv(trans, root, blocksize); |
4992 | key, level, empty_size, hint, (u64)-1, &ins); | 5356 | if (IS_ERR(block_rsv)) |
5357 | return ERR_CAST(block_rsv); | ||
5358 | |||
5359 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | ||
5360 | empty_size, hint, (u64)-1, &ins, 0); | ||
4993 | if (ret) { | 5361 | if (ret) { |
4994 | BUG_ON(ret > 0); | 5362 | unuse_block_rsv(block_rsv, blocksize); |
4995 | return ERR_PTR(ret); | 5363 | return ERR_PTR(ret); |
4996 | } | 5364 | } |
4997 | 5365 | ||
4998 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 5366 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
4999 | blocksize, level); | 5367 | blocksize, level); |
5368 | BUG_ON(IS_ERR(buf)); | ||
5369 | |||
5370 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
5371 | if (parent == 0) | ||
5372 | parent = ins.objectid; | ||
5373 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
5374 | } else | ||
5375 | BUG_ON(parent > 0); | ||
5376 | |||
5377 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
5378 | struct btrfs_delayed_extent_op *extent_op; | ||
5379 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
5380 | BUG_ON(!extent_op); | ||
5381 | if (key) | ||
5382 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
5383 | else | ||
5384 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
5385 | extent_op->flags_to_set = flags; | ||
5386 | extent_op->update_key = 1; | ||
5387 | extent_op->update_flags = 1; | ||
5388 | extent_op->is_data = 0; | ||
5389 | |||
5390 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | ||
5391 | ins.offset, parent, root_objectid, | ||
5392 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
5393 | extent_op); | ||
5394 | BUG_ON(ret); | ||
5395 | } | ||
5000 | return buf; | 5396 | return buf; |
5001 | } | 5397 | } |
5002 | 5398 | ||
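
The reworked btrfs_alloc_free_block() above threads failures as encoded pointers: use_block_rsv() returns ERR_PTR(ret) on failure and the caller forwards it with ERR_CAST() because its own return type is a different pointer type. A compact userspace re-creation of that linux/err.h idiom (the helpers below are re-derived here for illustration; in the kernel they come from the header):

    #include <stdio.h>

    /* Encode a small negative errno inside an (invalid) pointer value. */
    #define MAX_ERRNO 4095
    static void *ERR_PTR(long err)      { return (void *)err; }
    static long  PTR_ERR(const void *p) { return (long)p; }
    static int   IS_ERR(const void *p)
    {
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    struct block_rsv     { int dummy; };
    struct extent_buffer { int dummy; };

    static struct block_rsv *use_block_rsv(int fail)
    {
        static struct block_rsv rsv;
        return fail ? ERR_PTR(-28 /* ENOSPC */) : &rsv;
    }

    /* Same shape as the patched function: the errno rides inside the
     * pointer across the type change (ERR_CAST in the kernel). */
    static struct extent_buffer *alloc_free_block(int fail)
    {
        static struct extent_buffer buf;
        struct block_rsv *rsv = use_block_rsv(fail);

        if (IS_ERR(rsv))
            return (struct extent_buffer *)rsv; /* ERR_CAST(rsv) */
        return &buf;
    }

    int main(void)
    {
        struct extent_buffer *eb = alloc_free_block(1);
        if (IS_ERR(eb))
            printf("allocation failed: %ld\n", PTR_ERR(eb));
        return 0;
    }
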
@@ -5321,7 +5717,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5321 | struct btrfs_path *path, | 5717 | struct btrfs_path *path, |
5322 | struct walk_control *wc) | 5718 | struct walk_control *wc) |
5323 | { | 5719 | { |
5324 | int ret = 0; | 5720 | int ret; |
5325 | int level = wc->level; | 5721 | int level = wc->level; |
5326 | struct extent_buffer *eb = path->nodes[level]; | 5722 | struct extent_buffer *eb = path->nodes[level]; |
5327 | u64 parent = 0; | 5723 | u64 parent = 0; |
@@ -5399,13 +5795,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5399 | btrfs_header_owner(path->nodes[level + 1])); | 5795 | btrfs_header_owner(path->nodes[level + 1])); |
5400 | } | 5796 | } |
5401 | 5797 | ||
5402 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | 5798 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
5403 | root->root_key.objectid, level, 0); | ||
5404 | BUG_ON(ret); | ||
5405 | out: | 5799 | out: |
5406 | wc->refs[level] = 0; | 5800 | wc->refs[level] = 0; |
5407 | wc->flags[level] = 0; | 5801 | wc->flags[level] = 0; |
5408 | return ret; | 5802 | return 0; |
5409 | } | 5803 | } |
5410 | 5804 | ||
5411 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 5805 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
@@ -5483,7 +5877,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
5483 | * also make sure backrefs for the shared block and all lower level | 5877 | * also make sure backrefs for the shared block and all lower level |
5484 | * blocks are properly updated. | 5878 | * blocks are properly updated. |
5485 | */ | 5879 | */ |
5486 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | 5880 | int btrfs_drop_snapshot(struct btrfs_root *root, |
5881 | struct btrfs_block_rsv *block_rsv, int update_ref) | ||
5487 | { | 5882 | { |
5488 | struct btrfs_path *path; | 5883 | struct btrfs_path *path; |
5489 | struct btrfs_trans_handle *trans; | 5884 | struct btrfs_trans_handle *trans; |
@@ -5501,7 +5896,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5501 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 5896 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
5502 | BUG_ON(!wc); | 5897 | BUG_ON(!wc); |
5503 | 5898 | ||
5504 | trans = btrfs_start_transaction(tree_root, 1); | 5899 | trans = btrfs_start_transaction(tree_root, 0); |
5900 | if (block_rsv) | ||
5901 | trans->block_rsv = block_rsv; | ||
5505 | 5902 | ||
5506 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5903 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
5507 | level = btrfs_header_level(root->node); | 5904 | level = btrfs_header_level(root->node); |
@@ -5589,22 +5986,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5589 | } | 5986 | } |
5590 | 5987 | ||
5591 | BUG_ON(wc->level == 0); | 5988 | BUG_ON(wc->level == 0); |
5592 | if (trans->transaction->in_commit || | 5989 | if (btrfs_should_end_transaction(trans, tree_root)) { |
5593 | trans->transaction->delayed_refs.flushing) { | ||
5594 | ret = btrfs_update_root(trans, tree_root, | 5990 | ret = btrfs_update_root(trans, tree_root, |
5595 | &root->root_key, | 5991 | &root->root_key, |
5596 | root_item); | 5992 | root_item); |
5597 | BUG_ON(ret); | 5993 | BUG_ON(ret); |
5598 | 5994 | ||
5599 | btrfs_end_transaction(trans, tree_root); | 5995 | btrfs_end_transaction_throttle(trans, tree_root); |
5600 | trans = btrfs_start_transaction(tree_root, 1); | 5996 | trans = btrfs_start_transaction(tree_root, 0); |
5601 | } else { | 5997 | if (block_rsv) |
5602 | unsigned long update; | 5998 | trans->block_rsv = block_rsv; |
5603 | update = trans->delayed_ref_updates; | ||
5604 | trans->delayed_ref_updates = 0; | ||
5605 | if (update) | ||
5606 | btrfs_run_delayed_refs(trans, tree_root, | ||
5607 | update); | ||
5608 | } | 5999 | } |
5609 | } | 6000 | } |
5610 | btrfs_release_path(root, path); | 6001 | btrfs_release_path(root, path); |
@@ -5632,7 +6023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5632 | kfree(root); | 6023 | kfree(root); |
5633 | } | 6024 | } |
5634 | out: | 6025 | out: |
5635 | btrfs_end_transaction(trans, tree_root); | 6026 | btrfs_end_transaction_throttle(trans, tree_root); |
5636 | kfree(wc); | 6027 | kfree(wc); |
5637 | btrfs_free_path(path); | 6028 | btrfs_free_path(path); |
5638 | return err; | 6029 | return err; |
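
btrfs_drop_snapshot() now polls btrfs_should_end_transaction() rather than inspecting in_commit/flushing itself, and each time it reopens the transaction it re-attaches the caller's block_rsv. The control flow is a checkpoint-and-restart loop; a sketch of the shape with the btrfs calls replaced by hypothetical stubs:

    #include <stdio.h>

    struct trans { int id; };

    static int should_end(struct trans *t)  { return ++t->id % 3 == 0; }
    static void save_progress(void)         { /* btrfs_update_root() */ }
    static void end_trans(struct trans *t)  { (void)t; }
    static struct trans *start_trans(void)
    {
        static struct trans t;
        t.id++;
        return &t;
    }

    int main(void)
    {
        struct trans *t = start_trans();

        for (int step = 0; step < 10; step++) {
            /* ...drop one tree level, as walk_down/walk_up do... */
            if (should_end(t)) {
                save_progress();   /* record drop_progress in the root item */
                end_trans(t);
                t = start_trans(); /* the patch re-sets trans->block_rsv here */
            }
        }
        end_trans(t);
        printf("snapshot dropped\n");
        return 0;
    }

Persisting drop_progress before every restart is what makes the whole deletion resumable after a crash.
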
@@ -7228,48 +7619,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7228 | return flags; | 7619 | return flags; |
7229 | } | 7620 | } |
7230 | 7621 | ||
7231 | static int __alloc_chunk_for_shrink(struct btrfs_root *root, | 7622 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) |
7232 | struct btrfs_block_group_cache *shrink_block_group, | ||
7233 | int force) | ||
7234 | { | 7623 | { |
7235 | struct btrfs_trans_handle *trans; | 7624 | struct btrfs_space_info *sinfo = cache->space_info; |
7236 | u64 new_alloc_flags; | 7625 | u64 num_bytes; |
7237 | u64 calc; | 7626 | int ret = -ENOSPC; |
7238 | 7627 | ||
7239 | spin_lock(&shrink_block_group->lock); | 7628 | if (cache->ro) |
7240 | if (btrfs_block_group_used(&shrink_block_group->item) + | 7629 | return 0; |
7241 | shrink_block_group->reserved > 0) { | ||
7242 | spin_unlock(&shrink_block_group->lock); | ||
7243 | 7630 | ||
7244 | trans = btrfs_start_transaction(root, 1); | 7631 | spin_lock(&sinfo->lock); |
7245 | spin_lock(&shrink_block_group->lock); | 7632 | spin_lock(&cache->lock); |
7633 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7634 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7635 | |||
7636 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | ||
7637 | sinfo->bytes_may_use + sinfo->bytes_readonly + | ||
7638 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | ||
7639 | sinfo->bytes_readonly += num_bytes; | ||
7640 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
7641 | cache->reserved_pinned = 0; | ||
7642 | cache->ro = 1; | ||
7643 | ret = 0; | ||
7644 | } | ||
7645 | spin_unlock(&cache->lock); | ||
7646 | spin_unlock(&sinfo->lock); | ||
7647 | return ret; | ||
7648 | } | ||
7246 | 7649 | ||
7247 | new_alloc_flags = update_block_group_flags(root, | 7650 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
7248 | shrink_block_group->flags); | 7651 | struct btrfs_block_group_cache *cache) |
7249 | if (new_alloc_flags != shrink_block_group->flags) { | ||
7250 | calc = | ||
7251 | btrfs_block_group_used(&shrink_block_group->item); | ||
7252 | } else { | ||
7253 | calc = shrink_block_group->key.offset; | ||
7254 | } | ||
7255 | spin_unlock(&shrink_block_group->lock); | ||
7256 | 7652 | ||
7257 | do_chunk_alloc(trans, root->fs_info->extent_root, | 7653 | { |
7258 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | 7654 | struct btrfs_trans_handle *trans; |
7655 | u64 alloc_flags; | ||
7656 | int ret; | ||
7259 | 7657 | ||
7260 | btrfs_end_transaction(trans, root); | 7658 | BUG_ON(cache->ro); |
7261 | } else | 7659 | |
7262 | spin_unlock(&shrink_block_group->lock); | 7660 | trans = btrfs_join_transaction(root, 1); |
7263 | return 0; | 7661 | BUG_ON(IS_ERR(trans)); |
7264 | } | ||
7265 | 7662 | ||
7663 | alloc_flags = update_block_group_flags(root, cache->flags); | ||
7664 | if (alloc_flags != cache->flags) | ||
7665 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
7266 | 7666 | ||
7267 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | 7667 | ret = set_block_group_ro(cache); |
7268 | struct btrfs_block_group_cache *group) | 7668 | if (!ret) |
7669 | goto out; | ||
7670 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | ||
7671 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
7672 | if (ret < 0) | ||
7673 | goto out; | ||
7674 | ret = set_block_group_ro(cache); | ||
7675 | out: | ||
7676 | btrfs_end_transaction(trans, root); | ||
7677 | return ret; | ||
7678 | } | ||
7269 | 7679 | ||
7680 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
7681 | struct btrfs_block_group_cache *cache) | ||
7270 | { | 7682 | { |
7271 | __alloc_chunk_for_shrink(root, group, 1); | 7683 | struct btrfs_space_info *sinfo = cache->space_info; |
7272 | set_block_group_readonly(group); | 7684 | u64 num_bytes; |
7685 | |||
7686 | BUG_ON(!cache->ro); | ||
7687 | |||
7688 | spin_lock(&sinfo->lock); | ||
7689 | spin_lock(&cache->lock); | ||
7690 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7691 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7692 | sinfo->bytes_readonly -= num_bytes; | ||
7693 | cache->ro = 0; | ||
7694 | spin_unlock(&cache->lock); | ||
7695 | spin_unlock(&sinfo->lock); | ||
7273 | return 0; | 7696 | return 0; |
7274 | } | 7697 | } |
7275 | 7698 | ||
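
Both set_block_group_ro() above and btrfs_set_block_group_rw() hinge on one formula: the free bytes a group would take out of circulation are its size minus reserved, pinned, superblock copies, and used bytes, and the group may go read-only only if hiding that space still leaves the space_info under its total. A worked example of the check, using the formula from the patch with hypothetical byte counts:

    #include <stdio.h>

    int main(void)
    {
        /* Per-group figures (hypothetical values, in bytes). */
        unsigned long long key_offset  = 1 << 30;  /* 1 GiB group */
        unsigned long long reserved    = 64 << 20;
        unsigned long long pinned      = 16 << 20;
        unsigned long long bytes_super = 2 << 20;
        unsigned long long used        = 512 << 20;

        /* num_bytes: free space this group would lock away. */
        unsigned long long num_bytes =
            key_offset - reserved - pinned - bytes_super - used;

        /* space_info-wide figures, also hypothetical. */
        unsigned long long si_used = 3ULL << 30, si_reserved = 256 << 20,
                           si_pinned = 64 << 20, si_may_use = 128 << 20,
                           si_readonly = 1 << 30, reserved_pinned = 0,
                           si_total = 8ULL << 30;

        if (si_used + si_reserved + si_pinned + si_may_use + si_readonly +
            reserved_pinned + num_bytes < si_total)
            printf("ok: set read-only, hiding %llu free bytes\n", num_bytes);
        else
            printf("-ENOSPC: would overcommit the space_info\n");
        return 0;
    }
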
@@ -7436,17 +7859,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7436 | */ | 7859 | */ |
7437 | synchronize_rcu(); | 7860 | synchronize_rcu(); |
7438 | 7861 | ||
7862 | release_global_block_rsv(info); | ||
7863 | |||
7439 | while(!list_empty(&info->space_info)) { | 7864 | while(!list_empty(&info->space_info)) { |
7440 | space_info = list_entry(info->space_info.next, | 7865 | space_info = list_entry(info->space_info.next, |
7441 | struct btrfs_space_info, | 7866 | struct btrfs_space_info, |
7442 | list); | 7867 | list); |
7443 | 7868 | if (space_info->bytes_pinned > 0 || | |
7869 | space_info->bytes_reserved > 0) { | ||
7870 | WARN_ON(1); | ||
7871 | dump_space_info(space_info, 0, 0); | ||
7872 | } | ||
7444 | list_del(&space_info->list); | 7873 | list_del(&space_info->list); |
7445 | kfree(space_info); | 7874 | kfree(space_info); |
7446 | } | 7875 | } |
7447 | return 0; | 7876 | return 0; |
7448 | } | 7877 | } |
7449 | 7878 | ||
7879 | static void __link_block_group(struct btrfs_space_info *space_info, | ||
7880 | struct btrfs_block_group_cache *cache) | ||
7881 | { | ||
7882 | int index = get_block_group_index(cache); | ||
7883 | |||
7884 | down_write(&space_info->groups_sem); | ||
7885 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
7886 | up_write(&space_info->groups_sem); | ||
7887 | } | ||
7888 | |||
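
__link_block_group() replaces the single space_info->block_groups list with an array indexed by get_block_group_index(), so allocators can walk only the groups of one RAID profile. The exact ordering is not visible in this hunk, but the block_groups[3] and block_groups[4] loops below treat those two slots as the un-mirrored profiles; a sketch consistent with that (flag values are placeholders, not the BTRFS_BLOCK_GROUP_* constants):

    #include <stdio.h>

    /* Slots 0-2 for mirrored profiles, 3-4 for RAID0/single, matching
     * the block_groups[3]/[4] loops in btrfs_read_block_groups(). */
    enum { BG_RAID10, BG_RAID1, BG_DUP, BG_RAID0, BG_SINGLE, BG_NR };

    #define FLAG_RAID10 (1u << 0)
    #define FLAG_RAID1  (1u << 1)
    #define FLAG_DUP    (1u << 2)
    #define FLAG_RAID0  (1u << 3)

    static int get_block_group_index(unsigned int flags)
    {
        if (flags & FLAG_RAID10) return BG_RAID10;
        if (flags & FLAG_RAID1)  return BG_RAID1;
        if (flags & FLAG_DUP)    return BG_DUP;
        if (flags & FLAG_RAID0)  return BG_RAID0;
        return BG_SINGLE;
    }

    int main(void)
    {
        printf("RAID1 groups land in slot %d of %d\n",
               get_block_group_index(FLAG_RAID1), BG_NR);
        return 0;
    }
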
7450 | int btrfs_read_block_groups(struct btrfs_root *root) | 7889 | int btrfs_read_block_groups(struct btrfs_root *root) |
7451 | { | 7890 | { |
7452 | struct btrfs_path *path; | 7891 | struct btrfs_path *path; |
@@ -7468,10 +7907,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7468 | 7907 | ||
7469 | while (1) { | 7908 | while (1) { |
7470 | ret = find_first_block_group(root, path, &key); | 7909 | ret = find_first_block_group(root, path, &key); |
7471 | if (ret > 0) { | 7910 | if (ret > 0) |
7472 | ret = 0; | 7911 | break; |
7473 | goto error; | ||
7474 | } | ||
7475 | if (ret != 0) | 7912 | if (ret != 0) |
7476 | goto error; | 7913 | goto error; |
7477 | 7914 | ||
@@ -7480,7 +7917,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7480 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7917 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
7481 | if (!cache) { | 7918 | if (!cache) { |
7482 | ret = -ENOMEM; | 7919 | ret = -ENOMEM; |
7483 | break; | 7920 | goto error; |
7484 | } | 7921 | } |
7485 | 7922 | ||
7486 | atomic_set(&cache->count, 1); | 7923 | atomic_set(&cache->count, 1); |
@@ -7537,20 +7974,36 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7537 | BUG_ON(ret); | 7974 | BUG_ON(ret); |
7538 | cache->space_info = space_info; | 7975 | cache->space_info = space_info; |
7539 | spin_lock(&cache->space_info->lock); | 7976 | spin_lock(&cache->space_info->lock); |
7540 | cache->space_info->bytes_super += cache->bytes_super; | 7977 | cache->space_info->bytes_readonly += cache->bytes_super; |
7541 | spin_unlock(&cache->space_info->lock); | 7978 | spin_unlock(&cache->space_info->lock); |
7542 | 7979 | ||
7543 | down_write(&space_info->groups_sem); | 7980 | __link_block_group(space_info, cache); |
7544 | list_add_tail(&cache->list, &space_info->block_groups); | ||
7545 | up_write(&space_info->groups_sem); | ||
7546 | 7981 | ||
7547 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7982 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7548 | BUG_ON(ret); | 7983 | BUG_ON(ret); |
7549 | 7984 | ||
7550 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7985 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7551 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7986 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7552 | set_block_group_readonly(cache); | 7987 | set_block_group_ro(cache); |
7553 | } | 7988 | } |
7989 | |||
7990 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | ||
7991 | if (!(get_alloc_profile(root, space_info->flags) & | ||
7992 | (BTRFS_BLOCK_GROUP_RAID10 | | ||
7993 | BTRFS_BLOCK_GROUP_RAID1 | | ||
7994 | BTRFS_BLOCK_GROUP_DUP))) | ||
7995 | continue; | ||
7996 | /* | ||
7997 | * avoid allocating from un-mirrored block group if there are | ||
7998 | * mirrored block groups. | ||
7999 | */ | ||
8000 | list_for_each_entry(cache, &space_info->block_groups[3], list) | ||
8001 | set_block_group_ro(cache); | ||
8002 | list_for_each_entry(cache, &space_info->block_groups[4], list) | ||
8003 | set_block_group_ro(cache); | ||
8004 | } | ||
8005 | |||
8006 | init_global_block_rsv(info); | ||
7554 | ret = 0; | 8007 | ret = 0; |
7555 | error: | 8008 | error: |
7556 | btrfs_free_path(path); | 8009 | btrfs_free_path(path); |
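
The new loop at the end of btrfs_read_block_groups() keeps allocations away from un-mirrored groups whenever the active profile for a space_info provides redundancy (RAID10, RAID1, or DUP), simply by setting those groups read-only. The guard reduces to a mask test; a sketch under the placeholder flags from the previous example:

    #include <stdio.h>

    #define FLAG_RAID10 (1u << 0)  /* placeholder flag values */
    #define FLAG_RAID1  (1u << 1)
    #define FLAG_DUP    (1u << 2)

    /* Mirrors the continue-condition in the patch: only demote
     * un-mirrored groups when the profile itself is mirrored. */
    static int profile_is_mirrored(unsigned int profile)
    {
        return (profile & (FLAG_RAID10 | FLAG_RAID1 | FLAG_DUP)) != 0;
    }

    int main(void)
    {
        printf("RAID1: %d, single: %d\n",
               profile_is_mirrored(FLAG_RAID1), profile_is_mirrored(0));
        return 0;
    }
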
@@ -7611,12 +8064,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7611 | BUG_ON(ret); | 8064 | BUG_ON(ret); |
7612 | 8065 | ||
7613 | spin_lock(&cache->space_info->lock); | 8066 | spin_lock(&cache->space_info->lock); |
7614 | cache->space_info->bytes_super += cache->bytes_super; | 8067 | cache->space_info->bytes_readonly += cache->bytes_super; |
7615 | spin_unlock(&cache->space_info->lock); | 8068 | spin_unlock(&cache->space_info->lock); |
7616 | 8069 | ||
7617 | down_write(&cache->space_info->groups_sem); | 8070 | __link_block_group(cache->space_info, cache); |
7618 | list_add_tail(&cache->list, &cache->space_info->block_groups); | ||
7619 | up_write(&cache->space_info->groups_sem); | ||
7620 | 8071 | ||
7621 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8072 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7622 | BUG_ON(ret); | 8073 | BUG_ON(ret); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d2d03684fab2..a4080c21ec55 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
135 | return state; | 135 | return state; |
136 | } | 136 | } |
137 | 137 | ||
138 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
139 | { | 139 | { |
140 | if (!state) | 140 | if (!state) |
141 | return; | 141 | return; |
@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree, | |||
335 | } | 335 | } |
336 | 336 | ||
337 | static int set_state_cb(struct extent_io_tree *tree, | 337 | static int set_state_cb(struct extent_io_tree *tree, |
338 | struct extent_state *state, | 338 | struct extent_state *state, int *bits) |
339 | unsigned long bits) | ||
340 | { | 339 | { |
341 | if (tree->ops && tree->ops->set_bit_hook) { | 340 | if (tree->ops && tree->ops->set_bit_hook) { |
342 | return tree->ops->set_bit_hook(tree->mapping->host, | 341 | return tree->ops->set_bit_hook(tree->mapping->host, |
343 | state->start, state->end, | 342 | state, bits); |
344 | state->state, bits); | ||
345 | } | 343 | } |
346 | 344 | ||
347 | return 0; | 345 | return 0; |
348 | } | 346 | } |
349 | 347 | ||
350 | static void clear_state_cb(struct extent_io_tree *tree, | 348 | static void clear_state_cb(struct extent_io_tree *tree, |
351 | struct extent_state *state, | 349 | struct extent_state *state, int *bits) |
352 | unsigned long bits) | ||
353 | { | 350 | { |
354 | if (tree->ops && tree->ops->clear_bit_hook) | 351 | if (tree->ops && tree->ops->clear_bit_hook) |
355 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 352 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
367 | */ | 364 | */ |
368 | static int insert_state(struct extent_io_tree *tree, | 365 | static int insert_state(struct extent_io_tree *tree, |
369 | struct extent_state *state, u64 start, u64 end, | 366 | struct extent_state *state, u64 start, u64 end, |
370 | int bits) | 367 | int *bits) |
371 | { | 368 | { |
372 | struct rb_node *node; | 369 | struct rb_node *node; |
370 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
373 | int ret; | 371 | int ret; |
374 | 372 | ||
375 | if (end < start) { | 373 | if (end < start) { |
@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
384 | if (ret) | 382 | if (ret) |
385 | return ret; | 383 | return ret; |
386 | 384 | ||
387 | if (bits & EXTENT_DIRTY) | 385 | if (bits_to_set & EXTENT_DIRTY) |
388 | tree->dirty_bytes += end - start + 1; | 386 | tree->dirty_bytes += end - start + 1; |
389 | state->state |= bits; | 387 | state->state |= bits_to_set; |
390 | node = tree_insert(&tree->state, end, &state->rb_node); | 388 | node = tree_insert(&tree->state, end, &state->rb_node); |
391 | if (node) { | 389 | if (node) { |
392 | struct extent_state *found; | 390 | struct extent_state *found; |
@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
456 | * struct is freed and removed from the tree | 454 | * struct is freed and removed from the tree |
457 | */ | 455 | */ |
458 | static int clear_state_bit(struct extent_io_tree *tree, | 456 | static int clear_state_bit(struct extent_io_tree *tree, |
459 | struct extent_state *state, int bits, int wake, | 457 | struct extent_state *state, |
460 | int delete) | 458 | int *bits, int wake) |
461 | { | 459 | { |
462 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; | 460 | int bits_to_clear = *bits & ~EXTENT_CTLBITS; |
463 | int ret = state->state & bits_to_clear; | 461 | int ret = state->state & bits_to_clear; |
464 | 462 | ||
465 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 463 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
466 | u64 range = state->end - state->start + 1; | 464 | u64 range = state->end - state->start + 1; |
467 | WARN_ON(range > tree->dirty_bytes); | 465 | WARN_ON(range > tree->dirty_bytes); |
468 | tree->dirty_bytes -= range; | 466 | tree->dirty_bytes -= range; |
@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
471 | state->state &= ~bits_to_clear; | 469 | state->state &= ~bits_to_clear; |
472 | if (wake) | 470 | if (wake) |
473 | wake_up(&state->wq); | 471 | wake_up(&state->wq); |
474 | if (delete || state->state == 0) { | 472 | if (state->state == 0) { |
475 | if (state->tree) { | 473 | if (state->tree) { |
476 | clear_state_cb(tree, state, state->state); | ||
477 | rb_erase(&state->rb_node, &tree->state); | 474 | rb_erase(&state->rb_node, &tree->state); |
478 | state->tree = NULL; | 475 | state->tree = NULL; |
479 | free_extent_state(state); | 476 | free_extent_state(state); |
@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
514 | int set = 0; | 511 | int set = 0; |
515 | int clear = 0; | 512 | int clear = 0; |
516 | 513 | ||
514 | if (delete) | ||
515 | bits |= ~EXTENT_CTLBITS; | ||
516 | bits |= EXTENT_FIRST_DELALLOC; | ||
517 | |||
517 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 518 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
518 | clear = 1; | 519 | clear = 1; |
519 | again: | 520 | again: |
@@ -580,8 +581,7 @@ hit_next: | |||
580 | if (err) | 581 | if (err) |
581 | goto out; | 582 | goto out; |
582 | if (state->end <= end) { | 583 | if (state->end <= end) { |
583 | set |= clear_state_bit(tree, state, bits, wake, | 584 | set |= clear_state_bit(tree, state, &bits, wake); |
584 | delete); | ||
585 | if (last_end == (u64)-1) | 585 | if (last_end == (u64)-1) |
586 | goto out; | 586 | goto out; |
587 | start = last_end + 1; | 587 | start = last_end + 1; |
@@ -602,7 +602,7 @@ hit_next: | |||
602 | if (wake) | 602 | if (wake) |
603 | wake_up(&state->wq); | 603 | wake_up(&state->wq); |
604 | 604 | ||
605 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); | 605 | set |= clear_state_bit(tree, prealloc, &bits, wake); |
606 | 606 | ||
607 | prealloc = NULL; | 607 | prealloc = NULL; |
608 | goto out; | 608 | goto out; |
@@ -613,7 +613,7 @@ hit_next: | |||
613 | else | 613 | else |
614 | next_node = NULL; | 614 | next_node = NULL; |
615 | 615 | ||
616 | set |= clear_state_bit(tree, state, bits, wake, delete); | 616 | set |= clear_state_bit(tree, state, &bits, wake); |
617 | if (last_end == (u64)-1) | 617 | if (last_end == (u64)-1) |
618 | goto out; | 618 | goto out; |
619 | start = last_end + 1; | 619 | start = last_end + 1; |
@@ -706,19 +706,19 @@ out: | |||
706 | 706 | ||
707 | static int set_state_bits(struct extent_io_tree *tree, | 707 | static int set_state_bits(struct extent_io_tree *tree, |
708 | struct extent_state *state, | 708 | struct extent_state *state, |
709 | int bits) | 709 | int *bits) |
710 | { | 710 | { |
711 | int ret; | 711 | int ret; |
712 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
712 | 713 | ||
713 | ret = set_state_cb(tree, state, bits); | 714 | ret = set_state_cb(tree, state, bits); |
714 | if (ret) | 715 | if (ret) |
715 | return ret; | 716 | return ret; |
716 | 717 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | |
717 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
718 | u64 range = state->end - state->start + 1; | 718 | u64 range = state->end - state->start + 1; |
719 | tree->dirty_bytes += range; | 719 | tree->dirty_bytes += range; |
720 | } | 720 | } |
721 | state->state |= bits; | 721 | state->state |= bits_to_set; |
722 | 722 | ||
723 | return 0; | 723 | return 0; |
724 | } | 724 | } |
@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state, | |||
745 | * [start, end] is inclusive This takes the tree lock. | 745 | * [start, end] is inclusive This takes the tree lock. |
746 | */ | 746 | */ |
747 | 747 | ||
748 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 748 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
749 | int bits, int exclusive_bits, u64 *failed_start, | 749 | int bits, int exclusive_bits, u64 *failed_start, |
750 | struct extent_state **cached_state, | 750 | struct extent_state **cached_state, gfp_t mask) |
751 | gfp_t mask) | ||
752 | { | 751 | { |
753 | struct extent_state *state; | 752 | struct extent_state *state; |
754 | struct extent_state *prealloc = NULL; | 753 | struct extent_state *prealloc = NULL; |
@@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
757 | u64 last_start; | 756 | u64 last_start; |
758 | u64 last_end; | 757 | u64 last_end; |
759 | 758 | ||
759 | bits |= EXTENT_FIRST_DELALLOC; | ||
760 | again: | 760 | again: |
761 | if (!prealloc && (mask & __GFP_WAIT)) { | 761 | if (!prealloc && (mask & __GFP_WAIT)) { |
762 | prealloc = alloc_extent_state(mask); | 762 | prealloc = alloc_extent_state(mask); |
@@ -778,7 +778,7 @@ again: | |||
778 | */ | 778 | */ |
779 | node = tree_search(tree, start); | 779 | node = tree_search(tree, start); |
780 | if (!node) { | 780 | if (!node) { |
781 | err = insert_state(tree, prealloc, start, end, bits); | 781 | err = insert_state(tree, prealloc, start, end, &bits); |
782 | prealloc = NULL; | 782 | prealloc = NULL; |
783 | BUG_ON(err == -EEXIST); | 783 | BUG_ON(err == -EEXIST); |
784 | goto out; | 784 | goto out; |
@@ -802,7 +802,7 @@ hit_next: | |||
802 | goto out; | 802 | goto out; |
803 | } | 803 | } |
804 | 804 | ||
805 | err = set_state_bits(tree, state, bits); | 805 | err = set_state_bits(tree, state, &bits); |
806 | if (err) | 806 | if (err) |
807 | goto out; | 807 | goto out; |
808 | 808 | ||
@@ -852,7 +852,7 @@ hit_next: | |||
852 | if (err) | 852 | if (err) |
853 | goto out; | 853 | goto out; |
854 | if (state->end <= end) { | 854 | if (state->end <= end) { |
855 | err = set_state_bits(tree, state, bits); | 855 | err = set_state_bits(tree, state, &bits); |
856 | if (err) | 856 | if (err) |
857 | goto out; | 857 | goto out; |
858 | cache_state(state, cached_state); | 858 | cache_state(state, cached_state); |
@@ -877,7 +877,7 @@ hit_next: | |||
877 | else | 877 | else |
878 | this_end = last_start - 1; | 878 | this_end = last_start - 1; |
879 | err = insert_state(tree, prealloc, start, this_end, | 879 | err = insert_state(tree, prealloc, start, this_end, |
880 | bits); | 880 | &bits); |
881 | BUG_ON(err == -EEXIST); | 881 | BUG_ON(err == -EEXIST); |
882 | if (err) { | 882 | if (err) { |
883 | prealloc = NULL; | 883 | prealloc = NULL; |
@@ -903,7 +903,7 @@ hit_next: | |||
903 | err = split_state(tree, state, prealloc, end + 1); | 903 | err = split_state(tree, state, prealloc, end + 1); |
904 | BUG_ON(err == -EEXIST); | 904 | BUG_ON(err == -EEXIST); |
905 | 905 | ||
906 | err = set_state_bits(tree, prealloc, bits); | 906 | err = set_state_bits(tree, prealloc, &bits); |
907 | if (err) { | 907 | if (err) { |
908 | prealloc = NULL; | 908 | prealloc = NULL; |
909 | goto out; | 909 | goto out; |
@@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
966 | { | 966 | { |
967 | return clear_extent_bit(tree, start, end, | 967 | return clear_extent_bit(tree, start, end, |
968 | EXTENT_DIRTY | EXTENT_DELALLOC | | 968 | EXTENT_DIRTY | EXTENT_DELALLOC | |
969 | EXTENT_DO_ACCOUNTING, 0, 0, | 969 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); |
970 | NULL, mask); | ||
971 | } | 970 | } |
972 | 971 | ||
973 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1435 | if (op & EXTENT_CLEAR_DELALLOC) | 1434 | if (op & EXTENT_CLEAR_DELALLOC) |
1436 | clear_bits |= EXTENT_DELALLOC; | 1435 | clear_bits |= EXTENT_DELALLOC; |
1437 | 1436 | ||
1438 | if (op & EXTENT_CLEAR_ACCOUNTING) | ||
1439 | clear_bits |= EXTENT_DO_ACCOUNTING; | ||
1440 | |||
1441 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | 1437 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
1442 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 1438 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
1443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | 1439 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | |
@@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1916 | 1912 | ||
1917 | if (tree->ops && tree->ops->submit_bio_hook) | 1913 | if (tree->ops && tree->ops->submit_bio_hook) |
1918 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1914 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1919 | mirror_num, bio_flags); | 1915 | mirror_num, bio_flags, start); |
1920 | else | 1916 | else |
1921 | submit_bio(rw, bio); | 1917 | submit_bio(rw, bio); |
1922 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1918 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
@@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2020 | sector_t sector; | 2016 | sector_t sector; |
2021 | struct extent_map *em; | 2017 | struct extent_map *em; |
2022 | struct block_device *bdev; | 2018 | struct block_device *bdev; |
2019 | struct btrfs_ordered_extent *ordered; | ||
2023 | int ret; | 2020 | int ret; |
2024 | int nr = 0; | 2021 | int nr = 0; |
2025 | size_t page_offset = 0; | 2022 | size_t page_offset = 0; |
@@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2031 | set_page_extent_mapped(page); | 2028 | set_page_extent_mapped(page); |
2032 | 2029 | ||
2033 | end = page_end; | 2030 | end = page_end; |
2034 | lock_extent(tree, start, end, GFP_NOFS); | 2031 | while (1) { |
2032 | lock_extent(tree, start, end, GFP_NOFS); | ||
2033 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
2034 | if (!ordered) | ||
2035 | break; | ||
2036 | unlock_extent(tree, start, end, GFP_NOFS); | ||
2037 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2038 | btrfs_put_ordered_extent(ordered); | ||
2039 | } | ||
2035 | 2040 | ||
2036 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 2041 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
2037 | char *userpage; | 2042 | char *userpage; |
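
__extent_read_full_page() now refuses to hold the extent range locked while an ordered extent (an in-flight write) still covers the page: it locks, looks up, and on a hit unlocks, waits for the writeback to complete, and retries. A generic model of that lock/check/unlock/wait loop; the helpers are illustrative stubs, not btrfs API:

    #include <stdio.h>

    struct ordered { int pending; };
    static struct ordered inflight = { 1 };

    static void lock_range(void)   { /* lock_extent()   */ }
    static void unlock_range(void) { /* unlock_extent() */ }
    static struct ordered *lookup_ordered(void)
    {
        return inflight.pending ? &inflight : NULL;
    }
    static void wait_for_ordered(struct ordered *o) { o->pending = 0; }

    /* Never sleep on an ordered extent while holding the range lock. */
    static void lock_range_no_ordered(void)
    {
        for (;;) {
            lock_range();
            struct ordered *o = lookup_ordered();
            if (!o)
                return;       /* locked, nothing in flight */
            unlock_range();   /* drop the lock before sleeping */
            wait_for_ordered(o);
            /* the real code drops its ref with btrfs_put_ordered_extent() */
        }
    }

    int main(void)
    {
        lock_range_no_ordered();
        printf("range locked with no pending ordered extent\n");
        return 0;
    }

Dropping the lock before waiting is the whole point: sleeping with it held would risk deadlocking against the writeback completion path, which takes the same range lock.
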
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bbab4813646f..5691c7b590da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -16,7 +16,9 @@ | |||
16 | #define EXTENT_BOUNDARY (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | ||
19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | ||
20 | 22 | ||
21 | /* flags for bio submission */ | 23 | /* flags for bio submission */ |
22 | #define EXTENT_BIO_COMPRESSED 1 | 24 | #define EXTENT_BIO_COMPRESSED 1 |
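
EXTENT_CTLBITS collects the control flags (EXTENT_DO_ACCOUNTING, EXTENT_FIRST_DELALLOC) that callers pass down so the set_bit/clear_bit hooks can see them, but that must never be stored in, or cleared from, an extent_state; hence the bits_to_set/bits_to_clear masking with ~EXTENT_CTLBITS in extent_io.c above. A tiny demonstration of the masking (the two control-bit positions are copied from this hunk; EXTENT_DIRTY's is assumed for illustration):

    #include <stdio.h>

    #define EXTENT_DIRTY          (1 << 0)
    #define EXTENT_DO_ACCOUNTING  (1 << 11)
    #define EXTENT_FIRST_DELALLOC (1 << 12)
    #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)

    int main(void)
    {
        int bits = EXTENT_DIRTY | EXTENT_FIRST_DELALLOC;

        /* What set_state_bits() actually stores: control bits stripped. */
        int bits_to_set = bits & ~EXTENT_CTLBITS;

        printf("requested 0x%x, stored 0x%x\n", bits, bits_to_set);
        /* prints: requested 0x1001, stored 0x1 */
        return 0;
    }
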
@@ -47,7 +49,7 @@ struct extent_state; | |||
47 | 49 | ||
48 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 50 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
49 | struct bio *bio, int mirror_num, | 51 | struct bio *bio, int mirror_num, |
50 | unsigned long bio_flags); | 52 | unsigned long bio_flags, u64 bio_offset); |
51 | struct extent_io_ops { | 53 | struct extent_io_ops { |
52 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 54 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
53 | u64 start, u64 end, int *page_started, | 55 | u64 start, u64 end, int *page_started, |
@@ -69,10 +71,10 @@ struct extent_io_ops { | |||
69 | struct extent_state *state); | 71 | struct extent_state *state); |
70 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 72 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
71 | struct extent_state *state, int uptodate); | 73 | struct extent_state *state, int uptodate); |
72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
73 | unsigned long old, unsigned long bits); | 75 | int *bits); |
74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 76 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
75 | unsigned long bits); | 77 | int *bits); |
76 | int (*merge_extent_hook)(struct inode *inode, | 78 | int (*merge_extent_hook)(struct inode *inode, |
77 | struct extent_state *new, | 79 | struct extent_state *new, |
78 | struct extent_state *other); | 80 | struct extent_state *other); |
@@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
176 | u64 *start, u64 search_end, | 178 | u64 *start, u64 search_end, |
177 | u64 max_bytes, unsigned long bits); | 179 | u64 max_bytes, unsigned long bits); |
178 | 180 | ||
181 | void free_extent_state(struct extent_state *state); | ||
179 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
180 | int bits, int filled, struct extent_state *cached_state); | 183 | int bits, int filled, struct extent_state *cached_state); |
181 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
185 | gfp_t mask); | 188 | gfp_t mask); |
186 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
187 | int bits, gfp_t mask); | 190 | int bits, gfp_t mask); |
191 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
192 | int bits, int exclusive_bits, u64 *failed_start, | ||
193 | struct extent_state **cached_state, gfp_t mask); | ||
188 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
189 | gfp_t mask); | 195 | gfp_t mask); |
190 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54a255065aa3..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | 151 | ||
152 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
153 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
154 | u64 logical_offset, u32 *dst, int dio) | ||
154 | { | 155 | { |
155 | u32 sum; | 156 | u32 sum; |
156 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
157 | int bio_index = 0; | 158 | int bio_index = 0; |
158 | u64 offset; | 159 | u64 offset = 0; |
159 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
160 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
161 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
@@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
174 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
175 | 176 | ||
176 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
178 | if (dio) | ||
179 | offset = logical_offset; | ||
177 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
178 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
179 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
180 | if (ret == 0) | 184 | if (ret == 0) |
181 | goto found; | 185 | goto found; |
@@ -238,6 +242,7 @@ found: | |||
238 | else | 242 | else |
239 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
240 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
245 | offset += bvec->bv_len; | ||
241 | bio_index++; | 246 | bio_index++; |
242 | bvec++; | 247 | bvec++; |
243 | } | 248 | } |
@@ -245,6 +250,18 @@ found: | |||
245 | return 0; | 250 | return 0; |
246 | } | 251 | } |
247 | 252 | ||
253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
254 | struct bio *bio, u32 *dst) | ||
255 | { | ||
256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
257 | } | ||
258 | |||
259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
260 | struct bio *bio, u64 offset, u32 *dst) | ||
261 | { | ||
262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
263 | } | ||
264 | |||
248 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
249 | struct list_head *list) | 266 | struct list_head *list) |
250 | { | 267 | { |
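
For a buffered read the per-block file offset can be recomputed from each page (page_offset(bvec->bv_page) + bv_offset), but a direct-I/O bio has no page-cache pages to consult, so __btrfs_lookup_bio_sums() takes the starting logical offset from its caller and advances it by bv_len in step with the disk bytenr. A sketch of the two offset-tracking modes across a bio's segments (types are simplified stand-ins; bv_len plays the same role as in struct bio_vec):

    #include <stdio.h>

    struct segment { unsigned int bv_len; unsigned long long page_off; };

    static void walk_bio(struct segment *segs, int n,
                         unsigned long long disk_bytenr,
                         unsigned long long logical_offset, int dio)
    {
        unsigned long long offset = dio ? logical_offset : 0;

        for (int i = 0; i < n; i++) {
            if (!dio)                      /* buffered: ask the page */
                offset = segs[i].page_off;
            printf("csum lookup: file off %llu, disk %llu\n",
                   offset, disk_bytenr);
            disk_bytenr += segs[i].bv_len; /* both cursors advance;   */
            offset      += segs[i].bv_len; /* dio relies on this one  */
        }
    }

    int main(void)
    {
        struct segment segs[] = { { 4096, 8192 }, { 4096, 12288 } };

        walk_bio(segs, 2, 1048576, 8192, 1); /* direct-I/O mode */
        return 0;
    }

The offset += bvec->bv_len line added near the end of the loop is what keeps the dio cursor in sync; before this patch, offset was only ever derived from the page.
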
@@ -657,6 +674,9 @@ again: | |||
657 | goto found; | 674 | goto found; |
658 | } | 675 | } |
659 | ret = PTR_ERR(item); | 676 | ret = PTR_ERR(item); |
677 | if (ret != -EFBIG && ret != -ENOENT) | ||
678 | goto fail_unlock; | ||
679 | |||
660 | if (ret == -EFBIG) { | 680 | if (ret == -EFBIG) { |
661 | u32 item_size; | 681 | u32 item_size; |
662 | /* we found one, but it isn't big enough yet */ | 682 | /* we found one, but it isn't big enough yet */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 29ff749ff4ca..787b50a16a14 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -46,32 +46,42 @@ | |||
46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
47 | int write_bytes, | 47 | int write_bytes, |
48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
49 | const char __user *buf) | 49 | struct iov_iter *i) |
50 | { | 50 | { |
51 | long page_fault = 0; | 51 | size_t copied; |
52 | int i; | 52 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | 54 | ||
55 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
56 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
59 | fault_in_pages_readable(buf, count); | 59 | again: |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
61 | return -EFAULT; | ||
60 | 62 | ||
61 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
62 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
63 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
64 | buf, count); | ||
65 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
66 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
67 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
68 | buf += count; | 69 | write_bytes -= copied; |
69 | write_bytes -= count; | ||
70 | 70 | ||
71 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
72 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
73 | iov_iter_single_seg_count(i)); | ||
74 | goto again; | ||
75 | } | ||
76 | |||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
78 | offset += copied; | ||
79 | } else { | ||
80 | pg++; | ||
81 | offset = 0; | ||
82 | } | ||
73 | } | 83 | } |
74 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
75 | } | 85 | } |
76 | 86 | ||
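
The rewritten btrfs_copy_from_user() follows the standard iov_iter pattern: fault the user pages in, attempt the copy, and on a short copy shrink the request to the current segment and retry instead of bailing out, advancing the destination page only when it fills. A userspace model of the retry logic, with memcpy standing in for iov_iter_copy_from_user() and one forced short copy to exercise the goto-again path:

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Stand-in for iov_iter_copy_from_user(): the first attempt copies
     * nothing, like a fault on a swapped-out user page. */
    static size_t copy_chunk(char *dst, const char *src, size_t n)
    {
        static int first = 1;

        if (first) {
            first = 0;
            return 0;      /* forced short copy */
        }
        memcpy(dst, src, n);
        return n;
    }

    int main(void)
    {
        char page[PAGE_SIZE];  /* one buffer stands in for all pages */
        char src[6000];
        size_t write_bytes = sizeof(src), offset = 100, pos = 0;

        memset(src, 'x', sizeof(src));
        while (write_bytes > 0) {
            size_t count = PAGE_SIZE - offset;

            if (count > write_bytes)
                count = write_bytes;
    again:
            ;
            size_t copied = copy_chunk(page + offset, src + pos, count);

            if (copied == 0)
                goto again;        /* the kernel also shrinks count to
                                      the current segment before retrying */
            pos += copied;
            write_bytes -= copied;
            if (copied < PAGE_SIZE - offset)
                offset += copied;  /* partial page: stay on it  */
            else
                offset = 0;        /* page filled: move to next */
        }
        printf("copied %zu bytes\n", pos);
        return 0;
    }
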
77 | /* | 87 | /* |
@@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
126 | end_of_last_block = start_pos + num_bytes - 1; | 136 | end_of_last_block = start_pos + num_bytes - 1; |
127 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
128 | NULL); | 138 | NULL); |
129 | if (err) | 139 | BUG_ON(err); |
130 | return err; | ||
131 | 140 | ||
132 | for (i = 0; i < num_pages; i++) { | 141 | for (i = 0; i < num_pages; i++) { |
133 | struct page *p = pages[i]; | 142 | struct page *p = pages[i]; |
@@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
142 | * at this time. | 151 | * at this time. |
143 | */ | 152 | */ |
144 | } | 153 | } |
145 | return err; | 154 | return 0; |
146 | } | 155 | } |
147 | 156 | ||
148 | /* | 157 | /* |
@@ -823,45 +832,46 @@ again: | |||
823 | return 0; | 832 | return 0; |
824 | } | 833 | } |
825 | 834 | ||
826 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
827 | size_t count, loff_t *ppos) | 836 | const struct iovec *iov, |
837 | unsigned long nr_segs, loff_t pos) | ||
828 | { | 838 | { |
829 | loff_t pos; | 839 | struct file *file = iocb->ki_filp; |
840 | struct inode *inode = fdentry(file)->d_inode; | ||
841 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
842 | struct page *pinned[2]; | ||
843 | struct page **pages = NULL; | ||
844 | struct iov_iter i; | ||
845 | loff_t *ppos = &iocb->ki_pos; | ||
830 | loff_t start_pos; | 846 | loff_t start_pos; |
831 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
832 | ssize_t err = 0; | 848 | ssize_t err = 0; |
849 | size_t count; | ||
850 | size_t ocount; | ||
833 | int ret = 0; | 851 | int ret = 0; |
834 | struct inode *inode = fdentry(file)->d_inode; | ||
835 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
836 | struct page **pages = NULL; | ||
837 | int nrptrs; | 852 | int nrptrs; |
838 | struct page *pinned[2]; | ||
839 | unsigned long first_index; | 853 | unsigned long first_index; |
840 | unsigned long last_index; | 854 | unsigned long last_index; |
841 | int will_write; | 855 | int will_write; |
856 | int buffered = 0; | ||
842 | 857 | ||
843 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
844 | (file->f_flags & O_DIRECT)); | 859 | (file->f_flags & O_DIRECT)); |
845 | 860 | ||
846 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
847 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
848 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
849 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
850 | 863 | ||
851 | pos = *ppos; | ||
852 | start_pos = pos; | 864 | start_pos = pos; |
853 | 865 | ||
854 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
855 | 867 | ||
856 | /* do the reserve before the mutex lock in case we have to do some | ||
857 | * flushing. We wouldn't deadlock, but this is more polite. | ||
858 | */ | ||
859 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
860 | if (err) | ||
861 | goto out_nolock; | ||
862 | |||
863 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
864 | 869 | ||
870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
871 | if (err) | ||
872 | goto out; | ||
873 | count = ocount; | ||
874 | |||
865 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
866 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
867 | if (err) | 877 | if (err) |
@@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
875 | goto out; | 885 | goto out; |
876 | 886 | ||
877 | file_update_time(file); | 887 | file_update_time(file); |
888 | BTRFS_I(inode)->sequence++; | ||
889 | |||
890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
892 | pos, ppos, count, | ||
893 | ocount); | ||
894 | /* | ||
895 | * the generic O_DIRECT will update in-memory i_size after the | ||
896 | * DIOs are done. But our endio handlers that update the on | ||
897 | * disk i_size never update past the in memory i_size. So we | ||
898 | * need one more update here to catch any additions to the | ||
899 | * file | ||
900 | */ | ||
901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
903 | mark_inode_dirty(inode); | ||
904 | } | ||
878 | 905 | ||
906 | if (num_written < 0) { | ||
907 | ret = num_written; | ||
908 | num_written = 0; | ||
909 | goto out; | ||
910 | } else if (num_written == count) { | ||
911 | /* pick up pos changes done by the generic code */ | ||
912 | pos = *ppos; | ||
913 | goto out; | ||
914 | } | ||
915 | /* | ||
916 | * We are going to do buffered for the rest of the range, so we | ||
917 | * need to make sure to invalidate the buffered pages when we're | ||
918 | * done. | ||
919 | */ | ||
920 | buffered = 1; | ||
921 | pos += num_written; | ||
922 | } | ||
923 | |||
924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
927 | (sizeof(struct page *))); | ||
879 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
880 | 929 | ||
881 | /* generic_write_checks can change our pos */ | 930 | /* generic_write_checks can change our pos */ |
882 | start_pos = pos; | 931 | start_pos = pos; |
883 | 932 | ||
884 | BTRFS_I(inode)->sequence++; | ||
885 | first_index = pos >> PAGE_CACHE_SHIFT; | 933 | first_index = pos >> PAGE_CACHE_SHIFT; |
886 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
887 | 935 | ||
888 | /* | 936 | /* |
889 | * there are lots of better ways to do this, but this code | 937 | * there are lots of better ways to do this, but this code |
@@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
900 | unlock_page(pinned[0]); | 948 | unlock_page(pinned[0]); |
901 | } | 949 | } |
902 | } | 950 | } |
903 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
904 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
905 | if (!PageUptodate(pinned[1])) { | 953 | if (!PageUptodate(pinned[1])) { |
906 | ret = btrfs_readpage(NULL, pinned[1]); | 954 | ret = btrfs_readpage(NULL, pinned[1]); |
@@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
911 | } | 959 | } |
912 | } | 960 | } |
913 | 961 | ||
914 | while (count > 0) { | 962 | while (iov_iter_count(&i) > 0) { |
915 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
916 | size_t write_bytes = min(count, nrptrs * | 964 | size_t write_bytes = min(iov_iter_count(&i), |
917 | (size_t)PAGE_CACHE_SIZE - | 965 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
918 | offset); | 966 | offset); |
919 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
920 | PAGE_CACHE_SHIFT; | 968 | PAGE_CACHE_SHIFT; |
@@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
922 | WARN_ON(num_pages > nrptrs); | 970 | WARN_ON(num_pages > nrptrs); |
923 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 971 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
924 | 972 | ||
925 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); |
926 | if (ret) | 974 | if (ret) |
927 | goto out; | 975 | goto out; |
928 | 976 | ||
@@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
930 | pos, first_index, last_index, | 978 | pos, first_index, last_index, |
931 | write_bytes); | 979 | write_bytes); |
932 | if (ret) { | 980 | if (ret) { |
933 | btrfs_free_reserved_data_space(root, inode, | 981 | btrfs_delalloc_release_space(inode, write_bytes); |
934 | write_bytes); | ||
935 | goto out; | 982 | goto out; |
936 | } | 983 | } |
937 | 984 | ||
938 | ret = btrfs_copy_from_user(pos, num_pages, | 985 | ret = btrfs_copy_from_user(pos, num_pages, |
939 | write_bytes, pages, buf); | 986 | write_bytes, pages, &i); |
940 | if (ret) { | 987 | if (ret == 0) { |
941 | btrfs_free_reserved_data_space(root, inode, | 988 | dirty_and_release_pages(NULL, root, file, pages, |
942 | write_bytes); | 989 | num_pages, pos, write_bytes); |
943 | btrfs_drop_pages(pages, num_pages); | ||
944 | goto out; | ||
945 | } | 990 | } |
946 | 991 | ||
947 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
948 | num_pages, pos, write_bytes); | ||
949 | btrfs_drop_pages(pages, num_pages); | 992 | btrfs_drop_pages(pages, num_pages); |
950 | if (ret) { | 993 | if (ret) { |
951 | btrfs_free_reserved_data_space(root, inode, | 994 | btrfs_delalloc_release_space(inode, write_bytes); |
952 | write_bytes); | ||
953 | goto out; | 995 | goto out; |
954 | } | 996 | } |
955 | 997 | ||
@@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
965 | btrfs_throttle(root); | 1007 | btrfs_throttle(root); |
966 | } | 1008 | } |
967 | 1009 | ||
968 | buf += write_bytes; | ||
969 | count -= write_bytes; | ||
970 | pos += write_bytes; | 1010 | pos += write_bytes; |
971 | num_written += write_bytes; | 1011 | num_written += write_bytes; |
972 | 1012 | ||
@@ -976,9 +1016,7 @@ out: | |||
976 | mutex_unlock(&inode->i_mutex); | 1016 | mutex_unlock(&inode->i_mutex); |
977 | if (ret) | 1017 | if (ret) |
978 | err = ret; | 1018 | err = ret; |
979 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
980 | 1019 | ||
981 | out_nolock: | ||
982 | kfree(pages); | 1020 | kfree(pages); |
983 | if (pinned[0]) | 1021 | if (pinned[0]) |
984 | page_cache_release(pinned[0]); | 1022 | page_cache_release(pinned[0]); |
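
btrfs_file_aio_write() now tries generic_file_direct_write() first and drops to the buffered loop only for whatever tail the direct write could not handle, remembering (buffered = 1) to invalidate the page cache over the written range afterwards so a later O_DIRECT read cannot see stale pages. The bookkeeping reduces to a pair of cursors; a condensed sketch with stand-in helpers:

    #include <stdio.h>

    /* Stand-in: a direct write that only handles the aligned prefix. */
    static long direct_write(long pos, long count)
    {
        (void)pos;
        return count & ~511L;  /* pretend 512-byte alignment */
    }

    static long buffered_write(long pos, long count)
    {
        (void)pos;
        return count;          /* buffered path takes the rest */
    }

    int main(void)
    {
        long pos = 0, count = 10000, num_written, buffered = 0;

        num_written = direct_write(pos, count);
        if (num_written < count) {  /* tail falls back to buffered */
            buffered = 1;
            pos += num_written;
            num_written += buffered_write(pos, count - num_written);
        }
        if (buffered)
            printf("invalidate page cache over %ld written bytes\n",
                   num_written);
        return 0;
    }
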
@@ -1008,7 +1046,7 @@ out_nolock: | |||
1008 | num_written = err; | 1046 | num_written = err; |
1009 | 1047 | ||
1010 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1011 | trans = btrfs_start_transaction(root, 1); | 1049 | trans = btrfs_start_transaction(root, 0); |
1012 | ret = btrfs_log_dentry_safe(trans, root, | 1050 | ret = btrfs_log_dentry_safe(trans, root, |
1013 | file->f_dentry); | 1051 | file->f_dentry); |
1014 | if (ret == 0) { | 1052 | if (ret == 0) { |
@@ -1023,7 +1061,7 @@ out_nolock: | |||
1023 | btrfs_end_transaction(trans, root); | 1061 | btrfs_end_transaction(trans, root); |
1024 | } | 1062 | } |
1025 | } | 1063 | } |
1026 | if (file->f_flags & O_DIRECT) { | 1064 | if (file->f_flags & O_DIRECT && buffered) { |
1027 | invalidate_mapping_pages(inode->i_mapping, | 1065 | invalidate_mapping_pages(inode->i_mapping, |
1028 | start_pos >> PAGE_CACHE_SHIFT, | 1066 | start_pos >> PAGE_CACHE_SHIFT, |
1029 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
@@ -1063,8 +1101,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1063 | * important optimization for directories because holding the mutex prevents | 1101 | * important optimization for directories because holding the mutex prevents |
1064 | * new operations on the dir while we write to disk. | 1102 | * new operations on the dir while we write to disk. |
1065 | */ | 1103 | */ |
1066 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | 1104 | int btrfs_sync_file(struct file *file, int datasync) |
1067 | { | 1105 | { |
1106 | struct dentry *dentry = file->f_path.dentry; | ||
1068 | struct inode *inode = dentry->d_inode; | 1107 | struct inode *inode = dentry->d_inode; |
1069 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1108 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1070 | int ret = 0; | 1109 | int ret = 0; |
@@ -1104,9 +1143,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1104 | if (file && file->private_data) | 1143 | if (file && file->private_data) |
1105 | btrfs_ioctl_trans_end(file); | 1144 | btrfs_ioctl_trans_end(file); |
1106 | 1145 | ||
1107 | trans = btrfs_start_transaction(root, 1); | 1146 | trans = btrfs_start_transaction(root, 0); |
1108 | if (!trans) { | 1147 | if (IS_ERR(trans)) { |
1109 | ret = -ENOMEM; | 1148 | ret = PTR_ERR(trans); |
1110 | goto out; | 1149 | goto out; |
1111 | } | 1150 | } |
1112 | 1151 | ||
@@ -1161,7 +1200,7 @@ const struct file_operations btrfs_file_operations = { | |||
1161 | .read = do_sync_read, | 1200 | .read = do_sync_read, |
1162 | .aio_read = generic_file_aio_read, | 1201 | .aio_read = generic_file_aio_read, |
1163 | .splice_read = generic_file_splice_read, | 1202 | .splice_read = generic_file_splice_read, |
1164 | .write = btrfs_file_write, | 1203 | .aio_write = btrfs_file_aio_write, |
1165 | .mmap = btrfs_file_mmap, | 1204 | .mmap = btrfs_file_mmap, |
1166 | .open = generic_file_open, | 1205 | .open = generic_file_open, |
1167 | .release = btrfs_release_file, | 1206 | .release = btrfs_release_file, |
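The btrfs_sync_file() hunk above drops the dentry argument from the ->fsync prototype and recovers it from file->f_path.dentry instead. A minimal user-space sketch of that pattern, using stand-in struct definitions rather than the kernel's:

/* User-space sketch (not kernel code) of the new ->fsync shape used by
 * btrfs_sync_file() above: the dentry is no longer passed in, so an
 * implementation that still needs it digs it out of the file itself. */
#include <stdio.h>

struct dentry { const char *name; };
struct path { struct dentry *dentry; };
struct file { struct path f_path; };

static int example_sync_file(struct file *file, int datasync)
{
        struct dentry *dentry = file->f_path.dentry;  /* derived, not passed */
        printf("sync %s datasync=%d\n", dentry->name, datasync);
        return 0;
}

int main(void)
{
        struct dentry d = { "somefile" };
        struct file f = { { &d } };
        return example_sync_file(&f, 1);
}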
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
49 | return 0; | 49 | return 0; |
50 | } | 50 | } |
51 | 51 | ||
52 | struct btrfs_inode_ref * | ||
53 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
54 | struct btrfs_root *root, | ||
55 | struct btrfs_path *path, | ||
56 | const char *name, int name_len, | ||
57 | u64 inode_objectid, u64 ref_objectid, int mod) | ||
58 | { | ||
59 | struct btrfs_key key; | ||
60 | struct btrfs_inode_ref *ref; | ||
61 | int ins_len = mod < 0 ? -1 : 0; | ||
62 | int cow = mod != 0; | ||
63 | int ret; | ||
64 | |||
65 | key.objectid = inode_objectid; | ||
66 | key.type = BTRFS_INODE_REF_KEY; | ||
67 | key.offset = ref_objectid; | ||
68 | |||
69 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
70 | if (ret < 0) | ||
71 | return ERR_PTR(ret); | ||
72 | if (ret > 0) | ||
73 | return NULL; | ||
74 | if (!find_name_in_backref(path, name, name_len, &ref)) | ||
75 | return NULL; | ||
76 | return ref; | ||
77 | } | ||
78 | |||
52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 79 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, |
53 | struct btrfs_root *root, | 80 | struct btrfs_root *root, |
54 | const char *name, int name_len, | 81 | const char *name, int name_len, |
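The new btrfs_lookup_inode_ref() above distinguishes three outcomes: a hard error (an errno encoded into the pointer), a clean miss (NULL), and a hit (a pointer into the leaf). A user-space sketch of that return convention, with ERR_PTR/IS_ERR/PTR_ERR re-implemented here purely for illustration:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095
static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static int item;                        /* stands in for a found ref */

static void *lookup(int key)
{
        if (key < 0)
                return ERR_PTR(-EIO);   /* hard failure */
        if (key == 0)
                return NULL;            /* clean search, nothing found */
        return &item;                   /* found */
}

int main(void)
{
        void *ref = lookup(-1);
        if (IS_ERR(ref))
                printf("error %ld\n", PTR_ERR(ref));
        if (!lookup(0))
                printf("not found\n");
        if (lookup(1))
                printf("found\n");
        return 0;
}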
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d601629b85d1..fa6ccc1bfe2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
252 | inline_len, compressed_size, | 252 | inline_len, compressed_size, |
253 | compressed_pages); | 253 | compressed_pages); |
254 | BUG_ON(ret); | 254 | BUG_ON(ret); |
255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | ||
255 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
256 | return 0; | 257 | return 0; |
257 | } | 258 | } |
@@ -414,6 +415,7 @@ again: | |||
414 | trans = btrfs_join_transaction(root, 1); | 415 | trans = btrfs_join_transaction(root, 1); |
415 | BUG_ON(!trans); | 416 | BUG_ON(!trans); |
416 | btrfs_set_trans_block_group(trans, inode); | 417 | btrfs_set_trans_block_group(trans, inode); |
418 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
417 | 419 | ||
418 | /* lets try to make an inline extent */ | 420 | /* lets try to make an inline extent */ |
419 | if (ret || total_in < (actual_end - start)) { | 421 | if (ret || total_in < (actual_end - start)) { |
@@ -439,7 +441,6 @@ again: | |||
439 | start, end, NULL, | 441 | start, end, NULL, |
440 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 442 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
441 | EXTENT_CLEAR_DELALLOC | | 443 | EXTENT_CLEAR_DELALLOC | |
442 | EXTENT_CLEAR_ACCOUNTING | | ||
443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | 444 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); |
444 | 445 | ||
445 | btrfs_end_transaction(trans, root); | 446 | btrfs_end_transaction(trans, root); |
@@ -697,6 +698,38 @@ retry: | |||
697 | return 0; | 698 | return 0; |
698 | } | 699 | } |
699 | 700 | ||
701 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
702 | u64 num_bytes) | ||
703 | { | ||
704 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
705 | struct extent_map *em; | ||
706 | u64 alloc_hint = 0; | ||
707 | |||
708 | read_lock(&em_tree->lock); | ||
709 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
710 | if (em) { | ||
711 | /* | ||
712 | * if block start isn't an actual block number then find the | ||
713 | * first block in this inode and use that as a hint. If that | ||
714 | * block is also bogus then just don't worry about it. | ||
715 | */ | ||
716 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
717 | free_extent_map(em); | ||
718 | em = search_extent_mapping(em_tree, 0, 0); | ||
719 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
720 | alloc_hint = em->block_start; | ||
721 | if (em) | ||
722 | free_extent_map(em); | ||
723 | } else { | ||
724 | alloc_hint = em->block_start; | ||
725 | free_extent_map(em); | ||
726 | } | ||
727 | } | ||
728 | read_unlock(&em_tree->lock); | ||
729 | |||
730 | return alloc_hint; | ||
731 | } | ||
732 | |||
700 | /* | 733 | /* |
701 | * when extent_io.c finds a delayed allocation range in the file, | 734 | * when extent_io.c finds a delayed allocation range in the file, |
702 | * the call backs end up in this code. The basic idea is to | 735 | * the call backs end up in this code. The basic idea is to |
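The new get_extent_allocation_hint() helper centralizes the hint lookup that cow_file_range() previously open-coded (the duplicate is removed further down in this file's diff): prefer the block already mapped at this offset, fall back to the file's first real block when that start is a sentinel, and otherwise return no hint. A simplified user-space sketch of the same decision tree; the types, the search function, and the sentinel value are stand-ins, not the btrfs API:

#include <stdint.h>
#include <stdio.h>

#define LAST_BYTE ((uint64_t)-4)        /* stand-in for EXTENT_MAP_LAST_BYTE */

struct map { uint64_t block_start; };

static struct map first = { 4096 };
static struct map hole = { (uint64_t)-2 };      /* a bogus block start */

static struct map *search(uint64_t start, uint64_t len)
{
        (void)len;
        return start ? &hole : &first;  /* toy tree: the range maps to a hole */
}

static uint64_t alloc_hint(uint64_t start, uint64_t len)
{
        struct map *em = search(start, len);
        uint64_t hint = 0;

        if (em && em->block_start >= LAST_BYTE) {
                /* bogus start: fall back to the file's first mapping */
                em = search(0, 0);
                if (em && em->block_start < LAST_BYTE)
                        hint = em->block_start;
        } else if (em) {
                hint = em->block_start;
        }
        return hint;
}

int main(void)
{
        printf("hint %llu\n", (unsigned long long)alloc_hint(8192, 4096));
        return 0;
}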
@@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
734 | trans = btrfs_join_transaction(root, 1); | 767 | trans = btrfs_join_transaction(root, 1); |
735 | BUG_ON(!trans); | 768 | BUG_ON(!trans); |
736 | btrfs_set_trans_block_group(trans, inode); | 769 | btrfs_set_trans_block_group(trans, inode); |
770 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
737 | 771 | ||
738 | actual_end = min_t(u64, isize, end + 1); | 772 | actual_end = min_t(u64, isize, end + 1); |
739 | 773 | ||
@@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
753 | EXTENT_CLEAR_UNLOCK_PAGE | | 787 | EXTENT_CLEAR_UNLOCK_PAGE | |
754 | EXTENT_CLEAR_UNLOCK | | 788 | EXTENT_CLEAR_UNLOCK | |
755 | EXTENT_CLEAR_DELALLOC | | 789 | EXTENT_CLEAR_DELALLOC | |
756 | EXTENT_CLEAR_ACCOUNTING | | ||
757 | EXTENT_CLEAR_DIRTY | | 790 | EXTENT_CLEAR_DIRTY | |
758 | EXTENT_SET_WRITEBACK | | 791 | EXTENT_SET_WRITEBACK | |
759 | EXTENT_END_WRITEBACK); | 792 | EXTENT_END_WRITEBACK); |
@@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
769 | BUG_ON(disk_num_bytes > | 802 | BUG_ON(disk_num_bytes > |
770 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 803 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
771 | 804 | ||
772 | 805 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
773 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
774 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
775 | start, num_bytes); | ||
776 | if (em) { | ||
777 | /* | ||
778 | * if block start isn't an actual block number then find the | ||
779 | * first block in this inode and use that as a hint. If that | ||
780 | * block is also bogus then just don't worry about it. | ||
781 | */ | ||
782 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
783 | free_extent_map(em); | ||
784 | em = search_extent_mapping(em_tree, 0, 0); | ||
785 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
786 | alloc_hint = em->block_start; | ||
787 | if (em) | ||
788 | free_extent_map(em); | ||
789 | } else { | ||
790 | alloc_hint = em->block_start; | ||
791 | free_extent_map(em); | ||
792 | } | ||
793 | } | ||
794 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
795 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 806 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
796 | 807 | ||
797 | while (disk_num_bytes > 0) { | 808 | while (disk_num_bytes > 0) { |
@@ -1174,6 +1185,13 @@ out_check: | |||
1174 | num_bytes, num_bytes, type); | 1185 | num_bytes, num_bytes, type); |
1175 | BUG_ON(ret); | 1186 | BUG_ON(ret); |
1176 | 1187 | ||
1188 | if (root->root_key.objectid == | ||
1189 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
1190 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | ||
1191 | num_bytes); | ||
1192 | BUG_ON(ret); | ||
1193 | } | ||
1194 | |||
1177 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1195 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
1178 | cur_offset, cur_offset + num_bytes - 1, | 1196 | cur_offset, cur_offset + num_bytes - 1, |
1179 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | | 1197 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
@@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1226 | } | 1244 | } |
1227 | 1245 | ||
1228 | static int btrfs_split_extent_hook(struct inode *inode, | 1246 | static int btrfs_split_extent_hook(struct inode *inode, |
1229 | struct extent_state *orig, u64 split) | 1247 | struct extent_state *orig, u64 split) |
1230 | { | 1248 | { |
1249 | /* not delalloc, ignore it */ | ||
1231 | if (!(orig->state & EXTENT_DELALLOC)) | 1250 | if (!(orig->state & EXTENT_DELALLOC)) |
1232 | return 0; | 1251 | return 0; |
1233 | 1252 | ||
1234 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1253 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1235 | BTRFS_I(inode)->outstanding_extents++; | ||
1236 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1237 | |||
1238 | return 0; | 1254 | return 0; |
1239 | } | 1255 | } |
1240 | 1256 | ||
@@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1252 | if (!(other->state & EXTENT_DELALLOC)) | 1268 | if (!(other->state & EXTENT_DELALLOC)) |
1253 | return 0; | 1269 | return 0; |
1254 | 1270 | ||
1255 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1271 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1256 | BTRFS_I(inode)->outstanding_extents--; | ||
1257 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1258 | |||
1259 | return 0; | 1272 | return 0; |
1260 | } | 1273 | } |
1261 | 1274 | ||
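The split/merge hooks above swap a spinlock-protected counter for an atomic_t, since a bare increment or decrement needs no wider critical section. A user-space sketch of the same trade, with C11 atomics standing in for the kernel's atomic_t:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int outstanding_extents;

static void on_split(void) { atomic_fetch_add(&outstanding_extents, 1); }
static void on_merge(void) { atomic_fetch_sub(&outstanding_extents, 1); }

int main(void)
{
        on_split();
        on_split();
        on_merge();
        printf("outstanding: %d\n", atomic_load(&outstanding_extents));
        return 0;
}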
@@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1264 | * bytes in this file, and to maintain the list of inodes that | 1277 | * bytes in this file, and to maintain the list of inodes that |
1265 | * have pending delalloc work to be done. | 1278 | * have pending delalloc work to be done. |
1266 | */ | 1279 | */ |
1267 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1280 | static int btrfs_set_bit_hook(struct inode *inode, |
1268 | unsigned long old, unsigned long bits) | 1281 | struct extent_state *state, int *bits) |
1269 | { | 1282 | { |
1270 | 1283 | ||
1271 | /* | 1284 | /* |
@@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1273 | * but in this case, we are only testing for the DELALLOC | 1286 | * but in this case, we are only testing for the DELALLOC |
1274 | * bit, which is only set or cleared with irqs on | 1287 | * bit, which is only set or cleared with irqs on |
1275 | */ | 1288 | */ |
1276 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1289 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1277 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1291 | u64 len = state->end + 1 - state->start; | ||
1278 | 1292 | ||
1279 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1293 | if (*bits & EXTENT_FIRST_DELALLOC) |
1280 | BTRFS_I(inode)->outstanding_extents++; | 1294 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1281 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1295 | else |
1282 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1296 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1283 | 1297 | ||
1284 | spin_lock(&root->fs_info->delalloc_lock); | 1298 | spin_lock(&root->fs_info->delalloc_lock); |
1285 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1299 | BTRFS_I(inode)->delalloc_bytes += len; |
1286 | root->fs_info->delalloc_bytes += end - start + 1; | 1300 | root->fs_info->delalloc_bytes += len; |
1287 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1301 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1288 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1302 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
1289 | &root->fs_info->delalloc_inodes); | 1303 | &root->fs_info->delalloc_inodes); |
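btrfs_set_bit_hook() now takes the bit mask by pointer so it can consume EXTENT_FIRST_DELALLOC: the first delalloc range is already accounted for by the reservation, so only later ranges bump the extent count. A user-space sketch of that one-shot flag pattern; the flag values here are illustrative, not the kernel's:

#include <stdio.h>

#define DELALLOC       0x1              /* illustrative values only */
#define FIRST_DELALLOC 0x2

static int outstanding;

static void set_bit_hook(int *bits)
{
        if (!(*bits & DELALLOC))
                return;
        if (*bits & FIRST_DELALLOC)
                *bits &= ~FIRST_DELALLOC;       /* consume: already accounted */
        else
                outstanding++;                  /* a genuinely new extent */
}

int main(void)
{
        int bits = DELALLOC | FIRST_DELALLOC;

        set_bit_hook(&bits);    /* first range: flag eaten, count unchanged */
        set_bit_hook(&bits);    /* later range: counted */
        printf("outstanding=%d\n", outstanding);
        return 0;
}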
@@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1297 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1311 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1298 | */ | 1312 | */ |
1299 | static int btrfs_clear_bit_hook(struct inode *inode, | 1313 | static int btrfs_clear_bit_hook(struct inode *inode, |
1300 | struct extent_state *state, unsigned long bits) | 1314 | struct extent_state *state, int *bits) |
1301 | { | 1315 | { |
1302 | /* | 1316 | /* |
1303 | * set_bit and clear bit hooks normally require _irqsave/restore | 1317 | * set_bit and clear bit hooks normally require _irqsave/restore |
1304 | * but in this case, we are only testing for the DELALLOC | 1318 | * but in this case, we are only testing for the DELALLOC |
1305 | * bit, which is only set or cleared with irqs on | 1319 | * bit, which is only set or cleared with irqs on |
1306 | */ | 1320 | */ |
1307 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1321 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1308 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1322 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1323 | u64 len = state->end + 1 - state->start; | ||
1309 | 1324 | ||
1310 | if (bits & EXTENT_DO_ACCOUNTING) { | 1325 | if (*bits & EXTENT_FIRST_DELALLOC) |
1311 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1326 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1312 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | 1327 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) |
1313 | BTRFS_I(inode)->outstanding_extents--; | 1328 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1314 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1329 | |
1315 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | 1330 | if (*bits & EXTENT_DO_ACCOUNTING) |
1316 | } | 1331 | btrfs_delalloc_release_metadata(inode, len); |
1332 | |||
1333 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
1334 | btrfs_free_reserved_data_space(inode, len); | ||
1317 | 1335 | ||
1318 | spin_lock(&root->fs_info->delalloc_lock); | 1336 | spin_lock(&root->fs_info->delalloc_lock); |
1319 | if (state->end - state->start + 1 > | 1337 | root->fs_info->delalloc_bytes -= len; |
1320 | root->fs_info->delalloc_bytes) { | 1338 | BTRFS_I(inode)->delalloc_bytes -= len; |
1321 | printk(KERN_INFO "btrfs warning: delalloc account " | 1339 | |
1322 | "%llu %llu\n", | ||
1323 | (unsigned long long) | ||
1324 | state->end - state->start + 1, | ||
1325 | (unsigned long long) | ||
1326 | root->fs_info->delalloc_bytes); | ||
1327 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
1328 | root->fs_info->delalloc_bytes = 0; | ||
1329 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
1330 | } else { | ||
1331 | btrfs_delalloc_free_space(root, inode, | ||
1332 | state->end - | ||
1333 | state->start + 1); | ||
1334 | root->fs_info->delalloc_bytes -= state->end - | ||
1335 | state->start + 1; | ||
1336 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
1337 | state->start + 1; | ||
1338 | } | ||
1339 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
1340 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1341 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1342 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
@@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1384 | */ | 1385 | */ |
1385 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, | 1386 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, |
1386 | struct bio *bio, int mirror_num, | 1387 | struct bio *bio, int mirror_num, |
1387 | unsigned long bio_flags) | 1388 | unsigned long bio_flags, |
1389 | u64 bio_offset) | ||
1388 | { | 1390 | { |
1389 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1391 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1390 | int ret = 0; | 1392 | int ret = 0; |
@@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, | |||
1403 | * are inserted into the btree | 1405 | * are inserted into the btree |
1404 | */ | 1406 | */ |
1405 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 1407 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
1406 | int mirror_num, unsigned long bio_flags) | 1408 | int mirror_num, unsigned long bio_flags, |
1409 | u64 bio_offset) | ||
1407 | { | 1410 | { |
1408 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1409 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1412 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
@@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
1414 | * on write, or reading the csums from the tree before a read | 1417 | * on write, or reading the csums from the tree before a read |
1415 | */ | 1418 | */ |
1416 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 1419 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
1417 | int mirror_num, unsigned long bio_flags) | 1420 | int mirror_num, unsigned long bio_flags, |
1421 | u64 bio_offset) | ||
1418 | { | 1422 | { |
1419 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1423 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1420 | int ret = 0; | 1424 | int ret = 0; |
@@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1439 | /* we're doing a write, do the async checksumming */ | 1443 | /* we're doing a write, do the async checksumming */ |
1440 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1444 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
1441 | inode, rw, bio, mirror_num, | 1445 | inode, rw, bio, mirror_num, |
1442 | bio_flags, __btrfs_submit_bio_start, | 1446 | bio_flags, bio_offset, |
1447 | __btrfs_submit_bio_start, | ||
1443 | __btrfs_submit_bio_done); | 1448 | __btrfs_submit_bio_done); |
1444 | } | 1449 | } |
1445 | 1450 | ||
@@ -1520,6 +1525,7 @@ again: | |||
1520 | goto again; | 1525 | goto again; |
1521 | } | 1526 | } |
1522 | 1527 | ||
1528 | BUG(); | ||
1523 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); | 1529 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); |
1524 | ClearPageChecked(page); | 1530 | ClearPageChecked(page); |
1525 | out: | 1531 | out: |
@@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1650 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1656 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
1651 | { | 1657 | { |
1652 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1658 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1653 | struct btrfs_trans_handle *trans; | 1659 | struct btrfs_trans_handle *trans = NULL; |
1654 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1660 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1655 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1661 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1656 | struct extent_state *cached_state = NULL; | 1662 | struct extent_state *cached_state = NULL; |
@@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1668 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1674 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1669 | if (!ret) { | 1675 | if (!ret) { |
1670 | trans = btrfs_join_transaction(root, 1); | 1676 | trans = btrfs_join_transaction(root, 1); |
1677 | btrfs_set_trans_block_group(trans, inode); | ||
1678 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1671 | ret = btrfs_update_inode(trans, root, inode); | 1679 | ret = btrfs_update_inode(trans, root, inode); |
1672 | BUG_ON(ret); | 1680 | BUG_ON(ret); |
1673 | btrfs_end_transaction(trans, root); | ||
1674 | } | 1681 | } |
1675 | goto out; | 1682 | goto out; |
1676 | } | 1683 | } |
@@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1680 | 0, &cached_state, GFP_NOFS); | 1687 | 0, &cached_state, GFP_NOFS); |
1681 | 1688 | ||
1682 | trans = btrfs_join_transaction(root, 1); | 1689 | trans = btrfs_join_transaction(root, 1); |
1690 | btrfs_set_trans_block_group(trans, inode); | ||
1691 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1683 | 1692 | ||
1684 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1693 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1685 | compressed = 1; | 1694 | compressed = 1; |
@@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1711 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1720 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1712 | &ordered_extent->list); | 1721 | &ordered_extent->list); |
1713 | 1722 | ||
1714 | /* this also removes the ordered extent from the tree */ | ||
1715 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1723 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1716 | ret = btrfs_update_inode(trans, root, inode); | 1724 | ret = btrfs_update_inode(trans, root, inode); |
1717 | BUG_ON(ret); | 1725 | BUG_ON(ret); |
1718 | btrfs_end_transaction(trans, root); | ||
1719 | out: | 1726 | out: |
1727 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
1728 | if (trans) | ||
1729 | btrfs_end_transaction(trans, root); | ||
1720 | /* once for us */ | 1730 | /* once for us */ |
1721 | btrfs_put_ordered_extent(ordered_extent); | 1731 | btrfs_put_ordered_extent(ordered_extent); |
1722 | /* once for the tree */ | 1732 | /* once for the tree */ |
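btrfs_finish_ordered_io() now initializes the handle to NULL and funnels every exit through the one out: label, so the metadata release runs on all paths and the transaction is ended only if one was actually started. A user-space sketch of that cleanup shape, with toy stand-ins for the transaction calls:

#include <stdio.h>
#include <stdlib.h>

struct trans { int dummy; };

static struct trans *join(void) { return calloc(1, sizeof(struct trans)); }
static void end(struct trans *t) { printf("end transaction\n"); free(t); }
static void release_metadata(void) { printf("release metadata\n"); }

static int finish(int need_trans)
{
        struct trans *trans = NULL;     /* may never be started */

        if (!need_trans)
                goto out;               /* early exit, no handle */
        trans = join();
out:
        release_metadata();             /* runs on every path */
        if (trans)                      /* ended exactly once, if started */
                end(trans);
        return 0;
}

int main(void)
{
        finish(0);
        finish(1);
        return 0;
}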
@@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1838 | 1848 | ||
1839 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1849 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1840 | failrec->last_mirror, | 1850 | failrec->last_mirror, |
1841 | failrec->bio_flags); | 1851 | failrec->bio_flags, 0); |
1842 | return 0; | 1852 | return 0; |
1843 | } | 1853 | } |
1844 | 1854 | ||
@@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
1993 | } | 2003 | } |
1994 | 2004 | ||
1995 | /* | 2005 | /* |
2006 | * calculate the extra metadata reservation needed when snapshotting a | ||
2007 | * subvolume that contains orphan files. | ||
2008 | */ | ||
2009 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2010 | struct btrfs_pending_snapshot *pending, | ||
2011 | u64 *bytes_to_reserve) | ||
2012 | { | ||
2013 | struct btrfs_root *root; | ||
2014 | struct btrfs_block_rsv *block_rsv; | ||
2015 | u64 num_bytes; | ||
2016 | int index; | ||
2017 | |||
2018 | root = pending->root; | ||
2019 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2020 | return; | ||
2021 | |||
2022 | block_rsv = root->orphan_block_rsv; | ||
2023 | |||
2024 | /* orphan block reservation for the snapshot */ | ||
2025 | num_bytes = block_rsv->size; | ||
2026 | |||
2027 | /* | ||
2028 | * after the snapshot is created, COWing tree blocks may use more | ||
2029 | * space than it frees. So we should make sure there is enough | ||
2030 | * reserved space. | ||
2031 | */ | ||
2032 | index = trans->transid & 0x1; | ||
2033 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2034 | num_bytes += block_rsv->size - | ||
2035 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2036 | } | ||
2037 | |||
2038 | *bytes_to_reserve += num_bytes; | ||
2039 | } | ||
2040 | |||
2041 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2042 | struct btrfs_pending_snapshot *pending) | ||
2043 | { | ||
2044 | struct btrfs_root *root = pending->root; | ||
2045 | struct btrfs_root *snap = pending->snap; | ||
2046 | struct btrfs_block_rsv *block_rsv; | ||
2047 | u64 num_bytes; | ||
2048 | int index; | ||
2049 | int ret; | ||
2050 | |||
2051 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2052 | return; | ||
2053 | |||
2054 | /* refill source subvolume's orphan block reservation */ | ||
2055 | block_rsv = root->orphan_block_rsv; | ||
2056 | index = trans->transid & 0x1; | ||
2057 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2058 | num_bytes = block_rsv->size - | ||
2059 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2060 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2061 | root->orphan_block_rsv, | ||
2062 | num_bytes); | ||
2063 | BUG_ON(ret); | ||
2064 | } | ||
2065 | |||
2066 | /* setup orphan block reservation for the snapshot */ | ||
2067 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
2068 | BUG_ON(!block_rsv); | ||
2069 | |||
2070 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2071 | snap->orphan_block_rsv = block_rsv; | ||
2072 | |||
2073 | num_bytes = root->orphan_block_rsv->size; | ||
2074 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2075 | block_rsv, num_bytes); | ||
2076 | BUG_ON(ret); | ||
2077 | |||
2078 | #if 0 | ||
2079 | /* insert orphan item for the snapshot */ | ||
2080 | WARN_ON(!root->orphan_item_inserted); | ||
2081 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2082 | snap->root_key.objectid); | ||
2083 | BUG_ON(ret); | ||
2084 | snap->orphan_item_inserted = 1; | ||
2085 | #endif | ||
2086 | } | ||
2087 | |||
2088 | enum btrfs_orphan_cleanup_state { | ||
2089 | ORPHAN_CLEANUP_STARTED = 1, | ||
2090 | ORPHAN_CLEANUP_DONE = 2, | ||
2091 | }; | ||
2092 | |||
2093 | /* | ||
2094 | * This is called at transaction commit time. If there are no orphan | ||
2095 | * files in the subvolume, it removes the orphan item and frees the | ||
2096 | * block_rsv structure. | ||
2097 | */ | ||
2098 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2099 | struct btrfs_root *root) | ||
2100 | { | ||
2101 | int ret; | ||
2102 | |||
2103 | if (!list_empty(&root->orphan_list) || | ||
2104 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | ||
2105 | return; | ||
2106 | |||
2107 | if (root->orphan_item_inserted && | ||
2108 | btrfs_root_refs(&root->root_item) > 0) { | ||
2109 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | ||
2110 | root->root_key.objectid); | ||
2111 | BUG_ON(ret); | ||
2112 | root->orphan_item_inserted = 0; | ||
2113 | } | ||
2114 | |||
2115 | if (root->orphan_block_rsv) { | ||
2116 | WARN_ON(root->orphan_block_rsv->size > 0); | ||
2117 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | ||
2118 | root->orphan_block_rsv = NULL; | ||
2119 | } | ||
2120 | } | ||
2121 | |||
2122 | /* | ||
1996 | * This creates an orphan entry for the given inode in case something goes | 2123 | * This creates an orphan entry for the given inode in case something goes |
1997 | * wrong in the middle of an unlink/truncate. | 2124 | * wrong in the middle of an unlink/truncate. |
2125 | * | ||
2126 | * NOTE: the caller of this function should reserve 5 units of | ||
2127 | * metadata before calling it. | ||
1998 | */ | 2128 | */ |
1999 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | 2129 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) |
2000 | { | 2130 | { |
2001 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2131 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2002 | int ret = 0; | 2132 | struct btrfs_block_rsv *block_rsv = NULL; |
2133 | int reserve = 0; | ||
2134 | int insert = 0; | ||
2135 | int ret; | ||
2003 | 2136 | ||
2004 | spin_lock(&root->list_lock); | 2137 | if (!root->orphan_block_rsv) { |
2138 | block_rsv = btrfs_alloc_block_rsv(root); | ||
2139 | BUG_ON(!block_rsv); | ||
2140 | } | ||
2005 | 2141 | ||
2006 | /* already on the orphan list, we're good */ | 2142 | spin_lock(&root->orphan_lock); |
2007 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2143 | if (!root->orphan_block_rsv) { |
2008 | spin_unlock(&root->list_lock); | 2144 | root->orphan_block_rsv = block_rsv; |
2009 | return 0; | 2145 | } else if (block_rsv) { |
2146 | btrfs_free_block_rsv(root, block_rsv); | ||
2147 | block_rsv = NULL; | ||
2148 | } | ||
2149 | |||
2150 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
2151 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2152 | #if 0 | ||
2153 | /* | ||
2154 | * For proper ENOSPC handling, we should do orphan | ||
2155 | * cleanup when mounting. But this introduces a backward | ||
2156 | * compatibility issue. | ||
2157 | */ | ||
2158 | if (!xchg(&root->orphan_item_inserted, 1)) | ||
2159 | insert = 2; | ||
2160 | else | ||
2161 | insert = 1; | ||
2162 | #endif | ||
2163 | insert = 1; | ||
2164 | } else { | ||
2165 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2010 | } | 2166 | } |
2011 | 2167 | ||
2012 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2168 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
2169 | BTRFS_I(inode)->orphan_meta_reserved = 1; | ||
2170 | reserve = 1; | ||
2171 | } | ||
2172 | spin_unlock(&root->orphan_lock); | ||
2013 | 2173 | ||
2014 | spin_unlock(&root->list_lock); | 2174 | if (block_rsv) |
2175 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2015 | 2176 | ||
2016 | /* | 2177 | /* grab metadata reservation from transaction handle */ |
2017 | * insert an orphan item to track this unlinked/truncated file | 2178 | if (reserve) { |
2018 | */ | 2179 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
2019 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | 2180 | BUG_ON(ret); |
2181 | } | ||
2020 | 2182 | ||
2021 | return ret; | 2183 | /* insert an orphan item to track this unlinked/truncated file */ |
2184 | if (insert >= 1) { | ||
2185 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
2186 | BUG_ON(ret); | ||
2187 | } | ||
2188 | |||
2189 | /* insert an orphan item to track that the subvolume contains orphan files */ | ||
2190 | if (insert >= 2) { | ||
2191 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2192 | root->root_key.objectid); | ||
2193 | BUG_ON(ret); | ||
2194 | } | ||
2195 | return 0; | ||
2022 | } | 2196 | } |
2023 | 2197 | ||
2024 | /* | 2198 | /* |
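btrfs_orphan_add() above installs root->orphan_block_rsv lazily: allocate optimistically before taking the lock, install under the lock only if it is still unset, and free the copy that lost the race. A user-space sketch of that idiom, using a pthread mutex in place of the orphan spinlock:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *shared_rsv;

static void ensure_rsv(void)
{
        void *rsv = NULL;

        if (!shared_rsv)
                rsv = malloc(64);       /* optimistic allocation, no lock */

        pthread_mutex_lock(&lock);
        if (!shared_rsv) {
                shared_rsv = rsv;       /* we won the race: install ours */
                rsv = NULL;
        }
        pthread_mutex_unlock(&lock);

        free(rsv);                      /* no-op unless we lost the race */
}

int main(void)
{
        ensure_rsv();
        ensure_rsv();
        printf("rsv installed: %s\n", shared_rsv ? "yes" : "no");
        return 0;
}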
@@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2028 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | 2202 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) |
2029 | { | 2203 | { |
2030 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2205 | int delete_item = 0; | ||
2206 | int release_rsv = 0; | ||
2031 | int ret = 0; | 2207 | int ret = 0; |
2032 | 2208 | ||
2033 | spin_lock(&root->list_lock); | 2209 | spin_lock(&root->orphan_lock); |
2034 | 2210 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | |
2035 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2211 | list_del_init(&BTRFS_I(inode)->i_orphan); |
2036 | spin_unlock(&root->list_lock); | 2212 | delete_item = 1; |
2037 | return 0; | ||
2038 | } | 2213 | } |
2039 | 2214 | ||
2040 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2215 | if (BTRFS_I(inode)->orphan_meta_reserved) { |
2041 | if (!trans) { | 2216 | BTRFS_I(inode)->orphan_meta_reserved = 0; |
2042 | spin_unlock(&root->list_lock); | 2217 | release_rsv = 1; |
2043 | return 0; | ||
2044 | } | 2218 | } |
2219 | spin_unlock(&root->orphan_lock); | ||
2045 | 2220 | ||
2046 | spin_unlock(&root->list_lock); | 2221 | if (trans && delete_item) { |
2222 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
2223 | BUG_ON(ret); | ||
2224 | } | ||
2047 | 2225 | ||
2048 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2226 | if (release_rsv) |
2227 | btrfs_orphan_release_metadata(inode); | ||
2049 | 2228 | ||
2050 | return ret; | 2229 | return 0; |
2051 | } | 2230 | } |
2052 | 2231 | ||
2053 | /* | 2232 | /* |
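The rewritten btrfs_orphan_del() above only records, under the spinlock, which actions are owed (delete_item, release_rsv) and performs them after dropping it, since item deletion and reservation release are too heavy for a spinlocked section. A user-space sketch of that decide-then-act split:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int on_list = 1, meta_reserved = 1;

static void orphan_del(void)
{
        int delete_item = 0, release_rsv = 0;

        pthread_mutex_lock(&lock);
        if (on_list) {                  /* decide under the lock... */
                on_list = 0;
                delete_item = 1;
        }
        if (meta_reserved) {
                meta_reserved = 0;
                release_rsv = 1;
        }
        pthread_mutex_unlock(&lock);

        if (delete_item)                /* ...act after dropping it */
                printf("delete orphan item\n");
        if (release_rsv)
                printf("release metadata reservation\n");
}

int main(void)
{
        orphan_del();
        return 0;
}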
@@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2064 | struct inode *inode; | 2243 | struct inode *inode; |
2065 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2244 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2066 | 2245 | ||
2067 | if (!xchg(&root->clean_orphans, 0)) | 2246 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2068 | return; | 2247 | return; |
2069 | 2248 | ||
2070 | path = btrfs_alloc_path(); | 2249 | path = btrfs_alloc_path(); |
@@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2117 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2296 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2118 | found_key.offset = 0; | 2297 | found_key.offset = 0; |
2119 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2298 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2120 | if (IS_ERR(inode)) | 2299 | BUG_ON(IS_ERR(inode)); |
2121 | break; | ||
2122 | 2300 | ||
2123 | /* | 2301 | /* |
2124 | * add this inode to the orphan list so btrfs_orphan_del does | 2302 | * add this inode to the orphan list so btrfs_orphan_del does |
2125 | * the proper thing when we hit it | 2303 | * the proper thing when we hit it |
2126 | */ | 2304 | */ |
2127 | spin_lock(&root->list_lock); | 2305 | spin_lock(&root->orphan_lock); |
2128 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2306 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); |
2129 | spin_unlock(&root->list_lock); | 2307 | spin_unlock(&root->orphan_lock); |
2130 | 2308 | ||
2131 | /* | 2309 | /* |
2132 | * if this is a bad inode, it means we actually succeeded in | 2310 |
@@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2135 | * do a destroy_inode | 2313 | * do a destroy_inode |
2136 | */ | 2314 | */ |
2137 | if (is_bad_inode(inode)) { | 2315 | if (is_bad_inode(inode)) { |
2138 | trans = btrfs_start_transaction(root, 1); | 2316 | trans = btrfs_start_transaction(root, 0); |
2139 | btrfs_orphan_del(trans, inode); | 2317 | btrfs_orphan_del(trans, inode); |
2140 | btrfs_end_transaction(trans, root); | 2318 | btrfs_end_transaction(trans, root); |
2141 | iput(inode); | 2319 | iput(inode); |
@@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2153 | /* this will do delete_inode and everything for us */ | 2331 | /* this will do delete_inode and everything for us */ |
2154 | iput(inode); | 2332 | iput(inode); |
2155 | } | 2333 | } |
2334 | btrfs_free_path(path); | ||
2335 | |||
2336 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | ||
2337 | |||
2338 | if (root->orphan_block_rsv) | ||
2339 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | ||
2340 | (u64)-1); | ||
2341 | |||
2342 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | ||
2343 | trans = btrfs_join_transaction(root, 1); | ||
2344 | btrfs_end_transaction(trans, root); | ||
2345 | } | ||
2156 | 2346 | ||
2157 | if (nr_unlink) | 2347 | if (nr_unlink) |
2158 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2348 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2159 | if (nr_truncate) | 2349 | if (nr_truncate) |
2160 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2350 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2161 | |||
2162 | btrfs_free_path(path); | ||
2163 | } | 2351 | } |
2164 | 2352 | ||
2165 | /* | 2353 | /* |
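The cleanup gate in btrfs_orphan_cleanup() above grows from a boolean xchg into a small cmpxchg-driven state machine (ORPHAN_CLEANUP_STARTED/DONE), so the work runs at most once and later code can distinguish in-progress from finished. A user-space sketch using C11 atomics in place of the kernel's cmpxchg():

#include <stdatomic.h>
#include <stdio.h>

enum { CLEANUP_IDLE, CLEANUP_STARTED, CLEANUP_DONE };

static atomic_int state = CLEANUP_IDLE;

static void cleanup(void)
{
        int expected = CLEANUP_IDLE;

        /* only the caller that moves IDLE -> STARTED does the work */
        if (!atomic_compare_exchange_strong(&state, &expected,
                                            CLEANUP_STARTED))
                return;

        printf("running orphan cleanup\n");
        atomic_store(&state, CLEANUP_DONE);
}

int main(void)
{
        cleanup();
        cleanup();      /* second call sees STARTED/DONE and backs off */
        return 0;
}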
@@ -2478,29 +2666,201 @@ out: | |||
2478 | return ret; | 2666 | return ret; |
2479 | } | 2667 | } |
2480 | 2668 | ||
2481 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2669 | /* helper to check if there is any shared block in the path */ |
2670 | static int check_path_shared(struct btrfs_root *root, | ||
2671 | struct btrfs_path *path) | ||
2672 | { | ||
2673 | struct extent_buffer *eb; | ||
2674 | int level; | ||
2675 | int ret; | ||
2676 | u64 refs; | ||
2677 | |||
2678 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
2679 | if (!path->nodes[level]) | ||
2680 | break; | ||
2681 | eb = path->nodes[level]; | ||
2682 | if (!btrfs_block_can_be_shared(root, eb)) | ||
2683 | continue; | ||
2684 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, | ||
2685 | &refs, NULL); | ||
2686 | if (refs > 1) | ||
2687 | return 1; | ||
2688 | } | ||
2689 | return 0; | ||
2690 | } | ||
2691 | |||
2692 | /* | ||
2693 | * helper to start transaction for unlink and rmdir. | ||
2694 | * | ||
2695 | * unlink and rmdir are special in btrfs: they do not always free space, | ||
2696 | * so in the ENOSPC case we should make sure they will free space before | ||
2697 | * allowing them to use the global metadata reservation. | ||
2698 | */ | ||
2699 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | ||
2700 | struct dentry *dentry) | ||
2482 | { | 2701 | { |
2483 | struct btrfs_root *root; | ||
2484 | struct btrfs_trans_handle *trans; | 2702 | struct btrfs_trans_handle *trans; |
2703 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2704 | struct btrfs_path *path; | ||
2705 | struct btrfs_inode_ref *ref; | ||
2706 | struct btrfs_dir_item *di; | ||
2485 | struct inode *inode = dentry->d_inode; | 2707 | struct inode *inode = dentry->d_inode; |
2708 | u64 index; | ||
2709 | int check_link = 1; | ||
2710 | int err = -ENOSPC; | ||
2486 | int ret; | 2711 | int ret; |
2487 | unsigned long nr = 0; | ||
2488 | 2712 | ||
2489 | root = BTRFS_I(dir)->root; | 2713 | trans = btrfs_start_transaction(root, 10); |
2714 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | ||
2715 | return trans; | ||
2490 | 2716 | ||
2491 | /* | 2717 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
2492 | * 5 items for unlink inode | 2718 | return ERR_PTR(-ENOSPC); |
2493 | * 1 for orphan | 2719 | |
2494 | */ | 2720 | /* check if someone else holds a reference */ |
2495 | ret = btrfs_reserve_metadata_space(root, 6); | 2721 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) |
2496 | if (ret) | 2722 | return ERR_PTR(-ENOSPC); |
2497 | return ret; | 2723 | |
2724 | if (atomic_read(&inode->i_count) > 2) | ||
2725 | return ERR_PTR(-ENOSPC); | ||
2726 | |||
2727 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
2728 | return ERR_PTR(-ENOSPC); | ||
2498 | 2729 | ||
2499 | trans = btrfs_start_transaction(root, 1); | 2730 | path = btrfs_alloc_path(); |
2731 | if (!path) { | ||
2732 | root->fs_info->enospc_unlink = 0; | ||
2733 | return ERR_PTR(-ENOMEM); | ||
2734 | } | ||
2735 | |||
2736 | trans = btrfs_start_transaction(root, 0); | ||
2500 | if (IS_ERR(trans)) { | 2737 | if (IS_ERR(trans)) { |
2501 | btrfs_unreserve_metadata_space(root, 6); | 2738 | btrfs_free_path(path); |
2502 | return PTR_ERR(trans); | 2739 | root->fs_info->enospc_unlink = 0; |
2740 | return trans; | ||
2741 | } | ||
2742 | |||
2743 | path->skip_locking = 1; | ||
2744 | path->search_commit_root = 1; | ||
2745 | |||
2746 | ret = btrfs_lookup_inode(trans, root, path, | ||
2747 | &BTRFS_I(dir)->location, 0); | ||
2748 | if (ret < 0) { | ||
2749 | err = ret; | ||
2750 | goto out; | ||
2751 | } | ||
2752 | if (ret == 0) { | ||
2753 | if (check_path_shared(root, path)) | ||
2754 | goto out; | ||
2755 | } else { | ||
2756 | check_link = 0; | ||
2757 | } | ||
2758 | btrfs_release_path(root, path); | ||
2759 | |||
2760 | ret = btrfs_lookup_inode(trans, root, path, | ||
2761 | &BTRFS_I(inode)->location, 0); | ||
2762 | if (ret < 0) { | ||
2763 | err = ret; | ||
2764 | goto out; | ||
2765 | } | ||
2766 | if (ret == 0) { | ||
2767 | if (check_path_shared(root, path)) | ||
2768 | goto out; | ||
2769 | } else { | ||
2770 | check_link = 0; | ||
2771 | } | ||
2772 | btrfs_release_path(root, path); | ||
2773 | |||
2774 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
2775 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
2776 | inode->i_ino, (u64)-1, 0); | ||
2777 | if (ret < 0) { | ||
2778 | err = ret; | ||
2779 | goto out; | ||
2780 | } | ||
2781 | BUG_ON(ret == 0); | ||
2782 | if (check_path_shared(root, path)) | ||
2783 | goto out; | ||
2784 | btrfs_release_path(root, path); | ||
2785 | } | ||
2786 | |||
2787 | if (!check_link) { | ||
2788 | err = 0; | ||
2789 | goto out; | ||
2790 | } | ||
2791 | |||
2792 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
2793 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2794 | if (IS_ERR(di)) { | ||
2795 | err = PTR_ERR(di); | ||
2796 | goto out; | ||
2797 | } | ||
2798 | if (di) { | ||
2799 | if (check_path_shared(root, path)) | ||
2800 | goto out; | ||
2801 | } else { | ||
2802 | err = 0; | ||
2803 | goto out; | ||
2503 | } | 2804 | } |
2805 | btrfs_release_path(root, path); | ||
2806 | |||
2807 | ref = btrfs_lookup_inode_ref(trans, root, path, | ||
2808 | dentry->d_name.name, dentry->d_name.len, | ||
2809 | inode->i_ino, dir->i_ino, 0); | ||
2810 | if (IS_ERR(ref)) { | ||
2811 | err = PTR_ERR(ref); | ||
2812 | goto out; | ||
2813 | } | ||
2814 | BUG_ON(!ref); | ||
2815 | if (check_path_shared(root, path)) | ||
2816 | goto out; | ||
2817 | index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
2818 | btrfs_release_path(root, path); | ||
2819 | |||
2820 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | ||
2821 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2822 | if (IS_ERR(di)) { | ||
2823 | err = PTR_ERR(di); | ||
2824 | goto out; | ||
2825 | } | ||
2826 | BUG_ON(ret == -ENOENT); | ||
2827 | if (check_path_shared(root, path)) | ||
2828 | goto out; | ||
2829 | |||
2830 | err = 0; | ||
2831 | out: | ||
2832 | btrfs_free_path(path); | ||
2833 | if (err) { | ||
2834 | btrfs_end_transaction(trans, root); | ||
2835 | root->fs_info->enospc_unlink = 0; | ||
2836 | return ERR_PTR(err); | ||
2837 | } | ||
2838 | |||
2839 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
2840 | return trans; | ||
2841 | } | ||
2842 | |||
2843 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
2844 | struct btrfs_root *root) | ||
2845 | { | ||
2846 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | ||
2847 | BUG_ON(!root->fs_info->enospc_unlink); | ||
2848 | root->fs_info->enospc_unlink = 0; | ||
2849 | } | ||
2850 | btrfs_end_transaction_throttle(trans, root); | ||
2851 | } | ||
2852 | |||
2853 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
2854 | { | ||
2855 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2856 | struct btrfs_trans_handle *trans; | ||
2857 | struct inode *inode = dentry->d_inode; | ||
2858 | int ret; | ||
2859 | unsigned long nr = 0; | ||
2860 | |||
2861 | trans = __unlink_start_trans(dir, dentry); | ||
2862 | if (IS_ERR(trans)) | ||
2863 | return PTR_ERR(trans); | ||
2504 | 2864 | ||
2505 | btrfs_set_trans_block_group(trans, dir); | 2865 | btrfs_set_trans_block_group(trans, dir); |
2506 | 2866 | ||
@@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2508 | 2868 | ||
2509 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2869 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2510 | dentry->d_name.name, dentry->d_name.len); | 2870 | dentry->d_name.name, dentry->d_name.len); |
2871 | BUG_ON(ret); | ||
2511 | 2872 | ||
2512 | if (inode->i_nlink == 0) | 2873 | if (inode->i_nlink == 0) { |
2513 | ret = btrfs_orphan_add(trans, inode); | 2874 | ret = btrfs_orphan_add(trans, inode); |
2875 | BUG_ON(ret); | ||
2876 | } | ||
2514 | 2877 | ||
2515 | nr = trans->blocks_used; | 2878 | nr = trans->blocks_used; |
2516 | 2879 | __unlink_end_trans(trans, root); | |
2517 | btrfs_end_transaction_throttle(trans, root); | ||
2518 | btrfs_unreserve_metadata_space(root, 6); | ||
2519 | btrfs_btree_balance_dirty(root, nr); | 2880 | btrfs_btree_balance_dirty(root, nr); |
2520 | return ret; | 2881 | return ret; |
2521 | } | 2882 | } |
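__unlink_start_trans() above implements a two-stage strategy: try a normally reserved transaction first, and only on ENOSPC, after the check_path_shared() walks prove the unlink will actually free space, borrow from the global reserve, serialized by the enospc_unlink flag. A much-simplified user-space sketch of that shape; the pools and checks are toy stand-ins:

#include <errno.h>
#include <stdio.h>

static int free_units;                  /* normal space pool (empty here) */
static int enospc_unlink;               /* at most one emergency user */

static int start_trans(int units)
{
        if (free_units >= units) {
                free_units -= units;
                return 0;
        }
        return -ENOSPC;
}

static int unlink_start(int will_free_space)
{
        if (start_trans(10) == 0)
                return 0;               /* common, fully reserved path */
        if (!will_free_space)
                return -ENOSPC;         /* may not dip into the reserve */
        if (__sync_lock_test_and_set(&enospc_unlink, 1))
                return -ENOSPC;         /* reserve already has an owner */
        printf("borrowing from the global reserve\n");
        return 0;
}

static void unlink_end(void)
{
        enospc_unlink = 0;              /* give the reserve back */
}

int main(void)
{
        if (unlink_start(1) == 0) {
                printf("unlink proceeds\n");
                unlink_end();
        }
        return 0;
}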
@@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2587 | { | 2948 | { |
2588 | struct inode *inode = dentry->d_inode; | 2949 | struct inode *inode = dentry->d_inode; |
2589 | int err = 0; | 2950 | int err = 0; |
2590 | int ret; | ||
2591 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2951 | struct btrfs_root *root = BTRFS_I(dir)->root; |
2592 | struct btrfs_trans_handle *trans; | 2952 | struct btrfs_trans_handle *trans; |
2593 | unsigned long nr = 0; | 2953 | unsigned long nr = 0; |
@@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2596 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 2956 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
2597 | return -ENOTEMPTY; | 2957 | return -ENOTEMPTY; |
2598 | 2958 | ||
2599 | ret = btrfs_reserve_metadata_space(root, 5); | 2959 | trans = __unlink_start_trans(dir, dentry); |
2600 | if (ret) | 2960 | if (IS_ERR(trans)) |
2601 | return ret; | ||
2602 | |||
2603 | trans = btrfs_start_transaction(root, 1); | ||
2604 | if (IS_ERR(trans)) { | ||
2605 | btrfs_unreserve_metadata_space(root, 5); | ||
2606 | return PTR_ERR(trans); | 2961 | return PTR_ERR(trans); |
2607 | } | ||
2608 | 2962 | ||
2609 | btrfs_set_trans_block_group(trans, dir); | 2963 | btrfs_set_trans_block_group(trans, dir); |
2610 | 2964 | ||
@@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2627 | btrfs_i_size_write(inode, 0); | 2981 | btrfs_i_size_write(inode, 0); |
2628 | out: | 2982 | out: |
2629 | nr = trans->blocks_used; | 2983 | nr = trans->blocks_used; |
2630 | ret = btrfs_end_transaction_throttle(trans, root); | 2984 | __unlink_end_trans(trans, root); |
2631 | btrfs_unreserve_metadata_space(root, 5); | ||
2632 | btrfs_btree_balance_dirty(root, nr); | 2985 | btrfs_btree_balance_dirty(root, nr); |
2633 | 2986 | ||
2634 | if (ret && !err) | ||
2635 | err = ret; | ||
2636 | return err; | 2987 | return err; |
2637 | } | 2988 | } |
2638 | 2989 | ||
@@ -3029,6 +3380,7 @@ out: | |||
3029 | if (pending_del_nr) { | 3380 | if (pending_del_nr) { |
3030 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | 3381 | ret = btrfs_del_items(trans, root, path, pending_del_slot, |
3031 | pending_del_nr); | 3382 | pending_del_nr); |
3383 | BUG_ON(ret); | ||
3032 | } | 3384 | } |
3033 | btrfs_free_path(path); | 3385 | btrfs_free_path(path); |
3034 | return err; | 3386 | return err; |
@@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3056 | 3408 | ||
3057 | if ((offset & (blocksize - 1)) == 0) | 3409 | if ((offset & (blocksize - 1)) == 0) |
3058 | goto out; | 3410 | goto out; |
3059 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 3411 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
3060 | if (ret) | ||
3061 | goto out; | ||
3062 | |||
3063 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
3064 | if (ret) | 3412 | if (ret) |
3065 | goto out; | 3413 | goto out; |
3066 | 3414 | ||
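btrfs_truncate_page() above collapses the separate data-space and metadata reservations into one btrfs_delalloc_reserve_space()/btrfs_delalloc_release_space() pair, leaving the error paths a single thing to undo. A toy user-space sketch of why pairing the two helps; the bookkeeping here is invented for illustration:

#include <stdio.h>

static long data_bytes, meta_bytes;

static int delalloc_reserve_space(long len)
{
        data_bytes += len;              /* data and metadata together */
        meta_bytes += len / 16;         /* toy metadata estimate */
        return 0;
}

static void delalloc_release_space(long len)
{
        data_bytes -= len;              /* one undo releases both */
        meta_bytes -= len / 16;
}

int main(void)
{
        const long page = 4096;

        if (delalloc_reserve_space(page))
                return 1;
        /* ... the page grab fails ... */
        delalloc_release_space(page);   /* single call on the error path */
        printf("data=%ld meta=%ld\n", data_bytes, meta_bytes);
        return 0;
}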
@@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3068 | again: | 3416 | again: |
3069 | page = grab_cache_page(mapping, index); | 3417 | page = grab_cache_page(mapping, index); |
3070 | if (!page) { | 3418 | if (!page) { |
3071 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3419 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3072 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3073 | goto out; | 3420 | goto out; |
3074 | } | 3421 | } |
3075 | 3422 | ||
@@ -3132,8 +3479,7 @@ again: | |||
3132 | 3479 | ||
3133 | out_unlock: | 3480 | out_unlock: |
3134 | if (ret) | 3481 | if (ret) |
3135 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3482 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3136 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3137 | unlock_page(page); | 3483 | unlock_page(page); |
3138 | page_cache_release(page); | 3484 | page_cache_release(page); |
3139 | out: | 3485 | out: |
@@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3145 | struct btrfs_trans_handle *trans; | 3491 | struct btrfs_trans_handle *trans; |
3146 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3492 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3147 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3493 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3148 | struct extent_map *em; | 3494 | struct extent_map *em = NULL; |
3149 | struct extent_state *cached_state = NULL; | 3495 | struct extent_state *cached_state = NULL; |
3150 | u64 mask = root->sectorsize - 1; | 3496 | u64 mask = root->sectorsize - 1; |
3151 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3497 | u64 hole_start = (inode->i_size + mask) & ~mask; |
@@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3183 | u64 hint_byte = 0; | 3529 | u64 hint_byte = 0; |
3184 | hole_size = last_byte - cur_offset; | 3530 | hole_size = last_byte - cur_offset; |
3185 | 3531 | ||
3186 | err = btrfs_reserve_metadata_space(root, 2); | 3532 | trans = btrfs_start_transaction(root, 2); |
3187 | if (err) | 3533 | if (IS_ERR(trans)) { |
3534 | err = PTR_ERR(trans); | ||
3188 | break; | 3535 | break; |
3189 | 3536 | } | |
3190 | trans = btrfs_start_transaction(root, 1); | ||
3191 | btrfs_set_trans_block_group(trans, inode); | 3537 | btrfs_set_trans_block_group(trans, inode); |
3192 | 3538 | ||
3193 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3539 | err = btrfs_drop_extents(trans, inode, cur_offset, |
@@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3205 | last_byte - 1, 0); | 3551 | last_byte - 1, 0); |
3206 | 3552 | ||
3207 | btrfs_end_transaction(trans, root); | 3553 | btrfs_end_transaction(trans, root); |
3208 | btrfs_unreserve_metadata_space(root, 2); | ||
3209 | } | 3554 | } |
3210 | free_extent_map(em); | 3555 | free_extent_map(em); |
3556 | em = NULL; | ||
3211 | cur_offset = last_byte; | 3557 | cur_offset = last_byte; |
3212 | if (cur_offset >= block_end) | 3558 | if (cur_offset >= block_end) |
3213 | break; | 3559 | break; |
3214 | } | 3560 | } |
3215 | 3561 | ||
3562 | free_extent_map(em); | ||
3216 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, | 3563 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, |
3217 | GFP_NOFS); | 3564 | GFP_NOFS); |
3218 | return err; | 3565 | return err; |
@@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3239 | } | 3586 | } |
3240 | } | 3587 | } |
3241 | 3588 | ||
3242 | ret = btrfs_reserve_metadata_space(root, 1); | 3589 | trans = btrfs_start_transaction(root, 5); |
3243 | if (ret) | 3590 | if (IS_ERR(trans)) |
3244 | return ret; | 3591 | return PTR_ERR(trans); |
3245 | 3592 | ||
3246 | trans = btrfs_start_transaction(root, 1); | ||
3247 | btrfs_set_trans_block_group(trans, inode); | 3593 | btrfs_set_trans_block_group(trans, inode); |
3248 | 3594 | ||
3249 | ret = btrfs_orphan_add(trans, inode); | 3595 | ret = btrfs_orphan_add(trans, inode); |
@@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3251 | 3597 | ||
3252 | nr = trans->blocks_used; | 3598 | nr = trans->blocks_used; |
3253 | btrfs_end_transaction(trans, root); | 3599 | btrfs_end_transaction(trans, root); |
3254 | btrfs_unreserve_metadata_space(root, 1); | ||
3255 | btrfs_btree_balance_dirty(root, nr); | 3600 | btrfs_btree_balance_dirty(root, nr); |
3256 | 3601 | ||
3257 | if (attr->ia_size > inode->i_size) { | 3602 | if (attr->ia_size > inode->i_size) { |
@@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3264 | i_size_write(inode, attr->ia_size); | 3609 | i_size_write(inode, attr->ia_size); |
3265 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3610 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
3266 | 3611 | ||
3267 | trans = btrfs_start_transaction(root, 1); | 3612 | trans = btrfs_start_transaction(root, 0); |
3613 | BUG_ON(IS_ERR(trans)); | ||
3268 | btrfs_set_trans_block_group(trans, inode); | 3614 | btrfs_set_trans_block_group(trans, inode); |
3615 | trans->block_rsv = root->orphan_block_rsv; | ||
3616 | BUG_ON(!trans->block_rsv); | ||
3269 | 3617 | ||
3270 | ret = btrfs_update_inode(trans, root, inode); | 3618 | ret = btrfs_update_inode(trans, root, inode); |
3271 | BUG_ON(ret); | 3619 | BUG_ON(ret); |
@@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode) | |||
3345 | btrfs_i_size_write(inode, 0); | 3693 | btrfs_i_size_write(inode, 0); |
3346 | 3694 | ||
3347 | while (1) { | 3695 | while (1) { |
3348 | trans = btrfs_start_transaction(root, 1); | 3696 | trans = btrfs_start_transaction(root, 0); |
3697 | BUG_ON(IS_ERR(trans)); | ||
3349 | btrfs_set_trans_block_group(trans, inode); | 3698 | btrfs_set_trans_block_group(trans, inode); |
3350 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3699 | trans->block_rsv = root->orphan_block_rsv; |
3700 | |||
3701 | ret = btrfs_block_rsv_check(trans, root, | ||
3702 | root->orphan_block_rsv, 0, 5); | ||
3703 | if (ret) { | ||
3704 | BUG_ON(ret != -EAGAIN); | ||
3705 | ret = btrfs_commit_transaction(trans, root); | ||
3706 | BUG_ON(ret); | ||
3707 | continue; | ||
3708 | } | ||
3351 | 3709 | ||
3710 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | ||
3352 | if (ret != -EAGAIN) | 3711 | if (ret != -EAGAIN) |
3353 | break; | 3712 | break; |
3354 | 3713 | ||
@@ -3356,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
3356 | btrfs_end_transaction(trans, root); | 3715 | btrfs_end_transaction(trans, root); |
3357 | trans = NULL; | 3716 | trans = NULL; |
3358 | btrfs_btree_balance_dirty(root, nr); | 3717 | btrfs_btree_balance_dirty(root, nr); |
3718 | |||
3359 | } | 3719 | } |
3360 | 3720 | ||
3361 | if (ret == 0) { | 3721 | if (ret == 0) { |
@@ -3596,40 +3956,10 @@ again: | |||
3596 | return 0; | 3956 | return 0; |
3597 | } | 3957 | } |
3598 | 3958 | ||
3599 | static noinline void init_btrfs_i(struct inode *inode) | ||
3600 | { | ||
3601 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
3602 | |||
3603 | bi->generation = 0; | ||
3604 | bi->sequence = 0; | ||
3605 | bi->last_trans = 0; | ||
3606 | bi->last_sub_trans = 0; | ||
3607 | bi->logged_trans = 0; | ||
3608 | bi->delalloc_bytes = 0; | ||
3609 | bi->reserved_bytes = 0; | ||
3610 | bi->disk_i_size = 0; | ||
3611 | bi->flags = 0; | ||
3612 | bi->index_cnt = (u64)-1; | ||
3613 | bi->last_unlink_trans = 0; | ||
3614 | bi->ordered_data_close = 0; | ||
3615 | bi->force_compress = 0; | ||
3616 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
3617 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
3618 | inode->i_mapping, GFP_NOFS); | ||
3619 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
3620 | inode->i_mapping, GFP_NOFS); | ||
3621 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
3622 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
3623 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
3624 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
3625 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
3626 | } | ||
3627 | |||
3628 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | 3959 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
3629 | { | 3960 | { |
3630 | struct btrfs_iget_args *args = p; | 3961 | struct btrfs_iget_args *args = p; |
3631 | inode->i_ino = args->ino; | 3962 | inode->i_ino = args->ino; |
3632 | init_btrfs_i(inode); | ||
3633 | BTRFS_I(inode)->root = args->root; | 3963 | BTRFS_I(inode)->root = args->root; |
3634 | btrfs_set_inode_space_info(args->root, inode); | 3964 | btrfs_set_inode_space_info(args->root, inode); |
3635 | return 0; | 3965 | return 0; |
@@ -3692,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
3692 | if (!inode) | 4022 | if (!inode) |
3693 | return ERR_PTR(-ENOMEM); | 4023 | return ERR_PTR(-ENOMEM); |
3694 | 4024 | ||
3695 | init_btrfs_i(inode); | ||
3696 | |||
3697 | BTRFS_I(inode)->root = root; | 4025 | BTRFS_I(inode)->root = root; |
3698 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4026 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
3699 | BTRFS_I(inode)->dummy_inode = 1; | 4027 | BTRFS_I(inode)->dummy_inode = 1; |
@@ -3950,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
3950 | struct btrfs_trans_handle *trans; | 4278 | struct btrfs_trans_handle *trans; |
3951 | int ret = 0; | 4279 | int ret = 0; |
3952 | 4280 | ||
3953 | if (root->fs_info->btree_inode == inode) | 4281 | if (BTRFS_I(inode)->dummy_inode) |
3954 | return 0; | 4282 | return 0; |
3955 | 4283 | ||
3956 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4284 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -3971,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode) | |||
3971 | { | 4299 | { |
3972 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4300 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3973 | struct btrfs_trans_handle *trans; | 4301 | struct btrfs_trans_handle *trans; |
4302 | int ret; | ||
4303 | |||
4304 | if (BTRFS_I(inode)->dummy_inode) | ||
4305 | return; | ||
3974 | 4306 | ||
3975 | trans = btrfs_join_transaction(root, 1); | 4307 | trans = btrfs_join_transaction(root, 1); |
3976 | btrfs_set_trans_block_group(trans, inode); | 4308 | btrfs_set_trans_block_group(trans, inode); |
3977 | btrfs_update_inode(trans, root, inode); | 4309 | |
4310 | ret = btrfs_update_inode(trans, root, inode); | ||
4311 | if (ret && ret == -ENOSPC) { | ||
4312 | /* whoops, let's try again with the full transaction */ | ||
4313 | btrfs_end_transaction(trans, root); | ||
4314 | trans = btrfs_start_transaction(root, 1); | ||
4315 | if (IS_ERR(trans)) { | ||
4316 | if (printk_ratelimit()) { | ||
4317 | printk(KERN_ERR "btrfs: fail to " | ||
4318 | "dirty inode %lu error %ld\n", | ||
4319 | inode->i_ino, PTR_ERR(trans)); | ||
4320 | } | ||
4321 | return; | ||
4322 | } | ||
4323 | btrfs_set_trans_block_group(trans, inode); | ||
4324 | |||
4325 | ret = btrfs_update_inode(trans, root, inode); | ||
4326 | if (ret) { | ||
4327 | if (printk_ratelimit()) { | ||
4328 | printk(KERN_ERR "btrfs: fail to " | ||
4329 | "dirty inode %lu error %d\n", | ||
4330 | inode->i_ino, ret); | ||
4331 | } | ||
4332 | } | ||
4333 | } | ||
3978 | btrfs_end_transaction(trans, root); | 4334 | btrfs_end_transaction(trans, root); |
3979 | } | 4335 | } |
3980 | 4336 | ||
@@ -4092,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4092 | * btrfs_get_inode_index_count has an explanation for the magic | 4448 | * btrfs_get_inode_index_count has an explanation for the magic |
4093 | * number | 4449 | * number |
4094 | */ | 4450 | */ |
4095 | init_btrfs_i(inode); | ||
4096 | BTRFS_I(inode)->index_cnt = 2; | 4451 | BTRFS_I(inode)->index_cnt = 2; |
4097 | BTRFS_I(inode)->root = root; | 4452 | BTRFS_I(inode)->root = root; |
4098 | BTRFS_I(inode)->generation = trans->transid; | 4453 | BTRFS_I(inode)->generation = trans->transid; |
@@ -4247,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4247 | if (!new_valid_dev(rdev)) | 4602 | if (!new_valid_dev(rdev)) |
4248 | return -EINVAL; | 4603 | return -EINVAL; |
4249 | 4604 | ||
4605 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4606 | if (err) | ||
4607 | return err; | ||
4608 | |||
4250 | /* | 4609 | /* |
4251 | * 2 for inode item and ref | 4610 | * 2 for inode item and ref |
4252 | * 2 for dir items | 4611 | * 2 for dir items |
4253 | * 1 for xattr if selinux is on | 4612 | * 1 for xattr if selinux is on |
4254 | */ | 4613 | */ |
4255 | err = btrfs_reserve_metadata_space(root, 5); | 4614 | trans = btrfs_start_transaction(root, 5); |
4256 | if (err) | 4615 | if (IS_ERR(trans)) |
4257 | return err; | 4616 | return PTR_ERR(trans); |
4258 | 4617 | ||
4259 | trans = btrfs_start_transaction(root, 1); | ||
4260 | if (!trans) | ||
4261 | goto fail; | ||
4262 | btrfs_set_trans_block_group(trans, dir); | 4618 | btrfs_set_trans_block_group(trans, dir); |
4263 | 4619 | ||
4264 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4265 | if (err) { | ||
4266 | err = -ENOSPC; | ||
4267 | goto out_unlock; | ||
4268 | } | ||
4269 | |||
4270 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4620 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4271 | dentry->d_name.len, | 4621 | dentry->d_name.len, |
4272 | dentry->d_parent->d_inode->i_ino, objectid, | 4622 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -4295,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4295 | out_unlock: | 4645 | out_unlock: |
4296 | nr = trans->blocks_used; | 4646 | nr = trans->blocks_used; |
4297 | btrfs_end_transaction_throttle(trans, root); | 4647 | btrfs_end_transaction_throttle(trans, root); |
4298 | fail: | 4648 | btrfs_btree_balance_dirty(root, nr); |
4299 | btrfs_unreserve_metadata_space(root, 5); | ||
4300 | if (drop_inode) { | 4649 | if (drop_inode) { |
4301 | inode_dec_link_count(inode); | 4650 | inode_dec_link_count(inode); |
4302 | iput(inode); | 4651 | iput(inode); |
4303 | } | 4652 | } |
4304 | btrfs_btree_balance_dirty(root, nr); | ||
4305 | return err; | 4653 | return err; |
4306 | } | 4654 | } |
4307 | 4655 | ||
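
Across mknod, create, mkdir and symlink the old reserve-then-start pair collapses into btrfs_start_transaction(root, 5), with the worst-case item count passed directly. Spelling out where the 5 comes from, using the per-operation counts in the comment above (the enum is illustrative bookkeeping, not a kernel type):

#include <stdio.h>

enum {
	INODE_ITEM    = 1,	/* the new inode item */
	INODE_REF     = 1,	/* its backref */
	DIR_ITEMS     = 2,	/* dir item + dir index */
	SELINUX_XATTR = 1	/* xattr, if selinux is on */
};

int main(void)
{
	printf("units = %d\n",
	       INODE_ITEM + INODE_REF + DIR_ITEMS + SELINUX_XATTR); /* 5 */
	return 0;
}
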
@@ -4311,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4311 | struct btrfs_trans_handle *trans; | 4659 | struct btrfs_trans_handle *trans; |
4312 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4660 | struct btrfs_root *root = BTRFS_I(dir)->root; |
4313 | struct inode *inode = NULL; | 4661 | struct inode *inode = NULL; |
4314 | int err; | ||
4315 | int drop_inode = 0; | 4662 | int drop_inode = 0; |
4663 | int err; | ||
4316 | unsigned long nr = 0; | 4664 | unsigned long nr = 0; |
4317 | u64 objectid; | 4665 | u64 objectid; |
4318 | u64 index = 0; | 4666 | u64 index = 0; |
4319 | 4667 | ||
4668 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4669 | if (err) | ||
4670 | return err; | ||
4320 | /* | 4671 | /* |
4321 | * 2 for inode item and ref | 4672 | * 2 for inode item and ref |
4322 | * 2 for dir items | 4673 | * 2 for dir items |
4323 | * 1 for xattr if selinux is on | 4674 | * 1 for xattr if selinux is on |
4324 | */ | 4675 | */ |
4325 | err = btrfs_reserve_metadata_space(root, 5); | 4676 | trans = btrfs_start_transaction(root, 5); |
4326 | if (err) | 4677 | if (IS_ERR(trans)) |
4327 | return err; | 4678 | return PTR_ERR(trans); |
4328 | 4679 | ||
4329 | trans = btrfs_start_transaction(root, 1); | ||
4330 | if (!trans) | ||
4331 | goto fail; | ||
4332 | btrfs_set_trans_block_group(trans, dir); | 4680 | btrfs_set_trans_block_group(trans, dir); |
4333 | 4681 | ||
4334 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4335 | if (err) { | ||
4336 | err = -ENOSPC; | ||
4337 | goto out_unlock; | ||
4338 | } | ||
4339 | |||
4340 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4682 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4341 | dentry->d_name.len, | 4683 | dentry->d_name.len, |
4342 | dentry->d_parent->d_inode->i_ino, | 4684 | dentry->d_parent->d_inode->i_ino, |
@@ -4368,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4368 | out_unlock: | 4710 | out_unlock: |
4369 | nr = trans->blocks_used; | 4711 | nr = trans->blocks_used; |
4370 | btrfs_end_transaction_throttle(trans, root); | 4712 | btrfs_end_transaction_throttle(trans, root); |
4371 | fail: | ||
4372 | btrfs_unreserve_metadata_space(root, 5); | ||
4373 | if (drop_inode) { | 4713 | if (drop_inode) { |
4374 | inode_dec_link_count(inode); | 4714 | inode_dec_link_count(inode); |
4375 | iput(inode); | 4715 | iput(inode); |
@@ -4396,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4396 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4736 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4397 | return -EPERM; | 4737 | return -EPERM; |
4398 | 4738 | ||
4399 | /* | ||
4400 | * 1 item for inode ref | ||
4401 | * 2 items for dir items | ||
4402 | */ | ||
4403 | err = btrfs_reserve_metadata_space(root, 3); | ||
4404 | if (err) | ||
4405 | return err; | ||
4406 | |||
4407 | btrfs_inc_nlink(inode); | 4739 | btrfs_inc_nlink(inode); |
4408 | 4740 | ||
4409 | err = btrfs_set_inode_index(dir, &index); | 4741 | err = btrfs_set_inode_index(dir, &index); |
4410 | if (err) | 4742 | if (err) |
4411 | goto fail; | 4743 | goto fail; |
4412 | 4744 | ||
4413 | trans = btrfs_start_transaction(root, 1); | 4745 | /* |
4746 | * 1 item for inode ref | ||
4747 | * 2 items for dir items | ||
4748 | */ | ||
4749 | trans = btrfs_start_transaction(root, 3); | ||
4750 | if (IS_ERR(trans)) { | ||
4751 | err = PTR_ERR(trans); | ||
4752 | goto fail; | ||
4753 | } | ||
4414 | 4754 | ||
4415 | btrfs_set_trans_block_group(trans, dir); | 4755 | btrfs_set_trans_block_group(trans, dir); |
4416 | atomic_inc(&inode->i_count); | 4756 | atomic_inc(&inode->i_count); |
@@ -4429,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4429 | nr = trans->blocks_used; | 4769 | nr = trans->blocks_used; |
4430 | btrfs_end_transaction_throttle(trans, root); | 4770 | btrfs_end_transaction_throttle(trans, root); |
4431 | fail: | 4771 | fail: |
4432 | btrfs_unreserve_metadata_space(root, 3); | ||
4433 | if (drop_inode) { | 4772 | if (drop_inode) { |
4434 | inode_dec_link_count(inode); | 4773 | inode_dec_link_count(inode); |
4435 | iput(inode); | 4774 | iput(inode); |
@@ -4449,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4449 | u64 index = 0; | 4788 | u64 index = 0; |
4450 | unsigned long nr = 1; | 4789 | unsigned long nr = 1; |
4451 | 4790 | ||
4791 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4792 | if (err) | ||
4793 | return err; | ||
4794 | |||
4452 | /* | 4795 | /* |
4453 | * 2 items for inode and ref | 4796 | * 2 items for inode and ref |
4454 | * 2 items for dir items | 4797 | * 2 items for dir items |
4455 | * 1 for xattr if selinux is on | 4798 | * 1 for xattr if selinux is on |
4456 | */ | 4799 | */ |
4457 | err = btrfs_reserve_metadata_space(root, 5); | 4800 | trans = btrfs_start_transaction(root, 5); |
4458 | if (err) | 4801 | if (IS_ERR(trans)) |
4459 | return err; | 4802 | return PTR_ERR(trans); |
4460 | |||
4461 | trans = btrfs_start_transaction(root, 1); | ||
4462 | if (!trans) { | ||
4463 | err = -ENOMEM; | ||
4464 | goto out_unlock; | ||
4465 | } | ||
4466 | btrfs_set_trans_block_group(trans, dir); | 4803 | btrfs_set_trans_block_group(trans, dir); |
4467 | 4804 | ||
4468 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4469 | if (err) { | ||
4470 | err = -ENOSPC; | ||
4471 | goto out_fail; | ||
4472 | } | ||
4473 | |||
4474 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4805 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4475 | dentry->d_name.len, | 4806 | dentry->d_name.len, |
4476 | dentry->d_parent->d_inode->i_ino, objectid, | 4807 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -4510,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4510 | out_fail: | 4841 | out_fail: |
4511 | nr = trans->blocks_used; | 4842 | nr = trans->blocks_used; |
4512 | btrfs_end_transaction_throttle(trans, root); | 4843 | btrfs_end_transaction_throttle(trans, root); |
4513 | |||
4514 | out_unlock: | ||
4515 | btrfs_unreserve_metadata_space(root, 5); | ||
4516 | if (drop_on_err) | 4844 | if (drop_on_err) |
4517 | iput(inode); | 4845 | iput(inode); |
4518 | btrfs_btree_balance_dirty(root, nr); | 4846 | btrfs_btree_balance_dirty(root, nr); |
@@ -4770,6 +5098,7 @@ again: | |||
4770 | } | 5098 | } |
4771 | flush_dcache_page(page); | 5099 | flush_dcache_page(page); |
4772 | } else if (create && PageUptodate(page)) { | 5100 | } else if (create && PageUptodate(page)) { |
5101 | WARN_ON(1); | ||
4773 | if (!trans) { | 5102 | if (!trans) { |
4774 | kunmap(page); | 5103 | kunmap(page); |
4775 | free_extent_map(em); | 5104 | free_extent_map(em); |
@@ -4866,11 +5195,651 @@ out: | |||
4866 | return em; | 5195 | return em; |
4867 | } | 5196 | } |
4868 | 5197 | ||
5198 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
5199 | u64 start, u64 len) | ||
5200 | { | ||
5201 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5202 | struct btrfs_trans_handle *trans; | ||
5203 | struct extent_map *em; | ||
5204 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
5205 | struct btrfs_key ins; | ||
5206 | u64 alloc_hint; | ||
5207 | int ret; | ||
5208 | |||
5209 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5210 | |||
5211 | trans = btrfs_join_transaction(root, 0); | ||
5212 | if (!trans) | ||
5213 | return ERR_PTR(-ENOMEM); | ||
5214 | |||
5215 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5216 | |||
5217 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
5218 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
5219 | alloc_hint, (u64)-1, &ins, 1); | ||
5220 | if (ret) { | ||
5221 | em = ERR_PTR(ret); | ||
5222 | goto out; | ||
5223 | } | ||
5224 | |||
5225 | em = alloc_extent_map(GFP_NOFS); | ||
5226 | if (!em) { | ||
5227 | em = ERR_PTR(-ENOMEM); | ||
5228 | goto out; | ||
5229 | } | ||
5230 | |||
5231 | em->start = start; | ||
5232 | em->orig_start = em->start; | ||
5233 | em->len = ins.offset; | ||
5234 | |||
5235 | em->block_start = ins.objectid; | ||
5236 | em->block_len = ins.offset; | ||
5237 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
5238 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
5239 | |||
5240 | while (1) { | ||
5241 | write_lock(&em_tree->lock); | ||
5242 | ret = add_extent_mapping(em_tree, em); | ||
5243 | write_unlock(&em_tree->lock); | ||
5244 | if (ret != -EEXIST) | ||
5245 | break; | ||
5246 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
5247 | } | ||
5248 | |||
5249 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
5250 | ins.offset, ins.offset, 0); | ||
5251 | if (ret) { | ||
5252 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
5253 | em = ERR_PTR(ret); | ||
5254 | } | ||
5255 | out: | ||
5256 | btrfs_end_transaction(trans, root); | ||
5257 | return em; | ||
5258 | } | ||
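
The insert loop near the end of btrfs_new_extent_direct() above retries until the extent tree accepts the new mapping, dropping any cached overlap that makes the insert fail with -EEXIST. Reduced to runnable user-space C, where tree_insert() and drop_cached() stand in for add_extent_mapping() and btrfs_drop_extent_cache():

#include <errno.h>
#include <stdio.h>

static int stale = 2;	/* pretend two stale overlaps block the insert */

static int tree_insert(void)
{
	return stale ? -EEXIST : 0;
}

static void drop_cached(void)
{
	stale--;
}

int main(void)
{
	int ret, drops = 0;

	while (1) {
		ret = tree_insert();
		if (ret != -EEXIST)
			break;
		drop_cached();
		drops++;
	}
	printf("inserted after %d drops (ret=%d)\n", drops, ret);
	return 0;
}
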
5259 | |||
5260 | /* | ||
5261 | * returns 1 when the nocow is safe, < 0 on error, 0 if the | ||
5262 | * block must be cow'd | ||
5263 | */ | ||
5264 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | ||
5265 | struct inode *inode, u64 offset, u64 len) | ||
5266 | { | ||
5267 | struct btrfs_path *path; | ||
5268 | int ret; | ||
5269 | struct extent_buffer *leaf; | ||
5270 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5271 | struct btrfs_file_extent_item *fi; | ||
5272 | struct btrfs_key key; | ||
5273 | u64 disk_bytenr; | ||
5274 | u64 backref_offset; | ||
5275 | u64 extent_end; | ||
5276 | u64 num_bytes; | ||
5277 | int slot; | ||
5278 | int found_type; | ||
5279 | |||
5280 | path = btrfs_alloc_path(); | ||
5281 | if (!path) | ||
5282 | return -ENOMEM; | ||
5283 | |||
5284 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
5285 | offset, 0); | ||
5286 | if (ret < 0) | ||
5287 | goto out; | ||
5288 | |||
5289 | slot = path->slots[0]; | ||
5290 | if (ret == 1) { | ||
5291 | if (slot == 0) { | ||
5292 | /* can't find the item, must cow */ | ||
5293 | ret = 0; | ||
5294 | goto out; | ||
5295 | } | ||
5296 | slot--; | ||
5297 | } | ||
5298 | ret = 0; | ||
5299 | leaf = path->nodes[0]; | ||
5300 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
5301 | if (key.objectid != inode->i_ino || | ||
5302 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
5303 | /* not our file or wrong item type, must cow */ | ||
5304 | goto out; | ||
5305 | } | ||
5306 | |||
5307 | if (key.offset > offset) { | ||
5308 | /* Wrong offset, must cow */ | ||
5309 | goto out; | ||
5310 | } | ||
5311 | |||
5312 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
5313 | found_type = btrfs_file_extent_type(leaf, fi); | ||
5314 | if (found_type != BTRFS_FILE_EXTENT_REG && | ||
5315 | found_type != BTRFS_FILE_EXTENT_PREALLOC) { | ||
5316 | /* not a regular extent, must cow */ | ||
5317 | goto out; | ||
5318 | } | ||
5319 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
5320 | backref_offset = btrfs_file_extent_offset(leaf, fi); | ||
5321 | |||
5322 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | ||
5323 | if (extent_end < offset + len) { | ||
5324 | /* extent doesn't include our full range, must cow */ | ||
5325 | goto out; | ||
5326 | } | ||
5327 | |||
5328 | if (btrfs_extent_readonly(root, disk_bytenr)) | ||
5329 | goto out; | ||
5330 | |||
5331 | /* | ||
5332 | * look for other files referencing this extent, if we | ||
5333 | * find any we must cow | ||
5334 | */ | ||
5335 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | ||
5336 | key.offset - backref_offset, disk_bytenr)) | ||
5337 | goto out; | ||
5338 | |||
5339 | /* | ||
5340 | * adjust disk_bytenr and num_bytes to cover just the bytes | ||
5341 | * in this extent we are about to write. If there | ||
5342 | * are any csums in that range we have to cow in order | ||
5343 | * to keep the csums correct | ||
5344 | */ | ||
5345 | disk_bytenr += backref_offset; | ||
5346 | disk_bytenr += offset - key.offset; | ||
5347 | num_bytes = min(offset + len, extent_end) - offset; | ||
5348 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | ||
5349 | goto out; | ||
5350 | /* | ||
5351 | * all of the above have passed, it is safe to overwrite this extent | ||
5352 | * without cow | ||
5353 | */ | ||
5354 | ret = 1; | ||
5355 | out: | ||
5356 | btrfs_free_path(path); | ||
5357 | return ret; | ||
5358 | } | ||
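
can_nocow_odirect() above is a cascade of bail-outs: any failed check means the write must be copied, and only a clean run through every test returns 1. A condensed user-space restatement of that shape, with illustrative field names rather than the real on-disk structures:

#include <stdbool.h>
#include <stdio.h>

struct extent {
	bool found;        /* file extent item exists at the offset  */
	bool regular;      /* REG or PREALLOC, not inline/compressed */
	bool covers_range; /* extent_end >= offset + len             */
	bool readonly;     /* backing block group is read-only       */
	bool cross_ref;    /* another file references the extent     */
	bool has_csums;    /* checksums exist over the target bytes  */
};

static int can_nocow(const struct extent *e)
{
	if (!e->found || !e->regular || !e->covers_range)
		return 0;		/* must cow */
	if (e->readonly || e->cross_ref || e->has_csums)
		return 0;		/* must cow */
	return 1;			/* safe to overwrite in place */
}

int main(void)
{
	struct extent e = { true, true, true, false, false, false };

	printf("nocow safe: %d\n", can_nocow(&e));	/* 1 */
	e.has_csums = true;
	printf("nocow safe: %d\n", can_nocow(&e));	/* 0: csums force cow */
	return 0;
}
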
5359 | |||
5360 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | ||
5361 | struct buffer_head *bh_result, int create) | ||
5362 | { | ||
5363 | struct extent_map *em; | ||
5364 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5365 | u64 start = iblock << inode->i_blkbits; | ||
5366 | u64 len = bh_result->b_size; | ||
5367 | struct btrfs_trans_handle *trans; | ||
5368 | |||
5369 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
5370 | if (IS_ERR(em)) | ||
5371 | return PTR_ERR(em); | ||
5372 | |||
5373 | /* | ||
5374 | * OK, for INLINE and COMPRESSED extents we need to fall back on buffered | ||
5375 | * io. INLINE is special, and we could probably kludge it in here, but | ||
5376 | * it's still buffered so for safety let's just fall back to the generic | ||
5377 | * buffered path. | ||
5378 | * | ||
5379 | * For COMPRESSED we _have_ to read the entire extent in so we can | ||
5380 | * decompress it, so there will be buffering required no matter what we | ||
5381 | * do, so go ahead and fall back to buffered. | ||
5382 | * | ||
5383 | * We return -ENOTBLK because that's what makes DIO go ahead and go back | ||
5384 | * to buffered IO. Don't blame me, this is the price we pay for using | ||
5385 | * the generic code. | ||
5386 | */ | ||
5387 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | ||
5388 | em->block_start == EXTENT_MAP_INLINE) { | ||
5389 | free_extent_map(em); | ||
5390 | return -ENOTBLK; | ||
5391 | } | ||
5392 | |||
5393 | /* Just a good old-fashioned hole, return */ | ||
5394 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | ||
5395 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
5396 | free_extent_map(em); | ||
5397 | /* DIO will do one hole at a time, so just unlock a sector */ | ||
5398 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | ||
5399 | start + root->sectorsize - 1, GFP_NOFS); | ||
5400 | return 0; | ||
5401 | } | ||
5402 | |||
5403 | /* | ||
5404 | * We don't allocate a new extent in the following cases | ||
5405 | * | ||
5406 | * 1) The inode is marked as NODATACOW. In this case we'll just use the | ||
5407 | * existing extent. | ||
5408 | * 2) The extent is marked as PREALLOC. We're good to go here and can | ||
5409 | * just use the extent. | ||
5410 | * | ||
5411 | */ | ||
5412 | if (!create) { | ||
5413 | len = em->len - (start - em->start); | ||
5414 | goto map; | ||
5415 | } | ||
5416 | |||
5417 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | ||
5418 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
5419 | em->block_start != EXTENT_MAP_HOLE)) { | ||
5420 | int type; | ||
5421 | int ret; | ||
5422 | u64 block_start; | ||
5423 | |||
5424 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5425 | type = BTRFS_ORDERED_PREALLOC; | ||
5426 | else | ||
5427 | type = BTRFS_ORDERED_NOCOW; | ||
5428 | len = min(len, em->len - (start - em->start)); | ||
5429 | block_start = em->block_start + (start - em->start); | ||
5430 | |||
5431 | /* | ||
5432 | * we're not going to log anything, but we do need | ||
5433 | * to make sure the current transaction stays open | ||
5434 | * while we look for nocow cross refs | ||
5435 | */ | ||
5436 | trans = btrfs_join_transaction(root, 0); | ||
5437 | if (!trans) | ||
5438 | goto must_cow; | ||
5439 | |||
5440 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | ||
5441 | ret = btrfs_add_ordered_extent_dio(inode, start, | ||
5442 | block_start, len, len, type); | ||
5443 | btrfs_end_transaction(trans, root); | ||
5444 | if (ret) { | ||
5445 | free_extent_map(em); | ||
5446 | return ret; | ||
5447 | } | ||
5448 | goto unlock; | ||
5449 | } | ||
5450 | btrfs_end_transaction(trans, root); | ||
5451 | } | ||
5452 | must_cow: | ||
5453 | /* | ||
5454 | * this will cow the extent, reset the len in case we changed | ||
5455 | * it above | ||
5456 | */ | ||
5457 | len = bh_result->b_size; | ||
5458 | free_extent_map(em); | ||
5459 | em = btrfs_new_extent_direct(inode, start, len); | ||
5460 | if (IS_ERR(em)) | ||
5461 | return PTR_ERR(em); | ||
5462 | len = min(len, em->len - (start - em->start)); | ||
5463 | unlock: | ||
5464 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
5465 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, | ||
5466 | 0, NULL, GFP_NOFS); | ||
5467 | map: | ||
5468 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | ||
5469 | inode->i_blkbits; | ||
5470 | bh_result->b_size = len; | ||
5471 | bh_result->b_bdev = em->bdev; | ||
5472 | set_buffer_mapped(bh_result); | ||
5473 | if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5474 | set_buffer_new(bh_result); | ||
5475 | |||
5476 | free_extent_map(em); | ||
5477 | |||
5478 | return 0; | ||
5479 | } | ||
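
btrfs_get_blocks_direct() above converts between VFS block units and btrfs byte offsets with shifts by i_blkbits. The same arithmetic worked through for 4 KiB blocks, with made-up sample extent numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int i_blkbits = 12;           /* 4096-byte blocks */
	uint64_t iblock = 25;                  /* block index from the VFS */
	uint64_t start = iblock << i_blkbits;  /* 102400: byte offset */

	/* sample extent map: file range starting at byte 65536 is
	 * stored on disk starting at byte 1048576 */
	uint64_t em_start = 65536, em_block_start = 1048576;

	uint64_t disk_byte = em_block_start + (start - em_start);
	uint64_t b_blocknr = disk_byte >> i_blkbits;

	printf("file byte %llu -> disk byte %llu -> b_blocknr %llu\n",
	       (unsigned long long)start,
	       (unsigned long long)disk_byte,
	       (unsigned long long)b_blocknr);	/* 102400 -> 1085440 -> 265 */
	return 0;
}
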
5480 | |||
5481 | struct btrfs_dio_private { | ||
5482 | struct inode *inode; | ||
5483 | u64 logical_offset; | ||
5484 | u64 disk_bytenr; | ||
5485 | u64 bytes; | ||
5486 | u32 *csums; | ||
5487 | void *private; | ||
5488 | }; | ||
5489 | |||
5490 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
5491 | { | ||
5492 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
5493 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5494 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5495 | struct inode *inode = dip->inode; | ||
5496 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5497 | u64 start; | ||
5498 | u32 *private = dip->csums; | ||
5499 | |||
5500 | start = dip->logical_offset; | ||
5501 | do { | ||
5502 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | ||
5503 | struct page *page = bvec->bv_page; | ||
5504 | char *kaddr; | ||
5505 | u32 csum = ~(u32)0; | ||
5506 | unsigned long flags; | ||
5507 | |||
5508 | local_irq_save(flags); | ||
5509 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
5510 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | ||
5511 | csum, bvec->bv_len); | ||
5512 | btrfs_csum_final(csum, (char *)&csum); | ||
5513 | kunmap_atomic(kaddr, KM_IRQ0); | ||
5514 | local_irq_restore(flags); | ||
5515 | |||
5516 | flush_dcache_page(bvec->bv_page); | ||
5517 | if (csum != *private) { | ||
5518 | printk(KERN_ERR "btrfs csum failed ino %lu off" | ||
5519 | " %llu csum %u private %u\n", | ||
5520 | inode->i_ino, (unsigned long long)start, | ||
5521 | csum, *private); | ||
5522 | err = -EIO; | ||
5523 | } | ||
5524 | } | ||
5525 | |||
5526 | start += bvec->bv_len; | ||
5527 | private++; | ||
5528 | bvec++; | ||
5529 | } while (bvec <= bvec_end); | ||
5530 | |||
5531 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | ||
5532 | dip->logical_offset + dip->bytes - 1, GFP_NOFS); | ||
5533 | bio->bi_private = dip->private; | ||
5534 | |||
5535 | kfree(dip->csums); | ||
5536 | kfree(dip); | ||
5537 | dio_end_io(bio, err); | ||
5538 | } | ||
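
The completion handler above walks every segment of the finished read, checksums it, and compares against the values saved at submit time; a single mismatch fails the whole bio with -EIO. The same walk in user-space form, with a deliberately toy checksum standing in for btrfs_csum_data():

#include <stdint.h>
#include <stdio.h>

static uint32_t toy_csum(const unsigned char *p, size_t len)
{
	uint32_t c = ~(uint32_t)0;

	while (len--)
		c = (c << 1) ^ *p++;	/* stand-in, not CRC32C */
	return c;
}

int main(void)
{
	unsigned char seg0[] = "direct i", seg1[] = "o verify";
	struct { const unsigned char *buf; size_t len; } bvec[2] = {
		{ seg0, sizeof(seg0) - 1 },
		{ seg1, sizeof(seg1) - 1 },
	};
	uint32_t expected[2] = {
		toy_csum(seg0, sizeof(seg0) - 1),
		toy_csum(seg1, sizeof(seg1) - 1),
	};
	int i, err = 0;

	seg1[0] ^= 1;	/* corrupt one segment to trip the check */
	for (i = 0; i < 2; i++) {
		if (toy_csum(bvec[i].buf, bvec[i].len) != expected[i]) {
			fprintf(stderr, "csum failed at segment %d\n", i);
			err = -5;	/* -EIO */
		}
	}
	printf("err=%d\n", err);
	return 0;
}
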
5539 | |||
5540 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
5541 | { | ||
5542 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5543 | struct inode *inode = dip->inode; | ||
5544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5545 | struct btrfs_trans_handle *trans; | ||
5546 | struct btrfs_ordered_extent *ordered = NULL; | ||
5547 | struct extent_state *cached_state = NULL; | ||
5548 | int ret; | ||
5549 | |||
5550 | if (err) | ||
5551 | goto out_done; | ||
5552 | |||
5553 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | ||
5554 | dip->logical_offset, dip->bytes); | ||
5555 | if (!ret) | ||
5556 | goto out_done; | ||
5557 | |||
5558 | BUG_ON(!ordered); | ||
5559 | |||
5560 | trans = btrfs_join_transaction(root, 1); | ||
5561 | if (!trans) { | ||
5562 | err = -ENOMEM; | ||
5563 | goto out; | ||
5564 | } | ||
5565 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5566 | |||
5567 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5568 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5569 | if (!ret) | ||
5570 | ret = btrfs_update_inode(trans, root, inode); | ||
5571 | err = ret; | ||
5572 | goto out; | ||
5573 | } | ||
5574 | |||
5575 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5576 | ordered->file_offset + ordered->len - 1, 0, | ||
5577 | &cached_state, GFP_NOFS); | ||
5578 | |||
5579 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5580 | ret = btrfs_mark_extent_written(trans, inode, | ||
5581 | ordered->file_offset, | ||
5582 | ordered->file_offset + | ||
5583 | ordered->len); | ||
5584 | if (ret) { | ||
5585 | err = ret; | ||
5586 | goto out_unlock; | ||
5587 | } | ||
5588 | } else { | ||
5589 | ret = insert_reserved_file_extent(trans, inode, | ||
5590 | ordered->file_offset, | ||
5591 | ordered->start, | ||
5592 | ordered->disk_len, | ||
5593 | ordered->len, | ||
5594 | ordered->len, | ||
5595 | 0, 0, 0, | ||
5596 | BTRFS_FILE_EXTENT_REG); | ||
5597 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5598 | ordered->file_offset, ordered->len); | ||
5599 | if (ret) { | ||
5600 | err = ret; | ||
5601 | WARN_ON(1); | ||
5602 | goto out_unlock; | ||
5603 | } | ||
5604 | } | ||
5605 | |||
5606 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5607 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5608 | btrfs_update_inode(trans, root, inode); | ||
5609 | out_unlock: | ||
5610 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5611 | ordered->file_offset + ordered->len - 1, | ||
5612 | &cached_state, GFP_NOFS); | ||
5613 | out: | ||
5614 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5615 | btrfs_end_transaction(trans, root); | ||
5616 | btrfs_put_ordered_extent(ordered); | ||
5617 | btrfs_put_ordered_extent(ordered); | ||
5618 | out_done: | ||
5619 | bio->bi_private = dip->private; | ||
5620 | |||
5621 | kfree(dip->csums); | ||
5622 | kfree(dip); | ||
5623 | dio_end_io(bio, err); | ||
5624 | } | ||
5625 | |||
5626 | static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | ||
5627 | struct bio *bio, int mirror_num, | ||
5628 | unsigned long bio_flags, u64 offset) | ||
5629 | { | ||
5630 | int ret; | ||
5631 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5632 | ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); | ||
5633 | BUG_ON(ret); | ||
5634 | return 0; | ||
5635 | } | ||
5636 | |||
5637 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
5638 | loff_t file_offset) | ||
5639 | { | ||
5640 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5641 | struct btrfs_dio_private *dip; | ||
5642 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5643 | u64 start; | ||
5644 | int skip_sum; | ||
5645 | int write = rw & (1 << BIO_RW); | ||
5646 | int ret = 0; | ||
5647 | |||
5648 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
5649 | |||
5650 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
5651 | if (!dip) { | ||
5652 | ret = -ENOMEM; | ||
5653 | goto free_ordered; | ||
5654 | } | ||
5655 | dip->csums = NULL; | ||
5656 | |||
5657 | if (!skip_sum) { | ||
5658 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
5659 | if (!dip->csums) { | ||
5660 | ret = -ENOMEM; | ||
5661 | goto free_ordered; | ||
5662 | } | ||
5663 | } | ||
5664 | |||
5665 | dip->private = bio->bi_private; | ||
5666 | dip->inode = inode; | ||
5667 | dip->logical_offset = file_offset; | ||
5668 | |||
5669 | start = dip->logical_offset; | ||
5670 | dip->bytes = 0; | ||
5671 | do { | ||
5672 | dip->bytes += bvec->bv_len; | ||
5673 | bvec++; | ||
5674 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
5675 | |||
5676 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | ||
5677 | bio->bi_private = dip; | ||
5678 | |||
5679 | if (write) | ||
5680 | bio->bi_end_io = btrfs_endio_direct_write; | ||
5681 | else | ||
5682 | bio->bi_end_io = btrfs_endio_direct_read; | ||
5683 | |||
5684 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5685 | if (ret) | ||
5686 | goto out_err; | ||
5687 | |||
5688 | if (write && !skip_sum) { | ||
5689 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5690 | inode, rw, bio, 0, 0, | ||
5691 | dip->logical_offset, | ||
5692 | __btrfs_submit_bio_start_direct_io, | ||
5693 | __btrfs_submit_bio_done); | ||
5694 | if (ret) | ||
5695 | goto out_err; | ||
5696 | return; | ||
5697 | } else if (!skip_sum) | ||
5698 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5699 | dip->logical_offset, dip->csums); | ||
5700 | |||
5701 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5702 | if (ret) | ||
5703 | goto out_err; | ||
5704 | return; | ||
5705 | out_err: | ||
5706 | kfree(dip->csums); | ||
5707 | kfree(dip); | ||
5708 | free_ordered: | ||
5709 | /* | ||
5710 | * If this is a write, we need to clean up the reserved space and kill | ||
5711 | * the ordered extent. | ||
5712 | */ | ||
5713 | if (write) { | ||
5714 | struct btrfs_ordered_extent *ordered; | ||
5715 | ordered = btrfs_lookup_ordered_extent(inode, | ||
5716 | dip->logical_offset); | ||
5717 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
5718 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
5719 | btrfs_free_reserved_extent(root, ordered->start, | ||
5720 | ordered->disk_len); | ||
5721 | btrfs_put_ordered_extent(ordered); | ||
5722 | btrfs_put_ordered_extent(ordered); | ||
5723 | } | ||
5724 | bio_endio(bio, ret); | ||
5725 | } | ||
5726 | |||
5727 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | ||
5728 | const struct iovec *iov, loff_t offset, | ||
5729 | unsigned long nr_segs) | ||
5730 | { | ||
5731 | int seg; | ||
5732 | size_t size; | ||
5733 | unsigned long addr; | ||
5734 | unsigned blocksize_mask = root->sectorsize - 1; | ||
5735 | ssize_t retval = -EINVAL; | ||
5736 | loff_t end = offset; | ||
5737 | |||
5738 | if (offset & blocksize_mask) | ||
5739 | goto out; | ||
5740 | |||
5741 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
5742 | for (seg = 0; seg < nr_segs; seg++) { | ||
5743 | addr = (unsigned long)iov[seg].iov_base; | ||
5744 | size = iov[seg].iov_len; | ||
5745 | end += size; | ||
5746 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
5747 | goto out; | ||
5748 | } | ||
5749 | retval = 0; | ||
5750 | out: | ||
5751 | return retval; | ||
5752 | } | ||
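
check_direct_IO() above leans on the usual power-of-two mask trick: for a sector size S, (x & (S - 1)) is x mod S, so any non-zero result means the offset or a segment is misaligned. A runnable illustration with S = 4096:

#include <stdio.h>

int main(void)
{
	unsigned long blocksize_mask = 4096 - 1;
	unsigned long offsets[] = { 0, 4096, 8192, 5000 };
	int i;

	for (i = 0; i < 4; i++)
		printf("offset %5lu -> %s\n", offsets[i],
		       (offsets[i] & blocksize_mask) ? "EINVAL (misaligned)"
						     : "aligned");
	return 0;
}
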
4869 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 5753 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
4870 | const struct iovec *iov, loff_t offset, | 5754 | const struct iovec *iov, loff_t offset, |
4871 | unsigned long nr_segs) | 5755 | unsigned long nr_segs) |
4872 | { | 5756 | { |
4873 | return -EINVAL; | 5757 | struct file *file = iocb->ki_filp; |
5758 | struct inode *inode = file->f_mapping->host; | ||
5759 | struct btrfs_ordered_extent *ordered; | ||
5760 | struct extent_state *cached_state = NULL; | ||
5761 | u64 lockstart, lockend; | ||
5762 | ssize_t ret; | ||
5763 | int writing = rw & WRITE; | ||
5764 | int write_bits = 0; | ||
5765 | size_t count = iov_length(iov, nr_segs); | ||
5766 | |||
5767 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | ||
5768 | offset, nr_segs)) { | ||
5769 | return 0; | ||
5770 | } | ||
5771 | |||
5772 | lockstart = offset; | ||
5773 | lockend = offset + count - 1; | ||
5774 | |||
5775 | if (writing) { | ||
5776 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
5777 | if (ret) | ||
5778 | goto out; | ||
5779 | } | ||
5780 | |||
5781 | while (1) { | ||
5782 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5783 | 0, &cached_state, GFP_NOFS); | ||
5784 | /* | ||
5785 | * We're concerned with the entire range that we're going to be | ||
5786 | * doing DIO to, so we need to make sure there's no ordered | ||
5787 | * extents in this range. | ||
5788 | */ | ||
5789 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
5790 | lockend - lockstart + 1); | ||
5791 | if (!ordered) | ||
5792 | break; | ||
5793 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5794 | &cached_state, GFP_NOFS); | ||
5795 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
5796 | btrfs_put_ordered_extent(ordered); | ||
5797 | cond_resched(); | ||
5798 | } | ||
5799 | |||
5800 | /* | ||
5801 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
5802 | * the dirty or uptodate bits | ||
5803 | */ | ||
5804 | if (writing) { | ||
5805 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
5806 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5807 | EXTENT_DELALLOC, 0, NULL, &cached_state, | ||
5808 | GFP_NOFS); | ||
5809 | if (ret) { | ||
5810 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
5811 | lockend, EXTENT_LOCKED | write_bits, | ||
5812 | 1, 0, &cached_state, GFP_NOFS); | ||
5813 | goto out; | ||
5814 | } | ||
5815 | } | ||
5816 | |||
5817 | free_extent_state(cached_state); | ||
5818 | cached_state = NULL; | ||
5819 | |||
5820 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
5821 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | ||
5822 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | ||
5823 | btrfs_submit_direct, 0); | ||
5824 | |||
5825 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
5826 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, | ||
5827 | offset + iov_length(iov, nr_segs) - 1, | ||
5828 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5829 | &cached_state, GFP_NOFS); | ||
5830 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
5831 | /* | ||
5832 | * We're falling back to buffered, unlock the section we didn't | ||
5833 | * do IO on. | ||
5834 | */ | ||
5835 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, | ||
5836 | offset + iov_length(iov, nr_segs) - 1, | ||
5837 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5838 | &cached_state, GFP_NOFS); | ||
5839 | } | ||
5840 | out: | ||
5841 | free_extent_state(cached_state); | ||
5842 | return ret; | ||
4874 | } | 5843 | } |
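
After __blockdev_direct_IO() returns, btrfs_direct_IO() above distinguishes three cases: an error clears the whole locked range, a short result clears only the tail so buffered IO can redo it, and full completion leaves nothing locked. The range arithmetic, with sample numbers in place of a real request:

#include <stdio.h>

int main(void)
{
	long long offset = 0, count = 1 << 20;	/* 1 MiB request */
	long long ret = 256 << 10;		/* DIO handled 256 KiB */
	long long end = offset + count - 1;

	if (ret < 0)
		printf("error: clear [%lld, %lld]\n", offset, end);
	else if (ret < count)
		printf("short: clear [%lld, %lld] for buffered fallback\n",
		       offset + ret, end);
	else
		printf("complete: nothing left locked\n");
	return 0;
}
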
4875 | 5844 | ||
4876 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5845 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
@@ -5034,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5034 | u64 page_start; | 6003 | u64 page_start; |
5035 | u64 page_end; | 6004 | u64 page_end; |
5036 | 6005 | ||
5037 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 6006 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
5038 | if (ret) { | 6007 | if (ret) { |
5039 | if (ret == -ENOMEM) | 6008 | if (ret == -ENOMEM) |
5040 | ret = VM_FAULT_OOM; | 6009 | ret = VM_FAULT_OOM; |
@@ -5043,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5043 | goto out; | 6012 | goto out; |
5044 | } | 6013 | } |
5045 | 6014 | ||
5046 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
5047 | if (ret) { | ||
5048 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5049 | ret = VM_FAULT_SIGBUS; | ||
5050 | goto out; | ||
5051 | } | ||
5052 | |||
5053 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6015 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
5054 | again: | 6016 | again: |
5055 | lock_page(page); | 6017 | lock_page(page); |
@@ -5059,7 +6021,6 @@ again: | |||
5059 | 6021 | ||
5060 | if ((page->mapping != inode->i_mapping) || | 6022 | if ((page->mapping != inode->i_mapping) || |
5061 | (page_start >= size)) { | 6023 | (page_start >= size)) { |
5062 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5063 | /* page got truncated out from underneath us */ | 6024 | /* page got truncated out from underneath us */ |
5064 | goto out_unlock; | 6025 | goto out_unlock; |
5065 | } | 6026 | } |
@@ -5100,7 +6061,6 @@ again: | |||
5100 | unlock_extent_cached(io_tree, page_start, page_end, | 6061 | unlock_extent_cached(io_tree, page_start, page_end, |
5101 | &cached_state, GFP_NOFS); | 6062 | &cached_state, GFP_NOFS); |
5102 | ret = VM_FAULT_SIGBUS; | 6063 | ret = VM_FAULT_SIGBUS; |
5103 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5104 | goto out_unlock; | 6064 | goto out_unlock; |
5105 | } | 6065 | } |
5106 | ret = 0; | 6066 | ret = 0; |
@@ -5127,10 +6087,10 @@ again: | |||
5127 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6087 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
5128 | 6088 | ||
5129 | out_unlock: | 6089 | out_unlock: |
5130 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
5131 | if (!ret) | 6090 | if (!ret) |
5132 | return VM_FAULT_LOCKED; | 6091 | return VM_FAULT_LOCKED; |
5133 | unlock_page(page); | 6092 | unlock_page(page); |
6093 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
5134 | out: | 6094 | out: |
5135 | return ret; | 6095 | return ret; |
5136 | } | 6096 | } |
@@ -5155,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode) | |||
5155 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6115 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
5156 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6116 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
5157 | 6117 | ||
5158 | trans = btrfs_start_transaction(root, 1); | 6118 | trans = btrfs_start_transaction(root, 0); |
6119 | BUG_ON(IS_ERR(trans)); | ||
5159 | btrfs_set_trans_block_group(trans, inode); | 6120 | btrfs_set_trans_block_group(trans, inode); |
6121 | trans->block_rsv = root->orphan_block_rsv; | ||
5160 | 6122 | ||
5161 | /* | 6123 | /* |
5162 | * setattr is responsible for setting the ordered_data_close flag, | 6124 | * setattr is responsible for setting the ordered_data_close flag, |
@@ -5179,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode) | |||
5179 | btrfs_add_ordered_operation(trans, root, inode); | 6141 | btrfs_add_ordered_operation(trans, root, inode); |
5180 | 6142 | ||
5181 | while (1) { | 6143 | while (1) { |
6144 | if (!trans) { | ||
6145 | trans = btrfs_start_transaction(root, 0); | ||
6146 | BUG_ON(IS_ERR(trans)); | ||
6147 | btrfs_set_trans_block_group(trans, inode); | ||
6148 | trans->block_rsv = root->orphan_block_rsv; | ||
6149 | } | ||
6150 | |||
6151 | ret = btrfs_block_rsv_check(trans, root, | ||
6152 | root->orphan_block_rsv, 0, 5); | ||
6153 | if (ret) { | ||
6154 | BUG_ON(ret != -EAGAIN); | ||
6155 | ret = btrfs_commit_transaction(trans, root); | ||
6156 | BUG_ON(ret); | ||
6157 | trans = NULL; | ||
6158 | continue; | ||
6159 | } | ||
6160 | |||
5182 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6161 | ret = btrfs_truncate_inode_items(trans, root, inode, |
5183 | inode->i_size, | 6162 | inode->i_size, |
5184 | BTRFS_EXTENT_DATA_KEY); | 6163 | BTRFS_EXTENT_DATA_KEY); |
@@ -5190,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode) | |||
5190 | 6169 | ||
5191 | nr = trans->blocks_used; | 6170 | nr = trans->blocks_used; |
5192 | btrfs_end_transaction(trans, root); | 6171 | btrfs_end_transaction(trans, root); |
6172 | trans = NULL; | ||
5193 | btrfs_btree_balance_dirty(root, nr); | 6173 | btrfs_btree_balance_dirty(root, nr); |
5194 | |||
5195 | trans = btrfs_start_transaction(root, 1); | ||
5196 | btrfs_set_trans_block_group(trans, inode); | ||
5197 | } | 6174 | } |
5198 | 6175 | ||
5199 | if (ret == 0 && inode->i_nlink > 0) { | 6176 | if (ret == 0 && inode->i_nlink > 0) { |
@@ -5254,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
5254 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6231 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
5255 | { | 6232 | { |
5256 | struct btrfs_inode *ei; | 6233 | struct btrfs_inode *ei; |
6234 | struct inode *inode; | ||
5257 | 6235 | ||
5258 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | 6236 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); |
5259 | if (!ei) | 6237 | if (!ei) |
5260 | return NULL; | 6238 | return NULL; |
6239 | |||
6240 | ei->root = NULL; | ||
6241 | ei->space_info = NULL; | ||
6242 | ei->generation = 0; | ||
6243 | ei->sequence = 0; | ||
5261 | ei->last_trans = 0; | 6244 | ei->last_trans = 0; |
5262 | ei->last_sub_trans = 0; | 6245 | ei->last_sub_trans = 0; |
5263 | ei->logged_trans = 0; | 6246 | ei->logged_trans = 0; |
5264 | ei->outstanding_extents = 0; | 6247 | ei->delalloc_bytes = 0; |
5265 | ei->reserved_extents = 0; | 6248 | ei->reserved_bytes = 0; |
5266 | ei->root = NULL; | 6249 | ei->disk_i_size = 0; |
6250 | ei->flags = 0; | ||
6251 | ei->index_cnt = (u64)-1; | ||
6252 | ei->last_unlink_trans = 0; | ||
6253 | |||
5267 | spin_lock_init(&ei->accounting_lock); | 6254 | spin_lock_init(&ei->accounting_lock); |
6255 | atomic_set(&ei->outstanding_extents, 0); | ||
6256 | ei->reserved_extents = 0; | ||
6257 | |||
6258 | ei->ordered_data_close = 0; | ||
6259 | ei->orphan_meta_reserved = 0; | ||
6260 | ei->dummy_inode = 0; | ||
6261 | ei->force_compress = 0; | ||
6262 | |||
6263 | inode = &ei->vfs_inode; | ||
6264 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | ||
6265 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | ||
6266 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | ||
6267 | mutex_init(&ei->log_mutex); | ||
5268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
5269 | INIT_LIST_HEAD(&ei->i_orphan); | 6269 | INIT_LIST_HEAD(&ei->i_orphan); |
6270 | INIT_LIST_HEAD(&ei->delalloc_inodes); | ||
5270 | INIT_LIST_HEAD(&ei->ordered_operations); | 6271 | INIT_LIST_HEAD(&ei->ordered_operations); |
5271 | return &ei->vfs_inode; | 6272 | RB_CLEAR_NODE(&ei->rb_node); |
6273 | |||
6274 | return inode; | ||
5272 | } | 6275 | } |
5273 | 6276 | ||
5274 | void btrfs_destroy_inode(struct inode *inode) | 6277 | void btrfs_destroy_inode(struct inode *inode) |
@@ -5278,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5278 | 6281 | ||
5279 | WARN_ON(!list_empty(&inode->i_dentry)); | 6282 | WARN_ON(!list_empty(&inode->i_dentry)); |
5280 | WARN_ON(inode->i_data.nrpages); | 6283 | WARN_ON(inode->i_data.nrpages); |
6284 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | ||
6285 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
5281 | 6286 | ||
5282 | /* | 6287 | /* |
5283 | * This can happen where we create an inode, but somebody else also | 6288 | * This can happen where we create an inode, but somebody else also |
@@ -5298,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5298 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6303 | spin_unlock(&root->fs_info->ordered_extent_lock); |
5299 | } | 6304 | } |
5300 | 6305 | ||
5301 | spin_lock(&root->list_lock); | 6306 | spin_lock(&root->orphan_lock); |
5302 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6307 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
5303 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6308 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", |
5304 | inode->i_ino); | 6309 | inode->i_ino); |
5305 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6310 | list_del_init(&BTRFS_I(inode)->i_orphan); |
5306 | } | 6311 | } |
5307 | spin_unlock(&root->list_lock); | 6312 | spin_unlock(&root->orphan_lock); |
5308 | 6313 | ||
5309 | while (1) { | 6314 | while (1) { |
5310 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6315 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -5425,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5425 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6430 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
5426 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 6431 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
5427 | return -ENOTEMPTY; | 6432 | return -ENOTEMPTY; |
5428 | |||
5429 | /* | ||
5430 | * We want to reserve the absolute worst case amount of items. So if | ||
5431 | * both inodes are subvols and we need to unlink them then that would | ||
5432 | * require 4 item modifications, but if they are both normal inodes it | ||
5433 | * would require 5 item modifications, so we'll assume their normal | ||
5434 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
5435 | * should cover the worst case number of items we'll modify. | ||
5436 | */ | ||
5437 | ret = btrfs_reserve_metadata_space(root, 11); | ||
5438 | if (ret) | ||
5439 | return ret; | ||
5440 | |||
5441 | /* | 6433 | /* |
5442 | * we're using rename to replace one file with another. | 6434 | * we're using rename to replace one file with another. |
5443 | * and the replacement file is large. Start IO on it now so | 6435 | * and the replacement file is large. Start IO on it now so |
@@ -5450,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5450 | /* close the racy window with snapshot create/destroy ioctl */ | 6442 | /* close the racy window with snapshot create/destroy ioctl */ |
5451 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6443 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5452 | down_read(&root->fs_info->subvol_sem); | 6444 | down_read(&root->fs_info->subvol_sem); |
6445 | /* | ||
6446 | * We want to reserve the absolute worst case amount of items. So if | ||
6447 | * both inodes are subvols and we need to unlink them then that would | ||
6448 | * require 4 item modifications, but if they are both normal inodes it | ||
6449 | * would require 5 item modifications, so we'll assume they're normal | ||
6450 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
6451 | * should cover the worst case number of items we'll modify. | ||
6452 | */ | ||
6453 | trans = btrfs_start_transaction(root, 20); | ||
6454 | if (IS_ERR(trans)) | ||
6455 | return PTR_ERR(trans); | ||
5453 | 6456 | ||
5454 | trans = btrfs_start_transaction(root, 1); | ||
5455 | btrfs_set_trans_block_group(trans, new_dir); | 6457 | btrfs_set_trans_block_group(trans, new_dir); |
5456 | 6458 | ||
5457 | if (dest != root) | 6459 | if (dest != root) |
@@ -5550,7 +6552,6 @@ out_fail: | |||
5550 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6552 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5551 | up_read(&root->fs_info->subvol_sem); | 6553 | up_read(&root->fs_info->subvol_sem); |
5552 | 6554 | ||
5553 | btrfs_unreserve_metadata_space(root, 11); | ||
5554 | return ret; | 6555 | return ret; |
5555 | } | 6556 | } |
5556 | 6557 | ||
@@ -5602,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
5602 | return 0; | 6603 | return 0; |
5603 | } | 6604 | } |
5604 | 6605 | ||
6606 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
6607 | { | ||
6608 | struct btrfs_inode *binode; | ||
6609 | struct inode *inode = NULL; | ||
6610 | |||
6611 | spin_lock(&root->fs_info->delalloc_lock); | ||
6612 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
6613 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
6614 | struct btrfs_inode, delalloc_inodes); | ||
6615 | inode = igrab(&binode->vfs_inode); | ||
6616 | if (inode) { | ||
6617 | list_move_tail(&binode->delalloc_inodes, | ||
6618 | &root->fs_info->delalloc_inodes); | ||
6619 | break; | ||
6620 | } | ||
6621 | |||
6622 | list_del_init(&binode->delalloc_inodes); | ||
6623 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
6624 | } | ||
6625 | spin_unlock(&root->fs_info->delalloc_lock); | ||
6626 | |||
6627 | if (inode) { | ||
6628 | write_inode_now(inode, 0); | ||
6629 | if (delay_iput) | ||
6630 | btrfs_add_delayed_iput(inode); | ||
6631 | else | ||
6632 | iput(inode); | ||
6633 | return 1; | ||
6634 | } | ||
6635 | return 0; | ||
6636 | } | ||
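
btrfs_start_one_delalloc_inode() above grabs the head of the delalloc list, rotates it to the tail (the list_move_tail call) so repeated calls make progress through the whole list, and flushes just that one inode. The rotate-to-tail idea over a plain array standing in for the kernel list:

#include <stdio.h>

#define N 3

int main(void)
{
	int list[N] = { 101, 102, 103 };	/* inode numbers, head first */
	int i, picked = list[0];

	for (i = 0; i < N - 1; i++)	/* rotate head to tail */
		list[i] = list[i + 1];
	list[N - 1] = picked;

	printf("flush inode %d; list is now %d %d %d\n",
	       picked, list[0], list[1], list[2]);
	return 0;
}
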
6637 | |||
5605 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 6638 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
5606 | const char *symname) | 6639 | const char *symname) |
5607 | { | 6640 | { |
@@ -5625,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5625 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 6658 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
5626 | return -ENAMETOOLONG; | 6659 | return -ENAMETOOLONG; |
5627 | 6660 | ||
6661 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
6662 | if (err) | ||
6663 | return err; | ||
5628 | /* | 6664 | /* |
5629 | * 2 items for inode item and ref | 6665 | * 2 items for inode item and ref |
5630 | * 2 items for dir items | 6666 | * 2 items for dir items |
5631 | * 1 item for xattr if selinux is on | 6667 | * 1 item for xattr if selinux is on |
5632 | */ | 6668 | */ |
5633 | err = btrfs_reserve_metadata_space(root, 5); | 6669 | trans = btrfs_start_transaction(root, 5); |
5634 | if (err) | 6670 | if (IS_ERR(trans)) |
5635 | return err; | 6671 | return PTR_ERR(trans); |
5636 | 6672 | ||
5637 | trans = btrfs_start_transaction(root, 1); | ||
5638 | if (!trans) | ||
5639 | goto out_fail; | ||
5640 | btrfs_set_trans_block_group(trans, dir); | 6673 | btrfs_set_trans_block_group(trans, dir); |
5641 | 6674 | ||
5642 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
5643 | if (err) { | ||
5644 | err = -ENOSPC; | ||
5645 | goto out_unlock; | ||
5646 | } | ||
5647 | |||
5648 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6675 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
5649 | dentry->d_name.len, | 6676 | dentry->d_name.len, |
5650 | dentry->d_parent->d_inode->i_ino, objectid, | 6677 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -5716,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5716 | out_unlock: | 6743 | out_unlock: |
5717 | nr = trans->blocks_used; | 6744 | nr = trans->blocks_used; |
5718 | btrfs_end_transaction_throttle(trans, root); | 6745 | btrfs_end_transaction_throttle(trans, root); |
5719 | out_fail: | ||
5720 | btrfs_unreserve_metadata_space(root, 5); | ||
5721 | if (drop_inode) { | 6746 | if (drop_inode) { |
5722 | inode_dec_link_count(inode); | 6747 | inode_dec_link_count(inode); |
5723 | iput(inode); | 6748 | iput(inode); |
@@ -5726,33 +6751,28 @@ out_fail: | |||
5726 | return err; | 6751 | return err; |
5727 | } | 6752 | } |
5728 | 6753 | ||
5729 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 6754 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
5730 | u64 alloc_hint, int mode, loff_t actual_len) | 6755 | u64 start, u64 num_bytes, u64 min_size, |
6756 | loff_t actual_len, u64 *alloc_hint) | ||
5731 | { | 6757 | { |
5732 | struct btrfs_trans_handle *trans; | 6758 | struct btrfs_trans_handle *trans; |
5733 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6759 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5734 | struct btrfs_key ins; | 6760 | struct btrfs_key ins; |
5735 | u64 cur_offset = start; | 6761 | u64 cur_offset = start; |
5736 | u64 num_bytes = end - start; | ||
5737 | int ret = 0; | 6762 | int ret = 0; |
5738 | u64 i_size; | ||
5739 | 6763 | ||
5740 | while (num_bytes > 0) { | 6764 | while (num_bytes > 0) { |
5741 | trans = btrfs_start_transaction(root, 1); | 6765 | trans = btrfs_start_transaction(root, 3); |
5742 | 6766 | if (IS_ERR(trans)) { | |
5743 | ret = btrfs_reserve_extent(trans, root, num_bytes, | 6767 | ret = PTR_ERR(trans); |
5744 | root->sectorsize, 0, alloc_hint, | 6768 | break; |
5745 | (u64)-1, &ins, 1); | ||
5746 | if (ret) { | ||
5747 | WARN_ON(1); | ||
5748 | goto stop_trans; | ||
5749 | } | 6769 | } |
5750 | 6770 | ||
5751 | ret = btrfs_reserve_metadata_space(root, 3); | 6771 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
6772 | 0, *alloc_hint, (u64)-1, &ins, 1); | ||
5752 | if (ret) { | 6773 | if (ret) { |
5753 | btrfs_free_reserved_extent(root, ins.objectid, | 6774 | btrfs_end_transaction(trans, root); |
5754 | ins.offset); | 6775 | break; |
5755 | goto stop_trans; | ||
5756 | } | 6776 | } |
5757 | 6777 | ||
5758 | ret = insert_reserved_file_extent(trans, inode, | 6778 | ret = insert_reserved_file_extent(trans, inode, |
@@ -5766,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
5766 | 6786 | ||
5767 | num_bytes -= ins.offset; | 6787 | num_bytes -= ins.offset; |
5768 | cur_offset += ins.offset; | 6788 | cur_offset += ins.offset; |
5769 | alloc_hint = ins.objectid + ins.offset; | 6789 | *alloc_hint = ins.objectid + ins.offset; |
5770 | 6790 | ||
5771 | inode->i_ctime = CURRENT_TIME; | 6791 | inode->i_ctime = CURRENT_TIME; |
5772 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 6792 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
5773 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 6793 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
5774 | (actual_len > inode->i_size) && | 6794 | (actual_len > inode->i_size) && |
5775 | (cur_offset > inode->i_size)) { | 6795 | (cur_offset > inode->i_size)) { |
5776 | |||
5777 | if (cur_offset > actual_len) | 6796 | if (cur_offset > actual_len) |
5778 | i_size = actual_len; | 6797 | i_size_write(inode, actual_len); |
5779 | else | 6798 | else |
5780 | i_size = cur_offset; | 6799 | i_size_write(inode, cur_offset); |
5781 | i_size_write(inode, i_size); | 6800 | i_size_write(inode, cur_offset); |
5782 | btrfs_ordered_update_i_size(inode, i_size, NULL); | 6801 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); |
5783 | } | 6802 | } |
5784 | 6803 | ||
5785 | ret = btrfs_update_inode(trans, root, inode); | 6804 | ret = btrfs_update_inode(trans, root, inode); |
5786 | BUG_ON(ret); | 6805 | BUG_ON(ret); |
5787 | 6806 | ||
5788 | btrfs_end_transaction(trans, root); | 6807 | btrfs_end_transaction(trans, root); |
5789 | btrfs_unreserve_metadata_space(root, 3); | ||
5790 | } | 6808 | } |
5791 | return ret; | 6809 | return ret; |
5792 | |||
5793 | stop_trans: | ||
5794 | btrfs_end_transaction(trans, root); | ||
5795 | return ret; | ||
5796 | |||
5797 | } | 6810 | } |
5798 | 6811 | ||
5799 | static long btrfs_fallocate(struct inode *inode, int mode, | 6812 | static long btrfs_fallocate(struct inode *inode, int mode, |
@@ -5826,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5826 | goto out; | 6839 | goto out; |
5827 | } | 6840 | } |
5828 | 6841 | ||
5829 | ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, | 6842 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
5830 | alloc_end - alloc_start); | ||
5831 | if (ret) | 6843 | if (ret) |
5832 | goto out; | 6844 | goto out; |
5833 | 6845 | ||
@@ -5872,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5872 | if (em->block_start == EXTENT_MAP_HOLE || | 6884 | if (em->block_start == EXTENT_MAP_HOLE || |
5873 | (cur_offset >= inode->i_size && | 6885 | (cur_offset >= inode->i_size && |
5874 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 6886 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
5875 | ret = prealloc_file_range(inode, | 6887 | ret = btrfs_prealloc_file_range(inode, 0, cur_offset, |
5876 | cur_offset, last_byte, | 6888 | last_byte - cur_offset, |
5877 | alloc_hint, mode, offset+len); | 6889 | 1 << inode->i_blkbits, |
6890 | offset + len, | ||
6891 | &alloc_hint); | ||
5878 | if (ret < 0) { | 6892 | if (ret < 0) { |
5879 | free_extent_map(em); | 6893 | free_extent_map(em); |
5880 | break; | 6894 | break; |
5881 | } | 6895 | } |
5882 | } | 6896 | } |
5883 | if (em->block_start <= EXTENT_MAP_LAST_BYTE) | ||
5884 | alloc_hint = em->block_start; | ||
5885 | free_extent_map(em); | 6897 | free_extent_map(em); |
5886 | 6898 | ||
5887 | cur_offset = last_byte; | 6899 | cur_offset = last_byte; |
@@ -5893,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5893 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 6905 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5894 | &cached_state, GFP_NOFS); | 6906 | &cached_state, GFP_NOFS); |
5895 | 6907 | ||
5896 | btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, | 6908 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
5897 | alloc_end - alloc_start); | ||
5898 | out: | 6909 | out: |
5899 | mutex_unlock(&inode->i_mutex); | 6910 | mutex_unlock(&inode->i_mutex); |
5900 | return ret; | 6911 | return ret; |
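
The reworked helper is exported and now takes the allocation hint by pointer, so each chunk it reserves advances the caller's hint for the next call. A minimal user-space sketch of that contract (function name, chunking policy, and output are invented for illustration; this is not the kernel allocator):

#include <stdint.h>
#include <stdio.h>

static int prealloc_range(uint64_t start, uint64_t num_bytes,
                          uint64_t min_size, uint64_t *alloc_hint)
{
        while (num_bytes > 0) {
                /* pretend the allocator handed back a chunk at *alloc_hint */
                uint64_t got = num_bytes > min_size ? min_size : num_bytes;

                printf("allocated %llu bytes at %llu\n",
                       (unsigned long long)got,
                       (unsigned long long)*alloc_hint);
                num_bytes -= got;
                *alloc_hint += got;     /* caller keeps the advanced hint */
        }
        return 0;
}

int main(void)
{
        uint64_t hint = 4096;

        prealloc_range(0, 16384, 4096, &hint);
        printf("next hint: %llu\n", (unsigned long long)hint); /* 20480 */
        return 0;
}
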
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 97a97839a867..4cdb98cf26de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
240 | u64 index = 0; | 240 | u64 index = 0; |
241 | 241 | ||
242 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | ||
243 | 0, &objectid); | ||
244 | if (ret) | ||
245 | return ret; | ||
242 | /* | 246 | /* |
243 | * 1 - inode item | 247 | * 1 - inode item |
244 | * 2 - refs | 248 | * 2 - refs |
245 | * 1 - root item | 249 | * 1 - root item |
246 | * 2 - dir items | 250 | * 2 - dir items |
247 | */ | 251 | */ |
248 | ret = btrfs_reserve_metadata_space(root, 6); | 252 | trans = btrfs_start_transaction(root, 6); |
249 | if (ret) | 253 | if (IS_ERR(trans)) |
250 | return ret; | 254 | return PTR_ERR(trans); |
251 | |||
252 | trans = btrfs_start_transaction(root, 1); | ||
253 | BUG_ON(!trans); | ||
254 | |||
255 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
256 | 0, &objectid); | ||
257 | if (ret) | ||
258 | goto fail; | ||
259 | 255 | ||
260 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 256 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
261 | 0, objectid, NULL, 0, 0, 0); | 257 | 0, objectid, NULL, 0, 0, 0); |
@@ -345,13 +341,10 @@ fail: | |||
345 | err = btrfs_commit_transaction(trans, root); | 341 | err = btrfs_commit_transaction(trans, root); |
346 | if (err && !ret) | 342 | if (err && !ret) |
347 | ret = err; | 343 | ret = err; |
348 | |||
349 | btrfs_unreserve_metadata_space(root, 6); | ||
350 | return ret; | 344 | return ret; |
351 | } | 345 | } |
352 | 346 | ||
353 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 347 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) |
354 | char *name, int namelen) | ||
355 | { | 348 | { |
356 | struct inode *inode; | 349 | struct inode *inode; |
357 | struct btrfs_pending_snapshot *pending_snapshot; | 350 | struct btrfs_pending_snapshot *pending_snapshot; |
@@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
361 | if (!root->ref_cows) | 354 | if (!root->ref_cows) |
362 | return -EINVAL; | 355 | return -EINVAL; |
363 | 356 | ||
364 | /* | ||
365 | * 1 - inode item | ||
366 | * 2 - refs | ||
367 | * 1 - root item | ||
368 | * 2 - dir items | ||
369 | */ | ||
370 | ret = btrfs_reserve_metadata_space(root, 6); | ||
371 | if (ret) | ||
372 | goto fail; | ||
373 | |||
374 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 357 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
375 | if (!pending_snapshot) { | 358 | if (!pending_snapshot) |
376 | ret = -ENOMEM; | 359 | return -ENOMEM; |
377 | btrfs_unreserve_metadata_space(root, 6); | 360 | |
378 | goto fail; | 361 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
379 | } | ||
380 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
381 | if (!pending_snapshot->name) { | ||
382 | ret = -ENOMEM; | ||
383 | kfree(pending_snapshot); | ||
384 | btrfs_unreserve_metadata_space(root, 6); | ||
385 | goto fail; | ||
386 | } | ||
387 | memcpy(pending_snapshot->name, name, namelen); | ||
388 | pending_snapshot->name[namelen] = '\0'; | ||
389 | pending_snapshot->dentry = dentry; | 362 | pending_snapshot->dentry = dentry; |
390 | trans = btrfs_start_transaction(root, 1); | ||
391 | BUG_ON(!trans); | ||
392 | pending_snapshot->root = root; | 363 | pending_snapshot->root = root; |
364 | |||
365 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | ||
366 | if (IS_ERR(trans)) { | ||
367 | ret = PTR_ERR(trans); | ||
368 | goto fail; | ||
369 | } | ||
370 | |||
371 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | ||
372 | BUG_ON(ret); | ||
373 | |||
393 | list_add(&pending_snapshot->list, | 374 | list_add(&pending_snapshot->list, |
394 | &trans->transaction->pending_snapshots); | 375 | &trans->transaction->pending_snapshots); |
395 | ret = btrfs_commit_transaction(trans, root); | 376 | ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); |
396 | BUG_ON(ret); | 377 | BUG_ON(ret); |
397 | btrfs_unreserve_metadata_space(root, 6); | 378 | |
379 | ret = pending_snapshot->error; | ||
380 | if (ret) | ||
381 | goto fail; | ||
382 | |||
383 | btrfs_orphan_cleanup(pending_snapshot->snap); | ||
398 | 384 | ||
399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 385 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
400 | if (IS_ERR(inode)) { | 386 | if (IS_ERR(inode)) { |
@@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
405 | d_instantiate(dentry, inode); | 391 | d_instantiate(dentry, inode); |
406 | ret = 0; | 392 | ret = 0; |
407 | fail: | 393 | fail: |
394 | kfree(pending_snapshot); | ||
408 | return ret; | 395 | return ret; |
409 | } | 396 | } |
410 | 397 | ||
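
Both subvolume creation and snapshot creation now fold the reservation count into btrfs_start_transaction() and check the result with IS_ERR()/PTR_ERR() instead of BUG_ON(!trans). A user-space model of that error-in-pointer convention (constants simplified from the kernel's include/linux/err.h):

#include <stdio.h>

#define MAX_ERRNO   4095
#define IS_ERR(p)   ((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)
#define PTR_ERR(p)  ((long)(p))
#define ERR_PTR(e)  ((void *)(long)(e))

static void *start_transaction(int fail)
{
        static int dummy;

        return fail ? ERR_PTR(-12 /* stand-in for -ENOMEM */) : &dummy;
}

int main(void)
{
        void *trans = start_transaction(1);

        if (IS_ERR(trans)) {            /* replaces the old BUG_ON(!trans) */
                printf("start failed: %ld\n", PTR_ERR(trans));
                return 1;
        }
        return 0;
}
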
@@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
456 | goto out_up_read; | 443 | goto out_up_read; |
457 | 444 | ||
458 | if (snap_src) { | 445 | if (snap_src) { |
459 | error = create_snapshot(snap_src, dentry, | 446 | error = create_snapshot(snap_src, dentry); |
460 | name, namelen); | ||
461 | } else { | 447 | } else { |
462 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 448 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
463 | name, namelen); | 449 | name, namelen); |
@@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file, | |||
601 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 587 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
602 | BTRFS_I(inode)->force_compress = 1; | 588 | BTRFS_I(inode)->force_compress = 1; |
603 | 589 | ||
604 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 590 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
605 | if (ret) { | 591 | if (ret) |
606 | ret = -ENOSPC; | 592 | goto err_unlock; |
607 | break; | ||
608 | } | ||
609 | |||
610 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
611 | if (ret) { | ||
612 | btrfs_free_reserved_data_space(root, inode, | ||
613 | PAGE_CACHE_SIZE); | ||
614 | ret = -ENOSPC; | ||
615 | break; | ||
616 | } | ||
617 | again: | 593 | again: |
618 | if (inode->i_size == 0 || | 594 | if (inode->i_size == 0 || |
619 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | 595 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { |
@@ -622,8 +598,10 @@ again: | |||
622 | } | 598 | } |
623 | 599 | ||
624 | page = grab_cache_page(inode->i_mapping, i); | 600 | page = grab_cache_page(inode->i_mapping, i); |
625 | if (!page) | 601 | if (!page) { |
602 | ret = -ENOMEM; | ||
626 | goto err_reservations; | 603 | goto err_reservations; |
604 | } | ||
627 | 605 | ||
628 | if (!PageUptodate(page)) { | 606 | if (!PageUptodate(page)) { |
629 | btrfs_readpage(NULL, page); | 607 | btrfs_readpage(NULL, page); |
@@ -631,6 +609,7 @@ again: | |||
631 | if (!PageUptodate(page)) { | 609 | if (!PageUptodate(page)) { |
632 | unlock_page(page); | 610 | unlock_page(page); |
633 | page_cache_release(page); | 611 | page_cache_release(page); |
612 | ret = -EIO; | ||
634 | goto err_reservations; | 613 | goto err_reservations; |
635 | } | 614 | } |
636 | } | 615 | } |
@@ -644,8 +623,7 @@ again: | |||
644 | wait_on_page_writeback(page); | 623 | wait_on_page_writeback(page); |
645 | 624 | ||
646 | if (PageDirty(page)) { | 625 | if (PageDirty(page)) { |
647 | btrfs_free_reserved_data_space(root, inode, | 626 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
648 | PAGE_CACHE_SIZE); | ||
649 | goto loop_unlock; | 627 | goto loop_unlock; |
650 | } | 628 | } |
651 | 629 | ||
@@ -683,7 +661,6 @@ loop_unlock: | |||
683 | page_cache_release(page); | 661 | page_cache_release(page); |
684 | mutex_unlock(&inode->i_mutex); | 662 | mutex_unlock(&inode->i_mutex); |
685 | 663 | ||
686 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
687 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 664 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
688 | i++; | 665 | i++; |
689 | } | 666 | } |
@@ -713,9 +690,9 @@ loop_unlock: | |||
713 | return 0; | 690 | return 0; |
714 | 691 | ||
715 | err_reservations: | 692 | err_reservations: |
693 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
694 | err_unlock: | ||
716 | mutex_unlock(&inode->i_mutex); | 695 | mutex_unlock(&inode->i_mutex); |
717 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
718 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
719 | return ret; | 696 | return ret; |
720 | } | 697 | } |
721 | 698 | ||
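
The defrag loop now pairs one combined delalloc reservation with one release and unwinds through two labels: err_reservations gives the space back, then falls through to err_unlock, which is also the target for failures before anything was reserved. A runnable sketch of that label discipline with stand-in helpers:

#include <stdio.h>

static void lock_inode(void)    { puts("lock");    }
static void unlock_inode(void)  { puts("unlock");  }
static int  reserve_space(void) { puts("reserve"); return 0; }
static void release_space(void) { puts("release"); }
static int  process_page(void)  { puts("process"); return -5; /* fake -EIO */ }

static int defrag_one_page(void)
{
        int ret;

        lock_inode();
        ret = reserve_space();
        if (ret)
                goto err_unlock;        /* nothing reserved yet */

        ret = process_page();
        if (ret)
                goto err_reservations;  /* reservation must be returned */

        unlock_inode();
        return 0;

err_reservations:
        release_space();
err_unlock:
        unlock_inode();
        return ret;
}

int main(void)
{
        return defrag_one_page() ? 1 : 0;
}
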
@@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
811 | device->name, (unsigned long long)new_size); | 788 | device->name, (unsigned long long)new_size); |
812 | 789 | ||
813 | if (new_size > old_size) { | 790 | if (new_size > old_size) { |
814 | trans = btrfs_start_transaction(root, 1); | 791 | trans = btrfs_start_transaction(root, 0); |
815 | ret = btrfs_grow_device(trans, device, new_size); | 792 | ret = btrfs_grow_device(trans, device, new_size); |
816 | btrfs_commit_transaction(trans, root); | 793 | btrfs_commit_transaction(trans, root); |
817 | } else { | 794 | } else { |
@@ -1300,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1300 | if (err) | 1277 | if (err) |
1301 | goto out_up_write; | 1278 | goto out_up_write; |
1302 | 1279 | ||
1303 | trans = btrfs_start_transaction(root, 1); | 1280 | trans = btrfs_start_transaction(root, 0); |
1281 | if (IS_ERR(trans)) { | ||
1282 | err = PTR_ERR(trans); | ||
1283 | goto out; | ||
1284 | } | ||
1285 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1286 | |||
1304 | ret = btrfs_unlink_subvol(trans, root, dir, | 1287 | ret = btrfs_unlink_subvol(trans, root, dir, |
1305 | dest->root_key.objectid, | 1288 | dest->root_key.objectid, |
1306 | dentry->d_name.name, | 1289 | dentry->d_name.name, |
@@ -1314,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1314 | dest->root_item.drop_level = 0; | 1297 | dest->root_item.drop_level = 0; |
1315 | btrfs_set_root_refs(&dest->root_item, 0); | 1298 | btrfs_set_root_refs(&dest->root_item, 0); |
1316 | 1299 | ||
1317 | ret = btrfs_insert_orphan_item(trans, | 1300 | if (!xchg(&dest->orphan_item_inserted, 1)) { |
1318 | root->fs_info->tree_root, | 1301 | ret = btrfs_insert_orphan_item(trans, |
1319 | dest->root_key.objectid); | 1302 | root->fs_info->tree_root, |
1320 | BUG_ON(ret); | 1303 | dest->root_key.objectid); |
1304 | BUG_ON(ret); | ||
1305 | } | ||
1321 | 1306 | ||
1322 | ret = btrfs_commit_transaction(trans, root); | 1307 | ret = btrfs_commit_transaction(trans, root); |
1323 | BUG_ON(ret); | 1308 | BUG_ON(ret); |
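
Snapshot deletion now guards the orphan-item insert with xchg(), a test-and-set that lets only the first caller flip orphan_item_inserted from 0 to 1, so the item is inserted at most once per root. A user-space model using a compiler atomic builtin in place of the kernel's xchg():

#include <stdio.h>

static int orphan_item_inserted;

static void maybe_insert_orphan(void)
{
        if (!__atomic_exchange_n(&orphan_item_inserted, 1, __ATOMIC_SEQ_CST))
                puts("inserting orphan item");  /* first caller only */
        else
                puts("already inserted, skipping");
}

int main(void)
{
        maybe_insert_orphan();
        maybe_insert_orphan();
        return 0;
}
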
@@ -1358,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1358 | ret = -EPERM; | 1343 | ret = -EPERM; |
1359 | goto out; | 1344 | goto out; |
1360 | } | 1345 | } |
1361 | btrfs_defrag_root(root, 0); | 1346 | ret = btrfs_defrag_root(root, 0); |
1362 | btrfs_defrag_root(root->fs_info->extent_root, 0); | 1347 | if (ret) |
1348 | goto out; | ||
1349 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
1363 | break; | 1350 | break; |
1364 | case S_IFREG: | 1351 | case S_IFREG: |
1365 | if (!(file->f_mode & FMODE_WRITE)) { | 1352 | if (!(file->f_mode & FMODE_WRITE)) { |
@@ -1389,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1389 | /* the rest are all set to zero by kzalloc */ | 1376 | /* the rest are all set to zero by kzalloc */ |
1390 | range->len = (u64)-1; | 1377 | range->len = (u64)-1; |
1391 | } | 1378 | } |
1392 | btrfs_defrag_file(file, range); | 1379 | ret = btrfs_defrag_file(file, range); |
1393 | kfree(range); | 1380 | kfree(range); |
1394 | break; | 1381 | break; |
1382 | default: | ||
1383 | ret = -EINVAL; | ||
1395 | } | 1384 | } |
1396 | out: | 1385 | out: |
1397 | mnt_drop_write(file->f_path.mnt); | 1386 | mnt_drop_write(file->f_path.mnt); |
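
The defrag ioctl stops discarding return values: each btrfs_defrag_root() call is checked before moving on, and a new default case rejects unexpected inode types with -EINVAL. A compact model of that control flow (mode values and errno are stand-ins):

#include <stdio.h>

static int defrag_dir(void)  { return 0; }
static int defrag_file(void) { return 0; }

static int do_defrag(unsigned int mode)
{
        int ret = 0;

        switch (mode) {
        case 1:                 /* stand-in for S_IFDIR */
                ret = defrag_dir();
                break;
        case 2:                 /* stand-in for S_IFREG */
                ret = defrag_file();
                break;
        default:                /* anything else is rejected */
                ret = -22;      /* stand-in for -EINVAL */
        }
        return ret;
}

int main(void)
{
        printf("%d\n", do_defrag(3));   /* prints -22 */
        return 0;
}
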
@@ -1550,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1550 | btrfs_wait_ordered_range(src, off, off+len); | 1539 | btrfs_wait_ordered_range(src, off, off+len); |
1551 | } | 1540 | } |
1552 | 1541 | ||
1553 | trans = btrfs_start_transaction(root, 1); | ||
1554 | BUG_ON(!trans); | ||
1555 | |||
1556 | /* punch hole in destination first */ | ||
1557 | btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); | ||
1558 | |||
1559 | /* clone data */ | 1542 | /* clone data */ |
1560 | key.objectid = src->i_ino; | 1543 | key.objectid = src->i_ino; |
1561 | key.type = BTRFS_EXTENT_DATA_KEY; | 1544 | key.type = BTRFS_EXTENT_DATA_KEY; |
@@ -1566,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1566 | * note the key will change type as we walk through the | 1549 | * note the key will change type as we walk through the |
1567 | * tree. | 1550 | * tree. |
1568 | */ | 1551 | */ |
1569 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 1552 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1570 | if (ret < 0) | 1553 | if (ret < 0) |
1571 | goto out; | 1554 | goto out; |
1572 | 1555 | ||
@@ -1629,12 +1612,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1629 | new_key.objectid = inode->i_ino; | 1612 | new_key.objectid = inode->i_ino; |
1630 | new_key.offset = key.offset + destoff - off; | 1613 | new_key.offset = key.offset + destoff - off; |
1631 | 1614 | ||
1615 | trans = btrfs_start_transaction(root, 1); | ||
1616 | if (IS_ERR(trans)) { | ||
1617 | ret = PTR_ERR(trans); | ||
1618 | goto out; | ||
1619 | } | ||
1620 | |||
1632 | if (type == BTRFS_FILE_EXTENT_REG || | 1621 | if (type == BTRFS_FILE_EXTENT_REG || |
1633 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 1622 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
1623 | if (off > key.offset) { | ||
1624 | datao += off - key.offset; | ||
1625 | datal -= off - key.offset; | ||
1626 | } | ||
1627 | |||
1628 | if (key.offset + datal > off + len) | ||
1629 | datal = off + len - key.offset; | ||
1630 | |||
1631 | ret = btrfs_drop_extents(trans, inode, | ||
1632 | new_key.offset, | ||
1633 | new_key.offset + datal, | ||
1634 | &hint_byte, 1); | ||
1635 | BUG_ON(ret); | ||
1636 | |||
1634 | ret = btrfs_insert_empty_item(trans, root, path, | 1637 | ret = btrfs_insert_empty_item(trans, root, path, |
1635 | &new_key, size); | 1638 | &new_key, size); |
1636 | if (ret) | 1639 | BUG_ON(ret); |
1637 | goto out; | ||
1638 | 1640 | ||
1639 | leaf = path->nodes[0]; | 1641 | leaf = path->nodes[0]; |
1640 | slot = path->slots[0]; | 1642 | slot = path->slots[0]; |
@@ -1645,14 +1647,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1645 | extent = btrfs_item_ptr(leaf, slot, | 1647 | extent = btrfs_item_ptr(leaf, slot, |
1646 | struct btrfs_file_extent_item); | 1648 | struct btrfs_file_extent_item); |
1647 | 1649 | ||
1648 | if (off > key.offset) { | ||
1649 | datao += off - key.offset; | ||
1650 | datal -= off - key.offset; | ||
1651 | } | ||
1652 | |||
1653 | if (key.offset + datal > off + len) | ||
1654 | datal = off + len - key.offset; | ||
1655 | |||
1656 | /* disko == 0 means it's a hole */ | 1650 | /* disko == 0 means it's a hole */ |
1657 | if (!disko) | 1651 | if (!disko) |
1658 | datao = 0; | 1652 | datao = 0; |
@@ -1683,14 +1677,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1683 | 1677 | ||
1684 | if (comp && (skip || trim)) { | 1678 | if (comp && (skip || trim)) { |
1685 | ret = -EINVAL; | 1679 | ret = -EINVAL; |
1680 | btrfs_end_transaction(trans, root); | ||
1686 | goto out; | 1681 | goto out; |
1687 | } | 1682 | } |
1688 | size -= skip + trim; | 1683 | size -= skip + trim; |
1689 | datal -= skip + trim; | 1684 | datal -= skip + trim; |
1685 | |||
1686 | ret = btrfs_drop_extents(trans, inode, | ||
1687 | new_key.offset, | ||
1688 | new_key.offset + datal, | ||
1689 | &hint_byte, 1); | ||
1690 | BUG_ON(ret); | ||
1691 | |||
1690 | ret = btrfs_insert_empty_item(trans, root, path, | 1692 | ret = btrfs_insert_empty_item(trans, root, path, |
1691 | &new_key, size); | 1693 | &new_key, size); |
1692 | if (ret) | 1694 | BUG_ON(ret); |
1693 | goto out; | ||
1694 | 1695 | ||
1695 | if (skip) { | 1696 | if (skip) { |
1696 | u32 start = | 1697 | u32 start = |
@@ -1708,8 +1709,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1708 | } | 1709 | } |
1709 | 1710 | ||
1710 | btrfs_mark_buffer_dirty(leaf); | 1711 | btrfs_mark_buffer_dirty(leaf); |
1711 | } | 1712 | btrfs_release_path(root, path); |
1712 | 1713 | ||
1714 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1715 | if (new_key.offset + datal > inode->i_size) | ||
1716 | btrfs_i_size_write(inode, | ||
1717 | new_key.offset + datal); | ||
1718 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1719 | ret = btrfs_update_inode(trans, root, inode); | ||
1720 | BUG_ON(ret); | ||
1721 | btrfs_end_transaction(trans, root); | ||
1722 | } | ||
1713 | next: | 1723 | next: |
1714 | btrfs_release_path(root, path); | 1724 | btrfs_release_path(root, path); |
1715 | key.offset++; | 1725 | key.offset++; |
@@ -1717,17 +1727,7 @@ next: | |||
1717 | ret = 0; | 1727 | ret = 0; |
1718 | out: | 1728 | out: |
1719 | btrfs_release_path(root, path); | 1729 | btrfs_release_path(root, path); |
1720 | if (ret == 0) { | ||
1721 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1722 | if (destoff + olen > inode->i_size) | ||
1723 | btrfs_i_size_write(inode, destoff + olen); | ||
1724 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1725 | ret = btrfs_update_inode(trans, root, inode); | ||
1726 | } | ||
1727 | btrfs_end_transaction(trans, root); | ||
1728 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1730 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1729 | if (ret) | ||
1730 | vmtruncate(inode, 0); | ||
1731 | out_unlock: | 1731 | out_unlock: |
1732 | mutex_unlock(&src->i_mutex); | 1732 | mutex_unlock(&src->i_mutex); |
1733 | mutex_unlock(&inode->i_mutex); | 1733 | mutex_unlock(&inode->i_mutex); |
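
The clone ioctl no longer holds one transaction (and one big up-front hole punch) across the whole tree walk; each copied extent gets its own start/drop/insert/update/end cycle, and the read-only search runs with a NULL transaction. A stand-in sketch of that per-extent scoping (all helpers here are stubs, not the btrfs API):

#include <stdio.h>

struct trans { int id; };

static struct trans *start_trans(void)     { static struct trans t; return &t; }
static void end_trans(struct trans *t)     { (void)t; puts("end trans"); }
static int  drop_extents(struct trans *t)  { (void)t; puts("drop");   return 0; }
static int  insert_extent(struct trans *t) { (void)t; puts("insert"); return 0; }
static int  update_inode(struct trans *t)  { (void)t; puts("update"); return 0; }

/* one short-lived transaction per cloned extent */
static int clone_one_extent(void)
{
        struct trans *t = start_trans();
        int ret;

        ret = drop_extents(t);          /* punch hole only where we insert */
        if (!ret)
                ret = insert_extent(t);
        if (!ret)
                ret = update_inode(t);  /* size/ctime in the same trans */
        end_trans(t);
        return ret;
}

int main(void)
{
        for (int i = 0; i < 2; i++)     /* the walk repeats this per item */
                clone_one_extent();
        return 0;
}
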
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a127c0ebb2dc..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
128 | u64 len) | ||
129 | { | ||
130 | if (file_offset + len <= entry->file_offset || | ||
131 | entry->file_offset + entry->len <= file_offset) | ||
132 | return 0; | ||
133 | return 1; | ||
134 | } | ||
135 | |||
127 | /* | 136 | /* |
128 | * look for the first ordered struct that has this offset, otherwise | 137 |
129 | * the first one less than this offset | 138 | * the first one less than this offset |
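
The new range_overlaps() helper treats each extent as the half-open interval [file_offset, file_offset + len): ranges that merely touch end-to-start do not count as overlapping. A quick user-space check of the same expression:

#include <assert.h>
#include <stdint.h>

struct ext { uint64_t file_offset, len; };

static int range_overlaps(struct ext *e, uint64_t off, uint64_t len)
{
        if (off + len <= e->file_offset || e->file_offset + e->len <= off)
                return 0;
        return 1;
}

int main(void)
{
        struct ext e = { .file_offset = 100, .len = 50 }; /* covers [100,150) */

        assert(!range_overlaps(&e, 0, 100));    /* touches at 100: no */
        assert(range_overlaps(&e, 149, 10));    /* covers byte 149: yes */
        assert(!range_overlaps(&e, 150, 10));   /* starts at the end: no */
        return 0;
}
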
@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
161 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
162 | * inserted. | 171 | * inserted. |
163 | */ | 172 | */ |
164 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
165 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | ||
166 | { | 176 | { |
167 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
168 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
182 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
183 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
184 | 194 | ||
195 | if (dio) | ||
196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
197 | |||
185 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
186 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
187 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
203 | return 0; | 216 | return 0; |
204 | } | 217 | } |
205 | 218 | ||
219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
220 | u64 start, u64 len, u64 disk_len, int type) | ||
221 | { | ||
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
223 | disk_len, type, 0); | ||
224 | } | ||
225 | |||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
227 | u64 start, u64 len, u64 disk_len, int type) | ||
228 | { | ||
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
230 | disk_len, type, 1); | ||
231 | } | ||
232 | |||
206 | /* | 233 | /* |
207 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
208 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
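
The ordered-extent insert is refactored into one static worker taking an extra dio flag, with the two exported names simply pinning the flag; the direct-IO special case lives in a single place. A minimal model of that wrapper pattern (names invented to avoid clashing with the kernel's):

#include <stdio.h>

static int __add_extent(unsigned long long off, int dio)
{
        printf("add extent at %llu%s\n", off, dio ? " (direct IO)" : "");
        return 0;
}

int add_extent(unsigned long long off)     { return __add_extent(off, 0); }
int add_extent_dio(unsigned long long off) { return __add_extent(off, 1); }

int main(void)
{
        add_extent(4096);
        add_extent_dio(8192);
        return 0;
}
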
@@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
311 | tree->last = NULL; | 338 | tree->last = NULL; |
312 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 339 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
313 | 340 | ||
314 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
315 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | ||
316 | BTRFS_I(inode)->outstanding_extents--; | ||
317 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
318 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
319 | inode, 1); | ||
320 | |||
321 | spin_lock(&root->fs_info->ordered_extent_lock); | 341 | spin_lock(&root->fs_info->ordered_extent_lock); |
322 | list_del_init(&entry->root_extent_list); | 342 | list_del_init(&entry->root_extent_list); |
323 | 343 | ||
@@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
491 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
492 | * for pdflush to find them | 512 | * for pdflush to find them |
493 | */ | 513 | */ |
494 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
495 | if (wait) { | 516 | if (wait) { |
496 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
497 | &entry->flags)); | 518 | &entry->flags)); |
@@ -588,6 +609,47 @@ out: | |||
588 | return entry; | 609 | return entry; |
589 | } | 610 | } |
590 | 611 | ||
612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
613 | * extents that exist in the range, rather than just the start of the range. | ||
614 | */ | ||
615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
616 | u64 file_offset, | ||
617 | u64 len) | ||
618 | { | ||
619 | struct btrfs_ordered_inode_tree *tree; | ||
620 | struct rb_node *node; | ||
621 | struct btrfs_ordered_extent *entry = NULL; | ||
622 | |||
623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
624 | spin_lock(&tree->lock); | ||
625 | node = tree_search(tree, file_offset); | ||
626 | if (!node) { | ||
627 | node = tree_search(tree, file_offset + len); | ||
628 | if (!node) | ||
629 | goto out; | ||
630 | } | ||
631 | |||
632 | while (1) { | ||
633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
634 | if (range_overlaps(entry, file_offset, len)) | ||
635 | break; | ||
636 | |||
637 | if (entry->file_offset >= file_offset + len) { | ||
638 | entry = NULL; | ||
639 | break; | ||
640 | } | ||
641 | entry = NULL; | ||
642 | node = rb_next(node); | ||
643 | if (!node) | ||
644 | break; | ||
645 | } | ||
646 | out: | ||
647 | if (entry) | ||
648 | atomic_inc(&entry->refs); | ||
649 | spin_unlock(&tree->lock); | ||
650 | return entry; | ||
651 | } | ||
652 | |||
591 | /* | 653 | /* |
592 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
593 | * if none is found | 655 | * if none is found |
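
btrfs_lookup_ordered_range() exists because DIO locks a wide span: it starts near the range start and walks forward until an entry overlaps or the walk passes file_offset + len. A user-space model of that scan, with a sorted array standing in for the rb-tree:

#include <stdio.h>
#include <stdint.h>

struct ext { uint64_t off, len; };

/* entries sorted by offset, standing in for the ordered-extent rb-tree */
static struct ext tree[] = { {0, 10}, {40, 20}, {100, 50} };

static struct ext *lookup_range(uint64_t off, uint64_t len)
{
        for (size_t i = 0; i < sizeof(tree) / sizeof(tree[0]); i++) {
                struct ext *e = &tree[i];

                if (e->off >= off + len)        /* walked past the range */
                        break;
                if (off + len > e->off && e->off + e->len > off)
                        return e;       /* overlap anywhere in the range */
        }
        return NULL;
}

int main(void)
{
        struct ext *e = lookup_range(30, 30);   /* hits {40, 20} */

        printf("%s\n", e ? "found overlapping extent" : "none");
        return 0;
}
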
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c82f76a9f040..8ac365492a3f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -72,6 +72,8 @@ struct btrfs_ordered_sum { | |||
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
76 | |||
75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
77 | u64 file_offset; | 79 | u64 file_offset; |
@@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
140 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
141 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
142 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
143 | u64 start, u64 len, u64 disk_len, int tyep); | 145 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
147 | u64 start, u64 len, u64 disk_len, int type); | ||
144 | int btrfs_add_ordered_sum(struct inode *inode, | 148 | int btrfs_add_ordered_sum(struct inode *inode, |
145 | struct btrfs_ordered_extent *entry, | 149 | struct btrfs_ordered_extent *entry, |
146 | struct btrfs_ordered_sum *sum); | 150 | struct btrfs_ordered_sum *sum); |
@@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
151 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 155 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
152 | struct btrfs_ordered_extent * | 156 | struct btrfs_ordered_extent * |
153 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 157 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
158 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
159 | u64 file_offset, | ||
160 | u64 len); | ||
154 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 161 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
155 | struct btrfs_ordered_extent *ordered); | 162 | struct btrfs_ordered_extent *ordered); |
156 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 163 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
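
BTRFS_ORDERED_DIRECT, like the flag values above it, is a bit number fed to set_bit()/test_bit() on entry->flags rather than a ready-made mask. A tiny user-space model of that convention (helper names invented to avoid shadowing the kernel's):

#include <stdio.h>

#define BTRFS_ORDERED_PREALLOC  4       /* bit numbers, not masks */
#define BTRFS_ORDERED_DIRECT    5

static void set_bit_ul(int nr, unsigned long *flags)  { *flags |= 1UL << nr; }
static int  test_bit_ul(int nr, unsigned long *flags) { return (*flags >> nr) & 1; }

int main(void)
{
        unsigned long flags = 0;

        set_bit_ul(BTRFS_ORDERED_DIRECT, &flags);
        if (test_bit_ul(BTRFS_ORDERED_DIRECT, &flags))
                puts("extent is direct IO");    /* e.g. skip fdatawrite */
        return 0;
}
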
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e558dd941ded..05d41e569236 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -44,8 +44,12 @@ struct tree_entry { | |||
44 | struct backref_node { | 44 | struct backref_node { |
45 | struct rb_node rb_node; | 45 | struct rb_node rb_node; |
46 | u64 bytenr; | 46 | u64 bytenr; |
47 | /* objectid tree block owner */ | 47 | |
48 | u64 new_bytenr; | ||
49 | /* objectid of tree block owner, may not be up to date */ | ||
48 | u64 owner; | 50 | u64 owner; |
51 | /* link to pending, changed or detached list */ | ||
52 | struct list_head list; | ||
49 | /* list of upper level blocks that reference this block */ | 53 |
50 | struct list_head upper; | 54 | struct list_head upper; |
51 | /* list of child blocks in the cache */ | 55 | /* list of child blocks in the cache */ |
@@ -56,9 +60,9 @@ struct backref_node { | |||
56 | struct extent_buffer *eb; | 60 | struct extent_buffer *eb; |
57 | /* level of tree block */ | 61 | /* level of tree block */ |
58 | unsigned int level:8; | 62 | unsigned int level:8; |
59 | /* 1 if the block is root of old snapshot */ | 63 | /* is the block in a non-reference-counted tree */
60 | unsigned int old_root:1; | 64 | unsigned int cowonly:1; |
61 | /* 1 if no child blocks in the cache */ | 65 | /* 1 if no child node in the cache */ |
62 | unsigned int lowest:1; | 66 | unsigned int lowest:1; |
63 | /* is the extent buffer locked */ | 67 | /* is the extent buffer locked */ |
64 | unsigned int locked:1; | 68 | unsigned int locked:1; |
@@ -66,6 +70,16 @@ struct backref_node { | |||
66 | unsigned int processed:1; | 70 | unsigned int processed:1; |
67 | /* have backrefs of this block been checked */ | 71 | /* have backrefs of this block been checked */ |
68 | unsigned int checked:1; | 72 | unsigned int checked:1; |
73 | /* | ||
74 | * 1 if corresponding block has been cowed but some upper | ||
75 | * level block pointers may not point to the new location | ||
76 | */ | ||
77 | unsigned int pending:1; | ||
78 | /* | ||
79 | * 1 if the backref node isn't connected to any other | ||
80 | * backref node. | ||
81 | */ | ||
82 | unsigned int detached:1; | ||
69 | }; | 83 | }; |
70 | 84 | ||
71 | /* | 85 | /* |
@@ -74,7 +88,6 @@ struct backref_node { | |||
74 | struct backref_edge { | 88 | struct backref_edge { |
75 | struct list_head list[2]; | 89 | struct list_head list[2]; |
76 | struct backref_node *node[2]; | 90 | struct backref_node *node[2]; |
77 | u64 blockptr; | ||
78 | }; | 91 | }; |
79 | 92 | ||
80 | #define LOWER 0 | 93 | #define LOWER 0 |
@@ -83,9 +96,25 @@ struct backref_edge { | |||
83 | struct backref_cache { | 96 | struct backref_cache { |
84 | /* red black tree of all backref nodes in the cache */ | 97 | /* red black tree of all backref nodes in the cache */ |
85 | struct rb_root rb_root; | 98 | struct rb_root rb_root; |
86 | /* list of backref nodes with no child block in the cache */ | 99 | /* for passing backref nodes to btrfs_reloc_cow_block */ |
100 | struct backref_node *path[BTRFS_MAX_LEVEL]; | ||
101 | /* | ||
102 | * list of blocks that have been cowed but some block | ||
103 | * pointers in upper level blocks may not reflect the | ||
104 | * new location | ||
105 | */ | ||
87 | struct list_head pending[BTRFS_MAX_LEVEL]; | 106 | struct list_head pending[BTRFS_MAX_LEVEL]; |
88 | spinlock_t lock; | 107 | /* list of backref nodes with no child node */ |
108 | struct list_head leaves; | ||
109 | /* list of blocks that have been cowed in current transaction */ | ||
110 | struct list_head changed; | ||
111 | /* list of detached backref nodes */ | ||
112 | struct list_head detached; | ||
113 | |||
114 | u64 last_trans; | ||
115 | |||
116 | int nr_nodes; | ||
117 | int nr_edges; | ||
89 | }; | 118 | }; |
90 | 119 | ||
91 | /* | 120 | /* |
@@ -113,15 +142,6 @@ struct tree_block { | |||
113 | unsigned int key_ready:1; | 142 | unsigned int key_ready:1; |
114 | }; | 143 | }; |
115 | 144 | ||
116 | /* inode vector */ | ||
117 | #define INODEVEC_SIZE 16 | ||
118 | |||
119 | struct inodevec { | ||
120 | struct list_head list; | ||
121 | struct inode *inode[INODEVEC_SIZE]; | ||
122 | int nr; | ||
123 | }; | ||
124 | |||
125 | #define MAX_EXTENTS 128 | 145 | #define MAX_EXTENTS 128 |
126 | 146 | ||
127 | struct file_extent_cluster { | 147 | struct file_extent_cluster { |
@@ -138,36 +158,43 @@ struct reloc_control { | |||
138 | struct btrfs_root *extent_root; | 158 | struct btrfs_root *extent_root; |
139 | /* inode for moving data */ | 159 | /* inode for moving data */ |
140 | struct inode *data_inode; | 160 | struct inode *data_inode; |
141 | struct btrfs_workers workers; | 161 | |
162 | struct btrfs_block_rsv *block_rsv; | ||
163 | |||
164 | struct backref_cache backref_cache; | ||
165 | |||
166 | struct file_extent_cluster cluster; | ||
142 | /* tree blocks have been processed */ | 167 | /* tree blocks have been processed */ |
143 | struct extent_io_tree processed_blocks; | 168 | struct extent_io_tree processed_blocks; |
144 | /* map start of tree root to corresponding reloc tree */ | 169 | /* map start of tree root to corresponding reloc tree */ |
145 | struct mapping_tree reloc_root_tree; | 170 | struct mapping_tree reloc_root_tree; |
146 | /* list of reloc trees */ | 171 | /* list of reloc trees */ |
147 | struct list_head reloc_roots; | 172 | struct list_head reloc_roots; |
173 | /* size of metadata reservation for merging reloc trees */ | ||
174 | u64 merging_rsv_size; | ||
175 | /* size of relocated tree nodes */ | ||
176 | u64 nodes_relocated; | ||
177 | |||
148 | u64 search_start; | 178 | u64 search_start; |
149 | u64 extents_found; | 179 | u64 extents_found; |
150 | u64 extents_skipped; | 180 | |
151 | int stage; | 181 | int block_rsv_retries; |
152 | int create_reloc_root; | 182 | |
183 | unsigned int stage:8; | ||
184 | unsigned int create_reloc_tree:1; | ||
185 | unsigned int merge_reloc_tree:1; | ||
153 | unsigned int found_file_extent:1; | 186 | unsigned int found_file_extent:1; |
154 | unsigned int found_old_snapshot:1; | 187 | unsigned int commit_transaction:1; |
155 | }; | 188 | }; |
156 | 189 | ||
157 | /* stages of data relocation */ | 190 | /* stages of data relocation */ |
158 | #define MOVE_DATA_EXTENTS 0 | 191 | #define MOVE_DATA_EXTENTS 0 |
159 | #define UPDATE_DATA_PTRS 1 | 192 | #define UPDATE_DATA_PTRS 1 |
160 | 193 | ||
161 | /* | 194 | static void remove_backref_node(struct backref_cache *cache, |
162 | * merge reloc tree to corresponding fs tree in worker threads | 195 | struct backref_node *node); |
163 | */ | 196 | static void __mark_block_processed(struct reloc_control *rc, |
164 | struct async_merge { | 197 | struct backref_node *node); |
165 | struct btrfs_work work; | ||
166 | struct reloc_control *rc; | ||
167 | struct btrfs_root *root; | ||
168 | struct completion *done; | ||
169 | atomic_t *num_pending; | ||
170 | }; | ||
171 | 198 | ||
172 | static void mapping_tree_init(struct mapping_tree *tree) | 199 | static void mapping_tree_init(struct mapping_tree *tree) |
173 | { | 200 | { |
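
reloc_control's mode fields shrink into bitfields: stage needs only a byte and each policy flag a single bit, so they pack into one word alongside the new merge/commit flags. A sketch of that layout trade (field set abridged from the struct above):

#include <stdio.h>

struct ctl {
        unsigned int stage:8;
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
        unsigned int found_file_extent:1;
        unsigned int commit_transaction:1;
};

int main(void)
{
        struct ctl c = { .stage = 1, .merge_reloc_tree = 1 };

        /* all five fields share one unsigned int's storage */
        printf("sizeof(struct ctl) = %zu\n", sizeof(c));
        return 0;
}
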
@@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache) | |||
181 | cache->rb_root = RB_ROOT; | 208 | cache->rb_root = RB_ROOT; |
182 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | 209 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
183 | INIT_LIST_HEAD(&cache->pending[i]); | 210 | INIT_LIST_HEAD(&cache->pending[i]); |
184 | spin_lock_init(&cache->lock); | 211 | INIT_LIST_HEAD(&cache->changed); |
212 | INIT_LIST_HEAD(&cache->detached); | ||
213 | INIT_LIST_HEAD(&cache->leaves); | ||
214 | } | ||
215 | |||
216 | static void backref_cache_cleanup(struct backref_cache *cache) | ||
217 | { | ||
218 | struct backref_node *node; | ||
219 | int i; | ||
220 | |||
221 | while (!list_empty(&cache->detached)) { | ||
222 | node = list_entry(cache->detached.next, | ||
223 | struct backref_node, list); | ||
224 | remove_backref_node(cache, node); | ||
225 | } | ||
226 | |||
227 | while (!list_empty(&cache->leaves)) { | ||
228 | node = list_entry(cache->leaves.next, | ||
229 | struct backref_node, lower); | ||
230 | remove_backref_node(cache, node); | ||
231 | } | ||
232 | |||
233 | cache->last_trans = 0; | ||
234 | |||
235 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
236 | BUG_ON(!list_empty(&cache->pending[i])); | ||
237 | BUG_ON(!list_empty(&cache->changed)); | ||
238 | BUG_ON(!list_empty(&cache->detached)); | ||
239 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
240 | BUG_ON(cache->nr_nodes); | ||
241 | BUG_ON(cache->nr_edges); | ||
242 | } | ||
243 | |||
244 | static struct backref_node *alloc_backref_node(struct backref_cache *cache) | ||
245 | { | ||
246 | struct backref_node *node; | ||
247 | |||
248 | node = kzalloc(sizeof(*node), GFP_NOFS); | ||
249 | if (node) { | ||
250 | INIT_LIST_HEAD(&node->list); | ||
251 | INIT_LIST_HEAD(&node->upper); | ||
252 | INIT_LIST_HEAD(&node->lower); | ||
253 | RB_CLEAR_NODE(&node->rb_node); | ||
254 | cache->nr_nodes++; | ||
255 | } | ||
256 | return node; | ||
257 | } | ||
258 | |||
259 | static void free_backref_node(struct backref_cache *cache, | ||
260 | struct backref_node *node) | ||
261 | { | ||
262 | if (node) { | ||
263 | cache->nr_nodes--; | ||
264 | kfree(node); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) | ||
269 | { | ||
270 | struct backref_edge *edge; | ||
271 | |||
272 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
273 | if (edge) | ||
274 | cache->nr_edges++; | ||
275 | return edge; | ||
185 | } | 276 | } |
186 | 277 | ||
187 | static void backref_node_init(struct backref_node *node) | 278 | static void free_backref_edge(struct backref_cache *cache, |
279 | struct backref_edge *edge) | ||
188 | { | 280 | { |
189 | memset(node, 0, sizeof(*node)); | 281 | if (edge) { |
190 | INIT_LIST_HEAD(&node->upper); | 282 | cache->nr_edges--; |
191 | INIT_LIST_HEAD(&node->lower); | 283 | kfree(edge); |
192 | RB_CLEAR_NODE(&node->rb_node); | 284 | } |
193 | } | 285 | } |
194 | 286 | ||
195 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 287 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
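
alloc_backref_node()/free_backref_node() and their edge twins wrap kzalloc/kfree with counters, which is what lets backref_cache_cleanup() assert that nr_nodes and nr_edges reach zero. A user-space model of the counted-allocator leak check:

#include <assert.h>
#include <stdlib.h>

struct cache { int nr_nodes; };
struct node  { int dummy; };

static struct node *alloc_node(struct cache *c)
{
        struct node *n = calloc(1, sizeof(*n));

        if (n)
                c->nr_nodes++;
        return n;
}

static void free_node(struct cache *c, struct node *n)
{
        if (n) {
                c->nr_nodes--;
                free(n);
        }
}

int main(void)
{
        struct cache c = { 0 };
        struct node *n = alloc_node(&c);

        free_node(&c, n);
        assert(c.nr_nodes == 0);        /* cleanup-time leak check */
        return 0;
}
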
@@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, | |||
250 | edges[idx++] = edge; | 342 | edges[idx++] = edge; |
251 | node = edge->node[UPPER]; | 343 | node = edge->node[UPPER]; |
252 | } | 344 | } |
345 | BUG_ON(node->detached); | ||
253 | *index = idx; | 346 | *index = idx; |
254 | return node; | 347 | return node; |
255 | } | 348 | } |
@@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], | |||
281 | return NULL; | 374 | return NULL; |
282 | } | 375 | } |
283 | 376 | ||
377 | static void unlock_node_buffer(struct backref_node *node) | ||
378 | { | ||
379 | if (node->locked) { | ||
380 | btrfs_tree_unlock(node->eb); | ||
381 | node->locked = 0; | ||
382 | } | ||
383 | } | ||
384 | |||
284 | static void drop_node_buffer(struct backref_node *node) | 385 | static void drop_node_buffer(struct backref_node *node) |
285 | { | 386 | { |
286 | if (node->eb) { | 387 | if (node->eb) { |
287 | if (node->locked) { | 388 | unlock_node_buffer(node); |
288 | btrfs_tree_unlock(node->eb); | ||
289 | node->locked = 0; | ||
290 | } | ||
291 | free_extent_buffer(node->eb); | 389 | free_extent_buffer(node->eb); |
292 | node->eb = NULL; | 390 | node->eb = NULL; |
293 | } | 391 | } |
@@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node) | |||
296 | static void drop_backref_node(struct backref_cache *tree, | 394 | static void drop_backref_node(struct backref_cache *tree, |
297 | struct backref_node *node) | 395 | struct backref_node *node) |
298 | { | 396 | { |
299 | BUG_ON(!node->lowest); | ||
300 | BUG_ON(!list_empty(&node->upper)); | 397 | BUG_ON(!list_empty(&node->upper)); |
301 | 398 | ||
302 | drop_node_buffer(node); | 399 | drop_node_buffer(node); |
400 | list_del(&node->list); | ||
303 | list_del(&node->lower); | 401 | list_del(&node->lower); |
304 | 402 | if (!RB_EMPTY_NODE(&node->rb_node)) | |
305 | rb_erase(&node->rb_node, &tree->rb_root); | 403 | rb_erase(&node->rb_node, &tree->rb_root); |
306 | kfree(node); | 404 | free_backref_node(tree, node); |
307 | } | 405 | } |
308 | 406 | ||
309 | /* | 407 | /* |
@@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache, | |||
318 | if (!node) | 416 | if (!node) |
319 | return; | 417 | return; |
320 | 418 | ||
321 | BUG_ON(!node->lowest); | 419 | BUG_ON(!node->lowest && !node->detached); |
322 | while (!list_empty(&node->upper)) { | 420 | while (!list_empty(&node->upper)) { |
323 | edge = list_entry(node->upper.next, struct backref_edge, | 421 | edge = list_entry(node->upper.next, struct backref_edge, |
324 | list[LOWER]); | 422 | list[LOWER]); |
325 | upper = edge->node[UPPER]; | 423 | upper = edge->node[UPPER]; |
326 | list_del(&edge->list[LOWER]); | 424 | list_del(&edge->list[LOWER]); |
327 | list_del(&edge->list[UPPER]); | 425 | list_del(&edge->list[UPPER]); |
328 | kfree(edge); | 426 | free_backref_edge(cache, edge); |
427 | |||
428 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
429 | BUG_ON(!list_empty(&node->upper)); | ||
430 | drop_backref_node(cache, node); | ||
431 | node = upper; | ||
432 | node->lowest = 1; | ||
433 | continue; | ||
434 | } | ||
329 | /* | 435 | /* |
330 | * add the node to pending list if no other | 436 | * add the node to leaf node list if no other |
331 | * child block cached. | 437 | * child block cached. |
332 | */ | 438 | */ |
333 | if (list_empty(&upper->lower)) { | 439 | if (list_empty(&upper->lower)) { |
334 | list_add_tail(&upper->lower, | 440 | list_add_tail(&upper->lower, &cache->leaves); |
335 | &cache->pending[upper->level]); | ||
336 | upper->lowest = 1; | 441 | upper->lowest = 1; |
337 | } | 442 | } |
338 | } | 443 | } |
444 | |||
339 | drop_backref_node(cache, node); | 445 | drop_backref_node(cache, node); |
340 | } | 446 | } |
341 | 447 | ||
448 | static void update_backref_node(struct backref_cache *cache, | ||
449 | struct backref_node *node, u64 bytenr) | ||
450 | { | ||
451 | struct rb_node *rb_node; | ||
452 | rb_erase(&node->rb_node, &cache->rb_root); | ||
453 | node->bytenr = bytenr; | ||
454 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
455 | BUG_ON(rb_node); | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * update backref cache after a transaction commit | ||
460 | */ | ||
461 | static int update_backref_cache(struct btrfs_trans_handle *trans, | ||
462 | struct backref_cache *cache) | ||
463 | { | ||
464 | struct backref_node *node; | ||
465 | int level = 0; | ||
466 | |||
467 | if (cache->last_trans == 0) { | ||
468 | cache->last_trans = trans->transid; | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | if (cache->last_trans == trans->transid) | ||
473 | return 0; | ||
474 | |||
475 | /* | ||
476 | * detached nodes are used to avoid unnecessary backref | ||
477 | * lookup. transaction commit changes the extent tree. | ||
478 | * so the detached nodes are no longer useful. | ||
479 | */ | ||
480 | while (!list_empty(&cache->detached)) { | ||
481 | node = list_entry(cache->detached.next, | ||
482 | struct backref_node, list); | ||
483 | remove_backref_node(cache, node); | ||
484 | } | ||
485 | |||
486 | while (!list_empty(&cache->changed)) { | ||
487 | node = list_entry(cache->changed.next, | ||
488 | struct backref_node, list); | ||
489 | list_del_init(&node->list); | ||
490 | BUG_ON(node->pending); | ||
491 | update_backref_node(cache, node, node->new_bytenr); | ||
492 | } | ||
493 | |||
494 | /* | ||
495 | * some nodes can be left in the pending list if there were | ||
496 | * errors during processing the pending nodes. | ||
497 | */ | ||
498 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
499 | list_for_each_entry(node, &cache->pending[level], list) { | ||
500 | BUG_ON(!node->pending); | ||
501 | if (node->bytenr == node->new_bytenr) | ||
502 | continue; | ||
503 | update_backref_node(cache, node, node->new_bytenr); | ||
504 | } | ||
505 | } | ||
506 | |||
507 | cache->last_trans = 0; | ||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | static int should_ignore_root(struct btrfs_root *root) | ||
512 | { | ||
513 | struct btrfs_root *reloc_root; | ||
514 | |||
515 | if (!root->ref_cows) | ||
516 | return 0; | ||
517 | |||
518 | reloc_root = root->reloc_root; | ||
519 | if (!reloc_root) | ||
520 | return 0; | ||
521 | |||
522 | if (btrfs_root_last_snapshot(&reloc_root->root_item) == | ||
523 | root->fs_info->running_transaction->transid - 1) | ||
524 | return 0; | ||
525 | /* | ||
526 | * if there is reloc tree and it was created in previous | ||
527 | * transaction backref lookup can find the reloc tree, | ||
528 | * so backref node for the fs tree root is useless for | ||
529 | * relocation. | ||
530 | */ | ||
531 | return 1; | ||
532 | } | ||
533 | |||
342 | /* | 534 | /* |
343 | * find reloc tree by address of tree root | 535 | * find reloc tree by address of tree root |
344 | */ | 536 | */ |
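
update_backref_cache() keys its work off last_trans: first use just records the transaction id, a repeat call within the same transaction is a no-op, and only a stale id triggers dropping detached nodes and re-hashing changed ones. A compact model of that guard:

#include <stdio.h>

struct cache { unsigned long last_trans; };

static int update_cache(struct cache *c, unsigned long transid)
{
        if (c->last_trans == 0) {
                c->last_trans = transid;        /* first use: just record */
                return 0;
        }
        if (c->last_trans == transid)           /* same transaction: no-op */
                return 0;

        puts("drop detached nodes, rehash changed ones");
        c->last_trans = 0;
        return 1;
}

int main(void)
{
        struct cache c = { 0 };

        update_cache(&c, 10);   /* records transid 10 */
        update_cache(&c, 10);   /* no-op */
        update_cache(&c, 11);   /* stale: flushes the cache */
        return 0;
}
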
@@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, | |||
453 | * for all upper level blocks that directly/indirectly reference the | 645 | * for all upper level blocks that directly/indirectly reference the |
454 | * block are also cached. | 646 | * block are also cached. |
455 | */ | 647 | */ |
456 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | 648 | static noinline_for_stack |
457 | struct backref_cache *cache, | 649 | struct backref_node *build_backref_tree(struct reloc_control *rc, |
458 | struct btrfs_key *node_key, | 650 | struct btrfs_key *node_key, |
459 | int level, u64 bytenr) | 651 | int level, u64 bytenr) |
460 | { | 652 | { |
653 | struct backref_cache *cache = &rc->backref_cache; | ||
461 | struct btrfs_path *path1; | 654 | struct btrfs_path *path1; |
462 | struct btrfs_path *path2; | 655 | struct btrfs_path *path2; |
463 | struct extent_buffer *eb; | 656 | struct extent_buffer *eb; |
@@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
473 | unsigned long end; | 666 | unsigned long end; |
474 | unsigned long ptr; | 667 | unsigned long ptr; |
475 | LIST_HEAD(list); | 668 | LIST_HEAD(list); |
669 | LIST_HEAD(useless); | ||
670 | int cowonly; | ||
476 | int ret; | 671 | int ret; |
477 | int err = 0; | 672 | int err = 0; |
478 | 673 | ||
@@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
483 | goto out; | 678 | goto out; |
484 | } | 679 | } |
485 | 680 | ||
486 | node = kmalloc(sizeof(*node), GFP_NOFS); | 681 | node = alloc_backref_node(cache); |
487 | if (!node) { | 682 | if (!node) { |
488 | err = -ENOMEM; | 683 | err = -ENOMEM; |
489 | goto out; | 684 | goto out; |
490 | } | 685 | } |
491 | 686 | ||
492 | backref_node_init(node); | ||
493 | node->bytenr = bytenr; | 687 | node->bytenr = bytenr; |
494 | node->owner = 0; | ||
495 | node->level = level; | 688 | node->level = level; |
496 | node->lowest = 1; | 689 | node->lowest = 1; |
497 | cur = node; | 690 | cur = node; |
@@ -587,17 +780,20 @@ again: | |||
587 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 780 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
588 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | 781 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || |
589 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | 782 | key.type == BTRFS_EXTENT_REF_V0_KEY) { |
590 | if (key.objectid == key.offset && | 783 | if (key.type == BTRFS_EXTENT_REF_V0_KEY) { |
591 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
592 | struct btrfs_extent_ref_v0 *ref0; | 784 | struct btrfs_extent_ref_v0 *ref0; |
593 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | 785 | ref0 = btrfs_item_ptr(eb, path1->slots[0], |
594 | struct btrfs_extent_ref_v0); | 786 | struct btrfs_extent_ref_v0); |
595 | root = find_tree_root(rc, eb, ref0); | 787 | root = find_tree_root(rc, eb, ref0); |
596 | if (root) | 788 | if (!root->ref_cows) |
597 | cur->root = root; | 789 | cur->cowonly = 1; |
598 | else | 790 | if (key.objectid == key.offset) { |
599 | cur->old_root = 1; | 791 | if (root && !should_ignore_root(root)) |
600 | break; | 792 | cur->root = root; |
793 | else | ||
794 | list_add(&cur->list, &useless); | ||
795 | break; | ||
796 | } | ||
601 | } | 797 | } |
602 | #else | 798 | #else |
603 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 799 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); |
@@ -614,22 +810,20 @@ again: | |||
614 | break; | 810 | break; |
615 | } | 811 | } |
616 | 812 | ||
617 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 813 | edge = alloc_backref_edge(cache); |
618 | if (!edge) { | 814 | if (!edge) { |
619 | err = -ENOMEM; | 815 | err = -ENOMEM; |
620 | goto out; | 816 | goto out; |
621 | } | 817 | } |
622 | rb_node = tree_search(&cache->rb_root, key.offset); | 818 | rb_node = tree_search(&cache->rb_root, key.offset); |
623 | if (!rb_node) { | 819 | if (!rb_node) { |
624 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 820 | upper = alloc_backref_node(cache); |
625 | if (!upper) { | 821 | if (!upper) { |
626 | kfree(edge); | 822 | free_backref_edge(cache, edge); |
627 | err = -ENOMEM; | 823 | err = -ENOMEM; |
628 | goto out; | 824 | goto out; |
629 | } | 825 | } |
630 | backref_node_init(upper); | ||
631 | upper->bytenr = key.offset; | 826 | upper->bytenr = key.offset; |
632 | upper->owner = 0; | ||
633 | upper->level = cur->level + 1; | 827 | upper->level = cur->level + 1; |
634 | /* | 828 | /* |
635 | * backrefs for the upper level block aren't | 829 |
@@ -639,11 +833,12 @@ again: | |||
639 | } else { | 833 | } else { |
640 | upper = rb_entry(rb_node, struct backref_node, | 834 | upper = rb_entry(rb_node, struct backref_node, |
641 | rb_node); | 835 | rb_node); |
836 | BUG_ON(!upper->checked); | ||
642 | INIT_LIST_HEAD(&edge->list[UPPER]); | 837 | INIT_LIST_HEAD(&edge->list[UPPER]); |
643 | } | 838 | } |
644 | list_add(&edge->list[LOWER], &cur->upper); | 839 | list_add_tail(&edge->list[LOWER], &cur->upper); |
645 | edge->node[UPPER] = upper; | ||
646 | edge->node[LOWER] = cur; | 840 | edge->node[LOWER] = cur; |
841 | edge->node[UPPER] = upper; | ||
647 | 842 | ||
648 | goto next; | 843 | goto next; |
649 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | 844 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { |
@@ -657,11 +852,17 @@ again: | |||
657 | goto out; | 852 | goto out; |
658 | } | 853 | } |
659 | 854 | ||
855 | if (!root->ref_cows) | ||
856 | cur->cowonly = 1; | ||
857 | |||
660 | if (btrfs_root_level(&root->root_item) == cur->level) { | 858 | if (btrfs_root_level(&root->root_item) == cur->level) { |
661 | /* tree root */ | 859 | /* tree root */ |
662 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 860 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
663 | cur->bytenr); | 861 | cur->bytenr); |
664 | cur->root = root; | 862 | if (should_ignore_root(root)) |
863 | list_add(&cur->list, &useless); | ||
864 | else | ||
865 | cur->root = root; | ||
665 | break; | 866 | break; |
666 | } | 867 | } |
667 | 868 | ||
@@ -692,11 +893,14 @@ again: | |||
692 | if (!path2->nodes[level]) { | 893 | if (!path2->nodes[level]) { |
693 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 894 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
694 | lower->bytenr); | 895 | lower->bytenr); |
695 | lower->root = root; | 896 | if (should_ignore_root(root)) |
897 | list_add(&lower->list, &useless); | ||
898 | else | ||
899 | lower->root = root; | ||
696 | break; | 900 | break; |
697 | } | 901 | } |
698 | 902 | ||
699 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 903 | edge = alloc_backref_edge(cache); |
700 | if (!edge) { | 904 | if (!edge) { |
701 | err = -ENOMEM; | 905 | err = -ENOMEM; |
702 | goto out; | 906 | goto out; |
@@ -705,16 +909,17 @@ again: | |||
705 | eb = path2->nodes[level]; | 909 | eb = path2->nodes[level]; |
706 | rb_node = tree_search(&cache->rb_root, eb->start); | 910 | rb_node = tree_search(&cache->rb_root, eb->start); |
707 | if (!rb_node) { | 911 | if (!rb_node) { |
708 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 912 | upper = alloc_backref_node(cache); |
709 | if (!upper) { | 913 | if (!upper) { |
710 | kfree(edge); | 914 | free_backref_edge(cache, edge); |
711 | err = -ENOMEM; | 915 | err = -ENOMEM; |
712 | goto out; | 916 | goto out; |
713 | } | 917 | } |
714 | backref_node_init(upper); | ||
715 | upper->bytenr = eb->start; | 918 | upper->bytenr = eb->start; |
716 | upper->owner = btrfs_header_owner(eb); | 919 | upper->owner = btrfs_header_owner(eb); |
717 | upper->level = lower->level + 1; | 920 | upper->level = lower->level + 1; |
921 | if (!root->ref_cows) | ||
922 | upper->cowonly = 1; | ||
718 | 923 | ||
719 | /* | 924 | /* |
720 | * if we know the block isn't shared | 925 | * if we know the block isn't shared |
@@ -744,10 +949,12 @@ again: | |||
744 | rb_node); | 949 | rb_node); |
745 | BUG_ON(!upper->checked); | 950 | BUG_ON(!upper->checked); |
746 | INIT_LIST_HEAD(&edge->list[UPPER]); | 951 | INIT_LIST_HEAD(&edge->list[UPPER]); |
952 | if (!upper->owner) | ||
953 | upper->owner = btrfs_header_owner(eb); | ||
747 | } | 954 | } |
748 | list_add_tail(&edge->list[LOWER], &lower->upper); | 955 | list_add_tail(&edge->list[LOWER], &lower->upper); |
749 | edge->node[UPPER] = upper; | ||
750 | edge->node[LOWER] = lower; | 956 | edge->node[LOWER] = lower; |
957 | edge->node[UPPER] = upper; | ||
751 | 958 | ||
752 | if (rb_node) | 959 | if (rb_node) |
753 | break; | 960 | break; |
@@ -785,8 +992,13 @@ next: | |||
785 | * into the cache. | 992 | * into the cache. |
786 | */ | 993 | */ |
787 | BUG_ON(!node->checked); | 994 | BUG_ON(!node->checked); |
788 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | 995 | cowonly = node->cowonly; |
789 | BUG_ON(rb_node); | 996 | if (!cowonly) { |
997 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | ||
998 | &node->rb_node); | ||
999 | BUG_ON(rb_node); | ||
1000 | list_add_tail(&node->lower, &cache->leaves); | ||
1001 | } | ||
790 | 1002 | ||
791 | list_for_each_entry(edge, &node->upper, list[LOWER]) | 1003 | list_for_each_entry(edge, &node->upper, list[LOWER]) |
792 | list_add_tail(&edge->list[UPPER], &list); | 1004 | list_add_tail(&edge->list[UPPER], &list); |
@@ -795,6 +1007,14 @@ next: | |||
795 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | 1007 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); |
796 | list_del_init(&edge->list[UPPER]); | 1008 | list_del_init(&edge->list[UPPER]); |
797 | upper = edge->node[UPPER]; | 1009 | upper = edge->node[UPPER]; |
1010 | if (upper->detached) { | ||
1011 | list_del(&edge->list[LOWER]); | ||
1012 | lower = edge->node[LOWER]; | ||
1013 | free_backref_edge(cache, edge); | ||
1014 | if (list_empty(&lower->upper)) | ||
1015 | list_add(&lower->list, &useless); | ||
1016 | continue; | ||
1017 | } | ||
798 | 1018 | ||
799 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | 1019 | if (!RB_EMPTY_NODE(&upper->rb_node)) { |
800 | if (upper->lowest) { | 1020 | if (upper->lowest) { |
@@ -807,25 +1027,69 @@ next: | |||
807 | } | 1027 | } |
808 | 1028 | ||
809 | BUG_ON(!upper->checked); | 1029 | BUG_ON(!upper->checked); |
810 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1030 | BUG_ON(cowonly != upper->cowonly); |
811 | &upper->rb_node); | 1031 | if (!cowonly) { |
812 | BUG_ON(rb_node); | 1032 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
1033 | &upper->rb_node); | ||
1034 | BUG_ON(rb_node); | ||
1035 | } | ||
813 | 1036 | ||
814 | list_add_tail(&edge->list[UPPER], &upper->lower); | 1037 | list_add_tail(&edge->list[UPPER], &upper->lower); |
815 | 1038 | ||
816 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | 1039 | list_for_each_entry(edge, &upper->upper, list[LOWER]) |
817 | list_add_tail(&edge->list[UPPER], &list); | 1040 | list_add_tail(&edge->list[UPPER], &list); |
818 | } | 1041 | } |
1042 | /* | ||
1043 | * process useless backref nodes. backref nodes for tree leaves | ||
1044 | * are deleted from the cache. backref nodes for upper level | ||
1045 | * tree blocks are left in the cache to avoid unnecessary backref | ||
1046 | * lookup. | ||
1047 | */ | ||
1048 | while (!list_empty(&useless)) { | ||
1049 | upper = list_entry(useless.next, struct backref_node, list); | ||
1050 | list_del_init(&upper->list); | ||
1051 | BUG_ON(!list_empty(&upper->upper)); | ||
1052 | if (upper == node) | ||
1053 | node = NULL; | ||
1054 | if (upper->lowest) { | ||
1055 | list_del_init(&upper->lower); | ||
1056 | upper->lowest = 0; | ||
1057 | } | ||
1058 | while (!list_empty(&upper->lower)) { | ||
1059 | edge = list_entry(upper->lower.next, | ||
1060 | struct backref_edge, list[UPPER]); | ||
1061 | list_del(&edge->list[UPPER]); | ||
1062 | list_del(&edge->list[LOWER]); | ||
1063 | lower = edge->node[LOWER]; | ||
1064 | free_backref_edge(cache, edge); | ||
1065 | |||
1066 | if (list_empty(&lower->upper)) | ||
1067 | list_add(&lower->list, &useless); | ||
1068 | } | ||
1069 | __mark_block_processed(rc, upper); | ||
1070 | if (upper->level > 0) { | ||
1071 | list_add(&upper->list, &cache->detached); | ||
1072 | upper->detached = 1; | ||
1073 | } else { | ||
1074 | rb_erase(&upper->rb_node, &cache->rb_root); | ||
1075 | free_backref_node(cache, upper); | ||
1076 | } | ||
1077 | } | ||
819 | out: | 1078 | out: |
820 | btrfs_free_path(path1); | 1079 | btrfs_free_path(path1); |
821 | btrfs_free_path(path2); | 1080 | btrfs_free_path(path2); |
822 | if (err) { | 1081 | if (err) { |
823 | INIT_LIST_HEAD(&list); | 1082 | while (!list_empty(&useless)) { |
1083 | lower = list_entry(useless.next, | ||
1084 | struct backref_node, upper); | ||
1085 | list_del_init(&lower->upper); | ||
1086 | } | ||
824 | upper = node; | 1087 | upper = node; |
1088 | INIT_LIST_HEAD(&list); | ||
825 | while (upper) { | 1089 | while (upper) { |
826 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1090 | if (RB_EMPTY_NODE(&upper->rb_node)) { |
827 | list_splice_tail(&upper->upper, &list); | 1091 | list_splice_tail(&upper->upper, &list); |
828 | kfree(upper); | 1092 | free_backref_node(cache, upper); |
829 | } | 1093 | } |
830 | 1094 | ||
831 | if (list_empty(&list)) | 1095 | if (list_empty(&list)) |
@@ -833,15 +1097,104 @@ out: | |||
833 | 1097 | ||
834 | edge = list_entry(list.next, struct backref_edge, | 1098 | edge = list_entry(list.next, struct backref_edge, |
835 | list[LOWER]); | 1099 | list[LOWER]); |
1100 | list_del(&edge->list[LOWER]); | ||
836 | upper = edge->node[UPPER]; | 1101 | upper = edge->node[UPPER]; |
837 | kfree(edge); | 1102 | free_backref_edge(cache, edge); |
838 | } | 1103 | } |
839 | return ERR_PTR(err); | 1104 | return ERR_PTR(err); |
840 | } | 1105 | } |
1106 | BUG_ON(node && node->detached); | ||
841 | return node; | 1107 | return node; |
842 | } | 1108 | } |
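The useless-list drain added above keeps detached upper-level backref nodes around while freeing leaf-level ones. A minimal userspace sketch of that keep-or-free drain follows; all types are invented for illustration, this is not btrfs code:

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            int level;              /* 0 = leaf */
            int detached;
            struct node *next;      /* stand-in for the kernel list_head */
    };

    static void drain_useless(struct node **useless, struct node **detached)
    {
            while (*useless) {
                    struct node *n = *useless;
                    *useless = n->next;
                    if (n->level > 0) {     /* keep upper nodes, mark detached */
                            n->detached = 1;
                            n->next = *detached;
                            *detached = n;
                    } else {                /* leaves are freed outright */
                            free(n);
                    }
            }
    }

    int main(void)
    {
            struct node *useless = NULL, *detached = NULL;
            for (int i = 0; i < 4; i++) {
                    struct node *n = calloc(1, sizeof(*n));
                    if (!n)
                            return 1;
                    n->level = i % 2;       /* alternate leaf/upper */
                    n->next = useless;
                    useless = n;
            }
            drain_useless(&useless, &detached);
            for (struct node *n = detached; n; n = n->next)
                    printf("kept detached node, level %d\n", n->level);
            return 0;
    }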
843 | 1109 | ||
844 | /* | 1110 | /* |
1111 | * helper to add backref node for the newly created snapshot. | ||
1112 | * the backref node is created by cloning the backref node that | ||
1113 | * corresponds to the root of the source tree | ||
1114 | */ | ||
1115 | static int clone_backref_node(struct btrfs_trans_handle *trans, | ||
1116 | struct reloc_control *rc, | ||
1117 | struct btrfs_root *src, | ||
1118 | struct btrfs_root *dest) | ||
1119 | { | ||
1120 | struct btrfs_root *reloc_root = src->reloc_root; | ||
1121 | struct backref_cache *cache = &rc->backref_cache; | ||
1122 | struct backref_node *node = NULL; | ||
1123 | struct backref_node *new_node; | ||
1124 | struct backref_edge *edge; | ||
1125 | struct backref_edge *new_edge; | ||
1126 | struct rb_node *rb_node; | ||
1127 | |||
1128 | if (cache->last_trans > 0) | ||
1129 | update_backref_cache(trans, cache); | ||
1130 | |||
1131 | rb_node = tree_search(&cache->rb_root, src->commit_root->start); | ||
1132 | if (rb_node) { | ||
1133 | node = rb_entry(rb_node, struct backref_node, rb_node); | ||
1134 | if (node->detached) | ||
1135 | node = NULL; | ||
1136 | else | ||
1137 | BUG_ON(node->new_bytenr != reloc_root->node->start); | ||
1138 | } | ||
1139 | |||
1140 | if (!node) { | ||
1141 | rb_node = tree_search(&cache->rb_root, | ||
1142 | reloc_root->commit_root->start); | ||
1143 | if (rb_node) { | ||
1144 | node = rb_entry(rb_node, struct backref_node, | ||
1145 | rb_node); | ||
1146 | BUG_ON(node->detached); | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | if (!node) | ||
1151 | return 0; | ||
1152 | |||
1153 | new_node = alloc_backref_node(cache); | ||
1154 | if (!new_node) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | new_node->bytenr = dest->node->start; | ||
1158 | new_node->level = node->level; | ||
1159 | new_node->lowest = node->lowest; | ||
1160 | new_node->root = dest; | ||
1161 | |||
1162 | if (!node->lowest) { | ||
1163 | list_for_each_entry(edge, &node->lower, list[UPPER]) { | ||
1164 | new_edge = alloc_backref_edge(cache); | ||
1165 | if (!new_edge) | ||
1166 | goto fail; | ||
1167 | |||
1168 | new_edge->node[UPPER] = new_node; | ||
1169 | new_edge->node[LOWER] = edge->node[LOWER]; | ||
1170 | list_add_tail(&new_edge->list[UPPER], | ||
1171 | &new_node->lower); | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | ||
1176 | &new_node->rb_node); | ||
1177 | BUG_ON(rb_node); | ||
1178 | |||
1179 | if (!new_node->lowest) { | ||
1180 | list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { | ||
1181 | list_add_tail(&new_edge->list[LOWER], | ||
1182 | &new_edge->node[LOWER]->upper); | ||
1183 | } | ||
1184 | } | ||
1185 | return 0; | ||
1186 | fail: | ||
1187 | while (!list_empty(&new_node->lower)) { | ||
1188 | new_edge = list_entry(new_node->lower.next, | ||
1189 | struct backref_edge, list[UPPER]); | ||
1190 | list_del(&new_edge->list[UPPER]); | ||
1191 | free_backref_edge(cache, new_edge); | ||
1192 | } | ||
1193 | free_backref_node(cache, new_node); | ||
1194 | return -ENOMEM; | ||
1195 | } | ||
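clone_backref_node() above copies a node plus its lower edges and, on any allocation failure, unwinds everything cloned so far before returning -ENOMEM. A rough sketch of that clone-with-rollback shape in plain C, with hypothetical types rather than the kernel structures:

    #include <stdlib.h>

    struct edge { struct edge *next; int target; };
    struct gnode { int level; struct edge *lower; };

    static struct gnode *clone_node(const struct gnode *src)
    {
            struct gnode *clone = calloc(1, sizeof(*clone));
            if (!clone)
                    return NULL;
            clone->level = src->level;
            for (const struct edge *e = src->lower; e; e = e->next) {
                    struct edge *ne = malloc(sizeof(*ne));
                    if (!ne)
                            goto fail;      /* roll back the partial clone */
                    ne->target = e->target;
                    ne->next = clone->lower;
                    clone->lower = ne;
            }
            return clone;
    fail:
            while (clone->lower) {          /* free edges cloned so far */
                    struct edge *ne = clone->lower;
                    clone->lower = ne->next;
                    free(ne);
            }
            free(clone);
            return NULL;
    }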
1196 | |||
1197 | /* | ||
845 | * helper to add 'address of tree root -> reloc tree' mapping | 1198 | * helper to add 'address of tree root -> reloc tree' mapping |
846 | */ | 1199 | */ |
847 | static int __add_reloc_root(struct btrfs_root *root) | 1200 | static int __add_reloc_root(struct btrfs_root *root) |
@@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
901 | return 0; | 1254 | return 0; |
902 | } | 1255 | } |
903 | 1256 | ||
904 | /* | 1257 | static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, |
905 | * create reloc tree for a given fs tree. reloc tree is just a | 1258 | struct btrfs_root *root, u64 objectid) |
906 | * snapshot of the fs tree with special root objectid. | ||
907 | */ | ||
908 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
909 | struct btrfs_root *root) | ||
910 | { | 1259 | { |
911 | struct btrfs_root *reloc_root; | 1260 | struct btrfs_root *reloc_root; |
912 | struct extent_buffer *eb; | 1261 | struct extent_buffer *eb; |
@@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
914 | struct btrfs_key root_key; | 1263 | struct btrfs_key root_key; |
915 | int ret; | 1264 | int ret; |
916 | 1265 | ||
917 | if (root->reloc_root) { | ||
918 | reloc_root = root->reloc_root; | ||
919 | reloc_root->last_trans = trans->transid; | ||
920 | return 0; | ||
921 | } | ||
922 | |||
923 | if (!root->fs_info->reloc_ctl || | ||
924 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
925 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
926 | return 0; | ||
927 | |||
928 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1266 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
929 | BUG_ON(!root_item); | 1267 | BUG_ON(!root_item); |
930 | 1268 | ||
931 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 1269 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
932 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 1270 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
933 | root_key.offset = root->root_key.objectid; | 1271 | root_key.offset = objectid; |
934 | 1272 | ||
935 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | 1273 | if (root->root_key.objectid == objectid) { |
936 | BTRFS_TREE_RELOC_OBJECTID); | 1274 | /* called by btrfs_init_reloc_root */ |
937 | BUG_ON(ret); | 1275 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, |
1276 | BTRFS_TREE_RELOC_OBJECTID); | ||
1277 | BUG_ON(ret); | ||
1278 | |||
1279 | btrfs_set_root_last_snapshot(&root->root_item, | ||
1280 | trans->transid - 1); | ||
1281 | } else { | ||
1282 | /* | ||
1283 | * called by btrfs_reloc_post_snapshot_hook. | ||
1284 | * the source tree is a reloc tree; all tree blocks | ||
1285 | * modified after it was created have the RELOC flag | ||
1286 | * set in their headers, so it's OK not to update | ||
1287 | * the 'last_snapshot'. | ||
1288 | */ | ||
1289 | ret = btrfs_copy_root(trans, root, root->node, &eb, | ||
1290 | BTRFS_TREE_RELOC_OBJECTID); | ||
1291 | BUG_ON(ret); | ||
1292 | } | ||
938 | 1293 | ||
939 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
940 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | 1294 | memcpy(root_item, &root->root_item, sizeof(*root_item)); |
941 | btrfs_set_root_refs(root_item, 1); | ||
942 | btrfs_set_root_bytenr(root_item, eb->start); | 1295 | btrfs_set_root_bytenr(root_item, eb->start); |
943 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 1296 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
944 | btrfs_set_root_generation(root_item, trans->transid); | 1297 | btrfs_set_root_generation(root_item, trans->transid); |
945 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | 1298 | |
946 | root_item->drop_level = 0; | 1299 | if (root->root_key.objectid == objectid) { |
1300 | btrfs_set_root_refs(root_item, 0); | ||
1301 | memset(&root_item->drop_progress, 0, | ||
1302 | sizeof(struct btrfs_disk_key)); | ||
1303 | root_item->drop_level = 0; | ||
1304 | } | ||
947 | 1305 | ||
948 | btrfs_tree_unlock(eb); | 1306 | btrfs_tree_unlock(eb); |
949 | free_extent_buffer(eb); | 1307 | free_extent_buffer(eb); |
@@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
957 | &root_key); | 1315 | &root_key); |
958 | BUG_ON(IS_ERR(reloc_root)); | 1316 | BUG_ON(IS_ERR(reloc_root)); |
959 | reloc_root->last_trans = trans->transid; | 1317 | reloc_root->last_trans = trans->transid; |
1318 | return reloc_root; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * create reloc tree for a given fs tree. a reloc tree is just a | ||
1323 | * snapshot of the fs tree with a special root objectid. | ||
1324 | */ | ||
1325 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
1326 | struct btrfs_root *root) | ||
1327 | { | ||
1328 | struct btrfs_root *reloc_root; | ||
1329 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
1330 | int clear_rsv = 0; | ||
1331 | |||
1332 | if (root->reloc_root) { | ||
1333 | reloc_root = root->reloc_root; | ||
1334 | reloc_root->last_trans = trans->transid; | ||
1335 | return 0; | ||
1336 | } | ||
1337 | |||
1338 | if (!rc || !rc->create_reloc_tree || | ||
1339 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
1340 | return 0; | ||
1341 | |||
1342 | if (!trans->block_rsv) { | ||
1343 | trans->block_rsv = rc->block_rsv; | ||
1344 | clear_rsv = 1; | ||
1345 | } | ||
1346 | reloc_root = create_reloc_root(trans, root, root->root_key.objectid); | ||
1347 | if (clear_rsv) | ||
1348 | trans->block_rsv = NULL; | ||
960 | 1349 | ||
961 | __add_reloc_root(reloc_root); | 1350 | __add_reloc_root(reloc_root); |
962 | root->reloc_root = reloc_root; | 1351 | root->reloc_root = reloc_root; |
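The clear_rsv logic added above temporarily installs the relocation block reservation on a transaction that has none, and restores the original state afterwards. A minimal sketch of that borrow/restore pattern; the types are invented and this is not the btrfs API:

    struct rsv { long bytes; };
    struct txn { struct rsv *block_rsv; };

    static int with_borrowed_rsv(struct txn *trans, struct rsv *reloc_rsv,
                                 int (*fn)(struct txn *))
    {
            int clear_rsv = 0;
            int ret;

            if (!trans->block_rsv) {
                    trans->block_rsv = reloc_rsv;   /* borrow for this call */
                    clear_rsv = 1;
            }
            ret = fn(trans);
            if (clear_rsv)
                    trans->block_rsv = NULL;        /* restore original state */
            return ret;
    }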
@@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
980 | reloc_root = root->reloc_root; | 1369 | reloc_root = root->reloc_root; |
981 | root_item = &reloc_root->root_item; | 1370 | root_item = &reloc_root->root_item; |
982 | 1371 | ||
983 | if (btrfs_root_refs(root_item) == 0) { | 1372 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
1373 | btrfs_root_refs(root_item) == 0) { | ||
984 | root->reloc_root = NULL; | 1374 | root->reloc_root = NULL; |
985 | del = 1; | 1375 | del = 1; |
986 | } | 1376 | } |
@@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
1102 | goto out; | 1492 | goto out; |
1103 | } | 1493 | } |
1104 | 1494 | ||
1105 | if (new_bytenr) | 1495 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
1106 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
1107 | ret = 0; | 1496 | ret = 0; |
1108 | out: | 1497 | out: |
1109 | btrfs_free_path(path); | 1498 | btrfs_free_path(path); |
@@ -1114,19 +1503,18 @@ out: | |||
1114 | * update file extent items in the tree leaf to point to | 1503 | * update file extent items in the tree leaf to point to |
1115 | * the new locations. | 1504 | * the new locations. |
1116 | */ | 1505 | */ |
1117 | static int replace_file_extents(struct btrfs_trans_handle *trans, | 1506 | static noinline_for_stack |
1118 | struct reloc_control *rc, | 1507 | int replace_file_extents(struct btrfs_trans_handle *trans, |
1119 | struct btrfs_root *root, | 1508 | struct reloc_control *rc, |
1120 | struct extent_buffer *leaf, | 1509 | struct btrfs_root *root, |
1121 | struct list_head *inode_list) | 1510 | struct extent_buffer *leaf) |
1122 | { | 1511 | { |
1123 | struct btrfs_key key; | 1512 | struct btrfs_key key; |
1124 | struct btrfs_file_extent_item *fi; | 1513 | struct btrfs_file_extent_item *fi; |
1125 | struct inode *inode = NULL; | 1514 | struct inode *inode = NULL; |
1126 | struct inodevec *ivec = NULL; | ||
1127 | u64 parent; | 1515 | u64 parent; |
1128 | u64 bytenr; | 1516 | u64 bytenr; |
1129 | u64 new_bytenr; | 1517 | u64 new_bytenr = 0; |
1130 | u64 num_bytes; | 1518 | u64 num_bytes; |
1131 | u64 end; | 1519 | u64 end; |
1132 | u32 nritems; | 1520 | u32 nritems; |
@@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1166 | * to complete and drop the extent cache | 1554 | * to complete and drop the extent cache |
1167 | */ | 1555 | */ |
1168 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 1556 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
1169 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
1170 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
1171 | BUG_ON(!ivec); | ||
1172 | ivec->nr = 0; | ||
1173 | list_add_tail(&ivec->list, inode_list); | ||
1174 | } | ||
1175 | if (first) { | 1557 | if (first) { |
1176 | inode = find_next_inode(root, key.objectid); | 1558 | inode = find_next_inode(root, key.objectid); |
1177 | if (inode) | ||
1178 | ivec->inode[ivec->nr++] = inode; | ||
1179 | first = 0; | 1559 | first = 0; |
1180 | } else if (inode && inode->i_ino < key.objectid) { | 1560 | } else if (inode && inode->i_ino < key.objectid) { |
1561 | btrfs_add_delayed_iput(inode); | ||
1181 | inode = find_next_inode(root, key.objectid); | 1562 | inode = find_next_inode(root, key.objectid); |
1182 | if (inode) | ||
1183 | ivec->inode[ivec->nr++] = inode; | ||
1184 | } | 1563 | } |
1185 | if (inode && inode->i_ino == key.objectid) { | 1564 | if (inode && inode->i_ino == key.objectid) { |
1186 | end = key.offset + | 1565 | end = key.offset + |
@@ -1204,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1204 | 1583 | ||
1205 | ret = get_new_location(rc->data_inode, &new_bytenr, | 1584 | ret = get_new_location(rc->data_inode, &new_bytenr, |
1206 | bytenr, num_bytes); | 1585 | bytenr, num_bytes); |
1207 | if (ret > 0) | 1586 | if (ret > 0) { |
1587 | WARN_ON(1); | ||
1208 | continue; | 1588 | continue; |
1589 | } | ||
1209 | BUG_ON(ret < 0); | 1590 | BUG_ON(ret < 0); |
1210 | 1591 | ||
1211 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | 1592 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); |
@@ -1225,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1225 | } | 1606 | } |
1226 | if (dirty) | 1607 | if (dirty) |
1227 | btrfs_mark_buffer_dirty(leaf); | 1608 | btrfs_mark_buffer_dirty(leaf); |
1609 | if (inode) | ||
1610 | btrfs_add_delayed_iput(inode); | ||
1228 | return 0; | 1611 | return 0; |
1229 | } | 1612 | } |
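The rewrite above drops the old inodevec batching in favor of btrfs_add_delayed_iput(): a reference that cannot be released safely in the current context is queued and dropped later from a safe one. A toy userspace model of that deferred-release idea, not the kernel implementation:

    #include <stdlib.h>

    struct obj { int refcount; struct obj *next_deferred; };
    static struct obj *deferred;            /* drained outside the hot path */

    static void deferred_put(struct obj *o)
    {
            o->next_deferred = deferred;    /* cheap: just enqueue */
            deferred = o;
    }

    static void drain_deferred(void)        /* called from a safe context */
    {
            while (deferred) {
                    struct obj *o = deferred;
                    deferred = o->next_deferred;
                    if (--o->refcount == 0)
                            free(o);
            }
    }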
1230 | 1613 | ||
@@ -1248,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, | |||
1248 | * if no block got replaced, 0 is returned. if there are other | 1631 | * if no block got replaced, 0 is returned. if there are other |
1249 | * errors, a negative error number is returned. | 1632 | * errors, a negative error number is returned. |
1250 | */ | 1633 | */ |
1251 | static int replace_path(struct btrfs_trans_handle *trans, | 1634 | static noinline_for_stack |
1252 | struct btrfs_root *dest, struct btrfs_root *src, | 1635 | int replace_path(struct btrfs_trans_handle *trans, |
1253 | struct btrfs_path *path, struct btrfs_key *next_key, | 1636 | struct btrfs_root *dest, struct btrfs_root *src, |
1254 | struct extent_buffer **leaf, | 1637 | struct btrfs_path *path, struct btrfs_key *next_key, |
1255 | int lowest_level, int max_level) | 1638 | int lowest_level, int max_level) |
1256 | { | 1639 | { |
1257 | struct extent_buffer *eb; | 1640 | struct extent_buffer *eb; |
1258 | struct extent_buffer *parent; | 1641 | struct extent_buffer *parent; |
@@ -1263,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1263 | u64 new_ptr_gen; | 1646 | u64 new_ptr_gen; |
1264 | u64 last_snapshot; | 1647 | u64 last_snapshot; |
1265 | u32 blocksize; | 1648 | u32 blocksize; |
1649 | int cow = 0; | ||
1266 | int level; | 1650 | int level; |
1267 | int ret; | 1651 | int ret; |
1268 | int slot; | 1652 | int slot; |
1269 | 1653 | ||
1270 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 1654 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
1271 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | 1655 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); |
1272 | BUG_ON(lowest_level > 1 && leaf); | ||
1273 | 1656 | ||
1274 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | 1657 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); |
1275 | 1658 | again: | |
1276 | slot = path->slots[lowest_level]; | 1659 | slot = path->slots[lowest_level]; |
1277 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | 1660 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); |
1278 | 1661 | ||
@@ -1286,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1286 | return 0; | 1669 | return 0; |
1287 | } | 1670 | } |
1288 | 1671 | ||
1289 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | 1672 | if (cow) { |
1290 | BUG_ON(ret); | 1673 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); |
1674 | BUG_ON(ret); | ||
1675 | } | ||
1291 | btrfs_set_lock_blocking(eb); | 1676 | btrfs_set_lock_blocking(eb); |
1292 | 1677 | ||
1293 | if (next_key) { | 1678 | if (next_key) { |
@@ -1331,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1331 | 1716 | ||
1332 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | 1717 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || |
1333 | memcmp_node_keys(parent, slot, path, level)) { | 1718 | memcmp_node_keys(parent, slot, path, level)) { |
1334 | if (level <= lowest_level && !leaf) { | 1719 | if (level <= lowest_level) { |
1335 | ret = 0; | 1720 | ret = 0; |
1336 | break; | 1721 | break; |
1337 | } | 1722 | } |
@@ -1339,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1339 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1724 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1340 | old_ptr_gen); | 1725 | old_ptr_gen); |
1341 | btrfs_tree_lock(eb); | 1726 | btrfs_tree_lock(eb); |
1342 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1727 | if (cow) { |
1343 | slot, &eb); | 1728 | ret = btrfs_cow_block(trans, dest, eb, parent, |
1344 | BUG_ON(ret); | 1729 | slot, &eb); |
1345 | btrfs_set_lock_blocking(eb); | 1730 | BUG_ON(ret); |
1346 | |||
1347 | if (level <= lowest_level) { | ||
1348 | *leaf = eb; | ||
1349 | ret = 0; | ||
1350 | break; | ||
1351 | } | 1731 | } |
1732 | btrfs_set_lock_blocking(eb); | ||
1352 | 1733 | ||
1353 | btrfs_tree_unlock(parent); | 1734 | btrfs_tree_unlock(parent); |
1354 | free_extent_buffer(parent); | 1735 | free_extent_buffer(parent); |
@@ -1357,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1357 | continue; | 1738 | continue; |
1358 | } | 1739 | } |
1359 | 1740 | ||
1741 | if (!cow) { | ||
1742 | btrfs_tree_unlock(parent); | ||
1743 | free_extent_buffer(parent); | ||
1744 | cow = 1; | ||
1745 | goto again; | ||
1746 | } | ||
1747 | |||
1360 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1748 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1361 | path->slots[level]); | 1749 | path->slots[level]); |
1362 | btrfs_release_path(src, path); | 1750 | btrfs_release_path(src, path); |
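The new `cow` flag above lets replace_path() walk read-only first and restart in COW mode only once it finds a block that actually needs replacing. A compilable toy version of that try-cheap-then-retry shape, using a made-up block model:

    #include <stdbool.h>
    #include <stddef.h>

    struct block { bool stale; bool cowed; };

    /* COW the block before modifying it (modeled as a flag here) */
    static void cow_block(struct block *b) { b->cowed = true; }

    static int walk_and_replace(struct block *blocks, size_t n)
    {
            bool cow = false;
    again:
            for (size_t i = 0; i < n; i++) {
                    struct block *b = &blocks[i];
                    if (!b->stale)
                            continue;               /* cheap read-only pass */
                    if (!cow) {
                            cow = true;             /* found real work: redo */
                            goto again;             /* the walk in COW mode  */
                    }
                    cow_block(b);
                    b->stale = false;               /* safe: block was COWed */
            }
            return 0;
    }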
@@ -1562,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1562 | return 0; | 1950 | return 0; |
1563 | } | 1951 | } |
1564 | 1952 | ||
1565 | static void put_inodes(struct list_head *list) | ||
1566 | { | ||
1567 | struct inodevec *ivec; | ||
1568 | while (!list_empty(list)) { | ||
1569 | ivec = list_entry(list->next, struct inodevec, list); | ||
1570 | list_del(&ivec->list); | ||
1571 | while (ivec->nr > 0) { | ||
1572 | ivec->nr--; | ||
1573 | iput(ivec->inode[ivec->nr]); | ||
1574 | } | ||
1575 | kfree(ivec); | ||
1576 | } | ||
1577 | } | ||
1578 | |||
1579 | static int find_next_key(struct btrfs_path *path, int level, | 1953 | static int find_next_key(struct btrfs_path *path, int level, |
1580 | struct btrfs_key *key) | 1954 | struct btrfs_key *key) |
1581 | 1955 | ||
@@ -1608,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1608 | struct btrfs_root *reloc_root; | 1982 | struct btrfs_root *reloc_root; |
1609 | struct btrfs_root_item *root_item; | 1983 | struct btrfs_root_item *root_item; |
1610 | struct btrfs_path *path; | 1984 | struct btrfs_path *path; |
1611 | struct extent_buffer *leaf = NULL; | 1985 | struct extent_buffer *leaf; |
1612 | unsigned long nr; | 1986 | unsigned long nr; |
1613 | int level; | 1987 | int level; |
1614 | int max_level; | 1988 | int max_level; |
1615 | int replaced = 0; | 1989 | int replaced = 0; |
1616 | int ret; | 1990 | int ret; |
1617 | int err = 0; | 1991 | int err = 0; |
1992 | u32 min_reserved; | ||
1618 | 1993 | ||
1619 | path = btrfs_alloc_path(); | 1994 | path = btrfs_alloc_path(); |
1620 | if (!path) | 1995 | if (!path) |
@@ -1648,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1648 | btrfs_unlock_up_safe(path, 0); | 2023 | btrfs_unlock_up_safe(path, 0); |
1649 | } | 2024 | } |
1650 | 2025 | ||
1651 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | 2026 | min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
1652 | trans = btrfs_start_transaction(root, 1); | 2027 | memset(&next_key, 0, sizeof(next_key)); |
1653 | 2028 | ||
1654 | leaf = path->nodes[0]; | 2029 | while (1) { |
1655 | btrfs_item_key_to_cpu(leaf, &key, 0); | 2030 | trans = btrfs_start_transaction(root, 0); |
1656 | btrfs_release_path(reloc_root, path); | 2031 | trans->block_rsv = rc->block_rsv; |
1657 | 2032 | ||
1658 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
1659 | if (ret < 0) { | 2034 | min_reserved, 0); |
1660 | err = ret; | 2035 | if (ret) { |
1661 | goto out; | 2036 | BUG_ON(ret != -EAGAIN); |
2037 | ret = btrfs_commit_transaction(trans, root); | ||
2038 | BUG_ON(ret); | ||
2039 | continue; | ||
1662 | } | 2040 | } |
1663 | 2041 | ||
1664 | leaf = path->nodes[0]; | ||
1665 | btrfs_unlock_up_safe(path, 1); | ||
1666 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1667 | &inode_list); | ||
1668 | if (ret < 0) | ||
1669 | err = ret; | ||
1670 | goto out; | ||
1671 | } | ||
1672 | |||
1673 | memset(&next_key, 0, sizeof(next_key)); | ||
1674 | |||
1675 | while (1) { | ||
1676 | leaf = NULL; | ||
1677 | replaced = 0; | 2042 | replaced = 0; |
1678 | trans = btrfs_start_transaction(root, 1); | ||
1679 | max_level = level; | 2043 | max_level = level; |
1680 | 2044 | ||
1681 | ret = walk_down_reloc_tree(reloc_root, path, &level); | 2045 | ret = walk_down_reloc_tree(reloc_root, path, &level); |
@@ -1689,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1689 | if (!find_next_key(path, level, &key) && | 2053 | if (!find_next_key(path, level, &key) && |
1690 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | 2054 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { |
1691 | ret = 0; | 2055 | ret = 0; |
1692 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
1693 | ret = replace_path(trans, root, reloc_root, | ||
1694 | path, &next_key, &leaf, | ||
1695 | level, max_level); | ||
1696 | } else { | 2056 | } else { |
1697 | ret = replace_path(trans, root, reloc_root, | 2057 | ret = replace_path(trans, root, reloc_root, path, |
1698 | path, &next_key, NULL, | 2058 | &next_key, level, max_level); |
1699 | level, max_level); | ||
1700 | } | 2059 | } |
1701 | if (ret < 0) { | 2060 | if (ret < 0) { |
1702 | err = ret; | 2061 | err = ret; |
@@ -1708,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1708 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 2067 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1709 | path->slots[level]); | 2068 | path->slots[level]); |
1710 | replaced = 1; | 2069 | replaced = 1; |
1711 | } else if (leaf) { | ||
1712 | /* | ||
1713 | * no block got replaced, try replacing file extents | ||
1714 | */ | ||
1715 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
1716 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1717 | &inode_list); | ||
1718 | btrfs_tree_unlock(leaf); | ||
1719 | free_extent_buffer(leaf); | ||
1720 | BUG_ON(ret < 0); | ||
1721 | } | 2070 | } |
1722 | 2071 | ||
1723 | ret = walk_up_reloc_tree(reloc_root, path, &level); | 2072 | ret = walk_up_reloc_tree(reloc_root, path, &level); |
@@ -1734,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1734 | root_item->drop_level = level; | 2083 | root_item->drop_level = level; |
1735 | 2084 | ||
1736 | nr = trans->blocks_used; | 2085 | nr = trans->blocks_used; |
1737 | btrfs_end_transaction(trans, root); | 2086 | btrfs_end_transaction_throttle(trans, root); |
1738 | 2087 | ||
1739 | btrfs_btree_balance_dirty(root, nr); | 2088 | btrfs_btree_balance_dirty(root, nr); |
1740 | 2089 | ||
1741 | /* | ||
1742 | * put inodes outside transaction, otherwise we may deadlock. | ||
1743 | */ | ||
1744 | put_inodes(&inode_list); | ||
1745 | |||
1746 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2090 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1747 | invalidate_extent_cache(root, &key, &next_key); | 2091 | invalidate_extent_cache(root, &key, &next_key); |
1748 | } | 2092 | } |
@@ -1765,87 +2109,125 @@ out: | |||
1765 | sizeof(root_item->drop_progress)); | 2109 | sizeof(root_item->drop_progress)); |
1766 | root_item->drop_level = 0; | 2110 | root_item->drop_level = 0; |
1767 | btrfs_set_root_refs(root_item, 0); | 2111 | btrfs_set_root_refs(root_item, 0); |
2112 | btrfs_update_reloc_root(trans, root); | ||
1768 | } | 2113 | } |
1769 | 2114 | ||
1770 | nr = trans->blocks_used; | 2115 | nr = trans->blocks_used; |
1771 | btrfs_end_transaction(trans, root); | 2116 | btrfs_end_transaction_throttle(trans, root); |
1772 | 2117 | ||
1773 | btrfs_btree_balance_dirty(root, nr); | 2118 | btrfs_btree_balance_dirty(root, nr); |
1774 | 2119 | ||
1775 | put_inodes(&inode_list); | ||
1776 | |||
1777 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2120 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1778 | invalidate_extent_cache(root, &key, &next_key); | 2121 | invalidate_extent_cache(root, &key, &next_key); |
1779 | 2122 | ||
1780 | return err; | 2123 | return err; |
1781 | } | 2124 | } |
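merge_reloc_root() now starts each pass by checking the block reservation and, on -EAGAIN, commits the transaction to free space before retrying. A small model of that reserve-or-commit loop; the numbers and names are invented for the sketch:

    #include <stdio.h>

    static long reserved = 1024;

    static int rsv_check(long min_reserved)
    {
            return reserved >= min_reserved ? 0 : -1;       /* -1 ~ -EAGAIN */
    }

    static void commit_transaction(void)
    {
            reserved += 4096;       /* committing frees up reservable space */
    }

    static void merge_pass(long min_reserved)
    {
            while (1) {
                    if (rsv_check(min_reserved)) {
                            commit_transaction();   /* flush, then retry */
                            continue;
                    }
                    printf("reservation ok, doing one merge pass\n");
                    break;
            }
    }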
1782 | 2125 | ||
1783 | /* | 2126 | static noinline_for_stack |
1784 | * callback for the work threads. | 2127 | int prepare_to_merge(struct reloc_control *rc, int err) |
1785 | * this function merges the reloc tree with the corresponding fs tree, | ||
1786 | * and then drops the reloc tree. | ||
1787 | */ | ||
1788 | static void merge_func(struct btrfs_work *work) | ||
1789 | { | 2128 | { |
1790 | struct btrfs_trans_handle *trans; | 2129 | struct btrfs_root *root = rc->extent_root; |
1791 | struct btrfs_root *root; | ||
1792 | struct btrfs_root *reloc_root; | 2130 | struct btrfs_root *reloc_root; |
1793 | struct async_merge *async; | 2131 | struct btrfs_trans_handle *trans; |
2132 | LIST_HEAD(reloc_roots); | ||
2133 | u64 num_bytes = 0; | ||
2134 | int ret; | ||
2135 | int retries = 0; | ||
2136 | |||
2137 | mutex_lock(&root->fs_info->trans_mutex); | ||
2138 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | ||
2139 | rc->merging_rsv_size += rc->nodes_relocated * 2; | ||
2140 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2141 | again: | ||
2142 | if (!err) { | ||
2143 | num_bytes = rc->merging_rsv_size; | ||
2144 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | ||
2145 | num_bytes, &retries); | ||
2146 | if (ret) | ||
2147 | err = ret; | ||
2148 | } | ||
2149 | |||
2150 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
2151 | |||
2152 | if (!err) { | ||
2153 | if (num_bytes != rc->merging_rsv_size) { | ||
2154 | btrfs_end_transaction(trans, rc->extent_root); | ||
2155 | btrfs_block_rsv_release(rc->extent_root, | ||
2156 | rc->block_rsv, num_bytes); | ||
2157 | retries = 0; | ||
2158 | goto again; | ||
2159 | } | ||
2160 | } | ||
1794 | 2161 | ||
1795 | async = container_of(work, struct async_merge, work); | 2162 | rc->merge_reloc_tree = 1; |
1796 | reloc_root = async->root; | 2163 | |
2164 | while (!list_empty(&rc->reloc_roots)) { | ||
2165 | reloc_root = list_entry(rc->reloc_roots.next, | ||
2166 | struct btrfs_root, root_list); | ||
2167 | list_del_init(&reloc_root->root_list); | ||
1797 | 2168 | ||
1798 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
1799 | root = read_fs_root(reloc_root->fs_info, | 2169 | root = read_fs_root(reloc_root->fs_info, |
1800 | reloc_root->root_key.offset); | 2170 | reloc_root->root_key.offset); |
1801 | BUG_ON(IS_ERR(root)); | 2171 | BUG_ON(IS_ERR(root)); |
1802 | BUG_ON(root->reloc_root != reloc_root); | 2172 | BUG_ON(root->reloc_root != reloc_root); |
1803 | 2173 | ||
1804 | merge_reloc_root(async->rc, root); | 2174 | /* |
1805 | 2175 | * set reference count to 1, so btrfs_recover_relocation | |
1806 | trans = btrfs_start_transaction(root, 1); | 2176 | * knows it should resume merging
2177 | */ | ||
2178 | if (!err) | ||
2179 | btrfs_set_root_refs(&reloc_root->root_item, 1); | ||
1807 | btrfs_update_reloc_root(trans, root); | 2180 | btrfs_update_reloc_root(trans, root); |
1808 | btrfs_end_transaction(trans, root); | ||
1809 | } | ||
1810 | 2181 | ||
1811 | btrfs_drop_snapshot(reloc_root, 0); | 2182 | list_add(&reloc_root->root_list, &reloc_roots); |
2183 | } | ||
1812 | 2184 | ||
1813 | if (atomic_dec_and_test(async->num_pending)) | 2185 | list_splice(&reloc_roots, &rc->reloc_roots); |
1814 | complete(async->done); | ||
1815 | 2186 | ||
1816 | kfree(async); | 2187 | if (!err) |
2188 | btrfs_commit_transaction(trans, rc->extent_root); | ||
2189 | else | ||
2190 | btrfs_end_transaction(trans, rc->extent_root); | ||
2191 | return err; | ||
1817 | } | 2192 | } |
1818 | 2193 | ||
1819 | static int merge_reloc_roots(struct reloc_control *rc) | 2194 | static noinline_for_stack |
2195 | int merge_reloc_roots(struct reloc_control *rc) | ||
1820 | { | 2196 | { |
1821 | struct async_merge *async; | ||
1822 | struct btrfs_root *root; | 2197 | struct btrfs_root *root; |
1823 | struct completion done; | 2198 | struct btrfs_root *reloc_root; |
1824 | atomic_t num_pending; | 2199 | LIST_HEAD(reloc_roots); |
2200 | int found = 0; | ||
2201 | int ret; | ||
2202 | again: | ||
2203 | root = rc->extent_root; | ||
2204 | mutex_lock(&root->fs_info->trans_mutex); | ||
2205 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
2206 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1825 | 2207 | ||
1826 | init_completion(&done); | 2208 | while (!list_empty(&reloc_roots)) { |
1827 | atomic_set(&num_pending, 1); | 2209 | found = 1; |
2210 | reloc_root = list_entry(reloc_roots.next, | ||
2211 | struct btrfs_root, root_list); | ||
1828 | 2212 | ||
1829 | while (!list_empty(&rc->reloc_roots)) { | 2213 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { |
1830 | root = list_entry(rc->reloc_roots.next, | 2214 | root = read_fs_root(reloc_root->fs_info, |
1831 | struct btrfs_root, root_list); | 2215 | reloc_root->root_key.offset); |
1832 | list_del_init(&root->root_list); | 2216 | BUG_ON(IS_ERR(root)); |
2217 | BUG_ON(root->reloc_root != reloc_root); | ||
1833 | 2218 | ||
1834 | async = kmalloc(sizeof(*async), GFP_NOFS); | 2219 | ret = merge_reloc_root(rc, root); |
1835 | BUG_ON(!async); | 2220 | BUG_ON(ret); |
1836 | async->work.func = merge_func; | 2221 | } else { |
1837 | async->work.flags = 0; | 2222 | list_del_init(&reloc_root->root_list); |
1838 | async->rc = rc; | 2223 | } |
1839 | async->root = root; | 2224 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); |
1840 | async->done = &done; | ||
1841 | async->num_pending = &num_pending; | ||
1842 | atomic_inc(&num_pending); | ||
1843 | btrfs_queue_worker(&rc->workers, &async->work); | ||
1844 | } | 2225 | } |
1845 | 2226 | ||
1846 | if (!atomic_dec_and_test(&num_pending)) | 2227 | if (found) { |
1847 | wait_for_completion(&done); | 2228 | found = 0; |
1848 | 2229 | goto again; | |
2230 | } | ||
1849 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2231 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
1850 | return 0; | 2232 | return 0; |
1851 | } | 2233 | } |
@@ -1876,119 +2258,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | |||
1876 | return btrfs_record_root_in_trans(trans, root); | 2258 | return btrfs_record_root_in_trans(trans, root); |
1877 | } | 2259 | } |
1878 | 2260 | ||
1879 | /* | 2261 | static noinline_for_stack |
1880 | * select one tree from trees that reference the block. | 2262 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
1881 | * for blocks in reference counted trees, we prefer the reloc tree. | 2263 | struct reloc_control *rc,
1882 | * if no reloc tree is found and reloc_only is true, NULL is returned. | 2264 | struct backref_node *node,
1883 | */ | 2265 | struct backref_edge *edges[], int *nr) |
1884 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
1885 | struct backref_node *node, | ||
1886 | struct backref_edge *edges[], | ||
1887 | int *nr, int reloc_only) | ||
1888 | { | 2266 | { |
1889 | struct backref_node *next; | 2267 | struct backref_node *next; |
1890 | struct btrfs_root *root; | 2268 | struct btrfs_root *root; |
1891 | int index; | 2269 | int index = 0; |
1892 | int loop = 0; | 2270 | |
1893 | again: | ||
1894 | index = 0; | ||
1895 | next = node; | 2271 | next = node; |
1896 | while (1) { | 2272 | while (1) { |
1897 | cond_resched(); | 2273 | cond_resched(); |
1898 | next = walk_up_backref(next, edges, &index); | 2274 | next = walk_up_backref(next, edges, &index); |
1899 | root = next->root; | 2275 | root = next->root; |
1900 | if (!root) { | 2276 | BUG_ON(!root); |
1901 | BUG_ON(!node->old_root); | 2277 | BUG_ON(!root->ref_cows); |
1902 | goto skip; | ||
1903 | } | ||
1904 | |||
1905 | /* no other choice for non-reference counted tree */ | ||
1906 | if (!root->ref_cows) { | ||
1907 | BUG_ON(reloc_only); | ||
1908 | break; | ||
1909 | } | ||
1910 | 2278 | ||
1911 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2279 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
1912 | record_reloc_root_in_trans(trans, root); | 2280 | record_reloc_root_in_trans(trans, root); |
1913 | break; | 2281 | break; |
1914 | } | 2282 | } |
1915 | 2283 | ||
1916 | if (loop) { | 2284 | btrfs_record_root_in_trans(trans, root); |
1917 | btrfs_record_root_in_trans(trans, root); | 2285 | root = root->reloc_root; |
2286 | |||
2287 | if (next->new_bytenr != root->node->start) { | ||
2288 | BUG_ON(next->new_bytenr); | ||
2289 | BUG_ON(!list_empty(&next->list)); | ||
2290 | next->new_bytenr = root->node->start; | ||
2291 | next->root = root; | ||
2292 | list_add_tail(&next->list, | ||
2293 | &rc->backref_cache.changed); | ||
2294 | __mark_block_processed(rc, next); | ||
1918 | break; | 2295 | break; |
1919 | } | 2296 | } |
1920 | 2297 | ||
1921 | if (reloc_only || next != node) { | 2298 | WARN_ON(1); |
1922 | if (!root->reloc_root) | ||
1923 | btrfs_record_root_in_trans(trans, root); | ||
1924 | root = root->reloc_root; | ||
1925 | /* | ||
1926 | * if the reloc tree was created in current | ||
1927 | * transaction, there is no node in the backref tree | ||
1928 | * that corresponds to the root of the reloc tree. | ||
1929 | */ | ||
1930 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
1931 | trans->transid - 1) | ||
1932 | break; | ||
1933 | } | ||
1934 | skip: | ||
1935 | root = NULL; | 2299 | root = NULL; |
1936 | next = walk_down_backref(edges, &index); | 2300 | next = walk_down_backref(edges, &index); |
1937 | if (!next || next->level <= node->level) | 2301 | if (!next || next->level <= node->level) |
1938 | break; | 2302 | break; |
1939 | } | 2303 | } |
2304 | if (!root) | ||
2305 | return NULL; | ||
1940 | 2306 | ||
1941 | if (!root && !loop && !reloc_only) { | 2307 | *nr = index; |
1942 | loop = 1; | 2308 | next = node; |
1943 | goto again; | 2309 | /* setup backref node path for btrfs_reloc_cow_block */ |
2310 | while (1) { | ||
2311 | rc->backref_cache.path[next->level] = next; | ||
2312 | if (--index < 0) | ||
2313 | break; | ||
2314 | next = edges[index]->node[UPPER]; | ||
1944 | } | 2315 | } |
1945 | |||
1946 | if (root) | ||
1947 | *nr = index; | ||
1948 | else | ||
1949 | *nr = 0; | ||
1950 | |||
1951 | return root; | 2316 | return root; |
1952 | } | 2317 | } |
1953 | 2318 | ||
2319 | /* | ||
2320 | * select a tree root for relocation. return NULL if the block | ||
2321 | * is reference counted. we should use do_relocation() in this | ||
2322 | * case. return a tree root pointer if the block isn't reference | ||
2323 | * counted. return -ENOENT if the block is the root of a reloc tree. | ||
2324 | */ | ||
1954 | static noinline_for_stack | 2325 | static noinline_for_stack |
1955 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | 2326 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, |
1956 | struct backref_node *node) | 2327 | struct backref_node *node) |
1957 | { | 2328 | { |
2329 | struct backref_node *next; | ||
2330 | struct btrfs_root *root; | ||
2331 | struct btrfs_root *fs_root = NULL; | ||
1958 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | 2332 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; |
1959 | int nr; | 2333 | int index = 0; |
1960 | return __select_one_root(trans, node, edges, &nr, 0); | 2334 | |
2335 | next = node; | ||
2336 | while (1) { | ||
2337 | cond_resched(); | ||
2338 | next = walk_up_backref(next, edges, &index); | ||
2339 | root = next->root; | ||
2340 | BUG_ON(!root); | ||
2341 | |||
2342 | /* no other choice for non-reference counted tree */ | ||
2343 | if (!root->ref_cows) | ||
2344 | return root; | ||
2345 | |||
2346 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | ||
2347 | fs_root = root; | ||
2348 | |||
2349 | if (next != node) | ||
2350 | return NULL; | ||
2351 | |||
2352 | next = walk_down_backref(edges, &index); | ||
2353 | if (!next || next->level <= node->level) | ||
2354 | break; | ||
2355 | } | ||
2356 | |||
2357 | if (!fs_root) | ||
2358 | return ERR_PTR(-ENOENT); | ||
2359 | return fs_root; | ||
1961 | } | 2360 | } |
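Both select_reloc_root() and the new select_one_root() above traverse the backref graph via walk_up_backref()/walk_down_backref(), i.e. a depth-first walk over the `upper` edges. A tiny recursive version showing just the traversal shape; the real code keeps an explicit edge stack so it can record the path, and the types here are invented:

    #include <stddef.h>

    struct bnode {
            struct bnode *upper[2];         /* parents; NULL slots unused */
            int is_root;
    };

    /* depth-first walk up the backref edges; return the first root found */
    static struct bnode *walk_to_root(struct bnode *node)
    {
            if (!node)
                    return NULL;
            if (node->is_root)
                    return node;
            for (int i = 0; i < 2; i++) {
                    struct bnode *r = walk_to_root(node->upper[i]);
                    if (r)
                            return r;       /* back down takes next branch */
            }
            return NULL;
    }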
1962 | 2361 | ||
1963 | static noinline_for_stack | 2362 | static noinline_for_stack |
1964 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | 2363 | u64 calcu_metadata_size(struct reloc_control *rc, |
1965 | struct backref_node *node, | 2364 | struct backref_node *node, int reserve) |
1966 | struct backref_edge *edges[], int *nr) | ||
1967 | { | 2365 | { |
1968 | return __select_one_root(trans, node, edges, nr, 1); | 2366 | struct backref_node *next = node; |
2367 | struct backref_edge *edge; | ||
2368 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
2369 | u64 num_bytes = 0; | ||
2370 | int index = 0; | ||
2371 | |||
2372 | BUG_ON(reserve && node->processed); | ||
2373 | |||
2374 | while (next) { | ||
2375 | cond_resched(); | ||
2376 | while (1) { | ||
2377 | if (next->processed && (reserve || next != node)) | ||
2378 | break; | ||
2379 | |||
2380 | num_bytes += btrfs_level_size(rc->extent_root, | ||
2381 | next->level); | ||
2382 | |||
2383 | if (list_empty(&next->upper)) | ||
2384 | break; | ||
2385 | |||
2386 | edge = list_entry(next->upper.next, | ||
2387 | struct backref_edge, list[LOWER]); | ||
2388 | edges[index++] = edge; | ||
2389 | next = edge->node[UPPER]; | ||
2390 | } | ||
2391 | next = walk_down_backref(edges, &index); | ||
2392 | } | ||
2393 | return num_bytes; | ||
1969 | } | 2394 | } |
1970 | 2395 | ||
1971 | static void grab_path_buffers(struct btrfs_path *path, | 2396 | static int reserve_metadata_space(struct btrfs_trans_handle *trans, |
1972 | struct backref_node *node, | 2397 | struct reloc_control *rc, |
1973 | struct backref_edge *edges[], int nr) | 2398 | struct backref_node *node) |
1974 | { | 2399 | { |
1975 | int i = 0; | 2400 | struct btrfs_root *root = rc->extent_root; |
1976 | while (1) { | 2401 | u64 num_bytes; |
1977 | drop_node_buffer(node); | 2402 | int ret; |
1978 | node->eb = path->nodes[node->level]; | 2403 | |
1979 | BUG_ON(!node->eb); | 2404 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
1980 | if (path->locks[node->level]) | ||
1981 | node->locked = 1; | ||
1982 | path->nodes[node->level] = NULL; | ||
1983 | path->locks[node->level] = 0; | ||
1984 | |||
1985 | if (i >= nr) | ||
1986 | break; | ||
1987 | 2405 | ||
1988 | edges[i]->blockptr = node->eb->start; | 2406 | trans->block_rsv = rc->block_rsv; |
1989 | node = edges[i]->node[UPPER]; | 2407 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, |
1990 | i++; | 2408 | &rc->block_rsv_retries); |
2409 | if (ret) { | ||
2410 | if (ret == -EAGAIN) | ||
2411 | rc->commit_transaction = 1; | ||
2412 | return ret; | ||
1991 | } | 2413 | } |
2414 | |||
2415 | rc->block_rsv_retries = 0; | ||
2416 | return 0; | ||
2417 | } | ||
2418 | |||
2419 | static void release_metadata_space(struct reloc_control *rc, | ||
2420 | struct backref_node *node) | ||
2421 | { | ||
2422 | u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; | ||
2423 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); | ||
1992 | } | 2424 | } |
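calcu_metadata_size() above sums the block size of every not-yet-processed node along the path, and reserve_metadata_space() doubles that as COW headroom (the "* 2"). A flat-array sketch of that sizing rule, assuming one node per level, which is enough to show the arithmetic:

    #include <stdint.h>
    #include <stdbool.h>

    struct level_node { bool processed; uint32_t blocksize; };

    static uint64_t calc_metadata_size(const struct level_node *path, int levels)
    {
            uint64_t bytes = 0;
            for (int i = 0; i < levels; i++) {
                    if (path[i].processed)
                            break;          /* everything above is done too */
                    bytes += path[i].blocksize;
            }
            return bytes;
    }

    static uint64_t reserve_for_node(const struct level_node *path, int levels)
    {
            return calc_metadata_size(path, levels) * 2;    /* COW headroom */
    }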
1993 | 2425 | ||
1994 | /* | 2426 | /* |
@@ -1999,6 +2431,7 @@ static void grab_path_buffers(struct btrfs_path *path, | |||
1999 | * in that case this function just updates pointers. | 2431 | * in that case this function just updates pointers. |
2000 | */ | 2432 | */ |
2001 | static int do_relocation(struct btrfs_trans_handle *trans, | 2433 | static int do_relocation(struct btrfs_trans_handle *trans, |
2434 | struct reloc_control *rc, | ||
2002 | struct backref_node *node, | 2435 | struct backref_node *node, |
2003 | struct btrfs_key *key, | 2436 | struct btrfs_key *key, |
2004 | struct btrfs_path *path, int lowest) | 2437 | struct btrfs_path *path, int lowest) |
@@ -2019,18 +2452,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2019 | BUG_ON(lowest && node->eb); | 2452 | BUG_ON(lowest && node->eb); |
2020 | 2453 | ||
2021 | path->lowest_level = node->level + 1; | 2454 | path->lowest_level = node->level + 1; |
2455 | rc->backref_cache.path[node->level] = node; | ||
2022 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | 2456 | list_for_each_entry(edge, &node->upper, list[LOWER]) { |
2023 | cond_resched(); | 2457 | cond_resched(); |
2024 | if (node->eb && node->eb->start == edge->blockptr) | ||
2025 | continue; | ||
2026 | 2458 | ||
2027 | upper = edge->node[UPPER]; | 2459 | upper = edge->node[UPPER]; |
2028 | root = select_reloc_root(trans, upper, edges, &nr); | 2460 | root = select_reloc_root(trans, rc, upper, edges, &nr); |
2029 | if (!root) | 2461 | BUG_ON(!root); |
2030 | continue; | 2462 | |
2031 | 2463 | if (upper->eb && !upper->locked) { | |
2032 | if (upper->eb && !upper->locked) | 2464 | if (!lowest) { |
2465 | ret = btrfs_bin_search(upper->eb, key, | ||
2466 | upper->level, &slot); | ||
2467 | BUG_ON(ret); | ||
2468 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
2469 | if (node->eb->start == bytenr) | ||
2470 | goto next; | ||
2471 | } | ||
2033 | drop_node_buffer(upper); | 2472 | drop_node_buffer(upper); |
2473 | } | ||
2034 | 2474 | ||
2035 | if (!upper->eb) { | 2475 | if (!upper->eb) { |
2036 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2476 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
@@ -2040,11 +2480,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2040 | } | 2480 | } |
2041 | BUG_ON(ret > 0); | 2481 | BUG_ON(ret > 0); |
2042 | 2482 | ||
2043 | slot = path->slots[upper->level]; | 2483 | if (!upper->eb) { |
2484 | upper->eb = path->nodes[upper->level]; | ||
2485 | path->nodes[upper->level] = NULL; | ||
2486 | } else { | ||
2487 | BUG_ON(upper->eb != path->nodes[upper->level]); | ||
2488 | } | ||
2044 | 2489 | ||
2045 | btrfs_unlock_up_safe(path, upper->level + 1); | 2490 | upper->locked = 1; |
2046 | grab_path_buffers(path, upper, edges, nr); | 2491 | path->locks[upper->level] = 0; |
2047 | 2492 | ||
2493 | slot = path->slots[upper->level]; | ||
2048 | btrfs_release_path(NULL, path); | 2494 | btrfs_release_path(NULL, path); |
2049 | } else { | 2495 | } else { |
2050 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2496 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
@@ -2053,14 +2499,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2053 | } | 2499 | } |
2054 | 2500 | ||
2055 | bytenr = btrfs_node_blockptr(upper->eb, slot); | 2501 | bytenr = btrfs_node_blockptr(upper->eb, slot); |
2056 | if (!lowest) { | 2502 | if (lowest) { |
2057 | if (node->eb->start == bytenr) { | 2503 | BUG_ON(bytenr != node->bytenr); |
2058 | btrfs_tree_unlock(upper->eb); | ||
2059 | upper->locked = 0; | ||
2060 | continue; | ||
2061 | } | ||
2062 | } else { | 2504 | } else { |
2063 | BUG_ON(node->bytenr != bytenr); | 2505 | if (node->eb->start == bytenr) |
2506 | goto next; | ||
2064 | } | 2507 | } |
2065 | 2508 | ||
2066 | blocksize = btrfs_level_size(root, node->level); | 2509 | blocksize = btrfs_level_size(root, node->level); |
@@ -2072,13 +2515,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2072 | if (!node->eb) { | 2515 | if (!node->eb) { |
2073 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | 2516 | ret = btrfs_cow_block(trans, root, eb, upper->eb, |
2074 | slot, &eb); | 2517 | slot, &eb); |
2518 | btrfs_tree_unlock(eb); | ||
2519 | free_extent_buffer(eb); | ||
2075 | if (ret < 0) { | 2520 | if (ret < 0) { |
2076 | err = ret; | 2521 | err = ret; |
2077 | break; | 2522 | goto next; |
2078 | } | 2523 | } |
2079 | btrfs_set_lock_blocking(eb); | 2524 | BUG_ON(node->eb != eb); |
2080 | node->eb = eb; | ||
2081 | node->locked = 1; | ||
2082 | } else { | 2525 | } else { |
2083 | btrfs_set_node_blockptr(upper->eb, slot, | 2526 | btrfs_set_node_blockptr(upper->eb, slot, |
2084 | node->eb->start); | 2527 | node->eb->start); |
@@ -2096,67 +2539,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2096 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2539 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
2097 | BUG_ON(ret); | 2540 | BUG_ON(ret); |
2098 | } | 2541 | } |
2099 | if (!lowest) { | 2542 | next: |
2100 | btrfs_tree_unlock(upper->eb); | 2543 | if (!upper->pending) |
2101 | upper->locked = 0; | 2544 | drop_node_buffer(upper); |
2102 | } | 2545 | else |
2546 | unlock_node_buffer(upper); | ||
2547 | if (err) | ||
2548 | break; | ||
2103 | } | 2549 | } |
2550 | |||
2551 | if (!err && node->pending) { | ||
2552 | drop_node_buffer(node); | ||
2553 | list_move_tail(&node->list, &rc->backref_cache.changed); | ||
2554 | node->pending = 0; | ||
2555 | } | ||
2556 | |||
2104 | path->lowest_level = 0; | 2557 | path->lowest_level = 0; |
2558 | BUG_ON(err == -ENOSPC); | ||
2105 | return err; | 2559 | return err; |
2106 | } | 2560 | } |
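The `next:` label added to do_relocation() above funnels every loop iteration through one cleanup point: pending uppers are merely unlocked, others drop their buffer entirely. A compact model of that drop-vs-unlock cleanup with an invented buffer type:

    #include <stdbool.h>

    struct nbuf { bool locked; bool held; };

    static void unlock_buf(struct nbuf *b) { b->locked = false; }
    static void drop_buf(struct nbuf *b)   { b->locked = false; b->held = false; }

    static int process_edges(struct nbuf *bufs, int n, const bool *pending)
    {
            int err = 0;
            for (int i = 0; i < n; i++) {
                    bufs[i].locked = bufs[i].held = true;
                    /* ... relocation work that may set err ... */
                    /* cleanup runs on every path, like the next: label */
                    if (!pending[i])
                            drop_buf(&bufs[i]);     /* release the reference */
                    else
                            unlock_buf(&bufs[i]);   /* keep for a later pass */
                    if (err)
                            break;
            }
            return err;
    }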
2107 | 2561 | ||
2108 | static int link_to_upper(struct btrfs_trans_handle *trans, | 2562 | static int link_to_upper(struct btrfs_trans_handle *trans, |
2563 | struct reloc_control *rc, | ||
2109 | struct backref_node *node, | 2564 | struct backref_node *node, |
2110 | struct btrfs_path *path) | 2565 | struct btrfs_path *path) |
2111 | { | 2566 | { |
2112 | struct btrfs_key key; | 2567 | struct btrfs_key key; |
2113 | if (!node->eb || list_empty(&node->upper)) | ||
2114 | return 0; | ||
2115 | 2568 | ||
2116 | btrfs_node_key_to_cpu(node->eb, &key, 0); | 2569 | btrfs_node_key_to_cpu(node->eb, &key, 0); |
2117 | return do_relocation(trans, node, &key, path, 0); | 2570 | return do_relocation(trans, rc, node, &key, path, 0); |
2118 | } | 2571 | } |
2119 | 2572 | ||
2120 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | 2573 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, |
2121 | struct backref_cache *cache, | 2574 | struct reloc_control *rc, |
2122 | struct btrfs_path *path) | 2575 | struct btrfs_path *path, int err) |
2123 | { | 2576 | { |
2577 | LIST_HEAD(list); | ||
2578 | struct backref_cache *cache = &rc->backref_cache; | ||
2124 | struct backref_node *node; | 2579 | struct backref_node *node; |
2125 | int level; | 2580 | int level; |
2126 | int ret; | 2581 | int ret; |
2127 | int err = 0; | ||
2128 | 2582 | ||
2129 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2583 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2130 | while (!list_empty(&cache->pending[level])) { | 2584 | while (!list_empty(&cache->pending[level])) { |
2131 | node = list_entry(cache->pending[level].next, | 2585 | node = list_entry(cache->pending[level].next, |
2132 | struct backref_node, lower); | 2586 | struct backref_node, list); |
2133 | BUG_ON(node->level != level); | 2587 | list_move_tail(&node->list, &list); |
2588 | BUG_ON(!node->pending); | ||
2134 | 2589 | ||
2135 | ret = link_to_upper(trans, node, path); | 2590 | if (!err) { |
2136 | if (ret < 0) | 2591 | ret = link_to_upper(trans, rc, node, path); |
2137 | err = ret; | 2592 | if (ret < 0) |
2138 | /* | 2593 | err = ret; |
2139 | * this remove the node from the pending list and | 2594 | } |
2140 | * may add some other nodes to the level + 1 | ||
2141 | * pending list | ||
2142 | */ | ||
2143 | remove_backref_node(cache, node); | ||
2144 | } | 2595 | } |
2596 | list_splice_init(&list, &cache->pending[level]); | ||
2145 | } | 2597 | } |
2146 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
2147 | return err; | 2598 | return err; |
2148 | } | 2599 | } |
2149 | 2600 | ||
2150 | static void mark_block_processed(struct reloc_control *rc, | 2601 | static void mark_block_processed(struct reloc_control *rc, |
2151 | struct backref_node *node) | 2602 | u64 bytenr, u32 blocksize) |
2603 | { | ||
2604 | set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, | ||
2605 | EXTENT_DIRTY, GFP_NOFS); | ||
2606 | } | ||
2607 | |||
2608 | static void __mark_block_processed(struct reloc_control *rc, | ||
2609 | struct backref_node *node) | ||
2152 | { | 2610 | { |
2153 | u32 blocksize; | 2611 | u32 blocksize; |
2154 | if (node->level == 0 || | 2612 | if (node->level == 0 || |
2155 | in_block_group(node->bytenr, rc->block_group)) { | 2613 | in_block_group(node->bytenr, rc->block_group)) { |
2156 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2614 | blocksize = btrfs_level_size(rc->extent_root, node->level); |
2157 | set_extent_bits(&rc->processed_blocks, node->bytenr, | 2615 | mark_block_processed(rc, node->bytenr, blocksize); |
2158 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
2159 | GFP_NOFS); | ||
2160 | } | 2616 | } |
2161 | node->processed = 1; | 2617 | node->processed = 1; |
2162 | } | 2618 | } |
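mark_block_processed() records processed ranges as EXTENT_DIRTY bits in rc->processed_blocks. A simplified stand-in using a plain bitmap, one bit per block of the relocated group; purely illustrative, since the kernel uses an extent-io tree rather than a bitmap:

    #include <stdint.h>

    #define GROUP_BLOCKS 1024
    static uint8_t processed[GROUP_BLOCKS / 8];

    static void mark_processed(uint64_t bytenr, uint32_t blocksize,
                               uint64_t group_start)
    {
            uint64_t idx = (bytenr - group_start) / blocksize;
            processed[idx / 8] |= (uint8_t)(1u << (idx % 8));
    }

    static int block_processed(uint64_t bytenr, uint32_t blocksize,
                               uint64_t group_start)
    {
            uint64_t idx = (bytenr - group_start) / blocksize;
            return processed[idx / 8] & (1u << (idx % 8));
    }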
@@ -2179,7 +2635,7 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
2179 | if (next->processed) | 2635 | if (next->processed) |
2180 | break; | 2636 | break; |
2181 | 2637 | ||
2182 | mark_block_processed(rc, next); | 2638 | __mark_block_processed(rc, next); |
2183 | 2639 | ||
2184 | if (list_empty(&next->upper)) | 2640 | if (list_empty(&next->upper)) |
2185 | break; | 2641 | break; |
@@ -2202,138 +2658,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
2202 | return 0; | 2658 | return 0; |
2203 | } | 2659 | } |
2204 | 2660 | ||
2205 | /* | ||
2206 | * check if there are any file extent pointers in the leaf point to | ||
2207 | * data require processing | ||
2208 | */ | ||
2209 | static int check_file_extents(struct reloc_control *rc, | ||
2210 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
2211 | { | ||
2212 | struct btrfs_key found_key; | ||
2213 | struct btrfs_file_extent_item *fi; | ||
2214 | struct extent_buffer *leaf; | ||
2215 | u32 nritems; | ||
2216 | int i; | ||
2217 | int ret = 0; | ||
2218 | |||
2219 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
2220 | |||
2221 | nritems = btrfs_header_nritems(leaf); | ||
2222 | for (i = 0; i < nritems; i++) { | ||
2223 | cond_resched(); | ||
2224 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
2225 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
2226 | continue; | ||
2227 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
2228 | if (btrfs_file_extent_type(leaf, fi) == | ||
2229 | BTRFS_FILE_EXTENT_INLINE) | ||
2230 | continue; | ||
2231 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
2232 | if (bytenr == 0) | ||
2233 | continue; | ||
2234 | if (in_block_group(bytenr, rc->block_group)) { | ||
2235 | ret = 1; | ||
2236 | break; | ||
2237 | } | ||
2238 | } | ||
2239 | free_extent_buffer(leaf); | ||
2240 | return ret; | ||
2241 | } | ||
2242 | |||
2243 | /* | ||
2244 | * scan child blocks of a given block to find blocks require processing | ||
2245 | */ | ||
2246 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
2247 | struct reloc_control *rc, | ||
2248 | struct backref_node *node, | ||
2249 | struct rb_root *blocks) | ||
2250 | { | ||
2251 | struct tree_block *block; | ||
2252 | struct rb_node *rb_node; | ||
2253 | u64 bytenr; | ||
2254 | u64 ptr_gen; | ||
2255 | u32 blocksize; | ||
2256 | u32 nritems; | ||
2257 | int i; | ||
2258 | int err = 0; | ||
2259 | |||
2260 | nritems = btrfs_header_nritems(node->eb); | ||
2261 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
2262 | for (i = 0; i < nritems; i++) { | ||
2263 | cond_resched(); | ||
2264 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2265 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2266 | if (ptr_gen == trans->transid) | ||
2267 | continue; | ||
2268 | if (!in_block_group(bytenr, rc->block_group) && | ||
2269 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2270 | continue; | ||
2271 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2272 | continue; | ||
2273 | |||
2274 | readahead_tree_block(rc->extent_root, | ||
2275 | bytenr, blocksize, ptr_gen); | ||
2276 | } | ||
2277 | |||
2278 | for (i = 0; i < nritems; i++) { | ||
2279 | cond_resched(); | ||
2280 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2281 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2282 | if (ptr_gen == trans->transid) | ||
2283 | continue; | ||
2284 | if (!in_block_group(bytenr, rc->block_group) && | ||
2285 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2286 | continue; | ||
2287 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2288 | continue; | ||
2289 | if (!in_block_group(bytenr, rc->block_group) && | ||
2290 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
2291 | continue; | ||
2292 | |||
2293 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2294 | if (!block) { | ||
2295 | err = -ENOMEM; | ||
2296 | break; | ||
2297 | } | ||
2298 | block->bytenr = bytenr; | ||
2299 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
2300 | block->level = node->level - 1; | ||
2301 | block->key_ready = 1; | ||
2302 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
2303 | BUG_ON(rb_node); | ||
2304 | } | ||
2305 | if (err) | ||
2306 | free_block_list(blocks); | ||
2307 | return err; | ||
2308 | } | ||
2309 | |||
2310 | /* | ||
2311 | * find adjacent blocks that require processing | ||
2312 | */ | ||
2313 | static noinline_for_stack | ||
2314 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
2315 | struct reloc_control *rc, | ||
2316 | struct backref_cache *cache, | ||
2317 | struct rb_root *blocks, int level, | ||
2318 | struct backref_node **upper) | ||
2319 | { | ||
2320 | struct backref_node *node; | ||
2321 | int ret = 0; | ||
2322 | |||
2323 | WARN_ON(!list_empty(&cache->pending[level])); | ||
2324 | |||
2325 | if (list_empty(&cache->pending[level + 1])) | ||
2326 | return 1; | ||
2327 | |||
2328 | node = list_entry(cache->pending[level + 1].next, | ||
2329 | struct backref_node, lower); | ||
2330 | if (node->eb) | ||
2331 | ret = add_child_blocks(trans, rc, node, blocks); | ||
2332 | |||
2333 | *upper = node; | ||
2334 | return ret; | ||
2335 | } | ||
2336 | |||
2337 | static int get_tree_block_key(struct reloc_control *rc, | 2661 | static int get_tree_block_key(struct reloc_control *rc, |
2338 | struct tree_block *block) | 2662 | struct tree_block *block) |
2339 | { | 2663 | { |
@@ -2371,40 +2695,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
2371 | struct btrfs_path *path) | 2695 | struct btrfs_path *path) |
2372 | { | 2696 | { |
2373 | struct btrfs_root *root; | 2697 | struct btrfs_root *root; |
2374 | int ret; | 2698 | int release = 0; |
2699 | int ret = 0; | ||
2375 | 2700 | ||
2701 | if (!node) | ||
2702 | return 0; | ||
2703 | |||
2704 | BUG_ON(node->processed); | ||
2376 | root = select_one_root(trans, node); | 2705 | root = select_one_root(trans, node); |
2377 | if (unlikely(!root)) { | 2706 | if (root == ERR_PTR(-ENOENT)) { |
2378 | rc->found_old_snapshot = 1; | ||
2379 | update_processed_blocks(rc, node); | 2707 | update_processed_blocks(rc, node); |
2380 | return 0; | 2708 | goto out; |
2381 | } | 2709 | } |
2382 | 2710 | ||
2383 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2711 | if (!root || root->ref_cows) { |
2384 | ret = do_relocation(trans, node, key, path, 1); | 2712 | ret = reserve_metadata_space(trans, rc, node); |
2385 | if (ret < 0) | 2713 | if (ret) |
2386 | goto out; | ||
2387 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
2388 | ret = replace_file_extents(trans, rc, root, | ||
2389 | node->eb, NULL); | ||
2390 | if (ret < 0) | ||
2391 | goto out; | ||
2392 | } | ||
2393 | drop_node_buffer(node); | ||
2394 | } else if (!root->ref_cows) { | ||
2395 | path->lowest_level = node->level; | ||
2396 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2397 | btrfs_release_path(root, path); | ||
2398 | if (ret < 0) | ||
2399 | goto out; | 2714 | goto out; |
2400 | } else if (root != node->root) { | 2715 | release = 1; |
2401 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
2402 | } | 2716 | } |
2403 | 2717 | ||
2404 | update_processed_blocks(rc, node); | 2718 | if (root) { |
2405 | ret = 0; | 2719 | if (root->ref_cows) { |
2720 | BUG_ON(node->new_bytenr); | ||
2721 | BUG_ON(!list_empty(&node->list)); | ||
2722 | btrfs_record_root_in_trans(trans, root); | ||
2723 | root = root->reloc_root; | ||
2724 | node->new_bytenr = root->node->start; | ||
2725 | node->root = root; | ||
2726 | list_add_tail(&node->list, &rc->backref_cache.changed); | ||
2727 | } else { | ||
2728 | path->lowest_level = node->level; | ||
2729 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2730 | btrfs_release_path(root, path); | ||
2731 | if (ret > 0) | ||
2732 | ret = 0; | ||
2733 | } | ||
2734 | if (!ret) | ||
2735 | update_processed_blocks(rc, node); | ||
2736 | } else { | ||
2737 | ret = do_relocation(trans, rc, node, key, path, 1); | ||
2738 | } | ||
2406 | out: | 2739 | out: |
2407 | drop_node_buffer(node); | 2740 | if (ret || node->level == 0 || node->cowonly) { |
2741 | if (release) | ||
2742 | release_metadata_space(rc, node); | ||
2743 | remove_backref_node(&rc->backref_cache, node); | ||
2744 | } | ||
2408 | return ret; | 2745 | return ret; |
2409 | } | 2746 | } |
2410 | 2747 | ||
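The rewritten relocate_tree_block() above brackets its work with reserve_metadata_space() and release_metadata_space(), whose bodies lie outside this hunk. The contract implied by the call sites, stated as an inference rather than the commit's own text:

    /* reserve_metadata_space(): 0 on success; -EAGAIN when the caller
     * should commit the transaction and retry this block (see the
     * -EAGAIN handling in relocate_tree_blocks() below); any other
     * negative value is a hard failure. Each successful reserve is
     * paired with release_metadata_space() once the node is dropped
     * from the backref cache; the `release` flag tracks this. */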
@@ -2415,12 +2752,10 @@ static noinline_for_stack | |||
2415 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | 2752 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, |
2416 | struct reloc_control *rc, struct rb_root *blocks) | 2753 | struct reloc_control *rc, struct rb_root *blocks) |
2417 | { | 2754 | { |
2418 | struct backref_cache *cache; | ||
2419 | struct backref_node *node; | 2755 | struct backref_node *node; |
2420 | struct btrfs_path *path; | 2756 | struct btrfs_path *path; |
2421 | struct tree_block *block; | 2757 | struct tree_block *block; |
2422 | struct rb_node *rb_node; | 2758 | struct rb_node *rb_node; |
2423 | int level = -1; | ||
2424 | int ret; | 2759 | int ret; |
2425 | int err = 0; | 2760 | int err = 0; |
2426 | 2761 | ||
@@ -2428,21 +2763,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2428 | if (!path) | 2763 | if (!path) |
2429 | return -ENOMEM; | 2764 | return -ENOMEM; |
2430 | 2765 | ||
2431 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
2432 | if (!cache) { | ||
2433 | btrfs_free_path(path); | ||
2434 | return -ENOMEM; | ||
2435 | } | ||
2436 | |||
2437 | backref_cache_init(cache); | ||
2438 | |||
2439 | rb_node = rb_first(blocks); | 2766 | rb_node = rb_first(blocks); |
2440 | while (rb_node) { | 2767 | while (rb_node) { |
2441 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2768 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2442 | if (level == -1) | ||
2443 | level = block->level; | ||
2444 | else | ||
2445 | BUG_ON(level != block->level); | ||
2446 | if (!block->key_ready) | 2769 | if (!block->key_ready) |
2447 | reada_tree_block(rc, block); | 2770 | reada_tree_block(rc, block); |
2448 | rb_node = rb_next(rb_node); | 2771 | rb_node = rb_next(rb_node); |
@@ -2460,7 +2783,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2460 | while (rb_node) { | 2783 | while (rb_node) { |
2461 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2784 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2462 | 2785 | ||
2463 | node = build_backref_tree(rc, cache, &block->key, | 2786 | node = build_backref_tree(rc, &block->key, |
2464 | block->level, block->bytenr); | 2787 | block->level, block->bytenr); |
2465 | if (IS_ERR(node)) { | 2788 | if (IS_ERR(node)) { |
2466 | err = PTR_ERR(node); | 2789 | err = PTR_ERR(node); |
@@ -2470,79 +2793,62 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2470 | ret = relocate_tree_block(trans, rc, node, &block->key, | 2793 | ret = relocate_tree_block(trans, rc, node, &block->key, |
2471 | path); | 2794 | path); |
2472 | if (ret < 0) { | 2795 | if (ret < 0) { |
2473 | err = ret; | 2796 | if (ret != -EAGAIN || rb_node == rb_first(blocks)) |
2797 | err = ret; | ||
2474 | goto out; | 2798 | goto out; |
2475 | } | 2799 | } |
2476 | remove_backref_node(cache, node); | ||
2477 | rb_node = rb_next(rb_node); | 2800 | rb_node = rb_next(rb_node); |
2478 | } | 2801 | } |
2479 | 2802 | out: | |
2480 | if (level > 0) | ||
2481 | goto out; | ||
2482 | |||
2483 | free_block_list(blocks); | 2803 | free_block_list(blocks); |
2804 | err = finish_pending_nodes(trans, rc, path, err); | ||
2484 | 2805 | ||
2485 | /* | 2806 | btrfs_free_path(path); |
2486 | * now backrefs of some upper level tree blocks have been cached, | 2807 | return err; |
2487 | * try relocating blocks referenced by these upper level blocks. | 2808 | } |
2488 | */ | ||
2489 | while (1) { | ||
2490 | struct backref_node *upper = NULL; | ||
2491 | if (trans->transaction->in_commit || | ||
2492 | trans->transaction->delayed_refs.flushing) | ||
2493 | break; | ||
2494 | 2809 | ||
2495 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | 2810 | static noinline_for_stack |
2496 | &upper); | 2811 | int prealloc_file_extent_cluster(struct inode *inode, |
2497 | if (ret < 0) | 2812 | struct file_extent_cluster *cluster) |
2498 | err = ret; | 2813 | { |
2499 | if (ret != 0) | 2814 | u64 alloc_hint = 0; |
2500 | break; | 2815 | u64 start; |
2816 | u64 end; | ||
2817 | u64 offset = BTRFS_I(inode)->index_cnt; | ||
2818 | u64 num_bytes; | ||
2819 | int nr = 0; | ||
2820 | int ret = 0; | ||
2501 | 2821 | ||
2502 | rb_node = rb_first(blocks); | 2822 | BUG_ON(cluster->start != cluster->boundary[0]); |
2503 | while (rb_node) { | 2823 | mutex_lock(&inode->i_mutex); |
2504 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2505 | if (trans->transaction->in_commit || | ||
2506 | trans->transaction->delayed_refs.flushing) | ||
2507 | goto out; | ||
2508 | BUG_ON(!block->key_ready); | ||
2509 | node = build_backref_tree(rc, cache, &block->key, | ||
2510 | level, block->bytenr); | ||
2511 | if (IS_ERR(node)) { | ||
2512 | err = PTR_ERR(node); | ||
2513 | goto out; | ||
2514 | } | ||
2515 | 2824 | ||
2516 | ret = relocate_tree_block(trans, rc, node, | 2825 | ret = btrfs_check_data_free_space(inode, cluster->end + |
2517 | &block->key, path); | 2826 | 1 - cluster->start); |
2518 | if (ret < 0) { | 2827 | if (ret) |
2519 | err = ret; | 2828 | goto out; |
2520 | goto out; | ||
2521 | } | ||
2522 | remove_backref_node(cache, node); | ||
2523 | rb_node = rb_next(rb_node); | ||
2524 | } | ||
2525 | free_block_list(blocks); | ||
2526 | 2829 | ||
2527 | if (upper) { | 2830 | while (nr < cluster->nr) { |
2528 | ret = link_to_upper(trans, upper, path); | 2831 | start = cluster->boundary[nr] - offset; |
2529 | if (ret < 0) { | 2832 | if (nr + 1 < cluster->nr) |
2530 | err = ret; | 2833 | end = cluster->boundary[nr + 1] - 1 - offset; |
2531 | break; | 2834 | else |
2532 | } | 2835 | end = cluster->end - offset; |
2533 | remove_backref_node(cache, upper); | 2836 | |
2534 | } | 2837 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
2838 | num_bytes = end + 1 - start; | ||
2839 | ret = btrfs_prealloc_file_range(inode, 0, start, | ||
2840 | num_bytes, num_bytes, | ||
2841 | end + 1, &alloc_hint); | ||
2842 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2843 | if (ret) | ||
2844 | break; | ||
2845 | nr++; | ||
2535 | } | 2846 | } |
2847 | btrfs_free_reserved_data_space(inode, cluster->end + | ||
2848 | 1 - cluster->start); | ||
2536 | out: | 2849 | out: |
2537 | free_block_list(blocks); | 2850 | mutex_unlock(&inode->i_mutex); |
2538 | 2851 | return ret; | |
2539 | ret = finish_pending_nodes(trans, cache, path); | ||
2540 | if (ret < 0) | ||
2541 | err = ret; | ||
2542 | |||
2543 | kfree(cache); | ||
2544 | btrfs_free_path(path); | ||
2545 | return err; | ||
2546 | } | 2852 | } |
2547 | 2853 | ||
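How the new prealloc_file_extent_cluster() maps cluster boundaries to file offsets, as a worked example with hypothetical numbers:

    /* cluster->nr = 2, offset = BTRFS_I(inode)->index_cnt = 4096,
     * boundary[0] = cluster->start = 8192,
     * boundary[1] = 16384, cluster->end = 20479.
     *
     * nr = 0: start = 8192  - 4096     = 4096
     *         end   = 16384 - 1 - 4096 = 12287
     * nr = 1: start = 16384 - 4096     = 12288
     *         end   = 20479 - 4096     = 16383
     *
     * btrfs_prealloc_file_range() thus covers file ranges
     * [4096, 12287] and [12288, 16383] back to back, each while the
     * corresponding io_tree range is locked. */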
2548 | static noinline_for_stack | 2854 | static noinline_for_stack |
@@ -2588,7 +2894,6 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2588 | u64 offset = BTRFS_I(inode)->index_cnt; | 2894 | u64 offset = BTRFS_I(inode)->index_cnt; |
2589 | unsigned long index; | 2895 | unsigned long index; |
2590 | unsigned long last_index; | 2896 | unsigned long last_index; |
2591 | unsigned int dirty_page = 0; | ||
2592 | struct page *page; | 2897 | struct page *page; |
2593 | struct file_ra_state *ra; | 2898 | struct file_ra_state *ra; |
2594 | int nr = 0; | 2899 | int nr = 0; |
@@ -2601,21 +2906,24 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2601 | if (!ra) | 2906 | if (!ra) |
2602 | return -ENOMEM; | 2907 | return -ENOMEM; |
2603 | 2908 | ||
2604 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2909 | ret = prealloc_file_extent_cluster(inode, cluster); |
2605 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2910 | if (ret) |
2911 | goto out; | ||
2606 | 2912 | ||
2607 | mutex_lock(&inode->i_mutex); | 2913 | file_ra_state_init(ra, inode->i_mapping); |
2608 | 2914 | ||
2609 | i_size_write(inode, cluster->end + 1 - offset); | ||
2610 | ret = setup_extent_mapping(inode, cluster->start - offset, | 2915 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2611 | cluster->end - offset, cluster->start); | 2916 | cluster->end - offset, cluster->start); |
2612 | if (ret) | 2917 | if (ret) |
2613 | goto out_unlock; | 2918 | goto out; |
2614 | |||
2615 | file_ra_state_init(ra, inode->i_mapping); | ||
2616 | 2919 | ||
2617 | WARN_ON(cluster->start != cluster->boundary[0]); | 2920 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
2921 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2618 | while (index <= last_index) { | 2922 | while (index <= last_index) { |
2923 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | ||
2924 | if (ret) | ||
2925 | goto out; | ||
2926 | |||
2619 | page = find_lock_page(inode->i_mapping, index); | 2927 | page = find_lock_page(inode->i_mapping, index); |
2620 | if (!page) { | 2928 | if (!page) { |
2621 | page_cache_sync_readahead(inode->i_mapping, | 2929 | page_cache_sync_readahead(inode->i_mapping, |
@@ -2623,8 +2931,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2623 | last_index + 1 - index); | 2931 | last_index + 1 - index); |
2624 | page = grab_cache_page(inode->i_mapping, index); | 2932 | page = grab_cache_page(inode->i_mapping, index); |
2625 | if (!page) { | 2933 | if (!page) { |
2934 | btrfs_delalloc_release_metadata(inode, | ||
2935 | PAGE_CACHE_SIZE); | ||
2626 | ret = -ENOMEM; | 2936 | ret = -ENOMEM; |
2627 | goto out_unlock; | 2937 | goto out; |
2628 | } | 2938 | } |
2629 | } | 2939 | } |
2630 | 2940 | ||
@@ -2640,8 +2950,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2640 | if (!PageUptodate(page)) { | 2950 | if (!PageUptodate(page)) { |
2641 | unlock_page(page); | 2951 | unlock_page(page); |
2642 | page_cache_release(page); | 2952 | page_cache_release(page); |
2953 | btrfs_delalloc_release_metadata(inode, | ||
2954 | PAGE_CACHE_SIZE); | ||
2643 | ret = -EIO; | 2955 | ret = -EIO; |
2644 | goto out_unlock; | 2956 | goto out; |
2645 | } | 2957 | } |
2646 | } | 2958 | } |
2647 | 2959 | ||
@@ -2660,10 +2972,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2660 | EXTENT_BOUNDARY, GFP_NOFS); | 2972 | EXTENT_BOUNDARY, GFP_NOFS); |
2661 | nr++; | 2973 | nr++; |
2662 | } | 2974 | } |
2663 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2664 | 2975 | ||
2976 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2665 | set_page_dirty(page); | 2977 | set_page_dirty(page); |
2666 | dirty_page++; | ||
2667 | 2978 | ||
2668 | unlock_extent(&BTRFS_I(inode)->io_tree, | 2979 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2669 | page_start, page_end, GFP_NOFS); | 2980 | page_start, page_end, GFP_NOFS); |
@@ -2671,20 +2982,11 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2671 | page_cache_release(page); | 2982 | page_cache_release(page); |
2672 | 2983 | ||
2673 | index++; | 2984 | index++; |
2674 | if (nr < cluster->nr && | 2985 | balance_dirty_pages_ratelimited(inode->i_mapping); |
2675 | page_end + 1 + offset == cluster->boundary[nr]) { | 2986 | btrfs_throttle(BTRFS_I(inode)->root); |
2676 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2677 | dirty_page); | ||
2678 | dirty_page = 0; | ||
2679 | } | ||
2680 | } | ||
2681 | if (dirty_page) { | ||
2682 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2683 | dirty_page); | ||
2684 | } | 2987 | } |
2685 | WARN_ON(nr != cluster->nr); | 2988 | WARN_ON(nr != cluster->nr); |
2686 | out_unlock: | 2989 | out: |
2687 | mutex_unlock(&inode->i_mutex); | ||
2688 | kfree(ra); | 2990 | kfree(ra); |
2689 | return ret; | 2991 | return ret; |
2690 | } | 2992 | } |
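The reservation pairing these hunks add to relocate_file_extent_cluster(), summarized as a sketch of the rule rather than the verbatim flow:

    /* Per page: btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE)
     * is taken before the page is looked up. On any failure before the
     * page is dirtied (grab_cache_page() or the read failing),
     * btrfs_delalloc_release_metadata() hands the space back before
     * jumping to out. Once btrfs_set_extent_delalloc() and
     * set_page_dirty() have run, the reservation stays with the
     * delalloc accounting and is presumably released when the
     * resulting ordered extent completes. */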
@@ -2870,9 +3172,6 @@ out: | |||
2870 | static int block_use_full_backref(struct reloc_control *rc, | 3172 | static int block_use_full_backref(struct reloc_control *rc, |
2871 | struct extent_buffer *eb) | 3173 | struct extent_buffer *eb) |
2872 | { | 3174 | { |
2873 | struct btrfs_path *path; | ||
2874 | struct btrfs_extent_item *ei; | ||
2875 | struct btrfs_key key; | ||
2876 | u64 flags; | 3175 | u64 flags; |
2877 | int ret; | 3176 | int ret; |
2878 | 3177 | ||
@@ -2880,28 +3179,14 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
2880 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | 3179 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) |
2881 | return 1; | 3180 | return 1; |
2882 | 3181 | ||
2883 | path = btrfs_alloc_path(); | 3182 | ret = btrfs_lookup_extent_info(NULL, rc->extent_root, |
2884 | BUG_ON(!path); | 3183 | eb->start, eb->len, NULL, &flags); |
2885 | |||
2886 | key.objectid = eb->start; | ||
2887 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2888 | key.offset = eb->len; | ||
2889 | |||
2890 | path->search_commit_root = 1; | ||
2891 | path->skip_locking = 1; | ||
2892 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
2893 | &key, path, 0, 0); | ||
2894 | BUG_ON(ret); | 3184 | BUG_ON(ret); |
2895 | 3185 | ||
2896 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2897 | struct btrfs_extent_item); | ||
2898 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
2899 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
2900 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 3186 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
2901 | ret = 1; | 3187 | ret = 1; |
2902 | else | 3188 | else |
2903 | ret = 0; | 3189 | ret = 0; |
2904 | btrfs_free_path(path); | ||
2905 | return ret; | 3190 | return ret; |
2906 | } | 3191 | } |
2907 | 3192 | ||
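block_use_full_backref() now fetches the extent flags through btrfs_lookup_extent_info() instead of the hand-rolled extent-tree search on the left. The apparent contract, read off this one call site only:

    /* With a NULL trans handle the lookup is read-only, matching the
     * search_commit_root = 1 path it replaces; on success `flags`
     * holds the extent item's flag bits, so only the
     * BTRFS_BLOCK_FLAG_FULL_BACKREF test remains in the caller. */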
@@ -3074,22 +3359,10 @@ int add_data_references(struct reloc_control *rc, | |||
3074 | struct btrfs_extent_inline_ref *iref; | 3359 | struct btrfs_extent_inline_ref *iref; |
3075 | unsigned long ptr; | 3360 | unsigned long ptr; |
3076 | unsigned long end; | 3361 | unsigned long end; |
3077 | u32 blocksize; | 3362 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); |
3078 | int ret; | 3363 | int ret; |
3079 | int err = 0; | 3364 | int err = 0; |
3080 | 3365 | ||
3081 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
3082 | extent_key->offset); | ||
3083 | BUG_ON(ret < 0); | ||
3084 | if (ret > 0) { | ||
3085 | /* the relocated data is fragmented */ | ||
3086 | rc->extents_skipped++; | ||
3087 | btrfs_release_path(rc->extent_root, path); | ||
3088 | return 0; | ||
3089 | } | ||
3090 | |||
3091 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
3092 | |||
3093 | eb = path->nodes[0]; | 3366 | eb = path->nodes[0]; |
3094 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | 3367 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); |
3095 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | 3368 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); |
@@ -3170,7 +3443,8 @@ int add_data_references(struct reloc_control *rc, | |||
3170 | */ | 3443 | */ |
3171 | static noinline_for_stack | 3444 | static noinline_for_stack |
3172 | int find_next_extent(struct btrfs_trans_handle *trans, | 3445 | int find_next_extent(struct btrfs_trans_handle *trans, |
3173 | struct reloc_control *rc, struct btrfs_path *path) | 3446 | struct reloc_control *rc, struct btrfs_path *path, |
3447 | struct btrfs_key *extent_key) | ||
3174 | { | 3448 | { |
3175 | struct btrfs_key key; | 3449 | struct btrfs_key key; |
3176 | struct extent_buffer *leaf; | 3450 | struct extent_buffer *leaf; |
@@ -3225,6 +3499,7 @@ next: | |||
3225 | rc->search_start = end + 1; | 3499 | rc->search_start = end + 1; |
3226 | } else { | 3500 | } else { |
3227 | rc->search_start = key.objectid + key.offset; | 3501 | rc->search_start = key.objectid + key.offset; |
3502 | memcpy(extent_key, &key, sizeof(key)); | ||
3228 | return 0; | 3503 | return 0; |
3229 | } | 3504 | } |
3230 | } | 3505 | } |
@@ -3262,12 +3537,49 @@ static int check_extent_flags(u64 flags) | |||
3262 | return 0; | 3537 | return 0; |
3263 | } | 3538 | } |
3264 | 3539 | ||
3540 | static noinline_for_stack | ||
3541 | int prepare_to_relocate(struct reloc_control *rc) | ||
3542 | { | ||
3543 | struct btrfs_trans_handle *trans; | ||
3544 | int ret; | ||
3545 | |||
3546 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | ||
3547 | if (!rc->block_rsv) | ||
3548 | return -ENOMEM; | ||
3549 | |||
3550 | /* | ||
3551 | * reserve some space for creating reloc trees. | ||
3552 | * btrfs_init_reloc_root will use them when there | ||
3553 | * is no reservation in transaction handle. | ||
3554 | */ | ||
3555 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | ||
3556 | rc->extent_root->nodesize * 256, | ||
3557 | &rc->block_rsv_retries); | ||
3558 | if (ret) | ||
3559 | return ret; | ||
3560 | |||
3561 | rc->block_rsv->refill_used = 1; | ||
3562 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
3563 | |||
3564 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | ||
3565 | rc->search_start = rc->block_group->key.objectid; | ||
3566 | rc->extents_found = 0; | ||
3567 | rc->nodes_relocated = 0; | ||
3568 | rc->merging_rsv_size = 0; | ||
3569 | rc->block_rsv_retries = 0; | ||
3570 | |||
3571 | rc->create_reloc_tree = 1; | ||
3572 | set_reloc_control(rc); | ||
3573 | |||
3574 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
3575 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3576 | return 0; | ||
3577 | } | ||
3265 | 3578 | ||
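For scale, the up-front reservation taken in prepare_to_relocate() is nodesize * 256. Illustrative arithmetic only:

    /* nodesize  4 KiB:  4096 * 256 = 1 MiB reserved
     * nodesize 16 KiB: 16384 * 256 = 4 MiB reserved
     * Per the comment in the function, this is what
     * btrfs_init_reloc_root() draws on when a transaction handle
     * carries no reservation of its own. */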
3266 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3579 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3267 | { | 3580 | { |
3268 | struct rb_root blocks = RB_ROOT; | 3581 | struct rb_root blocks = RB_ROOT; |
3269 | struct btrfs_key key; | 3582 | struct btrfs_key key; |
3270 | struct file_extent_cluster *cluster; | ||
3271 | struct btrfs_trans_handle *trans = NULL; | 3583 | struct btrfs_trans_handle *trans = NULL; |
3272 | struct btrfs_path *path; | 3584 | struct btrfs_path *path; |
3273 | struct btrfs_extent_item *ei; | 3585 | struct btrfs_extent_item *ei; |
@@ -3277,33 +3589,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3277 | int ret; | 3589 | int ret; |
3278 | int err = 0; | 3590 | int err = 0; |
3279 | 3591 | ||
3280 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3281 | if (!cluster) | ||
3282 | return -ENOMEM; | ||
3283 | |||
3284 | path = btrfs_alloc_path(); | 3592 | path = btrfs_alloc_path(); |
3285 | if (!path) { | 3593 | if (!path) |
3286 | kfree(cluster); | ||
3287 | return -ENOMEM; | 3594 | return -ENOMEM; |
3288 | } | ||
3289 | |||
3290 | rc->extents_found = 0; | ||
3291 | rc->extents_skipped = 0; | ||
3292 | |||
3293 | rc->search_start = rc->block_group->key.objectid; | ||
3294 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3295 | GFP_NOFS); | ||
3296 | |||
3297 | rc->create_reloc_root = 1; | ||
3298 | set_reloc_control(rc); | ||
3299 | 3595 | ||
3300 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3596 | ret = prepare_to_relocate(rc); |
3301 | btrfs_commit_transaction(trans, rc->extent_root); | 3597 | if (ret) { |
3598 | err = ret; | ||
3599 | goto out_free; | ||
3600 | } | ||
3302 | 3601 | ||
3303 | while (1) { | 3602 | while (1) { |
3304 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3603 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3604 | |||
3605 | if (update_backref_cache(trans, &rc->backref_cache)) { | ||
3606 | btrfs_end_transaction(trans, rc->extent_root); | ||
3607 | continue; | ||
3608 | } | ||
3305 | 3609 | ||
3306 | ret = find_next_extent(trans, rc, path); | 3610 | ret = find_next_extent(trans, rc, path, &key); |
3307 | if (ret < 0) | 3611 | if (ret < 0) |
3308 | err = ret; | 3612 | err = ret; |
3309 | if (ret != 0) | 3613 | if (ret != 0) |
@@ -3313,9 +3617,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3313 | 3617 | ||
3314 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3618 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3315 | struct btrfs_extent_item); | 3619 | struct btrfs_extent_item); |
3316 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3620 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
3317 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
3318 | path->slots[0]); | ||
3319 | if (item_size >= sizeof(*ei)) { | 3621 | if (item_size >= sizeof(*ei)) { |
3320 | flags = btrfs_extent_flags(path->nodes[0], ei); | 3622 | flags = btrfs_extent_flags(path->nodes[0], ei); |
3321 | ret = check_extent_flags(flags); | 3623 | ret = check_extent_flags(flags); |
@@ -3356,73 +3658,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3356 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 3658 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
3357 | ret = add_tree_block(rc, &key, path, &blocks); | 3659 | ret = add_tree_block(rc, &key, path, &blocks); |
3358 | } else if (rc->stage == UPDATE_DATA_PTRS && | 3660 | } else if (rc->stage == UPDATE_DATA_PTRS && |
3359 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3661 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3360 | ret = add_data_references(rc, &key, path, &blocks); | 3662 | ret = add_data_references(rc, &key, path, &blocks); |
3361 | } else { | 3663 | } else { |
3362 | btrfs_release_path(rc->extent_root, path); | 3664 | btrfs_release_path(rc->extent_root, path); |
3363 | ret = 0; | 3665 | ret = 0; |
3364 | } | 3666 | } |
3365 | if (ret < 0) { | 3667 | if (ret < 0) { |
3366 | err = 0; | 3668 | err = ret; |
3367 | break; | 3669 | break; |
3368 | } | 3670 | } |
3369 | 3671 | ||
3370 | if (!RB_EMPTY_ROOT(&blocks)) { | 3672 | if (!RB_EMPTY_ROOT(&blocks)) { |
3371 | ret = relocate_tree_blocks(trans, rc, &blocks); | 3673 | ret = relocate_tree_blocks(trans, rc, &blocks); |
3372 | if (ret < 0) { | 3674 | if (ret < 0) { |
3675 | if (ret != -EAGAIN) { | ||
3676 | err = ret; | ||
3677 | break; | ||
3678 | } | ||
3679 | rc->extents_found--; | ||
3680 | rc->search_start = key.objectid; | ||
3681 | } | ||
3682 | } | ||
3683 | |||
3684 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | ||
3685 | rc->block_rsv, 0, 5); | ||
3686 | if (ret < 0) { | ||
3687 | if (ret != -EAGAIN) { | ||
3373 | err = ret; | 3688 | err = ret; |
3689 | WARN_ON(1); | ||
3374 | break; | 3690 | break; |
3375 | } | 3691 | } |
3692 | rc->commit_transaction = 1; | ||
3376 | } | 3693 | } |
3377 | 3694 | ||
3378 | nr = trans->blocks_used; | 3695 | if (rc->commit_transaction) { |
3379 | btrfs_end_transaction(trans, rc->extent_root); | 3696 | rc->commit_transaction = 0; |
3697 | ret = btrfs_commit_transaction(trans, rc->extent_root); | ||
3698 | BUG_ON(ret); | ||
3699 | } else { | ||
3700 | nr = trans->blocks_used; | ||
3701 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
3702 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3703 | } | ||
3380 | trans = NULL; | 3704 | trans = NULL; |
3381 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3382 | 3705 | ||
3383 | if (rc->stage == MOVE_DATA_EXTENTS && | 3706 | if (rc->stage == MOVE_DATA_EXTENTS && |
3384 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3707 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3385 | rc->found_file_extent = 1; | 3708 | rc->found_file_extent = 1; |
3386 | ret = relocate_data_extent(rc->data_inode, | 3709 | ret = relocate_data_extent(rc->data_inode, |
3387 | &key, cluster); | 3710 | &key, &rc->cluster); |
3388 | if (ret < 0) { | 3711 | if (ret < 0) { |
3389 | err = ret; | 3712 | err = ret; |
3390 | break; | 3713 | break; |
3391 | } | 3714 | } |
3392 | } | 3715 | } |
3393 | } | 3716 | } |
3394 | btrfs_free_path(path); | 3717 | |
3718 | btrfs_release_path(rc->extent_root, path); | ||
3719 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3720 | GFP_NOFS); | ||
3395 | 3721 | ||
3396 | if (trans) { | 3722 | if (trans) { |
3397 | nr = trans->blocks_used; | 3723 | nr = trans->blocks_used; |
3398 | btrfs_end_transaction(trans, rc->extent_root); | 3724 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
3399 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3725 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3400 | } | 3726 | } |
3401 | 3727 | ||
3402 | if (!err) { | 3728 | if (!err) { |
3403 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | 3729 | ret = relocate_file_extent_cluster(rc->data_inode, |
3730 | &rc->cluster); | ||
3404 | if (ret < 0) | 3731 | if (ret < 0) |
3405 | err = ret; | 3732 | err = ret; |
3406 | } | 3733 | } |
3407 | 3734 | ||
3408 | kfree(cluster); | 3735 | rc->create_reloc_tree = 0; |
3736 | set_reloc_control(rc); | ||
3409 | 3737 | ||
3410 | rc->create_reloc_root = 0; | 3738 | backref_cache_cleanup(&rc->backref_cache); |
3411 | smp_mb(); | 3739 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3412 | 3740 | ||
3413 | if (rc->extents_found > 0) { | 3741 | err = prepare_to_merge(rc, err); |
3414 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3415 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3416 | } | ||
3417 | 3742 | ||
3418 | merge_reloc_roots(rc); | 3743 | merge_reloc_roots(rc); |
3419 | 3744 | ||
3745 | rc->merge_reloc_tree = 0; | ||
3420 | unset_reloc_control(rc); | 3746 | unset_reloc_control(rc); |
3747 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | ||
3421 | 3748 | ||
3422 | /* get rid of pinned extents */ | 3749 | /* get rid of pinned extents */ |
3423 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3750 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3424 | btrfs_commit_transaction(trans, rc->extent_root); | 3751 | btrfs_commit_transaction(trans, rc->extent_root); |
3425 | 3752 | out_free: | |
3753 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | ||
3754 | btrfs_free_path(path); | ||
3426 | return err; | 3755 | return err; |
3427 | } | 3756 | } |
3428 | 3757 | ||
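The -EAGAIN handling added to the main loop of relocate_block_group() re-queues the extent that could not be relocated; schematically:

    /* relocate_tree_blocks() returned -EAGAIN under reservation
     * pressure:
     *   rc->extents_found--;              undo this extent's count
     *   rc->search_start = key.objectid;  the next find_next_extent()
     *                                     lands on the same extent
     * The following btrfs_block_rsv_check() may additionally set
     * rc->commit_transaction, forcing a commit before the retry. */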
@@ -3448,7 +3777,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3448 | btrfs_set_inode_generation(leaf, item, 1); | 3777 | btrfs_set_inode_generation(leaf, item, 1); |
3449 | btrfs_set_inode_size(leaf, item, 0); | 3778 | btrfs_set_inode_size(leaf, item, 0); |
3450 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3779 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3451 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3780 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
3781 | BTRFS_INODE_PREALLOC); | ||
3452 | btrfs_mark_buffer_dirty(leaf); | 3782 | btrfs_mark_buffer_dirty(leaf); |
3453 | btrfs_release_path(root, path); | 3783 | btrfs_release_path(root, path); |
3454 | out: | 3784 | out: |
@@ -3460,8 +3790,9 @@ out: | |||
3460 | * helper to create inode for data relocation. | 3790 | * helper to create inode for data relocation. |
3461 | * the inode is in data relocation tree and its link count is 0 | 3791 | * the inode is in data relocation tree and its link count is 0 |
3462 | */ | 3792 | */ |
3463 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | 3793 | static noinline_for_stack |
3464 | struct btrfs_block_group_cache *group) | 3794 | struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, |
3795 | struct btrfs_block_group_cache *group) | ||
3465 | { | 3796 | { |
3466 | struct inode *inode = NULL; | 3797 | struct inode *inode = NULL; |
3467 | struct btrfs_trans_handle *trans; | 3798 | struct btrfs_trans_handle *trans; |
@@ -3475,8 +3806,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3475 | if (IS_ERR(root)) | 3806 | if (IS_ERR(root)) |
3476 | return ERR_CAST(root); | 3807 | return ERR_CAST(root); |
3477 | 3808 | ||
3478 | trans = btrfs_start_transaction(root, 1); | 3809 | trans = btrfs_start_transaction(root, 6); |
3479 | BUG_ON(!trans); | 3810 | if (IS_ERR(trans)) |
3811 | return ERR_CAST(trans); | ||
3480 | 3812 | ||
3481 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3813 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); |
3482 | if (err) | 3814 | if (err) |
@@ -3496,7 +3828,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3496 | out: | 3828 | out: |
3497 | nr = trans->blocks_used; | 3829 | nr = trans->blocks_used; |
3498 | btrfs_end_transaction(trans, root); | 3830 | btrfs_end_transaction(trans, root); |
3499 | |||
3500 | btrfs_btree_balance_dirty(root, nr); | 3831 | btrfs_btree_balance_dirty(root, nr); |
3501 | if (err) { | 3832 | if (err) { |
3502 | if (inode) | 3833 | if (inode) |
@@ -3506,6 +3837,21 @@ out: | |||
3506 | return inode; | 3837 | return inode; |
3507 | } | 3838 | } |
3508 | 3839 | ||
3840 | static struct reloc_control *alloc_reloc_control(void) | ||
3841 | { | ||
3842 | struct reloc_control *rc; | ||
3843 | |||
3844 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
3845 | if (!rc) | ||
3846 | return NULL; | ||
3847 | |||
3848 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3849 | backref_cache_init(&rc->backref_cache); | ||
3850 | mapping_tree_init(&rc->reloc_root_tree); | ||
3851 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3852 | return rc; | ||
3853 | } | ||
3854 | |||
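alloc_reloc_control() centralizes setup that both callers below previously open-coded. Usage, following the two call sites in this diff:

    struct reloc_control *rc;

    rc = alloc_reloc_control();
    if (!rc)
            return -ENOMEM;
    rc->extent_root = extent_root;  /* each caller supplies its root */
    /* ... relocate ... */
    kfree(rc);                      /* both callers free it directly */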
3509 | /* | 3855 | /* |
3510 | * function to relocate all extents in a block group. | 3856 | * function to relocate all extents in a block group. |
3511 | */ | 3857 | */ |
@@ -3514,24 +3860,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3514 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3860 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3515 | struct reloc_control *rc; | 3861 | struct reloc_control *rc; |
3516 | int ret; | 3862 | int ret; |
3863 | int rw = 0; | ||
3517 | int err = 0; | 3864 | int err = 0; |
3518 | 3865 | ||
3519 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 3866 | rc = alloc_reloc_control(); |
3520 | if (!rc) | 3867 | if (!rc) |
3521 | return -ENOMEM; | 3868 | return -ENOMEM; |
3522 | 3869 | ||
3523 | mapping_tree_init(&rc->reloc_root_tree); | 3870 | rc->extent_root = extent_root; |
3524 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3525 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3526 | 3871 | ||
3527 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | 3872 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); |
3528 | BUG_ON(!rc->block_group); | 3873 | BUG_ON(!rc->block_group); |
3529 | 3874 | ||
3530 | btrfs_init_workers(&rc->workers, "relocate", | 3875 | if (!rc->block_group->ro) { |
3531 | fs_info->thread_pool_size, NULL); | 3876 | ret = btrfs_set_block_group_ro(extent_root, rc->block_group); |
3532 | 3877 | if (ret) { | |
3533 | rc->extent_root = extent_root; | 3878 | err = ret; |
3534 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3879 | goto out; |
3880 | } | ||
3881 | rw = 1; | ||
3882 | } | ||
3535 | 3883 | ||
3536 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 3884 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
3537 | if (IS_ERR(rc->data_inode)) { | 3885 | if (IS_ERR(rc->data_inode)) { |
@@ -3548,9 +3896,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3548 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 3896 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
3549 | 3897 | ||
3550 | while (1) { | 3898 | while (1) { |
3551 | rc->extents_found = 0; | ||
3552 | rc->extents_skipped = 0; | ||
3553 | |||
3554 | mutex_lock(&fs_info->cleaner_mutex); | 3899 | mutex_lock(&fs_info->cleaner_mutex); |
3555 | 3900 | ||
3556 | btrfs_clean_old_snapshots(fs_info->tree_root); | 3901 | btrfs_clean_old_snapshots(fs_info->tree_root); |
@@ -3559,7 +3904,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3559 | mutex_unlock(&fs_info->cleaner_mutex); | 3904 | mutex_unlock(&fs_info->cleaner_mutex); |
3560 | if (ret < 0) { | 3905 | if (ret < 0) { |
3561 | err = ret; | 3906 | err = ret; |
3562 | break; | 3907 | goto out; |
3563 | } | 3908 | } |
3564 | 3909 | ||
3565 | if (rc->extents_found == 0) | 3910 | if (rc->extents_found == 0) |
@@ -3573,18 +3918,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3573 | invalidate_mapping_pages(rc->data_inode->i_mapping, | 3918 | invalidate_mapping_pages(rc->data_inode->i_mapping, |
3574 | 0, -1); | 3919 | 0, -1); |
3575 | rc->stage = UPDATE_DATA_PTRS; | 3920 | rc->stage = UPDATE_DATA_PTRS; |
3576 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
3577 | rc->extents_skipped >= rc->extents_found) { | ||
3578 | iput(rc->data_inode); | ||
3579 | rc->data_inode = create_reloc_inode(fs_info, | ||
3580 | rc->block_group); | ||
3581 | if (IS_ERR(rc->data_inode)) { | ||
3582 | err = PTR_ERR(rc->data_inode); | ||
3583 | rc->data_inode = NULL; | ||
3584 | break; | ||
3585 | } | ||
3586 | rc->stage = MOVE_DATA_EXTENTS; | ||
3587 | rc->found_file_extent = 0; | ||
3588 | } | 3921 | } |
3589 | } | 3922 | } |
3590 | 3923 | ||
@@ -3597,8 +3930,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3597 | WARN_ON(rc->block_group->reserved > 0); | 3930 | WARN_ON(rc->block_group->reserved > 0); |
3598 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | 3931 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); |
3599 | out: | 3932 | out: |
3933 | if (err && rw) | ||
3934 | btrfs_set_block_group_rw(extent_root, rc->block_group); | ||
3600 | iput(rc->data_inode); | 3935 | iput(rc->data_inode); |
3601 | btrfs_stop_workers(&rc->workers); | ||
3602 | btrfs_put_block_group(rc->block_group); | 3936 | btrfs_put_block_group(rc->block_group); |
3603 | kfree(rc); | 3937 | kfree(rc); |
3604 | return err; | 3938 | return err; |
@@ -3609,7 +3943,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
3609 | struct btrfs_trans_handle *trans; | 3943 | struct btrfs_trans_handle *trans; |
3610 | int ret; | 3944 | int ret; |
3611 | 3945 | ||
3612 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | 3946 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
3613 | 3947 | ||
3614 | memset(&root->root_item.drop_progress, 0, | 3948 | memset(&root->root_item.drop_progress, 0, |
3615 | sizeof(root->root_item.drop_progress)); | 3949 | sizeof(root->root_item.drop_progress)); |
@@ -3702,20 +4036,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3702 | if (list_empty(&reloc_roots)) | 4036 | if (list_empty(&reloc_roots)) |
3703 | goto out; | 4037 | goto out; |
3704 | 4038 | ||
3705 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 4039 | rc = alloc_reloc_control(); |
3706 | if (!rc) { | 4040 | if (!rc) { |
3707 | err = -ENOMEM; | 4041 | err = -ENOMEM; |
3708 | goto out; | 4042 | goto out; |
3709 | } | 4043 | } |
3710 | 4044 | ||
3711 | mapping_tree_init(&rc->reloc_root_tree); | ||
3712 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3713 | btrfs_init_workers(&rc->workers, "relocate", | ||
3714 | root->fs_info->thread_pool_size, NULL); | ||
3715 | rc->extent_root = root->fs_info->extent_root; | 4045 | rc->extent_root = root->fs_info->extent_root; |
3716 | 4046 | ||
3717 | set_reloc_control(rc); | 4047 | set_reloc_control(rc); |
3718 | 4048 | ||
4049 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
4050 | |||
4051 | rc->merge_reloc_tree = 1; | ||
4052 | |||
3719 | while (!list_empty(&reloc_roots)) { | 4053 | while (!list_empty(&reloc_roots)) { |
3720 | reloc_root = list_entry(reloc_roots.next, | 4054 | reloc_root = list_entry(reloc_roots.next, |
3721 | struct btrfs_root, root_list); | 4055 | struct btrfs_root, root_list); |
@@ -3735,20 +4069,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3735 | fs_root->reloc_root = reloc_root; | 4069 | fs_root->reloc_root = reloc_root; |
3736 | } | 4070 | } |
3737 | 4071 | ||
3738 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3739 | btrfs_commit_transaction(trans, rc->extent_root); | 4072 | btrfs_commit_transaction(trans, rc->extent_root); |
3740 | 4073 | ||
3741 | merge_reloc_roots(rc); | 4074 | merge_reloc_roots(rc); |
3742 | 4075 | ||
3743 | unset_reloc_control(rc); | 4076 | unset_reloc_control(rc); |
3744 | 4077 | ||
3745 | trans = btrfs_start_transaction(rc->extent_root, 1); | 4078 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3746 | btrfs_commit_transaction(trans, rc->extent_root); | 4079 | btrfs_commit_transaction(trans, rc->extent_root); |
3747 | out: | 4080 | out: |
3748 | if (rc) { | 4081 | kfree(rc); |
3749 | btrfs_stop_workers(&rc->workers); | ||
3750 | kfree(rc); | ||
3751 | } | ||
3752 | while (!list_empty(&reloc_roots)) { | 4082 | while (!list_empty(&reloc_roots)) { |
3753 | reloc_root = list_entry(reloc_roots.next, | 4083 | reloc_root = list_entry(reloc_roots.next, |
3754 | struct btrfs_root, root_list); | 4084 | struct btrfs_root, root_list); |
@@ -3814,3 +4144,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
3814 | btrfs_put_ordered_extent(ordered); | 4144 | btrfs_put_ordered_extent(ordered); |
3815 | return 0; | 4145 | return 0; |
3816 | } | 4146 | } |
4147 | |||
4148 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
4149 | struct btrfs_root *root, struct extent_buffer *buf, | ||
4150 | struct extent_buffer *cow) | ||
4151 | { | ||
4152 | struct reloc_control *rc; | ||
4153 | struct backref_node *node; | ||
4154 | int first_cow = 0; | ||
4155 | int level; | ||
4156 | int ret; | ||
4157 | |||
4158 | rc = root->fs_info->reloc_ctl; | ||
4159 | if (!rc) | ||
4160 | return; | ||
4161 | |||
4162 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | ||
4163 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
4164 | |||
4165 | level = btrfs_header_level(buf); | ||
4166 | if (btrfs_header_generation(buf) <= | ||
4167 | btrfs_root_last_snapshot(&root->root_item)) | ||
4168 | first_cow = 1; | ||
4169 | |||
4170 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && | ||
4171 | rc->create_reloc_tree) { | ||
4172 | WARN_ON(!first_cow && level == 0); | ||
4173 | |||
4174 | node = rc->backref_cache.path[level]; | ||
4175 | BUG_ON(node->bytenr != buf->start && | ||
4176 | node->new_bytenr != buf->start); | ||
4177 | |||
4178 | drop_node_buffer(node); | ||
4179 | extent_buffer_get(cow); | ||
4180 | node->eb = cow; | ||
4181 | node->new_bytenr = cow->start; | ||
4182 | |||
4183 | if (!node->pending) { | ||
4184 | list_move_tail(&node->list, | ||
4185 | &rc->backref_cache.pending[level]); | ||
4186 | node->pending = 1; | ||
4187 | } | ||
4188 | |||
4189 | if (first_cow) | ||
4190 | __mark_block_processed(rc, node); | ||
4191 | |||
4192 | if (first_cow && level > 0) | ||
4193 | rc->nodes_relocated += buf->len; | ||
4194 | } | ||
4195 | |||
4196 | if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { | ||
4197 | ret = replace_file_extents(trans, rc, root, cow); | ||
4198 | BUG_ON(ret); | ||
4199 | } | ||
4200 | } | ||
4201 | |||
4202 | /* | ||
4203 | * called before creating snapshot. it calculates metadata reservation | ||
4204 | * required for relocating tree blocks in the snapshot | ||
4205 | */ | ||
4206 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
4207 | struct btrfs_pending_snapshot *pending, | ||
4208 | u64 *bytes_to_reserve) | ||
4209 | { | ||
4210 | struct btrfs_root *root; | ||
4211 | struct reloc_control *rc; | ||
4212 | |||
4213 | root = pending->root; | ||
4214 | if (!root->reloc_root) | ||
4215 | return; | ||
4216 | |||
4217 | rc = root->fs_info->reloc_ctl; | ||
4218 | if (!rc->merge_reloc_tree) | ||
4219 | return; | ||
4220 | |||
4221 | root = root->reloc_root; | ||
4222 | BUG_ON(btrfs_root_refs(&root->root_item) == 0); | ||
4223 | /* | ||
4224 | * relocation is in the stage of merging trees. the space | ||
4225 | * used by merging a reloc tree is twice the size of | ||
4226 | * relocated tree nodes in the worst case. half for cowing | ||
4227 | * the reloc tree, half for cowing the fs tree. the space | ||
4228 | * used by cowing the reloc tree will be freed after the | ||
4229 | * tree is dropped. if we create a snapshot, cowing the fs | ||
4230 | * tree may use more space than it frees. so we need | ||
4231 | * reserve extra space. | ||
4232 | */ | ||
4233 | *bytes_to_reserve += rc->nodes_relocated; | ||
4234 | } | ||
4235 | |||
4236 | /* | ||
4237 | * called after snapshot is created. migrate block reservation | ||
4238 | * and create reloc root for the newly created snapshot | ||
4239 | */ | ||
4240 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
4241 | struct btrfs_pending_snapshot *pending) | ||
4242 | { | ||
4243 | struct btrfs_root *root = pending->root; | ||
4244 | struct btrfs_root *reloc_root; | ||
4245 | struct btrfs_root *new_root; | ||
4246 | struct reloc_control *rc; | ||
4247 | int ret; | ||
4248 | |||
4249 | if (!root->reloc_root) | ||
4250 | return; | ||
4251 | |||
4252 | rc = root->fs_info->reloc_ctl; | ||
4253 | rc->merging_rsv_size += rc->nodes_relocated; | ||
4254 | |||
4255 | if (rc->merge_reloc_tree) { | ||
4256 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
4257 | rc->block_rsv, | ||
4258 | rc->nodes_relocated); | ||
4259 | BUG_ON(ret); | ||
4260 | } | ||
4261 | |||
4262 | new_root = pending->snap; | ||
4263 | reloc_root = create_reloc_root(trans, root->reloc_root, | ||
4264 | new_root->root_key.objectid); | ||
4265 | |||
4266 | __add_reloc_root(reloc_root); | ||
4267 | new_root->reloc_root = reloc_root; | ||
4268 | |||
4269 | if (rc->create_reloc_tree) { | ||
4270 | ret = clone_backref_node(trans, rc, root, reloc_root); | ||
4271 | BUG_ON(ret); | ||
4272 | } | ||
4273 | } | ||
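Making the reservation comment in btrfs_reloc_pre_snapshot() concrete, with a hypothetical figure:

    /* Suppose rc->nodes_relocated == 8 MiB when a snapshot is taken
     * during merging:
     *   btrfs_reloc_pre_snapshot()  adds 8 MiB to *bytes_to_reserve,
     *       covering the extra fs-tree COW the snapshot can trigger;
     *   btrfs_reloc_post_snapshot() adds 8 MiB to
     *       rc->merging_rsv_size and migrates 8 MiB from
     *       pending->block_rsv into rc->block_rsv. */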
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 67fa2d29d663..b91ccd972644 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
259 | struct extent_buffer *leaf; | 259 | struct extent_buffer *leaf; |
260 | struct btrfs_path *path; | 260 | struct btrfs_path *path; |
261 | struct btrfs_key key; | 261 | struct btrfs_key key; |
262 | struct btrfs_key root_key; | ||
263 | struct btrfs_root *root; | ||
262 | int err = 0; | 264 | int err = 0; |
263 | int ret; | 265 | int ret; |
264 | 266 | ||
@@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | 272 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
271 | key.offset = 0; | 273 | key.offset = 0; |
272 | 274 | ||
275 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
276 | root_key.offset = (u64)-1; | ||
277 | |||
273 | while (1) { | 278 | while (1) { |
274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | 279 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); |
275 | if (ret < 0) { | 280 | if (ret < 0) { |
@@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 299 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
295 | break; | 300 | break; |
296 | 301 | ||
297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | 302 | root_key.objectid = key.offset; |
298 | if (ret) { | 303 | key.offset++; |
304 | |||
305 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | ||
306 | &root_key); | ||
307 | if (!IS_ERR(root)) | ||
308 | continue; | ||
309 | |||
310 | ret = PTR_ERR(root); | ||
311 | if (ret != -ENOENT) { | ||
299 | err = ret; | 312 | err = ret; |
300 | break; | 313 | break; |
301 | } | 314 | } |
302 | 315 | ||
303 | key.offset++; | 316 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); |
317 | if (ret) { | ||
318 | err = ret; | ||
319 | break; | ||
320 | } | ||
304 | } | 321 | } |
305 | 322 | ||
306 | btrfs_free_path(path); | 323 | btrfs_free_path(path); |
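The reworked loop in btrfs_find_orphan_roots() distinguishes three cases per orphan item; schematically:

    /* For each BTRFS_ORPHAN_ITEM_KEY found:
     *   btrfs_read_fs_root_no_name() succeeds -> root still live,
     *                                            skip it;
     *   fails with -ENOENT                    -> fall through to
     *                                            btrfs_find_dead_roots();
     *   fails with any other error            -> abort the scan. */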
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1866dff0538e..d34b2dfc9628 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
498 | btrfs_start_delalloc_inodes(root, 0); | 498 | btrfs_start_delalloc_inodes(root, 0); |
499 | btrfs_wait_ordered_extents(root, 0, 0); | 499 | btrfs_wait_ordered_extents(root, 0, 0); |
500 | 500 | ||
501 | trans = btrfs_start_transaction(root, 1); | 501 | trans = btrfs_start_transaction(root, 0); |
502 | ret = btrfs_commit_transaction(trans, root); | 502 | ret = btrfs_commit_transaction(trans, root); |
503 | return ret; | 503 | return ret; |
504 | } | 504 | } |
@@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
695 | return -EINVAL; | 695 | return -EINVAL; |
696 | 696 | ||
697 | /* recover relocation */ | 697 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
698 | ret = btrfs_recover_relocation(root); | ||
699 | WARN_ON(ret); | 698 | WARN_ON(ret); |
700 | 699 | ||
701 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 700 | /* recover relocation */ |
701 | ret = btrfs_recover_relocation(root); | ||
702 | WARN_ON(ret); | 702 | WARN_ON(ret); |
703 | 703 | ||
704 | sb->s_flags &= ~MS_RDONLY; | 704 | sb->s_flags &= ~MS_RDONLY; |
@@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
714 | struct list_head *head = &root->fs_info->space_info; | 714 | struct list_head *head = &root->fs_info->space_info; |
715 | struct btrfs_space_info *found; | 715 | struct btrfs_space_info *found; |
716 | u64 total_used = 0; | 716 | u64 total_used = 0; |
717 | u64 data_used = 0; | ||
718 | int bits = dentry->d_sb->s_blocksize_bits; | 717 | int bits = dentry->d_sb->s_blocksize_bits; |
719 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 718 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
720 | 719 | ||
721 | rcu_read_lock(); | 720 | rcu_read_lock(); |
722 | list_for_each_entry_rcu(found, head, list) { | 721 | list_for_each_entry_rcu(found, head, list) |
723 | if (found->flags & (BTRFS_BLOCK_GROUP_DUP| | 722 | total_used += found->disk_used; |
724 | BTRFS_BLOCK_GROUP_RAID10| | ||
725 | BTRFS_BLOCK_GROUP_RAID1)) { | ||
726 | total_used += found->bytes_used; | ||
727 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
728 | data_used += found->bytes_used; | ||
729 | else | ||
730 | data_used += found->total_bytes; | ||
731 | } | ||
732 | |||
733 | total_used += found->bytes_used; | ||
734 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
735 | data_used += found->bytes_used; | ||
736 | else | ||
737 | data_used += found->total_bytes; | ||
738 | } | ||
739 | rcu_read_unlock(); | 723 | rcu_read_unlock(); |
740 | 724 | ||
741 | buf->f_namelen = BTRFS_NAME_LEN; | 725 | buf->f_namelen = BTRFS_NAME_LEN; |
742 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 726 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
743 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 727 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
744 | buf->f_bavail = buf->f_blocks - (data_used >> bits); | 728 | buf->f_bavail = buf->f_bfree; |
745 | buf->f_bsize = dentry->d_sb->s_blocksize; | 729 | buf->f_bsize = dentry->d_sb->s_blocksize; |
746 | buf->f_type = BTRFS_SUPER_MAGIC; | 730 | buf->f_type = BTRFS_SUPER_MAGIC; |
747 | 731 | ||
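The simplified btrfs_statfs() derives everything from the new disk_used counter. A worked example with illustrative numbers:

    /* total_bytes = 100 GiB, sum of disk_used = 30 GiB,
     * 4 KiB blocks (bits = 12):
     *   f_blocks = (100 GiB) >> 12 = 26214400
     *   f_bfree  = 26214400 - ((30 GiB) >> 12)
     *            = 26214400 - 7864320 = 18350080
     *   f_bavail = f_bfree
     * RAID/DUP overhead is folded into disk_used rather than being
     * re-derived per flag combination as in the removed code. */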
@@ -832,11 +816,14 @@ static const struct file_operations btrfs_ctl_fops = { | |||
832 | }; | 816 | }; |
833 | 817 | ||
834 | static struct miscdevice btrfs_misc = { | 818 | static struct miscdevice btrfs_misc = { |
835 | .minor = MISC_DYNAMIC_MINOR, | 819 | .minor = BTRFS_MINOR, |
836 | .name = "btrfs-control", | 820 | .name = "btrfs-control", |
837 | .fops = &btrfs_ctl_fops | 821 | .fops = &btrfs_ctl_fops |
838 | }; | 822 | }; |
839 | 823 | ||
824 | MODULE_ALIAS_MISCDEV(BTRFS_MINOR); | ||
825 | MODULE_ALIAS("devname:btrfs-control"); | ||
826 | |||
840 | static int btrfs_interface_init(void) | 827 | static int btrfs_interface_init(void) |
841 | { | 828 | { |
842 | return misc_register(&btrfs_misc); | 829 | return misc_register(&btrfs_misc); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2cb116099b90..66e4c66cc63b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -165,54 +165,89 @@ enum btrfs_trans_type { | |||
165 | TRANS_USERSPACE, | 165 | TRANS_USERSPACE, |
166 | }; | 166 | }; |
167 | 167 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | ||
169 | { | ||
170 | if (!root->fs_info->log_root_recovering && | ||
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
172 | type == TRANS_USERSPACE)) | ||
173 | return 1; | ||
174 | return 0; | ||
175 | } | ||
176 | |||
168 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 177 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
169 | int num_blocks, int type) | 178 | u64 num_items, int type) |
170 | { | 179 | { |
171 | struct btrfs_trans_handle *h = | 180 | struct btrfs_trans_handle *h; |
172 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 181 | struct btrfs_transaction *cur_trans; |
182 | int retries = 0; | ||
173 | int ret; | 183 | int ret; |
184 | again: | ||
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
186 | if (!h) | ||
187 | return ERR_PTR(-ENOMEM); | ||
174 | 188 | ||
175 | mutex_lock(&root->fs_info->trans_mutex); | 189 | mutex_lock(&root->fs_info->trans_mutex); |
176 | if (!root->fs_info->log_root_recovering && | 190 | if (may_wait_transaction(root, type)) |
177 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
178 | type == TRANS_USERSPACE)) | ||
179 | wait_current_trans(root); | 191 | wait_current_trans(root); |
192 | |||
180 | ret = join_transaction(root); | 193 | ret = join_transaction(root); |
181 | BUG_ON(ret); | 194 | BUG_ON(ret); |
182 | 195 | ||
183 | h->transid = root->fs_info->running_transaction->transid; | 196 | cur_trans = root->fs_info->running_transaction; |
184 | h->transaction = root->fs_info->running_transaction; | 197 | cur_trans->use_count++; |
185 | h->blocks_reserved = num_blocks; | 198 | mutex_unlock(&root->fs_info->trans_mutex); |
199 | |||
200 | h->transid = cur_trans->transid; | ||
201 | h->transaction = cur_trans; | ||
186 | h->blocks_used = 0; | 202 | h->blocks_used = 0; |
187 | h->block_group = 0; | 203 | h->block_group = 0; |
188 | h->alloc_exclude_nr = 0; | 204 | h->bytes_reserved = 0; |
189 | h->alloc_exclude_start = 0; | ||
190 | h->delayed_ref_updates = 0; | 205 | h->delayed_ref_updates = 0; |
206 | h->block_rsv = NULL; | ||
191 | 207 | ||
192 | if (!current->journal_info && type != TRANS_USERSPACE) | 208 | smp_mb(); |
193 | current->journal_info = h; | 209 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
210 | btrfs_commit_transaction(h, root); | ||
211 | goto again; | ||
212 | } | ||
213 | |||
214 | if (num_items > 0) { | ||
215 | ret = btrfs_trans_reserve_metadata(h, root, num_items, | ||
216 | &retries); | ||
217 | if (ret == -EAGAIN) { | ||
218 | btrfs_commit_transaction(h, root); | ||
219 | goto again; | ||
220 | } | ||
221 | if (ret < 0) { | ||
222 | btrfs_end_transaction(h, root); | ||
223 | return ERR_PTR(ret); | ||
224 | } | ||
225 | } | ||
194 | 226 | ||
195 | root->fs_info->running_transaction->use_count++; | 227 | mutex_lock(&root->fs_info->trans_mutex); |
196 | record_root_in_trans(h, root); | 228 | record_root_in_trans(h, root); |
197 | mutex_unlock(&root->fs_info->trans_mutex); | 229 | mutex_unlock(&root->fs_info->trans_mutex); |
230 | |||
231 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
232 | current->journal_info = h; | ||
198 | return h; | 233 | return h; |
199 | } | 234 | } |
200 | 235 | ||
201 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 236 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
202 | int num_blocks) | 237 | int num_items) |
203 | { | 238 | { |
204 | return start_transaction(root, num_blocks, TRANS_START); | 239 | return start_transaction(root, num_items, TRANS_START); |
205 | } | 240 | } |
206 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 241 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
207 | int num_blocks) | 242 | int num_blocks) |
208 | { | 243 | { |
209 | return start_transaction(root, num_blocks, TRANS_JOIN); | 244 | return start_transaction(root, 0, TRANS_JOIN); |
210 | } | 245 | } |
211 | 246 | ||
212 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 247 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
213 | int num_blocks) | 248 | int num_blocks) |
214 | { | 249 | { |
215 | return start_transaction(r, num_blocks, TRANS_USERSPACE); | 250 | return start_transaction(r, 0, TRANS_USERSPACE); |
216 | } | 251 | } |
217 | 252 | ||
218 | /* wait for a transaction commit to be fully complete */ | 253 | /* wait for a transaction commit to be fully complete */ |
@@ -286,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root) | |||
286 | mutex_unlock(&root->fs_info->trans_mutex); | 321 | mutex_unlock(&root->fs_info->trans_mutex); |
287 | } | 322 | } |
288 | 323 | ||
324 | static int should_end_transaction(struct btrfs_trans_handle *trans, | ||
325 | struct btrfs_root *root) | ||
326 | { | ||
327 | int ret; | ||
328 | ret = btrfs_block_rsv_check(trans, root, | ||
329 | &root->fs_info->global_block_rsv, 0, 5); | ||
330 | return ret ? 1 : 0; | ||
331 | } | ||
332 | |||
333 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
334 | struct btrfs_root *root) | ||
335 | { | ||
336 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
337 | int updates; | ||
338 | |||
339 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | ||
340 | return 1; | ||
341 | |||
342 | updates = trans->delayed_ref_updates; | ||
343 | trans->delayed_ref_updates = 0; | ||
344 | if (updates) | ||
345 | btrfs_run_delayed_refs(trans, root, updates); | ||
346 | |||
347 | return should_end_transaction(trans, root); | ||
348 | } | ||
349 | |||
289 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 350 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
290 | struct btrfs_root *root, int throttle) | 351 | struct btrfs_root *root, int throttle) |
291 | { | 352 | { |
292 | struct btrfs_transaction *cur_trans; | 353 | struct btrfs_transaction *cur_trans = trans->transaction; |
293 | struct btrfs_fs_info *info = root->fs_info; | 354 | struct btrfs_fs_info *info = root->fs_info; |
294 | int count = 0; | 355 | int count = 0; |
295 | 356 | ||
@@ -313,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
313 | count++; | 374 | count++; |
314 | } | 375 | } |
315 | 376 | ||
377 | btrfs_trans_release_metadata(trans, root); | ||
378 | |||
379 | if (!root->fs_info->open_ioctl_trans && | ||
380 | should_end_transaction(trans, root)) | ||
381 | trans->transaction->blocked = 1; | ||
382 | |||
383 | if (cur_trans->blocked && !cur_trans->in_commit) { | ||
384 | if (throttle) | ||
385 | return btrfs_commit_transaction(trans, root); | ||
386 | else | ||
387 | wake_up_process(info->transaction_kthread); | ||
388 | } | ||
389 | |||
316 | mutex_lock(&info->trans_mutex); | 390 | mutex_lock(&info->trans_mutex); |
317 | cur_trans = info->running_transaction; | 391 | WARN_ON(cur_trans != info->running_transaction); |
318 | WARN_ON(cur_trans != trans->transaction); | ||
319 | WARN_ON(cur_trans->num_writers < 1); | 392 | WARN_ON(cur_trans->num_writers < 1); |
320 | cur_trans->num_writers--; | 393 | cur_trans->num_writers--; |
321 | 394 | ||
@@ -603,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
603 | 676 | ||
604 | btrfs_free_log(trans, root); | 677 | btrfs_free_log(trans, root); |
605 | btrfs_update_reloc_root(trans, root); | 678 | btrfs_update_reloc_root(trans, root); |
679 | btrfs_orphan_commit_root(trans, root); | ||
606 | 680 | ||
607 | if (root->commit_root != root->node) { | 681 | if (root->commit_root != root->node) { |
608 | switch_commit_root(root); | 682 | switch_commit_root(root); |
@@ -627,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
627 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 701 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
628 | { | 702 | { |
629 | struct btrfs_fs_info *info = root->fs_info; | 703 | struct btrfs_fs_info *info = root->fs_info; |
630 | int ret; | ||
631 | struct btrfs_trans_handle *trans; | 704 | struct btrfs_trans_handle *trans; |
705 | int ret; | ||
632 | unsigned long nr; | 706 | unsigned long nr; |
633 | 707 | ||
634 | smp_mb(); | 708 | if (xchg(&root->defrag_running, 1)) |
635 | if (root->defrag_running) | ||
636 | return 0; | 709 | return 0; |
637 | trans = btrfs_start_transaction(root, 1); | 710 | |
638 | while (1) { | 711 | while (1) { |
639 | root->defrag_running = 1; | 712 | trans = btrfs_start_transaction(root, 0); |
713 | if (IS_ERR(trans)) | ||
714 | return PTR_ERR(trans); | ||
715 | |||
640 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 716 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
717 | |||
641 | nr = trans->blocks_used; | 718 | nr = trans->blocks_used; |
642 | btrfs_end_transaction(trans, root); | 719 | btrfs_end_transaction(trans, root); |
643 | btrfs_btree_balance_dirty(info->tree_root, nr); | 720 | btrfs_btree_balance_dirty(info->tree_root, nr); |
644 | cond_resched(); | 721 | cond_resched(); |
645 | 722 | ||
646 | trans = btrfs_start_transaction(root, 1); | ||
647 | if (root->fs_info->closing || ret != -EAGAIN) | 723 | if (root->fs_info->closing || ret != -EAGAIN) |
648 | break; | 724 | break; |
649 | } | 725 | } |
650 | root->defrag_running = 0; | 726 | root->defrag_running = 0; |
651 | smp_mb(); | 727 | return ret; |
652 | btrfs_end_transaction(trans, root); | ||
653 | return 0; | ||
654 | } | 728 | } |
655 | 729 | ||
656 | #if 0 | 730 | #if 0 |
@@ -758,47 +832,63 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
758 | struct btrfs_root *root = pending->root; | 832 | struct btrfs_root *root = pending->root; |
759 | struct btrfs_root *parent_root; | 833 | struct btrfs_root *parent_root; |
760 | struct inode *parent_inode; | 834 | struct inode *parent_inode; |
835 | struct dentry *dentry; | ||
761 | struct extent_buffer *tmp; | 836 | struct extent_buffer *tmp; |
762 | struct extent_buffer *old; | 837 | struct extent_buffer *old; |
763 | int ret; | 838 | int ret; |
764 | u64 objectid; | 839 | int retries = 0; |
765 | int namelen; | 840 | u64 to_reserve = 0; |
766 | u64 index = 0; | 841 | u64 index = 0; |
767 | 842 | u64 objectid; | |
768 | parent_inode = pending->dentry->d_parent->d_inode; | ||
769 | parent_root = BTRFS_I(parent_inode)->root; | ||
770 | 843 | ||
771 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 844 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
772 | if (!new_root_item) { | 845 | if (!new_root_item) { |
773 | ret = -ENOMEM; | 846 | pending->error = -ENOMEM; |
774 | goto fail; | 847 | goto fail; |
775 | } | 848 | } |
849 | |||
776 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 850 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); |
777 | if (ret) | 851 | if (ret) { |
852 | pending->error = ret; | ||
778 | goto fail; | 853 | goto fail; |
854 | } | ||
855 | |||
856 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | ||
857 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
858 | |||
859 | if (to_reserve > 0) { | ||
860 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | ||
861 | to_reserve, &retries); | ||
862 | if (ret) { | ||
863 | pending->error = ret; | ||
864 | goto fail; | ||
865 | } | ||
866 | } | ||
779 | 867 | ||
780 | key.objectid = objectid; | 868 | key.objectid = objectid; |
781 | /* record when the snapshot was created in key.offset */ | 869 | key.offset = (u64)-1; |
782 | key.offset = trans->transid; | 870 | key.type = BTRFS_ROOT_ITEM_KEY; |
783 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
784 | 871 | ||
785 | memcpy(&pending->root_key, &key, sizeof(key)); | 872 | trans->block_rsv = &pending->block_rsv; |
786 | pending->root_key.offset = (u64)-1; | ||
787 | 873 | ||
874 | dentry = pending->dentry; | ||
875 | parent_inode = dentry->d_parent->d_inode; | ||
876 | parent_root = BTRFS_I(parent_inode)->root; | ||
788 | record_root_in_trans(trans, parent_root); | 877 | record_root_in_trans(trans, parent_root); |
878 | |||
789 | /* | 879 | /* |
790 | * insert the directory item | 880 | * insert the directory item |
791 | */ | 881 | */ |
792 | namelen = strlen(pending->name); | ||
793 | ret = btrfs_set_inode_index(parent_inode, &index); | 882 | ret = btrfs_set_inode_index(parent_inode, &index); |
794 | BUG_ON(ret); | 883 | BUG_ON(ret); |
795 | ret = btrfs_insert_dir_item(trans, parent_root, | 884 | ret = btrfs_insert_dir_item(trans, parent_root, |
796 | pending->name, namelen, | 885 | dentry->d_name.name, dentry->d_name.len, |
797 | parent_inode->i_ino, | 886 | parent_inode->i_ino, &key, |
798 | &pending->root_key, BTRFS_FT_DIR, index); | 887 | BTRFS_FT_DIR, index); |
799 | BUG_ON(ret); | 888 | BUG_ON(ret); |
800 | 889 | ||
801 | btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); | 890 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
891 | dentry->d_name.len * 2); | ||
802 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 892 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
803 | BUG_ON(ret); | 893 | BUG_ON(ret); |
804 | 894 | ||
@@ -815,22 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
815 | free_extent_buffer(old); | 905 | free_extent_buffer(old); |
816 | 906 | ||
817 | btrfs_set_root_node(new_root_item, tmp); | 907 | btrfs_set_root_node(new_root_item, tmp); |
818 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 908 | /* record when the snapshot was created in key.offset */ |
819 | new_root_item); | 909 | key.offset = trans->transid; |
820 | BUG_ON(ret); | 910 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
821 | btrfs_tree_unlock(tmp); | 911 | btrfs_tree_unlock(tmp); |
822 | free_extent_buffer(tmp); | 912 | free_extent_buffer(tmp); |
913 | BUG_ON(ret); | ||
823 | 914 | ||
824 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 915 | /* |
825 | pending->root_key.objectid, | 916 | * insert root back/forward references |
917 | */ | ||
918 | ret = btrfs_add_root_ref(trans, tree_root, objectid, | ||
826 | parent_root->root_key.objectid, | 919 | parent_root->root_key.objectid, |
827 | parent_inode->i_ino, index, pending->name, | 920 | parent_inode->i_ino, index, |
828 | namelen); | 921 | dentry->d_name.name, dentry->d_name.len); |
829 | BUG_ON(ret); | 922 | BUG_ON(ret); |
830 | 923 | ||
924 | key.offset = (u64)-1; | ||
925 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
926 | BUG_ON(IS_ERR(pending->snap)); | ||
927 | |||
928 | btrfs_reloc_post_snapshot(trans, pending); | ||
929 | btrfs_orphan_post_snapshot(trans, pending); | ||
831 | fail: | 930 | fail: |
832 | kfree(new_root_item); | 931 | kfree(new_root_item); |
833 | return ret; | 932 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
933 | return 0; | ||
834 | } | 934 | } |
835 | 935 | ||
836 | /* | 936 | /* |
@@ -878,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | |||
878 | return ret; | 978 | return ret; |
879 | } | 979 | } |
880 | 980 | ||
981 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | ||
982 | { | ||
983 | int ret = 0; | ||
984 | spin_lock(&info->new_trans_lock); | ||
985 | if (info->running_transaction) | ||
986 | ret = info->running_transaction->blocked; | ||
987 | spin_unlock(&info->new_trans_lock); | ||
988 | return ret; | ||
989 | } | ||
990 | |||
881 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 991 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
882 | struct btrfs_root *root) | 992 | struct btrfs_root *root) |
883 | { | 993 | { |
@@ -899,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
899 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1009 | ret = btrfs_run_delayed_refs(trans, root, 0); |
900 | BUG_ON(ret); | 1010 | BUG_ON(ret); |
901 | 1011 | ||
1012 | btrfs_trans_release_metadata(trans, root); | ||
1013 | |||
902 | cur_trans = trans->transaction; | 1014 | cur_trans = trans->transaction; |
903 | /* | 1015 | /* |
904 | * set the flushing flag so procs in this transaction have to | 1016 | * set the flushing flag so procs in this transaction have to |
@@ -951,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
951 | snap_pending = 1; | 1063 | snap_pending = 1; |
952 | 1064 | ||
953 | WARN_ON(cur_trans != trans->transaction); | 1065 | WARN_ON(cur_trans != trans->transaction); |
954 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
955 | TASK_UNINTERRUPTIBLE); | ||
956 | |||
957 | if (cur_trans->num_writers > 1) | 1066 | if (cur_trans->num_writers > 1) |
958 | timeout = MAX_SCHEDULE_TIMEOUT; | 1067 | timeout = MAX_SCHEDULE_TIMEOUT; |
959 | else if (should_grow) | 1068 | else if (should_grow) |
@@ -976,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
976 | */ | 1085 | */ |
977 | btrfs_run_ordered_operations(root, 1); | 1086 | btrfs_run_ordered_operations(root, 1); |
978 | 1087 | ||
1088 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
1089 | TASK_UNINTERRUPTIBLE); | ||
1090 | |||
979 | smp_mb(); | 1091 | smp_mb(); |
980 | if (cur_trans->num_writers > 1 || should_grow) | 1092 | if (cur_trans->num_writers > 1 || should_grow) |
981 | schedule_timeout(timeout); | 1093 | schedule_timeout(timeout); |
@@ -1103,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1103 | 1215 | ||
1104 | if (btrfs_header_backref_rev(root->node) < | 1216 | if (btrfs_header_backref_rev(root->node) < |
1105 | BTRFS_MIXED_BACKREF_REV) | 1217 | BTRFS_MIXED_BACKREF_REV) |
1106 | btrfs_drop_snapshot(root, 0); | 1218 | btrfs_drop_snapshot(root, NULL, 0); |
1107 | else | 1219 | else |
1108 | btrfs_drop_snapshot(root, 1); | 1220 | btrfs_drop_snapshot(root, NULL, 1); |
1109 | } | 1221 | } |
1110 | return 0; | 1222 | return 0; |
1111 | } | 1223 | } |
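Note on the transaction.c hunks above: start_transaction() now takes a count of items the caller intends to modify, reserves metadata space for them up front, and retries from scratch (committing the running transaction first) whenever the reservation comes back -EAGAIN. A minimal userspace sketch of that commit-and-retry shape follows; reserve_metadata() and commit_and_flush() are illustrative stand-ins, not the kernel's API.

	/* Sketch only: models the "again:" loop in the new start_transaction(). */
	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct handle { long reserved_items; };

	static int fake_enospc = 2;	/* pretend the first two reservations fail */

	static int reserve_metadata(struct handle *h, long num_items)
	{
		if (fake_enospc-- > 0)
			return -EAGAIN;		/* space may appear after a commit */
		h->reserved_items = num_items;
		return 0;
	}

	static void commit_and_flush(void)
	{
		/* stands in for btrfs_commit_transaction(), which frees space */
	}

	static struct handle *start_transaction(long num_items)
	{
		struct handle *h;
		int ret;
	again:
		h = calloc(1, sizeof(*h));
		if (!h)
			return NULL;
		if (num_items > 0) {
			ret = reserve_metadata(h, num_items);
			if (ret == -EAGAIN) {
				free(h);	/* the kernel commits via the handle itself */
				commit_and_flush();
				goto again;
			}
			if (ret < 0) {
				free(h);
				return NULL;
			}
		}
		return h;
	}

	int main(void)
	{
		struct handle *h = start_transaction(5);
		printf("reserved %ld items\n", h ? h->reserved_items : -1L);
		free(h);
		return 0;
	}

The same shape explains why btrfs_join_transaction() and btrfs_start_ioctl_transaction() now pass 0 items: they attach to a transaction without reserving anything.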
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93c7ccb33118..e104986d0bfd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -45,20 +45,23 @@ struct btrfs_transaction { | |||
45 | 45 | ||
46 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
47 | u64 transid; | 47 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | ||
48 | unsigned long blocks_reserved; | 50 | unsigned long blocks_reserved; |
49 | unsigned long blocks_used; | 51 | unsigned long blocks_used; |
50 | struct btrfs_transaction *transaction; | ||
51 | u64 block_group; | ||
52 | u64 alloc_exclude_start; | ||
53 | u64 alloc_exclude_nr; | ||
54 | unsigned long delayed_ref_updates; | 52 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | ||
54 | struct btrfs_block_rsv *block_rsv; | ||
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
58 | struct dentry *dentry; | 58 | struct dentry *dentry; |
59 | struct btrfs_root *root; | 59 | struct btrfs_root *root; |
60 | char *name; | 60 | struct btrfs_root *snap; |
61 | struct btrfs_key root_key; | 61 | /* block reservation for the operation */ |
62 | struct btrfs_block_rsv block_rsv; | ||
63 | /* extra metadata reservation for relocation */ | ||
64 | int error; | ||
62 | struct list_head list; | 65 | struct list_head list; |
63 | }; | 66 | }; |
64 | 67 | ||
@@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 88 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
86 | struct btrfs_root *root); | 89 | struct btrfs_root *root); |
87 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 90 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
88 | int num_blocks); | 91 | int num_items); |
89 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 92 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
90 | int num_blocks); | 93 | int num_blocks); |
91 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 94 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
92 | int num_blocks); | 95 | int num_blocks); |
93 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 96 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
94 | struct btrfs_root *root); | 97 | struct btrfs_root *root); |
95 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 98 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
@@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
103 | struct btrfs_root *root); | 106 | struct btrfs_root *root); |
104 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 107 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
105 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
109 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
110 | struct btrfs_root *root); | ||
106 | void btrfs_throttle(struct btrfs_root *root); | 111 | void btrfs_throttle(struct btrfs_root *root); |
107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 112 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
108 | struct btrfs_root *root); | 113 | struct btrfs_root *root); |
@@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
112 | struct extent_io_tree *dirty_pages, int mark); | 117 | struct extent_io_tree *dirty_pages, int mark); |
113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 118 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
114 | struct extent_io_tree *dirty_pages, int mark); | 119 | struct extent_io_tree *dirty_pages, int mark); |
120 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | ||
115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 121 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
116 | #endif | 122 | #endif |
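The header changes track two API shifts: the handle now carries its reservation (bytes_reserved, block_rsv), and a pending snapshot reports failure through pending->error so create_pending_snapshot() can return 0 and let the commit keep draining the pending list. A tiny illustrative model of that error-through-struct convention (not kernel code):

	/* Sketch only: failure is recorded in the request, not the return value. */
	#include <errno.h>
	#include <stdio.h>

	struct pending { int error; };

	static int process_one(struct pending *p, int out_of_space)
	{
		if (out_of_space) {
			p->error = -ENOSPC;	/* caller inspects p->error afterwards */
			return 0;		/* the commit loop is not aborted */
		}
		return 0;
	}

	int main(void)
	{
		struct pending p = { 0 };
		process_one(&p, 1);
		printf("pending error: %d\n", p.error);
		return 0;
	}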
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b10eacdb1620..f7ac8e013ed7 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
117 | path->nodes[1], 0, | 117 | path->nodes[1], 0, |
118 | cache_only, &last_ret, | 118 | cache_only, &last_ret, |
119 | &root->defrag_progress); | 119 | &root->defrag_progress); |
120 | WARN_ON(ret && ret != -EAGAIN); | 120 | if (ret) { |
121 | WARN_ON(ret == -EAGAIN); | ||
122 | goto out; | ||
123 | } | ||
121 | if (next_key_ret == 0) { | 124 | if (next_key_ret == 0) { |
122 | memcpy(&root->defrag_progress, &key, sizeof(key)); | 125 | memcpy(&root->defrag_progress, &key, sizeof(key)); |
123 | ret = -EAGAIN; | 126 | ret = -EAGAIN; |
124 | } | 127 | } |
125 | |||
126 | btrfs_release_path(root, path); | ||
127 | out: | 128 | out: |
128 | if (path) | 129 | if (path) |
129 | btrfs_free_path(path); | 130 | btrfs_free_path(path); |
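btrfs_defrag_root() above now claims the defrag_running flag atomically with xchg() and opens a fresh zero-item transaction per pass, looping only while btrfs_defrag_leaves() reports -EAGAIN. A compilable model of that single-runner resume loop, with stand-in helpers rather than the kernel's own:

	/* Sketch only: xchg()-guarded single runner plus -EAGAIN resume loop. */
	#include <errno.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int defrag_running;

	static int defrag_one_pass(int *passes_left)
	{
		return (*passes_left)-- > 0 ? -EAGAIN : 0;	/* model partial progress */
	}

	static int defrag_root(void)
	{
		int passes = 3, ret;

		if (atomic_exchange(&defrag_running, 1))	/* like xchg(&flag, 1) */
			return 0;				/* someone else is running */
		while (1) {
			ret = defrag_one_pass(&passes);		/* one transaction's worth */
			if (ret != -EAGAIN)
				break;
		}
		atomic_store(&defrag_running, 0);
		return ret;
	}

	int main(void)
	{
		printf("defrag returned %d\n", defrag_root());
		return 0;
	}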
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index af57dd2b43d4..fb102a9aee9c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -135,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
135 | struct btrfs_root *root) | 135 | struct btrfs_root *root) |
136 | { | 136 | { |
137 | int ret; | 137 | int ret; |
138 | int err = 0; | ||
138 | 139 | ||
139 | mutex_lock(&root->log_mutex); | 140 | mutex_lock(&root->log_mutex); |
140 | if (root->log_root) { | 141 | if (root->log_root) { |
@@ -155,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
155 | mutex_lock(&root->fs_info->tree_log_mutex); | 156 | mutex_lock(&root->fs_info->tree_log_mutex); |
156 | if (!root->fs_info->log_root_tree) { | 157 | if (!root->fs_info->log_root_tree) { |
157 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 158 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
158 | BUG_ON(ret); | 159 | if (ret) |
160 | err = ret; | ||
159 | } | 161 | } |
160 | if (!root->log_root) { | 162 | if (err == 0 && !root->log_root) { |
161 | ret = btrfs_add_log_tree(trans, root); | 163 | ret = btrfs_add_log_tree(trans, root); |
162 | BUG_ON(ret); | 164 | if (ret) |
165 | err = ret; | ||
163 | } | 166 | } |
164 | mutex_unlock(&root->fs_info->tree_log_mutex); | 167 | mutex_unlock(&root->fs_info->tree_log_mutex); |
165 | root->log_batch++; | 168 | root->log_batch++; |
166 | atomic_inc(&root->log_writers); | 169 | atomic_inc(&root->log_writers); |
167 | mutex_unlock(&root->log_mutex); | 170 | mutex_unlock(&root->log_mutex); |
168 | return 0; | 171 | return err; |
169 | } | 172 | } |
170 | 173 | ||
171 | /* | 174 | /* |
@@ -376,7 +379,7 @@ insert: | |||
376 | BUG_ON(ret); | 379 | BUG_ON(ret); |
377 | } | 380 | } |
378 | } else if (ret) { | 381 | } else if (ret) { |
379 | BUG(); | 382 | return ret; |
380 | } | 383 | } |
381 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | 384 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], |
382 | path->slots[0]); | 385 | path->slots[0]); |
@@ -1699,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1699 | 1702 | ||
1700 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1703 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1701 | 1704 | ||
1702 | wc->process_func(root, next, wc, ptr_gen); | ||
1703 | |||
1704 | if (*level == 1) { | 1705 | if (*level == 1) { |
1706 | wc->process_func(root, next, wc, ptr_gen); | ||
1707 | |||
1705 | path->slots[*level]++; | 1708 | path->slots[*level]++; |
1706 | if (wc->free) { | 1709 | if (wc->free) { |
1707 | btrfs_read_buffer(next, ptr_gen); | 1710 | btrfs_read_buffer(next, ptr_gen); |
@@ -1734,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1734 | WARN_ON(*level < 0); | 1737 | WARN_ON(*level < 0); |
1735 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 1738 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
1736 | 1739 | ||
1737 | if (path->nodes[*level] == root->node) | 1740 | path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); |
1738 | parent = path->nodes[*level]; | ||
1739 | else | ||
1740 | parent = path->nodes[*level + 1]; | ||
1741 | |||
1742 | bytenr = path->nodes[*level]->start; | ||
1743 | |||
1744 | blocksize = btrfs_level_size(root, *level); | ||
1745 | root_owner = btrfs_header_owner(parent); | ||
1746 | root_gen = btrfs_header_generation(parent); | ||
1747 | |||
1748 | wc->process_func(root, path->nodes[*level], wc, | ||
1749 | btrfs_header_generation(path->nodes[*level])); | ||
1750 | |||
1751 | if (wc->free) { | ||
1752 | next = path->nodes[*level]; | ||
1753 | btrfs_tree_lock(next); | ||
1754 | clean_tree_block(trans, root, next); | ||
1755 | btrfs_set_lock_blocking(next); | ||
1756 | btrfs_wait_tree_block_writeback(next); | ||
1757 | btrfs_tree_unlock(next); | ||
1758 | |||
1759 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
1760 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
1761 | BUG_ON(ret); | ||
1762 | } | ||
1763 | free_extent_buffer(path->nodes[*level]); | ||
1764 | path->nodes[*level] = NULL; | ||
1765 | *level += 1; | ||
1766 | 1741 | ||
1767 | cond_resched(); | 1742 | cond_resched(); |
1768 | return 0; | 1743 | return 0; |
@@ -1781,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1781 | 1756 | ||
1782 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1757 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
1783 | slot = path->slots[i]; | 1758 | slot = path->slots[i]; |
1784 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 1759 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
1785 | struct extent_buffer *node; | 1760 | struct extent_buffer *node; |
1786 | node = path->nodes[i]; | 1761 | node = path->nodes[i]; |
1787 | path->slots[i]++; | 1762 | path->slots[i]++; |
@@ -2047,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2047 | mutex_unlock(&log_root_tree->log_mutex); | 2022 | mutex_unlock(&log_root_tree->log_mutex); |
2048 | 2023 | ||
2049 | ret = update_log_root(trans, log); | 2024 | ret = update_log_root(trans, log); |
2050 | BUG_ON(ret); | ||
2051 | 2025 | ||
2052 | mutex_lock(&log_root_tree->log_mutex); | 2026 | mutex_lock(&log_root_tree->log_mutex); |
2053 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { | 2027 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { |
@@ -2056,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2056 | wake_up(&log_root_tree->log_writer_wait); | 2030 | wake_up(&log_root_tree->log_writer_wait); |
2057 | } | 2031 | } |
2058 | 2032 | ||
2033 | if (ret) { | ||
2034 | BUG_ON(ret != -ENOSPC); | ||
2035 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2036 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | ||
2037 | mutex_unlock(&log_root_tree->log_mutex); | ||
2038 | ret = -EAGAIN; | ||
2039 | goto out; | ||
2040 | } | ||
2041 | |||
2059 | index2 = log_root_tree->log_transid % 2; | 2042 | index2 = log_root_tree->log_transid % 2; |
2060 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2043 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2061 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2044 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
@@ -2129,15 +2112,10 @@ out: | |||
2129 | return 0; | 2112 | return 0; |
2130 | } | 2113 | } |
2131 | 2114 | ||
2132 | /* | 2115 | static void free_log_tree(struct btrfs_trans_handle *trans, |
2133 | * free all the extents used by the tree log. This should be called | 2116 | struct btrfs_root *log) |
2134 | * at commit time of the full transaction | ||
2135 | */ | ||
2136 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2137 | { | 2117 | { |
2138 | int ret; | 2118 | int ret; |
2139 | struct btrfs_root *log; | ||
2140 | struct key; | ||
2141 | u64 start; | 2119 | u64 start; |
2142 | u64 end; | 2120 | u64 end; |
2143 | struct walk_control wc = { | 2121 | struct walk_control wc = { |
@@ -2145,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2145 | .process_func = process_one_buffer | 2123 | .process_func = process_one_buffer |
2146 | }; | 2124 | }; |
2147 | 2125 | ||
2148 | if (!root->log_root || root->fs_info->log_root_recovering) | ||
2149 | return 0; | ||
2150 | |||
2151 | log = root->log_root; | ||
2152 | ret = walk_log_tree(trans, log, &wc); | 2126 | ret = walk_log_tree(trans, log, &wc); |
2153 | BUG_ON(ret); | 2127 | BUG_ON(ret); |
2154 | 2128 | ||
@@ -2162,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2162 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2136 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2163 | } | 2137 | } |
2164 | 2138 | ||
2165 | if (log->log_transid > 0) { | ||
2166 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
2167 | &log->root_key); | ||
2168 | BUG_ON(ret); | ||
2169 | } | ||
2170 | root->log_root = NULL; | ||
2171 | free_extent_buffer(log->node); | 2139 | free_extent_buffer(log->node); |
2172 | kfree(log); | 2140 | kfree(log); |
2141 | } | ||
2142 | |||
2143 | /* | ||
2144 | * free all the extents used by the tree log. This should be called | ||
2145 | * at commit time of the full transaction | ||
2146 | */ | ||
2147 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2148 | { | ||
2149 | if (root->log_root) { | ||
2150 | free_log_tree(trans, root->log_root); | ||
2151 | root->log_root = NULL; | ||
2152 | } | ||
2153 | return 0; | ||
2154 | } | ||
2155 | |||
2156 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
2157 | struct btrfs_fs_info *fs_info) | ||
2158 | { | ||
2159 | if (fs_info->log_root_tree) { | ||
2160 | free_log_tree(trans, fs_info->log_root_tree); | ||
2161 | fs_info->log_root_tree = NULL; | ||
2162 | } | ||
2173 | return 0; | 2163 | return 0; |
2174 | } | 2164 | } |
2175 | 2165 | ||
@@ -2203,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2203 | struct btrfs_dir_item *di; | 2193 | struct btrfs_dir_item *di; |
2204 | struct btrfs_path *path; | 2194 | struct btrfs_path *path; |
2205 | int ret; | 2195 | int ret; |
2196 | int err = 0; | ||
2206 | int bytes_del = 0; | 2197 | int bytes_del = 0; |
2207 | 2198 | ||
2208 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2199 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
@@ -2218,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2218 | path = btrfs_alloc_path(); | 2209 | path = btrfs_alloc_path(); |
2219 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2210 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
2220 | name, name_len, -1); | 2211 | name, name_len, -1); |
2221 | if (di && !IS_ERR(di)) { | 2212 | if (IS_ERR(di)) { |
2213 | err = PTR_ERR(di); | ||
2214 | goto fail; | ||
2215 | } | ||
2216 | if (di) { | ||
2222 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2217 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2223 | bytes_del += name_len; | 2218 | bytes_del += name_len; |
2224 | BUG_ON(ret); | 2219 | BUG_ON(ret); |
@@ -2226,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2226 | btrfs_release_path(log, path); | 2221 | btrfs_release_path(log, path); |
2227 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2222 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, |
2228 | index, name, name_len, -1); | 2223 | index, name, name_len, -1); |
2229 | if (di && !IS_ERR(di)) { | 2224 | if (IS_ERR(di)) { |
2225 | err = PTR_ERR(di); | ||
2226 | goto fail; | ||
2227 | } | ||
2228 | if (di) { | ||
2230 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2231 | bytes_del += name_len; | 2230 | bytes_del += name_len; |
2232 | BUG_ON(ret); | 2231 | BUG_ON(ret); |
@@ -2244,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2244 | btrfs_release_path(log, path); | 2243 | btrfs_release_path(log, path); |
2245 | 2244 | ||
2246 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
2246 | if (ret < 0) { | ||
2247 | err = ret; | ||
2248 | goto fail; | ||
2249 | } | ||
2247 | if (ret == 0) { | 2250 | if (ret == 0) { |
2248 | struct btrfs_inode_item *item; | 2251 | struct btrfs_inode_item *item; |
2249 | u64 i_size; | 2252 | u64 i_size; |
@@ -2261,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2261 | ret = 0; | 2264 | ret = 0; |
2262 | btrfs_release_path(log, path); | 2265 | btrfs_release_path(log, path); |
2263 | } | 2266 | } |
2264 | 2267 | fail: | |
2265 | btrfs_free_path(path); | 2268 | btrfs_free_path(path); |
2266 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2269 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2270 | if (ret == -ENOSPC) { | ||
2271 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2272 | ret = 0; | ||
2273 | } | ||
2267 | btrfs_end_log_trans(root); | 2274 | btrfs_end_log_trans(root); |
2268 | 2275 | ||
2269 | return 0; | 2276 | return 0; |
@@ -2291,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2291 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2298 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2292 | dirid, &index); | 2299 | dirid, &index); |
2293 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2300 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2301 | if (ret == -ENOSPC) { | ||
2302 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2303 | ret = 0; | ||
2304 | } | ||
2294 | btrfs_end_log_trans(root); | 2305 | btrfs_end_log_trans(root); |
2295 | 2306 | ||
2296 | return ret; | 2307 | return ret; |
@@ -2318,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
2318 | else | 2329 | else |
2319 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | 2330 | key.type = BTRFS_DIR_LOG_INDEX_KEY; |
2320 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | 2331 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); |
2321 | BUG_ON(ret); | 2332 | if (ret) |
2333 | return ret; | ||
2322 | 2334 | ||
2323 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2335 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
2324 | struct btrfs_dir_log_item); | 2336 | struct btrfs_dir_log_item); |
@@ -2343,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2343 | struct btrfs_key max_key; | 2355 | struct btrfs_key max_key; |
2344 | struct btrfs_root *log = root->log_root; | 2356 | struct btrfs_root *log = root->log_root; |
2345 | struct extent_buffer *src; | 2357 | struct extent_buffer *src; |
2358 | int err = 0; | ||
2346 | int ret; | 2359 | int ret; |
2347 | int i; | 2360 | int i; |
2348 | int nritems; | 2361 | int nritems; |
@@ -2405,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2405 | ret = overwrite_item(trans, log, dst_path, | 2418 | ret = overwrite_item(trans, log, dst_path, |
2406 | path->nodes[0], path->slots[0], | 2419 | path->nodes[0], path->slots[0], |
2407 | &tmp); | 2420 | &tmp); |
2421 | if (ret) { | ||
2422 | err = ret; | ||
2423 | goto done; | ||
2424 | } | ||
2408 | } | 2425 | } |
2409 | } | 2426 | } |
2410 | btrfs_release_path(root, path); | 2427 | btrfs_release_path(root, path); |
@@ -2432,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2432 | goto done; | 2449 | goto done; |
2433 | ret = overwrite_item(trans, log, dst_path, src, i, | 2450 | ret = overwrite_item(trans, log, dst_path, src, i, |
2434 | &min_key); | 2451 | &min_key); |
2435 | BUG_ON(ret); | 2452 | if (ret) { |
2453 | err = ret; | ||
2454 | goto done; | ||
2455 | } | ||
2436 | } | 2456 | } |
2437 | path->slots[0] = nritems; | 2457 | path->slots[0] = nritems; |
2438 | 2458 | ||
@@ -2454,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2454 | ret = overwrite_item(trans, log, dst_path, | 2474 | ret = overwrite_item(trans, log, dst_path, |
2455 | path->nodes[0], path->slots[0], | 2475 | path->nodes[0], path->slots[0], |
2456 | &tmp); | 2476 | &tmp); |
2457 | 2477 | if (ret) | |
2458 | BUG_ON(ret); | 2478 | err = ret; |
2459 | last_offset = tmp.offset; | 2479 | else |
2480 | last_offset = tmp.offset; | ||
2460 | goto done; | 2481 | goto done; |
2461 | } | 2482 | } |
2462 | } | 2483 | } |
2463 | done: | 2484 | done: |
2464 | *last_offset_ret = last_offset; | ||
2465 | btrfs_release_path(root, path); | 2485 | btrfs_release_path(root, path); |
2466 | btrfs_release_path(log, dst_path); | 2486 | btrfs_release_path(log, dst_path); |
2467 | 2487 | ||
2468 | /* insert the log range keys to indicate where the log is valid */ | 2488 | if (err == 0) { |
2469 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | 2489 | *last_offset_ret = last_offset; |
2470 | first_offset, last_offset); | 2490 | /* |
2471 | BUG_ON(ret); | 2491 | * insert the log range keys to indicate where the log |
2472 | return 0; | 2492 | * is valid |
2493 | */ | ||
2494 | ret = insert_dir_log_key(trans, log, path, key_type, | ||
2495 | inode->i_ino, first_offset, | ||
2496 | last_offset); | ||
2497 | if (ret) | ||
2498 | err = ret; | ||
2499 | } | ||
2500 | return err; | ||
2473 | } | 2501 | } |
2474 | 2502 | ||
2475 | /* | 2503 | /* |
@@ -2501,7 +2529,8 @@ again: | |||
2501 | ret = log_dir_items(trans, root, inode, path, | 2529 | ret = log_dir_items(trans, root, inode, path, |
2502 | dst_path, key_type, min_key, | 2530 | dst_path, key_type, min_key, |
2503 | &max_key); | 2531 | &max_key); |
2504 | BUG_ON(ret); | 2532 | if (ret) |
2533 | return ret; | ||
2505 | if (max_key == (u64)-1) | 2534 | if (max_key == (u64)-1) |
2506 | break; | 2535 | break; |
2507 | min_key = max_key + 1; | 2536 | min_key = max_key + 1; |
@@ -2535,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2535 | 2564 | ||
2536 | while (1) { | 2565 | while (1) { |
2537 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | 2566 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); |
2538 | 2567 | BUG_ON(ret == 0); | |
2539 | if (ret != 1) | 2568 | if (ret < 0) |
2540 | break; | 2569 | break; |
2541 | 2570 | ||
2542 | if (path->slots[0] == 0) | 2571 | if (path->slots[0] == 0) |
@@ -2554,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2554 | btrfs_release_path(log, path); | 2583 | btrfs_release_path(log, path); |
2555 | } | 2584 | } |
2556 | btrfs_release_path(log, path); | 2585 | btrfs_release_path(log, path); |
2557 | return 0; | 2586 | return ret; |
2558 | } | 2587 | } |
2559 | 2588 | ||
2560 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2589 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
@@ -2587,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2587 | } | 2616 | } |
2588 | ret = btrfs_insert_empty_items(trans, log, dst_path, | 2617 | ret = btrfs_insert_empty_items(trans, log, dst_path, |
2589 | ins_keys, ins_sizes, nr); | 2618 | ins_keys, ins_sizes, nr); |
2590 | BUG_ON(ret); | 2619 | if (ret) { |
2620 | kfree(ins_data); | ||
2621 | return ret; | ||
2622 | } | ||
2591 | 2623 | ||
2592 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { | 2624 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
2593 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2625 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
@@ -2660,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2660 | * we have to do this after the loop above to avoid changing the | 2692 | * we have to do this after the loop above to avoid changing the |
2661 | * log tree while trying to change the log tree. | 2693 | * log tree while trying to change the log tree. |
2662 | */ | 2694 | */ |
2695 | ret = 0; | ||
2663 | while (!list_empty(&ordered_sums)) { | 2696 | while (!list_empty(&ordered_sums)) { |
2664 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | 2697 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
2665 | struct btrfs_ordered_sum, | 2698 | struct btrfs_ordered_sum, |
2666 | list); | 2699 | list); |
2667 | ret = btrfs_csum_file_blocks(trans, log, sums); | 2700 | if (!ret) |
2668 | BUG_ON(ret); | 2701 | ret = btrfs_csum_file_blocks(trans, log, sums); |
2669 | list_del(&sums->list); | 2702 | list_del(&sums->list); |
2670 | kfree(sums); | 2703 | kfree(sums); |
2671 | } | 2704 | } |
2672 | return 0; | 2705 | return ret; |
2673 | } | 2706 | } |
2674 | 2707 | ||
2675 | /* log a single inode in the tree log. | 2708 | /* log a single inode in the tree log. |
@@ -2697,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2697 | struct btrfs_root *log = root->log_root; | 2730 | struct btrfs_root *log = root->log_root; |
2698 | struct extent_buffer *src = NULL; | 2731 | struct extent_buffer *src = NULL; |
2699 | u32 size; | 2732 | u32 size; |
2733 | int err = 0; | ||
2700 | int ret; | 2734 | int ret; |
2701 | int nritems; | 2735 | int nritems; |
2702 | int ins_start_slot = 0; | 2736 | int ins_start_slot = 0; |
@@ -2739,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2739 | } else { | 2773 | } else { |
2740 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2774 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
2741 | } | 2775 | } |
2742 | BUG_ON(ret); | 2776 | if (ret) { |
2777 | err = ret; | ||
2778 | goto out_unlock; | ||
2779 | } | ||
2743 | path->keep_locks = 1; | 2780 | path->keep_locks = 1; |
2744 | 2781 | ||
2745 | while (1) { | 2782 | while (1) { |
@@ -2768,7 +2805,10 @@ again: | |||
2768 | 2805 | ||
2769 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 2806 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, |
2770 | ins_nr, inode_only); | 2807 | ins_nr, inode_only); |
2771 | BUG_ON(ret); | 2808 | if (ret) { |
2809 | err = ret; | ||
2810 | goto out_unlock; | ||
2811 | } | ||
2772 | ins_nr = 1; | 2812 | ins_nr = 1; |
2773 | ins_start_slot = path->slots[0]; | 2813 | ins_start_slot = path->slots[0]; |
2774 | next_slot: | 2814 | next_slot: |
@@ -2784,7 +2824,10 @@ next_slot: | |||
2784 | ret = copy_items(trans, log, dst_path, src, | 2824 | ret = copy_items(trans, log, dst_path, src, |
2785 | ins_start_slot, | 2825 | ins_start_slot, |
2786 | ins_nr, inode_only); | 2826 | ins_nr, inode_only); |
2787 | BUG_ON(ret); | 2827 | if (ret) { |
2828 | err = ret; | ||
2829 | goto out_unlock; | ||
2830 | } | ||
2788 | ins_nr = 0; | 2831 | ins_nr = 0; |
2789 | } | 2832 | } |
2790 | btrfs_release_path(root, path); | 2833 | btrfs_release_path(root, path); |
@@ -2802,7 +2845,10 @@ next_slot: | |||
2802 | ret = copy_items(trans, log, dst_path, src, | 2845 | ret = copy_items(trans, log, dst_path, src, |
2803 | ins_start_slot, | 2846 | ins_start_slot, |
2804 | ins_nr, inode_only); | 2847 | ins_nr, inode_only); |
2805 | BUG_ON(ret); | 2848 | if (ret) { |
2849 | err = ret; | ||
2850 | goto out_unlock; | ||
2851 | } | ||
2806 | ins_nr = 0; | 2852 | ins_nr = 0; |
2807 | } | 2853 | } |
2808 | WARN_ON(ins_nr); | 2854 | WARN_ON(ins_nr); |
@@ -2810,14 +2856,18 @@ next_slot: | |||
2810 | btrfs_release_path(root, path); | 2856 | btrfs_release_path(root, path); |
2811 | btrfs_release_path(log, dst_path); | 2857 | btrfs_release_path(log, dst_path); |
2812 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2858 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2813 | BUG_ON(ret); | 2859 | if (ret) { |
2860 | err = ret; | ||
2861 | goto out_unlock; | ||
2862 | } | ||
2814 | } | 2863 | } |
2815 | BTRFS_I(inode)->logged_trans = trans->transid; | 2864 | BTRFS_I(inode)->logged_trans = trans->transid; |
2865 | out_unlock: | ||
2816 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2866 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2817 | 2867 | ||
2818 | btrfs_free_path(path); | 2868 | btrfs_free_path(path); |
2819 | btrfs_free_path(dst_path); | 2869 | btrfs_free_path(dst_path); |
2820 | return 0; | 2870 | return err; |
2821 | } | 2871 | } |
2822 | 2872 | ||
2823 | /* | 2873 | /* |
@@ -2942,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2942 | goto end_no_trans; | 2992 | goto end_no_trans; |
2943 | } | 2993 | } |
2944 | 2994 | ||
2945 | start_log_trans(trans, root); | 2995 | ret = start_log_trans(trans, root); |
2996 | if (ret) | ||
2997 | goto end_trans; | ||
2946 | 2998 | ||
2947 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2999 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2948 | BUG_ON(ret); | 3000 | if (ret) |
3001 | goto end_trans; | ||
2949 | 3002 | ||
2950 | /* | 3003 | /* |
2951 | * for regular files, if its inode is already on disk, we don't | 3004 | * for regular files, if its inode is already on disk, we don't |
@@ -2955,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2955 | */ | 3008 | */ |
2956 | if (S_ISREG(inode->i_mode) && | 3009 | if (S_ISREG(inode->i_mode) && |
2957 | BTRFS_I(inode)->generation <= last_committed && | 3010 | BTRFS_I(inode)->generation <= last_committed && |
2958 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | 3011 | BTRFS_I(inode)->last_unlink_trans <= last_committed) { |
2959 | goto no_parent; | 3012 | ret = 0; |
3013 | goto end_trans; | ||
3014 | } | ||
2960 | 3015 | ||
2961 | inode_only = LOG_INODE_EXISTS; | 3016 | inode_only = LOG_INODE_EXISTS; |
2962 | while (1) { | 3017 | while (1) { |
@@ -2970,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2970 | if (BTRFS_I(inode)->generation > | 3025 | if (BTRFS_I(inode)->generation > |
2971 | root->fs_info->last_trans_committed) { | 3026 | root->fs_info->last_trans_committed) { |
2972 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 3027 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2973 | BUG_ON(ret); | 3028 | if (ret) |
3029 | goto end_trans; | ||
2974 | } | 3030 | } |
2975 | if (IS_ROOT(parent)) | 3031 | if (IS_ROOT(parent)) |
2976 | break; | 3032 | break; |
2977 | 3033 | ||
2978 | parent = parent->d_parent; | 3034 | parent = parent->d_parent; |
2979 | } | 3035 | } |
2980 | no_parent: | ||
2981 | ret = 0; | 3036 | ret = 0; |
3037 | end_trans: | ||
3038 | if (ret < 0) { | ||
3039 | BUG_ON(ret != -ENOSPC); | ||
3040 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
3041 | ret = 1; | ||
3042 | } | ||
2982 | btrfs_end_log_trans(root); | 3043 | btrfs_end_log_trans(root); |
2983 | end_no_trans: | 3044 | end_no_trans: |
2984 | return ret; | 3045 | return ret; |
@@ -3020,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3020 | path = btrfs_alloc_path(); | 3081 | path = btrfs_alloc_path(); |
3021 | BUG_ON(!path); | 3082 | BUG_ON(!path); |
3022 | 3083 | ||
3023 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | 3084 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3024 | 3085 | ||
3025 | wc.trans = trans; | 3086 | wc.trans = trans; |
3026 | wc.pin = 1; | 3087 | wc.pin = 1; |
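The tree-log.c conversion above trades most BUG_ON() calls for returned errors, and treats -ENOSPC in the log as a soft failure: the transaction is marked for a full commit via last_trans_log_full_commit and the caller falls back to that. A minimal model of the downgrade path, with illustrative names only:

	/* Sketch only: map a log -ENOSPC to "force a full commit" instead of BUG. */
	#include <errno.h>
	#include <stdio.h>

	struct fs_info { long last_full_commit; };

	static int sync_log(struct fs_info *fs, long transid, int write_ret)
	{
		if (write_ret == -ENOSPC) {
			fs->last_full_commit = transid;	/* force a full tree commit */
			return -EAGAIN;			/* caller falls back to it */
		}
		return write_ret;
	}

	int main(void)
	{
		struct fs_info fs = { 0 };
		int ret = sync_log(&fs, 42, -ENOSPC);
		printf("ret=%d, full commit forced at transid %ld\n",
		       ret, fs.last_full_commit);
		return 0;
	}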
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0776eacb5083..3dfae84c8cc8 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -25,6 +25,8 @@ | |||
25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
26 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
29 | struct btrfs_fs_info *fs_info); | ||
28 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
29 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
30 | struct btrfs_root *root, struct dentry *dentry); | 32 | struct btrfs_root *root, struct dentry *dentry); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8db7b14bbae8..d6e3af8be95b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1097 | if (!path) | 1097 | if (!path) |
1098 | return -ENOMEM; | 1098 | return -ENOMEM; |
1099 | 1099 | ||
1100 | trans = btrfs_start_transaction(root, 1); | 1100 | trans = btrfs_start_transaction(root, 0); |
1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1102 | key.type = BTRFS_DEV_ITEM_KEY; | 1102 | key.type = BTRFS_DEV_ITEM_KEY; |
1103 | key.offset = device->devid; | 1103 | key.offset = device->devid; |
@@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1486 | goto error; | 1486 | goto error; |
1487 | } | 1487 | } |
1488 | 1488 | ||
1489 | trans = btrfs_start_transaction(root, 1); | 1489 | trans = btrfs_start_transaction(root, 0); |
1490 | lock_chunks(root); | 1490 | lock_chunks(root); |
1491 | 1491 | ||
1492 | device->barriers = 1; | 1492 | device->barriers = 1; |
@@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1751 | 1751 | ||
1752 | /* step one, relocate all the extents inside this chunk */ | 1752 | /* step one, relocate all the extents inside this chunk */ |
1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
1754 | BUG_ON(ret); | 1754 | if (ret) |
1755 | return ret; | ||
1755 | 1756 | ||
1756 | trans = btrfs_start_transaction(root, 1); | 1757 | trans = btrfs_start_transaction(root, 0); |
1757 | BUG_ON(!trans); | 1758 | BUG_ON(!trans); |
1758 | 1759 | ||
1759 | lock_chunks(root); | 1760 | lock_chunks(root); |
@@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1925 | break; | 1926 | break; |
1926 | BUG_ON(ret); | 1927 | BUG_ON(ret); |
1927 | 1928 | ||
1928 | trans = btrfs_start_transaction(dev_root, 1); | 1929 | trans = btrfs_start_transaction(dev_root, 0); |
1929 | BUG_ON(!trans); | 1930 | BUG_ON(!trans); |
1930 | 1931 | ||
1931 | ret = btrfs_grow_device(trans, device, old_size); | 1932 | ret = btrfs_grow_device(trans, device, old_size); |
@@ -2094,11 +2095,7 @@ again: | |||
2094 | } | 2095 | } |
2095 | 2096 | ||
2096 | /* Shrinking succeeded, else we would be at "done". */ | 2097 | /* Shrinking succeeded, else we would be at "done". */ |
2097 | trans = btrfs_start_transaction(root, 1); | 2098 | trans = btrfs_start_transaction(root, 0); |
2098 | if (!trans) { | ||
2099 | ret = -ENOMEM; | ||
2100 | goto done; | ||
2101 | } | ||
2102 | lock_chunks(root); | 2099 | lock_chunks(root); |
2103 | 2100 | ||
2104 | device->disk_total_bytes = new_size; | 2101 | device->disk_total_bytes = new_size; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 59acd3eb288a..88ecbb215878 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
154 | if (trans) | 154 | if (trans) |
155 | return do_setxattr(trans, inode, name, value, size, flags); | 155 | return do_setxattr(trans, inode, name, value, size, flags); |
156 | 156 | ||
157 | ret = btrfs_reserve_metadata_space(root, 2); | 157 | trans = btrfs_start_transaction(root, 2); |
158 | if (ret) | 158 | if (IS_ERR(trans)) |
159 | return ret; | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | trans = btrfs_start_transaction(root, 1); | ||
162 | if (!trans) { | ||
163 | ret = -ENOMEM; | ||
164 | goto out; | ||
165 | } | ||
166 | btrfs_set_trans_block_group(trans, inode); | 161 | btrfs_set_trans_block_group(trans, inode); |
167 | 162 | ||
168 | ret = do_setxattr(trans, inode, name, value, size, flags); | 163 | ret = do_setxattr(trans, inode, name, value, size, flags); |
@@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
174 | BUG_ON(ret); | 169 | BUG_ON(ret); |
175 | out: | 170 | out: |
176 | btrfs_end_transaction_throttle(trans, root); | 171 | btrfs_end_transaction_throttle(trans, root); |
177 | btrfs_unreserve_metadata_space(root, 2); | ||
178 | return ret; | 172 | return ret; |
179 | } | 173 | } |
180 | 174 | ||
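In the xattr path above, the explicit reserve/unreserve pair folds into btrfs_start_transaction(root, 2) (the item count of 2 carries over from the old reserve call), and the result must now be checked with IS_ERR() because a failed reservation returns an encoded errno rather than NULL. A small userspace model of that ERR_PTR-style convention, mimicking the kernel's ERR_PTR/IS_ERR helpers:

	/* Sketch only: an error is smuggled through the pointer value. */
	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MAX_ERRNO 4095

	static void *ERR_PTR(long err) { return (void *)err; }
	static long PTR_ERR(const void *p) { return (long)p; }
	static int IS_ERR(const void *p)
	{
		return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
	}

	struct handle { int items; };

	static struct handle *start_transaction(int num_items)
	{
		static struct handle h;
		if (num_items > 2)
			return ERR_PTR(-ENOSPC);	/* reservation failed */
		h.items = num_items;
		return &h;
	}

	int main(void)
	{
		struct handle *t = start_transaction(2);	/* e.g. xattr + inode item */
		if (IS_ERR(t))
			return (int)-PTR_ERR(t);
		printf("reserved for %d items\n", t->items);
		return 0;
	}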
diff --git a/fs/buffer.c b/fs/buffer.c index e8aa7081d25c..d54812b198e9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1949 | } | 1949 | } |
1950 | 1950 | ||
1951 | /* | 1951 | /* |
1952 | * block_write_begin takes care of the basic task of block allocation and | 1952 | * Filesystems implementing the new truncate sequence should use the |
1953 | * bringing partial write blocks uptodate first. | 1953 | * _newtrunc postfix variant which won't incorrectly call vmtruncate. |
1954 | * | 1954 | * The filesystem needs to handle block truncation upon failure. |
1955 | * If *pagep is not NULL, then block_write_begin uses the locked page | ||
1956 | * at *pagep rather than allocating its own. In this case, the page will | ||
1957 | * not be unlocked or deallocated on failure. | ||
1958 | */ | 1955 | */ |
1959 | int block_write_begin(struct file *file, struct address_space *mapping, | 1956 | int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, |
1960 | loff_t pos, unsigned len, unsigned flags, | 1957 | loff_t pos, unsigned len, unsigned flags, |
1961 | struct page **pagep, void **fsdata, | 1958 | struct page **pagep, void **fsdata, |
1962 | get_block_t *get_block) | 1959 | get_block_t *get_block) |
@@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping, | |||
1992 | unlock_page(page); | 1989 | unlock_page(page); |
1993 | page_cache_release(page); | 1990 | page_cache_release(page); |
1994 | *pagep = NULL; | 1991 | *pagep = NULL; |
1995 | |||
1996 | /* | ||
1997 | * prepare_write() may have instantiated a few blocks | ||
1998 | * outside i_size. Trim these off again. Don't need | ||
1999 | * i_size_read because we hold i_mutex. | ||
2000 | */ | ||
2001 | if (pos + len > inode->i_size) | ||
2002 | vmtruncate(inode, inode->i_size); | ||
2003 | } | 1992 | } |
2004 | } | 1993 | } |
2005 | 1994 | ||
2006 | out: | 1995 | out: |
2007 | return status; | 1996 | return status; |
2008 | } | 1997 | } |
1998 | EXPORT_SYMBOL(block_write_begin_newtrunc); | ||
1999 | |||
2000 | /* | ||
2001 | * block_write_begin takes care of the basic task of block allocation and | ||
2002 | * bringing partial write blocks uptodate first. | ||
2003 | * | ||
2004 | * If *pagep is not NULL, then block_write_begin uses the locked page | ||
2005 | * at *pagep rather than allocating its own. In this case, the page will | ||
2006 | * not be unlocked or deallocated on failure. | ||
2007 | */ | ||
2008 | int block_write_begin(struct file *file, struct address_space *mapping, | ||
2009 | loff_t pos, unsigned len, unsigned flags, | ||
2010 | struct page **pagep, void **fsdata, | ||
2011 | get_block_t *get_block) | ||
2012 | { | ||
2013 | int ret; | ||
2014 | |||
2015 | ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, | ||
2016 | pagep, fsdata, get_block); | ||
2017 | |||
2018 | /* | ||
2019 | * prepare_write() may have instantiated a few blocks | ||
2020 | * outside i_size. Trim these off again. Don't need | ||
2021 | * i_size_read because we hold i_mutex. | ||
2022 | * | ||
2023 | * Filesystems which pass down their own page also cannot | ||
2024 | * call into vmtruncate here because it would lead to lock | ||
2025 | * inversion problems (*pagep is locked). This is a further | ||
2026 | * example of where the old truncate sequence is inadequate. | ||
2027 | */ | ||
2028 | if (unlikely(ret) && *pagep == NULL) { | ||
2029 | loff_t isize = mapping->host->i_size; | ||
2030 | if (pos + len > isize) | ||
2031 | vmtruncate(mapping->host, isize); | ||
2032 | } | ||
2033 | |||
2034 | return ret; | ||
2035 | } | ||
2009 | EXPORT_SYMBOL(block_write_begin); | 2036 | EXPORT_SYMBOL(block_write_begin); |
2010 | 2037 | ||
2011 | int block_write_end(struct file *file, struct address_space *mapping, | 2038 | int block_write_end(struct file *file, struct address_space *mapping, |
@@ -2324,7 +2351,7 @@ out: | |||
2324 | * For moronic filesystems that do not allow holes in file. | 2351 | * For moronic filesystems that do not allow holes in file. |
2325 | * We may have to extend the file. | 2352 | * We may have to extend the file. |
2326 | */ | 2353 | */ |
2327 | int cont_write_begin(struct file *file, struct address_space *mapping, | 2354 | int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, |
2328 | loff_t pos, unsigned len, unsigned flags, | 2355 | loff_t pos, unsigned len, unsigned flags, |
2329 | struct page **pagep, void **fsdata, | 2356 | struct page **pagep, void **fsdata, |
2330 | get_block_t *get_block, loff_t *bytes) | 2357 | get_block_t *get_block, loff_t *bytes) |
@@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping, | |||
2345 | } | 2372 | } |
2346 | 2373 | ||
2347 | *pagep = NULL; | 2374 | *pagep = NULL; |
2348 | err = block_write_begin(file, mapping, pos, len, | 2375 | err = block_write_begin_newtrunc(file, mapping, pos, len, |
2349 | flags, pagep, fsdata, get_block); | 2376 | flags, pagep, fsdata, get_block); |
2350 | out: | 2377 | out: |
2351 | return err; | 2378 | return err; |
2352 | } | 2379 | } |
2380 | EXPORT_SYMBOL(cont_write_begin_newtrunc); | ||
2381 | |||
2382 | int cont_write_begin(struct file *file, struct address_space *mapping, | ||
2383 | loff_t pos, unsigned len, unsigned flags, | ||
2384 | struct page **pagep, void **fsdata, | ||
2385 | get_block_t *get_block, loff_t *bytes) | ||
2386 | { | ||
2387 | int ret; | ||
2388 | |||
2389 | ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags, | ||
2390 | pagep, fsdata, get_block, bytes); | ||
2391 | if (unlikely(ret)) { | ||
2392 | loff_t isize = mapping->host->i_size; | ||
2393 | if (pos + len > isize) | ||
2394 | vmtruncate(mapping->host, isize); | ||
2395 | } | ||
2396 | |||
2397 | return ret; | ||
2398 | } | ||
2353 | EXPORT_SYMBOL(cont_write_begin); | 2399 | EXPORT_SYMBOL(cont_write_begin); |
2354 | 2400 | ||
2355 | int block_prepare_write(struct page *page, unsigned from, unsigned to, | 2401 | int block_prepare_write(struct page *page, unsigned from, unsigned to, |
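cont_write_begin() gets the same split. For a filesystem converted to the new truncate sequence, the point is to call the _newtrunc variant directly and own the error-path cleanup instead of relying on vmtruncate(). A hedged in-kernel sketch of such a caller, where myfs_get_block and myfs_truncate_blocks are hypothetical:

static int myfs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	int ret;

	*pagep = NULL;
	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
					 pagep, fsdata, myfs_get_block);
	if (unlikely(ret) && pos + len > mapping->host->i_size)
		myfs_truncate_blocks(mapping->host);	/* fs-specific trim, fs locking rules */
	return ret;
}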
@@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write); | |||
2381 | * | 2427 | * |
2382 | * We are not allowed to take the i_mutex here so we have to play games to | 2428 | * We are not allowed to take the i_mutex here so we have to play games to |
2383 | * protect against truncate races as the page could now be beyond EOF. Because | 2429 | * protect against truncate races as the page could now be beyond EOF. Because |
2384 | * vmtruncate() writes the inode size before removing pages, once we have the | 2430 | * truncate writes the inode size before removing pages, once we have the |
2385 | * page lock we can determine safely if the page is beyond EOF. If it is not | 2431 | * page lock we can determine safely if the page is beyond EOF. If it is not |
2386 | * beyond EOF, then the page is guaranteed safe against truncation until we | 2432 | * beyond EOF, then the page is guaranteed safe against truncation until we |
2387 | * unlock the page. | 2433 | * unlock the page. |
@@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head) | |||
2464 | } | 2510 | } |
2465 | 2511 | ||
2466 | /* | 2512 | /* |
2467 | * On entry, the page is fully not uptodate. | 2513 | * Filesystems implementing the new truncate sequence should use the |
2468 | * On exit the page is fully uptodate in the areas outside (from,to) | 2514 | * _newtrunc postfix variant which won't incorrectly call vmtruncate. |
2515 | * The filesystem needs to handle block truncation upon failure. | ||
2469 | */ | 2516 | */ |
2470 | int nobh_write_begin(struct file *file, struct address_space *mapping, | 2517 | int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, |
2471 | loff_t pos, unsigned len, unsigned flags, | 2518 | loff_t pos, unsigned len, unsigned flags, |
2472 | struct page **pagep, void **fsdata, | 2519 | struct page **pagep, void **fsdata, |
2473 | get_block_t *get_block) | 2520 | get_block_t *get_block) |
@@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, | |||
2500 | unlock_page(page); | 2547 | unlock_page(page); |
2501 | page_cache_release(page); | 2548 | page_cache_release(page); |
2502 | *pagep = NULL; | 2549 | *pagep = NULL; |
2503 | return block_write_begin(file, mapping, pos, len, flags, pagep, | 2550 | return block_write_begin_newtrunc(file, mapping, pos, len, |
2504 | fsdata, get_block); | 2551 | flags, pagep, fsdata, get_block); |
2505 | } | 2552 | } |
2506 | 2553 | ||
2507 | if (PageMappedToDisk(page)) | 2554 | if (PageMappedToDisk(page)) |
@@ -2605,8 +2652,34 @@ out_release: | |||
2605 | page_cache_release(page); | 2652 | page_cache_release(page); |
2606 | *pagep = NULL; | 2653 | *pagep = NULL; |
2607 | 2654 | ||
2608 | if (pos + len > inode->i_size) | 2655 | return ret; |
2609 | vmtruncate(inode, inode->i_size); | 2656 | } |
2657 | EXPORT_SYMBOL(nobh_write_begin_newtrunc); | ||
2658 | |||
2659 | /* | ||
2660 | * On entry, the page is fully not uptodate. | ||
2661 | * On exit the page is fully uptodate in the areas outside (from,to) | ||
2662 | */ | ||
2663 | int nobh_write_begin(struct file *file, struct address_space *mapping, | ||
2664 | loff_t pos, unsigned len, unsigned flags, | ||
2665 | struct page **pagep, void **fsdata, | ||
2666 | get_block_t *get_block) | ||
2667 | { | ||
2668 | int ret; | ||
2669 | |||
2670 | ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, | ||
2671 | pagep, fsdata, get_block); | ||
2672 | |||
2673 | /* | ||
2674 | * prepare_write() may have instantiated a few blocks | ||
2675 | * outside i_size. Trim these off again. Don't need | ||
2676 | * i_size_read because we hold i_mutex. | ||
2677 | */ | ||
2678 | if (unlikely(ret)) { | ||
2679 | loff_t isize = mapping->host->i_size; | ||
2680 | if (pos + len > isize) | ||
2681 | vmtruncate(mapping->host, isize); | ||
2682 | } | ||
2610 | 2683 | ||
2611 | return ret; | 2684 | return ret; |
2612 | } | 2685 | } |
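The reason block_write_begin() only trims when *pagep was NULL, as its comment notes, is that a caller passing its own page already holds that page locked, and truncation also needs to lock pages, so calling vmtruncate() there risks deadlock through inconsistent lock ordering. A generic userspace sketch of the hazard, with illustrative names:

#include <pthread.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;	/* i_mutex analogue */
static pthread_mutex_t page_lock  = PTHREAD_MUTEX_INITIALIZER;	/* page lock analogue */

/* writer path: takes the inode lock, then the page lock */
static void writer(void)
{
	pthread_mutex_lock(&inode_lock);
	pthread_mutex_lock(&page_lock);
	pthread_mutex_unlock(&page_lock);
	pthread_mutex_unlock(&inode_lock);
}

/* a cleanup path entered while page_lock is already held would need the
 * inode lock next, the opposite order, and can deadlock against writer();
 * hence the wrapper skips the trim when the caller owns the page */
int main(void)
{
	writer();
	return 0;
}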
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a9005d862ed4..d9c60b84949a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
274 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 274 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; |
275 | int rc = 0; | 275 | int rc = 0; |
276 | struct page **pages; | 276 | struct page **pages; |
277 | struct pagevec pvec; | ||
278 | loff_t offset; | 277 | loff_t offset; |
279 | u64 len; | 278 | u64 len; |
280 | 279 | ||
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
297 | if (rc < 0) | 296 | if (rc < 0) |
298 | goto out; | 297 | goto out; |
299 | 298 | ||
300 | /* set uptodate and add to lru in pagevec-sized chunks */ | ||
301 | pagevec_init(&pvec, 0); | ||
302 | for (; !list_empty(page_list) && len > 0; | 299 | for (; !list_empty(page_list) && len > 0; |
303 | rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { | 300 | rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { |
304 | struct page *page = | 301 | struct page *page = |
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
312 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
313 | } | 310 | } |
314 | 311 | ||
315 | if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { |
316 | page_cache_release(page); | 313 | page_cache_release(page); |
317 | dout("readpages %p add_to_page_cache failed %p\n", | 314 | dout("readpages %p add_to_page_cache failed %p\n", |
318 | inode, page); | 315 | inode, page); |
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
323 | flush_dcache_page(page); | 320 | flush_dcache_page(page); |
324 | SetPageUptodate(page); | 321 | SetPageUptodate(page); |
325 | unlock_page(page); | 322 | unlock_page(page); |
326 | if (pagevec_add(&pvec, page) == 0) | 323 | page_cache_release(page); |
327 | pagevec_lru_add_file(&pvec); /* add to lru */ | ||
328 | } | 324 | } |
329 | pagevec_lru_add_file(&pvec); | ||
330 | rc = 0; | 325 | rc = 0; |
331 | 326 | ||
332 | out: | 327 | out: |
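The readpages rework above drops the hand-rolled pagevec batching: add_to_page_cache_lru() both inserts the page into the cache and places it on the LRU, so each page can simply be released once marked uptodate. A condensed kernel-context sketch of the resulting loop shape, with I/O and error handling omitted:

/* sketch only; not a drop-in function */
static void demo_add_pages(struct address_space *mapping,
			   struct list_head *page_list)
{
	while (!list_empty(page_list)) {
		struct page *page = list_entry(page_list->prev,
					       struct page, lru);

		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
					  GFP_NOFS)) {
			page_cache_release(page);	/* insertion failed */
			continue;
		}
		SetPageUptodate(page);		/* assume data already copied in */
		unlock_page(page);
		page_cache_release(page);	/* drop our ref; cache and LRU hold theirs */
	}
}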
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
568 | ceph_release_pages(req->r_pages, req->r_num_pages); | 563 | ceph_release_pages(req->r_pages, req->r_num_pages); |
569 | if (req->r_pages_from_pool) | 564 | if (req->r_pages_from_pool) |
570 | mempool_free(req->r_pages, | 565 | mempool_free(req->r_pages, |
571 | ceph_client(inode->i_sb)->wb_pagevec_pool); | 566 | ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); |
572 | else | 567 | else |
573 | kfree(req->r_pages); | 568 | kfree(req->r_pages); |
574 | ceph_osdc_put_request(req); | 569 | ceph_osdc_put_request(req); |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 818afe72e6c7..89490beaf537 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -1,7 +1,6 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include "ceph_debug.h" |
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/slab.h> | ||
5 | #include <linux/err.h> | 4 | #include <linux/err.h> |
6 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
7 | 6 | ||
@@ -150,7 +149,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac, | |||
150 | 149 | ||
151 | ret = ac->ops->build_request(ac, p + sizeof(u32), end); | 150 | ret = ac->ops->build_request(ac, p + sizeof(u32), end); |
152 | if (ret < 0) { | 151 | if (ret < 0) { |
153 | pr_err("error %d building request\n", ret); | 152 | pr_err("error %d building auth method %s request\n", ret, |
153 | ac->ops->name); | ||
154 | return ret; | 154 | return ret; |
155 | } | 155 | } |
156 | dout(" built request %d bytes\n", ret); | 156 | dout(" built request %d bytes\n", ret); |
@@ -229,7 +229,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, | |||
229 | if (ret == -EAGAIN) { | 229 | if (ret == -EAGAIN) { |
230 | return ceph_build_auth_request(ac, reply_buf, reply_len); | 230 | return ceph_build_auth_request(ac, reply_buf, reply_len); |
231 | } else if (ret) { | 231 | } else if (ret) { |
232 | pr_err("authentication error %d\n", ret); | 232 | pr_err("auth method '%s' error %d\n", ac->ops->name, ret); |
233 | return ret; | 233 | return ret; |
234 | } | 234 | } |
235 | return 0; | 235 | return 0; |
@@ -246,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, | |||
246 | if (!ac->protocol) | 246 | if (!ac->protocol) |
247 | return ceph_auth_build_hello(ac, msg_buf, msg_len); | 247 | return ceph_auth_build_hello(ac, msg_buf, msg_len); |
248 | BUG_ON(!ac->ops); | 248 | BUG_ON(!ac->ops); |
249 | if (!ac->ops->is_authenticated(ac)) | 249 | if (ac->ops->should_authenticate(ac)) |
250 | return ceph_build_auth_request(ac, msg_buf, msg_len); | 250 | return ceph_build_auth_request(ac, msg_buf, msg_len); |
251 | return 0; | 251 | return 0; |
252 | } | 252 | } |
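ceph_build_auth() now asks should_authenticate() rather than !is_authenticated(): the two questions differ, since a client can still be authenticated while its tickets are close enough to expiry that it should request fresh ones. A toy userspace model of the two predicates; the 60-second renewal margin is invented:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct ticket { time_t expires; };

/* "can I talk to services right now?" */
static bool is_authenticated(const struct ticket *t)
{
	return t->expires > time(NULL);
}

/* "should I ask for fresh tickets?" May be true while still valid. */
static bool should_authenticate(const struct ticket *t)
{
	return t->expires - time(NULL) < 60;
}

int main(void)
{
	struct ticket t = { .expires = time(NULL) + 30 };

	printf("authenticated=%d should_reauth=%d\n",
	       is_authenticated(&t), should_authenticate(&t));
	return 0;
}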
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index ca4f57cfb267..d38a2fb4a137 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h | |||
@@ -15,6 +15,8 @@ struct ceph_auth_client; | |||
15 | struct ceph_authorizer; | 15 | struct ceph_authorizer; |
16 | 16 | ||
17 | struct ceph_auth_client_ops { | 17 | struct ceph_auth_client_ops { |
18 | const char *name; | ||
19 | |||
18 | /* | 20 | /* |
19 | * true if we are authenticated and can connect to | 21 | * true if we are authenticated and can connect to |
20 | * services. | 22 | * services. |
@@ -22,6 +24,12 @@ struct ceph_auth_client_ops { | |||
22 | int (*is_authenticated)(struct ceph_auth_client *ac); | 24 | int (*is_authenticated)(struct ceph_auth_client *ac); |
23 | 25 | ||
24 | /* | 26 | /* |
27 | * true if we should (re)authenticate, e.g., when our tickets | ||
28 | * are getting old and crusty. | ||
29 | */ | ||
30 | int (*should_authenticate)(struct ceph_auth_client *ac); | ||
31 | |||
32 | /* | ||
25 | * build requests and process replies during monitor | 33 | * build requests and process replies during monitor |
26 | * handshake. if handle_reply returns -EAGAIN, we build | 34 | * handshake. if handle_reply returns -EAGAIN, we build |
27 | * another request. | 35 | * another request. |
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 8cd9e3af07f7..ad1dc21286c7 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c | |||
@@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac) | |||
31 | return !xi->starting; | 31 | return !xi->starting; |
32 | } | 32 | } |
33 | 33 | ||
34 | static int should_authenticate(struct ceph_auth_client *ac) | ||
35 | { | ||
36 | struct ceph_auth_none_info *xi = ac->private; | ||
37 | |||
38 | return xi->starting; | ||
39 | } | ||
40 | |||
34 | /* | 41 | /* |
35 | * the generic auth code decode the global_id, and we carry no actual | 42 | * the generic auth code decode the global_id, and we carry no actual |
36 | * authenticate state, so nothing happens here. | 43 | * authenticate state, so nothing happens here. |
@@ -94,9 +101,11 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, | |||
94 | } | 101 | } |
95 | 102 | ||
96 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { | 103 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { |
104 | .name = "none", | ||
97 | .reset = reset, | 105 | .reset = reset, |
98 | .destroy = destroy, | 106 | .destroy = destroy, |
99 | .is_authenticated = is_authenticated, | 107 | .is_authenticated = is_authenticated, |
108 | .should_authenticate = should_authenticate, | ||
100 | .handle_reply = handle_reply, | 109 | .handle_reply = handle_reply, |
101 | .create_authorizer = ceph_auth_none_create_authorizer, | 110 | .create_authorizer = ceph_auth_none_create_authorizer, |
102 | .destroy_authorizer = ceph_auth_none_destroy_authorizer, | 111 | .destroy_authorizer = ceph_auth_none_destroy_authorizer, |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index fee5a08da881..83d4d2785ffe 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) | |||
27 | return (ac->want_keys & xi->have_keys) == ac->want_keys; | 27 | return (ac->want_keys & xi->have_keys) == ac->want_keys; |
28 | } | 28 | } |
29 | 29 | ||
30 | static int ceph_x_should_authenticate(struct ceph_auth_client *ac) | ||
31 | { | ||
32 | struct ceph_x_info *xi = ac->private; | ||
33 | int need; | ||
34 | |||
35 | ceph_x_validate_tickets(ac, &need); | ||
36 | dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", | ||
37 | ac->want_keys, need, xi->have_keys); | ||
38 | return need != 0; | ||
39 | } | ||
40 | |||
30 | static int ceph_x_encrypt_buflen(int ilen) | 41 | static int ceph_x_encrypt_buflen(int ilen) |
31 | { | 42 | { |
32 | return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + | 43 | return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + |
@@ -127,7 +138,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
127 | int ret; | 138 | int ret; |
128 | char *dbuf; | 139 | char *dbuf; |
129 | char *ticket_buf; | 140 | char *ticket_buf; |
130 | u8 struct_v; | 141 | u8 reply_struct_v; |
131 | 142 | ||
132 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); | 143 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); |
133 | if (!dbuf) | 144 | if (!dbuf) |
@@ -139,14 +150,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
139 | goto out_dbuf; | 150 | goto out_dbuf; |
140 | 151 | ||
141 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); | 152 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); |
142 | struct_v = ceph_decode_8(&p); | 153 | reply_struct_v = ceph_decode_8(&p); |
143 | if (struct_v != 1) | 154 | if (reply_struct_v != 1) |
144 | goto bad; | 155 | goto bad; |
145 | num = ceph_decode_32(&p); | 156 | num = ceph_decode_32(&p); |
146 | dout("%d tickets\n", num); | 157 | dout("%d tickets\n", num); |
147 | while (num--) { | 158 | while (num--) { |
148 | int type; | 159 | int type; |
149 | u8 struct_v; | 160 | u8 tkt_struct_v, blob_struct_v; |
150 | struct ceph_x_ticket_handler *th; | 161 | struct ceph_x_ticket_handler *th; |
151 | void *dp, *dend; | 162 | void *dp, *dend; |
152 | int dlen; | 163 | int dlen; |
@@ -165,8 +176,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
165 | type = ceph_decode_32(&p); | 176 | type = ceph_decode_32(&p); |
166 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); | 177 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); |
167 | 178 | ||
168 | struct_v = ceph_decode_8(&p); | 179 | tkt_struct_v = ceph_decode_8(&p); |
169 | if (struct_v != 1) | 180 | if (tkt_struct_v != 1) |
170 | goto bad; | 181 | goto bad; |
171 | 182 | ||
172 | th = get_ticket_handler(ac, type); | 183 | th = get_ticket_handler(ac, type); |
@@ -186,8 +197,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
186 | dend = dbuf + dlen; | 197 | dend = dbuf + dlen; |
187 | dp = dbuf; | 198 | dp = dbuf; |
188 | 199 | ||
189 | struct_v = ceph_decode_8(&dp); | 200 | tkt_struct_v = ceph_decode_8(&dp); |
190 | if (struct_v != 1) | 201 | if (tkt_struct_v != 1) |
191 | goto bad; | 202 | goto bad; |
192 | 203 | ||
193 | memcpy(&old_key, &th->session_key, sizeof(old_key)); | 204 | memcpy(&old_key, &th->session_key, sizeof(old_key)); |
@@ -224,7 +235,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
224 | tpend = tp + dlen; | 235 | tpend = tp + dlen; |
225 | dout(" ticket blob is %d bytes\n", dlen); | 236 | dout(" ticket blob is %d bytes\n", dlen); |
226 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); | 237 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); |
227 | struct_v = ceph_decode_8(&tp); | 238 | blob_struct_v = ceph_decode_8(&tp); |
228 | new_secret_id = ceph_decode_64(&tp); | 239 | new_secret_id = ceph_decode_64(&tp); |
229 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); | 240 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); |
230 | if (ret) | 241 | if (ret) |
@@ -618,7 +629,9 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | |||
618 | 629 | ||
619 | 630 | ||
620 | static const struct ceph_auth_client_ops ceph_x_ops = { | 631 | static const struct ceph_auth_client_ops ceph_x_ops = { |
632 | .name = "x", | ||
621 | .is_authenticated = ceph_x_is_authenticated, | 633 | .is_authenticated = ceph_x_is_authenticated, |
634 | .should_authenticate = ceph_x_should_authenticate, | ||
622 | .build_request = ceph_x_build_request, | 635 | .build_request = ceph_x_build_request, |
623 | .handle_reply = ceph_x_handle_reply, | 636 | .handle_reply = ceph_x_handle_reply, |
624 | .create_authorizer = ceph_x_create_authorizer, | 637 | .create_authorizer = ceph_x_create_authorizer, |
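The struct_v renames above fix a classic shadowing trap: the inner `u8 struct_v` declaration hid the outer variable of the same name, so giving each decode step its own name (reply_, tkt_, blob_) keeps the scopes honest. A compact demonstration of what shadowing does:

#include <stdio.h>

int main(void)
{
	unsigned char struct_v = 1;		/* outer: reply version */

	{
		unsigned char struct_v = 2;	/* inner: silently shadows the outer */
		printf("inner sees %u\n", struct_v);
	}
	printf("outer is still %u\n", struct_v);	/* prints 1 */
	return 0;
}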
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d9400534b279..ae3e3a306445 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
867 | { | 867 | { |
868 | struct ceph_mds_session *session = cap->session; | 868 | struct ceph_mds_session *session = cap->session; |
869 | struct ceph_inode_info *ci = cap->ci; | 869 | struct ceph_inode_info *ci = cap->ci; |
870 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 870 | struct ceph_mds_client *mdsc = |
871 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||
871 | int removed = 0; | 872 | int removed = 0; |
872 | 873 | ||
873 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 874 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session, | |||
937 | seq, issue_seq, mseq, follows, size, max_size, | 938 | seq, issue_seq, mseq, follows, size, max_size, |
938 | xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); | 939 | xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); |
939 | 940 | ||
940 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); | 941 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS); |
941 | if (IS_ERR(msg)) | 942 | if (!msg) |
942 | return PTR_ERR(msg); | 943 | return -ENOMEM; |
943 | 944 | ||
944 | msg->hdr.tid = cpu_to_le64(flush_tid); | 945 | msg->hdr.tid = cpu_to_le64(flush_tid); |
945 | 946 | ||
@@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1298 | */ | 1299 | */ |
1299 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1300 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
1300 | { | 1301 | { |
1301 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 1302 | struct ceph_mds_client *mdsc = |
1303 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||
1302 | struct inode *inode = &ci->vfs_inode; | 1304 | struct inode *inode = &ci->vfs_inode; |
1303 | int was = ci->i_dirty_caps; | 1305 | int was = ci->i_dirty_caps; |
1304 | int dirty = 0; | 1306 | int dirty = 0; |
@@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1336 | static int __mark_caps_flushing(struct inode *inode, | 1338 | static int __mark_caps_flushing(struct inode *inode, |
1337 | struct ceph_mds_session *session) | 1339 | struct ceph_mds_session *session) |
1338 | { | 1340 | { |
1339 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1341 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
1340 | struct ceph_inode_info *ci = ceph_inode(inode); | 1342 | struct ceph_inode_info *ci = ceph_inode(inode); |
1341 | int flushing; | 1343 | int flushing; |
1342 | 1344 | ||
@@ -1663,7 +1665,7 @@ ack: | |||
1663 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1665 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
1664 | unsigned *flush_tid) | 1666 | unsigned *flush_tid) |
1665 | { | 1667 | { |
1666 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1668 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
1667 | struct ceph_inode_info *ci = ceph_inode(inode); | 1669 | struct ceph_inode_info *ci = ceph_inode(inode); |
1668 | int unlock_session = session ? 0 : 1; | 1670 | int unlock_session = session ? 0 : 1; |
1669 | int flushing = 0; | 1671 | int flushing = 0; |
@@ -1716,10 +1718,9 @@ out_unlocked: | |||
1716 | static int caps_are_flushed(struct inode *inode, unsigned tid) | 1718 | static int caps_are_flushed(struct inode *inode, unsigned tid) |
1717 | { | 1719 | { |
1718 | struct ceph_inode_info *ci = ceph_inode(inode); | 1720 | struct ceph_inode_info *ci = ceph_inode(inode); |
1719 | int dirty, i, ret = 1; | 1721 | int i, ret = 1; |
1720 | 1722 | ||
1721 | spin_lock(&inode->i_lock); | 1723 | spin_lock(&inode->i_lock); |
1722 | dirty = __ceph_caps_dirty(ci); | ||
1723 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1724 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1724 | if ((ci->i_flushing_caps & (1 << i)) && | 1725 | if ((ci->i_flushing_caps & (1 << i)) && |
1725 | ci->i_cap_flush_tid[i] <= tid) { | 1726 | ci->i_cap_flush_tid[i] <= tid) { |
@@ -1775,9 +1776,9 @@ out: | |||
1775 | spin_unlock(&ci->i_unsafe_lock); | 1776 | spin_unlock(&ci->i_unsafe_lock); |
1776 | } | 1777 | } |
1777 | 1778 | ||
1778 | int ceph_fsync(struct file *file, struct dentry *dentry, int datasync) | 1779 | int ceph_fsync(struct file *file, int datasync) |
1779 | { | 1780 | { |
1780 | struct inode *inode = dentry->d_inode; | 1781 | struct inode *inode = file->f_mapping->host; |
1781 | struct ceph_inode_info *ci = ceph_inode(inode); | 1782 | struct ceph_inode_info *ci = ceph_inode(inode); |
1782 | unsigned flush_tid; | 1783 | unsigned flush_tid; |
1783 | int ret; | 1784 | int ret; |
@@ -1829,7 +1830,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1829 | err = wait_event_interruptible(ci->i_cap_wq, | 1830 | err = wait_event_interruptible(ci->i_cap_wq, |
1830 | caps_are_flushed(inode, flush_tid)); | 1831 | caps_are_flushed(inode, flush_tid)); |
1831 | } else { | 1832 | } else { |
1832 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1833 | struct ceph_mds_client *mdsc = |
1834 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
1833 | 1835 | ||
1834 | spin_lock(&inode->i_lock); | 1836 | spin_lock(&inode->i_lock); |
1835 | if (__ceph_caps_dirty(ci)) | 1837 | if (__ceph_caps_dirty(ci)) |
@@ -2411,7 +2413,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2411 | __releases(inode->i_lock) | 2413 | __releases(inode->i_lock) |
2412 | { | 2414 | { |
2413 | struct ceph_inode_info *ci = ceph_inode(inode); | 2415 | struct ceph_inode_info *ci = ceph_inode(inode); |
2414 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 2416 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
2415 | unsigned seq = le32_to_cpu(m->seq); | 2417 | unsigned seq = le32_to_cpu(m->seq); |
2416 | int dirty = le32_to_cpu(m->dirty); | 2418 | int dirty = le32_to_cpu(m->dirty); |
2417 | int cleaned = 0; | 2419 | int cleaned = 0; |
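Note the allocation-contract change threaded through this file: ceph_msg_new() now takes explicit gfp flags and returns NULL on failure rather than an ERR_PTR, so callers test !msg and return -ENOMEM. A hedged sketch of the updated caller shape; demo_send_caps is invented and ceph_con_send is assumed from the surrounding messenger code:

static int demo_send_caps(struct ceph_mds_session *session, size_t size)
{
	struct ceph_msg *msg;

	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, size, GFP_NOFS);
	if (!msg)			/* NULL on failure, not ERR_PTR */
		return -ENOMEM;
	ceph_con_send(&session->s_con, msg);
	return 0;
}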
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 0c2241ef3653..2fa992eaf7da 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -19,7 +19,7 @@ | |||
19 | * Ceph release version | 19 | * Ceph release version |
20 | */ | 20 | */ |
21 | #define CEPH_VERSION_MAJOR 0 | 21 | #define CEPH_VERSION_MAJOR 0 |
22 | #define CEPH_VERSION_MINOR 19 | 22 | #define CEPH_VERSION_MINOR 20 |
23 | #define CEPH_VERSION_PATCH 0 | 23 | #define CEPH_VERSION_PATCH 0 |
24 | 24 | ||
25 | #define _CEPH_STRINGIFY(x) #x | 25 | #define _CEPH_STRINGIFY(x) #x |
@@ -36,7 +36,7 @@ | |||
36 | * client-facing protocol. | 36 | * client-facing protocol. |
37 | */ | 37 | */ |
38 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ | 38 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ |
39 | #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ | 39 | #define CEPH_MDS_PROTOCOL 12 /* cluster internal */ |
40 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ | 40 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ |
41 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ | 41 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ |
42 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ | 42 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ |
@@ -53,8 +53,18 @@ | |||
53 | /* | 53 | /* |
54 | * feature bits | 54 | * feature bits |
55 | */ | 55 | */ |
56 | #define CEPH_FEATURE_SUPPORTED 0 | 56 | #define CEPH_FEATURE_UID 1 |
57 | #define CEPH_FEATURE_REQUIRED 0 | 57 | #define CEPH_FEATURE_NOSRCADDR 2 |
58 | #define CEPH_FEATURE_FLOCK 4 | ||
59 | |||
60 | #define CEPH_FEATURE_SUPPORTED_MON (CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR) | ||
61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
62 | #define CEPH_FEATURE_SUPPORTED_MDS (CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK) | ||
63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
64 | #define CEPH_FEATURE_SUPPORTED_OSD (CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR) | ||
65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
58 | 68 | ||
59 | 69 | ||
60 | /* | 70 | /* |
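With per-role SUPPORTED/REQUIRED masks in place, connection setup can refuse a peer that lacks required bits. A sketch of such a check, with the function name invented; note that the compound masks are parenthesized above so that the | inside the macro binds before any surrounding &:

#include <stdint.h>

/* returns 0 when every required feature bit is advertised by the peer */
static int check_required_features(uint64_t advertised, uint64_t required)
{
	uint64_t missing = required & ~advertised;

	return missing ? -1 : 0;
}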
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
91 | #define CEPH_AUTH_NONE 0x1 | 101 | #define CEPH_AUTH_NONE 0x1 |
92 | #define CEPH_AUTH_CEPHX 0x2 | 102 | #define CEPH_AUTH_CEPHX 0x2 |
93 | 103 | ||
104 | #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) | ||
105 | |||
94 | 106 | ||
95 | /********************************************* | 107 | /********************************************* |
96 | * message layer | 108 | * message layer |
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
128 | #define CEPH_MSG_CLIENT_SNAP 0x312 | 140 | #define CEPH_MSG_CLIENT_SNAP 0x312 |
129 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 | 141 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 |
130 | 142 | ||
143 | /* pool ops */ | ||
144 | #define CEPH_MSG_POOLOP_REPLY 48 | ||
145 | #define CEPH_MSG_POOLOP 49 | ||
146 | |||
147 | |||
131 | /* osd */ | 148 | /* osd */ |
132 | #define CEPH_MSG_OSD_MAP 41 | 149 | #define CEPH_MSG_OSD_MAP 41 |
133 | #define CEPH_MSG_OSD_OP 42 | 150 | #define CEPH_MSG_OSD_OP 42 |
134 | #define CEPH_MSG_OSD_OPREPLY 43 | 151 | #define CEPH_MSG_OSD_OPREPLY 43 |
135 | 152 | ||
153 | /* pool operations */ | ||
154 | enum { | ||
155 | POOL_OP_CREATE = 0x01, | ||
156 | POOL_OP_DELETE = 0x02, | ||
157 | POOL_OP_AUID_CHANGE = 0x03, | ||
158 | POOL_OP_CREATE_SNAP = 0x11, | ||
159 | POOL_OP_DELETE_SNAP = 0x12, | ||
160 | POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, | ||
161 | POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, | ||
162 | }; | ||
163 | |||
136 | struct ceph_mon_request_header { | 164 | struct ceph_mon_request_header { |
137 | __le64 have_version; | 165 | __le64 have_version; |
138 | __le16 session_mon; | 166 | __le16 session_mon; |
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply { | |||
155 | struct ceph_statfs st; | 183 | struct ceph_statfs st; |
156 | } __attribute__ ((packed)); | 184 | } __attribute__ ((packed)); |
157 | 185 | ||
186 | const char *ceph_pool_op_name(int op); | ||
187 | |||
188 | struct ceph_mon_poolop { | ||
189 | struct ceph_mon_request_header monhdr; | ||
190 | struct ceph_fsid fsid; | ||
191 | __le32 pool; | ||
192 | __le32 op; | ||
193 | __le64 auid; | ||
194 | __le64 snapid; | ||
195 | __le32 name_len; | ||
196 | } __attribute__ ((packed)); | ||
197 | |||
198 | struct ceph_mon_poolop_reply { | ||
199 | struct ceph_mon_request_header monhdr; | ||
200 | struct ceph_fsid fsid; | ||
201 | __le32 reply_code; | ||
202 | __le32 epoch; | ||
203 | char has_data; | ||
204 | char data[0]; | ||
205 | } __attribute__ ((packed)); | ||
206 | |||
207 | struct ceph_mon_unmanaged_snap { | ||
208 | __le64 snapid; | ||
209 | } __attribute__ ((packed)); | ||
210 | |||
158 | struct ceph_osd_getmap { | 211 | struct ceph_osd_getmap { |
159 | struct ceph_mon_request_header monhdr; | 212 | struct ceph_mon_request_header monhdr; |
160 | struct ceph_fsid fsid; | 213 | struct ceph_fsid fsid; |
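The poolop structures are packed wire formats, and name_len suggests a variable-length pool name following the fixed ceph_mon_poolop header. Assuming that layout (the struct definition alone does not prove it), the request body size would be computed as:

/* assumption: the pool name immediately follows the packed header */
static size_t poolop_request_size(const struct ceph_mon_poolop *req)
{
	return sizeof(*req) + le32_to_cpu(req->name_len);
}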
@@ -212,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); | |||
212 | * - they also define the lock ordering by the MDS | 265 | * - they also define the lock ordering by the MDS |
213 | * - a few of these are internal to the mds | 266 | * - a few of these are internal to the mds |
214 | */ | 267 | */ |
215 | #define CEPH_LOCK_DN 1 | 268 | #define CEPH_LOCK_DVERSION 1 |
216 | #define CEPH_LOCK_ISNAP 2 | 269 | #define CEPH_LOCK_DN 2 |
217 | #define CEPH_LOCK_IVERSION 4 /* mds internal */ | 270 | #define CEPH_LOCK_ISNAP 16 |
218 | #define CEPH_LOCK_IFILE 8 /* mds internal */ | 271 | #define CEPH_LOCK_IVERSION 32 /* mds internal */ |
219 | #define CEPH_LOCK_IAUTH 32 | 272 | #define CEPH_LOCK_IFILE 64 |
220 | #define CEPH_LOCK_ILINK 64 | 273 | #define CEPH_LOCK_IAUTH 128 |
221 | #define CEPH_LOCK_IDFT 128 /* dir frag tree */ | 274 | #define CEPH_LOCK_ILINK 256 |
222 | #define CEPH_LOCK_INEST 256 /* mds internal */ | 275 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ |
223 | #define CEPH_LOCK_IXATTR 512 | 276 | #define CEPH_LOCK_INEST 1024 /* mds internal */ |
224 | #define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ | 277 | #define CEPH_LOCK_IXATTR 2048 |
278 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | ||
225 | 279 | ||
226 | /* client_session ops */ | 280 | /* client_session ops */ |
227 | enum { | 281 | enum { |
@@ -308,6 +362,7 @@ union ceph_mds_request_args { | |||
308 | struct { | 362 | struct { |
309 | __le32 frag; /* which dir fragment */ | 363 | __le32 frag; /* which dir fragment */ |
310 | __le32 max_entries; /* how many dentries to grab */ | 364 | __le32 max_entries; /* how many dentries to grab */ |
365 | __le32 max_bytes; | ||
311 | } __attribute__ ((packed)) readdir; | 366 | } __attribute__ ((packed)) readdir; |
312 | struct { | 367 | struct { |
313 | __le32 mode; | 368 | __le32 mode; |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 8e4be6a80c62..7503aee828ce 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type) | |||
10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | 10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; |
11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | 11 | case CEPH_ENTITY_TYPE_MON: return "mon"; |
12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | 12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; |
13 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
14 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | 13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; |
15 | default: return "unknown"; | 14 | default: return "unknown"; |
16 | } | 15 | } |
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op) | |||
45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | 44 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; |
46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | 45 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; |
47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | 46 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; |
47 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
48 | 48 | ||
49 | case CEPH_OSD_OP_PULL: return "pull"; | 49 | case CEPH_OSD_OP_PULL: return "pull"; |
50 | case CEPH_OSD_OP_PUSH: return "push"; | 50 | case CEPH_OSD_OP_PUSH: return "push"; |
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o) | |||
174 | } | 174 | } |
175 | return "???"; | 175 | return "???"; |
176 | } | 176 | } |
177 | |||
178 | const char *ceph_pool_op_name(int op) | ||
179 | { | ||
180 | switch (op) { | ||
181 | case POOL_OP_CREATE: return "create"; | ||
182 | case POOL_OP_DELETE: return "delete"; | ||
183 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
184 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
185 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
186 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
187 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
188 | } | ||
189 | return "???"; | ||
190 | } | ||
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f7048da92acc..3be33fb066cc 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p) | |||
113 | static int monc_show(struct seq_file *s, void *p) | 113 | static int monc_show(struct seq_file *s, void *p) |
114 | { | 114 | { |
115 | struct ceph_client *client = s->private; | 115 | struct ceph_client *client = s->private; |
116 | struct ceph_mon_statfs_request *req; | 116 | struct ceph_mon_generic_request *req; |
117 | struct ceph_mon_client *monc = &client->monc; | 117 | struct ceph_mon_client *monc = &client->monc; |
118 | struct rb_node *rp; | 118 | struct rb_node *rp; |
119 | 119 | ||
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p) | |||
126 | if (monc->want_next_osdmap) | 126 | if (monc->want_next_osdmap) |
127 | seq_printf(s, "want next osdmap\n"); | 127 | seq_printf(s, "want next osdmap\n"); |
128 | 128 | ||
129 | for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) { | 129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { |
130 | req = rb_entry(rp, struct ceph_mon_statfs_request, node); | 130 | __u16 op; |
131 | seq_printf(s, "%lld statfs\n", req->tid); | 131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); |
132 | op = le16_to_cpu(req->request->hdr.type); | ||
133 | if (op == CEPH_MSG_STATFS) | ||
134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
135 | else | ||
136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
132 | } | 137 | } |
133 | 138 | ||
134 | mutex_unlock(&monc->mutex); | 139 | mutex_unlock(&monc->mutex); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 650d2db5ed26..f85719310db2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry) | |||
51 | return -ENOMEM; /* oh well */ | 51 | return -ENOMEM; /* oh well */ |
52 | 52 | ||
53 | spin_lock(&dentry->d_lock); | 53 | spin_lock(&dentry->d_lock); |
54 | if (dentry->d_fsdata) /* lost a race */ | 54 | if (dentry->d_fsdata) { |
55 | /* lost a race */ | ||
56 | kmem_cache_free(ceph_dentry_cachep, di); | ||
55 | goto out_unlock; | 57 | goto out_unlock; |
58 | } | ||
56 | di->dentry = dentry; | 59 | di->dentry = dentry; |
57 | di->lease_session = NULL; | 60 | di->lease_session = NULL; |
58 | dentry->d_fsdata = di; | 61 | dentry->d_fsdata = di; |
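The ceph_init_dentry() fix above closes a leak in the allocate-then-race pattern: the allocation happens outside the lock, so the loser of the race must free its copy rather than just bailing out. The same idiom in standalone form:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *shared;			/* set once, by whoever wins */

static int init_once(void)
{
	void *mine = malloc(64);	/* allocate outside the lock */

	if (!mine)
		return -1;
	pthread_mutex_lock(&lock);
	if (shared)
		free(mine);		/* lost the race: free, don't leak */
	else
		shared = mine;
	pthread_mutex_unlock(&lock);
	return 0;
}

int main(void)
{
	return init_once();
}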
@@ -125,7 +128,8 @@ more: | |||
125 | dentry = list_entry(p, struct dentry, d_u.d_child); | 128 | dentry = list_entry(p, struct dentry, d_u.d_child); |
126 | di = ceph_dentry(dentry); | 129 | di = ceph_dentry(dentry); |
127 | while (1) { | 130 | while (1) { |
128 | dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next, | 131 | dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, |
132 | d_unhashed(dentry) ? "!hashed" : "hashed", | ||
129 | parent->d_subdirs.prev, parent->d_subdirs.next); | 133 | parent->d_subdirs.prev, parent->d_subdirs.next); |
130 | if (p == &parent->d_subdirs) { | 134 | if (p == &parent->d_subdirs) { |
131 | fi->at_end = 1; | 135 | fi->at_end = 1; |
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
229 | u32 ftype; | 233 | u32 ftype; |
230 | struct ceph_mds_reply_info_parsed *rinfo; | 234 | struct ceph_mds_reply_info_parsed *rinfo; |
231 | const int max_entries = client->mount_args->max_readdir; | 235 | const int max_entries = client->mount_args->max_readdir; |
236 | const int max_bytes = client->mount_args->max_readdir_bytes; | ||
232 | 237 | ||
233 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 238 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
234 | if (fi->at_end) | 239 | if (fi->at_end) |
@@ -312,6 +317,7 @@ more: | |||
312 | req->r_readdir_offset = fi->next_offset; | 317 | req->r_readdir_offset = fi->next_offset; |
313 | req->r_args.readdir.frag = cpu_to_le32(frag); | 318 | req->r_args.readdir.frag = cpu_to_le32(frag); |
314 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | 319 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); |
320 | req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); | ||
315 | req->r_num_caps = max_entries + 1; | 321 | req->r_num_caps = max_entries + 1; |
316 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 322 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
317 | if (err < 0) { | 323 | if (err < 0) { |
@@ -335,7 +341,7 @@ more: | |||
335 | if (req->r_reply_info.dir_end) { | 341 | if (req->r_reply_info.dir_end) { |
336 | kfree(fi->last_name); | 342 | kfree(fi->last_name); |
337 | fi->last_name = NULL; | 343 | fi->last_name = NULL; |
338 | fi->next_offset = 0; | 344 | fi->next_offset = 2; |
339 | } else { | 345 | } else { |
340 | rinfo = &req->r_reply_info; | 346 | rinfo = &req->r_reply_info; |
341 | err = note_last_dentry(fi, | 347 | err = note_last_dentry(fi, |
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
478 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 484 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
479 | struct dentry *dentry, int err) | 485 | struct dentry *dentry, int err) |
480 | { | 486 | { |
481 | struct ceph_client *client = ceph_client(dentry->d_sb); | 487 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
482 | struct inode *parent = dentry->d_parent->d_inode; | 488 | struct inode *parent = dentry->d_parent->d_inode; |
483 | 489 | ||
484 | /* .snap dir? */ | 490 | /* .snap dir? */ |
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
568 | !is_root_ceph_dentry(dir, dentry) && | 574 | !is_root_ceph_dentry(dir, dentry) && |
569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 575 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
570 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { | 576 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { |
571 | di->offset = ci->i_max_offset++; | ||
572 | spin_unlock(&dir->i_lock); | 577 | spin_unlock(&dir->i_lock); |
573 | dout(" dir %p complete, -ENOENT\n", dir); | 578 | dout(" dir %p complete, -ENOENT\n", dir); |
574 | d_add(dentry, NULL); | 579 | d_add(dentry, NULL); |
@@ -582,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
582 | CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; | 587 | CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; |
583 | req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); | 588 | req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); |
584 | if (IS_ERR(req)) | 589 | if (IS_ERR(req)) |
585 | return ERR_PTR(PTR_ERR(req)); | 590 | return ERR_CAST(req); |
586 | req->r_dentry = dget(dentry); | 591 | req->r_dentry = dget(dentry); |
587 | req->r_num_caps = 2; | 592 | req->r_num_caps = 2; |
588 | /* we only need inode linkage */ | 593 | /* we only need inode linkage */ |
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
888 | 893 | ||
889 | /* ensure target dentry is invalidated, despite | 894 | /* ensure target dentry is invalidated, despite |
890 | rehashing bug in vfs_rename_dir */ | 895 | rehashing bug in vfs_rename_dir */ |
891 | new_dentry->d_time = jiffies; | 896 | ceph_invalidate_dentry_lease(new_dentry); |
892 | ceph_dentry(new_dentry)->lease_shared_gen = 0; | ||
893 | } | 897 | } |
894 | ceph_mdsc_put_request(req); | 898 | ceph_mdsc_put_request(req); |
895 | return err; | 899 | return err; |
896 | } | 900 | } |
897 | 901 | ||
902 | /* | ||
903 | * Ensure a dentry lease will no longer revalidate. | ||
904 | */ | ||
905 | void ceph_invalidate_dentry_lease(struct dentry *dentry) | ||
906 | { | ||
907 | spin_lock(&dentry->d_lock); | ||
908 | dentry->d_time = jiffies; | ||
909 | ceph_dentry(dentry)->lease_shared_gen = 0; | ||
910 | spin_unlock(&dentry->d_lock); | ||
911 | } | ||
898 | 912 | ||
899 | /* | 913 | /* |
900 | * Check if dentry lease is valid. If not, delete the lease. Try to | 914 | * Check if dentry lease is valid. If not, delete the lease. Try to |
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
972 | { | 986 | { |
973 | struct inode *dir = dentry->d_parent->d_inode; | 987 | struct inode *dir = dentry->d_parent->d_inode; |
974 | 988 | ||
975 | dout("d_revalidate %p '%.*s' inode %p\n", dentry, | 989 | dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, |
976 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 990 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode, |
991 | ceph_dentry(dentry)->offset); | ||
977 | 992 | ||
978 | /* always trust cached snapped dentries, snapdir dentry */ | 993 | /* always trust cached snapped dentries, snapdir dentry */ |
979 | if (ceph_snap(dir) != CEPH_NOSNAP) { | 994 | if (ceph_snap(dir) != CEPH_NOSNAP) { |
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1050 | struct ceph_inode_info *ci = ceph_inode(inode); | 1065 | struct ceph_inode_info *ci = ceph_inode(inode); |
1051 | int left; | 1066 | int left; |
1052 | 1067 | ||
1053 | if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT)) | 1068 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1054 | return -EISDIR; | 1069 | return -EISDIR; |
1055 | 1070 | ||
1056 | if (!cf->dir_info) { | 1071 | if (!cf->dir_info) { |
@@ -1092,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1092 | * an fsync() on a dir will wait for any uncommitted directory | 1107 | * an fsync() on a dir will wait for any uncommitted directory |
1093 | * operations to commit. | 1108 | * operations to commit. |
1094 | */ | 1109 | */ |
1095 | static int ceph_dir_fsync(struct file *file, struct dentry *dentry, | 1110 | static int ceph_dir_fsync(struct file *file, int datasync) |
1096 | int datasync) | ||
1097 | { | 1111 | { |
1098 | struct inode *inode = dentry->d_inode; | 1112 | struct inode *inode = file->f_path.dentry->d_inode; |
1099 | struct ceph_inode_info *ci = ceph_inode(inode); | 1113 | struct ceph_inode_info *ci = ceph_inode(inode); |
1100 | struct list_head *head = &ci->i_unsafe_dirops; | 1114 | struct list_head *head = &ci->i_unsafe_dirops; |
1101 | struct ceph_mds_request *req; | 1115 | struct ceph_mds_request *req; |
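ceph_dir_fsync() follows the same prototype change seen on ceph_fsync() earlier in this diff: fsync handlers lose the dentry argument and derive the inode from the struct file. The minimal shape of a converted handler; demo_fsync and demo_flush are invented names:

static int demo_flush(struct inode *inode, int datasync);	/* hypothetical */

static int demo_fsync(struct file *file, int datasync)
{
	struct inode *inode = file->f_path.dentry->d_inode;

	return demo_flush(inode, datasync);
}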
@@ -1152,7 +1166,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
1152 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1166 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
1153 | dn->d_name.len, dn->d_name.name); | 1167 | dn->d_name.len, dn->d_name.name); |
1154 | if (di) { | 1168 | if (di) { |
1155 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1169 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1156 | spin_lock(&mdsc->dentry_lru_lock); | 1170 | spin_lock(&mdsc->dentry_lru_lock); |
1157 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1171 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
1158 | mdsc->num_dentry++; | 1172 | mdsc->num_dentry++; |
@@ -1165,10 +1179,10 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
1165 | struct ceph_dentry_info *di = ceph_dentry(dn); | 1179 | struct ceph_dentry_info *di = ceph_dentry(dn); |
1166 | struct ceph_mds_client *mdsc; | 1180 | struct ceph_mds_client *mdsc; |
1167 | 1181 | ||
1168 | dout("dentry_lru_touch %p %p '%.*s'\n", di, dn, | 1182 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
1169 | dn->d_name.len, dn->d_name.name); | 1183 | dn->d_name.len, dn->d_name.name, di->offset); |
1170 | if (di) { | 1184 | if (di) { |
1171 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1185 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1172 | spin_lock(&mdsc->dentry_lru_lock); | 1186 | spin_lock(&mdsc->dentry_lru_lock); |
1173 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1187 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
1174 | spin_unlock(&mdsc->dentry_lru_lock); | 1188 | spin_unlock(&mdsc->dentry_lru_lock); |
@@ -1183,7 +1197,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1183 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1197 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
1184 | dn->d_name.len, dn->d_name.name); | 1198 | dn->d_name.len, dn->d_name.name); |
1185 | if (di) { | 1199 | if (di) { |
1186 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1200 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1187 | spin_lock(&mdsc->dentry_lru_lock); | 1201 | spin_lock(&mdsc->dentry_lru_lock); |
1188 | list_del_init(&di->lru); | 1202 | list_del_init(&di->lru); |
1189 | mdsc->num_dentry--; | 1203 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9d67572fb328..4480cb1c63e7 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
93 | return ERR_PTR(-ESTALE); | 93 | return ERR_PTR(-ESTALE); |
94 | 94 | ||
95 | dentry = d_obtain_alias(inode); | 95 | dentry = d_obtain_alias(inode); |
96 | if (!dentry) { | 96 | if (IS_ERR(dentry)) { |
97 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", | 97 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", |
98 | fh->ino, inode); | 98 | fh->ino, inode); |
99 | iput(inode); | 99 | iput(inode); |
100 | return ERR_PTR(-ENOMEM); | 100 | return dentry; |
101 | } | 101 | } |
102 | err = ceph_init_dentry(dentry); | 102 | err = ceph_init_dentry(dentry); |
103 | 103 | ||
@@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
116 | struct ceph_nfs_confh *cfh) | 116 | struct ceph_nfs_confh *cfh) |
117 | { | 117 | { |
118 | struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc; | 118 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; |
119 | struct inode *inode; | 119 | struct inode *inode; |
120 | struct dentry *dentry; | 120 | struct dentry *dentry; |
121 | struct ceph_vino vino; | 121 | struct ceph_vino vino; |
@@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, | 133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, |
134 | USE_ANY_MDS); | 134 | USE_ANY_MDS); |
135 | if (IS_ERR(req)) | 135 | if (IS_ERR(req)) |
136 | return ERR_PTR(PTR_ERR(req)); | 136 | return ERR_CAST(req); |
137 | 137 | ||
138 | req->r_ino1 = vino; | 138 | req->r_ino1 = vino; |
139 | req->r_ino2.ino = cfh->parent_ino; | 139 | req->r_ino2.ino = cfh->parent_ino; |
@@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | dentry = d_obtain_alias(inode); | 151 | dentry = d_obtain_alias(inode); |
152 | if (!dentry) { | 152 | if (IS_ERR(dentry)) { |
153 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", | 153 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", |
154 | cfh->ino, inode); | 154 | cfh->ino, inode); |
155 | iput(inode); | 155 | iput(inode); |
156 | return ERR_PTR(-ENOMEM); | 156 | return dentry; |
157 | } | 157 | } |
158 | err = ceph_init_dentry(dentry); | 158 | err = ceph_init_dentry(dentry); |
159 | if (err < 0) { | 159 | if (err < 0) { |
@@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, | |||
202 | return ERR_PTR(-ESTALE); | 202 | return ERR_PTR(-ESTALE); |
203 | 203 | ||
204 | dentry = d_obtain_alias(inode); | 204 | dentry = d_obtain_alias(inode); |
205 | if (!dentry) { | 205 | if (IS_ERR(dentry)) { |
206 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", | 206 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", |
207 | cfh->ino, inode); | 207 | cfh->ino, inode); |
208 | iput(inode); | 208 | iput(inode); |
209 | return ERR_PTR(-ENOMEM); | 209 | return dentry; |
210 | } | 210 | } |
211 | err = ceph_init_dentry(dentry); | 211 | err = ceph_init_dentry(dentry); |
212 | if (err < 0) { | 212 | if (err < 0) { |
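All three export hooks receive the same correction: d_obtain_alias() signals failure with an ERR_PTR-encoded dentry, never NULL, so IS_ERR() is the right test and the encoded error can be propagated directly. A hedged sketch of the corrected call shape, reusing ceph_init_dentry() from this diff:

static struct dentry *demo_fh_lookup(struct inode *inode)
{
	struct dentry *dentry = d_obtain_alias(inode);
	int err;

	if (IS_ERR(dentry))		/* never NULL on failure */
		return dentry;		/* propagate the encoded error */
	err = ceph_init_dentry(dentry);	/* per-fs setup, as above */
	if (err < 0) {
		dput(dentry);
		return ERR_PTR(err);
	}
	return dentry;
}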
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7d634938edc9..6251a1574b94 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
230 | /* do the open */ | 230 | /* do the open */ |
231 | req = prepare_open_request(dir->i_sb, flags, mode); | 231 | req = prepare_open_request(dir->i_sb, flags, mode); |
232 | if (IS_ERR(req)) | 232 | if (IS_ERR(req)) |
233 | return ERR_PTR(PTR_ERR(req)); | 233 | return ERR_CAST(req); |
234 | req->r_dentry = dget(dentry); | 234 | req->r_dentry = dget(dentry); |
235 | req->r_num_caps = 2; | 235 | req->r_num_caps = 2; |
236 | if (flags & O_CREAT) { | 236 | if (flags & O_CREAT) { |
@@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
317 | /* | 317 | /* |
318 | * allocate a vector new pages | 318 | * allocate a vector new pages |
319 | */ | 319 | */ |
320 | static struct page **alloc_page_vector(int num_pages) | 320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
321 | { | 321 | { |
322 | struct page **pages; | 322 | struct page **pages; |
323 | int i; | 323 | int i; |
324 | 324 | ||
325 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | 325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); |
326 | if (!pages) | 326 | if (!pages) |
327 | return ERR_PTR(-ENOMEM); | 327 | return ERR_PTR(-ENOMEM); |
328 | for (i = 0; i < num_pages; i++) { | 328 | for (i = 0; i < num_pages; i++) { |
329 | pages[i] = alloc_page(GFP_NOFS); | 329 | pages[i] = __page_cache_alloc(flags); |
330 | if (pages[i] == NULL) { | 330 | if (pages[i] == NULL) { |
331 | ceph_release_page_vector(pages, i); | 331 | ceph_release_page_vector(pages, i); |
332 | return ERR_PTR(-ENOMEM); | 332 | return ERR_PTR(-ENOMEM); |
@@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
540 | * in sequence. | 540 | * in sequence. |
541 | */ | 541 | */ |
542 | } else { | 542 | } else { |
543 | pages = alloc_page_vector(num_pages); | 543 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
544 | } | 544 | } |
545 | if (IS_ERR(pages)) | 545 | if (IS_ERR(pages)) |
546 | return PTR_ERR(pages); | 546 | return PTR_ERR(pages); |
@@ -649,8 +649,8 @@ more: | |||
649 | do_sync, | 649 | do_sync, |
650 | ci->i_truncate_seq, ci->i_truncate_size, | 650 | ci->i_truncate_seq, ci->i_truncate_size, |
651 | &mtime, false, 2); | 651 | &mtime, false, 2); |
652 | if (IS_ERR(req)) | 652 | if (!req) |
653 | return PTR_ERR(req); | 653 | return -ENOMEM; |
654 | 654 | ||
655 | num_pages = calc_pages_for(pos, len); | 655 | num_pages = calc_pages_for(pos, len); |
656 | 656 | ||
@@ -668,7 +668,7 @@ more: | |||
668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
670 | } else { | 670 | } else { |
671 | pages = alloc_page_vector(num_pages); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
672 | if (IS_ERR(pages)) { | 672 | if (IS_ERR(pages)) { |
673 | ret = PTR_ERR(pages); | 673 | ret = PTR_ERR(pages); |
674 | goto out; | 674 | goto out; |
@@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
809 | struct file *file = iocb->ki_filp; | 809 | struct file *file = iocb->ki_filp; |
810 | struct inode *inode = file->f_dentry->d_inode; | 810 | struct inode *inode = file->f_dentry->d_inode; |
811 | struct ceph_inode_info *ci = ceph_inode(inode); | 811 | struct ceph_inode_info *ci = ceph_inode(inode); |
812 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
813 | loff_t endoff = pos + iov->iov_len; | 813 | loff_t endoff = pos + iov->iov_len; |
814 | int got = 0; | 814 | int got = 0; |
815 | int ret, err; | 815 | int ret, err; |
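alloc_page_vector() is renamed and exported as ceph_alloc_page_vector(), now taking caller-supplied gfp flags. A hedged sketch of a round trip through the pair; the demo function is invented, and ceph_release_page_vector() is taken from the context above:

static int demo_page_vector(int num_pages)
{
	struct page **pages;

	pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
	if (IS_ERR(pages))
		return PTR_ERR(pages);		/* -ENOMEM */
	/* ... fill or read into the pages here ... */
	ceph_release_page_vector(pages, num_pages);	/* frees pages and array */
	return 0;
}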
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 85b4d2ffdeba..226f5a50d362 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) | |||
69 | 69 | ||
70 | BUG_ON(!S_ISDIR(parent->i_mode)); | 70 | BUG_ON(!S_ISDIR(parent->i_mode)); |
71 | if (IS_ERR(inode)) | 71 | if (IS_ERR(inode)) |
72 | return ERR_PTR(PTR_ERR(inode)); | 72 | return inode; |
73 | inode->i_mode = parent->i_mode; | 73 | inode->i_mode = parent->i_mode; |
74 | inode->i_uid = parent->i_uid; | 74 | inode->i_uid = parent->i_uid; |
75 | inode->i_gid = parent->i_gid; | 75 | inode->i_gid = parent->i_gid; |
@@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
384 | */ | 384 | */ |
385 | if (ci->i_snap_realm) { | 385 | if (ci->i_snap_realm) { |
386 | struct ceph_mds_client *mdsc = | 386 | struct ceph_mds_client *mdsc = |
387 | &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 388 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
389 | 389 | ||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | 390 | dout(" dropping residual ref to snap realm %p\n", realm); |
@@ -619,11 +619,12 @@ static int fill_inode(struct inode *inode, | |||
619 | memcpy(ci->i_xattrs.blob->vec.iov_base, | 619 | memcpy(ci->i_xattrs.blob->vec.iov_base, |
620 | iinfo->xattr_data, iinfo->xattr_len); | 620 | iinfo->xattr_data, iinfo->xattr_len); |
621 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); | 621 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); |
622 | xattr_blob = NULL; | ||
622 | } | 623 | } |
623 | 624 | ||
624 | inode->i_mapping->a_ops = &ceph_aops; | 625 | inode->i_mapping->a_ops = &ceph_aops; |
625 | inode->i_mapping->backing_dev_info = | 626 | inode->i_mapping->backing_dev_info = |
626 | &ceph_client(inode->i_sb)->backing_dev_info; | 627 | &ceph_sb_to_client(inode->i_sb)->backing_dev_info; |
627 | 628 | ||
628 | switch (inode->i_mode & S_IFMT) { | 629 | switch (inode->i_mode & S_IFMT) { |
629 | case S_IFIFO: | 630 | case S_IFIFO: |
@@ -674,14 +675,15 @@ static int fill_inode(struct inode *inode, | |||
674 | /* set dir completion flag? */ | 675 | /* set dir completion flag? */ |
675 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | 676 | if (ci->i_files == 0 && ci->i_subdirs == 0 && |
676 | ceph_snap(inode) == CEPH_NOSNAP && | 677 | ceph_snap(inode) == CEPH_NOSNAP && |
677 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) { | 678 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && |
679 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
678 | dout(" marking %p complete (empty)\n", inode); | 680 | dout(" marking %p complete (empty)\n", inode); |
679 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 681 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
680 | ci->i_max_offset = 2; | 682 | ci->i_max_offset = 2; |
681 | } | 683 | } |
682 | 684 | ||
683 | /* it may be better to set st_size in getattr instead? */ | 685 | /* it may be better to set st_size in getattr instead? */ |
684 | if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES)) | 686 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
685 | inode->i_size = ci->i_rbytes; | 687 | inode->i_size = ci->i_rbytes; |
686 | break; | 688 | break; |
687 | default: | 689 | default: |
@@ -802,6 +804,37 @@ out_unlock: | |||
802 | } | 804 | } |
803 | 805 | ||
804 | /* | 806 | /* |
807 | * Set dentry's directory position based on the current dir's max, and | ||
808 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
809 | */ | ||
810 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
811 | { | ||
812 | struct dentry *dir = dn->d_parent; | ||
813 | struct inode *inode = dn->d_parent->d_inode; | ||
814 | struct ceph_dentry_info *di; | ||
815 | |||
816 | BUG_ON(!inode); | ||
817 | |||
818 | di = ceph_dentry(dn); | ||
819 | |||
820 | spin_lock(&inode->i_lock); | ||
821 | if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
822 | spin_unlock(&inode->i_lock); | ||
823 | return; | ||
824 | } | ||
825 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
826 | spin_unlock(&inode->i_lock); | ||
827 | |||
828 | spin_lock(&dcache_lock); | ||
829 | spin_lock(&dn->d_lock); | ||
830 | list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); | ||
831 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
832 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
833 | spin_unlock(&dn->d_lock); | ||
834 | spin_unlock(&dcache_lock); | ||
835 | } | ||
836 | |||
837 | /* | ||
805 | * splice a dentry to an inode. | 838 | * splice a dentry to an inode. |
806 | * caller must hold directory i_mutex for this to be safe. | 839 | * caller must hold directory i_mutex for this to be safe. |
807 | * | 840 | * |
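
The block above is not new logic: ceph_set_dentry_offset() moves, verbatim apart from the I_COMPLETE early-out, from below splice_dentry() (see the matching deletion hunk that follows) to above it, so splice_dentry() can call it without a forward declaration. With the call made inside splice_dentry(), the two explicit ceph_set_dentry_offset() calls in ceph_fill_trace() are dropped later in this file. The new early-out in isolation; the rationale comment is an inference, the field names are from the patch:

	spin_lock(&inode->i_lock);
	if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
		spin_unlock(&inode->i_lock);
		return;		/* dir not complete: no dcache readdir, no offset needed */
	}
	di->offset = ceph_inode(inode)->i_max_offset++;	/* still under i_lock */
	spin_unlock(&inode->i_lock);
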
@@ -814,6 +847,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
814 | { | 847 | { |
815 | struct dentry *realdn; | 848 | struct dentry *realdn; |
816 | 849 | ||
850 | BUG_ON(dn->d_inode); | ||
851 | |||
817 | /* dn must be unhashed */ | 852 | /* dn must be unhashed */ |
818 | if (!d_unhashed(dn)) | 853 | if (!d_unhashed(dn)) |
819 | d_drop(dn); | 854 | d_drop(dn); |
@@ -835,44 +870,17 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
835 | dn = realdn; | 870 | dn = realdn; |
836 | } else { | 871 | } else { |
837 | BUG_ON(!ceph_dentry(dn)); | 872 | BUG_ON(!ceph_dentry(dn)); |
838 | |||
839 | dout("dn %p attached to %p ino %llx.%llx\n", | 873 | dout("dn %p attached to %p ino %llx.%llx\n", |
840 | dn, dn->d_inode, ceph_vinop(dn->d_inode)); | 874 | dn, dn->d_inode, ceph_vinop(dn->d_inode)); |
841 | } | 875 | } |
842 | if ((!prehash || *prehash) && d_unhashed(dn)) | 876 | if ((!prehash || *prehash) && d_unhashed(dn)) |
843 | d_rehash(dn); | 877 | d_rehash(dn); |
878 | ceph_set_dentry_offset(dn); | ||
844 | out: | 879 | out: |
845 | return dn; | 880 | return dn; |
846 | } | 881 | } |
847 | 882 | ||
848 | /* | 883 | /* |
849 | * Set dentry's directory position based on the current dir's max, and | ||
850 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
851 | */ | ||
852 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
853 | { | ||
854 | struct dentry *dir = dn->d_parent; | ||
855 | struct inode *inode = dn->d_parent->d_inode; | ||
856 | struct ceph_dentry_info *di; | ||
857 | |||
858 | BUG_ON(!inode); | ||
859 | |||
860 | di = ceph_dentry(dn); | ||
861 | |||
862 | spin_lock(&inode->i_lock); | ||
863 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
864 | spin_unlock(&inode->i_lock); | ||
865 | |||
866 | spin_lock(&dcache_lock); | ||
867 | spin_lock(&dn->d_lock); | ||
868 | list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); | ||
869 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
870 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
871 | spin_unlock(&dn->d_lock); | ||
872 | spin_unlock(&dcache_lock); | ||
873 | } | ||
874 | |||
875 | /* | ||
876 | * Incorporate results into the local cache. This is either just | 884 | * Incorporate results into the local cache. This is either just |
877 | * one inode, or a directory, dentry, and possibly linked-to inode (e.g., | 885 | * one inode, or a directory, dentry, and possibly linked-to inode (e.g., |
878 | * after a lookup). | 886 | * after a lookup). |
@@ -933,14 +941,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
933 | 941 | ||
934 | if (!rinfo->head->is_target && !rinfo->head->is_dentry) { | 942 | if (!rinfo->head->is_target && !rinfo->head->is_dentry) { |
935 | dout("fill_trace reply is empty!\n"); | 943 | dout("fill_trace reply is empty!\n"); |
936 | if (rinfo->head->result == 0 && req->r_locked_dir) { | 944 | if (rinfo->head->result == 0 && req->r_locked_dir) |
937 | struct ceph_inode_info *ci = | 945 | ceph_invalidate_dir_request(req); |
938 | ceph_inode(req->r_locked_dir); | ||
939 | dout(" clearing %p complete (empty trace)\n", | ||
940 | req->r_locked_dir); | ||
941 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
942 | ci->i_release_count++; | ||
943 | } | ||
944 | return 0; | 946 | return 0; |
945 | } | 947 | } |
946 | 948 | ||
@@ -1011,13 +1013,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1011 | req->r_old_dentry->d_name.len, | 1013 | req->r_old_dentry->d_name.len, |
1012 | req->r_old_dentry->d_name.name, | 1014 | req->r_old_dentry->d_name.name, |
1013 | dn, dn->d_name.len, dn->d_name.name); | 1015 | dn, dn->d_name.len, dn->d_name.name); |
1016 | |||
1014 | /* ensure target dentry is invalidated, despite | 1017 | /* ensure target dentry is invalidated, despite |
1015 | rehashing bug in vfs_rename_dir */ | 1018 | rehashing bug in vfs_rename_dir */ |
1016 | dn->d_time = jiffies; | 1019 | ceph_invalidate_dentry_lease(dn); |
1017 | ceph_dentry(dn)->lease_shared_gen = 0; | 1020 | |
1018 | /* take overwritten dentry's readdir offset */ | 1021 | /* take overwritten dentry's readdir offset */ |
1022 | dout("dn %p gets %p offset %lld (old offset %lld)\n", | ||
1023 | req->r_old_dentry, dn, ceph_dentry(dn)->offset, | ||
1024 | ceph_dentry(req->r_old_dentry)->offset); | ||
1019 | ceph_dentry(req->r_old_dentry)->offset = | 1025 | ceph_dentry(req->r_old_dentry)->offset = |
1020 | ceph_dentry(dn)->offset; | 1026 | ceph_dentry(dn)->offset; |
1027 | |||
1021 | dn = req->r_old_dentry; /* use old_dentry */ | 1028 | dn = req->r_old_dentry; /* use old_dentry */ |
1022 | in = dn->d_inode; | 1029 | in = dn->d_inode; |
1023 | } | 1030 | } |
@@ -1059,7 +1066,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1059 | goto done; | 1066 | goto done; |
1060 | } | 1067 | } |
1061 | req->r_dentry = dn; /* may have spliced */ | 1068 | req->r_dentry = dn; /* may have spliced */ |
1062 | ceph_set_dentry_offset(dn); | ||
1063 | igrab(in); | 1069 | igrab(in); |
1064 | } else if (ceph_ino(in) == vino.ino && | 1070 | } else if (ceph_ino(in) == vino.ino && |
1065 | ceph_snap(in) == vino.snap) { | 1071 | ceph_snap(in) == vino.snap) { |
@@ -1102,7 +1108,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1102 | err = PTR_ERR(dn); | 1108 | err = PTR_ERR(dn); |
1103 | goto done; | 1109 | goto done; |
1104 | } | 1110 | } |
1105 | ceph_set_dentry_offset(dn); | ||
1106 | req->r_dentry = dn; /* may have spliced */ | 1111 | req->r_dentry = dn; /* may have spliced */ |
1107 | igrab(in); | 1112 | igrab(in); |
1108 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | 1113 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ |
@@ -1429,7 +1434,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1429 | { | 1434 | { |
1430 | struct ceph_inode_info *ci = ceph_inode(inode); | 1435 | struct ceph_inode_info *ci = ceph_inode(inode); |
1431 | 1436 | ||
1432 | if (queue_work(ceph_client(inode->i_sb)->trunc_wq, | 1437 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, |
1433 | &ci->i_vmtruncate_work)) { | 1438 | &ci->i_vmtruncate_work)) { |
1434 | dout("ceph_queue_vmtruncate %p\n", inode); | 1439 | dout("ceph_queue_vmtruncate %p\n", inode); |
1435 | igrab(inode); | 1440 | igrab(inode); |
@@ -1518,7 +1523,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1518 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1523 | struct inode *parent_inode = dentry->d_parent->d_inode; |
1519 | const unsigned int ia_valid = attr->ia_valid; | 1524 | const unsigned int ia_valid = attr->ia_valid; |
1520 | struct ceph_mds_request *req; | 1525 | struct ceph_mds_request *req; |
1521 | struct ceph_mds_client *mdsc = &ceph_client(dentry->d_sb)->mdsc; | 1526 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; |
1522 | int issued; | 1527 | int issued; |
1523 | int release = 0, dirtied = 0; | 1528 | int release = 0, dirtied = 0; |
1524 | int mask = 0; | 1529 | int mask = 0; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8a5bcae62846..d085f07756b4 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -98,7 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
98 | struct ceph_ioctl_dataloc dl; | 98 | struct ceph_ioctl_dataloc dl; |
99 | struct inode *inode = file->f_dentry->d_inode; | 99 | struct inode *inode = file->f_dentry->d_inode; |
100 | struct ceph_inode_info *ci = ceph_inode(inode); | 100 | struct ceph_inode_info *ci = ceph_inode(inode); |
101 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
102 | u64 len = 1, olen; | 102 | u64 len = 1, olen; |
103 | u64 tmp; | 103 | u64 tmp; |
104 | struct ceph_object_layout ol; | 104 | struct ceph_object_layout ol; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 24561a557e01..b49f12822cbc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -40,7 +40,7 @@ | |||
40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 40 | static void __wake_requests(struct ceph_mds_client *mdsc, |
41 | struct list_head *head); | 41 | struct list_head *head); |
42 | 42 | ||
43 | const static struct ceph_connection_operations mds_con_ops; | 43 | static const struct ceph_connection_operations mds_con_ops; |
44 | 44 | ||
45 | 45 | ||
46 | /* | 46 | /* |
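
`const static` is valid C, but both C89 and C99 deem a storage-class specifier that is not first in the declaration obsolescent, and gcc can warn about it (-Wold-style-declaration, enabled by -Wextra). The change here and at the definition near the end of this file is purely stylistic; side-by-side comparison, distinct names so both lines would compile:

	const static struct ceph_connection_operations a_ops;	/* obsolescent ordering */
	static const struct ceph_connection_operations b_ops;	/* conventional: storage class first */
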
@@ -665,10 +665,10 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) | |||
665 | struct ceph_msg *msg; | 665 | struct ceph_msg *msg; |
666 | struct ceph_mds_session_head *h; | 666 | struct ceph_mds_session_head *h; |
667 | 667 | ||
668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); | 668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS); |
669 | if (IS_ERR(msg)) { | 669 | if (!msg) { |
670 | pr_err("create_session_msg ENOMEM creating msg\n"); | 670 | pr_err("create_session_msg ENOMEM creating msg\n"); |
671 | return ERR_PTR(PTR_ERR(msg)); | 671 | return NULL; |
672 | } | 672 | } |
673 | h = msg->front.iov_base; | 673 | h = msg->front.iov_base; |
674 | h->op = cpu_to_le32(op); | 674 | h->op = cpu_to_le32(op); |
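
ceph_msg_new() changes contract here and throughout this merge: it now takes an explicit gfp_t and returns NULL on allocation failure instead of an ERR_PTR, so callers switch from IS_ERR()/PTR_ERR() to a NULL check and supply -ENOMEM themselves. GFP_NOFS keeps the allocation from recursing into filesystem reclaim while fs locks are held. A caller under the new convention, condensed from the hunks below:

	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS);
	if (!msg)			/* NULL, not ERR_PTR, signals failure */
		return -ENOMEM;		/* caller chooses the errno */
	ceph_con_send(&session->s_con, msg);
	return 0;
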
@@ -687,7 +687,6 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
687 | struct ceph_msg *msg; | 687 | struct ceph_msg *msg; |
688 | int mstate; | 688 | int mstate; |
689 | int mds = session->s_mds; | 689 | int mds = session->s_mds; |
690 | int err = 0; | ||
691 | 690 | ||
692 | /* wait for mds to go active? */ | 691 | /* wait for mds to go active? */ |
693 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); | 692 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); |
@@ -698,13 +697,9 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
698 | 697 | ||
699 | /* send connect message */ | 698 | /* send connect message */ |
700 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); | 699 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); |
701 | if (IS_ERR(msg)) { | 700 | if (!msg) |
702 | err = PTR_ERR(msg); | 701 | return -ENOMEM; |
703 | goto out; | ||
704 | } | ||
705 | ceph_con_send(&session->s_con, msg); | 702 | ceph_con_send(&session->s_con, msg); |
706 | |||
707 | out: | ||
708 | return 0; | 703 | return 0; |
709 | } | 704 | } |
710 | 705 | ||
@@ -804,12 +799,49 @@ out: | |||
804 | } | 799 | } |
805 | 800 | ||
806 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | 801 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, |
807 | void *arg) | 802 | void *arg) |
808 | { | 803 | { |
809 | struct ceph_inode_info *ci = ceph_inode(inode); | 804 | struct ceph_inode_info *ci = ceph_inode(inode); |
805 | int drop = 0; | ||
806 | |||
810 | dout("removing cap %p, ci is %p, inode is %p\n", | 807 | dout("removing cap %p, ci is %p, inode is %p\n", |
811 | cap, ci, &ci->vfs_inode); | 808 | cap, ci, &ci->vfs_inode); |
812 | ceph_remove_cap(cap); | 809 | spin_lock(&inode->i_lock); |
810 | __ceph_remove_cap(cap); | ||
811 | if (!__ceph_is_any_real_caps(ci)) { | ||
812 | struct ceph_mds_client *mdsc = | ||
813 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
814 | |||
815 | spin_lock(&mdsc->cap_dirty_lock); | ||
816 | if (!list_empty(&ci->i_dirty_item)) { | ||
817 | pr_info(" dropping dirty %s state for %p %lld\n", | ||
818 | ceph_cap_string(ci->i_dirty_caps), | ||
819 | inode, ceph_ino(inode)); | ||
820 | ci->i_dirty_caps = 0; | ||
821 | list_del_init(&ci->i_dirty_item); | ||
822 | drop = 1; | ||
823 | } | ||
824 | if (!list_empty(&ci->i_flushing_item)) { | ||
825 | pr_info(" dropping dirty+flushing %s state for %p %lld\n", | ||
826 | ceph_cap_string(ci->i_flushing_caps), | ||
827 | inode, ceph_ino(inode)); | ||
828 | ci->i_flushing_caps = 0; | ||
829 | list_del_init(&ci->i_flushing_item); | ||
830 | mdsc->num_cap_flushing--; | ||
831 | drop = 1; | ||
832 | } | ||
833 | if (drop && ci->i_wrbuffer_ref) { | ||
834 | pr_info(" dropping dirty data for %p %lld\n", | ||
835 | inode, ceph_ino(inode)); | ||
836 | ci->i_wrbuffer_ref = 0; | ||
837 | ci->i_wrbuffer_ref_head = 0; | ||
838 | drop++; | ||
839 | } | ||
840 | spin_unlock(&mdsc->cap_dirty_lock); | ||
841 | } | ||
842 | spin_unlock(&inode->i_lock); | ||
843 | while (drop--) | ||
844 | iput(inode); | ||
813 | return 0; | 845 | return 0; |
814 | } | 846 | } |
815 | 847 | ||
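
remove_session_caps_cb() now tears down any dirty or flushing cap state left on the inode instead of just dropping the cap. The drop counter is the interesting part: each piece of dirty state cleared presumably pins an inode reference, and iput() may sleep, so references are counted while i_lock and cap_dirty_lock are held and released only after both are dropped. The deferred-iput pattern, reduced to a sketch with hypothetical clear_dirty()/clear_flushing() helpers standing in for the list teardown above:

	int drop = 0;

	spin_lock(&inode->i_lock);
	if (clear_dirty(ci))
		drop = 1;
	if (clear_flushing(ci))
		drop = 1;
	spin_unlock(&inode->i_lock);

	while (drop--)
		iput(inode);		/* may sleep; never call under a spinlock */
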
@@ -821,6 +853,7 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
821 | dout("remove_session_caps on %p\n", session); | 853 | dout("remove_session_caps on %p\n", session); |
822 | iterate_session_caps(session, remove_session_caps_cb, NULL); | 854 | iterate_session_caps(session, remove_session_caps_cb, NULL); |
823 | BUG_ON(session->s_nr_caps > 0); | 855 | BUG_ON(session->s_nr_caps > 0); |
856 | BUG_ON(!list_empty(&session->s_cap_flushing)); | ||
824 | cleanup_cap_releases(session); | 857 | cleanup_cap_releases(session); |
825 | } | 858 | } |
826 | 859 | ||
@@ -883,8 +916,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
883 | ceph_mds_state_name(state)); | 916 | ceph_mds_state_name(state)); |
884 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, | 917 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, |
885 | ++session->s_renew_seq); | 918 | ++session->s_renew_seq); |
886 | if (IS_ERR(msg)) | 919 | if (!msg) |
887 | return PTR_ERR(msg); | 920 | return -ENOMEM; |
888 | ceph_con_send(&session->s_con, msg); | 921 | ceph_con_send(&session->s_con, msg); |
889 | return 0; | 922 | return 0; |
890 | } | 923 | } |
@@ -931,17 +964,15 @@ static int request_close_session(struct ceph_mds_client *mdsc, | |||
931 | struct ceph_mds_session *session) | 964 | struct ceph_mds_session *session) |
932 | { | 965 | { |
933 | struct ceph_msg *msg; | 966 | struct ceph_msg *msg; |
934 | int err = 0; | ||
935 | 967 | ||
936 | dout("request_close_session mds%d state %s seq %lld\n", | 968 | dout("request_close_session mds%d state %s seq %lld\n", |
937 | session->s_mds, session_state_name(session->s_state), | 969 | session->s_mds, session_state_name(session->s_state), |
938 | session->s_seq); | 970 | session->s_seq); |
939 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); | 971 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); |
940 | if (IS_ERR(msg)) | 972 | if (!msg) |
941 | err = PTR_ERR(msg); | 973 | return -ENOMEM; |
942 | else | 974 | ceph_con_send(&session->s_con, msg); |
943 | ceph_con_send(&session->s_con, msg); | 975 | return 0; |
944 | return err; | ||
945 | } | 976 | } |
946 | 977 | ||
947 | /* | 978 | /* |
@@ -1059,7 +1090,7 @@ static int add_cap_releases(struct ceph_mds_client *mdsc, | |||
1059 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
1060 | spin_unlock(&session->s_cap_lock); | 1091 | spin_unlock(&session->s_cap_lock); |
1061 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
1062 | 0, 0, NULL); | 1093 | GFP_NOFS); |
1063 | if (!msg) | 1094 | if (!msg) |
1064 | goto out_unlocked; | 1095 | goto out_unlocked; |
1065 | dout("add_cap_releases %p msg %p now %d\n", session, msg, | 1096 | dout("add_cap_releases %p msg %p now %d\n", session, msg, |
@@ -1151,10 +1182,8 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
1151 | struct ceph_msg *msg; | 1182 | struct ceph_msg *msg; |
1152 | 1183 | ||
1153 | dout("send_cap_releases mds%d\n", session->s_mds); | 1184 | dout("send_cap_releases mds%d\n", session->s_mds); |
1154 | while (1) { | 1185 | spin_lock(&session->s_cap_lock); |
1155 | spin_lock(&session->s_cap_lock); | 1186 | while (!list_empty(&session->s_cap_releases_done)) { |
1156 | if (list_empty(&session->s_cap_releases_done)) | ||
1157 | break; | ||
1158 | msg = list_first_entry(&session->s_cap_releases_done, | 1187 | msg = list_first_entry(&session->s_cap_releases_done, |
1159 | struct ceph_msg, list_head); | 1188 | struct ceph_msg, list_head); |
1160 | list_del_init(&msg->list_head); | 1189 | list_del_init(&msg->list_head); |
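
The send loop is restructured into the canonical locked-drain shape: test the list under s_cap_lock, detach one entry, drop the lock across ceph_con_send() (which takes the connection mutex and may sleep), and re-take it before testing again. The old while(1)/break form was correct but hid the lock pairing. As a standalone fragment, header fixups omitted:

	spin_lock(&session->s_cap_lock);
	while (!list_empty(&session->s_cap_releases_done)) {
		msg = list_first_entry(&session->s_cap_releases_done,
				       struct ceph_msg, list_head);
		list_del_init(&msg->list_head);
		spin_unlock(&session->s_cap_lock);	/* don't send under the lock */
		ceph_con_send(&session->s_con, msg);
		spin_lock(&session->s_cap_lock);	/* re-take before re-testing */
	}
	spin_unlock(&session->s_cap_lock);
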
@@ -1162,10 +1191,49 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
1162 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1191 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
1163 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); | 1192 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); |
1164 | ceph_con_send(&session->s_con, msg); | 1193 | ceph_con_send(&session->s_con, msg); |
1194 | spin_lock(&session->s_cap_lock); | ||
1165 | } | 1195 | } |
1166 | spin_unlock(&session->s_cap_lock); | 1196 | spin_unlock(&session->s_cap_lock); |
1167 | } | 1197 | } |
1168 | 1198 | ||
1199 | static void discard_cap_releases(struct ceph_mds_client *mdsc, | ||
1200 | struct ceph_mds_session *session) | ||
1201 | { | ||
1202 | struct ceph_msg *msg; | ||
1203 | struct ceph_mds_cap_release *head; | ||
1204 | unsigned num; | ||
1205 | |||
1206 | dout("discard_cap_releases mds%d\n", session->s_mds); | ||
1207 | spin_lock(&session->s_cap_lock); | ||
1208 | |||
1209 | /* zero out the in-progress message */ | ||
1210 | msg = list_first_entry(&session->s_cap_releases, | ||
1211 | struct ceph_msg, list_head); | ||
1212 | head = msg->front.iov_base; | ||
1213 | num = le32_to_cpu(head->num); | ||
1214 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | ||
1215 | head->num = cpu_to_le32(0); | ||
1216 | session->s_num_cap_releases += num; | ||
1217 | |||
1218 | /* requeue completed messages */ | ||
1219 | while (!list_empty(&session->s_cap_releases_done)) { | ||
1220 | msg = list_first_entry(&session->s_cap_releases_done, | ||
1221 | struct ceph_msg, list_head); | ||
1222 | list_del_init(&msg->list_head); | ||
1223 | |||
1224 | head = msg->front.iov_base; | ||
1225 | num = le32_to_cpu(head->num); | ||
1226 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, | ||
1227 | num); | ||
1228 | session->s_num_cap_releases += num; | ||
1229 | head->num = cpu_to_le32(0); | ||
1230 | msg->front.iov_len = sizeof(*head); | ||
1231 | list_add(&msg->list_head, &session->s_cap_releases); | ||
1232 | } | ||
1233 | |||
1234 | spin_unlock(&session->s_cap_lock); | ||
1235 | } | ||
1236 | |||
1169 | /* | 1237 | /* |
1170 | * requests | 1238 | * requests |
1171 | */ | 1239 | */ |
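
discard_cap_releases() is new and pairs with the reconnect rework below; the comment at its call site says "drop old cap expires; we're about to reestablish that state". Rather than freeing the preallocated release messages, it empties them and hands their slots back, presumably so the pool stays primed without fresh allocations. The recycling step in isolation:

	head = msg->front.iov_base;
	session->s_num_cap_releases += le32_to_cpu(head->num);	/* return the slots */
	head->num = cpu_to_le32(0);				/* message now empty */
	msg->front.iov_len = sizeof(*head);
	list_add(&msg->list_head, &session->s_cap_releases);	/* back on the ready list */
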
@@ -1181,6 +1249,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1181 | if (!req) | 1249 | if (!req) |
1182 | return ERR_PTR(-ENOMEM); | 1250 | return ERR_PTR(-ENOMEM); |
1183 | 1251 | ||
1252 | mutex_init(&req->r_fill_mutex); | ||
1184 | req->r_started = jiffies; | 1253 | req->r_started = jiffies; |
1185 | req->r_resend_mds = -1; | 1254 | req->r_resend_mds = -1; |
1186 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
@@ -1251,7 +1320,7 @@ retry: | |||
1251 | len += 1 + temp->d_name.len; | 1320 | len += 1 + temp->d_name.len; |
1252 | temp = temp->d_parent; | 1321 | temp = temp->d_parent; |
1253 | if (temp == NULL) { | 1322 | if (temp == NULL) { |
1254 | pr_err("build_path_dentry corrupt dentry %p\n", dentry); | 1323 | pr_err("build_path corrupt dentry %p\n", dentry); |
1255 | return ERR_PTR(-EINVAL); | 1324 | return ERR_PTR(-EINVAL); |
1256 | } | 1325 | } |
1257 | } | 1326 | } |
@@ -1267,7 +1336,7 @@ retry: | |||
1267 | struct inode *inode = temp->d_inode; | 1336 | struct inode *inode = temp->d_inode; |
1268 | 1337 | ||
1269 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { | 1338 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { |
1270 | dout("build_path_dentry path+%d: %p SNAPDIR\n", | 1339 | dout("build_path path+%d: %p SNAPDIR\n", |
1271 | pos, temp); | 1340 | pos, temp); |
1272 | } else if (stop_on_nosnap && inode && | 1341 | } else if (stop_on_nosnap && inode && |
1273 | ceph_snap(inode) == CEPH_NOSNAP) { | 1342 | ceph_snap(inode) == CEPH_NOSNAP) { |
@@ -1278,20 +1347,18 @@ retry: | |||
1278 | break; | 1347 | break; |
1279 | strncpy(path + pos, temp->d_name.name, | 1348 | strncpy(path + pos, temp->d_name.name, |
1280 | temp->d_name.len); | 1349 | temp->d_name.len); |
1281 | dout("build_path_dentry path+%d: %p '%.*s'\n", | ||
1282 | pos, temp, temp->d_name.len, path + pos); | ||
1283 | } | 1350 | } |
1284 | if (pos) | 1351 | if (pos) |
1285 | path[--pos] = '/'; | 1352 | path[--pos] = '/'; |
1286 | temp = temp->d_parent; | 1353 | temp = temp->d_parent; |
1287 | if (temp == NULL) { | 1354 | if (temp == NULL) { |
1288 | pr_err("build_path_dentry corrupt dentry\n"); | 1355 | pr_err("build_path corrupt dentry\n"); |
1289 | kfree(path); | 1356 | kfree(path); |
1290 | return ERR_PTR(-EINVAL); | 1357 | return ERR_PTR(-EINVAL); |
1291 | } | 1358 | } |
1292 | } | 1359 | } |
1293 | if (pos != 0) { | 1360 | if (pos != 0) { |
1294 | pr_err("build_path_dentry did not end path lookup where " | 1361 | pr_err("build_path did not end path lookup where " |
1295 | "expected, namelen is %d, pos is %d\n", len, pos); | 1362 | "expected, namelen is %d, pos is %d\n", len, pos); |
1296 | /* presumably this is only possible if racing with a | 1363 | /* presumably this is only possible if racing with a |
1297 | rename of one of the parent directories (we can not | 1364 | rename of one of the parent directories (we can not |
@@ -1303,7 +1370,7 @@ retry: | |||
1303 | 1370 | ||
1304 | *base = ceph_ino(temp->d_inode); | 1371 | *base = ceph_ino(temp->d_inode); |
1305 | *plen = len; | 1372 | *plen = len; |
1306 | dout("build_path_dentry on %p %d built %llx '%.*s'\n", | 1373 | dout("build_path on %p %d built %llx '%.*s'\n", |
1307 | dentry, atomic_read(&dentry->d_count), *base, len, path); | 1374 | dentry, atomic_read(&dentry->d_count), *base, len, path); |
1308 | return path; | 1375 | return path; |
1309 | } | 1376 | } |
@@ -1426,9 +1493,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1426 | if (req->r_old_dentry_drop) | 1493 | if (req->r_old_dentry_drop) |
1427 | len += req->r_old_dentry->d_name.len; | 1494 | len += req->r_old_dentry->d_name.len; |
1428 | 1495 | ||
1429 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); | 1496 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS); |
1430 | if (IS_ERR(msg)) | 1497 | if (!msg) { |
1498 | msg = ERR_PTR(-ENOMEM); | ||
1431 | goto out_free2; | 1499 | goto out_free2; |
1500 | } | ||
1432 | 1501 | ||
1433 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1502 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
1434 | 1503 | ||
@@ -1517,9 +1586,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1517 | } | 1586 | } |
1518 | msg = create_request_message(mdsc, req, mds); | 1587 | msg = create_request_message(mdsc, req, mds); |
1519 | if (IS_ERR(msg)) { | 1588 | if (IS_ERR(msg)) { |
1520 | req->r_reply = ERR_PTR(PTR_ERR(msg)); | 1589 | req->r_err = PTR_ERR(msg); |
1521 | complete_request(mdsc, req); | 1590 | complete_request(mdsc, req); |
1522 | return -PTR_ERR(msg); | 1591 | return PTR_ERR(msg); |
1523 | } | 1592 | } |
1524 | req->r_request = msg; | 1593 | req->r_request = msg; |
1525 | 1594 | ||
@@ -1552,7 +1621,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1552 | int mds = -1; | 1621 | int mds = -1; |
1553 | int err = -EAGAIN; | 1622 | int err = -EAGAIN; |
1554 | 1623 | ||
1555 | if (req->r_reply) | 1624 | if (req->r_err || req->r_got_result) |
1556 | goto out; | 1625 | goto out; |
1557 | 1626 | ||
1558 | if (req->r_timeout && | 1627 | if (req->r_timeout && |
@@ -1609,7 +1678,7 @@ out: | |||
1609 | return err; | 1678 | return err; |
1610 | 1679 | ||
1611 | finish: | 1680 | finish: |
1612 | req->r_reply = ERR_PTR(err); | 1681 | req->r_err = err; |
1613 | complete_request(mdsc, req); | 1682 | complete_request(mdsc, req); |
1614 | goto out; | 1683 | goto out; |
1615 | } | 1684 | } |
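
A recurring theme in this mds_client.c rework: req->r_reply stops doubling as an error carrier. Previously an ERR_PTR could be stored in the message pointer (note the old `req->r_reply = ERR_PTR(PTR_ERR(msg))` above, and the sign slip it bred in `return -PTR_ERR(msg)`, which returned a positive value where callers expect a negative errno). Errors now live in an int and genuine replies are flagged separately; an illustrative subset of ceph_mds_request:

	struct ceph_mds_request {
		/* ... */
		struct ceph_msg	*r_reply;	/* a real message or NULL, never ERR_PTR */
		int		 r_err;		/* 0 or -errno */
		bool		 r_got_result;	/* set once a reply is recorded */
	};

	/* __do_request() can then test completion without IS_ERR(): */
	if (req->r_err || req->r_got_result)
		goto out;
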
@@ -1630,10 +1699,9 @@ static void __wake_requests(struct ceph_mds_client *mdsc, | |||
1630 | 1699 | ||
1631 | /* | 1700 | /* |
1632 | * Wake up threads with requests pending for @mds, so that they can | 1701 | * Wake up threads with requests pending for @mds, so that they can |
1633 | * resubmit their requests to a possibly different mds. If @all is set, | 1702 | * resubmit their requests to a possibly different mds. |
1634 | * wake up if their requests has been forwarded to @mds, too. | ||
1635 | */ | 1703 | */ |
1636 | static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) | 1704 | static void kick_requests(struct ceph_mds_client *mdsc, int mds) |
1637 | { | 1705 | { |
1638 | struct ceph_mds_request *req; | 1706 | struct ceph_mds_request *req; |
1639 | struct rb_node *p; | 1707 | struct rb_node *p; |
@@ -1689,64 +1757,78 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
1689 | __register_request(mdsc, req, dir); | 1757 | __register_request(mdsc, req, dir); |
1690 | __do_request(mdsc, req); | 1758 | __do_request(mdsc, req); |
1691 | 1759 | ||
1692 | /* wait */ | 1760 | if (req->r_err) { |
1693 | if (!req->r_reply) { | 1761 | err = req->r_err; |
1694 | mutex_unlock(&mdsc->mutex); | 1762 | __unregister_request(mdsc, req); |
1695 | if (req->r_timeout) { | 1763 | dout("do_request early error %d\n", err); |
1696 | err = (long)wait_for_completion_interruptible_timeout( | 1764 | goto out; |
1697 | &req->r_completion, req->r_timeout); | ||
1698 | if (err == 0) | ||
1699 | req->r_reply = ERR_PTR(-EIO); | ||
1700 | else if (err < 0) | ||
1701 | req->r_reply = ERR_PTR(err); | ||
1702 | } else { | ||
1703 | err = wait_for_completion_interruptible( | ||
1704 | &req->r_completion); | ||
1705 | if (err) | ||
1706 | req->r_reply = ERR_PTR(err); | ||
1707 | } | ||
1708 | mutex_lock(&mdsc->mutex); | ||
1709 | } | 1765 | } |
1710 | 1766 | ||
1711 | if (IS_ERR(req->r_reply)) { | 1767 | /* wait */ |
1712 | err = PTR_ERR(req->r_reply); | 1768 | mutex_unlock(&mdsc->mutex); |
1713 | req->r_reply = NULL; | 1769 | dout("do_request waiting\n"); |
1770 | if (req->r_timeout) { | ||
1771 | err = (long)wait_for_completion_killable_timeout( | ||
1772 | &req->r_completion, req->r_timeout); | ||
1773 | if (err == 0) | ||
1774 | err = -EIO; | ||
1775 | } else { | ||
1776 | err = wait_for_completion_killable(&req->r_completion); | ||
1777 | } | ||
1778 | dout("do_request waited, got %d\n", err); | ||
1779 | mutex_lock(&mdsc->mutex); | ||
1714 | 1780 | ||
1715 | if (err == -ERESTARTSYS) { | 1781 | /* only abort if we didn't race with a real reply */ |
1716 | /* aborted */ | 1782 | if (req->r_got_result) { |
1717 | req->r_aborted = true; | 1783 | err = le32_to_cpu(req->r_reply_info.head->result); |
1784 | } else if (err < 0) { | ||
1785 | dout("aborted request %lld with %d\n", req->r_tid, err); | ||
1718 | 1786 | ||
1719 | if (req->r_locked_dir && | 1787 | /* |
1720 | (req->r_op & CEPH_MDS_OP_WRITE)) { | 1788 | * ensure we aren't running concurrently with |
1721 | struct ceph_inode_info *ci = | 1789 | * ceph_fill_trace or ceph_readdir_prepopulate, which |
1722 | ceph_inode(req->r_locked_dir); | 1790 | * rely on locks (dir mutex) held by our caller. |
1791 | */ | ||
1792 | mutex_lock(&req->r_fill_mutex); | ||
1793 | req->r_err = err; | ||
1794 | req->r_aborted = true; | ||
1795 | mutex_unlock(&req->r_fill_mutex); | ||
1723 | 1796 | ||
1724 | dout("aborted, clearing I_COMPLETE on %p\n", | 1797 | if (req->r_locked_dir && |
1725 | req->r_locked_dir); | 1798 | (req->r_op & CEPH_MDS_OP_WRITE)) |
1726 | spin_lock(&req->r_locked_dir->i_lock); | 1799 | ceph_invalidate_dir_request(req); |
1727 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1728 | ci->i_release_count++; | ||
1729 | spin_unlock(&req->r_locked_dir->i_lock); | ||
1730 | } | ||
1731 | } else { | ||
1732 | /* clean up this request */ | ||
1733 | __unregister_request(mdsc, req); | ||
1734 | if (!list_empty(&req->r_unsafe_item)) | ||
1735 | list_del_init(&req->r_unsafe_item); | ||
1736 | complete(&req->r_safe_completion); | ||
1737 | } | ||
1738 | } else if (req->r_err) { | ||
1739 | err = req->r_err; | ||
1740 | } else { | 1800 | } else { |
1741 | err = le32_to_cpu(req->r_reply_info.head->result); | 1801 | err = req->r_err; |
1742 | } | 1802 | } |
1743 | mutex_unlock(&mdsc->mutex); | ||
1744 | 1803 | ||
1804 | out: | ||
1805 | mutex_unlock(&mdsc->mutex); | ||
1745 | dout("do_request %p done, result %d\n", req, err); | 1806 | dout("do_request %p done, result %d\n", req, err); |
1746 | return err; | 1807 | return err; |
1747 | } | 1808 | } |
1748 | 1809 | ||
1749 | /* | 1810 | /* |
1811 | * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS | ||
1812 | * namespace request. | ||
1813 | */ | ||
1814 | void ceph_invalidate_dir_request(struct ceph_mds_request *req) | ||
1815 | { | ||
1816 | struct inode *inode = req->r_locked_dir; | ||
1817 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1818 | |||
1819 | dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode); | ||
1820 | spin_lock(&inode->i_lock); | ||
1821 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1822 | ci->i_release_count++; | ||
1823 | spin_unlock(&inode->i_lock); | ||
1824 | |||
1825 | if (req->r_dentry) | ||
1826 | ceph_invalidate_dentry_lease(req->r_dentry); | ||
1827 | if (req->r_old_dentry) | ||
1828 | ceph_invalidate_dentry_lease(req->r_old_dentry); | ||
1829 | } | ||
1830 | |||
1831 | /* | ||
1750 | * Handle mds reply. | 1832 | * Handle mds reply. |
1751 | * | 1833 | * |
1752 | * We take the session mutex and parse and process the reply immediately. | 1834 | * We take the session mutex and parse and process the reply immediately. |
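
The rewritten wait in ceph_mdsc_do_request() switches from the _interruptible to the _killable completion variants, so only fatal signals abort an in-flight MDS request, and it checks for early errors before sleeping at all. On abort, r_fill_mutex is the synchronization point with handle_reply(): the aborting thread sets r_err/r_aborted under it, and the reply path takes the same mutex around ceph_fill_trace()/ceph_readdir_prepopulate(), so trace filling never runs concurrently with a caller that has given up its directory locks. The abort side, condensed from the hunk above:

	err = wait_for_completion_killable(&req->r_completion);
	if (err < 0 && !req->r_got_result) {	/* no real reply raced in */
		mutex_lock(&req->r_fill_mutex);	/* excludes handle_reply's fill */
		req->r_err = err;
		req->r_aborted = true;
		mutex_unlock(&req->r_fill_mutex);
	}
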
@@ -1797,6 +1879,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1797 | mutex_unlock(&mdsc->mutex); | 1879 | mutex_unlock(&mdsc->mutex); |
1798 | goto out; | 1880 | goto out; |
1799 | } | 1881 | } |
1882 | if (req->r_got_safe && !head->safe) { | ||
1883 | pr_warning("got unsafe after safe on %llu from mds%d\n", | ||
1884 | tid, mds); | ||
1885 | mutex_unlock(&mdsc->mutex); | ||
1886 | goto out; | ||
1887 | } | ||
1800 | 1888 | ||
1801 | result = le32_to_cpu(head->result); | 1889 | result = le32_to_cpu(head->result); |
1802 | 1890 | ||
@@ -1838,11 +1926,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1838 | mutex_unlock(&mdsc->mutex); | 1926 | mutex_unlock(&mdsc->mutex); |
1839 | goto out; | 1927 | goto out; |
1840 | } | 1928 | } |
1841 | } | 1929 | } else { |
1842 | |||
1843 | BUG_ON(req->r_reply); | ||
1844 | |||
1845 | if (!head->safe) { | ||
1846 | req->r_got_unsafe = true; | 1930 | req->r_got_unsafe = true; |
1847 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); | 1931 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); |
1848 | } | 1932 | } |
@@ -1871,21 +1955,30 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1871 | } | 1955 | } |
1872 | 1956 | ||
1873 | /* insert trace into our cache */ | 1957 | /* insert trace into our cache */ |
1958 | mutex_lock(&req->r_fill_mutex); | ||
1874 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 1959 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); |
1875 | if (err == 0) { | 1960 | if (err == 0) { |
1876 | if (result == 0 && rinfo->dir_nr) | 1961 | if (result == 0 && rinfo->dir_nr) |
1877 | ceph_readdir_prepopulate(req, req->r_session); | 1962 | ceph_readdir_prepopulate(req, req->r_session); |
1878 | ceph_unreserve_caps(&req->r_caps_reservation); | 1963 | ceph_unreserve_caps(&req->r_caps_reservation); |
1879 | } | 1964 | } |
1965 | mutex_unlock(&req->r_fill_mutex); | ||
1880 | 1966 | ||
1881 | up_read(&mdsc->snap_rwsem); | 1967 | up_read(&mdsc->snap_rwsem); |
1882 | out_err: | 1968 | out_err: |
1883 | if (err) { | 1969 | mutex_lock(&mdsc->mutex); |
1884 | req->r_err = err; | 1970 | if (!req->r_aborted) { |
1971 | if (err) { | ||
1972 | req->r_err = err; | ||
1973 | } else { | ||
1974 | req->r_reply = msg; | ||
1975 | ceph_msg_get(msg); | ||
1976 | req->r_got_result = true; | ||
1977 | } | ||
1885 | } else { | 1978 | } else { |
1886 | req->r_reply = msg; | 1979 | dout("reply arrived after request %lld was aborted\n", tid); |
1887 | ceph_msg_get(msg); | ||
1888 | } | 1980 | } |
1981 | mutex_unlock(&mdsc->mutex); | ||
1889 | 1982 | ||
1890 | add_cap_releases(mdsc, req->r_session, -1); | 1983 | add_cap_releases(mdsc, req->r_session, -1); |
1891 | mutex_unlock(&session->s_mutex); | 1984 | mutex_unlock(&session->s_mutex); |
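
handle_reply() grows the mirror-image check: the outcome is recorded under mdsc->mutex and only if the waiter has not already aborted, so a reply that arrives after an abort is logged and discarded rather than touching a request whose caller has moved on. The tail of the function, reassembled from the interleaved +/- lines:

	mutex_lock(&mdsc->mutex);
	if (!req->r_aborted) {
		if (err) {
			req->r_err = err;
		} else {
			req->r_reply = msg;
			ceph_msg_get(msg);	/* hold a ref for the waiter */
			req->r_got_result = true;
		}
	} else {
		dout("reply arrived after request %lld was aborted\n", tid);
	}
	mutex_unlock(&mdsc->mutex);
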
@@ -1921,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, | |||
1921 | mutex_lock(&mdsc->mutex); | 2014 | mutex_lock(&mdsc->mutex); |
1922 | req = __lookup_request(mdsc, tid); | 2015 | req = __lookup_request(mdsc, tid); |
1923 | if (!req) { | 2016 | if (!req) { |
1924 | dout("forward %llu to mds%d - req dne\n", tid, next_mds); | 2017 | dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); |
1925 | goto out; /* dup reply? */ | 2018 | goto out; /* dup reply? */ |
1926 | } | 2019 | } |
1927 | 2020 | ||
1928 | if (fwd_seq <= req->r_num_fwd) { | 2021 | if (req->r_aborted) { |
1929 | dout("forward %llu to mds%d - old seq %d <= %d\n", | 2022 | dout("forward tid %llu aborted, unregistering\n", tid); |
2023 | __unregister_request(mdsc, req); | ||
2024 | } else if (fwd_seq <= req->r_num_fwd) { | ||
2025 | dout("forward tid %llu to mds%d - old seq %d <= %d\n", | ||
1930 | tid, next_mds, req->r_num_fwd, fwd_seq); | 2026 | tid, next_mds, req->r_num_fwd, fwd_seq); |
1931 | } else { | 2027 | } else { |
1932 | /* resend. forward race not possible; mds would drop */ | 2028 | /* resend. forward race not possible; mds would drop */ |
1933 | dout("forward %llu to mds%d (we resend)\n", tid, next_mds); | 2029 | dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); |
2030 | BUG_ON(req->r_err); | ||
2031 | BUG_ON(req->r_got_result); | ||
1934 | req->r_num_fwd = fwd_seq; | 2032 | req->r_num_fwd = fwd_seq; |
1935 | req->r_resend_mds = next_mds; | 2033 | req->r_resend_mds = next_mds; |
1936 | put_request_session(req); | 2034 | put_request_session(req); |
@@ -1984,6 +2082,8 @@ static void handle_session(struct ceph_mds_session *session, | |||
1984 | 2082 | ||
1985 | switch (op) { | 2083 | switch (op) { |
1986 | case CEPH_SESSION_OPEN: | 2084 | case CEPH_SESSION_OPEN: |
2085 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
2086 | pr_info("mds%d reconnect success\n", session->s_mds); | ||
1987 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2087 | session->s_state = CEPH_MDS_SESSION_OPEN; |
1988 | renewed_caps(mdsc, session, 0); | 2088 | renewed_caps(mdsc, session, 0); |
1989 | wake = 1; | 2089 | wake = 1; |
@@ -1997,10 +2097,12 @@ static void handle_session(struct ceph_mds_session *session, | |||
1997 | break; | 2097 | break; |
1998 | 2098 | ||
1999 | case CEPH_SESSION_CLOSE: | 2099 | case CEPH_SESSION_CLOSE: |
2100 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
2101 | pr_info("mds%d reconnect denied\n", session->s_mds); | ||
2000 | remove_session_caps(session); | 2102 | remove_session_caps(session); |
2001 | wake = 1; /* for good measure */ | 2103 | wake = 1; /* for good measure */ |
2002 | complete(&mdsc->session_close_waiters); | 2104 | complete(&mdsc->session_close_waiters); |
2003 | kick_requests(mdsc, mds, 0); /* cur only */ | 2105 | kick_requests(mdsc, mds); |
2004 | break; | 2106 | break; |
2005 | 2107 | ||
2006 | case CEPH_SESSION_STALE: | 2108 | case CEPH_SESSION_STALE: |
@@ -2132,54 +2234,44 @@ out: | |||
2132 | * | 2234 | * |
2133 | * called with mdsc->mutex held. | 2235 | * called with mdsc->mutex held. |
2134 | */ | 2236 | */ |
2135 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | 2237 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, |
2238 | struct ceph_mds_session *session) | ||
2136 | { | 2239 | { |
2137 | struct ceph_mds_session *session = NULL; | ||
2138 | struct ceph_msg *reply; | 2240 | struct ceph_msg *reply; |
2139 | struct rb_node *p; | 2241 | struct rb_node *p; |
2242 | int mds = session->s_mds; | ||
2140 | int err = -ENOMEM; | 2243 | int err = -ENOMEM; |
2141 | struct ceph_pagelist *pagelist; | 2244 | struct ceph_pagelist *pagelist; |
2142 | 2245 | ||
2143 | pr_info("reconnect to recovering mds%d\n", mds); | 2246 | pr_info("mds%d reconnect start\n", mds); |
2144 | 2247 | ||
2145 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); | 2248 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); |
2146 | if (!pagelist) | 2249 | if (!pagelist) |
2147 | goto fail_nopagelist; | 2250 | goto fail_nopagelist; |
2148 | ceph_pagelist_init(pagelist); | 2251 | ceph_pagelist_init(pagelist); |
2149 | 2252 | ||
2150 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); | 2253 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS); |
2151 | if (IS_ERR(reply)) { | 2254 | if (!reply) |
2152 | err = PTR_ERR(reply); | ||
2153 | goto fail_nomsg; | 2255 | goto fail_nomsg; |
2154 | } | ||
2155 | |||
2156 | /* find session */ | ||
2157 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
2158 | mutex_unlock(&mdsc->mutex); /* drop lock for duration */ | ||
2159 | 2256 | ||
2160 | if (session) { | 2257 | mutex_lock(&session->s_mutex); |
2161 | mutex_lock(&session->s_mutex); | 2258 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; |
2259 | session->s_seq = 0; | ||
2162 | 2260 | ||
2163 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; | 2261 | ceph_con_open(&session->s_con, |
2164 | session->s_seq = 0; | 2262 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); |
2165 | 2263 | ||
2166 | ceph_con_open(&session->s_con, | 2264 | /* replay unsafe requests */ |
2167 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); | 2265 | replay_unsafe_requests(mdsc, session); |
2168 | |||
2169 | /* replay unsafe requests */ | ||
2170 | replay_unsafe_requests(mdsc, session); | ||
2171 | } else { | ||
2172 | dout("no session for mds%d, will send short reconnect\n", | ||
2173 | mds); | ||
2174 | } | ||
2175 | 2266 | ||
2176 | down_read(&mdsc->snap_rwsem); | 2267 | down_read(&mdsc->snap_rwsem); |
2177 | 2268 | ||
2178 | if (!session) | ||
2179 | goto send; | ||
2180 | dout("session %p state %s\n", session, | 2269 | dout("session %p state %s\n", session, |
2181 | session_state_name(session->s_state)); | 2270 | session_state_name(session->s_state)); |
2182 | 2271 | ||
2272 | /* drop old cap expires; we're about to reestablish that state */ | ||
2273 | discard_cap_releases(mdsc, session); | ||
2274 | |||
2183 | /* traverse this session's caps */ | 2275 | /* traverse this session's caps */ |
2184 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2276 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
2185 | if (err) | 2277 | if (err) |
@@ -2208,36 +2300,29 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2208 | goto fail; | 2300 | goto fail; |
2209 | } | 2301 | } |
2210 | 2302 | ||
2211 | send: | ||
2212 | reply->pagelist = pagelist; | 2303 | reply->pagelist = pagelist; |
2213 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2304 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2305 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2215 | ceph_con_send(&session->s_con, reply); | 2306 | ceph_con_send(&session->s_con, reply); |
2216 | 2307 | ||
2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | ||
2218 | mutex_unlock(&session->s_mutex); | 2308 | mutex_unlock(&session->s_mutex); |
2219 | 2309 | ||
2220 | mutex_lock(&mdsc->mutex); | 2310 | mutex_lock(&mdsc->mutex); |
2221 | __wake_requests(mdsc, &session->s_waiting); | 2311 | __wake_requests(mdsc, &session->s_waiting); |
2222 | mutex_unlock(&mdsc->mutex); | 2312 | mutex_unlock(&mdsc->mutex); |
2223 | 2313 | ||
2224 | ceph_put_mds_session(session); | ||
2225 | |||
2226 | up_read(&mdsc->snap_rwsem); | 2314 | up_read(&mdsc->snap_rwsem); |
2227 | mutex_lock(&mdsc->mutex); | ||
2228 | return; | 2315 | return; |
2229 | 2316 | ||
2230 | fail: | 2317 | fail: |
2231 | ceph_msg_put(reply); | 2318 | ceph_msg_put(reply); |
2232 | up_read(&mdsc->snap_rwsem); | 2319 | up_read(&mdsc->snap_rwsem); |
2233 | mutex_unlock(&session->s_mutex); | 2320 | mutex_unlock(&session->s_mutex); |
2234 | ceph_put_mds_session(session); | ||
2235 | fail_nomsg: | 2321 | fail_nomsg: |
2236 | ceph_pagelist_release(pagelist); | 2322 | ceph_pagelist_release(pagelist); |
2237 | kfree(pagelist); | 2323 | kfree(pagelist); |
2238 | fail_nopagelist: | 2324 | fail_nopagelist: |
2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); | 2325 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
2240 | mutex_lock(&mdsc->mutex); | ||
2241 | return; | 2326 | return; |
2242 | } | 2327 | } |
2243 | 2328 | ||
@@ -2290,7 +2375,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2290 | } | 2375 | } |
2291 | 2376 | ||
2292 | /* kick any requests waiting on the recovering mds */ | 2377 | /* kick any requests waiting on the recovering mds */ |
2293 | kick_requests(mdsc, i, 1); | 2378 | kick_requests(mdsc, i); |
2294 | } else if (oldstate == newstate) { | 2379 | } else if (oldstate == newstate) { |
2295 | continue; /* nothing new with this mds */ | 2380 | continue; /* nothing new with this mds */ |
2296 | } | 2381 | } |
@@ -2299,22 +2384,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2299 | * send reconnect? | 2384 | * send reconnect? |
2300 | */ | 2385 | */ |
2301 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && | 2386 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && |
2302 | newstate >= CEPH_MDS_STATE_RECONNECT) | 2387 | newstate >= CEPH_MDS_STATE_RECONNECT) { |
2303 | send_mds_reconnect(mdsc, i); | 2388 | mutex_unlock(&mdsc->mutex); |
2389 | send_mds_reconnect(mdsc, s); | ||
2390 | mutex_lock(&mdsc->mutex); | ||
2391 | } | ||
2304 | 2392 | ||
2305 | /* | 2393 | /* |
2306 | * kick requests on any mds that has gone active. | 2394 | * kick request on any mds that has gone active. |
2307 | * | ||
2308 | * kick requests on cur or forwarder: we may have sent | ||
2309 | * the request to mds1, mds1 told us it forwarded it | ||
2310 | * to mds2, but then we learn mds1 failed and can't be | ||
2311 | * sure it successfully forwarded our request before | ||
2312 | * it died. | ||
2313 | */ | 2395 | */ |
2314 | if (oldstate < CEPH_MDS_STATE_ACTIVE && | 2396 | if (oldstate < CEPH_MDS_STATE_ACTIVE && |
2315 | newstate >= CEPH_MDS_STATE_ACTIVE) { | 2397 | newstate >= CEPH_MDS_STATE_ACTIVE) { |
2316 | pr_info("mds%d reconnect completed\n", s->s_mds); | 2398 | if (oldstate != CEPH_MDS_STATE_CREATING && |
2317 | kick_requests(mdsc, i, 1); | 2399 | oldstate != CEPH_MDS_STATE_STARTING) |
2400 | pr_info("mds%d recovery completed\n", s->s_mds); | ||
2401 | kick_requests(mdsc, i); | ||
2318 | ceph_kick_flushing_caps(mdsc, s); | 2402 | ceph_kick_flushing_caps(mdsc, s); |
2319 | wake_up_session_caps(s, 1); | 2403 | wake_up_session_caps(s, 1); |
2320 | } | 2404 | } |
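
send_mds_reconnect() used to look up the session itself and juggled mdsc->mutex internally (dropping it on entry "for duration" and re-taking it on every exit path). With the session passed in, check_new_map() owns that dance instead: it drops mdsc->mutex across the call, since the reconnect path takes session->s_mutex and briefly re-takes mdsc->mutex to wake waiters, then restores it to keep its own session iteration safe. The hand-off at the call site:

	if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
	    newstate >= CEPH_MDS_STATE_RECONNECT) {
		mutex_unlock(&mdsc->mutex);	/* callee takes s_mutex, then mdsc->mutex */
		send_mds_reconnect(mdsc, s);
		mutex_lock(&mdsc->mutex);	/* resume iterating under the lock */
	}
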
@@ -2457,12 +2541,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
2457 | dnamelen = dentry->d_name.len; | 2541 | dnamelen = dentry->d_name.len; |
2458 | len += dnamelen; | 2542 | len += dnamelen; |
2459 | 2543 | ||
2460 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); | 2544 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS); |
2461 | if (IS_ERR(msg)) | 2545 | if (!msg) |
2462 | return; | 2546 | return; |
2463 | lease = msg->front.iov_base; | 2547 | lease = msg->front.iov_base; |
2464 | lease->action = action; | 2548 | lease->action = action; |
2465 | lease->mask = cpu_to_le16(CEPH_LOCK_DN); | 2549 | lease->mask = cpu_to_le16(1); |
2466 | lease->ino = cpu_to_le64(ceph_vino(inode).ino); | 2550 | lease->ino = cpu_to_le64(ceph_vino(inode).ino); |
2467 | lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); | 2551 | lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); |
2468 | lease->seq = cpu_to_le32(seq); | 2552 | lease->seq = cpu_to_le32(seq); |
@@ -2492,7 +2576,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, | |||
2492 | 2576 | ||
2493 | BUG_ON(inode == NULL); | 2577 | BUG_ON(inode == NULL); |
2494 | BUG_ON(dentry == NULL); | 2578 | BUG_ON(dentry == NULL); |
2495 | BUG_ON(mask != CEPH_LOCK_DN); | 2579 | BUG_ON(mask == 0); |
2496 | 2580 | ||
2497 | /* is dentry lease valid? */ | 2581 | /* is dentry lease valid? */ |
2498 | spin_lock(&dentry->d_lock); | 2582 | spin_lock(&dentry->d_lock); |
@@ -2603,7 +2687,9 @@ static void delayed_work(struct work_struct *work) | |||
2603 | else | 2687 | else |
2604 | ceph_con_keepalive(&s->s_con); | 2688 | ceph_con_keepalive(&s->s_con); |
2605 | add_cap_releases(mdsc, s, -1); | 2689 | add_cap_releases(mdsc, s, -1); |
2606 | send_cap_releases(mdsc, s); | 2690 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
2691 | s->s_state == CEPH_MDS_SESSION_HUNG) | ||
2692 | send_cap_releases(mdsc, s); | ||
2607 | mutex_unlock(&s->s_mutex); | 2693 | mutex_unlock(&s->s_mutex); |
2608 | ceph_put_mds_session(s); | 2694 | ceph_put_mds_session(s); |
2609 | 2695 | ||
@@ -2620,6 +2706,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2620 | mdsc->client = client; | 2706 | mdsc->client = client; |
2621 | mutex_init(&mdsc->mutex); | 2707 | mutex_init(&mdsc->mutex); |
2622 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2708 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
2709 | if (mdsc->mdsmap == NULL) | ||
2710 | return -ENOMEM; | ||
2711 | |||
2623 | init_completion(&mdsc->safe_umount_waiters); | 2712 | init_completion(&mdsc->safe_umount_waiters); |
2624 | init_completion(&mdsc->session_close_waiters); | 2713 | init_completion(&mdsc->session_close_waiters); |
2625 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2714 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
@@ -2645,6 +2734,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2645 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 2734 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
2646 | spin_lock_init(&mdsc->dentry_lru_lock); | 2735 | spin_lock_init(&mdsc->dentry_lru_lock); |
2647 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2736 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2737 | |||
2648 | return 0; | 2738 | return 0; |
2649 | } | 2739 | } |
2650 | 2740 | ||
@@ -2740,6 +2830,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
2740 | { | 2830 | { |
2741 | u64 want_tid, want_flush; | 2831 | u64 want_tid, want_flush; |
2742 | 2832 | ||
2833 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
2834 | return; | ||
2835 | |||
2743 | dout("sync\n"); | 2836 | dout("sync\n"); |
2744 | mutex_lock(&mdsc->mutex); | 2837 | mutex_lock(&mdsc->mutex); |
2745 | want_tid = mdsc->last_tid; | 2838 | want_tid = mdsc->last_tid; |
@@ -2922,9 +3015,10 @@ static void con_put(struct ceph_connection *con) | |||
2922 | static void peer_reset(struct ceph_connection *con) | 3015 | static void peer_reset(struct ceph_connection *con) |
2923 | { | 3016 | { |
2924 | struct ceph_mds_session *s = con->private; | 3017 | struct ceph_mds_session *s = con->private; |
3018 | struct ceph_mds_client *mdsc = s->s_mdsc; | ||
2925 | 3019 | ||
2926 | pr_err("mds%d gave us the boot. IMPLEMENT RECONNECT.\n", | 3020 | pr_warning("mds%d closed our session\n", s->s_mds); |
2927 | s->s_mds); | 3021 | send_mds_reconnect(mdsc, s); |
2928 | } | 3022 | } |
2929 | 3023 | ||
2930 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | 3024 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) |
@@ -3031,7 +3125,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
3031 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3125 | return ceph_monc_validate_auth(&mdsc->client->monc); |
3032 | } | 3126 | } |
3033 | 3127 | ||
3034 | const static struct ceph_connection_operations mds_con_ops = { | 3128 | static const struct ceph_connection_operations mds_con_ops = { |
3035 | .get = con_get, | 3129 | .get = con_get, |
3036 | .put = con_put, | 3130 | .put = con_put, |
3037 | .dispatch = dispatch, | 3131 | .dispatch = dispatch, |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 961cc6f65878..d9936c4f1212 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -165,6 +165,8 @@ struct ceph_mds_request { | |||
165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ | 165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ |
166 | struct inode *r_target_inode; /* resulting inode */ | 166 | struct inode *r_target_inode; /* resulting inode */ |
167 | 167 | ||
168 | struct mutex r_fill_mutex; | ||
169 | |||
168 | union ceph_mds_request_args r_args; | 170 | union ceph_mds_request_args r_args; |
169 | int r_fmode; /* file mode, if expecting cap */ | 171 | int r_fmode; /* file mode, if expecting cap */ |
170 | 172 | ||
@@ -213,7 +215,7 @@ struct ceph_mds_request { | |||
213 | struct completion r_safe_completion; | 215 | struct completion r_safe_completion; |
214 | ceph_mds_request_callback_t r_callback; | 216 | ceph_mds_request_callback_t r_callback; |
215 | struct list_head r_unsafe_item; /* per-session unsafe list item */ | 217 | struct list_head r_unsafe_item; /* per-session unsafe list item */ |
216 | bool r_got_unsafe, r_got_safe; | 218 | bool r_got_unsafe, r_got_safe, r_got_result; |
217 | 219 | ||
218 | bool r_did_prepopulate; | 220 | bool r_did_prepopulate; |
219 | u32 r_readdir_offset; | 221 | u32 r_readdir_offset; |
@@ -301,6 +303,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
301 | struct inode *inode, | 303 | struct inode *inode, |
302 | struct dentry *dn, int mask); | 304 | struct dentry *dn, int mask); |
303 | 305 | ||
306 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | ||
307 | |||
304 | extern struct ceph_mds_request * | 308 | extern struct ceph_mds_request * |
305 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 309 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
306 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 310 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index cd4fadb6491a..64b8b1f7863d 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -39,18 +39,6 @@ static void queue_con(struct ceph_connection *con); | |||
39 | static void con_work(struct work_struct *); | 39 | static void con_work(struct work_struct *); |
40 | static void ceph_fault(struct ceph_connection *con); | 40 | static void ceph_fault(struct ceph_connection *con); |
41 | 41 | ||
42 | const char *ceph_name_type_str(int t) | ||
43 | { | ||
44 | switch (t) { | ||
45 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
46 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
47 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
48 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
49 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
50 | default: return "???"; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* | 42 | /* |
55 | * nicely render a sockaddr as a string. | 43 | * nicely render a sockaddr as a string. |
56 | */ | 44 | */ |
@@ -132,6 +120,12 @@ void ceph_msgr_exit(void) | |||
132 | destroy_workqueue(ceph_msgr_wq); | 120 | destroy_workqueue(ceph_msgr_wq); |
133 | } | 121 | } |
134 | 122 | ||
123 | void ceph_msgr_flush() | ||
124 | { | ||
125 | flush_workqueue(ceph_msgr_wq); | ||
126 | } | ||
127 | |||
128 | |||
135 | /* | 129 | /* |
136 | * socket callback functions | 130 | * socket callback functions |
137 | */ | 131 | */ |
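
ceph_msgr_flush() gives callers (shutdown paths, presumably) a way to drain the shared messenger workqueue. One nit the hunk leaves behind: an empty parameter list in C declares a function with unspecified parameters, so `(void)` is the strict prototype and is what sparse asks for ("non-ANSI function declaration"):

	void ceph_msgr_flush(void)	/* (void), not (): a real prototype */
	{
		flush_workqueue(ceph_msgr_wq);
	}
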
@@ -340,6 +334,7 @@ static void reset_connection(struct ceph_connection *con) | |||
340 | ceph_msg_put(con->out_msg); | 334 | ceph_msg_put(con->out_msg); |
341 | con->out_msg = NULL; | 335 | con->out_msg = NULL; |
342 | } | 336 | } |
337 | con->out_keepalive_pending = false; | ||
343 | con->in_seq = 0; | 338 | con->in_seq = 0; |
344 | con->in_seq_acked = 0; | 339 | con->in_seq_acked = 0; |
345 | } | 340 | } |
@@ -357,6 +352,7 @@ void ceph_con_close(struct ceph_connection *con) | |||
357 | clear_bit(WRITE_PENDING, &con->state); | 352 | clear_bit(WRITE_PENDING, &con->state); |
358 | mutex_lock(&con->mutex); | 353 | mutex_lock(&con->mutex); |
359 | reset_connection(con); | 354 | reset_connection(con); |
355 | con->peer_global_seq = 0; | ||
360 | cancel_delayed_work(&con->work); | 356 | cancel_delayed_work(&con->work); |
361 | mutex_unlock(&con->mutex); | 357 | mutex_unlock(&con->mutex); |
362 | queue_con(con); | 358 | queue_con(con); |
@@ -661,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
661 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 657 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
662 | con->connect_seq, global_seq, proto); | 658 | con->connect_seq, global_seq, proto); |
663 | 659 | ||
664 | con->out_connect.features = CEPH_FEATURE_SUPPORTED; | 660 | con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; |
665 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 661 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
666 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 662 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
667 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 663 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1124,8 +1120,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1124 | 1120 | ||
1125 | static int process_connect(struct ceph_connection *con) | 1121 | static int process_connect(struct ceph_connection *con) |
1126 | { | 1122 | { |
1127 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1123 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; |
1128 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1124 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; |
1129 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1125 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1130 | 1126 | ||
1131 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1127 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1233,6 +1229,7 @@ static int process_connect(struct ceph_connection *con) | |||
1233 | clear_bit(CONNECTING, &con->state); | 1229 | clear_bit(CONNECTING, &con->state); |
1234 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); | 1230 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); |
1235 | con->connect_seq++; | 1231 | con->connect_seq++; |
1232 | con->peer_features = server_feat; | ||
1236 | dout("process_connect got READY gseq %d cseq %d (%d)\n", | 1233 | dout("process_connect got READY gseq %d cseq %d (%d)\n", |
1237 | con->peer_global_seq, | 1234 | con->peer_global_seq, |
1238 | le32_to_cpu(con->in_reply.connect_seq), | 1235 | le32_to_cpu(con->in_reply.connect_seq), |
@@ -1402,19 +1399,17 @@ static int read_partial_message(struct ceph_connection *con) | |||
1402 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1399 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); |
1403 | if (skip) { | 1400 | if (skip) { |
1404 | /* skip this message */ | 1401 | /* skip this message */ |
1405 | dout("alloc_msg returned NULL, skipping message\n"); | 1402 | dout("alloc_msg said skip message\n"); |
1406 | con->in_base_pos = -front_len - middle_len - data_len - | 1403 | con->in_base_pos = -front_len - middle_len - data_len - |
1407 | sizeof(m->footer); | 1404 | sizeof(m->footer); |
1408 | con->in_tag = CEPH_MSGR_TAG_READY; | 1405 | con->in_tag = CEPH_MSGR_TAG_READY; |
1409 | con->in_seq++; | 1406 | con->in_seq++; |
1410 | return 0; | 1407 | return 0; |
1411 | } | 1408 | } |
1412 | if (IS_ERR(con->in_msg)) { | 1409 | if (!con->in_msg) { |
1413 | ret = PTR_ERR(con->in_msg); | ||
1414 | con->in_msg = NULL; | ||
1415 | con->error_msg = | 1410 | con->error_msg = |
1416 | "error allocating memory for incoming message"; | 1411 | "error allocating memory for incoming message"; |
1417 | return ret; | 1412 | return -ENOMEM; |
1418 | } | 1413 | } |
1419 | m = con->in_msg; | 1414 | m = con->in_msg; |
1420 | m->front.iov_len = 0; /* haven't read it yet */ | 1415 | m->front.iov_len = 0; /* haven't read it yet */ |
@@ -1514,14 +1509,14 @@ static void process_message(struct ceph_connection *con) | |||
1514 | 1509 | ||
1515 | /* if first message, set peer_name */ | 1510 | /* if first message, set peer_name */ |
1516 | if (con->peer_name.type == 0) | 1511 | if (con->peer_name.type == 0) |
1517 | con->peer_name = msg->hdr.src.name; | 1512 | con->peer_name = msg->hdr.src; |
1518 | 1513 | ||
1519 | con->in_seq++; | 1514 | con->in_seq++; |
1520 | mutex_unlock(&con->mutex); | 1515 | mutex_unlock(&con->mutex); |
1521 | 1516 | ||
1522 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", | 1517 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", |
1523 | msg, le64_to_cpu(msg->hdr.seq), | 1518 | msg, le64_to_cpu(msg->hdr.seq), |
1524 | ENTITY_NAME(msg->hdr.src.name), | 1519 | ENTITY_NAME(msg->hdr.src), |
1525 | le16_to_cpu(msg->hdr.type), | 1520 | le16_to_cpu(msg->hdr.type), |
1526 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), | 1521 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), |
1527 | le32_to_cpu(msg->hdr.front_len), | 1522 | le32_to_cpu(msg->hdr.front_len), |
@@ -1546,7 +1541,6 @@ static int try_write(struct ceph_connection *con) | |||
1546 | dout("try_write start %p state %lu nref %d\n", con, con->state, | 1541 | dout("try_write start %p state %lu nref %d\n", con, con->state, |
1547 | atomic_read(&con->nref)); | 1542 | atomic_read(&con->nref)); |
1548 | 1543 | ||
1549 | mutex_lock(&con->mutex); | ||
1550 | more: | 1544 | more: |
1551 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); | 1545 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); |
1552 | 1546 | ||
@@ -1639,7 +1633,6 @@ do_next: | |||
1639 | done: | 1633 | done: |
1640 | ret = 0; | 1634 | ret = 0; |
1641 | out: | 1635 | out: |
1642 | mutex_unlock(&con->mutex); | ||
1643 | dout("try_write done on %p\n", con); | 1636 | dout("try_write done on %p\n", con); |
1644 | return ret; | 1637 | return ret; |
1645 | } | 1638 | } |
@@ -1651,7 +1644,6 @@ out: | |||
1651 | */ | 1644 | */ |
1652 | static int try_read(struct ceph_connection *con) | 1645 | static int try_read(struct ceph_connection *con) |
1653 | { | 1646 | { |
1654 | struct ceph_messenger *msgr; | ||
1655 | int ret = -1; | 1647 | int ret = -1; |
1656 | 1648 | ||
1657 | if (!con->sock) | 1649 | if (!con->sock) |
@@ -1661,9 +1653,6 @@ static int try_read(struct ceph_connection *con) | |||
1661 | return 0; | 1653 | return 0; |
1662 | 1654 | ||
1663 | dout("try_read start on %p\n", con); | 1655 | dout("try_read start on %p\n", con); |
1664 | msgr = con->msgr; | ||
1665 | |||
1666 | mutex_lock(&con->mutex); | ||
1667 | 1656 | ||
1668 | more: | 1657 | more: |
1669 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | 1658 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, |
@@ -1758,7 +1747,6 @@ more: | |||
1758 | done: | 1747 | done: |
1759 | ret = 0; | 1748 | ret = 0; |
1760 | out: | 1749 | out: |
1761 | mutex_unlock(&con->mutex); | ||
1762 | dout("try_read done on %p\n", con); | 1750 | dout("try_read done on %p\n", con); |
1763 | return ret; | 1751 | return ret; |
1764 | 1752 | ||
@@ -1830,6 +1818,8 @@ more: | |||
1830 | dout("con_work %p start, clearing QUEUED\n", con); | 1818 | dout("con_work %p start, clearing QUEUED\n", con); |
1831 | clear_bit(QUEUED, &con->state); | 1819 | clear_bit(QUEUED, &con->state); |
1832 | 1820 | ||
1821 | mutex_lock(&con->mutex); | ||
1822 | |||
1833 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1823 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
1834 | dout("con_work CLOSED\n"); | 1824 | dout("con_work CLOSED\n"); |
1835 | con_close_socket(con); | 1825 | con_close_socket(con); |
@@ -1844,11 +1834,16 @@ more: | |||
1844 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || | 1834 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || |
1845 | try_read(con) < 0 || | 1835 | try_read(con) < 0 || |
1846 | try_write(con) < 0) { | 1836 | try_write(con) < 0) { |
1837 | mutex_unlock(&con->mutex); | ||
1847 | backoff = 1; | 1838 | backoff = 1; |
1848 | ceph_fault(con); /* error/fault path */ | 1839 | ceph_fault(con); /* error/fault path */ |
1840 | goto done_unlocked; | ||
1849 | } | 1841 | } |
1850 | 1842 | ||
1851 | done: | 1843 | done: |
1844 | mutex_unlock(&con->mutex); | ||
1845 | |||
1846 | done_unlocked: | ||
1852 | clear_bit(BUSY, &con->state); | 1847 | clear_bit(BUSY, &con->state); |
1853 | dout("con->state=%lu\n", con->state); | 1848 | dout("con->state=%lu\n", con->state); |
1854 | if (test_bit(QUEUED, &con->state)) { | 1849 | if (test_bit(QUEUED, &con->state)) { |
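The net effect of the try_write()/try_read()/con_work() hunks above is a locking inversion: instead of each helper taking con->mutex itself, con_work() now holds it across the whole pass, and the fault path drops it before calling ceph_fault() and skips the normal unlock via the new done_unlocked label. A condensed sketch of the resulting shape (the real function also handles the CLOSED check and requeueing shown above):

    static void con_work(struct work_struct *work)
    {
            struct ceph_connection *con =
                    container_of(work, struct ceph_connection, work.work);

            mutex_lock(&con->mutex);            /* one lock for the whole pass */

            if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
                try_read(con) < 0 ||            /* both now run with mutex held */
                try_write(con) < 0) {
                    mutex_unlock(&con->mutex);
                    ceph_fault(con);            /* fault handling runs unlocked */
                    goto done_unlocked;
            }

            mutex_unlock(&con->mutex);
    done_unlocked:
            clear_bit(BUSY, &con->state);       /* requeue check omitted here */
    }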
@@ -1947,7 +1942,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
1947 | 1942 | ||
1948 | /* the zero page is needed if a request is "canceled" while the message | 1943 | /* the zero page is needed if a request is "canceled" while the message |
1949 | * is being written over the socket */ | 1944 | * is being written over the socket */ |
1950 | msgr->zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 1945 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); |
1951 | if (!msgr->zero_page) { | 1946 | if (!msgr->zero_page) { |
1952 | kfree(msgr); | 1947 | kfree(msgr); |
1953 | return ERR_PTR(-ENOMEM); | 1948 | return ERR_PTR(-ENOMEM); |
@@ -1987,9 +1982,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
1987 | } | 1982 | } |
1988 | 1983 | ||
1989 | /* set src+dst */ | 1984 | /* set src+dst */ |
1990 | msg->hdr.src.name = con->msgr->inst.name; | 1985 | msg->hdr.src = con->msgr->inst.name; |
1991 | msg->hdr.src.addr = con->msgr->my_enc_addr; | ||
1992 | msg->hdr.orig_src = msg->hdr.src; | ||
1993 | 1986 | ||
1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1987 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
1995 | 1988 | ||
@@ -2083,12 +2076,11 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
2083 | * construct a new message with given type, size | 2076 | * construct a new message with given type, size |
2084 | * the new msg has a ref count of 1. | 2077 | * the new msg has a ref count of 1. |
2085 | */ | 2078 | */ |
2086 | struct ceph_msg *ceph_msg_new(int type, int front_len, | 2079 | struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) |
2087 | int page_len, int page_off, struct page **pages) | ||
2088 | { | 2080 | { |
2089 | struct ceph_msg *m; | 2081 | struct ceph_msg *m; |
2090 | 2082 | ||
2091 | m = kmalloc(sizeof(*m), GFP_NOFS); | 2083 | m = kmalloc(sizeof(*m), flags); |
2092 | if (m == NULL) | 2084 | if (m == NULL) |
2093 | goto out; | 2085 | goto out; |
2094 | kref_init(&m->kref); | 2086 | kref_init(&m->kref); |
@@ -2100,8 +2092,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2100 | m->hdr.version = 0; | 2092 | m->hdr.version = 0; |
2101 | m->hdr.front_len = cpu_to_le32(front_len); | 2093 | m->hdr.front_len = cpu_to_le32(front_len); |
2102 | m->hdr.middle_len = 0; | 2094 | m->hdr.middle_len = 0; |
2103 | m->hdr.data_len = cpu_to_le32(page_len); | 2095 | m->hdr.data_len = 0; |
2104 | m->hdr.data_off = cpu_to_le16(page_off); | 2096 | m->hdr.data_off = 0; |
2105 | m->hdr.reserved = 0; | 2097 | m->hdr.reserved = 0; |
2106 | m->footer.front_crc = 0; | 2098 | m->footer.front_crc = 0; |
2107 | m->footer.middle_crc = 0; | 2099 | m->footer.middle_crc = 0; |
@@ -2115,11 +2107,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2115 | /* front */ | 2107 | /* front */ |
2116 | if (front_len) { | 2108 | if (front_len) { |
2117 | if (front_len > PAGE_CACHE_SIZE) { | 2109 | if (front_len > PAGE_CACHE_SIZE) { |
2118 | m->front.iov_base = __vmalloc(front_len, GFP_NOFS, | 2110 | m->front.iov_base = __vmalloc(front_len, flags, |
2119 | PAGE_KERNEL); | 2111 | PAGE_KERNEL); |
2120 | m->front_is_vmalloc = true; | 2112 | m->front_is_vmalloc = true; |
2121 | } else { | 2113 | } else { |
2122 | m->front.iov_base = kmalloc(front_len, GFP_NOFS); | 2114 | m->front.iov_base = kmalloc(front_len, flags); |
2123 | } | 2115 | } |
2124 | if (m->front.iov_base == NULL) { | 2116 | if (m->front.iov_base == NULL) { |
2125 | pr_err("msg_new can't allocate %d bytes\n", | 2117 | pr_err("msg_new can't allocate %d bytes\n", |
@@ -2135,19 +2127,18 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2135 | m->middle = NULL; | 2127 | m->middle = NULL; |
2136 | 2128 | ||
2137 | /* data */ | 2129 | /* data */ |
2138 | m->nr_pages = calc_pages_for(page_off, page_len); | 2130 | m->nr_pages = 0; |
2139 | m->pages = pages; | 2131 | m->pages = NULL; |
2140 | m->pagelist = NULL; | 2132 | m->pagelist = NULL; |
2141 | 2133 | ||
2142 | dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, | 2134 | dout("ceph_msg_new %p front %d\n", m, front_len); |
2143 | m->nr_pages); | ||
2144 | return m; | 2135 | return m; |
2145 | 2136 | ||
2146 | out2: | 2137 | out2: |
2147 | ceph_msg_put(m); | 2138 | ceph_msg_put(m); |
2148 | out: | 2139 | out: |
2149 | pr_err("msg_new can't create type %d len %d\n", type, front_len); | 2140 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
2150 | return ERR_PTR(-ENOMEM); | 2141 | return NULL; |
2151 | } | 2142 | } |
2152 | 2143 | ||
2153 | /* | 2144 | /* |
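ceph_msg_new() loses its page-vector parameters and gains a gfp_t, and its failure convention changes from ERR_PTR(-ENOMEM) to plain NULL. Callers that carry a data payload now attach pages themselves after allocation. A sketch of the new calling convention, where msg_size, pages, page_off and page_len stand for whatever the caller already has:

    struct ceph_msg *msg;

    msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS);
    if (!msg)                       /* NULL, not ERR_PTR, on failure */
            return -ENOMEM;

    /* data payload, when there is one, is now the caller's job: */
    msg->pages = pages;
    msg->nr_pages = calc_pages_for(page_off, page_len);
    msg->hdr.data_len = cpu_to_le32(page_len);
    msg->hdr.data_off = cpu_to_le16(page_off);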
@@ -2190,29 +2181,25 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
2190 | mutex_unlock(&con->mutex); | 2181 | mutex_unlock(&con->mutex); |
2191 | msg = con->ops->alloc_msg(con, hdr, skip); | 2182 | msg = con->ops->alloc_msg(con, hdr, skip); |
2192 | mutex_lock(&con->mutex); | 2183 | mutex_lock(&con->mutex); |
2193 | if (IS_ERR(msg)) | 2184 | if (!msg || *skip) |
2194 | return msg; | ||
2195 | |||
2196 | if (*skip) | ||
2197 | return NULL; | 2185 | return NULL; |
2198 | } | 2186 | } |
2199 | if (!msg) { | 2187 | if (!msg) { |
2200 | *skip = 0; | 2188 | *skip = 0; |
2201 | msg = ceph_msg_new(type, front_len, 0, 0, NULL); | 2189 | msg = ceph_msg_new(type, front_len, GFP_NOFS); |
2202 | if (!msg) { | 2190 | if (!msg) { |
2203 | pr_err("unable to allocate msg type %d len %d\n", | 2191 | pr_err("unable to allocate msg type %d len %d\n", |
2204 | type, front_len); | 2192 | type, front_len); |
2205 | return ERR_PTR(-ENOMEM); | 2193 | return NULL; |
2206 | } | 2194 | } |
2207 | } | 2195 | } |
2208 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2196 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
2209 | 2197 | ||
2210 | if (middle_len) { | 2198 | if (middle_len && !msg->middle) { |
2211 | ret = ceph_alloc_middle(con, msg); | 2199 | ret = ceph_alloc_middle(con, msg); |
2212 | |||
2213 | if (ret < 0) { | 2200 | if (ret < 0) { |
2214 | ceph_msg_put(msg); | 2201 | ceph_msg_put(msg); |
2215 | return msg; | 2202 | return NULL; |
2216 | } | 2203 | } |
2217 | } | 2204 | } |
2218 | 2205 | ||
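With ERR_PTR gone, the contract for the alloc_msg connection op simplifies to two cases: return a message, or return NULL — with *skip set when the incoming message should be drained and dropped, and *skip clear when allocation genuinely failed (read_partial_message() then reports -ENOMEM). A hypothetical implementation, for illustration only:

    static struct ceph_msg *my_alloc_msg(struct ceph_connection *con,
                                         struct ceph_msg_header *hdr,
                                         int *skip)
    {
            int type = le16_to_cpu(hdr->type);
            int front_len = le32_to_cpu(hdr->front_len);

            if (!find_waiting_request(hdr)) {   /* hypothetical lookup */
                    *skip = 1;  /* tell the messenger to drop this message */
                    return NULL;
            }
            return ceph_msg_new(type, front_len, GFP_NOFS);
    }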
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a5caf91cc971..76fbc957bc13 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -49,10 +49,8 @@ struct ceph_connection_operations { | |||
49 | int *skip); | 49 | int *skip); |
50 | }; | 50 | }; |
51 | 51 | ||
52 | extern const char *ceph_name_type_str(int t); | ||
53 | |||
54 | /* use format string %s%d */ | 52 | /* use format string %s%d */ |
55 | #define ENTITY_NAME(n) ceph_name_type_str((n).type), le64_to_cpu((n).num) | 53 | #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) |
56 | 54 | ||
57 | struct ceph_messenger { | 55 | struct ceph_messenger { |
58 | struct ceph_entity_inst inst; /* my name+address */ | 56 | struct ceph_entity_inst inst; /* my name+address */ |
@@ -144,6 +142,7 @@ struct ceph_connection { | |||
144 | struct ceph_entity_addr peer_addr; /* peer address */ | 142 | struct ceph_entity_addr peer_addr; /* peer address */ |
145 | struct ceph_entity_name peer_name; /* peer name */ | 143 | struct ceph_entity_name peer_name; /* peer name */ |
146 | struct ceph_entity_addr peer_addr_for_me; | 144 | struct ceph_entity_addr peer_addr_for_me; |
145 | unsigned peer_features; | ||
147 | u32 connect_seq; /* identify the most recent connection | 146 | u32 connect_seq; /* identify the most recent connection |
148 | attempt for this connection, client */ | 147 | attempt for this connection, client */ |
149 | u32 peer_global_seq; /* peer's global seq for this connection */ | 148 | u32 peer_global_seq; /* peer's global seq for this connection */ |
@@ -158,7 +157,6 @@ struct ceph_connection { | |||
158 | struct list_head out_queue; | 157 | struct list_head out_queue; |
159 | struct list_head out_sent; /* sending or sent but unacked */ | 158 | struct list_head out_sent; /* sending or sent but unacked */ |
160 | u64 out_seq; /* last message queued for send */ | 159 | u64 out_seq; /* last message queued for send */ |
161 | u64 out_seq_sent; /* last message sent */ | ||
162 | bool out_keepalive_pending; | 160 | bool out_keepalive_pending; |
163 | 161 | ||
164 | u64 in_seq, in_seq_acked; /* last message received, acked */ | 162 | u64 in_seq, in_seq_acked; /* last message received, acked */ |
@@ -215,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end, | |||
215 | 213 | ||
216 | extern int ceph_msgr_init(void); | 214 | extern int ceph_msgr_init(void); |
217 | extern void ceph_msgr_exit(void); | 215 | extern void ceph_msgr_exit(void); |
216 | extern void ceph_msgr_flush(void); | ||
218 | 217 | ||
219 | extern struct ceph_messenger *ceph_messenger_create( | 218 | extern struct ceph_messenger *ceph_messenger_create( |
220 | struct ceph_entity_addr *myaddr); | 219 | struct ceph_entity_addr *myaddr); |
@@ -234,9 +233,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con); | |||
234 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); | 233 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); |
235 | extern void ceph_con_put(struct ceph_connection *con); | 234 | extern void ceph_con_put(struct ceph_connection *con); |
236 | 235 | ||
237 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, | 236 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); |
238 | int page_len, int page_off, | ||
239 | struct page **pages); | ||
240 | extern void ceph_msg_kfree(struct ceph_msg *m); | 237 | extern void ceph_msg_kfree(struct ceph_msg *m); |
241 | 238 | ||
242 | 239 | ||
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 8fdc011ca956..21c62e9b7d1d 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
@@ -28,7 +28,7 @@ | |||
28 | * resend any outstanding requests. | 28 | * resend any outstanding requests. |
29 | */ | 29 | */ |
30 | 30 | ||
31 | const static struct ceph_connection_operations mon_con_ops; | 31 | static const struct ceph_connection_operations mon_con_ops; |
32 | 32 | ||
33 | static int __validate_auth(struct ceph_mon_client *monc); | 33 | static int __validate_auth(struct ceph_mon_client *monc); |
34 | 34 | ||
@@ -104,6 +104,7 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | |||
104 | monc->pending_auth = 1; | 104 | monc->pending_auth = 1; |
105 | monc->m_auth->front.iov_len = len; | 105 | monc->m_auth->front.iov_len = len; |
106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); | 106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); |
107 | ceph_con_revoke(monc->con, monc->m_auth); | ||
107 | ceph_msg_get(monc->m_auth); /* keep our ref */ | 108 | ceph_msg_get(monc->m_auth); /* keep our ref */ |
108 | ceph_con_send(monc->con, monc->m_auth); | 109 | ceph_con_send(monc->con, monc->m_auth); |
109 | } | 110 | } |
@@ -187,16 +188,12 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
187 | monc->want_next_osdmap); | 188 | monc->want_next_osdmap); |
188 | if ((__sub_expired(monc) && !monc->sub_sent) || | 189 | if ((__sub_expired(monc) && !monc->sub_sent) || |
189 | monc->want_next_osdmap == 1) { | 190 | monc->want_next_osdmap == 1) { |
190 | struct ceph_msg *msg; | 191 | struct ceph_msg *msg = monc->m_subscribe; |
191 | struct ceph_mon_subscribe_item *i; | 192 | struct ceph_mon_subscribe_item *i; |
192 | void *p, *end; | 193 | void *p, *end; |
193 | 194 | ||
194 | msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL); | ||
195 | if (!msg) | ||
196 | return; | ||
197 | |||
198 | p = msg->front.iov_base; | 195 | p = msg->front.iov_base; |
199 | end = p + msg->front.iov_len; | 196 | end = p + msg->front_max; |
200 | 197 | ||
201 | dout("__send_subscribe to 'mdsmap' %u+\n", | 198 | dout("__send_subscribe to 'mdsmap' %u+\n", |
202 | (unsigned)monc->have_mdsmap); | 199 | (unsigned)monc->have_mdsmap); |
@@ -226,7 +223,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
226 | 223 | ||
227 | msg->front.iov_len = p - msg->front.iov_base; | 224 | msg->front.iov_len = p - msg->front.iov_base; |
228 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 225 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
229 | ceph_con_send(monc->con, msg); | 226 | ceph_con_revoke(monc->con, msg); |
227 | ceph_con_send(monc->con, ceph_msg_get(msg)); | ||
230 | 228 | ||
231 | monc->sub_sent = jiffies | 1; /* never 0 */ | 229 | monc->sub_sent = jiffies | 1; /* never 0 */ |
232 | } | 230 | } |
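Both the auth hunk above and this subscribe hunk switch to reusing a single preallocated message instead of allocating one per send. The idiom is revoke-then-send: pull any still-queued copy back off the connection, then queue the message again with its own reference so the mon client keeps one:

    ceph_con_revoke(monc->con, msg);             /* cancel any queued copy */
    ceph_con_send(monc->con, ceph_msg_get(msg)); /* connection gets its own ref */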
@@ -353,14 +351,14 @@ out: | |||
353 | /* | 351 | /* |
354 | * statfs | 352 | * statfs |
355 | */ | 353 | */ |
356 | static struct ceph_mon_statfs_request *__lookup_statfs( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
357 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
358 | { | 356 | { |
359 | struct ceph_mon_statfs_request *req; | 357 | struct ceph_mon_generic_request *req; |
360 | struct rb_node *n = monc->statfs_request_tree.rb_node; | 358 | struct rb_node *n = monc->generic_request_tree.rb_node; |
361 | 359 | ||
362 | while (n) { | 360 | while (n) { |
363 | req = rb_entry(n, struct ceph_mon_statfs_request, node); | 361 | req = rb_entry(n, struct ceph_mon_generic_request, node); |
364 | if (tid < req->tid) | 362 | if (tid < req->tid) |
365 | n = n->rb_left; | 363 | n = n->rb_left; |
366 | else if (tid > req->tid) | 364 | else if (tid > req->tid) |
@@ -371,16 +369,16 @@ static struct ceph_mon_statfs_request *__lookup_statfs( | |||
371 | return NULL; | 369 | return NULL; |
372 | } | 370 | } |
373 | 371 | ||
374 | static void __insert_statfs(struct ceph_mon_client *monc, | 372 | static void __insert_generic_request(struct ceph_mon_client *monc, |
375 | struct ceph_mon_statfs_request *new) | 373 | struct ceph_mon_generic_request *new) |
376 | { | 374 | { |
377 | struct rb_node **p = &monc->statfs_request_tree.rb_node; | 375 | struct rb_node **p = &monc->generic_request_tree.rb_node; |
378 | struct rb_node *parent = NULL; | 376 | struct rb_node *parent = NULL; |
379 | struct ceph_mon_statfs_request *req = NULL; | 377 | struct ceph_mon_generic_request *req = NULL; |
380 | 378 | ||
381 | while (*p) { | 379 | while (*p) { |
382 | parent = *p; | 380 | parent = *p; |
383 | req = rb_entry(parent, struct ceph_mon_statfs_request, node); | 381 | req = rb_entry(parent, struct ceph_mon_generic_request, node); |
384 | if (new->tid < req->tid) | 382 | if (new->tid < req->tid) |
385 | p = &(*p)->rb_left; | 383 | p = &(*p)->rb_left; |
386 | else if (new->tid > req->tid) | 384 | else if (new->tid > req->tid) |
@@ -390,113 +388,157 @@ static void __insert_statfs(struct ceph_mon_client *monc, | |||
390 | } | 388 | } |
391 | 389 | ||
392 | rb_link_node(&new->node, parent, p); | 390 | rb_link_node(&new->node, parent, p); |
393 | rb_insert_color(&new->node, &monc->statfs_request_tree); | 391 | rb_insert_color(&new->node, &monc->generic_request_tree); |
392 | } | ||
393 | |||
394 | static void release_generic_request(struct kref *kref) | ||
395 | { | ||
396 | struct ceph_mon_generic_request *req = | ||
397 | container_of(kref, struct ceph_mon_generic_request, kref); | ||
398 | |||
399 | if (req->reply) | ||
400 | ceph_msg_put(req->reply); | ||
401 | if (req->request) | ||
402 | ceph_msg_put(req->request); | ||
403 | } | ||
404 | |||
405 | static void put_generic_request(struct ceph_mon_generic_request *req) | ||
406 | { | ||
407 | kref_put(&req->kref, release_generic_request); | ||
408 | } | ||
409 | |||
410 | static void get_generic_request(struct ceph_mon_generic_request *req) | ||
411 | { | ||
412 | kref_get(&req->kref); | ||
413 | } | ||
414 | |||
415 | static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | ||
416 | struct ceph_msg_header *hdr, | ||
417 | int *skip) | ||
418 | { | ||
419 | struct ceph_mon_client *monc = con->private; | ||
420 | struct ceph_mon_generic_request *req; | ||
421 | u64 tid = le64_to_cpu(hdr->tid); | ||
422 | struct ceph_msg *m; | ||
423 | |||
424 | mutex_lock(&monc->mutex); | ||
425 | req = __lookup_generic_req(monc, tid); | ||
426 | if (!req) { | ||
427 | dout("get_generic_reply %lld dne\n", tid); | ||
428 | *skip = 1; | ||
429 | m = NULL; | ||
430 | } else { | ||
431 | dout("get_generic_reply %lld got %p\n", tid, req->reply); | ||
432 | m = ceph_msg_get(req->reply); | ||
433 | /* | ||
434 | * we don't need to track the connection reading into | ||
435 | * this reply because we only have one open connection | ||
436 | * at a time, ever. | ||
437 | */ | ||
438 | } | ||
439 | mutex_unlock(&monc->mutex); | ||
440 | return m; | ||
394 | } | 441 | } |
395 | 442 | ||
396 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 443 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
397 | struct ceph_msg *msg) | 444 | struct ceph_msg *msg) |
398 | { | 445 | { |
399 | struct ceph_mon_statfs_request *req; | 446 | struct ceph_mon_generic_request *req; |
400 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; | 447 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; |
401 | u64 tid; | 448 | u64 tid = le64_to_cpu(msg->hdr.tid); |
402 | 449 | ||
403 | if (msg->front.iov_len != sizeof(*reply)) | 450 | if (msg->front.iov_len != sizeof(*reply)) |
404 | goto bad; | 451 | goto bad; |
405 | tid = le64_to_cpu(msg->hdr.tid); | ||
406 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); | 452 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); |
407 | 453 | ||
408 | mutex_lock(&monc->mutex); | 454 | mutex_lock(&monc->mutex); |
409 | req = __lookup_statfs(monc, tid); | 455 | req = __lookup_generic_req(monc, tid); |
410 | if (req) { | 456 | if (req) { |
411 | *req->buf = reply->st; | 457 | *(struct ceph_statfs *)req->buf = reply->st; |
412 | req->result = 0; | 458 | req->result = 0; |
459 | get_generic_request(req); | ||
413 | } | 460 | } |
414 | mutex_unlock(&monc->mutex); | 461 | mutex_unlock(&monc->mutex); |
415 | if (req) | 462 | if (req) { |
416 | complete(&req->completion); | 463 | complete(&req->completion); |
464 | put_generic_request(req); | ||
465 | } | ||
417 | return; | 466 | return; |
418 | 467 | ||
419 | bad: | 468 | bad: |
420 | pr_err("corrupt statfs reply, no tid\n"); | 469 | pr_err("corrupt generic reply, no tid\n"); |
421 | ceph_msg_dump(msg); | 470 | ceph_msg_dump(msg); |
422 | } | 471 | } |
423 | 472 | ||
424 | /* | 473 | /* |
425 | * (re)send a statfs request | 474 | * Do a synchronous statfs(). |
426 | */ | 475 | */ |
427 | static int send_statfs(struct ceph_mon_client *monc, | 476 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) |
428 | struct ceph_mon_statfs_request *req) | ||
429 | { | 477 | { |
430 | struct ceph_msg *msg; | 478 | struct ceph_mon_generic_request *req; |
431 | struct ceph_mon_statfs *h; | 479 | struct ceph_mon_statfs *h; |
480 | int err; | ||
432 | 481 | ||
433 | dout("send_statfs tid %llu\n", req->tid); | 482 | req = kzalloc(sizeof(*req), GFP_NOFS); |
434 | msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); | 483 | if (!req) |
435 | if (IS_ERR(msg)) | 484 | return -ENOMEM; |
436 | return PTR_ERR(msg); | 485 | |
437 | req->request = msg; | 486 | kref_init(&req->kref); |
438 | msg->hdr.tid = cpu_to_le64(req->tid); | 487 | req->buf = buf; |
439 | h = msg->front.iov_base; | 488 | init_completion(&req->completion); |
489 | |||
490 | err = -ENOMEM; | ||
491 | req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); | ||
492 | if (!req->request) | ||
493 | goto out; | ||
494 | req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); | ||
495 | if (!req->reply) | ||
496 | goto out; | ||
497 | |||
498 | /* fill out request */ | ||
499 | h = req->request->front.iov_base; | ||
440 | h->monhdr.have_version = 0; | 500 | h->monhdr.have_version = 0; |
441 | h->monhdr.session_mon = cpu_to_le16(-1); | 501 | h->monhdr.session_mon = cpu_to_le16(-1); |
442 | h->monhdr.session_mon_tid = 0; | 502 | h->monhdr.session_mon_tid = 0; |
443 | h->fsid = monc->monmap->fsid; | 503 | h->fsid = monc->monmap->fsid; |
444 | ceph_con_send(monc->con, msg); | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * Do a synchronous statfs(). | ||
450 | */ | ||
451 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | ||
452 | { | ||
453 | struct ceph_mon_statfs_request req; | ||
454 | int err; | ||
455 | |||
456 | req.buf = buf; | ||
457 | init_completion(&req.completion); | ||
458 | |||
459 | /* allocate memory for reply */ | ||
460 | err = ceph_msgpool_resv(&monc->msgpool_statfs_reply, 1); | ||
461 | if (err) | ||
462 | return err; | ||
463 | 504 | ||
464 | /* register request */ | 505 | /* register request */ |
465 | mutex_lock(&monc->mutex); | 506 | mutex_lock(&monc->mutex); |
466 | req.tid = ++monc->last_tid; | 507 | req->tid = ++monc->last_tid; |
467 | req.last_attempt = jiffies; | 508 | req->request->hdr.tid = cpu_to_le64(req->tid); |
468 | req.delay = BASE_DELAY_INTERVAL; | 509 | __insert_generic_request(monc, req); |
469 | __insert_statfs(monc, &req); | 510 | monc->num_generic_requests++; |
470 | monc->num_statfs_requests++; | ||
471 | mutex_unlock(&monc->mutex); | 511 | mutex_unlock(&monc->mutex); |
472 | 512 | ||
473 | /* send request and wait */ | 513 | /* send request and wait */ |
474 | err = send_statfs(monc, &req); | 514 | ceph_con_send(monc->con, ceph_msg_get(req->request)); |
475 | if (!err) | 515 | err = wait_for_completion_interruptible(&req->completion); |
476 | err = wait_for_completion_interruptible(&req.completion); | ||
477 | 516 | ||
478 | mutex_lock(&monc->mutex); | 517 | mutex_lock(&monc->mutex); |
479 | rb_erase(&req.node, &monc->statfs_request_tree); | 518 | rb_erase(&req->node, &monc->generic_request_tree); |
480 | monc->num_statfs_requests--; | 519 | monc->num_generic_requests--; |
481 | ceph_msgpool_resv(&monc->msgpool_statfs_reply, -1); | ||
482 | mutex_unlock(&monc->mutex); | 520 | mutex_unlock(&monc->mutex); |
483 | 521 | ||
484 | if (!err) | 522 | if (!err) |
485 | err = req.result; | 523 | err = req->result; |
524 | |||
525 | out: | ||
526 | kref_put(&req->kref, release_generic_request); | ||
486 | return err; | 527 | return err; |
487 | } | 528 | } |
488 | 529 | ||
489 | /* | 530 | /* |
490 | * Resend pending statfs requests. | 531 | * Resend pending generic requests. |
491 | */ | 532 | */ |
492 | static void __resend_statfs(struct ceph_mon_client *monc) | 533 | static void __resend_generic_request(struct ceph_mon_client *monc) |
493 | { | 534 | { |
494 | struct ceph_mon_statfs_request *req; | 535 | struct ceph_mon_generic_request *req; |
495 | struct rb_node *p; | 536 | struct rb_node *p; |
496 | 537 | ||
497 | for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { | 538 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { |
498 | req = rb_entry(p, struct ceph_mon_statfs_request, node); | 539 | req = rb_entry(p, struct ceph_mon_generic_request, node); |
499 | send_statfs(monc, req); | 540 | ceph_con_revoke(monc->con, req->request); |
541 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
500 | } | 542 | } |
501 | } | 543 | } |
502 | 544 | ||
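Read as a whole, ceph_monc_do_statfs() is now the template for any synchronous monitor request: allocate a kref-counted ceph_mon_generic_request with both the request and the reply message preallocated (so mon_alloc_msg() never has to allocate under memory pressure), register it by tid, send, and sleep on the completion. Condensed from the code above, error handling trimmed:

    req = kzalloc(sizeof(*req), GFP_NOFS);
    kref_init(&req->kref);
    init_completion(&req->completion);
    req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS);
    req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS);

    mutex_lock(&monc->mutex);
    req->tid = ++monc->last_tid;                  /* tid keys the rbtree */
    req->request->hdr.tid = cpu_to_le64(req->tid);
    __insert_generic_request(monc, req);
    mutex_unlock(&monc->mutex);

    ceph_con_send(monc->con, ceph_msg_get(req->request));
    err = wait_for_completion_interruptible(&req->completion);

    mutex_lock(&monc->mutex);
    rb_erase(&req->node, &monc->generic_request_tree);
    mutex_unlock(&monc->mutex);
    put_generic_request(req);                     /* drop the initial ref */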
@@ -586,26 +628,26 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
586 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | | 628 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | |
587 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; | 629 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; |
588 | 630 | ||
589 | /* msg pools */ | 631 | /* msgs */ |
590 | err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, | 632 | err = -ENOMEM; |
591 | sizeof(struct ceph_mon_subscribe_ack), 1, false); | 633 | monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, |
592 | if (err < 0) | 634 | sizeof(struct ceph_mon_subscribe_ack), |
635 | GFP_NOFS); | ||
636 | if (!monc->m_subscribe_ack) | ||
593 | goto out_monmap; | 637 | goto out_monmap; |
594 | err = ceph_msgpool_init(&monc->msgpool_statfs_reply, | 638 | |
595 | sizeof(struct ceph_mon_statfs_reply), 0, false); | 639 | monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); |
596 | if (err < 0) | 640 | if (!monc->m_subscribe) |
597 | goto out_pool1; | 641 | goto out_subscribe_ack; |
598 | err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); | 642 | |
599 | if (err < 0) | 643 | monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); |
600 | goto out_pool2; | 644 | if (!monc->m_auth_reply) |
601 | 645 | goto out_subscribe; | |
602 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); | 646 | |
647 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); | ||
603 | monc->pending_auth = 0; | 648 | monc->pending_auth = 0; |
604 | if (IS_ERR(monc->m_auth)) { | 649 | if (!monc->m_auth) |
605 | err = PTR_ERR(monc->m_auth); | 650 | goto out_auth_reply; |
606 | monc->m_auth = NULL; | ||
607 | goto out_pool3; | ||
608 | } | ||
609 | 651 | ||
610 | monc->cur_mon = -1; | 652 | monc->cur_mon = -1; |
611 | monc->hunting = true; | 653 | monc->hunting = true; |
@@ -613,8 +655,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
613 | monc->sub_sent = 0; | 655 | monc->sub_sent = 0; |
614 | 656 | ||
615 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); | 657 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); |
616 | monc->statfs_request_tree = RB_ROOT; | 658 | monc->generic_request_tree = RB_ROOT; |
617 | monc->num_statfs_requests = 0; | 659 | monc->num_generic_requests = 0; |
618 | monc->last_tid = 0; | 660 | monc->last_tid = 0; |
619 | 661 | ||
620 | monc->have_mdsmap = 0; | 662 | monc->have_mdsmap = 0; |
@@ -622,12 +664,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
622 | monc->want_next_osdmap = 1; | 664 | monc->want_next_osdmap = 1; |
623 | return 0; | 665 | return 0; |
624 | 666 | ||
625 | out_pool3: | 667 | out_auth_reply: |
626 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 668 | ceph_msg_put(monc->m_auth_reply); |
627 | out_pool2: | 669 | out_subscribe: |
628 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 670 | ceph_msg_put(monc->m_subscribe); |
629 | out_pool1: | 671 | out_subscribe_ack: |
630 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 672 | ceph_msg_put(monc->m_subscribe_ack); |
631 | out_monmap: | 673 | out_monmap: |
632 | kfree(monc->monmap); | 674 | kfree(monc->monmap); |
633 | out: | 675 | out: |
@@ -651,9 +693,9 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
651 | ceph_auth_destroy(monc->auth); | 693 | ceph_auth_destroy(monc->auth); |
652 | 694 | ||
653 | ceph_msg_put(monc->m_auth); | 695 | ceph_msg_put(monc->m_auth); |
654 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 696 | ceph_msg_put(monc->m_auth_reply); |
655 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 697 | ceph_msg_put(monc->m_subscribe); |
656 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 698 | ceph_msg_put(monc->m_subscribe_ack); |
657 | 699 | ||
658 | kfree(monc->monmap); | 700 | kfree(monc->monmap); |
659 | } | 701 | } |
@@ -662,8 +704,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
662 | struct ceph_msg *msg) | 704 | struct ceph_msg *msg) |
663 | { | 705 | { |
664 | int ret; | 706 | int ret; |
707 | int was_auth = 0; | ||
665 | 708 | ||
666 | mutex_lock(&monc->mutex); | 709 | mutex_lock(&monc->mutex); |
710 | if (monc->auth->ops) | ||
711 | was_auth = monc->auth->ops->is_authenticated(monc->auth); | ||
667 | monc->pending_auth = 0; | 712 | monc->pending_auth = 0; |
668 | ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, | 713 | ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, |
669 | msg->front.iov_len, | 714 | msg->front.iov_len, |
@@ -674,14 +719,14 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
674 | wake_up(&monc->client->auth_wq); | 719 | wake_up(&monc->client->auth_wq); |
675 | } else if (ret > 0) { | 720 | } else if (ret > 0) { |
676 | __send_prepared_auth_request(monc, ret); | 721 | __send_prepared_auth_request(monc, ret); |
677 | } else if (monc->auth->ops->is_authenticated(monc->auth)) { | 722 | } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { |
678 | dout("authenticated, starting session\n"); | 723 | dout("authenticated, starting session\n"); |
679 | 724 | ||
680 | monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; | 725 | monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; |
681 | monc->client->msgr->inst.name.num = monc->auth->global_id; | 726 | monc->client->msgr->inst.name.num = monc->auth->global_id; |
682 | 727 | ||
683 | __send_subscribe(monc); | 728 | __send_subscribe(monc); |
684 | __resend_statfs(monc); | 729 | __resend_generic_request(monc); |
685 | } | 730 | } |
686 | mutex_unlock(&monc->mutex); | 731 | mutex_unlock(&monc->mutex); |
687 | } | 732 | } |
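The was_auth snapshot guards against rerunning the session-start work: is_authenticated() stays true once the handshake succeeds, so without it the subscribe/resend block would fire again on every later auth reply (e.g. ticket renewals). Sampling the state before processing the reply makes the block fire only on the false-to-true transition:

    if (monc->auth->ops)
            was_auth = monc->auth->ops->is_authenticated(monc->auth);
    /* ... process the reply ... */
    } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
            /* first successful authentication: start the session once */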
@@ -770,18 +815,17 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
770 | 815 | ||
771 | switch (type) { | 816 | switch (type) { |
772 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 817 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
773 | m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); | 818 | m = ceph_msg_get(monc->m_subscribe_ack); |
774 | break; | 819 | break; |
775 | case CEPH_MSG_STATFS_REPLY: | 820 | case CEPH_MSG_STATFS_REPLY: |
776 | m = ceph_msgpool_get(&monc->msgpool_statfs_reply, front_len); | 821 | return get_generic_reply(con, hdr, skip); |
777 | break; | ||
778 | case CEPH_MSG_AUTH_REPLY: | 822 | case CEPH_MSG_AUTH_REPLY: |
779 | m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); | 823 | m = ceph_msg_get(monc->m_auth_reply); |
780 | break; | 824 | break; |
781 | case CEPH_MSG_MON_MAP: | 825 | case CEPH_MSG_MON_MAP: |
782 | case CEPH_MSG_MDS_MAP: | 826 | case CEPH_MSG_MDS_MAP: |
783 | case CEPH_MSG_OSD_MAP: | 827 | case CEPH_MSG_OSD_MAP: |
784 | m = ceph_msg_new(type, front_len, 0, 0, NULL); | 828 | m = ceph_msg_new(type, front_len, GFP_NOFS); |
785 | break; | 829 | break; |
786 | } | 830 | } |
787 | 831 | ||
@@ -826,7 +870,7 @@ out: | |||
826 | mutex_unlock(&monc->mutex); | 870 | mutex_unlock(&monc->mutex); |
827 | } | 871 | } |
828 | 872 | ||
829 | const static struct ceph_connection_operations mon_con_ops = { | 873 | static const struct ceph_connection_operations mon_con_ops = { |
830 | .get = ceph_con_get, | 874 | .get = ceph_con_get, |
831 | .put = ceph_con_put, | 875 | .put = ceph_con_put, |
832 | .dispatch = dispatch, | 876 | .dispatch = dispatch, |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index b958ad5afa06..174d794321d0 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
@@ -2,10 +2,10 @@ | |||
2 | #define _FS_CEPH_MON_CLIENT_H | 2 | #define _FS_CEPH_MON_CLIENT_H |
3 | 3 | ||
4 | #include <linux/completion.h> | 4 | #include <linux/completion.h> |
5 | #include <linux/kref.h> | ||
5 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
6 | 7 | ||
7 | #include "messenger.h" | 8 | #include "messenger.h" |
8 | #include "msgpool.h" | ||
9 | 9 | ||
10 | struct ceph_client; | 10 | struct ceph_client; |
11 | struct ceph_mount_args; | 11 | struct ceph_mount_args; |
@@ -22,7 +22,7 @@ struct ceph_monmap { | |||
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct ceph_mon_client; | 24 | struct ceph_mon_client; |
25 | struct ceph_mon_statfs_request; | 25 | struct ceph_mon_generic_request; |
26 | 26 | ||
27 | 27 | ||
28 | /* | 28 | /* |
@@ -40,17 +40,19 @@ struct ceph_mon_request { | |||
40 | }; | 40 | }; |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * statfs() is done a bit differently because we need to get data back | 43 | * ceph_mon_generic_request is used for the statfs and poolop requests, |
44 | * which are handled a bit differently because we need to get data back | ||
44 | * to the caller | 45 | * to the caller |
45 | */ | 46 | */ |
46 | struct ceph_mon_statfs_request { | 47 | struct ceph_mon_generic_request { |
48 | struct kref kref; | ||
47 | u64 tid; | 49 | u64 tid; |
48 | struct rb_node node; | 50 | struct rb_node node; |
49 | int result; | 51 | int result; |
50 | struct ceph_statfs *buf; | 52 | void *buf; |
51 | struct completion completion; | 53 | struct completion completion; |
52 | unsigned long last_attempt, delay; /* jiffies */ | ||
53 | struct ceph_msg *request; /* original request */ | 54 | struct ceph_msg *request; /* original request */ |
55 | struct ceph_msg *reply; /* and reply */ | ||
54 | }; | 56 | }; |
55 | 57 | ||
56 | struct ceph_mon_client { | 58 | struct ceph_mon_client { |
@@ -61,7 +63,7 @@ struct ceph_mon_client { | |||
61 | struct delayed_work delayed_work; | 63 | struct delayed_work delayed_work; |
62 | 64 | ||
63 | struct ceph_auth_client *auth; | 65 | struct ceph_auth_client *auth; |
64 | struct ceph_msg *m_auth; | 66 | struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; |
65 | int pending_auth; | 67 | int pending_auth; |
66 | 68 | ||
67 | bool hunting; | 69 | bool hunting; |
@@ -70,14 +72,9 @@ struct ceph_mon_client { | |||
70 | struct ceph_connection *con; | 72 | struct ceph_connection *con; |
71 | bool have_fsid; | 73 | bool have_fsid; |
72 | 74 | ||
73 | /* msg pools */ | 75 | /* pending generic requests */ |
74 | struct ceph_msgpool msgpool_subscribe_ack; | 76 | struct rb_root generic_request_tree; |
75 | struct ceph_msgpool msgpool_statfs_reply; | 77 | int num_generic_requests; |
76 | struct ceph_msgpool msgpool_auth_reply; | ||
77 | |||
78 | /* pending statfs requests */ | ||
79 | struct rb_root statfs_request_tree; | ||
80 | int num_statfs_requests; | ||
81 | u64 last_tid; | 78 | u64 last_tid; |
82 | 79 | ||
83 | /* mds/osd map */ | 80 | /* mds/osd map */ |
diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ca3b44a89f2d..dd65a6438131 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c | |||
@@ -7,180 +7,58 @@ | |||
7 | 7 | ||
8 | #include "msgpool.h" | 8 | #include "msgpool.h" |
9 | 9 | ||
10 | /* | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
11 | * We use msg pools to preallocate memory for messages we expect to | 11 | { |
12 | * receive over the wire, to avoid getting ourselves into OOM | 12 | struct ceph_msgpool *pool = arg; |
13 | * conditions at unexpected times. We take use a few different | 13 | void *p; |
14 | * strategies: | ||
15 | * | ||
16 | * - for request/response type interactions, we preallocate the | ||
17 | * memory needed for the response when we generate the request. | ||
18 | * | ||
19 | * - for messages we can receive at any time from the MDS, we preallocate | ||
20 | * a pool of messages we can re-use. | ||
21 | * | ||
22 | * - for writeback, we preallocate some number of messages to use for | ||
23 | * requests and their replies, so that we always make forward | ||
24 | * progress. | ||
25 | * | ||
26 | * The msgpool behaves like a mempool_t, but keeps preallocated | ||
27 | * ceph_msgs strung together on a list_head instead of using a pointer | ||
28 | * vector. This avoids vector reallocation when we adjust the number | ||
29 | * of preallocated items (which happens frequently). | ||
30 | */ | ||
31 | 14 | ||
15 | p = ceph_msg_new(0, pool->front_len, gfp_mask); | ||
16 | if (!p) | ||
17 | pr_err("msgpool %s alloc failed\n", pool->name); | ||
18 | return p; | ||
19 | } | ||
32 | 20 | ||
33 | /* | 21 | static void free_fn(void *element, void *arg) |
34 | * Allocate or release as necessary to meet our target pool size. | ||
35 | */ | ||
36 | static int __fill_msgpool(struct ceph_msgpool *pool) | ||
37 | { | 22 | { |
38 | struct ceph_msg *msg; | 23 | ceph_msg_put(element); |
39 | |||
40 | while (pool->num < pool->min) { | ||
41 | dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, | ||
42 | pool->min); | ||
43 | spin_unlock(&pool->lock); | ||
44 | msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); | ||
45 | spin_lock(&pool->lock); | ||
46 | if (IS_ERR(msg)) | ||
47 | return PTR_ERR(msg); | ||
48 | msg->pool = pool; | ||
49 | list_add(&msg->list_head, &pool->msgs); | ||
50 | pool->num++; | ||
51 | } | ||
52 | while (pool->num > pool->min) { | ||
53 | msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); | ||
54 | dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, | ||
55 | pool->min, msg); | ||
56 | list_del_init(&msg->list_head); | ||
57 | pool->num--; | ||
58 | ceph_msg_kfree(msg); | ||
59 | } | ||
60 | return 0; | ||
61 | } | 24 | } |
62 | 25 | ||
63 | int ceph_msgpool_init(struct ceph_msgpool *pool, | 26 | int ceph_msgpool_init(struct ceph_msgpool *pool, |
64 | int front_len, int min, bool blocking) | 27 | int front_len, int size, bool blocking, const char *name) |
65 | { | 28 | { |
66 | int ret; | ||
67 | |||
68 | dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min); | ||
69 | spin_lock_init(&pool->lock); | ||
70 | pool->front_len = front_len; | 29 | pool->front_len = front_len; |
71 | INIT_LIST_HEAD(&pool->msgs); | 30 | pool->pool = mempool_create(size, alloc_fn, free_fn, pool); |
72 | pool->num = 0; | 31 | if (!pool->pool) |
73 | pool->min = min; | 32 | return -ENOMEM; |
74 | pool->blocking = blocking; | 33 | pool->name = name; |
75 | init_waitqueue_head(&pool->wait); | 34 | return 0; |
76 | |||
77 | spin_lock(&pool->lock); | ||
78 | ret = __fill_msgpool(pool); | ||
79 | spin_unlock(&pool->lock); | ||
80 | return ret; | ||
81 | } | 35 | } |
82 | 36 | ||
83 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) | 37 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) |
84 | { | 38 | { |
85 | dout("msgpool_destroy %p\n", pool); | 39 | mempool_destroy(pool->pool); |
86 | spin_lock(&pool->lock); | ||
87 | pool->min = 0; | ||
88 | __fill_msgpool(pool); | ||
89 | spin_unlock(&pool->lock); | ||
90 | } | 40 | } |
91 | 41 | ||
92 | int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) | 42 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, |
43 | int front_len) | ||
93 | { | 44 | { |
94 | int ret; | 45 | if (front_len > pool->front_len) { |
95 | 46 | pr_err("msgpool_get pool %s need front %d, pool size is %d\n", | |
96 | spin_lock(&pool->lock); | 47 | pool->name, front_len, pool->front_len); |
97 | dout("msgpool_resv %p delta %d\n", pool, delta); | ||
98 | pool->min += delta; | ||
99 | ret = __fill_msgpool(pool); | ||
100 | spin_unlock(&pool->lock); | ||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) | ||
105 | { | ||
106 | wait_queue_t wait; | ||
107 | struct ceph_msg *msg; | ||
108 | |||
109 | if (front_len && front_len > pool->front_len) { | ||
110 | pr_err("msgpool_get pool %p need front %d, pool size is %d\n", | ||
111 | pool, front_len, pool->front_len); | ||
112 | WARN_ON(1); | 48 | WARN_ON(1); |
113 | 49 | ||
114 | /* try to alloc a fresh message */ | 50 | /* try to alloc a fresh message */ |
115 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | 51 | return ceph_msg_new(0, front_len, GFP_NOFS); |
116 | if (!IS_ERR(msg)) | ||
117 | return msg; | ||
118 | } | ||
119 | |||
120 | if (!front_len) | ||
121 | front_len = pool->front_len; | ||
122 | |||
123 | if (pool->blocking) { | ||
124 | /* mempool_t behavior; first try to alloc */ | ||
125 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
126 | if (!IS_ERR(msg)) | ||
127 | return msg; | ||
128 | } | 52 | } |
129 | 53 | ||
130 | while (1) { | 54 | return mempool_alloc(pool->pool, GFP_NOFS); |
131 | spin_lock(&pool->lock); | ||
132 | if (likely(pool->num)) { | ||
133 | msg = list_entry(pool->msgs.next, struct ceph_msg, | ||
134 | list_head); | ||
135 | list_del_init(&msg->list_head); | ||
136 | pool->num--; | ||
137 | dout("msgpool_get %p got %p, now %d/%d\n", pool, msg, | ||
138 | pool->num, pool->min); | ||
139 | spin_unlock(&pool->lock); | ||
140 | return msg; | ||
141 | } | ||
142 | pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, | ||
143 | pool->min, pool->blocking ? "waiting" : "may fail"); | ||
144 | spin_unlock(&pool->lock); | ||
145 | |||
146 | if (!pool->blocking) { | ||
147 | WARN_ON(1); | ||
148 | |||
149 | /* maybe we can allocate it now? */ | ||
150 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
151 | if (!IS_ERR(msg)) | ||
152 | return msg; | ||
153 | |||
154 | pr_err("msgpool_get %p empty + alloc failed\n", pool); | ||
155 | return ERR_PTR(-ENOMEM); | ||
156 | } | ||
157 | |||
158 | init_wait(&wait); | ||
159 | prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
160 | schedule(); | ||
161 | finish_wait(&pool->wait, &wait); | ||
162 | } | ||
163 | } | 55 | } |
164 | 56 | ||
165 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) | 57 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) |
166 | { | 58 | { |
167 | spin_lock(&pool->lock); | 59 | /* reset msg front_len; user may have changed it */ |
168 | if (pool->num < pool->min) { | 60 | msg->front.iov_len = pool->front_len; |
169 | /* reset msg front_len; user may have changed it */ | 61 | msg->hdr.front_len = cpu_to_le32(pool->front_len); |
170 | msg->front.iov_len = pool->front_len; | ||
171 | msg->hdr.front_len = cpu_to_le32(pool->front_len); | ||
172 | 62 | ||
173 | kref_set(&msg->kref, 1); /* retake a single ref */ | 63 | kref_init(&msg->kref); /* retake single ref */ |
174 | list_add(&msg->list_head, &pool->msgs); | ||
175 | pool->num++; | ||
176 | dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, | ||
177 | pool->num, pool->min); | ||
178 | spin_unlock(&pool->lock); | ||
179 | wake_up(&pool->wait); | ||
180 | } else { | ||
181 | dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg, | ||
182 | pool->num, pool->min); | ||
183 | spin_unlock(&pool->lock); | ||
184 | ceph_msg_kfree(msg); | ||
185 | } | ||
186 | } | 64 | } |
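The hand-rolled pool above is replaced by a thin wrapper over the kernel's mempool_t: alloc_fn()/free_fn() adapt ceph_msg_new()/ceph_msg_put() to mempool's element constructor and destructor hooks, so getting a message is just mempool_alloc() with GFP_NOFS, and an oversized front_len falls back to a one-off allocation with a WARN. A usage sketch under assumed sizes and name (the numbers here are illustrative, not from this diff):

    struct ceph_msgpool pool;
    struct ceph_msg *msg;

    /* preallocate 10 messages with 512-byte fronts */
    if (ceph_msgpool_init(&pool, 512, 10, true, "example_reply") < 0)
            return -ENOMEM;

    msg = ceph_msgpool_get(&pool, 0);       /* 0: use the pool's front_len */
    if (msg) {
            /* ... fill and send ... */
            ceph_msgpool_put(&pool, msg);   /* reset front_len, retake single ref */
    }
    ceph_msgpool_destroy(&pool);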
diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h index bc834bfcd720..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/fs/ceph/msgpool.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _FS_CEPH_MSGPOOL | 1 | #ifndef _FS_CEPH_MSGPOOL |
2 | #define _FS_CEPH_MSGPOOL | 2 | #define _FS_CEPH_MSGPOOL |
3 | 3 | ||
4 | #include <linux/mempool.h> | ||
4 | #include "messenger.h" | 5 | #include "messenger.h" |
5 | 6 | ||
6 | /* | 7 | /* |
@@ -8,18 +9,15 @@ | |||
8 | * avoid unexpected OOM conditions. | 9 | * avoid unexpected OOM conditions. |
9 | */ | 10 | */ |
10 | struct ceph_msgpool { | 11 | struct ceph_msgpool { |
11 | spinlock_t lock; | 12 | const char *name; |
13 | mempool_t *pool; | ||
12 | int front_len; /* preallocated payload size */ | 14 | int front_len; /* preallocated payload size */ |
13 | struct list_head msgs; /* msgs in the pool; each has 1 ref */ | ||
14 | int num, min; /* cur, min # msgs in the pool */ | ||
15 | bool blocking; | ||
16 | wait_queue_head_t wait; | ||
17 | }; | 15 | }; |
18 | 16 | ||
19 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, | 17 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, |
20 | int front_len, int size, bool blocking); | 18 | int front_len, int size, bool blocking, |
19 | const char *name); | ||
21 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); | 20 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); |
22 | extern int ceph_msgpool_resv(struct ceph_msgpool *, int delta); | ||
23 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, | 21 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, |
24 | int front_len); | 22 | int front_len); |
25 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); | 23 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 8aaab414f3f8..892a0298dfdf 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
@@ -50,7 +50,6 @@ struct ceph_entity_name { | |||
50 | #define CEPH_ENTITY_TYPE_MDS 0x02 | 50 | #define CEPH_ENTITY_TYPE_MDS 0x02 |
51 | #define CEPH_ENTITY_TYPE_OSD 0x04 | 51 | #define CEPH_ENTITY_TYPE_OSD 0x04 |
52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 | 52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 |
53 | #define CEPH_ENTITY_TYPE_ADMIN 0x10 | ||
54 | #define CEPH_ENTITY_TYPE_AUTH 0x20 | 53 | #define CEPH_ENTITY_TYPE_AUTH 0x20 |
55 | 54 | ||
56 | #define CEPH_ENTITY_TYPE_ANY 0xFF | 55 | #define CEPH_ENTITY_TYPE_ANY 0xFF |
@@ -120,7 +119,7 @@ struct ceph_msg_connect_reply { | |||
120 | /* | 119 | /* |
121 | * message header | 120 | * message header |
122 | */ | 121 | */ |
123 | struct ceph_msg_header { | 122 | struct ceph_msg_header_old { |
124 | __le64 seq; /* message seq# for this session */ | 123 | __le64 seq; /* message seq# for this session */ |
125 | __le64 tid; /* transaction id */ | 124 | __le64 tid; /* transaction id */ |
126 | __le16 type; /* message type */ | 125 | __le16 type; /* message type */ |
@@ -138,6 +137,24 @@ struct ceph_msg_header { | |||
138 | __le32 crc; /* header crc32c */ | 137 | __le32 crc; /* header crc32c */ |
139 | } __attribute__ ((packed)); | 138 | } __attribute__ ((packed)); |
140 | 139 | ||
140 | struct ceph_msg_header { | ||
141 | __le64 seq; /* message seq# for this session */ | ||
142 | __le64 tid; /* transaction id */ | ||
143 | __le16 type; /* message type */ | ||
144 | __le16 priority; /* priority. higher value == higher priority */ | ||
145 | __le16 version; /* version of message encoding */ | ||
146 | |||
147 | __le32 front_len; /* bytes in main payload */ | ||
148 | __le32 middle_len;/* bytes in middle payload */ | ||
149 | __le32 data_len; /* bytes of data payload */ | ||
150 | __le16 data_off; /* sender: include full offset; | ||
151 | receiver: mask against ~PAGE_MASK */ | ||
152 | |||
153 | struct ceph_entity_name src; | ||
154 | __le32 reserved; | ||
155 | __le32 crc; /* header crc32c */ | ||
156 | } __attribute__ ((packed)); | ||
157 | |||
141 | #define CEPH_MSG_PRIO_LOW 64 | 158 | #define CEPH_MSG_PRIO_LOW 64 |
142 | #define CEPH_MSG_PRIO_DEFAULT 127 | 159 | #define CEPH_MSG_PRIO_DEFAULT 127 |
143 | #define CEPH_MSG_PRIO_HIGH 196 | 160 | #define CEPH_MSG_PRIO_HIGH 196 |
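This header rework matches the messenger.c hunks earlier in the diff: the sender is now identified by a bare ceph_entity_name rather than a full name+address pair (hence msg->hdr.src = con->msgr->inst.name and the dropped orig_src), and the previous layout is kept as ceph_msg_header_old for peers that still speak it. The data_off comment encodes a two-sided contract; on the receive side it is used roughly like this (compare the old __prepare_pages() removed later in this diff):

    unsigned data_off = le16_to_cpu(hdr->data_off);
    unsigned page_off = data_off & ~PAGE_MASK;   /* receiver masks to a page offset */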
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 3514f71ff85f..d25b4add85b4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #define OSD_OP_FRONT_LEN 4096 | 16 | #define OSD_OP_FRONT_LEN 4096 |
17 | #define OSD_OPREPLY_FRONT_LEN 512 | 17 | #define OSD_OPREPLY_FRONT_LEN 512 |
18 | 18 | ||
19 | const static struct ceph_connection_operations osd_con_ops; | 19 | static const struct ceph_connection_operations osd_con_ops; |
20 | static int __kick_requests(struct ceph_osd_client *osdc, | 20 | static int __kick_requests(struct ceph_osd_client *osdc, |
21 | struct ceph_osd *kickosd); | 21 | struct ceph_osd *kickosd); |
22 | 22 | ||
@@ -147,7 +147,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 147 | req = kzalloc(sizeof(*req), GFP_NOFS); |
148 | } | 148 | } |
149 | if (req == NULL) | 149 | if (req == NULL) |
150 | return ERR_PTR(-ENOMEM); | 150 | return NULL; |
151 | 151 | ||
152 | req->r_osdc = osdc; | 152 | req->r_osdc = osdc; |
153 | req->r_mempool = use_mempool; | 153 | req->r_mempool = use_mempool; |
@@ -164,10 +164,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
165 | else | 165 | else |
166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
167 | OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); | 167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); |
168 | if (IS_ERR(msg)) { | 168 | if (!msg) { |
169 | ceph_osdc_put_request(req); | 169 | ceph_osdc_put_request(req); |
170 | return ERR_PTR(PTR_ERR(msg)); | 170 | return NULL; |
171 | } | 171 | } |
172 | req->r_reply = msg; | 172 | req->r_reply = msg; |
173 | 173 | ||
@@ -178,10 +178,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
178 | if (use_mempool) | 178 | if (use_mempool) |
179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
180 | else | 180 | else |
181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); | 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); |
182 | if (IS_ERR(msg)) { | 182 | if (!msg) { |
183 | ceph_osdc_put_request(req); | 183 | ceph_osdc_put_request(req); |
184 | return ERR_PTR(PTR_ERR(msg)); | 184 | return NULL; |
185 | } | 185 | } |
186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
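The NULL-instead-of-ERR_PTR convention from ceph_msg_new() propagates here: ceph_osdc_new_request() now returns NULL on any allocation failure, and its callers translate that to -ENOMEM. The pattern, with arguments as in the readpages caller further down in this diff:

    req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
                                CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
                                NULL, 0, truncate_seq, truncate_size, NULL,
                                false, 1);
    if (!req)
            return -ENOMEM;     /* was: if (IS_ERR(req)) return PTR_ERR(req); */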
@@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd) | |||
361 | { | 361 | { |
362 | dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), | 362 | dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), |
363 | atomic_read(&osd->o_ref) - 1); | 363 | atomic_read(&osd->o_ref) - 1); |
364 | if (atomic_dec_and_test(&osd->o_ref)) | 364 | if (atomic_dec_and_test(&osd->o_ref)) { |
365 | struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; | ||
366 | |||
367 | if (osd->o_authorizer) | ||
368 | ac->ops->destroy_authorizer(ac, osd->o_authorizer); | ||
365 | kfree(osd); | 369 | kfree(osd); |
370 | } | ||
366 | } | 371 | } |
367 | 372 | ||
368 | /* | 373 | /* |
@@ -715,7 +720,7 @@ static void handle_timeout(struct work_struct *work) | |||
715 | * should mark the osd as failed and we should find out about | 720 | * should mark the osd as failed and we should find out about |
716 | * it from an updated osd map. | 721 | * it from an updated osd map. |
717 | */ | 722 | */ |
718 | while (!list_empty(&osdc->req_lru)) { | 723 | while (timeout && !list_empty(&osdc->req_lru)) { |
719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | 724 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, |
720 | r_req_lru_item); | 725 | r_req_lru_item); |
721 | 726 | ||
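Guarding the loop on timeout gives a zero osd_timeout a sane meaning: with the old code, a timeout of 0 made every request on req_lru look expired on every scan. Assuming timeout is derived from the mount's osd_timeout option earlier in handle_timeout() (that part is outside this hunk), the loop now reads:

    /* timeout == 0 now means "never time requests out" */
    while (timeout && !list_empty(&osdc->req_lru)) {
            req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
                             r_req_lru_item);
            if (time_before(jiffies, req->r_stamp + timeout))  /* r_stamp assumed */
                    break;
            /* ... kick the slow osd ... */
    }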
@@ -1078,6 +1083,7 @@ done: | |||
1078 | if (newmap) | 1083 | if (newmap) |
1079 | kick_requests(osdc, NULL); | 1084 | kick_requests(osdc, NULL); |
1080 | up_read(&osdc->map_sem); | 1085 | up_read(&osdc->map_sem); |
1086 | wake_up(&osdc->client->auth_wq); | ||
1081 | return; | 1087 | return; |
1082 | 1088 | ||
1083 | bad: | 1089 | bad: |
@@ -1087,45 +1093,6 @@ bad: | |||
1087 | return; | 1093 | return; |
1088 | } | 1094 | } |
1089 | 1095 | ||
1090 | |||
1091 | /* | ||
1092 | * A read request prepares specific pages that data is to be read into. | ||
1093 | * When a message is being read off the wire, we call prepare_pages to | ||
1094 | * find those pages. | ||
1095 | * 0 = success, -1 failure. | ||
1096 | */ | ||
1097 | static int __prepare_pages(struct ceph_connection *con, | ||
1098 | struct ceph_msg_header *hdr, | ||
1099 | struct ceph_osd_request *req, | ||
1100 | u64 tid, | ||
1101 | struct ceph_msg *m) | ||
1102 | { | ||
1103 | struct ceph_osd *osd = con->private; | ||
1104 | struct ceph_osd_client *osdc; | ||
1105 | int ret = -1; | ||
1106 | int data_len = le32_to_cpu(hdr->data_len); | ||
1107 | unsigned data_off = le16_to_cpu(hdr->data_off); | ||
1108 | |||
1109 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); | ||
1110 | |||
1111 | if (!osd) | ||
1112 | return -1; | ||
1113 | |||
1114 | osdc = osd->o_osdc; | ||
1115 | |||
1116 | dout("__prepare_pages on msg %p tid %llu, has %d pages, want %d\n", m, | ||
1117 | tid, req->r_num_pages, want); | ||
1118 | if (unlikely(req->r_num_pages < want)) | ||
1119 | goto out; | ||
1120 | m->pages = req->r_pages; | ||
1121 | m->nr_pages = req->r_num_pages; | ||
1122 | ret = 0; /* success */ | ||
1123 | out: | ||
1124 | BUG_ON(ret < 0 || m->nr_pages < want); | ||
1125 | |||
1126 | return ret; | ||
1127 | } | ||
1128 | |||
1129 | /* | 1096 | /* |
1130 | * Register request, send initial attempt. | 1097 | * Register request, send initial attempt. |
1131 | */ | 1098 | */ |
@@ -1252,11 +1219,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
1252 | if (!osdc->req_mempool) | 1219 | if (!osdc->req_mempool) |
1253 | goto out; | 1220 | goto out; |
1254 | 1221 | ||
1255 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true); | 1222 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, |
1223 | "osd_op"); | ||
1256 | if (err < 0) | 1224 | if (err < 0) |
1257 | goto out_mempool; | 1225 | goto out_mempool; |
1258 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, | 1226 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, |
1259 | OSD_OPREPLY_FRONT_LEN, 10, true); | 1227 | OSD_OPREPLY_FRONT_LEN, 10, true, |
1228 | "osd_op_reply"); | ||
1260 | if (err < 0) | 1229 | if (err < 0) |
1261 | goto out_msgpool; | 1230 | goto out_msgpool; |
1262 | return 0; | 1231 | return 0; |
@@ -1302,8 +1271,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1302 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 1271 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
1303 | NULL, 0, truncate_seq, truncate_size, NULL, | 1272 | NULL, 0, truncate_seq, truncate_size, NULL, |
1304 | false, 1); | 1273 | false, 1); |
1305 | if (IS_ERR(req)) | 1274 | if (!req) |
1306 | return PTR_ERR(req); | 1275 | return -ENOMEM; |
1307 | 1276 | ||
1308 | /* it may be a short read due to an object boundary */ | 1277 | /* it may be a short read due to an object boundary */ |
1309 | req->r_pages = pages; | 1278 | req->r_pages = pages; |
@@ -1345,8 +1314,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1345 | snapc, do_sync, | 1314 | snapc, do_sync, |
1346 | truncate_seq, truncate_size, mtime, | 1315 | truncate_seq, truncate_size, mtime, |
1347 | nofail, 1); | 1316 | nofail, 1); |
1348 | if (IS_ERR(req)) | 1317 | if (!req) |
1349 | return PTR_ERR(req); | 1318 | return -ENOMEM; |
1350 | 1319 | ||
1351 | /* it may be a short write due to an object boundary */ | 1320 | /* it may be a short write due to an object boundary */ |
1352 | req->r_pages = pages; | 1321 | req->r_pages = pages; |
@@ -1394,7 +1363,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
1394 | } | 1363 | } |
1395 | 1364 | ||
1396 | /* | 1365 | /* |
1397 | * lookup and return message for incoming reply | 1366 | * lookup and return message for incoming reply. set up reply message |
1367 | * pages. | ||
1398 | */ | 1368 | */ |
1399 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 1369 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
1400 | struct ceph_msg_header *hdr, | 1370 | struct ceph_msg_header *hdr, |
@@ -1407,7 +1377,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1407 | int front = le32_to_cpu(hdr->front_len); | 1377 | int front = le32_to_cpu(hdr->front_len); |
1408 | int data_len = le32_to_cpu(hdr->data_len); | 1378 | int data_len = le32_to_cpu(hdr->data_len); |
1409 | u64 tid; | 1379 | u64 tid; |
1410 | int err; | ||
1411 | 1380 | ||
1412 | tid = le64_to_cpu(hdr->tid); | 1381 | tid = le64_to_cpu(hdr->tid); |
1413 | mutex_lock(&osdc->request_mutex); | 1382 | mutex_lock(&osdc->request_mutex); |
@@ -1425,13 +1394,14 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1425 | req->r_reply, req->r_con_filling_msg); | 1394 | req->r_reply, req->r_con_filling_msg); |
1426 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | 1395 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); |
1427 | ceph_con_put(req->r_con_filling_msg); | 1396 | ceph_con_put(req->r_con_filling_msg); |
1397 | req->r_con_filling_msg = NULL; | ||
1428 | } | 1398 | } |
1429 | 1399 | ||
1430 | if (front > req->r_reply->front.iov_len) { | 1400 | if (front > req->r_reply->front.iov_len) { |
1431 | pr_warning("get_reply front %d > preallocated %d\n", | 1401 | pr_warning("get_reply front %d > preallocated %d\n", |
1432 | front, (int)req->r_reply->front.iov_len); | 1402 | front, (int)req->r_reply->front.iov_len); |
1433 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); | 1403 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); |
1434 | if (IS_ERR(m)) | 1404 | if (!m) |
1435 | goto out; | 1405 | goto out; |
1436 | ceph_msg_put(req->r_reply); | 1406 | ceph_msg_put(req->r_reply); |
1437 | req->r_reply = m; | 1407 | req->r_reply = m; |
@@ -1439,12 +1409,19 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1439 | m = ceph_msg_get(req->r_reply); | 1409 | m = ceph_msg_get(req->r_reply); |
1440 | 1410 | ||
1441 | if (data_len > 0) { | 1411 | if (data_len > 0) { |
1442 | err = __prepare_pages(con, hdr, req, tid, m); | 1412 | unsigned data_off = le16_to_cpu(hdr->data_off); |
1443 | if (err < 0) { | 1413 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); |
1414 | |||
1415 | if (unlikely(req->r_num_pages < want)) { | ||
1416 | pr_warning("tid %lld reply %d > expected %d pages\n", | ||
1417 | tid, want, m->nr_pages); | ||
1444 | *skip = 1; | 1418 | *skip = 1; |
1445 | ceph_msg_put(m); | 1419 | ceph_msg_put(m); |
1446 | m = ERR_PTR(err); | 1420 | m = NULL; |
1421 | goto out; | ||
1447 | } | 1422 | } |
1423 | m->pages = req->r_pages; | ||
1424 | m->nr_pages = req->r_num_pages; | ||
1448 | } | 1425 | } |
1449 | *skip = 0; | 1426 | *skip = 0; |
1450 | req->r_con_filling_msg = ceph_con_get(con); | 1427 | req->r_con_filling_msg = ceph_con_get(con); |
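With __prepare_pages() gone, get_reply() now does the page accounting inline. Note that data_off & ~PAGE_MASK keeps only the within-page offset. Assuming calc_pages_for(off, len) is the usual ceph helper that counts the pages spanned by the byte range [off, off + len), a worked example on 4 KB pages with data_off = 512 and data_len = 8192:

    calc_pages_for(512, 8192)
        = ((512 + 8192 + 4095) >> 12) - (512 >> 12)
        = 3 - 0
        = 3 pages

so the request must have at least 3 preallocated pages, or the reply is skipped with a warning instead of triggering the old BUG_ON().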
@@ -1466,7 +1443,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, | |||
1466 | 1443 | ||
1467 | switch (type) { | 1444 | switch (type) { |
1468 | case CEPH_MSG_OSD_MAP: | 1445 | case CEPH_MSG_OSD_MAP: |
1469 | return ceph_msg_new(type, front, 0, 0, NULL); | 1446 | return ceph_msg_new(type, front, GFP_NOFS); |
1470 | case CEPH_MSG_OSD_OPREPLY: | 1447 | case CEPH_MSG_OSD_OPREPLY: |
1471 | return get_reply(con, hdr, skip); | 1448 | return get_reply(con, hdr, skip); |
1472 | default: | 1449 | default: |
@@ -1552,7 +1529,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
1552 | return ceph_monc_validate_auth(&osdc->client->monc); | 1529 | return ceph_monc_validate_auth(&osdc->client->monc); |
1553 | } | 1530 | } |
1554 | 1531 | ||
1555 | const static struct ceph_connection_operations osd_con_ops = { | 1532 | static const struct ceph_connection_operations osd_con_ops = { |
1556 | .get = get_osd_con, | 1533 | .get = get_osd_con, |
1557 | .put = put_osd_con, | 1534 | .put = put_osd_con, |
1558 | .dispatch = dispatch, | 1535 | .dispatch = dispatch, |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index cfdd8f4388b7..ddc656fb5c05 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -706,7 +706,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
706 | len, *p, end); | 706 | len, *p, end); |
707 | newcrush = crush_decode(*p, min(*p+len, end)); | 707 | newcrush = crush_decode(*p, min(*p+len, end)); |
708 | if (IS_ERR(newcrush)) | 708 | if (IS_ERR(newcrush)) |
709 | return ERR_PTR(PTR_ERR(newcrush)); | 709 | return ERR_CAST(newcrush); |
710 | } | 710 | } |
711 | 711 | ||
712 | /* new flags? */ | 712 | /* new flags? */ |
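ERR_CAST() (from <linux/err.h>) is the idiomatic way to propagate an ERR_PTR-encoded error across pointer types; it avoids the redundant decode/re-encode of ERR_PTR(PTR_ERR(...)). A short sketch (names illustrative):

    struct crush_map *c = crush_decode(p, end);
    if (IS_ERR(c))
            return ERR_CAST(c);     /* same errno, re-typed to match the
                                     * caller's own return type */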
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index 5f8dbf7c745a..b6859f47d364 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c | |||
@@ -20,7 +20,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) | |||
20 | 20 | ||
21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | 21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) |
22 | { | 22 | { |
23 | struct page *page = alloc_page(GFP_NOFS); | 23 | struct page *page = __page_cache_alloc(GFP_NOFS); |
24 | if (!page) | 24 | if (!page) |
25 | return -ENOMEM; | 25 | return -ENOMEM; |
26 | pl->room += PAGE_SIZE; | 26 | pl->room += PAGE_SIZE; |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index fd56451a871f..8fcc023056c7 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -101,8 +101,8 @@ struct ceph_pg_pool { | |||
101 | __le64 snap_seq; /* seq for per-pool snapshot */ | 101 | __le64 snap_seq; /* seq for per-pool snapshot */ |
102 | __le32 snap_epoch; /* epoch of last snap */ | 102 | __le32 snap_epoch; /* epoch of last snap */ |
103 | __le32 num_snaps; | 103 | __le32 num_snaps; |
104 | __le32 num_removed_snap_intervals; | 104 | __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ |
105 | __le64 uid; | 105 | __le64 auid; /* who owns the pg */ |
106 | } __attribute__ ((packed)); | 106 | } __attribute__ ((packed)); |
107 | 107 | ||
108 | /* | 108 | /* |
@@ -208,6 +208,7 @@ enum { | |||
208 | /* read */ | 208 | /* read */ |
209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | 209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, |
210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, | 210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, |
211 | CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, | ||
211 | 212 | ||
212 | /* write */ | 213 | /* write */ |
213 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, | 214 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, |
@@ -305,6 +306,22 @@ enum { | |||
305 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | 306 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ |
306 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | 307 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ |
307 | 308 | ||
309 | /* xattr comparison */ | ||
310 | enum { | ||
311 | CEPH_OSD_CMPXATTR_OP_NOP = 0, | ||
312 | CEPH_OSD_CMPXATTR_OP_EQ = 1, | ||
313 | CEPH_OSD_CMPXATTR_OP_NE = 2, | ||
314 | CEPH_OSD_CMPXATTR_OP_GT = 3, | ||
315 | CEPH_OSD_CMPXATTR_OP_GTE = 4, | ||
316 | CEPH_OSD_CMPXATTR_OP_LT = 5, | ||
317 | CEPH_OSD_CMPXATTR_OP_LTE = 6 | ||
318 | }; | ||
319 | |||
320 | enum { | ||
321 | CEPH_OSD_CMPXATTR_MODE_STRING = 1, | ||
322 | CEPH_OSD_CMPXATTR_MODE_U64 = 2 | ||
323 | }; | ||
324 | |||
308 | /* | 325 | /* |
309 | * an individual object operation. each may be accompanied by some data | 326 | * an individual object operation. each may be accompanied by some data |
310 | * payload | 327 | * payload |
@@ -321,6 +338,8 @@ struct ceph_osd_op { | |||
321 | struct { | 338 | struct { |
322 | __le32 name_len; | 339 | __le32 name_len; |
323 | __le32 value_len; | 340 | __le32 value_len; |
341 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
342 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
324 | } __attribute__ ((packed)) xattr; | 343 | } __attribute__ ((packed)) xattr; |
325 | struct { | 344 | struct { |
326 | __u8 class_len; | 345 | __u8 class_len; |
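Putting the rados.h additions together, a client would encode a compare-xattr guard roughly as below. This is a hedged sketch: the field names follow the struct above, but the surrounding request plumbing (choosing the op code, carrying the name and value bytes in the op's data payload) is assumed here rather than shown in this diff:

    /* hypothetical: only proceed if xattr "version" compares equal to "3" */
    op->op = cpu_to_le16(CEPH_OSD_OP_CMPXATTR);
    op->xattr.name_len  = cpu_to_le32(strlen("version"));
    op->xattr.value_len = cpu_to_le32(strlen("3"));
    op->xattr.cmp_op    = CEPH_OSD_CMPXATTR_OP_EQ;       /* plain __u8 */
    op->xattr.cmp_mode  = CEPH_OSD_CMPXATTR_MODE_STRING;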
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index d5114db70453..c0b26b6badba 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -512,7 +512,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
512 | struct ceph_cap_snap *capsnap) | 512 | struct ceph_cap_snap *capsnap) |
513 | { | 513 | { |
514 | struct inode *inode = &ci->vfs_inode; | 514 | struct inode *inode = &ci->vfs_inode; |
515 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 515 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
516 | 516 | ||
517 | BUG_ON(capsnap->writing); | 517 | BUG_ON(capsnap->writing); |
518 | capsnap->size = inode->i_size; | 518 | capsnap->size = inode->i_size; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9307bbee6fbe..4e0bee240b9d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -8,14 +8,11 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/parser.h> | 10 | #include <linux/parser.h> |
11 | #include <linux/rwsem.h> | ||
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
13 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
15 | #include <linux/statfs.h> | 14 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
17 | #include <linux/version.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | 16 | ||
20 | #include "decode.h" | 17 | #include "decode.h" |
21 | #include "super.h" | 18 | #include "super.h" |
@@ -107,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
107 | static int ceph_syncfs(struct super_block *sb, int wait) | 104 | static int ceph_syncfs(struct super_block *sb, int wait) |
108 | { | 105 | { |
109 | dout("sync_fs %d\n", wait); | 106 | dout("sync_fs %d\n", wait); |
110 | ceph_osdc_sync(&ceph_client(sb)->osdc); | 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
111 | ceph_mdsc_sync(&ceph_client(sb)->mdsc); | 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
112 | dout("sync_fs %d done\n", wait); | 109 | dout("sync_fs %d done\n", wait); |
113 | return 0; | 110 | return 0; |
114 | } | 111 | } |
115 | 112 | ||
113 | static int default_congestion_kb(void) | ||
114 | { | ||
115 | int congestion_kb; | ||
116 | |||
117 | /* | ||
118 | * Copied from NFS | ||
119 | * | ||
120 | * congestion size, scale with available memory. | ||
121 | * | ||
122 | * 64MB: 8192k | ||
123 | * 128MB: 11585k | ||
124 | * 256MB: 16384k | ||
125 | * 512MB: 23170k | ||
126 | * 1GB: 32768k | ||
127 | * 2GB: 46340k | ||
128 | * 4GB: 65536k | ||
129 | * 8GB: 92681k | ||
130 | * 16GB: 131072k | ||
131 | * | ||
132 | * This allows larger machines to have larger/more transfers. | ||
133 | * Limit the default to 256M | ||
134 | */ | ||
135 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
136 | if (congestion_kb > 256*1024) | ||
137 | congestion_kb = 256*1024; | ||
138 | |||
139 | return congestion_kb; | ||
140 | } | ||
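Checking the relocated function against its own table, for a machine with 1 GB of RAM and 4 KB pages (PAGE_SHIFT = 12):

    totalram_pages            = 1 GB / 4 KB = 262144
    int_sqrt(262144)          = 512
    16 * 512                  = 8192
    8192 << (PAGE_SHIFT - 10) = 8192 << 2 = 32768k

which matches the 1GB row; by the same arithmetic the 256M cap only engages from 64 GB of RAM upward.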
116 | 141 | ||
117 | /** | 142 | /** |
118 | * ceph_show_options - Show mount options in /proc/mounts | 143 | * ceph_show_options - Show mount options in /proc/mounts |
@@ -138,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
138 | seq_puts(m, ",nocrc"); | 163 | seq_puts(m, ",nocrc"); |
139 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | 164 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) |
140 | seq_puts(m, ",noasyncreaddir"); | 165 | seq_puts(m, ",noasyncreaddir"); |
166 | |||
167 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
168 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
169 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
170 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
171 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
172 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
173 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
174 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
175 | args->osd_keepalive_timeout); | ||
176 | if (args->wsize) | ||
177 | seq_printf(m, ",wsize=%d", args->wsize); | ||
178 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
179 | seq_printf(m, ",rsize=%d", args->rsize); | ||
180 | if (args->congestion_kb != default_congestion_kb()) | ||
181 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
182 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
183 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
184 | args->caps_wanted_delay_min); | ||
185 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
186 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
187 | args->caps_wanted_delay_max); | ||
188 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
189 | seq_printf(m, ",cap_release_safety=%d", | ||
190 | args->cap_release_safety); | ||
191 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
192 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
193 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
194 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
141 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 195 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
142 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | 196 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); |
143 | if (args->name) | 197 | if (args->name) |
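Each of these options is emitted only when it differs from its compiled-in default, so a mount that tweaked a few values would appear in /proc/mounts roughly as follows (hypothetical addresses and values):

    1.2.3.4:/ /mnt/ceph ceph rw,osdtimeout=120,readdir_max_entries=4096,snapdirname=.snapshots 0 0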
@@ -161,35 +215,6 @@ static void ceph_inode_init_once(void *foo) | |||
161 | inode_init_once(&ci->vfs_inode); | 215 | inode_init_once(&ci->vfs_inode); |
162 | } | 216 | } |
163 | 217 | ||
164 | static int default_congestion_kb(void) | ||
165 | { | ||
166 | int congestion_kb; | ||
167 | |||
168 | /* | ||
169 | * Copied from NFS | ||
170 | * | ||
171 | * congestion size, scale with available memory. | ||
172 | * | ||
173 | * 64MB: 8192k | ||
174 | * 128MB: 11585k | ||
175 | * 256MB: 16384k | ||
176 | * 512MB: 23170k | ||
177 | * 1GB: 32768k | ||
178 | * 2GB: 46340k | ||
179 | * 4GB: 65536k | ||
180 | * 8GB: 92681k | ||
181 | * 16GB: 131072k | ||
182 | * | ||
183 | * This allows larger machines to have larger/more transfers. | ||
184 | * Limit the default to 256M | ||
185 | */ | ||
186 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
187 | if (congestion_kb > 256*1024) | ||
188 | congestion_kb = 256*1024; | ||
189 | |||
190 | return congestion_kb; | ||
191 | } | ||
192 | |||
193 | static int __init init_caches(void) | 218 | static int __init init_caches(void) |
194 | { | 219 | { |
195 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | 220 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
@@ -308,7 +333,9 @@ enum { | |||
308 | Opt_osd_idle_ttl, | 333 | Opt_osd_idle_ttl, |
309 | Opt_caps_wanted_delay_min, | 334 | Opt_caps_wanted_delay_min, |
310 | Opt_caps_wanted_delay_max, | 335 | Opt_caps_wanted_delay_max, |
336 | Opt_cap_release_safety, | ||
311 | Opt_readdir_max_entries, | 337 | Opt_readdir_max_entries, |
338 | Opt_readdir_max_bytes, | ||
312 | Opt_congestion_kb, | 339 | Opt_congestion_kb, |
313 | Opt_last_int, | 340 | Opt_last_int, |
314 | /* int args above */ | 341 | /* int args above */ |
@@ -339,7 +366,9 @@ static match_table_t arg_tokens = { | |||
339 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 366 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
340 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 367 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
341 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 368 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
369 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | ||
342 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 370 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | ||
343 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
344 | /* int args above */ | 373 | /* int args above */ |
345 | {Opt_snapdirname, "snapdirname=%s"}, | 374 | {Opt_snapdirname, "snapdirname=%s"}, |
@@ -388,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
388 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 417 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
389 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 418 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
390 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 419 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
391 | args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; | 420 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
392 | args->max_readdir = 1024; | 421 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
422 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
393 | args->congestion_kb = default_congestion_kb(); | 423 | args->congestion_kb = default_congestion_kb(); |
394 | 424 | ||
395 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 425 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ |
@@ -497,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
497 | case Opt_readdir_max_entries: | 527 | case Opt_readdir_max_entries: |
498 | args->max_readdir = intval; | 528 | args->max_readdir = intval; |
499 | break; | 529 | break; |
530 | case Opt_readdir_max_bytes: | ||
531 | args->max_readdir_bytes = intval; | ||
532 | break; | ||
500 | case Opt_congestion_kb: | 533 | case Opt_congestion_kb: |
501 | args->congestion_kb = intval; | 534 | args->congestion_kb = intval; |
502 | break; | 535 | break; |
@@ -636,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
636 | 669 | ||
637 | /* unmount */ | 670 | /* unmount */ |
638 | ceph_mdsc_stop(&client->mdsc); | 671 | ceph_mdsc_stop(&client->mdsc); |
639 | ceph_monc_stop(&client->monc); | ||
640 | ceph_osdc_stop(&client->osdc); | 672 | ceph_osdc_stop(&client->osdc); |
641 | 673 | ||
674 | /* | ||
675 | * make sure mds and osd connections close out before destroying | ||
676 | * the auth module, which is needed to free those connections' | ||
677 | * ceph_authorizers. | ||
678 | */ | ||
679 | ceph_msgr_flush(); | ||
680 | |||
681 | ceph_monc_stop(&client->monc); | ||
682 | |||
642 | ceph_adjust_min_caps(-client->min_caps); | 683 | ceph_adjust_min_caps(-client->min_caps); |
643 | 684 | ||
644 | ceph_debugfs_client_cleanup(client); | 685 | ceph_debugfs_client_cleanup(client); |
@@ -682,9 +723,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
682 | /* | 723 | /* |
683 | * true if we have the mon map (and have thus joined the cluster) | 724 | * true if we have the mon map (and have thus joined the cluster) |
684 | */ | 725 | */ |
685 | static int have_mon_map(struct ceph_client *client) | 726 | static int have_mon_and_osd_map(struct ceph_client *client) |
686 | { | 727 | { |
687 | return client->monc.monmap && client->monc.monmap->epoch; | 728 | return client->monc.monmap && client->monc.monmap->epoch && |
729 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
688 | } | 730 | } |
689 | 731 | ||
690 | /* | 732 | /* |
@@ -704,7 +746,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
704 | dout("open_root_inode opening '%s'\n", path); | 746 | dout("open_root_inode opening '%s'\n", path); |
705 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); | 747 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); |
706 | if (IS_ERR(req)) | 748 | if (IS_ERR(req)) |
707 | return ERR_PTR(PTR_ERR(req)); | 749 | return ERR_CAST(req); |
708 | req->r_path1 = kstrdup(path, GFP_NOFS); | 750 | req->r_path1 = kstrdup(path, GFP_NOFS); |
709 | req->r_ino1.ino = CEPH_INO_ROOT; | 751 | req->r_ino1.ino = CEPH_INO_ROOT; |
710 | req->r_ino1.snap = CEPH_NOSNAP; | 752 | req->r_ino1.snap = CEPH_NOSNAP; |
@@ -762,7 +804,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
762 | if (err < 0) | 804 | if (err < 0) |
763 | goto out; | 805 | goto out; |
764 | 806 | ||
765 | while (!have_mon_map(client)) { | 807 | while (!have_mon_and_osd_map(client)) { |
766 | err = -EIO; | 808 | err = -EIO; |
767 | if (timeout && time_after_eq(jiffies, started + timeout)) | 809 | if (timeout && time_after_eq(jiffies, started + timeout)) |
768 | goto out; | 810 | goto out; |
@@ -770,8 +812,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
770 | /* wait */ | 812 | /* wait */ |
771 | dout("mount waiting for mon_map\n"); | 813 | dout("mount waiting for mon_map\n"); |
772 | err = wait_event_interruptible_timeout(client->auth_wq, | 814 | err = wait_event_interruptible_timeout(client->auth_wq, |
773 | have_mon_map(client) || (client->auth_err < 0), | 815 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
774 | timeout); | 816 | timeout); |
775 | if (err == -EINTR || err == -ERESTARTSYS) | 817 | if (err == -EINTR || err == -ERESTARTSYS) |
776 | goto out; | 818 | goto out; |
777 | if (client->auth_err < 0) { | 819 | if (client->auth_err < 0) { |
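The mount loop above is the standard wait-for-condition-or-timeout idiom: wait_event_interruptible_timeout() returns the remaining jiffies on success, 0 on timeout, or -ERESTARTSYS when a signal arrives. A generic sketch with hypothetical names (wq, ready, fatal_err):

    long left = wait_event_interruptible_timeout(wq,
                            ready(client) || fatal_err < 0, timeout);
    if (left == -ERESTARTSYS)
            return -EINTR;          /* interrupted by a signal */
    if (fatal_err < 0)
            return fatal_err;       /* the waker reported a failure */
    if (left == 0)
            return -EIO;            /* timed out, as the mount loop does */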
@@ -884,6 +926,8 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
884 | /* | 926 | /* |
885 | * construct our own bdi so we can control readahead, etc. | 927 | * construct our own bdi so we can control readahead, etc. |
886 | */ | 928 | */ |
929 | static atomic_long_t bdi_seq = ATOMIC_INIT(0); | ||
930 | |||
887 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 931 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) |
888 | { | 932 | { |
889 | int err; | 933 | int err; |
@@ -893,7 +937,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
893 | client->backing_dev_info.ra_pages = | 937 | client->backing_dev_info.ra_pages = |
894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 938 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
895 | >> PAGE_SHIFT; | 939 | >> PAGE_SHIFT; |
896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 940 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", |
941 | atomic_long_inc_return(&bdi_seq)); | ||
897 | if (!err) | 942 | if (!err) |
898 | sb->s_bdi = &client->backing_dev_info; | 943 | sb->s_bdi = &client->backing_dev_info; |
899 | return err; | 944 | return err; |
@@ -932,9 +977,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
932 | goto out; | 977 | goto out; |
933 | } | 978 | } |
934 | 979 | ||
935 | if (ceph_client(sb) != client) { | 980 | if (ceph_sb_to_client(sb) != client) { |
936 | ceph_destroy_client(client); | 981 | ceph_destroy_client(client); |
937 | client = ceph_client(sb); | 982 | client = ceph_sb_to_client(sb); |
938 | dout("get_sb got existing client %p\n", client); | 983 | dout("get_sb got existing client %p\n", client); |
939 | } else { | 984 | } else { |
940 | dout("get_sb using new client %p\n", client); | 985 | dout("get_sb using new client %p\n", client); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 13513b80d87f..10a4a406e887 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/mempool.h> | 11 | #include <linux/mempool.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/slab.h> | ||
14 | #include <linux/wait.h> | 13 | #include <linux/wait.h> |
15 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
@@ -52,24 +51,25 @@ | |||
52 | 51 | ||
53 | struct ceph_mount_args { | 52 | struct ceph_mount_args { |
54 | int sb_flags; | 53 | int sb_flags; |
54 | int flags; | ||
55 | struct ceph_fsid fsid; | ||
56 | struct ceph_entity_addr my_addr; | ||
55 | int num_mon; | 57 | int num_mon; |
56 | struct ceph_entity_addr *mon_addr; | 58 | struct ceph_entity_addr *mon_addr; |
57 | int flags; | ||
58 | int mount_timeout; | 59 | int mount_timeout; |
59 | int osd_idle_ttl; | 60 | int osd_idle_ttl; |
60 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
61 | struct ceph_fsid fsid; | ||
62 | struct ceph_entity_addr my_addr; | ||
63 | int wsize; | ||
64 | int rsize; /* max readahead */ | ||
65 | int max_readdir; /* max readdir size */ | ||
66 | int congestion_kb; /* max readdir size */ | ||
67 | int osd_timeout; | 61 | int osd_timeout; |
68 | int osd_keepalive_timeout; | 62 | int osd_keepalive_timeout; |
63 | int wsize; | ||
64 | int rsize; /* max readahead */ | ||
65 | int congestion_kb; /* max writeback in flight */ | ||
66 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
67 | int cap_release_safety; | ||
68 | int max_readdir; /* max readdir result (entries) */ | ||
69 | int max_readdir_bytes; /* max readdir result (bytes) */ | ||
69 | char *snapdir_name; /* default ".snap" */ | 70 | char *snapdir_name; /* default ".snap" */ |
70 | char *name; | 71 | char *name; |
71 | char *secret; | 72 | char *secret; |
72 | int cap_release_safety; | ||
73 | }; | 73 | }; |
74 | 74 | ||
75 | /* | 75 | /* |
@@ -80,13 +80,14 @@ struct ceph_mount_args { | |||
80 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | 80 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 |
81 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | 81 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 |
82 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | 82 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ |
83 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
84 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
83 | 85 | ||
84 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 86 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
85 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | 87 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) |
86 | 88 | ||
87 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | 89 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
88 | #define CEPH_AUTH_NAME_DEFAULT "guest" | 90 | #define CEPH_AUTH_NAME_DEFAULT "guest" |
89 | |||
90 | /* | 91 | /* |
91 | * Delay telling the MDS we no longer want caps, in case we reopen | 92 | * Delay telling the MDS we no longer want caps, in case we reopen |
92 | * the file. Delay a minimum amount of time, even if we send a cap | 93 | * the file. Delay a minimum amount of time, even if we send a cap |
@@ -96,6 +97,7 @@ struct ceph_mount_args { | |||
96 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | 97 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ |
97 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | 98 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ |
98 | 99 | ||
100 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
99 | 101 | ||
100 | /* mount state */ | 102 | /* mount state */ |
101 | enum { | 103 | enum { |
@@ -160,12 +162,6 @@ struct ceph_client { | |||
160 | #endif | 162 | #endif |
161 | }; | 163 | }; |
162 | 164 | ||
163 | static inline struct ceph_client *ceph_client(struct super_block *sb) | ||
164 | { | ||
165 | return sb->s_fs_info; | ||
166 | } | ||
167 | |||
168 | |||
169 | /* | 165 | /* |
170 | * File i/o capability. This tracks shared state with the metadata | 166 | * File i/o capability. This tracks shared state with the metadata |
171 | * server that allows us to cache or writeback attributes or to read | 167 | * server that allows us to cache or writeback attributes or to read |
@@ -814,7 +810,7 @@ extern void ceph_put_cap(struct ceph_cap *cap); | |||
814 | 810 | ||
815 | extern void ceph_queue_caps_release(struct inode *inode); | 811 | extern void ceph_queue_caps_release(struct inode *inode); |
816 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); | 812 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); |
817 | extern int ceph_fsync(struct file *file, struct dentry *dentry, int datasync); | 813 | extern int ceph_fsync(struct file *file, int datasync); |
818 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | 814 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, |
819 | struct ceph_mds_session *session); | 815 | struct ceph_mds_session *session); |
820 | extern int ceph_get_cap_mds(struct inode *inode); | 816 | extern int ceph_get_cap_mds(struct inode *inode); |
@@ -871,6 +867,7 @@ extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
871 | extern void ceph_dentry_lru_add(struct dentry *dn); | 867 | extern void ceph_dentry_lru_add(struct dentry *dn); |
872 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 868 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
873 | extern void ceph_dentry_lru_del(struct dentry *dn); | 869 | extern void ceph_dentry_lru_del(struct dentry *dn); |
870 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | ||
874 | 871 | ||
875 | /* | 872 | /* |
876 | * our d_ops vary depending on whether the inode is live, | 873 | * our d_ops vary depending on whether the inode is live, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2845422907fc..68aeebc69681 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -7,7 +7,8 @@ | |||
7 | 7 | ||
8 | static bool ceph_is_valid_xattr(const char *name) | 8 | static bool ceph_is_valid_xattr(const char *name) |
9 | { | 9 | { |
10 | return !strncmp(name, XATTR_SECURITY_PREFIX, | 10 | return !strncmp(name, "ceph.", 5) || |
11 | !strncmp(name, XATTR_SECURITY_PREFIX, | ||
11 | XATTR_SECURITY_PREFIX_LEN) || | 12 | XATTR_SECURITY_PREFIX_LEN) || |
12 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 13 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || |
13 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | 14 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); |
@@ -76,14 +77,14 @@ static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, | |||
76 | } | 77 | } |
77 | 78 | ||
78 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { | 79 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { |
79 | { true, "user.ceph.dir.entries", ceph_vxattrcb_entries}, | 80 | { true, "ceph.dir.entries", ceph_vxattrcb_entries}, |
80 | { true, "user.ceph.dir.files", ceph_vxattrcb_files}, | 81 | { true, "ceph.dir.files", ceph_vxattrcb_files}, |
81 | { true, "user.ceph.dir.subdirs", ceph_vxattrcb_subdirs}, | 82 | { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, |
82 | { true, "user.ceph.dir.rentries", ceph_vxattrcb_rentries}, | 83 | { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, |
83 | { true, "user.ceph.dir.rfiles", ceph_vxattrcb_rfiles}, | 84 | { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, |
84 | { true, "user.ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, | 85 | { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, |
85 | { true, "user.ceph.dir.rbytes", ceph_vxattrcb_rbytes}, | 86 | { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, |
86 | { true, "user.ceph.dir.rctime", ceph_vxattrcb_rctime}, | 87 | { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, |
87 | { true, NULL, NULL } | 88 | { true, NULL, NULL } |
88 | }; | 89 | }; |
89 | 90 | ||
@@ -107,7 +108,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | |||
107 | } | 108 | } |
108 | 109 | ||
109 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { | 110 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { |
110 | { true, "user.ceph.layout", ceph_vxattrcb_layout}, | 111 | { true, "ceph.layout", ceph_vxattrcb_layout}, |
111 | { NULL, NULL } | 112 | { NULL, NULL } |
112 | }; | 113 | }; |
113 | 114 | ||
@@ -186,12 +187,6 @@ static int __set_xattr(struct ceph_inode_info *ci, | |||
186 | ci->i_xattrs.names_size -= xattr->name_len; | 187 | ci->i_xattrs.names_size -= xattr->name_len; |
187 | ci->i_xattrs.vals_size -= xattr->val_len; | 188 | ci->i_xattrs.vals_size -= xattr->val_len; |
188 | } | 189 | } |
189 | if (!xattr) { | ||
190 | pr_err("__set_xattr ENOMEM on %p %llx.%llx xattr %s=%s\n", | ||
191 | &ci->vfs_inode, ceph_vinop(&ci->vfs_inode), name, | ||
192 | xattr->val); | ||
193 | return -ENOMEM; | ||
194 | } | ||
195 | ci->i_xattrs.names_size += name_len; | 190 | ci->i_xattrs.names_size += name_len; |
196 | ci->i_xattrs.vals_size += val_len; | 191 | ci->i_xattrs.vals_size += val_len; |
197 | if (val) | 192 | if (val) |
@@ -574,7 +569,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
574 | ci->i_xattrs.version, ci->i_xattrs.index_version); | 569 | ci->i_xattrs.version, ci->i_xattrs.index_version); |
575 | 570 | ||
576 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 571 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
577 | (ci->i_xattrs.index_version > ci->i_xattrs.version)) { | 572 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
578 | goto list_xattr; | 573 | goto list_xattr; |
579 | } else { | 574 | } else { |
580 | spin_unlock(&inode->i_lock); | 575 | spin_unlock(&inode->i_lock); |
@@ -622,7 +617,7 @@ out: | |||
622 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 617 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
623 | const char *value, size_t size, int flags) | 618 | const char *value, size_t size, int flags) |
624 | { | 619 | { |
625 | struct ceph_client *client = ceph_client(dentry->d_sb); | 620 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
626 | struct inode *inode = dentry->d_inode; | 621 | struct inode *inode = dentry->d_inode; |
627 | struct ceph_inode_info *ci = ceph_inode(inode); | 622 | struct ceph_inode_info *ci = ceph_inode(inode); |
628 | struct inode *parent_inode = dentry->d_parent->d_inode; | 623 | struct inode *parent_inode = dentry->d_parent->d_inode; |
@@ -641,7 +636,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
641 | return -ENOMEM; | 636 | return -ENOMEM; |
642 | err = -ENOMEM; | 637 | err = -ENOMEM; |
643 | for (i = 0; i < nr_pages; i++) { | 638 | for (i = 0; i < nr_pages; i++) { |
644 | pages[i] = alloc_page(GFP_NOFS); | 639 | pages[i] = __page_cache_alloc(GFP_NOFS); |
645 | if (!pages[i]) { | 640 | if (!pages[i]) { |
646 | nr_pages = i; | 641 | nr_pages = i; |
647 | goto out; | 642 | goto out; |
@@ -779,7 +774,7 @@ out: | |||
779 | 774 | ||
780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 775 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
781 | { | 776 | { |
782 | struct ceph_client *client = ceph_client(dentry->d_sb); | 777 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
783 | struct ceph_mds_client *mdsc = &client->mdsc; | 778 | struct ceph_mds_client *mdsc = &client->mdsc; |
784 | struct inode *inode = dentry->d_inode; | 779 | struct inode *inode = dentry->d_inode; |
785 | struct inode *parent_inode = dentry->d_parent->d_inode; | 780 | struct inode *parent_inode = dentry->d_parent->d_inode; |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0242ff9cbf41..a7eb65c84b1c 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -84,7 +84,7 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data, | |||
84 | extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, | 84 | extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, |
85 | size_t write_size, loff_t *poffset); | 85 | size_t write_size, loff_t *poffset); |
86 | extern int cifs_lock(struct file *, int, struct file_lock *); | 86 | extern int cifs_lock(struct file *, int, struct file_lock *); |
87 | extern int cifs_fsync(struct file *, struct dentry *, int); | 87 | extern int cifs_fsync(struct file *, int); |
88 | extern int cifs_flush(struct file *, fl_owner_t id); | 88 | extern int cifs_flush(struct file *, fl_owner_t id); |
89 | extern int cifs_file_mmap(struct file * , struct vm_area_struct *); | 89 | extern int cifs_file_mmap(struct file * , struct vm_area_struct *); |
90 | extern const struct file_operations cifs_dir_ops; | 90 | extern const struct file_operations cifs_dir_ops; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a83541ec9713..f1ff785b2292 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1676,7 +1676,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, | |||
1676 | return rc; | 1676 | return rc; |
1677 | } | 1677 | } |
1678 | 1678 | ||
1679 | int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) | 1679 | int cifs_fsync(struct file *file, int datasync) |
1680 | { | 1680 | { |
1681 | int xid; | 1681 | int xid; |
1682 | int rc = 0; | 1682 | int rc = 0; |
@@ -1688,7 +1688,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
1688 | xid = GetXid(); | 1688 | xid = GetXid(); |
1689 | 1689 | ||
1690 | cFYI(1, "Sync file - name: %s datasync: 0x%x", | 1690 | cFYI(1, "Sync file - name: %s datasync: 0x%x", |
1691 | dentry->d_name.name, datasync); | 1691 | file->f_path.dentry->d_name.name, datasync); |
1692 | 1692 | ||
1693 | rc = filemap_write_and_wait(inode->i_mapping); | 1693 | rc = filemap_write_and_wait(inode->i_mapping); |
1694 | if (rc == 0) { | 1694 | if (rc == 0) { |
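cifs_fsync() is converted to the new two-argument fsync prototype used throughout this merge; implementations that still need the dentry or inode recover them from the file, exactly as the cFYI line above now does. A minimal sketch of the new shape (foo_fsync is hypothetical):

    static int foo_fsync(struct file *file, int datasync)
    {
            struct dentry *dentry = file->f_path.dentry;
            struct inode *inode = dentry->d_inode;

            /* write back and wait on this inode's dirty pages */
            return filemap_write_and_wait(inode->i_mapping);
    }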
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index d99860a33890..6b443ff43a19 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h | |||
@@ -11,8 +11,7 @@ extern int coda_fake_statfs; | |||
11 | 11 | ||
12 | void coda_destroy_inodecache(void); | 12 | void coda_destroy_inodecache(void); |
13 | int coda_init_inodecache(void); | 13 | int coda_init_inodecache(void); |
14 | int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, | 14 | int coda_fsync(struct file *coda_file, int datasync); |
15 | int datasync); | ||
16 | void coda_sysctl_init(void); | 15 | void coda_sysctl_init(void); |
17 | void coda_sysctl_clean(void); | 16 | void coda_sysctl_clean(void); |
18 | 17 | ||
diff --git a/fs/coda/file.c b/fs/coda/file.c index 7196077b1688..ad3cd2abeeb4 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -202,10 +202,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) | |||
202 | return 0; | 202 | return 0; |
203 | } | 203 | } |
204 | 204 | ||
205 | int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) | 205 | int coda_fsync(struct file *coda_file, int datasync) |
206 | { | 206 | { |
207 | struct file *host_file; | 207 | struct file *host_file; |
208 | struct inode *coda_inode = coda_dentry->d_inode; | 208 | struct inode *coda_inode = coda_file->f_path.dentry->d_inode; |
209 | struct coda_file_info *cfi; | 209 | struct coda_file_info *cfi; |
210 | int err = 0; | 210 | int err = 0; |
211 | 211 | ||
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 773f2ce9aa06..ca25d96d45c9 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Pioctl operations for Coda. | 2 | * Pioctl operations for Coda. |
3 | * Original version: (C) 1996 Peter Braam | 3 | * Original version: (C) 1996 Peter Braam |
4 | * Rewritten for Linux 2.1: (C) 1997 Carnegie Mellon University | 4 | * Rewritten for Linux 2.1: (C) 1997 Carnegie Mellon University |
5 | * | 5 | * |
6 | * Carnegie Mellon encourages users of this code to contribute improvements | 6 | * Carnegie Mellon encourages users of this code to contribute improvements |
@@ -23,21 +23,22 @@ | |||
23 | #include <linux/coda_fs_i.h> | 23 | #include <linux/coda_fs_i.h> |
24 | #include <linux/coda_psdev.h> | 24 | #include <linux/coda_psdev.h> |
25 | 25 | ||
26 | #include <linux/smp_lock.h> | ||
27 | |||
26 | /* pioctl ops */ | 28 | /* pioctl ops */ |
27 | static int coda_ioctl_permission(struct inode *inode, int mask); | 29 | static int coda_ioctl_permission(struct inode *inode, int mask); |
28 | static int coda_pioctl(struct inode * inode, struct file * filp, | 30 | static long coda_pioctl(struct file *filp, unsigned int cmd, |
29 | unsigned int cmd, unsigned long user_data); | 31 | unsigned long user_data); |
30 | 32 | ||
31 | /* exported from this file */ | 33 | /* exported from this file */ |
32 | const struct inode_operations coda_ioctl_inode_operations = | 34 | const struct inode_operations coda_ioctl_inode_operations = { |
33 | { | ||
34 | .permission = coda_ioctl_permission, | 35 | .permission = coda_ioctl_permission, |
35 | .setattr = coda_setattr, | 36 | .setattr = coda_setattr, |
36 | }; | 37 | }; |
37 | 38 | ||
38 | const struct file_operations coda_ioctl_operations = { | 39 | const struct file_operations coda_ioctl_operations = { |
39 | .owner = THIS_MODULE, | 40 | .owner = THIS_MODULE, |
40 | .ioctl = coda_pioctl, | 41 | .unlocked_ioctl = coda_pioctl, |
41 | }; | 42 | }; |
42 | 43 | ||
43 | /* the coda pioctl inode ops */ | 44 | /* the coda pioctl inode ops */ |
@@ -46,48 +47,53 @@ static int coda_ioctl_permission(struct inode *inode, int mask) | |||
46 | return (mask & MAY_EXEC) ? -EACCES : 0; | 47 | return (mask & MAY_EXEC) ? -EACCES : 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static int coda_pioctl(struct inode * inode, struct file * filp, | 50 | static long coda_pioctl(struct file *filp, unsigned int cmd, |
50 | unsigned int cmd, unsigned long user_data) | 51 | unsigned long user_data) |
51 | { | 52 | { |
52 | struct path path; | 53 | struct path path; |
53 | int error; | 54 | int error; |
54 | struct PioctlData data; | 55 | struct PioctlData data; |
55 | struct inode *target_inode = NULL; | 56 | struct inode *inode = filp->f_dentry->d_inode; |
56 | struct coda_inode_info *cnp; | 57 | struct inode *target_inode = NULL; |
58 | struct coda_inode_info *cnp; | ||
57 | 59 | ||
58 | /* get the Pioctl data arguments from user space */ | 60 | lock_kernel(); |
59 | if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { | 61 | |
60 | return -EINVAL; | 62 | /* get the Pioctl data arguments from user space */ |
61 | } | 63 | if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { |
62 | 64 | error = -EINVAL; | |
63 | /* | 65 | goto out; |
64 | * Look up the pathname. Note that the pathname is in | ||
65 | * user memory, and namei takes care of this | ||
66 | */ | ||
67 | if (data.follow) { | ||
68 | error = user_path(data.path, &path); | ||
69 | } else { | ||
70 | error = user_lpath(data.path, &path); | ||
71 | } | 66 | } |
72 | 67 | ||
73 | if ( error ) { | 68 | /* |
74 | return error; | 69 | * Look up the pathname. Note that the pathname is in |
75 | } else { | 70 | * user memory, and namei takes care of this |
71 | */ | ||
72 | if (data.follow) | ||
73 | error = user_path(data.path, &path); | ||
74 | else | ||
75 | error = user_lpath(data.path, &path); | ||
76 | |||
77 | if (error) | ||
78 | goto out; | ||
79 | else | ||
76 | target_inode = path.dentry->d_inode; | 80 | target_inode = path.dentry->d_inode; |
77 | } | 81 | |
78 | |||
79 | /* return if it is not a Coda inode */ | 82 | /* return if it is not a Coda inode */ |
80 | if ( target_inode->i_sb != inode->i_sb ) { | 83 | if (target_inode->i_sb != inode->i_sb) { |
81 | path_put(&path); | 84 | path_put(&path); |
82 | return -EINVAL; | 85 | error = -EINVAL; |
86 | goto out; | ||
83 | } | 87 | } |
84 | 88 | ||
85 | /* now proceed to make the upcall */ | 89 | /* now proceed to make the upcall */ |
86 | cnp = ITOC(target_inode); | 90 | cnp = ITOC(target_inode); |
87 | 91 | ||
88 | error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); | 92 | error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); |
89 | 93 | ||
90 | path_put(&path); | 94 | path_put(&path); |
91 | return error; | ||
92 | } | ||
93 | 95 | ||
96 | out: | ||
97 | unlock_kernel(); | ||
98 | return error; | ||
99 | } | ||
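The coda conversion above is the standard recipe for moving off the legacy .ioctl hook: switch the file_operations entry to .unlocked_ioctl, derive the inode from the file instead of taking it as a parameter, and take the BKL explicitly so the old locking is preserved until the handler is audited. In outline (hypothetical names):

    static long foo_ioctl(struct file *filp, unsigned int cmd,
                          unsigned long arg)
    {
            struct inode *inode = filp->f_dentry->d_inode;
            long error;

            lock_kernel();          /* keep the old .ioctl BKL semantics */
            error = foo_do_ioctl(inode, cmd, arg);
            unlock_kernel();
            return error;
    }

    static const struct file_operations foo_fops = {
            .unlocked_ioctl = foo_ioctl,
    };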
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index be4392ca2098..66b9cf79c5ba 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -73,8 +73,7 @@ static unsigned int coda_psdev_poll(struct file *file, poll_table * wait) | |||
73 | return mask; | 73 | return mask; |
74 | } | 74 | } |
75 | 75 | ||
76 | static int coda_psdev_ioctl(struct inode * inode, struct file * filp, | 76 | static long coda_psdev_ioctl(struct file * filp, unsigned int cmd, unsigned long arg) |
77 | unsigned int cmd, unsigned long arg) | ||
78 | { | 77 | { |
79 | unsigned int data; | 78 | unsigned int data; |
80 | 79 | ||
@@ -344,7 +343,7 @@ static const struct file_operations coda_psdev_fops = { | |||
344 | .read = coda_psdev_read, | 343 | .read = coda_psdev_read, |
345 | .write = coda_psdev_write, | 344 | .write = coda_psdev_write, |
346 | .poll = coda_psdev_poll, | 345 | .poll = coda_psdev_poll, |
347 | .ioctl = coda_psdev_ioctl, | 346 | .unlocked_ioctl = coda_psdev_ioctl, |
348 | .open = coda_psdev_open, | 347 | .open = coda_psdev_open, |
349 | .release = coda_psdev_release, | 348 | .release = coda_psdev_release, |
350 | }; | 349 | }; |
diff --git a/fs/compat.c b/fs/compat.c index 05448730f840..f0b391c50552 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -568,6 +568,79 @@ out: | |||
568 | return ret; | 568 | return ret; |
569 | } | 569 | } |
570 | 570 | ||
571 | /* A write operation does a read from user space and vice versa */ | ||
572 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) | ||
573 | |||
574 | ssize_t compat_rw_copy_check_uvector(int type, | ||
575 | const struct compat_iovec __user *uvector, unsigned long nr_segs, | ||
576 | unsigned long fast_segs, struct iovec *fast_pointer, | ||
577 | struct iovec **ret_pointer) | ||
578 | { | ||
579 | compat_ssize_t tot_len; | ||
580 | struct iovec *iov = *ret_pointer = fast_pointer; | ||
581 | ssize_t ret = 0; | ||
582 | int seg; | ||
583 | |||
584 | /* | ||
585 | * SuS says "The readv() function *may* fail if the iovcnt argument | ||
586 | * was less than or equal to 0, or greater than {IOV_MAX}. Linux has | ||
587 | * traditionally returned zero for zero segments, so... | ||
588 | */ | ||
589 | if (nr_segs == 0) | ||
590 | goto out; | ||
591 | |||
592 | ret = -EINVAL; | ||
593 | if (nr_segs > UIO_MAXIOV || nr_segs < 0) | ||
594 | goto out; | ||
595 | if (nr_segs > fast_segs) { | ||
596 | ret = -ENOMEM; | ||
597 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | ||
598 | if (iov == NULL) { | ||
599 | *ret_pointer = fast_pointer; | ||
600 | goto out; | ||
601 | } | ||
602 | } | ||
603 | *ret_pointer = iov; | ||
604 | |||
605 | /* | ||
606 | * Single unix specification: | ||
607 | * We should -EINVAL if an element length is not >= 0 and fitting an | ||
608 | * ssize_t. The total length is fitting an ssize_t | ||
609 | * | ||
610 | * Be careful here because iov_len is a size_t not an ssize_t | ||
611 | */ | ||
612 | tot_len = 0; | ||
613 | ret = -EINVAL; | ||
614 | for (seg = 0; seg < nr_segs; seg++) { | ||
615 | compat_ssize_t tmp = tot_len; | ||
616 | compat_uptr_t buf; | ||
617 | compat_ssize_t len; | ||
618 | |||
619 | if (__get_user(len, &uvector->iov_len) || | ||
620 | __get_user(buf, &uvector->iov_base)) { | ||
621 | ret = -EFAULT; | ||
622 | goto out; | ||
623 | } | ||
624 | if (len < 0) /* size_t not fitting in compat_ssize_t .. */ | ||
625 | goto out; | ||
626 | tot_len += len; | ||
627 | if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ | ||
628 | goto out; | ||
629 | if (!access_ok(vrfy_dir(type), buf, len)) { | ||
630 | ret = -EFAULT; | ||
631 | goto out; | ||
632 | } | ||
633 | iov->iov_base = compat_ptr(buf); | ||
634 | iov->iov_len = (compat_size_t) len; | ||
635 | uvector++; | ||
636 | iov++; | ||
637 | } | ||
638 | ret = tot_len; | ||
639 | |||
640 | out: | ||
641 | return ret; | ||
642 | } | ||
643 | |||
571 | static inline long | 644 | static inline long |
572 | copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) | 645 | copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) |
573 | { | 646 | { |
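compat_rw_copy_check_uvector() factors the iovec copy-and-validate step out of compat_do_readv_writev() (refactored below) so that other compat paths can reuse it. The calling convention, per the refactored caller: it returns the total byte count or a negative errno, and may swap the caller's stack array for a kmalloc'd one that must be freed afterwards. A sketch:

    struct iovec iovstack[UIO_FASTIOV];
    struct iovec *iov = iovstack;
    ssize_t tot_len;

    tot_len = compat_rw_copy_check_uvector(READ, uvector, nr_segs,
                                           UIO_FASTIOV, iovstack, &iov);
    if (tot_len <= 0)
            goto out;       /* negative errno, or a zero-length request */
    /* ... perform the I/O over iov ... */
    out:
    if (iov != iovstack)
            kfree(iov);     /* the helper may have allocated a larger array */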
@@ -600,7 +673,7 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) | |||
600 | iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); | 673 | iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); |
601 | ret = copy_iocb(nr, iocb, iocb64); | 674 | ret = copy_iocb(nr, iocb, iocb64); |
602 | if (!ret) | 675 | if (!ret) |
603 | ret = sys_io_submit(ctx_id, nr, iocb64); | 676 | ret = do_io_submit(ctx_id, nr, iocb64, 1); |
604 | return ret; | 677 | return ret; |
605 | } | 678 | } |
606 | 679 | ||
@@ -1077,70 +1150,21 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1077 | { | 1150 | { |
1078 | compat_ssize_t tot_len; | 1151 | compat_ssize_t tot_len; |
1079 | struct iovec iovstack[UIO_FASTIOV]; | 1152 | struct iovec iovstack[UIO_FASTIOV]; |
1080 | struct iovec *iov=iovstack, *vector; | 1153 | struct iovec *iov; |
1081 | ssize_t ret; | 1154 | ssize_t ret; |
1082 | int seg; | ||
1083 | io_fn_t fn; | 1155 | io_fn_t fn; |
1084 | iov_fn_t fnv; | 1156 | iov_fn_t fnv; |
1085 | 1157 | ||
1086 | /* | ||
1087 | * SuS says "The readv() function *may* fail if the iovcnt argument | ||
1088 | * was less than or equal to 0, or greater than {IOV_MAX}. Linux has | ||
1089 | * traditionally returned zero for zero segments, so... | ||
1090 | */ | ||
1091 | ret = 0; | ||
1092 | if (nr_segs == 0) | ||
1093 | goto out; | ||
1094 | |||
1095 | /* | ||
1096 | * First get the "struct iovec" from user memory and | ||
1097 | * verify all the pointers | ||
1098 | */ | ||
1099 | ret = -EINVAL; | 1158 | ret = -EINVAL; |
1100 | if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) | ||
1101 | goto out; | ||
1102 | if (!file->f_op) | 1159 | if (!file->f_op) |
1103 | goto out; | 1160 | goto out; |
1104 | if (nr_segs > UIO_FASTIOV) { | 1161 | |
1105 | ret = -ENOMEM; | ||
1106 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | ||
1107 | if (!iov) | ||
1108 | goto out; | ||
1109 | } | ||
1110 | ret = -EFAULT; | 1162 | ret = -EFAULT; |
1111 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) | 1163 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) |
1112 | goto out; | 1164 | goto out; |
1113 | 1165 | ||
1114 | /* | 1166 | tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, |
1115 | * Single unix specification: | 1167 | UIO_FASTIOV, iovstack, &iov); |
1116 | * We should -EINVAL if an element length is not >= 0 and fitting an | ||
1117 | * ssize_t. The total length is fitting an ssize_t | ||
1118 | * | ||
1119 | * Be careful here because iov_len is a size_t not an ssize_t | ||
1120 | */ | ||
1121 | tot_len = 0; | ||
1122 | vector = iov; | ||
1123 | ret = -EINVAL; | ||
1124 | for (seg = 0 ; seg < nr_segs; seg++) { | ||
1125 | compat_ssize_t tmp = tot_len; | ||
1126 | compat_ssize_t len; | ||
1127 | compat_uptr_t buf; | ||
1128 | |||
1129 | if (__get_user(len, &uvector->iov_len) || | ||
1130 | __get_user(buf, &uvector->iov_base)) { | ||
1131 | ret = -EFAULT; | ||
1132 | goto out; | ||
1133 | } | ||
1134 | if (len < 0) /* size_t not fitting an compat_ssize_t .. */ | ||
1135 | goto out; | ||
1136 | tot_len += len; | ||
1137 | if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ | ||
1138 | goto out; | ||
1139 | vector->iov_base = compat_ptr(buf); | ||
1140 | vector->iov_len = (compat_size_t) len; | ||
1141 | uvector++; | ||
1142 | vector++; | ||
1143 | } | ||
1144 | if (tot_len == 0) { | 1168 | if (tot_len == 0) { |
1145 | ret = 0; | 1169 | ret = 0; |
1146 | goto out; | 1170 | goto out; |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index c8af2d91174b..41645142b88b 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -72,16 +72,11 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) | |||
72 | if (!sd) | 72 | if (!sd) |
73 | return -EINVAL; | 73 | return -EINVAL; |
74 | 74 | ||
75 | sd_iattr = sd->s_iattr; | 75 | error = simple_setattr(dentry, iattr); |
76 | |||
77 | error = inode_change_ok(inode, iattr); | ||
78 | if (error) | ||
79 | return error; | ||
80 | |||
81 | error = inode_setattr(inode, iattr); | ||
82 | if (error) | 76 | if (error) |
83 | return error; | 77 | return error; |
84 | 78 | ||
79 | sd_iattr = sd->s_iattr; | ||
85 | if (!sd_iattr) { | 80 | if (!sd_iattr) { |
86 | /* setting attributes for the first time, allocate now */ | 81 | /* setting attributes for the first time, allocate now */ |
87 | sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); | 82 | sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); |
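configfs_setattr() now delegates the permission checks and the inode update to the new simple_setattr() helper and keeps only its private iattr bookkeeping. The resulting shape for a simple in-memory filesystem, sketched with hypothetical names:

    static int foo_setattr(struct dentry *dentry, struct iattr *iattr)
    {
            int error;

            /* validates the request and applies it to the inode */
            error = simple_setattr(dentry, iattr);
            if (error)
                    return error;

            /* filesystem-private bookkeeping only after the generic
             * part has succeeded, as configfs does above */
            return 0;
    }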
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 4d74fc72c195..0210898458b2 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c | |||
@@ -277,8 +277,10 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n" | |||
277 | DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); | 277 | DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); |
278 | DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); | 278 | DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); |
279 | 279 | ||
280 | DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n"); | ||
281 | |||
280 | /* | 282 | /* |
281 | * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value | 283 | * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value |
282 | * | 284 | * |
283 | * These functions are exactly the same as the above functions (but use a hex | 285 | * These functions are exactly the same as the above functions (but use a hex |
284 | * output for the decimal challenged). For details look at the above unsigned | 286 | * output for the decimal challenged). For details look at the above unsigned |
@@ -357,6 +359,23 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode, | |||
357 | } | 359 | } |
358 | EXPORT_SYMBOL_GPL(debugfs_create_x32); | 360 | EXPORT_SYMBOL_GPL(debugfs_create_x32); |
359 | 361 | ||
362 | /** | ||
363 | * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value | ||
364 | * @name: a pointer to a string containing the name of the file to create. | ||
365 | * @mode: the permission that the file should have | ||
366 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
367 | * directory dentry if set. If this parameter is %NULL, then the | ||
368 | * file will be created in the root of the debugfs filesystem. | ||
369 | * @value: a pointer to the variable that the file should read to and write | ||
370 | * from. | ||
371 | */ | ||
372 | struct dentry *debugfs_create_x64(const char *name, mode_t mode, | ||
373 | struct dentry *parent, u64 *value) | ||
374 | { | ||
375 | return debugfs_create_file(name, mode, parent, value, &fops_x64); | ||
376 | } | ||
377 | EXPORT_SYMBOL_GPL(debugfs_create_x64); | ||
378 | |||
360 | 379 | ||
361 | static int debugfs_size_t_set(void *data, u64 val) | 380 | static int debugfs_size_t_set(void *data, u64 val) |
362 | { | 381 | { |
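
A driver using the new 64-bit hex helper might look like the sketch below; the directory name, file name, and shadowed variable are invented for illustration:

    #include <linux/init.h>
    #include <linux/debugfs.h>

    static u64 my_reg_shadow;               /* hypothetical 64-bit state */
    static struct dentry *my_dir;

    static int __init mydrv_debugfs_init(void)
    {
            my_dir = debugfs_create_dir("mydrv", NULL);
            if (!my_dir)
                    return -ENOMEM;
            /* reads print "0x%016llx"; writes parse into my_reg_shadow */
            debugfs_create_x64("reg_shadow", 0600, my_dir, &my_reg_shadow);
            return 0;
    }
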
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb7..7600aacf531d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -82,6 +82,8 @@ struct dio { | |||
82 | int reap_counter; /* rate limit reaping */ | 82 | int reap_counter; /* rate limit reaping */ |
83 | get_block_t *get_block; /* block mapping function */ | 83 | get_block_t *get_block; /* block mapping function */ |
84 | dio_iodone_t *end_io; /* IO completion function */ | 84 | dio_iodone_t *end_io; /* IO completion function */ |
85 | dio_submit_t *submit_io; /* IO submission function */ | ||
86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | ||
85 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
86 | sector_t next_block_for_io; /* next block to be put under IO, | 88 | sector_t next_block_for_io; /* next block to be put under IO, |
87 | in dio_blocks units */ | 89 | in dio_blocks units */ |
@@ -96,6 +98,7 @@ struct dio { | |||
96 | unsigned cur_page_offset; /* Offset into it, in bytes */ | 98 | unsigned cur_page_offset; /* Offset into it, in bytes */ |
97 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 99 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
98 | sector_t cur_page_block; /* Where it starts */ | 100 | sector_t cur_page_block; /* Where it starts */ |
101 | loff_t cur_page_fs_offset; /* Offset in file */ | ||
99 | 102 | ||
100 | /* BIO completion state */ | 103 | /* BIO completion state */ |
101 | spinlock_t bio_lock; /* protects BIO fields below */ | 104 | spinlock_t bio_lock; /* protects BIO fields below */ |
@@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error) | |||
300 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 303 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
301 | } | 304 | } |
302 | 305 | ||
306 | /** | ||
307 | * dio_end_io - handle the end io action for the given bio | ||
308 | * @bio: The direct io bio that's being completed | ||
309 | * @error: Error if there was one | ||
310 | * | ||
311 | * This is meant to be called by any filesystem that uses its own dio_submit_t | ||
312 | * so that the DIO specific endio actions are dealt with after the filesystem | ||
313 | * has done its completion work. | ||
314 | */ | ||
315 | void dio_end_io(struct bio *bio, int error) | ||
316 | { | ||
317 | struct dio *dio = bio->bi_private; | ||
318 | |||
319 | if (dio->is_async) | ||
320 | dio_bio_end_aio(bio, error); | ||
321 | else | ||
322 | dio_bio_end_io(bio, error); | ||
323 | } | ||
324 | EXPORT_SYMBOL_GPL(dio_end_io); | ||
325 | |||
303 | static int | 326 | static int |
304 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 327 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
305 | sector_t first_sector, int nr_vecs) | 328 | sector_t first_sector, int nr_vecs) |
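
Together with the submit_io hook added to struct dio, dio_end_io() lets a filesystem interpose on every direct-IO bio, do its own completion work, and then hand the bio back to the generic code. A hypothetical consumer, loosely modeled on what btrfs does with this hook (the myfs_ names are invented):

    /* Private end_io: bio->bi_private still points at the struct dio,
     * which dio_end_io() relies on to pick the sync or async path. */
    static void myfs_dio_endio(struct bio *bio, int err)
    {
            /* per-bio work (checksum verify, ordered-extent handling,
             * ...) would run here before generic completion */
            dio_end_io(bio, err);
    }

    /* Passed as the submit_io argument to __blockdev_direct_IO(). */
    static void myfs_submit_dio_bio(int rw, struct bio *bio,
                                    struct inode *inode, loff_t file_offset)
    {
            bio->bi_end_io = myfs_dio_endio;    /* interpose on completion */
            submit_bio(rw, bio);
    }
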
@@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
316 | bio->bi_end_io = dio_bio_end_io; | 339 | bio->bi_end_io = dio_bio_end_io; |
317 | 340 | ||
318 | dio->bio = bio; | 341 | dio->bio = bio; |
342 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | ||
319 | return 0; | 343 | return 0; |
320 | } | 344 | } |
321 | 345 | ||
@@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio) | |||
340 | if (dio->is_async && dio->rw == READ) | 364 | if (dio->is_async && dio->rw == READ) |
341 | bio_set_pages_dirty(bio); | 365 | bio_set_pages_dirty(bio); |
342 | 366 | ||
343 | submit_bio(dio->rw, bio); | 367 | if (dio->submit_io) |
368 | dio->submit_io(dio->rw, bio, dio->inode, | ||
369 | dio->logical_offset_in_bio); | ||
370 | else | ||
371 | submit_bio(dio->rw, bio); | ||
344 | 372 | ||
345 | dio->bio = NULL; | 373 | dio->bio = NULL; |
346 | dio->boundary = 0; | 374 | dio->boundary = 0; |
375 | dio->logical_offset_in_bio = 0; | ||
347 | } | 376 | } |
348 | 377 | ||
349 | /* | 378 | /* |
@@ -603,10 +632,26 @@ static int dio_send_cur_page(struct dio *dio) | |||
603 | int ret = 0; | 632 | int ret = 0; |
604 | 633 | ||
605 | if (dio->bio) { | 634 | if (dio->bio) { |
635 | loff_t cur_offset = dio->block_in_file << dio->blkbits; | ||
636 | loff_t bio_next_offset = dio->logical_offset_in_bio + | ||
637 | dio->bio->bi_size; | ||
638 | |||
606 | /* | 639 | /* |
607 | * See whether this new request is contiguous with the old | 640 | * See whether this new request is contiguous with the old. |
641 | * | ||
642 | * Btrfs cannot handle having logically non-contiguous requests | ||
643 | * submitted. For example if you have | ||
644 | * | ||
645 | * Logical: [0-4095][HOLE][8192-12287] | ||
646 | * Physical: [0-4095] [4096-8191] | ||
647 | * | ||
648 | * We cannot submit those pages together as one BIO. So if our | ||
649 | * current logical offset in the file does not equal what would | ||
650 | * be the next logical offset in the bio, submit the bio we | ||
651 | * have. | ||
608 | */ | 652 | */ |
609 | if (dio->final_block_in_bio != dio->cur_page_block) | 653 | if (dio->final_block_in_bio != dio->cur_page_block || |
654 | cur_offset != bio_next_offset) | ||
610 | dio_bio_submit(dio); | 655 | dio_bio_submit(dio); |
611 | /* | 656 | /* |
612 | * Submit now if the underlying fs is about to perform a | 657 | * Submit now if the underlying fs is about to perform a |
@@ -701,6 +746,7 @@ submit_page_section(struct dio *dio, struct page *page, | |||
701 | dio->cur_page_offset = offset; | 746 | dio->cur_page_offset = offset; |
702 | dio->cur_page_len = len; | 747 | dio->cur_page_len = len; |
703 | dio->cur_page_block = blocknr; | 748 | dio->cur_page_block = blocknr; |
749 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | ||
704 | out: | 750 | out: |
705 | return ret; | 751 | return ret; |
706 | } | 752 | } |
@@ -935,7 +981,7 @@ static ssize_t | |||
935 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 981 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
936 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 982 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
937 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 983 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
938 | struct dio *dio) | 984 | dio_submit_t submit_io, struct dio *dio) |
939 | { | 985 | { |
940 | unsigned long user_addr; | 986 | unsigned long user_addr; |
941 | unsigned long flags; | 987 | unsigned long flags; |
@@ -952,6 +998,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
952 | 998 | ||
953 | dio->get_block = get_block; | 999 | dio->get_block = get_block; |
954 | dio->end_io = end_io; | 1000 | dio->end_io = end_io; |
1001 | dio->submit_io = submit_io; | ||
955 | dio->final_block_in_bio = -1; | 1002 | dio->final_block_in_bio = -1; |
956 | dio->next_block_for_io = -1; | 1003 | dio->next_block_for_io = -1; |
957 | 1004 | ||
@@ -1008,7 +1055,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1008 | } | 1055 | } |
1009 | } /* end iovec loop */ | 1056 | } /* end iovec loop */ |
1010 | 1057 | ||
1011 | if (ret == -ENOTBLK && (rw & WRITE)) { | 1058 | if (ret == -ENOTBLK) { |
1012 | /* | 1059 | /* |
1013 | * The remaining part of the request will | 1060 | * The remaining part of the request will |
1014 | * be handled by buffered I/O when we return | 1061 | * be handled by buffered I/O when we return |
@@ -1087,30 +1134,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1087 | return ret; | 1134 | return ret; |
1088 | } | 1135 | } |
1089 | 1136 | ||
1090 | /* | ||
1091 | * This is a library function for use by filesystem drivers. | ||
1092 | * | ||
1093 | * The locking rules are governed by the flags parameter: | ||
1094 | * - if the flags value contains DIO_LOCKING we use a fancy locking | ||
1095 | * scheme for dumb filesystems. | ||
1096 | * For writes this function is called under i_mutex and returns with | ||
1097 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | ||
1098 | * taken and dropped again before returning. | ||
1099 | * For reads and writes i_alloc_sem is taken in shared mode and released | ||
1100 | * on I/O completion (which may happen asynchronously after returning to | ||
1101 | * the caller). | ||
1102 | * | ||
1103 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | ||
1104 | * internal locking but rather rely on the filesystem to synchronize | ||
1105 | * direct I/O reads/writes versus each other and truncate. | ||
1106 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1107 | * entry and are never taken. | ||
1108 | */ | ||
1109 | ssize_t | 1137 | ssize_t |
1110 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1138 | __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, |
1111 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1139 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1112 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1140 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1113 | int flags) | 1141 | dio_submit_t submit_io, int flags) |
1114 | { | 1142 | { |
1115 | int seg; | 1143 | int seg; |
1116 | size_t size; | 1144 | size_t size; |
@@ -1197,11 +1225,49 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1197 | (end > i_size_read(inode))); | 1225 | (end > i_size_read(inode))); |
1198 | 1226 | ||
1199 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1227 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
1200 | nr_segs, blkbits, get_block, end_io, dio); | 1228 | nr_segs, blkbits, get_block, end_io, |
1229 | submit_io, dio); | ||
1230 | |||
1231 | out: | ||
1232 | return retval; | ||
1233 | } | ||
1234 | EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc); | ||
1235 | |||
1236 | /* | ||
1237 | * This is a library function for use by filesystem drivers. | ||
1238 | * | ||
1239 | * The locking rules are governed by the flags parameter: | ||
1240 | * - if the flags value contains DIO_LOCKING we use a fancy locking | ||
1241 | * scheme for dumb filesystems. | ||
1242 | * For writes this function is called under i_mutex and returns with | ||
1243 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | ||
1244 | * taken and dropped again before returning. | ||
1245 | * For reads and writes i_alloc_sem is taken in shared mode and released | ||
1246 | * on I/O completion (which may happen asynchronously after returning to | ||
1247 | * the caller). | ||
1248 | * | ||
1249 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | ||
1250 | * internal locking but rather rely on the filesystem to synchronize | ||
1251 | * direct I/O reads/writes versus each other and truncate. | ||
1252 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1253 | * entry and are never taken. | ||
1254 | */ | ||
1255 | ssize_t | ||
1256 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | ||
1257 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | ||
1258 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | ||
1259 | dio_submit_t submit_io, int flags) | ||
1260 | { | ||
1261 | ssize_t retval; | ||
1201 | 1262 | ||
1263 | retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, | ||
1264 | offset, nr_segs, get_block, end_io, submit_io, flags); | ||
1202 | /* | 1265 | /* |
1203 | * In case of error extending write may have instantiated a few | 1266 | * In case of error extending write may have instantiated a few |
1204 | * blocks outside i_size. Trim these off again for DIO_LOCKING. | 1267 | * blocks outside i_size. Trim these off again for DIO_LOCKING. |
1268 | * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in | ||
1269 | * their own manner. This is a further example of where the old | ||
1270 | * truncate sequence is inadequate. | ||
1205 | * | 1271 | * |
1206 | * NOTE: filesystems with their own locking have to handle this | 1272 | * NOTE: filesystems with their own locking have to handle this |
1207 | * on their own. | 1273 | * on their own. |
@@ -1209,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1209 | if (flags & DIO_LOCKING) { | 1275 | if (flags & DIO_LOCKING) { |
1210 | if (unlikely((rw & WRITE) && retval < 0)) { | 1276 | if (unlikely((rw & WRITE) && retval < 0)) { |
1211 | loff_t isize = i_size_read(inode); | 1277 | loff_t isize = i_size_read(inode); |
1278 | loff_t end = offset + iov_length(iov, nr_segs); | ||
1279 | |||
1212 | if (end > isize) | 1280 | if (end > isize) |
1213 | vmtruncate(inode, isize); | 1281 | vmtruncate(inode, isize); |
1214 | } | 1282 | } |
1215 | } | 1283 | } |
1216 | 1284 | ||
1217 | out: | ||
1218 | return retval; | 1285 | return retval; |
1219 | } | 1286 | } |
1220 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1287 | EXPORT_SYMBOL(__blockdev_direct_IO); |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 3bdddbcc785f..e8fcf4e2ed7d 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -274,7 +274,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) | |||
274 | } | 274 | } |
275 | 275 | ||
276 | static int | 276 | static int |
277 | ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 277 | ecryptfs_fsync(struct file *file, int datasync) |
278 | { | 278 | { |
279 | return vfs_fsync(ecryptfs_file_to_lower(file), datasync); | 279 | return vfs_fsync(ecryptfs_file_to_lower(file), datasync); |
280 | } | 280 | } |
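
Every ->fsync conversion in this series follows the same shape: the dentry parameter is dropped and the inode is derived from the file's address_space. A sketch of a new-style method (myfs_fsync is hypothetical):

    static int myfs_fsync(struct file *file, int datasync)
    {
            struct address_space *mapping = file->f_mapping;
            struct inode *inode = mapping->host;    /* no dentry needed */
            int err;

            err = filemap_write_and_wait(mapping);  /* flush data pages */
            if (err)
                    return err;
            /* plain fsync must also push inode metadata */
            if (!datasync && (inode->i_state & I_DIRTY))
                    err = write_inode_now(inode, 1);
            return err;
    }
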
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 65dee2f336ae..31ef5252f0fe 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -805,7 +805,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
805 | - (ia->ia_size & ~PAGE_CACHE_MASK)); | 805 | - (ia->ia_size & ~PAGE_CACHE_MASK)); |
806 | 806 | ||
807 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 807 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { |
808 | rc = vmtruncate(inode, ia->ia_size); | 808 | rc = simple_setsize(inode, ia->ia_size); |
809 | if (rc) | 809 | if (rc) |
810 | goto out; | 810 | goto out; |
811 | lower_ia->ia_size = ia->ia_size; | 811 | lower_ia->ia_size = ia->ia_size; |
@@ -830,7 +830,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
830 | goto out; | 830 | goto out; |
831 | } | 831 | } |
832 | } | 832 | } |
833 | vmtruncate(inode, ia->ia_size); | 833 | simple_setsize(inode, ia->ia_size); |
834 | rc = ecryptfs_write_inode_size_to_metadata(inode); | 834 | rc = ecryptfs_write_inode_size_to_metadata(inode); |
835 | if (rc) { | 835 | if (rc) { |
836 | printk(KERN_ERR "Problem with " | 836 | printk(KERN_ERR "Problem with " |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -242,9 +242,10 @@ static int __bprm_mm_init(struct linux_binprm *bprm) | |||
242 | * use STACK_TOP because that can depend on attributes which aren't | 242 | * use STACK_TOP because that can depend on attributes which aren't |
243 | * configured yet. | 243 | * configured yet. |
244 | */ | 244 | */ |
245 | BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); | ||
245 | vma->vm_end = STACK_TOP_MAX; | 246 | vma->vm_end = STACK_TOP_MAX; |
246 | vma->vm_start = vma->vm_end - PAGE_SIZE; | 247 | vma->vm_start = vma->vm_end - PAGE_SIZE; |
247 | vma->vm_flags = VM_STACK_FLAGS; | 248 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; |
248 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 249 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
249 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 250 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
250 | err = insert_vm_struct(mm, vma); | 251 | err = insert_vm_struct(mm, vma); |
@@ -616,6 +617,7 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
616 | else if (executable_stack == EXSTACK_DISABLE_X) | 617 | else if (executable_stack == EXSTACK_DISABLE_X) |
617 | vm_flags &= ~VM_EXEC; | 618 | vm_flags &= ~VM_EXEC; |
618 | vm_flags |= mm->def_flags; | 619 | vm_flags |= mm->def_flags; |
620 | vm_flags |= VM_STACK_INCOMPLETE_SETUP; | ||
619 | 621 | ||
620 | ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, | 622 | ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, |
621 | vm_flags); | 623 | vm_flags); |
@@ -630,6 +632,9 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
630 | goto out_unlock; | 632 | goto out_unlock; |
631 | } | 633 | } |
632 | 634 | ||
635 | /* mprotect_fixup is overkill to remove the temporary stack flags */ | ||
636 | vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP; | ||
637 | |||
633 | stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ | 638 | stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ |
634 | stack_size = vma->vm_end - vma->vm_start; | 639 | stack_size = vma->vm_end - vma->vm_start; |
635 | /* | 640 | /* |
@@ -763,7 +768,6 @@ static int de_thread(struct task_struct *tsk) | |||
763 | struct signal_struct *sig = tsk->signal; | 768 | struct signal_struct *sig = tsk->signal; |
764 | struct sighand_struct *oldsighand = tsk->sighand; | 769 | struct sighand_struct *oldsighand = tsk->sighand; |
765 | spinlock_t *lock = &oldsighand->siglock; | 770 | spinlock_t *lock = &oldsighand->siglock; |
766 | int count; | ||
767 | 771 | ||
768 | if (thread_group_empty(tsk)) | 772 | if (thread_group_empty(tsk)) |
769 | goto no_thread_group; | 773 | goto no_thread_group; |
@@ -780,13 +784,13 @@ static int de_thread(struct task_struct *tsk) | |||
780 | spin_unlock_irq(lock); | 784 | spin_unlock_irq(lock); |
781 | return -EAGAIN; | 785 | return -EAGAIN; |
782 | } | 786 | } |
787 | |||
783 | sig->group_exit_task = tsk; | 788 | sig->group_exit_task = tsk; |
784 | zap_other_threads(tsk); | 789 | sig->notify_count = zap_other_threads(tsk); |
790 | if (!thread_group_leader(tsk)) | ||
791 | sig->notify_count--; | ||
785 | 792 | ||
786 | /* Account for the thread group leader hanging around: */ | 793 | while (sig->notify_count) { |
787 | count = thread_group_leader(tsk) ? 1 : 2; | ||
788 | sig->notify_count = count; | ||
789 | while (atomic_read(&sig->count) > count) { | ||
790 | __set_current_state(TASK_UNINTERRUPTIBLE); | 794 | __set_current_state(TASK_UNINTERRUPTIBLE); |
791 | spin_unlock_irq(lock); | 795 | spin_unlock_irq(lock); |
792 | schedule(); | 796 | schedule(); |
@@ -1657,12 +1661,15 @@ static int coredump_wait(int exit_code, struct core_state *core_state) | |||
1657 | struct task_struct *tsk = current; | 1661 | struct task_struct *tsk = current; |
1658 | struct mm_struct *mm = tsk->mm; | 1662 | struct mm_struct *mm = tsk->mm; |
1659 | struct completion *vfork_done; | 1663 | struct completion *vfork_done; |
1660 | int core_waiters; | 1664 | int core_waiters = -EBUSY; |
1661 | 1665 | ||
1662 | init_completion(&core_state->startup); | 1666 | init_completion(&core_state->startup); |
1663 | core_state->dumper.task = tsk; | 1667 | core_state->dumper.task = tsk; |
1664 | core_state->dumper.next = NULL; | 1668 | core_state->dumper.next = NULL; |
1665 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | 1669 | |
1670 | down_write(&mm->mmap_sem); | ||
1671 | if (!mm->core_state) | ||
1672 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | ||
1666 | up_write(&mm->mmap_sem); | 1673 | up_write(&mm->mmap_sem); |
1667 | 1674 | ||
1668 | if (unlikely(core_waiters < 0)) | 1675 | if (unlikely(core_waiters < 0)) |
@@ -1782,21 +1789,61 @@ static void wait_for_dump_helpers(struct file *file) | |||
1782 | } | 1789 | } |
1783 | 1790 | ||
1784 | 1791 | ||
1792 | /* | ||
1793 | * umh_pipe_setup | ||
1794 | * helper function to customize the process used | ||
1795 | * to collect the core in userspace. Specifically | ||
1796 | * it sets up a pipe and installs it as fd 0 (stdin) | ||
1797 | * for the process. Returns 0 on success, or | ||
1798 | * PTR_ERR on failure. | ||
1799 | * Note that it also sets the core limit to 1. This | ||
1800 | * is a special value that we use to trap recursive | ||
1801 | * core dumps | ||
1802 | */ | ||
1803 | static int umh_pipe_setup(struct subprocess_info *info) | ||
1804 | { | ||
1805 | struct file *rp, *wp; | ||
1806 | struct fdtable *fdt; | ||
1807 | struct coredump_params *cp = (struct coredump_params *)info->data; | ||
1808 | struct files_struct *cf = current->files; | ||
1809 | |||
1810 | wp = create_write_pipe(0); | ||
1811 | if (IS_ERR(wp)) | ||
1812 | return PTR_ERR(wp); | ||
1813 | |||
1814 | rp = create_read_pipe(wp, 0); | ||
1815 | if (IS_ERR(rp)) { | ||
1816 | free_write_pipe(wp); | ||
1817 | return PTR_ERR(rp); | ||
1818 | } | ||
1819 | |||
1820 | cp->file = wp; | ||
1821 | |||
1822 | sys_close(0); | ||
1823 | fd_install(0, rp); | ||
1824 | spin_lock(&cf->file_lock); | ||
1825 | fdt = files_fdtable(cf); | ||
1826 | FD_SET(0, fdt->open_fds); | ||
1827 | FD_CLR(0, fdt->close_on_exec); | ||
1828 | spin_unlock(&cf->file_lock); | ||
1829 | |||
1830 | /* and disallow core files too */ | ||
1831 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | ||
1832 | |||
1833 | return 0; | ||
1834 | } | ||
1835 | |||
1785 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | 1836 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) |
1786 | { | 1837 | { |
1787 | struct core_state core_state; | 1838 | struct core_state core_state; |
1788 | char corename[CORENAME_MAX_SIZE + 1]; | 1839 | char corename[CORENAME_MAX_SIZE + 1]; |
1789 | struct mm_struct *mm = current->mm; | 1840 | struct mm_struct *mm = current->mm; |
1790 | struct linux_binfmt * binfmt; | 1841 | struct linux_binfmt * binfmt; |
1791 | struct inode * inode; | ||
1792 | const struct cred *old_cred; | 1842 | const struct cred *old_cred; |
1793 | struct cred *cred; | 1843 | struct cred *cred; |
1794 | int retval = 0; | 1844 | int retval = 0; |
1795 | int flag = 0; | 1845 | int flag = 0; |
1796 | int ispipe = 0; | 1846 | int ispipe; |
1797 | char **helper_argv = NULL; | ||
1798 | int helper_argc = 0; | ||
1799 | int dump_count = 0; | ||
1800 | static atomic_t core_dump_count = ATOMIC_INIT(0); | 1847 | static atomic_t core_dump_count = ATOMIC_INIT(0); |
1801 | struct coredump_params cprm = { | 1848 | struct coredump_params cprm = { |
1802 | .signr = signr, | 1849 | .signr = signr, |
@@ -1815,23 +1862,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1815 | binfmt = mm->binfmt; | 1862 | binfmt = mm->binfmt; |
1816 | if (!binfmt || !binfmt->core_dump) | 1863 | if (!binfmt || !binfmt->core_dump) |
1817 | goto fail; | 1864 | goto fail; |
1818 | 1865 | if (!__get_dumpable(cprm.mm_flags)) | |
1819 | cred = prepare_creds(); | ||
1820 | if (!cred) { | ||
1821 | retval = -ENOMEM; | ||
1822 | goto fail; | 1866 | goto fail; |
1823 | } | ||
1824 | 1867 | ||
1825 | down_write(&mm->mmap_sem); | 1868 | cred = prepare_creds(); |
1826 | /* | 1869 | if (!cred) |
1827 | * If another thread got here first, or we are not dumpable, bail out. | ||
1828 | */ | ||
1829 | if (mm->core_state || !__get_dumpable(cprm.mm_flags)) { | ||
1830 | up_write(&mm->mmap_sem); | ||
1831 | put_cred(cred); | ||
1832 | goto fail; | 1870 | goto fail; |
1833 | } | ||
1834 | |||
1835 | /* | 1871 | /* |
1836 | * We cannot trust fsuid as being the "true" uid of the | 1872 | * We cannot trust fsuid as being the "true" uid of the |
1837 | * process nor do we know its entire history. We only know it | 1873 | * process nor do we know its entire history. We only know it |
@@ -1844,10 +1880,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1844 | } | 1880 | } |
1845 | 1881 | ||
1846 | retval = coredump_wait(exit_code, &core_state); | 1882 | retval = coredump_wait(exit_code, &core_state); |
1847 | if (retval < 0) { | 1883 | if (retval < 0) |
1848 | put_cred(cred); | 1884 | goto fail_creds; |
1849 | goto fail; | ||
1850 | } | ||
1851 | 1885 | ||
1852 | old_cred = override_creds(cred); | 1886 | old_cred = override_creds(cred); |
1853 | 1887 | ||
@@ -1865,19 +1899,19 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1865 | ispipe = format_corename(corename, signr); | 1899 | ispipe = format_corename(corename, signr); |
1866 | unlock_kernel(); | 1900 | unlock_kernel(); |
1867 | 1901 | ||
1868 | if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) | ||
1869 | goto fail_unlock; | ||
1870 | |||
1871 | if (ispipe) { | 1902 | if (ispipe) { |
1872 | if (cprm.limit == 0) { | 1903 | int dump_count; |
1904 | char **helper_argv; | ||
1905 | |||
1906 | if (cprm.limit == 1) { | ||
1873 | /* | 1907 | /* |
1874 | * Normally core limits are irrelevant to pipes, since | 1908 | * Normally core limits are irrelevant to pipes, since |
1875 | * we're not writing to the file system, but we use | 1909 | * we're not writing to the file system, but we use |
1876 | * cprm.limit of 0 here as a special value. Any | 1910 | * cprm.limit of 1 here as a special value. Any |
1877 | * non-zero limit gets set to RLIM_INFINITY below, but | 1911 | * non-1 limit gets set to RLIM_INFINITY below, but |
1878 | * a limit of 0 skips the dump. This is a consistent | 1912 | * a limit of 0 skips the dump. This is a consistent |
1879 | * way to catch recursive crashes. We can still crash | 1913 | * way to catch recursive crashes. We can still crash |
1880 | * if the core_pattern binary sets RLIM_CORE = !0 | 1914 | * if the core_pattern binary sets RLIM_CORE = !1 |
1881 | * but it runs as root, and can do lots of stupid things | 1915 | * but it runs as root, and can do lots of stupid things |
1882 | * Note that we use task_tgid_vnr here to grab the pid | 1916 | * Note that we use task_tgid_vnr here to grab the pid |
1883 | * of the process group leader. That way we get the | 1917 | * of the process group leader. That way we get the |
@@ -1885,11 +1919,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1885 | * core_pattern process dies. | 1919 | * core_pattern process dies. |
1886 | */ | 1920 | */ |
1887 | printk(KERN_WARNING | 1921 | printk(KERN_WARNING |
1888 | "Process %d(%s) has RLIMIT_CORE set to 0\n", | 1922 | "Process %d(%s) has RLIMIT_CORE set to 1\n", |
1889 | task_tgid_vnr(current), current->comm); | 1923 | task_tgid_vnr(current), current->comm); |
1890 | printk(KERN_WARNING "Aborting core\n"); | 1924 | printk(KERN_WARNING "Aborting core\n"); |
1891 | goto fail_unlock; | 1925 | goto fail_unlock; |
1892 | } | 1926 | } |
1927 | cprm.limit = RLIM_INFINITY; | ||
1893 | 1928 | ||
1894 | dump_count = atomic_inc_return(&core_dump_count); | 1929 | dump_count = atomic_inc_return(&core_dump_count); |
1895 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | 1930 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { |
@@ -1899,71 +1934,74 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1899 | goto fail_dropcount; | 1934 | goto fail_dropcount; |
1900 | } | 1935 | } |
1901 | 1936 | ||
1902 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); | 1937 | helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); |
1903 | if (!helper_argv) { | 1938 | if (!helper_argv) { |
1904 | printk(KERN_WARNING "%s failed to allocate memory\n", | 1939 | printk(KERN_WARNING "%s failed to allocate memory\n", |
1905 | __func__); | 1940 | __func__); |
1906 | goto fail_dropcount; | 1941 | goto fail_dropcount; |
1907 | } | 1942 | } |
1908 | 1943 | ||
1909 | cprm.limit = RLIM_INFINITY; | 1944 | retval = call_usermodehelper_fns(helper_argv[0], helper_argv, |
1910 | 1945 | NULL, UMH_WAIT_EXEC, umh_pipe_setup, | |
1911 | /* SIGPIPE can happen, but it's just never processed */ | 1946 | NULL, &cprm); |
1912 | if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, | 1947 | argv_free(helper_argv); |
1913 | &cprm.file)) { | 1948 | if (retval) { |
1914 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 1949 | printk(KERN_INFO "Core dump to %s pipe failed\n", |
1915 | corename); | 1950 | corename); |
1916 | goto fail_dropcount; | 1951 | goto close_fail; |
1917 | } | 1952 | } |
1918 | } else | 1953 | } else { |
1954 | struct inode *inode; | ||
1955 | |||
1956 | if (cprm.limit < binfmt->min_coredump) | ||
1957 | goto fail_unlock; | ||
1958 | |||
1919 | cprm.file = filp_open(corename, | 1959 | cprm.file = filp_open(corename, |
1920 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 1960 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, |
1921 | 0600); | 1961 | 0600); |
1922 | if (IS_ERR(cprm.file)) | 1962 | if (IS_ERR(cprm.file)) |
1923 | goto fail_dropcount; | 1963 | goto fail_unlock; |
1924 | inode = cprm.file->f_path.dentry->d_inode; | ||
1925 | if (inode->i_nlink > 1) | ||
1926 | goto close_fail; /* multiple links - don't dump */ | ||
1927 | if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) | ||
1928 | goto close_fail; | ||
1929 | |||
1930 | /* AK: actually i see no reason to not allow this for named pipes etc., | ||
1931 | but keep the previous behaviour for now. */ | ||
1932 | if (!ispipe && !S_ISREG(inode->i_mode)) | ||
1933 | goto close_fail; | ||
1934 | /* | ||
1935 | * Don't allow local users to get cute and trick others into dumping | ||
1936 | * core into their pre-created files: | ||
1937 | * Note, this is not relevant for pipes | ||
1938 | */ | ||
1939 | if (!ispipe && (inode->i_uid != current_fsuid())) | ||
1940 | goto close_fail; | ||
1941 | if (!cprm.file->f_op) | ||
1942 | goto close_fail; | ||
1943 | if (!cprm.file->f_op->write) | ||
1944 | goto close_fail; | ||
1945 | if (!ispipe && | ||
1946 | do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0) | ||
1947 | goto close_fail; | ||
1948 | 1964 | ||
1949 | retval = binfmt->core_dump(&cprm); | 1965 | inode = cprm.file->f_path.dentry->d_inode; |
1966 | if (inode->i_nlink > 1) | ||
1967 | goto close_fail; | ||
1968 | if (d_unhashed(cprm.file->f_path.dentry)) | ||
1969 | goto close_fail; | ||
1970 | /* | ||
1971 | * AK: actually I see no reason not to allow this for named | ||
1972 | * pipes etc, but keep the previous behaviour for now. | ||
1973 | */ | ||
1974 | if (!S_ISREG(inode->i_mode)) | ||
1975 | goto close_fail; | ||
1976 | /* | ||
1977 | * Don't allow local users to get cute and trick others into dumping | ||
1978 | * core into their pre-created files. | ||
1979 | */ | ||
1980 | if (inode->i_uid != current_fsuid()) | ||
1981 | goto close_fail; | ||
1982 | if (!cprm.file->f_op || !cprm.file->f_op->write) | ||
1983 | goto close_fail; | ||
1984 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | ||
1985 | goto close_fail; | ||
1986 | } | ||
1950 | 1987 | ||
1988 | retval = binfmt->core_dump(&cprm); | ||
1951 | if (retval) | 1989 | if (retval) |
1952 | current->signal->group_exit_code |= 0x80; | 1990 | current->signal->group_exit_code |= 0x80; |
1953 | close_fail: | 1991 | |
1954 | if (ispipe && core_pipe_limit) | 1992 | if (ispipe && core_pipe_limit) |
1955 | wait_for_dump_helpers(cprm.file); | 1993 | wait_for_dump_helpers(cprm.file); |
1956 | filp_close(cprm.file, NULL); | 1994 | close_fail: |
1995 | if (cprm.file) | ||
1996 | filp_close(cprm.file, NULL); | ||
1957 | fail_dropcount: | 1997 | fail_dropcount: |
1958 | if (dump_count) | 1998 | if (ispipe) |
1959 | atomic_dec(&core_dump_count); | 1999 | atomic_dec(&core_dump_count); |
1960 | fail_unlock: | 2000 | fail_unlock: |
1961 | if (helper_argv) | 2001 | coredump_finish(mm); |
1962 | argv_free(helper_argv); | ||
1963 | |||
1964 | revert_creds(old_cred); | 2002 | revert_creds(old_cred); |
2003 | fail_creds: | ||
1965 | put_cred(cred); | 2004 | put_cred(cred); |
1966 | coredump_finish(mm); | ||
1967 | fail: | 2005 | fail: |
1968 | return; | 2006 | return; |
1969 | } | 2007 | } |
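
Because umh_pipe_setup() installs the read end of the pipe as fd 0 before the helper is executed, a |core_pattern handler simply reads the core image from stdin. A user-space sketch (the output path, helper name, and %p argument convention are illustrative; such a helper would be registered with something like echo "|/usr/local/sbin/coresink %p" > /proc/sys/kernel/core_pattern):

    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/types.h>

    /* Sketch of a core_pattern pipe helper: the kernel feeds the core
     * image into our stdin; argv[1] carries the crashing pid (%p). */
    int main(int argc, char **argv)
    {
            char path[256], buf[65536];
            ssize_t n;
            int fd;

            snprintf(path, sizeof(path), "/var/tmp/core.%s",
                     argc > 1 ? argv[1] : "unknown");
            fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0600);
            if (fd < 0)
                    return 1;
            while ((n = read(STDIN_FILENO, buf, sizeof(buf))) > 0)
                    if (write(fd, buf, n) != n)
                            return 1;
            close(fd);
            return 0;
    }
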
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 4cfab1cc75c0..d91e9d829bc1 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -608,7 +608,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent) | |||
608 | de->inode_no = cpu_to_le64(parent->i_ino); | 608 | de->inode_no = cpu_to_le64(parent->i_ino); |
609 | memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); | 609 | memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); |
610 | exofs_set_de_type(de, inode); | 610 | exofs_set_de_type(de, inode); |
611 | kunmap_atomic(page, KM_USER0); | 611 | kunmap_atomic(kaddr, KM_USER0); |
612 | err = exofs_commit_chunk(page, 0, chunk_size); | 612 | err = exofs_commit_chunk(page, 0, chunk_size); |
613 | fail: | 613 | fail: |
614 | page_cache_release(page); | 614 | page_cache_release(page); |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 839b9dc1e70f..fef6899be397 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -40,12 +40,11 @@ static int exofs_release_file(struct inode *inode, struct file *filp) | |||
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
42 | 42 | ||
43 | static int exofs_file_fsync(struct file *filp, struct dentry *dentry, | 43 | static int exofs_file_fsync(struct file *filp, int datasync) |
44 | int datasync) | ||
45 | { | 44 | { |
46 | int ret; | 45 | int ret; |
47 | struct address_space *mapping = filp->f_mapping; | 46 | struct address_space *mapping = filp->f_mapping; |
48 | struct inode *inode = dentry->d_inode; | 47 | struct inode *inode = mapping->host; |
49 | struct super_block *sb; | 48 | struct super_block *sb; |
50 | 49 | ||
51 | ret = filemap_write_and_wait(mapping); | 50 | ret = filemap_write_and_wait(mapping); |
@@ -66,7 +65,7 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, | |||
66 | 65 | ||
67 | static int exofs_flush(struct file *file, fl_owner_t id) | 66 | static int exofs_flush(struct file *file, fl_owner_t id) |
68 | { | 67 | { |
69 | exofs_file_fsync(file, file->f_path.dentry, 1); | 68 | exofs_file_fsync(file, 1); |
70 | /* TODO: Flush the OSD target */ | 69 | /* TODO: Flush the OSD target */ |
71 | return 0; | 70 | return 0; |
72 | } | 71 | } |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d7c6afa79754..4bb6ef822e46 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -755,6 +755,21 @@ static int exofs_write_end(struct file *file, struct address_space *mapping, | |||
755 | return ret; | 755 | return ret; |
756 | } | 756 | } |
757 | 757 | ||
758 | static int exofs_releasepage(struct page *page, gfp_t gfp) | ||
759 | { | ||
760 | EXOFS_DBGMSG("page 0x%lx\n", page->index); | ||
761 | WARN_ON(1); | ||
762 | return try_to_free_buffers(page); | ||
763 | } | ||
764 | |||
765 | static void exofs_invalidatepage(struct page *page, unsigned long offset) | ||
766 | { | ||
767 | EXOFS_DBGMSG("page_has_buffers=>%d\n", page_has_buffers(page)); | ||
768 | WARN_ON(1); | ||
769 | |||
770 | block_invalidatepage(page, offset); | ||
771 | } | ||
772 | |||
758 | const struct address_space_operations exofs_aops = { | 773 | const struct address_space_operations exofs_aops = { |
759 | .readpage = exofs_readpage, | 774 | .readpage = exofs_readpage, |
760 | .readpages = exofs_readpages, | 775 | .readpages = exofs_readpages, |
@@ -762,6 +777,21 @@ const struct address_space_operations exofs_aops = { | |||
762 | .writepages = exofs_writepages, | 777 | .writepages = exofs_writepages, |
763 | .write_begin = exofs_write_begin_export, | 778 | .write_begin = exofs_write_begin_export, |
764 | .write_end = exofs_write_end, | 779 | .write_end = exofs_write_end, |
780 | .releasepage = exofs_releasepage, | ||
781 | .set_page_dirty = __set_page_dirty_nobuffers, | ||
782 | .invalidatepage = exofs_invalidatepage, | ||
783 | |||
784 | /* Not implemented yet */ | ||
785 | .bmap = NULL, /* TODO: use osd's OSD_ACT_READ_MAP */ | ||
786 | .direct_IO = NULL, /* TODO: Should be trivial to do */ | ||
787 | |||
788 | /* With these NULL has special meaning or default is not exported */ | ||
789 | .sync_page = NULL, | ||
790 | .get_xip_mem = NULL, | ||
791 | .migratepage = NULL, | ||
792 | .launder_page = NULL, | ||
793 | .is_partially_uptodate = NULL, | ||
794 | .error_remove_page = NULL, | ||
765 | }; | 795 | }; |
766 | 796 | ||
767 | /****************************************************************************** | 797 | /****************************************************************************** |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 0b038e47ad2f..52b34f1d2738 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -122,7 +122,6 @@ extern int ext2_write_inode (struct inode *, struct writeback_control *); | |||
122 | extern void ext2_delete_inode (struct inode *); | 122 | extern void ext2_delete_inode (struct inode *); |
123 | extern int ext2_sync_inode (struct inode *); | 123 | extern int ext2_sync_inode (struct inode *); |
124 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); | 124 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); |
125 | extern void ext2_truncate (struct inode *); | ||
126 | extern int ext2_setattr (struct dentry *, struct iattr *); | 125 | extern int ext2_setattr (struct dentry *, struct iattr *); |
127 | extern void ext2_set_inode_flags(struct inode *inode); | 126 | extern void ext2_set_inode_flags(struct inode *inode); |
128 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 127 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
@@ -155,7 +154,7 @@ extern void ext2_write_super (struct super_block *); | |||
155 | extern const struct file_operations ext2_dir_operations; | 154 | extern const struct file_operations ext2_dir_operations; |
156 | 155 | ||
157 | /* file.c */ | 156 | /* file.c */ |
158 | extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync); | 157 | extern int ext2_fsync(struct file *file, int datasync); |
159 | extern const struct inode_operations ext2_file_inode_operations; | 158 | extern const struct inode_operations ext2_file_inode_operations; |
160 | extern const struct file_operations ext2_file_operations; | 159 | extern const struct file_operations ext2_file_operations; |
161 | extern const struct file_operations ext2_xip_file_operations; | 160 | extern const struct file_operations ext2_xip_file_operations; |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5d198d0697fb..49eec9456c5b 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp) | |||
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
42 | 42 | ||
43 | int ext2_fsync(struct file *file, struct dentry *dentry, int datasync) | 43 | int ext2_fsync(struct file *file, int datasync) |
44 | { | 44 | { |
45 | int ret; | 45 | int ret; |
46 | struct super_block *sb = dentry->d_inode->i_sb; | 46 | struct super_block *sb = file->f_mapping->host->i_sb; |
47 | struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; | 47 | struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; |
48 | 48 | ||
49 | ret = simple_fsync(file, dentry, datasync); | 49 | ret = generic_file_fsync(file, datasync); |
50 | if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { | 50 | if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { |
51 | /* We don't really know where the IO error happened... */ | 51 | /* We don't really know where the IO error happened... */ |
52 | ext2_error(sb, __func__, | 52 | ext2_error(sb, __func__, |
@@ -95,7 +95,6 @@ const struct file_operations ext2_xip_file_operations = { | |||
95 | #endif | 95 | #endif |
96 | 96 | ||
97 | const struct inode_operations ext2_file_inode_operations = { | 97 | const struct inode_operations ext2_file_inode_operations = { |
98 | .truncate = ext2_truncate, | ||
99 | #ifdef CONFIG_EXT2_FS_XATTR | 98 | #ifdef CONFIG_EXT2_FS_XATTR |
100 | .setxattr = generic_setxattr, | 99 | .setxattr = generic_setxattr, |
101 | .getxattr = generic_getxattr, | 100 | .getxattr = generic_getxattr, |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 527c46d9bc1f..19214435b752 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -54,6 +54,18 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) | |||
54 | inode->i_blocks - ea_blocks == 0); | 54 | inode->i_blocks - ea_blocks == 0); |
55 | } | 55 | } |
56 | 56 | ||
57 | static void ext2_truncate_blocks(struct inode *inode, loff_t offset); | ||
58 | |||
59 | static void ext2_write_failed(struct address_space *mapping, loff_t to) | ||
60 | { | ||
61 | struct inode *inode = mapping->host; | ||
62 | |||
63 | if (to > inode->i_size) { | ||
64 | truncate_pagecache(inode, to, inode->i_size); | ||
65 | ext2_truncate_blocks(inode, inode->i_size); | ||
66 | } | ||
67 | } | ||
68 | |||
57 | /* | 69 | /* |
58 | * Called at the last iput() if i_nlink is zero. | 70 | * Called at the last iput() if i_nlink is zero. |
59 | */ | 71 | */ |
@@ -71,7 +83,7 @@ void ext2_delete_inode (struct inode * inode) | |||
71 | 83 | ||
72 | inode->i_size = 0; | 84 | inode->i_size = 0; |
73 | if (inode->i_blocks) | 85 | if (inode->i_blocks) |
74 | ext2_truncate (inode); | 86 | ext2_truncate_blocks(inode, 0); |
75 | ext2_free_inode (inode); | 87 | ext2_free_inode (inode); |
76 | 88 | ||
77 | return; | 89 | return; |
@@ -757,8 +769,8 @@ int __ext2_write_begin(struct file *file, struct address_space *mapping, | |||
757 | loff_t pos, unsigned len, unsigned flags, | 769 | loff_t pos, unsigned len, unsigned flags, |
758 | struct page **pagep, void **fsdata) | 770 | struct page **pagep, void **fsdata) |
759 | { | 771 | { |
760 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 772 | return block_write_begin_newtrunc(file, mapping, pos, len, flags, |
761 | ext2_get_block); | 773 | pagep, fsdata, ext2_get_block); |
762 | } | 774 | } |
763 | 775 | ||
764 | static int | 776 | static int |
@@ -766,8 +778,25 @@ ext2_write_begin(struct file *file, struct address_space *mapping, | |||
766 | loff_t pos, unsigned len, unsigned flags, | 778 | loff_t pos, unsigned len, unsigned flags, |
767 | struct page **pagep, void **fsdata) | 779 | struct page **pagep, void **fsdata) |
768 | { | 780 | { |
781 | int ret; | ||
782 | |||
769 | *pagep = NULL; | 783 | *pagep = NULL; |
770 | return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); | 784 | ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata); |
785 | if (ret < 0) | ||
786 | ext2_write_failed(mapping, pos + len); | ||
787 | return ret; | ||
788 | } | ||
789 | |||
790 | static int ext2_write_end(struct file *file, struct address_space *mapping, | ||
791 | loff_t pos, unsigned len, unsigned copied, | ||
792 | struct page *page, void *fsdata) | ||
793 | { | ||
794 | int ret; | ||
795 | |||
796 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
797 | if (ret < len) | ||
798 | ext2_write_failed(mapping, pos + len); | ||
799 | return ret; | ||
771 | } | 800 | } |
772 | 801 | ||
773 | static int | 802 | static int |
@@ -775,13 +804,18 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping, | |||
775 | loff_t pos, unsigned len, unsigned flags, | 804 | loff_t pos, unsigned len, unsigned flags, |
776 | struct page **pagep, void **fsdata) | 805 | struct page **pagep, void **fsdata) |
777 | { | 806 | { |
807 | int ret; | ||
808 | |||
778 | /* | 809 | /* |
779 | * Dir-in-pagecache still uses ext2_write_begin. Would have to rework | 810 | * Dir-in-pagecache still uses ext2_write_begin. Would have to rework |
780 | * directory handling code to pass around offsets rather than struct | 811 | * directory handling code to pass around offsets rather than struct |
781 | * pages in order to make this work easily. | 812 | * pages in order to make this work easily. |
782 | */ | 813 | */ |
783 | return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 814 | ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep, |
784 | ext2_get_block); | 815 | fsdata, ext2_get_block); |
816 | if (ret < 0) | ||
817 | ext2_write_failed(mapping, pos + len); | ||
818 | return ret; | ||
785 | } | 819 | } |
786 | 820 | ||
787 | static int ext2_nobh_writepage(struct page *page, | 821 | static int ext2_nobh_writepage(struct page *page, |
@@ -800,10 +834,15 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
800 | loff_t offset, unsigned long nr_segs) | 834 | loff_t offset, unsigned long nr_segs) |
801 | { | 835 | { |
802 | struct file *file = iocb->ki_filp; | 836 | struct file *file = iocb->ki_filp; |
803 | struct inode *inode = file->f_mapping->host; | 837 | struct address_space *mapping = file->f_mapping; |
804 | 838 | struct inode *inode = mapping->host; | |
805 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 839 | ssize_t ret; |
806 | offset, nr_segs, ext2_get_block, NULL); | 840 | |
841 | ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, | ||
842 | iov, offset, nr_segs, ext2_get_block, NULL); | ||
843 | if (ret < 0 && (rw & WRITE)) | ||
844 | ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); | ||
845 | return ret; | ||
807 | } | 846 | } |
808 | 847 | ||
809 | static int | 848 | static int |
@@ -818,7 +857,7 @@ const struct address_space_operations ext2_aops = { | |||
818 | .writepage = ext2_writepage, | 857 | .writepage = ext2_writepage, |
819 | .sync_page = block_sync_page, | 858 | .sync_page = block_sync_page, |
820 | .write_begin = ext2_write_begin, | 859 | .write_begin = ext2_write_begin, |
821 | .write_end = generic_write_end, | 860 | .write_end = ext2_write_end, |
822 | .bmap = ext2_bmap, | 861 | .bmap = ext2_bmap, |
823 | .direct_IO = ext2_direct_IO, | 862 | .direct_IO = ext2_direct_IO, |
824 | .writepages = ext2_writepages, | 863 | .writepages = ext2_writepages, |
@@ -1027,7 +1066,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de | |||
1027 | ext2_free_data(inode, p, q); | 1066 | ext2_free_data(inode, p, q); |
1028 | } | 1067 | } |
1029 | 1068 | ||
1030 | void ext2_truncate(struct inode *inode) | 1069 | static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) |
1031 | { | 1070 | { |
1032 | __le32 *i_data = EXT2_I(inode)->i_data; | 1071 | __le32 *i_data = EXT2_I(inode)->i_data; |
1033 | struct ext2_inode_info *ei = EXT2_I(inode); | 1072 | struct ext2_inode_info *ei = EXT2_I(inode); |
@@ -1039,27 +1078,8 @@ void ext2_truncate(struct inode *inode) | |||
1039 | int n; | 1078 | int n; |
1040 | long iblock; | 1079 | long iblock; |
1041 | unsigned blocksize; | 1080 | unsigned blocksize; |
1042 | |||
1043 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
1044 | S_ISLNK(inode->i_mode))) | ||
1045 | return; | ||
1046 | if (ext2_inode_is_fast_symlink(inode)) | ||
1047 | return; | ||
1048 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
1049 | return; | ||
1050 | |||
1051 | blocksize = inode->i_sb->s_blocksize; | 1081 | blocksize = inode->i_sb->s_blocksize; |
1052 | iblock = (inode->i_size + blocksize-1) | 1082 | iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); |
1053 | >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); | ||
1054 | |||
1055 | if (mapping_is_xip(inode->i_mapping)) | ||
1056 | xip_truncate_page(inode->i_mapping, inode->i_size); | ||
1057 | else if (test_opt(inode->i_sb, NOBH)) | ||
1058 | nobh_truncate_page(inode->i_mapping, | ||
1059 | inode->i_size, ext2_get_block); | ||
1060 | else | ||
1061 | block_truncate_page(inode->i_mapping, | ||
1062 | inode->i_size, ext2_get_block); | ||
1063 | 1083 | ||
1064 | n = ext2_block_to_path(inode, iblock, offsets, NULL); | 1084 | n = ext2_block_to_path(inode, iblock, offsets, NULL); |
1065 | if (n == 0) | 1085 | if (n == 0) |
@@ -1127,6 +1147,62 @@ do_indirects: | |||
1127 | ext2_discard_reservation(inode); | 1147 | ext2_discard_reservation(inode); |
1128 | 1148 | ||
1129 | mutex_unlock(&ei->truncate_mutex); | 1149 | mutex_unlock(&ei->truncate_mutex); |
1150 | } | ||
1151 | |||
1152 | static void ext2_truncate_blocks(struct inode *inode, loff_t offset) | ||
1153 | { | ||
1154 | /* | ||
1155 | * XXX: it seems like a bug here that we don't allow | ||
1156 | * IS_APPEND inode to have blocks-past-i_size trimmed off. | ||
1157 | * review and fix this. | ||
1158 | * | ||
1159 | * Also would be nice to be able to handle IO errors and such, | ||
1160 | * but that's probably too much to ask. | ||
1161 | */ | ||
1162 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
1163 | S_ISLNK(inode->i_mode))) | ||
1164 | return; | ||
1165 | if (ext2_inode_is_fast_symlink(inode)) | ||
1166 | return; | ||
1167 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
1168 | return; | ||
1169 | __ext2_truncate_blocks(inode, offset); | ||
1170 | } | ||
1171 | |||
1172 | int ext2_setsize(struct inode *inode, loff_t newsize) | ||
1173 | { | ||
1174 | loff_t oldsize; | ||
1175 | int error; | ||
1176 | |||
1177 | error = inode_newsize_ok(inode, newsize); | ||
1178 | if (error) | ||
1179 | return error; | ||
1180 | |||
1181 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
1182 | S_ISLNK(inode->i_mode))) | ||
1183 | return -EINVAL; | ||
1184 | if (ext2_inode_is_fast_symlink(inode)) | ||
1185 | return -EINVAL; | ||
1186 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
1187 | return -EPERM; | ||
1188 | |||
1189 | if (mapping_is_xip(inode->i_mapping)) | ||
1190 | error = xip_truncate_page(inode->i_mapping, newsize); | ||
1191 | else if (test_opt(inode->i_sb, NOBH)) | ||
1192 | error = nobh_truncate_page(inode->i_mapping, | ||
1193 | newsize, ext2_get_block); | ||
1194 | else | ||
1195 | error = block_truncate_page(inode->i_mapping, | ||
1196 | newsize, ext2_get_block); | ||
1197 | if (error) | ||
1198 | return error; | ||
1199 | |||
1200 | oldsize = inode->i_size; | ||
1201 | i_size_write(inode, newsize); | ||
1202 | truncate_pagecache(inode, oldsize, newsize); | ||
1203 | |||
1204 | __ext2_truncate_blocks(inode, newsize); | ||
1205 | |||
1130 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 1206 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
1131 | if (inode_needs_sync(inode)) { | 1207 | if (inode_needs_sync(inode)) { |
1132 | sync_mapping_buffers(inode->i_mapping); | 1208 | sync_mapping_buffers(inode->i_mapping); |
@@ -1134,6 +1210,8 @@ do_indirects: | |||
1134 | } else { | 1210 | } else { |
1135 | mark_inode_dirty(inode); | 1211 | mark_inode_dirty(inode); |
1136 | } | 1212 | } |
1213 | |||
1214 | return 0; | ||
1137 | } | 1215 | } |
1138 | 1216 | ||
1139 | static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, | 1217 | static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, |
@@ -1474,8 +1552,15 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) | |||
1474 | if (error) | 1552 | if (error) |
1475 | return error; | 1553 | return error; |
1476 | } | 1554 | } |
1477 | error = inode_setattr(inode, iattr); | 1555 | if (iattr->ia_valid & ATTR_SIZE) { |
1478 | if (!error && (iattr->ia_valid & ATTR_MODE)) | 1556 | error = ext2_setsize(inode, iattr->ia_size); |
1557 | if (error) | ||
1558 | return error; | ||
1559 | } | ||
1560 | generic_setattr(inode, iattr); | ||
1561 | if (iattr->ia_valid & ATTR_MODE) | ||
1479 | error = ext2_acl_chmod(inode); | 1562 | error = ext2_acl_chmod(inode); |
1563 | mark_inode_dirty(inode); | ||
1564 | |||
1480 | return error; | 1565 | return error; |
1481 | } | 1566 | } |
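
ext2_setsize() above is the template for the "new truncate" sequence this series migrates filesystems to: validate the new size, zero the partial tail block, publish the new i_size, shoot down the page cache past it, and only then free blocks. In outline, with myfs_get_block and myfs_truncate_blocks standing in for the fs-specific pieces (a sketch against 2.6.35-era helpers, not a drop-in implementation):

    static int myfs_get_block(struct inode *, sector_t,
                              struct buffer_head *, int);       /* fs-specific */
    static void myfs_truncate_blocks(struct inode *, loff_t);   /* fs-specific */

    static int myfs_setsize(struct inode *inode, loff_t newsize)
    {
            loff_t oldsize;
            int error;

            error = inode_newsize_ok(inode, newsize);   /* rlimit etc. */
            if (error)
                    return error;

            /* zero the tail of the (new) last block on disk */
            error = block_truncate_page(inode->i_mapping, newsize,
                                        myfs_get_block);
            if (error)
                    return error;

            oldsize = inode->i_size;
            i_size_write(inode, newsize);                /* 1: new size    */
            truncate_pagecache(inode, oldsize, newsize); /* 2: page cache  */
            myfs_truncate_blocks(inode, newsize);        /* 3: free blocks */
            return 0;
    }
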
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 71e9eb1fa696..7ff43f4a59cd 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -119,6 +119,8 @@ static void ext2_put_super (struct super_block * sb) | |||
119 | int i; | 119 | int i; |
120 | struct ext2_sb_info *sbi = EXT2_SB(sb); | 120 | struct ext2_sb_info *sbi = EXT2_SB(sb); |
121 | 121 | ||
122 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | ||
123 | |||
122 | if (sb->s_dirt) | 124 | if (sb->s_dirt) |
123 | ext2_write_super(sb); | 125 | ext2_write_super(sb); |
124 | 126 | ||
@@ -1063,6 +1065,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
1063 | sb->s_op = &ext2_sops; | 1065 | sb->s_op = &ext2_sops; |
1064 | sb->s_export_op = &ext2_export_ops; | 1066 | sb->s_export_op = &ext2_export_ops; |
1065 | sb->s_xattr = ext2_xattr_handlers; | 1067 | sb->s_xattr = ext2_xattr_handlers; |
1068 | |||
1069 | #ifdef CONFIG_QUOTA | ||
1070 | sb->dq_op = &dquot_operations; | ||
1071 | sb->s_qcop = &dquot_quotactl_ops; | ||
1072 | #endif | ||
1073 | |||
1066 | root = ext2_iget(sb, EXT2_ROOT_INO); | 1074 | root = ext2_iget(sb, EXT2_ROOT_INO); |
1067 | if (IS_ERR(root)) { | 1075 | if (IS_ERR(root)) { |
1068 | ret = PTR_ERR(root); | 1076 | ret = PTR_ERR(root); |
@@ -1241,6 +1249,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1241 | spin_unlock(&sbi->s_lock); | 1249 | spin_unlock(&sbi->s_lock); |
1242 | return 0; | 1250 | return 0; |
1243 | } | 1251 | } |
1252 | |||
1244 | /* | 1253 | /* |
1245 | * OK, we are remounting a valid rw partition rdonly, so set | 1254 | * OK, we are remounting a valid rw partition rdonly, so set |
1246 | * the rdonly flag and then mark the partition as valid again. | 1255 | * the rdonly flag and then mark the partition as valid again. |
@@ -1248,6 +1257,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1248 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 1257 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
1249 | es->s_mtime = cpu_to_le32(get_seconds()); | 1258 | es->s_mtime = cpu_to_le32(get_seconds()); |
1250 | spin_unlock(&sbi->s_lock); | 1259 | spin_unlock(&sbi->s_lock); |
1260 | |||
1261 | err = dquot_suspend(sb, -1); | ||
1262 | if (err < 0) { | ||
1263 | spin_lock(&sbi->s_lock); | ||
1264 | goto restore_opts; | ||
1265 | } | ||
1266 | |||
1251 | ext2_sync_super(sb, es, 1); | 1267 | ext2_sync_super(sb, es, 1); |
1252 | } else { | 1268 | } else { |
1253 | __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, | 1269 | __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, |
@@ -1269,8 +1285,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1269 | if (!ext2_setup_super (sb, es, 0)) | 1285 | if (!ext2_setup_super (sb, es, 0)) |
1270 | sb->s_flags &= ~MS_RDONLY; | 1286 | sb->s_flags &= ~MS_RDONLY; |
1271 | spin_unlock(&sbi->s_lock); | 1287 | spin_unlock(&sbi->s_lock); |
1288 | |||
1272 | ext2_write_super(sb); | 1289 | ext2_write_super(sb); |
1290 | |||
1291 | dquot_resume(sb, -1); | ||
1273 | } | 1292 | } |
1293 | |||
1274 | return 0; | 1294 | return 0; |
1275 | restore_opts: | 1295 | restore_opts: |
1276 | sbi->s_mount_opt = old_opts.s_mount_opt; | 1296 | sbi->s_mount_opt = old_opts.s_mount_opt; |
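
The quota plumbing in this and the following ext3 hunks follows one pattern: point the superblock at the generic dquot operations at mount time, suspend quotas when remounting read-only, resume them when going back read-write, and disable them in put_super. A condensed sketch of that pattern (generic 2.6.35 quota API; the myfs_ names are invented):

    static int myfs_fill_super(struct super_block *sb, void *data, int silent)
    {
    #ifdef CONFIG_QUOTA
            sb->dq_op = &dquot_operations;          /* generic dquot ops    */
            sb->s_qcop = &dquot_quotactl_ops;       /* generic quotactl ops */
    #endif
            /* ... rest of mount setup ... */
            return 0;
    }

    static int myfs_remount(struct super_block *sb, int *flags, char *data)
    {
            if (*flags & MS_RDONLY)
                    return dquot_suspend(sb, -1);   /* rw -> ro */
            dquot_resume(sb, -1);                   /* ro -> rw */
            return 0;
    }

    static void myfs_put_super(struct super_block *sb)
    {
            dquot_disable(sb, -1,
                          DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
            /* ... rest of teardown ... */
    }
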
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 373fa90c796a..e2e72c367cf6 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -297,7 +297,7 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
297 | kfree (old); | 297 | kfree (old); |
298 | } | 298 | } |
299 | if (!parent) | 299 | if (!parent) |
300 | root->rb_node = NULL; | 300 | *root = RB_ROOT; |
301 | else if (parent->rb_left == n) | 301 | else if (parent->rb_left == n) |
302 | parent->rb_left = NULL; | 302 | parent->rb_left = NULL; |
303 | else if (parent->rb_right == n) | 303 | else if (parent->rb_right == n) |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index fcf7487734b6..d7e9f74dc3a6 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -43,9 +43,9 @@ | |||
43 | * inode to disk. | 43 | * inode to disk. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | 46 | int ext3_sync_file(struct file *file, int datasync) |
47 | { | 47 | { |
48 | struct inode *inode = dentry->d_inode; | 48 | struct inode *inode = file->f_mapping->host; |
49 | struct ext3_inode_info *ei = EXT3_I(inode); | 49 | struct ext3_inode_info *ei = EXT3_I(inode); |
50 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; |
51 | int ret, needs_barrier = 0; | 51 | int ret, needs_barrier = 0; |
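The ext3_sync_file hunk shows the ->fsync prototype change this merge carries tree-wide (9p above, ext4 below): the dentry argument is dropped, and callers recover the inode from the file's address space. A hedged sketch of the new shape (sketch_fsync is a made-up name):

        static int sketch_fsync(struct file *file, int datasync)
        {
                /* no dentry parameter anymore; derive the inode */
                struct inode *inode = file->f_mapping->host;

                /* datasync != 0: only data must reach disk; pure
                 * metadata updates such as timestamps may be skipped */
                pr_debug("fsync inode %lu datasync %d\n",
                         inode->i_ino, datasync);
                return 0;
        }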
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0fc1293d0e96..6c953bb255e7 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -410,6 +410,8 @@ static void ext3_put_super (struct super_block * sb) | |||
410 | struct ext3_super_block *es = sbi->s_es; | 410 | struct ext3_super_block *es = sbi->s_es; |
411 | int i, err; | 411 | int i, err; |
412 | 412 | ||
413 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | ||
414 | |||
413 | lock_kernel(); | 415 | lock_kernel(); |
414 | 416 | ||
415 | ext3_xattr_put_super(sb); | 417 | ext3_xattr_put_super(sb); |
@@ -748,7 +750,7 @@ static int ext3_release_dquot(struct dquot *dquot); | |||
748 | static int ext3_mark_dquot_dirty(struct dquot *dquot); | 750 | static int ext3_mark_dquot_dirty(struct dquot *dquot); |
749 | static int ext3_write_info(struct super_block *sb, int type); | 751 | static int ext3_write_info(struct super_block *sb, int type); |
750 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, | 752 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, |
751 | char *path, int remount); | 753 | char *path); |
752 | static int ext3_quota_on_mount(struct super_block *sb, int type); | 754 | static int ext3_quota_on_mount(struct super_block *sb, int type); |
753 | static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, | 755 | static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, |
754 | size_t len, loff_t off); | 756 | size_t len, loff_t off); |
@@ -767,12 +769,12 @@ static const struct dquot_operations ext3_quota_operations = { | |||
767 | 769 | ||
768 | static const struct quotactl_ops ext3_qctl_operations = { | 770 | static const struct quotactl_ops ext3_qctl_operations = { |
769 | .quota_on = ext3_quota_on, | 771 | .quota_on = ext3_quota_on, |
770 | .quota_off = vfs_quota_off, | 772 | .quota_off = dquot_quota_off, |
771 | .quota_sync = vfs_quota_sync, | 773 | .quota_sync = dquot_quota_sync, |
772 | .get_info = vfs_get_dqinfo, | 774 | .get_info = dquot_get_dqinfo, |
773 | .set_info = vfs_set_dqinfo, | 775 | .set_info = dquot_set_dqinfo, |
774 | .get_dqblk = vfs_get_dqblk, | 776 | .get_dqblk = dquot_get_dqblk, |
775 | .set_dqblk = vfs_set_dqblk | 777 | .set_dqblk = dquot_set_dqblk |
776 | }; | 778 | }; |
777 | #endif | 779 | #endif |
778 | 780 | ||
@@ -1527,7 +1529,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, | |||
1527 | /* Turn quotas off */ | 1529 | /* Turn quotas off */ |
1528 | for (i = 0; i < MAXQUOTAS; i++) { | 1530 | for (i = 0; i < MAXQUOTAS; i++) { |
1529 | if (sb_dqopt(sb)->files[i]) | 1531 | if (sb_dqopt(sb)->files[i]) |
1530 | vfs_quota_off(sb, i, 0); | 1532 | dquot_quota_off(sb, i); |
1531 | } | 1533 | } |
1532 | #endif | 1534 | #endif |
1533 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ | 1535 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ |
@@ -2551,6 +2553,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2551 | ext3_fsblk_t n_blocks_count = 0; | 2553 | ext3_fsblk_t n_blocks_count = 0; |
2552 | unsigned long old_sb_flags; | 2554 | unsigned long old_sb_flags; |
2553 | struct ext3_mount_options old_opts; | 2555 | struct ext3_mount_options old_opts; |
2556 | int enable_quota = 0; | ||
2554 | int err; | 2557 | int err; |
2555 | #ifdef CONFIG_QUOTA | 2558 | #ifdef CONFIG_QUOTA |
2556 | int i; | 2559 | int i; |
@@ -2597,6 +2600,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2597 | } | 2600 | } |
2598 | 2601 | ||
2599 | if (*flags & MS_RDONLY) { | 2602 | if (*flags & MS_RDONLY) { |
2603 | err = dquot_suspend(sb, -1); | ||
2604 | if (err < 0) | ||
2605 | goto restore_opts; | ||
2606 | |||
2600 | /* | 2607 | /* |
2601 | * First of all, the unconditional stuff we have to do | 2608 | * First of all, the unconditional stuff we have to do |
2602 | * to disable replay of the journal when we next remount | 2609 | * to disable replay of the journal when we next remount |
@@ -2651,6 +2658,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2651 | goto restore_opts; | 2658 | goto restore_opts; |
2652 | if (!ext3_setup_super (sb, es, 0)) | 2659 | if (!ext3_setup_super (sb, es, 0)) |
2653 | sb->s_flags &= ~MS_RDONLY; | 2660 | sb->s_flags &= ~MS_RDONLY; |
2661 | enable_quota = 1; | ||
2654 | } | 2662 | } |
2655 | } | 2663 | } |
2656 | #ifdef CONFIG_QUOTA | 2664 | #ifdef CONFIG_QUOTA |
@@ -2662,6 +2670,9 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2662 | #endif | 2670 | #endif |
2663 | unlock_super(sb); | 2671 | unlock_super(sb); |
2664 | unlock_kernel(); | 2672 | unlock_kernel(); |
2673 | |||
2674 | if (enable_quota) | ||
2675 | dquot_resume(sb, -1); | ||
2665 | return 0; | 2676 | return 0; |
2666 | restore_opts: | 2677 | restore_opts: |
2667 | sb->s_flags = old_sb_flags; | 2678 | sb->s_flags = old_sb_flags; |
@@ -2851,24 +2862,21 @@ static int ext3_write_info(struct super_block *sb, int type) | |||
2851 | */ | 2862 | */ |
2852 | static int ext3_quota_on_mount(struct super_block *sb, int type) | 2863 | static int ext3_quota_on_mount(struct super_block *sb, int type) |
2853 | { | 2864 | { |
2854 | return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], | 2865 | return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], |
2855 | EXT3_SB(sb)->s_jquota_fmt, type); | 2866 | EXT3_SB(sb)->s_jquota_fmt, type); |
2856 | } | 2867 | } |
2857 | 2868 | ||
2858 | /* | 2869 | /* |
2859 | * Standard function to be called on quota_on | 2870 | * Standard function to be called on quota_on |
2860 | */ | 2871 | */ |
2861 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, | 2872 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, |
2862 | char *name, int remount) | 2873 | char *name) |
2863 | { | 2874 | { |
2864 | int err; | 2875 | int err; |
2865 | struct path path; | 2876 | struct path path; |
2866 | 2877 | ||
2867 | if (!test_opt(sb, QUOTA)) | 2878 | if (!test_opt(sb, QUOTA)) |
2868 | return -EINVAL; | 2879 | return -EINVAL; |
2869 | /* When remounting, no checks are needed and in fact, name is NULL */ | ||
2870 | if (remount) | ||
2871 | return vfs_quota_on(sb, type, format_id, name, remount); | ||
2872 | 2880 | ||
2873 | err = kern_path(name, LOOKUP_FOLLOW, &path); | 2881 | err = kern_path(name, LOOKUP_FOLLOW, &path); |
2874 | if (err) | 2882 | if (err) |
@@ -2906,7 +2914,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, | |||
2906 | } | 2914 | } |
2907 | } | 2915 | } |
2908 | 2916 | ||
2909 | err = vfs_quota_on_path(sb, type, format_id, &path); | 2917 | err = dquot_quota_on_path(sb, type, format_id, &path); |
2910 | path_put(&path); | 2918 | path_put(&path); |
2911 | return err; | 2919 | return err; |
2912 | } | 2920 | } |
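With the remount shortcut removed, ext3_quota_on always resolves the quota file's path; the remount case is now handled by dquot_suspend()/dquot_resume() instead. Stripped to its skeleton, under the same assumptions (the sanity checks between lookup and enable are elided):

        static int sketch_quota_on(struct super_block *sb, int type,
                                   int format_id, char *name)
        {
                struct path path;
                int err;

                err = kern_path(name, LOOKUP_FOLLOW, &path);
                if (err)
                        return err;
                /* ... verify the file lives on this sb, etc. ... */
                err = dquot_quota_on_path(sb, type, format_id, &path);
                path_put(&path);        /* drop the kern_path reference */
                return err;
        }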
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d2f37a5516c7..95b7594c76f9 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -591,14 +591,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
591 | ret = ext4_mb_new_blocks(handle, &ar, errp); | 591 | ret = ext4_mb_new_blocks(handle, &ar, errp); |
592 | if (count) | 592 | if (count) |
593 | *count = ar.len; | 593 | *count = ar.len; |
594 | |||
595 | /* | 594 | /* |
596 | * Account for the allocated meta blocks | 595 | * Account for the allocated meta blocks. We will never |
596 | * fail EDQUOT for metadata, but we do account for it. | ||
597 | */ | 597 | */ |
598 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { | 598 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
599 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 599 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
600 | EXT4_I(inode)->i_allocated_meta_blocks += ar.len; | 600 | EXT4_I(inode)->i_allocated_meta_blocks += ar.len; |
601 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 601 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
602 | dquot_alloc_block_nofail(inode, ar.len); | ||
602 | } | 603 | } |
603 | return ret; | 604 | return ret; |
604 | } | 605 | } |
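The _nofail call added above is what the reworded comment is about: metadata allocations are still charged to the owner's quota, but the charge can no longer be refused. A hedged contrast of the two helpers, assuming only the names used in this series (sketch_charge is a made-up wrapper):

        static int sketch_charge(struct inode *inode, int len, int metadata)
        {
                if (!metadata)
                        return dquot_alloc_block(inode, len); /* may be -EDQUOT */

                /* metadata: account unconditionally, never fail */
                dquot_alloc_block_nofail(inode, len);
                return 0;
        }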
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 538c48655084..5b6973fbf1bd 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi, | |||
72 | else if (start_blk >= (entry->start_blk + entry->count)) | 72 | else if (start_blk >= (entry->start_blk + entry->count)) |
73 | n = &(*n)->rb_right; | 73 | n = &(*n)->rb_right; |
74 | else { | 74 | else { |
75 | if (start_blk + count > (entry->start_blk + | 75 | if (start_blk + count > (entry->start_blk + |
76 | entry->count)) | 76 | entry->count)) |
77 | entry->count = (start_blk + count - | 77 | entry->count = (start_blk + count - |
78 | entry->start_blk); | 78 | entry->start_blk); |
79 | new_node = *n; | 79 | new_node = *n; |
80 | new_entry = rb_entry(new_node, struct ext4_system_zone, | 80 | new_entry = rb_entry(new_node, struct ext4_system_zone, |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 86cb6d86a048..ea5e6cb7e2a5 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, | |||
83 | error_msg = "inode out of bounds"; | 83 | error_msg = "inode out of bounds"; |
84 | 84 | ||
85 | if (error_msg != NULL) | 85 | if (error_msg != NULL) |
86 | __ext4_error(dir->i_sb, function, | 86 | ext4_error_inode(function, dir, |
87 | "bad entry in directory #%lu: %s - block=%llu" | 87 | "bad entry in directory: %s - block=%llu" |
88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", | 88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", |
89 | dir->i_ino, error_msg, | 89 | error_msg, (unsigned long long) bh->b_blocknr, |
90 | (unsigned long long) bh->b_blocknr, | ||
91 | (unsigned) (offset%bh->b_size), offset, | 90 | (unsigned) (offset%bh->b_size), offset, |
92 | le32_to_cpu(de->inode), | 91 | le32_to_cpu(de->inode), |
93 | rlen, de->name_len); | 92 | rlen, de->name_len); |
@@ -111,7 +110,7 @@ static int ext4_readdir(struct file *filp, | |||
111 | 110 | ||
112 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | 111 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, |
113 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | 112 | EXT4_FEATURE_COMPAT_DIR_INDEX) && |
114 | ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || | 113 | ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || |
115 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | 114 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { |
116 | err = ext4_dx_readdir(filp, dirent, filldir); | 115 | err = ext4_dx_readdir(filp, dirent, filldir); |
117 | if (err != ERR_BAD_DX_DIR) { | 116 | if (err != ERR_BAD_DX_DIR) { |
@@ -122,20 +121,20 @@ static int ext4_readdir(struct file *filp, | |||
122 | * We don't set the inode dirty flag since it's not | 121 | * We don't set the inode dirty flag since it's not |
123 | * critical that it get flushed back to the disk. | 122 | * critical that it get flushed back to the disk. |
124 | */ | 123 | */ |
125 | EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; | 124 | ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); |
126 | } | 125 | } |
127 | stored = 0; | 126 | stored = 0; |
128 | offset = filp->f_pos & (sb->s_blocksize - 1); | 127 | offset = filp->f_pos & (sb->s_blocksize - 1); |
129 | 128 | ||
130 | while (!error && !stored && filp->f_pos < inode->i_size) { | 129 | while (!error && !stored && filp->f_pos < inode->i_size) { |
131 | ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); | 130 | struct ext4_map_blocks map; |
132 | struct buffer_head map_bh; | ||
133 | struct buffer_head *bh = NULL; | 131 | struct buffer_head *bh = NULL; |
134 | 132 | ||
135 | map_bh.b_state = 0; | 133 | map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); |
136 | err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); | 134 | map.m_len = 1; |
135 | err = ext4_map_blocks(NULL, inode, &map, 0); | ||
137 | if (err > 0) { | 136 | if (err > 0) { |
138 | pgoff_t index = map_bh.b_blocknr >> | 137 | pgoff_t index = map.m_pblk >> |
139 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 138 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
140 | if (!ra_has_index(&filp->f_ra, index)) | 139 | if (!ra_has_index(&filp->f_ra, index)) |
141 | page_cache_sync_readahead( | 140 | page_cache_sync_readahead( |
@@ -143,7 +142,7 @@ static int ext4_readdir(struct file *filp, | |||
143 | &filp->f_ra, filp, | 142 | &filp->f_ra, filp, |
144 | index, 1); | 143 | index, 1); |
145 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 144 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
146 | bh = ext4_bread(NULL, inode, blk, 0, &err); | 145 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); |
147 | } | 146 | } |
148 | 147 | ||
149 | /* | 148 | /* |
@@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp, | |||
152 | */ | 151 | */ |
153 | if (!bh) { | 152 | if (!bh) { |
154 | if (!dir_has_error) { | 153 | if (!dir_has_error) { |
155 | ext4_error(sb, "directory #%lu " | 154 | EXT4_ERROR_INODE(inode, "directory " |
156 | "contains a hole at offset %Lu", | 155 | "contains a hole at offset %Lu", |
157 | inode->i_ino, | ||
158 | (unsigned long long) filp->f_pos); | 156 | (unsigned long long) filp->f_pos); |
159 | dir_has_error = 1; | 157 | dir_has_error = 1; |
160 | } | 158 | } |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf938cf7c5f0..19a4de57128a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -29,6 +29,9 @@ | |||
29 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
30 | #include <linux/blockgroup_lock.h> | 30 | #include <linux/blockgroup_lock.h> |
31 | #include <linux/percpu_counter.h> | 31 | #include <linux/percpu_counter.h> |
32 | #ifdef __KERNEL__ | ||
33 | #include <linux/compat.h> | ||
34 | #endif | ||
32 | 35 | ||
33 | /* | 36 | /* |
34 | * The fourth extended filesystem constants/structures | 37 | * The fourth extended filesystem constants/structures |
@@ -54,10 +57,10 @@ | |||
54 | #endif | 57 | #endif |
55 | 58 | ||
56 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
57 | ext4_error_inode(__func__, (inode), (fmt), ## a); | 60 | ext4_error_inode(__func__, (inode), (fmt), ## a) |
58 | 61 | ||
59 | #define EXT4_ERROR_FILE(file, fmt, a...) \ | 62 | #define EXT4_ERROR_FILE(file, fmt, a...) \ |
60 | ext4_error_file(__func__, (file), (fmt), ## a); | 63 | ext4_error_file(__func__, (file), (fmt), ## a) |
61 | 64 | ||
62 | /* data type for block offset of block group */ | 65 | /* data type for block offset of block group */ |
63 | typedef int ext4_grpblk_t; | 66 | typedef int ext4_grpblk_t; |
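Dropping the trailing semicolons from EXT4_ERROR_INODE and EXT4_ERROR_FILE is a correctness fix, not style: a statement-like macro that ends in ';' expands to two statements at the call site, which breaks an unbraced if/else. A minimal illustration:

        /* with the old trailing ';' inside the macro: */
        if (bad)
                EXT4_ERROR_INODE(inode, "oops");  /* expands to call();; */
        else                                      /* 'else' no longer pairs */
                recover();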
@@ -72,7 +75,7 @@ typedef __u32 ext4_lblk_t; | |||
72 | typedef unsigned int ext4_group_t; | 75 | typedef unsigned int ext4_group_t; |
73 | 76 | ||
74 | /* | 77 | /* |
75 | * Flags used in mballoc's allocation_context flags field. | 78 | * Flags used in mballoc's allocation_context flags field. |
76 | * | 79 | * |
77 | * Also used to show what's going on for debugging purposes when the | 80 | * Also used to show what's going on for debugging purposes when the |
78 | * flag field is exported via the tracepoint interface | 81 | * flag field is exported via the tracepoint interface |
@@ -126,6 +129,29 @@ struct ext4_allocation_request { | |||
126 | }; | 129 | }; |
127 | 130 | ||
128 | /* | 131 | /* |
132 | * Logical to physical block mapping, used by ext4_map_blocks() | ||
133 | * | ||
134 | * This structure is used to pass requests into ext4_map_blocks() as | ||
135 | * well as to store the information returned by ext4_map_blocks(). It | ||
136 | * takes less room on the stack than a struct buffer_head. | ||
137 | */ | ||
138 | #define EXT4_MAP_NEW (1 << BH_New) | ||
139 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) | ||
140 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) | ||
141 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) | ||
142 | #define EXT4_MAP_UNINIT (1 << BH_Uninit) | ||
143 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ | ||
144 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ | ||
145 | EXT4_MAP_UNINIT) | ||
146 | |||
147 | struct ext4_map_blocks { | ||
148 | ext4_fsblk_t m_pblk; | ||
149 | ext4_lblk_t m_lblk; | ||
150 | unsigned int m_len; | ||
151 | unsigned int m_flags; | ||
152 | }; | ||
153 | |||
154 | /* | ||
129 | * For delayed allocation tracking | 155 | * For delayed allocation tracking |
130 | */ | 156 | */ |
131 | struct mpage_da_data { | 157 | struct mpage_da_data { |
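The readdir hunk earlier already shows the new convention in action; in general, a read-only lookup follows the sketch below, assuming only the fields and flags declared above (sketch_lookup is a made-up name):

        static ext4_fsblk_t sketch_lookup(struct inode *inode, ext4_lblk_t lblk)
        {
                struct ext4_map_blocks map;
                int ret;

                map.m_lblk = lblk;      /* logical block to resolve */
                map.m_len = 1;          /* number of blocks wanted */

                /* handle == NULL, flags == 0: lookup only, no allocation */
                ret = ext4_map_blocks(NULL, inode, &map, 0);
                if (ret <= 0)
                        return 0;       /* hole or error */

                /* map.m_flags now carries EXT4_MAP_MAPPED and friends */
                return map.m_pblk;
        }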
@@ -321,6 +347,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
321 | return flags & EXT4_OTHER_FLMASK; | 347 | return flags & EXT4_OTHER_FLMASK; |
322 | } | 348 | } |
323 | 349 | ||
350 | /* | ||
351 | * Inode flags used for atomic set/get | ||
352 | */ | ||
353 | enum { | ||
354 | EXT4_INODE_SECRM = 0, /* Secure deletion */ | ||
355 | EXT4_INODE_UNRM = 1, /* Undelete */ | ||
356 | EXT4_INODE_COMPR = 2, /* Compress file */ | ||
357 | EXT4_INODE_SYNC = 3, /* Synchronous updates */ | ||
358 | EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ | ||
359 | EXT4_INODE_APPEND = 5, /* writes to file may only append */ | ||
360 | EXT4_INODE_NODUMP = 6, /* do not dump file */ | ||
361 | EXT4_INODE_NOATIME = 7, /* do not update atime */ | ||
362 | /* Reserved for compression usage... */ | ||
363 | EXT4_INODE_DIRTY = 8, | ||
364 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ | ||
365 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ | ||
366 | EXT4_INODE_ECOMPR = 11, /* Compression error */ | ||
367 | /* End compression flags --- maybe not all used */ | ||
368 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ | ||
369 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ | ||
370 | EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ | ||
371 | EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ | ||
372 | EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ | ||
373 | EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies */ | ||
374 | EXT4_INODE_HUGE_FILE = 18, /* Set on each huge file */ | ||
375 | EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ | ||
376 | EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ | ||
377 | EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ | ||
378 | EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ | ||
379 | }; | ||
380 | |||
381 | #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) | ||
382 | #define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \ | ||
383 | printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \ | ||
384 | EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); } | ||
385 | |||
386 | /* | ||
387 | * Since it's pretty easy to mix up bit numbers and hex values, and we | ||
388 | * can't do a compile-time test for ENUM values, we use a run-time | ||
389 | * test to make sure that EXT4_XXX_FL is consistent with respect to | ||
390 | * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop | ||
391 | * out so it won't cost any extra space in the compiled kernel image. | ||
392 | * But it's important that these values are the same, since we are | ||
393 | * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL | ||
394 | * must be consistent with the values of FS_XXX_FL defined in | ||
395 | * include/linux/fs.h and the on-disk values found in ext2, ext3, and | ||
396 | * ext4 filesystems, and of course the values defined in e2fsprogs. | ||
397 | * | ||
398 | * It's not paranoia if the Murphy's Law really *is* out to get you. :-) | ||
399 | */ | ||
400 | static inline void ext4_check_flag_values(void) | ||
401 | { | ||
402 | CHECK_FLAG_VALUE(SECRM); | ||
403 | CHECK_FLAG_VALUE(UNRM); | ||
404 | CHECK_FLAG_VALUE(COMPR); | ||
405 | CHECK_FLAG_VALUE(SYNC); | ||
406 | CHECK_FLAG_VALUE(IMMUTABLE); | ||
407 | CHECK_FLAG_VALUE(APPEND); | ||
408 | CHECK_FLAG_VALUE(NODUMP); | ||
409 | CHECK_FLAG_VALUE(NOATIME); | ||
410 | CHECK_FLAG_VALUE(DIRTY); | ||
411 | CHECK_FLAG_VALUE(COMPRBLK); | ||
412 | CHECK_FLAG_VALUE(NOCOMPR); | ||
413 | CHECK_FLAG_VALUE(ECOMPR); | ||
414 | CHECK_FLAG_VALUE(INDEX); | ||
415 | CHECK_FLAG_VALUE(IMAGIC); | ||
416 | CHECK_FLAG_VALUE(JOURNAL_DATA); | ||
417 | CHECK_FLAG_VALUE(NOTAIL); | ||
418 | CHECK_FLAG_VALUE(DIRSYNC); | ||
419 | CHECK_FLAG_VALUE(TOPDIR); | ||
420 | CHECK_FLAG_VALUE(HUGE_FILE); | ||
421 | CHECK_FLAG_VALUE(EXTENTS); | ||
422 | CHECK_FLAG_VALUE(EA_INODE); | ||
423 | CHECK_FLAG_VALUE(EOFBLOCKS); | ||
424 | CHECK_FLAG_VALUE(RESERVED); | ||
425 | } | ||
426 | |||
324 | /* Used to pass group descriptor data when online resize is done */ | 427 | /* Used to pass group descriptor data when online resize is done */ |
325 | struct ext4_new_group_input { | 428 | struct ext4_new_group_input { |
326 | __u32 group; /* Group number for this data */ | 429 | __u32 group; /* Group number for this data */ |
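To see why the run-time check is free when everything agrees, expand one invocation. CHECK_FLAG_VALUE(INDEX) becomes, roughly:

        if (!(EXT4_INDEX_FL == (1 << EXT4_INODE_INDEX))) {
                /* 0x00001000 == (1 << 12) is a compile-time constant
                 * truth, so this whole branch is dead and is discarded */
                printk(KERN_EMERG "EXT4 flag fail: INDEX: %d %d\n",
                       EXT4_INDEX_FL, EXT4_INODE_INDEX);
                BUG_ON(1);
        }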
@@ -332,6 +435,18 @@ struct ext4_new_group_input { | |||
332 | __u16 unused; | 435 | __u16 unused; |
333 | }; | 436 | }; |
334 | 437 | ||
438 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | ||
439 | struct compat_ext4_new_group_input { | ||
440 | u32 group; | ||
441 | compat_u64 block_bitmap; | ||
442 | compat_u64 inode_bitmap; | ||
443 | compat_u64 inode_table; | ||
444 | u32 blocks_count; | ||
445 | u16 reserved_blocks; | ||
446 | u16 unused; | ||
447 | }; | ||
448 | #endif | ||
449 | |||
335 | /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ | 450 | /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ |
336 | struct ext4_new_group_data { | 451 | struct ext4_new_group_data { |
337 | __u32 group; | 452 | __u32 group; |
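The compat struct exists because 32-bit userspace aligns 64-bit fields to 4 bytes (compat_u64), so the native struct cannot be used to read the ioctl argument directly. A hypothetical conversion in the 32-bit ioctl path might look like this; the real handler lives in fs/ext4/ioctl.c and may differ:

        static int sketch_group_add_compat(struct super_block *sb,
                                           void __user *arg)
        {
                struct compat_ext4_new_group_input cinput;
                struct ext4_new_group_input input;

                if (copy_from_user(&cinput, arg, sizeof(cinput)))
                        return -EFAULT;

                /* field-by-field copy; layouts differ only in alignment */
                input.group           = cinput.group;
                input.block_bitmap    = cinput.block_bitmap;
                input.inode_bitmap    = cinput.inode_bitmap;
                input.inode_table     = cinput.inode_table;
                input.blocks_count    = cinput.blocks_count;
                input.reserved_blocks = cinput.reserved_blocks;
                input.unused          = cinput.unused;

                /* ... then proceed as the native EXT4_IOC_GROUP_ADD
                 * would, using 'input' against 'sb' ... */
                return 0;
        }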
@@ -355,7 +470,7 @@ struct ext4_new_group_data { | |||
355 | #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ | 470 | #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ |
356 | EXT4_GET_BLOCKS_CREATE) | 471 | EXT4_GET_BLOCKS_CREATE) |
357 | /* Caller is from the delayed allocation writeout path, | 472 | /* Caller is from the delayed allocation writeout path, |
358 | so set the magic i_delalloc_reserved_flag after taking the | 473 | so set the magic i_delalloc_reserved_flag after taking the |
359 | inode allocation semaphore. */ | 474 | inode allocation semaphore. */ |
360 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 | 475 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 |
361 | /* caller is from the direct IO path, request to creation of an | 476 | /* caller is from the direct IO path, request to creation of an |
@@ -398,6 +513,7 @@ struct ext4_new_group_data { | |||
398 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) | 513 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) |
399 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | 514 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) |
400 | 515 | ||
516 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | ||
401 | /* | 517 | /* |
402 | * ioctl commands in 32 bit emulation | 518 | * ioctl commands in 32 bit emulation |
403 | */ | 519 | */ |
@@ -408,11 +524,13 @@ struct ext4_new_group_data { | |||
408 | #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) | 524 | #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) |
409 | #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) | 525 | #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) |
410 | #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) | 526 | #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) |
527 | #define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) | ||
411 | #ifdef CONFIG_JBD2_DEBUG | 528 | #ifdef CONFIG_JBD2_DEBUG |
412 | #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) | 529 | #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) |
413 | #endif | 530 | #endif |
414 | #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION | 531 | #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION |
415 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION | 532 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION |
533 | #endif | ||
416 | 534 | ||
417 | 535 | ||
418 | /* | 536 | /* |
@@ -616,9 +734,8 @@ struct ext4_ext_cache { | |||
616 | */ | 734 | */ |
617 | struct ext4_inode_info { | 735 | struct ext4_inode_info { |
618 | __le32 i_data[15]; /* unconverted */ | 736 | __le32 i_data[15]; /* unconverted */ |
619 | __u32 i_flags; | ||
620 | ext4_fsblk_t i_file_acl; | ||
621 | __u32 i_dtime; | 737 | __u32 i_dtime; |
738 | ext4_fsblk_t i_file_acl; | ||
622 | 739 | ||
623 | /* | 740 | /* |
624 | * i_block_group is the number of the block group which contains | 741 | * i_block_group is the number of the block group which contains |
@@ -629,6 +746,7 @@ struct ext4_inode_info { | |||
629 | */ | 746 | */ |
630 | ext4_group_t i_block_group; | 747 | ext4_group_t i_block_group; |
631 | unsigned long i_state_flags; /* Dynamic state flags */ | 748 | unsigned long i_state_flags; /* Dynamic state flags */ |
749 | unsigned long i_flags; | ||
632 | 750 | ||
633 | ext4_lblk_t i_dir_start_lookup; | 751 | ext4_lblk_t i_dir_start_lookup; |
634 | #ifdef CONFIG_EXT4_FS_XATTR | 752 | #ifdef CONFIG_EXT4_FS_XATTR |
@@ -1062,22 +1180,25 @@ enum { | |||
1062 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ | 1180 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ |
1063 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ | 1181 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ |
1064 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1182 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1183 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | ||
1065 | }; | 1184 | }; |
1066 | 1185 | ||
1067 | static inline int ext4_test_inode_state(struct inode *inode, int bit) | 1186 | #define EXT4_INODE_BIT_FNS(name, field) \ |
1068 | { | 1187 | static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ |
1069 | return test_bit(bit, &EXT4_I(inode)->i_state_flags); | 1188 | { \ |
1070 | } | 1189 | return test_bit(bit, &EXT4_I(inode)->i_##field); \ |
1071 | 1190 | } \ | |
1072 | static inline void ext4_set_inode_state(struct inode *inode, int bit) | 1191 | static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ |
1073 | { | 1192 | { \ |
1074 | set_bit(bit, &EXT4_I(inode)->i_state_flags); | 1193 | set_bit(bit, &EXT4_I(inode)->i_##field); \ |
1194 | } \ | ||
1195 | static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ | ||
1196 | { \ | ||
1197 | clear_bit(bit, &EXT4_I(inode)->i_##field); \ | ||
1075 | } | 1198 | } |
1076 | 1199 | ||
1077 | static inline void ext4_clear_inode_state(struct inode *inode, int bit) | 1200 | EXT4_INODE_BIT_FNS(flag, flags) |
1078 | { | 1201 | EXT4_INODE_BIT_FNS(state, state_flags) |
1079 | clear_bit(bit, &EXT4_I(inode)->i_state_flags); | ||
1080 | } | ||
1081 | #else | 1202 | #else |
1082 | /* Assume that user mode programs are passing in an ext4fs superblock, not | 1203 | /* Assume that user mode programs are passing in an ext4fs superblock, not |
1083 | * a kernel struct super_block. This will allow us to call the feature-test | 1204 | * a kernel struct super_block. This will allow us to call the feature-test |
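One instantiation makes the macro concrete: EXT4_INODE_BIT_FNS(flag, flags) generates exactly the helpers below, which is also why i_flags becomes an unsigned long in the hunk further up (test_bit() and friends operate on unsigned long words):

        static inline int ext4_test_inode_flag(struct inode *inode, int bit)
        {
                return test_bit(bit, &EXT4_I(inode)->i_flags);
        }
        static inline void ext4_set_inode_flag(struct inode *inode, int bit)
        {
                set_bit(bit, &EXT4_I(inode)->i_flags);
        }
        static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
        {
                clear_bit(bit, &EXT4_I(inode)->i_flags);
        }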
@@ -1264,7 +1385,7 @@ struct ext4_dir_entry_2 { | |||
1264 | 1385 | ||
1265 | #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ | 1386 | #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ |
1266 | EXT4_FEATURE_COMPAT_DIR_INDEX) && \ | 1387 | EXT4_FEATURE_COMPAT_DIR_INDEX) && \ |
1267 | (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) | 1388 | ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) |
1268 | #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) | 1389 | #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) |
1269 | #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) | 1390 | #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) |
1270 | 1391 | ||
@@ -1398,7 +1519,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
1398 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1519 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
1399 | 1520 | ||
1400 | /* fsync.c */ | 1521 | /* fsync.c */ |
1401 | extern int ext4_sync_file(struct file *, struct dentry *, int); | 1522 | extern int ext4_sync_file(struct file *, int); |
1402 | 1523 | ||
1403 | /* hash.c */ | 1524 | /* hash.c */ |
1404 | extern int ext4fs_dirhash(const char *name, int len, struct | 1525 | extern int ext4fs_dirhash(const char *name, int len, struct |
@@ -1678,6 +1799,7 @@ struct ext4_group_info { | |||
1678 | ext4_grpblk_t bb_first_free; /* first free block */ | 1799 | ext4_grpblk_t bb_first_free; /* first free block */ |
1679 | ext4_grpblk_t bb_free; /* total free blocks */ | 1800 | ext4_grpblk_t bb_free; /* total free blocks */ |
1680 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ | 1801 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
1802 | ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ | ||
1681 | struct list_head bb_prealloc_list; | 1803 | struct list_head bb_prealloc_list; |
1682 | #ifdef DOUBLE_CHECK | 1804 | #ifdef DOUBLE_CHECK |
1683 | void *bb_bitmap; | 1805 | void *bb_bitmap; |
@@ -1772,9 +1894,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | |||
1772 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1894 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
1773 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | 1895 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, |
1774 | int chunk); | 1896 | int chunk); |
1775 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1897 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
1776 | ext4_lblk_t iblock, unsigned int max_blocks, | 1898 | struct ext4_map_blocks *map, int flags); |
1777 | struct buffer_head *bh_result, int flags); | ||
1778 | extern void ext4_ext_truncate(struct inode *); | 1899 | extern void ext4_ext_truncate(struct inode *); |
1779 | extern void ext4_ext_init(struct super_block *); | 1900 | extern void ext4_ext_init(struct super_block *); |
1780 | extern void ext4_ext_release(struct super_block *); | 1901 | extern void ext4_ext_release(struct super_block *); |
@@ -1782,6 +1903,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | |||
1782 | loff_t len); | 1903 | loff_t len); |
1783 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 1904 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, |
1784 | ssize_t len); | 1905 | ssize_t len); |
1906 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | ||
1907 | struct ext4_map_blocks *map, int flags); | ||
1785 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, | 1908 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, |
1786 | sector_t block, unsigned int max_blocks, | 1909 | sector_t block, unsigned int max_blocks, |
1787 | struct buffer_head *bh, int flags); | 1910 | struct buffer_head *bh, int flags); |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b79ad5126468..dade0c024797 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode) | |||
273 | return 1; | 273 | return 1; |
274 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 274 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
275 | return 1; | 275 | return 1; |
276 | if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) | 276 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
277 | return 1; | 277 | return 1; |
278 | return 0; | 278 | return 0; |
279 | } | 279 | } |
@@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode) | |||
284 | return 0; | 284 | return 0; |
285 | if (!S_ISREG(inode->i_mode)) | 285 | if (!S_ISREG(inode->i_mode)) |
286 | return 0; | 286 | return 0; |
287 | if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) | 287 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
288 | return 0; | 288 | return 0; |
289 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | 289 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) |
290 | return 1; | 290 | return 1; |
@@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode) | |||
297 | return 0; | 297 | return 0; |
298 | if (EXT4_JOURNAL(inode) == NULL) | 298 | if (EXT4_JOURNAL(inode) == NULL) |
299 | return 1; | 299 | return 1; |
300 | if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) | 300 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
301 | return 0; | 301 | return 0; |
302 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 302 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
303 | return 1; | 303 | return 1; |
@@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) | |||
321 | return 0; | 321 | return 0; |
322 | if (!S_ISREG(inode->i_mode)) | 322 | if (!S_ISREG(inode->i_mode)) |
323 | return 0; | 323 | return 0; |
324 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 324 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
325 | return 0; | 325 | return 0; |
326 | if (ext4_should_journal_data(inode)) | 326 | if (ext4_should_journal_data(inode)) |
327 | return 0; | 327 | return 0; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 236b834b4ca8..377309c1af65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, | |||
107 | if (err <= 0) | 107 | if (err <= 0) |
108 | return err; | 108 | return err; |
109 | err = ext4_truncate_restart_trans(handle, inode, needed); | 109 | err = ext4_truncate_restart_trans(handle, inode, needed); |
110 | /* | 110 | if (err == 0) |
111 | * We have dropped i_data_sem so someone might have cached again | 111 | err = -EAGAIN; |
112 | * an extent we are going to truncate. | ||
113 | */ | ||
114 | ext4_ext_invalidate_cache(inode); | ||
115 | 112 | ||
116 | return err; | 113 | return err; |
117 | } | 114 | } |
@@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
185 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | 182 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { |
186 | /* | 183 | /* |
187 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME | 184 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME |
188 | * block groups per flexgroup, reserve the first block | 185 | * block groups per flexgroup, reserve the first block |
189 | * group for directories and special files. Regular | 186 | * group for directories and special files. Regular |
190 | * files will start at the second block group. This | 187 | * files will start at the second block group. This |
191 | * tends to speed up directory access and improves | 188 | * tends to speed up directory access and improves |
192 | * fsck times. | 189 | * fsck times. |
193 | */ | 190 | */ |
194 | block_group &= ~(flex_size-1); | 191 | block_group &= ~(flex_size-1); |
@@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode, | |||
439 | return 0; | 436 | return 0; |
440 | 437 | ||
441 | corrupted: | 438 | corrupted: |
442 | __ext4_error(inode->i_sb, function, | 439 | ext4_error_inode(function, inode, |
443 | "bad header/extent in inode #%lu: %s - magic %x, " | 440 | "bad header/extent: %s - magic %x, " |
444 | "entries %u, max %u(%u), depth %u(%u)", | 441 | "entries %u, max %u(%u), depth %u(%u)", |
445 | inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), | 442 | error_msg, le16_to_cpu(eh->eh_magic), |
446 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), | 443 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), |
447 | max, le16_to_cpu(eh->eh_depth), depth); | 444 | max, le16_to_cpu(eh->eh_depth), depth); |
448 | 445 | ||
@@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1622 | merge_done = 1; | 1619 | merge_done = 1; |
1623 | WARN_ON(eh->eh_entries == 0); | 1620 | WARN_ON(eh->eh_entries == 0); |
1624 | if (!eh->eh_entries) | 1621 | if (!eh->eh_entries) |
1625 | ext4_error(inode->i_sb, | 1622 | EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); |
1626 | "inode#%lu, eh->eh_entries = 0!", | ||
1627 | inode->i_ino); | ||
1628 | } | 1623 | } |
1629 | 1624 | ||
1630 | return merge_done; | 1625 | return merge_done; |
@@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |||
2039 | struct ext4_ext_cache *cex; | 2034 | struct ext4_ext_cache *cex; |
2040 | int ret = EXT4_EXT_CACHE_NO; | 2035 | int ret = EXT4_EXT_CACHE_NO; |
2041 | 2036 | ||
2042 | /* | 2037 | /* |
2043 | * We borrow i_block_reservation_lock to protect i_cached_extent | 2038 | * We borrow i_block_reservation_lock to protect i_cached_extent |
2044 | */ | 2039 | */ |
2045 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 2040 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2361 | int depth = ext_depth(inode); | 2356 | int depth = ext_depth(inode); |
2362 | struct ext4_ext_path *path; | 2357 | struct ext4_ext_path *path; |
2363 | handle_t *handle; | 2358 | handle_t *handle; |
2364 | int i = 0, err = 0; | 2359 | int i, err; |
2365 | 2360 | ||
2366 | ext_debug("truncate since %u\n", start); | 2361 | ext_debug("truncate since %u\n", start); |
2367 | 2362 | ||
@@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2370 | if (IS_ERR(handle)) | 2365 | if (IS_ERR(handle)) |
2371 | return PTR_ERR(handle); | 2366 | return PTR_ERR(handle); |
2372 | 2367 | ||
2368 | again: | ||
2373 | ext4_ext_invalidate_cache(inode); | 2369 | ext4_ext_invalidate_cache(inode); |
2374 | 2370 | ||
2375 | /* | 2371 | /* |
2376 | * We start scanning from right side, freeing all the blocks | 2372 | * We start scanning from right side, freeing all the blocks |
2377 | * after i_size and walking into the tree depth-wise. | 2373 | * after i_size and walking into the tree depth-wise. |
2378 | */ | 2374 | */ |
2375 | depth = ext_depth(inode); | ||
2379 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); | 2376 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); |
2380 | if (path == NULL) { | 2377 | if (path == NULL) { |
2381 | ext4_journal_stop(handle); | 2378 | ext4_journal_stop(handle); |
2382 | return -ENOMEM; | 2379 | return -ENOMEM; |
2383 | } | 2380 | } |
2381 | path[0].p_depth = depth; | ||
2384 | path[0].p_hdr = ext_inode_hdr(inode); | 2382 | path[0].p_hdr = ext_inode_hdr(inode); |
2385 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2383 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { |
2386 | err = -EIO; | 2384 | err = -EIO; |
2387 | goto out; | 2385 | goto out; |
2388 | } | 2386 | } |
2389 | path[0].p_depth = depth; | 2387 | i = err = 0; |
2390 | 2388 | ||
2391 | while (i >= 0 && err == 0) { | 2389 | while (i >= 0 && err == 0) { |
2392 | if (i == depth) { | 2390 | if (i == depth) { |
@@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2480 | out: | 2478 | out: |
2481 | ext4_ext_drop_refs(path); | 2479 | ext4_ext_drop_refs(path); |
2482 | kfree(path); | 2480 | kfree(path); |
2481 | if (err == -EAGAIN) | ||
2482 | goto again; | ||
2483 | ext4_journal_stop(handle); | 2483 | ext4_journal_stop(handle); |
2484 | 2484 | ||
2485 | return err; | 2485 | return err; |
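Taken together, the remove-space hunks trade the old "invalidate the cache after every transaction restart" approach for a full restart: ext4_ext_truncate_extend_restart() now reports a successful restart as -EAGAIN, the error unwinds to the cleanup path, and the loop is re-entered at the again: label with the cache invalidated and the path array rebuilt from a freshly read depth. In outline (a fragment, not the literal code):

        again:
                ext4_ext_invalidate_cache(inode);
                depth = ext_depth(inode);       /* may have changed meanwhile */
                path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
                               GFP_NOFS);
                /* ... walk the tree freeing blocks; a transaction
                 * restart surfaces here as err == -EAGAIN ... */
        out:
                ext4_ext_drop_refs(path);
                kfree(path);
                if (err == -EAGAIN)
                        goto again;     /* retry against consistent state */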
@@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error) | |||
2544 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 2544 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2545 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 2545 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2546 | { | 2546 | { |
2547 | int ret = -EIO; | 2547 | int ret; |
2548 | struct bio *bio; | 2548 | struct bio *bio; |
2549 | int blkbits, blocksize; | 2549 | int blkbits, blocksize; |
2550 | sector_t ee_pblock; | 2550 | sector_t ee_pblock; |
@@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2568 | len = ee_len; | 2568 | len = ee_len; |
2569 | 2569 | ||
2570 | bio = bio_alloc(GFP_NOIO, len); | 2570 | bio = bio_alloc(GFP_NOIO, len); |
2571 | if (!bio) | ||
2572 | return -ENOMEM; | ||
2573 | |||
2571 | bio->bi_sector = ee_pblock; | 2574 | bio->bi_sector = ee_pblock; |
2572 | bio->bi_bdev = inode->i_sb->s_bdev; | 2575 | bio->bi_bdev = inode->i_sb->s_bdev; |
2573 | 2576 | ||
@@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2595 | submit_bio(WRITE, bio); | 2598 | submit_bio(WRITE, bio); |
2596 | wait_for_completion(&event); | 2599 | wait_for_completion(&event); |
2597 | 2600 | ||
2598 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 2601 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
2599 | ret = 0; | 2602 | bio_put(bio); |
2600 | else { | 2603 | return -EIO; |
2601 | ret = -EIO; | ||
2602 | break; | ||
2603 | } | 2604 | } |
2604 | bio_put(bio); | 2605 | bio_put(bio); |
2605 | ee_len -= done; | 2606 | ee_len -= done; |
2606 | ee_pblock += done << (blkbits - 9); | 2607 | ee_pblock += done << (blkbits - 9); |
2607 | } | 2608 | } |
2608 | return ret; | 2609 | return 0; |
2609 | } | 2610 | } |
2610 | 2611 | ||
2611 | #define EXT4_EXT_ZERO_LEN 7 | 2612 | #define EXT4_EXT_ZERO_LEN 7 |
2612 | /* | 2613 | /* |
2613 | * This function is called by ext4_ext_get_blocks() if someone tries to write | 2614 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
2614 | * to an uninitialized extent. It may result in splitting the uninitialized | 2615 | * to an uninitialized extent. It may result in splitting the uninitialized |
2615 | * extent into multiple extents (up to three - one initialized and two | 2616 | * extent into multiple extents (up to three - one initialized and two |
2616 | * uninitialized). | 2617 | * uninitialized). |
@@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2620 | * c> Splits in three extents: Someone is writing in the middle of the extent | 2621 | * c> Splits in three extents: Someone is writing in the middle of the extent |
2621 | */ | 2622 | */ |
2622 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 2623 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
2623 | struct inode *inode, | 2624 | struct inode *inode, |
2624 | struct ext4_ext_path *path, | 2625 | struct ext4_map_blocks *map, |
2625 | ext4_lblk_t iblock, | 2626 | struct ext4_ext_path *path) |
2626 | unsigned int max_blocks) | ||
2627 | { | 2627 | { |
2628 | struct ext4_extent *ex, newex, orig_ex; | 2628 | struct ext4_extent *ex, newex, orig_ex; |
2629 | struct ext4_extent *ex1 = NULL; | 2629 | struct ext4_extent *ex1 = NULL; |
2630 | struct ext4_extent *ex2 = NULL; | 2630 | struct ext4_extent *ex2 = NULL; |
2631 | struct ext4_extent *ex3 = NULL; | 2631 | struct ext4_extent *ex3 = NULL; |
2632 | struct ext4_extent_header *eh; | 2632 | struct ext4_extent_header *eh; |
2633 | ext4_lblk_t ee_block; | 2633 | ext4_lblk_t ee_block, eof_block; |
2634 | unsigned int allocated, ee_len, depth; | 2634 | unsigned int allocated, ee_len, depth; |
2635 | ext4_fsblk_t newblock; | 2635 | ext4_fsblk_t newblock; |
2636 | int err = 0; | 2636 | int err = 0; |
2637 | int ret = 0; | 2637 | int ret = 0; |
2638 | int may_zeroout; | ||
2639 | |||
2640 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical " | ||
2641 | "block %llu, max_blocks %u\n", inode->i_ino, | ||
2642 | (unsigned long long)map->m_lblk, map->m_len); | ||
2643 | |||
2644 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | ||
2645 | inode->i_sb->s_blocksize_bits; | ||
2646 | if (eof_block < map->m_lblk + map->m_len) | ||
2647 | eof_block = map->m_lblk + map->m_len; | ||
2638 | 2648 | ||
2639 | depth = ext_depth(inode); | 2649 | depth = ext_depth(inode); |
2640 | eh = path[depth].p_hdr; | 2650 | eh = path[depth].p_hdr; |
2641 | ex = path[depth].p_ext; | 2651 | ex = path[depth].p_ext; |
2642 | ee_block = le32_to_cpu(ex->ee_block); | 2652 | ee_block = le32_to_cpu(ex->ee_block); |
2643 | ee_len = ext4_ext_get_actual_len(ex); | 2653 | ee_len = ext4_ext_get_actual_len(ex); |
2644 | allocated = ee_len - (iblock - ee_block); | 2654 | allocated = ee_len - (map->m_lblk - ee_block); |
2645 | newblock = iblock - ee_block + ext_pblock(ex); | 2655 | newblock = map->m_lblk - ee_block + ext_pblock(ex); |
2656 | |||
2646 | ex2 = ex; | 2657 | ex2 = ex; |
2647 | orig_ex.ee_block = ex->ee_block; | 2658 | orig_ex.ee_block = ex->ee_block; |
2648 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2659 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2649 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2660 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); |
2650 | 2661 | ||
2662 | /* | ||
2663 | * It is safe to convert extent to initialized via explicit | ||
2664 | * zeroout only if extent is fully inside i_size or new_size. | ||
2665 | */ | ||
2666 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2667 | |||
2651 | err = ext4_ext_get_access(handle, inode, path + depth); | 2668 | err = ext4_ext_get_access(handle, inode, path + depth); |
2652 | if (err) | 2669 | if (err) |
2653 | goto out; | 2670 | goto out; |
2654 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zero out directly */ | 2671 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zero out directly */ |
2655 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { | 2672 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { |
2656 | err = ext4_ext_zeroout(inode, &orig_ex); | 2673 | err = ext4_ext_zeroout(inode, &orig_ex); |
2657 | if (err) | 2674 | if (err) |
2658 | goto fix_extent_len; | 2675 | goto fix_extent_len; |
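The eof_block guard is worth a worked example. Assuming 4 KiB blocks (s_blocksize_bits = 12) and i_size = 10000 bytes:

        eof_block = (10000 + 4096 - 1) >> 12 = 14095 >> 12 = 3

An extent with ee_block = 0 and ee_len = 3 satisfies ee_block + ee_len <= eof_block, so may_zeroout is set: every block it covers lies within the rounded-up EOF (or the end of the current write, whichever is larger), and converting it via zeroout is safe. An extent reaching block 4, for example preallocated space past EOF, fails the test and must be split instead of zeroed.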
@@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2665 | return allocated; | 2682 | return allocated; |
2666 | } | 2683 | } |
2667 | 2684 | ||
2668 | /* ex1: ee_block to iblock - 1 : uninitialized */ | 2685 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ |
2669 | if (iblock > ee_block) { | 2686 | if (map->m_lblk > ee_block) { |
2670 | ex1 = ex; | 2687 | ex1 = ex; |
2671 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 2688 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
2672 | ext4_ext_mark_uninitialized(ex1); | 2689 | ext4_ext_mark_uninitialized(ex1); |
2673 | ex2 = &newex; | 2690 | ex2 = &newex; |
2674 | } | 2691 | } |
@@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2677 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | 2694 | * we insert ex3, if ex1 is NULL. This is to avoid temporary |
2678 | * overlap of blocks. | 2695 | * overlap of blocks. |
2679 | */ | 2696 | */ |
2680 | if (!ex1 && allocated > max_blocks) | 2697 | if (!ex1 && allocated > map->m_len) |
2681 | ex2->ee_len = cpu_to_le16(max_blocks); | 2698 | ex2->ee_len = cpu_to_le16(map->m_len); |
2682 | /* ex3: to ee_block + ee_len : uninitialised */ | 2699 | /* ex3: to ee_block + ee_len : uninitialised */ |
2683 | if (allocated > max_blocks) { | 2700 | if (allocated > map->m_len) { |
2684 | unsigned int newdepth; | 2701 | unsigned int newdepth; |
2685 | /* If extent has less than EXT4_EXT_ZERO_LEN zero out directly */ | 2702 | /* If extent has less than EXT4_EXT_ZERO_LEN zero out directly */ |
2686 | if (allocated <= EXT4_EXT_ZERO_LEN) { | 2703 | if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { |
2687 | /* | 2704 | /* |
2688 | * iblock == ee_block is handled by the zeroout | 2705 | * map->m_lblk == ee_block is handled by the zeroout |
2689 | * at the beginning. | 2706 | * at the beginning. |
2690 | * Mark first half uninitialized. | 2707 | * Mark first half uninitialized. |
2691 | * Mark second half initialized and zero out the | 2708 | * Mark second half initialized and zero out the |
@@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2698 | ext4_ext_dirty(handle, inode, path + depth); | 2715 | ext4_ext_dirty(handle, inode, path + depth); |
2699 | 2716 | ||
2700 | ex3 = &newex; | 2717 | ex3 = &newex; |
2701 | ex3->ee_block = cpu_to_le32(iblock); | 2718 | ex3->ee_block = cpu_to_le32(map->m_lblk); |
2702 | ext4_ext_store_pblock(ex3, newblock); | 2719 | ext4_ext_store_pblock(ex3, newblock); |
2703 | ex3->ee_len = cpu_to_le16(allocated); | 2720 | ex3->ee_len = cpu_to_le16(allocated); |
2704 | err = ext4_ext_insert_extent(handle, inode, path, | 2721 | err = ext4_ext_insert_extent(handle, inode, path, |
@@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2711 | ex->ee_len = orig_ex.ee_len; | 2728 | ex->ee_len = orig_ex.ee_len; |
2712 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2729 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
2713 | ext4_ext_dirty(handle, inode, path + depth); | 2730 | ext4_ext_dirty(handle, inode, path + depth); |
2714 | /* blocks available from iblock */ | 2731 | /* blocks available from map->m_lblk */ |
2715 | return allocated; | 2732 | return allocated; |
2716 | 2733 | ||
2717 | } else if (err) | 2734 | } else if (err) |
@@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2733 | */ | 2750 | */ |
2734 | depth = ext_depth(inode); | 2751 | depth = ext_depth(inode); |
2735 | ext4_ext_drop_refs(path); | 2752 | ext4_ext_drop_refs(path); |
2736 | path = ext4_ext_find_extent(inode, | 2753 | path = ext4_ext_find_extent(inode, map->m_lblk, |
2737 | iblock, path); | 2754 | path); |
2738 | if (IS_ERR(path)) { | 2755 | if (IS_ERR(path)) { |
2739 | err = PTR_ERR(path); | 2756 | err = PTR_ERR(path); |
2740 | return err; | 2757 | return err; |
@@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2754 | return allocated; | 2771 | return allocated; |
2755 | } | 2772 | } |
2756 | ex3 = &newex; | 2773 | ex3 = &newex; |
2757 | ex3->ee_block = cpu_to_le32(iblock + max_blocks); | 2774 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); |
2758 | ext4_ext_store_pblock(ex3, newblock + max_blocks); | 2775 | ext4_ext_store_pblock(ex3, newblock + map->m_len); |
2759 | ex3->ee_len = cpu_to_le16(allocated - max_blocks); | 2776 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); |
2760 | ext4_ext_mark_uninitialized(ex3); | 2777 | ext4_ext_mark_uninitialized(ex3); |
2761 | err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); | 2778 | err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); |
2762 | if (err == -ENOSPC) { | 2779 | if (err == -ENOSPC && may_zeroout) { |
2763 | err = ext4_ext_zeroout(inode, &orig_ex); | 2780 | err = ext4_ext_zeroout(inode, &orig_ex); |
2764 | if (err) | 2781 | if (err) |
2765 | goto fix_extent_len; | 2782 | goto fix_extent_len; |
@@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2769 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2786 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
2770 | ext4_ext_dirty(handle, inode, path + depth); | 2787 | ext4_ext_dirty(handle, inode, path + depth); |
2771 | /* zeroed the full extent */ | 2788 | /* zeroed the full extent */ |
2772 | /* blocks available from iblock */ | 2789 | /* blocks available from map->m_lblk */ |
2773 | return allocated; | 2790 | return allocated; |
2774 | 2791 | ||
2775 | } else if (err) | 2792 | } else if (err) |
@@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2783 | * update the extent length after successful insert of the | 2800 | * update the extent length after successful insert of the |
2784 | * split extent | 2801 | * split extent |
2785 | */ | 2802 | */ |
2786 | orig_ex.ee_len = cpu_to_le16(ee_len - | 2803 | ee_len -= ext4_ext_get_actual_len(ex3); |
2787 | ext4_ext_get_actual_len(ex3)); | 2804 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2805 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2806 | |||
2788 | depth = newdepth; | 2807 | depth = newdepth; |
2789 | ext4_ext_drop_refs(path); | 2808 | ext4_ext_drop_refs(path); |
2790 | path = ext4_ext_find_extent(inode, iblock, path); | 2809 | path = ext4_ext_find_extent(inode, map->m_lblk, path); |
2791 | if (IS_ERR(path)) { | 2810 | if (IS_ERR(path)) { |
2792 | err = PTR_ERR(path); | 2811 | err = PTR_ERR(path); |
2793 | goto out; | 2812 | goto out; |
@@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2801 | if (err) | 2820 | if (err) |
2802 | goto out; | 2821 | goto out; |
2803 | 2822 | ||
2804 | allocated = max_blocks; | 2823 | allocated = map->m_len; |
2805 | 2824 | ||
2806 | /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying | 2825 | /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying |
2807 | * to insert an extent in the middle, zero out directly, | 2826 | * to insert an extent in the middle, zero out directly, |
2808 | * otherwise give the extent a chance to merge to the left | 2827 | * otherwise give the extent a chance to merge to the left |
2809 | */ | 2828 | */ |
2810 | if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && | 2829 | if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && |
2811 | iblock != ee_block) { | 2830 | map->m_lblk != ee_block && may_zeroout) { |
2812 | err = ext4_ext_zeroout(inode, &orig_ex); | 2831 | err = ext4_ext_zeroout(inode, &orig_ex); |
2813 | if (err) | 2832 | if (err) |
2814 | goto fix_extent_len; | 2833 | goto fix_extent_len; |
@@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2818 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2837 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
2819 | ext4_ext_dirty(handle, inode, path + depth); | 2838 | ext4_ext_dirty(handle, inode, path + depth); |
2820 | /* zero out the first half */ | 2839 | /* zero out the first half */ |
2821 | /* blocks available from iblock */ | 2840 | /* blocks available from map->m_lblk */ |
2822 | return allocated; | 2841 | return allocated; |
2823 | } | 2842 | } |
2824 | } | 2843 | } |
@@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2829 | */ | 2848 | */ |
2830 | if (ex1 && ex1 != ex) { | 2849 | if (ex1 && ex1 != ex) { |
2831 | ex1 = ex; | 2850 | ex1 = ex; |
2832 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 2851 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
2833 | ext4_ext_mark_uninitialized(ex1); | 2852 | ext4_ext_mark_uninitialized(ex1); |
2834 | ex2 = &newex; | 2853 | ex2 = &newex; |
2835 | } | 2854 | } |
2836 | /* ex2: iblock to iblock + maxblocks-1 : initialised */ | 2855 | /* ex2: map->m_lblk to map->m_lblk + map->m_len - 1 : initialised */ |
2837 | ex2->ee_block = cpu_to_le32(iblock); | 2856 | ex2->ee_block = cpu_to_le32(map->m_lblk); |
2838 | ext4_ext_store_pblock(ex2, newblock); | 2857 | ext4_ext_store_pblock(ex2, newblock); |
2839 | ex2->ee_len = cpu_to_le16(allocated); | 2858 | ex2->ee_len = cpu_to_le16(allocated); |
2840 | if (ex2 != ex) | 2859 | if (ex2 != ex) |
@@ -2877,7 +2896,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2877 | goto out; | 2896 | goto out; |
2878 | insert: | 2897 | insert: |
2879 | err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); | 2898 | err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); |
2880 | if (err == -ENOSPC) { | 2899 | if (err == -ENOSPC && may_zeroout) { |
2881 | err = ext4_ext_zeroout(inode, &orig_ex); | 2900 | err = ext4_ext_zeroout(inode, &orig_ex); |
2882 | if (err) | 2901 | if (err) |
2883 | goto fix_extent_len; | 2902 | goto fix_extent_len; |
@@ -2904,7 +2923,7 @@ fix_extent_len: | |||
2904 | } | 2923 | } |
2905 | 2924 | ||
2906 | /* | 2925 | /* |
2907 | * This function is called by ext4_ext_get_blocks() from | 2926 | * This function is called by ext4_ext_map_blocks() from |
2908 | * ext4_get_blocks_dio_write() when DIO to write | 2927 | * ext4_get_blocks_dio_write() when DIO to write |
2909 | * to an uninitialized extent. | 2928 | * to an uninitialized extent. |
2910 | * | 2929 | * |
@@ -2927,9 +2946,8 @@ fix_extent_len: | |||
2927 | */ | 2946 | */ |
2928 | static int ext4_split_unwritten_extents(handle_t *handle, | 2947 | static int ext4_split_unwritten_extents(handle_t *handle, |
2929 | struct inode *inode, | 2948 | struct inode *inode, |
2949 | struct ext4_map_blocks *map, | ||
2930 | struct ext4_ext_path *path, | 2950 | struct ext4_ext_path *path, |
2931 | ext4_lblk_t iblock, | ||
2932 | unsigned int max_blocks, | ||
2933 | int flags) | 2951 | int flags) |
2934 | { | 2952 | { |
2935 | struct ext4_extent *ex, newex, orig_ex; | 2953 | struct ext4_extent *ex, newex, orig_ex; |
@@ -2937,41 +2955,55 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2937 | struct ext4_extent *ex2 = NULL; | 2955 | struct ext4_extent *ex2 = NULL; |
2938 | struct ext4_extent *ex3 = NULL; | 2956 | struct ext4_extent *ex3 = NULL; |
2939 | struct ext4_extent_header *eh; | 2957 | struct ext4_extent_header *eh; |
2940 | ext4_lblk_t ee_block; | 2958 | ext4_lblk_t ee_block, eof_block; |
2941 | unsigned int allocated, ee_len, depth; | 2959 | unsigned int allocated, ee_len, depth; |
2942 | ext4_fsblk_t newblock; | 2960 | ext4_fsblk_t newblock; |
2943 | int err = 0; | 2961 | int err = 0; |
2962 | int may_zeroout; | ||
2963 | |||
2964 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical " | ||
2965 | "block %llu, max_blocks %u\n", inode->i_ino, | ||
2966 | (unsigned long long)map->m_lblk, map->m_len); | ||
2967 | |||
2968 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | ||
2969 | inode->i_sb->s_blocksize_bits; | ||
2970 | if (eof_block < map->m_lblk + map->m_len) | ||
2971 | eof_block = map->m_lblk + map->m_len; | ||
2944 | 2972 | ||
2945 | ext_debug("ext4_split_unwritten_extents: inode %lu," | ||
2946 | "iblock %llu, max_blocks %u\n", inode->i_ino, | ||
2947 | (unsigned long long)iblock, max_blocks); | ||
2948 | depth = ext_depth(inode); | 2973 | depth = ext_depth(inode); |
2949 | eh = path[depth].p_hdr; | 2974 | eh = path[depth].p_hdr; |
2950 | ex = path[depth].p_ext; | 2975 | ex = path[depth].p_ext; |
2951 | ee_block = le32_to_cpu(ex->ee_block); | 2976 | ee_block = le32_to_cpu(ex->ee_block); |
2952 | ee_len = ext4_ext_get_actual_len(ex); | 2977 | ee_len = ext4_ext_get_actual_len(ex); |
2953 | allocated = ee_len - (iblock - ee_block); | 2978 | allocated = ee_len - (map->m_lblk - ee_block); |
2954 | newblock = iblock - ee_block + ext_pblock(ex); | 2979 | newblock = map->m_lblk - ee_block + ext_pblock(ex); |
2980 | |||
2955 | ex2 = ex; | 2981 | ex2 = ex; |
2956 | orig_ex.ee_block = ex->ee_block; | 2982 | orig_ex.ee_block = ex->ee_block; |
2957 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2983 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2958 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2984 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); |
2959 | 2985 | ||
2960 | /* | 2986 | /* |
2987 | * It is safe to convert extent to initialized via explicit | ||
2988 | * zeroout only if extent is fully inside i_size or new_size. | ||
2989 | */ | ||
2990 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2991 | |||
2992 | /* | ||
2961 | * If the uninitialized extent begins at the same logical | 2993 | * If the uninitialized extent begins at the same logical |
2962 | * block where the write begins, and the write completely | 2994 | * block where the write begins, and the write completely |
2963 | * covers the extent, then we don't need to split it. | 2995 | * covers the extent, then we don't need to split it. |
2964 | */ | 2996 | */ |
2965 | if ((iblock == ee_block) && (allocated <= max_blocks)) | 2997 | if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) |
2966 | return allocated; | 2998 | return allocated; |
2967 | 2999 | ||
2968 | err = ext4_ext_get_access(handle, inode, path + depth); | 3000 | err = ext4_ext_get_access(handle, inode, path + depth); |
2969 | if (err) | 3001 | if (err) |
2970 | goto out; | 3002 | goto out; |
2971 | /* ex1: ee_block to iblock - 1 : uninitialized */ | 3003 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ |
2972 | if (iblock > ee_block) { | 3004 | if (map->m_lblk > ee_block) { |
2973 | ex1 = ex; | 3005 | ex1 = ex; |
2974 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 3006 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
2975 | ext4_ext_mark_uninitialized(ex1); | 3007 | ext4_ext_mark_uninitialized(ex1); |
2976 | ex2 = &newex; | 3008 | ex2 = &newex; |
2977 | } | 3009 | } |
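The may_zeroout guard computed above is worth restating: zeroing out an uninitialized extent silently converts it to initialized, which is safe only while no part of the extent lies beyond EOF, where stale blocks would otherwise become readable as data. A condensed sketch of the check (names follow the patch; an illustration, not verbatim kernel code):

static int extent_may_zeroout(struct inode *inode,
                              ext4_lblk_t ee_block, unsigned int ee_len,
                              ext4_lblk_t m_lblk, unsigned int m_len)
{
        ext4_lblk_t eof_block;

        /* first logical block past i_size, rounded up to a full block */
        eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
                inode->i_sb->s_blocksize_bits;
        /* the write in flight may itself extend the file */
        if (eof_block < m_lblk + m_len)
                eof_block = m_lblk + m_len;

        /* safe only if the extent sits fully inside the (new) EOF */
        return ee_block + ee_len <= eof_block;
}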
@@ -2980,18 +3012,18 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2980 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | 3012 | * we insert ex3, if ex1 is NULL. This is to avoid temporary |
2981 | * overlap of blocks. | 3013 | * overlap of blocks. |
2982 | */ | 3014 | */ |
2983 | if (!ex1 && allocated > max_blocks) | 3015 | if (!ex1 && allocated > map->m_len) |
2984 | ex2->ee_len = cpu_to_le16(max_blocks); | 3016 | ex2->ee_len = cpu_to_le16(map->m_len); |
2985 | /* ex3: to ee_block + ee_len : uninitialised */ | 3017 | /* ex3: to ee_block + ee_len : uninitialised */ |
2986 | if (allocated > max_blocks) { | 3018 | if (allocated > map->m_len) { |
2987 | unsigned int newdepth; | 3019 | unsigned int newdepth; |
2988 | ex3 = &newex; | 3020 | ex3 = &newex; |
2989 | ex3->ee_block = cpu_to_le32(iblock + max_blocks); | 3021 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); |
2990 | ext4_ext_store_pblock(ex3, newblock + max_blocks); | 3022 | ext4_ext_store_pblock(ex3, newblock + map->m_len); |
2991 | ex3->ee_len = cpu_to_le16(allocated - max_blocks); | 3023 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); |
2992 | ext4_ext_mark_uninitialized(ex3); | 3024 | ext4_ext_mark_uninitialized(ex3); |
2993 | err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); | 3025 | err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); |
2994 | if (err == -ENOSPC) { | 3026 | if (err == -ENOSPC && may_zeroout) { |
2995 | err = ext4_ext_zeroout(inode, &orig_ex); | 3027 | err = ext4_ext_zeroout(inode, &orig_ex); |
2996 | if (err) | 3028 | if (err) |
2997 | goto fix_extent_len; | 3029 | goto fix_extent_len; |
@@ -3001,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3001 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3033 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
3002 | ext4_ext_dirty(handle, inode, path + depth); | 3034 | ext4_ext_dirty(handle, inode, path + depth); |
3003 | /* zeroed the full extent */ | 3035 | /* zeroed the full extent */ |
3004 | /* blocks available from iblock */ | 3036 | /* blocks available from map->m_lblk */ |
3005 | return allocated; | 3037 | return allocated; |
3006 | 3038 | ||
3007 | } else if (err) | 3039 | } else if (err) |
@@ -3015,11 +3047,13 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3015 | * update the extent length after successful insert of the | 3047 | * update the extent length after successful insert of the |
3016 | * split extent | 3048 | * split extent |
3017 | */ | 3049 | */ |
3018 | orig_ex.ee_len = cpu_to_le16(ee_len - | 3050 | ee_len -= ext4_ext_get_actual_len(ex3); |
3019 | ext4_ext_get_actual_len(ex3)); | 3051 | orig_ex.ee_len = cpu_to_le16(ee_len); |
3052 | may_zeroout = ee_block + ee_len <= eof_block; | ||
3053 | |||
3020 | depth = newdepth; | 3054 | depth = newdepth; |
3021 | ext4_ext_drop_refs(path); | 3055 | ext4_ext_drop_refs(path); |
3022 | path = ext4_ext_find_extent(inode, iblock, path); | 3056 | path = ext4_ext_find_extent(inode, map->m_lblk, path); |
3023 | if (IS_ERR(path)) { | 3057 | if (IS_ERR(path)) { |
3024 | err = PTR_ERR(path); | 3058 | err = PTR_ERR(path); |
3025 | goto out; | 3059 | goto out; |
@@ -3033,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3033 | if (err) | 3067 | if (err) |
3034 | goto out; | 3068 | goto out; |
3035 | 3069 | ||
3036 | allocated = max_blocks; | 3070 | allocated = map->m_len; |
3037 | } | 3071 | } |
3038 | /* | 3072 | /* |
3039 | * If there was a change of depth as part of the | 3073 | * If there was a change of depth as part of the |
@@ -3042,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3042 | */ | 3076 | */ |
3043 | if (ex1 && ex1 != ex) { | 3077 | if (ex1 && ex1 != ex) { |
3044 | ex1 = ex; | 3078 | ex1 = ex; |
3045 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 3079 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
3046 | ext4_ext_mark_uninitialized(ex1); | 3080 | ext4_ext_mark_uninitialized(ex1); |
3047 | ex2 = &newex; | 3081 | ex2 = &newex; |
3048 | } | 3082 | } |
3049 | /* | 3083 | /* |
3050 | * ex2: iblock to iblock + maxblocks-1 : to be direct IO written, | 3084 | * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written |
3051 | * uninitialised still. | 3085 | * using direct I/O, uninitialised still. |
3052 | */ | 3086 | */ |
3053 | ex2->ee_block = cpu_to_le32(iblock); | 3087 | ex2->ee_block = cpu_to_le32(map->m_lblk); |
3054 | ext4_ext_store_pblock(ex2, newblock); | 3088 | ext4_ext_store_pblock(ex2, newblock); |
3055 | ex2->ee_len = cpu_to_le16(allocated); | 3089 | ex2->ee_len = cpu_to_le16(allocated); |
3056 | ext4_ext_mark_uninitialized(ex2); | 3090 | ext4_ext_mark_uninitialized(ex2); |
@@ -3062,7 +3096,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3062 | goto out; | 3096 | goto out; |
3063 | insert: | 3097 | insert: |
3064 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3098 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3065 | if (err == -ENOSPC) { | 3099 | if (err == -ENOSPC && may_zeroout) { |
3066 | err = ext4_ext_zeroout(inode, &orig_ex); | 3100 | err = ext4_ext_zeroout(inode, &orig_ex); |
3067 | if (err) | 3101 | if (err) |
3068 | goto fix_extent_len; | 3102 | goto fix_extent_len; |
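To make the three-way split performed by this function concrete, a worked example with hypothetical numbers:

/*
 * An uninitialized extent covers logical blocks 100..115
 * (ee_block = 100, ee_len = 16) and a DIO write maps blocks 104..107
 * (map->m_lblk = 104, map->m_len = 4, so allocated = 12):
 *
 *   ex1: 100..103  stays uninitialized (m_lblk - ee_block = 4 blocks)
 *   ex2: 104..107  the written range, still marked uninitialized
 *                  until the I/O completes and is converted
 *   ex3: 108..115  stays uninitialized (allocated - m_len = 8 blocks)
 *
 * ex3 is inserted first, and ee_len/may_zeroout are recomputed after
 * that insert, so the tree never holds two extents covering the same
 * logical range.
 */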
@@ -3152,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, | |||
3152 | 3186 | ||
3153 | static int | 3187 | static int |
3154 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3188 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3155 | ext4_lblk_t iblock, unsigned int max_blocks, | 3189 | struct ext4_map_blocks *map, |
3156 | struct ext4_ext_path *path, int flags, | 3190 | struct ext4_ext_path *path, int flags, |
3157 | unsigned int allocated, struct buffer_head *bh_result, | 3191 | unsigned int allocated, ext4_fsblk_t newblock) |
3158 | ext4_fsblk_t newblock) | ||
3159 | { | 3192 | { |
3160 | int ret = 0; | 3193 | int ret = 0; |
3161 | int err = 0; | 3194 | int err = 0; |
@@ -3163,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3163 | 3196 | ||
3164 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " | 3197 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " |
3165 | "block %llu, max_blocks %u, flags %d, allocated %u", | 3198 | "block %llu, max_blocks %u, flags %d, allocated %u", |
3166 | inode->i_ino, (unsigned long long)iblock, max_blocks, | 3199 | inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, |
3167 | flags, allocated); | 3200 | flags, allocated); |
3168 | ext4_ext_show_leaf(inode, path); | 3201 | ext4_ext_show_leaf(inode, path); |
3169 | 3202 | ||
3170 | /* get_block() before submitting the IO: split the extent */ | 3203 | /* get_block() before submitting the IO: split the extent */ |
3171 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3204 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3172 | ret = ext4_split_unwritten_extents(handle, | 3205 | ret = ext4_split_unwritten_extents(handle, inode, map, |
3173 | inode, path, iblock, | 3206 | path, flags); |
3174 | max_blocks, flags); | ||
3175 | /* | 3207 | /* |
3176 | * Flag the inode (non-AIO case) or end_io struct (AIO case) | 3208 | * Flag the inode (non-AIO case) or end_io struct (AIO case) |
3177 | * that this IO needs conversion to written when the IO is | 3209 | * that this IO needs conversion to written when the IO is |
@@ -3182,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3182 | else | 3214 | else |
3183 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3215 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3184 | if (ext4_should_dioread_nolock(inode)) | 3216 | if (ext4_should_dioread_nolock(inode)) |
3185 | set_buffer_uninit(bh_result); | 3217 | map->m_flags |= EXT4_MAP_UNINIT; |
3186 | goto out; | 3218 | goto out; |
3187 | } | 3219 | } |
3188 | /* IO end_io complete, convert the filled extent to written */ | 3220 | /* IO end_io complete, convert the filled extent to written */ |
@@ -3210,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3210 | * the buffer head will be unmapped so that | 3242 | * the buffer head will be unmapped so that |
3211 | * a read from the block returns 0s. | 3243 | * a read from the block returns 0s. |
3212 | */ | 3244 | */ |
3213 | set_buffer_unwritten(bh_result); | 3245 | map->m_flags |= EXT4_MAP_UNWRITTEN; |
3214 | goto out1; | 3246 | goto out1; |
3215 | } | 3247 | } |
3216 | 3248 | ||
3217 | /* buffered write, writepage time, convert */ | 3249 | /* buffered write, writepage time, convert */ |
3218 | ret = ext4_ext_convert_to_initialized(handle, inode, | 3250 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3219 | path, iblock, | ||
3220 | max_blocks); | ||
3221 | if (ret >= 0) | 3251 | if (ret >= 0) |
3222 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3252 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3223 | out: | 3253 | out: |
@@ -3226,7 +3256,7 @@ out: | |||
3226 | goto out2; | 3256 | goto out2; |
3227 | } else | 3257 | } else |
3228 | allocated = ret; | 3258 | allocated = ret; |
3229 | set_buffer_new(bh_result); | 3259 | map->m_flags |= EXT4_MAP_NEW; |
3230 | /* | 3260 | /* |
3231 | * if we allocated more blocks than requested | 3261 | * if we allocated more blocks than requested |
3232 | * we need to make sure we unmap the extra block | 3262 | * we need to make sure we unmap the extra block |
@@ -3234,11 +3264,11 @@ out: | |||
3234 | * unmapped later when we find the buffer_head marked | 3264 | * unmapped later when we find the buffer_head marked |
3235 | * new. | 3265 | * new. |
3236 | */ | 3266 | */ |
3237 | if (allocated > max_blocks) { | 3267 | if (allocated > map->m_len) { |
3238 | unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, | 3268 | unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, |
3239 | newblock + max_blocks, | 3269 | newblock + map->m_len, |
3240 | allocated - max_blocks); | 3270 | allocated - map->m_len); |
3241 | allocated = max_blocks; | 3271 | allocated = map->m_len; |
3242 | } | 3272 | } |
3243 | 3273 | ||
3244 | /* | 3274 | /* |
@@ -3252,13 +3282,13 @@ out: | |||
3252 | ext4_da_update_reserve_space(inode, allocated, 0); | 3282 | ext4_da_update_reserve_space(inode, allocated, 0); |
3253 | 3283 | ||
3254 | map_out: | 3284 | map_out: |
3255 | set_buffer_mapped(bh_result); | 3285 | map->m_flags |= EXT4_MAP_MAPPED; |
3256 | out1: | 3286 | out1: |
3257 | if (allocated > max_blocks) | 3287 | if (allocated > map->m_len) |
3258 | allocated = max_blocks; | 3288 | allocated = map->m_len; |
3259 | ext4_ext_show_leaf(inode, path); | 3289 | ext4_ext_show_leaf(inode, path); |
3260 | bh_result->b_bdev = inode->i_sb->s_bdev; | 3290 | map->m_pblk = newblock; |
3261 | bh_result->b_blocknr = newblock; | 3291 | map->m_len = allocated; |
3262 | out2: | 3292 | out2: |
3263 | if (path) { | 3293 | if (path) { |
3264 | ext4_ext_drop_refs(path); | 3294 | ext4_ext_drop_refs(path); |
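For reference, the ext4_map_blocks structure that replaces the (iblock, max_blocks, bh_result) triple throughout this patch; field names are taken from the patch, the exact definition lives in fs/ext4/ext4.h:

struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;    /* first physical block (result) */
        ext4_lblk_t m_lblk;     /* first logical block (input) */
        unsigned int m_len;     /* number of blocks (input/result) */
        unsigned int m_flags;   /* EXT4_MAP_* state bits (result) */
};

The EXT4_MAP_NEW/MAPPED/UNWRITTEN/UNINIT/BOUNDARY bits are defined from the matching BH_* bits, so the remaining buffer_head-based callers can translate the result into b_state cheaply.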
@@ -3284,26 +3314,23 @@ out2: | |||
3284 | * | 3314 | * |
3285 | * return < 0, error case. | 3315 | * return < 0, error case. |
3286 | */ | 3316 | */ |
3287 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 3317 | int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
3288 | ext4_lblk_t iblock, | 3318 | struct ext4_map_blocks *map, int flags) |
3289 | unsigned int max_blocks, struct buffer_head *bh_result, | ||
3290 | int flags) | ||
3291 | { | 3319 | { |
3292 | struct ext4_ext_path *path = NULL; | 3320 | struct ext4_ext_path *path = NULL; |
3293 | struct ext4_extent_header *eh; | 3321 | struct ext4_extent_header *eh; |
3294 | struct ext4_extent newex, *ex, *last_ex; | 3322 | struct ext4_extent newex, *ex, *last_ex; |
3295 | ext4_fsblk_t newblock; | 3323 | ext4_fsblk_t newblock; |
3296 | int err = 0, depth, ret, cache_type; | 3324 | int i, err = 0, depth, ret, cache_type; |
3297 | unsigned int allocated = 0; | 3325 | unsigned int allocated = 0; |
3298 | struct ext4_allocation_request ar; | 3326 | struct ext4_allocation_request ar; |
3299 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3327 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3300 | 3328 | ||
3301 | __clear_bit(BH_New, &bh_result->b_state); | ||
3302 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3329 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3303 | iblock, max_blocks, inode->i_ino); | 3330 | map->m_lblk, map->m_len, inode->i_ino); |
3304 | 3331 | ||
3305 | /* check in cache */ | 3332 | /* check in cache */ |
3306 | cache_type = ext4_ext_in_cache(inode, iblock, &newex); | 3333 | cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); |
3307 | if (cache_type) { | 3334 | if (cache_type) { |
3308 | if (cache_type == EXT4_EXT_CACHE_GAP) { | 3335 | if (cache_type == EXT4_EXT_CACHE_GAP) { |
3309 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3336 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
@@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3316 | /* we should allocate requested block */ | 3343 | /* we should allocate requested block */ |
3317 | } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { | 3344 | } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { |
3318 | /* block is already allocated */ | 3345 | /* block is already allocated */ |
3319 | newblock = iblock | 3346 | newblock = map->m_lblk |
3320 | - le32_to_cpu(newex.ee_block) | 3347 | - le32_to_cpu(newex.ee_block) |
3321 | + ext_pblock(&newex); | 3348 | + ext_pblock(&newex); |
3322 | /* number of remaining blocks in the extent */ | 3349 | /* number of remaining blocks in the extent */ |
3323 | allocated = ext4_ext_get_actual_len(&newex) - | 3350 | allocated = ext4_ext_get_actual_len(&newex) - |
3324 | (iblock - le32_to_cpu(newex.ee_block)); | 3351 | (map->m_lblk - le32_to_cpu(newex.ee_block)); |
3325 | goto out; | 3352 | goto out; |
3326 | } else { | 3353 | } else { |
3327 | BUG(); | 3354 | BUG(); |
@@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3329 | } | 3356 | } |
3330 | 3357 | ||
3331 | /* find extent for this block */ | 3358 | /* find extent for this block */ |
3332 | path = ext4_ext_find_extent(inode, iblock, NULL); | 3359 | path = ext4_ext_find_extent(inode, map->m_lblk, NULL); |
3333 | if (IS_ERR(path)) { | 3360 | if (IS_ERR(path)) { |
3334 | err = PTR_ERR(path); | 3361 | err = PTR_ERR(path); |
3335 | path = NULL; | 3362 | path = NULL; |
@@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3345 | */ | 3372 | */ |
3346 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { | 3373 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { |
3347 | EXT4_ERROR_INODE(inode, "bad extent address " | 3374 | EXT4_ERROR_INODE(inode, "bad extent address " |
3348 | "iblock: %d, depth: %d pblock %lld", | 3375 | "lblock: %lu, depth: %d pblock %lld", |
3349 | iblock, depth, path[depth].p_block); | 3376 | (unsigned long) map->m_lblk, depth, |
3377 | path[depth].p_block); | ||
3350 | err = -EIO; | 3378 | err = -EIO; |
3351 | goto out2; | 3379 | goto out2; |
3352 | } | 3380 | } |
@@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3364 | */ | 3392 | */ |
3365 | ee_len = ext4_ext_get_actual_len(ex); | 3393 | ee_len = ext4_ext_get_actual_len(ex); |
3366 | /* if found extent covers block, simply return it */ | 3394 | /* if found extent covers block, simply return it */ |
3367 | if (in_range(iblock, ee_block, ee_len)) { | 3395 | if (in_range(map->m_lblk, ee_block, ee_len)) { |
3368 | newblock = iblock - ee_block + ee_start; | 3396 | newblock = map->m_lblk - ee_block + ee_start; |
3369 | /* number of remaining blocks in the extent */ | 3397 | /* number of remaining blocks in the extent */ |
3370 | allocated = ee_len - (iblock - ee_block); | 3398 | allocated = ee_len - (map->m_lblk - ee_block); |
3371 | ext_debug("%u fit into %u:%d -> %llu\n", iblock, | 3399 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
3372 | ee_block, ee_len, newblock); | 3400 | ee_block, ee_len, newblock); |
3373 | 3401 | ||
3374 | /* Do not put uninitialized extent in the cache */ | 3402 | /* Do not put uninitialized extent in the cache */ |
3375 | if (!ext4_ext_is_uninitialized(ex)) { | 3403 | if (!ext4_ext_is_uninitialized(ex)) { |
@@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3379 | goto out; | 3407 | goto out; |
3380 | } | 3408 | } |
3381 | ret = ext4_ext_handle_uninitialized_extents(handle, | 3409 | ret = ext4_ext_handle_uninitialized_extents(handle, |
3382 | inode, iblock, max_blocks, path, | 3410 | inode, map, path, flags, allocated, |
3383 | flags, allocated, bh_result, newblock); | 3411 | newblock); |
3384 | return ret; | 3412 | return ret; |
3385 | } | 3413 | } |
3386 | } | 3414 | } |
@@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3394 | * put the just-found gap into the cache to speed up | 3422 | * put the just-found gap into the cache to speed up |
3395 | * subsequent requests | 3423 | * subsequent requests |
3396 | */ | 3424 | */ |
3397 | ext4_ext_put_gap_in_cache(inode, path, iblock); | 3425 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); |
3398 | goto out2; | 3426 | goto out2; |
3399 | } | 3427 | } |
3400 | /* | 3428 | /* |
@@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3402 | */ | 3430 | */ |
3403 | 3431 | ||
3404 | /* find neighbour allocated blocks */ | 3432 | /* find neighbour allocated blocks */ |
3405 | ar.lleft = iblock; | 3433 | ar.lleft = map->m_lblk; |
3406 | err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); | 3434 | err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); |
3407 | if (err) | 3435 | if (err) |
3408 | goto out2; | 3436 | goto out2; |
3409 | ar.lright = iblock; | 3437 | ar.lright = map->m_lblk; |
3410 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); | 3438 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); |
3411 | if (err) | 3439 | if (err) |
3412 | goto out2; | 3440 | goto out2; |
@@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3417 | * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is | 3445 | * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is |
3418 | * EXT_UNINIT_MAX_LEN. | 3446 | * EXT_UNINIT_MAX_LEN. |
3419 | */ | 3447 | */ |
3420 | if (max_blocks > EXT_INIT_MAX_LEN && | 3448 | if (map->m_len > EXT_INIT_MAX_LEN && |
3421 | !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) | 3449 | !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
3422 | max_blocks = EXT_INIT_MAX_LEN; | 3450 | map->m_len = EXT_INIT_MAX_LEN; |
3423 | else if (max_blocks > EXT_UNINIT_MAX_LEN && | 3451 | else if (map->m_len > EXT_UNINIT_MAX_LEN && |
3424 | (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) | 3452 | (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
3425 | max_blocks = EXT_UNINIT_MAX_LEN; | 3453 | map->m_len = EXT_UNINIT_MAX_LEN; |
3426 | 3454 | ||
3427 | /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ | 3455 | /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ |
3428 | newex.ee_block = cpu_to_le32(iblock); | 3456 | newex.ee_block = cpu_to_le32(map->m_lblk); |
3429 | newex.ee_len = cpu_to_le16(max_blocks); | 3457 | newex.ee_len = cpu_to_le16(map->m_len); |
3430 | err = ext4_ext_check_overlap(inode, &newex, path); | 3458 | err = ext4_ext_check_overlap(inode, &newex, path); |
3431 | if (err) | 3459 | if (err) |
3432 | allocated = ext4_ext_get_actual_len(&newex); | 3460 | allocated = ext4_ext_get_actual_len(&newex); |
3433 | else | 3461 | else |
3434 | allocated = max_blocks; | 3462 | allocated = map->m_len; |
3435 | 3463 | ||
3436 | /* allocate new block */ | 3464 | /* allocate new block */ |
3437 | ar.inode = inode; | 3465 | ar.inode = inode; |
3438 | ar.goal = ext4_ext_find_goal(inode, path, iblock); | 3466 | ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); |
3439 | ar.logical = iblock; | 3467 | ar.logical = map->m_lblk; |
3440 | ar.len = allocated; | 3468 | ar.len = allocated; |
3441 | if (S_ISREG(inode->i_mode)) | 3469 | if (S_ISREG(inode->i_mode)) |
3442 | ar.flags = EXT4_MB_HINT_DATA; | 3470 | ar.flags = EXT4_MB_HINT_DATA; |
@@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3470 | EXT4_STATE_DIO_UNWRITTEN); | 3498 | EXT4_STATE_DIO_UNWRITTEN); |
3471 | } | 3499 | } |
3472 | if (ext4_should_dioread_nolock(inode)) | 3500 | if (ext4_should_dioread_nolock(inode)) |
3473 | set_buffer_uninit(bh_result); | 3501 | map->m_flags |= EXT4_MAP_UNINIT; |
3474 | } | 3502 | } |
3475 | 3503 | ||
3476 | if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { | 3504 | if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { |
3477 | if (unlikely(!eh->eh_entries)) { | 3505 | if (unlikely(!eh->eh_entries)) { |
3478 | EXT4_ERROR_INODE(inode, | 3506 | EXT4_ERROR_INODE(inode, |
3479 | "eh->eh_entries == 0 ee_block %d", | 3507 | "eh->eh_entries == 0 and " |
3480 | ex->ee_block); | 3508 | "EOFBLOCKS_FL set"); |
3481 | err = -EIO; | 3509 | err = -EIO; |
3482 | goto out2; | 3510 | goto out2; |
3483 | } | 3511 | } |
3484 | last_ex = EXT_LAST_EXTENT(eh); | 3512 | last_ex = EXT_LAST_EXTENT(eh); |
3485 | if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) | 3513 | /* |
3486 | + ext4_ext_get_actual_len(last_ex)) | 3514 | * If the current leaf block was reached by looking at |
3487 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | 3515 | * the last index block all the way down the tree, and |
3516 | * we are extending the inode beyond the last extent | ||
3517 | * in the current leaf block, then clear the | ||
3518 | * EOFBLOCKS_FL flag. | ||
3519 | */ | ||
3520 | for (i = depth-1; i >= 0; i--) { | ||
3521 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3522 | break; | ||
3523 | } | ||
3524 | if ((i < 0) && | ||
3525 | (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + | ||
3526 | ext4_ext_get_actual_len(last_ex))) | ||
3527 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3488 | } | 3528 | } |
3489 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3529 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3490 | if (err) { | 3530 | if (err) { |
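The loop added above decides whether this allocation really extends the file: only when every index level of the path points at its last entry is the current leaf the rightmost one, so growing past the last extent of this leaf means growing past the last extent of the whole tree, and only then may EOFBLOCKS be cleared. Restated as a helper (a sketch, not the patch's code):

static int at_rightmost_leaf(struct ext4_ext_path *path, int depth)
{
        int i;

        for (i = depth - 1; i >= 0; i--)
                if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
                        return 0;       /* a subtree lies to the right */
        return 1;       /* no extent anywhere to the right of this leaf */
}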
@@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3500 | /* previous routine could use block we allocated */ | 3540 | /* previous routine could use block we allocated */ |
3501 | newblock = ext_pblock(&newex); | 3541 | newblock = ext_pblock(&newex); |
3502 | allocated = ext4_ext_get_actual_len(&newex); | 3542 | allocated = ext4_ext_get_actual_len(&newex); |
3503 | if (allocated > max_blocks) | 3543 | if (allocated > map->m_len) |
3504 | allocated = max_blocks; | 3544 | allocated = map->m_len; |
3505 | set_buffer_new(bh_result); | 3545 | map->m_flags |= EXT4_MAP_NEW; |
3506 | 3546 | ||
3507 | /* | 3547 | /* |
3508 | * Update reserved blocks/metadata blocks after successful | 3548 | * Update reserved blocks/metadata blocks after successful |
@@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3516 | * when it is _not_ an uninitialized extent. | 3556 | * when it is _not_ an uninitialized extent. |
3517 | */ | 3557 | */ |
3518 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { | 3558 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { |
3519 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, | 3559 | ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, |
3520 | EXT4_EXT_CACHE_EXTENT); | 3560 | EXT4_EXT_CACHE_EXTENT); |
3521 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3561 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3522 | } else | 3562 | } else |
3523 | ext4_update_inode_fsync_trans(handle, inode, 0); | 3563 | ext4_update_inode_fsync_trans(handle, inode, 0); |
3524 | out: | 3564 | out: |
3525 | if (allocated > max_blocks) | 3565 | if (allocated > map->m_len) |
3526 | allocated = max_blocks; | 3566 | allocated = map->m_len; |
3527 | ext4_ext_show_leaf(inode, path); | 3567 | ext4_ext_show_leaf(inode, path); |
3528 | set_buffer_mapped(bh_result); | 3568 | map->m_flags |= EXT4_MAP_MAPPED; |
3529 | bh_result->b_bdev = inode->i_sb->s_bdev; | 3569 | map->m_pblk = newblock; |
3530 | bh_result->b_blocknr = newblock; | 3570 | map->m_len = allocated; |
3531 | out2: | 3571 | out2: |
3532 | if (path) { | 3572 | if (path) { |
3533 | ext4_ext_drop_refs(path); | 3573 | ext4_ext_drop_refs(path); |
@@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3625 | * can proceed even if the new size is the same as i_size. | 3665 | * can proceed even if the new size is the same as i_size. |
3626 | */ | 3666 | */ |
3627 | if (new_size > i_size_read(inode)) | 3667 | if (new_size > i_size_read(inode)) |
3628 | EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; | 3668 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
3629 | } | 3669 | } |
3630 | 3670 | ||
3631 | } | 3671 | } |
@@ -3640,55 +3680,57 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3640 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | 3680 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) |
3641 | { | 3681 | { |
3642 | handle_t *handle; | 3682 | handle_t *handle; |
3643 | ext4_lblk_t block; | ||
3644 | loff_t new_size; | 3683 | loff_t new_size; |
3645 | unsigned int max_blocks; | 3684 | unsigned int max_blocks; |
3646 | int ret = 0; | 3685 | int ret = 0; |
3647 | int ret2 = 0; | 3686 | int ret2 = 0; |
3648 | int retries = 0; | 3687 | int retries = 0; |
3649 | struct buffer_head map_bh; | 3688 | struct ext4_map_blocks map; |
3650 | unsigned int credits, blkbits = inode->i_blkbits; | 3689 | unsigned int credits, blkbits = inode->i_blkbits; |
3651 | 3690 | ||
3652 | /* | 3691 | /* |
3653 | * currently supporting (pre)allocate mode for extent-based | 3692 | * currently supporting (pre)allocate mode for extent-based |
3654 | * files _only_ | 3693 | * files _only_ |
3655 | */ | 3694 | */ |
3656 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 3695 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3657 | return -EOPNOTSUPP; | 3696 | return -EOPNOTSUPP; |
3658 | 3697 | ||
3659 | /* preallocation to directories is currently not supported */ | 3698 | /* preallocation to directories is currently not supported */ |
3660 | if (S_ISDIR(inode->i_mode)) | 3699 | if (S_ISDIR(inode->i_mode)) |
3661 | return -ENODEV; | 3700 | return -ENODEV; |
3662 | 3701 | ||
3663 | block = offset >> blkbits; | 3702 | map.m_lblk = offset >> blkbits; |
3664 | /* | 3703 | /* |
3665 | * We can't just convert len to max_blocks because if | 3704 | * We can't just convert len to max_blocks because if |
3666 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are needed | 3705 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are needed |
3667 | */ | 3706 | */ |
3668 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 3707 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
3669 | - block; | 3708 | - map.m_lblk; |
3670 | /* | 3709 | /* |
3671 | * credits to insert 1 extent into extent tree | 3710 | * credits to insert 1 extent into extent tree |
3672 | */ | 3711 | */ |
3673 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3712 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3674 | mutex_lock(&inode->i_mutex); | 3713 | mutex_lock(&inode->i_mutex); |
3714 | ret = inode_newsize_ok(inode, (len + offset)); | ||
3715 | if (ret) { | ||
3716 | mutex_unlock(&inode->i_mutex); | ||
3717 | return ret; | ||
3718 | } | ||
3675 | retry: | 3719 | retry: |
3676 | while (ret >= 0 && ret < max_blocks) { | 3720 | while (ret >= 0 && ret < max_blocks) { |
3677 | block = block + ret; | 3721 | map.m_lblk = map.m_lblk + ret; |
3678 | max_blocks = max_blocks - ret; | 3722 | map.m_len = max_blocks = max_blocks - ret; |
3679 | handle = ext4_journal_start(inode, credits); | 3723 | handle = ext4_journal_start(inode, credits); |
3680 | if (IS_ERR(handle)) { | 3724 | if (IS_ERR(handle)) { |
3681 | ret = PTR_ERR(handle); | 3725 | ret = PTR_ERR(handle); |
3682 | break; | 3726 | break; |
3683 | } | 3727 | } |
3684 | map_bh.b_state = 0; | 3728 | ret = ext4_map_blocks(handle, inode, &map, |
3685 | ret = ext4_get_blocks(handle, inode, block, | ||
3686 | max_blocks, &map_bh, | ||
3687 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); | 3729 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); |
3688 | if (ret <= 0) { | 3730 | if (ret <= 0) { |
3689 | #ifdef EXT4FS_DEBUG | 3731 | #ifdef EXT4FS_DEBUG |
3690 | WARN_ON(ret <= 0); | 3732 | WARN_ON(ret <= 0); |
3691 | printk(KERN_ERR "%s: ext4_ext_get_blocks " | 3733 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3692 | "returned error inode#%lu, block=%u, " | 3734 | "returned error inode#%lu, block=%u, " |
3693 | "max_blocks=%u", __func__, | 3735 | "max_blocks=%u", __func__, |
3694 | inode->i_ino, block, max_blocks); | 3736 | inode->i_ino, map.m_lblk, max_blocks); |
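The max_blocks comment at the top of ext4_fallocate() is worth spelling out with its own numbers:

/*
 * blocksize = 4096 (blkbits = 12), offset = 3072, len = 2048:
 *
 *   map.m_lblk = offset >> blkbits                             = 0
 *   end        = EXT4_BLOCK_ALIGN(offset + len, blkbits) >> blkbits
 *              = ALIGN(5120, 4096) >> 12                       = 2
 *   max_blocks = end - map.m_lblk                              = 2
 *
 * whereas len >> blkbits would give 0: the 2048-byte range straddles
 * the boundary between blocks 0 and 1, so two blocks must be mapped.
 */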
@@ -3697,14 +3739,14 @@ retry: | |||
3697 | ret2 = ext4_journal_stop(handle); | 3739 | ret2 = ext4_journal_stop(handle); |
3698 | break; | 3740 | break; |
3699 | } | 3741 | } |
3700 | if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, | 3742 | if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, |
3701 | blkbits) >> blkbits)) | 3743 | blkbits) >> blkbits)) |
3702 | new_size = offset + len; | 3744 | new_size = offset + len; |
3703 | else | 3745 | else |
3704 | new_size = (block + ret) << blkbits; | 3746 | new_size = (map.m_lblk + ret) << blkbits; |
3705 | 3747 | ||
3706 | ext4_falloc_update_inode(inode, mode, new_size, | 3748 | ext4_falloc_update_inode(inode, mode, new_size, |
3707 | buffer_new(&map_bh)); | 3749 | (map.m_flags & EXT4_MAP_NEW)); |
3708 | ext4_mark_inode_dirty(handle, inode); | 3750 | ext4_mark_inode_dirty(handle, inode); |
3709 | ret2 = ext4_journal_stop(handle); | 3751 | ret2 = ext4_journal_stop(handle); |
3710 | if (ret2) | 3752 | if (ret2) |
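Continuing the same numbers, the new_size computed after a successful mapping is byte-precise only when the mapped range reaches the aligned end of the request:

/*
 * Once both blocks are mapped (ret = 2), map.m_lblk + ret = 2 equals
 * the aligned end, so new_size becomes offset + len = 5120 bytes
 * exactly rather than a block boundary.  If the loop stops early,
 * new_size falls back to the block-aligned (map.m_lblk + ret) <<
 * blkbits.  Blocks preallocated past the resulting i_size are what
 * the EOFBLOCKS flag set in ext4_falloc_update_inode() accounts for.
 */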
@@ -3733,42 +3775,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3733 | ssize_t len) | 3775 | ssize_t len) |
3734 | { | 3776 | { |
3735 | handle_t *handle; | 3777 | handle_t *handle; |
3736 | ext4_lblk_t block; | ||
3737 | unsigned int max_blocks; | 3778 | unsigned int max_blocks; |
3738 | int ret = 0; | 3779 | int ret = 0; |
3739 | int ret2 = 0; | 3780 | int ret2 = 0; |
3740 | struct buffer_head map_bh; | 3781 | struct ext4_map_blocks map; |
3741 | unsigned int credits, blkbits = inode->i_blkbits; | 3782 | unsigned int credits, blkbits = inode->i_blkbits; |
3742 | 3783 | ||
3743 | block = offset >> blkbits; | 3784 | map.m_lblk = offset >> blkbits; |
3744 | /* | 3785 | /* |
3745 | * We can't just convert len to max_blocks because if | 3786 | * We can't just convert len to max_blocks because if |
3746 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are needed | 3787 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are needed |
3747 | */ | 3788 | */ |
3748 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 3789 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - |
3749 | - block; | 3790 | map.m_lblk); |
3750 | /* | 3791 | /* |
3751 | * credits to insert 1 extent into extent tree | 3792 | * credits to insert 1 extent into extent tree |
3752 | */ | 3793 | */ |
3753 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3794 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3754 | while (ret >= 0 && ret < max_blocks) { | 3795 | while (ret >= 0 && ret < max_blocks) { |
3755 | block = block + ret; | 3796 | map.m_lblk += ret; |
3756 | max_blocks = max_blocks - ret; | 3797 | map.m_len = (max_blocks -= ret); |
3757 | handle = ext4_journal_start(inode, credits); | 3798 | handle = ext4_journal_start(inode, credits); |
3758 | if (IS_ERR(handle)) { | 3799 | if (IS_ERR(handle)) { |
3759 | ret = PTR_ERR(handle); | 3800 | ret = PTR_ERR(handle); |
3760 | break; | 3801 | break; |
3761 | } | 3802 | } |
3762 | map_bh.b_state = 0; | 3803 | ret = ext4_map_blocks(handle, inode, &map, |
3763 | ret = ext4_get_blocks(handle, inode, block, | ||
3764 | max_blocks, &map_bh, | ||
3765 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 3804 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
3766 | if (ret <= 0) { | 3805 | if (ret <= 0) { |
3767 | WARN_ON(ret <= 0); | 3806 | WARN_ON(ret <= 0); |
3768 | printk(KERN_ERR "%s: ext4_ext_get_blocks " | 3807 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3769 | "returned error inode#%lu, block=%u, " | 3808 | "returned error inode#%lu, block=%u, " |
3770 | "max_blocks=%u", __func__, | 3809 | "max_blocks=%u", __func__, |
3771 | inode->i_ino, block, max_blocks); | 3810 | inode->i_ino, map.m_lblk, map.m_len); |
3772 | } | 3811 | } |
3773 | ext4_mark_inode_dirty(handle, inode); | 3812 | ext4_mark_inode_dirty(handle, inode); |
3774 | ret2 = ext4_journal_stop(handle); | 3813 | ret2 = ext4_journal_stop(handle); |
@@ -3898,7 +3937,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3898 | int error = 0; | 3937 | int error = 0; |
3899 | 3938 | ||
3900 | /* fallback to generic here if not in extents fmt */ | 3939 | /* fallback to generic here if not in extents fmt */ |
3901 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 3940 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3902 | return generic_block_fiemap(inode, fieinfo, start, len, | 3941 | return generic_block_fiemap(inode, fieinfo, start, len, |
3903 | ext4_get_block); | 3942 | ext4_get_block); |
3904 | 3943 | ||
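The EXT4_I(inode)->i_flags & EXT4_*_FL tests being replaced here and in the files below go through new inode-flag bit helpers. A condensed sketch of their shape (the real definitions are macro-generated in fs/ext4/ext4.h and also cover the EXT4_STATE_* bits):

static inline int ext4_test_inode_flag(struct inode *inode, int bit)
{
        return test_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_set_inode_flag(struct inode *inode, int bit)
{
        set_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
{
        clear_bit(bit, &EXT4_I(inode)->i_flags);
}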
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d0776e410f34..5313ae4cda2d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -66,7 +66,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
66 | * is smaller than s_maxbytes, which is for extent-mapped files. | 66 | * is smaller than s_maxbytes, which is for extent-mapped files. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | 69 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
71 | size_t length = iov_length(iov, nr_segs); | 71 | size_t length = iov_length(iov, nr_segs); |
72 | 72 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index ef3d980e67cb..592adf2e546e 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -35,6 +35,29 @@ | |||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * If we're not journaling and this is a just-created file, we have to | ||
39 | * sync our parent directory (if it was freshly created) since | ||
40 | * otherwise it will only be written by writeback, leaving a huge | ||
41 | * window during which a crash may lose the file. This may apply for | ||
42 | * the parent directory's parent as well, and so on recursively, if | ||
43 | * they are also freshly created. | ||
44 | */ | ||
45 | static void ext4_sync_parent(struct inode *inode) | ||
46 | { | ||
47 | struct dentry *dentry = NULL; | ||
48 | |||
49 | while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { | ||
50 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); | ||
51 | dentry = list_entry(inode->i_dentry.next, | ||
52 | struct dentry, d_alias); | ||
53 | if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) | ||
54 | break; | ||
55 | inode = dentry->d_parent->d_inode; | ||
56 | sync_mapping_buffers(inode->i_mapping); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | /* | ||
38 | * akpm: A new design for ext4_sync_file(). | 61 | * akpm: A new design for ext4_sync_file(). |
39 | * | 62 | * |
40 | * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). | 63 | * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). |
@@ -48,9 +71,9 @@ | |||
48 | * i_mutex lock is held when entering and exiting this function | 71 | * i_mutex lock is held when entering and exiting this function |
49 | */ | 72 | */ |
50 | 73 | ||
51 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | 74 | int ext4_sync_file(struct file *file, int datasync) |
52 | { | 75 | { |
53 | struct inode *inode = dentry->d_inode; | 76 | struct inode *inode = file->f_mapping->host; |
54 | struct ext4_inode_info *ei = EXT4_I(inode); | 77 | struct ext4_inode_info *ei = EXT4_I(inode); |
55 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 78 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
56 | int ret; | 79 | int ret; |
@@ -58,7 +81,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
58 | 81 | ||
59 | J_ASSERT(ext4_journal_current_handle() == NULL); | 82 | J_ASSERT(ext4_journal_current_handle() == NULL); |
60 | 83 | ||
61 | trace_ext4_sync_file(file, dentry, datasync); | 84 | trace_ext4_sync_file(file, datasync); |
62 | 85 | ||
63 | if (inode->i_sb->s_flags & MS_RDONLY) | 86 | if (inode->i_sb->s_flags & MS_RDONLY) |
64 | return 0; | 87 | return 0; |
@@ -66,9 +89,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
66 | ret = flush_completed_IO(inode); | 89 | ret = flush_completed_IO(inode); |
67 | if (ret < 0) | 90 | if (ret < 0) |
68 | return ret; | 91 | return ret; |
69 | 92 | ||
70 | if (!journal) | 93 | if (!journal) { |
71 | return simple_fsync(file, dentry, datasync); | 94 | ret = generic_file_fsync(file, datasync); |
95 | if (!ret && !list_empty(&inode->i_dentry)) | ||
96 | ext4_sync_parent(inode); | ||
97 | return ret; | ||
98 | } | ||
72 | 99 | ||
73 | /* | 100 | /* |
74 | * data=writeback,ordered: | 101 | * data=writeback,ordered: |
@@ -102,7 +129,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
102 | (journal->j_flags & JBD2_BARRIER)) | 129 | (journal->j_flags & JBD2_BARRIER)) |
103 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | 130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
104 | NULL, BLKDEV_IFL_WAIT); | 131 | NULL, BLKDEV_IFL_WAIT); |
105 | jbd2_log_wait_commit(journal, commit_tid); | 132 | ret = jbd2_log_wait_commit(journal, commit_tid); |
106 | } else if (journal->j_flags & JBD2_BARRIER) | 133 | } else if (journal->j_flags & JBD2_BARRIER) |
107 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
108 | BLKDEV_IFL_WAIT); | 135 | BLKDEV_IFL_WAIT); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 1a0e183a2f04..25c4b3173fd9 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -240,56 +240,49 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
240 | if (fatal) | 240 | if (fatal) |
241 | goto error_return; | 241 | goto error_return; |
242 | 242 | ||
243 | /* Ok, now we can actually update the inode bitmaps.. */ | 243 | fatal = -ESRCH; |
244 | cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), | 244 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
245 | bit, bitmap_bh->b_data); | 245 | if (gdp) { |
246 | if (!cleared) | ||
247 | ext4_error(sb, "bit already cleared for inode %lu", ino); | ||
248 | else { | ||
249 | gdp = ext4_get_group_desc(sb, block_group, &bh2); | ||
250 | |||
251 | BUFFER_TRACE(bh2, "get_write_access"); | 246 | BUFFER_TRACE(bh2, "get_write_access"); |
252 | fatal = ext4_journal_get_write_access(handle, bh2); | 247 | fatal = ext4_journal_get_write_access(handle, bh2); |
253 | if (fatal) goto error_return; | 248 | } |
254 | 249 | ext4_lock_group(sb, block_group); | |
255 | if (gdp) { | 250 | cleared = ext4_clear_bit(bit, bitmap_bh->b_data); |
256 | ext4_lock_group(sb, block_group); | 251 | if (fatal || !cleared) { |
257 | count = ext4_free_inodes_count(sb, gdp) + 1; | 252 | ext4_unlock_group(sb, block_group); |
258 | ext4_free_inodes_set(sb, gdp, count); | 253 | goto out; |
259 | if (is_directory) { | 254 | } |
260 | count = ext4_used_dirs_count(sb, gdp) - 1; | ||
261 | ext4_used_dirs_set(sb, gdp, count); | ||
262 | if (sbi->s_log_groups_per_flex) { | ||
263 | ext4_group_t f; | ||
264 | |||
265 | f = ext4_flex_group(sbi, block_group); | ||
266 | atomic_dec(&sbi->s_flex_groups[f].used_dirs); | ||
267 | } | ||
268 | 255 | ||
269 | } | 256 | count = ext4_free_inodes_count(sb, gdp) + 1; |
270 | gdp->bg_checksum = ext4_group_desc_csum(sbi, | 257 | ext4_free_inodes_set(sb, gdp, count); |
271 | block_group, gdp); | 258 | if (is_directory) { |
272 | ext4_unlock_group(sb, block_group); | 259 | count = ext4_used_dirs_count(sb, gdp) - 1; |
273 | percpu_counter_inc(&sbi->s_freeinodes_counter); | 260 | ext4_used_dirs_set(sb, gdp, count); |
274 | if (is_directory) | 261 | percpu_counter_dec(&sbi->s_dirs_counter); |
275 | percpu_counter_dec(&sbi->s_dirs_counter); | ||
276 | |||
277 | if (sbi->s_log_groups_per_flex) { | ||
278 | ext4_group_t f; | ||
279 | |||
280 | f = ext4_flex_group(sbi, block_group); | ||
281 | atomic_inc(&sbi->s_flex_groups[f].free_inodes); | ||
282 | } | ||
283 | } | ||
284 | BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); | ||
285 | err = ext4_handle_dirty_metadata(handle, NULL, bh2); | ||
286 | if (!fatal) fatal = err; | ||
287 | } | 262 | } |
288 | BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); | 263 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
289 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 264 | ext4_unlock_group(sb, block_group); |
290 | if (!fatal) | 265 | |
291 | fatal = err; | 266 | percpu_counter_inc(&sbi->s_freeinodes_counter); |
292 | sb->s_dirt = 1; | 267 | if (sbi->s_log_groups_per_flex) { |
268 | ext4_group_t f = ext4_flex_group(sbi, block_group); | ||
269 | |||
270 | atomic_inc(&sbi->s_flex_groups[f].free_inodes); | ||
271 | if (is_directory) | ||
272 | atomic_dec(&sbi->s_flex_groups[f].used_dirs); | ||
273 | } | ||
274 | BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); | ||
275 | fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); | ||
276 | out: | ||
277 | if (cleared) { | ||
278 | BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); | ||
279 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
280 | if (!fatal) | ||
281 | fatal = err; | ||
282 | sb->s_dirt = 1; | ||
283 | } else | ||
284 | ext4_error(sb, "bit already cleared for inode %lu", ino); | ||
285 | |||
293 | error_return: | 286 | error_return: |
294 | brelse(bitmap_bh); | 287 | brelse(bitmap_bh); |
295 | ext4_std_error(sb, fatal); | 288 | ext4_std_error(sb, fatal); |
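The reworked ext4_free_inode() above reads best as an ordering contract: journal write access to the group descriptor is obtained before the bitmap bit is touched, the plain (non-atomic) ext4_clear_bit() is safe because it now runs under ext4_lock_group(), and all exits funnel through one path that dirties the bitmap only when the bit was really cleared. Condensed flow (a sketch of the hunk above, not a full listing):

        fatal = -ESRCH;
        gdp = ext4_get_group_desc(sb, block_group, &bh2);
        if (gdp)
                fatal = ext4_journal_get_write_access(handle, bh2);

        ext4_lock_group(sb, block_group);
        cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
        if (fatal || !cleared) {
                /* write access failed or double free: nothing dirtied yet */
                ext4_unlock_group(sb, block_group);
                goto out;
        }
        /* update free-inode/used-dirs counts and the group checksum,
         * unlock, dirty bh2; at out:, bitmap_bh is dirtied only when
         * cleared is set, otherwise the double free is reported via
         * ext4_error() */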
@@ -499,7 +492,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
499 | 492 | ||
500 | if (S_ISDIR(mode) && | 493 | if (S_ISDIR(mode) && |
501 | ((parent == sb->s_root->d_inode) || | 494 | ((parent == sb->s_root->d_inode) || |
502 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { | 495 | (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) { |
503 | int best_ndir = inodes_per_group; | 496 | int best_ndir = inodes_per_group; |
504 | int ret = -1; | 497 | int ret = -1; |
505 | 498 | ||
@@ -1041,7 +1034,7 @@ got: | |||
1041 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | 1034 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { |
1042 | /* set extent flag only for directory, file and normal symlink*/ | 1035 | /* set extent flag only for directory, file and normal symlink*/ |
1043 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { | 1036 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { |
1044 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; | 1037 | ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); |
1045 | ext4_ext_tree_init(handle, inode); | 1038 | ext4_ext_tree_init(handle, inode); |
1046 | } | 1039 | } |
1047 | } | 1040 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e0f6af9d08d..19df61c321fd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
149 | int ret; | 149 | int ret; |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | 152 | * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this |
153 | * moment, get_block can be called only for blocks inside i_size since | 153 | * moment, get_block can be called only for blocks inside i_size since |
154 | * page cache has been already dropped and writes are blocked by | 154 | * page cache has been already dropped and writes are blocked by |
155 | * i_mutex. So we can safely drop the i_data_sem here. | 155 | * i_mutex. So we can safely drop the i_data_sem here. |
@@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
348 | if (blk && | 348 | if (blk && |
349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
350 | blk, 1))) { | 350 | blk, 1))) { |
351 | __ext4_error(inode->i_sb, function, | 351 | ext4_error_inode(function, inode, |
352 | "invalid block reference %u " | 352 | "invalid block reference %u", blk); |
353 | "in inode #%lu", blk, inode->i_ino); | ||
354 | return -EIO; | 353 | return -EIO; |
355 | } | 354 | } |
356 | } | 355 | } |
@@ -785,7 +784,7 @@ failed: | |||
785 | /* Allocation failed, free what we already allocated */ | 784 | /* Allocation failed, free what we already allocated */ |
786 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 785 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); |
787 | for (i = 1; i <= n ; i++) { | 786 | for (i = 1; i <= n ; i++) { |
788 | /* | 787 | /* |
789 | * branch[i].bh is newly allocated, so there is no | 788 | * branch[i].bh is newly allocated, so there is no |
790 | * need to revoke the block, which is why we don't | 789 | * need to revoke the block, which is why we don't |
791 | * need to set EXT4_FREE_BLOCKS_METADATA. | 790 | * need to set EXT4_FREE_BLOCKS_METADATA. |
@@ -875,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
875 | 874 | ||
876 | err_out: | 875 | err_out: |
877 | for (i = 1; i <= num; i++) { | 876 | for (i = 1; i <= num; i++) { |
878 | /* | 877 | /* |
879 | * branch[i].bh is newly allocated, so there is no | 878 | * branch[i].bh is newly allocated, so there is no |
880 | * need to revoke the block, which is why we don't | 879 | * need to revoke the block, which is why we don't |
881 | * need to set EXT4_FREE_BLOCKS_METADATA. | 880 | * need to set EXT4_FREE_BLOCKS_METADATA. |
@@ -890,9 +889,9 @@ err_out: | |||
890 | } | 889 | } |
891 | 890 | ||
892 | /* | 891 | /* |
893 | * The ext4_ind_get_blocks() function handles non-extents inodes | 892 | * The ext4_ind_map_blocks() function handles non-extents inodes |
894 | * (i.e., using the traditional indirect/double-indirect i_blocks | 893 | * (i.e., using the traditional indirect/double-indirect i_blocks |
895 | * scheme) for ext4_get_blocks(). | 894 | * scheme) for ext4_map_blocks(). |
896 | * | 895 | * |
897 | * Allocation strategy is simple: if we have to allocate something, we will | 896 | * Allocation strategy is simple: if we have to allocate something, we will |
898 | * have to go the whole way to leaf. So let's do it before attaching anything | 897 | * have to go the whole way to leaf. So let's do it before attaching anything |
@@ -917,9 +916,8 @@ err_out: | |||
917 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | 916 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
918 | * blocks. | 917 | * blocks. |
919 | */ | 918 | */ |
920 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | 919 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
921 | ext4_lblk_t iblock, unsigned int maxblocks, | 920 | struct ext4_map_blocks *map, |
922 | struct buffer_head *bh_result, | ||
923 | int flags) | 921 | int flags) |
924 | { | 922 | { |
925 | int err = -EIO; | 923 | int err = -EIO; |
@@ -933,9 +931,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
933 | int count = 0; | 931 | int count = 0; |
934 | ext4_fsblk_t first_block = 0; | 932 | ext4_fsblk_t first_block = 0; |
935 | 933 | ||
936 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 934 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
937 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 935 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
938 | depth = ext4_block_to_path(inode, iblock, offsets, | 936 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
939 | &blocks_to_boundary); | 937 | &blocks_to_boundary); |
940 | 938 | ||
941 | if (depth == 0) | 939 | if (depth == 0) |
@@ -946,10 +944,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
946 | /* Simplest case - block found, no allocation needed */ | 944 | /* Simplest case - block found, no allocation needed */ |
947 | if (!partial) { | 945 | if (!partial) { |
948 | first_block = le32_to_cpu(chain[depth - 1].key); | 946 | first_block = le32_to_cpu(chain[depth - 1].key); |
949 | clear_buffer_new(bh_result); | ||
950 | count++; | 947 | count++; |
951 | /*map more blocks*/ | 948 | /*map more blocks*/ |
952 | while (count < maxblocks && count <= blocks_to_boundary) { | 949 | while (count < map->m_len && count <= blocks_to_boundary) { |
953 | ext4_fsblk_t blk; | 950 | ext4_fsblk_t blk; |
954 | 951 | ||
955 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 952 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
@@ -969,7 +966,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
969 | /* | 966 | /* |
970 | * Okay, we need to do block allocation. | 967 | * Okay, we need to do block allocation. |
971 | */ | 968 | */ |
972 | goal = ext4_find_goal(inode, iblock, partial); | 969 | goal = ext4_find_goal(inode, map->m_lblk, partial); |
973 | 970 | ||
974 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 971 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
975 | indirect_blks = (chain + depth) - partial - 1; | 972 | indirect_blks = (chain + depth) - partial - 1; |
@@ -979,11 +976,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
979 | * direct blocks to allocate for this branch. | 976 | * direct blocks to allocate for this branch. |
980 | */ | 977 | */ |
981 | count = ext4_blks_to_allocate(partial, indirect_blks, | 978 | count = ext4_blks_to_allocate(partial, indirect_blks, |
982 | maxblocks, blocks_to_boundary); | 979 | map->m_len, blocks_to_boundary); |
983 | /* | 980 | /* |
984 | * Block out ext4_truncate while we alter the tree | 981 | * Block out ext4_truncate while we alter the tree |
985 | */ | 982 | */ |
986 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, | 983 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, |
987 | &count, goal, | 984 | &count, goal, |
988 | offsets + (partial - chain), partial); | 985 | offsets + (partial - chain), partial); |
989 | 986 | ||
@@ -995,18 +992,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
995 | * may need to return -EAGAIN upwards in the worst case. --sct | 992 | * may need to return -EAGAIN upwards in the worst case. --sct |
996 | */ | 993 | */ |
997 | if (!err) | 994 | if (!err) |
998 | err = ext4_splice_branch(handle, inode, iblock, | 995 | err = ext4_splice_branch(handle, inode, map->m_lblk, |
999 | partial, indirect_blks, count); | 996 | partial, indirect_blks, count); |
1000 | if (err) | 997 | if (err) |
1001 | goto cleanup; | 998 | goto cleanup; |
1002 | 999 | ||
1003 | set_buffer_new(bh_result); | 1000 | map->m_flags |= EXT4_MAP_NEW; |
1004 | 1001 | ||
1005 | ext4_update_inode_fsync_trans(handle, inode, 1); | 1002 | ext4_update_inode_fsync_trans(handle, inode, 1); |
1006 | got_it: | 1003 | got_it: |
1007 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 1004 | map->m_flags |= EXT4_MAP_MAPPED; |
1005 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | ||
1006 | map->m_len = count; | ||
1008 | if (count > blocks_to_boundary) | 1007 | if (count > blocks_to_boundary) |
1009 | set_buffer_boundary(bh_result); | 1008 | map->m_flags |= EXT4_MAP_BOUNDARY; |
1010 | err = count; | 1009 | err = count; |
1011 | /* Clean up and exit */ | 1010 | /* Clean up and exit */ |
1012 | partial = chain + depth - 1; /* the whole chain */ | 1011 | partial = chain + depth - 1; /* the whole chain */ |
@@ -1016,7 +1015,6 @@ cleanup: | |||
1016 | brelse(partial->bh); | 1015 | brelse(partial->bh); |
1017 | partial--; | 1016 | partial--; |
1018 | } | 1017 | } |
1019 | BUFFER_TRACE(bh_result, "returned"); | ||
1020 | out: | 1018 | out: |
1021 | return err; | 1019 | return err; |
1022 | } | 1020 | } |
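From this point on, ext4_ind_map_blocks() reports its result through the map instead of a caller-supplied buffer_head. The shape of the descriptor implied by the fields used in this function (the authoritative definition lives in fs/ext4/ext4.h, outside this diff):

	struct ext4_map_blocks {
		ext4_fsblk_t m_pblk;	/* first physical block of the mapping */
		ext4_lblk_t m_lblk;	/* first logical block requested */
		unsigned int m_len;	/* blocks requested in / mapped out */
		unsigned int m_flags;	/* EXT4_MAP_{NEW,MAPPED,UNWRITTEN,BOUNDARY,UNINIT} */
	};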
@@ -1061,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
1061 | */ | 1059 | */ |
1062 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) | 1060 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) |
1063 | { | 1061 | { |
1064 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1062 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1065 | return ext4_ext_calc_metadata_amount(inode, lblock); | 1063 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1066 | 1064 | ||
1067 | return ext4_indirect_calc_metadata_amount(inode, lblock); | 1065 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
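This hunk introduces a pattern repeated throughout the rest of the patch: open-coded tests of EXT4_I(inode)->i_flags give way to ext4_test_inode_flag(). A plausible shape for the accessor (it is defined in ext4.h, not in this file; shown only to illustrate why the call sites shrink, and because a bitop-based helper also allows atomic flag updates):

	static inline int ext4_test_inode_flag(struct inode *inode, int bit)
	{
		return test_bit(bit, &EXT4_I(inode)->i_flags);
	}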
@@ -1076,7 +1074,6 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1076 | { | 1074 | { |
1077 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1075 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1078 | struct ext4_inode_info *ei = EXT4_I(inode); | 1076 | struct ext4_inode_info *ei = EXT4_I(inode); |
1079 | int mdb_free = 0, allocated_meta_blocks = 0; | ||
1080 | 1077 | ||
1081 | spin_lock(&ei->i_block_reservation_lock); | 1078 | spin_lock(&ei->i_block_reservation_lock); |
1082 | trace_ext4_da_update_reserve_space(inode, used); | 1079 | trace_ext4_da_update_reserve_space(inode, used); |
@@ -1091,11 +1088,10 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1091 | 1088 | ||
1092 | /* Update per-inode reservations */ | 1089 | /* Update per-inode reservations */ |
1093 | ei->i_reserved_data_blocks -= used; | 1090 | ei->i_reserved_data_blocks -= used; |
1094 | used += ei->i_allocated_meta_blocks; | ||
1095 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 1091 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
1096 | allocated_meta_blocks = ei->i_allocated_meta_blocks; | 1092 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1093 | used + ei->i_allocated_meta_blocks); | ||
1097 | ei->i_allocated_meta_blocks = 0; | 1094 | ei->i_allocated_meta_blocks = 0; |
1098 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); | ||
1099 | 1095 | ||
1100 | if (ei->i_reserved_data_blocks == 0) { | 1096 | if (ei->i_reserved_data_blocks == 0) { |
1101 | /* | 1097 | /* |
@@ -1103,30 +1099,23 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1103 | * only when we have written all of the delayed | 1099 | * only when we have written all of the delayed |
1104 | * allocation blocks. | 1100 | * allocation blocks. |
1105 | */ | 1101 | */ |
1106 | mdb_free = ei->i_reserved_meta_blocks; | 1102 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1103 | ei->i_reserved_meta_blocks); | ||
1107 | ei->i_reserved_meta_blocks = 0; | 1104 | ei->i_reserved_meta_blocks = 0; |
1108 | ei->i_da_metadata_calc_len = 0; | 1105 | ei->i_da_metadata_calc_len = 0; |
1109 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | ||
1110 | } | 1106 | } |
1111 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1107 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1112 | 1108 | ||
1113 | /* Update quota subsystem */ | 1109 | /* Update quota subsystem for data blocks */ |
1114 | if (quota_claim) { | 1110 | if (quota_claim) |
1115 | dquot_claim_block(inode, used); | 1111 | dquot_claim_block(inode, used); |
1116 | if (mdb_free) | 1112 | else { |
1117 | dquot_release_reservation_block(inode, mdb_free); | ||
1118 | } else { | ||
1119 | /* | 1113 | /* |
1120 | * We did fallocate with an offset that is already delayed | 1114 | * We did fallocate with an offset that is already delayed |
1121 | * allocated. So on delayed allocated writeback we should | 1115 | * allocated. So on delayed allocated writeback we should |
1122 | * not update the quota for allocated blocks. But then | 1116 | * not re-claim the quota for fallocated blocks. |
1123 | * converting an fallocate region to initialized region would | ||
1124 | * have caused a metadata allocation. So claim quota for | ||
1125 | * that | ||
1126 | */ | 1117 | */ |
1127 | if (allocated_meta_blocks) | 1118 | dquot_release_reservation_block(inode, used); |
1128 | dquot_claim_block(inode, allocated_meta_blocks); | ||
1129 | dquot_release_reservation_block(inode, mdb_free + used); | ||
1130 | } | 1119 | } |
1131 | 1120 | ||
1132 | /* | 1121 | /* |
@@ -1139,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1139 | ext4_discard_preallocations(inode); | 1128 | ext4_discard_preallocations(inode); |
1140 | } | 1129 | } |
1141 | 1130 | ||
1142 | static int check_block_validity(struct inode *inode, const char *msg, | 1131 | static int check_block_validity(struct inode *inode, const char *func, |
1143 | sector_t logical, sector_t phys, int len) | 1132 | struct ext4_map_blocks *map) |
1144 | { | 1133 | { |
1145 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1146 | __ext4_error(inode->i_sb, msg, | 1135 | map->m_len)) { |
1147 | "inode #%lu logical block %llu mapped to %llu " | 1136 | ext4_error_inode(func, inode, |
1148 | "(size %d)", inode->i_ino, | 1137 | "lblock %lu mapped to illegal pblock %llu " |
1149 | (unsigned long long) logical, | 1138 | "(length %d)", (unsigned long) map->m_lblk, |
1150 | (unsigned long long) phys, len); | 1139 | map->m_pblk, map->m_len); |
1151 | return -EIO; | 1140 | return -EIO; |
1152 | } | 1141 | } |
1153 | return 0; | 1142 | return 0; |
@@ -1212,15 +1201,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1212 | } | 1201 | } |
1213 | 1202 | ||
1214 | /* | 1203 | /* |
1215 | * The ext4_get_blocks() function tries to look up the requested blocks, | 1204 | * The ext4_map_blocks() function tries to look up the requested blocks, |
1216 | * and returns if the blocks are already mapped. | 1205 | * and returns if the blocks are already mapped. |
1217 | * | 1206 | * |
1218 | * Otherwise it takes the write lock of the i_data_sem and allocates blocks | 1207 | * Otherwise it takes the write lock of the i_data_sem and allocates blocks |
1219 | * and stores the allocated blocks in the result buffer head and marks it | 1208 | * and stores the allocated blocks in the result buffer head and marks it |
1220 | * mapped. | 1209 | * mapped. |
1221 | * | 1210 | * |
1222 | * If file type is extents based, it will call ext4_ext_get_blocks(), | 1211 | * If file type is extents based, it will call ext4_ext_map_blocks(), |
1223 | * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping | 1212 | * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping |
1224 | * based files | 1213 | * based files |
1225 | * | 1214 | * |
1226 | * On success, it returns the number of blocks being mapped or allocated. | 1215 | * On success, it returns the number of blocks being mapped or allocated. |
@@ -1233,35 +1222,29 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1233 | * | 1222 | * |
1234 | * It returns the error in case of allocation failure. | 1223 | * It returns the error in case of allocation failure. |
1235 | */ | 1224 | */ |
1236 | int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | 1225 | int ext4_map_blocks(handle_t *handle, struct inode *inode, |
1237 | unsigned int max_blocks, struct buffer_head *bh, | 1226 | struct ext4_map_blocks *map, int flags) |
1238 | int flags) | ||
1239 | { | 1227 | { |
1240 | int retval; | 1228 | int retval; |
1241 | 1229 | ||
1242 | clear_buffer_mapped(bh); | 1230 | map->m_flags = 0; |
1243 | clear_buffer_unwritten(bh); | 1231 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
1244 | 1232 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | |
1245 | ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u," | 1233 | (unsigned long) map->m_lblk); |
1246 | "logical block %lu\n", inode->i_ino, flags, max_blocks, | ||
1247 | (unsigned long)block); | ||
1248 | /* | 1234 | /* |
1249 | * Try to see if we can get the block without requesting a new | 1235 | * Try to see if we can get the block without requesting a new |
1250 | * file system block. | 1236 | * file system block. |
1251 | */ | 1237 | */ |
1252 | down_read((&EXT4_I(inode)->i_data_sem)); | 1238 | down_read((&EXT4_I(inode)->i_data_sem)); |
1253 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1239 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1254 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1240 | retval = ext4_ext_map_blocks(handle, inode, map, 0); |
1255 | bh, 0); | ||
1256 | } else { | 1241 | } else { |
1257 | retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, | 1242 | retval = ext4_ind_map_blocks(handle, inode, map, 0); |
1258 | bh, 0); | ||
1259 | } | 1243 | } |
1260 | up_read((&EXT4_I(inode)->i_data_sem)); | 1244 | up_read((&EXT4_I(inode)->i_data_sem)); |
1261 | 1245 | ||
1262 | if (retval > 0 && buffer_mapped(bh)) { | 1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1263 | int ret = check_block_validity(inode, "file system corruption", | 1247 | int ret = check_block_validity(inode, __func__, map); |
1264 | block, bh->b_blocknr, retval); | ||
1265 | if (ret != 0) | 1248 | if (ret != 0) |
1266 | return ret; | 1249 | return ret; |
1267 | } | 1250 | } |
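Callers now describe the request in the map and read the answer back out of it. A minimal caller sketch, using only behavior documented in this patch (lblk and len are placeholders; the handle may be NULL when nothing is allocated):

	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = lblk;			/* first logical block wanted */
	map.m_len = len;			/* how many blocks to look up */
	ret = ext4_map_blocks(NULL, inode, &map, 0);	/* lookup, no create */
	if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
		;	/* blocks map.m_pblk .. map.m_pblk + ret - 1 are on disk */
	else if (ret == 0)
		;	/* hole */
	else
		;	/* ret is a negative errno */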
@@ -1277,7 +1260,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1277 | * ext4_ext_get_block() returns with create = 0 | 1260 | * ext4_ext_get_block() returns with create = 0 |
1278 | * with buffer head unmapped. | 1261 | * with buffer head unmapped. |
1279 | */ | 1262 | */ |
1280 | if (retval > 0 && buffer_mapped(bh)) | 1263 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
1281 | return retval; | 1264 | return retval; |
1282 | 1265 | ||
1283 | /* | 1266 | /* |
@@ -1290,7 +1273,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1290 | * of BH_Unwritten and BH_Mapped flags being simultaneously | 1273 | * of BH_Unwritten and BH_Mapped flags being simultaneously |
1291 | * set on the buffer_head. | 1274 | * set on the buffer_head. |
1292 | */ | 1275 | */ |
1293 | clear_buffer_unwritten(bh); | 1276 | map->m_flags &= ~EXT4_MAP_UNWRITTEN; |
1294 | 1277 | ||
1295 | /* | 1278 | /* |
1296 | * New blocks allocate and/or writing to uninitialized extent | 1279 | * New blocks allocate and/or writing to uninitialized extent |
@@ -1312,14 +1295,12 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1312 | * We need to check for EXT4 here because migrate | 1295 | * We need to check for EXT4 here because migrate |
1313 | * could have changed the inode type in between | 1296 | * could have changed the inode type in between |
1314 | */ | 1297 | */ |
1315 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1298 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1316 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1299 | retval = ext4_ext_map_blocks(handle, inode, map, flags); |
1317 | bh, flags); | ||
1318 | } else { | 1300 | } else { |
1319 | retval = ext4_ind_get_blocks(handle, inode, block, | 1301 | retval = ext4_ind_map_blocks(handle, inode, map, flags); |
1320 | max_blocks, bh, flags); | ||
1321 | 1302 | ||
1322 | if (retval > 0 && buffer_new(bh)) { | 1303 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { |
1323 | /* | 1304 | /* |
1324 | * We allocated new blocks which will result in | 1305 | * We allocated new blocks which will result in |
1325 | * i_data's format changing. Force the migrate | 1306 | * i_data's format changing. Force the migrate |
@@ -1342,10 +1323,10 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1342 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1323 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1343 | 1324 | ||
1344 | up_write((&EXT4_I(inode)->i_data_sem)); | 1325 | up_write((&EXT4_I(inode)->i_data_sem)); |
1345 | if (retval > 0 && buffer_mapped(bh)) { | 1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1346 | int ret = check_block_validity(inode, "file system " | 1327 | int ret = check_block_validity(inode, |
1347 | "corruption after allocation", | 1328 | "ext4_map_blocks_after_alloc", |
1348 | block, bh->b_blocknr, retval); | 1329 | map); |
1349 | if (ret != 0) | 1330 | if (ret != 0) |
1350 | return ret; | 1331 | return ret; |
1351 | } | 1332 | } |
@@ -1355,109 +1336,109 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1355 | /* Maximum number of blocks we map for direct IO at once. */ | 1336 | /* Maximum number of blocks we map for direct IO at once. */ |
1356 | #define DIO_MAX_BLOCKS 4096 | 1337 | #define DIO_MAX_BLOCKS 4096 |
1357 | 1338 | ||
1358 | int ext4_get_block(struct inode *inode, sector_t iblock, | 1339 | static int _ext4_get_block(struct inode *inode, sector_t iblock, |
1359 | struct buffer_head *bh_result, int create) | 1340 | struct buffer_head *bh, int flags) |
1360 | { | 1341 | { |
1361 | handle_t *handle = ext4_journal_current_handle(); | 1342 | handle_t *handle = ext4_journal_current_handle(); |
1343 | struct ext4_map_blocks map; | ||
1362 | int ret = 0, started = 0; | 1344 | int ret = 0, started = 0; |
1363 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1364 | int dio_credits; | 1345 | int dio_credits; |
1365 | 1346 | ||
1366 | if (create && !handle) { | 1347 | map.m_lblk = iblock; |
1348 | map.m_len = bh->b_size >> inode->i_blkbits; | ||
1349 | |||
1350 | if (flags && !handle) { | ||
1367 | /* Direct IO write... */ | 1351 | /* Direct IO write... */ |
1368 | if (max_blocks > DIO_MAX_BLOCKS) | 1352 | if (map.m_len > DIO_MAX_BLOCKS) |
1369 | max_blocks = DIO_MAX_BLOCKS; | 1353 | map.m_len = DIO_MAX_BLOCKS; |
1370 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 1354 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); |
1371 | handle = ext4_journal_start(inode, dio_credits); | 1355 | handle = ext4_journal_start(inode, dio_credits); |
1372 | if (IS_ERR(handle)) { | 1356 | if (IS_ERR(handle)) { |
1373 | ret = PTR_ERR(handle); | 1357 | ret = PTR_ERR(handle); |
1374 | goto out; | 1358 | return ret; |
1375 | } | 1359 | } |
1376 | started = 1; | 1360 | started = 1; |
1377 | } | 1361 | } |
1378 | 1362 | ||
1379 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 1363 | ret = ext4_map_blocks(handle, inode, &map, flags); |
1380 | create ? EXT4_GET_BLOCKS_CREATE : 0); | ||
1381 | if (ret > 0) { | 1364 | if (ret > 0) { |
1382 | bh_result->b_size = (ret << inode->i_blkbits); | 1365 | map_bh(bh, inode->i_sb, map.m_pblk); |
1366 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | ||
1367 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | ||
1383 | ret = 0; | 1368 | ret = 0; |
1384 | } | 1369 | } |
1385 | if (started) | 1370 | if (started) |
1386 | ext4_journal_stop(handle); | 1371 | ext4_journal_stop(handle); |
1387 | out: | ||
1388 | return ret; | 1372 | return ret; |
1389 | } | 1373 | } |
1390 | 1374 | ||
1375 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1376 | struct buffer_head *bh, int create) | ||
1377 | { | ||
1378 | return _ext4_get_block(inode, iblock, bh, | ||
1379 | create ? EXT4_GET_BLOCKS_CREATE : 0); | ||
1380 | } | ||
1381 | |||
1391 | /* | 1382 | /* |
1392 | * `handle' can be NULL if create is zero | 1383 | * `handle' can be NULL if create is zero |
1393 | */ | 1384 | */ |
1394 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 1385 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
1395 | ext4_lblk_t block, int create, int *errp) | 1386 | ext4_lblk_t block, int create, int *errp) |
1396 | { | 1387 | { |
1397 | struct buffer_head dummy; | 1388 | struct ext4_map_blocks map; |
1389 | struct buffer_head *bh; | ||
1398 | int fatal = 0, err; | 1390 | int fatal = 0, err; |
1399 | int flags = 0; | ||
1400 | 1391 | ||
1401 | J_ASSERT(handle != NULL || create == 0); | 1392 | J_ASSERT(handle != NULL || create == 0); |
1402 | 1393 | ||
1403 | dummy.b_state = 0; | 1394 | map.m_lblk = block; |
1404 | dummy.b_blocknr = -1000; | 1395 | map.m_len = 1; |
1405 | buffer_trace_init(&dummy.b_history); | 1396 | err = ext4_map_blocks(handle, inode, &map, |
1406 | if (create) | 1397 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1407 | flags |= EXT4_GET_BLOCKS_CREATE; | 1398 | |
1408 | err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); | 1399 | if (err < 0) |
1409 | /* | 1400 | *errp = err; |
1410 | * ext4_get_blocks() returns number of blocks mapped. 0 in | 1401 | if (err <= 0) |
1411 | * case of a HOLE. | 1402 | return NULL; |
1412 | */ | 1403 | *errp = 0; |
1413 | if (err > 0) { | 1404 | |
1414 | if (err > 1) | 1405 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
1415 | WARN_ON(1); | 1406 | if (!bh) { |
1416 | err = 0; | 1407 | *errp = -EIO; |
1408 | return NULL; | ||
1417 | } | 1409 | } |
1418 | *errp = err; | 1410 | if (map.m_flags & EXT4_MAP_NEW) { |
1419 | if (!err && buffer_mapped(&dummy)) { | 1411 | J_ASSERT(create != 0); |
1420 | struct buffer_head *bh; | 1412 | J_ASSERT(handle != NULL); |
1421 | bh = sb_getblk(inode->i_sb, dummy.b_blocknr); | ||
1422 | if (!bh) { | ||
1423 | *errp = -EIO; | ||
1424 | goto err; | ||
1425 | } | ||
1426 | if (buffer_new(&dummy)) { | ||
1427 | J_ASSERT(create != 0); | ||
1428 | J_ASSERT(handle != NULL); | ||
1429 | 1413 | ||
1430 | /* | 1414 | /* |
1431 | * Now that we do not always journal data, we should | 1415 | * Now that we do not always journal data, we should |
1432 | * keep in mind whether this should always journal the | 1416 | * keep in mind whether this should always journal the |
1433 | * new buffer as metadata. For now, regular file | 1417 | * new buffer as metadata. For now, regular file |
1434 | * writes use ext4_get_block instead, so it's not a | 1418 | * writes use ext4_get_block instead, so it's not a |
1435 | * problem. | 1419 | * problem. |
1436 | */ | 1420 | */ |
1437 | lock_buffer(bh); | 1421 | lock_buffer(bh); |
1438 | BUFFER_TRACE(bh, "call get_create_access"); | 1422 | BUFFER_TRACE(bh, "call get_create_access"); |
1439 | fatal = ext4_journal_get_create_access(handle, bh); | 1423 | fatal = ext4_journal_get_create_access(handle, bh); |
1440 | if (!fatal && !buffer_uptodate(bh)) { | 1424 | if (!fatal && !buffer_uptodate(bh)) { |
1441 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); | 1425 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1442 | set_buffer_uptodate(bh); | 1426 | set_buffer_uptodate(bh); |
1443 | } | ||
1444 | unlock_buffer(bh); | ||
1445 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1446 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1447 | if (!fatal) | ||
1448 | fatal = err; | ||
1449 | } else { | ||
1450 | BUFFER_TRACE(bh, "not a new buffer"); | ||
1451 | } | ||
1452 | if (fatal) { | ||
1453 | *errp = fatal; | ||
1454 | brelse(bh); | ||
1455 | bh = NULL; | ||
1456 | } | 1427 | } |
1457 | return bh; | 1428 | unlock_buffer(bh); |
1429 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1430 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1431 | if (!fatal) | ||
1432 | fatal = err; | ||
1433 | } else { | ||
1434 | BUFFER_TRACE(bh, "not a new buffer"); | ||
1458 | } | 1435 | } |
1459 | err: | 1436 | if (fatal) { |
1460 | return NULL; | 1437 | *errp = fatal; |
1438 | brelse(bh); | ||
1439 | bh = NULL; | ||
1440 | } | ||
1441 | return bh; | ||
1461 | } | 1442 | } |
1462 | 1443 | ||
1463 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1444 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
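The one subtle line in _ext4_get_block() is the b_state splice, bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags. That only works if each EXT4_MAP_* bit occupies the position of its BH_* twin, which the flag definitions in ext4.h presumably guarantee along these lines:

	#define EXT4_MAP_NEW		(1 << BH_New)
	#define EXT4_MAP_MAPPED		(1 << BH_Mapped)
	#define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
	#define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
	#define EXT4_MAP_UNINIT		(1 << BH_Uninit)
	#define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED | \
					 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY | \
					 EXT4_MAP_UNINIT)

With that layout the mapping flags transfer onto the buffer_head with a single mask-and-or, and ext4_getblk() no longer needs a dummy buffer_head at all.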
@@ -1860,7 +1841,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1860 | int retries = 0; | 1841 | int retries = 0; |
1861 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1842 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1862 | struct ext4_inode_info *ei = EXT4_I(inode); | 1843 | struct ext4_inode_info *ei = EXT4_I(inode); |
1863 | unsigned long md_needed, md_reserved; | 1844 | unsigned long md_needed; |
1864 | int ret; | 1845 | int ret; |
1865 | 1846 | ||
1866 | /* | 1847 | /* |
@@ -1870,22 +1851,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1870 | */ | 1851 | */ |
1871 | repeat: | 1852 | repeat: |
1872 | spin_lock(&ei->i_block_reservation_lock); | 1853 | spin_lock(&ei->i_block_reservation_lock); |
1873 | md_reserved = ei->i_reserved_meta_blocks; | ||
1874 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1854 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1875 | trace_ext4_da_reserve_space(inode, md_needed); | 1855 | trace_ext4_da_reserve_space(inode, md_needed); |
1876 | spin_unlock(&ei->i_block_reservation_lock); | 1856 | spin_unlock(&ei->i_block_reservation_lock); |
1877 | 1857 | ||
1878 | /* | 1858 | /* |
1879 | * Make quota reservation here to prevent quota overflow | 1859 | * We will charge metadata quota at writeout time; this saves |
1880 | * later. Real quota accounting is done at pages writeout | 1860 | * us from metadata over-estimation, though we may go over by |
1881 | * time. | 1861 | * a small amount in the end. Here we just reserve for data. |
1882 | */ | 1862 | */ |
1883 | ret = dquot_reserve_block(inode, md_needed + 1); | 1863 | ret = dquot_reserve_block(inode, 1); |
1884 | if (ret) | 1864 | if (ret) |
1885 | return ret; | 1865 | return ret; |
1886 | 1866 | /* | |
1867 | * We do still charge estimated metadata to the sb though; | ||
1868 | * we cannot afford to run out of free blocks. | ||
1869 | */ | ||
1887 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1870 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1888 | dquot_release_reservation_block(inode, md_needed + 1); | 1871 | dquot_release_reservation_block(inode, 1); |
1889 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1872 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1890 | yield(); | 1873 | yield(); |
1891 | goto repeat; | 1874 | goto repeat; |
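Together with the ext4_da_update_reserve_space() hunks above, delalloc accounting now splits cleanly into a quota-visible data half and a superblock-only metadata half. A comment-style summary of the life-cycle, assuming standard dquot semantics:

	/* Per delalloc'd data block, under the new scheme:
	 *
	 *   reserve  - dquot_reserve_block(inode, 1) plus
	 *              ext4_claim_free_blocks(sbi, md_needed + 1);
	 *              only the data block touches quota, the estimated
	 *              metadata is charged to the sb free-space pool
	 *   claim    - writeback of a delalloc'd block:
	 *              dquot_claim_block(inode, used) converts the
	 *              reservation into real usage
	 *   release  - writeback over a fallocated range:
	 *              dquot_release_reservation_block(inode, used)
	 *              drops the reservation, since usage was already
	 *              charged when the blocks were fallocated
	 */

This is also why the spurious-EDQUOT retry loop in ext4_da_write_begin() can be deleted later in the patch: metadata over-estimates no longer hit quota at all.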
@@ -1910,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1910 | 1893 | ||
1911 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1894 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1912 | 1895 | ||
1896 | trace_ext4_da_release_space(inode, to_free); | ||
1913 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { | 1897 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
1914 | /* | 1898 | /* |
1915 | * if there aren't enough reserved blocks, then the | 1899 | * if there aren't enough reserved blocks, then the |
@@ -1932,12 +1916,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1932 | * only when we have written all of the delayed | 1916 | * only when we have written all of the delayed |
1933 | * allocation blocks. | 1917 | * allocation blocks. |
1934 | */ | 1918 | */ |
1935 | to_free += ei->i_reserved_meta_blocks; | 1919 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1920 | ei->i_reserved_meta_blocks); | ||
1936 | ei->i_reserved_meta_blocks = 0; | 1921 | ei->i_reserved_meta_blocks = 0; |
1937 | ei->i_da_metadata_calc_len = 0; | 1922 | ei->i_da_metadata_calc_len = 0; |
1938 | } | 1923 | } |
1939 | 1924 | ||
1940 | /* update fs dirty blocks counter */ | 1925 | /* update fs dirty data blocks counter */ |
1941 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1926 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); |
1942 | 1927 | ||
1943 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1928 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2042,28 +2027,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2042 | /* | 2027 | /* |
2043 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2028 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers |
2044 | * | 2029 | * |
2045 | * @mpd->inode - inode to walk through | ||
2046 | * @exbh->b_blocknr - first block on a disk | ||
2047 | * @exbh->b_size - amount of space in bytes | ||
2048 | * @logical - first logical block to start assignment with | ||
2049 | * | ||
2050 | * the function goes through all passed space and put actual disk | 2030 | * the function goes through all passed space and put actual disk |
2051 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | 2031 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
2052 | */ | 2032 | */ |
2053 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 2033 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, |
2054 | struct buffer_head *exbh) | 2034 | struct ext4_map_blocks *map) |
2055 | { | 2035 | { |
2056 | struct inode *inode = mpd->inode; | 2036 | struct inode *inode = mpd->inode; |
2057 | struct address_space *mapping = inode->i_mapping; | 2037 | struct address_space *mapping = inode->i_mapping; |
2058 | int blocks = exbh->b_size >> inode->i_blkbits; | 2038 | int blocks = map->m_len; |
2059 | sector_t pblock = exbh->b_blocknr, cur_logical; | 2039 | sector_t pblock = map->m_pblk, cur_logical; |
2060 | struct buffer_head *head, *bh; | 2040 | struct buffer_head *head, *bh; |
2061 | pgoff_t index, end; | 2041 | pgoff_t index, end; |
2062 | struct pagevec pvec; | 2042 | struct pagevec pvec; |
2063 | int nr_pages, i; | 2043 | int nr_pages, i; |
2064 | 2044 | ||
2065 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2045 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2066 | end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2046 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2067 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2047 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2068 | 2048 | ||
2069 | pagevec_init(&pvec, 0); | 2049 | pagevec_init(&pvec, 0); |
@@ -2090,17 +2070,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2090 | 2070 | ||
2091 | /* skip blocks out of the range */ | 2071 | /* skip blocks out of the range */ |
2092 | do { | 2072 | do { |
2093 | if (cur_logical >= logical) | 2073 | if (cur_logical >= map->m_lblk) |
2094 | break; | 2074 | break; |
2095 | cur_logical++; | 2075 | cur_logical++; |
2096 | } while ((bh = bh->b_this_page) != head); | 2076 | } while ((bh = bh->b_this_page) != head); |
2097 | 2077 | ||
2098 | do { | 2078 | do { |
2099 | if (cur_logical >= logical + blocks) | 2079 | if (cur_logical >= map->m_lblk + blocks) |
2100 | break; | 2080 | break; |
2101 | 2081 | ||
2102 | if (buffer_delay(bh) || | 2082 | if (buffer_delay(bh) || buffer_unwritten(bh)) { |
2103 | buffer_unwritten(bh)) { | ||
2104 | 2083 | ||
2105 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | 2084 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
2106 | 2085 | ||
@@ -2119,7 +2098,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2119 | } else if (buffer_mapped(bh)) | 2098 | } else if (buffer_mapped(bh)) |
2120 | BUG_ON(bh->b_blocknr != pblock); | 2099 | BUG_ON(bh->b_blocknr != pblock); |
2121 | 2100 | ||
2122 | if (buffer_uninit(exbh)) | 2101 | if (map->m_flags & EXT4_MAP_UNINIT) |
2123 | set_buffer_uninit(bh); | 2102 | set_buffer_uninit(bh); |
2124 | cur_logical++; | 2103 | cur_logical++; |
2125 | pblock++; | 2104 | pblock++; |
@@ -2130,21 +2109,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2130 | } | 2109 | } |
2131 | 2110 | ||
2132 | 2111 | ||
2133 | /* | ||
2134 | * __unmap_underlying_blocks - just a helper function to unmap | ||
2135 | * set of blocks described by @bh | ||
2136 | */ | ||
2137 | static inline void __unmap_underlying_blocks(struct inode *inode, | ||
2138 | struct buffer_head *bh) | ||
2139 | { | ||
2140 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2141 | int blocks, i; | ||
2142 | |||
2143 | blocks = bh->b_size >> inode->i_blkbits; | ||
2144 | for (i = 0; i < blocks; i++) | ||
2145 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | ||
2146 | } | ||
2147 | |||
2148 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2112 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2149 | sector_t logical, long blk_cnt) | 2113 | sector_t logical, long blk_cnt) |
2150 | { | 2114 | { |
@@ -2206,7 +2170,7 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2206 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2170 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
2207 | { | 2171 | { |
2208 | int err, blks, get_blocks_flags; | 2172 | int err, blks, get_blocks_flags; |
2209 | struct buffer_head new; | 2173 | struct ext4_map_blocks map; |
2210 | sector_t next = mpd->b_blocknr; | 2174 | sector_t next = mpd->b_blocknr; |
2211 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2175 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2212 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2176 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
@@ -2247,15 +2211,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2247 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | 2211 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting |
2248 | * variables are updated after the blocks have been allocated. | 2212 | * variables are updated after the blocks have been allocated. |
2249 | */ | 2213 | */ |
2250 | new.b_state = 0; | 2214 | map.m_lblk = next; |
2215 | map.m_len = max_blocks; | ||
2251 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 2216 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2252 | if (ext4_should_dioread_nolock(mpd->inode)) | 2217 | if (ext4_should_dioread_nolock(mpd->inode)) |
2253 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | 2218 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; |
2254 | if (mpd->b_state & (1 << BH_Delay)) | 2219 | if (mpd->b_state & (1 << BH_Delay)) |
2255 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2220 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2256 | 2221 | ||
2257 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | 2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2258 | &new, get_blocks_flags); | ||
2259 | if (blks < 0) { | 2223 | if (blks < 0) { |
2260 | err = blks; | 2224 | err = blks; |
2261 | /* | 2225 | /* |
@@ -2282,7 +2246,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2282 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, |
2283 | "delayed block allocation failed for inode %lu at " | 2247 | "delayed block allocation failed for inode %lu at " |
2284 | "logical offset %llu with max blocks %zd with " | 2248 | "logical offset %llu with max blocks %zd with " |
2285 | "error %d\n", mpd->inode->i_ino, | 2249 | "error %d", mpd->inode->i_ino, |
2286 | (unsigned long long) next, | 2250 | (unsigned long long) next, |
2287 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2251 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2288 | printk(KERN_CRIT "This should not happen!! " | 2252 | printk(KERN_CRIT "This should not happen!! " |
@@ -2297,10 +2261,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2297 | } | 2261 | } |
2298 | BUG_ON(blks == 0); | 2262 | BUG_ON(blks == 0); |
2299 | 2263 | ||
2300 | new.b_size = (blks << mpd->inode->i_blkbits); | 2264 | if (map.m_flags & EXT4_MAP_NEW) { |
2265 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
2266 | int i; | ||
2301 | 2267 | ||
2302 | if (buffer_new(&new)) | 2268 | for (i = 0; i < map.m_len; i++) |
2303 | __unmap_underlying_blocks(mpd->inode, &new); | 2269 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2270 | } | ||
2304 | 2271 | ||
2305 | /* | 2272 | /* |
2306 | * If blocks are delayed marked, we need to | 2273 | * If blocks are delayed marked, we need to |
@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2308 | */ | 2275 | */ |
2309 | if ((mpd->b_state & (1 << BH_Delay)) || | 2276 | if ((mpd->b_state & (1 << BH_Delay)) || |
2310 | (mpd->b_state & (1 << BH_Unwritten))) | 2277 | (mpd->b_state & (1 << BH_Unwritten))) |
2311 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2278 | mpage_put_bnr_to_bhs(mpd, &map); |
2312 | 2279 | ||
2313 | if (ext4_should_order_data(mpd->inode)) { | 2280 | if (ext4_should_order_data(mpd->inode)) { |
2314 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2281 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
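The inline loop above is the same operation the deleted __unmap_underlying_blocks() helper performed, just with the bounds taken from the map. Spelled out with descriptive comments:

	/* Each newly allocated block may still carry a stale buffer_head
	 * on the block device from a previous life as metadata; forget
	 * it so old cached contents cannot be flushed over the new data. */
	for (i = 0; i < map.m_len; i++)
		unmap_underlying_metadata(bdev, map.m_pblk + i);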
@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2349 | sector_t next; | 2316 | sector_t next; |
2350 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; | 2317 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; |
2351 | 2318 | ||
2319 | /* | ||
2320 | * XXX Don't go larger than mballoc is willing to allocate | ||
2321 | * This is a stopgap solution. We eventually need to fold | ||
2322 | * mpage_da_submit_io() into this function and then call | ||
2323 | * ext4_get_blocks() multiple times in a loop | ||
2324 | */ | ||
2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | ||
2326 | goto flush_it; | ||
2327 | |||
2352 | /* check if the reserved journal credits might overflow */ | 2328 | /* check if the reserved journal credits might overflow */ |
2353 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | 2329 | if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { |
2354 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | 2330 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { |
2355 | /* | 2331 | /* |
2356 | * With non-extent format we are limited by the journal | 2332 | * With non-extent format we are limited by the journal |
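A worked instance of the new cap, since it is expressed in file-system blocks rather than bytes:

	/* e.g. s_blocksize == 4096:  8*1024*1024 / 4096 == 2048 blocks,
	 * so the extent is flushed once it reaches 8 MiB regardless of
	 * block size - roughly the most mballoc will hand out in a
	 * single allocation, per the XXX comment above. */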
@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page, | |||
2423 | struct buffer_head *bh, *head; | 2399 | struct buffer_head *bh, *head; |
2424 | sector_t logical; | 2400 | sector_t logical; |
2425 | 2401 | ||
2426 | if (mpd->io_done) { | ||
2427 | /* | ||
2428 | * Rest of the page in the page_vec | ||
2429 | * redirty then and skip then. We will | ||
2430 | * try to write them again after | ||
2431 | * starting a new transaction | ||
2432 | */ | ||
2433 | redirty_page_for_writepage(wbc, page); | ||
2434 | unlock_page(page); | ||
2435 | return MPAGE_DA_EXTENT_TAIL; | ||
2436 | } | ||
2437 | /* | 2402 | /* |
2438 | * Can we merge this page to current extent? | 2403 | * Can we merge this page to current extent? |
2439 | */ | 2404 | */ |
@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page, | |||
2528 | * initialized properly. | 2493 | * initialized properly. |
2529 | */ | 2494 | */ |
2530 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2495 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2531 | struct buffer_head *bh_result, int create) | 2496 | struct buffer_head *bh, int create) |
2532 | { | 2497 | { |
2498 | struct ext4_map_blocks map; | ||
2533 | int ret = 0; | 2499 | int ret = 0; |
2534 | sector_t invalid_block = ~((sector_t) 0xffff); | 2500 | sector_t invalid_block = ~((sector_t) 0xffff); |
2535 | 2501 | ||
@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2537 | invalid_block = ~0; | 2503 | invalid_block = ~0; |
2538 | 2504 | ||
2539 | BUG_ON(create == 0); | 2505 | BUG_ON(create == 0); |
2540 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2506 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
2507 | |||
2508 | map.m_lblk = iblock; | ||
2509 | map.m_len = 1; | ||
2541 | 2510 | ||
2542 | /* | 2511 | /* |
2543 | * first, we need to know whether the block is allocated already | 2512 | * first, we need to know whether the block is allocated already |
2544 | * preallocated blocks are unmapped but should be treated | 2513 | * preallocated blocks are unmapped but should be treated |
2545 | * the same as allocated blocks. | 2514 | * the same as allocated blocks. |
2546 | */ | 2515 | */ |
2547 | ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); | 2516 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
2548 | if ((ret == 0) && !buffer_delay(bh_result)) { | 2517 | if (ret < 0) |
2549 | /* the block isn't (pre)allocated yet, let's reserve space */ | 2518 | return ret; |
2519 | if (ret == 0) { | ||
2520 | if (buffer_delay(bh)) | ||
2521 | return 0; /* Not sure this could or should happen */ | ||
2550 | /* | 2522 | /* |
2551 | * XXX: __block_prepare_write() unmaps passed block, | 2523 | * XXX: __block_prepare_write() unmaps passed block, |
2552 | * is it OK? | 2524 | * is it OK? |
@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2556 | /* not enough space to reserve */ | 2528 | /* not enough space to reserve */ |
2557 | return ret; | 2529 | return ret; |
2558 | 2530 | ||
2559 | map_bh(bh_result, inode->i_sb, invalid_block); | 2531 | map_bh(bh, inode->i_sb, invalid_block); |
2560 | set_buffer_new(bh_result); | 2532 | set_buffer_new(bh); |
2561 | set_buffer_delay(bh_result); | 2533 | set_buffer_delay(bh); |
2562 | } else if (ret > 0) { | 2534 | return 0; |
2563 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2564 | if (buffer_unwritten(bh_result)) { | ||
2565 | /* A delayed write to unwritten bh should | ||
2566 | * be marked new and mapped. Mapped ensures | ||
2567 | * that we don't do get_block multiple times | ||
2568 | * when we write to the same offset and new | ||
2569 | * ensures that we do proper zero out for | ||
2570 | * partial write. | ||
2571 | */ | ||
2572 | set_buffer_new(bh_result); | ||
2573 | set_buffer_mapped(bh_result); | ||
2574 | } | ||
2575 | ret = 0; | ||
2576 | } | 2535 | } |
2577 | 2536 | ||
2578 | return ret; | 2537 | map_bh(bh, inode->i_sb, map.m_pblk); |
2538 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | ||
2539 | |||
2540 | if (buffer_unwritten(bh)) { | ||
2541 | /* A delayed write to unwritten bh should be marked | ||
2542 | * new and mapped. Mapped ensures that we don't do | ||
2543 | * get_block multiple times when we write to the same | ||
2544 | * offset and new ensures that we do proper zero out | ||
2545 | * for partial write. | ||
2546 | */ | ||
2547 | set_buffer_new(bh); | ||
2548 | set_buffer_mapped(bh); | ||
2549 | } | ||
2550 | return 0; | ||
2579 | } | 2551 | } |
2580 | 2552 | ||
2581 | /* | 2553 | /* |
@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2597 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2598 | struct buffer_head *bh_result, int create) | 2570 | struct buffer_head *bh_result, int create) |
2599 | { | 2571 | { |
2600 | int ret = 0; | ||
2601 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
2602 | |||
2603 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2572 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); |
2604 | 2573 | return _ext4_get_block(inode, iblock, bh_result, 0); | |
2605 | /* | ||
2606 | * we don't want to do block allocation in writepage | ||
2607 | * so call get_block_wrap with create = 0 | ||
2608 | */ | ||
2609 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); | ||
2610 | if (ret > 0) { | ||
2611 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2612 | ret = 0; | ||
2613 | } | ||
2614 | return ret; | ||
2615 | } | 2574 | } |
2616 | 2575 | ||
2617 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2576 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2821 | * number of contiguous block. So we will limit | 2780 | * number of contiguous block. So we will limit |
2822 | * number of contiguous block to a sane value | 2781 | * number of contiguous block to a sane value |
2823 | */ | 2782 | */ |
2824 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && | 2783 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && |
2825 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2784 | (max_blocks > EXT4_MAX_TRANS_DATA)) |
2826 | max_blocks = EXT4_MAX_TRANS_DATA; | 2785 | max_blocks = EXT4_MAX_TRANS_DATA; |
2827 | 2786 | ||
2828 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2787 | return ext4_chunk_trans_blocks(inode, max_blocks); |
2829 | } | 2788 | } |
2830 | 2789 | ||
2790 | /* | ||
2791 | * write_cache_pages_da - walk the list of dirty pages of the given | ||
2792 | * address space and call the callback function (which usually writes | ||
2793 | * the pages). | ||
2794 | * | ||
2795 | * This is a forked version of write_cache_pages(). Differences: | ||
2796 | * Range cyclic is ignored. | ||
2797 | * no_nrwrite_index_update is always presumed true | ||
2798 | */ | ||
2799 | static int write_cache_pages_da(struct address_space *mapping, | ||
2800 | struct writeback_control *wbc, | ||
2801 | struct mpage_da_data *mpd) | ||
2802 | { | ||
2803 | int ret = 0; | ||
2804 | int done = 0; | ||
2805 | struct pagevec pvec; | ||
2806 | int nr_pages; | ||
2807 | pgoff_t index; | ||
2808 | pgoff_t end; /* Inclusive */ | ||
2809 | long nr_to_write = wbc->nr_to_write; | ||
2810 | |||
2811 | pagevec_init(&pvec, 0); | ||
2812 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2813 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2814 | |||
2815 | while (!done && (index <= end)) { | ||
2816 | int i; | ||
2817 | |||
2818 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
2819 | PAGECACHE_TAG_DIRTY, | ||
2820 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
2821 | if (nr_pages == 0) | ||
2822 | break; | ||
2823 | |||
2824 | for (i = 0; i < nr_pages; i++) { | ||
2825 | struct page *page = pvec.pages[i]; | ||
2826 | |||
2827 | /* | ||
2828 | * At this point, the page may be truncated or | ||
2829 | * invalidated (changing page->mapping to NULL), or | ||
2830 | * even swizzled back from swapper_space to tmpfs file | ||
2831 | * mapping. However, page->index will not change | ||
2832 | * because we have a reference on the page. | ||
2833 | */ | ||
2834 | if (page->index > end) { | ||
2835 | done = 1; | ||
2836 | break; | ||
2837 | } | ||
2838 | |||
2839 | lock_page(page); | ||
2840 | |||
2841 | /* | ||
2842 | * Page truncated or invalidated. We can freely skip it | ||
2843 | * then, even for data integrity operations: the page | ||
2844 | * has disappeared concurrently, so there could be no | ||
2845 | * real expectation of this data integrity operation | ||
2846 | * even if there is now a new, dirty page at the same | ||
2847 | * pagecache address. | ||
2848 | */ | ||
2849 | if (unlikely(page->mapping != mapping)) { | ||
2850 | continue_unlock: | ||
2851 | unlock_page(page); | ||
2852 | continue; | ||
2853 | } | ||
2854 | |||
2855 | if (!PageDirty(page)) { | ||
2856 | /* someone wrote it for us */ | ||
2857 | goto continue_unlock; | ||
2858 | } | ||
2859 | |||
2860 | if (PageWriteback(page)) { | ||
2861 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2862 | wait_on_page_writeback(page); | ||
2863 | else | ||
2864 | goto continue_unlock; | ||
2865 | } | ||
2866 | |||
2867 | BUG_ON(PageWriteback(page)); | ||
2868 | if (!clear_page_dirty_for_io(page)) | ||
2869 | goto continue_unlock; | ||
2870 | |||
2871 | ret = __mpage_da_writepage(page, wbc, mpd); | ||
2872 | if (unlikely(ret)) { | ||
2873 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
2874 | unlock_page(page); | ||
2875 | ret = 0; | ||
2876 | } else { | ||
2877 | done = 1; | ||
2878 | break; | ||
2879 | } | ||
2880 | } | ||
2881 | |||
2882 | if (nr_to_write > 0) { | ||
2883 | nr_to_write--; | ||
2884 | if (nr_to_write == 0 && | ||
2885 | wbc->sync_mode == WB_SYNC_NONE) { | ||
2886 | /* | ||
2887 | * We stop writing back only if we are | ||
2888 | * not doing integrity sync. In case of | ||
2889 | * integrity sync we have to keep going | ||
2890 | * because someone may be concurrently | ||
2891 | * dirtying pages, and we might have | ||
2892 | * synced a lot of newly appeared dirty | ||
2893 | * pages, but have not synced all of the | ||
2894 | * old dirty pages. | ||
2895 | */ | ||
2896 | done = 1; | ||
2897 | break; | ||
2898 | } | ||
2899 | } | ||
2900 | } | ||
2901 | pagevec_release(&pvec); | ||
2902 | cond_resched(); | ||
2903 | } | ||
2904 | return ret; | ||
2905 | } | ||
2906 | |||
2907 | |||
2831 | static int ext4_da_writepages(struct address_space *mapping, | 2908 | static int ext4_da_writepages(struct address_space *mapping, |
2832 | struct writeback_control *wbc) | 2909 | struct writeback_control *wbc) |
2833 | { | 2910 | { |
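write_cache_pages_da() is driven exactly like write_cache_pages(), minus the two behaviors it drops. Condensed from the call site a few hunks below (only fields visible in this patch are shown; the full setup initializes more of mpd):

	struct mpage_da_data mpd;

	mpd.inode = mapping->host;	/* pages come from this inode */
	mpd.wbc = wbc;
	mpd.io_done = 0;		/* set by __mpage_da_writepage() */
	mpd.pages_written = 0;
	mpd.retval = 0;
	ret = write_cache_pages_da(mapping, wbc, &mpd);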
@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2836 | handle_t *handle = NULL; | 2913 | handle_t *handle = NULL; |
2837 | struct mpage_da_data mpd; | 2914 | struct mpage_da_data mpd; |
2838 | struct inode *inode = mapping->host; | 2915 | struct inode *inode = mapping->host; |
2839 | int no_nrwrite_index_update; | ||
2840 | int pages_written = 0; | 2916 | int pages_written = 0; |
2841 | long pages_skipped; | 2917 | long pages_skipped; |
2842 | unsigned int max_pages; | 2918 | unsigned int max_pages; |
@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2916 | mpd.wbc = wbc; | 2992 | mpd.wbc = wbc; |
2917 | mpd.inode = mapping->host; | 2993 | mpd.inode = mapping->host; |
2918 | 2994 | ||
2919 | /* | ||
2920 | * we don't want write_cache_pages to update | ||
2921 | * nr_to_write and writeback_index | ||
2922 | */ | ||
2923 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
2924 | wbc->no_nrwrite_index_update = 1; | ||
2925 | pages_skipped = wbc->pages_skipped; | 2995 | pages_skipped = wbc->pages_skipped; |
2926 | 2996 | ||
2927 | retry: | 2997 | retry: |
@@ -2941,7 +3011,7 @@ retry: | |||
2941 | if (IS_ERR(handle)) { | 3011 | if (IS_ERR(handle)) { |
2942 | ret = PTR_ERR(handle); | 3012 | ret = PTR_ERR(handle); |
2943 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 3013 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2944 | "%ld pages, ino %lu; err %d\n", __func__, | 3014 | "%ld pages, ino %lu; err %d", __func__, |
2945 | wbc->nr_to_write, inode->i_ino, ret); | 3015 | wbc->nr_to_write, inode->i_ino, ret); |
2946 | goto out_writepages; | 3016 | goto out_writepages; |
2947 | } | 3017 | } |
@@ -2963,8 +3033,7 @@ retry: | |||
2963 | mpd.io_done = 0; | 3033 | mpd.io_done = 0; |
2964 | mpd.pages_written = 0; | 3034 | mpd.pages_written = 0; |
2965 | mpd.retval = 0; | 3035 | mpd.retval = 0; |
2966 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | 3036 | ret = write_cache_pages_da(mapping, wbc, &mpd); |
2967 | &mpd); | ||
2968 | /* | 3037 | /* |
2969 | * If we have a contiguous extent of pages and we | 3038 | * If we have a contiguous extent of pages and we |
2970 | * haven't done the I/O yet, map the blocks and submit | 3039 | * haven't done the I/O yet, map the blocks and submit |
@@ -3016,7 +3085,7 @@ retry: | |||
3016 | if (pages_skipped != wbc->pages_skipped) | 3085 | if (pages_skipped != wbc->pages_skipped) |
3017 | ext4_msg(inode->i_sb, KERN_CRIT, | 3086 | ext4_msg(inode->i_sb, KERN_CRIT, |
3018 | "This should not happen leaving %s " | 3087 | "This should not happen leaving %s " |
3019 | "with nr_to_write = %ld ret = %d\n", | 3088 | "with nr_to_write = %ld ret = %d", |
3020 | __func__, wbc->nr_to_write, ret); | 3089 | __func__, wbc->nr_to_write, ret); |
3021 | 3090 | ||
3022 | /* Update index */ | 3091 | /* Update index */ |
@@ -3030,8 +3099,6 @@ retry: | |||
3030 | mapping->writeback_index = index; | 3099 | mapping->writeback_index = index; |
3031 | 3100 | ||
3032 | out_writepages: | 3101 | out_writepages: |
3033 | if (!no_nrwrite_index_update) | ||
3034 | wbc->no_nrwrite_index_update = 0; | ||
3035 | wbc->nr_to_write -= nr_to_writebump; | 3102 | wbc->nr_to_write -= nr_to_writebump; |
3036 | wbc->range_start = range_start; | 3103 | wbc->range_start = range_start; |
3037 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3104 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3076 | loff_t pos, unsigned len, unsigned flags, | 3143 | loff_t pos, unsigned len, unsigned flags, |
3077 | struct page **pagep, void **fsdata) | 3144 | struct page **pagep, void **fsdata) |
3078 | { | 3145 | { |
3079 | int ret, retries = 0, quota_retries = 0; | 3146 | int ret, retries = 0; |
3080 | struct page *page; | 3147 | struct page *page; |
3081 | pgoff_t index; | 3148 | pgoff_t index; |
3082 | unsigned from, to; | 3149 | unsigned from, to; |
@@ -3135,22 +3202,6 @@ retry: | |||
3135 | 3202 | ||
3136 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3203 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3137 | goto retry; | 3204 | goto retry; |
3138 | |||
3139 | if ((ret == -EDQUOT) && | ||
3140 | EXT4_I(inode)->i_reserved_meta_blocks && | ||
3141 | (quota_retries++ < 3)) { | ||
3142 | /* | ||
3143 | * Since we often over-estimate the number of meta | ||
3144 | * data blocks required, we may sometimes get a | ||
3145 | * spurios out of quota error even though there would | ||
3146 | * be enough space once we write the data blocks and | ||
3147 | * find out how many meta data blocks were _really_ | ||
3148 | * required. So try forcing the inode write to see if | ||
3149 | * that helps. | ||
3150 | */ | ||
3151 | write_inode_now(inode, (quota_retries == 3)); | ||
3152 | goto retry; | ||
3153 | } | ||
3154 | out: | 3205 | out: |
3155 | return ret; | 3206 | return ret; |
3156 | } | 3207 | } |
@@ -3546,46 +3597,18 @@ out: | |||
3546 | return ret; | 3597 | return ret; |
3547 | } | 3598 | } |
3548 | 3599 | ||
3600 | /* | ||
3601 | * ext4_get_block used when preparing for a DIO write or buffer write. | ||
3602 | * We allocate an uninitialized extent if blocks haven't been allocated. | ||
3603 | * The extent will be converted to initialized after the IO is complete. | ||
3604 | */ | ||
3549 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 3605 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3550 | struct buffer_head *bh_result, int create) | 3606 | struct buffer_head *bh_result, int create) |
3551 | { | 3607 | { |
3552 | handle_t *handle = ext4_journal_current_handle(); | ||
3553 | int ret = 0; | ||
3554 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
3555 | int dio_credits; | ||
3556 | int started = 0; | ||
3557 | |||
3558 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 3608 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3559 | inode->i_ino, create); | 3609 | inode->i_ino, create); |
3560 | /* | 3610 | return _ext4_get_block(inode, iblock, bh_result, |
3561 | * ext4_get_block in prepare for a DIO write or buffer write. | 3611 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3562 | * We allocate an uninitialized extent if blocks haven't been allocated. | ||
3563 | * The extent will be converted to initialized after IO complete. | ||
3564 | */ | ||
3565 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
3566 | |||
3567 | if (!handle) { | ||
3568 | if (max_blocks > DIO_MAX_BLOCKS) | ||
3569 | max_blocks = DIO_MAX_BLOCKS; | ||
3570 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
3571 | handle = ext4_journal_start(inode, dio_credits); | ||
3572 | if (IS_ERR(handle)) { | ||
3573 | ret = PTR_ERR(handle); | ||
3574 | goto out; | ||
3575 | } | ||
3576 | started = 1; | ||
3577 | } | ||
3578 | |||
3579 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | ||
3580 | create); | ||
3581 | if (ret > 0) { | ||
3582 | bh_result->b_size = (ret << inode->i_blkbits); | ||
3583 | ret = 0; | ||
3584 | } | ||
3585 | if (started) | ||
3586 | ext4_journal_stop(handle); | ||
3587 | out: | ||
3588 | return ret; | ||
3589 | } | 3612 | } |
3590 | 3613 | ||
3591 | static void dump_completed_IO(struct inode * inode) | 3614 | static void dump_completed_IO(struct inode * inode) |
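After this hunk all three get_block_t callbacks in this file are thin wrappers over _ext4_get_block(), differing only in the flags argument:

	/*
	 * ext4_get_block()           create ? EXT4_GET_BLOCKS_CREATE : 0
	 * noalloc_get_block_write()  0 (never allocates; writepage path)
	 * ext4_get_block_write()     EXT4_GET_BLOCKS_IO_CREATE_EXT
	 *                            (uninitialized extent, converted to
	 *                             initialized after the IO completes)
	 */

The journal start/stop bookkeeping that each wrapper used to duplicate now lives in _ext4_get_block() alone.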
@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3973 | struct file *file = iocb->ki_filp; | 3996 | struct file *file = iocb->ki_filp; |
3974 | struct inode *inode = file->f_mapping->host; | 3997 | struct inode *inode = file->f_mapping->host; |
3975 | 3998 | ||
3976 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 3999 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3977 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 4000 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3978 | 4001 | ||
3979 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 4002 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4302 | 4325 | ||
4303 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 4326 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
4304 | count)) { | 4327 | count)) { |
4305 | ext4_error(inode->i_sb, "inode #%lu: " | 4328 | EXT4_ERROR_INODE(inode, "attempt to clear invalid " |
4306 | "attempt to clear blocks %llu len %lu, invalid", | 4329 | "blocks %llu len %lu", |
4307 | inode->i_ino, (unsigned long long) block_to_free, | 4330 | (unsigned long long) block_to_free, count); |
4308 | count); | ||
4309 | return 1; | 4331 | return 1; |
4310 | } | 4332 | } |
4311 | 4333 | ||
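The remaining hunks repeat this conversion across the error paths. EXT4_ERROR_INODE presumably wraps the ext4_error_inode() seen earlier in check_block_validity(), supplying __func__ and the inode itself, which is why every explicit "inode #%lu" and inode->i_ino argument drops out of the format strings. A plausible definition (it lives in ext4.h, outside this diff):

	#define EXT4_ERROR_INODE(inode, fmt, a...) \
		ext4_error_inode(__func__, (inode), (fmt), ## a)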
@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4410 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4432 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) |
4411 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4433 | ext4_handle_dirty_metadata(handle, inode, this_bh); |
4412 | else | 4434 | else |
4413 | ext4_error(inode->i_sb, | 4435 | EXT4_ERROR_INODE(inode, |
4414 | "circular indirect block detected, " | 4436 | "circular indirect block detected at " |
4415 | "inode=%lu, block=%llu", | 4437 | "block %llu", |
4416 | inode->i_ino, | 4438 | (unsigned long long) this_bh->b_blocknr); |
4417 | (unsigned long long) this_bh->b_blocknr); | ||
4418 | } | 4439 | } |
4419 | } | 4440 | } |
4420 | 4441 | ||
@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4452 | 4473 | ||
4453 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 4474 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
4454 | nr, 1)) { | 4475 | nr, 1)) { |
4455 | ext4_error(inode->i_sb, | 4476 | EXT4_ERROR_INODE(inode, |
4456 | "indirect mapped block in inode " | 4477 | "invalid indirect mapped " |
4457 | "#%lu invalid (level %d, blk #%lu)", | 4478 | "block %lu (level %d)", |
4458 | inode->i_ino, depth, | 4479 | (unsigned long) nr, depth); |
4459 | (unsigned long) nr); | ||
4460 | break; | 4480 | break; |
4461 | } | 4481 | } |
4462 | 4482 | ||
@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4468 | * (should be rare). | 4488 | * (should be rare). |
4469 | */ | 4489 | */ |
4470 | if (!bh) { | 4490 | if (!bh) { |
4471 | ext4_error(inode->i_sb, | 4491 | EXT4_ERROR_INODE(inode, |
4472 | "Read failure, inode=%lu, block=%llu", | 4492 | "Read failure block=%llu", |
4473 | inode->i_ino, nr); | 4493 | (unsigned long long) nr); |
4474 | continue; | 4494 | continue; |
4475 | } | 4495 | } |
4476 | 4496 | ||
@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode) | |||
4612 | if (!ext4_can_truncate(inode)) | 4632 | if (!ext4_can_truncate(inode)) |
4613 | return; | 4633 | return; |
4614 | 4634 | ||
4615 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | 4635 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4616 | 4636 | ||
4617 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4637 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4618 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 4638 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4619 | 4639 | ||
4620 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4640 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4621 | ext4_ext_truncate(inode); | 4641 | ext4_ext_truncate(inode); |
4622 | return; | 4642 | return; |
4623 | } | 4643 | } |
@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4785 | 4805 | ||
4786 | bh = sb_getblk(sb, block); | 4806 | bh = sb_getblk(sb, block); |
4787 | if (!bh) { | 4807 | if (!bh) { |
4788 | ext4_error(sb, "unable to read inode block - " | 4808 | EXT4_ERROR_INODE(inode, "unable to read inode block - " |
4789 | "inode=%lu, block=%llu", inode->i_ino, block); | 4809 | "block %llu", block); |
4790 | return -EIO; | 4810 | return -EIO; |
4791 | } | 4811 | } |
4792 | if (!buffer_uptodate(bh)) { | 4812 | if (!buffer_uptodate(bh)) { |
@@ -4884,8 +4904,8 @@ make_io: | |||
4884 | submit_bh(READ_META, bh); | 4904 | submit_bh(READ_META, bh); |
4885 | wait_on_buffer(bh); | 4905 | wait_on_buffer(bh); |
4886 | if (!buffer_uptodate(bh)) { | 4906 | if (!buffer_uptodate(bh)) { |
4887 | ext4_error(sb, "unable to read inode block - inode=%lu," | 4907 | EXT4_ERROR_INODE(inode, "unable to read inode " |
4888 | " block=%llu", inode->i_ino, block); | 4908 | "block %llu", block); |
4889 | brelse(bh); | 4909 | brelse(bh); |
4890 | return -EIO; | 4910 | return -EIO; |
4891 | } | 4911 | } |
@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5096 | ret = 0; | 5116 | ret = 0; |
5097 | if (ei->i_file_acl && | 5117 | if (ei->i_file_acl && |
5098 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 5118 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
5099 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", | 5119 | EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", |
5100 | ei->i_file_acl, inode->i_ino); | 5120 | ei->i_file_acl); |
5101 | ret = -EIO; | 5121 | ret = -EIO; |
5102 | goto bad_inode; | 5122 | goto bad_inode; |
5103 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 5123 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { |
@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5142 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5162 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
5143 | } else { | 5163 | } else { |
5144 | ret = -EIO; | 5164 | ret = -EIO; |
5145 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", | 5165 | EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); |
5146 | inode->i_mode, inode->i_ino); | ||
5147 | goto bad_inode; | 5166 | goto bad_inode; |
5148 | } | 5167 | } |
5149 | brelse(iloc.bh); | 5168 | brelse(iloc.bh); |
@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5381 | if (wbc->sync_mode == WB_SYNC_ALL) | 5400 | if (wbc->sync_mode == WB_SYNC_ALL) |
5382 | sync_dirty_buffer(iloc.bh); | 5401 | sync_dirty_buffer(iloc.bh); |
5383 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5402 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5384 | ext4_error(inode->i_sb, "IO error syncing inode, " | 5403 | EXT4_ERROR_INODE(inode, |
5385 | "inode=%lu, block=%llu", inode->i_ino, | 5404 | "IO error syncing inode (block=%llu)", |
5386 | (unsigned long long)iloc.bh->b_blocknr); | 5405 | (unsigned long long) iloc.bh->b_blocknr); |
5387 | err = -EIO; | 5406 | err = -EIO; |
5388 | } | 5407 | } |
5389 | brelse(iloc.bh); | 5408 | brelse(iloc.bh); |
@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5455 | } | 5474 | } |
5456 | 5475 | ||
5457 | if (attr->ia_valid & ATTR_SIZE) { | 5476 | if (attr->ia_valid & ATTR_SIZE) { |
5458 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | 5477 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5459 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5478 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5460 | 5479 | ||
5461 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5480 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { |
@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5468 | if (S_ISREG(inode->i_mode) && | 5487 | if (S_ISREG(inode->i_mode) && |
5469 | attr->ia_valid & ATTR_SIZE && | 5488 | attr->ia_valid & ATTR_SIZE && |
5470 | (attr->ia_size < inode->i_size || | 5489 | (attr->ia_size < inode->i_size || |
5471 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | 5490 | (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { |
5472 | handle_t *handle; | 5491 | handle_t *handle; |
5473 | 5492 | ||
5474 | handle = ext4_journal_start(inode, 3); | 5493 | handle = ext4_journal_start(inode, 3); |
@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5500 | } | 5519 | } |
5501 | } | 5520 | } |
5502 | /* ext4_truncate will clear the flag */ | 5521 | /* ext4_truncate will clear the flag */ |
5503 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | 5522 | if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) |
5504 | ext4_truncate(inode); | 5523 | ext4_truncate(inode); |
5505 | } | 5524 | } |
5506 | 5525 | ||
@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | |||
5576 | 5595 | ||
5577 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5596 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5578 | { | 5597 | { |
5579 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 5598 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5580 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); | 5599 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); |
5581 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 5600 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); |
5582 | } | 5601 | } |
@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5911 | */ | 5930 | */ |
5912 | 5931 | ||
5913 | if (val) | 5932 | if (val) |
5914 | EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; | 5933 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5915 | else | 5934 | else |
5916 | EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; | 5935 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5917 | ext4_set_aops(inode); | 5936 | ext4_set_aops(inode); |
5918 | 5937 | ||
5919 | jbd2_journal_unlock_updates(journal); | 5938 | jbd2_journal_unlock_updates(journal); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 016d0249294f..bf5ae883b1bd 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -258,7 +258,7 @@ setversion_out: | |||
258 | if (me.moved_len > 0) | 258 | if (me.moved_len > 0) |
259 | file_remove_suid(donor_filp); | 259 | file_remove_suid(donor_filp); |
260 | 260 | ||
261 | if (copy_to_user((struct move_extent __user *)arg, | 261 | if (copy_to_user((struct move_extent __user *)arg, |
262 | &me, sizeof(me))) | 262 | &me, sizeof(me))) |
263 | err = -EFAULT; | 263 | err = -EFAULT; |
264 | mext_out: | 264 | mext_out: |
@@ -373,7 +373,30 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
373 | case EXT4_IOC32_SETRSVSZ: | 373 | case EXT4_IOC32_SETRSVSZ: |
374 | cmd = EXT4_IOC_SETRSVSZ; | 374 | cmd = EXT4_IOC_SETRSVSZ; |
375 | break; | 375 | break; |
376 | case EXT4_IOC_GROUP_ADD: | 376 | case EXT4_IOC32_GROUP_ADD: { |
377 | struct compat_ext4_new_group_input __user *uinput; | ||
378 | struct ext4_new_group_input input; | ||
379 | mm_segment_t old_fs; | ||
380 | int err; | ||
381 | |||
382 | uinput = compat_ptr(arg); | ||
383 | err = get_user(input.group, &uinput->group); | ||
384 | err |= get_user(input.block_bitmap, &uinput->block_bitmap); | ||
385 | err |= get_user(input.inode_bitmap, &uinput->inode_bitmap); | ||
386 | err |= get_user(input.inode_table, &uinput->inode_table); | ||
387 | err |= get_user(input.blocks_count, &uinput->blocks_count); | ||
388 | err |= get_user(input.reserved_blocks, | ||
389 | &uinput->reserved_blocks); | ||
390 | if (err) | ||
391 | return -EFAULT; | ||
392 | old_fs = get_fs(); | ||
393 | set_fs(KERNEL_DS); | ||
394 | err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD, | ||
395 | (unsigned long) &input); | ||
396 | set_fs(old_fs); | ||
397 | return err; | ||
398 | } | ||
399 | case EXT4_IOC_MOVE_EXT: | ||
377 | break; | 400 | break; |
378 | default: | 401 | default: |
379 | return -ENOIOCTLCMD; | 402 | return -ENOIOCTLCMD; |
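
The EXT4_IOC32_GROUP_ADD branch added above is a standard compat-ioctl thunk: the 32-bit userspace layout of the request struct differs from the native one (64-bit fields are packed differently under compat), so each field is widened with get_user() and the native handler is then invoked on a kernel-side copy under set_fs(KERNEL_DS). A condensed sketch of the pattern, assuming hypothetical example_* names that are not ext4 symbols:

	/* Hypothetical native and 32-bit layouts of the same request. */
	struct example_input { __u32 group; __u64 start; };
	struct compat_example_input { __u32 group; __u64 start; } __packed;

	static long example_compat_ioctl(struct file *file, unsigned long arg)
	{
		struct compat_example_input __user *uinput = compat_ptr(arg);
		struct example_input input;
		mm_segment_t old_fs;
		int err;

		/* Translate field by field; a raw copy_from_user() would
		 * misread the differently packed 32-bit struct. */
		err = get_user(input.group, &uinput->group);
		err |= get_user(input.start, &uinput->start);
		if (err)
			return -EFAULT;

		/* Let the native handler treat &input as a "user" pointer. */
		old_fs = get_fs();
		set_fs(KERNEL_DS);
		err = example_native_ioctl(file, (unsigned long) &input);
		set_fs(old_fs);
		return err;
	}
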
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b423a364dca3..12b3bc026a68 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, | |||
658 | } | 658 | } |
659 | } | 659 | } |
660 | 660 | ||
661 | /* | ||
662 | * Cache the order of the largest free extent we have available in this block | ||
663 | * group. | ||
664 | */ | ||
665 | static void | ||
666 | mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) | ||
667 | { | ||
668 | int i; | ||
669 | int bits; | ||
670 | |||
671 | grp->bb_largest_free_order = -1; /* uninit */ | ||
672 | |||
673 | bits = sb->s_blocksize_bits + 1; | ||
674 | for (i = bits; i >= 0; i--) { | ||
675 | if (grp->bb_counters[i] > 0) { | ||
676 | grp->bb_largest_free_order = i; | ||
677 | break; | ||
678 | } | ||
679 | } | ||
680 | } | ||
681 | |||
661 | static noinline_for_stack | 682 | static noinline_for_stack |
662 | void ext4_mb_generate_buddy(struct super_block *sb, | 683 | void ext4_mb_generate_buddy(struct super_block *sb, |
663 | void *buddy, void *bitmap, ext4_group_t group) | 684 | void *buddy, void *bitmap, ext4_group_t group) |
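
mb_set_largest_free_order() introduced above maintains a per-group summary: the highest buddy order that still has a free extent. Keeping it current on every allocation and free (see the calls added to mb_free_blocks() and mb_mark_used() below) lets ext4_mb_good_group() reject a group with a single compare instead of walking bb_counters[]. A standalone model of the idea, not the ext4 code itself:

	struct group_summary {
		int largest_free_order;		/* -1 if no free extent */
		unsigned int counters[16];	/* free extents per order */
	};

	/* Recompute after any change to counters[], scanning top-down. */
	static void set_largest_free_order(struct group_summary *g, int max_order)
	{
		int i;

		g->largest_free_order = -1;
		for (i = max_order; i >= 0; i--) {
			if (g->counters[i] > 0) {
				g->largest_free_order = i;
				break;
			}
		}
	}

	/* Power-of-two request for 2^order blocks: one compare, no scan. */
	static int group_may_satisfy(const struct group_summary *g, int order)
	{
		return g->largest_free_order >= order;
	}
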
@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
700 | */ | 721 | */ |
701 | grp->bb_free = free; | 722 | grp->bb_free = free; |
702 | } | 723 | } |
724 | mb_set_largest_free_order(sb, grp); | ||
703 | 725 | ||
704 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 726 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
705 | 727 | ||
@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
725 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. | 747 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. |
726 | * So it can have information regarding groups_per_page which | 748 | * So it can have information regarding groups_per_page which |
727 | * is blocks_per_page/2 | 749 | * is blocks_per_page/2 |
750 | * | ||
751 | * Locking note: This routine takes the block group lock of all groups | ||
752 | * for this page; do not hold this lock when calling this routine! | ||
728 | */ | 753 | */ |
729 | 754 | ||
730 | static int ext4_mb_init_cache(struct page *page, char *incore) | 755 | static int ext4_mb_init_cache(struct page *page, char *incore) |
@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
865 | BUG_ON(incore == NULL); | 890 | BUG_ON(incore == NULL); |
866 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", | 891 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
867 | group, page->index, i * blocksize); | 892 | group, page->index, i * blocksize); |
893 | trace_ext4_mb_buddy_bitmap_load(sb, group); | ||
868 | grinfo = ext4_get_group_info(sb, group); | 894 | grinfo = ext4_get_group_info(sb, group); |
869 | grinfo->bb_fragments = 0; | 895 | grinfo->bb_fragments = 0; |
870 | memset(grinfo->bb_counters, 0, | 896 | memset(grinfo->bb_counters, 0, |
@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
882 | BUG_ON(incore != NULL); | 908 | BUG_ON(incore != NULL); |
883 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", | 909 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
884 | group, page->index, i * blocksize); | 910 | group, page->index, i * blocksize); |
911 | trace_ext4_mb_bitmap_load(sb, group); | ||
885 | 912 | ||
886 | /* see comments in ext4_mb_put_pa() */ | 913 | /* see comments in ext4_mb_put_pa() */ |
887 | ext4_lock_group(sb, group); | 914 | ext4_lock_group(sb, group); |
@@ -910,6 +937,11 @@ out: | |||
910 | return err; | 937 | return err; |
911 | } | 938 | } |
912 | 939 | ||
940 | /* | ||
941 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | ||
942 | * block group lock of all groups for this page; do not hold the BG lock when | ||
943 | * calling this routine! | ||
944 | */ | ||
913 | static noinline_for_stack | 945 | static noinline_for_stack |
914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 946 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
915 | { | 947 | { |
@@ -1004,6 +1036,11 @@ err: | |||
1004 | return ret; | 1036 | return ret; |
1005 | } | 1037 | } |
1006 | 1038 | ||
1039 | /* | ||
1040 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | ||
1041 | * block group lock of all groups for this page; do not hold the BG lock when | ||
1042 | * calling this routine! | ||
1043 | */ | ||
1007 | static noinline_for_stack int | 1044 | static noinline_for_stack int |
1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1045 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
1009 | struct ext4_buddy *e4b) | 1046 | struct ext4_buddy *e4b) |
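
The three locking notes added in this file all state one invariant: ext4_mb_init_cache(), and everything that reaches it, takes the block-group lock of every group on the page internally, so a caller already holding that lock would deadlock against itself. A toy illustration of the rule, with placeholder names:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(group_lock);

	static void init_group_cache(void)
	{
		spin_lock(&group_lock);		/* taken inside the callee */
		/* ... build the bitmap and buddy pages ... */
		spin_unlock(&group_lock);
	}

	static void caller(void)
	{
		/* WRONG: taking group_lock here and then calling
		 * init_group_cache() would self-deadlock. */
		init_group_cache();		/* call with the lock NOT held */

		spin_lock(&group_lock);
		/* ... consume the cached data ... */
		spin_unlock(&group_lock);
	}
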
@@ -1150,7 +1187,7 @@ err: | |||
1150 | return ret; | 1187 | return ret; |
1151 | } | 1188 | } |
1152 | 1189 | ||
1153 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) | 1190 | static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) |
1154 | { | 1191 | { |
1155 | if (e4b->bd_bitmap_page) | 1192 | if (e4b->bd_bitmap_page) |
1156 | page_cache_release(e4b->bd_bitmap_page); | 1193 | page_cache_release(e4b->bd_bitmap_page); |
@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1299 | buddy = buddy2; | 1336 | buddy = buddy2; |
1300 | } while (1); | 1337 | } while (1); |
1301 | } | 1338 | } |
1339 | mb_set_largest_free_order(sb, e4b->bd_info); | ||
1302 | mb_check_buddy(e4b); | 1340 | mb_check_buddy(e4b); |
1303 | } | 1341 | } |
1304 | 1342 | ||
@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1427 | e4b->bd_info->bb_counters[ord]++; | 1465 | e4b->bd_info->bb_counters[ord]++; |
1428 | e4b->bd_info->bb_counters[ord]++; | 1466 | e4b->bd_info->bb_counters[ord]++; |
1429 | } | 1467 | } |
1468 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); | ||
1430 | 1469 | ||
1431 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | 1470 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); |
1432 | mb_check_buddy(e4b); | 1471 | mb_check_buddy(e4b); |
@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, | |||
1617 | } | 1656 | } |
1618 | 1657 | ||
1619 | ext4_unlock_group(ac->ac_sb, group); | 1658 | ext4_unlock_group(ac->ac_sb, group); |
1620 | ext4_mb_release_desc(e4b); | 1659 | ext4_mb_unload_buddy(e4b); |
1621 | 1660 | ||
1622 | return 0; | 1661 | return 0; |
1623 | } | 1662 | } |
@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1672 | ext4_mb_use_best_found(ac, e4b); | 1711 | ext4_mb_use_best_found(ac, e4b); |
1673 | } | 1712 | } |
1674 | ext4_unlock_group(ac->ac_sb, group); | 1713 | ext4_unlock_group(ac->ac_sb, group); |
1675 | ext4_mb_release_desc(e4b); | 1714 | ext4_mb_unload_buddy(e4b); |
1676 | 1715 | ||
1677 | return 0; | 1716 | return 0; |
1678 | } | 1717 | } |
@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1821 | } | 1860 | } |
1822 | } | 1861 | } |
1823 | 1862 | ||
1863 | /* This is now called BEFORE we load the buddy bitmap. */ | ||
1824 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, | 1864 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, |
1825 | ext4_group_t group, int cr) | 1865 | ext4_group_t group, int cr) |
1826 | { | 1866 | { |
1827 | unsigned free, fragments; | 1867 | unsigned free, fragments; |
1828 | unsigned i, bits; | ||
1829 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | 1868 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
1830 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1869 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
1831 | 1870 | ||
1832 | BUG_ON(cr < 0 || cr >= 4); | 1871 | BUG_ON(cr < 0 || cr >= 4); |
1833 | BUG_ON(EXT4_MB_GRP_NEED_INIT(grp)); | 1872 | |
1873 | /* We only do this if the grp has never been initialized */ | ||
1874 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
1875 | int ret = ext4_mb_init_group(ac->ac_sb, group); | ||
1876 | if (ret) | ||
1877 | return 0; | ||
1878 | } | ||
1834 | 1879 | ||
1835 | free = grp->bb_free; | 1880 | free = grp->bb_free; |
1836 | fragments = grp->bb_fragments; | 1881 | fragments = grp->bb_fragments; |
@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1843 | case 0: | 1888 | case 0: |
1844 | BUG_ON(ac->ac_2order == 0); | 1889 | BUG_ON(ac->ac_2order == 0); |
1845 | 1890 | ||
1891 | if (grp->bb_largest_free_order < ac->ac_2order) | ||
1892 | return 0; | ||
1893 | |||
1846 | /* Avoid using the first bg of a flexgroup for data files */ | 1894 | /* Avoid using the first bg of a flexgroup for data files */ |
1847 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | 1895 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
1848 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && | 1896 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && |
1849 | ((group % flex_size) == 0)) | 1897 | ((group % flex_size) == 0)) |
1850 | return 0; | 1898 | return 0; |
1851 | 1899 | ||
1852 | bits = ac->ac_sb->s_blocksize_bits + 1; | 1900 | return 1; |
1853 | for (i = ac->ac_2order; i <= bits; i++) | ||
1854 | if (grp->bb_counters[i] > 0) | ||
1855 | return 1; | ||
1856 | break; | ||
1857 | case 1: | 1901 | case 1: |
1858 | if ((free / fragments) >= ac->ac_g_ex.fe_len) | 1902 | if ((free / fragments) >= ac->ac_g_ex.fe_len) |
1859 | return 1; | 1903 | return 1; |
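
Two things change in ext4_mb_good_group(): it now runs before the buddy page is loaded, so it must tolerate groups whose cache was never built (the unlikely NEED_INIT branch initializes on demand and skips the group if that fails), and the cr == 0 case collapses from a bb_counters[] scan to one compare against the cached bb_largest_free_order. Condensed, the new shape of the cr == 0 path is roughly:

	/* Condensed restatement of the logic above, cr == 0 path only. */
	static int group_is_good(struct super_block *sb, ext4_group_t group,
				 int wanted_order)
	{
		struct ext4_group_info *grp = ext4_get_group_info(sb, group);

		/* Build the buddy cache lazily, on first consideration. */
		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp)) &&
		    ext4_mb_init_group(sb, group))
			return 0;	/* init failed: just skip the group */

		return grp->bb_largest_free_order >= wanted_order;
	}
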
@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1964 | sbi = EXT4_SB(sb); | 2008 | sbi = EXT4_SB(sb); |
1965 | ngroups = ext4_get_groups_count(sb); | 2009 | ngroups = ext4_get_groups_count(sb); |
1966 | /* non-extent files are limited to low blocks/groups */ | 2010 | /* non-extent files are limited to low blocks/groups */ |
1967 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) | 2011 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))) |
1968 | ngroups = sbi->s_blockfile_groups; | 2012 | ngroups = sbi->s_blockfile_groups; |
1969 | 2013 | ||
1970 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 2014 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
@@ -2024,15 +2068,11 @@ repeat: | |||
2024 | group = ac->ac_g_ex.fe_group; | 2068 | group = ac->ac_g_ex.fe_group; |
2025 | 2069 | ||
2026 | for (i = 0; i < ngroups; group++, i++) { | 2070 | for (i = 0; i < ngroups; group++, i++) { |
2027 | struct ext4_group_info *grp; | ||
2028 | struct ext4_group_desc *desc; | ||
2029 | |||
2030 | if (group == ngroups) | 2071 | if (group == ngroups) |
2031 | group = 0; | 2072 | group = 0; |
2032 | 2073 | ||
2033 | /* quick check to skip empty groups */ | 2074 | /* This now checks without needing the buddy page */ |
2034 | grp = ext4_get_group_info(sb, group); | 2075 | if (!ext4_mb_good_group(ac, group, cr)) |
2035 | if (grp->bb_free == 0) | ||
2036 | continue; | 2076 | continue; |
2037 | 2077 | ||
2038 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2078 | err = ext4_mb_load_buddy(sb, group, &e4b); |
@@ -2040,15 +2080,18 @@ repeat: | |||
2040 | goto out; | 2080 | goto out; |
2041 | 2081 | ||
2042 | ext4_lock_group(sb, group); | 2082 | ext4_lock_group(sb, group); |
2083 | |||
2084 | /* | ||
2085 | * We need to check again after locking the | ||
2086 | * block group | ||
2087 | */ | ||
2043 | if (!ext4_mb_good_group(ac, group, cr)) { | 2088 | if (!ext4_mb_good_group(ac, group, cr)) { |
2044 | /* someone did allocation from this group */ | ||
2045 | ext4_unlock_group(sb, group); | 2089 | ext4_unlock_group(sb, group); |
2046 | ext4_mb_release_desc(&e4b); | 2090 | ext4_mb_unload_buddy(&e4b); |
2047 | continue; | 2091 | continue; |
2048 | } | 2092 | } |
2049 | 2093 | ||
2050 | ac->ac_groups_scanned++; | 2094 | ac->ac_groups_scanned++; |
2051 | desc = ext4_get_group_desc(sb, group, NULL); | ||
2052 | if (cr == 0) | 2095 | if (cr == 0) |
2053 | ext4_mb_simple_scan_group(ac, &e4b); | 2096 | ext4_mb_simple_scan_group(ac, &e4b); |
2054 | else if (cr == 1 && | 2097 | else if (cr == 1 && |
@@ -2058,7 +2101,7 @@ repeat: | |||
2058 | ext4_mb_complex_scan_group(ac, &e4b); | 2101 | ext4_mb_complex_scan_group(ac, &e4b); |
2059 | 2102 | ||
2060 | ext4_unlock_group(sb, group); | 2103 | ext4_unlock_group(sb, group); |
2061 | ext4_mb_release_desc(&e4b); | 2104 | ext4_mb_unload_buddy(&e4b); |
2062 | 2105 | ||
2063 | if (ac->ac_status != AC_STATUS_CONTINUE) | 2106 | if (ac->ac_status != AC_STATUS_CONTINUE) |
2064 | break; | 2107 | break; |
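
The allocator loop above becomes a double-checked pattern: ext4_mb_good_group() is first called unlocked as a cheap, racy filter (replacing the old bb_free == 0 quick check), and only after the buddy is loaded and the group lock taken is it called again as the authoritative test, since another CPU may have drained the group in between. Schematically, with stand-in helpers:

	/* Sketch of the check / lock / re-check loop; helpers are stand-ins. */
	static int try_group(struct super_block *sb, ext4_group_t group, int cr)
	{
		struct ext4_buddy e4b;

		if (!good_group(sb, group, cr))		/* unlocked: may race */
			return 0;

		if (load_buddy(sb, group, &e4b))	/* may sleep, do I/O */
			return 0;

		ext4_lock_group(sb, group);
		if (!good_group(sb, group, cr)) {	/* now authoritative */
			ext4_unlock_group(sb, group);
			unload_buddy(&e4b);
			return 0;
		}
		/* ... scan the group for a suitable extent ... */
		ext4_unlock_group(sb, group);
		unload_buddy(&e4b);
		return 1;
	}
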
@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2148 | ext4_lock_group(sb, group); | 2191 | ext4_lock_group(sb, group); |
2149 | memcpy(&sg, ext4_get_group_info(sb, group), i); | 2192 | memcpy(&sg, ext4_get_group_info(sb, group), i); |
2150 | ext4_unlock_group(sb, group); | 2193 | ext4_unlock_group(sb, group); |
2151 | ext4_mb_release_desc(&e4b); | 2194 | ext4_mb_unload_buddy(&e4b); |
2152 | 2195 | ||
2153 | seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, | 2196 | seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, |
2154 | sg.info.bb_fragments, sg.info.bb_first_free); | 2197 | sg.info.bb_fragments, sg.info.bb_first_free); |
@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2255 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2298 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2256 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2299 | init_rwsem(&meta_group_info[i]->alloc_sem); |
2257 | meta_group_info[i]->bb_free_root = RB_ROOT; | 2300 | meta_group_info[i]->bb_free_root = RB_ROOT; |
2301 | meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ | ||
2258 | 2302 | ||
2259 | #ifdef DOUBLE_CHECK | 2303 | #ifdef DOUBLE_CHECK |
2260 | { | 2304 | { |
@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2536 | entry->count, entry->group, entry); | 2580 | entry->count, entry->group, entry); |
2537 | 2581 | ||
2538 | if (test_opt(sb, DISCARD)) { | 2582 | if (test_opt(sb, DISCARD)) { |
2583 | int ret; | ||
2539 | ext4_fsblk_t discard_block; | 2584 | ext4_fsblk_t discard_block; |
2540 | 2585 | ||
2541 | discard_block = entry->start_blk + | 2586 | discard_block = entry->start_blk + |
@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2543 | trace_ext4_discard_blocks(sb, | 2588 | trace_ext4_discard_blocks(sb, |
2544 | (unsigned long long)discard_block, | 2589 | (unsigned long long)discard_block, |
2545 | entry->count); | 2590 | entry->count); |
2546 | sb_issue_discard(sb, discard_block, entry->count); | 2591 | ret = sb_issue_discard(sb, discard_block, entry->count); |
2592 | if (ret == EOPNOTSUPP) { | ||
2593 | ext4_warning(sb, | ||
2594 | "discard not supported, disabling"); | ||
2595 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2596 | } | ||
2547 | } | 2597 | } |
2548 | 2598 | ||
2549 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2568 | } | 2618 | } |
2569 | ext4_unlock_group(sb, entry->group); | 2619 | ext4_unlock_group(sb, entry->group); |
2570 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2620 | kmem_cache_free(ext4_free_ext_cachep, entry); |
2571 | ext4_mb_release_desc(&e4b); | 2621 | ext4_mb_unload_buddy(&e4b); |
2572 | } | 2622 | } |
2573 | 2623 | ||
2574 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); | 2624 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
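
The discard hunk above adds a probe-and-disable fallback: the first sb_issue_discard() the device rejects with EOPNOTSUPP logs one warning and clears the DISCARD mount option, so later commits stop submitting discard requests entirely. Note the test must match the sign convention sb_issue_discard() actually uses in the tree being patched (positive here); a sign mismatch would silently defeat the fallback. The pattern in isolation:

	/* The probe-once-then-disable shape, extracted from the hunk above. */
	if (test_opt(sb, DISCARD)) {
		int ret = sb_issue_discard(sb, discard_block, entry->count);

		if (ret == EOPNOTSUPP) {	/* device cannot discard */
			ext4_warning(sb, "discard not supported, disabling");
			clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
		}
	}
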
@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void) | |||
2641 | 2691 | ||
2642 | void exit_ext4_mballoc(void) | 2692 | void exit_ext4_mballoc(void) |
2643 | { | 2693 | { |
2644 | /* | 2694 | /* |
2645 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2695 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2646 | * before destroying the slab cache. | 2696 | * before destroying the slab cache. |
2647 | */ | 2697 | */ |
@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) | |||
2981 | if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { | 3031 | if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { |
2982 | atomic_inc(&sbi->s_bal_reqs); | 3032 | atomic_inc(&sbi->s_bal_reqs); |
2983 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); | 3033 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); |
2984 | if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) | 3034 | if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) |
2985 | atomic_inc(&sbi->s_bal_success); | 3035 | atomic_inc(&sbi->s_bal_success); |
2986 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); | 3036 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); |
2987 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && | 3037 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && |
@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3123 | continue; | 3173 | continue; |
3124 | 3174 | ||
3125 | /* non-extent files can't have physical blocks past 2^32 */ | 3175 | /* non-extent files can't have physical blocks past 2^32 */ |
3126 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && | 3176 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && |
3127 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | 3177 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) |
3128 | continue; | 3178 | continue; |
3129 | 3179 | ||
@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, | |||
3280 | spin_unlock(&pa->pa_lock); | 3330 | spin_unlock(&pa->pa_lock); |
3281 | 3331 | ||
3282 | grp_blk = pa->pa_pstart; | 3332 | grp_blk = pa->pa_pstart; |
3283 | /* | 3333 | /* |
3284 | * If doing group-based preallocation, pa_pstart may be in the | 3334 | * If doing group-based preallocation, pa_pstart may be in the |
3285 | * next group when pa is used up | 3335 | * next group when pa is used up |
3286 | */ | 3336 | */ |
@@ -3697,7 +3747,7 @@ out: | |||
3697 | ext4_unlock_group(sb, group); | 3747 | ext4_unlock_group(sb, group); |
3698 | if (ac) | 3748 | if (ac) |
3699 | kmem_cache_free(ext4_ac_cachep, ac); | 3749 | kmem_cache_free(ext4_ac_cachep, ac); |
3700 | ext4_mb_release_desc(&e4b); | 3750 | ext4_mb_unload_buddy(&e4b); |
3701 | put_bh(bitmap_bh); | 3751 | put_bh(bitmap_bh); |
3702 | return free; | 3752 | return free; |
3703 | } | 3753 | } |
@@ -3801,7 +3851,7 @@ repeat: | |||
3801 | if (bitmap_bh == NULL) { | 3851 | if (bitmap_bh == NULL) { |
3802 | ext4_error(sb, "Error reading block bitmap for %u", | 3852 | ext4_error(sb, "Error reading block bitmap for %u", |
3803 | group); | 3853 | group); |
3804 | ext4_mb_release_desc(&e4b); | 3854 | ext4_mb_unload_buddy(&e4b); |
3805 | continue; | 3855 | continue; |
3806 | } | 3856 | } |
3807 | 3857 | ||
@@ -3810,7 +3860,7 @@ repeat: | |||
3810 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3860 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); |
3811 | ext4_unlock_group(sb, group); | 3861 | ext4_unlock_group(sb, group); |
3812 | 3862 | ||
3813 | ext4_mb_release_desc(&e4b); | 3863 | ext4_mb_unload_buddy(&e4b); |
3814 | put_bh(bitmap_bh); | 3864 | put_bh(bitmap_bh); |
3815 | 3865 | ||
3816 | list_del(&pa->u.pa_tmp_list); | 3866 | list_del(&pa->u.pa_tmp_list); |
@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4074 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4124 | ext4_mb_release_group_pa(&e4b, pa, ac); |
4075 | ext4_unlock_group(sb, group); | 4125 | ext4_unlock_group(sb, group); |
4076 | 4126 | ||
4077 | ext4_mb_release_desc(&e4b); | 4127 | ext4_mb_unload_buddy(&e4b); |
4078 | list_del(&pa->u.pa_tmp_list); | 4128 | list_del(&pa->u.pa_tmp_list); |
4079 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4129 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4080 | } | 4130 | } |
@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4484 | if (!bh) | 4534 | if (!bh) |
4485 | tbh = sb_find_get_block(inode->i_sb, | 4535 | tbh = sb_find_get_block(inode->i_sb, |
4486 | block + i); | 4536 | block + i); |
4487 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4537 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4488 | inode, tbh, block + i); | 4538 | inode, tbh, block + i); |
4489 | } | 4539 | } |
4490 | } | 4540 | } |
4491 | 4541 | ||
4492 | /* | 4542 | /* |
4493 | * We need to make sure we don't reuse the freed block until | 4543 | * We need to make sure we don't reuse the freed block until |
4494 | * after the transaction is committed, which we can do by | 4544 | * after the transaction is committed, which we can do by |
4495 | * treating the block as metadata, below. We make an | 4545 | * treating the block as metadata, below. We make an |
@@ -4610,7 +4660,7 @@ do_more: | |||
4610 | atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); | 4660 | atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); |
4611 | } | 4661 | } |
4612 | 4662 | ||
4613 | ext4_mb_release_desc(&e4b); | 4663 | ext4_mb_unload_buddy(&e4b); |
4614 | 4664 | ||
4615 | freed += count; | 4665 | freed += count; |
4616 | 4666 | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 34dcfc52ef44..6f3a27ec30bf 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
475 | */ | 475 | */ |
476 | if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, | 476 | if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, |
477 | EXT4_FEATURE_INCOMPAT_EXTENTS) || | 477 | EXT4_FEATURE_INCOMPAT_EXTENTS) || |
478 | (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 478 | (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
479 | return -EINVAL; | 479 | return -EINVAL; |
480 | 480 | ||
481 | if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) | 481 | if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index d1fc662cc311..3a6c92ac131c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -482,6 +482,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
482 | int depth = ext_depth(orig_inode); | 482 | int depth = ext_depth(orig_inode); |
483 | int ret; | 483 | int ret; |
484 | 484 | ||
485 | start_ext.ee_block = end_ext.ee_block = 0; | ||
485 | o_start = o_end = oext = orig_path[depth].p_ext; | 486 | o_start = o_end = oext = orig_path[depth].p_ext; |
486 | oext_alen = ext4_ext_get_actual_len(oext); | 487 | oext_alen = ext4_ext_get_actual_len(oext); |
487 | start_ext.ee_len = end_ext.ee_len = 0; | 488 | start_ext.ee_len = end_ext.ee_len = 0; |
@@ -529,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
529 | * new_ext |-------| | 530 | * new_ext |-------| |
530 | */ | 531 | */ |
531 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { | 532 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
532 | ext4_error(orig_inode->i_sb, | 533 | EXT4_ERROR_INODE(orig_inode, |
533 | "new_ext_end(%u) should be less than or equal to " | 534 | "new_ext_end(%u) should be less than or equal to " |
534 | "oext->ee_block(%u) + oext_alen(%d) - 1", | 535 | "oext->ee_block(%u) + oext_alen(%d) - 1", |
535 | new_ext_end, le32_to_cpu(oext->ee_block), | 536 | new_ext_end, le32_to_cpu(oext->ee_block), |
@@ -692,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
692 | while (1) { | 693 | while (1) { |
693 | /* The extent for donor must be found. */ | 694 | /* The extent for donor must be found. */ |
694 | if (!dext) { | 695 | if (!dext) { |
695 | ext4_error(donor_inode->i_sb, | 696 | EXT4_ERROR_INODE(donor_inode, |
696 | "The extent for donor must be found"); | 697 | "The extent for donor must be found"); |
697 | *err = -EIO; | 698 | *err = -EIO; |
698 | goto out; | 699 | goto out; |
699 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | 700 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { |
700 | ext4_error(donor_inode->i_sb, | 701 | EXT4_ERROR_INODE(donor_inode, |
701 | "Donor offset(%u) and the first block of donor " | 702 | "Donor offset(%u) and the first block of donor " |
702 | "extent(%u) should be equal", | 703 | "extent(%u) should be equal", |
703 | donor_off, | 704 | donor_off, |
@@ -976,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode, | |||
976 | } | 977 | } |
977 | 978 | ||
978 | /* Ext4 move extent supports only extent based file */ | 979 | /* Ext4 move extent supports only extent based file */ |
979 | if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { | 980 | if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { |
980 | ext4_debug("ext4 move extent: orig file is not extents " | 981 | ext4_debug("ext4 move extent: orig file is not extents " |
981 | "based file [ino:orig %lu]\n", orig_inode->i_ino); | 982 | "based file [ino:orig %lu]\n", orig_inode->i_ino); |
982 | return -EOPNOTSUPP; | 983 | return -EOPNOTSUPP; |
983 | } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { | 984 | } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { |
984 | ext4_debug("ext4 move extent: donor file is not extents " | 985 | ext4_debug("ext4 move extent: donor file is not extents " |
985 | "based file [ino:donor %lu]\n", donor_inode->i_ino); | 986 | "based file [ino:donor %lu]\n", donor_inode->i_ino); |
986 | return -EOPNOTSUPP; | 987 | return -EOPNOTSUPP; |
@@ -1354,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1354 | if (ret1 < 0) | 1355 | if (ret1 < 0) |
1355 | break; | 1356 | break; |
1356 | if (*moved_len > len) { | 1357 | if (*moved_len > len) { |
1357 | ext4_error(orig_inode->i_sb, | 1358 | EXT4_ERROR_INODE(orig_inode, |
1358 | "We replaced blocks too much! " | 1359 | "We replaced blocks too much! " |
1359 | "sum of replaced: %llu requested: %llu", | 1360 | "sum of replaced: %llu requested: %llu", |
1360 | *moved_len, len); | 1361 | *moved_len, len); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0c070fabd108..a43e6617b351 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | |||
187 | return blocksize; | 187 | return blocksize; |
188 | return (len & 65532) | ((len & 3) << 16); | 188 | return (len & 65532) | ((len & 3) << 16); |
189 | } | 189 | } |
190 | 190 | ||
191 | __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | 191 | __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) |
192 | { | 192 | { |
193 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | 193 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) |
@@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | |||
197 | if (len == blocksize) { | 197 | if (len == blocksize) { |
198 | if (blocksize == 65536) | 198 | if (blocksize == 65536) |
199 | return cpu_to_le16(EXT4_MAX_REC_LEN); | 199 | return cpu_to_le16(EXT4_MAX_REC_LEN); |
200 | else | 200 | else |
201 | return cpu_to_le16(0); | 201 | return cpu_to_le16(0); |
202 | } | 202 | } |
203 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | 203 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); |
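
For context on the two functions above (touched here only for whitespace): directory record lengths are multiples of 4, so the two low bits of the 16-bit on-disk rec_len are always zero and can carry bits 16–17 of lengths that need more than 16 bits (blocksizes up to 1 << 18). A standalone round-trip check of the packing, written as plain userspace C with the blocksize special cases omitted:

	#include <assert.h>
	#include <stdint.h>

	/* Same arithmetic as ext4_rec_len_{to,from}_disk above. */
	static uint16_t pack(unsigned int len)
	{
		return (len & 65532) | ((len >> 16) & 3);
	}

	static unsigned int unpack(uint16_t dlen)
	{
		return (dlen & 65532) | ((dlen & 3) << 16);
	}

	int main(void)
	{
		/* 70000 = 0x11170 needs 17 bits; it packs to 0x1171. */
		assert(pack(70000) == 0x1171);
		assert(unpack(pack(70000)) == 70000);
		return 0;
	}
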
@@ -349,7 +349,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
349 | brelse(bh); | 349 | brelse(bh); |
350 | } | 350 | } |
351 | if (bcount) | 351 | if (bcount) |
352 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", | 352 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
353 | levels ? "" : " ", names, space/bcount, | 353 | levels ? "" : " ", names, space/bcount, |
354 | (space/bcount)*100/blocksize); | 354 | (space/bcount)*100/blocksize); |
355 | return (struct stats) { names, space, bcount}; | 355 | return (struct stats) { names, space, bcount}; |
@@ -653,10 +653,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
653 | int ret, err; | 653 | int ret, err; |
654 | __u32 hashval; | 654 | __u32 hashval; |
655 | 655 | ||
656 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", | 656 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
657 | start_hash, start_minor_hash)); | 657 | start_hash, start_minor_hash)); |
658 | dir = dir_file->f_path.dentry->d_inode; | 658 | dir = dir_file->f_path.dentry->d_inode; |
659 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 659 | if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { |
660 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 660 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
661 | if (hinfo.hash_version <= DX_HASH_TEA) | 661 | if (hinfo.hash_version <= DX_HASH_TEA) |
662 | hinfo.hash_version += | 662 | hinfo.hash_version += |
@@ -801,7 +801,7 @@ static void ext4_update_dx_flag(struct inode *inode) | |||
801 | { | 801 | { |
802 | if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | 802 | if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, |
803 | EXT4_FEATURE_COMPAT_DIR_INDEX)) | 803 | EXT4_FEATURE_COMPAT_DIR_INDEX)) |
804 | EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; | 804 | ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); |
805 | } | 805 | } |
806 | 806 | ||
807 | /* | 807 | /* |
@@ -943,8 +943,8 @@ restart: | |||
943 | wait_on_buffer(bh); | 943 | wait_on_buffer(bh); |
944 | if (!buffer_uptodate(bh)) { | 944 | if (!buffer_uptodate(bh)) { |
945 | /* read error, skip block & hope for the best */ | 945 | /* read error, skip block & hope for the best */ |
946 | ext4_error(sb, "reading directory #%lu offset %lu", | 946 | EXT4_ERROR_INODE(dir, "reading directory lblock %lu", |
947 | dir->i_ino, (unsigned long)block); | 947 | (unsigned long) block); |
948 | brelse(bh); | 948 | brelse(bh); |
949 | goto next; | 949 | goto next; |
950 | } | 950 | } |
@@ -1066,15 +1066,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1066 | __u32 ino = le32_to_cpu(de->inode); | 1066 | __u32 ino = le32_to_cpu(de->inode); |
1067 | brelse(bh); | 1067 | brelse(bh); |
1068 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1068 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1069 | ext4_error(dir->i_sb, "bad inode number: %u", ino); | 1069 | EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); |
1070 | return ERR_PTR(-EIO); | 1070 | return ERR_PTR(-EIO); |
1071 | } | 1071 | } |
1072 | inode = ext4_iget(dir->i_sb, ino); | 1072 | inode = ext4_iget(dir->i_sb, ino); |
1073 | if (unlikely(IS_ERR(inode))) { | 1073 | if (unlikely(IS_ERR(inode))) { |
1074 | if (PTR_ERR(inode) == -ESTALE) { | 1074 | if (PTR_ERR(inode) == -ESTALE) { |
1075 | ext4_error(dir->i_sb, | 1075 | EXT4_ERROR_INODE(dir, |
1076 | "deleted inode referenced: %u", | 1076 | "deleted inode referenced: %u", |
1077 | ino); | 1077 | ino); |
1078 | return ERR_PTR(-EIO); | 1078 | return ERR_PTR(-EIO); |
1079 | } else { | 1079 | } else { |
1080 | return ERR_CAST(inode); | 1080 | return ERR_CAST(inode); |
@@ -1104,8 +1104,8 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1104 | brelse(bh); | 1104 | brelse(bh); |
1105 | 1105 | ||
1106 | if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { | 1106 | if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { |
1107 | ext4_error(child->d_inode->i_sb, | 1107 | EXT4_ERROR_INODE(child->d_inode, |
1108 | "bad inode number: %u", ino); | 1108 | "bad parent inode number: %u", ino); |
1109 | return ERR_PTR(-EIO); | 1109 | return ERR_PTR(-EIO); |
1110 | } | 1110 | } |
1111 | 1111 | ||
@@ -1141,7 +1141,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, | |||
1141 | unsigned rec_len = 0; | 1141 | unsigned rec_len = 0; |
1142 | 1142 | ||
1143 | while (count--) { | 1143 | while (count--) { |
1144 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) | 1144 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) |
1145 | (from + (map->offs<<2)); | 1145 | (from + (map->offs<<2)); |
1146 | rec_len = EXT4_DIR_REC_LEN(de->name_len); | 1146 | rec_len = EXT4_DIR_REC_LEN(de->name_len); |
1147 | memcpy (to, de, rec_len); | 1147 | memcpy (to, de, rec_len); |
@@ -1404,9 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1404 | de = (struct ext4_dir_entry_2 *)((char *)fde + | 1404 | de = (struct ext4_dir_entry_2 *)((char *)fde + |
1405 | ext4_rec_len_from_disk(fde->rec_len, blocksize)); | 1405 | ext4_rec_len_from_disk(fde->rec_len, blocksize)); |
1406 | if ((char *) de >= (((char *) root) + blocksize)) { | 1406 | if ((char *) de >= (((char *) root) + blocksize)) { |
1407 | ext4_error(dir->i_sb, | 1407 | EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); |
1408 | "invalid rec_len for '..' in inode %lu", | ||
1409 | dir->i_ino); | ||
1410 | brelse(bh); | 1408 | brelse(bh); |
1411 | return -EIO; | 1409 | return -EIO; |
1412 | } | 1410 | } |
@@ -1418,7 +1416,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1418 | brelse(bh); | 1416 | brelse(bh); |
1419 | return retval; | 1417 | return retval; |
1420 | } | 1418 | } |
1421 | EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; | 1419 | ext4_set_inode_flag(dir, EXT4_INODE_INDEX); |
1422 | data1 = bh2->b_data; | 1420 | data1 = bh2->b_data; |
1423 | 1421 | ||
1424 | memcpy (data1, de, len); | 1422 | memcpy (data1, de, len); |
@@ -1491,7 +1489,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1491 | retval = ext4_dx_add_entry(handle, dentry, inode); | 1489 | retval = ext4_dx_add_entry(handle, dentry, inode); |
1492 | if (!retval || (retval != ERR_BAD_DX_DIR)) | 1490 | if (!retval || (retval != ERR_BAD_DX_DIR)) |
1493 | return retval; | 1491 | return retval; |
1494 | EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; | 1492 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); |
1495 | dx_fallback++; | 1493 | dx_fallback++; |
1496 | ext4_mark_inode_dirty(handle, dir); | 1494 | ext4_mark_inode_dirty(handle, dir); |
1497 | } | 1495 | } |
@@ -1519,6 +1517,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1519 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1517 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
1520 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1518 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1521 | brelse(bh); | 1519 | brelse(bh); |
1520 | if (retval == 0) | ||
1521 | ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); | ||
1522 | return retval; | 1522 | return retval; |
1523 | } | 1523 | } |
1524 | 1524 | ||
@@ -1915,9 +1915,8 @@ static int empty_dir(struct inode *inode) | |||
1915 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1915 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1916 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { | 1916 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1917 | if (err) | 1917 | if (err) |
1918 | ext4_error(inode->i_sb, | 1918 | EXT4_ERROR_INODE(inode, |
1919 | "error %d reading directory #%lu offset 0", | 1919 | "error %d reading directory lblock 0", err); |
1920 | err, inode->i_ino); | ||
1921 | else | 1920 | else |
1922 | ext4_warning(inode->i_sb, | 1921 | ext4_warning(inode->i_sb, |
1923 | "bad directory (dir #%lu) - no data block", | 1922 | "bad directory (dir #%lu) - no data block", |
@@ -1941,17 +1940,17 @@ static int empty_dir(struct inode *inode) | |||
1941 | de = ext4_next_entry(de1, sb->s_blocksize); | 1940 | de = ext4_next_entry(de1, sb->s_blocksize); |
1942 | while (offset < inode->i_size) { | 1941 | while (offset < inode->i_size) { |
1943 | if (!bh || | 1942 | if (!bh || |
1944 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1943 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
1944 | unsigned int lblock; | ||
1945 | err = 0; | 1945 | err = 0; |
1946 | brelse(bh); | 1946 | brelse(bh); |
1947 | bh = ext4_bread(NULL, inode, | 1947 | lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); |
1948 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1948 | bh = ext4_bread(NULL, inode, lblock, 0, &err); |
1949 | if (!bh) { | 1949 | if (!bh) { |
1950 | if (err) | 1950 | if (err) |
1951 | ext4_error(sb, | 1951 | EXT4_ERROR_INODE(inode, |
1952 | "error %d reading directory" | 1952 | "error %d reading directory " |
1953 | " #%lu offset %u", | 1953 | "lblock %u", err, lblock); |
1954 | err, inode->i_ino, offset); | ||
1955 | offset += sb->s_blocksize; | 1954 | offset += sb->s_blocksize; |
1956 | continue; | 1955 | continue; |
1957 | } | 1956 | } |
@@ -2297,7 +2296,7 @@ retry: | |||
2297 | } | 2296 | } |
2298 | } else { | 2297 | } else { |
2299 | /* clear the extent format for fast symlink */ | 2298 | /* clear the extent format for fast symlink */ |
2300 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2299 | ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); |
2301 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2300 | inode->i_op = &ext4_fast_symlink_inode_operations; |
2302 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); | 2301 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
2303 | inode->i_size = l-1; | 2302 | inode->i_size = l-1; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5692c48754a0..6df797eb9aeb 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -911,7 +911,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
911 | percpu_counter_add(&sbi->s_freeinodes_counter, | 911 | percpu_counter_add(&sbi->s_freeinodes_counter, |
912 | EXT4_INODES_PER_GROUP(sb)); | 912 | EXT4_INODES_PER_GROUP(sb)); |
913 | 913 | ||
914 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | 914 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && |
915 | sbi->s_log_groups_per_flex) { | ||
915 | ext4_group_t flex_group; | 916 | ext4_group_t flex_group; |
916 | flex_group = ext4_flex_group(sbi, input->group); | 917 | flex_group = ext4_flex_group(sbi, input->group); |
917 | atomic_add(input->free_blocks_count, | 918 | atomic_add(input->free_blocks_count, |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e14d22c170d5..4e8983a9811b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -241,6 +241,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
241 | if (sb->s_flags & MS_RDONLY) | 241 | if (sb->s_flags & MS_RDONLY) |
242 | return ERR_PTR(-EROFS); | 242 | return ERR_PTR(-EROFS); |
243 | 243 | ||
244 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
244 | /* Special case here: if the journal has aborted behind our | 245 | /* Special case here: if the journal has aborted behind our |
245 | * backs (eg. EIO in the commit thread), then we still need to | 246 | * backs (eg. EIO in the commit thread), then we still need to |
246 | * take the FS itself readonly cleanly. */ | 247 | * take the FS itself readonly cleanly. */ |
@@ -645,6 +646,8 @@ static void ext4_put_super(struct super_block *sb) | |||
645 | struct ext4_super_block *es = sbi->s_es; | 646 | struct ext4_super_block *es = sbi->s_es; |
646 | int i, err; | 647 | int i, err; |
647 | 648 | ||
649 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | ||
650 | |||
648 | flush_workqueue(sbi->dio_unwritten_wq); | 651 | flush_workqueue(sbi->dio_unwritten_wq); |
649 | destroy_workqueue(sbi->dio_unwritten_wq); | 652 | destroy_workqueue(sbi->dio_unwritten_wq); |
650 | 653 | ||
@@ -941,6 +944,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
941 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); | 944 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); |
942 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) | 945 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) |
943 | seq_puts(seq, ",journal_async_commit"); | 946 | seq_puts(seq, ",journal_async_commit"); |
947 | else if (test_opt(sb, JOURNAL_CHECKSUM)) | ||
948 | seq_puts(seq, ",journal_checksum"); | ||
944 | if (test_opt(sb, NOBH)) | 949 | if (test_opt(sb, NOBH)) |
945 | seq_puts(seq, ",nobh"); | 950 | seq_puts(seq, ",nobh"); |
946 | if (test_opt(sb, I_VERSION)) | 951 | if (test_opt(sb, I_VERSION)) |
@@ -1059,7 +1064,7 @@ static int ext4_release_dquot(struct dquot *dquot); | |||
1059 | static int ext4_mark_dquot_dirty(struct dquot *dquot); | 1064 | static int ext4_mark_dquot_dirty(struct dquot *dquot); |
1060 | static int ext4_write_info(struct super_block *sb, int type); | 1065 | static int ext4_write_info(struct super_block *sb, int type); |
1061 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1066 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1062 | char *path, int remount); | 1067 | char *path); |
1063 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1068 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1064 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1069 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
1065 | size_t len, loff_t off); | 1070 | size_t len, loff_t off); |
@@ -1081,12 +1086,12 @@ static const struct dquot_operations ext4_quota_operations = { | |||
1081 | 1086 | ||
1082 | static const struct quotactl_ops ext4_qctl_operations = { | 1087 | static const struct quotactl_ops ext4_qctl_operations = { |
1083 | .quota_on = ext4_quota_on, | 1088 | .quota_on = ext4_quota_on, |
1084 | .quota_off = vfs_quota_off, | 1089 | .quota_off = dquot_quota_off, |
1085 | .quota_sync = vfs_quota_sync, | 1090 | .quota_sync = dquot_quota_sync, |
1086 | .get_info = vfs_get_dqinfo, | 1091 | .get_info = dquot_get_dqinfo, |
1087 | .set_info = vfs_set_dqinfo, | 1092 | .set_info = dquot_set_dqinfo, |
1088 | .get_dqblk = vfs_get_dqblk, | 1093 | .get_dqblk = dquot_get_dqblk, |
1089 | .set_dqblk = vfs_set_dqblk | 1094 | .set_dqblk = dquot_set_dqblk |
1090 | }; | 1095 | }; |
1091 | #endif | 1096 | #endif |
1092 | 1097 | ||
@@ -2051,7 +2056,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2051 | /* Turn quotas off */ | 2056 | /* Turn quotas off */ |
2052 | for (i = 0; i < MAXQUOTAS; i++) { | 2057 | for (i = 0; i < MAXQUOTAS; i++) { |
2053 | if (sb_dqopt(sb)->files[i]) | 2058 | if (sb_dqopt(sb)->files[i]) |
2054 | vfs_quota_off(sb, i, 0); | 2059 | dquot_quota_off(sb, i); |
2055 | } | 2060 | } |
2056 | #endif | 2061 | #endif |
2057 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ | 2062 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ |
@@ -2213,7 +2218,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | |||
2213 | struct ext4_attr { | 2218 | struct ext4_attr { |
2214 | struct attribute attr; | 2219 | struct attribute attr; |
2215 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2220 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
2216 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2221 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
2217 | const char *, size_t); | 2222 | const char *, size_t); |
2218 | int offset; | 2223 | int offset; |
2219 | }; | 2224 | }; |
@@ -2430,6 +2435,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2430 | __releases(kernel_lock) | 2435 | __releases(kernel_lock) |
2431 | __acquires(kernel_lock) | 2436 | __acquires(kernel_lock) |
2432 | { | 2437 | { |
2438 | char *orig_data = kstrdup(data, GFP_KERNEL); | ||
2433 | struct buffer_head *bh; | 2439 | struct buffer_head *bh; |
2434 | struct ext4_super_block *es = NULL; | 2440 | struct ext4_super_block *es = NULL; |
2435 | struct ext4_sb_info *sbi; | 2441 | struct ext4_sb_info *sbi; |
@@ -2793,24 +2799,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2793 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 2799 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
2794 | spin_lock_init(&sbi->s_next_gen_lock); | 2800 | spin_lock_init(&sbi->s_next_gen_lock); |
2795 | 2801 | ||
2796 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
2797 | ext4_count_free_blocks(sb)); | ||
2798 | if (!err) { | ||
2799 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
2800 | ext4_count_free_inodes(sb)); | ||
2801 | } | ||
2802 | if (!err) { | ||
2803 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
2804 | ext4_count_dirs(sb)); | ||
2805 | } | ||
2806 | if (!err) { | ||
2807 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2808 | } | ||
2809 | if (err) { | ||
2810 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
2811 | goto failed_mount3; | ||
2812 | } | ||
2813 | |||
2814 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2802 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2815 | sbi->s_max_writeback_mb_bump = 128; | 2803 | sbi->s_max_writeback_mb_bump = 128; |
2816 | 2804 | ||
@@ -2910,6 +2898,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2910 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 2898 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
2911 | 2899 | ||
2912 | no_journal: | 2900 | no_journal: |
2901 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
2902 | ext4_count_free_blocks(sb)); | ||
2903 | if (!err) | ||
2904 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
2905 | ext4_count_free_inodes(sb)); | ||
2906 | if (!err) | ||
2907 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
2908 | ext4_count_dirs(sb)); | ||
2909 | if (!err) | ||
2910 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2911 | if (err) { | ||
2912 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
2913 | goto failed_mount_wq; | ||
2914 | } | ||
2913 | if (test_opt(sb, NOBH)) { | 2915 | if (test_opt(sb, NOBH)) { |
2914 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | 2916 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { |
2915 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " | 2917 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " |
@@ -3001,7 +3003,7 @@ no_journal: | |||
3001 | err = ext4_setup_system_zone(sb); | 3003 | err = ext4_setup_system_zone(sb); |
3002 | if (err) { | 3004 | if (err) { |
3003 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | 3005 | ext4_msg(sb, KERN_ERR, "failed to initialize system " |
3004 | "zone (%d)\n", err); | 3006 | "zone (%d)", err); |
3005 | goto failed_mount4; | 3007 | goto failed_mount4; |
3006 | } | 3008 | } |
3007 | 3009 | ||
@@ -3040,9 +3042,11 @@ no_journal: | |||
3040 | } else | 3042 | } else |
3041 | descr = "out journal"; | 3043 | descr = "out journal"; |
3042 | 3044 | ||
3043 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); | 3045 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " |
3046 | "Opts: %s", descr, orig_data); | ||
3044 | 3047 | ||
3045 | lock_kernel(); | 3048 | lock_kernel(); |
3049 | kfree(orig_data); | ||
3046 | return 0; | 3050 | return 0; |
3047 | 3051 | ||
3048 | cantfind_ext4: | 3052 | cantfind_ext4: |
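
The kstrdup() added at the top of ext4_fill_super() (and of ext4_remount() below) exists because option parsing tokenizes the mount data string in place; a pristine copy is kept so the final "mounted filesystem ... Opts: %s" line can report what the user actually passed, and it is freed on every exit path. Reduced to its skeleton, with illustrative names:

	/* Skeleton of the pattern; parse_options() mutates `data` in place. */
	static int fill_super(struct super_block *sb, void *data, int silent)
	{
		char *orig_data = kstrdup(data, GFP_KERNEL);
		int ret = -EINVAL;

		if (!parse_options(sb, data))	/* clobbers `data` */
			goto out;
		/* ... the actual mount work ... */
		ext4_msg(sb, KERN_INFO, "mounted filesystem. Opts: %s",
			 orig_data);
		ret = 0;
	out:
		kfree(orig_data);		/* safe even if kstrdup failed */
		return ret;
	}
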
@@ -3059,6 +3063,10 @@ failed_mount_wq: | |||
3059 | jbd2_journal_destroy(sbi->s_journal); | 3063 | jbd2_journal_destroy(sbi->s_journal); |
3060 | sbi->s_journal = NULL; | 3064 | sbi->s_journal = NULL; |
3061 | } | 3065 | } |
3066 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3067 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3068 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3069 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3062 | failed_mount3: | 3070 | failed_mount3: |
3063 | if (sbi->s_flex_groups) { | 3071 | if (sbi->s_flex_groups) { |
3064 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3072 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
@@ -3066,10 +3074,6 @@ failed_mount3: | |||
3066 | else | 3074 | else |
3067 | kfree(sbi->s_flex_groups); | 3075 | kfree(sbi->s_flex_groups); |
3068 | } | 3076 | } |
3069 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3070 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3071 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3072 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3073 | failed_mount2: | 3077 | failed_mount2: |
3074 | for (i = 0; i < db_count; i++) | 3078 | for (i = 0; i < db_count; i++) |
3075 | brelse(sbi->s_group_desc[i]); | 3079 | brelse(sbi->s_group_desc[i]); |
@@ -3089,6 +3093,7 @@ out_fail: | |||
3089 | kfree(sbi->s_blockgroup_lock); | 3093 | kfree(sbi->s_blockgroup_lock); |
3090 | kfree(sbi); | 3094 | kfree(sbi); |
3091 | lock_kernel(); | 3095 | lock_kernel(); |
3096 | kfree(orig_data); | ||
3092 | return ret; | 3097 | return ret; |
3093 | } | 3098 | } |
3094 | 3099 | ||
@@ -3380,7 +3385,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3380 | if (!(sb->s_flags & MS_RDONLY)) | 3385 | if (!(sb->s_flags & MS_RDONLY)) |
3381 | es->s_wtime = cpu_to_le32(get_seconds()); | 3386 | es->s_wtime = cpu_to_le32(get_seconds()); |
3382 | es->s_kbytes_written = | 3387 | es->s_kbytes_written = |
3383 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3388 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
3384 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3389 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
3385 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 3390 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); |
3386 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3391 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
@@ -3485,8 +3490,10 @@ int ext4_force_commit(struct super_block *sb) | |||
3485 | return 0; | 3490 | return 0; |
3486 | 3491 | ||
3487 | journal = EXT4_SB(sb)->s_journal; | 3492 | journal = EXT4_SB(sb)->s_journal; |
3488 | if (journal) | 3493 | if (journal) { |
3494 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
3489 | ret = ext4_journal_force_commit(journal); | 3495 | ret = ext4_journal_force_commit(journal); |
3496 | } | ||
3490 | 3497 | ||
3491 | return ret; | 3498 | return ret; |
3492 | } | 3499 | } |
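The new vfs_check_frozen() call makes a forced commit (the fsync path) block while the filesystem is frozen, instead of dirtying the journal of a snapshotted device. In this kernel generation the helper is a macro along these lines, quoted from memory and best treated as an approximation:

    /* include/linux/fs.h (approximate, this era) */
    #define vfs_check_frozen(sb, level) \
        wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))

so SB_FREEZE_WRITE callers sleep until the thaw path lowers s_frozen and wakes the queue.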
@@ -3535,18 +3542,16 @@ static int ext4_freeze(struct super_block *sb) | |||
3535 | * the journal. | 3542 | * the journal. |
3536 | */ | 3543 | */ |
3537 | error = jbd2_journal_flush(journal); | 3544 | error = jbd2_journal_flush(journal); |
3538 | if (error < 0) { | 3545 | if (error < 0) |
3539 | out: | 3546 | goto out; |
3540 | jbd2_journal_unlock_updates(journal); | ||
3541 | return error; | ||
3542 | } | ||
3543 | 3547 | ||
3544 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 3548 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
3545 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3549 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3546 | error = ext4_commit_super(sb, 1); | 3550 | error = ext4_commit_super(sb, 1); |
3547 | if (error) | 3551 | out: |
3548 | goto out; | 3552 | /* we rely on s_frozen to stop further updates */ |
3549 | return 0; | 3553 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
3554 | return error; | ||
3550 | } | 3555 | } |
3551 | 3556 | ||
3552 | /* | 3557 | /* |
@@ -3563,7 +3568,6 @@ static int ext4_unfreeze(struct super_block *sb) | |||
3563 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3568 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3564 | ext4_commit_super(sb, 1); | 3569 | ext4_commit_super(sb, 1); |
3565 | unlock_super(sb); | 3570 | unlock_super(sb); |
3566 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | ||
3567 | return 0; | 3571 | return 0; |
3568 | } | 3572 | } |
3569 | 3573 | ||
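Net effect of the freeze/unfreeze hunks: ext4_freeze() used to return with journal updates still blocked and ext4_unfreeze() dropped the lock much later; now freeze unlocks the journal on every path, success or error, and trusts the VFS s_frozen state to fence new modifications. Condensed control flow of the resulting function (the jbd2_journal_lock_updates() call sits just above the quoted context):

    static int ext4_freeze(struct super_block *sb)      /* condensed sketch */
    {
        journal_t *journal = EXT4_SB(sb)->s_journal;
        int error;

        jbd2_journal_lock_updates(journal);  /* quiesce running transactions */
        error = jbd2_journal_flush(journal);
        if (error < 0)
            goto out;
        /* journal blocked and flushed: clear needs_recovery for the snapshot */
        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
    out:
        /* s_frozen, not the journal lock, now stops further updates */
        jbd2_journal_unlock_updates(journal);
        return error;
    }

This is also what lets ext4_unfreeze() shrink to re-setting the feature flag and committing the superblock, as the hunk above shows.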
@@ -3574,12 +3578,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3574 | ext4_fsblk_t n_blocks_count = 0; | 3578 | ext4_fsblk_t n_blocks_count = 0; |
3575 | unsigned long old_sb_flags; | 3579 | unsigned long old_sb_flags; |
3576 | struct ext4_mount_options old_opts; | 3580 | struct ext4_mount_options old_opts; |
3581 | int enable_quota = 0; | ||
3577 | ext4_group_t g; | 3582 | ext4_group_t g; |
3578 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 3583 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
3579 | int err; | 3584 | int err; |
3580 | #ifdef CONFIG_QUOTA | 3585 | #ifdef CONFIG_QUOTA |
3581 | int i; | 3586 | int i; |
3582 | #endif | 3587 | #endif |
3588 | char *orig_data = kstrdup(data, GFP_KERNEL); | ||
3583 | 3589 | ||
3584 | lock_kernel(); | 3590 | lock_kernel(); |
3585 | 3591 | ||
@@ -3630,6 +3636,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3630 | } | 3636 | } |
3631 | 3637 | ||
3632 | if (*flags & MS_RDONLY) { | 3638 | if (*flags & MS_RDONLY) { |
3639 | err = dquot_suspend(sb, -1); | ||
3640 | if (err < 0) | ||
3641 | goto restore_opts; | ||
3642 | |||
3633 | /* | 3643 | /* |
3634 | * First of all, the unconditional stuff we have to do | 3644 | * First of all, the unconditional stuff we have to do |
3635 | * to disable replay of the journal when we next remount | 3645 | * to disable replay of the journal when we next remount |
@@ -3698,6 +3708,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3698 | goto restore_opts; | 3708 | goto restore_opts; |
3699 | if (!ext4_setup_super(sb, es, 0)) | 3709 | if (!ext4_setup_super(sb, es, 0)) |
3700 | sb->s_flags &= ~MS_RDONLY; | 3710 | sb->s_flags &= ~MS_RDONLY; |
3711 | enable_quota = 1; | ||
3701 | } | 3712 | } |
3702 | } | 3713 | } |
3703 | ext4_setup_system_zone(sb); | 3714 | ext4_setup_system_zone(sb); |
@@ -3713,6 +3724,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3713 | #endif | 3724 | #endif |
3714 | unlock_super(sb); | 3725 | unlock_super(sb); |
3715 | unlock_kernel(); | 3726 | unlock_kernel(); |
3727 | if (enable_quota) | ||
3728 | dquot_resume(sb, -1); | ||
3729 | |||
3730 | ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); | ||
3731 | kfree(orig_data); | ||
3716 | return 0; | 3732 | return 0; |
3717 | 3733 | ||
3718 | restore_opts: | 3734 | restore_opts: |
@@ -3734,6 +3750,7 @@ restore_opts: | |||
3734 | #endif | 3750 | #endif |
3735 | unlock_super(sb); | 3751 | unlock_super(sb); |
3736 | unlock_kernel(); | 3752 | unlock_kernel(); |
3753 | kfree(orig_data); | ||
3737 | return err; | 3754 | return err; |
3738 | } | 3755 | } |
3739 | 3756 | ||
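The remount hunks wire ext4 into the new generic quota suspend/resume machinery: going read-only calls dquot_suspend(sb, -1) (type -1 means all quota types) up front, and a successful read-write transition sets enable_quota so dquot_resume(sb, -1) runs only after unlock_super()/unlock_kernel(), where it may safely take its own locks. Skeleton of the shape, hedged:

    static int my_remount(struct super_block *sb, int *flags, char *data)
    {
        int err, enable_quota = 0;

        if (*flags & MS_RDONLY) {
            err = dquot_suspend(sb, -1);  /* -1: every quota type */
            if (err < 0)
                return err;               /* real code restores options here */
            /* ... transition to read-only ... */
        } else {
            /* ... transition to read-write ... */
            enable_quota = 1;
        }
        if (enable_quota)
            dquot_resume(sb, -1);         /* after the heavy locks drop */
        return 0;
    }

This pairs with the quota_on hunk further down, which deletes the old "remount" special case from ext4_quota_on() now that suspend/resume handles it, and renames the vfs_quota_* helpers to dquot_*.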
@@ -3906,24 +3923,21 @@ static int ext4_write_info(struct super_block *sb, int type) | |||
3906 | */ | 3923 | */ |
3907 | static int ext4_quota_on_mount(struct super_block *sb, int type) | 3924 | static int ext4_quota_on_mount(struct super_block *sb, int type) |
3908 | { | 3925 | { |
3909 | return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], | 3926 | return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], |
3910 | EXT4_SB(sb)->s_jquota_fmt, type); | 3927 | EXT4_SB(sb)->s_jquota_fmt, type); |
3911 | } | 3928 | } |
3912 | 3929 | ||
3913 | /* | 3930 | /* |
3914 | * Standard function to be called on quota_on | 3931 | * Standard function to be called on quota_on |
3915 | */ | 3932 | */ |
3916 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 3933 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
3917 | char *name, int remount) | 3934 | char *name) |
3918 | { | 3935 | { |
3919 | int err; | 3936 | int err; |
3920 | struct path path; | 3937 | struct path path; |
3921 | 3938 | ||
3922 | if (!test_opt(sb, QUOTA)) | 3939 | if (!test_opt(sb, QUOTA)) |
3923 | return -EINVAL; | 3940 | return -EINVAL; |
3924 | /* When remounting, no checks are needed and in fact, name is NULL */ | ||
3925 | if (remount) | ||
3926 | return vfs_quota_on(sb, type, format_id, name, remount); | ||
3927 | 3941 | ||
3928 | err = kern_path(name, LOOKUP_FOLLOW, &path); | 3942 | err = kern_path(name, LOOKUP_FOLLOW, &path); |
3929 | if (err) | 3943 | if (err) |
@@ -3962,7 +3976,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3962 | } | 3976 | } |
3963 | } | 3977 | } |
3964 | 3978 | ||
3965 | err = vfs_quota_on_path(sb, type, format_id, &path); | 3979 | err = dquot_quota_on_path(sb, type, format_id, &path); |
3966 | path_put(&path); | 3980 | path_put(&path); |
3967 | return err; | 3981 | return err; |
3968 | } | 3982 | } |
@@ -4141,6 +4155,7 @@ static int __init init_ext4_fs(void) | |||
4141 | { | 4155 | { |
4142 | int err; | 4156 | int err; |
4143 | 4157 | ||
4158 | ext4_check_flag_values(); | ||
4144 | err = init_ext4_system_zone(); | 4159 | err = init_ext4_system_zone(); |
4145 | if (err) | 4160 | if (err) |
4146 | return err; | 4161 | return err; |
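ext4_check_flag_values() runs first so a mismatch between the on-disk EXT4_*_FL flag values and the EXT4_INODE_* bit numbers used by ext4_test_inode_flag() (see the xattr.c hunks below) is caught at build time rather than by silent corruption. The real checker lives in ext4.h; a simplified sketch of the idea, with the macro shape assumed rather than quoted:

    /* simplified sketch; the real ext4_check_flag_values() covers every flag */
    #define CHECK_FLAG_VALUE(FLAG) \
        BUILD_BUG_ON(EXT4_##FLAG##_FL != (1 << EXT4_INODE_##FLAG))

    static void my_check_flag_values(void)
    {
        CHECK_FLAG_VALUE(SECRM);
        CHECK_FLAG_VALUE(EXTENTS);
        /* ... one line per flag; BUILD_BUG_ON fails the build on mismatch */
    }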
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 00740cb32be3..ed9354aff279 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -34,6 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
37 | .setattr = ext4_setattr, | ||
37 | #ifdef CONFIG_EXT4_FS_XATTR | 38 | #ifdef CONFIG_EXT4_FS_XATTR |
38 | .setxattr = generic_setxattr, | 39 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 40 | .getxattr = generic_getxattr, |
@@ -45,6 +46,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 46 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
46 | .readlink = generic_readlink, | 47 | .readlink = generic_readlink, |
47 | .follow_link = ext4_follow_link, | 48 | .follow_link = ext4_follow_link, |
49 | .setattr = ext4_setattr, | ||
48 | #ifdef CONFIG_EXT4_FS_XATTR | 50 | #ifdef CONFIG_EXT4_FS_XATTR |
49 | .setxattr = generic_setxattr, | 51 | .setxattr = generic_setxattr, |
50 | .getxattr = generic_getxattr, | 52 | .getxattr = generic_getxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2de0e9515089..04338009793a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -228,9 +228,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
228 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 228 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
229 | if (ext4_xattr_check_block(bh)) { | 229 | if (ext4_xattr_check_block(bh)) { |
230 | bad_block: | 230 | bad_block: |
231 | ext4_error(inode->i_sb, | 231 | EXT4_ERROR_INODE(inode, "bad block %llu", |
232 | "inode %lu: bad block %llu", inode->i_ino, | 232 | EXT4_I(inode)->i_file_acl); |
233 | EXT4_I(inode)->i_file_acl); | ||
234 | error = -EIO; | 233 | error = -EIO; |
235 | goto cleanup; | 234 | goto cleanup; |
236 | } | 235 | } |
@@ -372,9 +371,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
372 | ea_bdebug(bh, "b_count=%d, refcount=%d", | 371 | ea_bdebug(bh, "b_count=%d, refcount=%d", |
373 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 372 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
374 | if (ext4_xattr_check_block(bh)) { | 373 | if (ext4_xattr_check_block(bh)) { |
375 | ext4_error(inode->i_sb, | 374 | EXT4_ERROR_INODE(inode, "bad block %llu", |
376 | "inode %lu: bad block %llu", inode->i_ino, | 375 | EXT4_I(inode)->i_file_acl); |
377 | EXT4_I(inode)->i_file_acl); | ||
378 | error = -EIO; | 376 | error = -EIO; |
379 | goto cleanup; | 377 | goto cleanup; |
380 | } | 378 | } |
@@ -666,8 +664,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, | |||
666 | atomic_read(&(bs->bh->b_count)), | 664 | atomic_read(&(bs->bh->b_count)), |
667 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); | 665 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); |
668 | if (ext4_xattr_check_block(bs->bh)) { | 666 | if (ext4_xattr_check_block(bs->bh)) { |
669 | ext4_error(sb, "inode %lu: bad block %llu", | 667 | EXT4_ERROR_INODE(inode, "bad block %llu", |
670 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 668 | EXT4_I(inode)->i_file_acl); |
671 | error = -EIO; | 669 | error = -EIO; |
672 | goto cleanup; | 670 | goto cleanup; |
673 | } | 671 | } |
@@ -820,7 +818,7 @@ inserted: | |||
820 | EXT4_I(inode)->i_block_group); | 818 | EXT4_I(inode)->i_block_group); |
821 | 819 | ||
822 | /* non-extent files can't have physical blocks past 2^32 */ | 820 | /* non-extent files can't have physical blocks past 2^32 */ |
823 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 821 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
824 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 822 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
825 | 823 | ||
826 | block = ext4_new_meta_blocks(handle, inode, | 824 | block = ext4_new_meta_blocks(handle, inode, |
@@ -828,7 +826,7 @@ inserted: | |||
828 | if (error) | 826 | if (error) |
829 | goto cleanup; | 827 | goto cleanup; |
830 | 828 | ||
831 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 829 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
832 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | 830 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); |
833 | 831 | ||
834 | ea_idebug(inode, "creating block %d", block); | 832 | ea_idebug(inode, "creating block %d", block); |
@@ -880,8 +878,8 @@ cleanup_dquot: | |||
880 | goto cleanup; | 878 | goto cleanup; |
881 | 879 | ||
882 | bad_block: | 880 | bad_block: |
883 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", | 881 | EXT4_ERROR_INODE(inode, "bad block %llu", |
884 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 882 | EXT4_I(inode)->i_file_acl); |
885 | goto cleanup; | 883 | goto cleanup; |
886 | 884 | ||
887 | #undef header | 885 | #undef header |
@@ -1194,8 +1192,8 @@ retry: | |||
1194 | if (!bh) | 1192 | if (!bh) |
1195 | goto cleanup; | 1193 | goto cleanup; |
1196 | if (ext4_xattr_check_block(bh)) { | 1194 | if (ext4_xattr_check_block(bh)) { |
1197 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", | 1195 | EXT4_ERROR_INODE(inode, "bad block %llu", |
1198 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 1196 | EXT4_I(inode)->i_file_acl); |
1199 | error = -EIO; | 1197 | error = -EIO; |
1200 | goto cleanup; | 1198 | goto cleanup; |
1201 | } | 1199 | } |
@@ -1372,14 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) | |||
1372 | goto cleanup; | 1370 | goto cleanup; |
1373 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 1371 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
1374 | if (!bh) { | 1372 | if (!bh) { |
1375 | ext4_error(inode->i_sb, "inode %lu: block %llu read error", | 1373 | EXT4_ERROR_INODE(inode, "block %llu read error", |
1376 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 1374 | EXT4_I(inode)->i_file_acl); |
1377 | goto cleanup; | 1375 | goto cleanup; |
1378 | } | 1376 | } |
1379 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || | 1377 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || |
1380 | BHDR(bh)->h_blocks != cpu_to_le32(1)) { | 1378 | BHDR(bh)->h_blocks != cpu_to_le32(1)) { |
1381 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", | 1379 | EXT4_ERROR_INODE(inode, "bad block %llu", |
1382 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 1380 | EXT4_I(inode)->i_file_acl); |
1383 | goto cleanup; | 1381 | goto cleanup; |
1384 | } | 1382 | } |
1385 | ext4_xattr_release_block(handle, inode, bh); | 1383 | ext4_xattr_release_block(handle, inode, bh); |
@@ -1504,9 +1502,8 @@ again: | |||
1504 | } | 1502 | } |
1505 | bh = sb_bread(inode->i_sb, ce->e_block); | 1503 | bh = sb_bread(inode->i_sb, ce->e_block); |
1506 | if (!bh) { | 1504 | if (!bh) { |
1507 | ext4_error(inode->i_sb, | 1505 | EXT4_ERROR_INODE(inode, "block %lu read error", |
1508 | "inode %lu: block %lu read error", | 1506 | (unsigned long) ce->e_block); |
1509 | inode->i_ino, (unsigned long) ce->e_block); | ||
1510 | } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= | 1507 | } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= |
1511 | EXT4_XATTR_REFCOUNT_MAX) { | 1508 | EXT4_XATTR_REFCOUNT_MAX) { |
1512 | ea_idebug(inode, "block %lu refcount %d>=%d", | 1509 | ea_idebug(inode, "block %lu refcount %d>=%d", |
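All of the xattr.c error sites above are mechanical conversions: EXT4_ERROR_INODE() supplies the "inode %lu" prefix and the calling function's name itself, so every caller stops passing inode->i_ino by hand. A sketch of what such a wrapper looks like, assuming (not quoting) the ext4.h definition:

    /* assumed shape; the real macro forwards to ext4_error_inode() */
    #define EXT4_ERROR_INODE(inode, fmt, a...) \
        ext4_error_inode(__func__, (inode), (fmt), ## a)

Centralizing the prefix keeps the messages uniform and removes a class of copy-paste mistakes, such as a site printing the wrong inode number.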
diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 113f0a1e565d..ae8200f84e39 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c | |||
@@ -242,9 +242,10 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) | |||
242 | while (*fclus < cluster) { | 242 | while (*fclus < cluster) { |
243 | /* prevent the infinite loop of cluster chain */ | 243 | /* prevent the infinite loop of cluster chain */ |
244 | if (*fclus > limit) { | 244 | if (*fclus > limit) { |
245 | fat_fs_error(sb, "%s: detected the cluster chain loop" | 245 | fat_fs_error_ratelimit(sb, |
246 | " (i_pos %lld)", __func__, | 246 | "%s: detected the cluster chain loop" |
247 | MSDOS_I(inode)->i_pos); | 247 | " (i_pos %lld)", __func__, |
248 | MSDOS_I(inode)->i_pos); | ||
248 | nr = -EIO; | 249 | nr = -EIO; |
249 | goto out; | 250 | goto out; |
250 | } | 251 | } |
@@ -253,9 +254,9 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) | |||
253 | if (nr < 0) | 254 | if (nr < 0) |
254 | goto out; | 255 | goto out; |
255 | else if (nr == FAT_ENT_FREE) { | 256 | else if (nr == FAT_ENT_FREE) { |
256 | fat_fs_error(sb, "%s: invalid cluster chain" | 257 | fat_fs_error_ratelimit(sb, "%s: invalid cluster chain" |
257 | " (i_pos %lld)", __func__, | 258 | " (i_pos %lld)", __func__, |
258 | MSDOS_I(inode)->i_pos); | 259 | MSDOS_I(inode)->i_pos); |
259 | nr = -EIO; | 260 | nr = -EIO; |
260 | goto out; | 261 | goto out; |
261 | } else if (nr == FAT_ENT_EOF) { | 262 | } else if (nr == FAT_ENT_EOF) { |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 530b4ca01510..ee42b9e0b16a 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/buffer_head.h> | 19 | #include <linux/buffer_head.h> |
20 | #include <linux/compat.h> | 20 | #include <linux/compat.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | #include <linux/kernel.h> | ||
22 | #include "fat.h" | 23 | #include "fat.h" |
23 | 24 | ||
24 | /* | 25 | /* |
@@ -140,28 +141,22 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, | |||
140 | { | 141 | { |
141 | const wchar_t *ip; | 142 | const wchar_t *ip; |
142 | wchar_t ec; | 143 | wchar_t ec; |
143 | unsigned char *op, nc; | 144 | unsigned char *op; |
144 | int charlen; | 145 | int charlen; |
145 | int k; | ||
146 | 146 | ||
147 | ip = uni; | 147 | ip = uni; |
148 | op = ascii; | 148 | op = ascii; |
149 | 149 | ||
150 | while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { | 150 | while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { |
151 | ec = *ip++; | 151 | ec = *ip++; |
152 | if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { | 152 | if ((charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { |
153 | op += charlen; | 153 | op += charlen; |
154 | len -= charlen; | 154 | len -= charlen; |
155 | } else { | 155 | } else { |
156 | if (uni_xlate == 1) { | 156 | if (uni_xlate == 1) { |
157 | *op = ':'; | 157 | *op++ = ':'; |
158 | for (k = 4; k > 0; k--) { | 158 | op = pack_hex_byte(op, ec >> 8); |
159 | nc = ec & 0xF; | 159 | op = pack_hex_byte(op, ec); |
160 | op[k] = nc > 9 ? nc + ('a' - 10) | ||
161 | : nc + '0'; | ||
162 | ec >>= 4; | ||
163 | } | ||
164 | op += 5; | ||
165 | len -= 5; | 160 | len -= 5; |
166 | } else { | 161 | } else { |
167 | *op++ = '?'; | 162 | *op++ = '?'; |
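The uni16_to_x8() hunk trades a hand-rolled nibble loop for pack_hex_byte() from linux/kernel.h (hence the new include at the top of this diff). Output is unchanged: an unrepresentable character still becomes ':' plus four lowercase hex digits, five bytes total, which is why len is still decremented by 5. The helper, approximately as defined in this era:

    /* linux/kernel.h, approximate */
    static inline char *pack_hex_byte(char *buf, u8 byte)
    {
        *buf++ = hex_asc_hi(byte);   /* high nibble, lowercase hex */
        *buf++ = hex_asc_lo(byte);   /* low nibble */
        return buf;
    }

The two calls emit the high byte of the wide character and then the low byte, matching the old loop's most-significant-nibble-first ordering.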
@@ -758,9 +753,10 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp, | |||
758 | return ret; | 753 | return ret; |
759 | } | 754 | } |
760 | 755 | ||
761 | static int fat_dir_ioctl(struct inode *inode, struct file *filp, | 756 | static long fat_dir_ioctl(struct file *filp, unsigned int cmd, |
762 | unsigned int cmd, unsigned long arg) | 757 | unsigned long arg) |
763 | { | 758 | { |
759 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
764 | struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; | 760 | struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; |
765 | int short_only, both; | 761 | int short_only, both; |
766 | 762 | ||
@@ -774,7 +770,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp, | |||
774 | both = 1; | 770 | both = 1; |
775 | break; | 771 | break; |
776 | default: | 772 | default: |
777 | return fat_generic_ioctl(inode, filp, cmd, arg); | 773 | return fat_generic_ioctl(filp, cmd, arg); |
778 | } | 774 | } |
779 | 775 | ||
780 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) | 776 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) |
@@ -814,7 +810,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, | |||
814 | both = 1; | 810 | both = 1; |
815 | break; | 811 | break; |
816 | default: | 812 | default: |
817 | return -ENOIOCTLCMD; | 813 | return fat_generic_ioctl(filp, cmd, (unsigned long)arg); |
818 | } | 814 | } |
819 | 815 | ||
820 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) | 816 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) |
@@ -836,7 +832,7 @@ const struct file_operations fat_dir_operations = { | |||
836 | .llseek = generic_file_llseek, | 832 | .llseek = generic_file_llseek, |
837 | .read = generic_read_dir, | 833 | .read = generic_read_dir, |
838 | .readdir = fat_readdir, | 834 | .readdir = fat_readdir, |
839 | .ioctl = fat_dir_ioctl, | 835 | .unlocked_ioctl = fat_dir_ioctl, |
840 | #ifdef CONFIG_COMPAT | 836 | #ifdef CONFIG_COMPAT |
841 | .compat_ioctl = fat_compat_dir_ioctl, | 837 | .compat_ioctl = fat_compat_dir_ioctl, |
842 | #endif | 838 | #endif |
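fat_dir_ioctl() is converted from the BKL-era .ioctl hook (which received the inode and ran under the big kernel lock) to .unlocked_ioctl, which gets only the file and must dig the inode out itself; the compat handler likewise now falls through to fat_generic_ioctl() instead of bailing with -ENOIOCTLCMD. The generic shape of such a conversion, as a hedged sketch:

    /* old style: file_operations.ioctl, called with the BKL held */
    static int my_ioctl(struct inode *inode, struct file *filp,
                        unsigned int cmd, unsigned long arg);

    /* new style: file_operations.unlocked_ioctl, no BKL, long return */
    static long my_unlocked_ioctl(struct file *filp, unsigned int cmd,
                                  unsigned long arg)
    {
        struct inode *inode = filp->f_path.dentry->d_inode;

        switch (cmd) {
        /* ... per-command cases, taking only the locks each one needs ... */
        default:
            return -ENOTTY;   /* unknown command */
        }
    }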
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e6efdfa0f6db..27ac25725954 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/nls.h> | 6 | #include <linux/nls.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/mutex.h> | 8 | #include <linux/mutex.h> |
9 | #include <linux/ratelimit.h> | ||
9 | #include <linux/msdos_fs.h> | 10 | #include <linux/msdos_fs.h> |
10 | 11 | ||
11 | /* | 12 | /* |
@@ -82,6 +83,8 @@ struct msdos_sb_info { | |||
82 | struct fatent_operations *fatent_ops; | 83 | struct fatent_operations *fatent_ops; |
83 | struct inode *fat_inode; | 84 | struct inode *fat_inode; |
84 | 85 | ||
86 | struct ratelimit_state ratelimit; | ||
87 | |||
85 | spinlock_t inode_hash_lock; | 88 | spinlock_t inode_hash_lock; |
86 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; | 89 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; |
87 | }; | 90 | }; |
@@ -298,16 +301,16 @@ extern int fat_free_clusters(struct inode *inode, int cluster); | |||
298 | extern int fat_count_free_clusters(struct super_block *sb); | 301 | extern int fat_count_free_clusters(struct super_block *sb); |
299 | 302 | ||
300 | /* fat/file.c */ | 303 | /* fat/file.c */ |
301 | extern int fat_generic_ioctl(struct inode *inode, struct file *filp, | 304 | extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, |
302 | unsigned int cmd, unsigned long arg); | 305 | unsigned long arg); |
303 | extern const struct file_operations fat_file_operations; | 306 | extern const struct file_operations fat_file_operations; |
304 | extern const struct inode_operations fat_file_inode_operations; | 307 | extern const struct inode_operations fat_file_inode_operations; |
305 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); | 308 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); |
306 | extern void fat_truncate(struct inode *inode); | 309 | extern int fat_setsize(struct inode *inode, loff_t offset); |
310 | extern void fat_truncate_blocks(struct inode *inode, loff_t offset); | ||
307 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, | 311 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, |
308 | struct kstat *stat); | 312 | struct kstat *stat); |
309 | extern int fat_file_fsync(struct file *file, struct dentry *dentry, | 313 | extern int fat_file_fsync(struct file *file, int datasync); |
310 | int datasync); | ||
311 | 314 | ||
312 | /* fat/inode.c */ | 315 | /* fat/inode.c */ |
313 | extern void fat_attach(struct inode *inode, loff_t i_pos); | 316 | extern void fat_attach(struct inode *inode, loff_t i_pos); |
@@ -322,8 +325,13 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
322 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, | 325 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, |
323 | struct inode *i2); | 326 | struct inode *i2); |
324 | /* fat/misc.c */ | 327 | /* fat/misc.c */ |
325 | extern void fat_fs_error(struct super_block *s, const char *fmt, ...) | 328 | extern void |
326 | __attribute__ ((format (printf, 2, 3))) __cold; | 329 | __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) |
330 | __attribute__ ((format (printf, 3, 4))) __cold; | ||
331 | #define fat_fs_error(s, fmt, args...) \ | ||
332 | __fat_fs_error(s, 1, fmt , ## args) | ||
333 | #define fat_fs_error_ratelimit(s, fmt, args...) \ | ||
334 | __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args) | ||
327 | extern int fat_clusters_flush(struct super_block *sb); | 335 | extern int fat_clusters_flush(struct super_block *sb); |
328 | extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); | 336 | extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); |
329 | extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, | 337 | extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, |
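The fat.h hunks split error reporting into two flavors: fat_fs_error() always prints, while fat_fs_error_ratelimit() passes __ratelimit(&MSDOS_SB(s)->ratelimit) as the report flag, so a corrupt cluster chain walked in a loop (the cache.c hunk earlier) cannot flood the log, yet the errors= mount policy (panic or remount read-only) still fires in __fat_fs_error() either way. The per-superblock state is seeded in fat_fill_super() below; the core API in miniature:

    #include <linux/ratelimit.h>

    static struct ratelimit_state my_rs;  /* one per throttled message source */

    static void my_setup(void)
    {
        /* defaults: a burst of messages allowed per interval, then throttled */
        ratelimit_state_init(&my_rs, DEFAULT_RATELIMIT_INTERVAL,
                             DEFAULT_RATELIMIT_BURST);
    }

    static void my_report(const char *what)
    {
        if (__ratelimit(&my_rs))          /* nonzero while under the limit */
            printk(KERN_ERR "error: %s\n", what);
    }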
diff --git a/fs/fat/file.c b/fs/fat/file.c index e8c159de236b..990dfae022e5 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/compat.h> | ||
11 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
12 | #include <linux/time.h> | 13 | #include <linux/time.h> |
13 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
@@ -114,9 +115,9 @@ out: | |||
114 | return err; | 115 | return err; |
115 | } | 116 | } |
116 | 117 | ||
117 | int fat_generic_ioctl(struct inode *inode, struct file *filp, | 118 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
118 | unsigned int cmd, unsigned long arg) | ||
119 | { | 119 | { |
120 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
120 | u32 __user *user_attr = (u32 __user *)arg; | 121 | u32 __user *user_attr = (u32 __user *)arg; |
121 | 122 | ||
122 | switch (cmd) { | 123 | switch (cmd) { |
@@ -129,6 +130,15 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, | |||
129 | } | 130 | } |
130 | } | 131 | } |
131 | 132 | ||
133 | #ifdef CONFIG_COMPAT | ||
134 | static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd, | ||
135 | unsigned long arg) | ||
136 | |||
137 | { | ||
138 | return fat_generic_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); | ||
139 | } | ||
140 | #endif | ||
141 | |||
132 | static int fat_file_release(struct inode *inode, struct file *filp) | 142 | static int fat_file_release(struct inode *inode, struct file *filp) |
133 | { | 143 | { |
134 | if ((filp->f_mode & FMODE_WRITE) && | 144 | if ((filp->f_mode & FMODE_WRITE) && |
@@ -139,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp) | |||
139 | return 0; | 149 | return 0; |
140 | } | 150 | } |
141 | 151 | ||
142 | int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync) | 152 | int fat_file_fsync(struct file *filp, int datasync) |
143 | { | 153 | { |
144 | struct inode *inode = dentry->d_inode; | 154 | struct inode *inode = filp->f_mapping->host; |
145 | int res, err; | 155 | int res, err; |
146 | 156 | ||
147 | res = simple_fsync(filp, dentry, datasync); | 157 | res = generic_file_fsync(filp, datasync); |
148 | err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); | 158 | err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); |
149 | 159 | ||
150 | return res ? res : err; | 160 | return res ? res : err; |
@@ -159,7 +169,10 @@ const struct file_operations fat_file_operations = { | |||
159 | .aio_write = generic_file_aio_write, | 169 | .aio_write = generic_file_aio_write, |
160 | .mmap = generic_file_mmap, | 170 | .mmap = generic_file_mmap, |
161 | .release = fat_file_release, | 171 | .release = fat_file_release, |
162 | .ioctl = fat_generic_ioctl, | 172 | .unlocked_ioctl = fat_generic_ioctl, |
173 | #ifdef CONFIG_COMPAT | ||
174 | .compat_ioctl = fat_generic_compat_ioctl, | ||
175 | #endif | ||
163 | .fsync = fat_file_fsync, | 176 | .fsync = fat_file_fsync, |
164 | .splice_read = generic_file_splice_read, | 177 | .splice_read = generic_file_splice_read, |
165 | }; | 178 | }; |
@@ -270,7 +283,7 @@ static int fat_free(struct inode *inode, int skip) | |||
270 | return fat_free_clusters(inode, free_start); | 283 | return fat_free_clusters(inode, free_start); |
271 | } | 284 | } |
272 | 285 | ||
273 | void fat_truncate(struct inode *inode) | 286 | void fat_truncate_blocks(struct inode *inode, loff_t offset) |
274 | { | 287 | { |
275 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 288 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); |
276 | const unsigned int cluster_size = sbi->cluster_size; | 289 | const unsigned int cluster_size = sbi->cluster_size; |
@@ -280,10 +293,10 @@ void fat_truncate(struct inode *inode) | |||
280 | * This protects against truncating a file bigger than it was then | 293 | * This protects against truncating a file bigger than it was then |
281 | * trying to write into the hole. | 294 | * trying to write into the hole. |
282 | */ | 295 | */ |
283 | if (MSDOS_I(inode)->mmu_private > inode->i_size) | 296 | if (MSDOS_I(inode)->mmu_private > offset) |
284 | MSDOS_I(inode)->mmu_private = inode->i_size; | 297 | MSDOS_I(inode)->mmu_private = offset; |
285 | 298 | ||
286 | nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; | 299 | nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits; |
287 | 300 | ||
288 | fat_free(inode, nr_clusters); | 301 | fat_free(inode, nr_clusters); |
289 | fat_flush_inodes(inode->i_sb, inode, NULL); | 302 | fat_flush_inodes(inode->i_sb, inode, NULL); |
@@ -351,6 +364,18 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) | |||
351 | return 0; | 364 | return 0; |
352 | } | 365 | } |
353 | 366 | ||
367 | int fat_setsize(struct inode *inode, loff_t offset) | ||
368 | { | ||
369 | int error; | ||
370 | |||
371 | error = simple_setsize(inode, offset); | ||
372 | if (error) | ||
373 | return error; | ||
374 | fat_truncate_blocks(inode, offset); | ||
375 | |||
376 | return error; | ||
377 | } | ||
378 | |||
354 | #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) | 379 | #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) |
355 | /* valid file mode bits */ | 380 | /* valid file mode bits */ |
356 | #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) | 381 | #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) |
@@ -365,7 +390,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
365 | /* | 390 | /* |
366 | * Expand the file. Since inode_setattr() updates ->i_size | 391 | * Expand the file. Since inode_setattr() updates ->i_size |
367 | * before calling the ->truncate(), but FAT needs to fill the | 392 | * before calling the ->truncate(), but FAT needs to fill the |
368 | * hole before it. | 393 | * hole before it. XXX: this is no longer true with new truncate |
394 | * sequence. | ||
369 | */ | 395 | */ |
370 | if (attr->ia_valid & ATTR_SIZE) { | 396 | if (attr->ia_valid & ATTR_SIZE) { |
371 | if (attr->ia_size > inode->i_size) { | 397 | if (attr->ia_size > inode->i_size) { |
@@ -414,15 +440,20 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
414 | attr->ia_valid &= ~ATTR_MODE; | 440 | attr->ia_valid &= ~ATTR_MODE; |
415 | } | 441 | } |
416 | 442 | ||
417 | if (attr->ia_valid) | 443 | if (attr->ia_valid & ATTR_SIZE) { |
418 | error = inode_setattr(inode, attr); | 444 | error = fat_setsize(inode, attr->ia_size); |
445 | if (error) | ||
446 | goto out; | ||
447 | } | ||
448 | |||
449 | generic_setattr(inode, attr); | ||
450 | mark_inode_dirty(inode); | ||
419 | out: | 451 | out: |
420 | return error; | 452 | return error; |
421 | } | 453 | } |
422 | EXPORT_SYMBOL_GPL(fat_setattr); | 454 | EXPORT_SYMBOL_GPL(fat_setattr); |
423 | 455 | ||
424 | const struct inode_operations fat_file_inode_operations = { | 456 | const struct inode_operations fat_file_inode_operations = { |
425 | .truncate = fat_truncate, | ||
426 | .setattr = fat_setattr, | 457 | .setattr = fat_setattr, |
427 | .getattr = fat_getattr, | 458 | .getattr = fat_getattr, |
428 | }; | 459 | }; |
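These file.c hunks are FAT's move to the "new truncate sequence" the XXX comment mentions: the .truncate inode operation disappears, and fat_setattr() drives size changes explicitly through fat_setsize(), which first adjusts i_size and the page cache via simple_setsize() and only then frees clusters with fat_truncate_blocks(). Contrast of the two orders, condensed and hedged (simple_setsize() being the transitional helper of this era):

    /* before: the VFS updated i_size inside inode_setattr(), then invoked
     * ->truncate(), so the fs never saw old and new sizes side by side */
    error = inode_setattr(inode, attr);            /* i_size already changed */
    /* ->truncate() == fat_truncate(inode) ran implicitly */

    /* after: the filesystem sequences it itself */
    error = simple_setsize(inode, attr->ia_size);  /* i_size + pagecache */
    if (!error)
        fat_truncate_blocks(inode, attr->ia_size); /* on-disk clusters */

fat_truncate_blocks() taking the offset as a parameter, instead of reading inode->i_size, is what lets fat_write_failed() in inode.c reuse it to roll back failed writes.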
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0ce143bd7d56..7bf45aee56d7 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -142,14 +142,29 @@ static int fat_readpages(struct file *file, struct address_space *mapping, | |||
142 | return mpage_readpages(mapping, pages, nr_pages, fat_get_block); | 142 | return mpage_readpages(mapping, pages, nr_pages, fat_get_block); |
143 | } | 143 | } |
144 | 144 | ||
145 | static void fat_write_failed(struct address_space *mapping, loff_t to) | ||
146 | { | ||
147 | struct inode *inode = mapping->host; | ||
148 | |||
149 | if (to > inode->i_size) { | ||
150 | truncate_pagecache(inode, to, inode->i_size); | ||
151 | fat_truncate_blocks(inode, inode->i_size); | ||
152 | } | ||
153 | } | ||
154 | |||
145 | static int fat_write_begin(struct file *file, struct address_space *mapping, | 155 | static int fat_write_begin(struct file *file, struct address_space *mapping, |
146 | loff_t pos, unsigned len, unsigned flags, | 156 | loff_t pos, unsigned len, unsigned flags, |
147 | struct page **pagep, void **fsdata) | 157 | struct page **pagep, void **fsdata) |
148 | { | 158 | { |
159 | int err; | ||
160 | |||
149 | *pagep = NULL; | 161 | *pagep = NULL; |
150 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 162 | err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, |
151 | fat_get_block, | 163 | pagep, fsdata, fat_get_block, |
152 | &MSDOS_I(mapping->host)->mmu_private); | 164 | &MSDOS_I(mapping->host)->mmu_private); |
165 | if (err < 0) | ||
166 | fat_write_failed(mapping, pos + len); | ||
167 | return err; | ||
153 | } | 168 | } |
154 | 169 | ||
155 | static int fat_write_end(struct file *file, struct address_space *mapping, | 170 | static int fat_write_end(struct file *file, struct address_space *mapping, |
@@ -159,6 +174,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping, | |||
159 | struct inode *inode = mapping->host; | 174 | struct inode *inode = mapping->host; |
160 | int err; | 175 | int err; |
161 | err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); | 176 | err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); |
177 | if (err < len) | ||
178 | fat_write_failed(mapping, pos + len); | ||
162 | if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { | 179 | if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { |
163 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 180 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
164 | MSDOS_I(inode)->i_attrs |= ATTR_ARCH; | 181 | MSDOS_I(inode)->i_attrs |= ATTR_ARCH; |
@@ -172,7 +189,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | |||
172 | loff_t offset, unsigned long nr_segs) | 189 | loff_t offset, unsigned long nr_segs) |
173 | { | 190 | { |
174 | struct file *file = iocb->ki_filp; | 191 | struct file *file = iocb->ki_filp; |
175 | struct inode *inode = file->f_mapping->host; | 192 | struct address_space *mapping = file->f_mapping; |
193 | struct inode *inode = mapping->host; | ||
194 | ssize_t ret; | ||
176 | 195 | ||
177 | if (rw == WRITE) { | 196 | if (rw == WRITE) { |
178 | /* | 197 | /* |
@@ -193,8 +212,12 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | |||
193 | * FAT need to use the DIO_LOCKING for avoiding the race | 212 | * FAT need to use the DIO_LOCKING for avoiding the race |
194 | * condition of fat_get_block() and ->truncate(). | 213 | * condition of fat_get_block() and ->truncate(). |
195 | */ | 214 | */ |
196 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 215 | ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, |
197 | offset, nr_segs, fat_get_block, NULL); | 216 | iov, offset, nr_segs, fat_get_block, NULL); |
217 | if (ret < 0 && (rw & WRITE)) | ||
218 | fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); | ||
219 | |||
220 | return ret; | ||
198 | } | 221 | } |
199 | 222 | ||
200 | static sector_t _fat_bmap(struct address_space *mapping, sector_t block) | 223 | static sector_t _fat_bmap(struct address_space *mapping, sector_t block) |
@@ -429,7 +452,7 @@ static void fat_delete_inode(struct inode *inode) | |||
429 | { | 452 | { |
430 | truncate_inode_pages(&inode->i_data, 0); | 453 | truncate_inode_pages(&inode->i_data, 0); |
431 | inode->i_size = 0; | 454 | inode->i_size = 0; |
432 | fat_truncate(inode); | 455 | fat_truncate_blocks(inode, 0); |
433 | clear_inode(inode); | 456 | clear_inode(inode); |
434 | } | 457 | } |
435 | 458 | ||
@@ -1250,6 +1273,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
1250 | sb->s_op = &fat_sops; | 1273 | sb->s_op = &fat_sops; |
1251 | sb->s_export_op = &fat_export_ops; | 1274 | sb->s_export_op = &fat_export_ops; |
1252 | sbi->dir_ops = fs_dir_inode_ops; | 1275 | sbi->dir_ops = fs_dir_inode_ops; |
1276 | ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, | ||
1277 | DEFAULT_RATELIMIT_BURST); | ||
1253 | 1278 | ||
1254 | error = parse_options(data, isvfat, silent, &debug, &sbi->options); | 1279 | error = parse_options(data, isvfat, silent, &debug, &sbi->options); |
1255 | if (error) | 1280 | if (error) |
@@ -1497,10 +1522,8 @@ out_fail: | |||
1497 | iput(fat_inode); | 1522 | iput(fat_inode); |
1498 | if (root_inode) | 1523 | if (root_inode) |
1499 | iput(root_inode); | 1524 | iput(root_inode); |
1500 | if (sbi->nls_io) | 1525 | unload_nls(sbi->nls_io); |
1501 | unload_nls(sbi->nls_io); | 1526 | unload_nls(sbi->nls_disk); |
1502 | if (sbi->nls_disk) | ||
1503 | unload_nls(sbi->nls_disk); | ||
1504 | if (sbi->options.iocharset != fat_default_iocharset) | 1527 | if (sbi->options.iocharset != fat_default_iocharset) |
1505 | kfree(sbi->options.iocharset); | 1528 | kfree(sbi->options.iocharset); |
1506 | sb->s_fs_info = NULL; | 1529 | sb->s_fs_info = NULL; |
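The inode.c hunks all feed one helper: fat_write_failed() undoes speculative allocation past the old end of file whenever a buffered write (cont_write_begin_newtrunc failure), a short generic_write_end(), or a failed direct-IO write leaves blocks instantiated beyond i_size. The pattern any cont_write-style filesystem needs, restated compactly from the hunk above:

    /* condensed; fat_truncate_blocks() frees the backing clusters */
    static void fat_write_failed(struct address_space *mapping, loff_t to)
    {
        struct inode *inode = mapping->host;

        if (to > inode->i_size) {
            /* drop cached pages beyond EOF, then the blocks behind them */
            truncate_pagecache(inode, to, inode->i_size);
            fat_truncate_blocks(inode, inode->i_size);
        }
    }

Note the direct-IO caller computes the failed extent as offset + iov_length(iov, nr_segs), the full requested range, since it cannot know how far the write actually got.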
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index d3da05f26465..1fa23f6ffba5 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -20,27 +20,29 @@ | |||
20 | * In case the file system is remounted read-only, it can be made writable | 20 | * In case the file system is remounted read-only, it can be made writable |
21 | * again by remounting it. | 21 | * again by remounting it. |
22 | */ | 22 | */ |
23 | void fat_fs_error(struct super_block *s, const char *fmt, ...) | 23 | void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) |
24 | { | 24 | { |
25 | struct fat_mount_options *opts = &MSDOS_SB(s)->options; | 25 | struct fat_mount_options *opts = &MSDOS_SB(s)->options; |
26 | va_list args; | 26 | va_list args; |
27 | 27 | ||
28 | printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); | 28 | if (report) { |
29 | printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); | ||
29 | 30 | ||
30 | printk(KERN_ERR " "); | 31 | printk(KERN_ERR " "); |
31 | va_start(args, fmt); | 32 | va_start(args, fmt); |
32 | vprintk(fmt, args); | 33 | vprintk(fmt, args); |
33 | va_end(args); | 34 | va_end(args); |
34 | printk("\n"); | 35 | printk("\n"); |
36 | } | ||
35 | 37 | ||
36 | if (opts->errors == FAT_ERRORS_PANIC) | 38 | if (opts->errors == FAT_ERRORS_PANIC) |
37 | panic(" FAT fs panic from previous error\n"); | 39 | panic("FAT: fs panic from previous error\n"); |
38 | else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { | 40 | else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { |
39 | s->s_flags |= MS_RDONLY; | 41 | s->s_flags |= MS_RDONLY; |
40 | printk(KERN_ERR " File system has been set read-only\n"); | 42 | printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); |
41 | } | 43 | } |
42 | } | 44 | } |
43 | EXPORT_SYMBOL_GPL(fat_fs_error); | 45 | EXPORT_SYMBOL_GPL(__fat_fs_error); |
44 | 46 | ||
45 | /* Flushes the number of free clusters on FAT32 */ | 47 | /* Flushes the number of free clusters on FAT32 */ |
46 | /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ | 48 | /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ |
diff --git a/fs/file_table.c b/fs/file_table.c index 32d12b78bac8..5c7d10ead4ad 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -194,14 +194,6 @@ struct file *alloc_file(struct path *path, fmode_t mode, | |||
194 | } | 194 | } |
195 | EXPORT_SYMBOL(alloc_file); | 195 | EXPORT_SYMBOL(alloc_file); |
196 | 196 | ||
197 | void fput(struct file *file) | ||
198 | { | ||
199 | if (atomic_long_dec_and_test(&file->f_count)) | ||
200 | __fput(file); | ||
201 | } | ||
202 | |||
203 | EXPORT_SYMBOL(fput); | ||
204 | |||
205 | /** | 197 | /** |
206 | * drop_file_write_access - give up ability to write to a file | 198 | * drop_file_write_access - give up ability to write to a file |
207 | * @file: the file to which we will stop writing | 199 | * @file: the file to which we will stop writing |
@@ -227,10 +219,9 @@ void drop_file_write_access(struct file *file) | |||
227 | } | 219 | } |
228 | EXPORT_SYMBOL_GPL(drop_file_write_access); | 220 | EXPORT_SYMBOL_GPL(drop_file_write_access); |
229 | 221 | ||
230 | /* __fput is called from task context when aio completion releases the last | 222 | /* the real guts of fput() - releasing the last reference to file |
231 | * last use of a struct file *. Do not use otherwise. | ||
232 | */ | 223 | */ |
233 | void __fput(struct file *file) | 224 | static void __fput(struct file *file) |
234 | { | 225 | { |
235 | struct dentry *dentry = file->f_path.dentry; | 226 | struct dentry *dentry = file->f_path.dentry; |
236 | struct vfsmount *mnt = file->f_path.mnt; | 227 | struct vfsmount *mnt = file->f_path.mnt; |
@@ -268,6 +259,14 @@ void __fput(struct file *file) | |||
268 | mntput(mnt); | 259 | mntput(mnt); |
269 | } | 260 | } |
270 | 261 | ||
262 | void fput(struct file *file) | ||
263 | { | ||
264 | if (atomic_long_dec_and_test(&file->f_count)) | ||
265 | __fput(file); | ||
266 | } | ||
267 | |||
268 | EXPORT_SYMBOL(fput); | ||
269 | |||
271 | struct file *fget(unsigned int fd) | 270 | struct file *fget(unsigned int fd) |
272 | { | 271 | { |
273 | struct file *file; | 272 | struct file *file; |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index aee049cb9f84..0ec7bb2c95c6 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
@@ -57,6 +57,8 @@ const struct inode_operations vxfs_dir_inode_ops = { | |||
57 | }; | 57 | }; |
58 | 58 | ||
59 | const struct file_operations vxfs_dir_operations = { | 59 | const struct file_operations vxfs_dir_operations = { |
60 | .llseek = generic_file_llseek, | ||
61 | .read = generic_read_dir, | ||
60 | .readdir = vxfs_readdir, | 62 | .readdir = vxfs_readdir, |
61 | }; | 63 | }; |
62 | 64 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 408a7877b79d..1d1088f48bc2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -398,11 +398,11 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
398 | wait_queue_head_t *wqh; | 398 | wait_queue_head_t *wqh; |
399 | 399 | ||
400 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 400 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
401 | do { | 401 | while (inode->i_state & I_SYNC) { |
402 | spin_unlock(&inode_lock); | 402 | spin_unlock(&inode_lock); |
403 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 403 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
404 | spin_lock(&inode_lock); | 404 | spin_lock(&inode_lock); |
405 | } while (inode->i_state & I_SYNC); | 405 | } |
406 | } | 406 | } |
407 | 407 | ||
408 | /* | 408 | /* |
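The fs-writeback.c change is small but behavioral: the old do/while always dropped inode_lock, ran one pass through the bit-waitqueue machinery, and retook the lock even when I_SYNC was never set; the new while form tests the flag first and can exit without any of that. Schematically, with wait_for_I_SYNC_clear() as a placeholder for the unlock/__wait_on_bit()/lock sequence the real body performs:

    /* before: one unconditional trip through the wait machinery */
    do {
        wait_for_I_SYNC_clear();    /* placeholder, see above */
    } while (inode->i_state & I_SYNC);

    /* after: purely wait-until-clear, possibly zero iterations */
    while (inode->i_state & I_SYNC)
        wait_for_I_SYNC_clear();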
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 1e1f286dd70e..4a8eb31c5338 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c | |||
@@ -103,7 +103,7 @@ static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) | |||
103 | /* banners (can't represent line 0 by pos 0 as that would involve | 103 | /* banners (can't represent line 0 by pos 0 as that would involve |
104 | * returning a NULL pointer) */ | 104 | * returning a NULL pointer) */ |
105 | if (pos == 0) | 105 | if (pos == 0) |
106 | return (struct fscache_object *) ++(*_pos); | 106 | return (struct fscache_object *)(long)++(*_pos); |
107 | if (pos < 3) | 107 | if (pos < 3) |
108 | return (struct fscache_object *)pos; | 108 | return (struct fscache_object *)pos; |
109 | 109 | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index eb7e9423691f..9424796d6634 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -16,8 +16,12 @@ | |||
16 | #include <linux/pagemap.h> | 16 | #include <linux/pagemap.h> |
17 | #include <linux/file.h> | 17 | #include <linux/file.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/pipe_fs_i.h> | ||
20 | #include <linux/swap.h> | ||
21 | #include <linux/splice.h> | ||
19 | 22 | ||
20 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); | 23 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); |
24 | MODULE_ALIAS("devname:fuse"); | ||
21 | 25 | ||
22 | static struct kmem_cache *fuse_req_cachep; | 26 | static struct kmem_cache *fuse_req_cachep; |
23 | 27 | ||
@@ -498,6 +502,9 @@ struct fuse_copy_state { | |||
498 | int write; | 502 | int write; |
499 | struct fuse_req *req; | 503 | struct fuse_req *req; |
500 | const struct iovec *iov; | 504 | const struct iovec *iov; |
505 | struct pipe_buffer *pipebufs; | ||
506 | struct pipe_buffer *currbuf; | ||
507 | struct pipe_inode_info *pipe; | ||
501 | unsigned long nr_segs; | 508 | unsigned long nr_segs; |
502 | unsigned long seglen; | 509 | unsigned long seglen; |
503 | unsigned long addr; | 510 | unsigned long addr; |
@@ -505,16 +512,16 @@ struct fuse_copy_state { | |||
505 | void *mapaddr; | 512 | void *mapaddr; |
506 | void *buf; | 513 | void *buf; |
507 | unsigned len; | 514 | unsigned len; |
515 | unsigned move_pages:1; | ||
508 | }; | 516 | }; |
509 | 517 | ||
510 | static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, | 518 | static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, |
511 | int write, struct fuse_req *req, | 519 | int write, |
512 | const struct iovec *iov, unsigned long nr_segs) | 520 | const struct iovec *iov, unsigned long nr_segs) |
513 | { | 521 | { |
514 | memset(cs, 0, sizeof(*cs)); | 522 | memset(cs, 0, sizeof(*cs)); |
515 | cs->fc = fc; | 523 | cs->fc = fc; |
516 | cs->write = write; | 524 | cs->write = write; |
517 | cs->req = req; | ||
518 | cs->iov = iov; | 525 | cs->iov = iov; |
519 | cs->nr_segs = nr_segs; | 526 | cs->nr_segs = nr_segs; |
520 | } | 527 | } |
@@ -522,7 +529,18 @@ static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, | |||
522 | /* Unmap and put previous page of userspace buffer */ | 529 | /* Unmap and put previous page of userspace buffer */ |
523 | static void fuse_copy_finish(struct fuse_copy_state *cs) | 530 | static void fuse_copy_finish(struct fuse_copy_state *cs) |
524 | { | 531 | { |
525 | if (cs->mapaddr) { | 532 | if (cs->currbuf) { |
533 | struct pipe_buffer *buf = cs->currbuf; | ||
534 | |||
535 | if (!cs->write) { | ||
536 | buf->ops->unmap(cs->pipe, buf, cs->mapaddr); | ||
537 | } else { | ||
538 | kunmap_atomic(cs->mapaddr, KM_USER0); | ||
539 | buf->len = PAGE_SIZE - cs->len; | ||
540 | } | ||
541 | cs->currbuf = NULL; | ||
542 | cs->mapaddr = NULL; | ||
543 | } else if (cs->mapaddr) { | ||
526 | kunmap_atomic(cs->mapaddr, KM_USER0); | 544 | kunmap_atomic(cs->mapaddr, KM_USER0); |
527 | if (cs->write) { | 545 | if (cs->write) { |
528 | flush_dcache_page(cs->pg); | 546 | flush_dcache_page(cs->pg); |
@@ -544,26 +562,61 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) | |||
544 | 562 | ||
545 | unlock_request(cs->fc, cs->req); | 563 | unlock_request(cs->fc, cs->req); |
546 | fuse_copy_finish(cs); | 564 | fuse_copy_finish(cs); |
547 | if (!cs->seglen) { | 565 | if (cs->pipebufs) { |
548 | BUG_ON(!cs->nr_segs); | 566 | struct pipe_buffer *buf = cs->pipebufs; |
549 | cs->seglen = cs->iov[0].iov_len; | 567 | |
550 | cs->addr = (unsigned long) cs->iov[0].iov_base; | 568 | if (!cs->write) { |
551 | cs->iov++; | 569 | err = buf->ops->confirm(cs->pipe, buf); |
552 | cs->nr_segs--; | 570 | if (err) |
571 | return err; | ||
572 | |||
573 | BUG_ON(!cs->nr_segs); | ||
574 | cs->currbuf = buf; | ||
575 | cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); | ||
576 | cs->len = buf->len; | ||
577 | cs->buf = cs->mapaddr + buf->offset; | ||
578 | cs->pipebufs++; | ||
579 | cs->nr_segs--; | ||
580 | } else { | ||
581 | struct page *page; | ||
582 | |||
583 | if (cs->nr_segs == cs->pipe->buffers) | ||
584 | return -EIO; | ||
585 | |||
586 | page = alloc_page(GFP_HIGHUSER); | ||
587 | if (!page) | ||
588 | return -ENOMEM; | ||
589 | |||
590 | buf->page = page; | ||
591 | buf->offset = 0; | ||
592 | buf->len = 0; | ||
593 | |||
594 | cs->currbuf = buf; | ||
595 | cs->mapaddr = kmap_atomic(page, KM_USER0); | ||
596 | cs->buf = cs->mapaddr; | ||
597 | cs->len = PAGE_SIZE; | ||
598 | cs->pipebufs++; | ||
599 | cs->nr_segs++; | ||
600 | } | ||
601 | } else { | ||
602 | if (!cs->seglen) { | ||
603 | BUG_ON(!cs->nr_segs); | ||
604 | cs->seglen = cs->iov[0].iov_len; | ||
605 | cs->addr = (unsigned long) cs->iov[0].iov_base; | ||
606 | cs->iov++; | ||
607 | cs->nr_segs--; | ||
608 | } | ||
609 | err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg); | ||
610 | if (err < 0) | ||
611 | return err; | ||
612 | BUG_ON(err != 1); | ||
613 | offset = cs->addr % PAGE_SIZE; | ||
614 | cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); | ||
615 | cs->buf = cs->mapaddr + offset; | ||
616 | cs->len = min(PAGE_SIZE - offset, cs->seglen); | ||
617 | cs->seglen -= cs->len; | ||
618 | cs->addr += cs->len; | ||
553 | } | 619 | } |
554 | down_read(&current->mm->mmap_sem); | ||
555 | err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, | ||
556 | &cs->pg, NULL); | ||
557 | up_read(&current->mm->mmap_sem); | ||
558 | if (err < 0) | ||
559 | return err; | ||
560 | BUG_ON(err != 1); | ||
561 | offset = cs->addr % PAGE_SIZE; | ||
562 | cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); | ||
563 | cs->buf = cs->mapaddr + offset; | ||
564 | cs->len = min(PAGE_SIZE - offset, cs->seglen); | ||
565 | cs->seglen -= cs->len; | ||
566 | cs->addr += cs->len; | ||
567 | 620 | ||
568 | return lock_request(cs->fc, cs->req); | 621 | return lock_request(cs->fc, cs->req); |
569 | } | 622 | } |
@@ -585,23 +638,178 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) | |||
585 | return ncpy; | 638 | return ncpy; |
586 | } | 639 | } |
587 | 640 | ||
641 | static int fuse_check_page(struct page *page) | ||
642 | { | ||
643 | if (page_mapcount(page) || | ||
644 | page->mapping != NULL || | ||
645 | page_count(page) != 1 || | ||
646 | (page->flags & PAGE_FLAGS_CHECK_AT_PREP & | ||
647 | ~(1 << PG_locked | | ||
648 | 1 << PG_referenced | | ||
649 | 1 << PG_uptodate | | ||
650 | 1 << PG_lru | | ||
651 | 1 << PG_active | | ||
652 | 1 << PG_reclaim))) { | ||
653 | printk(KERN_WARNING "fuse: trying to steal weird page\n"); | ||
654 | printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); | ||
655 | return 1; | ||
656 | } | ||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) | ||
661 | { | ||
662 | int err; | ||
663 | struct page *oldpage = *pagep; | ||
664 | struct page *newpage; | ||
665 | struct pipe_buffer *buf = cs->pipebufs; | ||
666 | struct address_space *mapping; | ||
667 | pgoff_t index; | ||
668 | |||
669 | unlock_request(cs->fc, cs->req); | ||
670 | fuse_copy_finish(cs); | ||
671 | |||
672 | err = buf->ops->confirm(cs->pipe, buf); | ||
673 | if (err) | ||
674 | return err; | ||
675 | |||
676 | BUG_ON(!cs->nr_segs); | ||
677 | cs->currbuf = buf; | ||
678 | cs->len = buf->len; | ||
679 | cs->pipebufs++; | ||
680 | cs->nr_segs--; | ||
681 | |||
682 | if (cs->len != PAGE_SIZE) | ||
683 | goto out_fallback; | ||
684 | |||
685 | if (buf->ops->steal(cs->pipe, buf) != 0) | ||
686 | goto out_fallback; | ||
687 | |||
688 | newpage = buf->page; | ||
689 | |||
690 | if (WARN_ON(!PageUptodate(newpage))) | ||
691 | return -EIO; | ||
692 | |||
693 | ClearPageMappedToDisk(newpage); | ||
694 | |||
695 | if (fuse_check_page(newpage) != 0) | ||
696 | goto out_fallback_unlock; | ||
697 | |||
698 | mapping = oldpage->mapping; | ||
699 | index = oldpage->index; | ||
700 | |||
701 | /* | ||
702 | * This is a new and locked page, it shouldn't be mapped or | ||
703 | * have any special flags on it | ||
704 | */ | ||
705 | if (WARN_ON(page_mapped(oldpage))) | ||
706 | goto out_fallback_unlock; | ||
707 | if (WARN_ON(page_has_private(oldpage))) | ||
708 | goto out_fallback_unlock; | ||
709 | if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage))) | ||
710 | goto out_fallback_unlock; | ||
711 | if (WARN_ON(PageMlocked(oldpage))) | ||
712 | goto out_fallback_unlock; | ||
713 | |||
714 | remove_from_page_cache(oldpage); | ||
715 | page_cache_release(oldpage); | ||
716 | |||
717 | err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL); | ||
718 | if (err) { | ||
719 | printk(KERN_WARNING "fuse_try_move_page: failed to add page"); | ||
720 | goto out_fallback_unlock; | ||
721 | } | ||
722 | page_cache_get(newpage); | ||
723 | |||
724 | if (!(buf->flags & PIPE_BUF_FLAG_LRU)) | ||
725 | lru_cache_add_file(newpage); | ||
726 | |||
727 | err = 0; | ||
728 | spin_lock(&cs->fc->lock); | ||
729 | if (cs->req->aborted) | ||
730 | err = -ENOENT; | ||
731 | else | ||
732 | *pagep = newpage; | ||
733 | spin_unlock(&cs->fc->lock); | ||
734 | |||
735 | if (err) { | ||
736 | unlock_page(newpage); | ||
737 | page_cache_release(newpage); | ||
738 | return err; | ||
739 | } | ||
740 | |||
741 | unlock_page(oldpage); | ||
742 | page_cache_release(oldpage); | ||
743 | cs->len = 0; | ||
744 | |||
745 | return 0; | ||
746 | |||
747 | out_fallback_unlock: | ||
748 | unlock_page(newpage); | ||
749 | out_fallback: | ||
750 | cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); | ||
751 | cs->buf = cs->mapaddr + buf->offset; | ||
752 | |||
753 | err = lock_request(cs->fc, cs->req); | ||
754 | if (err) | ||
755 | return err; | ||
756 | |||
757 | return 1; | ||
758 | } | ||
759 | |||
760 | static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, | ||
761 | unsigned offset, unsigned count) | ||
762 | { | ||
763 | struct pipe_buffer *buf; | ||
764 | |||
765 | if (cs->nr_segs == cs->pipe->buffers) | ||
766 | return -EIO; | ||
767 | |||
768 | unlock_request(cs->fc, cs->req); | ||
769 | fuse_copy_finish(cs); | ||
770 | |||
771 | buf = cs->pipebufs; | ||
772 | page_cache_get(page); | ||
773 | buf->page = page; | ||
774 | buf->offset = offset; | ||
775 | buf->len = count; | ||
776 | |||
777 | cs->pipebufs++; | ||
778 | cs->nr_segs++; | ||
779 | cs->len = 0; | ||
780 | |||
781 | return 0; | ||
782 | } | ||
783 | |||
588 | /* | 784 | /* |
589 | * Copy a page in the request to/from the userspace buffer. Must be | 785 | * Copy a page in the request to/from the userspace buffer. Must be |
590 | * done atomically | 786 | * done atomically |
591 | */ | 787 | */ |
592 | static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, | 788 | static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, |
593 | unsigned offset, unsigned count, int zeroing) | 789 | unsigned offset, unsigned count, int zeroing) |
594 | { | 790 | { |
791 | int err; | ||
792 | struct page *page = *pagep; | ||
793 | |||
595 | if (page && zeroing && count < PAGE_SIZE) { | 794 | if (page && zeroing && count < PAGE_SIZE) { |
596 | void *mapaddr = kmap_atomic(page, KM_USER1); | 795 | void *mapaddr = kmap_atomic(page, KM_USER1); |
597 | memset(mapaddr, 0, PAGE_SIZE); | 796 | memset(mapaddr, 0, PAGE_SIZE); |
598 | kunmap_atomic(mapaddr, KM_USER1); | 797 | kunmap_atomic(mapaddr, KM_USER1); |
599 | } | 798 | } |
600 | while (count) { | 799 | while (count) { |
601 | if (!cs->len) { | 800 | if (cs->write && cs->pipebufs && page) { |
602 | int err = fuse_copy_fill(cs); | 801 | return fuse_ref_page(cs, page, offset, count); |
603 | if (err) | 802 | } else if (!cs->len) { |
604 | return err; | 803 | if (cs->move_pages && page && |
804 | offset == 0 && count == PAGE_SIZE) { | ||
805 | err = fuse_try_move_page(cs, pagep); | ||
806 | if (err <= 0) | ||
807 | return err; | ||
808 | } else { | ||
809 | err = fuse_copy_fill(cs); | ||
810 | if (err) | ||
811 | return err; | ||
812 | } | ||
605 | } | 813 | } |
606 | if (page) { | 814 | if (page) { |
607 | void *mapaddr = kmap_atomic(page, KM_USER1); | 815 | void *mapaddr = kmap_atomic(page, KM_USER1); |
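This is the heart of the fuse splice support: fuse_copy_page() now receives struct page ** because "copying" a page may be replaced by referencing or moving it. The three paths, condensed from the code above:

    /* condensed decision logic of fuse_copy_page() */
    while (count) {
        if (cs->write && cs->pipebufs && page) {
            /* splice read: no copy, just reference the page into a pipe buf */
            return fuse_ref_page(cs, page, offset, count);
        } else if (!cs->len) {
            if (cs->move_pages && page && offset == 0 && count == PAGE_SIZE) {
                /* splice write: try to steal the pipe page into the cache */
                err = fuse_try_move_page(cs, pagep);
                if (err <= 0)
                    return err;     /* 0: page moved; <0: hard error */
                /* 1: stealing refused, fall through to an ordinary copy */
            } else {
                err = fuse_copy_fill(cs);  /* map the next chunk to copy */
                if (err)
                    return err;
            }
        }
        /* ... kmap_atomic + memcpy, as before ... */
    }

fuse_try_move_page() only commits to the zero-copy path after fuse_check_page() verifies the stolen page carries no unexpected state (mappings, elevated counts, odd flags); anything suspicious takes the out_fallback path and degrades to a copy, which is why a return value of 1 means "continue copying".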
@@ -626,8 +834,10 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, | |||
626 | unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); | 834 | unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); |
627 | 835 | ||
628 | for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { | 836 | for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { |
629 | struct page *page = req->pages[i]; | 837 | int err; |
630 | int err = fuse_copy_page(cs, page, offset, count, zeroing); | 838 | |
839 | err = fuse_copy_page(cs, &req->pages[i], offset, count, | ||
840 | zeroing); | ||
631 | if (err) | 841 | if (err) |
632 | return err; | 842 | return err; |
633 | 843 | ||
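Passing &req->pages[i] rather than the page itself is the point of this hunk: fuse_try_move_page() can then swap the request's page for one stolen from the pipe. A minimal sketch of the pointer-to-pointer pattern (the helper name is hypothetical, for illustration only):

	/* Sketch only: the callee can replace the caller's page because
	 * it receives the address of the slot, not a copy of it. */
	static int maybe_replace_page(struct page **pagep)
	{
		struct page *newpage = steal_pipe_page();	/* hypothetical */
		if (!newpage)
			return 1;		/* fall back: caller copies */
		*pagep = newpage;		/* req->pages[i] now points here */
		return 0;
	}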
@@ -704,11 +914,10 @@ __acquires(&fc->lock) | |||
704 | * | 914 | * |
705 | * Called with fc->lock held, releases it | 915 | * Called with fc->lock held, releases it |
706 | */ | 916 | */ |
707 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, | 917 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, |
708 | const struct iovec *iov, unsigned long nr_segs) | 918 | size_t nbytes, struct fuse_req *req) |
709 | __releases(&fc->lock) | 919 | __releases(&fc->lock) |
710 | { | 920 | { |
711 | struct fuse_copy_state cs; | ||
712 | struct fuse_in_header ih; | 921 | struct fuse_in_header ih; |
713 | struct fuse_interrupt_in arg; | 922 | struct fuse_interrupt_in arg; |
714 | unsigned reqsize = sizeof(ih) + sizeof(arg); | 923 | unsigned reqsize = sizeof(ih) + sizeof(arg); |
@@ -724,14 +933,13 @@ __releases(&fc->lock) | |||
724 | arg.unique = req->in.h.unique; | 933 | arg.unique = req->in.h.unique; |
725 | 934 | ||
726 | spin_unlock(&fc->lock); | 935 | spin_unlock(&fc->lock); |
727 | if (iov_length(iov, nr_segs) < reqsize) | 936 | if (nbytes < reqsize) |
728 | return -EINVAL; | 937 | return -EINVAL; |
729 | 938 | ||
730 | fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs); | 939 | err = fuse_copy_one(cs, &ih, sizeof(ih)); |
731 | err = fuse_copy_one(&cs, &ih, sizeof(ih)); | ||
732 | if (!err) | 940 | if (!err) |
733 | err = fuse_copy_one(&cs, &arg, sizeof(arg)); | 941 | err = fuse_copy_one(cs, &arg, sizeof(arg)); |
734 | fuse_copy_finish(&cs); | 942 | fuse_copy_finish(cs); |
735 | 943 | ||
736 | return err ? err : reqsize; | 944 | return err ? err : reqsize; |
737 | } | 945 | } |
@@ -745,18 +953,13 @@ __releases(&fc->lock) | |||
745 | * request_end(). Otherwise add it to the processing list, and set | 953 | * request_end(). Otherwise add it to the processing list, and set |
746 | * the 'sent' flag. | 954 | * the 'sent' flag. |
747 | */ | 955 | */ |
748 | static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | 956 | static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, |
749 | unsigned long nr_segs, loff_t pos) | 957 | struct fuse_copy_state *cs, size_t nbytes) |
750 | { | 958 | { |
751 | int err; | 959 | int err; |
752 | struct fuse_req *req; | 960 | struct fuse_req *req; |
753 | struct fuse_in *in; | 961 | struct fuse_in *in; |
754 | struct fuse_copy_state cs; | ||
755 | unsigned reqsize; | 962 | unsigned reqsize; |
756 | struct file *file = iocb->ki_filp; | ||
757 | struct fuse_conn *fc = fuse_get_conn(file); | ||
758 | if (!fc) | ||
759 | return -EPERM; | ||
760 | 963 | ||
761 | restart: | 964 | restart: |
762 | spin_lock(&fc->lock); | 965 | spin_lock(&fc->lock); |
@@ -776,7 +979,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | |||
776 | if (!list_empty(&fc->interrupts)) { | 979 | if (!list_empty(&fc->interrupts)) { |
777 | req = list_entry(fc->interrupts.next, struct fuse_req, | 980 | req = list_entry(fc->interrupts.next, struct fuse_req, |
778 | intr_entry); | 981 | intr_entry); |
779 | return fuse_read_interrupt(fc, req, iov, nr_segs); | 982 | return fuse_read_interrupt(fc, cs, nbytes, req); |
780 | } | 983 | } |
781 | 984 | ||
782 | req = list_entry(fc->pending.next, struct fuse_req, list); | 985 | req = list_entry(fc->pending.next, struct fuse_req, list); |
@@ -786,7 +989,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | |||
786 | in = &req->in; | 989 | in = &req->in; |
787 | reqsize = in->h.len; | 990 | reqsize = in->h.len; |
788 | /* If request is too large, reply with an error and restart the read */ | 991 | /* If request is too large, reply with an error and restart the read */ |
789 | if (iov_length(iov, nr_segs) < reqsize) { | 992 | if (nbytes < reqsize) { |
790 | req->out.h.error = -EIO; | 993 | req->out.h.error = -EIO; |
791 | /* SETXATTR is special, since it may contain too large data */ | 994 | /* SETXATTR is special, since it may contain too large data */ |
792 | if (in->h.opcode == FUSE_SETXATTR) | 995 | if (in->h.opcode == FUSE_SETXATTR) |
@@ -795,12 +998,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | |||
795 | goto restart; | 998 | goto restart; |
796 | } | 999 | } |
797 | spin_unlock(&fc->lock); | 1000 | spin_unlock(&fc->lock); |
798 | fuse_copy_init(&cs, fc, 1, req, iov, nr_segs); | 1001 | cs->req = req; |
799 | err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); | 1002 | err = fuse_copy_one(cs, &in->h, sizeof(in->h)); |
800 | if (!err) | 1003 | if (!err) |
801 | err = fuse_copy_args(&cs, in->numargs, in->argpages, | 1004 | err = fuse_copy_args(cs, in->numargs, in->argpages, |
802 | (struct fuse_arg *) in->args, 0); | 1005 | (struct fuse_arg *) in->args, 0); |
803 | fuse_copy_finish(&cs); | 1006 | fuse_copy_finish(cs); |
804 | spin_lock(&fc->lock); | 1007 | spin_lock(&fc->lock); |
805 | req->locked = 0; | 1008 | req->locked = 0; |
806 | if (req->aborted) { | 1009 | if (req->aborted) { |
@@ -828,6 +1031,110 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | |||
828 | return err; | 1031 | return err; |
829 | } | 1032 | } |
830 | 1033 | ||
1034 | static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | ||
1035 | unsigned long nr_segs, loff_t pos) | ||
1036 | { | ||
1037 | struct fuse_copy_state cs; | ||
1038 | struct file *file = iocb->ki_filp; | ||
1039 | struct fuse_conn *fc = fuse_get_conn(file); | ||
1040 | if (!fc) | ||
1041 | return -EPERM; | ||
1042 | |||
1043 | fuse_copy_init(&cs, fc, 1, iov, nr_segs); | ||
1044 | |||
1045 | return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); | ||
1046 | } | ||
1047 | |||
1048 | static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe, | ||
1049 | struct pipe_buffer *buf) | ||
1050 | { | ||
1051 | return 1; | ||
1052 | } | ||
1053 | |||
1054 | static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = { | ||
1055 | .can_merge = 0, | ||
1056 | .map = generic_pipe_buf_map, | ||
1057 | .unmap = generic_pipe_buf_unmap, | ||
1058 | .confirm = generic_pipe_buf_confirm, | ||
1059 | .release = generic_pipe_buf_release, | ||
1060 | .steal = fuse_dev_pipe_buf_steal, | ||
1061 | .get = generic_pipe_buf_get, | ||
1062 | }; | ||
1063 | |||
1064 | static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | ||
1065 | struct pipe_inode_info *pipe, | ||
1066 | size_t len, unsigned int flags) | ||
1067 | { | ||
1068 | int ret; | ||
1069 | int page_nr = 0; | ||
1070 | int do_wakeup = 0; | ||
1071 | struct pipe_buffer *bufs; | ||
1072 | struct fuse_copy_state cs; | ||
1073 | struct fuse_conn *fc = fuse_get_conn(in); | ||
1074 | if (!fc) | ||
1075 | return -EPERM; | ||
1076 | |||
1077 | bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); | ||
1078 | if (!bufs) | ||
1079 | return -ENOMEM; | ||
1080 | |||
1081 | fuse_copy_init(&cs, fc, 1, NULL, 0); | ||
1082 | cs.pipebufs = bufs; | ||
1083 | cs.pipe = pipe; | ||
1084 | ret = fuse_dev_do_read(fc, in, &cs, len); | ||
1085 | if (ret < 0) | ||
1086 | goto out; | ||
1087 | |||
1088 | ret = 0; | ||
1089 | pipe_lock(pipe); | ||
1090 | |||
1091 | if (!pipe->readers) { | ||
1092 | send_sig(SIGPIPE, current, 0); | ||
1093 | if (!ret) | ||
1094 | ret = -EPIPE; | ||
1095 | goto out_unlock; | ||
1096 | } | ||
1097 | |||
1098 | if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { | ||
1099 | ret = -EIO; | ||
1100 | goto out_unlock; | ||
1101 | } | ||
1102 | |||
1103 | while (page_nr < cs.nr_segs) { | ||
1104 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
1105 | struct pipe_buffer *buf = pipe->bufs + newbuf; | ||
1106 | |||
1107 | buf->page = bufs[page_nr].page; | ||
1108 | buf->offset = bufs[page_nr].offset; | ||
1109 | buf->len = bufs[page_nr].len; | ||
1110 | buf->ops = &fuse_dev_pipe_buf_ops; | ||
1111 | |||
1112 | pipe->nrbufs++; | ||
1113 | page_nr++; | ||
1114 | ret += buf->len; | ||
1115 | |||
1116 | if (pipe->inode) | ||
1117 | do_wakeup = 1; | ||
1118 | } | ||
1119 | |||
1120 | out_unlock: | ||
1121 | pipe_unlock(pipe); | ||
1122 | |||
1123 | if (do_wakeup) { | ||
1124 | smp_mb(); | ||
1125 | if (waitqueue_active(&pipe->wait)) | ||
1126 | wake_up_interruptible(&pipe->wait); | ||
1127 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
1128 | } | ||
1129 | |||
1130 | out: | ||
1131 | for (; page_nr < cs.nr_segs; page_nr++) | ||
1132 | page_cache_release(bufs[page_nr].page); | ||
1133 | |||
1134 | kfree(bufs); | ||
1135 | return ret; | ||
1136 | } | ||
1137 | |||
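With .splice_read wired up (see the fuse_dev_operations hunk below), a userspace filesystem can pull a request off /dev/fuse without bouncing it through a user buffer; the request pages are referenced into pipe buffers (fuse_ref_page() above) rather than copied. A minimal userspace sketch, assuming fuse_fd is an open /dev/fuse descriptor and pipe_wr the write end of a pipe; error handling omitted:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	static ssize_t pull_request(int fuse_fd, int pipe_wr)
	{
		/* move up to 64k of the next request into the pipe */
		return splice(fuse_fd, NULL, pipe_wr, NULL, 65536, 0);
	}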
831 | static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, | 1138 | static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, |
832 | struct fuse_copy_state *cs) | 1139 | struct fuse_copy_state *cs) |
833 | { | 1140 | { |
@@ -987,23 +1294,17 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, | |||
987 | * it from the list and copy the rest of the buffer to the request. | 1294 | * it from the list and copy the rest of the buffer to the request. |
988 | * The request is finished by calling request_end() | 1295 | * The request is finished by calling request_end() |
989 | */ | 1296 | */ |
990 | static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | 1297 | static ssize_t fuse_dev_do_write(struct fuse_conn *fc, |
991 | unsigned long nr_segs, loff_t pos) | 1298 | struct fuse_copy_state *cs, size_t nbytes) |
992 | { | 1299 | { |
993 | int err; | 1300 | int err; |
994 | size_t nbytes = iov_length(iov, nr_segs); | ||
995 | struct fuse_req *req; | 1301 | struct fuse_req *req; |
996 | struct fuse_out_header oh; | 1302 | struct fuse_out_header oh; |
997 | struct fuse_copy_state cs; | ||
998 | struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); | ||
999 | if (!fc) | ||
1000 | return -EPERM; | ||
1001 | 1303 | ||
1002 | fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs); | ||
1003 | if (nbytes < sizeof(struct fuse_out_header)) | 1304 | if (nbytes < sizeof(struct fuse_out_header)) |
1004 | return -EINVAL; | 1305 | return -EINVAL; |
1005 | 1306 | ||
1006 | err = fuse_copy_one(&cs, &oh, sizeof(oh)); | 1307 | err = fuse_copy_one(cs, &oh, sizeof(oh)); |
1007 | if (err) | 1308 | if (err) |
1008 | goto err_finish; | 1309 | goto err_finish; |
1009 | 1310 | ||
@@ -1016,7 +1317,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | |||
1016 | * and error contains notification code. | 1317 | * and error contains notification code. |
1017 | */ | 1318 | */ |
1018 | if (!oh.unique) { | 1319 | if (!oh.unique) { |
1019 | err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs); | 1320 | err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); |
1020 | return err ? err : nbytes; | 1321 | return err ? err : nbytes; |
1021 | } | 1322 | } |
1022 | 1323 | ||
@@ -1035,7 +1336,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | |||
1035 | 1336 | ||
1036 | if (req->aborted) { | 1337 | if (req->aborted) { |
1037 | spin_unlock(&fc->lock); | 1338 | spin_unlock(&fc->lock); |
1038 | fuse_copy_finish(&cs); | 1339 | fuse_copy_finish(cs); |
1039 | spin_lock(&fc->lock); | 1340 | spin_lock(&fc->lock); |
1040 | request_end(fc, req); | 1341 | request_end(fc, req); |
1041 | return -ENOENT; | 1342 | return -ENOENT; |
@@ -1052,7 +1353,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | |||
1052 | queue_interrupt(fc, req); | 1353 | queue_interrupt(fc, req); |
1053 | 1354 | ||
1054 | spin_unlock(&fc->lock); | 1355 | spin_unlock(&fc->lock); |
1055 | fuse_copy_finish(&cs); | 1356 | fuse_copy_finish(cs); |
1056 | return nbytes; | 1357 | return nbytes; |
1057 | } | 1358 | } |
1058 | 1359 | ||
@@ -1060,11 +1361,13 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | |||
1060 | list_move(&req->list, &fc->io); | 1361 | list_move(&req->list, &fc->io); |
1061 | req->out.h = oh; | 1362 | req->out.h = oh; |
1062 | req->locked = 1; | 1363 | req->locked = 1; |
1063 | cs.req = req; | 1364 | cs->req = req; |
1365 | if (!req->out.page_replace) | ||
1366 | cs->move_pages = 0; | ||
1064 | spin_unlock(&fc->lock); | 1367 | spin_unlock(&fc->lock); |
1065 | 1368 | ||
1066 | err = copy_out_args(&cs, &req->out, nbytes); | 1369 | err = copy_out_args(cs, &req->out, nbytes); |
1067 | fuse_copy_finish(&cs); | 1370 | fuse_copy_finish(cs); |
1068 | 1371 | ||
1069 | spin_lock(&fc->lock); | 1372 | spin_lock(&fc->lock); |
1070 | req->locked = 0; | 1373 | req->locked = 0; |
@@ -1080,10 +1383,101 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | |||
1080 | err_unlock: | 1383 | err_unlock: |
1081 | spin_unlock(&fc->lock); | 1384 | spin_unlock(&fc->lock); |
1082 | err_finish: | 1385 | err_finish: |
1083 | fuse_copy_finish(&cs); | 1386 | fuse_copy_finish(cs); |
1084 | return err; | 1387 | return err; |
1085 | } | 1388 | } |
1086 | 1389 | ||
1390 | static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | ||
1391 | unsigned long nr_segs, loff_t pos) | ||
1392 | { | ||
1393 | struct fuse_copy_state cs; | ||
1394 | struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); | ||
1395 | if (!fc) | ||
1396 | return -EPERM; | ||
1397 | |||
1398 | fuse_copy_init(&cs, fc, 0, iov, nr_segs); | ||
1399 | |||
1400 | return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); | ||
1401 | } | ||
1402 | |||
1403 | static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, | ||
1404 | struct file *out, loff_t *ppos, | ||
1405 | size_t len, unsigned int flags) | ||
1406 | { | ||
1407 | unsigned nbuf; | ||
1408 | unsigned idx; | ||
1409 | struct pipe_buffer *bufs; | ||
1410 | struct fuse_copy_state cs; | ||
1411 | struct fuse_conn *fc; | ||
1412 | size_t rem; | ||
1413 | ssize_t ret; | ||
1414 | |||
1415 | fc = fuse_get_conn(out); | ||
1416 | if (!fc) | ||
1417 | return -EPERM; | ||
1418 | |||
1419 | bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); | ||
1420 | if (!bufs) | ||
1421 | return -ENOMEM; | ||
1422 | |||
1423 | pipe_lock(pipe); | ||
1424 | nbuf = 0; | ||
1425 | rem = 0; | ||
1426 | for (idx = 0; idx < pipe->nrbufs && rem < len; idx++) | ||
1427 | rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; | ||
1428 | |||
1429 | ret = -EINVAL; | ||
1430 | if (rem < len) { | ||
1431 | pipe_unlock(pipe); | ||
1432 | goto out; | ||
1433 | } | ||
1434 | |||
1435 | rem = len; | ||
1436 | while (rem) { | ||
1437 | struct pipe_buffer *ibuf; | ||
1438 | struct pipe_buffer *obuf; | ||
1439 | |||
1440 | BUG_ON(nbuf >= pipe->buffers); | ||
1441 | BUG_ON(!pipe->nrbufs); | ||
1442 | ibuf = &pipe->bufs[pipe->curbuf]; | ||
1443 | obuf = &bufs[nbuf]; | ||
1444 | |||
1445 | if (rem >= ibuf->len) { | ||
1446 | *obuf = *ibuf; | ||
1447 | ibuf->ops = NULL; | ||
1448 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | ||
1449 | pipe->nrbufs--; | ||
1450 | } else { | ||
1451 | ibuf->ops->get(pipe, ibuf); | ||
1452 | *obuf = *ibuf; | ||
1453 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | ||
1454 | obuf->len = rem; | ||
1455 | ibuf->offset += obuf->len; | ||
1456 | ibuf->len -= obuf->len; | ||
1457 | } | ||
1458 | nbuf++; | ||
1459 | rem -= obuf->len; | ||
1460 | } | ||
1461 | pipe_unlock(pipe); | ||
1462 | |||
1463 | fuse_copy_init(&cs, fc, 0, NULL, nbuf); | ||
1464 | cs.pipebufs = bufs; | ||
1465 | cs.pipe = pipe; | ||
1466 | |||
1467 | if (flags & SPLICE_F_MOVE) | ||
1468 | cs.move_pages = 1; | ||
1469 | |||
1470 | ret = fuse_dev_do_write(fc, &cs, len); | ||
1471 | |||
1472 | for (idx = 0; idx < nbuf; idx++) { | ||
1473 | struct pipe_buffer *buf = &bufs[idx]; | ||
1474 | buf->ops->release(pipe, buf); | ||
1475 | } | ||
1476 | out: | ||
1477 | kfree(bufs); | ||
1478 | return ret; | ||
1479 | } | ||
1480 | |||
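Symmetrically for replies: when SPLICE_F_MOVE is passed, cs.move_pages is set and fuse_try_move_page() may install the pipe's pages straight into the page cache, for requests that set page_replace, instead of copying them. A sketch, assuming pipe_rd is the read end of a pipe holding a fully assembled reply:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	static ssize_t push_reply(int pipe_rd, int fuse_fd, size_t reply_len)
	{
		/* SPLICE_F_MOVE lets the kernel steal the pipe pages
		 * rather than copy them */
		return splice(pipe_rd, NULL, fuse_fd, NULL, reply_len,
			      SPLICE_F_MOVE);
	}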
1087 | static unsigned fuse_dev_poll(struct file *file, poll_table *wait) | 1481 | static unsigned fuse_dev_poll(struct file *file, poll_table *wait) |
1088 | { | 1482 | { |
1089 | unsigned mask = POLLOUT | POLLWRNORM; | 1483 | unsigned mask = POLLOUT | POLLWRNORM; |
@@ -1225,8 +1619,10 @@ const struct file_operations fuse_dev_operations = { | |||
1225 | .llseek = no_llseek, | 1619 | .llseek = no_llseek, |
1226 | .read = do_sync_read, | 1620 | .read = do_sync_read, |
1227 | .aio_read = fuse_dev_read, | 1621 | .aio_read = fuse_dev_read, |
1622 | .splice_read = fuse_dev_splice_read, | ||
1228 | .write = do_sync_write, | 1623 | .write = do_sync_write, |
1229 | .aio_write = fuse_dev_write, | 1624 | .aio_write = fuse_dev_write, |
1625 | .splice_write = fuse_dev_splice_write, | ||
1230 | .poll = fuse_dev_poll, | 1626 | .poll = fuse_dev_poll, |
1231 | .release = fuse_dev_release, | 1627 | .release = fuse_dev_release, |
1232 | .fasync = fuse_dev_fasync, | 1628 | .fasync = fuse_dev_fasync, |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4787ae6c5c1c..3cdc5f78a406 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1156,10 +1156,9 @@ static int fuse_dir_release(struct inode *inode, struct file *file) | |||
1156 | return 0; | 1156 | return 0; |
1157 | } | 1157 | } |
1158 | 1158 | ||
1159 | static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) | 1159 | static int fuse_dir_fsync(struct file *file, int datasync) |
1160 | { | 1160 | { |
1161 | /* nfsd can call this with no file */ | 1161 | return fuse_fsync_common(file, datasync, 1); |
1162 | return file ? fuse_fsync_common(file, de, datasync, 1) : 0; | ||
1163 | } | 1162 | } |
1164 | 1163 | ||
1165 | static bool update_mtime(unsigned ivalid) | 1164 | static bool update_mtime(unsigned ivalid) |
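This is one instance of a tree-wide interface change in this merge: ->fsync loses its dentry argument, and implementations take the inode from the file instead. The new shape, with a hypothetical filesystem name:

	static int foo_fsync(struct file *file, int datasync)
	{
		struct inode *inode = file->f_mapping->host;	/* was de->d_inode */
		/* ... write back dirty data/metadata for inode ... */
		return 0;
	}

The old NULL-file guard above goes away along with the dentry: the new prototype is only reachable with a real file.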
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a9f5e137f1d3..ada0adeb3bb5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -351,10 +351,9 @@ static void fuse_sync_writes(struct inode *inode) | |||
351 | fuse_release_nowrite(inode); | 351 | fuse_release_nowrite(inode); |
352 | } | 352 | } |
353 | 353 | ||
354 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | 354 | int fuse_fsync_common(struct file *file, int datasync, int isdir) |
355 | int isdir) | ||
356 | { | 355 | { |
357 | struct inode *inode = de->d_inode; | 356 | struct inode *inode = file->f_mapping->host; |
358 | struct fuse_conn *fc = get_fuse_conn(inode); | 357 | struct fuse_conn *fc = get_fuse_conn(inode); |
359 | struct fuse_file *ff = file->private_data; | 358 | struct fuse_file *ff = file->private_data; |
360 | struct fuse_req *req; | 359 | struct fuse_req *req; |
@@ -403,9 +402,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | |||
403 | return err; | 402 | return err; |
404 | } | 403 | } |
405 | 404 | ||
406 | static int fuse_fsync(struct file *file, struct dentry *de, int datasync) | 405 | static int fuse_fsync(struct file *file, int datasync) |
407 | { | 406 | { |
408 | return fuse_fsync_common(file, de, datasync, 0); | 407 | return fuse_fsync_common(file, datasync, 0); |
409 | } | 408 | } |
410 | 409 | ||
411 | void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, | 410 | void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, |
@@ -517,17 +516,26 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
517 | int i; | 516 | int i; |
518 | size_t count = req->misc.read.in.size; | 517 | size_t count = req->misc.read.in.size; |
519 | size_t num_read = req->out.args[0].size; | 518 | size_t num_read = req->out.args[0].size; |
520 | struct inode *inode = req->pages[0]->mapping->host; | 519 | struct address_space *mapping = NULL; |
521 | 520 | ||
522 | /* | 521 | for (i = 0; mapping == NULL && i < req->num_pages; i++) |
523 | * Short read means EOF. If file size is larger, truncate it | 522 | mapping = req->pages[i]->mapping; |
524 | */ | ||
525 | if (!req->out.h.error && num_read < count) { | ||
526 | loff_t pos = page_offset(req->pages[0]) + num_read; | ||
527 | fuse_read_update_size(inode, pos, req->misc.read.attr_ver); | ||
528 | } | ||
529 | 523 | ||
530 | fuse_invalidate_attr(inode); /* atime changed */ | 524 | if (mapping) { |
525 | struct inode *inode = mapping->host; | ||
526 | |||
527 | /* | ||
528 | * Short read means EOF. If file size is larger, truncate it | ||
529 | */ | ||
530 | if (!req->out.h.error && num_read < count) { | ||
531 | loff_t pos; | ||
532 | |||
533 | pos = page_offset(req->pages[0]) + num_read; | ||
534 | fuse_read_update_size(inode, pos, | ||
535 | req->misc.read.attr_ver); | ||
536 | } | ||
537 | fuse_invalidate_attr(inode); /* atime changed */ | ||
538 | } | ||
531 | 539 | ||
532 | for (i = 0; i < req->num_pages; i++) { | 540 | for (i = 0; i < req->num_pages; i++) { |
533 | struct page *page = req->pages[i]; | 541 | struct page *page = req->pages[i]; |
@@ -536,6 +544,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
536 | else | 544 | else |
537 | SetPageError(page); | 545 | SetPageError(page); |
538 | unlock_page(page); | 546 | unlock_page(page); |
547 | page_cache_release(page); | ||
539 | } | 548 | } |
540 | if (req->ff) | 549 | if (req->ff) |
541 | fuse_file_put(req->ff); | 550 | fuse_file_put(req->ff); |
@@ -550,6 +559,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file) | |||
550 | 559 | ||
551 | req->out.argpages = 1; | 560 | req->out.argpages = 1; |
552 | req->out.page_zeroing = 1; | 561 | req->out.page_zeroing = 1; |
562 | req->out.page_replace = 1; | ||
553 | fuse_read_fill(req, file, pos, count, FUSE_READ); | 563 | fuse_read_fill(req, file, pos, count, FUSE_READ); |
554 | req->misc.read.attr_ver = fuse_get_attr_version(fc); | 564 | req->misc.read.attr_ver = fuse_get_attr_version(fc); |
555 | if (fc->async_read) { | 565 | if (fc->async_read) { |
@@ -589,6 +599,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) | |||
589 | return PTR_ERR(req); | 599 | return PTR_ERR(req); |
590 | } | 600 | } |
591 | } | 601 | } |
602 | page_cache_get(page); | ||
592 | req->pages[req->num_pages] = page; | 603 | req->pages[req->num_pages] = page; |
593 | req->num_pages++; | 604 | req->num_pages++; |
594 | return 0; | 605 | return 0; |
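The page_cache_get() here pairs with the page_cache_release() added to fuse_readpages_end() above: once page_replace is set, the splice path may swap or drop a request page, so the request must hold its own reference to every page it queues. In outline, with names from the two hunks:

	page_cache_get(page);		/* fuse_readpages_fill(): ref held by req->pages[] */
	/* ... request serviced; splice may replace the page ... */
	page_cache_release(page);	/* fuse_readpages_end(): drop that ref */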
@@ -994,10 +1005,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | |||
994 | nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); | 1005 | nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); |
995 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1006 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
996 | npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); | 1007 | npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); |
997 | down_read(¤t->mm->mmap_sem); | 1008 | npages = get_user_pages_fast(user_addr, npages, !write, req->pages); |
998 | npages = get_user_pages(current, current->mm, user_addr, npages, !write, | ||
999 | 0, req->pages, NULL); | ||
1000 | up_read(¤t->mm->mmap_sem); | ||
1001 | if (npages < 0) | 1009 | if (npages < 0) |
1002 | return npages; | 1010 | return npages; |
1003 | 1011 | ||
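get_user_pages_fast() folds the mmap_sem handling into the helper (it takes the semaphore only if its lockless fast path fails), so the three-line dance collapses to one call with the same semantics:

	down_read(&current->mm->mmap_sem);
	n = get_user_pages(current, current->mm, addr, n, write, 0, pages, NULL);
	up_read(&current->mm->mmap_sem);

	/* ... is equivalent to ... */

	n = get_user_pages_fast(addr, n, write, pages);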
@@ -1580,9 +1588,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, | |||
1580 | while (iov_iter_count(&ii)) { | 1588 | while (iov_iter_count(&ii)) { |
1581 | struct page *page = pages[page_idx++]; | 1589 | struct page *page = pages[page_idx++]; |
1582 | size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii)); | 1590 | size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii)); |
1583 | void *kaddr, *map; | 1591 | void *kaddr; |
1584 | 1592 | ||
1585 | kaddr = map = kmap(page); | 1593 | kaddr = kmap(page); |
1586 | 1594 | ||
1587 | while (todo) { | 1595 | while (todo) { |
1588 | char __user *uaddr = ii.iov->iov_base + ii.iov_offset; | 1596 | char __user *uaddr = ii.iov->iov_base + ii.iov_offset; |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 01cc462ff45d..8f309f04064e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -177,6 +177,9 @@ struct fuse_out { | |||
177 | /** Zero partially or not copied pages */ | 177 | /** Zero partially or not copied pages */ |
178 | unsigned page_zeroing:1; | 178 | unsigned page_zeroing:1; |
179 | 179 | ||
180 | /** Pages may be replaced with new ones */ | ||
181 | unsigned page_replace:1; | ||
182 | |||
180 | /** Number or arguments */ | 183 | /** Number or arguments */ |
181 | unsigned numargs; | 184 | unsigned numargs; |
182 | 185 | ||
@@ -568,8 +571,7 @@ void fuse_release_common(struct file *file, int opcode); | |||
568 | /** | 571 | /** |
569 | * Send FSYNC or FSYNCDIR request | 572 | * Send FSYNC or FSYNCDIR request |
570 | */ | 573 | */ |
571 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | 574 | int fuse_fsync_common(struct file *file, int datasync, int isdir); |
572 | int isdir); | ||
573 | 575 | ||
574 | /** | 576 | /** |
575 | * Notify poll wakeup | 577 | * Notify poll wakeup |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 9fb76b0a0485..48171f4c943d 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -236,10 +236,14 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, | |||
236 | void *buffer, size_t size, int xtype) | 236 | void *buffer, size_t size, int xtype) |
237 | { | 237 | { |
238 | struct inode *inode = dentry->d_inode; | 238 | struct inode *inode = dentry->d_inode; |
239 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
239 | struct posix_acl *acl; | 240 | struct posix_acl *acl; |
240 | int type; | 241 | int type; |
241 | int error; | 242 | int error; |
242 | 243 | ||
244 | if (!sdp->sd_args.ar_posix_acl) | ||
245 | return -EOPNOTSUPP; | ||
246 | |||
243 | type = gfs2_acl_type(name); | 247 | type = gfs2_acl_type(name); |
244 | if (type < 0) | 248 | if (type < 0) |
245 | return type; | 249 | return type; |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index a739a0a48067..9f8b52500d63 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -700,8 +700,14 @@ out: | |||
700 | return 0; | 700 | return 0; |
701 | 701 | ||
702 | page_cache_release(page); | 702 | page_cache_release(page); |
703 | |||
704 | /* | ||
705 | * XXX(hch): the call below should probably be replaced with | ||
706 | * a call to the gfs2-specific truncate blocks helper to actually | ||
707 | * release disk blocks. | ||
708 | */ | ||
703 | if (pos + len > ip->i_inode.i_size) | 709 | if (pos + len > ip->i_inode.i_size) |
704 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | 710 | simple_setsize(&ip->i_inode, ip->i_inode.i_size); |
705 | out_endtrans: | 711 | out_endtrans: |
706 | gfs2_trans_end(sdp); | 712 | gfs2_trans_end(sdp); |
707 | out_trans_fail: | 713 | out_trans_fail: |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e6dd2aec6f82..ed9a94f0ef15 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -218,6 +218,11 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
218 | if (error) | 218 | if (error) |
219 | goto out_drop_write; | 219 | goto out_drop_write; |
220 | 220 | ||
221 | error = -EACCES; | ||
222 | if (!is_owner_or_cap(inode)) | ||
223 | goto out; | ||
224 | |||
225 | error = 0; | ||
221 | flags = ip->i_diskflags; | 226 | flags = ip->i_diskflags; |
222 | new_flags = (flags & ~mask) | (reqflags & mask); | 227 | new_flags = (flags & ~mask) | (reqflags & mask); |
223 | if ((new_flags ^ flags) == 0) | 228 | if ((new_flags ^ flags) == 0) |
@@ -275,8 +280,10 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) | |||
275 | { | 280 | { |
276 | struct inode *inode = filp->f_path.dentry->d_inode; | 281 | struct inode *inode = filp->f_path.dentry->d_inode; |
277 | u32 fsflags, gfsflags; | 282 | u32 fsflags, gfsflags; |
283 | |||
278 | if (get_user(fsflags, ptr)) | 284 | if (get_user(fsflags, ptr)) |
279 | return -EFAULT; | 285 | return -EFAULT; |
286 | |||
280 | gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); | 287 | gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); |
281 | if (!S_ISDIR(inode->i_mode)) { | 288 | if (!S_ISDIR(inode->i_mode)) { |
282 | if (gfsflags & GFS2_DIF_INHERIT_JDATA) | 289 | if (gfsflags & GFS2_DIF_INHERIT_JDATA) |
@@ -547,9 +554,9 @@ static int gfs2_close(struct inode *inode, struct file *file) | |||
547 | * Returns: errno | 554 | * Returns: errno |
548 | */ | 555 | */ |
549 | 556 | ||
550 | static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) | 557 | static int gfs2_fsync(struct file *file, int datasync) |
551 | { | 558 | { |
552 | struct inode *inode = dentry->d_inode; | 559 | struct inode *inode = file->f_mapping->host; |
553 | int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); | 560 | int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); |
554 | int ret = 0; | 561 | int ret = 0; |
555 | 562 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 51d8061fa07a..b5612cbb62a5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -242,34 +242,38 @@ fail: | |||
242 | } | 242 | } |
243 | 243 | ||
244 | /** | 244 | /** |
245 | * gfs2_unlinked_inode_lookup - Lookup an unlinked inode for reclamation | 245 | * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation |
246 | * and try to reclaim it by doing iput. | ||
247 | * | ||
248 | * This function assumes no rgrp locks are currently held. | ||
249 | * | ||
246 | * @sb: The super block | 250 | * @sb: The super block |
247 | @no_addr: The inode number | 251 | @no_addr: The inode number
248 | * @@inode: A pointer to the inode found, if any | ||
249 | * | 252 | * |
250 | * Returns: 0 and *inode if no errors occurred. If an error occurs, | ||
251 | * the resulting *inode may or may not be NULL. | ||
252 | */ | 253 | */ |
253 | 254 | ||
254 | int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr, | 255 | void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) |
255 | struct inode **inode) | ||
256 | { | 256 | { |
257 | struct gfs2_sbd *sdp; | 257 | struct gfs2_sbd *sdp; |
258 | struct gfs2_inode *ip; | 258 | struct gfs2_inode *ip; |
259 | struct gfs2_glock *io_gl; | 259 | struct gfs2_glock *io_gl; |
260 | int error; | 260 | int error; |
261 | struct gfs2_holder gh; | 261 | struct gfs2_holder gh; |
262 | struct inode *inode; | ||
262 | 263 | ||
263 | *inode = gfs2_iget_skip(sb, no_addr); | 264 | inode = gfs2_iget_skip(sb, no_addr); |
264 | 265 | ||
265 | if (!(*inode)) | 266 | if (!inode) |
266 | return -ENOBUFS; | 267 | return; |
267 | 268 | ||
268 | if (!((*inode)->i_state & I_NEW)) | 269 | /* If it's not a new inode, someone's using it, so leave it alone. */ |
269 | return -ENOBUFS; | 270 | if (!(inode->i_state & I_NEW)) { |
271 | iput(inode); | ||
272 | return; | ||
273 | } | ||
270 | 274 | ||
271 | ip = GFS2_I(*inode); | 275 | ip = GFS2_I(inode); |
272 | sdp = GFS2_SB(*inode); | 276 | sdp = GFS2_SB(inode); |
273 | ip->i_no_formal_ino = -1; | 277 | ip->i_no_formal_ino = -1; |
274 | 278 | ||
275 | error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); | 279 | error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); |
@@ -284,15 +288,13 @@ int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr, | |||
284 | set_bit(GIF_INVALID, &ip->i_flags); | 288 | set_bit(GIF_INVALID, &ip->i_flags); |
285 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, | 289 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, |
286 | &ip->i_iopen_gh); | 290 | &ip->i_iopen_gh); |
287 | if (unlikely(error)) { | 291 | if (unlikely(error)) |
288 | if (error == GLR_TRYFAILED) | ||
289 | error = 0; | ||
290 | goto fail_iopen; | 292 | goto fail_iopen; |
291 | } | 293 | |
292 | ip->i_iopen_gh.gh_gl->gl_object = ip; | 294 | ip->i_iopen_gh.gh_gl->gl_object = ip; |
293 | gfs2_glock_put(io_gl); | 295 | gfs2_glock_put(io_gl); |
294 | 296 | ||
295 | (*inode)->i_mode = DT2IF(DT_UNKNOWN); | 297 | inode->i_mode = DT2IF(DT_UNKNOWN); |
296 | 298 | ||
297 | /* | 299 | /* |
298 | * We must read the inode in order to work out its type in | 300 | * We must read the inode in order to work out its type in |
@@ -303,16 +305,17 @@ int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr, | |||
303 | */ | 305 | */ |
304 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, | 306 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, |
305 | &gh); | 307 | &gh); |
306 | if (unlikely(error)) { | 308 | if (unlikely(error)) |
307 | if (error == GLR_TRYFAILED) | ||
308 | error = 0; | ||
309 | goto fail_glock; | 309 | goto fail_glock; |
310 | } | 310 | |
311 | /* Inode is now uptodate */ | 311 | /* Inode is now uptodate */ |
312 | gfs2_glock_dq_uninit(&gh); | 312 | gfs2_glock_dq_uninit(&gh); |
313 | gfs2_set_iop(*inode); | 313 | gfs2_set_iop(inode); |
314 | |||
315 | /* The iput will cause it to be deleted. */ | ||
316 | iput(inode); | ||
317 | return; | ||
314 | 318 | ||
315 | return 0; | ||
316 | fail_glock: | 319 | fail_glock: |
317 | gfs2_glock_dq(&ip->i_iopen_gh); | 320 | gfs2_glock_dq(&ip->i_iopen_gh); |
318 | fail_iopen: | 321 | fail_iopen: |
@@ -321,7 +324,8 @@ fail_put: | |||
321 | ip->i_gl->gl_object = NULL; | 324 | ip->i_gl->gl_object = NULL; |
322 | gfs2_glock_put(ip->i_gl); | 325 | gfs2_glock_put(ip->i_gl); |
323 | fail: | 326 | fail: |
324 | return error; | 327 | iget_failed(inode); |
328 | return; | ||
325 | } | 329 | } |
326 | 330 | ||
327 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | 331 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e161461d4c57..300ada3f21de 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -84,8 +84,7 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
84 | extern void gfs2_set_iop(struct inode *inode); | 84 | extern void gfs2_set_iop(struct inode *inode); |
85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
86 | u64 no_addr, u64 no_formal_ino); | 86 | u64 no_addr, u64 no_formal_ino); |
87 | extern int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr, | 87 | extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr); |
88 | struct inode **inode); | ||
89 | extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); | 88 | extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); |
90 | 89 | ||
91 | extern int gfs2_inode_refresh(struct gfs2_inode *ip); | 90 | extern int gfs2_inode_refresh(struct gfs2_inode *ip); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index b593f0e28f25..6a857e24f947 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -696,7 +696,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) | |||
696 | * | 696 | * |
697 | */ | 697 | */ |
698 | 698 | ||
699 | void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | 699 | void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) |
700 | { | 700 | { |
701 | struct gfs2_ail *ai; | 701 | struct gfs2_ail *ai; |
702 | 702 | ||
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index eb570b4ad443..0d007f920234 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
@@ -47,28 +47,21 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, | |||
47 | sdp->sd_log_head = sdp->sd_log_tail = value; | 47 | sdp->sd_log_head = sdp->sd_log_tail = value; |
48 | } | 48 | } |
49 | 49 | ||
50 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | 50 | extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, |
51 | unsigned int ssize); | 51 | unsigned int ssize); |
52 | 52 | ||
53 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); | 53 | extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); |
54 | void gfs2_log_incr_head(struct gfs2_sbd *sdp); | 54 | extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); |
55 | 55 | ||
56 | struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); | 56 | extern struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); |
57 | struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, | 57 | extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, |
58 | struct buffer_head *real); | 58 | struct buffer_head *real); |
59 | void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); | 59 | extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); |
60 | extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); | ||
61 | extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); | ||
60 | 62 | ||
61 | static inline void gfs2_log_flush(struct gfs2_sbd *sbd, struct gfs2_glock *gl) | 63 | extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); |
62 | { | 64 | extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); |
63 | if (!gl || test_bit(GLF_LFLUSH, &gl->gl_flags)) | 65 | extern int gfs2_logd(void *data); |
64 | __gfs2_log_flush(sbd, gl); | ||
65 | } | ||
66 | |||
67 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); | ||
68 | void gfs2_remove_from_ail(struct gfs2_bufdata *bd); | ||
69 | |||
70 | void gfs2_log_shutdown(struct gfs2_sbd *sdp); | ||
71 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp); | ||
72 | int gfs2_logd(void *data); | ||
73 | 66 | ||
74 | #endif /* __LOG_DOT_H__ */ | 67 | #endif /* __LOG_DOT_H__ */ |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 4e64352d49de..98cdd05f3316 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -1071,6 +1071,9 @@ int gfs2_permission(struct inode *inode, int mask) | |||
1071 | return error; | 1071 | return error; |
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | /* | ||
1075 | * XXX: should be changed to have proper ordering by opencoding simple_setsize | ||
1076 | */ | ||
1074 | static int setattr_size(struct inode *inode, struct iattr *attr) | 1077 | static int setattr_size(struct inode *inode, struct iattr *attr) |
1075 | { | 1078 | { |
1076 | struct gfs2_inode *ip = GFS2_I(inode); | 1079 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -1081,7 +1084,7 @@ static int setattr_size(struct inode *inode, struct iattr *attr) | |||
1081 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | 1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); |
1082 | if (error) | 1085 | if (error) |
1083 | return error; | 1086 | return error; |
1084 | error = vmtruncate(inode, attr->ia_size); | 1087 | error = simple_setsize(inode, attr->ia_size); |
1085 | gfs2_trans_end(sdp); | 1088 | gfs2_trans_end(sdp); |
1086 | if (error) | 1089 | if (error) |
1087 | return error; | 1090 | return error; |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 117fa4171f62..171a744f8e45 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -1192,7 +1192,6 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | |||
1192 | { | 1192 | { |
1193 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1193 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1194 | struct gfs2_alloc *al = ip->i_alloc; | 1194 | struct gfs2_alloc *al = ip->i_alloc; |
1195 | struct inode *inode; | ||
1196 | int error = 0; | 1195 | int error = 0; |
1197 | u64 last_unlinked = NO_BLOCK, unlinked; | 1196 | u64 last_unlinked = NO_BLOCK, unlinked; |
1198 | 1197 | ||
@@ -1210,22 +1209,27 @@ try_again: | |||
1210 | if (error) | 1209 | if (error) |
1211 | return error; | 1210 | return error; |
1212 | 1211 | ||
1212 | /* Find an rgrp suitable for allocation. If it encounters any unlinked | ||
1213 | dinodes along the way, error will equal -EAGAIN and unlinked will | ||
1214 | contain its block address. We then need to look up that inode and | ||
1215 | try to free it, and try the allocation again. */ | ||
1213 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); | 1216 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); |
1214 | if (error) { | 1217 | if (error) { |
1215 | if (ip != GFS2_I(sdp->sd_rindex)) | 1218 | if (ip != GFS2_I(sdp->sd_rindex)) |
1216 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1219 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1217 | if (error != -EAGAIN) | 1220 | if (error != -EAGAIN) |
1218 | return error; | 1221 | return error; |
1219 | error = gfs2_unlinked_inode_lookup(ip->i_inode.i_sb, | 1222 | |
1220 | unlinked, &inode); | 1223 | gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked); |
1221 | if (inode) | 1224 | /* regardless of whether or not gfs2_process_unlinked_inode |
1222 | iput(inode); | 1225 | was successful, we don't want to repeat it. */
1226 | last_unlinked = unlinked; | ||
1223 | gfs2_log_flush(sdp, NULL); | 1227 | gfs2_log_flush(sdp, NULL); |
1224 | if (error == GLR_TRYFAILED) | 1228 | error = 0; |
1225 | error = 0; | 1229 | |
1226 | goto try_again; | 1230 | goto try_again; |
1227 | } | 1231 | } |
1228 | 1232 | /* no error, so we have the rgrp set in the inode's allocation. */ | |
1229 | al->al_file = file; | 1233 | al->al_file = file; |
1230 | al->al_line = line; | 1234 | al->al_line = line; |
1231 | 1235 | ||
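Unrolled, the reworked reservation path is a retry loop: -EAGAIN from get_local_rgrp() names an unlinked dinode, which is reclaimed on a best-effort basis before retrying, with last_unlinked guarding against revisiting the same block. In outline (simplified from the hunk above; the rindex glock handling is omitted):

	for (;;) {
		error = get_local_rgrp(ip, &unlinked, &last_unlinked);
		if (!error)
			break;				/* rgrp reserved */
		if (error != -EAGAIN)
			return error;			/* hard failure */
		gfs2_process_unlinked_inode(sb, unlinked);	/* best effort */
		last_unlinked = unlinked;		/* don't retry this dinode */
		gfs2_log_flush(sdp, NULL);
	}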
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 5f4023678251..764fd1bdca88 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -494,7 +494,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { | |||
494 | const struct file_operations hfsplus_dir_operations = { | 494 | const struct file_operations hfsplus_dir_operations = { |
495 | .read = generic_read_dir, | 495 | .read = generic_read_dir, |
496 | .readdir = hfsplus_readdir, | 496 | .readdir = hfsplus_readdir, |
497 | .ioctl = hfsplus_ioctl, | 497 | .unlocked_ioctl = hfsplus_ioctl, |
498 | .llseek = generic_file_llseek, | 498 | .llseek = generic_file_llseek, |
499 | .release = hfsplus_dir_release, | 499 | .release = hfsplus_dir_release, |
500 | }; | 500 | }; |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 5c10d803d9df..6505c30ad965 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -337,8 +337,7 @@ struct inode *hfsplus_new_inode(struct super_block *, int); | |||
337 | void hfsplus_delete_inode(struct inode *); | 337 | void hfsplus_delete_inode(struct inode *); |
338 | 338 | ||
339 | /* ioctl.c */ | 339 | /* ioctl.c */ |
340 | int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 340 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
341 | unsigned long arg); | ||
342 | int hfsplus_setxattr(struct dentry *dentry, const char *name, | 341 | int hfsplus_setxattr(struct dentry *dentry, const char *name, |
343 | const void *value, size_t size, int flags); | 342 | const void *value, size_t size, int flags); |
344 | ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | 343 | ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 1bcf597c0562..9bbb82924a22 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -285,7 +285,7 @@ static const struct file_operations hfsplus_file_operations = { | |||
285 | .fsync = file_fsync, | 285 | .fsync = file_fsync, |
286 | .open = hfsplus_file_open, | 286 | .open = hfsplus_file_open, |
287 | .release = hfsplus_file_release, | 287 | .release = hfsplus_file_release, |
288 | .ioctl = hfsplus_ioctl, | 288 | .unlocked_ioctl = hfsplus_ioctl, |
289 | }; | 289 | }; |
290 | 290 | ||
291 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 291 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index f457d2ca51ab..ac405f099026 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
@@ -17,14 +17,16 @@ | |||
17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
20 | #include <linux/smp_lock.h> | ||
20 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
21 | #include "hfsplus_fs.h" | 22 | #include "hfsplus_fs.h" |
22 | 23 | ||
23 | int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 24 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
24 | unsigned long arg) | ||
25 | { | 25 | { |
26 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
26 | unsigned int flags; | 27 | unsigned int flags; |
27 | 28 | ||
29 | lock_kernel(); | ||
28 | switch (cmd) { | 30 | switch (cmd) { |
29 | case HFSPLUS_IOC_EXT2_GETFLAGS: | 31 | case HFSPLUS_IOC_EXT2_GETFLAGS: |
30 | flags = 0; | 32 | flags = 0; |
@@ -38,8 +40,10 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
38 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | 40 | case HFSPLUS_IOC_EXT2_SETFLAGS: { |
39 | int err = 0; | 41 | int err = 0; |
40 | err = mnt_want_write(filp->f_path.mnt); | 42 | err = mnt_want_write(filp->f_path.mnt); |
41 | if (err) | 43 | if (err) { |
44 | unlock_kernel(); | ||
42 | return err; | 45 | return err; |
46 | } | ||
43 | 47 | ||
44 | if (!is_owner_or_cap(inode)) { | 48 | if (!is_owner_or_cap(inode)) { |
45 | err = -EACCES; | 49 | err = -EACCES; |
@@ -85,9 +89,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
85 | mark_inode_dirty(inode); | 89 | mark_inode_dirty(inode); |
86 | setflags_out: | 90 | setflags_out: |
87 | mnt_drop_write(filp->f_path.mnt); | 91 | mnt_drop_write(filp->f_path.mnt); |
92 | unlock_kernel(); | ||
88 | return err; | 93 | return err; |
89 | } | 94 | } |
90 | default: | 95 | default: |
96 | unlock_kernel(); | ||
91 | return -ENOTTY; | 97 | return -ENOTTY; |
92 | } | 98 | } |
93 | } | 99 | } |
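The hfsplus conversion follows the standard BKL push-down recipe: .ioctl, which the VFS used to call under the big kernel lock, becomes .unlocked_ioctl, the inode is recovered from the file, and the filesystem brackets its own body with lock_kernel()/unlock_kernel() until its locking can be audited properly. The recipe, for a hypothetical filesystem:

	long foo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	{
		struct inode *inode = filp->f_path.dentry->d_inode;
		long ret;

		lock_kernel();
		ret = foo_do_ioctl(inode, cmd, arg);	/* hypothetical helper */
		unlock_kernel();
		return ret;
	}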
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 3a029d8f4cf1..87ac1891a185 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -411,9 +411,9 @@ int hostfs_file_open(struct inode *ino, struct file *file) | |||
411 | return 0; | 411 | return 0; |
412 | } | 412 | } |
413 | 413 | ||
414 | int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 414 | int hostfs_fsync(struct file *file, int datasync) |
415 | { | 415 | { |
416 | return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync); | 416 | return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync); |
417 | } | 417 | } |
418 | 418 | ||
419 | static const struct file_operations hostfs_file_fops = { | 419 | static const struct file_operations hostfs_file_fops = { |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 3efabff00367..a9ae9bfa752f 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -19,9 +19,9 @@ static int hpfs_file_release(struct inode *inode, struct file *file) | |||
19 | return 0; | 19 | return 0; |
20 | } | 20 | } |
21 | 21 | ||
22 | int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) | 22 | int hpfs_file_fsync(struct file *file, int datasync) |
23 | { | 23 | { |
24 | /*return file_fsync(file, dentry);*/ | 24 | /*return file_fsync(file, datasync);*/ |
25 | return 0; /* Don't fsync :-) */ | 25 | return 0; /* Don't fsync :-) */ |
26 | } | 26 | } |
27 | 27 | ||
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 97bf738cd5d6..75f9d4324851 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *, | |||
268 | 268 | ||
269 | /* file.c */ | 269 | /* file.c */ |
270 | 270 | ||
271 | int hpfs_file_fsync(struct file *, struct dentry *, int); | 271 | int hpfs_file_fsync(struct file *, int); |
272 | extern const struct file_operations hpfs_file_ops; | 272 | extern const struct file_operations hpfs_file_ops; |
273 | extern const struct inode_operations hpfs_file_iops; | 273 | extern const struct inode_operations hpfs_file_iops; |
274 | extern const struct address_space_operations hpfs_aops; | 274 | extern const struct address_space_operations hpfs_aops; |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 2e4dfa8593da..826c3f9d29ac 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -587,7 +587,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
587 | return err; | 587 | return err; |
588 | } | 588 | } |
589 | 589 | ||
590 | static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 590 | static int hppfs_fsync(struct file *file, int datasync) |
591 | { | 591 | { |
592 | return 0; | 592 | return 0; |
593 | } | 593 | } |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a0bbd3d1b41a..a4e9a7ec3691 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -688,7 +688,7 @@ static void init_once(void *foo) | |||
688 | const struct file_operations hugetlbfs_file_operations = { | 688 | const struct file_operations hugetlbfs_file_operations = { |
689 | .read = hugetlbfs_read, | 689 | .read = hugetlbfs_read, |
690 | .mmap = hugetlbfs_file_mmap, | 690 | .mmap = hugetlbfs_file_mmap, |
691 | .fsync = simple_sync_file, | 691 | .fsync = noop_fsync, |
692 | .get_unmapped_area = hugetlb_get_unmapped_area, | 692 | .get_unmapped_area = hugetlb_get_unmapped_area, |
693 | }; | 693 | }; |
694 | 694 | ||
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index b9ab69b3a482..e0aca9a0ac68 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
@@ -272,6 +272,7 @@ static int isofs_readdir(struct file *filp, | |||
272 | 272 | ||
273 | const struct file_operations isofs_dir_operations = | 273 | const struct file_operations isofs_dir_operations = |
274 | { | 274 | { |
275 | .llseek = generic_file_llseek, | ||
275 | .read = generic_read_dir, | 276 | .read = generic_read_dir, |
276 | .readdir = isofs_readdir, | 277 | .readdir = isofs_readdir, |
277 | }; | 278 | }; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index bfc70f57900f..e214d68620ac 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1311,7 +1311,6 @@ int jbd2_journal_stop(handle_t *handle) | |||
1311 | if (handle->h_sync) | 1311 | if (handle->h_sync) |
1312 | transaction->t_synchronous_commit = 1; | 1312 | transaction->t_synchronous_commit = 1; |
1313 | current->journal_info = NULL; | 1313 | current->journal_info = NULL; |
1314 | spin_lock(&journal->j_state_lock); | ||
1315 | spin_lock(&transaction->t_handle_lock); | 1314 | spin_lock(&transaction->t_handle_lock); |
1316 | transaction->t_outstanding_credits -= handle->h_buffer_credits; | 1315 | transaction->t_outstanding_credits -= handle->h_buffer_credits; |
1317 | transaction->t_updates--; | 1316 | transaction->t_updates--; |
@@ -1340,8 +1339,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1340 | jbd_debug(2, "transaction too old, requesting commit for " | 1339 | jbd_debug(2, "transaction too old, requesting commit for " |
1341 | "handle %p\n", handle); | 1340 | "handle %p\n", handle); |
1342 | /* This is non-blocking */ | 1341 | /* This is non-blocking */ |
1343 | __jbd2_log_start_commit(journal, transaction->t_tid); | 1342 | jbd2_log_start_commit(journal, transaction->t_tid); |
1344 | spin_unlock(&journal->j_state_lock); | ||
1345 | 1343 | ||
1346 | /* | 1344 | /* |
1347 | * Special case: JBD2_SYNC synchronous updates require us | 1345 | * Special case: JBD2_SYNC synchronous updates require us |
@@ -1351,7 +1349,6 @@ int jbd2_journal_stop(handle_t *handle) | |||
1351 | err = jbd2_log_wait_commit(journal, tid); | 1349 | err = jbd2_log_wait_commit(journal, tid); |
1352 | } else { | 1350 | } else { |
1353 | spin_unlock(&transaction->t_handle_lock); | 1351 | spin_unlock(&transaction->t_handle_lock); |
1354 | spin_unlock(&journal->j_state_lock); | ||
1355 | } | 1352 | } |
1356 | 1353 | ||
1357 | lock_map_release(&handle->h_lockdep_map); | 1354 | lock_map_release(&handle->h_lockdep_map); |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index e7291c161a19..813497024437 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -26,9 +26,9 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
26 | struct page **pagep, void **fsdata); | 26 | struct page **pagep, void **fsdata); |
27 | static int jffs2_readpage (struct file *filp, struct page *pg); | 27 | static int jffs2_readpage (struct file *filp, struct page *pg); |
28 | 28 | ||
29 | int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) | 29 | int jffs2_fsync(struct file *filp, int datasync) |
30 | { | 30 | { |
31 | struct inode *inode = dentry->d_inode; | 31 | struct inode *inode = filp->f_mapping->host; |
32 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | 32 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); |
33 | 33 | ||
34 | /* Trigger GC to flush any pending writes for this inode */ | 34 | /* Trigger GC to flush any pending writes for this inode */ |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 86e0821fc989..8bc2c80ab159 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) | |||
169 | mutex_unlock(&f->sem); | 169 | mutex_unlock(&f->sem); |
170 | jffs2_complete_reservation(c); | 170 | jffs2_complete_reservation(c); |
171 | 171 | ||
172 | /* We have to do the vmtruncate() without f->sem held, since | 172 | /* We have to do the simple_setsize() without f->sem held, since |
173 | some pages may be locked and waiting for it in readpage(). | 173 | some pages may be locked and waiting for it in readpage(). |
174 | We are protected from a simultaneous write() extending i_size | 174 | We are protected from a simultaneous write() extending i_size |
175 | back past iattr->ia_size, because do_truncate() holds the | 175 | back past iattr->ia_size, because do_truncate() holds the |
176 | generic inode semaphore. */ | 176 | generic inode semaphore. */ |
177 | if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { | 177 | if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { |
178 | vmtruncate(inode, iattr->ia_size); | 178 | simple_setsize(inode, iattr->ia_size); |
179 | inode->i_blocks = (inode->i_size + 511) >> 9; | 179 | inode->i_blocks = (inode->i_size + 511) >> 9; |
180 | } | 180 | } |
181 | 181 | ||
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 035a767f958b..4791aacf3084 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations; | |||
158 | extern const struct file_operations jffs2_file_operations; | 158 | extern const struct file_operations jffs2_file_operations; |
159 | extern const struct inode_operations jffs2_file_inode_operations; | 159 | extern const struct inode_operations jffs2_file_inode_operations; |
160 | extern const struct address_space_operations jffs2_file_address_operations; | 160 | extern const struct address_space_operations jffs2_file_address_operations; |
161 | int jffs2_fsync(struct file *, struct dentry *, int); | 161 | int jffs2_fsync(struct file *, int); |
162 | int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); | 162 | int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); |
163 | 163 | ||
164 | /* ioctl.c */ | 164 | /* ioctl.c */ |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 85d9ec659225..127263cc8657 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -27,9 +27,9 @@ | |||
27 | #include "jfs_acl.h" | 27 | #include "jfs_acl.h" |
28 | #include "jfs_debug.h" | 28 | #include "jfs_debug.h" |
29 | 29 | ||
30 | int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 30 | int jfs_fsync(struct file *file, int datasync) |
31 | { | 31 | { |
32 | struct inode *inode = dentry->d_inode; | 32 | struct inode *inode = file->f_mapping->host; |
33 | int rc = 0; | 33 | int rc = 0; |
34 | 34 | ||
35 | if (!(inode->i_state & I_DIRTY) || | 35 | if (!(inode->i_state & I_DIRTY) || |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 9e6bda30a6e8..11042b1f44b5 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -21,7 +21,7 @@ | |||
21 | struct fid; | 21 | struct fid; |
22 | 22 | ||
23 | extern struct inode *ialloc(struct inode *, umode_t); | 23 | extern struct inode *ialloc(struct inode *, umode_t); |
24 | extern int jfs_fsync(struct file *, struct dentry *, int); | 24 | extern int jfs_fsync(struct file *, int); |
25 | extern long jfs_ioctl(struct file *, unsigned int, unsigned long); | 25 | extern long jfs_ioctl(struct file *, unsigned int, unsigned long); |
26 | extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); | 26 | extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); |
27 | extern struct inode *jfs_iget(struct super_block *, unsigned long); | 27 | extern struct inode *jfs_iget(struct super_block *, unsigned long); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b66832ac33ac..b38f96bef829 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -179,6 +179,8 @@ static void jfs_put_super(struct super_block *sb) | |||
179 | 179 | ||
180 | jfs_info("In jfs_put_super"); | 180 | jfs_info("In jfs_put_super"); |
181 | 181 | ||
182 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | ||
183 | |||
182 | lock_kernel(); | 184 | lock_kernel(); |
183 | 185 | ||
184 | rc = jfs_umount(sb); | 186 | rc = jfs_umount(sb); |
@@ -396,10 +398,20 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
396 | 398 | ||
397 | JFS_SBI(sb)->flag = flag; | 399 | JFS_SBI(sb)->flag = flag; |
398 | ret = jfs_mount_rw(sb, 1); | 400 | ret = jfs_mount_rw(sb, 1); |
401 | |||
402 | /* mark the fs r/w for quota activity */ | ||
403 | sb->s_flags &= ~MS_RDONLY; | ||
404 | |||
399 | unlock_kernel(); | 405 | unlock_kernel(); |
406 | dquot_resume(sb, -1); | ||
400 | return ret; | 407 | return ret; |
401 | } | 408 | } |
402 | if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { | 409 | if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { |
410 | rc = dquot_suspend(sb, -1); | ||
411 | if (rc < 0) { | ||
412 | unlock_kernel(); | ||
413 | return rc; | ||
414 | } | ||
403 | rc = jfs_umount_rw(sb); | 415 | rc = jfs_umount_rw(sb); |
404 | JFS_SBI(sb)->flag = flag; | 416 | JFS_SBI(sb)->flag = flag; |
405 | unlock_kernel(); | 417 | unlock_kernel(); |
@@ -469,6 +481,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
469 | */ | 481 | */ |
470 | sb->s_op = &jfs_super_operations; | 482 | sb->s_op = &jfs_super_operations; |
471 | sb->s_export_op = &jfs_export_operations; | 483 | sb->s_export_op = &jfs_export_operations; |
484 | #ifdef CONFIG_QUOTA | ||
485 | sb->dq_op = &dquot_operations; | ||
486 | sb->s_qcop = &dquot_quotactl_ops; | ||
487 | #endif | ||
472 | 488 | ||
473 | /* | 489 | /* |
474 | * Initialize direct-mapping inode/address-space | 490 | * Initialize direct-mapping inode/address-space |
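
The jfs super.c hunks bracket the ro/rw transitions with the new dquot_suspend()/dquot_resume() helpers, with a deliberate ordering: quotas are suspended before the filesystem goes read-only and resumed only after it is writable again. A condensed, hedged sketch of that ordering; my_remount, my_mount_rw and my_umount_rw are hypothetical stand-ins:

#include <linux/fs.h>
#include <linux/quotaops.h>

static int my_mount_rw(struct super_block *sb) { return 0; }  /* hypothetical */
static int my_umount_rw(struct super_block *sb) { return 0; } /* hypothetical */

static int my_remount(struct super_block *sb, int *flags)
{
	int rc;

	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
		/* going read-write: mount rw first, then resume quotas */
		rc = my_mount_rw(sb);
		sb->s_flags &= ~MS_RDONLY;	/* quota code needs an rw sb */
		dquot_resume(sb, -1);		/* -1 == all quota types */
		return rc;
	}
	if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
		/* going read-only: suspend quotas before the fs goes ro */
		rc = dquot_suspend(sb, -1);
		if (rc < 0)
			return rc;
		return my_umount_rw(sb);
	}
	return 0;
}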
diff --git a/fs/libfs.c b/fs/libfs.c index 232bea425b09..09e1016eb774 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/vfs.h> | 10 | #include <linux/vfs.h> |
11 | #include <linux/quotaops.h> | ||
11 | #include <linux/mutex.h> | 12 | #include <linux/mutex.h> |
12 | #include <linux/exportfs.h> | 13 | #include <linux/exportfs.h> |
13 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
@@ -58,11 +59,6 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na | |||
58 | return NULL; | 59 | return NULL; |
59 | } | 60 | } |
60 | 61 | ||
61 | int simple_sync_file(struct file * file, struct dentry *dentry, int datasync) | ||
62 | { | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | int dcache_dir_open(struct inode *inode, struct file *file) | 62 | int dcache_dir_open(struct inode *inode, struct file *file) |
67 | { | 63 | { |
68 | static struct qstr cursor_name = {.len = 1, .name = "."}; | 64 | static struct qstr cursor_name = {.len = 1, .name = "."}; |
@@ -190,7 +186,7 @@ const struct file_operations simple_dir_operations = { | |||
190 | .llseek = dcache_dir_lseek, | 186 | .llseek = dcache_dir_lseek, |
191 | .read = generic_read_dir, | 187 | .read = generic_read_dir, |
192 | .readdir = dcache_readdir, | 188 | .readdir = dcache_readdir, |
193 | .fsync = simple_sync_file, | 189 | .fsync = noop_fsync, |
194 | }; | 190 | }; |
195 | 191 | ||
196 | const struct inode_operations simple_dir_inode_operations = { | 192 | const struct inode_operations simple_dir_inode_operations = { |
@@ -330,6 +326,81 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
330 | return 0; | 326 | return 0; |
331 | } | 327 | } |
332 | 328 | ||
329 | /** | ||
330 | * simple_setsize - handle core mm and vfs requirements for file size change | ||
331 | * @inode: inode | ||
332 | * @newsize: new file size | ||
333 | * | ||
334 | * Returns 0 on success, -error on failure. | ||
335 | * | ||
336 | * simple_setsize must be called with the inode's i_mutex held. | ||
337 | * | ||
338 | * simple_setsize will check that the requested new size is OK (see | ||
339 | * inode_newsize_ok), and then will perform the necessary i_size update | ||
340 | * and pagecache truncation (if necessary). It will typically be called | ||
341 | * from the filesystem's setattr function when ATTR_SIZE is passed in. | ||
342 | * | ||
343 | * The inode itself must have correct permissions and attributes to allow | ||
344 | * i_size to be changed, this function then just checks that the new size | ||
345 | * requested is valid. | ||
346 | * | ||
347 | * In the case of simple in-memory filesystems with inodes stored solely | ||
348 | * in the inode cache, and file data in the pagecache, nothing more needs | ||
349 | * to be done to satisfy a truncate request. Filesystems with on-disk | ||
350 | * blocks, for example, will need to free them in the case of truncate; in | ||
351 | * that case it may be easier not to use simple_setsize (but each of its | ||
352 | * components will likely be required at some point to update pagecache | ||
353 | * and inode etc). | ||
354 | */ | ||
355 | int simple_setsize(struct inode *inode, loff_t newsize) | ||
356 | { | ||
357 | loff_t oldsize; | ||
358 | int error; | ||
359 | |||
360 | error = inode_newsize_ok(inode, newsize); | ||
361 | if (error) | ||
362 | return error; | ||
363 | |||
364 | oldsize = inode->i_size; | ||
365 | i_size_write(inode, newsize); | ||
366 | truncate_pagecache(inode, oldsize, newsize); | ||
367 | |||
368 | return error; | ||
369 | } | ||
370 | EXPORT_SYMBOL(simple_setsize); | ||
371 | |||
372 | /** | ||
373 | * simple_setattr - setattr for simple in-memory filesystem | ||
374 | * @dentry: dentry | ||
375 | * @iattr: iattr structure | ||
376 | * | ||
377 | * Returns 0 on success, -error on failure. | ||
378 | * | ||
379 | * simple_setattr implements setattr for an in-memory filesystem which | ||
380 | * does not store its own file data or metadata (e.g. it uses the page cache | ||
381 | * and inode cache as its data store). | ||
382 | */ | ||
383 | int simple_setattr(struct dentry *dentry, struct iattr *iattr) | ||
384 | { | ||
385 | struct inode *inode = dentry->d_inode; | ||
386 | int error; | ||
387 | |||
388 | error = inode_change_ok(inode, iattr); | ||
389 | if (error) | ||
390 | return error; | ||
391 | |||
392 | if (iattr->ia_valid & ATTR_SIZE) { | ||
393 | error = simple_setsize(inode, iattr->ia_size); | ||
394 | if (error) | ||
395 | return error; | ||
396 | } | ||
397 | |||
398 | generic_setattr(inode, iattr); | ||
399 | |||
400 | return error; | ||
401 | } | ||
402 | EXPORT_SYMBOL(simple_setattr); | ||
403 | |||
333 | int simple_readpage(struct file *file, struct page *page) | 404 | int simple_readpage(struct file *file, struct page *page) |
334 | { | 405 | { |
335 | clear_highpage(page); | 406 | clear_highpage(page); |
@@ -851,13 +922,22 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
851 | } | 922 | } |
852 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); | 923 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); |
853 | 924 | ||
854 | int simple_fsync(struct file *file, struct dentry *dentry, int datasync) | 925 | /** |
926 | * generic_file_fsync - generic fsync implementation for simple filesystems | ||
927 | * @file: file to synchronize | ||
928 | * @datasync: only synchronize essential metadata if true | ||
929 | * | ||
930 | * This is a generic implementation of the fsync method for simple | ||
931 | * filesystems which track all non-inode metadata in the buffers list | ||
932 | * hanging off the address_space structure. | ||
933 | */ | ||
934 | int generic_file_fsync(struct file *file, int datasync) | ||
855 | { | 935 | { |
856 | struct writeback_control wbc = { | 936 | struct writeback_control wbc = { |
857 | .sync_mode = WB_SYNC_ALL, | 937 | .sync_mode = WB_SYNC_ALL, |
858 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ | 938 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ |
859 | }; | 939 | }; |
860 | struct inode *inode = dentry->d_inode; | 940 | struct inode *inode = file->f_mapping->host; |
861 | int err; | 941 | int err; |
862 | int ret; | 942 | int ret; |
863 | 943 | ||
@@ -872,7 +952,15 @@ int simple_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
872 | ret = err; | 952 | ret = err; |
873 | return ret; | 953 | return ret; |
874 | } | 954 | } |
875 | EXPORT_SYMBOL(simple_fsync); | 955 | EXPORT_SYMBOL(generic_file_fsync); |
956 | |||
957 | /* | ||
958 | * No-op implementation of ->fsync for in-memory filesystems. | ||
959 | */ | ||
960 | int noop_fsync(struct file *file, int datasync) | ||
961 | { | ||
962 | return 0; | ||
963 | } | ||
876 | 964 | ||
877 | EXPORT_SYMBOL(dcache_dir_close); | 965 | EXPORT_SYMBOL(dcache_dir_close); |
878 | EXPORT_SYMBOL(dcache_dir_lseek); | 966 | EXPORT_SYMBOL(dcache_dir_lseek); |
@@ -895,7 +983,7 @@ EXPORT_SYMBOL(simple_release_fs); | |||
895 | EXPORT_SYMBOL(simple_rename); | 983 | EXPORT_SYMBOL(simple_rename); |
896 | EXPORT_SYMBOL(simple_rmdir); | 984 | EXPORT_SYMBOL(simple_rmdir); |
897 | EXPORT_SYMBOL(simple_statfs); | 985 | EXPORT_SYMBOL(simple_statfs); |
898 | EXPORT_SYMBOL(simple_sync_file); | 986 | EXPORT_SYMBOL(noop_fsync); |
899 | EXPORT_SYMBOL(simple_unlink); | 987 | EXPORT_SYMBOL(simple_unlink); |
900 | EXPORT_SYMBOL(simple_read_from_buffer); | 988 | EXPORT_SYMBOL(simple_read_from_buffer); |
901 | EXPORT_SYMBOL(simple_write_to_buffer); | 989 | EXPORT_SYMBOL(simple_write_to_buffer); |
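
Taken together, the libfs changes give simple filesystems three drop-in methods: simple_setattr() routes ATTR_SIZE through simple_setsize(), noop_fsync() covers purely in-memory data, and generic_file_fsync() (the renamed simple_fsync) writes the buffer-list metadata of block-backed filesystems. A hedged sketch of the wiring; both ops tables are hypothetical:

#include <linux/fs.h>

/* Purely in-memory (ramfs-like): truncation is pagecache-only and
 * there is nothing for fsync to write. */
static const struct inode_operations my_ram_inode_operations = {
	.getattr	= simple_getattr,
	.setattr	= simple_setattr,	/* ATTR_SIZE -> simple_setsize() */
};

static const struct file_operations my_ram_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.mmap		= generic_file_mmap,
	.fsync		= noop_fsync,		/* was simple_sync_file */
};

/* Block-backed simple fs: non-inode metadata lives on the buffer list
 * hanging off the address_space, which generic_file_fsync writes out. */
static const struct file_operations my_disk_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.mmap		= generic_file_mmap,
	.fsync		= generic_file_fsync,	/* was simple_fsync */
};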
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 0de524071870..abe1cafbd4c2 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
@@ -219,9 +219,9 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | |||
219 | } | 219 | } |
220 | } | 220 | } |
221 | 221 | ||
222 | int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 222 | int logfs_fsync(struct file *file, int datasync) |
223 | { | 223 | { |
224 | struct super_block *sb = dentry->d_inode->i_sb; | 224 | struct super_block *sb = file->f_mapping->host->i_sb; |
225 | 225 | ||
226 | logfs_write_anchor(sb); | 226 | logfs_write_anchor(sb); |
227 | return 0; | 227 | return 0; |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 1a9db84f8d8f..c838c4d72111 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -506,7 +506,7 @@ extern const struct address_space_operations logfs_reg_aops; | |||
506 | int logfs_readpage(struct file *file, struct page *page); | 506 | int logfs_readpage(struct file *file, struct page *page); |
507 | int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | 507 | int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, |
508 | unsigned long arg); | 508 | unsigned long arg); |
509 | int logfs_fsync(struct file *file, struct dentry *dentry, int datasync); | 509 | int logfs_fsync(struct file *file, int datasync); |
510 | 510 | ||
511 | /* gc.c */ | 511 | /* gc.c */ |
512 | u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); | 512 | u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); |
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 6198731d7fcd..91969589131c 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
@@ -22,7 +22,7 @@ const struct file_operations minix_dir_operations = { | |||
22 | .llseek = generic_file_llseek, | 22 | .llseek = generic_file_llseek, |
23 | .read = generic_read_dir, | 23 | .read = generic_read_dir, |
24 | .readdir = minix_readdir, | 24 | .readdir = minix_readdir, |
25 | .fsync = simple_fsync, | 25 | .fsync = generic_file_fsync, |
26 | }; | 26 | }; |
27 | 27 | ||
28 | static inline void dir_put_page(struct page *page) | 28 | static inline void dir_put_page(struct page *page) |
@@ -72,11 +72,8 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n) | |||
72 | { | 72 | { |
73 | struct address_space *mapping = dir->i_mapping; | 73 | struct address_space *mapping = dir->i_mapping; |
74 | struct page *page = read_mapping_page(mapping, n, NULL); | 74 | struct page *page = read_mapping_page(mapping, n, NULL); |
75 | if (!IS_ERR(page)) { | 75 | if (!IS_ERR(page)) |
76 | kmap(page); | 76 | kmap(page); |
77 | if (!PageUptodate(page)) | ||
78 | goto fail; | ||
79 | } | ||
80 | return page; | 77 | return page; |
81 | 78 | ||
82 | fail: | 79 | fail: |
diff --git a/fs/minix/file.c b/fs/minix/file.c index 3eec3e607a87..d5320ff23faf 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c | |||
@@ -19,7 +19,7 @@ const struct file_operations minix_file_operations = { | |||
19 | .write = do_sync_write, | 19 | .write = do_sync_write, |
20 | .aio_write = generic_file_aio_write, | 20 | .aio_write = generic_file_aio_write, |
21 | .mmap = generic_file_mmap, | 21 | .mmap = generic_file_mmap, |
22 | .fsync = simple_fsync, | 22 | .fsync = generic_file_fsync, |
23 | .splice_read = generic_file_splice_read, | 23 | .splice_read = generic_file_splice_read, |
24 | }; | 24 | }; |
25 | 25 | ||
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index f23010969369..13487ad16894 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c | |||
@@ -20,6 +20,9 @@ static inline block_t *i_data(struct inode *inode) | |||
20 | return (block_t *)minix_i(inode)->u.i2_data; | 20 | return (block_t *)minix_i(inode)->u.i2_data; |
21 | } | 21 | } |
22 | 22 | ||
23 | #define DIRCOUNT 7 | ||
24 | #define INDIRCOUNT(sb) (1 << ((sb)->s_blocksize_bits - 2)) | ||
25 | |||
23 | static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) | 26 | static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) |
24 | { | 27 | { |
25 | int n = 0; | 28 | int n = 0; |
@@ -34,21 +37,21 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) | |||
34 | printk("MINIX-fs: block_to_path: " | 37 | printk("MINIX-fs: block_to_path: " |
35 | "block %ld too big on dev %s\n", | 38 | "block %ld too big on dev %s\n", |
36 | block, bdevname(sb->s_bdev, b)); | 39 | block, bdevname(sb->s_bdev, b)); |
37 | } else if (block < 7) { | 40 | } else if (block < DIRCOUNT) { |
38 | offsets[n++] = block; | 41 | offsets[n++] = block; |
39 | } else if ((block -= 7) < 256) { | 42 | } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) { |
40 | offsets[n++] = 7; | 43 | offsets[n++] = DIRCOUNT; |
41 | offsets[n++] = block; | 44 | offsets[n++] = block; |
42 | } else if ((block -= 256) < 256*256) { | 45 | } else if ((block -= INDIRCOUNT(sb)) < INDIRCOUNT(sb) * INDIRCOUNT(sb)) { |
43 | offsets[n++] = 8; | 46 | offsets[n++] = DIRCOUNT + 1; |
44 | offsets[n++] = block>>8; | 47 | offsets[n++] = block / INDIRCOUNT(sb); |
45 | offsets[n++] = block & 255; | 48 | offsets[n++] = block % INDIRCOUNT(sb); |
46 | } else { | 49 | } else { |
47 | block -= 256*256; | 50 | block -= INDIRCOUNT(sb) * INDIRCOUNT(sb); |
48 | offsets[n++] = 9; | 51 | offsets[n++] = DIRCOUNT + 2; |
49 | offsets[n++] = block>>16; | 52 | offsets[n++] = (block / INDIRCOUNT(sb)) / INDIRCOUNT(sb); |
50 | offsets[n++] = (block>>8) & 255; | 53 | offsets[n++] = (block / INDIRCOUNT(sb)) % INDIRCOUNT(sb); |
51 | offsets[n++] = block & 255; | 54 | offsets[n++] = block % INDIRCOUNT(sb); |
52 | } | 55 | } |
53 | return n; | 56 | return n; |
54 | } | 57 | } |
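
With a 1 KiB block size, INDIRCOUNT(sb) is 1 << (10 - 2) = 256, so the macro form reproduces the old hard-coded constants exactly while also working for other block sizes. A self-contained userspace demo of the same arithmetic (the triple-indirect branch is omitted for brevity), assuming 1 KiB blocks:

/* Hedged demo of the block_to_path() arithmetic above. */
#include <stdio.h>

#define DIRCOUNT   7
#define INDIRCOUNT 256	/* 1 << (blocksize_bits - 2) for 1 KiB blocks */

int main(void)
{
	long block = 1000;	/* sample logical block */
	int offsets[4], n = 0;

	if (block < DIRCOUNT) {
		offsets[n++] = block;
	} else if ((block -= DIRCOUNT) < INDIRCOUNT) {
		offsets[n++] = DIRCOUNT;
		offsets[n++] = block;
	} else if ((block -= INDIRCOUNT) < INDIRCOUNT * INDIRCOUNT) {
		offsets[n++] = DIRCOUNT + 1;		/* double-indirect slot */
		offsets[n++] = block / INDIRCOUNT;
		offsets[n++] = block % INDIRCOUNT;
	}
	/* block 1000 -> 993 -> 737: prints "depth 3: 8 2 225" */
	printf("depth %d:", n);
	for (int i = 0; i < n; i++)
		printf(" %d", offsets[i]);
	printf("\n");
	return 0;
}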
diff --git a/fs/namei.c b/fs/namei.c index 48e1f60520ea..868d0cb9d473 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1621,6 +1621,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1621 | case LAST_DOTDOT: | 1621 | case LAST_DOTDOT: |
1622 | follow_dotdot(nd); | 1622 | follow_dotdot(nd); |
1623 | dir = nd->path.dentry; | 1623 | dir = nd->path.dentry; |
1624 | case LAST_DOT: | ||
1624 | if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { | 1625 | if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { |
1625 | if (!dir->d_op->d_revalidate(dir, nd)) { | 1626 | if (!dir->d_op->d_revalidate(dir, nd)) { |
1626 | error = -ESTALE; | 1627 | error = -ESTALE; |
@@ -1628,7 +1629,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1628 | } | 1629 | } |
1629 | } | 1630 | } |
1630 | /* fallthrough */ | 1631 | /* fallthrough */ |
1631 | case LAST_DOT: | ||
1632 | case LAST_ROOT: | 1632 | case LAST_ROOT: |
1633 | if (open_flag & O_CREAT) | 1633 | if (open_flag & O_CREAT) |
1634 | goto exit; | 1634 | goto exit; |
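
The reordering above moves case LAST_DOT ahead of the FS_REVAL_DOT revalidation block, so opening "." now triggers d_revalidate() just as ".." does, instead of falling through only to the O_CREAT checks. A self-contained toy showing why label order matters under fallthrough; the names here are illustrative, not the real nameidata machinery:

#include <stdio.h>

enum last_type { LAST_ROOT, LAST_DOT, LAST_DOTDOT };	/* illustrative */

static void lookup_last(enum last_type t)
{
	switch (t) {
	case LAST_DOTDOT:
		printf("follow ..\n");
		/* fall through */
	case LAST_DOT:			/* moved above the revalidate step */
		printf("revalidate\n");
		/* fall through */
	case LAST_ROOT:
		printf("open checks\n");
	}
}

int main(void)
{
	lookup_last(LAST_DOT);	/* now prints: revalidate, open checks */
	return 0;
}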
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 7edfcd4d5e52..9578cbe0cd58 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -49,9 +49,10 @@ extern int ncp_symlink(struct inode *, struct dentry *, const char *); | |||
49 | 49 | ||
50 | const struct file_operations ncp_dir_operations = | 50 | const struct file_operations ncp_dir_operations = |
51 | { | 51 | { |
52 | .llseek = generic_file_llseek, | ||
52 | .read = generic_read_dir, | 53 | .read = generic_read_dir, |
53 | .readdir = ncp_readdir, | 54 | .readdir = ncp_readdir, |
54 | .ioctl = ncp_ioctl, | 55 | .unlocked_ioctl = ncp_ioctl, |
55 | #ifdef CONFIG_COMPAT | 56 | #ifdef CONFIG_COMPAT |
56 | .compat_ioctl = ncp_compat_ioctl, | 57 | .compat_ioctl = ncp_compat_ioctl, |
57 | #endif | 58 | #endif |
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 1daabb90e0a5..3639cc5cbdae 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/ncp_fs.h> | 22 | #include <linux/ncp_fs.h> |
23 | #include "ncplib_kernel.h" | 23 | #include "ncplib_kernel.h" |
24 | 24 | ||
25 | static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync) | 25 | static int ncp_fsync(struct file *file, int datasync) |
26 | { | 26 | { |
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
@@ -295,7 +295,7 @@ const struct file_operations ncp_file_operations = | |||
295 | .llseek = ncp_remote_llseek, | 295 | .llseek = ncp_remote_llseek, |
296 | .read = ncp_file_read, | 296 | .read = ncp_file_read, |
297 | .write = ncp_file_write, | 297 | .write = ncp_file_write, |
298 | .ioctl = ncp_ioctl, | 298 | .unlocked_ioctl = ncp_ioctl, |
299 | #ifdef CONFIG_COMPAT | 299 | #ifdef CONFIG_COMPAT |
300 | .compat_ioctl = ncp_compat_ioctl, | 300 | .compat_ioctl = ncp_compat_ioctl, |
301 | #endif | 301 | #endif |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 60a5e2864ea8..023c03d02070 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/smp_lock.h> | 20 | #include <linux/smp_lock.h> |
21 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/smp_lock.h> | ||
23 | 24 | ||
24 | #include <linux/ncp_fs.h> | 25 | #include <linux/ncp_fs.h> |
25 | 26 | ||
@@ -261,9 +262,9 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
261 | } | 262 | } |
262 | #endif /* CONFIG_NCPFS_NLS */ | 263 | #endif /* CONFIG_NCPFS_NLS */ |
263 | 264 | ||
264 | static int __ncp_ioctl(struct inode *inode, struct file *filp, | 265 | static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
265 | unsigned int cmd, unsigned long arg) | ||
266 | { | 266 | { |
267 | struct inode *inode = filp->f_dentry->d_inode; | ||
267 | struct ncp_server *server = NCP_SERVER(inode); | 268 | struct ncp_server *server = NCP_SERVER(inode); |
268 | int result; | 269 | int result; |
269 | struct ncp_ioctl_request request; | 270 | struct ncp_ioctl_request request; |
@@ -841,11 +842,11 @@ static int ncp_ioctl_need_write(unsigned int cmd) | |||
841 | } | 842 | } |
842 | } | 843 | } |
843 | 844 | ||
844 | int ncp_ioctl(struct inode *inode, struct file *filp, | 845 | long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
845 | unsigned int cmd, unsigned long arg) | ||
846 | { | 846 | { |
847 | int ret; | 847 | long ret; |
848 | 848 | ||
849 | lock_kernel(); | ||
849 | if (ncp_ioctl_need_write(cmd)) { | 850 | if (ncp_ioctl_need_write(cmd)) { |
850 | /* | 851 | /* |
851 | * inside the ioctl(), any failures which | 852 | * inside the ioctl(), any failures which |
@@ -853,24 +854,28 @@ int ncp_ioctl(struct inode *inode, struct file *filp, | |||
853 | * -EACCES, so it seems consistent to keep | 854 |
854 | * that here. | 855 | * that here. |
855 | */ | 856 | */ |
856 | if (mnt_want_write(filp->f_path.mnt)) | 857 | if (mnt_want_write(filp->f_path.mnt)) { |
857 | return -EACCES; | 858 | ret = -EACCES; |
859 | goto out; | ||
860 | } | ||
858 | } | 861 | } |
859 | ret = __ncp_ioctl(inode, filp, cmd, arg); | 862 | ret = __ncp_ioctl(filp, cmd, arg); |
860 | if (ncp_ioctl_need_write(cmd)) | 863 | if (ncp_ioctl_need_write(cmd)) |
861 | mnt_drop_write(filp->f_path.mnt); | 864 | mnt_drop_write(filp->f_path.mnt); |
865 | |||
866 | out: | ||
867 | unlock_kernel(); | ||
862 | return ret; | 868 | return ret; |
863 | } | 869 | } |
864 | 870 | ||
865 | #ifdef CONFIG_COMPAT | 871 | #ifdef CONFIG_COMPAT |
866 | long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 872 | long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
867 | { | 873 | { |
868 | struct inode *inode = file->f_path.dentry->d_inode; | 874 | long ret; |
869 | int ret; | ||
870 | 875 | ||
871 | lock_kernel(); | 876 | lock_kernel(); |
872 | arg = (unsigned long) compat_ptr(arg); | 877 | arg = (unsigned long) compat_ptr(arg); |
873 | ret = ncp_ioctl(inode, file, cmd, arg); | 878 | ret = ncp_ioctl(file, cmd, arg); |
874 | unlock_kernel(); | 879 | unlock_kernel(); |
875 | return ret; | 880 | return ret; |
876 | } | 881 | } |
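
The ncpfs conversion follows the standard recipe for retiring ->ioctl in favour of ->unlocked_ioctl: return long, derive the inode from the file instead of taking it as a parameter, and take the BKL explicitly inside the handler rather than relying on the VFS. A hedged sketch; my_ioctl and my_do_ioctl are hypothetical:

#include <linux/fs.h>
#include <linux/smp_lock.h>

/* Hedged sketch of the ->unlocked_ioctl conversion pattern above. */
static long my_do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	/* the inode is no longer a parameter; derive it from the file */
	struct inode *inode = filp->f_dentry->d_inode;

	(void)inode;
	return 0;		/* real command dispatch would go here */
}

static long my_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	long ret;

	lock_kernel();		/* BKL now taken inside the handler */
	ret = my_do_ioctl(filp, cmd, arg);
	unlock_kernel();
	return ret;
}

static const struct file_operations my_fops = {
	.unlocked_ioctl	= my_ioctl,	/* was .ioctl = ... */
};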
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ee9a179ebdf3..782b431ef91c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -53,7 +53,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *); | |||
53 | static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); | 53 | static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); |
54 | static int nfs_rename(struct inode *, struct dentry *, | 54 | static int nfs_rename(struct inode *, struct dentry *, |
55 | struct inode *, struct dentry *); | 55 | struct inode *, struct dentry *); |
56 | static int nfs_fsync_dir(struct file *, struct dentry *, int); | 56 | static int nfs_fsync_dir(struct file *, int); |
57 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); | 57 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); |
58 | 58 | ||
59 | const struct file_operations nfs_dir_operations = { | 59 | const struct file_operations nfs_dir_operations = { |
@@ -641,8 +641,10 @@ out: | |||
641 | * All directory operations under NFS are synchronous, so fsync() | 641 | * All directory operations under NFS are synchronous, so fsync() |
642 | * is a dummy operation. | 642 | * is a dummy operation. |
643 | */ | 643 | */ |
644 | static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | 644 | static int nfs_fsync_dir(struct file *filp, int datasync) |
645 | { | 645 | { |
646 | struct dentry *dentry = filp->f_path.dentry; | ||
647 | |||
646 | dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", | 648 | dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", |
647 | dentry->d_parent->d_name.name, dentry->d_name.name, | 649 | dentry->d_parent->d_name.name, dentry->d_name.name, |
648 | datasync); | 650 | datasync); |
@@ -1741,6 +1743,7 @@ remove_lru_entry: | |||
1741 | clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); | 1743 | clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); |
1742 | smp_mb__after_clear_bit(); | 1744 | smp_mb__after_clear_bit(); |
1743 | } | 1745 | } |
1746 | spin_unlock(&inode->i_lock); | ||
1744 | } | 1747 | } |
1745 | spin_unlock(&nfs_access_lru_lock); | 1748 | spin_unlock(&nfs_access_lru_lock); |
1746 | nfs_access_free_list(&head); | 1749 | nfs_access_free_list(&head); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cac96bcc91e4..36a5e74f51b4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -53,7 +53,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, | |||
53 | static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, | 53 | static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, |
54 | unsigned long nr_segs, loff_t pos); | 54 | unsigned long nr_segs, loff_t pos); |
55 | static int nfs_file_flush(struct file *, fl_owner_t id); | 55 | static int nfs_file_flush(struct file *, fl_owner_t id); |
56 | static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); | 56 | static int nfs_file_fsync(struct file *, int datasync); |
57 | static int nfs_check_flags(int flags); | 57 | static int nfs_check_flags(int flags); |
58 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); | 58 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); |
59 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); | 59 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); |
@@ -322,8 +322,9 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
322 | * whether any write errors occurred for this process. | 322 | * whether any write errors occurred for this process. |
323 | */ | 323 | */ |
324 | static int | 324 | static int |
325 | nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) | 325 | nfs_file_fsync(struct file *file, int datasync) |
326 | { | 326 | { |
327 | struct dentry *dentry = file->f_path.dentry; | ||
327 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 328 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
328 | struct inode *inode = dentry->d_inode; | 329 | struct inode *inode = dentry->d_inode; |
329 | 330 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2f8b1157daa2..04214fc5c304 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1060,7 +1060,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1060 | goto out_nomem; | 1060 | goto out_nomem; |
1061 | rc = strict_strtoul(string, 10, &option); | 1061 | rc = strict_strtoul(string, 10, &option); |
1062 | kfree(string); | 1062 | kfree(string); |
1063 | if (rc != 0 || option > USHORT_MAX) | 1063 | if (rc != 0 || option > USHRT_MAX) |
1064 | goto out_invalid_value; | 1064 | goto out_invalid_value; |
1065 | mnt->nfs_server.port = option; | 1065 | mnt->nfs_server.port = option; |
1066 | break; | 1066 | break; |
@@ -1181,7 +1181,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1181 | goto out_nomem; | 1181 | goto out_nomem; |
1182 | rc = strict_strtoul(string, 10, &option); | 1182 | rc = strict_strtoul(string, 10, &option); |
1183 | kfree(string); | 1183 | kfree(string); |
1184 | if (rc != 0 || option > USHORT_MAX) | 1184 | if (rc != 0 || option > USHRT_MAX) |
1185 | goto out_invalid_value; | 1185 | goto out_invalid_value; |
1186 | mnt->mount_server.port = option; | 1186 | mnt->mount_server.port = option; |
1187 | break; | 1187 | break; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3aea3ca98ab7..91679e2631ee 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1386,7 +1386,7 @@ static int nfs_commit_inode(struct inode *inode, int how) | |||
1386 | int res = 0; | 1386 | int res = 0; |
1387 | 1387 | ||
1388 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) | 1388 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) |
1389 | goto out; | 1389 | goto out_mark_dirty; |
1390 | spin_lock(&inode->i_lock); | 1390 | spin_lock(&inode->i_lock); |
1391 | res = nfs_scan_commit(inode, &head, 0, 0); | 1391 | res = nfs_scan_commit(inode, &head, 0, 0); |
1392 | spin_unlock(&inode->i_lock); | 1392 | spin_unlock(&inode->i_lock); |
@@ -1398,9 +1398,18 @@ static int nfs_commit_inode(struct inode *inode, int how) | |||
1398 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, | 1398 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, |
1399 | nfs_wait_bit_killable, | 1399 | nfs_wait_bit_killable, |
1400 | TASK_KILLABLE); | 1400 | TASK_KILLABLE); |
1401 | else | ||
1402 | goto out_mark_dirty; | ||
1401 | } else | 1403 | } else |
1402 | nfs_commit_clear_lock(NFS_I(inode)); | 1404 | nfs_commit_clear_lock(NFS_I(inode)); |
1403 | out: | 1405 | return res; |
1406 | /* Note: If we exit without ensuring that the commit is complete, | ||
1407 | * we must mark the inode as dirty. Otherwise, future calls to | ||
1408 | * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure | ||
1409 | * that the data is on the disk. | ||
1410 | */ | ||
1411 | out_mark_dirty: | ||
1412 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | ||
1404 | return res; | 1413 | return res; |
1405 | } | 1414 | } |
1406 | 1415 | ||
@@ -1509,14 +1518,17 @@ int nfs_wb_page(struct inode *inode, struct page *page) | |||
1509 | }; | 1518 | }; |
1510 | int ret; | 1519 | int ret; |
1511 | 1520 | ||
1512 | while(PagePrivate(page)) { | 1521 | for (;;) { |
1513 | wait_on_page_writeback(page); | 1522 | wait_on_page_writeback(page); |
1514 | if (clear_page_dirty_for_io(page)) { | 1523 | if (clear_page_dirty_for_io(page)) { |
1515 | ret = nfs_writepage_locked(page, &wbc); | 1524 | ret = nfs_writepage_locked(page, &wbc); |
1516 | if (ret < 0) | 1525 | if (ret < 0) |
1517 | goto out_error; | 1526 | goto out_error; |
1527 | continue; | ||
1518 | } | 1528 | } |
1519 | ret = sync_inode(inode, &wbc); | 1529 | if (!PagePrivate(page)) |
1530 | break; | ||
1531 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | ||
1520 | if (ret < 0) | 1532 | if (ret < 0) |
1521 | goto out_error; | 1533 | goto out_error; |
1522 | } | 1534 | } |
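
The nfs_commit_inode() hunk establishes an invariant worth noting: every path that returns without completing the commit must re-mark the inode dirty, or a later sync_inode() with WB_SYNC_ALL could report success while data sits uncommitted. A hedged sketch of that goto discipline; the my_* helpers are trivial stand-ins:

#include <linux/fs.h>

static int my_trylock_commit(struct inode *inode) { return 1; }	/* hypothetical */
static int my_scan_and_commit(struct inode *inode) { return 0; }	/* hypothetical */

static int my_commit(struct inode *inode, int may_wait)
{
	int res = 0;

	if (!my_trylock_commit(inode))
		goto out_mark_dirty;	/* someone else holds the commit lock */
	res = my_scan_and_commit(inode);
	if (res > 0 && !may_wait)
		goto out_mark_dirty;	/* commit started but not waited on */
	return res;

out_mark_dirty:
	/* without this, a later WB_SYNC_ALL sync could return before the
	 * data is actually on disk */
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return res;
}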
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bc3194ea01f5..508941c23af7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -998,7 +998,7 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
998 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) | 998 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) |
999 | return -EINVAL; | 999 | return -EINVAL; |
1000 | 1000 | ||
1001 | if (port < 1 || port > USHORT_MAX) | 1001 | if (port < 1 || port > USHRT_MAX) |
1002 | return -EINVAL; | 1002 | return -EINVAL; |
1003 | 1003 | ||
1004 | err = nfsd_create_serv(); | 1004 | err = nfsd_create_serv(); |
@@ -1040,7 +1040,7 @@ static ssize_t __write_ports_delxprt(char *buf) | |||
1040 | if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) | 1040 | if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) |
1041 | return -EINVAL; | 1041 | return -EINVAL; |
1042 | 1042 | ||
1043 | if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL) | 1043 | if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) |
1044 | return -EINVAL; | 1044 | return -EINVAL; |
1045 | 1045 | ||
1046 | xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); | 1046 | xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); |
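
The USHORT_MAX → USHRT_MAX rename (the constant now lives in linux/kernel.h under its traditional spelling) leaves the nfs and nfsd validation logic intact: parse a decimal, then bound it to the 16-bit port range. A hedged sketch; my_parse_port is hypothetical:

#include <linux/kernel.h>

static int my_parse_port(const char *string, unsigned long *port)
{
	unsigned long option;

	if (strict_strtoul(string, 10, &option))
		return -EINVAL;			/* not a decimal number */
	if (option < 1 || option > USHRT_MAX)
		return -EINVAL;			/* outside 1..65535 */
	*port = option;
	return 0;
}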
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 30292df443ce..c9a30d7ff6fc 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
@@ -27,7 +27,7 @@ | |||
27 | #include "nilfs.h" | 27 | #include "nilfs.h" |
28 | #include "segment.h" | 28 | #include "segment.h" |
29 | 29 | ||
30 | int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | 30 | int nilfs_sync_file(struct file *file, int datasync) |
31 | { | 31 | { |
32 | /* | 32 | /* |
33 | * Called from fsync() system call | 33 | * Called from fsync() system call |
@@ -37,7 +37,7 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
37 | * This function should be implemented when the writeback function | 37 | * This function should be implemented when the writeback function |
38 | * will be implemented. | 38 | * will be implemented. |
39 | */ | 39 | */ |
40 | struct inode *inode = dentry->d_inode; | 40 | struct inode *inode = file->f_mapping->host; |
41 | int err; | 41 | int err; |
42 | 42 | ||
43 | if (!nilfs_inode_dirty(inode)) | 43 | if (!nilfs_inode_dirty(inode)) |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 8723e5bfd071..47d6d7928122 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -228,7 +228,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, | |||
228 | struct page *, struct inode *); | 228 | struct page *, struct inode *); |
229 | 229 | ||
230 | /* file.c */ | 230 | /* file.c */ |
231 | extern int nilfs_sync_file(struct file *, struct dentry *, int); | 231 | extern int nilfs_sync_file(struct file *, int); |
232 | 232 | ||
233 | /* ioctl.c */ | 233 | /* ioctl.c */ |
234 | long nilfs_ioctl(struct file *, unsigned int, unsigned long); | 234 | long nilfs_ioctl(struct file *, unsigned int, unsigned long); |
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index fe44d3feee4a..0f48e7c5d9e1 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c | |||
@@ -1527,10 +1527,9 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) | |||
1527 | * this problem for now. We do write the $BITMAP attribute if it is present | 1527 | * this problem for now. We do write the $BITMAP attribute if it is present |
1528 | * which is the important one for a directory so things are not too bad. | 1528 | * which is the important one for a directory so things are not too bad. |
1529 | */ | 1529 | */ |
1530 | static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, | 1530 | static int ntfs_dir_fsync(struct file *filp, int datasync) |
1531 | int datasync) | ||
1532 | { | 1531 | { |
1533 | struct inode *bmp_vi, *vi = dentry->d_inode; | 1532 | struct inode *bmp_vi, *vi = filp->f_mapping->host; |
1534 | int err, ret; | 1533 | int err, ret; |
1535 | ntfs_attr na; | 1534 | ntfs_attr na; |
1536 | 1535 | ||
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8804f093ba75..113ebd9f25a4 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -98,9 +98,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) | |||
98 | * the page at all. For a more detailed explanation see ntfs_truncate() in | 98 | * the page at all. For a more detailed explanation see ntfs_truncate() in |
99 | * fs/ntfs/inode.c. | 99 | * fs/ntfs/inode.c. |
100 | * | 100 | * |
101 | * @cached_page and @lru_pvec are just optimizations for dealing with multiple | ||
102 | * pages. | ||
103 | * | ||
104 | * Return 0 on success and -errno on error. In the case that an error is | 101 | * Return 0 on success and -errno on error. In the case that an error is |
105 | * encountered it is possible that the initialized size will already have been | 102 | * encountered it is possible that the initialized size will already have been |
106 | * incremented some way towards @new_init_size but it is guaranteed that if | 103 | * incremented some way towards @new_init_size but it is guaranteed that if |
@@ -110,8 +107,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) | |||
110 | * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must be | 107 |
111 | * held by the caller. | 108 | * held by the caller. |
112 | */ | 109 | */ |
113 | static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, | 110 | static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) |
114 | struct page **cached_page, struct pagevec *lru_pvec) | ||
115 | { | 111 | { |
116 | s64 old_init_size; | 112 | s64 old_init_size; |
117 | loff_t old_i_size; | 113 | loff_t old_i_size; |
@@ -403,18 +399,13 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, | |||
403 | * Obtain @nr_pages locked page cache pages from the mapping @mapping and | 399 | * Obtain @nr_pages locked page cache pages from the mapping @mapping and |
404 | * starting at index @index. | 400 | * starting at index @index. |
405 | * | 401 | * |
406 | * If a page is newly created, increment its refcount and add it to the | 402 | * If a page is newly created, add it to the LRU list |
407 | * caller's lru-buffering pagevec @lru_pvec. | ||
408 | * | ||
409 | * This is the same as mm/filemap.c::__grab_cache_page(), except that @nr_pages | ||
410 | * are obtained at once instead of just one page and that 0 is returned on | ||
411 | * success and -errno on error. | ||
412 | * | 403 | * |
413 | * Note, the page locks are obtained in ascending page index order. | 404 | * Note, the page locks are obtained in ascending page index order. |
414 | */ | 405 | */ |
415 | static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | 406 | static inline int __ntfs_grab_cache_pages(struct address_space *mapping, |
416 | pgoff_t index, const unsigned nr_pages, struct page **pages, | 407 | pgoff_t index, const unsigned nr_pages, struct page **pages, |
417 | struct page **cached_page, struct pagevec *lru_pvec) | 408 | struct page **cached_page) |
418 | { | 409 | { |
419 | int err, nr; | 410 | int err, nr; |
420 | 411 | ||
@@ -430,7 +421,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
430 | goto err_out; | 421 | goto err_out; |
431 | } | 422 | } |
432 | } | 423 | } |
433 | err = add_to_page_cache(*cached_page, mapping, index, | 424 | err = add_to_page_cache_lru(*cached_page, mapping, index, |
434 | GFP_KERNEL); | 425 | GFP_KERNEL); |
435 | if (unlikely(err)) { | 426 | if (unlikely(err)) { |
436 | if (err == -EEXIST) | 427 | if (err == -EEXIST) |
@@ -438,9 +429,6 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
438 | goto err_out; | 429 | goto err_out; |
439 | } | 430 | } |
440 | pages[nr] = *cached_page; | 431 | pages[nr] = *cached_page; |
441 | page_cache_get(*cached_page); | ||
442 | if (unlikely(!pagevec_add(lru_pvec, *cached_page))) | ||
443 | __pagevec_lru_add_file(lru_pvec); | ||
444 | *cached_page = NULL; | 432 | *cached_page = NULL; |
445 | } | 433 | } |
446 | index++; | 434 | index++; |
@@ -1800,7 +1788,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
1800 | ssize_t status, written; | 1788 | ssize_t status, written; |
1801 | unsigned nr_pages; | 1789 | unsigned nr_pages; |
1802 | int err; | 1790 | int err; |
1803 | struct pagevec lru_pvec; | ||
1804 | 1791 | ||
1805 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " | 1792 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " |
1806 | "pos 0x%llx, count 0x%lx.", | 1793 | "pos 0x%llx, count 0x%lx.", |
@@ -1912,7 +1899,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
1912 | } | 1899 | } |
1913 | } | 1900 | } |
1914 | } | 1901 | } |
1915 | pagevec_init(&lru_pvec, 0); | ||
1916 | written = 0; | 1902 | written = 0; |
1917 | /* | 1903 | /* |
1918 | * If the write starts beyond the initialized size, extend it up to the | 1904 | * If the write starts beyond the initialized size, extend it up to the |
@@ -1925,8 +1911,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
1925 | ll = ni->initialized_size; | 1911 | ll = ni->initialized_size; |
1926 | read_unlock_irqrestore(&ni->size_lock, flags); | 1912 | read_unlock_irqrestore(&ni->size_lock, flags); |
1927 | if (pos > ll) { | 1913 | if (pos > ll) { |
1928 | err = ntfs_attr_extend_initialized(ni, pos, &cached_page, | 1914 | err = ntfs_attr_extend_initialized(ni, pos); |
1929 | &lru_pvec); | ||
1930 | if (err < 0) { | 1915 | if (err < 0) { |
1931 | ntfs_error(vol->sb, "Cannot perform write to inode " | 1916 | ntfs_error(vol->sb, "Cannot perform write to inode " |
1932 | "0x%lx, attribute type 0x%x, because " | 1917 | "0x%lx, attribute type 0x%x, because " |
@@ -2012,7 +1997,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
2012 | ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); | 1997 | ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); |
2013 | /* Get and lock @do_pages starting at index @start_idx. */ | 1998 | /* Get and lock @do_pages starting at index @start_idx. */ |
2014 | status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, | 1999 | status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, |
2015 | pages, &cached_page, &lru_pvec); | 2000 | pages, &cached_page); |
2016 | if (unlikely(status)) | 2001 | if (unlikely(status)) |
2017 | break; | 2002 | break; |
2018 | /* | 2003 | /* |
@@ -2077,7 +2062,6 @@ err_out: | |||
2077 | *ppos = pos; | 2062 | *ppos = pos; |
2078 | if (cached_page) | 2063 | if (cached_page) |
2079 | page_cache_release(cached_page); | 2064 | page_cache_release(cached_page); |
2080 | pagevec_lru_add_file(&lru_pvec); | ||
2081 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 2065 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
2082 | written ? "written" : "status", (unsigned long)written, | 2066 | written ? "written" : "status", (unsigned long)written, |
2083 | (long)status); | 2067 | (long)status); |
@@ -2149,7 +2133,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2149 | /** | 2133 | /** |
2150 | * ntfs_file_fsync - sync a file to disk | 2134 | * ntfs_file_fsync - sync a file to disk |
2151 | * @filp: file to be synced | 2135 | * @filp: file to be synced |
2152 | * @dentry: dentry describing the file to sync | ||
2153 | * @datasync: if non-zero only flush user data and not metadata | 2136 | * @datasync: if non-zero only flush user data and not metadata |
2154 | * | 2137 | * |
2155 | * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync | 2138 | * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync |
@@ -2165,19 +2148,15 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2165 | * Also, if @datasync is true, we do not wait on the inode to be written out | 2148 | * Also, if @datasync is true, we do not wait on the inode to be written out |
2166 | * but we always wait on the page cache pages to be written out. | 2149 | * but we always wait on the page cache pages to be written out. |
2167 | * | 2150 | * |
2168 | * Note: In the past @filp could be NULL so we ignore it as we don't need it | ||
2169 | * anyway. | ||
2170 | * | ||
2171 | * Locking: Caller must hold i_mutex on the inode. | 2151 | * Locking: Caller must hold i_mutex on the inode. |
2172 | * | 2152 | * |
2173 | * TODO: We should probably also write all attribute/index inodes associated | 2153 | * TODO: We should probably also write all attribute/index inodes associated |
2174 | * with this inode but since we have no simple way of getting to them we ignore | 2154 | * with this inode but since we have no simple way of getting to them we ignore |
2175 | * this problem for now. | 2155 | * this problem for now. |
2176 | */ | 2156 | */ |
2177 | static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, | 2157 | static int ntfs_file_fsync(struct file *filp, int datasync) |
2178 | int datasync) | ||
2179 | { | 2158 | { |
2180 | struct inode *vi = dentry->d_inode; | 2159 | struct inode *vi = filp->f_mapping->host; |
2181 | int err, ret = 0; | 2160 | int err, ret = 0; |
2182 | 2161 | ||
2183 | ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); | 2162 | ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); |
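
The ntfs simplification works because add_to_page_cache_lru() inserts the page into the mapping and puts it on the LRU in one call, making the hand-rolled pagevec batching (and the extra page_cache_get()) unnecessary. A hedged sketch of the per-page step; my_grab_page is hypothetical:

#include <linux/pagemap.h>

static struct page *my_grab_page(struct address_space *mapping, pgoff_t index)
{
	struct page *page = page_cache_alloc(mapping);

	if (!page)
		return NULL;
	/* inserts into the radix tree, returns with the page locked, and
	 * adds it to the LRU in one step */
	if (add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
		page_cache_release(page);
		return NULL;
	}
	return page;
}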
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index b7428c5d0d3b..ec6d12339593 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -403,7 +403,7 @@ void ocfs2_block_check_compute(void *data, size_t blocksize, | |||
403 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no | 403 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no |
404 | * larger than 16 bits. | 404 | * larger than 16 bits. |
405 | */ | 405 | */ |
406 | BUG_ON(ecc > USHORT_MAX); | 406 | BUG_ON(ecc > USHRT_MAX); |
407 | 407 | ||
408 | bc->bc_crc32e = cpu_to_le32(crc); | 408 | bc->bc_crc32e = cpu_to_le32(crc); |
409 | bc->bc_ecc = cpu_to_le16((u16)ecc); | 409 | bc->bc_ecc = cpu_to_le16((u16)ecc); |
@@ -508,7 +508,7 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, | |||
508 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no | 508 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no |
509 | * larger than 16 bits. | 509 | * larger than 16 bits. |
510 | */ | 510 | */ |
511 | BUG_ON(ecc > USHORT_MAX); | 511 | BUG_ON(ecc > USHRT_MAX); |
512 | 512 | ||
513 | bc->bc_crc32e = cpu_to_le32(crc); | 513 | bc->bc_crc32e = cpu_to_le32(crc); |
514 | bc->bc_ecc = cpu_to_le16((u16)ecc); | 514 | bc->bc_ecc = cpu_to_le16((u16)ecc); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 97e54b9e654b..6a13ea64c447 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file) | |||
175 | return 0; | 175 | return 0; |
176 | } | 176 | } |
177 | 177 | ||
178 | static int ocfs2_sync_file(struct file *file, | 178 | static int ocfs2_sync_file(struct file *file, int datasync) |
179 | struct dentry *dentry, | ||
180 | int datasync) | ||
181 | { | 179 | { |
182 | int err = 0; | 180 | int err = 0; |
183 | journal_t *journal; | 181 | journal_t *journal; |
184 | struct inode *inode = dentry->d_inode; | 182 | struct dentry *dentry = file->f_path.dentry; |
183 | struct inode *inode = file->f_mapping->host; | ||
185 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
186 | 185 | ||
187 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 186 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, |
@@ -1053,7 +1052,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1053 | } | 1052 | } |
1054 | 1053 | ||
1055 | /* | 1054 | /* |
1056 | * This will intentionally not wind up calling vmtruncate(), | 1055 | * This will intentionally not wind up calling simple_setsize(), |
1057 | * since all the work for a size change has been done above. | 1056 | * since all the work for a size change has been done above. |
1058 | * Otherwise, we could get into problems with truncate as | 1057 | * Otherwise, we could get into problems with truncate as |
1059 | * ip_alloc_sem is used there to protect against i_size | 1058 | * ip_alloc_sem is used there to protect against i_size |
@@ -2119,9 +2118,13 @@ relock: | |||
2119 | * direct write may have instantiated a few | 2118 | * direct write may have instantiated a few |
2120 | * blocks outside i_size. Trim these off again. | 2119 | * blocks outside i_size. Trim these off again. |
2121 | * Don't need i_size_read because we hold i_mutex. | 2120 | * Don't need i_size_read because we hold i_mutex. |
2121 | * | ||
2122 | * XXX(hch): this looks buggy because ocfs2 did not | ||
2123 | * actually implement ->truncate. Take a look at | ||
2124 | * the new truncate sequence and update this accordingly | ||
2122 | */ | 2125 | */ |
2123 | if (*ppos + count > inode->i_size) | 2126 | if (*ppos + count > inode->i_size) |
2124 | vmtruncate(inode, inode->i_size); | 2127 | simple_setsize(inode, inode->i_size); |
2125 | ret = written; | 2128 | ret = written; |
2126 | goto out_dio; | 2129 | goto out_dio; |
2127 | } | 2130 | } |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2c26ce251cb3..0eaa929a4dbf 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -879,13 +879,15 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) | |||
879 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | 879 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) |
880 | continue; | 880 | continue; |
881 | if (unsuspend) | 881 | if (unsuspend) |
882 | status = vfs_quota_enable( | 882 | status = dquot_resume(sb, type); |
883 | sb_dqopt(sb)->files[type], | 883 | else { |
884 | type, QFMT_OCFS2, | 884 | struct ocfs2_mem_dqinfo *oinfo; |
885 | DQUOT_SUSPENDED); | 885 | |
886 | else | 886 | /* Cancel periodic syncing before suspending */ |
887 | status = vfs_quota_disable(sb, type, | 887 | oinfo = sb_dqinfo(sb, type)->dqi_priv; |
888 | DQUOT_SUSPENDED); | 888 | cancel_delayed_work_sync(&oinfo->dqi_sync_work); |
889 | status = dquot_suspend(sb, type); | ||
890 | } | ||
889 | if (status < 0) | 891 | if (status < 0) |
890 | break; | 892 | break; |
891 | } | 893 | } |
@@ -916,8 +918,8 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb) | |||
916 | status = -ENOENT; | 918 | status = -ENOENT; |
917 | goto out_quota_off; | 919 | goto out_quota_off; |
918 | } | 920 | } |
919 | status = vfs_quota_enable(inode[type], type, QFMT_OCFS2, | 921 | status = dquot_enable(inode[type], type, QFMT_OCFS2, |
920 | DQUOT_USAGE_ENABLED); | 922 | DQUOT_USAGE_ENABLED); |
921 | if (status < 0) | 923 | if (status < 0) |
922 | goto out_quota_off; | 924 | goto out_quota_off; |
923 | } | 925 | } |
@@ -952,8 +954,8 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
952 | /* Turn off quotas. This will remove all dquot structures from | 954 | /* Turn off quotas. This will remove all dquot structures from |
953 | * memory and so they will be automatically synced to global | 955 | * memory and so they will be automatically synced to global |
954 | * quota files */ | 956 | * quota files */ |
955 | vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED | | 957 | dquot_disable(sb, type, DQUOT_USAGE_ENABLED | |
956 | DQUOT_LIMITS_ENABLED); | 958 | DQUOT_LIMITS_ENABLED); |
957 | if (!inode) | 959 | if (!inode) |
958 | continue; | 960 | continue; |
959 | iput(inode); | 961 | iput(inode); |
@@ -962,7 +964,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
962 | 964 | ||
963 | /* Handle quota on quotactl */ | 965 | /* Handle quota on quotactl */ |
964 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, | 966 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, |
965 | char *path, int remount) | 967 | char *path) |
966 | { | 968 | { |
967 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 969 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, |
968 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | 970 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; |
@@ -970,30 +972,24 @@ static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, | |||
970 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | 972 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) |
971 | return -EINVAL; | 973 | return -EINVAL; |
972 | 974 | ||
973 | if (remount) | 975 | return dquot_enable(sb_dqopt(sb)->files[type], type, |
974 | return 0; /* Just ignore it has been handled in | 976 | format_id, DQUOT_LIMITS_ENABLED); |
975 | * ocfs2_remount() */ | ||
976 | return vfs_quota_enable(sb_dqopt(sb)->files[type], type, | ||
977 | format_id, DQUOT_LIMITS_ENABLED); | ||
978 | } | 977 | } |
979 | 978 | ||
980 | /* Handle quota off quotactl */ | 979 | /* Handle quota off quotactl */ |
981 | static int ocfs2_quota_off(struct super_block *sb, int type, int remount) | 980 | static int ocfs2_quota_off(struct super_block *sb, int type) |
982 | { | 981 | { |
983 | if (remount) | 982 | return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); |
984 | return 0; /* Ignore now and handle later in | ||
985 | * ocfs2_remount() */ | ||
986 | return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); | ||
987 | } | 983 | } |
988 | 984 | ||
989 | static const struct quotactl_ops ocfs2_quotactl_ops = { | 985 | static const struct quotactl_ops ocfs2_quotactl_ops = { |
990 | .quota_on = ocfs2_quota_on, | 986 | .quota_on = ocfs2_quota_on, |
991 | .quota_off = ocfs2_quota_off, | 987 | .quota_off = ocfs2_quota_off, |
992 | .quota_sync = vfs_quota_sync, | 988 | .quota_sync = dquot_quota_sync, |
993 | .get_info = vfs_get_dqinfo, | 989 | .get_info = dquot_get_dqinfo, |
994 | .set_info = vfs_set_dqinfo, | 990 | .set_info = dquot_set_dqinfo, |
995 | .get_dqblk = vfs_get_dqblk, | 991 | .get_dqblk = dquot_get_dqblk, |
996 | .set_dqblk = vfs_set_dqblk, | 992 | .set_dqblk = dquot_set_dqblk, |
997 | }; | 993 | }; |
998 | 994 | ||
999 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | 995 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) |
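
With the generic dquot_* entry points exported under their new names, a filesystem's quotactl_ops can delegate everything except the on/off policy, and the remount argument is gone from ->quota_on/->quota_off entirely. A hedged sketch mirroring the table above; my_quota_on/off and the table name are hypothetical:

#include <linux/quota.h>
#include <linux/quotaops.h>

static int my_quota_on(struct super_block *sb, int type, int format_id,
		       char *path)
{
	return dquot_enable(sb_dqopt(sb)->files[type], type,
			    format_id, DQUOT_LIMITS_ENABLED);
}

static int my_quota_off(struct super_block *sb, int type)
{
	return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
}

static const struct quotactl_ops my_quotactl_ops = {
	.quota_on	= my_quota_on,
	.quota_off	= my_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_info	= dquot_get_dqinfo,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
};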
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 399487c09364..6e7a3291bbe8 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -329,7 +329,7 @@ const struct file_operations omfs_file_operations = { | |||
329 | .aio_read = generic_file_aio_read, | 329 | .aio_read = generic_file_aio_read, |
330 | .aio_write = generic_file_aio_write, | 330 | .aio_write = generic_file_aio_write, |
331 | .mmap = generic_file_mmap, | 331 | .mmap = generic_file_mmap, |
332 | .fsync = simple_fsync, | 332 | .fsync = generic_file_fsync, |
333 | .splice_read = generic_file_splice_read, | 333 | .splice_read = generic_file_splice_read, |
334 | }; | 334 | }; |
335 | 335 | ||
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 3ceca05b668c..648c9d8f3357 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
28 | #include <linux/stringify.h> | 28 | #include <linux/stringify.h> |
29 | #include <linux/kernel.h> | ||
29 | #include "ldm.h" | 30 | #include "ldm.h" |
30 | #include "check.h" | 31 | #include "check.h" |
31 | #include "msdos.h" | 32 | #include "msdos.h" |
@@ -77,17 +78,16 @@ static int ldm_parse_hexbyte (const u8 *src) | |||
77 | int h; | 78 | int h; |
78 | 79 | ||
79 | /* high part */ | 80 | /* high part */ |
80 | if ((x = src[0] - '0') <= '9'-'0') h = x; | 81 | x = h = hex_to_bin(src[0]); |
81 | else if ((x = src[0] - 'a') <= 'f'-'a') h = x+10; | 82 | if (h < 0) |
82 | else if ((x = src[0] - 'A') <= 'F'-'A') h = x+10; | 83 | return -1; |
83 | else return -1; | ||
84 | h <<= 4; | ||
85 | 84 | ||
86 | /* low part */ | 85 | /* low part */ |
87 | if ((x = src[1] - '0') <= '9'-'0') return h | x; | 86 | h = hex_to_bin(src[1]); |
88 | if ((x = src[1] - 'a') <= 'f'-'a') return h | (x+10); | 87 | if (h < 0) |
89 | if ((x = src[1] - 'A') <= 'F'-'A') return h | (x+10); | 88 | return -1; |
90 | return -1; | 89 | |
90 | return (x << 4) + h; | ||
91 | } | 91 | } |
92 | 92 | ||
93 | /** | 93 | /** |
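
ldm_parse_hexbyte() now composes two hex_to_bin() calls (the helper from linux/kernel.h) instead of three open-coded range tests per nibble. A self-contained userspace equivalent, with hex_to_bin reimplemented here for the demo:

#include <stdio.h>

/* mirrors the kernel's hex_to_bin(): digit value, or -1 if not hex */
static int hex_to_bin(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return -1;
}

static int parse_hexbyte(const char *src)
{
	int hi = hex_to_bin(src[0]);	/* high nibble */
	int lo;

	if (hi < 0)
		return -1;
	lo = hex_to_bin(src[1]);	/* low nibble */
	if (lo < 0)
		return -1;
	return (hi << 4) + lo;
}

int main(void)
{
	printf("%d\n", parse_hexbyte("3f"));	/* prints 63 */
	return 0;
}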
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -230,6 +230,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, | |||
230 | 230 | ||
231 | return kmap(buf->page); | 231 | return kmap(buf->page); |
232 | } | 232 | } |
233 | EXPORT_SYMBOL(generic_pipe_buf_map); | ||
233 | 234 | ||
234 | /** | 235 | /** |
235 | * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer | 236 | * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer |
@@ -249,6 +250,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, | |||
249 | } else | 250 | } else |
250 | kunmap(buf->page); | 251 | kunmap(buf->page); |
251 | } | 252 | } |
253 | EXPORT_SYMBOL(generic_pipe_buf_unmap); | ||
252 | 254 | ||
253 | /** | 255 | /** |
254 | * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer | 256 | * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer |
@@ -279,6 +281,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, | |||
279 | 281 | ||
280 | return 1; | 282 | return 1; |
281 | } | 283 | } |
284 | EXPORT_SYMBOL(generic_pipe_buf_steal); | ||
282 | 285 | ||
283 | /** | 286 | /** |
284 | * generic_pipe_buf_get - get a reference to a &struct pipe_buffer | 287 | * generic_pipe_buf_get - get a reference to a &struct pipe_buffer |
@@ -294,6 +297,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) | |||
294 | { | 297 | { |
295 | page_cache_get(buf->page); | 298 | page_cache_get(buf->page); |
296 | } | 299 | } |
300 | EXPORT_SYMBOL(generic_pipe_buf_get); | ||
297 | 301 | ||
298 | /** | 302 | /** |
299 | * generic_pipe_buf_confirm - verify contents of the pipe buffer | 303 | * generic_pipe_buf_confirm - verify contents of the pipe buffer |
@@ -309,6 +313,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info, | |||
309 | { | 313 | { |
310 | return 0; | 314 | return 0; |
311 | } | 315 | } |
316 | EXPORT_SYMBOL(generic_pipe_buf_confirm); | ||
312 | 317 | ||
313 | /** | 318 | /** |
314 | * generic_pipe_buf_release - put a reference to a &struct pipe_buffer | 319 | * generic_pipe_buf_release - put a reference to a &struct pipe_buffer |
@@ -323,6 +328,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, | |||
323 | { | 328 | { |
324 | page_cache_release(buf->page); | 329 | page_cache_release(buf->page); |
325 | } | 330 | } |
331 | EXPORT_SYMBOL(generic_pipe_buf_release); | ||
326 | 332 | ||
327 | static const struct pipe_buf_operations anon_pipe_buf_ops = { | 333 | static const struct pipe_buf_operations anon_pipe_buf_ops = { |
328 | .can_merge = 1, | 334 | .can_merge = 1, |
@@ -1172,16 +1178,20 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1172 | nr_pages = (arg + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1178 | nr_pages = (arg + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1173 | nr_pages = roundup_pow_of_two(nr_pages); | 1179 | nr_pages = roundup_pow_of_two(nr_pages); |
1174 | 1180 | ||
1175 | if (!capable(CAP_SYS_ADMIN) && nr_pages > pipe_max_pages) | 1181 | if (!capable(CAP_SYS_ADMIN) && nr_pages > pipe_max_pages) { |
1176 | return -EPERM; | 1182 | ret = -EPERM; |
1183 | goto out; | ||
1184 | } | ||
1177 | 1185 | ||
1178 | /* | 1186 | /* |
1179 | * The pipe needs to be at least 2 pages large to | 1187 | * The pipe needs to be at least 2 pages large to |
1180 | * guarantee POSIX behaviour. | 1188 | * guarantee POSIX behaviour. |
1181 | */ | 1189 | */ |
1182 | if (nr_pages < 2) | 1190 | if (arg < 2) { |
1183 | return -EINVAL; | 1191 | ret = -EINVAL; |
1184 | ret = pipe_set_size(pipe, nr_pages); | 1192 | goto out; |
1193 | } | ||
1194 | ret = pipe_set_size(pipe, arg); | ||
1185 | break; | 1195 | break; |
1186 | } | 1196 | } |
1187 | case F_GETPIPE_SZ: | 1197 | case F_GETPIPE_SZ: |
@@ -1192,6 +1202,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1192 | break; | 1202 | break; |
1193 | } | 1203 | } |
1194 | 1204 | ||
1205 | out: | ||
1195 | mutex_unlock(&pipe->inode->i_mutex); | 1206 | mutex_unlock(&pipe->inode->i_mutex); |
1196 | return ret; | 1207 | return ret; |
1197 | } | 1208 | } |
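The reworked error paths above back the F_SETPIPE_SZ/F_GETPIPE_SZ fcntls. A userspace sketch of exercising them — note that at this snapshot the argument is interpreted in pages (later kernels switched to bytes), and the fallback defines mirror this tree's F_LINUX_SPECIFIC_BASE + 7/8 numbering:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef F_SETPIPE_SZ                    /* values from this tree's fcntl ABI */
#define F_SETPIPE_SZ    1031            /* F_LINUX_SPECIFIC_BASE + 7 */
#define F_GETPIPE_SZ    1032
#endif

int main(void)
{
        int fds[2];

        if (pipe(fds) < 0)
                return 1;

        /* Grow the pipe; fails with EPERM past pipe-max-pages for non-root. */
        if (fcntl(fds[1], F_SETPIPE_SZ, 16) < 0)
                perror("F_SETPIPE_SZ");

        printf("pipe size: %d\n", fcntl(fds[1], F_GETPIPE_SZ));
        close(fds[0]);
        close(fds[1]);
        return 0;
}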
diff --git a/fs/proc/array.c b/fs/proc/array.c index 885ab5513ac5..9b58d38bc911 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -267,7 +267,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
267 | shpending = p->signal->shared_pending.signal; | 267 | shpending = p->signal->shared_pending.signal; |
268 | blocked = p->blocked; | 268 | blocked = p->blocked; |
269 | collect_sigign_sigcatch(p, &ignored, &caught); | 269 | collect_sigign_sigcatch(p, &ignored, &caught); |
270 | num_threads = atomic_read(&p->signal->count); | 270 | num_threads = get_nr_threads(p); |
271 | rcu_read_lock(); /* FIXME: is this correct? */ | 271 | rcu_read_lock(); /* FIXME: is this correct? */ |
272 | qsize = atomic_read(&__task_cred(p)->user->sigpending); | 272 | qsize = atomic_read(&__task_cred(p)->user->sigpending); |
273 | rcu_read_unlock(); | 273 | rcu_read_unlock(); |
@@ -410,7 +410,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
410 | tty_nr = new_encode_dev(tty_devnum(sig->tty)); | 410 | tty_nr = new_encode_dev(tty_devnum(sig->tty)); |
411 | } | 411 | } |
412 | 412 | ||
413 | num_threads = atomic_read(&sig->count); | 413 | num_threads = get_nr_threads(task); |
414 | collect_sigign_sigcatch(task, &sigign, &sigcatch); | 414 | collect_sigign_sigcatch(task, &sigign, &sigcatch); |
415 | 415 | ||
416 | cmin_flt = sig->cmin_flt; | 416 | cmin_flt = sig->cmin_flt; |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c7f9f23449dc..acb7ef80ea4f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -166,18 +166,6 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root) | |||
166 | return result; | 166 | return result; |
167 | } | 167 | } |
168 | 168 | ||
169 | static int get_nr_threads(struct task_struct *tsk) | ||
170 | { | ||
171 | unsigned long flags; | ||
172 | int count = 0; | ||
173 | |||
174 | if (lock_task_sighand(tsk, &flags)) { | ||
175 | count = atomic_read(&tsk->signal->count); | ||
176 | unlock_task_sighand(tsk, &flags); | ||
177 | } | ||
178 | return count; | ||
179 | } | ||
180 | |||
181 | static int proc_cwd_link(struct inode *inode, struct path *path) | 169 | static int proc_cwd_link(struct inode *inode, struct path *path) |
182 | { | 170 | { |
183 | struct task_struct *task = get_proc_task(inode); | 171 | struct task_struct *task = get_proc_task(inode); |
@@ -2444,7 +2432,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, | |||
2444 | const struct pid_entry *p = ptr; | 2432 | const struct pid_entry *p = ptr; |
2445 | struct inode *inode; | 2433 | struct inode *inode; |
2446 | struct proc_inode *ei; | 2434 | struct proc_inode *ei; |
2447 | struct dentry *error = ERR_PTR(-EINVAL); | 2435 | struct dentry *error; |
2448 | 2436 | ||
2449 | /* Allocate the inode */ | 2437 | /* Allocate the inode */ |
2450 | error = ERR_PTR(-ENOMEM); | 2438 | error = ERR_PTR(-ENOMEM); |
@@ -2794,7 +2782,7 @@ out: | |||
2794 | 2782 | ||
2795 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2783 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2796 | { | 2784 | { |
2797 | struct dentry *result = ERR_PTR(-ENOENT); | 2785 | struct dentry *result; |
2798 | struct task_struct *task; | 2786 | struct task_struct *task; |
2799 | unsigned tgid; | 2787 | unsigned tgid; |
2800 | struct pid_namespace *ns; | 2788 | struct pid_namespace *ns; |
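Both proc hunks above now call get_nr_threads() instead of peeking at signal->count directly, and the private copy of the helper in fs/proc/base.c goes away. A sketch of what the shared helper amounts to — the exact field is an assumption, since this series is in the middle of turning signal->count into a plain thread counter:

/* see include/linux/sched.h for the real definition */
static inline int get_nr_threads(struct task_struct *tsk)
{
        return tsk->signal->nr_threads;         /* assumed field name */
}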
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 43c127490606..2791907744ed 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -343,21 +343,6 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ | |||
343 | /* | 343 | /* |
344 | * Return an inode number between PROC_DYNAMIC_FIRST and | 344 | * Return an inode number between PROC_DYNAMIC_FIRST and |
345 | * 0xffffffff, or zero on failure. | 345 | * 0xffffffff, or zero on failure. |
346 | * | ||
347 | * Current inode allocations in the proc-fs (hex-numbers): | ||
348 | * | ||
349 | * 00000000 reserved | ||
350 | * 00000001-00000fff static entries (goners) | ||
351 | * 001 root-ino | ||
352 | * | ||
353 | * 00001000-00001fff unused | ||
354 | * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff | ||
355 | * 80000000-efffffff unused | ||
356 | * f0000000-ffffffff dynamic entries | ||
357 | * | ||
358 | * Goal: | ||
359 | * Once we split the thing into several virtual filesystems, | ||
360 | * we will get rid of magical ranges (and this comment, BTW). | ||
361 | */ | 346 | */ |
362 | static unsigned int get_inode_number(void) | 347 | static unsigned int get_inode_number(void) |
363 | { | 348 | { |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index c837a77351be..6f37c391468d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -588,7 +588,7 @@ static struct kcore_list kcore_text; | |||
588 | */ | 588 | */ |
589 | static void __init proc_kcore_text_init(void) | 589 | static void __init proc_kcore_text_init(void) |
590 | { | 590 | { |
591 | kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT); | 591 | kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT); |
592 | } | 592 | } |
593 | #else | 593 | #else |
594 | static void __init proc_kcore_text_init(void) | 594 | static void __init proc_kcore_text_init(void) |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 757c069f2a65..4258384ed22d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -110,7 +110,6 @@ void __init proc_root_init(void) | |||
110 | if (err) | 110 | if (err) |
111 | return; | 111 | return; |
112 | proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); | 112 | proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); |
113 | err = PTR_ERR(proc_mnt); | ||
114 | if (IS_ERR(proc_mnt)) { | 113 | if (IS_ERR(proc_mnt)) { |
115 | unregister_filesystem(&proc_fs_type); | 114 | unregister_filesystem(&proc_fs_type); |
116 | return; | 115 | return; |
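The proc_root_init() hunk above drops a PTR_ERR() whose result was never used — the function returns void, so only the IS_ERR() test matters. Where the error value is actually needed, the usual shape keeps the conversion inside the error branch (names here are illustrative):

struct vfsmount *mnt = kern_mount_data(&example_fs_type, ns);

if (IS_ERR(mnt)) {
        int err = PTR_ERR(mnt);         /* decode only on the error path */
        unregister_filesystem(&example_fs_type);
        return err;
}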
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 47f5b145f56e..aea1d3f1ffb5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -634,6 +634,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
634 | return err; | 634 | return err; |
635 | } | 635 | } |
636 | 636 | ||
637 | #ifdef CONFIG_HUGETLB_PAGE | ||
637 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) | 638 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) |
638 | { | 639 | { |
639 | u64 pme = 0; | 640 | u64 pme = 0; |
@@ -664,6 +665,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
664 | 665 | ||
665 | return err; | 666 | return err; |
666 | } | 667 | } |
668 | #endif /* HUGETLB_PAGE */ | ||
667 | 669 | ||
668 | /* | 670 | /* |
669 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 671 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
@@ -733,7 +735,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
733 | 735 | ||
734 | pagemap_walk.pmd_entry = pagemap_pte_range; | 736 | pagemap_walk.pmd_entry = pagemap_pte_range; |
735 | pagemap_walk.pte_hole = pagemap_pte_hole; | 737 | pagemap_walk.pte_hole = pagemap_pte_hole; |
738 | #ifdef CONFIG_HUGETLB_PAGE | ||
736 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | 739 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; |
740 | #endif | ||
737 | pagemap_walk.mm = mm; | 741 | pagemap_walk.mm = mm; |
738 | pagemap_walk.private = &pm; | 742 | pagemap_walk.private = &pm;
739 | 743 | ||
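The hugetlb walk callback is now only wired up under CONFIG_HUGETLB_PAGE, but the userspace-visible format is unchanged: /proc/pid/pagemap is one 64-bit entry per virtual page, indexed by virtual page number (bit layout in Documentation/vm/pagemap.txt). A small reader sketch:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t entry;
        long psz = sysconf(_SC_PAGESIZE);
        uintptr_t vaddr = (uintptr_t)&entry;            /* any mapped page */
        int fd = open("/proc/self/pagemap", O_RDONLY);

        if (fd < 0)
                return 1;
        /* one 64-bit entry per virtual page, indexed by page number */
        if (pread(fd, &entry, sizeof(entry),
                  (off_t)(vaddr / psz) * sizeof(entry)) != sizeof(entry)) {
                close(fd);
                return 1;
        }
        printf("pagemap entry: %#llx\n", (unsigned long long)entry);
        close(fd);
        return 0;
}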
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 6f30c3d5bcbf..6e8fc62b40a8 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
@@ -77,9 +77,10 @@ out: | |||
77 | 77 | ||
78 | const struct file_operations qnx4_dir_operations = | 78 | const struct file_operations qnx4_dir_operations = |
79 | { | 79 | { |
80 | .llseek = generic_file_llseek, | ||
80 | .read = generic_read_dir, | 81 | .read = generic_read_dir, |
81 | .readdir = qnx4_readdir, | 82 | .readdir = qnx4_readdir, |
82 | .fsync = simple_fsync, | 83 | .fsync = generic_file_fsync, |
83 | }; | 84 | }; |
84 | 85 | ||
85 | const struct inode_operations qnx4_dir_inode_operations = | 86 | const struct inode_operations qnx4_dir_inode_operations = |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 655a4c52b8c3..12c233da1b6b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -228,10 +228,6 @@ static struct hlist_head *dquot_hash; | |||
228 | 228 | ||
229 | struct dqstats dqstats; | 229 | struct dqstats dqstats; |
230 | EXPORT_SYMBOL(dqstats); | 230 | EXPORT_SYMBOL(dqstats); |
231 | #ifdef CONFIG_SMP | ||
232 | struct dqstats *dqstats_pcpu; | ||
233 | EXPORT_SYMBOL(dqstats_pcpu); | ||
234 | #endif | ||
235 | 231 | ||
236 | static qsize_t inode_get_rsv_space(struct inode *inode); | 232 | static qsize_t inode_get_rsv_space(struct inode *inode); |
237 | static void __dquot_initialize(struct inode *inode, int type); | 233 | static void __dquot_initialize(struct inode *inode, int type); |
@@ -584,7 +580,7 @@ out: | |||
584 | } | 580 | } |
585 | EXPORT_SYMBOL(dquot_scan_active); | 581 | EXPORT_SYMBOL(dquot_scan_active); |
586 | 582 | ||
587 | int vfs_quota_sync(struct super_block *sb, int type, int wait) | 583 | int dquot_quota_sync(struct super_block *sb, int type, int wait) |
588 | { | 584 | { |
589 | struct list_head *dirty; | 585 | struct list_head *dirty; |
590 | struct dquot *dquot; | 586 | struct dquot *dquot; |
@@ -656,7 +652,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) | |||
656 | 652 | ||
657 | return 0; | 653 | return 0; |
658 | } | 654 | } |
659 | EXPORT_SYMBOL(vfs_quota_sync); | 655 | EXPORT_SYMBOL(dquot_quota_sync); |
660 | 656 | ||
661 | /* Free unused dquots from cache */ | 657 | /* Free unused dquots from cache */ |
662 | static void prune_dqcache(int count) | 658 | static void prune_dqcache(int count) |
@@ -676,27 +672,10 @@ static void prune_dqcache(int count) | |||
676 | } | 672 | } |
677 | } | 673 | } |
678 | 674 | ||
679 | static int dqstats_read(unsigned int type) | ||
680 | { | ||
681 | int count = 0; | ||
682 | #ifdef CONFIG_SMP | ||
683 | int cpu; | ||
684 | for_each_possible_cpu(cpu) | ||
685 | count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type]; | ||
686 | /* Statistics reading is racy, but absolute accuracy isn't required */ | ||
687 | if (count < 0) | ||
688 | count = 0; | ||
689 | #else | ||
690 | count = dqstats.stat[type]; | ||
691 | #endif | ||
692 | return count; | ||
693 | } | ||
694 | |||
695 | /* | 675 | /* |
696 | * This is called from kswapd when we think we need some | 676 | * This is called from kswapd when we think we need some |
697 | * more memory | 677 | * more memory |
698 | */ | 678 | */ |
699 | |||
700 | static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) | 679 | static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) |
701 | { | 680 | { |
702 | if (nr) { | 681 | if (nr) { |
@@ -704,7 +683,9 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) | |||
704 | prune_dqcache(nr); | 683 | prune_dqcache(nr); |
705 | spin_unlock(&dq_list_lock); | 684 | spin_unlock(&dq_list_lock); |
706 | } | 685 | } |
707 | return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure; | 686 | return ((unsigned) |
687 | percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]) | ||
688 | /100) * sysctl_vfs_cache_pressure; | ||
708 | } | 689 | } |
709 | 690 | ||
710 | static struct shrinker dqcache_shrinker = { | 691 | static struct shrinker dqcache_shrinker = { |
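The dqstats conversion above trades the hand-rolled per-CPU array (and its CONFIG_SMP special case) for the generic percpu_counter API. The pattern, in rough outline — note the shrinker uses the cheap, approximate percpu_counter_read_positive(), while the /proc reporting path further down uses the exact percpu_counter_sum_positive():

#include <linux/percpu_counter.h>

static struct percpu_counter free_dquots;

static int __init stats_init(void)
{
        return percpu_counter_init(&free_dquots, 0);
}

static void stats_exit(void)
{
        percpu_counter_destroy(&free_dquots);
}

static inline void note_free_dquot(void)
{
        percpu_counter_inc(&free_dquots);       /* cheap per-CPU increment */
}

static s64 report_free_dquots(void)
{
        /* exact sum across CPUs, clamped to >= 0 */
        return percpu_counter_sum_positive(&free_dquots);
}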
@@ -1514,11 +1495,13 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) | |||
1514 | /* | 1495 | /* |
1515 | * This operation can block, but only after everything is updated | 1496 | * This operation can block, but only after everything is updated |
1516 | */ | 1497 | */ |
1517 | int __dquot_alloc_space(struct inode *inode, qsize_t number, | 1498 | int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) |
1518 | int warn, int reserve) | ||
1519 | { | 1499 | { |
1520 | int cnt, ret = 0; | 1500 | int cnt, ret = 0; |
1521 | char warntype[MAXQUOTAS]; | 1501 | char warntype[MAXQUOTAS]; |
1502 | int warn = flags & DQUOT_SPACE_WARN; | ||
1503 | int reserve = flags & DQUOT_SPACE_RESERVE; | ||
1504 | int nofail = flags & DQUOT_SPACE_NOFAIL; | ||
1522 | 1505 | ||
1523 | /* | 1506 | /* |
1524 | * First test before acquiring mutex - solves deadlocks when we | 1507 | * First test before acquiring mutex - solves deadlocks when we |
@@ -1539,7 +1522,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, | |||
1539 | continue; | 1522 | continue; |
1540 | ret = check_bdq(inode->i_dquot[cnt], number, !warn, | 1523 | ret = check_bdq(inode->i_dquot[cnt], number, !warn, |
1541 | warntype+cnt); | 1524 | warntype+cnt); |
1542 | if (ret) { | 1525 | if (ret && !nofail) { |
1543 | spin_unlock(&dq_data_lock); | 1526 | spin_unlock(&dq_data_lock); |
1544 | goto out_flush_warn; | 1527 | goto out_flush_warn; |
1545 | } | 1528 | } |
@@ -1638,10 +1621,11 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); | |||
1638 | /* | 1621 | /* |
1639 | * This operation can block, but only after everything is updated | 1622 | * This operation can block, but only after everything is updated |
1640 | */ | 1623 | */ |
1641 | void __dquot_free_space(struct inode *inode, qsize_t number, int reserve) | 1624 | void __dquot_free_space(struct inode *inode, qsize_t number, int flags) |
1642 | { | 1625 | { |
1643 | unsigned int cnt; | 1626 | unsigned int cnt; |
1644 | char warntype[MAXQUOTAS]; | 1627 | char warntype[MAXQUOTAS]; |
1628 | int reserve = flags & DQUOT_SPACE_RESERVE; | ||
1645 | 1629 | ||
1646 | /* First test before acquiring mutex - solves deadlocks when we | 1630 | /* First test before acquiring mutex - solves deadlocks when we |
1647 | * re-enter the quota code and are already holding the mutex */ | 1631 | * re-enter the quota code and are already holding the mutex */ |
@@ -1812,7 +1796,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) | |||
1812 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) | 1796 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) |
1813 | transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); | 1797 | transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); |
1814 | if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) | 1798 | if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) |
1815 | transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_uid, GRPQUOTA); | 1799 | transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); |
1816 | 1800 | ||
1817 | ret = __dquot_transfer(inode, transfer_to); | 1801 | ret = __dquot_transfer(inode, transfer_to); |
1818 | dqput_all(transfer_to); | 1802 | dqput_all(transfer_to); |
@@ -1847,6 +1831,7 @@ const struct dquot_operations dquot_operations = { | |||
1847 | .alloc_dquot = dquot_alloc, | 1831 | .alloc_dquot = dquot_alloc, |
1848 | .destroy_dquot = dquot_destroy, | 1832 | .destroy_dquot = dquot_destroy, |
1849 | }; | 1833 | }; |
1834 | EXPORT_SYMBOL(dquot_operations); | ||
1850 | 1835 | ||
1851 | /* | 1836 | /* |
1852 | * Generic helper for ->open on filesystems supporting disk quotas. | 1837 | * Generic helper for ->open on filesystems supporting disk quotas. |
@@ -1865,7 +1850,7 @@ EXPORT_SYMBOL(dquot_file_open); | |||
1865 | /* | 1850 | /* |
1866 | * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) | 1851 | * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) |
1867 | */ | 1852 | */ |
1868 | int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags) | 1853 | int dquot_disable(struct super_block *sb, int type, unsigned int flags) |
1869 | { | 1854 | { |
1870 | int cnt, ret = 0; | 1855 | int cnt, ret = 0; |
1871 | struct quota_info *dqopt = sb_dqopt(sb); | 1856 | struct quota_info *dqopt = sb_dqopt(sb); |
@@ -1995,14 +1980,15 @@ put_inodes: | |||
1995 | } | 1980 | } |
1996 | return ret; | 1981 | return ret; |
1997 | } | 1982 | } |
1998 | EXPORT_SYMBOL(vfs_quota_disable); | 1983 | EXPORT_SYMBOL(dquot_disable); |
1999 | 1984 | ||
2000 | int vfs_quota_off(struct super_block *sb, int type, int remount) | 1985 | int dquot_quota_off(struct super_block *sb, int type) |
2001 | { | 1986 | { |
2002 | return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED : | 1987 | return dquot_disable(sb, type, |
2003 | (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED)); | 1988 | DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
2004 | } | 1989 | } |
2005 | EXPORT_SYMBOL(vfs_quota_off); | 1990 | EXPORT_SYMBOL(dquot_quota_off); |
1991 | |||
2006 | /* | 1992 | /* |
2007 | * Turn quotas on on a device | 1993 | * Turn quotas on on a device |
2008 | */ | 1994 | */ |
@@ -2120,36 +2106,43 @@ out_fmt: | |||
2120 | } | 2106 | } |
2121 | 2107 | ||
2122 | /* Reenable quotas on remount RW */ | 2108 | /* Reenable quotas on remount RW */ |
2123 | static int vfs_quota_on_remount(struct super_block *sb, int type) | 2109 | int dquot_resume(struct super_block *sb, int type) |
2124 | { | 2110 | { |
2125 | struct quota_info *dqopt = sb_dqopt(sb); | 2111 | struct quota_info *dqopt = sb_dqopt(sb); |
2126 | struct inode *inode; | 2112 | struct inode *inode; |
2127 | int ret; | 2113 | int ret = 0, cnt; |
2128 | unsigned int flags; | 2114 | unsigned int flags; |
2129 | 2115 | ||
2130 | mutex_lock(&dqopt->dqonoff_mutex); | 2116 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
2131 | if (!sb_has_quota_suspended(sb, type)) { | 2117 | if (type != -1 && cnt != type) |
2118 | continue; | ||
2119 | |||
2120 | mutex_lock(&dqopt->dqonoff_mutex); | ||
2121 | if (!sb_has_quota_suspended(sb, cnt)) { | ||
2122 | mutex_unlock(&dqopt->dqonoff_mutex); | ||
2123 | continue; | ||
2124 | } | ||
2125 | inode = dqopt->files[cnt]; | ||
2126 | dqopt->files[cnt] = NULL; | ||
2127 | spin_lock(&dq_state_lock); | ||
2128 | flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | | ||
2129 | DQUOT_LIMITS_ENABLED, | ||
2130 | cnt); | ||
2131 | dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); | ||
2132 | spin_unlock(&dq_state_lock); | ||
2132 | mutex_unlock(&dqopt->dqonoff_mutex); | 2133 | mutex_unlock(&dqopt->dqonoff_mutex); |
2133 | return 0; | ||
2134 | } | ||
2135 | inode = dqopt->files[type]; | ||
2136 | dqopt->files[type] = NULL; | ||
2137 | spin_lock(&dq_state_lock); | ||
2138 | flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | | ||
2139 | DQUOT_LIMITS_ENABLED, type); | ||
2140 | dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type); | ||
2141 | spin_unlock(&dq_state_lock); | ||
2142 | mutex_unlock(&dqopt->dqonoff_mutex); | ||
2143 | 2134 | ||
2144 | flags = dquot_generic_flag(flags, type); | 2135 | flags = dquot_generic_flag(flags, cnt); |
2145 | ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id, | 2136 | ret = vfs_load_quota_inode(inode, cnt, |
2146 | flags); | 2137 | dqopt->info[cnt].dqi_fmt_id, flags); |
2147 | iput(inode); | 2138 | iput(inode); |
2139 | } | ||
2148 | 2140 | ||
2149 | return ret; | 2141 | return ret; |
2150 | } | 2142 | } |
2143 | EXPORT_SYMBOL(dquot_resume); | ||
2151 | 2144 | ||
2152 | int vfs_quota_on_path(struct super_block *sb, int type, int format_id, | 2145 | int dquot_quota_on_path(struct super_block *sb, int type, int format_id, |
2153 | struct path *path) | 2146 | struct path *path) |
2154 | { | 2147 | { |
2155 | int error = security_quota_on(path->dentry); | 2148 | int error = security_quota_on(path->dentry); |
@@ -2164,40 +2157,36 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id, | |||
2164 | DQUOT_LIMITS_ENABLED); | 2157 | DQUOT_LIMITS_ENABLED); |
2165 | return error; | 2158 | return error; |
2166 | } | 2159 | } |
2167 | EXPORT_SYMBOL(vfs_quota_on_path); | 2160 | EXPORT_SYMBOL(dquot_quota_on_path); |
2168 | 2161 | ||
2169 | int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name, | 2162 | int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name) |
2170 | int remount) | ||
2171 | { | 2163 | { |
2172 | struct path path; | 2164 | struct path path; |
2173 | int error; | 2165 | int error; |
2174 | 2166 | ||
2175 | if (remount) | ||
2176 | return vfs_quota_on_remount(sb, type); | ||
2177 | |||
2178 | error = kern_path(name, LOOKUP_FOLLOW, &path); | 2167 | error = kern_path(name, LOOKUP_FOLLOW, &path); |
2179 | if (!error) { | 2168 | if (!error) { |
2180 | error = vfs_quota_on_path(sb, type, format_id, &path); | 2169 | error = dquot_quota_on_path(sb, type, format_id, &path); |
2181 | path_put(&path); | 2170 | path_put(&path); |
2182 | } | 2171 | } |
2183 | return error; | 2172 | return error; |
2184 | } | 2173 | } |
2185 | EXPORT_SYMBOL(vfs_quota_on); | 2174 | EXPORT_SYMBOL(dquot_quota_on); |
2186 | 2175 | ||
2187 | /* | 2176 | /* |
2188 | * More powerful function for turning on quotas allowing setting | 2177 | * More powerful function for turning on quotas allowing setting |
2189 | * of individual quota flags | 2178 | * of individual quota flags |
2190 | */ | 2179 | */ |
2191 | int vfs_quota_enable(struct inode *inode, int type, int format_id, | 2180 | int dquot_enable(struct inode *inode, int type, int format_id, |
2192 | unsigned int flags) | 2181 | unsigned int flags) |
2193 | { | 2182 | { |
2194 | int ret = 0; | 2183 | int ret = 0; |
2195 | struct super_block *sb = inode->i_sb; | 2184 | struct super_block *sb = inode->i_sb; |
2196 | struct quota_info *dqopt = sb_dqopt(sb); | 2185 | struct quota_info *dqopt = sb_dqopt(sb); |
2197 | 2186 | ||
2198 | /* Just unsuspend quotas? */ | 2187 | /* Just unsuspend quotas? */ |
2199 | if (flags & DQUOT_SUSPENDED) | 2188 | BUG_ON(flags & DQUOT_SUSPENDED); |
2200 | return vfs_quota_on_remount(sb, type); | 2189 | |
2201 | if (!flags) | 2190 | if (!flags) |
2202 | return 0; | 2191 | return 0; |
2203 | /* Just updating flags needed? */ | 2192 | /* Just updating flags needed? */ |
@@ -2229,13 +2218,13 @@ out_lock: | |||
2229 | load_quota: | 2218 | load_quota: |
2230 | return vfs_load_quota_inode(inode, type, format_id, flags); | 2219 | return vfs_load_quota_inode(inode, type, format_id, flags); |
2231 | } | 2220 | } |
2232 | EXPORT_SYMBOL(vfs_quota_enable); | 2221 | EXPORT_SYMBOL(dquot_enable); |
2233 | 2222 | ||
2234 | /* | 2223 | /* |
2235 | * This function is used when filesystem needs to initialize quotas | 2224 | * This function is used when filesystem needs to initialize quotas |
2236 | * during mount time. | 2225 | * during mount time. |
2237 | */ | 2226 | */ |
2238 | int vfs_quota_on_mount(struct super_block *sb, char *qf_name, | 2227 | int dquot_quota_on_mount(struct super_block *sb, char *qf_name, |
2239 | int format_id, int type) | 2228 | int format_id, int type) |
2240 | { | 2229 | { |
2241 | struct dentry *dentry; | 2230 | struct dentry *dentry; |
@@ -2261,24 +2250,7 @@ out: | |||
2261 | dput(dentry); | 2250 | dput(dentry); |
2262 | return error; | 2251 | return error; |
2263 | } | 2252 | } |
2264 | EXPORT_SYMBOL(vfs_quota_on_mount); | 2253 | EXPORT_SYMBOL(dquot_quota_on_mount); |
2265 | |||
2266 | /* Wrapper to turn on quotas when remounting rw */ | ||
2267 | int vfs_dq_quota_on_remount(struct super_block *sb) | ||
2268 | { | ||
2269 | int cnt; | ||
2270 | int ret = 0, err; | ||
2271 | |||
2272 | if (!sb->s_qcop || !sb->s_qcop->quota_on) | ||
2273 | return -ENOSYS; | ||
2274 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||
2275 | err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); | ||
2276 | if (err < 0 && !ret) | ||
2277 | ret = err; | ||
2278 | } | ||
2279 | return ret; | ||
2280 | } | ||
2281 | EXPORT_SYMBOL(vfs_dq_quota_on_remount); | ||
2282 | 2254 | ||
2283 | static inline qsize_t qbtos(qsize_t blocks) | 2255 | static inline qsize_t qbtos(qsize_t blocks) |
2284 | { | 2256 | { |
@@ -2313,8 +2285,8 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2313 | spin_unlock(&dq_data_lock); | 2285 | spin_unlock(&dq_data_lock); |
2314 | } | 2286 | } |
2315 | 2287 | ||
2316 | int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, | 2288 | int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, |
2317 | struct fs_disk_quota *di) | 2289 | struct fs_disk_quota *di) |
2318 | { | 2290 | { |
2319 | struct dquot *dquot; | 2291 | struct dquot *dquot; |
2320 | 2292 | ||
@@ -2326,7 +2298,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, | |||
2326 | 2298 | ||
2327 | return 0; | 2299 | return 0; |
2328 | } | 2300 | } |
2329 | EXPORT_SYMBOL(vfs_get_dqblk); | 2301 | EXPORT_SYMBOL(dquot_get_dqblk); |
2330 | 2302 | ||
2331 | #define VFS_FS_DQ_MASK \ | 2303 | #define VFS_FS_DQ_MASK \ |
2332 | (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ | 2304 | (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ |
@@ -2425,7 +2397,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2425 | return 0; | 2397 | return 0; |
2426 | } | 2398 | } |
2427 | 2399 | ||
2428 | int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, | 2400 | int dquot_set_dqblk(struct super_block *sb, int type, qid_t id, |
2429 | struct fs_disk_quota *di) | 2401 | struct fs_disk_quota *di) |
2430 | { | 2402 | { |
2431 | struct dquot *dquot; | 2403 | struct dquot *dquot; |
@@ -2441,10 +2413,10 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
2441 | out: | 2413 | out: |
2442 | return rc; | 2414 | return rc; |
2443 | } | 2415 | } |
2444 | EXPORT_SYMBOL(vfs_set_dqblk); | 2416 | EXPORT_SYMBOL(dquot_set_dqblk); |
2445 | 2417 | ||
2446 | /* Generic routine for getting common part of quota file information */ | 2418 | /* Generic routine for getting common part of quota file information */ |
2447 | int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) | 2419 | int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) |
2448 | { | 2420 | { |
2449 | struct mem_dqinfo *mi; | 2421 | struct mem_dqinfo *mi; |
2450 | 2422 | ||
@@ -2463,10 +2435,10 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) | |||
2463 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); | 2435 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); |
2464 | return 0; | 2436 | return 0; |
2465 | } | 2437 | } |
2466 | EXPORT_SYMBOL(vfs_get_dqinfo); | 2438 | EXPORT_SYMBOL(dquot_get_dqinfo); |
2467 | 2439 | ||
2468 | /* Generic routine for setting common part of quota file information */ | 2440 | /* Generic routine for setting common part of quota file information */ |
2469 | int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) | 2441 | int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) |
2470 | { | 2442 | { |
2471 | struct mem_dqinfo *mi; | 2443 | struct mem_dqinfo *mi; |
2472 | int err = 0; | 2444 | int err = 0; |
@@ -2493,27 +2465,27 @@ out: | |||
2493 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); | 2465 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); |
2494 | return err; | 2466 | return err; |
2495 | } | 2467 | } |
2496 | EXPORT_SYMBOL(vfs_set_dqinfo); | 2468 | EXPORT_SYMBOL(dquot_set_dqinfo); |
2497 | 2469 | ||
2498 | const struct quotactl_ops vfs_quotactl_ops = { | 2470 | const struct quotactl_ops dquot_quotactl_ops = { |
2499 | .quota_on = vfs_quota_on, | 2471 | .quota_on = dquot_quota_on, |
2500 | .quota_off = vfs_quota_off, | 2472 | .quota_off = dquot_quota_off, |
2501 | .quota_sync = vfs_quota_sync, | 2473 | .quota_sync = dquot_quota_sync, |
2502 | .get_info = vfs_get_dqinfo, | 2474 | .get_info = dquot_get_dqinfo, |
2503 | .set_info = vfs_set_dqinfo, | 2475 | .set_info = dquot_set_dqinfo, |
2504 | .get_dqblk = vfs_get_dqblk, | 2476 | .get_dqblk = dquot_get_dqblk, |
2505 | .set_dqblk = vfs_set_dqblk | 2477 | .set_dqblk = dquot_set_dqblk |
2506 | }; | 2478 | }; |
2507 | 2479 | EXPORT_SYMBOL(dquot_quotactl_ops); | |
2508 | 2480 | ||
2509 | static int do_proc_dqstats(struct ctl_table *table, int write, | 2481 | static int do_proc_dqstats(struct ctl_table *table, int write, |
2510 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2482 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2511 | { | 2483 | { |
2512 | #ifdef CONFIG_SMP | ||
2513 | /* Update global table */ | ||
2514 | unsigned int type = (int *)table->data - dqstats.stat; | 2484 | unsigned int type = (int *)table->data - dqstats.stat; |
2515 | dqstats.stat[type] = dqstats_read(type); | 2485 | |
2516 | #endif | 2486 | /* Update global table */ |
2487 | dqstats.stat[type] = | ||
2488 | percpu_counter_sum_positive(&dqstats.counter[type]); | ||
2517 | return proc_dointvec(table, write, buffer, lenp, ppos); | 2489 | return proc_dointvec(table, write, buffer, lenp, ppos); |
2518 | } | 2490 | } |
2519 | 2491 | ||
@@ -2606,7 +2578,7 @@ static ctl_table sys_table[] = { | |||
2606 | 2578 | ||
2607 | static int __init dquot_init(void) | 2579 | static int __init dquot_init(void) |
2608 | { | 2580 | { |
2609 | int i; | 2581 | int i, ret; |
2610 | unsigned long nr_hash, order; | 2582 | unsigned long nr_hash, order; |
2611 | 2583 | ||
2612 | printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); | 2584 | printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); |
@@ -2624,12 +2596,11 @@ static int __init dquot_init(void) | |||
2624 | if (!dquot_hash) | 2596 | if (!dquot_hash) |
2625 | panic("Cannot create dquot hash table"); | 2597 | panic("Cannot create dquot hash table"); |
2626 | 2598 | ||
2627 | #ifdef CONFIG_SMP | 2599 | for (i = 0; i < _DQST_DQSTAT_LAST; i++) { |
2628 | dqstats_pcpu = alloc_percpu(struct dqstats); | 2600 | ret = percpu_counter_init(&dqstats.counter[i], 0); |
2629 | if (!dqstats_pcpu) | 2601 | if (ret) |
2630 | panic("Cannot create dquot stats table"); | 2602 | panic("Cannot create dquot stat counters"); |
2631 | #endif | 2603 | } |
2632 | memset(&dqstats, 0, sizeof(struct dqstats)); | ||
2633 | 2604 | ||
2634 | /* Find power-of-two hlist_heads which can fit into allocation */ | 2605 | /* Find power-of-two hlist_heads which can fit into allocation */ |
2635 | nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); | 2606 | nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); |
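With the vfs_quota_* entry points renamed to dquot_*, and dquot_operations / dquot_quotactl_ops now exported, a filesystem that wants the stock quota implementation wires it up at fill_super time roughly like this (example_fill_super is an illustrative name):

static int example_fill_super(struct super_block *sb, void *data, int silent)
{
        /* ... read the on-disk super, set s_op, etc. ... */
        sb->dq_op = &dquot_operations;          /* per-dquot callbacks */
        sb->s_qcop = &dquot_quotactl_ops;       /* quotactl(2) entry points */
        return 0;
}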
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index ce3dfd066f59..b299961e1edb 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -73,7 +73,7 @@ static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, | |||
73 | if (IS_ERR(pathname)) | 73 | if (IS_ERR(pathname)) |
74 | return PTR_ERR(pathname); | 74 | return PTR_ERR(pathname); |
75 | if (sb->s_qcop->quota_on) | 75 | if (sb->s_qcop->quota_on) |
76 | ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0); | 76 | ret = sb->s_qcop->quota_on(sb, type, id, pathname); |
77 | putname(pathname); | 77 | putname(pathname); |
78 | return ret; | 78 | return ret; |
79 | } | 79 | } |
@@ -260,7 +260,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | |||
260 | case Q_QUOTAOFF: | 260 | case Q_QUOTAOFF: |
261 | if (!sb->s_qcop->quota_off) | 261 | if (!sb->s_qcop->quota_off) |
262 | return -ENOSYS; | 262 | return -ENOSYS; |
263 | return sb->s_qcop->quota_off(sb, type, 0); | 263 | return sb->s_qcop->quota_off(sb, type); |
264 | case Q_GETFMT: | 264 | case Q_GETFMT: |
265 | return quota_getfmt(sb, type, addr); | 265 | return quota_getfmt(sb, type, addr); |
266 | case Q_GETINFO: | 266 | case Q_GETINFO: |
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 78f613cb9c76..4884ac5ae9be 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c | |||
@@ -43,12 +43,13 @@ const struct file_operations ramfs_file_operations = { | |||
43 | .write = do_sync_write, | 43 | .write = do_sync_write, |
44 | .aio_write = generic_file_aio_write, | 44 | .aio_write = generic_file_aio_write, |
45 | .mmap = generic_file_mmap, | 45 | .mmap = generic_file_mmap, |
46 | .fsync = simple_sync_file, | 46 | .fsync = noop_fsync, |
47 | .splice_read = generic_file_splice_read, | 47 | .splice_read = generic_file_splice_read, |
48 | .splice_write = generic_file_splice_write, | 48 | .splice_write = generic_file_splice_write, |
49 | .llseek = generic_file_llseek, | 49 | .llseek = generic_file_llseek, |
50 | }; | 50 | }; |
51 | 51 | ||
52 | const struct inode_operations ramfs_file_inode_operations = { | 52 | const struct inode_operations ramfs_file_inode_operations = { |
53 | .setattr = simple_setattr, | ||
53 | .getattr = simple_getattr, | 54 | .getattr = simple_getattr, |
54 | }; | 55 | }; |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 5ea4ad81a429..d532c20fc179 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = { | |||
42 | .aio_read = generic_file_aio_read, | 42 | .aio_read = generic_file_aio_read, |
43 | .write = do_sync_write, | 43 | .write = do_sync_write, |
44 | .aio_write = generic_file_aio_write, | 44 | .aio_write = generic_file_aio_write, |
45 | .fsync = simple_sync_file, | 45 | .fsync = noop_fsync, |
46 | .splice_read = generic_file_splice_read, | 46 | .splice_read = generic_file_splice_read, |
47 | .splice_write = generic_file_splice_write, | 47 | .splice_write = generic_file_splice_write, |
48 | .llseek = generic_file_llseek, | 48 | .llseek = generic_file_llseek, |
@@ -146,7 +146,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) | |||
146 | return ret; | 146 | return ret; |
147 | } | 147 | } |
148 | 148 | ||
149 | ret = vmtruncate(inode, newsize); | 149 | ret = simple_setsize(inode, newsize); |
150 | 150 | ||
151 | return ret; | 151 | return ret; |
152 | } | 152 | } |
@@ -169,7 +169,8 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) | |||
169 | 169 | ||
170 | /* pick out size-changing events */ | 170 | /* pick out size-changing events */ |
171 | if (ia->ia_valid & ATTR_SIZE) { | 171 | if (ia->ia_valid & ATTR_SIZE) { |
172 | loff_t size = i_size_read(inode); | 172 | loff_t size = inode->i_size; |
173 | |||
173 | if (ia->ia_size != size) { | 174 | if (ia->ia_size != size) { |
174 | ret = ramfs_nommu_resize(inode, ia->ia_size, size); | 175 | ret = ramfs_nommu_resize(inode, ia->ia_size, size); |
175 | if (ret < 0 || ia->ia_valid == ATTR_SIZE) | 176 | if (ret < 0 || ia->ia_valid == ATTR_SIZE) |
@@ -182,7 +183,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) | |||
182 | } | 183 | } |
183 | } | 184 | } |
184 | 185 | ||
185 | ret = inode_setattr(inode, ia); | 186 | generic_setattr(inode, ia); |
186 | out: | 187 | out: |
187 | ia->ia_valid = old_ia_valid; | 188 | ia->ia_valid = old_ia_valid; |
188 | return ret; | 189 | return ret; |
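The nommu ramfs setattr above now splits the work between simple_setsize() for the size change and generic_setattr() for everything else. Stripped to its skeleton, the pattern for a simple in-memory filesystem looks like this — a sketch against this cycle's short-lived helpers, both of which were reworked again in later trees:

static int example_setattr(struct dentry *dentry, struct iattr *ia)
{
        struct inode *inode = dentry->d_inode;
        int ret = inode_change_ok(inode, ia);

        if (ret)
                return ret;

        if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size != inode->i_size) {
                ret = simple_setsize(inode, ia->ia_size);
                if (ret)
                        return ret;
        }

        generic_setattr(inode, ia);     /* copies the remaining attributes */
        return 0;
}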
diff --git a/fs/read_write.c b/fs/read_write.c index 113386d6fd2d..9c0485236e68 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -97,6 +97,23 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | |||
97 | } | 97 | } |
98 | EXPORT_SYMBOL(generic_file_llseek); | 98 | EXPORT_SYMBOL(generic_file_llseek); |
99 | 99 | ||
100 | /** | ||
101 | * noop_llseek - No Operation Performed llseek implementation | ||
102 | * @file: file structure to seek on | ||
103 | * @offset: file offset to seek to | ||
104 | * @origin: type of seek | ||
105 | * | ||
106 | * This is an implementation of ->llseek usable for the rare special case when | ||
107 | * userspace expects the seek to succeed but the (device) file is actually not | ||
108 | * able to perform the seek. In this case you use noop_llseek() instead of | ||
109 | * falling back to the default implementation of ->llseek. | ||
110 | */ | ||
111 | loff_t noop_llseek(struct file *file, loff_t offset, int origin) | ||
112 | { | ||
113 | return file->f_pos; | ||
114 | } | ||
115 | EXPORT_SYMBOL(noop_llseek); | ||
116 | |||
100 | loff_t no_llseek(struct file *file, loff_t offset, int origin) | 117 | loff_t no_llseek(struct file *file, loff_t offset, int origin) |
101 | { | 118 | { |
102 | return -ESPIPE; | 119 | return -ESPIPE; |
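noop_llseek() gives drivers a third option besides a real implementation and no_llseek(): the seek "succeeds" but the position never moves. A sketch of where each fits in a file_operations table (dummy_read and the fops names are illustrative):

#include <linux/fs.h>
#include <linux/module.h>

static ssize_t dummy_read(struct file *filp, char __user *buf,
                          size_t len, loff_t *ppos)
{
        return 0;       /* placeholder body for the sketch */
}

/* userspace expects lseek() to succeed, but position is meaningless */
static const struct file_operations tolerant_fops = {
        .owner  = THIS_MODULE,
        .llseek = noop_llseek,  /* returns file->f_pos, never moves it */
        .read   = dummy_read,
};

/* strictly non-seekable: lseek() fails with -ESPIPE */
static const struct file_operations strict_fops = {
        .owner  = THIS_MODULE,
        .llseek = no_llseek,
        .read   = dummy_read,
};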
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 07930449a958..198dabf1b2bb 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -14,10 +14,10 @@ | |||
14 | extern const struct reiserfs_key MIN_KEY; | 14 | extern const struct reiserfs_key MIN_KEY; |
15 | 15 | ||
16 | static int reiserfs_readdir(struct file *, void *, filldir_t); | 16 | static int reiserfs_readdir(struct file *, void *, filldir_t); |
17 | static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, | 17 | static int reiserfs_dir_fsync(struct file *filp, int datasync); |
18 | int datasync); | ||
19 | 18 | ||
20 | const struct file_operations reiserfs_dir_operations = { | 19 | const struct file_operations reiserfs_dir_operations = { |
20 | .llseek = generic_file_llseek, | ||
21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
22 | .readdir = reiserfs_readdir, | 22 | .readdir = reiserfs_readdir, |
23 | .fsync = reiserfs_dir_fsync, | 23 | .fsync = reiserfs_dir_fsync, |
@@ -27,10 +27,9 @@ const struct file_operations reiserfs_dir_operations = { | |||
27 | #endif | 27 | #endif |
28 | }; | 28 | }; |
29 | 29 | ||
30 | static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, | 30 | static int reiserfs_dir_fsync(struct file *filp, int datasync) |
31 | int datasync) | ||
32 | { | 31 | { |
33 | struct inode *inode = dentry->d_inode; | 32 | struct inode *inode = filp->f_mapping->host; |
34 | int err; | 33 | int err; |
35 | reiserfs_write_lock(inode->i_sb); | 34 | reiserfs_write_lock(inode->i_sb); |
36 | err = reiserfs_commit_for_inode(inode); | 35 | err = reiserfs_commit_for_inode(inode); |
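This is the same ->fsync prototype change applied throughout the series: the dentry argument is gone, and implementations that need the inode take it from the file's mapping. The converted shape, reduced to a sketch:

static int example_fsync(struct file *filp, int datasync)
{
        struct inode *inode = filp->f_mapping->host;    /* was dentry->d_inode */

        /* write back whatever this inode needs; datasync still means
         * "non-essential metadata may be skipped", as before */
        return 0;
}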
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9977df9f3a54..b82cdd8a45dd 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -134,10 +134,9 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) | |||
134 | * be removed... | 134 | * be removed... |
135 | */ | 135 | */ |
136 | 136 | ||
137 | static int reiserfs_sync_file(struct file *filp, | 137 | static int reiserfs_sync_file(struct file *filp, int datasync) |
138 | struct dentry *dentry, int datasync) | ||
139 | { | 138 | { |
140 | struct inode *inode = dentry->d_inode; | 139 | struct inode *inode = filp->f_mapping->host; |
141 | int err; | 140 | int err; |
142 | int barrier_done; | 141 | int barrier_done; |
143 | 142 | ||
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 59125fb36d42..9822fa15118b 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -158,6 +158,7 @@ static int finish_unfinished(struct super_block *s) | |||
158 | #ifdef CONFIG_QUOTA | 158 | #ifdef CONFIG_QUOTA |
159 | int i; | 159 | int i; |
160 | int ms_active_set; | 160 | int ms_active_set; |
161 | int quota_enabled[MAXQUOTAS]; | ||
161 | #endif | 162 | #endif |
162 | 163 | ||
163 | /* compose key to look for "save" links */ | 164 | /* compose key to look for "save" links */ |
@@ -179,8 +180,15 @@ static int finish_unfinished(struct super_block *s) | |||
179 | } | 180 | } |
180 | /* Turn on quotas so that they are updated correctly */ | 181 | /* Turn on quotas so that they are updated correctly */ |
181 | for (i = 0; i < MAXQUOTAS; i++) { | 182 | for (i = 0; i < MAXQUOTAS; i++) { |
183 | quota_enabled[i] = 1; | ||
182 | if (REISERFS_SB(s)->s_qf_names[i]) { | 184 | if (REISERFS_SB(s)->s_qf_names[i]) { |
183 | int ret = reiserfs_quota_on_mount(s, i); | 185 | int ret; |
186 | |||
187 | if (sb_has_quota_active(s, i)) { | ||
188 | quota_enabled[i] = 0; | ||
189 | continue; | ||
190 | } | ||
191 | ret = reiserfs_quota_on_mount(s, i); | ||
184 | if (ret < 0) | 192 | if (ret < 0) |
185 | reiserfs_warning(s, "reiserfs-2500", | 193 | reiserfs_warning(s, "reiserfs-2500", |
186 | "cannot turn on journaled " | 194 | "cannot turn on journaled " |
@@ -304,8 +312,8 @@ static int finish_unfinished(struct super_block *s) | |||
304 | #ifdef CONFIG_QUOTA | 312 | #ifdef CONFIG_QUOTA |
305 | /* Turn quotas off */ | 313 | /* Turn quotas off */ |
306 | for (i = 0; i < MAXQUOTAS; i++) { | 314 | for (i = 0; i < MAXQUOTAS; i++) { |
307 | if (sb_dqopt(s)->files[i]) | 315 | if (sb_dqopt(s)->files[i] && quota_enabled[i]) |
308 | vfs_quota_off(s, i, 0); | 316 | dquot_quota_off(s, i); |
309 | } | 317 | } |
310 | if (ms_active_set) | 318 | if (ms_active_set) |
311 | /* Restore the flag back */ | 319 | /* Restore the flag back */ |
@@ -466,6 +474,8 @@ static void reiserfs_put_super(struct super_block *s) | |||
466 | struct reiserfs_transaction_handle th; | 474 | struct reiserfs_transaction_handle th; |
467 | th.t_trans_id = 0; | 475 | th.t_trans_id = 0; |
468 | 476 | ||
477 | dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | ||
478 | |||
469 | reiserfs_write_lock(s); | 479 | reiserfs_write_lock(s); |
470 | 480 | ||
471 | if (s->s_dirt) | 481 | if (s->s_dirt) |
@@ -620,7 +630,7 @@ static int reiserfs_acquire_dquot(struct dquot *); | |||
620 | static int reiserfs_release_dquot(struct dquot *); | 630 | static int reiserfs_release_dquot(struct dquot *); |
621 | static int reiserfs_mark_dquot_dirty(struct dquot *); | 631 | static int reiserfs_mark_dquot_dirty(struct dquot *); |
622 | static int reiserfs_write_info(struct super_block *, int); | 632 | static int reiserfs_write_info(struct super_block *, int); |
623 | static int reiserfs_quota_on(struct super_block *, int, int, char *, int); | 633 | static int reiserfs_quota_on(struct super_block *, int, int, char *); |
624 | 634 | ||
625 | static const struct dquot_operations reiserfs_quota_operations = { | 635 | static const struct dquot_operations reiserfs_quota_operations = { |
626 | .write_dquot = reiserfs_write_dquot, | 636 | .write_dquot = reiserfs_write_dquot, |
@@ -634,12 +644,12 @@ static const struct dquot_operations reiserfs_quota_operations = { | |||
634 | 644 | ||
635 | static const struct quotactl_ops reiserfs_qctl_operations = { | 645 | static const struct quotactl_ops reiserfs_qctl_operations = { |
636 | .quota_on = reiserfs_quota_on, | 646 | .quota_on = reiserfs_quota_on, |
637 | .quota_off = vfs_quota_off, | 647 | .quota_off = dquot_quota_off, |
638 | .quota_sync = vfs_quota_sync, | 648 | .quota_sync = dquot_quota_sync, |
639 | .get_info = vfs_get_dqinfo, | 649 | .get_info = dquot_get_dqinfo, |
640 | .set_info = vfs_set_dqinfo, | 650 | .set_info = dquot_set_dqinfo, |
641 | .get_dqblk = vfs_get_dqblk, | 651 | .get_dqblk = dquot_get_dqblk, |
642 | .set_dqblk = vfs_set_dqblk, | 652 | .set_dqblk = dquot_set_dqblk, |
643 | }; | 653 | }; |
644 | #endif | 654 | #endif |
645 | 655 | ||
@@ -1242,6 +1252,11 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1242 | if (s->s_flags & MS_RDONLY) | 1252 | if (s->s_flags & MS_RDONLY) |
1243 | /* it is read-only already */ | 1253 | /* it is read-only already */ |
1244 | goto out_ok; | 1254 | goto out_ok; |
1255 | |||
1256 | err = dquot_suspend(s, -1); | ||
1257 | if (err < 0) | ||
1258 | goto out_err; | ||
1259 | |||
1245 | /* try to remount file system with read-only permissions */ | 1260 | /* try to remount file system with read-only permissions */ |
1246 | if (sb_umount_state(rs) == REISERFS_VALID_FS | 1261 | if (sb_umount_state(rs) == REISERFS_VALID_FS |
1247 | || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { | 1262 | || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { |
@@ -1295,6 +1310,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1295 | s->s_dirt = 0; | 1310 | s->s_dirt = 0; |
1296 | 1311 | ||
1297 | if (!(*mount_flags & MS_RDONLY)) { | 1312 | if (!(*mount_flags & MS_RDONLY)) { |
1313 | dquot_resume(s, -1); | ||
1298 | finish_unfinished(s); | 1314 | finish_unfinished(s); |
1299 | reiserfs_xattr_init(s, *mount_flags); | 1315 | reiserfs_xattr_init(s, *mount_flags); |
1300 | } | 1316 | } |
@@ -2022,15 +2038,15 @@ static int reiserfs_write_info(struct super_block *sb, int type) | |||
2022 | */ | 2038 | */ |
2023 | static int reiserfs_quota_on_mount(struct super_block *sb, int type) | 2039 | static int reiserfs_quota_on_mount(struct super_block *sb, int type) |
2024 | { | 2040 | { |
2025 | return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], | 2041 | return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], |
2026 | REISERFS_SB(sb)->s_jquota_fmt, type); | 2042 | REISERFS_SB(sb)->s_jquota_fmt, type); |
2027 | } | 2043 | } |
2028 | 2044 | ||
2029 | /* | 2045 | /* |
2030 | * Standard function to be called on quota_on | 2046 | * Standard function to be called on quota_on |
2031 | */ | 2047 | */ |
2032 | static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | 2048 | static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, |
2033 | char *name, int remount) | 2049 | char *name) |
2034 | { | 2050 | { |
2035 | int err; | 2051 | int err; |
2036 | struct path path; | 2052 | struct path path; |
@@ -2039,9 +2055,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2039 | 2055 | ||
2040 | if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) | 2056 | if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) |
2041 | return -EINVAL; | 2057 | return -EINVAL; |
2042 | /* No more checks needed? Path and format_id are bogus anyway... */ | 2058 | |
2043 | if (remount) | ||
2044 | return vfs_quota_on(sb, type, format_id, name, 1); | ||
2045 | err = kern_path(name, LOOKUP_FOLLOW, &path); | 2059 | err = kern_path(name, LOOKUP_FOLLOW, &path); |
2046 | if (err) | 2060 | if (err) |
2047 | return err; | 2061 | return err; |
@@ -2085,7 +2099,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2085 | if (err) | 2099 | if (err) |
2086 | goto out; | 2100 | goto out; |
2087 | } | 2101 | } |
2088 | err = vfs_quota_on_path(sb, type, format_id, &path); | 2102 | err = dquot_quota_on_path(sb, type, format_id, &path); |
2089 | out: | 2103 | out: |
2090 | path_put(&path); | 2104 | path_put(&path); |
2091 | return err; | 2105 | return err; |
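The remount hunks above follow the contract the quota rework now expects from filesystems: suspend quotas before dropping to read-only, resume them when going writable again. In outline (example_remount is an illustrative name; -1 selects all quota types):

static int example_remount(struct super_block *sb, int *mount_flags, char *arg)
{
        int err;

        if (*mount_flags & MS_RDONLY) {
                err = dquot_suspend(sb, -1);
                if (err < 0)
                        return err;
                /* ... drop to read-only ... */
        } else {
                /* ... make writable ... */
                dquot_resume(sb, -1);
        }
        return 0;
}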
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 3e4803b4427e..00a70cab1f36 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c | |||
@@ -37,9 +37,10 @@ static int smb_link(struct dentry *, struct inode *, struct dentry *); | |||
37 | 37 | ||
38 | const struct file_operations smb_dir_operations = | 38 | const struct file_operations smb_dir_operations = |
39 | { | 39 | { |
40 | .llseek = generic_file_llseek, | ||
40 | .read = generic_read_dir, | 41 | .read = generic_read_dir, |
41 | .readdir = smb_readdir, | 42 | .readdir = smb_readdir, |
42 | .ioctl = smb_ioctl, | 43 | .unlocked_ioctl = smb_ioctl, |
43 | .open = smb_dir_open, | 44 | .open = smb_dir_open, |
44 | }; | 45 | }; |
45 | 46 | ||
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index dbf6548bbf06..8e187a0f94bb 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c | |||
@@ -28,8 +28,9 @@ | |||
28 | #include "proto.h" | 28 | #include "proto.h" |
29 | 29 | ||
30 | static int | 30 | static int |
31 | smb_fsync(struct file *file, struct dentry * dentry, int datasync) | 31 | smb_fsync(struct file *file, int datasync) |
32 | { | 32 | { |
33 | struct dentry *dentry = file->f_path.dentry; | ||
33 | struct smb_sb_info *server = server_from_dentry(dentry); | 34 | struct smb_sb_info *server = server_from_dentry(dentry); |
34 | int result; | 35 | int result; |
35 | 36 | ||
@@ -437,7 +438,7 @@ const struct file_operations smb_file_operations = | |||
437 | .aio_read = smb_file_aio_read, | 438 | .aio_read = smb_file_aio_read, |
438 | .write = do_sync_write, | 439 | .write = do_sync_write, |
439 | .aio_write = smb_file_aio_write, | 440 | .aio_write = smb_file_aio_write, |
440 | .ioctl = smb_ioctl, | 441 | .unlocked_ioctl = smb_ioctl, |
441 | .mmap = smb_file_mmap, | 442 | .mmap = smb_file_mmap, |
442 | .open = smb_file_open, | 443 | .open = smb_file_open, |
443 | .release = smb_file_release, | 444 | .release = smb_file_release, |
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index dfa1d67f8fca..9551cb6f7fe4 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -714,7 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) | |||
714 | error = server->ops->truncate(inode, attr->ia_size); | 714 | error = server->ops->truncate(inode, attr->ia_size); |
715 | if (error) | 715 | if (error) |
716 | goto out; | 716 | goto out; |
717 | error = vmtruncate(inode, attr->ia_size); | 717 | error = simple_setsize(inode, attr->ia_size); |
718 | if (error) | 718 | if (error) |
719 | goto out; | 719 | goto out; |
720 | refresh = 1; | 720 | refresh = 1; |
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c index dbae1f8ea26f..07215312ad39 100644 --- a/fs/smbfs/ioctl.c +++ b/fs/smbfs/ioctl.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/highuid.h> | 15 | #include <linux/highuid.h> |
16 | #include <linux/smp_lock.h> | ||
16 | #include <linux/net.h> | 17 | #include <linux/net.h> |
17 | 18 | ||
18 | #include <linux/smb_fs.h> | 19 | #include <linux/smb_fs.h> |
@@ -22,14 +23,14 @@ | |||
22 | 23 | ||
23 | #include "proto.h" | 24 | #include "proto.h" |
24 | 25 | ||
25 | int | 26 | long |
26 | smb_ioctl(struct inode *inode, struct file *filp, | 27 | smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
27 | unsigned int cmd, unsigned long arg) | ||
28 | { | 28 | { |
29 | struct smb_sb_info *server = server_from_inode(inode); | 29 | struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode); |
30 | struct smb_conn_opt opt; | 30 | struct smb_conn_opt opt; |
31 | int result = -EINVAL; | 31 | int result = -EINVAL; |
32 | 32 | ||
33 | lock_kernel(); | ||
33 | switch (cmd) { | 34 | switch (cmd) { |
34 | uid16_t uid16; | 35 | uid16_t uid16; |
35 | uid_t uid32; | 36 | uid_t uid32; |
@@ -62,6 +63,7 @@ smb_ioctl(struct inode *inode, struct file *filp, | |||
62 | default: | 63 | default: |
63 | break; | 64 | break; |
64 | } | 65 | } |
66 | unlock_kernel(); | ||
65 | 67 | ||
66 | return result; | 68 | return result; |
67 | } | 69 | } |
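smb_ioctl is converted from the old inode/file ->ioctl to ->unlocked_ioctl, which no longer takes the BKL on the handler's behalf; until the code is audited, the handler brackets itself with lock_kernel()/unlock_kernel(), exactly as the hunk above does. The general conversion shape:

#include <linux/smp_lock.h>

static long example_unlocked_ioctl(struct file *filp, unsigned int cmd,
                                   unsigned long arg)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        long result = -EINVAL;

        lock_kernel();          /* keep the BKL the old ->ioctl provided */
        switch (cmd) {
        /* ... command cases, using inode and filp as before ... */
        default:
                break;
        }
        unlock_kernel();
        return result;
}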
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index 03f456c1b7d4..05939a6f43e6 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h | |||
@@ -67,7 +67,7 @@ extern const struct address_space_operations smb_file_aops; | |||
67 | extern const struct file_operations smb_file_operations; | 67 | extern const struct file_operations smb_file_operations; |
68 | extern const struct inode_operations smb_file_inode_operations; | 68 | extern const struct inode_operations smb_file_inode_operations; |
69 | /* ioctl.c */ | 69 | /* ioctl.c */ |
70 | extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); | 70 | extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
71 | /* smbiod.c */ | 71 | /* smbiod.c */ |
72 | extern void smbiod_wake_up(void); | 72 | extern void smbiod_wake_up(void); |
73 | extern int smbiod_register_server(struct smb_sb_info *server); | 73 | extern int smbiod_register_server(struct smb_sb_info *server); |
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c index 54350b59046b..00b2909bd469 100644 --- a/fs/smbfs/symlink.c +++ b/fs/smbfs/symlink.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/pagemap.h> | 15 | #include <linux/pagemap.h> |
16 | #include <linux/net.h> | 16 | #include <linux/net.h> |
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/slab.h> | ||
19 | 18 | ||
20 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
21 | #include <asm/system.h> | 20 | #include <asm/system.h> |
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 25a00d19d686..cc6ce8a84c21 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
@@ -26,6 +26,17 @@ config SQUASHFS | |||
26 | 26 | ||
27 | If unsure, say N. | 27 | If unsure, say N. |
28 | 28 | ||
29 | config SQUASHFS_XATTRS | ||
30 | bool "Squashfs XATTR support" | ||
31 | depends on SQUASHFS | ||
32 | default n | ||
33 | help | ||
34 | Saying Y here includes support for extended attributes (xattrs). | ||
35 | Xattrs are name:value pairs associated with inodes by | ||
36 | the kernel or by users (see the attr(5) manual page). | ||
37 | |||
38 | If unsure, say N. | ||
39 | |||
29 | config SQUASHFS_EMBEDDED | 40 | config SQUASHFS_EMBEDDED |
30 | 41 | ||
31 | bool "Additional option for memory-constrained systems" | 42 | bool "Additional option for memory-constrained systems" |
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index df8a19ef870d..2cee3e9fa452 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
@@ -5,3 +5,5 @@ | |||
5 | obj-$(CONFIG_SQUASHFS) += squashfs.o | 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o |
6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o |
7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o |
8 | squashfs-$(CONFIG_SQUASHFS_XATTRS) += xattr.o xattr_id.o | ||
9 | |||
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 49daaf669e41..62e63ad25075 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c | |||
@@ -40,11 +40,13 @@ | |||
40 | 40 | ||
41 | #include <linux/fs.h> | 41 | #include <linux/fs.h> |
42 | #include <linux/vfs.h> | 42 | #include <linux/vfs.h> |
43 | #include <linux/xattr.h> | ||
43 | 44 | ||
44 | #include "squashfs_fs.h" | 45 | #include "squashfs_fs.h" |
45 | #include "squashfs_fs_sb.h" | 46 | #include "squashfs_fs_sb.h" |
46 | #include "squashfs_fs_i.h" | 47 | #include "squashfs_fs_i.h" |
47 | #include "squashfs.h" | 48 | #include "squashfs.h" |
49 | #include "xattr.h" | ||
48 | 50 | ||
49 | /* | 51 | /* |
50 | * Initialise VFS inode with the base inode information common to all | 52 | * Initialise VFS inode with the base inode information common to all |
@@ -111,6 +113,7 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
111 | int err, type, offset = SQUASHFS_INODE_OFFSET(ino); | 113 | int err, type, offset = SQUASHFS_INODE_OFFSET(ino); |
112 | union squashfs_inode squashfs_ino; | 114 | union squashfs_inode squashfs_ino; |
113 | struct squashfs_base_inode *sqshb_ino = &squashfs_ino.base; | 115 | struct squashfs_base_inode *sqshb_ino = &squashfs_ino.base; |
116 | int xattr_id = SQUASHFS_INVALID_XATTR; | ||
114 | 117 | ||
115 | TRACE("Entered squashfs_read_inode\n"); | 118 | TRACE("Entered squashfs_read_inode\n"); |
116 | 119 | ||
@@ -199,8 +202,10 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
199 | frag_offset = 0; | 202 | frag_offset = 0; |
200 | } | 203 | } |
201 | 204 | ||
205 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
202 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | 206 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); |
203 | inode->i_size = le64_to_cpu(sqsh_ino->file_size); | 207 | inode->i_size = le64_to_cpu(sqsh_ino->file_size); |
208 | inode->i_op = &squashfs_inode_ops; | ||
204 | inode->i_fop = &generic_ro_fops; | 209 | inode->i_fop = &generic_ro_fops; |
205 | inode->i_mode |= S_IFREG; | 210 | inode->i_mode |= S_IFREG; |
206 | inode->i_blocks = ((inode->i_size - | 211 | inode->i_blocks = ((inode->i_size - |
@@ -251,6 +256,7 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
251 | if (err < 0) | 256 | if (err < 0) |
252 | goto failed_read; | 257 | goto failed_read; |
253 | 258 | ||
259 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
254 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | 260 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); |
255 | inode->i_size = le32_to_cpu(sqsh_ino->file_size); | 261 | inode->i_size = le32_to_cpu(sqsh_ino->file_size); |
256 | inode->i_op = &squashfs_dir_inode_ops; | 262 | inode->i_op = &squashfs_dir_inode_ops; |
@@ -280,21 +286,33 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
280 | 286 | ||
281 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | 287 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); |
282 | inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); | 288 | inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); |
283 | inode->i_op = &page_symlink_inode_operations; | 289 | inode->i_op = &squashfs_symlink_inode_ops; |
284 | inode->i_data.a_ops = &squashfs_symlink_aops; | 290 | inode->i_data.a_ops = &squashfs_symlink_aops; |
285 | inode->i_mode |= S_IFLNK; | 291 | inode->i_mode |= S_IFLNK; |
286 | squashfs_i(inode)->start = block; | 292 | squashfs_i(inode)->start = block; |
287 | squashfs_i(inode)->offset = offset; | 293 | squashfs_i(inode)->offset = offset; |
288 | 294 | ||
295 | if (type == SQUASHFS_LSYMLINK_TYPE) { | ||
296 | __le32 xattr; | ||
297 | |||
298 | err = squashfs_read_metadata(sb, NULL, &block, | ||
299 | &offset, inode->i_size); | ||
300 | if (err < 0) | ||
301 | goto failed_read; | ||
302 | err = squashfs_read_metadata(sb, &xattr, &block, | ||
303 | &offset, sizeof(xattr)); | ||
304 | if (err < 0) | ||
305 | goto failed_read; | ||
306 | xattr_id = le32_to_cpu(xattr); | ||
307 | } | ||
308 | |||
289 | TRACE("Symbolic link inode %x:%x, start_block %llx, offset " | 309 | TRACE("Symbolic link inode %x:%x, start_block %llx, offset " |
290 | "%x\n", SQUASHFS_INODE_BLK(ino), offset, | 310 | "%x\n", SQUASHFS_INODE_BLK(ino), offset, |
291 | block, offset); | 311 | block, offset); |
292 | break; | 312 | break; |
293 | } | 313 | } |
294 | case SQUASHFS_BLKDEV_TYPE: | 314 | case SQUASHFS_BLKDEV_TYPE: |
295 | case SQUASHFS_CHRDEV_TYPE: | 315 | case SQUASHFS_CHRDEV_TYPE: { |
296 | case SQUASHFS_LBLKDEV_TYPE: | ||
297 | case SQUASHFS_LCHRDEV_TYPE: { | ||
298 | struct squashfs_dev_inode *sqsh_ino = &squashfs_ino.dev; | 316 | struct squashfs_dev_inode *sqsh_ino = &squashfs_ino.dev; |
299 | unsigned int rdev; | 317 | unsigned int rdev; |
300 | 318 | ||
@@ -315,10 +333,32 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
315 | SQUASHFS_INODE_BLK(ino), offset, rdev); | 333 | SQUASHFS_INODE_BLK(ino), offset, rdev); |
316 | break; | 334 | break; |
317 | } | 335 | } |
336 | case SQUASHFS_LBLKDEV_TYPE: | ||
337 | case SQUASHFS_LCHRDEV_TYPE: { | ||
338 | struct squashfs_ldev_inode *sqsh_ino = &squashfs_ino.ldev; | ||
339 | unsigned int rdev; | ||
340 | |||
341 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, | ||
342 | sizeof(*sqsh_ino)); | ||
343 | if (err < 0) | ||
344 | goto failed_read; | ||
345 | |||
346 | if (type == SQUASHFS_LCHRDEV_TYPE) | ||
347 | inode->i_mode |= S_IFCHR; | ||
348 | else | ||
349 | inode->i_mode |= S_IFBLK; | ||
350 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
351 | inode->i_op = &squashfs_inode_ops; | ||
352 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | ||
353 | rdev = le32_to_cpu(sqsh_ino->rdev); | ||
354 | init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); | ||
355 | |||
356 | TRACE("Device inode %x:%x, rdev %x\n", | ||
357 | SQUASHFS_INODE_BLK(ino), offset, rdev); | ||
358 | break; | ||
359 | } | ||
318 | case SQUASHFS_FIFO_TYPE: | 360 | case SQUASHFS_FIFO_TYPE: |
319 | case SQUASHFS_SOCKET_TYPE: | 361 | case SQUASHFS_SOCKET_TYPE: { |
320 | case SQUASHFS_LFIFO_TYPE: | ||
321 | case SQUASHFS_LSOCKET_TYPE: { | ||
322 | struct squashfs_ipc_inode *sqsh_ino = &squashfs_ino.ipc; | 362 | struct squashfs_ipc_inode *sqsh_ino = &squashfs_ino.ipc; |
323 | 363 | ||
324 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, | 364 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, |
@@ -334,14 +374,52 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
334 | init_special_inode(inode, inode->i_mode, 0); | 374 | init_special_inode(inode, inode->i_mode, 0); |
335 | break; | 375 | break; |
336 | } | 376 | } |
377 | case SQUASHFS_LFIFO_TYPE: | ||
378 | case SQUASHFS_LSOCKET_TYPE: { | ||
379 | struct squashfs_lipc_inode *sqsh_ino = &squashfs_ino.lipc; | ||
380 | |||
381 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, | ||
382 | sizeof(*sqsh_ino)); | ||
383 | if (err < 0) | ||
384 | goto failed_read; | ||
385 | |||
386 | if (type == SQUASHFS_LFIFO_TYPE) | ||
387 | inode->i_mode |= S_IFIFO; | ||
388 | else | ||
389 | inode->i_mode |= S_IFSOCK; | ||
390 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
391 | inode->i_op = &squashfs_inode_ops; | ||
392 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | ||
393 | init_special_inode(inode, inode->i_mode, 0); | ||
394 | break; | ||
395 | } | ||
337 | default: | 396 | default: |
338 | ERROR("Unknown inode type %d in squashfs_iget!\n", type); | 397 | ERROR("Unknown inode type %d in squashfs_iget!\n", type); |
339 | return -EINVAL; | 398 | return -EINVAL; |
340 | } | 399 | } |
341 | 400 | ||
401 | if (xattr_id != SQUASHFS_INVALID_XATTR && msblk->xattr_id_table) { | ||
402 | err = squashfs_xattr_lookup(sb, xattr_id, | ||
403 | &squashfs_i(inode)->xattr_count, | ||
404 | &squashfs_i(inode)->xattr_size, | ||
405 | &squashfs_i(inode)->xattr); | ||
406 | if (err < 0) | ||
407 | goto failed_read; | ||
408 | inode->i_blocks += ((squashfs_i(inode)->xattr_size - 1) >> 9) | ||
409 | + 1; | ||
410 | } else | ||
411 | squashfs_i(inode)->xattr_count = 0; | ||
412 | |||
342 | return 0; | 413 | return 0; |
343 | 414 | ||
344 | failed_read: | 415 | failed_read: |
345 | ERROR("Unable to read inode 0x%llx\n", ino); | 416 | ERROR("Unable to read inode 0x%llx\n", ino); |
346 | return err; | 417 | return err; |
347 | } | 418 | } |
419 | |||
420 | |||
421 | const struct inode_operations squashfs_inode_ops = { | ||
422 | .getxattr = generic_getxattr, | ||
423 | .listxattr = squashfs_listxattr | ||
424 | }; | ||
425 | |||
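Each disk inode variant above records its xattr id; after the type switch, squashfs_xattr_lookup() resolves that id into the inode's xattr location, count and size, and i_blocks is charged for the xattr data in 512-byte units. A minimal userspace sketch of that rounding (names illustrative, not kernel code):

#include <assert.h>
#include <stdint.h>

/* Mirrors the "((xattr_size - 1) >> 9) + 1" arithmetic above: charge
 * the inode for its xattr data in 512-byte sectors. Underflows for
 * xattr_size == 0, so it is only meaningful for non-empty xattr data. */
static uint64_t xattr_sectors(uint64_t xattr_size)
{
	return ((xattr_size - 1) >> 9) + 1;
}

int main(void)
{
	assert(xattr_sectors(1) == 1);		/* 1 byte  -> 1 sector  */
	assert(xattr_sectors(512) == 1);	/* exactly one sector   */
	assert(xattr_sectors(513) == 2);	/* spills into a second */
	return 0;
}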
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 5266bd8ad932..7a9464d08cf6 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c | |||
@@ -57,11 +57,13 @@ | |||
57 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
58 | #include <linux/string.h> | 58 | #include <linux/string.h> |
59 | #include <linux/dcache.h> | 59 | #include <linux/dcache.h> |
60 | #include <linux/xattr.h> | ||
60 | 61 | ||
61 | #include "squashfs_fs.h" | 62 | #include "squashfs_fs.h" |
62 | #include "squashfs_fs_sb.h" | 63 | #include "squashfs_fs_sb.h" |
63 | #include "squashfs_fs_i.h" | 64 | #include "squashfs_fs_i.h" |
64 | #include "squashfs.h" | 65 | #include "squashfs.h" |
66 | #include "xattr.h" | ||
65 | 67 | ||
66 | /* | 68 | /* |
67 | * Lookup name in the directory index, returning the location of the metadata | 69 | * Lookup name in the directory index, returning the location of the metadata |
@@ -237,5 +239,7 @@ failed: | |||
237 | 239 | ||
238 | 240 | ||
239 | const struct inode_operations squashfs_dir_inode_ops = { | 241 | const struct inode_operations squashfs_dir_inode_ops = { |
240 | .lookup = squashfs_lookup | 242 | .lookup = squashfs_lookup, |
243 | .getxattr = generic_getxattr, | ||
244 | .listxattr = squashfs_listxattr | ||
241 | }; | 245 | }; |
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index fe2587af5512..733a17c42945 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
@@ -73,8 +73,11 @@ extern struct inode *squashfs_iget(struct super_block *, long long, | |||
73 | unsigned int); | 73 | unsigned int); |
74 | extern int squashfs_read_inode(struct inode *, long long); | 74 | extern int squashfs_read_inode(struct inode *, long long); |
75 | 75 | ||
76 | /* xattr.c */ | ||
77 | extern ssize_t squashfs_listxattr(struct dentry *, char *, size_t); | ||
78 | |||
76 | /* | 79 | /* |
77 | * Inodes, files and decompressor operations | 80 | * Inodes, files, decompressor and xattr operations |
78 | */ | 81 | */ |
79 | 82 | ||
80 | /* dir.c */ | 83 | /* dir.c */ |
@@ -86,11 +89,18 @@ extern const struct export_operations squashfs_export_ops; | |||
86 | /* file.c */ | 89 | /* file.c */ |
87 | extern const struct address_space_operations squashfs_aops; | 90 | extern const struct address_space_operations squashfs_aops; |
88 | 91 | ||
92 | /* inode.c */ | ||
93 | extern const struct inode_operations squashfs_inode_ops; | ||
94 | |||
89 | /* namei.c */ | 95 | /* namei.c */ |
90 | extern const struct inode_operations squashfs_dir_inode_ops; | 96 | extern const struct inode_operations squashfs_dir_inode_ops; |
91 | 97 | ||
92 | /* symlink.c */ | 98 | /* symlink.c */ |
93 | extern const struct address_space_operations squashfs_symlink_aops; | 99 | extern const struct address_space_operations squashfs_symlink_aops; |
100 | extern const struct inode_operations squashfs_symlink_inode_ops; | ||
101 | |||
102 | /* xattr.c */ | ||
103 | extern const struct xattr_handler *squashfs_xattr_handlers[]; | ||
94 | 104 | ||
95 | /* zlib_wrapper.c */ | 105 | /* zlib_wrapper.c */ |
96 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; | 106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; |
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 79024245ea00..8eabb808b78d 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h | |||
@@ -46,6 +46,7 @@ | |||
46 | #define SQUASHFS_NAME_LEN 256 | 46 | #define SQUASHFS_NAME_LEN 256 |
47 | 47 | ||
48 | #define SQUASHFS_INVALID_FRAG (0xffffffffU) | 48 | #define SQUASHFS_INVALID_FRAG (0xffffffffU) |
49 | #define SQUASHFS_INVALID_XATTR (0xffffffffU) | ||
49 | #define SQUASHFS_INVALID_BLK (-1LL) | 50 | #define SQUASHFS_INVALID_BLK (-1LL) |
50 | 51 | ||
51 | /* Filesystem flags */ | 52 | /* Filesystem flags */ |
@@ -96,6 +97,13 @@ | |||
96 | #define SQUASHFS_LFIFO_TYPE 13 | 97 | #define SQUASHFS_LFIFO_TYPE 13 |
97 | #define SQUASHFS_LSOCKET_TYPE 14 | 98 | #define SQUASHFS_LSOCKET_TYPE 14 |
98 | 99 | ||
100 | /* Xattr types */ | ||
101 | #define SQUASHFS_XATTR_USER 0 | ||
102 | #define SQUASHFS_XATTR_TRUSTED 1 | ||
103 | #define SQUASHFS_XATTR_SECURITY 2 | ||
104 | #define SQUASHFS_XATTR_VALUE_OOL 256 | ||
105 | #define SQUASHFS_XATTR_PREFIX_MASK 0xff | ||
106 | |||
99 | /* Flag whether block is compressed or uncompressed, bit is set if block is | 107 | /* Flag whether block is compressed or uncompressed, bit is set if block is |
100 | * uncompressed */ | 108 | * uncompressed */ |
101 | #define SQUASHFS_COMPRESSED_BIT (1 << 15) | 109 | #define SQUASHFS_COMPRESSED_BIT (1 << 15) |
@@ -174,6 +182,24 @@ | |||
174 | 182 | ||
175 | #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ | 183 | #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ |
176 | sizeof(u64)) | 184 | sizeof(u64)) |
185 | /* xattr id lookup table defines */ | ||
186 | #define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) | ||
187 | |||
188 | #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ | ||
189 | SQUASHFS_METADATA_SIZE) | ||
190 | |||
191 | #define SQUASHFS_XATTR_BLOCK_OFFSET(A) (SQUASHFS_XATTR_BYTES(A) % \ | ||
192 | SQUASHFS_METADATA_SIZE) | ||
193 | |||
194 | #define SQUASHFS_XATTR_BLOCKS(A) ((SQUASHFS_XATTR_BYTES(A) + \ | ||
195 | SQUASHFS_METADATA_SIZE - 1) / \ | ||
196 | SQUASHFS_METADATA_SIZE) | ||
197 | |||
198 | #define SQUASHFS_XATTR_BLOCK_BYTES(A) (SQUASHFS_XATTR_BLOCKS(A) *\ | ||
199 | sizeof(u64)) | ||
200 | #define SQUASHFS_XATTR_BLK(A) ((unsigned int) ((A) >> 16)) | ||
201 | |||
202 | #define SQUASHFS_XATTR_OFFSET(A) ((unsigned int) ((A) & 0xffff)) | ||
177 | 203 | ||
178 | /* cached data constants for filesystem */ | 204 | /* cached data constants for filesystem */ |
179 | #define SQUASHFS_CACHED_BLKS 8 | 205 | #define SQUASHFS_CACHED_BLKS 8 |
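The new SQUASHFS_XATTR_BLK()/SQUASHFS_XATTR_OFFSET() macros treat a 64-bit xattr reference as a packed pair: the upper bits give the metadata block start and the low 16 bits give the offset inside the uncompressed block. A standalone sketch of the same bit layout:

#include <stdint.h>
#include <stdio.h>

/* Userspace mirror of SQUASHFS_XATTR_BLK()/SQUASHFS_XATTR_OFFSET(). */
static unsigned int xattr_blk(uint64_t ref)
{
	return (unsigned int)(ref >> 16);	/* metadata block start */
}

static unsigned int xattr_offset(uint64_t ref)
{
	return (unsigned int)(ref & 0xffff);	/* offset within the block */
}

int main(void)
{
	uint64_t ref = ((uint64_t)0x1234 << 16) | 0x0abc; /* hypothetical reference */

	printf("block %#x offset %#x\n", xattr_blk(ref), xattr_offset(ref));
	return 0;
}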
@@ -228,7 +254,7 @@ struct squashfs_super_block { | |||
228 | __le64 root_inode; | 254 | __le64 root_inode; |
229 | __le64 bytes_used; | 255 | __le64 bytes_used; |
230 | __le64 id_table_start; | 256 | __le64 id_table_start; |
231 | __le64 xattr_table_start; | 257 | __le64 xattr_id_table_start; |
232 | __le64 inode_table_start; | 258 | __le64 inode_table_start; |
233 | __le64 directory_table_start; | 259 | __le64 directory_table_start; |
234 | __le64 fragment_table_start; | 260 | __le64 fragment_table_start; |
@@ -261,6 +287,17 @@ struct squashfs_ipc_inode { | |||
261 | __le32 nlink; | 287 | __le32 nlink; |
262 | }; | 288 | }; |
263 | 289 | ||
290 | struct squashfs_lipc_inode { | ||
291 | __le16 inode_type; | ||
292 | __le16 mode; | ||
293 | __le16 uid; | ||
294 | __le16 guid; | ||
295 | __le32 mtime; | ||
296 | __le32 inode_number; | ||
297 | __le32 nlink; | ||
298 | __le32 xattr; | ||
299 | }; | ||
300 | |||
264 | struct squashfs_dev_inode { | 301 | struct squashfs_dev_inode { |
265 | __le16 inode_type; | 302 | __le16 inode_type; |
266 | __le16 mode; | 303 | __le16 mode; |
@@ -272,6 +309,18 @@ struct squashfs_dev_inode { | |||
272 | __le32 rdev; | 309 | __le32 rdev; |
273 | }; | 310 | }; |
274 | 311 | ||
312 | struct squashfs_ldev_inode { | ||
313 | __le16 inode_type; | ||
314 | __le16 mode; | ||
315 | __le16 uid; | ||
316 | __le16 guid; | ||
317 | __le32 mtime; | ||
318 | __le32 inode_number; | ||
319 | __le32 nlink; | ||
320 | __le32 rdev; | ||
321 | __le32 xattr; | ||
322 | }; | ||
323 | |||
275 | struct squashfs_symlink_inode { | 324 | struct squashfs_symlink_inode { |
276 | __le16 inode_type; | 325 | __le16 inode_type; |
277 | __le16 mode; | 326 | __le16 mode; |
@@ -349,12 +398,14 @@ struct squashfs_ldir_inode { | |||
349 | union squashfs_inode { | 398 | union squashfs_inode { |
350 | struct squashfs_base_inode base; | 399 | struct squashfs_base_inode base; |
351 | struct squashfs_dev_inode dev; | 400 | struct squashfs_dev_inode dev; |
401 | struct squashfs_ldev_inode ldev; | ||
352 | struct squashfs_symlink_inode symlink; | 402 | struct squashfs_symlink_inode symlink; |
353 | struct squashfs_reg_inode reg; | 403 | struct squashfs_reg_inode reg; |
354 | struct squashfs_lreg_inode lreg; | 404 | struct squashfs_lreg_inode lreg; |
355 | struct squashfs_dir_inode dir; | 405 | struct squashfs_dir_inode dir; |
356 | struct squashfs_ldir_inode ldir; | 406 | struct squashfs_ldir_inode ldir; |
357 | struct squashfs_ipc_inode ipc; | 407 | struct squashfs_ipc_inode ipc; |
408 | struct squashfs_lipc_inode lipc; | ||
358 | }; | 409 | }; |
359 | 410 | ||
360 | struct squashfs_dir_entry { | 411 | struct squashfs_dir_entry { |
@@ -377,4 +428,27 @@ struct squashfs_fragment_entry { | |||
377 | unsigned int unused; | 428 | unsigned int unused; |
378 | }; | 429 | }; |
379 | 430 | ||
431 | struct squashfs_xattr_entry { | ||
432 | __le16 type; | ||
433 | __le16 size; | ||
434 | char data[0]; | ||
435 | }; | ||
436 | |||
437 | struct squashfs_xattr_val { | ||
438 | __le32 vsize; | ||
439 | char value[0]; | ||
440 | }; | ||
441 | |||
442 | struct squashfs_xattr_id { | ||
443 | __le64 xattr; | ||
444 | __le32 count; | ||
445 | __le32 size; | ||
446 | }; | ||
447 | |||
448 | struct squashfs_xattr_id_table { | ||
449 | __le64 xattr_table_start; | ||
450 | __le32 xattr_ids; | ||
451 | __le32 unused; | ||
452 | }; | ||
453 | |||
380 | #endif | 454 | #endif |
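Taken together, the structures above imply a simple on-disk stream per inode: an entry header, the name (minus its namespace prefix, which the type field encodes), then a value header and the value bytes, with SQUASHFS_XATTR_VALUE_OOL marking values stored out of line. A userspace mirror of that packing, assuming a little-endian host where the kernel would go through le16_to_cpu()/le32_to_cpu():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct xattr_entry { uint16_t type; uint16_t size; };	/* then name bytes  */
struct xattr_val   { uint32_t vsize; };			/* then value bytes */

int main(void)
{
	unsigned char buf[64], *p = buf;
	struct xattr_entry e = { 0 /* SQUASHFS_XATTR_USER */, 1 };
	struct xattr_val v = { 1 };

	/* hand-pack one attribute: name "x" ("user." prefix implied), value "y" */
	memcpy(p, &e, sizeof(e)); p += sizeof(e);
	*p++ = 'x';
	memcpy(p, &v, sizeof(v)); p += sizeof(v);
	*p++ = 'y';

	/* walk it back the way squashfs_listxattr() does */
	p = buf;
	memcpy(&e, p, sizeof(e)); p += sizeof(e);
	printf("type %u name %.*s ", (unsigned)e.type, (int)e.size, p);
	p += e.size;
	memcpy(&v, p, sizeof(v)); p += sizeof(v);
	printf("value %.*s\n", (int)v.vsize, p);
	return 0;
}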
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index fbfca30c0c68..d3e3a37f28a1 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h | |||
@@ -26,6 +26,9 @@ | |||
26 | struct squashfs_inode_info { | 26 | struct squashfs_inode_info { |
27 | u64 start; | 27 | u64 start; |
28 | int offset; | 28 | int offset; |
29 | u64 xattr; | ||
30 | unsigned int xattr_size; | ||
31 | int xattr_count; | ||
29 | union { | 32 | union { |
30 | struct { | 33 | struct { |
31 | u64 fragment_block; | 34 | u64 fragment_block; |
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 2e77dc547e25..d9037a5215f0 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h | |||
@@ -61,6 +61,7 @@ struct squashfs_sb_info { | |||
61 | int next_meta_index; | 61 | int next_meta_index; |
62 | __le64 *id_table; | 62 | __le64 *id_table; |
63 | __le64 *fragment_index; | 63 | __le64 *fragment_index; |
64 | __le64 *xattr_id_table; | ||
64 | struct mutex read_data_mutex; | 65 | struct mutex read_data_mutex; |
65 | struct mutex meta_index_mutex; | 66 | struct mutex meta_index_mutex; |
66 | struct meta_index *meta_index; | 67 | struct meta_index *meta_index; |
@@ -68,9 +69,11 @@ struct squashfs_sb_info { | |||
68 | __le64 *inode_lookup_table; | 69 | __le64 *inode_lookup_table; |
69 | u64 inode_table; | 70 | u64 inode_table; |
70 | u64 directory_table; | 71 | u64 directory_table; |
72 | u64 xattr_table; | ||
71 | unsigned int block_size; | 73 | unsigned int block_size; |
72 | unsigned short block_log; | 74 | unsigned short block_log; |
73 | long long bytes_used; | 75 | long long bytes_used; |
74 | unsigned int inodes; | 76 | unsigned int inodes; |
77 | int xattr_ids; | ||
75 | }; | 78 | }; |
76 | #endif | 79 | #endif |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 48b6f4a385a6..88b4f8606652 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -36,12 +36,14 @@ | |||
36 | #include <linux/init.h> | 36 | #include <linux/init.h> |
37 | #include <linux/module.h> | 37 | #include <linux/module.h> |
38 | #include <linux/magic.h> | 38 | #include <linux/magic.h> |
39 | #include <linux/xattr.h> | ||
39 | 40 | ||
40 | #include "squashfs_fs.h" | 41 | #include "squashfs_fs.h" |
41 | #include "squashfs_fs_sb.h" | 42 | #include "squashfs_fs_sb.h" |
42 | #include "squashfs_fs_i.h" | 43 | #include "squashfs_fs_i.h" |
43 | #include "squashfs.h" | 44 | #include "squashfs.h" |
44 | #include "decompressor.h" | 45 | #include "decompressor.h" |
46 | #include "xattr.h" | ||
45 | 47 | ||
46 | static struct file_system_type squashfs_fs_type; | 48 | static struct file_system_type squashfs_fs_type; |
47 | static const struct super_operations squashfs_super_ops; | 49 | static const struct super_operations squashfs_super_ops; |
@@ -82,7 +84,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
82 | long long root_inode; | 84 | long long root_inode; |
83 | unsigned short flags; | 85 | unsigned short flags; |
84 | unsigned int fragments; | 86 | unsigned int fragments; |
85 | u64 lookup_table_start; | 87 | u64 lookup_table_start, xattr_id_table_start; |
86 | int err; | 88 | int err; |
87 | 89 | ||
88 | TRACE("Entered squashfs_fill_superblock\n"); | 90 | TRACE("Entered squashfs_fill_superblock\n"); |
@@ -139,13 +141,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
139 | if (msblk->decompressor == NULL) | 141 | if (msblk->decompressor == NULL) |
140 | goto failed_mount; | 142 | goto failed_mount; |
141 | 143 | ||
142 | /* | ||
143 | * Check if there's xattrs in the filesystem. These are not | ||
144 | * supported in this version, so warn that they will be ignored. | ||
145 | */ | ||
146 | if (le64_to_cpu(sblk->xattr_table_start) != SQUASHFS_INVALID_BLK) | ||
147 | ERROR("Xattrs in filesystem, these will be ignored\n"); | ||
148 | |||
149 | /* Check the filesystem does not extend beyond the end of the | 144 | /* Check the filesystem does not extend beyond the end of the |
150 | block device */ | 145 | block device */ |
151 | msblk->bytes_used = le64_to_cpu(sblk->bytes_used); | 146 | msblk->bytes_used = le64_to_cpu(sblk->bytes_used); |
@@ -253,7 +248,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
253 | allocate_lookup_table: | 248 | allocate_lookup_table: |
254 | lookup_table_start = le64_to_cpu(sblk->lookup_table_start); | 249 | lookup_table_start = le64_to_cpu(sblk->lookup_table_start); |
255 | if (lookup_table_start == SQUASHFS_INVALID_BLK) | 250 | if (lookup_table_start == SQUASHFS_INVALID_BLK) |
256 | goto allocate_root; | 251 | goto allocate_xattr_table; |
257 | 252 | ||
258 | /* Allocate and read inode lookup table */ | 253 | /* Allocate and read inode lookup table */ |
259 | msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb, | 254 | msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb, |
@@ -266,6 +261,21 @@ allocate_lookup_table: | |||
266 | 261 | ||
267 | sb->s_export_op = &squashfs_export_ops; | 262 | sb->s_export_op = &squashfs_export_ops; |
268 | 263 | ||
264 | allocate_xattr_table: | ||
265 | sb->s_xattr = squashfs_xattr_handlers; | ||
266 | xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start); | ||
267 | if (xattr_id_table_start == SQUASHFS_INVALID_BLK) | ||
268 | goto allocate_root; | ||
269 | |||
270 | /* Allocate and read xattr id lookup table */ | ||
271 | msblk->xattr_id_table = squashfs_read_xattr_id_table(sb, | ||
272 | xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids); | ||
273 | if (IS_ERR(msblk->xattr_id_table)) { | ||
274 | err = PTR_ERR(msblk->xattr_id_table); | ||
275 | msblk->xattr_id_table = NULL; | ||
276 | if (err != -ENOTSUPP) | ||
277 | goto failed_mount; | ||
278 | } | ||
269 | allocate_root: | 279 | allocate_root: |
270 | root = new_inode(sb); | 280 | root = new_inode(sb); |
271 | if (!root) { | 281 | if (!root) { |
@@ -301,6 +311,7 @@ failed_mount: | |||
301 | kfree(msblk->inode_lookup_table); | 311 | kfree(msblk->inode_lookup_table); |
302 | kfree(msblk->fragment_index); | 312 | kfree(msblk->fragment_index); |
303 | kfree(msblk->id_table); | 313 | kfree(msblk->id_table); |
314 | kfree(msblk->xattr_id_table); | ||
304 | kfree(sb->s_fs_info); | 315 | kfree(sb->s_fs_info); |
305 | sb->s_fs_info = NULL; | 316 | sb->s_fs_info = NULL; |
306 | kfree(sblk); | 317 | kfree(sblk); |
@@ -355,6 +366,7 @@ static void squashfs_put_super(struct super_block *sb) | |||
355 | kfree(sbi->fragment_index); | 366 | kfree(sbi->fragment_index); |
356 | kfree(sbi->meta_index); | 367 | kfree(sbi->meta_index); |
357 | kfree(sbi->inode_lookup_table); | 368 | kfree(sbi->inode_lookup_table); |
369 | kfree(sbi->xattr_id_table); | ||
358 | kfree(sb->s_fs_info); | 370 | kfree(sb->s_fs_info); |
359 | sb->s_fs_info = NULL; | 371 | sb->s_fs_info = NULL; |
360 | } | 372 | } |
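The mount-time wiring above is deliberately tolerant: sb->s_xattr is always set, and a -ENOTSUPP error pointer from the compiled-out stub is swallowed so the mount proceeds with xattrs ignored, while any other error still fails it. A userspace sketch of that error-pointer convention (the helper names and the ENOTSUPP value are stand-ins for the kernel's ERR_PTR()/IS_ERR()):

#include <stdint.h>
#include <stdio.h>

#define ENOTSUPP 524	/* kernel-internal errno value, not in userspace errno.h */
#define MAX_ERRNO 4095

static void *err_ptr(long err) { return (void *)(intptr_t)err; }
static long ptr_err(const void *p) { return (long)(intptr_t)p; }
static int is_err(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static void *read_xattr_id_table(int compiled_in)
{
	if (!compiled_in)
		return err_ptr(-ENOTSUPP);	/* stub path from xattr.h */
	return "table";				/* stand-in for a real table */
}

int main(void)
{
	void *t = read_xattr_id_table(0);

	if (is_err(t)) {
		if (ptr_err(t) != -ENOTSUPP)
			return 1;	/* unexpected error: fail the mount */
		t = NULL;		/* tolerated: mount without xattrs */
	}
	printf("mount continues, xattrs %s\n", t ? "enabled" : "ignored");
	return 0;
}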
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index 32b911f4ee39..ec86434921e1 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c | |||
@@ -35,11 +35,13 @@ | |||
35 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
36 | #include <linux/string.h> | 36 | #include <linux/string.h> |
37 | #include <linux/pagemap.h> | 37 | #include <linux/pagemap.h> |
38 | #include <linux/xattr.h> | ||
38 | 39 | ||
39 | #include "squashfs_fs.h" | 40 | #include "squashfs_fs.h" |
40 | #include "squashfs_fs_sb.h" | 41 | #include "squashfs_fs_sb.h" |
41 | #include "squashfs_fs_i.h" | 42 | #include "squashfs_fs_i.h" |
42 | #include "squashfs.h" | 43 | #include "squashfs.h" |
44 | #include "xattr.h" | ||
43 | 45 | ||
44 | static int squashfs_symlink_readpage(struct file *file, struct page *page) | 46 | static int squashfs_symlink_readpage(struct file *file, struct page *page) |
45 | { | 47 | { |
@@ -114,3 +116,12 @@ error_out: | |||
114 | const struct address_space_operations squashfs_symlink_aops = { | 116 | const struct address_space_operations squashfs_symlink_aops = { |
115 | .readpage = squashfs_symlink_readpage | 117 | .readpage = squashfs_symlink_readpage |
116 | }; | 118 | }; |
119 | |||
120 | const struct inode_operations squashfs_symlink_inode_ops = { | ||
121 | .readlink = generic_readlink, | ||
122 | .follow_link = page_follow_link_light, | ||
123 | .put_link = page_put_link, | ||
124 | .getxattr = generic_getxattr, | ||
125 | .listxattr = squashfs_listxattr | ||
126 | }; | ||
127 | |||
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c new file mode 100644 index 000000000000..c7655e8b31cd --- /dev/null +++ b/fs/squashfs/xattr.c | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xattr.c | ||
22 | */ | ||
23 | |||
24 | #include <linux/init.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/vfs.h> | ||
29 | #include <linux/xattr.h> | ||
30 | #include <linux/slab.h> | ||
31 | |||
32 | #include "squashfs_fs.h" | ||
33 | #include "squashfs_fs_sb.h" | ||
34 | #include "squashfs_fs_i.h" | ||
35 | #include "squashfs.h" | ||
36 | |||
37 | static const struct xattr_handler *squashfs_xattr_handler(int); | ||
38 | |||
39 | ssize_t squashfs_listxattr(struct dentry *d, char *buffer, | ||
40 | size_t buffer_size) | ||
41 | { | ||
42 | struct inode *inode = d->d_inode; | ||
43 | struct super_block *sb = inode->i_sb; | ||
44 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
45 | u64 start = SQUASHFS_XATTR_BLK(squashfs_i(inode)->xattr) | ||
46 | + msblk->xattr_table; | ||
47 | int offset = SQUASHFS_XATTR_OFFSET(squashfs_i(inode)->xattr); | ||
48 | int count = squashfs_i(inode)->xattr_count; | ||
49 | size_t rest = buffer_size; | ||
50 | int err; | ||
51 | |||
52 | /* check that the file system has xattrs */ | ||
53 | if (msblk->xattr_id_table == NULL) | ||
54 | return -EOPNOTSUPP; | ||
55 | |||
56 | /* loop reading each xattr name */ | ||
57 | while (count--) { | ||
58 | struct squashfs_xattr_entry entry; | ||
59 | struct squashfs_xattr_val val; | ||
60 | const struct xattr_handler *handler; | ||
61 | int name_size, prefix_size = 0; | ||
62 | |||
63 | err = squashfs_read_metadata(sb, &entry, &start, &offset, | ||
64 | sizeof(entry)); | ||
65 | if (err < 0) | ||
66 | goto failed; | ||
67 | |||
68 | name_size = le16_to_cpu(entry.size); | ||
69 | handler = squashfs_xattr_handler(le16_to_cpu(entry.type)); | ||
70 | if (handler) | ||
71 | prefix_size = handler->list(d, buffer, rest, NULL, | ||
72 | name_size, handler->flags); | ||
73 | if (prefix_size) { | ||
74 | if (buffer) { | ||
75 | if (prefix_size + name_size + 1 > rest) { | ||
76 | err = -ERANGE; | ||
77 | goto failed; | ||
78 | } | ||
79 | buffer += prefix_size; | ||
80 | } | ||
81 | err = squashfs_read_metadata(sb, buffer, &start, | ||
82 | &offset, name_size); | ||
83 | if (err < 0) | ||
84 | goto failed; | ||
85 | if (buffer) { | ||
86 | buffer[name_size] = '\0'; | ||
87 | buffer += name_size + 1; | ||
88 | } | ||
89 | rest -= prefix_size + name_size + 1; | ||
90 | } else { | ||
91 | /* no handler or insufficient privileges, so skip */ | ||
92 | err = squashfs_read_metadata(sb, NULL, &start, | ||
93 | &offset, name_size); | ||
94 | if (err < 0) | ||
95 | goto failed; | ||
96 | } | ||
97 | |||
98 | |||
99 | /* skip remaining xattr entry */ | ||
100 | err = squashfs_read_metadata(sb, &val, &start, &offset, | ||
101 | sizeof(val)); | ||
102 | if (err < 0) | ||
103 | goto failed; | ||
104 | |||
105 | err = squashfs_read_metadata(sb, NULL, &start, &offset, | ||
106 | le32_to_cpu(val.vsize)); | ||
107 | if (err < 0) | ||
108 | goto failed; | ||
109 | } | ||
110 | err = buffer_size - rest; | ||
111 | |||
112 | failed: | ||
113 | return err; | ||
114 | } | ||
115 | |||
116 | |||
117 | static int squashfs_xattr_get(struct inode *inode, int name_index, | ||
118 | const char *name, void *buffer, size_t buffer_size) | ||
119 | { | ||
120 | struct super_block *sb = inode->i_sb; | ||
121 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
122 | u64 start = SQUASHFS_XATTR_BLK(squashfs_i(inode)->xattr) | ||
123 | + msblk->xattr_table; | ||
124 | int offset = SQUASHFS_XATTR_OFFSET(squashfs_i(inode)->xattr); | ||
125 | int count = squashfs_i(inode)->xattr_count; | ||
126 | int name_len = strlen(name); | ||
127 | int err, vsize; | ||
128 | char *target = kmalloc(name_len, GFP_KERNEL); | ||
129 | |||
130 | if (target == NULL) | ||
131 | return -ENOMEM; | ||
132 | |||
133 | /* loop reading each xattr name */ | ||
134 | for (; count; count--) { | ||
135 | struct squashfs_xattr_entry entry; | ||
136 | struct squashfs_xattr_val val; | ||
137 | int type, prefix, name_size; | ||
138 | |||
139 | err = squashfs_read_metadata(sb, &entry, &start, &offset, | ||
140 | sizeof(entry)); | ||
141 | if (err < 0) | ||
142 | goto failed; | ||
143 | |||
144 | name_size = le16_to_cpu(entry.size); | ||
145 | type = le16_to_cpu(entry.type); | ||
146 | prefix = type & SQUASHFS_XATTR_PREFIX_MASK; | ||
147 | |||
148 | if (prefix == name_index && name_size == name_len) | ||
149 | err = squashfs_read_metadata(sb, target, &start, | ||
150 | &offset, name_size); | ||
151 | else | ||
152 | err = squashfs_read_metadata(sb, NULL, &start, | ||
153 | &offset, name_size); | ||
154 | if (err < 0) | ||
155 | goto failed; | ||
156 | |||
157 | if (prefix == name_index && name_size == name_len && | ||
158 | strncmp(target, name, name_size) == 0) { | ||
159 | /* found xattr */ | ||
160 | if (type & SQUASHFS_XATTR_VALUE_OOL) { | ||
161 | __le64 xattr; | ||
162 | /* val is a reference to the real location */ | ||
163 | err = squashfs_read_metadata(sb, &val, &start, | ||
164 | &offset, sizeof(val)); | ||
165 | if (err < 0) | ||
166 | goto failed; | ||
167 | err = squashfs_read_metadata(sb, &xattr, &start, | ||
168 | &offset, sizeof(xattr)); | ||
169 | if (err < 0) | ||
170 | goto failed; | ||
171 | xattr = le64_to_cpu(xattr); | ||
172 | start = SQUASHFS_XATTR_BLK(xattr) + | ||
173 | msblk->xattr_table; | ||
174 | offset = SQUASHFS_XATTR_OFFSET(xattr); | ||
175 | } | ||
176 | /* read xattr value */ | ||
177 | err = squashfs_read_metadata(sb, &val, &start, &offset, | ||
178 | sizeof(val)); | ||
179 | if (err < 0) | ||
180 | goto failed; | ||
181 | |||
182 | vsize = le32_to_cpu(val.vsize); | ||
183 | if (buffer) { | ||
184 | if (vsize > buffer_size) { | ||
185 | err = -ERANGE; | ||
186 | goto failed; | ||
187 | } | ||
188 | err = squashfs_read_metadata(sb, buffer, &start, | ||
189 | &offset, vsize); | ||
190 | if (err < 0) | ||
191 | goto failed; | ||
192 | } | ||
193 | break; | ||
194 | } | ||
195 | |||
196 | /* no match, skip remaining xattr entry */ | ||
197 | err = squashfs_read_metadata(sb, &val, &start, &offset, | ||
198 | sizeof(val)); | ||
199 | if (err < 0) | ||
200 | goto failed; | ||
201 | err = squashfs_read_metadata(sb, NULL, &start, &offset, | ||
202 | le32_to_cpu(val.vsize)); | ||
203 | if (err < 0) | ||
204 | goto failed; | ||
205 | } | ||
206 | err = count ? vsize : -ENODATA; | ||
207 | |||
208 | failed: | ||
209 | kfree(target); | ||
210 | return err; | ||
211 | } | ||
212 | |||
213 | |||
214 | /* | ||
215 | * User namespace support | ||
216 | */ | ||
217 | static size_t squashfs_user_list(struct dentry *d, char *list, size_t list_size, | ||
218 | const char *name, size_t name_len, int type) | ||
219 | { | ||
220 | if (list && XATTR_USER_PREFIX_LEN <= list_size) | ||
221 | memcpy(list, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | ||
222 | return XATTR_USER_PREFIX_LEN; | ||
223 | } | ||
224 | |||
225 | static int squashfs_user_get(struct dentry *d, const char *name, void *buffer, | ||
226 | size_t size, int type) | ||
227 | { | ||
228 | if (name[0] == '\0') | ||
229 | return -EINVAL; | ||
230 | |||
231 | return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_USER, name, | ||
232 | buffer, size); | ||
233 | } | ||
234 | |||
235 | static const struct xattr_handler squashfs_xattr_user_handler = { | ||
236 | .prefix = XATTR_USER_PREFIX, | ||
237 | .list = squashfs_user_list, | ||
238 | .get = squashfs_user_get | ||
239 | }; | ||
240 | |||
241 | /* | ||
242 | * Trusted namespace support | ||
243 | */ | ||
244 | static size_t squashfs_trusted_list(struct dentry *d, char *list, | ||
245 | size_t list_size, const char *name, size_t name_len, int type) | ||
246 | { | ||
247 | if (!capable(CAP_SYS_ADMIN)) | ||
248 | return 0; | ||
249 | |||
250 | if (list && XATTR_TRUSTED_PREFIX_LEN <= list_size) | ||
251 | memcpy(list, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); | ||
252 | return XATTR_TRUSTED_PREFIX_LEN; | ||
253 | } | ||
254 | |||
255 | static int squashfs_trusted_get(struct dentry *d, const char *name, | ||
256 | void *buffer, size_t size, int type) | ||
257 | { | ||
258 | if (name[0] == '\0') | ||
259 | return -EINVAL; | ||
260 | |||
261 | return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_TRUSTED, name, | ||
262 | buffer, size); | ||
263 | } | ||
264 | |||
265 | static const struct xattr_handler squashfs_xattr_trusted_handler = { | ||
266 | .prefix = XATTR_TRUSTED_PREFIX, | ||
267 | .list = squashfs_trusted_list, | ||
268 | .get = squashfs_trusted_get | ||
269 | }; | ||
270 | |||
271 | /* | ||
272 | * Security namespace support | ||
273 | */ | ||
274 | static size_t squashfs_security_list(struct dentry *d, char *list, | ||
275 | size_t list_size, const char *name, size_t name_len, int type) | ||
276 | { | ||
277 | if (list && XATTR_SECURITY_PREFIX_LEN <= list_size) | ||
278 | memcpy(list, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); | ||
279 | return XATTR_SECURITY_PREFIX_LEN; | ||
280 | } | ||
281 | |||
282 | static int squashfs_security_get(struct dentry *d, const char *name, | ||
283 | void *buffer, size_t size, int type) | ||
284 | { | ||
285 | if (name[0] == '\0') | ||
286 | return -EINVAL; | ||
287 | |||
288 | return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_SECURITY, name, | ||
289 | buffer, size); | ||
290 | } | ||
291 | |||
292 | static const struct xattr_handler squashfs_xattr_security_handler = { | ||
293 | .prefix = XATTR_SECURITY_PREFIX, | ||
294 | .list = squashfs_security_list, | ||
295 | .get = squashfs_security_get | ||
296 | }; | ||
297 | |||
298 | static inline const struct xattr_handler *squashfs_xattr_handler(int type) | ||
299 | { | ||
300 | if (type & ~(SQUASHFS_XATTR_PREFIX_MASK | SQUASHFS_XATTR_VALUE_OOL)) | ||
301 | /* ignore unrecognised type */ | ||
302 | return NULL; | ||
303 | |||
304 | switch (type & SQUASHFS_XATTR_PREFIX_MASK) { | ||
305 | case SQUASHFS_XATTR_USER: | ||
306 | return &squashfs_xattr_user_handler; | ||
307 | case SQUASHFS_XATTR_TRUSTED: | ||
308 | return &squashfs_xattr_trusted_handler; | ||
309 | case SQUASHFS_XATTR_SECURITY: | ||
310 | return &squashfs_xattr_security_handler; | ||
311 | default: | ||
312 | /* ignore unrecognised type */ | ||
313 | return NULL; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | const struct xattr_handler *squashfs_xattr_handlers[] = { | ||
318 | &squashfs_xattr_user_handler, | ||
319 | &squashfs_xattr_trusted_handler, | ||
320 | &squashfs_xattr_security_handler, | ||
321 | NULL | ||
322 | }; | ||
323 | |||
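squashfs_listxattr() above follows the standard two-pass protocol: with a NULL buffer it only totals the space needed; with a real buffer it emits NUL-terminated prefix+name strings and returns -ERANGE if they do not fit. A typical userspace consumer of that protocol (the path is illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/mnt/squashfs/file";
	ssize_t len = listxattr(path, NULL, 0);	/* sizing pass */
	char *buf, *p;

	if (len <= 0)
		return 0;	/* no xattrs, or the fs returned an error */
	buf = malloc(len);
	if (buf == NULL)
		return 1;
	len = listxattr(path, buf, len);	/* filling pass */
	for (p = buf; len > 0 && p < buf + len; p += strlen(p) + 1)
		printf("%s\n", p);	/* e.g. "user.foo" */
	free(buf);
	return 0;
}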
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h new file mode 100644 index 000000000000..9da071ae181c --- /dev/null +++ b/fs/squashfs/xattr.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xattr.h | ||
22 | */ | ||
23 | |||
24 | #ifdef CONFIG_SQUASHFS_XATTRS | ||
25 | extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, | ||
26 | u64 *, int *); | ||
27 | extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, | ||
28 | unsigned int *, unsigned long long *); | ||
29 | #else | ||
30 | static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, | ||
31 | u64 start, u64 *xattr_table_start, int *xattr_ids) | ||
32 | { | ||
33 | ERROR("Xattrs in filesystem, these will be ignored\n"); | ||
34 | return ERR_PTR(-ENOTSUPP); | ||
35 | } | ||
36 | |||
37 | static inline int squashfs_xattr_lookup(struct super_block *sb, | ||
38 | unsigned int index, int *count, unsigned int *size, | ||
39 | unsigned long long *xattr) | ||
40 | { | ||
41 | return 0; | ||
42 | } | ||
43 | #define squashfs_listxattr NULL | ||
44 | #define generic_getxattr NULL | ||
45 | #define squashfs_xattr_handlers NULL | ||
46 | #endif | ||
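The #ifdef block above is the usual compile-time stub pattern: with CONFIG_SQUASHFS_XATTRS off, callers still compile unchanged and get inlines that report no xattr data or an -ENOTSUPP error pointer. The same pattern in miniature, with hypothetical names, built without -DHAVE_FEATURE:

#include <stdio.h>

#ifdef HAVE_FEATURE
int feature_lookup(int id);	/* real implementation lives elsewhere */
#else
static inline int feature_lookup(int id)
{
	(void)id;
	return 0;		/* "no data", like the squashfs stub */
}
#endif

int main(void)
{
	printf("lookup -> %d\n", feature_lookup(42));
	return 0;
}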
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c new file mode 100644 index 000000000000..cfb41106098f --- /dev/null +++ b/fs/squashfs/xattr_id.c | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xattr_id.c | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * This file implements code to map the 32-bit xattr id stored in the inode | ||
26 | * into the on disk location of the xattr data. | ||
27 | */ | ||
28 | |||
29 | #include <linux/fs.h> | ||
30 | #include <linux/vfs.h> | ||
31 | #include <linux/slab.h> | ||
32 | |||
33 | #include "squashfs_fs.h" | ||
34 | #include "squashfs_fs_sb.h" | ||
35 | #include "squashfs_fs_i.h" | ||
36 | #include "squashfs.h" | ||
37 | |||
38 | /* | ||
39 | * Map xattr id using the xattr id look up table | ||
40 | */ | ||
41 | int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, | ||
42 | int *count, unsigned int *size, unsigned long long *xattr) | ||
43 | { | ||
44 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
45 | int block = SQUASHFS_XATTR_BLOCK(index); | ||
46 | int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index); | ||
47 | u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]); | ||
48 | struct squashfs_xattr_id id; | ||
49 | int err; | ||
50 | |||
51 | err = squashfs_read_metadata(sb, &id, &start_block, &offset, | ||
52 | sizeof(id)); | ||
53 | if (err < 0) | ||
54 | return err; | ||
55 | |||
56 | *xattr = le64_to_cpu(id.xattr); | ||
57 | *size = le32_to_cpu(id.size); | ||
58 | *count = le32_to_cpu(id.count); | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | |||
63 | /* | ||
64 | * Read uncompressed xattr id lookup table indexes from disk into memory | ||
65 | */ | ||
66 | __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, | ||
67 | u64 *xattr_table_start, int *xattr_ids) | ||
68 | { | ||
69 | unsigned int len; | ||
70 | __le64 *xid_table; | ||
71 | struct squashfs_xattr_id_table id_table; | ||
72 | int err; | ||
73 | |||
74 | err = squashfs_read_table(sb, &id_table, start, sizeof(id_table)); | ||
75 | if (err < 0) { | ||
76 | ERROR("unable to read xattr id table\n"); | ||
77 | return ERR_PTR(err); | ||
78 | } | ||
79 | *xattr_table_start = le64_to_cpu(id_table.xattr_table_start); | ||
80 | *xattr_ids = le32_to_cpu(id_table.xattr_ids); | ||
81 | len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); | ||
82 | |||
83 | TRACE("In read_xattr_index_table, length %d\n", len); | ||
84 | |||
85 | /* Allocate xattr id lookup table indexes */ | ||
86 | xid_table = kmalloc(len, GFP_KERNEL); | ||
87 | if (xid_table == NULL) { | ||
88 | ERROR("Failed to allocate xattr id index table\n"); | ||
89 | return ERR_PTR(-ENOMEM); | ||
90 | } | ||
91 | |||
92 | err = squashfs_read_table(sb, xid_table, start + sizeof(id_table), len); | ||
93 | if (err < 0) { | ||
94 | ERROR("unable to read xattr id index table\n"); | ||
95 | kfree(xid_table); | ||
96 | return ERR_PTR(err); | ||
97 | } | ||
98 | |||
99 | return xid_table; | ||
100 | } | ||
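squashfs_xattr_lookup() above performs a two-level lookup: the id selects a 16-byte squashfs_xattr_id entry in a packed array, the containing 8K metadata block and offset are computed arithmetically, and the cached index table then supplies that block's disk location. A sketch of just the index arithmetic (constants mirror SQUASHFS_METADATA_SIZE and sizeof(struct squashfs_xattr_id)):

#include <stdint.h>
#include <stdio.h>

#define METADATA_SIZE 8192	/* SQUASHFS_METADATA_SIZE */
#define ID_ENTRY_SIZE 16	/* sizeof(struct squashfs_xattr_id) */

/* Mirror of SQUASHFS_XATTR_BLOCK()/SQUASHFS_XATTR_BLOCK_OFFSET(). */
static void locate(unsigned int id, int *block, int *offset)
{
	uint64_t byte = (uint64_t)id * ID_ENTRY_SIZE;

	*block = (int)(byte / METADATA_SIZE);
	*offset = (int)(byte % METADATA_SIZE);
}

int main(void)
{
	int block, offset;

	locate(513, &block, &offset);	/* 513 * 16 = 8208 bytes in */
	printf("id 513 -> block %d offset %d\n", block, offset);
	return 0;
}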
diff --git a/fs/super.c b/fs/super.c index 69688b15f1fa..5c35bc7a499e 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/acct.h> | 25 | #include <linux/acct.h> |
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/quotaops.h> | ||
28 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
29 | #include <linux/security.h> | 28 | #include <linux/security.h> |
30 | #include <linux/writeback.h> /* for the emergency remount stuff */ | 29 | #include <linux/writeback.h> /* for the emergency remount stuff */ |
@@ -94,8 +93,6 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
94 | init_rwsem(&s->s_dquot.dqptr_sem); | 93 | init_rwsem(&s->s_dquot.dqptr_sem); |
95 | init_waitqueue_head(&s->s_wait_unfrozen); | 94 | init_waitqueue_head(&s->s_wait_unfrozen); |
96 | s->s_maxbytes = MAX_NON_LFS; | 95 | s->s_maxbytes = MAX_NON_LFS; |
97 | s->dq_op = sb_dquot_ops; | ||
98 | s->s_qcop = sb_quotactl_ops; | ||
99 | s->s_op = &default_op; | 96 | s->s_op = &default_op; |
100 | s->s_time_gran = 1000000000; | 97 | s->s_time_gran = 1000000000; |
101 | } | 98 | } |
@@ -160,7 +157,6 @@ void deactivate_locked_super(struct super_block *s) | |||
160 | { | 157 | { |
161 | struct file_system_type *fs = s->s_type; | 158 | struct file_system_type *fs = s->s_type; |
162 | if (atomic_dec_and_test(&s->s_active)) { | 159 | if (atomic_dec_and_test(&s->s_active)) { |
163 | vfs_dq_off(s, 0); | ||
164 | fs->kill_sb(s); | 160 | fs->kill_sb(s); |
165 | put_filesystem(fs); | 161 | put_filesystem(fs); |
166 | put_super(s); | 162 | put_super(s); |
@@ -524,7 +520,7 @@ rescan: | |||
524 | int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | 520 | int do_remount_sb(struct super_block *sb, int flags, void *data, int force) |
525 | { | 521 | { |
526 | int retval; | 522 | int retval; |
527 | int remount_rw, remount_ro; | 523 | int remount_ro; |
528 | 524 | ||
529 | if (sb->s_frozen != SB_UNFROZEN) | 525 | if (sb->s_frozen != SB_UNFROZEN) |
530 | return -EBUSY; | 526 | return -EBUSY; |
@@ -540,7 +536,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
540 | sync_filesystem(sb); | 536 | sync_filesystem(sb); |
541 | 537 | ||
542 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); | 538 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); |
543 | remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); | ||
544 | 539 | ||
545 | /* If we are remounting RDONLY and current sb is read/write, | 540 | /* If we are remounting RDONLY and current sb is read/write, |
546 | make sure there are no rw files opened */ | 541 | make sure there are no rw files opened */ |
@@ -549,9 +544,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
549 | mark_files_ro(sb); | 544 | mark_files_ro(sb); |
550 | else if (!fs_may_remount_ro(sb)) | 545 | else if (!fs_may_remount_ro(sb)) |
551 | return -EBUSY; | 546 | return -EBUSY; |
552 | retval = vfs_dq_off(sb, 1); | ||
553 | if (retval < 0 && retval != -ENOSYS) | ||
554 | return -EBUSY; | ||
555 | } | 547 | } |
556 | 548 | ||
557 | if (sb->s_op->remount_fs) { | 549 | if (sb->s_op->remount_fs) { |
@@ -560,8 +552,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
560 | return retval; | 552 | return retval; |
561 | } | 553 | } |
562 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); | 554 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); |
563 | if (remount_rw) | 555 | |
564 | vfs_dq_quota_on_remount(sb); | ||
565 | /* | 556 | /* |
566 | * Some filesystems modify their metadata via some other path than the | 557 | * Some filesystems modify their metadata via some other path than the |
567 | * bdev buffer cache (eg. use a private mapping, or directories in | 558 | * bdev buffer cache (eg. use a private mapping, or directories in |
@@ -946,8 +937,8 @@ out: | |||
946 | EXPORT_SYMBOL_GPL(vfs_kern_mount); | 937 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
947 | 938 | ||
948 | /** | 939 | /** |
949 | * freeze_super -- lock the filesystem and force it into a consistent state | 940 | * freeze_super - lock the filesystem and force it into a consistent state |
950 | * @super: the super to lock | 941 | * @sb: the super to lock |
951 | * | 942 | * |
952 | * Syncs the super to make sure the filesystem is consistent and calls the fs's | 943 | * Syncs the super to make sure the filesystem is consistent and calls the fs's |
953 | * freeze_fs. Subsequent calls to this without first thawing the fs will return | 944 | * freeze_fs. Subsequent calls to this without first thawing the fs will return |
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -130,12 +130,10 @@ void emergency_sync(void) | |||
130 | 130 | ||
131 | /* | 131 | /* |
132 | * Generic function to fsync a file. | 132 | * Generic function to fsync a file. |
133 | * | ||
134 | * filp may be NULL if called via the msync of a vma. | ||
135 | */ | 133 | */ |
136 | int file_fsync(struct file *filp, struct dentry *dentry, int datasync) | 134 | int file_fsync(struct file *filp, int datasync) |
137 | { | 135 | { |
138 | struct inode * inode = dentry->d_inode; | 136 | struct inode *inode = filp->f_mapping->host; |
139 | struct super_block * sb; | 137 | struct super_block * sb; |
140 | int ret, err; | 138 | int ret, err; |
141 | 139 | ||
@@ -183,7 +181,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) | |||
183 | * livelocks in fsync_buffers_list(). | 181 | * livelocks in fsync_buffers_list(). |
184 | */ | 182 | */ |
185 | mutex_lock(&mapping->host->i_mutex); | 183 | mutex_lock(&mapping->host->i_mutex); |
186 | err = file->f_op->fsync(file, file->f_path.dentry, datasync); | 184 | err = file->f_op->fsync(file, datasync); |
187 | if (!ret) | 185 | if (!ret) |
188 | ret = err; | 186 | ret = err; |
189 | mutex_unlock(&mapping->host->i_mutex); | 187 | mutex_unlock(&mapping->host->i_mutex); |
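The fsync hunks above capture an interface change that recurs through the rest of this merge: ->fsync() loses its dentry argument, and implementations derive the inode from file->f_mapping->host instead. A minimal sketch of the new shape, using hypothetical stand-in structs rather than the kernel's:

struct inode { int dummy; };
struct address_space { struct inode *host; };
struct file { struct address_space *f_mapping; };

static int example_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;	/* was dentry->d_inode */

	(void)inode;
	(void)datasync;
	return 0;	/* a real implementation flushes inode state here */
}

int main(void)
{
	struct inode i = { 0 };
	struct address_space m = { &i };
	struct file f = { &m };

	return example_fsync(&f, 0);
}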
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index bbd77e95cf7f..bde1a4c3679a 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -117,13 +117,11 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
117 | if (error) | 117 | if (error) |
118 | goto out; | 118 | goto out; |
119 | 119 | ||
120 | iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */ | 120 | /* this ignores size changes */ |
121 | 121 | generic_setattr(inode, iattr); | |
122 | error = inode_setattr(inode, iattr); | ||
123 | if (error) | ||
124 | goto out; | ||
125 | 122 | ||
126 | error = sysfs_sd_setattr(sd, iattr); | 123 | error = sysfs_sd_setattr(sd, iattr); |
124 | |||
127 | out: | 125 | out: |
128 | mutex_unlock(&sysfs_mutex); | 126 | mutex_unlock(&sysfs_mutex); |
129 | return error; | 127 | return error; |
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 1dabed286b4c..79941e4964a4 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c | |||
@@ -24,7 +24,7 @@ const struct file_operations sysv_dir_operations = { | |||
24 | .llseek = generic_file_llseek, | 24 | .llseek = generic_file_llseek, |
25 | .read = generic_read_dir, | 25 | .read = generic_read_dir, |
26 | .readdir = sysv_readdir, | 26 | .readdir = sysv_readdir, |
27 | .fsync = simple_fsync, | 27 | .fsync = generic_file_fsync, |
28 | }; | 28 | }; |
29 | 29 | ||
30 | static inline void dir_put_page(struct page *page) | 30 | static inline void dir_put_page(struct page *page) |
diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 96340c01f4a7..750cc22349bd 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c | |||
@@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = { | |||
26 | .write = do_sync_write, | 26 | .write = do_sync_write, |
27 | .aio_write = generic_file_aio_write, | 27 | .aio_write = generic_file_aio_write, |
28 | .mmap = generic_file_mmap, | 28 | .mmap = generic_file_mmap, |
29 | .fsync = simple_fsync, | 29 | .fsync = generic_file_fsync, |
30 | .splice_read = generic_file_splice_read, | 30 | .splice_read = generic_file_splice_read, |
31 | }; | 31 | }; |
32 | 32 | ||
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 4573734d723d..d4a5380b5669 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -43,6 +43,7 @@ static int sysv_sync_fs(struct super_block *sb, int wait) | |||
43 | * then attach current time stamp. | 43 | * then attach current time stamp. |
44 | * But if the filesystem was marked clean, keep it clean. | 44 | * But if the filesystem was marked clean, keep it clean. |
45 | */ | 45 | */ |
46 | sb->s_dirt = 0; | ||
46 | old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); | 47 | old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); |
47 | if (sbi->s_type == FSTYPE_SYSV4) { | 48 | if (sbi->s_type == FSTYPE_SYSV4) { |
48 | if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time)) | 49 | if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time)) |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5692cf72b807..12f445cee9f7 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -967,12 +967,15 @@ static int do_writepage(struct page *page, int len) | |||
967 | * the page locked, and it locks @ui_mutex. However, write-back does take inode | 967 | * the page locked, and it locks @ui_mutex. However, write-back does take inode |
968 | * @i_mutex, which means other VFS operations may be run on this inode at the | 968 | * @i_mutex, which means other VFS operations may be run on this inode at the |
969 | * same time. And the problematic one is truncation to smaller size, from where | 969 | * same time. And the problematic one is truncation to smaller size, from where |
970 | * we have to call 'vmtruncate()', which first changes @inode->i_size, then | 970 | * we have to call 'simple_setsize()', which first changes @inode->i_size, then |
971 | * drops the truncated pages. And while dropping the pages, it takes the page | 971 | * drops the truncated pages. And while dropping the pages, it takes the page |
972 | * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with | 972 | * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with |
973 | * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This | 973 | * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This |
974 | * means that @inode->i_size is changed while @ui_mutex is unlocked. | 974 | * means that @inode->i_size is changed while @ui_mutex is unlocked. |
975 | * | 975 | * |
976 | * XXX: with the new truncate the above is not true anymore, the simple_setsize | ||
977 | * calls can be replaced with the individual components. | ||
978 | * | ||
976 | * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond | 979 | * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond |
977 | * inode size. How do we do this if @inode->i_size may became smaller while we | 980 | * inode size. How do we do this if @inode->i_size may became smaller while we |
978 | * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the | 981 | * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the |
@@ -1125,7 +1128,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
1125 | budgeted = 0; | 1128 | budgeted = 0; |
1126 | } | 1129 | } |
1127 | 1130 | ||
1128 | err = vmtruncate(inode, new_size); | 1131 | err = simple_setsize(inode, new_size); |
1129 | if (err) | 1132 | if (err) |
1130 | goto out_budg; | 1133 | goto out_budg; |
1131 | 1134 | ||
@@ -1214,7 +1217,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, | |||
1214 | 1217 | ||
1215 | if (attr->ia_valid & ATTR_SIZE) { | 1218 | if (attr->ia_valid & ATTR_SIZE) { |
1216 | dbg_gen("size %lld -> %lld", inode->i_size, new_size); | 1219 | dbg_gen("size %lld -> %lld", inode->i_size, new_size); |
1217 | err = vmtruncate(inode, new_size); | 1220 | err = simple_setsize(inode, new_size); |
1218 | if (err) | 1221 | if (err) |
1219 | goto out; | 1222 | goto out; |
1220 | } | 1223 | } |
@@ -1223,7 +1226,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, | |||
1223 | if (attr->ia_valid & ATTR_SIZE) { | 1226 | if (attr->ia_valid & ATTR_SIZE) { |
1224 | /* Truncation changes inode [mc]time */ | 1227 | /* Truncation changes inode [mc]time */ |
1225 | inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); | 1228 | inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); |
1226 | /* 'vmtruncate()' changed @i_size, update @ui_size */ | 1229 | /* 'simple_setsize()' changed @i_size, update @ui_size */ |
1227 | ui->ui_size = inode->i_size; | 1230 | ui->ui_size = inode->i_size; |
1228 | } | 1231 | } |
1229 | 1232 | ||
@@ -1304,9 +1307,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1304 | return NULL; | 1307 | return NULL; |
1305 | } | 1308 | } |
1306 | 1309 | ||
1307 | int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) | 1310 | int ubifs_fsync(struct file *file, int datasync) |
1308 | { | 1311 | { |
1309 | struct inode *inode = dentry->d_inode; | 1312 | struct inode *inode = file->f_mapping->host; |
1310 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 1313 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
1311 | int err; | 1314 | int err; |
1312 | 1315 | ||
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index bd2542dad014..2eef553d50c8 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb { | |||
379 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses | 379 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses |
380 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot | 380 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot |
381 | * make sure @inode->i_size is always changed under @ui_mutex, because it | 381 | * make sure @inode->i_size is always changed under @ui_mutex, because it |
382 | * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock | 382 | * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock |
383 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are | 383 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are |
384 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one | 384 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one |
385 | * could consider to rework locking and base it on "shadow" fields. | 385 | * could consider to rework locking and base it on "shadow" fields. |
@@ -1678,7 +1678,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); | |||
1678 | int ubifs_calc_dark(const struct ubifs_info *c, int spc); | 1678 | int ubifs_calc_dark(const struct ubifs_info *c, int spc); |
1679 | 1679 | ||
1680 | /* file.c */ | 1680 | /* file.c */ |
1681 | int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); | 1681 | int ubifs_fsync(struct file *file, int datasync); |
1682 | int ubifs_setattr(struct dentry *dentry, struct iattr *attr); | 1682 | int ubifs_setattr(struct dentry *dentry, struct iattr *attr); |
1683 | 1683 | ||
1684 | /* dir.c */ | 1684 | /* dir.c */ |
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 9a9378b4eb5a..b608efaa4cee 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c | |||
@@ -21,7 +21,6 @@ | |||
21 | 21 | ||
22 | #include "udfdecl.h" | 22 | #include "udfdecl.h" |
23 | 23 | ||
24 | #include <linux/quotaops.h> | ||
25 | #include <linux/buffer_head.h> | 24 | #include <linux/buffer_head.h> |
26 | #include <linux/bitops.h> | 25 | #include <linux/bitops.h> |
27 | 26 | ||
@@ -159,8 +158,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb, | |||
159 | udf_debug("byte=%2x\n", | 158 | udf_debug("byte=%2x\n", |
160 | ((char *)bh->b_data)[(bit + i) >> 3]); | 159 | ((char *)bh->b_data)[(bit + i) >> 3]); |
161 | } else { | 160 | } else { |
162 | if (inode) | ||
163 | dquot_free_block(inode, 1); | ||
164 | udf_add_free_space(sb, sbi->s_partition, 1); | 161 | udf_add_free_space(sb, sbi->s_partition, 1); |
165 | } | 162 | } |
166 | } | 163 | } |
@@ -210,15 +207,8 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, | |||
210 | bit = block % (sb->s_blocksize << 3); | 207 | bit = block % (sb->s_blocksize << 3); |
211 | 208 | ||
212 | while (bit < (sb->s_blocksize << 3) && block_count > 0) { | 209 | while (bit < (sb->s_blocksize << 3) && block_count > 0) { |
213 | if (!udf_test_bit(bit, bh->b_data)) | 210 | if (!udf_clear_bit(bit, bh->b_data)) |
214 | goto out; | 211 | goto out; |
215 | else if (dquot_prealloc_block(inode, 1)) | ||
216 | goto out; | ||
217 | else if (!udf_clear_bit(bit, bh->b_data)) { | ||
218 | udf_debug("bit already cleared for block %d\n", bit); | ||
219 | dquot_free_block(inode, 1); | ||
220 | goto out; | ||
221 | } | ||
222 | block_count--; | 212 | block_count--; |
223 | alloc_count++; | 213 | alloc_count++; |
224 | bit++; | 214 | bit++; |
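With the quota hooks gone, the preallocation loop leans on udf_clear_bit() doing the test and the claim in a single step: it returns the bit's previous value, so a zero return means the block was already taken and the scan stops. The idiom in isolation (a sketch; max_bit stands for the blocksize-in-bits bound used above):

	while (bit < max_bit && block_count > 0) {
		if (!udf_clear_bit(bit, bh->b_data))
			break;		/* bit was already 0: block in use */
		/* bit was 1 and is now 0: the block is ours */
		block_count--;
		alloc_count++;
		bit++;
	}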
@@ -338,20 +328,6 @@ search_back: | |||
338 | } | 328 | } |
339 | 329 | ||
340 | got_block: | 330 | got_block: |
341 | |||
342 | /* | ||
343 | * Check quota for allocation of this block. | ||
344 | */ | ||
345 | if (inode) { | ||
346 | int ret = dquot_alloc_block(inode, 1); | ||
347 | |||
348 | if (ret) { | ||
349 | mutex_unlock(&sbi->s_alloc_mutex); | ||
350 | *err = ret; | ||
351 | return 0; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - | 331 | newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - |
356 | (sizeof(struct spaceBitmapDesc) << 3); | 332 | (sizeof(struct spaceBitmapDesc) << 3); |
357 | 333 | ||
@@ -401,10 +377,6 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
401 | } | 377 | } |
402 | 378 | ||
403 | iinfo = UDF_I(table); | 379 | iinfo = UDF_I(table); |
404 | /* We do this up front - There are some error conditions that | ||
405 | could occur, but.. oh well */ ||
406 | if (inode) | ||
407 | dquot_free_block(inode, count); | ||
408 | udf_add_free_space(sb, sbi->s_partition, count); | 380 | udf_add_free_space(sb, sbi->s_partition, count); |
409 | 381 | ||
410 | start = bloc->logicalBlockNum + offset; | 382 | start = bloc->logicalBlockNum + offset; |
@@ -649,10 +621,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, | |||
649 | epos.offset -= adsize; | 621 | epos.offset -= adsize; |
650 | 622 | ||
651 | alloc_count = (elen >> sb->s_blocksize_bits); | 623 | alloc_count = (elen >> sb->s_blocksize_bits); |
652 | if (inode && dquot_prealloc_block(inode, | 624 | if (alloc_count > block_count) { |
653 | alloc_count > block_count ? block_count : alloc_count)) | ||
654 | alloc_count = 0; | ||
655 | else if (alloc_count > block_count) { | ||
656 | alloc_count = block_count; | 625 | alloc_count = block_count; |
657 | eloc.logicalBlockNum += alloc_count; | 626 | eloc.logicalBlockNum += alloc_count; |
658 | elen -= (alloc_count << sb->s_blocksize_bits); | 627 | elen -= (alloc_count << sb->s_blocksize_bits); |
@@ -752,14 +721,6 @@ static int udf_table_new_block(struct super_block *sb, | |||
752 | newblock = goal_eloc.logicalBlockNum; | 721 | newblock = goal_eloc.logicalBlockNum; |
753 | goal_eloc.logicalBlockNum++; | 722 | goal_eloc.logicalBlockNum++; |
754 | goal_elen -= sb->s_blocksize; | 723 | goal_elen -= sb->s_blocksize; |
755 | if (inode) { | ||
756 | *err = dquot_alloc_block(inode, 1); | ||
757 | if (*err) { | ||
758 | brelse(goal_epos.bh); | ||
759 | mutex_unlock(&sbi->s_alloc_mutex); | ||
760 | return 0; | ||
761 | } | ||
762 | } | ||
763 | 724 | ||
764 | if (goal_elen) | 725 | if (goal_elen) |
765 | udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); | 726 | udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); |
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 3a84455c2a77..51552bf50225 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -207,8 +207,9 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
207 | 207 | ||
208 | /* readdir and lookup functions */ | 208 | /* readdir and lookup functions */ |
209 | const struct file_operations udf_dir_operations = { | 209 | const struct file_operations udf_dir_operations = { |
210 | .llseek = generic_file_llseek, | ||
210 | .read = generic_read_dir, | 211 | .read = generic_read_dir, |
211 | .readdir = udf_readdir, | 212 | .readdir = udf_readdir, |
212 | .unlocked_ioctl = udf_ioctl, | 213 | .unlocked_ioctl = udf_ioctl, |
213 | .fsync = simple_fsync, | 214 | .fsync = generic_file_fsync, |
214 | }; | 215 | }; |
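simple_fsync() gives way to generic_file_fsync() throughout this series; filesystems with no special ordering requirements keep pointing .fsync at the generic helper. What that helper does in this kernel, approximately (a sketch from memory of fs/libfs.c, not the verbatim source):

static int generic_file_fsync_sketch(struct file *file, int datasync)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,	/* metadata only; data is the caller's job */
	};
	struct inode *inode = file->f_mapping->host;
	int err, ret;

	ret = sync_mapping_buffers(inode->i_mapping);
	if (!(inode->i_state & I_DIRTY))
		return ret;
	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
		return ret;

	err = sync_inode(inode, &wbc);
	if (ret == 0)
		ret = err;
	return ret;
}

The explicit .llseek entry appears to be part of the same cleanup wave that stops directories from relying on the implicit default seek.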
diff --git a/fs/udf/file.c b/fs/udf/file.c index baae3a723946..94e06d6bddbd 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
35 | #include <linux/smp_lock.h> | 35 | #include <linux/smp_lock.h> |
36 | #include <linux/pagemap.h> | 36 | #include <linux/pagemap.h> |
37 | #include <linux/quotaops.h> | ||
38 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
39 | #include <linux/aio.h> | 38 | #include <linux/aio.h> |
40 | #include <linux/smp_lock.h> | 39 | #include <linux/smp_lock.h> |
@@ -219,39 +218,16 @@ const struct file_operations udf_file_operations = { | |||
219 | .read = do_sync_read, | 218 | .read = do_sync_read, |
220 | .aio_read = generic_file_aio_read, | 219 | .aio_read = generic_file_aio_read, |
221 | .unlocked_ioctl = udf_ioctl, | 220 | .unlocked_ioctl = udf_ioctl, |
222 | .open = dquot_file_open, | 221 | .open = generic_file_open, |
223 | .mmap = generic_file_mmap, | 222 | .mmap = generic_file_mmap, |
224 | .write = do_sync_write, | 223 | .write = do_sync_write, |
225 | .aio_write = udf_file_aio_write, | 224 | .aio_write = udf_file_aio_write, |
226 | .release = udf_release_file, | 225 | .release = udf_release_file, |
227 | .fsync = simple_fsync, | 226 | .fsync = generic_file_fsync, |
228 | .splice_read = generic_file_splice_read, | 227 | .splice_read = generic_file_splice_read, |
229 | .llseek = generic_file_llseek, | 228 | .llseek = generic_file_llseek, |
230 | }; | 229 | }; |
231 | 230 | ||
232 | int udf_setattr(struct dentry *dentry, struct iattr *iattr) | ||
233 | { | ||
234 | struct inode *inode = dentry->d_inode; | ||
235 | int error; | ||
236 | |||
237 | error = inode_change_ok(inode, iattr); | ||
238 | if (error) | ||
239 | return error; | ||
240 | |||
241 | if (is_quota_modification(inode, iattr)) | ||
242 | dquot_initialize(inode); | ||
243 | |||
244 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | ||
245 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { | ||
246 | error = dquot_transfer(inode, iattr); | ||
247 | if (error) | ||
248 | return error; | ||
249 | } | ||
250 | |||
251 | return inode_setattr(inode, iattr); | ||
252 | } | ||
253 | |||
254 | const struct inode_operations udf_file_inode_operations = { | 231 | const struct inode_operations udf_file_inode_operations = { |
255 | .truncate = udf_truncate, | 232 | .truncate = udf_truncate, |
256 | .setattr = udf_setattr, | ||
257 | }; | 233 | }; |
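With the quota transfer logic removed, udf_setattr() duplicated the VFS default exactly, so it is deleted and the .setattr slots are dropped below. When an inode has no .setattr, notify_change() falls back to the generic sequence, roughly (a sketch of the fs/attr.c fallback in this era):

	error = inode_change_ok(inode, attr);
	if (!error)
		error = inode_setattr(inode, attr);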
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 2b5586c7f02a..18cd7111185d 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -20,7 +20,6 @@ | |||
20 | 20 | ||
21 | #include "udfdecl.h" | 21 | #include "udfdecl.h" |
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/quotaops.h> | ||
24 | #include <linux/sched.h> | 23 | #include <linux/sched.h> |
25 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
26 | 25 | ||
@@ -32,13 +31,6 @@ void udf_free_inode(struct inode *inode) | |||
32 | struct super_block *sb = inode->i_sb; | 31 | struct super_block *sb = inode->i_sb; |
33 | struct udf_sb_info *sbi = UDF_SB(sb); | 32 | struct udf_sb_info *sbi = UDF_SB(sb); |
34 | 33 | ||
35 | /* | ||
36 | * Note: we must free any quota before locking the superblock, | ||
37 | * as writing the quota to disk may need the lock as well. | ||
38 | */ | ||
39 | dquot_free_inode(inode); | ||
40 | dquot_drop(inode); | ||
41 | |||
42 | clear_inode(inode); | 34 | clear_inode(inode); |
43 | 35 | ||
44 | mutex_lock(&sbi->s_alloc_mutex); | 36 | mutex_lock(&sbi->s_alloc_mutex); |
@@ -61,7 +53,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
61 | struct super_block *sb = dir->i_sb; | 53 | struct super_block *sb = dir->i_sb; |
62 | struct udf_sb_info *sbi = UDF_SB(sb); | 54 | struct udf_sb_info *sbi = UDF_SB(sb); |
63 | struct inode *inode; | 55 | struct inode *inode; |
64 | int block, ret; | 56 | int block; |
65 | uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; | 57 | uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; |
66 | struct udf_inode_info *iinfo; | 58 | struct udf_inode_info *iinfo; |
67 | struct udf_inode_info *dinfo = UDF_I(dir); | 59 | struct udf_inode_info *dinfo = UDF_I(dir); |
@@ -146,17 +138,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
146 | insert_inode_hash(inode); | 138 | insert_inode_hash(inode); |
147 | mark_inode_dirty(inode); | 139 | mark_inode_dirty(inode); |
148 | 140 | ||
149 | dquot_initialize(inode); | ||
150 | ret = dquot_alloc_inode(inode); | ||
151 | if (ret) { | ||
152 | dquot_drop(inode); | ||
153 | inode->i_flags |= S_NOQUOTA; | ||
154 | inode->i_nlink = 0; | ||
155 | iput(inode); | ||
156 | *err = ret; | ||
157 | return NULL; | ||
158 | } | ||
159 | |||
160 | *err = 0; | 141 | *err = 0; |
161 | return inode; | 142 | return inode; |
162 | } | 143 | } |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8a3fbd177cab..124852bcf6fe 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <linux/pagemap.h> | 36 | #include <linux/pagemap.h> |
37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/quotaops.h> | ||
40 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
41 | #include <linux/crc-itu-t.h> | 40 | #include <linux/crc-itu-t.h> |
42 | 41 | ||
@@ -71,9 +70,6 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); | |||
71 | 70 | ||
72 | void udf_delete_inode(struct inode *inode) | 71 | void udf_delete_inode(struct inode *inode) |
73 | { | 72 | { |
74 | if (!is_bad_inode(inode)) | ||
75 | dquot_initialize(inode); | ||
76 | |||
77 | truncate_inode_pages(&inode->i_data, 0); | 73 | truncate_inode_pages(&inode->i_data, 0); |
78 | 74 | ||
79 | if (is_bad_inode(inode)) | 75 | if (is_bad_inode(inode)) |
@@ -113,7 +109,6 @@ void udf_clear_inode(struct inode *inode) | |||
113 | (unsigned long long)iinfo->i_lenExtents); | 109 | (unsigned long long)iinfo->i_lenExtents); |
114 | } | 110 | } |
115 | 111 | ||
116 | dquot_drop(inode); | ||
117 | kfree(iinfo->i_ext.i_data); | 112 | kfree(iinfo->i_ext.i_data); |
118 | iinfo->i_ext.i_data = NULL; | 113 | iinfo->i_ext.i_data = NULL; |
119 | } | 114 | } |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 585f733615dc..bf5fc674193c 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/quotaops.h> | ||
31 | #include <linux/smp_lock.h> | 30 | #include <linux/smp_lock.h> |
32 | #include <linux/buffer_head.h> | 31 | #include <linux/buffer_head.h> |
33 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
@@ -563,8 +562,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, | |||
563 | int err; | 562 | int err; |
564 | struct udf_inode_info *iinfo; | 563 | struct udf_inode_info *iinfo; |
565 | 564 | ||
566 | dquot_initialize(dir); | ||
567 | |||
568 | lock_kernel(); | 565 | lock_kernel(); |
569 | inode = udf_new_inode(dir, mode, &err); | 566 | inode = udf_new_inode(dir, mode, &err); |
570 | if (!inode) { | 567 | if (!inode) { |
@@ -617,8 +614,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
617 | if (!old_valid_dev(rdev)) | 614 | if (!old_valid_dev(rdev)) |
618 | return -EINVAL; | 615 | return -EINVAL; |
619 | 616 | ||
620 | dquot_initialize(dir); | ||
621 | |||
622 | lock_kernel(); | 617 | lock_kernel(); |
623 | err = -EIO; | 618 | err = -EIO; |
624 | inode = udf_new_inode(dir, mode, &err); | 619 | inode = udf_new_inode(dir, mode, &err); |
@@ -664,8 +659,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
664 | struct udf_inode_info *dinfo = UDF_I(dir); | 659 | struct udf_inode_info *dinfo = UDF_I(dir); |
665 | struct udf_inode_info *iinfo; | 660 | struct udf_inode_info *iinfo; |
666 | 661 | ||
667 | dquot_initialize(dir); | ||
668 | |||
669 | lock_kernel(); | 662 | lock_kernel(); |
670 | err = -EMLINK; | 663 | err = -EMLINK; |
671 | if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) | 664 | if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) |
@@ -800,8 +793,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) | |||
800 | struct fileIdentDesc *fi, cfi; | 793 | struct fileIdentDesc *fi, cfi; |
801 | struct kernel_lb_addr tloc; | 794 | struct kernel_lb_addr tloc; |
802 | 795 | ||
803 | dquot_initialize(dir); | ||
804 | |||
805 | retval = -ENOENT; | 796 | retval = -ENOENT; |
806 | lock_kernel(); | 797 | lock_kernel(); |
807 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); | 798 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); |
@@ -848,8 +839,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) | |||
848 | struct fileIdentDesc cfi; | 839 | struct fileIdentDesc cfi; |
849 | struct kernel_lb_addr tloc; | 840 | struct kernel_lb_addr tloc; |
850 | 841 | ||
851 | dquot_initialize(dir); | ||
852 | |||
853 | retval = -ENOENT; | 842 | retval = -ENOENT; |
854 | lock_kernel(); | 843 | lock_kernel(); |
855 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); | 844 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); |
@@ -904,8 +893,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
904 | struct buffer_head *bh; | 893 | struct buffer_head *bh; |
905 | struct udf_inode_info *iinfo; | 894 | struct udf_inode_info *iinfo; |
906 | 895 | ||
907 | dquot_initialize(dir); | ||
908 | |||
909 | lock_kernel(); | 896 | lock_kernel(); |
910 | inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); | 897 | inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); |
911 | if (!inode) | 898 | if (!inode) |
@@ -1075,8 +1062,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, | |||
1075 | int err; | 1062 | int err; |
1076 | struct buffer_head *bh; | 1063 | struct buffer_head *bh; |
1077 | 1064 | ||
1078 | dquot_initialize(dir); | ||
1079 | |||
1080 | lock_kernel(); | 1065 | lock_kernel(); |
1081 | if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { | 1066 | if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { |
1082 | unlock_kernel(); | 1067 | unlock_kernel(); |
@@ -1139,9 +1124,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1139 | struct kernel_lb_addr tloc; | 1124 | struct kernel_lb_addr tloc; |
1140 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); | 1125 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); |
1141 | 1126 | ||
1142 | dquot_initialize(old_dir); | ||
1143 | dquot_initialize(new_dir); | ||
1144 | |||
1145 | lock_kernel(); | 1127 | lock_kernel(); |
1146 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); | 1128 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); |
1147 | if (ofi) { | 1129 | if (ofi) { |
@@ -1387,7 +1369,6 @@ const struct export_operations udf_export_ops = { | |||
1387 | const struct inode_operations udf_dir_inode_operations = { | 1369 | const struct inode_operations udf_dir_inode_operations = { |
1388 | .lookup = udf_lookup, | 1370 | .lookup = udf_lookup, |
1389 | .create = udf_create, | 1371 | .create = udf_create, |
1390 | .setattr = udf_setattr, | ||
1391 | .link = udf_link, | 1372 | .link = udf_link, |
1392 | .unlink = udf_unlink, | 1373 | .unlink = udf_unlink, |
1393 | .symlink = udf_symlink, | 1374 | .symlink = udf_symlink, |
@@ -1400,5 +1381,4 @@ const struct inode_operations udf_symlink_inode_operations = { | |||
1400 | .readlink = generic_readlink, | 1381 | .readlink = generic_readlink, |
1401 | .follow_link = page_follow_link_light, | 1382 | .follow_link = page_follow_link_light, |
1402 | .put_link = page_put_link, | 1383 | .put_link = page_put_link, |
1403 | .setattr = udf_setattr, | ||
1404 | }; | 1384 | }; |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 1e4543cbcd27..612d1e2e285a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -557,6 +557,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
557 | { | 557 | { |
558 | struct udf_options uopt; | 558 | struct udf_options uopt; |
559 | struct udf_sb_info *sbi = UDF_SB(sb); | 559 | struct udf_sb_info *sbi = UDF_SB(sb); |
560 | int error = 0; | ||
560 | 561 | ||
561 | uopt.flags = sbi->s_flags; | 562 | uopt.flags = sbi->s_flags; |
562 | uopt.uid = sbi->s_uid; | 563 | uopt.uid = sbi->s_uid; |
@@ -582,17 +583,17 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
582 | *flags |= MS_RDONLY; | 583 | *flags |= MS_RDONLY; |
583 | } | 584 | } |
584 | 585 | ||
585 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { | 586 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
586 | unlock_kernel(); | 587 | goto out_unlock; |
587 | return 0; | 588 | |
588 | } | ||
589 | if (*flags & MS_RDONLY) | 589 | if (*flags & MS_RDONLY) |
590 | udf_close_lvid(sb); | 590 | udf_close_lvid(sb); |
591 | else | 591 | else |
592 | udf_open_lvid(sb); | 592 | udf_open_lvid(sb); |
593 | 593 | ||
594 | out_unlock: | ||
594 | unlock_kernel(); | 595 | unlock_kernel(); |
595 | return 0; | 596 | return error; |
596 | } | 597 | } |
597 | 598 | ||
598 | /* Check Volume Structure Descriptors (ECMA 167 2/9.1) */ | 599 | /* Check Volume Structure Descriptors (ECMA 167 2/9.1) */ |
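The remount rework replaces an early unlock-and-return with a single exit label, the usual kernel idiom for keeping lock releases in one place. In sketch form (nothing_to_do is a placeholder condition):

static int example_remount(struct super_block *sb, int *flags)
{
	int error = 0;

	lock_kernel();
	if (nothing_to_do)		/* placeholder for the real check */
		goto out_unlock;

	/* ... the actual remount work, which may set error ... */

out_unlock:
	unlock_kernel();
	return error;
}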
@@ -1939,7 +1940,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1939 | /* Fill in the rest of the superblock */ | 1940 | /* Fill in the rest of the superblock */ |
1940 | sb->s_op = &udf_sb_ops; | 1941 | sb->s_op = &udf_sb_ops; |
1941 | sb->s_export_op = &udf_export_ops; | 1942 | sb->s_export_op = &udf_export_ops; |
1942 | sb->dq_op = NULL; | 1943 | |
1943 | sb->s_dirt = 0; | 1944 | sb->s_dirt = 0; |
1944 | sb->s_magic = UDF_SUPER_MAGIC; | 1945 | sb->s_magic = UDF_SUPER_MAGIC; |
1945 | sb->s_time_gran = 1000; | 1946 | sb->s_time_gran = 1000; |
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 9079ff7d6255..2bac0354891f 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -131,7 +131,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, | |||
131 | 131 | ||
132 | /* file.c */ | 132 | /* file.c */ |
133 | extern long udf_ioctl(struct file *, unsigned int, unsigned long); | 133 | extern long udf_ioctl(struct file *, unsigned int, unsigned long); |
134 | extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); | ||
135 | /* inode.c */ | 134 | /* inode.c */ |
136 | extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); | 135 | extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); |
137 | extern int udf_sync_inode(struct inode *); | 136 | extern int udf_sync_inode(struct inode *); |
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 5cfa4d85ccf2..048484fb10d2 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/stat.h> | 12 | #include <linux/stat.h> |
13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
14 | #include <linux/string.h> | 14 | #include <linux/string.h> |
15 | #include <linux/quotaops.h> | ||
16 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
17 | #include <linux/capability.h> | 16 | #include <linux/capability.h> |
18 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
@@ -85,9 +84,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
85 | "bit already cleared for fragment %u", i); | 84 | "bit already cleared for fragment %u", i); |
86 | } | 85 | } |
87 | 86 | ||
88 | dquot_free_block(inode, count); | ||
89 | |||
90 | |||
91 | fs32_add(sb, &ucg->cg_cs.cs_nffree, count); | 87 | fs32_add(sb, &ucg->cg_cs.cs_nffree, count); |
92 | uspi->cs_total.cs_nffree += count; | 88 | uspi->cs_total.cs_nffree += count; |
93 | fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); | 89 | fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); |
@@ -195,7 +191,6 @@ do_more: | |||
195 | ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); | 191 | ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); |
196 | if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) | 192 | if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) |
197 | ufs_clusteracct (sb, ucpi, blkno, 1); | 193 | ufs_clusteracct (sb, ucpi, blkno, 1); |
198 | dquot_free_block(inode, uspi->s_fpb); | ||
199 | 194 | ||
200 | fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); | 195 | fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); |
201 | uspi->cs_total.cs_nbfree++; | 196 | uspi->cs_total.cs_nbfree++; |
@@ -511,7 +506,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, | |||
511 | struct ufs_cg_private_info * ucpi; | 506 | struct ufs_cg_private_info * ucpi; |
512 | struct ufs_cylinder_group * ucg; | 507 | struct ufs_cylinder_group * ucg; |
513 | unsigned cgno, fragno, fragoff, count, fragsize, i; | 508 | unsigned cgno, fragno, fragoff, count, fragsize, i; |
514 | int ret; | ||
515 | 509 | ||
516 | UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n", | 510 | UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n", |
517 | (unsigned long long)fragment, oldcount, newcount); | 511 | (unsigned long long)fragment, oldcount, newcount); |
@@ -557,11 +551,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, | |||
557 | fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); | 551 | fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); |
558 | for (i = oldcount; i < newcount; i++) | 552 | for (i = oldcount; i < newcount; i++) |
559 | ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); | 553 | ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); |
560 | ret = dquot_alloc_block(inode, count); | ||
561 | if (ret) { | ||
562 | *err = ret; | ||
563 | return 0; | ||
564 | } | ||
565 | 554 | ||
566 | fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); | 555 | fs32_sub(sb, &ucg->cg_cs.cs_nffree, count); |
567 | fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); | 556 | fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); |
@@ -598,7 +587,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, | |||
598 | struct ufs_cylinder_group * ucg; | 587 | struct ufs_cylinder_group * ucg; |
599 | unsigned oldcg, i, j, k, allocsize; | 588 | unsigned oldcg, i, j, k, allocsize; |
600 | u64 result; | 589 | u64 result; |
601 | int ret; | ||
602 | 590 | ||
603 | UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n", | 591 | UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n", |
604 | inode->i_ino, cgno, (unsigned long long)goal, count); | 592 | inode->i_ino, cgno, (unsigned long long)goal, count); |
@@ -667,7 +655,6 @@ cg_found: | |||
667 | for (i = count; i < uspi->s_fpb; i++) | 655 | for (i = count; i < uspi->s_fpb; i++) |
668 | ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); | 656 | ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); |
669 | i = uspi->s_fpb - count; | 657 | i = uspi->s_fpb - count; |
670 | dquot_free_block(inode, i); | ||
671 | 658 | ||
672 | fs32_add(sb, &ucg->cg_cs.cs_nffree, i); | 659 | fs32_add(sb, &ucg->cg_cs.cs_nffree, i); |
673 | uspi->cs_total.cs_nffree += i; | 660 | uspi->cs_total.cs_nffree += i; |
@@ -679,11 +666,6 @@ cg_found: | |||
679 | result = ufs_bitmap_search (sb, ucpi, goal, allocsize); | 666 | result = ufs_bitmap_search (sb, ucpi, goal, allocsize); |
680 | if (result == INVBLOCK) | 667 | if (result == INVBLOCK) |
681 | return 0; | 668 | return 0; |
682 | ret = dquot_alloc_block(inode, count); | ||
683 | if (ret) { | ||
684 | *err = ret; | ||
685 | return 0; | ||
686 | } | ||
687 | for (i = 0; i < count; i++) | 669 | for (i = 0; i < count; i++) |
688 | ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); | 670 | ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); |
689 | 671 | ||
@@ -718,7 +700,6 @@ static u64 ufs_alloccg_block(struct inode *inode, | |||
718 | struct ufs_super_block_first * usb1; | 700 | struct ufs_super_block_first * usb1; |
719 | struct ufs_cylinder_group * ucg; | 701 | struct ufs_cylinder_group * ucg; |
720 | u64 result, blkno; | 702 | u64 result, blkno; |
721 | int ret; | ||
722 | 703 | ||
723 | UFSD("ENTER, goal %llu\n", (unsigned long long)goal); | 704 | UFSD("ENTER, goal %llu\n", (unsigned long long)goal); |
724 | 705 | ||
@@ -752,11 +733,6 @@ gotit: | |||
752 | ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); | 733 | ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); |
753 | if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) | 734 | if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) |
754 | ufs_clusteracct (sb, ucpi, blkno, -1); | 735 | ufs_clusteracct (sb, ucpi, blkno, -1); |
755 | ret = dquot_alloc_block(inode, uspi->s_fpb); | ||
756 | if (ret) { | ||
757 | *err = ret; | ||
758 | return INVBLOCK; | ||
759 | } | ||
760 | 736 | ||
761 | fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1); | 737 | fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1); |
762 | uspi->cs_total.cs_nbfree--; | 738 | uspi->cs_total.cs_nbfree--; |
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 317a0d444f6b..ec784756dc65 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c | |||
@@ -666,6 +666,6 @@ not_empty: | |||
666 | const struct file_operations ufs_dir_operations = { | 666 | const struct file_operations ufs_dir_operations = { |
667 | .read = generic_read_dir, | 667 | .read = generic_read_dir, |
668 | .readdir = ufs_readdir, | 668 | .readdir = ufs_readdir, |
669 | .fsync = simple_fsync, | 669 | .fsync = generic_file_fsync, |
670 | .llseek = generic_file_llseek, | 670 | .llseek = generic_file_llseek, |
671 | }; | 671 | }; |
diff --git a/fs/ufs/file.c b/fs/ufs/file.c index a8962cecde5b..33afa20d4509 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c | |||
@@ -24,7 +24,6 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/quotaops.h> | ||
28 | 27 | ||
29 | #include "ufs_fs.h" | 28 | #include "ufs_fs.h" |
30 | #include "ufs.h" | 29 | #include "ufs.h" |
@@ -41,7 +40,7 @@ const struct file_operations ufs_file_operations = { | |||
41 | .write = do_sync_write, | 40 | .write = do_sync_write, |
42 | .aio_write = generic_file_aio_write, | 41 | .aio_write = generic_file_aio_write, |
43 | .mmap = generic_file_mmap, | 42 | .mmap = generic_file_mmap, |
44 | .open = dquot_file_open, | 43 | .open = generic_file_open, |
45 | .fsync = simple_fsync, | 44 | .fsync = generic_file_fsync, |
46 | .splice_read = generic_file_splice_read, | 45 | .splice_read = generic_file_splice_read, |
47 | }; | 46 | }; |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 3a959d55084d..594480e537d2 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/time.h> | 27 | #include <linux/time.h> |
28 | #include <linux/stat.h> | 28 | #include <linux/stat.h> |
29 | #include <linux/string.h> | 29 | #include <linux/string.h> |
30 | #include <linux/quotaops.h> | ||
31 | #include <linux/buffer_head.h> | 30 | #include <linux/buffer_head.h> |
32 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
33 | #include <linux/bitops.h> | 32 | #include <linux/bitops.h> |
@@ -95,9 +94,6 @@ void ufs_free_inode (struct inode * inode) | |||
95 | 94 | ||
96 | is_directory = S_ISDIR(inode->i_mode); | 95 | is_directory = S_ISDIR(inode->i_mode); |
97 | 96 | ||
98 | dquot_free_inode(inode); | ||
99 | dquot_drop(inode); | ||
100 | |||
101 | clear_inode (inode); | 97 | clear_inode (inode); |
102 | 98 | ||
103 | if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) | 99 | if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) |
@@ -347,21 +343,12 @@ cg_found: | |||
347 | 343 | ||
348 | unlock_super (sb); | 344 | unlock_super (sb); |
349 | 345 | ||
350 | dquot_initialize(inode); | ||
351 | err = dquot_alloc_inode(inode); | ||
352 | if (err) { | ||
353 | dquot_drop(inode); | ||
354 | goto fail_without_unlock; | ||
355 | } | ||
356 | |||
357 | UFSD("allocating inode %lu\n", inode->i_ino); | 346 | UFSD("allocating inode %lu\n", inode->i_ino); |
358 | UFSD("EXIT\n"); | 347 | UFSD("EXIT\n"); |
359 | return inode; | 348 | return inode; |
360 | 349 | ||
361 | fail_remove_inode: | 350 | fail_remove_inode: |
362 | unlock_super(sb); | 351 | unlock_super(sb); |
363 | fail_without_unlock: | ||
364 | inode->i_flags |= S_NOQUOTA; | ||
365 | inode->i_nlink = 0; | 352 | inode->i_nlink = 0; |
366 | iput(inode); | 353 | iput(inode); |
367 | UFSD("EXIT (FAILED): err %d\n", err); | 354 | UFSD("EXIT (FAILED): err %d\n", err); |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index cffa756f1047..73fe773aa034 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include <linux/smp_lock.h> | 37 | #include <linux/smp_lock.h> |
38 | #include <linux/buffer_head.h> | 38 | #include <linux/buffer_head.h> |
39 | #include <linux/writeback.h> | 39 | #include <linux/writeback.h> |
40 | #include <linux/quotaops.h> | ||
41 | 40 | ||
42 | #include "ufs_fs.h" | 41 | #include "ufs_fs.h" |
43 | #include "ufs.h" | 42 | #include "ufs.h" |
@@ -910,9 +909,6 @@ void ufs_delete_inode (struct inode * inode) | |||
910 | { | 909 | { |
911 | loff_t old_i_size; | 910 | loff_t old_i_size; |
912 | 911 | ||
913 | if (!is_bad_inode(inode)) | ||
914 | dquot_initialize(inode); | ||
915 | |||
916 | truncate_inode_pages(&inode->i_data, 0); | 912 | truncate_inode_pages(&inode->i_data, 0); |
917 | if (is_bad_inode(inode)) | 913 | if (is_bad_inode(inode)) |
918 | goto no_delete; | 914 | goto no_delete; |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index eabc02eb1294..b056f02b1fb3 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/time.h> | 30 | #include <linux/time.h> |
31 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
32 | #include <linux/smp_lock.h> | 32 | #include <linux/smp_lock.h> |
33 | #include <linux/quotaops.h> | ||
34 | 33 | ||
35 | #include "ufs_fs.h" | 34 | #include "ufs_fs.h" |
36 | #include "ufs.h" | 35 | #include "ufs.h" |
@@ -86,8 +85,6 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, | |||
86 | 85 | ||
87 | UFSD("BEGIN\n"); | 86 | UFSD("BEGIN\n"); |
88 | 87 | ||
89 | dquot_initialize(dir); | ||
90 | |||
91 | inode = ufs_new_inode(dir, mode); | 88 | inode = ufs_new_inode(dir, mode); |
92 | err = PTR_ERR(inode); | 89 | err = PTR_ERR(inode); |
93 | 90 | ||
@@ -112,8 +109,6 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t | |||
112 | if (!old_valid_dev(rdev)) | 109 | if (!old_valid_dev(rdev)) |
113 | return -EINVAL; | 110 | return -EINVAL; |
114 | 111 | ||
115 | dquot_initialize(dir); | ||
116 | |||
117 | inode = ufs_new_inode(dir, mode); | 112 | inode = ufs_new_inode(dir, mode); |
118 | err = PTR_ERR(inode); | 113 | err = PTR_ERR(inode); |
119 | if (!IS_ERR(inode)) { | 114 | if (!IS_ERR(inode)) { |
@@ -138,8 +133,6 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, | |||
138 | if (l > sb->s_blocksize) | 133 | if (l > sb->s_blocksize) |
139 | goto out_notlocked; | 134 | goto out_notlocked; |
140 | 135 | ||
141 | dquot_initialize(dir); | ||
142 | |||
143 | lock_kernel(); | 136 | lock_kernel(); |
144 | inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); | 137 | inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); |
145 | err = PTR_ERR(inode); | 138 | err = PTR_ERR(inode); |
@@ -185,8 +178,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, | |||
185 | return -EMLINK; | 178 | return -EMLINK; |
186 | } | 179 | } |
187 | 180 | ||
188 | dquot_initialize(dir); | ||
189 | |||
190 | inode->i_ctime = CURRENT_TIME_SEC; | 181 | inode->i_ctime = CURRENT_TIME_SEC; |
191 | inode_inc_link_count(inode); | 182 | inode_inc_link_count(inode); |
192 | atomic_inc(&inode->i_count); | 183 | atomic_inc(&inode->i_count); |
@@ -204,8 +195,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) | |||
204 | if (dir->i_nlink >= UFS_LINK_MAX) | 195 | if (dir->i_nlink >= UFS_LINK_MAX) |
205 | goto out; | 196 | goto out; |
206 | 197 | ||
207 | dquot_initialize(dir); | ||
208 | |||
209 | lock_kernel(); | 198 | lock_kernel(); |
210 | inode_inc_link_count(dir); | 199 | inode_inc_link_count(dir); |
211 | 200 | ||
@@ -250,8 +239,6 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry) | |||
250 | struct page *page; | 239 | struct page *page; |
251 | int err = -ENOENT; | 240 | int err = -ENOENT; |
252 | 241 | ||
253 | dquot_initialize(dir); | ||
254 | |||
255 | de = ufs_find_entry(dir, &dentry->d_name, &page); | 242 | de = ufs_find_entry(dir, &dentry->d_name, &page); |
256 | if (!de) | 243 | if (!de) |
257 | goto out; | 244 | goto out; |
@@ -296,9 +283,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
296 | struct ufs_dir_entry *old_de; | 283 | struct ufs_dir_entry *old_de; |
297 | int err = -ENOENT; | 284 | int err = -ENOENT; |
298 | 285 | ||
299 | dquot_initialize(old_dir); | ||
300 | dquot_initialize(new_dir); | ||
301 | |||
302 | old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); | 286 | old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); |
303 | if (!old_de) | 287 | if (!old_de) |
304 | goto out; | 288 | goto out; |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 14743d935a93..3ec5a9eb6efb 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -77,7 +77,6 @@ | |||
77 | 77 | ||
78 | #include <linux/errno.h> | 78 | #include <linux/errno.h> |
79 | #include <linux/fs.h> | 79 | #include <linux/fs.h> |
80 | #include <linux/quotaops.h> | ||
81 | #include <linux/slab.h> | 80 | #include <linux/slab.h> |
82 | #include <linux/time.h> | 81 | #include <linux/time.h> |
83 | #include <linux/stat.h> | 82 | #include <linux/stat.h> |
@@ -918,6 +917,7 @@ again: | |||
918 | sbi->s_bytesex = BYTESEX_LE; | 917 | sbi->s_bytesex = BYTESEX_LE; |
919 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { | 918 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { |
920 | case UFS_MAGIC: | 919 | case UFS_MAGIC: |
920 | case UFS_MAGIC_BW: | ||
921 | case UFS2_MAGIC: | 921 | case UFS2_MAGIC: |
922 | case UFS_MAGIC_LFN: | 922 | case UFS_MAGIC_LFN: |
923 | case UFS_MAGIC_FEA: | 923 | case UFS_MAGIC_FEA: |
@@ -927,6 +927,7 @@ again: | |||
927 | sbi->s_bytesex = BYTESEX_BE; | 927 | sbi->s_bytesex = BYTESEX_BE; |
928 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { | 928 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { |
929 | case UFS_MAGIC: | 929 | case UFS_MAGIC: |
930 | case UFS_MAGIC_BW: | ||
930 | case UFS2_MAGIC: | 931 | case UFS2_MAGIC: |
931 | case UFS_MAGIC_LFN: | 932 | case UFS_MAGIC_LFN: |
932 | case UFS_MAGIC_FEA: | 933 | case UFS_MAGIC_FEA: |
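UFS_MAGIC_BW (0x0f242697, defined in the ufs_fs.h hunk further down) is accepted in both byte orders: the probe tries little-endian first, then flips s_bytesex and re-reads the magic. Condensed (ufs_magic_ok() is a hypothetical predicate standing in for the switch statements above):

	sbi->s_bytesex = BYTESEX_LE;
	uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic);
	if (!ufs_magic_ok(uspi->fs_magic)) {
		sbi->s_bytesex = BYTESEX_BE;
		uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic);
	}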
@@ -1045,7 +1046,7 @@ magic_found: | |||
1045 | */ | 1046 | */ |
1046 | sb->s_op = &ufs_super_ops; | 1047 | sb->s_op = &ufs_super_ops; |
1047 | sb->s_export_op = &ufs_export_ops; | 1048 | sb->s_export_op = &ufs_export_ops; |
1048 | sb->dq_op = NULL; /***/ | 1049 | |
1049 | sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); | 1050 | sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); |
1050 | 1051 | ||
1051 | uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno); | 1052 | uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno); |
@@ -1435,126 +1436,19 @@ static void destroy_inodecache(void) | |||
1435 | kmem_cache_destroy(ufs_inode_cachep); | 1436 | kmem_cache_destroy(ufs_inode_cachep); |
1436 | } | 1437 | } |
1437 | 1438 | ||
1438 | static void ufs_clear_inode(struct inode *inode) | ||
1439 | { | ||
1440 | dquot_drop(inode); | ||
1441 | } | ||
1442 | |||
1443 | #ifdef CONFIG_QUOTA | ||
1444 | static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t); | ||
1445 | static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); | ||
1446 | #endif | ||
1447 | |||
1448 | static const struct super_operations ufs_super_ops = { | 1439 | static const struct super_operations ufs_super_ops = { |
1449 | .alloc_inode = ufs_alloc_inode, | 1440 | .alloc_inode = ufs_alloc_inode, |
1450 | .destroy_inode = ufs_destroy_inode, | 1441 | .destroy_inode = ufs_destroy_inode, |
1451 | .write_inode = ufs_write_inode, | 1442 | .write_inode = ufs_write_inode, |
1452 | .delete_inode = ufs_delete_inode, | 1443 | .delete_inode = ufs_delete_inode, |
1453 | .clear_inode = ufs_clear_inode, | ||
1454 | .put_super = ufs_put_super, | 1444 | .put_super = ufs_put_super, |
1455 | .write_super = ufs_write_super, | 1445 | .write_super = ufs_write_super, |
1456 | .sync_fs = ufs_sync_fs, | 1446 | .sync_fs = ufs_sync_fs, |
1457 | .statfs = ufs_statfs, | 1447 | .statfs = ufs_statfs, |
1458 | .remount_fs = ufs_remount, | 1448 | .remount_fs = ufs_remount, |
1459 | .show_options = ufs_show_options, | 1449 | .show_options = ufs_show_options, |
1460 | #ifdef CONFIG_QUOTA | ||
1461 | .quota_read = ufs_quota_read, | ||
1462 | .quota_write = ufs_quota_write, | ||
1463 | #endif | ||
1464 | }; | 1450 | }; |
1465 | 1451 | ||
1466 | #ifdef CONFIG_QUOTA | ||
1467 | |||
1468 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | ||
1469 | * acquiring the locks... As quota files are never truncated and quota code | ||
1470 | * itself serializes the operations (and no one else should touch the files) ||
1471 | * we don't have to be afraid of races */ | ||
1472 | static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data, | ||
1473 | size_t len, loff_t off) | ||
1474 | { | ||
1475 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
1476 | sector_t blk = off >> sb->s_blocksize_bits; | ||
1477 | int err = 0; | ||
1478 | int offset = off & (sb->s_blocksize - 1); | ||
1479 | int tocopy; | ||
1480 | size_t toread; | ||
1481 | struct buffer_head *bh; | ||
1482 | loff_t i_size = i_size_read(inode); | ||
1483 | |||
1484 | if (off > i_size) | ||
1485 | return 0; | ||
1486 | if (off+len > i_size) | ||
1487 | len = i_size-off; | ||
1488 | toread = len; | ||
1489 | while (toread > 0) { | ||
1490 | tocopy = sb->s_blocksize - offset < toread ? | ||
1491 | sb->s_blocksize - offset : toread; | ||
1492 | |||
1493 | bh = ufs_bread(inode, blk, 0, &err); | ||
1494 | if (err) | ||
1495 | return err; | ||
1496 | if (!bh) /* A hole? */ | ||
1497 | memset(data, 0, tocopy); | ||
1498 | else { | ||
1499 | memcpy(data, bh->b_data+offset, tocopy); | ||
1500 | brelse(bh); | ||
1501 | } | ||
1502 | offset = 0; | ||
1503 | toread -= tocopy; | ||
1504 | data += tocopy; | ||
1505 | blk++; | ||
1506 | } | ||
1507 | return len; | ||
1508 | } | ||
1509 | |||
1510 | /* Write to quotafile */ | ||
1511 | static ssize_t ufs_quota_write(struct super_block *sb, int type, | ||
1512 | const char *data, size_t len, loff_t off) | ||
1513 | { | ||
1514 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
1515 | sector_t blk = off >> sb->s_blocksize_bits; | ||
1516 | int err = 0; | ||
1517 | int offset = off & (sb->s_blocksize - 1); | ||
1518 | int tocopy; | ||
1519 | size_t towrite = len; | ||
1520 | struct buffer_head *bh; | ||
1521 | |||
1522 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | ||
1523 | while (towrite > 0) { | ||
1524 | tocopy = sb->s_blocksize - offset < towrite ? | ||
1525 | sb->s_blocksize - offset : towrite; | ||
1526 | |||
1527 | bh = ufs_bread(inode, blk, 1, &err); | ||
1528 | if (!bh) | ||
1529 | goto out; | ||
1530 | lock_buffer(bh); | ||
1531 | memcpy(bh->b_data+offset, data, tocopy); | ||
1532 | flush_dcache_page(bh->b_page); | ||
1533 | set_buffer_uptodate(bh); | ||
1534 | mark_buffer_dirty(bh); | ||
1535 | unlock_buffer(bh); | ||
1536 | brelse(bh); | ||
1537 | offset = 0; | ||
1538 | towrite -= tocopy; | ||
1539 | data += tocopy; | ||
1540 | blk++; | ||
1541 | } | ||
1542 | out: | ||
1543 | if (len == towrite) { | ||
1544 | mutex_unlock(&inode->i_mutex); | ||
1545 | return err; | ||
1546 | } | ||
1547 | if (inode->i_size < off+len-towrite) | ||
1548 | i_size_write(inode, off+len-towrite); | ||
1549 | inode->i_version++; | ||
1550 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | ||
1551 | mark_inode_dirty(inode); | ||
1552 | mutex_unlock(&inode->i_mutex); | ||
1553 | return len - towrite; | ||
1554 | } | ||
1555 | |||
1556 | #endif | ||
1557 | |||
1558 | static int ufs_get_sb(struct file_system_type *fs_type, | 1452 | static int ufs_get_sb(struct file_system_type *fs_type, |
1559 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 1453 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
1560 | { | 1454 | { |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index f294c44577dc..589e01a465ba 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <linux/buffer_head.h> | 44 | #include <linux/buffer_head.h> |
45 | #include <linux/blkdev.h> | 45 | #include <linux/blkdev.h> |
46 | #include <linux/sched.h> | 46 | #include <linux/sched.h> |
47 | #include <linux/quotaops.h> | ||
48 | 47 | ||
49 | #include "ufs_fs.h" | 48 | #include "ufs_fs.h" |
50 | #include "ufs.h" | 49 | #include "ufs.h" |
@@ -501,12 +500,10 @@ out: | |||
501 | return err; | 500 | return err; |
502 | } | 501 | } |
503 | 502 | ||
504 | |||
505 | /* | 503 | /* |
506 | * We don't define our `inode->i_op->truncate', and call it here, | 504 | * TODO: |
507 | * because of: | 505 | * - truncate case should use proper ordering instead of using |
508 | * - there is no way to know old size | 506 | * simple_setsize |
509 | * - there is no way to inform the user about an error, if it happens in `truncate' ||
510 | */ | 507 | */ |
511 | int ufs_setattr(struct dentry *dentry, struct iattr *attr) | 508 | int ufs_setattr(struct dentry *dentry, struct iattr *attr) |
512 | { | 509 | { |
@@ -518,19 +515,10 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) | |||
518 | if (error) | 515 | if (error) |
519 | return error; | 516 | return error; |
520 | 517 | ||
521 | if (is_quota_modification(inode, attr)) | ||
522 | dquot_initialize(inode); | ||
523 | |||
524 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | ||
525 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | ||
526 | error = dquot_transfer(inode, attr); | ||
527 | if (error) | ||
528 | return error; | ||
529 | } | ||
530 | if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { | 518 | if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { |
531 | loff_t old_i_size = inode->i_size; | 519 | loff_t old_i_size = inode->i_size; |
532 | 520 | ||
533 | error = vmtruncate(inode, attr->ia_size); | 521 | error = simple_setsize(inode, attr->ia_size); |
534 | if (error) | 522 | if (error) |
535 | return error; | 523 | return error; |
536 | error = ufs_truncate(inode, old_i_size); | 524 | error = ufs_truncate(inode, old_i_size); |
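vmtruncate() is replaced by simple_setsize(), which only updates i_size and shoots down the page cache; freeing the on-disk blocks stays in ufs_truncate(), called with the old size afterwards (hence the TODO about ordering). What the helper amounts to, approximately (a sketch, not the verbatim mm source):

int simple_setsize_sketch(struct inode *inode, loff_t newsize)
{
	loff_t oldsize;
	int error;

	error = inode_newsize_ok(inode, newsize);
	if (error)
		return error;

	oldsize = inode->i_size;
	i_size_write(inode, newsize);
	truncate_pagecache(inode, oldsize, newsize);
	return 0;
}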
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 6943ec677c0b..8aba544f9fad 100644 --- a/fs/ufs/ufs_fs.h +++ b/fs/ufs/ufs_fs.h | |||
@@ -48,6 +48,7 @@ typedef __u16 __bitwise __fs16; | |||
48 | #define UFS_SECTOR_SIZE 512 | 48 | #define UFS_SECTOR_SIZE 512 |
49 | #define UFS_SECTOR_BITS 9 | 49 | #define UFS_SECTOR_BITS 9 |
50 | #define UFS_MAGIC 0x00011954 | 50 | #define UFS_MAGIC 0x00011954 |
51 | #define UFS_MAGIC_BW 0x0f242697 | ||
51 | #define UFS2_MAGIC 0x19540119 | 52 | #define UFS2_MAGIC 0x19540119 |
52 | #define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ | 53 | #define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ |
53 | 54 | ||
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index b4769e40e8bc..c8fb13f83b3f 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -77,6 +77,7 @@ xfs-y += xfs_alloc.o \ | |||
77 | xfs_itable.o \ | 77 | xfs_itable.o \ |
78 | xfs_dfrag.o \ | 78 | xfs_dfrag.o \ |
79 | xfs_log.o \ | 79 | xfs_log.o \ |
80 | xfs_log_cil.o \ | ||
80 | xfs_log_recover.o \ | 81 | xfs_log_recover.o \ |
81 | xfs_mount.o \ | 82 | xfs_mount.o \ |
82 | xfs_mru_cache.o \ | 83 | xfs_mru_cache.o \ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f01de3c55c43..649ade8ef598 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #include "xfs_sb.h" | 38 | #include "xfs_sb.h" |
39 | #include "xfs_inum.h" | 39 | #include "xfs_inum.h" |
40 | #include "xfs_log.h" | ||
40 | #include "xfs_ag.h" | 41 | #include "xfs_ag.h" |
41 | #include "xfs_dmapi.h" | 42 | #include "xfs_dmapi.h" |
42 | #include "xfs_mount.h" | 43 | #include "xfs_mount.h" |
@@ -850,6 +851,12 @@ xfs_buf_lock_value( | |||
850 | * Note that this in no way locks the underlying pages, so it is only | 851 | * Note that this in no way locks the underlying pages, so it is only |
851 | * useful for synchronizing concurrent use of buffer objects, not for | 852 | * useful for synchronizing concurrent use of buffer objects, not for |
852 | * synchronizing independent access to the underlying pages. | 853 | * synchronizing independent access to the underlying pages. |
854 | * | ||
855 | * If we come across a stale, pinned, locked buffer, we know that we | ||
856 | * are being asked to lock a buffer that has been reallocated. Because | ||
857 | * it is pinned, we know that the log has not been pushed to disk and | ||
858 | * hence it will still be locked. Rather than sleeping until someone | ||
859 | * else pushes the log, push it ourselves before trying to get the lock. | ||
853 | */ | 860 | */ |
854 | void | 861 | void |
855 | xfs_buf_lock( | 862 | xfs_buf_lock( |
@@ -857,6 +864,8 @@ xfs_buf_lock( | |||
857 | { | 864 | { |
858 | trace_xfs_buf_lock(bp, _RET_IP_); | 865 | trace_xfs_buf_lock(bp, _RET_IP_); |
859 | 866 | ||
867 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
868 | xfs_log_force(bp->b_mount, 0); | ||
860 | if (atomic_read(&bp->b_io_remaining)) | 869 | if (atomic_read(&bp->b_io_remaining)) |
861 | blk_run_address_space(bp->b_target->bt_mapping); | 870 | blk_run_address_space(bp->b_target->bt_mapping); |
862 | down(&bp->b_sema); | 871 | down(&bp->b_sema); |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index d8fb1b5d6cb5..257a56b127cf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -100,10 +100,10 @@ xfs_iozero( | |||
100 | STATIC int | 100 | STATIC int |
101 | xfs_file_fsync( | 101 | xfs_file_fsync( |
102 | struct file *file, | 102 | struct file *file, |
103 | struct dentry *dentry, | ||
104 | int datasync) | 103 | int datasync) |
105 | { | 104 | { |
106 | struct xfs_inode *ip = XFS_I(dentry->d_inode); | 105 | struct inode *inode = file->f_mapping->host; |
106 | struct xfs_inode *ip = XFS_I(inode); | ||
107 | struct xfs_trans *tp; | 107 | struct xfs_trans *tp; |
108 | int error = 0; | 108 | int error = 0; |
109 | int log_flushed = 0; | 109 | int log_flushed = 0; |
@@ -140,8 +140,8 @@ xfs_file_fsync( | |||
140 | * might get cleared when the inode gets written out via the AIL | 140 | * might get cleared when the inode gets written out via the AIL |
141 | * or xfs_iflush_cluster. | 141 | * or xfs_iflush_cluster. |
142 | */ | 142 | */ |
143 | if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || | 143 | if (((inode->i_state & I_DIRTY_DATASYNC) || |
144 | ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && | 144 | ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && |
145 | ip->i_update_core) { | 145 | ip->i_update_core) { |
146 | /* | 146 | /* |
147 | * Kick off a transaction to log the inode core to get the | 147 | * Kick off a transaction to log the inode core to get the |
@@ -868,7 +868,7 @@ write_retry: | |||
868 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
869 | xfs_ilock(ip, iolock); | 869 | xfs_ilock(ip, iolock); |
870 | 870 | ||
871 | error2 = -xfs_file_fsync(file, file->f_path.dentry, | 871 | error2 = -xfs_file_fsync(file, |
872 | (file->f_flags & __O_SYNC) ? 0 : 1); | 872 | (file->f_flags & __O_SYNC) ? 0 : 1); |
873 | if (!error) | 873 | if (!error) |
874 | error = error2; | 874 | error = error2; |
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index e31bf21fe5d3..9ac8aea91529 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "xfs_dmapi.h" | 19 | #include "xfs_dmapi.h" |
20 | #include "xfs_sb.h" | 20 | #include "xfs_sb.h" |
21 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
22 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
23 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
24 | #include "xfs_quota.h" | 25 | #include "xfs_quota.h" |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f24dbe5efde3..f2d1718c9165 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -119,6 +119,8 @@ mempool_t *xfs_ioend_pool; | |||
119 | #define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ | 119 | #define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ |
120 | #define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ | 120 | #define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ |
121 | #define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ | 121 | #define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ |
122 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ ||
123 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ ||
122 | 124 | ||
123 | /* | 125 | /* |
124 | * Table driven mount option parser. | 126 | * Table driven mount option parser. |
@@ -374,6 +376,13 @@ xfs_parseargs( | |||
374 | mp->m_flags |= XFS_MOUNT_DMAPI; | 376 | mp->m_flags |= XFS_MOUNT_DMAPI; |
375 | } else if (!strcmp(this_char, MNTOPT_DMI)) { | 377 | } else if (!strcmp(this_char, MNTOPT_DMI)) { |
376 | mp->m_flags |= XFS_MOUNT_DMAPI; | 378 | mp->m_flags |= XFS_MOUNT_DMAPI; |
379 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | ||
380 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | ||
381 | cmn_err(CE_WARN, | ||
382 | "Enabling EXPERIMENTAL delayed logging feature " | ||
383 | "- use at your own risk.\n"); | ||
384 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | ||
385 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | ||
377 | } else if (!strcmp(this_char, "ihashsize")) { | 386 | } else if (!strcmp(this_char, "ihashsize")) { |
378 | cmn_err(CE_WARN, | 387 | cmn_err(CE_WARN, |
379 | "XFS: ihashsize no longer used, option is deprecated."); | 388 | "XFS: ihashsize no longer used, option is deprecated."); |
@@ -535,6 +544,7 @@ xfs_showargs( | |||
535 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, | 544 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, |
536 | { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, | 545 | { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, |
537 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, | 546 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, |
547 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, | ||
538 | { 0, NULL } | 548 | { 0, NULL } |
539 | }; | 549 | }; |
540 | static struct proc_xfs_info xfs_info_unset[] = { | 550 | static struct proc_xfs_info xfs_info_unset[] = { |
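The pair of options toggles XFS_MOUNT_DELAYLOG, and the xfs_info_set entry added above makes an active flag visible in /proc/mounts. Expected usage would be along these lines (device and mount point are illustrative):

	# mount -t xfs -o delaylog /dev/sdX /mnt        (opt in, experimental)
	# mount -t xfs -o nodelaylog /dev/sdX /mnt      (explicit opt out)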
@@ -1755,7 +1765,7 @@ xfs_init_zones(void) | |||
1755 | * but it is much faster. | 1765 | * but it is much faster. |
1756 | */ | 1766 | */ |
1757 | xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + | 1767 | xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + |
1758 | (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / | 1768 | (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / |
1759 | NBWORD) * sizeof(int))), "xfs_buf_item"); | 1769 | NBWORD) * sizeof(int))), "xfs_buf_item"); |
1760 | if (!xfs_buf_item_zone) | 1770 | if (!xfs_buf_item_zone) |
1761 | goto out_destroy_trans_zone; | 1771 | goto out_destroy_trans_zone; |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 8a319cfd2901..ff6bc797baf2 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1059,83 +1059,112 @@ TRACE_EVENT(xfs_bunmap, | |||
1059 | 1059 | ||
1060 | ); | 1060 | ); |
1061 | 1061 | ||
1062 | #define XFS_BUSY_SYNC \ | ||
1063 | { 0, "async" }, \ | ||
1064 | { 1, "sync" } | ||
1065 | |||
1062 | TRACE_EVENT(xfs_alloc_busy, | 1066 | TRACE_EVENT(xfs_alloc_busy, |
1063 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1067 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, |
1064 | xfs_extlen_t len, int slot), | 1068 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), |
1065 | TP_ARGS(mp, agno, agbno, len, slot), | 1069 | TP_ARGS(trans, agno, agbno, len, sync), |
1066 | TP_STRUCT__entry( | 1070 | TP_STRUCT__entry( |
1067 | __field(dev_t, dev) | 1071 | __field(dev_t, dev) |
1072 | __field(struct xfs_trans *, tp) | ||
1073 | __field(int, tid) | ||
1068 | __field(xfs_agnumber_t, agno) | 1074 | __field(xfs_agnumber_t, agno) |
1069 | __field(xfs_agblock_t, agbno) | 1075 | __field(xfs_agblock_t, agbno) |
1070 | __field(xfs_extlen_t, len) | 1076 | __field(xfs_extlen_t, len) |
1071 | __field(int, slot) | 1077 | __field(int, sync) |
1072 | ), | 1078 | ), |
1073 | TP_fast_assign( | 1079 | TP_fast_assign( |
1074 | __entry->dev = mp->m_super->s_dev; | 1080 | __entry->dev = trans->t_mountp->m_super->s_dev; |
1081 | __entry->tp = trans; | ||
1082 | __entry->tid = trans->t_ticket->t_tid; | ||
1075 | __entry->agno = agno; | 1083 | __entry->agno = agno; |
1076 | __entry->agbno = agbno; | 1084 | __entry->agbno = agbno; |
1077 | __entry->len = len; | 1085 | __entry->len = len; |
1078 | __entry->slot = slot; | 1086 | __entry->sync = sync; |
1079 | ), | 1087 | ), |
1080 | TP_printk("dev %d:%d agno %u agbno %u len %u slot %d", | 1088 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", |
1081 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1089 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1090 | __entry->tp, | ||
1091 | __entry->tid, | ||
1082 | __entry->agno, | 1092 | __entry->agno, |
1083 | __entry->agbno, | 1093 | __entry->agbno, |
1084 | __entry->len, | 1094 | __entry->len, |
1085 | __entry->slot) | 1095 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) |
1086 | 1096 | ||
1087 | ); | 1097 | ); |
1088 | 1098 | ||
1089 | #define XFS_BUSY_STATES \ | ||
1090 | { 0, "found" }, \ | ||
1091 | { 1, "missing" } | ||
1092 | |||
1093 | TRACE_EVENT(xfs_alloc_unbusy, | 1099 | TRACE_EVENT(xfs_alloc_unbusy, |
1094 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1100 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1095 | int slot, int found), | 1101 | xfs_agblock_t agbno, xfs_extlen_t len), |
1096 | TP_ARGS(mp, agno, slot, found), | 1102 | TP_ARGS(mp, agno, agbno, len), |
1097 | TP_STRUCT__entry( | 1103 | TP_STRUCT__entry( |
1098 | __field(dev_t, dev) | 1104 | __field(dev_t, dev) |
1099 | __field(xfs_agnumber_t, agno) | 1105 | __field(xfs_agnumber_t, agno) |
1100 | __field(int, slot) | 1106 | __field(xfs_agblock_t, agbno) |
1101 | __field(int, found) | 1107 | __field(xfs_extlen_t, len) |
1102 | ), | 1108 | ), |
1103 | TP_fast_assign( | 1109 | TP_fast_assign( |
1104 | __entry->dev = mp->m_super->s_dev; | 1110 | __entry->dev = mp->m_super->s_dev; |
1105 | __entry->agno = agno; | 1111 | __entry->agno = agno; |
1106 | __entry->slot = slot; | 1112 | __entry->agbno = agbno; |
1107 | __entry->found = found; | 1113 | __entry->len = len; |
1108 | ), | 1114 | ), |
1109 | TP_printk("dev %d:%d agno %u slot %d %s", | 1115 | TP_printk("dev %d:%d agno %u agbno %u len %u", |
1110 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1116 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1111 | __entry->agno, | 1117 | __entry->agno, |
1112 | __entry->slot, | 1118 | __entry->agbno, |
1113 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1119 | __entry->len) |
1114 | ); | 1120 | ); |
1115 | 1121 | ||
1122 | #define XFS_BUSY_STATES \ | ||
1123 | { 0, "missing" }, \ | ||
1124 | { 1, "found" } | ||
1125 | |||
1116 | TRACE_EVENT(xfs_alloc_busysearch, | 1126 | TRACE_EVENT(xfs_alloc_busysearch, |
1117 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1127 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1118 | xfs_extlen_t len, xfs_lsn_t lsn), | 1128 | xfs_agblock_t agbno, xfs_extlen_t len, int found), |
1119 | TP_ARGS(mp, agno, agbno, len, lsn), | 1129 | TP_ARGS(mp, agno, agbno, len, found), |
1120 | TP_STRUCT__entry( | 1130 | TP_STRUCT__entry( |
1121 | __field(dev_t, dev) | 1131 | __field(dev_t, dev) |
1122 | __field(xfs_agnumber_t, agno) | 1132 | __field(xfs_agnumber_t, agno) |
1123 | __field(xfs_agblock_t, agbno) | 1133 | __field(xfs_agblock_t, agbno) |
1124 | __field(xfs_extlen_t, len) | 1134 | __field(xfs_extlen_t, len) |
1125 | __field(xfs_lsn_t, lsn) | 1135 | __field(int, found) |
1126 | ), | 1136 | ), |
1127 | TP_fast_assign( | 1137 | TP_fast_assign( |
1128 | __entry->dev = mp->m_super->s_dev; | 1138 | __entry->dev = mp->m_super->s_dev; |
1129 | __entry->agno = agno; | 1139 | __entry->agno = agno; |
1130 | __entry->agbno = agbno; | 1140 | __entry->agbno = agbno; |
1131 | __entry->len = len; | 1141 | __entry->len = len; |
1132 | __entry->lsn = lsn; | 1142 | __entry->found = found; |
1133 | ), | 1143 | ), |
1134 | TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", | 1144 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", |
1135 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1145 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1136 | __entry->agno, | 1146 | __entry->agno, |
1137 | __entry->agbno, | 1147 | __entry->agbno, |
1138 | __entry->len, | 1148 | __entry->len, |
1149 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | ||
1150 | ); | ||
1151 | |||
1152 | TRACE_EVENT(xfs_trans_commit_lsn, | ||
1153 | TP_PROTO(struct xfs_trans *trans), | ||
1154 | TP_ARGS(trans), | ||
1155 | TP_STRUCT__entry( | ||
1156 | __field(dev_t, dev) | ||
1157 | __field(struct xfs_trans *, tp) | ||
1158 | __field(xfs_lsn_t, lsn) | ||
1159 | ), | ||
1160 | TP_fast_assign( | ||
1161 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1162 | __entry->tp = trans; | ||
1163 | __entry->lsn = trans->t_commit_lsn; | ||
1164 | ), | ||
1165 | TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", | ||
1166 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1167 | __entry->tp, | ||
1139 | __entry->lsn) | 1168 | __entry->lsn) |
1140 | ); | 1169 | ); |
1141 | 1170 | ||
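Note on the reworked trace points above: the state fields are rendered through __print_symbolic(), which maps an integer to a string via a { value, "name" } table when the trace buffer is read, not when the event fires. A minimal sketch of the equivalent lookup (illustrative only; xfs_busy_state_name() is a hypothetical helper, not part of this patch):

	static const char *xfs_busy_state_name(int found)
	{
		/* mirrors XFS_BUSY_STATES as redefined above */
		switch (found) {
		case 0:  return "missing";
		case 1:  return "found";
		default: return "?";
		}
	}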
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index b89ec5df0129..585e7633dfc7 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -344,9 +344,9 @@ xfs_qm_init_dquot_blk( | |||
344 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) | 344 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) |
345 | xfs_qm_dqinit_core(curid, type, d); | 345 | xfs_qm_dqinit_core(curid, type, d); |
346 | xfs_trans_dquot_buf(tp, bp, | 346 | xfs_trans_dquot_buf(tp, bp, |
347 | (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : | 347 | (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : |
348 | ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : | 348 | ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : |
349 | XFS_BLI_GDQUOT_BUF))); | 349 | XFS_BLF_GDQUOT_BUF))); |
350 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); | 350 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); |
351 | } | 351 | } |
352 | 352 | ||
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index abb8222b88c9..401f364ad36c 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -175,14 +175,20 @@ typedef struct xfs_agfl { | |||
175 | } xfs_agfl_t; | 175 | } xfs_agfl_t; |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * Busy block/extent entry. Used in perag to mark blocks that have been freed | 178 | * Busy block/extent entry. Indexed by an rbtree in perag to mark blocks that |
179 | * but whose transactions aren't committed to disk yet. | 179 | * have been freed but whose transactions aren't committed to disk yet. |
180 | * | ||
181 | * Note that we use the transaction ID to record the transaction, not the | ||
182 | * transaction structure itself. See xfs_alloc_busy_insert() for details. | ||
180 | */ | 183 | */ |
181 | typedef struct xfs_perag_busy { | 184 | struct xfs_busy_extent { |
182 | xfs_agblock_t busy_start; | 185 | struct rb_node rb_node; /* ag by-bno indexed search tree */ |
183 | xfs_extlen_t busy_length; | 186 | struct list_head list; /* transaction busy extent list */ |
184 | struct xfs_trans *busy_tp; /* transaction that did the free */ | 187 | xfs_agnumber_t agno; |
185 | } xfs_perag_busy_t; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | ||
190 | xlog_tid_t tid; /* transaction that created this */ | ||
191 | }; | ||
186 | 192 | ||
187 | /* | 193 | /* |
188 | * Per-ag incore structure, copies of information in agf and agi, | 194 | * Per-ag incore structure, copies of information in agf and agi, |
@@ -216,7 +222,8 @@ typedef struct xfs_perag { | |||
216 | xfs_agino_t pagl_leftrec; | 222 | xfs_agino_t pagl_leftrec; |
217 | xfs_agino_t pagl_rightrec; | 223 | xfs_agino_t pagl_rightrec; |
218 | #ifdef __KERNEL__ | 224 | #ifdef __KERNEL__ |
219 | spinlock_t pagb_lock; /* lock for pagb_list */ | 225 | spinlock_t pagb_lock; /* lock for pagb_tree */ |
226 | struct rb_root pagb_tree; /* ordered tree of busy extents */ | ||
220 | 227 | ||
221 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
222 | 229 | ||
@@ -226,7 +233,6 @@ typedef struct xfs_perag { | |||
226 | int pag_ici_reclaimable; /* reclaimable inodes */ | 233 | int pag_ici_reclaimable; /* reclaimable inodes */ |
227 | #endif | 234 | #endif |
228 | int pagb_count; /* pagb slots in use */ | 235 | int pagb_count; /* pagb slots in use */ |
229 | xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ | ||
230 | } xfs_perag_t; | 236 | } xfs_perag_t; |
231 | 237 | ||
232 | /* | 238 | /* |
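The new struct xfs_busy_extent is indexed two ways at once: rb_node threads it into the per-AG pagb_tree for by-bno searches, while list threads it onto the owning transaction's t_busy list. A sketch of how the list side is presumably drained when the freeing transaction completes (assumed usage; the actual call site is in the transaction commit path, outside this hunk):

	struct xfs_busy_extent *busyp, *n;

	/* _safe variant because xfs_alloc_busy_clear() frees the entry */
	list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
		xfs_alloc_busy_clear(mp, busyp);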
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 94cddbfb2560..a7fbe8a99b12 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -46,11 +46,9 @@ | |||
46 | #define XFSA_FIXUP_BNO_OK 1 | 46 | #define XFSA_FIXUP_BNO_OK 1 |
47 | #define XFSA_FIXUP_CNT_OK 2 | 47 | #define XFSA_FIXUP_CNT_OK 2 |
48 | 48 | ||
49 | STATIC void | 49 | static int |
50 | xfs_alloc_search_busy(xfs_trans_t *tp, | 50 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
51 | xfs_agnumber_t agno, | 51 | xfs_agblock_t bno, xfs_extlen_t len); |
52 | xfs_agblock_t bno, | ||
53 | xfs_extlen_t len); | ||
54 | 52 | ||
55 | /* | 53 | /* |
56 | * Prototypes for per-ag allocation routines | 54 | * Prototypes for per-ag allocation routines |
@@ -540,9 +538,16 @@ xfs_alloc_ag_vextent( | |||
540 | be32_to_cpu(agf->agf_length)); | 538 | be32_to_cpu(agf->agf_length)); |
541 | xfs_alloc_log_agf(args->tp, args->agbp, | 539 | xfs_alloc_log_agf(args->tp, args->agbp, |
542 | XFS_AGF_FREEBLKS); | 540 | XFS_AGF_FREEBLKS); |
543 | /* search the busylist for these blocks */ | 541 | /* |
544 | xfs_alloc_search_busy(args->tp, args->agno, | 542 | * Search the busylist for these blocks and mark the |
545 | args->agbno, args->len); | 543 | * transaction as synchronous if blocks are found. This |
544 | * avoids the need to block due to a synchronous log | ||
545 | * force to ensure correct ordering as the synchronous | ||
546 | * transaction will guarantee that for us. | ||
547 | */ | ||
548 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
549 | args->agbno, args->len)) | ||
550 | xfs_trans_set_sync(args->tp); | ||
546 | } | 551 | } |
547 | if (!args->isfl) | 552 | if (!args->isfl) |
548 | xfs_trans_mod_sb(args->tp, | 553 | xfs_trans_mod_sb(args->tp, |
@@ -1693,7 +1698,7 @@ xfs_free_ag_extent( | |||
1693 | * when the iclog commits to disk. If a busy block is allocated, | 1698 | * when the iclog commits to disk. If a busy block is allocated, |
1694 | * the iclog is pushed up to the LSN that freed the block. | 1699 | * the iclog is pushed up to the LSN that freed the block. |
1695 | */ | 1700 | */ |
1696 | xfs_alloc_mark_busy(tp, agno, bno, len); | 1701 | xfs_alloc_busy_insert(tp, agno, bno, len); |
1697 | return 0; | 1702 | return 0; |
1698 | 1703 | ||
1699 | error0: | 1704 | error0: |
@@ -1989,14 +1994,20 @@ xfs_alloc_get_freelist( | |||
1989 | *bnop = bno; | 1994 | *bnop = bno; |
1990 | 1995 | ||
1991 | /* | 1996 | /* |
1992 | * As blocks are freed, they are added to the per-ag busy list | 1997 | * As blocks are freed, they are added to the per-ag busy list and |
1993 | * and remain there until the freeing transaction is committed to | 1998 | * remain there until the freeing transaction is committed to disk. |
1994 | * disk. Now that we have allocated blocks, this list must be | 1999 | * Now that we have allocated blocks, this list must be searched to see |
1995 | * searched to see if a block is being reused. If one is, then | 2000 | * if a block is being reused. If one is, then the freeing transaction |
1996 | * the freeing transaction must be pushed to disk NOW by forcing | 2001 | * must be pushed to disk before this transaction. |
1997 | * to disk all iclogs up that transaction's LSN. | 2002 | * |
2003 | * We do this by setting the current transaction to a sync transaction | ||
2004 | * which guarantees that the freeing transaction is on disk before this | ||
2005 | * transaction. This is done instead of a synchronous log force here so | ||
2006 | * that we don't sit and wait with the AGF locked in the transaction | ||
2007 | * during the log force. | ||
1998 | */ | 2008 | */ |
1999 | xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 2009 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) |
2010 | xfs_trans_set_sync(tp); | ||
2000 | return 0; | 2011 | return 0; |
2001 | } | 2012 | } |
2002 | 2013 | ||
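Both allocation paths above now follow the same pattern: detect that a just-allocated block is still busy, then rely on a synchronous commit for ordering instead of blocking on a log force while the AGF is locked. Condensed to its core (a sketch, not a new function in this patch):

	/* commit of this transaction will now wait on the log, which
	 * orders it after the transaction that freed the block */
	if (xfs_alloc_busy_search(mp, agno, bno, len))
		xfs_trans_set_sync(tp);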
@@ -2201,7 +2212,7 @@ xfs_alloc_read_agf( | |||
2201 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); | 2212 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); |
2202 | spin_lock_init(&pag->pagb_lock); | 2213 | spin_lock_init(&pag->pagb_lock); |
2203 | pag->pagb_count = 0; | 2214 | pag->pagb_count = 0; |
2204 | memset(pag->pagb_list, 0, sizeof(pag->pagb_list)); | 2215 | pag->pagb_tree = RB_ROOT; |
2205 | pag->pagf_init = 1; | 2216 | pag->pagf_init = 1; |
2206 | } | 2217 | } |
2207 | #ifdef DEBUG | 2218 | #ifdef DEBUG |
@@ -2479,127 +2490,263 @@ error0: | |||
2479 | * list is reused, the transaction that freed it must be forced to disk | 2490 | * list is reused, the transaction that freed it must be forced to disk |
2480 | * before continuing to use the block. | 2491 | * before continuing to use the block. |
2481 | * | 2492 | * |
2482 | * xfs_alloc_mark_busy - add to the per-ag busy list | 2493 | * xfs_alloc_busy_insert - add to the per-ag busy list |
2483 | * xfs_alloc_clear_busy - remove an item from the per-ag busy list | 2494 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list |
2495 | * xfs_alloc_busy_search - search for a busy extent | ||
2496 | */ | ||
2497 | |||
2498 | /* | ||
2499 | * Insert a new extent into the busy tree. | ||
2500 | * | ||
2501 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2502 | * There can be multiple overlapping ranges in the busy extent tree but only | ||
2503 | * ever one entry at a given start block. The reason for this is that | ||
2504 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2505 | * allocated and freed again before the first transaction commit is on disk. | ||
2506 | * If the exact same start block is freed a second time, we have to wait for | ||
2507 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2508 | * There are two main cases we have to handle here. | ||
2509 | * | ||
2510 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2511 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2512 | * to the freelist, then allocated from the free list, then freed again. In | ||
2513 | * this case, the second extent free is what triggers the duplicate and as | ||
2514 | * such the transaction IDs should match. Because the extent was allocated in | ||
2515 | * this transaction, the transaction must be marked as synchronous. This is | ||
2516 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2517 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2518 | * This serves to catch violations of the second case quite effectively. | ||
2519 | * | ||
2520 | * The second case is where the free/alloc/free occur in different | ||
2521 | * transactions. In this case, the thread freeing the extent the second time | ||
2522 | * can't mark the extent busy immediately because it is already tracked in a | ||
2523 | * transaction that may be committing. When the log commit for the existing | ||
2524 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2525 | * allow the second busy insert to continue using that busy extent structure, | ||
2526 | * it can be freed before this transaction is safely in the log. Hence our | ||
2527 | * only option in this case is to force the log to remove the existing busy | ||
2528 | * extent from the list before we insert the new one with the current | ||
2529 | * transaction ID. | ||
2530 | * | ||
2531 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2532 | * transactions is most easily described with a timeline: | ||
2533 | * | ||
2534 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2535 | * xact alloc | ||
2536 | * free X | ||
2537 | * mark busy | ||
2538 | * commit xact | ||
2539 | * free xact | ||
2540 | * xact alloc | ||
2541 | * alloc X | ||
2542 | * busy search | ||
2543 | * mark xact sync | ||
2544 | * commit xact | ||
2545 | * free xact | ||
2546 | * force log | ||
2547 | * checkpoint starts | ||
2548 | * .... | ||
2549 | * xact alloc | ||
2550 | * free X | ||
2551 | * mark busy | ||
2552 | * finds match | ||
2553 | * *** KABOOM! *** | ||
2554 | * .... | ||
2555 | * log IO completes | ||
2556 | * unbusy X | ||
2557 | * checkpoint completes | ||
2558 | * | ||
2559 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2560 | * the checkpoint completes, and the busy extent it matched will have been | ||
2561 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2562 | * | ||
2563 | * However, to ensure this matching process is robust, we need to use the | ||
2564 | * transaction ID for identifying the transaction, as delayed logging results in | ||
2565 | * the busy extent and transaction lifecycles being different, i.e. the busy | ||
2566 | * extent is active for a lot longer than the transaction. Hence the | ||
2567 | * transaction structure can be freed and reallocated, then used to mark the same | ||
2568 | * extent busy again in the new transaction. In this case the new transaction | ||
2569 | * will have a different tid but can have the same address, and hence we need | ||
2570 | * to check against the tid. | ||
2571 | * | ||
2572 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2573 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2574 | * ability to pin the current checkpoint in memory until this transaction | ||
2575 | * commits to ensure that both the original free and the current one combine | ||
2576 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2577 | * different, however, we still need to wait on a log force. | ||
2484 | */ | 2578 | */ |
2485 | void | 2579 | void |
2486 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 2580 | xfs_alloc_busy_insert( |
2487 | xfs_agnumber_t agno, | 2581 | struct xfs_trans *tp, |
2488 | xfs_agblock_t bno, | 2582 | xfs_agnumber_t agno, |
2489 | xfs_extlen_t len) | 2583 | xfs_agblock_t bno, |
2584 | xfs_extlen_t len) | ||
2490 | { | 2585 | { |
2491 | xfs_perag_busy_t *bsy; | 2586 | struct xfs_busy_extent *new; |
2587 | struct xfs_busy_extent *busyp; | ||
2492 | struct xfs_perag *pag; | 2588 | struct xfs_perag *pag; |
2493 | int n; | 2589 | struct rb_node **rbp; |
2590 | struct rb_node *parent; | ||
2591 | int match; | ||
2494 | 2592 | ||
2495 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2496 | spin_lock(&pag->pagb_lock); | ||
2497 | 2593 | ||
2498 | /* search pagb_list for an open slot */ | 2594 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2499 | for (bsy = pag->pagb_list, n = 0; | 2595 | if (!new) { |
2500 | n < XFS_PAGB_NUM_SLOTS; | 2596 | /* |
2501 | bsy++, n++) { | 2597 | * No Memory! Since it is now not possible to track the free |
2502 | if (bsy->busy_tp == NULL) { | 2598 | * block, make this a synchronous transaction to ensure that |
2503 | break; | 2599 | * the block is not reused before this transaction commits. |
2504 | } | 2600 | */ |
2601 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | ||
2602 | xfs_trans_set_sync(tp); | ||
2603 | return; | ||
2505 | } | 2604 | } |
2506 | 2605 | ||
2507 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n); | 2606 | new->agno = agno; |
2607 | new->bno = bno; | ||
2608 | new->length = len; | ||
2609 | new->tid = xfs_log_get_trans_ident(tp); | ||
2508 | 2610 | ||
2509 | if (n < XFS_PAGB_NUM_SLOTS) { | 2611 | INIT_LIST_HEAD(&new->list); |
2510 | bsy = &pag->pagb_list[n]; | 2612 | |
2511 | pag->pagb_count++; | 2613 | /* trace before insert to be able to see failed inserts */ |
2512 | bsy->busy_start = bno; | 2614 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); |
2513 | bsy->busy_length = len; | 2615 | |
2514 | bsy->busy_tp = tp; | 2616 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2515 | xfs_trans_add_busy(tp, agno, n); | 2617 | restart: |
2516 | } else { | 2618 | spin_lock(&pag->pagb_lock); |
2619 | rbp = &pag->pagb_tree.rb_node; | ||
2620 | parent = NULL; | ||
2621 | busyp = NULL; | ||
2622 | match = 0; | ||
2623 | while (*rbp && match >= 0) { | ||
2624 | parent = *rbp; | ||
2625 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | ||
2626 | |||
2627 | if (new->bno < busyp->bno) { | ||
2628 | /* may overlap, but exact start block is lower */ | ||
2629 | rbp = &(*rbp)->rb_left; | ||
2630 | if (new->bno + new->length > busyp->bno) | ||
2631 | match = busyp->tid == new->tid ? 1 : -1; | ||
2632 | } else if (new->bno > busyp->bno) { | ||
2633 | /* may overlap, but exact start block is higher */ | ||
2634 | rbp = &(*rbp)->rb_right; | ||
2635 | if (bno < busyp->bno + busyp->length) | ||
2636 | match = busyp->tid == new->tid ? 1 : -1; | ||
2637 | } else { | ||
2638 | match = busyp->tid == new->tid ? 1 : -1; | ||
2639 | break; | ||
2640 | } | ||
2641 | } | ||
2642 | if (match < 0) { | ||
2643 | /* overlap marked busy in different transaction */ | ||
2644 | spin_unlock(&pag->pagb_lock); | ||
2645 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2646 | goto restart; | ||
2647 | } | ||
2648 | if (match > 0) { | ||
2517 | /* | 2649 | /* |
2518 | * The busy list is full! Since it is now not possible to | 2650 | * overlap marked busy in same transaction. Update if exact |
2519 | * track the free block, make this a synchronous transaction | 2651 | * start block match, otherwise combine the busy extents into |
2520 | * to ensure that the block is not reused before this | 2652 | * a single range. |
2521 | * transaction commits. | ||
2522 | */ | 2653 | */ |
2523 | xfs_trans_set_sync(tp); | 2654 | if (busyp->bno == new->bno) { |
2524 | } | 2655 | busyp->length = max(busyp->length, new->length); |
2656 | spin_unlock(&pag->pagb_lock); | ||
2657 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2658 | xfs_perag_put(pag); | ||
2659 | kmem_free(new); | ||
2660 | return; | ||
2661 | } | ||
2662 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2663 | new->length = max(busyp->bno + busyp->length, | ||
2664 | new->bno + new->length) - | ||
2665 | min(busyp->bno, new->bno); | ||
2666 | new->bno = min(busyp->bno, new->bno); | ||
2667 | } else | ||
2668 | busyp = NULL; | ||
2525 | 2669 | ||
2670 | rb_link_node(&new->rb_node, parent, rbp); | ||
2671 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | ||
2672 | |||
2673 | list_add(&new->list, &tp->t_busy); | ||
2526 | spin_unlock(&pag->pagb_lock); | 2674 | spin_unlock(&pag->pagb_lock); |
2527 | xfs_perag_put(pag); | 2675 | xfs_perag_put(pag); |
2676 | kmem_free(busyp); | ||
2528 | } | 2677 | } |
2529 | 2678 | ||
2530 | void | 2679 | /* |
2531 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 2680 | * Search for a busy extent within the range of the extent we are about to |
2532 | xfs_agnumber_t agno, | 2681 | * allocate. You need to be holding the busy extent tree lock when calling |
2533 | int idx) | 2682 | * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy |
2683 | * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact | ||
2684 | * match. This is done so that a non-zero return indicates an overlap that | ||
2685 | * will require a synchronous transaction, but it can still be | ||
2686 | * used to distinguish between a partial or exact match. | ||
2687 | */ | ||
2688 | static int | ||
2689 | xfs_alloc_busy_search( | ||
2690 | struct xfs_mount *mp, | ||
2691 | xfs_agnumber_t agno, | ||
2692 | xfs_agblock_t bno, | ||
2693 | xfs_extlen_t len) | ||
2534 | { | 2694 | { |
2535 | struct xfs_perag *pag; | 2695 | struct xfs_perag *pag; |
2536 | xfs_perag_busy_t *list; | 2696 | struct rb_node *rbp; |
2697 | struct xfs_busy_extent *busyp; | ||
2698 | int match = 0; | ||
2537 | 2699 | ||
2538 | ASSERT(idx < XFS_PAGB_NUM_SLOTS); | 2700 | pag = xfs_perag_get(mp, agno); |
2539 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2540 | spin_lock(&pag->pagb_lock); | 2701 | spin_lock(&pag->pagb_lock); |
2541 | list = pag->pagb_list; | ||
2542 | 2702 | ||
2543 | trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp); | 2703 | rbp = pag->pagb_tree.rb_node; |
2544 | 2704 | ||
2545 | if (list[idx].busy_tp == tp) { | 2705 | /* find closest start bno overlap */ |
2546 | list[idx].busy_tp = NULL; | 2706 | while (rbp) { |
2547 | pag->pagb_count--; | 2707 | busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node); |
2708 | if (bno < busyp->bno) { | ||
2709 | /* may overlap, but exact start block is lower */ | ||
2710 | if (bno + len > busyp->bno) | ||
2711 | match = -1; | ||
2712 | rbp = rbp->rb_left; | ||
2713 | } else if (bno > busyp->bno) { | ||
2714 | /* may overlap, but exact start block is higher */ | ||
2715 | if (bno < busyp->bno + busyp->length) | ||
2716 | match = -1; | ||
2717 | rbp = rbp->rb_right; | ||
2718 | } else { | ||
2719 | /* bno matches busyp, length determines exact match */ | ||
2720 | match = (busyp->length == len) ? 1 : -1; | ||
2721 | break; | ||
2722 | } | ||
2548 | } | 2723 | } |
2549 | |||
2550 | spin_unlock(&pag->pagb_lock); | 2724 | spin_unlock(&pag->pagb_lock); |
2725 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2551 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
2727 | return match; | ||
2552 | } | 2728 | } |
2553 | 2729 | ||
2554 | 2730 | void | |
2555 | /* | 2731 | xfs_alloc_busy_clear( |
2556 | * If we find the extent in the busy list, force the log out to get the | 2732 | struct xfs_mount *mp, |
2557 | * extent out of the busy list so the caller can use it straight away. | 2733 | struct xfs_busy_extent *busyp) |
2558 | */ | ||
2559 | STATIC void | ||
2560 | xfs_alloc_search_busy(xfs_trans_t *tp, | ||
2561 | xfs_agnumber_t agno, | ||
2562 | xfs_agblock_t bno, | ||
2563 | xfs_extlen_t len) | ||
2564 | { | 2734 | { |
2565 | struct xfs_perag *pag; | 2735 | struct xfs_perag *pag; |
2566 | xfs_perag_busy_t *bsy; | ||
2567 | xfs_agblock_t uend, bend; | ||
2568 | xfs_lsn_t lsn = 0; | ||
2569 | int cnt; | ||
2570 | 2736 | ||
2571 | pag = xfs_perag_get(tp->t_mountp, agno); | 2737 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, |
2572 | spin_lock(&pag->pagb_lock); | 2738 | busyp->length); |
2573 | cnt = pag->pagb_count; | ||
2574 | 2739 | ||
2575 | /* | 2740 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, |
2576 | * search pagb_list for this slot, skipping open slots. We have to | 2741 | busyp->length) == 1); |
2577 | * search the entire array as there may be multiple overlaps and | ||
2578 | * we have to get the most recent LSN for the log force to push out | ||
2579 | * all the transactions that span the range. | ||
2580 | */ | ||
2581 | uend = bno + len - 1; | ||
2582 | for (cnt = 0; cnt < pag->pagb_count; cnt++) { | ||
2583 | bsy = &pag->pagb_list[cnt]; | ||
2584 | if (!bsy->busy_tp) | ||
2585 | continue; | ||
2586 | 2742 | ||
2587 | bend = bsy->busy_start + bsy->busy_length - 1; | 2743 | list_del_init(&busyp->list); |
2588 | if (bno > bend || uend < bsy->busy_start) | ||
2589 | continue; | ||
2590 | 2744 | ||
2591 | /* (start1,length1) within (start2, length2) */ | 2745 | pag = xfs_perag_get(mp, busyp->agno); |
2592 | if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) | 2746 | spin_lock(&pag->pagb_lock); |
2593 | lsn = bsy->busy_tp->t_commit_lsn; | 2747 | rb_erase(&busyp->rb_node, &pag->pagb_tree); |
2594 | } | ||
2595 | spin_unlock(&pag->pagb_lock); | 2748 | spin_unlock(&pag->pagb_lock); |
2596 | xfs_perag_put(pag); | 2749 | xfs_perag_put(pag); |
2597 | trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); | ||
2598 | 2750 | ||
2599 | /* | 2751 | kmem_free(busyp); |
2600 | * If a block was found, force the log through the LSN of the | ||
2601 | * transaction that freed the block | ||
2602 | */ | ||
2603 | if (lsn) | ||
2604 | xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC); | ||
2605 | } | 2752 | } |
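xfs_alloc_busy_insert() and xfs_alloc_busy_search() walk the rbtree with the same half-open interval test, split across the left/right descent branches. Distilled into a standalone predicate (illustrative; xfs_busy_overlaps() is not a helper this patch adds):

	/* [b1, b1+l1) and [b2, b2+l2) overlap iff each range starts
	 * before the other one ends */
	static inline int
	xfs_busy_overlaps(xfs_agblock_t b1, xfs_extlen_t l1,
			  xfs_agblock_t b2, xfs_extlen_t l2)
	{
		return b1 < b2 + l2 && b2 < b1 + l1;
	}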
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 599bffa39784..6d05199b667c 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -22,6 +22,7 @@ struct xfs_buf; | |||
22 | struct xfs_mount; | 22 | struct xfs_mount; |
23 | struct xfs_perag; | 23 | struct xfs_perag; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_busy_extent; | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. | 28 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. |
@@ -119,15 +120,13 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
119 | #ifdef __KERNEL__ | 120 | #ifdef __KERNEL__ |
120 | 121 | ||
121 | void | 122 | void |
122 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 123 | xfs_alloc_busy_insert(xfs_trans_t *tp, |
123 | xfs_agnumber_t agno, | 124 | xfs_agnumber_t agno, |
124 | xfs_agblock_t bno, | 125 | xfs_agblock_t bno, |
125 | xfs_extlen_t len); | 126 | xfs_extlen_t len); |
126 | 127 | ||
127 | void | 128 | void |
128 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); |
129 | xfs_agnumber_t ag, | ||
130 | int idx); | ||
131 | 130 | ||
132 | #endif /* __KERNEL__ */ | 131 | #endif /* __KERNEL__ */ |
133 | 132 | ||
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b726e10d2c1c..83f494218759 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -134,7 +134,7 @@ xfs_allocbt_free_block( | |||
134 | * disk. If a busy block is allocated, the iclog is pushed up to the | 134 | * disk. If a busy block is allocated, the iclog is pushed up to the |
135 | * LSN that freed the block. | 135 | * LSN that freed the block. |
136 | */ | 136 | */ |
137 | xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 137 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 240340a4727b..02a80984aa05 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -64,7 +64,7 @@ xfs_buf_item_log_debug( | |||
64 | nbytes = last - first + 1; | 64 | nbytes = last - first + 1; |
65 | bfset(bip->bli_logged, first, nbytes); | 65 | bfset(bip->bli_logged, first, nbytes); |
66 | for (x = 0; x < nbytes; x++) { | 66 | for (x = 0; x < nbytes; x++) { |
67 | chunk_num = byte >> XFS_BLI_SHIFT; | 67 | chunk_num = byte >> XFS_BLF_SHIFT; |
68 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; | 68 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; |
69 | bit_num = chunk_num & (NBWORD - 1); | 69 | bit_num = chunk_num & (NBWORD - 1); |
70 | wordp = &(bip->bli_format.blf_data_map[word_num]); | 70 | wordp = &(bip->bli_format.blf_data_map[word_num]); |
@@ -166,7 +166,7 @@ xfs_buf_item_size( | |||
166 | * cancel flag in it. | 166 | * cancel flag in it. |
167 | */ | 167 | */ |
168 | trace_xfs_buf_item_size_stale(bip); | 168 | trace_xfs_buf_item_size_stale(bip); |
169 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 169 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
170 | return 1; | 170 | return 1; |
171 | } | 171 | } |
172 | 172 | ||
@@ -197,9 +197,9 @@ xfs_buf_item_size( | |||
197 | } else if (next_bit != last_bit + 1) { | 197 | } else if (next_bit != last_bit + 1) { |
198 | last_bit = next_bit; | 198 | last_bit = next_bit; |
199 | nvecs++; | 199 | nvecs++; |
200 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLI_CHUNK) != | 200 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) != |
201 | (xfs_buf_offset(bp, last_bit * XFS_BLI_CHUNK) + | 201 | (xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) + |
202 | XFS_BLI_CHUNK)) { | 202 | XFS_BLF_CHUNK)) { |
203 | last_bit = next_bit; | 203 | last_bit = next_bit; |
204 | nvecs++; | 204 | nvecs++; |
205 | } else { | 205 | } else { |
@@ -254,6 +254,20 @@ xfs_buf_item_format( | |||
254 | vecp++; | 254 | vecp++; |
255 | nvecs = 1; | 255 | nvecs = 1; |
256 | 256 | ||
257 | /* | ||
258 | * If it is an inode buffer, transfer the in-memory state to the | ||
259 | * format flags and clear the in-memory state. We do not transfer | ||
260 | * this state if the inode buffer allocation has not yet been committed | ||
261 | * to the log as setting the XFS_BLF_INODE_BUF flag will prevent | ||
262 | * correct replay of the inode allocation. | ||
263 | */ | ||
264 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { | ||
265 | if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | ||
266 | xfs_log_item_in_current_chkpt(&bip->bli_item))) | ||
267 | bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; | ||
268 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; | ||
269 | } | ||
270 | |||
257 | if (bip->bli_flags & XFS_BLI_STALE) { | 271 | if (bip->bli_flags & XFS_BLI_STALE) { |
258 | /* | 272 | /* |
259 | * The buffer is stale, so all we need to log | 273 | * The buffer is stale, so all we need to log |
@@ -261,7 +275,7 @@ xfs_buf_item_format( | |||
261 | * cancel flag in it. | 275 | * cancel flag in it. |
262 | */ | 276 | */ |
263 | trace_xfs_buf_item_format_stale(bip); | 277 | trace_xfs_buf_item_format_stale(bip); |
264 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 278 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
265 | bip->bli_format.blf_size = nvecs; | 279 | bip->bli_format.blf_size = nvecs; |
266 | return; | 280 | return; |
267 | } | 281 | } |
@@ -294,28 +308,28 @@ xfs_buf_item_format( | |||
294 | * keep counting and scanning. | 308 | * keep counting and scanning. |
295 | */ | 309 | */ |
296 | if (next_bit == -1) { | 310 | if (next_bit == -1) { |
297 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 311 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
298 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 312 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
299 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 313 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
300 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 314 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
301 | nvecs++; | 315 | nvecs++; |
302 | break; | 316 | break; |
303 | } else if (next_bit != last_bit + 1) { | 317 | } else if (next_bit != last_bit + 1) { |
304 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 318 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
305 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 319 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
306 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 320 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
307 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 321 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
308 | nvecs++; | 322 | nvecs++; |
309 | vecp++; | 323 | vecp++; |
310 | first_bit = next_bit; | 324 | first_bit = next_bit; |
311 | last_bit = next_bit; | 325 | last_bit = next_bit; |
312 | nbits = 1; | 326 | nbits = 1; |
313 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLI_SHIFT) != | 327 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) != |
314 | (xfs_buf_offset(bp, last_bit << XFS_BLI_SHIFT) + | 328 | (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) + |
315 | XFS_BLI_CHUNK)) { | 329 | XFS_BLF_CHUNK)) { |
316 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 330 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
317 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 331 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
318 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 332 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
319 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 333 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
320 | /* You would think we need to bump the nvecs here too, but we do not; | 334 | /* You would think we need to bump the nvecs here too, but we do not; |
321 | * this number is used by recovery, and it gets confused by the boundary | 335 | * this number is used by recovery, and it gets confused by the boundary |
@@ -341,10 +355,15 @@ xfs_buf_item_format( | |||
341 | } | 355 | } |
342 | 356 | ||
343 | /* | 357 | /* |
344 | * This is called to pin the buffer associated with the buf log | 358 | * This is called to pin the buffer associated with the buf log item in memory |
345 | * item in memory so it cannot be written out. Simply call bpin() | 359 | * so it cannot be written out. Simply call bpin() on the buffer to do this. |
346 | * on the buffer to do this. | 360 | * |
361 | * We also always take a reference to the buffer log item here so that the bli | ||
362 | * is held while the item is pinned in memory. This means that we can | ||
363 | * unconditionally drop the reference count a transaction holds when the | ||
364 | * transaction is completed. | ||
347 | */ | 365 | */ |
366 | |||
348 | STATIC void | 367 | STATIC void |
349 | xfs_buf_item_pin( | 368 | xfs_buf_item_pin( |
350 | xfs_buf_log_item_t *bip) | 369 | xfs_buf_log_item_t *bip) |
@@ -356,6 +375,7 @@ xfs_buf_item_pin( | |||
356 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 375 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
357 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 376 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
358 | (bip->bli_flags & XFS_BLI_STALE)); | 377 | (bip->bli_flags & XFS_BLI_STALE)); |
378 | atomic_inc(&bip->bli_refcount); | ||
359 | trace_xfs_buf_item_pin(bip); | 379 | trace_xfs_buf_item_pin(bip); |
360 | xfs_bpin(bp); | 380 | xfs_bpin(bp); |
361 | } | 381 | } |
@@ -393,7 +413,7 @@ xfs_buf_item_unpin( | |||
393 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 413 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
394 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 414 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
395 | ASSERT(XFS_BUF_ISSTALE(bp)); | 415 | ASSERT(XFS_BUF_ISSTALE(bp)); |
396 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 416 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
397 | trace_xfs_buf_item_unpin_stale(bip); | 417 | trace_xfs_buf_item_unpin_stale(bip); |
398 | 418 | ||
399 | /* | 419 | /* |
@@ -489,20 +509,23 @@ xfs_buf_item_trylock( | |||
489 | } | 509 | } |
490 | 510 | ||
491 | /* | 511 | /* |
492 | * Release the buffer associated with the buf log item. | 512 | * Release the buffer associated with the buf log item. If there is no dirty |
493 | * If there is no dirty logged data associated with the | 513 | * logged data associated with the buffer recorded in the buf log item, then |
494 | * buffer recorded in the buf log item, then free the | 514 | * free the buf log item and remove the reference to it in the buffer. |
495 | * buf log item and remove the reference to it in the | 515 | * |
496 | * buffer. | 516 | * This call ignores the recursion count. It is only called when the buffer |
517 | * should REALLY be unlocked, regardless of the recursion count. | ||
497 | * | 518 | * |
498 | * This call ignores the recursion count. It is only called | 519 | * We unconditionally drop the transaction's reference to the log item. If the |
499 | * when the buffer should REALLY be unlocked, regardless | 520 | * item was logged, then another reference was taken when it was pinned, so we |
500 | * of the recursion count. | 521 | * can safely drop the transaction reference now. This also allows us to avoid |
522 | * potential races with the unpin code freeing the bli by not referencing the | ||
523 | * bli after we've dropped the reference count. | ||
501 | * | 524 | * |
502 | * If the XFS_BLI_HOLD flag is set in the buf log item, then | 525 | * If the XFS_BLI_HOLD flag is set in the buf log item, then free the log item |
503 | * free the log item if necessary but do not unlock the buffer. | 526 | * if necessary but do not unlock the buffer. This is for support of |
504 | * This is for support of xfs_trans_bhold(). Make sure the | 527 | * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't |
505 | * XFS_BLI_HOLD field is cleared if we don't free the item. | 528 | * free the item. |
506 | */ | 529 | */ |
507 | STATIC void | 530 | STATIC void |
508 | xfs_buf_item_unlock( | 531 | xfs_buf_item_unlock( |
@@ -514,73 +537,54 @@ xfs_buf_item_unlock( | |||
514 | 537 | ||
515 | bp = bip->bli_buf; | 538 | bp = bip->bli_buf; |
516 | 539 | ||
517 | /* | 540 | /* Clear the buffer's association with this transaction. */ |
518 | * Clear the buffer's association with this transaction. | ||
519 | */ | ||
520 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); | 541 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); |
521 | 542 | ||
522 | /* | 543 | /* |
523 | * If this is a transaction abort, don't return early. | 544 | * If this is a transaction abort, don't return early. Instead, allow |
524 | * Instead, allow the brelse to happen. | 545 | * the brelse to happen. Normally it would be done for stale |
525 | * Normally it would be done for stale (cancelled) buffers | 546 | * (cancelled) buffers at unpin time, but we'll never go through the |
526 | * at unpin time, but we'll never go through the pin/unpin | 547 | * pin/unpin cycle if we abort inside commit. |
527 | * cycle if we abort inside commit. | ||
528 | */ | 548 | */ |
529 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; | 549 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; |
530 | 550 | ||
531 | /* | 551 | /* |
532 | * If the buf item is marked stale, then don't do anything. | 552 | * Before possibly freeing the buf item, determine if we should |
533 | * We'll unlock the buffer and free the buf item when the | 553 | * release the buffer at the end of this routine. |
534 | * buffer is unpinned for the last time. | ||
535 | */ | 554 | */ |
536 | if (bip->bli_flags & XFS_BLI_STALE) { | 555 | hold = bip->bli_flags & XFS_BLI_HOLD; |
537 | bip->bli_flags &= ~XFS_BLI_LOGGED; | 556 | |
538 | trace_xfs_buf_item_unlock_stale(bip); | 557 | /* Clear the per transaction state. */ |
539 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 558 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); |
540 | if (!aborted) | ||
541 | return; | ||
542 | } | ||
543 | 559 | ||
544 | /* | 560 | /* |
545 | * Drop the transaction's reference to the log item if | 561 | * If the buf item is marked stale, then don't do anything. We'll |
546 | * it was not logged as part of the transaction. Otherwise | 562 | * unlock the buffer and free the buf item when the buffer is unpinned |
547 | * we'll drop the reference in xfs_buf_item_unpin() when | 563 | * for the last time. |
548 | * the transaction is really through with the buffer. | ||
549 | */ | 564 | */ |
550 | if (!(bip->bli_flags & XFS_BLI_LOGGED)) { | 565 | if (bip->bli_flags & XFS_BLI_STALE) { |
551 | atomic_dec(&bip->bli_refcount); | 566 | trace_xfs_buf_item_unlock_stale(bip); |
552 | } else { | 567 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
553 | /* | 568 | if (!aborted) { |
554 | * Clear the logged flag since this is per | 569 | atomic_dec(&bip->bli_refcount); |
555 | * transaction state. | 570 | return; |
556 | */ | 571 | } |
557 | bip->bli_flags &= ~XFS_BLI_LOGGED; | ||
558 | } | 572 | } |
559 | 573 | ||
560 | /* | ||
561 | * Before possibly freeing the buf item, determine if we should | ||
562 | * release the buffer at the end of this routine. | ||
563 | */ | ||
564 | hold = bip->bli_flags & XFS_BLI_HOLD; | ||
565 | trace_xfs_buf_item_unlock(bip); | 574 | trace_xfs_buf_item_unlock(bip); |
566 | 575 | ||
567 | /* | 576 | /* |
568 | * If the buf item isn't tracking any data, free it. | 577 | * If the buf item isn't tracking any data, free it, otherwise drop the |
569 | * Otherwise, if XFS_BLI_HOLD is set clear it. | 578 | * reference we hold to it. |
570 | */ | 579 | */ |
571 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, | 580 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, |
572 | bip->bli_format.blf_map_size)) { | 581 | bip->bli_format.blf_map_size)) |
573 | xfs_buf_item_relse(bp); | 582 | xfs_buf_item_relse(bp); |
574 | } else if (hold) { | 583 | else |
575 | bip->bli_flags &= ~XFS_BLI_HOLD; | 584 | atomic_dec(&bip->bli_refcount); |
576 | } | ||
577 | 585 | ||
578 | /* | 586 | if (!hold) |
579 | * Release the buffer if XFS_BLI_HOLD was not set. | ||
580 | */ | ||
581 | if (!hold) { | ||
582 | xfs_buf_relse(bp); | 587 | xfs_buf_relse(bp); |
583 | } | ||
584 | } | 588 | } |
585 | 589 | ||
586 | /* | 590 | /* |
@@ -717,12 +721,12 @@ xfs_buf_item_init( | |||
717 | } | 721 | } |
718 | 722 | ||
719 | /* | 723 | /* |
720 | * chunks is the number of XFS_BLI_CHUNK size pieces | 724 | * chunks is the number of XFS_BLF_CHUNK size pieces |
721 | * the buffer can be divided into. Make sure not to | 725 | * the buffer can be divided into. Make sure not to |
722 | * truncate any pieces. map_size is the size of the | 726 | * truncate any pieces. map_size is the size of the |
723 | * bitmap needed to describe the chunks of the buffer. | 727 | * bitmap needed to describe the chunks of the buffer. |
724 | */ | 728 | */ |
725 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLI_CHUNK - 1)) >> XFS_BLI_SHIFT); | 729 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT); |
726 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); | 730 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); |
727 | 731 | ||
728 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, | 732 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, |
@@ -790,8 +794,8 @@ xfs_buf_item_log( | |||
790 | /* | 794 | /* |
791 | * Convert byte offsets to bit numbers. | 795 | * Convert byte offsets to bit numbers. |
792 | */ | 796 | */ |
793 | first_bit = first >> XFS_BLI_SHIFT; | 797 | first_bit = first >> XFS_BLF_SHIFT; |
794 | last_bit = last >> XFS_BLI_SHIFT; | 798 | last_bit = last >> XFS_BLF_SHIFT; |
795 | 799 | ||
796 | /* | 800 | /* |
797 | * Calculate the total number of bits to be set. | 801 | * Calculate the total number of bits to be set. |
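The dirty-range tracking in this file works in XFS_BLF_CHUNK-sized pieces, so byte offsets convert to bitmap indices with a shift. A worked example using the constants from xfs_buf_item.h (XFS_BLF_CHUNK = 128, XFS_BLF_SHIFT = 7):

	/* logging bytes 100..700 of a buffer */
	first_bit = 100 >> XFS_BLF_SHIFT;	/* = 0 */
	last_bit  = 700 >> XFS_BLF_SHIFT;	/* = 5 */
	nbits = last_bit - first_bit + 1;	/* 6 chunk bits set, covering 768 bytes */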
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index df4454511f73..f20bb472d582 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -41,22 +41,22 @@ typedef struct xfs_buf_log_format { | |||
41 | * This flag indicates that the buffer contains on disk inodes | 41 | * This flag indicates that the buffer contains on disk inodes |
42 | * and requires special recovery handling. | 42 | * and requires special recovery handling. |
43 | */ | 43 | */ |
44 | #define XFS_BLI_INODE_BUF 0x1 | 44 | #define XFS_BLF_INODE_BUF 0x1 |
45 | /* | 45 | /* |
46 | * This flag indicates that the buffer should not be replayed | 46 | * This flag indicates that the buffer should not be replayed |
47 | * during recovery because its blocks are being freed. | 47 | * during recovery because its blocks are being freed. |
48 | */ | 48 | */ |
49 | #define XFS_BLI_CANCEL 0x2 | 49 | #define XFS_BLF_CANCEL 0x2 |
50 | /* | 50 | /* |
51 | * This flag indicates that the buffer contains on disk | 51 | * This flag indicates that the buffer contains on disk |
52 | * user or group dquots and may require special recovery handling. | 52 | * user or group dquots and may require special recovery handling. |
53 | */ | 53 | */ |
54 | #define XFS_BLI_UDQUOT_BUF 0x4 | 54 | #define XFS_BLF_UDQUOT_BUF 0x4 |
55 | #define XFS_BLI_PDQUOT_BUF 0x8 | 55 | #define XFS_BLF_PDQUOT_BUF 0x8 |
56 | #define XFS_BLI_GDQUOT_BUF 0x10 | 56 | #define XFS_BLF_GDQUOT_BUF 0x10 |
57 | 57 | ||
58 | #define XFS_BLI_CHUNK 128 | 58 | #define XFS_BLF_CHUNK 128 |
59 | #define XFS_BLI_SHIFT 7 | 59 | #define XFS_BLF_SHIFT 7 |
60 | #define BIT_TO_WORD_SHIFT 5 | 60 | #define BIT_TO_WORD_SHIFT 5 |
61 | #define NBWORD (NBBY * sizeof(unsigned int)) | 61 | #define NBWORD (NBBY * sizeof(unsigned int)) |
62 | 62 | ||
@@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format { | |||
69 | #define XFS_BLI_LOGGED 0x08 | 69 | #define XFS_BLI_LOGGED 0x08 |
70 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 | 70 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 |
71 | #define XFS_BLI_STALE_INODE 0x20 | 71 | #define XFS_BLI_STALE_INODE 0x20 |
72 | #define XFS_BLI_INODE_BUF 0x40 | ||
72 | 73 | ||
73 | #define XFS_BLI_FLAGS \ | 74 | #define XFS_BLI_FLAGS \ |
74 | { XFS_BLI_HOLD, "HOLD" }, \ | 75 | { XFS_BLI_HOLD, "HOLD" }, \ |
@@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format { | |||
76 | { XFS_BLI_STALE, "STALE" }, \ | 77 | { XFS_BLI_STALE, "STALE" }, \ |
77 | { XFS_BLI_LOGGED, "LOGGED" }, \ | 78 | { XFS_BLI_LOGGED, "LOGGED" }, \ |
78 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ | 79 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ |
79 | { XFS_BLI_STALE_INODE, "STALE_INODE" } | 80 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ |
81 | { XFS_BLI_INODE_BUF, "INODE_BUF" } | ||
80 | 82 | ||
81 | 83 | ||
82 | #ifdef __KERNEL__ | 84 | #ifdef __KERNEL__ |
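The rename separates two flag namespaces that previously shared the XFS_BLI_ prefix: blf_flags values travel in the on-disk log format and are interpreted by recovery, while bli_flags values are in-memory state only. A sketch of the distinction (the first line appears in the xfs_buf_item_format() hunk above; setting the in-memory flag happens in the transaction code, outside this diff):

	bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;	/* logged, seen by recovery */
	bip->bli_flags |= XFS_BLI_INODE_BUF;		/* in-memory only */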
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ef96175c0744..047b8a8e5c29 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -170,7 +170,7 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | |||
170 | va_list ap; | 170 | va_list ap; |
171 | 171 | ||
172 | #ifdef DEBUG | 172 | #ifdef DEBUG |
173 | xfs_panic_mask |= XFS_PTAG_SHUTDOWN_CORRUPT; | 173 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); |
174 | #endif | 174 | #endif |
175 | 175 | ||
176 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) | 176 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3038dd52c72a..5215abc8023a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -54,9 +54,6 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
54 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 54 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); |
55 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 55 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
56 | STATIC void xlog_dealloc_log(xlog_t *log); | 56 | STATIC void xlog_dealloc_log(xlog_t *log); |
57 | STATIC int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | ||
58 | struct xlog_ticket *tic, xfs_lsn_t *start_lsn, | ||
59 | xlog_in_core_t **commit_iclog, uint flags); | ||
60 | 57 | ||
61 | /* local state machine functions */ | 58 | /* local state machine functions */ |
62 | STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); | 59 | STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); |
@@ -86,14 +83,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log, | |||
86 | STATIC void xlog_ungrant_log_space(xlog_t *log, | 83 | STATIC void xlog_ungrant_log_space(xlog_t *log, |
87 | xlog_ticket_t *ticket); | 84 | xlog_ticket_t *ticket); |
88 | 85 | ||
89 | |||
90 | /* local ticket functions */ | ||
91 | STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, | ||
92 | int unit_bytes, | ||
93 | int count, | ||
94 | char clientid, | ||
95 | uint flags); | ||
96 | |||
97 | #if defined(DEBUG) | 86 | #if defined(DEBUG) |
98 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); | 87 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
99 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 88 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); |
@@ -360,6 +349,15 @@ xfs_log_reserve( | |||
360 | ASSERT(flags & XFS_LOG_PERM_RESERV); | 349 | ASSERT(flags & XFS_LOG_PERM_RESERV); |
361 | internal_ticket = *ticket; | 350 | internal_ticket = *ticket; |
362 | 351 | ||
352 | /* | ||
353 | * this is a new transaction on the ticket, so we need to | ||
354 | * change the transaction ID so that the next transaction has a | ||
355 | * different TID in the log. Just add one to the existing tid | ||
356 | * so that we can see chains of rolling transactions in the log | ||
357 | * easily. | ||
358 | */ | ||
359 | internal_ticket->t_tid++; | ||
360 | |||
363 | trace_xfs_log_reserve(log, internal_ticket); | 361 | trace_xfs_log_reserve(log, internal_ticket); |
364 | 362 | ||
365 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 363 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); |
@@ -367,7 +365,8 @@ xfs_log_reserve( | |||
367 | } else { | 365 | } else { |
368 | /* may sleep if need to allocate more tickets */ | 366 | /* may sleep if need to allocate more tickets */ |
369 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, | 367 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, |
370 | client, flags); | 368 | client, flags, |
369 | KM_SLEEP|KM_MAYFAIL); | ||
371 | if (!internal_ticket) | 370 | if (!internal_ticket) |
372 | return XFS_ERROR(ENOMEM); | 371 | return XFS_ERROR(ENOMEM); |
373 | internal_ticket->t_trans_type = t_type; | 372 | internal_ticket->t_trans_type = t_type; |
@@ -452,6 +451,13 @@ xfs_log_mount( | |||
452 | /* Normal transactions can now occur */ | 451 | /* Normal transactions can now occur */ |
453 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; | 452 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; |
454 | 453 | ||
454 | /* | ||
455 | * Now that the log has been fully initialised and we know where our | ||
456 | * space grant counters are, we can initialise the permanent ticket | ||
457 | * needed for delayed logging to work. | ||
458 | */ | ||
459 | xlog_cil_init_post_recovery(mp->m_log); | ||
460 | |||
455 | return 0; | 461 | return 0; |
456 | 462 | ||
457 | out_destroy_ail: | 463 | out_destroy_ail: |
@@ -658,6 +664,10 @@ xfs_log_item_init( | |||
658 | item->li_ailp = mp->m_ail; | 664 | item->li_ailp = mp->m_ail; |
659 | item->li_type = type; | 665 | item->li_type = type; |
660 | item->li_ops = ops; | 666 | item->li_ops = ops; |
667 | item->li_lv = NULL; | ||
668 | |||
669 | INIT_LIST_HEAD(&item->li_ail); | ||
670 | INIT_LIST_HEAD(&item->li_cil); | ||
661 | } | 671 | } |
662 | 672 | ||
663 | /* | 673 | /* |
@@ -1168,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1168 | *iclogp = log->l_iclog; /* complete ring */ | 1178 | *iclogp = log->l_iclog; /* complete ring */ |
1169 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ | 1179 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ |
1170 | 1180 | ||
1181 | error = xlog_cil_init(log); | ||
1182 | if (error) | ||
1183 | goto out_free_iclog; | ||
1171 | return log; | 1184 | return log; |
1172 | 1185 | ||
1173 | out_free_iclog: | 1186 | out_free_iclog: |
@@ -1494,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log) | |||
1494 | xlog_in_core_t *iclog, *next_iclog; | 1507 | xlog_in_core_t *iclog, *next_iclog; |
1495 | int i; | 1508 | int i; |
1496 | 1509 | ||
1510 | xlog_cil_destroy(log); | ||
1511 | |||
1497 | iclog = log->l_iclog; | 1512 | iclog = log->l_iclog; |
1498 | for (i=0; i<log->l_iclog_bufs; i++) { | 1513 | for (i=0; i<log->l_iclog_bufs; i++) { |
1499 | sv_destroy(&iclog->ic_force_wait); | 1514 | sv_destroy(&iclog->ic_force_wait); |
@@ -1536,8 +1551,10 @@ xlog_state_finish_copy(xlog_t *log, | |||
1536 | * print out info relating to regions written which consume | 1551 | * print out info relating to regions written which consume |
1537 | * the reservation | 1552 | * the reservation |
1538 | */ | 1553 | */ |
1539 | STATIC void | 1554 | void |
1540 | xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | 1555 | xlog_print_tic_res( |
1556 | struct xfs_mount *mp, | ||
1557 | struct xlog_ticket *ticket) | ||
1541 | { | 1558 | { |
1542 | uint i; | 1559 | uint i; |
1543 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); | 1560 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); |
@@ -1637,6 +1654,10 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | |||
1637 | "bad-rtype" : res_type_str[r_type-1]), | 1654 | "bad-rtype" : res_type_str[r_type-1]), |
1638 | ticket->t_res_arr[i].r_len); | 1655 | ticket->t_res_arr[i].r_len); |
1639 | } | 1656 | } |
1657 | |||
1658 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, | ||
1659 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1660 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
1640 | } | 1661 | } |
1641 | 1662 | ||
1642 | /* | 1663 | /* |
@@ -1865,7 +1886,7 @@ xlog_write_copy_finish( | |||
1865 | * we don't update ic_offset until the end when we know exactly how many | 1886 | * we don't update ic_offset until the end when we know exactly how many |
1866 | * bytes have been written out. | 1887 | * bytes have been written out. |
1867 | */ | 1888 | */ |
1868 | STATIC int | 1889 | int |
1869 | xlog_write( | 1890 | xlog_write( |
1870 | struct log *log, | 1891 | struct log *log, |
1871 | struct xfs_log_vec *log_vector, | 1892 | struct xfs_log_vec *log_vector, |
@@ -1889,22 +1910,26 @@ xlog_write( | |||
1889 | *start_lsn = 0; | 1910 | *start_lsn = 0; |
1890 | 1911 | ||
1891 | len = xlog_write_calc_vec_length(ticket, log_vector); | 1912 | len = xlog_write_calc_vec_length(ticket, log_vector); |
1892 | if (ticket->t_curr_res < len) { | 1913 | if (log->l_cilp) { |
1893 | xlog_print_tic_res(log->l_mp, ticket); | 1914 | /* |
1894 | #ifdef DEBUG | 1915 | * Region headers and bytes are already accounted for. |
1895 | xlog_panic( | 1916 | * We only need to take into account start records and |
1896 | "xfs_log_write: reservation ran out. Need to up reservation"); | 1917 | * split regions in this function. |
1897 | #else | 1918 | */ |
1898 | /* Customer configurable panic */ | 1919 | if (ticket->t_flags & XLOG_TIC_INITED) |
1899 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp, | 1920 | ticket->t_curr_res -= sizeof(xlog_op_header_t); |
1900 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1901 | 1921 | ||
1902 | /* If we did not panic, shutdown the filesystem */ | 1922 | /* |
1903 | xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE); | 1923 | * Commit record headers need to be accounted for. These |
1904 | #endif | 1924 | * come in as separate writes so are easy to detect. |
1905 | } | 1925 | */ |
1926 | if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) | ||
1927 | ticket->t_curr_res -= sizeof(xlog_op_header_t); | ||
1928 | } else | ||
1929 | ticket->t_curr_res -= len; | ||
1906 | 1930 | ||
1907 | ticket->t_curr_res -= len; | 1931 | if (ticket->t_curr_res < 0) |
1932 | xlog_print_tic_res(log->l_mp, ticket); | ||
1908 | 1933 | ||
1909 | index = 0; | 1934 | index = 0; |
1910 | lv = log_vector; | 1935 | lv = log_vector; |
@@ -3000,6 +3025,8 @@ _xfs_log_force( | |||
3000 | 3025 | ||
3001 | XFS_STATS_INC(xs_log_force); | 3026 | XFS_STATS_INC(xs_log_force); |
3002 | 3027 | ||
3028 | xlog_cil_push(log, 1); | ||
3029 | |||
3003 | spin_lock(&log->l_icloglock); | 3030 | spin_lock(&log->l_icloglock); |
3004 | 3031 | ||
3005 | iclog = log->l_iclog; | 3032 | iclog = log->l_iclog; |
@@ -3149,6 +3176,12 @@ _xfs_log_force_lsn( | |||
3149 | 3176 | ||
3150 | XFS_STATS_INC(xs_log_force); | 3177 | XFS_STATS_INC(xs_log_force); |
3151 | 3178 | ||
3179 | if (log->l_cilp) { | ||
3180 | lsn = xlog_cil_push_lsn(log, lsn); | ||
3181 | if (lsn == NULLCOMMITLSN) | ||
3182 | return 0; | ||
3183 | } | ||
3184 | |||
3152 | try_again: | 3185 | try_again: |
3153 | spin_lock(&log->l_icloglock); | 3186 | spin_lock(&log->l_icloglock); |
3154 | iclog = log->l_iclog; | 3187 | iclog = log->l_iclog; |
@@ -3313,22 +3346,30 @@ xfs_log_ticket_get( | |||
3313 | return ticket; | 3346 | return ticket; |
3314 | } | 3347 | } |
3315 | 3348 | ||
3349 | xlog_tid_t | ||
3350 | xfs_log_get_trans_ident( | ||
3351 | struct xfs_trans *tp) | ||
3352 | { | ||
3353 | return tp->t_ticket->t_tid; | ||
3354 | } | ||
3355 | |||
3316 | /* | 3356 | /* |
3317 | * Allocate and initialise a new log ticket. | 3357 | * Allocate and initialise a new log ticket. |
3318 | */ | 3358 | */ |
3319 | STATIC xlog_ticket_t * | 3359 | xlog_ticket_t * |
3320 | xlog_ticket_alloc( | 3360 | xlog_ticket_alloc( |
3321 | struct log *log, | 3361 | struct log *log, |
3322 | int unit_bytes, | 3362 | int unit_bytes, |
3323 | int cnt, | 3363 | int cnt, |
3324 | char client, | 3364 | char client, |
3325 | uint xflags) | 3365 | uint xflags, |
3366 | int alloc_flags) | ||
3326 | { | 3367 | { |
3327 | struct xlog_ticket *tic; | 3368 | struct xlog_ticket *tic; |
3328 | uint num_headers; | 3369 | uint num_headers; |
3329 | int iclog_space; | 3370 | int iclog_space; |
3330 | 3371 | ||
3331 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); | 3372 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags); |
3332 | if (!tic) | 3373 | if (!tic) |
3333 | return NULL; | 3374 | return NULL; |
3334 | 3375 | ||
@@ -3647,6 +3688,11 @@ xlog_state_ioerror( | |||
3647 | * c. nothing new gets queued up after (a) and (b) are done. | 3688 | * c. nothing new gets queued up after (a) and (b) are done. |
3648 | * d. if !logerror, flush the iclogs to disk, then seal them off | 3689 | * d. if !logerror, flush the iclogs to disk, then seal them off |
3649 | * for business. | 3690 | * for business. |
3691 | * | ||
3692 | * Note: for delayed logging the !logerror case needs to flush the regions | ||
3693 | * held in memory out to the iclogs before flushing them to disk. This needs | ||
3694 | * to be done before the log is marked as shutdown, otherwise the flush to the | ||
3695 | * iclogs will fail. | ||
3650 | */ | 3696 | */ |
3651 | int | 3697 | int |
3652 | xfs_log_force_umount( | 3698 | xfs_log_force_umount( |
@@ -3680,6 +3726,16 @@ xfs_log_force_umount( | |||
3680 | return 1; | 3726 | return 1; |
3681 | } | 3727 | } |
3682 | retval = 0; | 3728 | retval = 0; |
3729 | |||
3730 | /* | ||
3731 | * Flush the in memory commit item list before marking the log as | ||
3732 | * being shut down. We need to do it in this order to ensure all the | ||
3733 | * completed transactions are flushed to disk with the xfs_log_force() | ||
3734 | * call below. | ||
3735 | */ | ||
3736 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
3737 | xlog_cil_push(log, 1); | ||
3738 | |||
3683 | /* | 3739 | /* |
3684 | * We must hold both the GRANT lock and the LOG lock, | 3740 | * We must hold both the GRANT lock and the LOG lock, |
3685 | * before we mark the filesystem SHUTDOWN and wake | 3741 | * before we mark the filesystem SHUTDOWN and wake |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 229d1f36ba9a..04c78e642cc8 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #define __XFS_LOG_H__ | 19 | #define __XFS_LOG_H__ |
20 | 20 | ||
21 | /* get lsn fields */ | 21 | /* get lsn fields */ |
22 | |||
23 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) | 22 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) |
24 | #define BLOCK_LSN(lsn) ((uint)(lsn)) | 23 | #define BLOCK_LSN(lsn) ((uint)(lsn)) |
25 | 24 | ||
@@ -114,6 +113,9 @@ struct xfs_log_vec { | |||
114 | struct xfs_log_vec *lv_next; /* next lv in build list */ | 113 | struct xfs_log_vec *lv_next; /* next lv in build list */ |
115 | int lv_niovecs; /* number of iovecs in lv */ | 114 | int lv_niovecs; /* number of iovecs in lv */ |
116 | struct xfs_log_iovec *lv_iovecp; /* iovec array */ | 115 | struct xfs_log_iovec *lv_iovecp; /* iovec array */ |
116 | struct xfs_log_item *lv_item; /* owner */ | ||
117 | char *lv_buf; /* formatted buffer */ | ||
118 | int lv_buf_len; /* size of formatted buffer */ | ||
117 | }; | 119 | }; |
118 | 120 | ||
119 | /* | 121 | /* |
@@ -134,6 +136,7 @@ struct xlog_in_core; | |||
134 | struct xlog_ticket; | 136 | struct xlog_ticket; |
135 | struct xfs_log_item; | 137 | struct xfs_log_item; |
136 | struct xfs_item_ops; | 138 | struct xfs_item_ops; |
139 | struct xfs_trans; | ||
137 | 140 | ||
138 | void xfs_log_item_init(struct xfs_mount *mp, | 141 | void xfs_log_item_init(struct xfs_mount *mp, |
139 | struct xfs_log_item *item, | 142 | struct xfs_log_item *item, |
@@ -187,9 +190,16 @@ int xfs_log_need_covered(struct xfs_mount *mp); | |||
187 | 190 | ||
188 | void xlog_iodone(struct xfs_buf *); | 191 | void xlog_iodone(struct xfs_buf *); |
189 | 192 | ||
190 | struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); | 193 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
191 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 194 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
192 | 195 | ||
196 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | ||
197 | |||
198 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | ||
199 | struct xfs_log_vec *log_vector, | ||
200 | xfs_lsn_t *commit_lsn, int flags); | ||
201 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | ||
202 | |||
193 | #endif | 203 | #endif |
194 | 204 | ||
195 | 205 | ||
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c new file mode 100644 index 000000000000..bb17cc044bf3 --- /dev/null +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -0,0 +1,725 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_trans_priv.h" | ||
26 | #include "xfs_log_priv.h" | ||
27 | #include "xfs_sb.h" | ||
28 | #include "xfs_ag.h" | ||
29 | #include "xfs_dir2.h" | ||
30 | #include "xfs_dmapi.h" | ||
31 | #include "xfs_mount.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_alloc.h" | ||
34 | |||
35 | /* | ||
36 | * Perform initial CIL structure initialisation. If the CIL is not | ||
37 | * enabled in this filesystem, ensure the log->l_cilp is null so | ||
38 | * we can check this conditional to determine if we are doing delayed | ||
39 | * logging or not. | ||
40 | */ | ||
41 | int | ||
42 | xlog_cil_init( | ||
43 | struct log *log) | ||
44 | { | ||
45 | struct xfs_cil *cil; | ||
46 | struct xfs_cil_ctx *ctx; | ||
47 | |||
48 | log->l_cilp = NULL; | ||
49 | if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
50 | return 0; | ||
51 | |||
52 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
53 | if (!cil) | ||
54 | return ENOMEM; | ||
55 | |||
56 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
57 | if (!ctx) { | ||
58 | kmem_free(cil); | ||
59 | return ENOMEM; | ||
60 | } | ||
61 | |||
62 | INIT_LIST_HEAD(&cil->xc_cil); | ||
63 | INIT_LIST_HEAD(&cil->xc_committing); | ||
64 | spin_lock_init(&cil->xc_cil_lock); | ||
65 | init_rwsem(&cil->xc_ctx_lock); | ||
66 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | ||
67 | |||
68 | INIT_LIST_HEAD(&ctx->committing); | ||
69 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
70 | ctx->sequence = 1; | ||
71 | ctx->cil = cil; | ||
72 | cil->xc_ctx = ctx; | ||
73 | |||
74 | cil->xc_log = log; | ||
75 | log->l_cilp = cil; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | void | ||
80 | xlog_cil_destroy( | ||
81 | struct log *log) | ||
82 | { | ||
83 | if (!log->l_cilp) | ||
84 | return; | ||
85 | |||
86 | if (log->l_cilp->xc_ctx) { | ||
87 | if (log->l_cilp->xc_ctx->ticket) | ||
88 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
89 | kmem_free(log->l_cilp->xc_ctx); | ||
90 | } | ||
91 | |||
92 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
93 | kmem_free(log->l_cilp); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to | ||
98 | * recover, so we don't allow failure here. Also, we allocate in a context that | ||
99 | * we don't want to be issuing transactions from, so we need to tell the | ||
100 | * allocation code this as well. | ||
101 | * | ||
102 | * We don't reserve any space for the ticket - we are going to steal whatever | ||
103 | * space we require from transactions as they commit. To ensure we reserve all | ||
104 | * the space required, we need to set the current reservation of the ticket to | ||
105 | * zero so that we know to steal the initial transaction overhead from the | ||
106 | * first transaction commit. | ||
107 | */ | ||
108 | static struct xlog_ticket * | ||
109 | xlog_cil_ticket_alloc( | ||
110 | struct log *log) | ||
111 | { | ||
112 | struct xlog_ticket *tic; | ||
113 | |||
114 | tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, | ||
115 | KM_SLEEP|KM_NOFS); | ||
116 | tic->t_trans_type = XFS_TRANS_CHECKPOINT; | ||
117 | |||
118 | /* | ||
119 | * set the current reservation to zero so we know to steal the basic | ||
120 | * transaction overhead reservation from the first transaction commit. | ||
121 | */ | ||
122 | tic->t_curr_res = 0; | ||
123 | return tic; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * After the first stage of log recovery is done, we know where the head and | ||
128 | * tail of the log are. We need this log initialisation done before we can | ||
129 | * initialise the first CIL checkpoint context. | ||
130 | * | ||
131 | * Here we allocate a log ticket to track space usage during a CIL push. This | ||
132 | * ticket is passed to xlog_write() directly so that we don't slowly leak log | ||
133 | * space by failing to account for space used by log headers and additional | ||
134 | * region headers for split regions. | ||
135 | */ | ||
136 | void | ||
137 | xlog_cil_init_post_recovery( | ||
138 | struct log *log) | ||
139 | { | ||
140 | if (!log->l_cilp) | ||
141 | return; | ||
142 | |||
143 | log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); | ||
144 | log->l_cilp->xc_ctx->sequence = 1; | ||
145 | log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, | ||
146 | log->l_curr_block); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Insert the log item into the CIL and calculate the difference in space | ||
151 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
152 | * if the change requires additional log metadata. If it does, take that space | ||
153 | * as well. Remove the amount of space we added to the checkpoint ticket from | ||
154 | * the current transaction ticket so that the accounting works out correctly. | ||
155 | * | ||
156 | * If this is the first time the item is being placed into the CIL in this | ||
157 | * context, pin it so it can't be written to disk until the CIL is flushed to | ||
158 | * the iclog and the iclog written to disk. | ||
159 | */ | ||
160 | static void | ||
161 | xlog_cil_insert( | ||
162 | struct log *log, | ||
163 | struct xlog_ticket *ticket, | ||
164 | struct xfs_log_item *item, | ||
165 | struct xfs_log_vec *lv) | ||
166 | { | ||
167 | struct xfs_cil *cil = log->l_cilp; | ||
168 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
169 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | ||
170 | int len; | ||
171 | int diff_iovecs; | ||
172 | int iclog_space; | ||
173 | |||
174 | if (old) { | ||
175 | /* existing lv on log item, space used is a delta */ | ||
176 | ASSERT(!list_empty(&item->li_cil)); | ||
177 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
178 | |||
179 | len = lv->lv_buf_len - old->lv_buf_len; | ||
180 | diff_iovecs = lv->lv_niovecs - old->lv_niovecs; | ||
181 | kmem_free(old->lv_buf); | ||
182 | kmem_free(old); | ||
183 | } else { | ||
184 | /* new lv, must pin the log item */ | ||
185 | ASSERT(!lv->lv_item->li_lv); | ||
186 | ASSERT(list_empty(&item->li_cil)); | ||
187 | |||
188 | len = lv->lv_buf_len; | ||
189 | diff_iovecs = lv->lv_niovecs; | ||
190 | IOP_PIN(lv->lv_item); | ||
191 | |||
192 | } | ||
193 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
194 | |||
195 | /* attach new log vector to log item */ | ||
196 | lv->lv_item->li_lv = lv; | ||
197 | |||
198 | spin_lock(&cil->xc_cil_lock); | ||
199 | list_move_tail(&item->li_cil, &cil->xc_cil); | ||
200 | ctx->nvecs += diff_iovecs; | ||
201 | |||
202 | /* | ||
203 | * If this is the first time the item is being committed to the CIL, | ||
204 | * store the sequence number on the log item so we can tell | ||
205 | * in future commits whether this is the first checkpoint the item is | ||
206 | * being committed into. | ||
207 | */ | ||
208 | if (!item->li_seq) | ||
209 | item->li_seq = ctx->sequence; | ||
210 | |||
211 | /* | ||
212 | * Now transfer enough transaction reservation to the context ticket | ||
213 | * for the checkpoint. The context ticket is special - the unit | ||
214 | * reservation has to grow as well as the current reservation as we | ||
215 | * steal from tickets so we can correctly determine the space used | ||
216 | * during the transaction commit. | ||
217 | */ | ||
218 | if (ctx->ticket->t_curr_res == 0) { | ||
219 | /* first commit in checkpoint, steal the header reservation */ | ||
220 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
221 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
222 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
223 | } | ||
224 | |||
225 | /* do we need space for more log record headers? */ | ||
226 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
227 | if (len > 0 && (ctx->space_used / iclog_space != | ||
228 | (ctx->space_used + len) / iclog_space)) { | ||
229 | int hdrs; | ||
230 | |||
231 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
232 | /* need to take into account split region headers, too */ | ||
233 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
234 | ctx->ticket->t_unit_res += hdrs; | ||
235 | ctx->ticket->t_curr_res += hdrs; | ||
236 | ticket->t_curr_res -= hdrs; | ||
237 | ASSERT(ticket->t_curr_res >= len); | ||
238 | } | ||
239 | ticket->t_curr_res -= len; | ||
240 | ctx->space_used += len; | ||
241 | |||
242 | spin_unlock(&cil->xc_cil_lock); | ||
243 | } | ||
244 | |||
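
xlog_cil_insert() above is where the "reservation stealing" described in its comments actually happens: the first commit into a checkpoint pays the checkpoint ticket's base overhead out of the committing transaction's ticket, and any insertion whose bytes cross an iclog boundary also pays for the extra log record headers. A standalone sketch of that arithmetic with made-up sizes (hdr_size stands in for l_iclog_hsize plus one op header):

    #include <stdio.h>

    struct tic { int curr_res; int unit_res; };

    /* Model of the reservation stealing in xlog_cil_insert(). */
    static void cil_steal(struct tic *txn, struct tic *ckpt, int len,
                          int *space_used, int iclog_space, int hdr_size)
    {
            if (ckpt->curr_res == 0) {
                    /* first commit in this checkpoint pays the base overhead */
                    ckpt->curr_res = ckpt->unit_res;
                    txn->curr_res -= ckpt->unit_res;
            }

            /* crossing an iclog boundary costs extra record headers */
            if (len > 0 && (*space_used / iclog_space !=
                            (*space_used + len) / iclog_space)) {
                    int hdrs = (len + iclog_space - 1) / iclog_space;

                    hdrs *= hdr_size;
                    ckpt->unit_res += hdrs;
                    ckpt->curr_res += hdrs;
                    txn->curr_res  -= hdrs;
            }

            txn->curr_res -= len;
            *space_used += len;
    }

    int main(void)
    {
            struct tic txn = { 65536, 0 };
            struct tic ckpt = { 0, 1024 };
            int used = 32000;

            cil_steal(&txn, &ckpt, 2048, &used, 32256, 512 + 12);
            printf("txn %d ckpt %d used %d\n",
                   txn.curr_res, ckpt.curr_res, used);
            return 0;
    }
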
245 | /* | ||
246 | * Format log item into a flat buffers | ||
247 | * | ||
248 | * For delayed logging, we need to hold a formatted buffer containing all the | ||
249 | * changes on the log item. This enables us to relog the item in memory and | ||
250 | * write it out asynchronously without needing to relock the object that was | ||
251 | * modified at the time it gets written into the iclog. | ||
252 | * | ||
253 | * This function builds a vector for the changes in each log item in the | ||
254 | * transaction. It then works out the length of the buffer needed for each log | ||
255 | * item, allocates them and formats the vector for the item into the buffer. | ||
256 | * The buffer is then attached to the log item and the item is inserted into the | ||
257 | * Committed Item List for tracking until the next checkpoint is written out. | ||
258 | * | ||
259 | * We don't set up region headers during this process; we simply copy the | ||
260 | * regions into the flat buffer. We can do this because we still have to do a | ||
261 | * formatting step to write the regions into the iclog buffer. Writing the | ||
262 | * ophdrs during the iclog write means that we can support splitting large | ||
263 | * regions across iclog boundaries without needing a change in the format of the | ||
264 | * item/region encapsulation. | ||
265 | * | ||
266 | * Hence what we need to do now is rewrite the vector array to point | ||
267 | * to the copied region inside the buffer we just allocated. This allows us to | ||
268 | * format the regions into the iclog as though they are being formatted | ||
269 | * directly out of the objects themselves. | ||
270 | */ | ||
271 | static void | ||
272 | xlog_cil_format_items( | ||
273 | struct log *log, | ||
274 | struct xfs_log_vec *log_vector, | ||
275 | struct xlog_ticket *ticket, | ||
276 | xfs_lsn_t *start_lsn) | ||
277 | { | ||
278 | struct xfs_log_vec *lv; | ||
279 | |||
280 | if (start_lsn) | ||
281 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
282 | |||
283 | ASSERT(log_vector); | ||
284 | for (lv = log_vector; lv; lv = lv->lv_next) { | ||
285 | void *ptr; | ||
286 | int index; | ||
287 | int len = 0; | ||
288 | |||
289 | /* build the vector array and calculate its length */ | ||
290 | IOP_FORMAT(lv->lv_item, lv->lv_iovecp); | ||
291 | for (index = 0; index < lv->lv_niovecs; index++) | ||
292 | len += lv->lv_iovecp[index].i_len; | ||
293 | |||
294 | lv->lv_buf_len = len; | ||
295 | lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); | ||
296 | ptr = lv->lv_buf; | ||
297 | |||
298 | for (index = 0; index < lv->lv_niovecs; index++) { | ||
299 | struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; | ||
300 | |||
301 | memcpy(ptr, vec->i_addr, vec->i_len); | ||
302 | vec->i_addr = ptr; | ||
303 | ptr += vec->i_len; | ||
304 | } | ||
305 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); | ||
306 | |||
307 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | ||
308 | } | ||
309 | } | ||
310 | |||
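
The copy-and-repoint loop above is what lets delayed logging relog an item without relocking the modified object: every region is copied into one flat allocation and the iovec is rewritten to point at the copy, so a later xlog_write() can format the regions as though straight out of the object. A user-space model of that step, with a stand-in iovec type:

    #include <stdlib.h>
    #include <string.h>

    struct iov { void *addr; int len; };

    /*
     * Copy each region into a single flat buffer and repoint the iovec
     * at the copy, mirroring the loop in xlog_cil_format_items().
     */
    static char *flatten_regions(struct iov *vec, int nvecs, int *buf_len)
    {
            int len = 0, i;
            char *buf, *p;

            for (i = 0; i < nvecs; i++)
                    len += vec[i].len;

            buf = malloc(len);      /* kernel uses kmem_zalloc(..., KM_NOFS) */
            if (!buf)
                    return NULL;

            for (p = buf, i = 0; i < nvecs; i++) {
                    memcpy(p, vec[i].addr, vec[i].len);
                    vec[i].addr = p;        /* vector now points into the copy */
                    p += vec[i].len;
            }
            *buf_len = len;
            return buf;
    }

    int main(void)
    {
            char a[] = "hdr", b[] = "payload";
            struct iov v[2] = { { a, 3 }, { b, 7 } };
            int n = 0;
            char *flat = flatten_regions(v, 2, &n);

            if (!flat)
                    return 1;
            /* v[0].addr and v[1].addr now point inside flat[0..n) */
            free(flat);
            return n == 10 ? 0 : 1;
    }
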
311 | static void | ||
312 | xlog_cil_free_logvec( | ||
313 | struct xfs_log_vec *log_vector) | ||
314 | { | ||
315 | struct xfs_log_vec *lv; | ||
316 | |||
317 | for (lv = log_vector; lv; ) { | ||
318 | struct xfs_log_vec *next = lv->lv_next; | ||
319 | kmem_free(lv->lv_buf); | ||
320 | kmem_free(lv); | ||
321 | lv = next; | ||
322 | } | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Commit a transaction with the given vector to the Committed Item List. | ||
327 | * | ||
328 | * To do this, we need to format the item, pin it in memory if required and | ||
329 | * account for the space used by the transaction. Once we have done that we | ||
330 | * need to release the unused reservation for the transaction, attach the | ||
331 | * transaction to the checkpoint context so we carry the busy extents through | ||
332 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
333 | * | ||
334 | * For more specific information about the order of operations in | ||
335 | * xfs_log_commit_cil() please refer to the comments in | ||
336 | * xfs_trans_commit_iclog(). | ||
337 | * | ||
338 | * Called with the context lock already held in read mode to lock out | ||
339 | * background commit, returns without it held once background commits are | ||
340 | * allowed again. | ||
341 | */ | ||
342 | int | ||
343 | xfs_log_commit_cil( | ||
344 | struct xfs_mount *mp, | ||
345 | struct xfs_trans *tp, | ||
346 | struct xfs_log_vec *log_vector, | ||
347 | xfs_lsn_t *commit_lsn, | ||
348 | int flags) | ||
349 | { | ||
350 | struct log *log = mp->m_log; | ||
351 | int log_flags = 0; | ||
352 | int push = 0; | ||
353 | |||
354 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
355 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
356 | |||
357 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
358 | xlog_cil_free_logvec(log_vector); | ||
359 | return XFS_ERROR(EIO); | ||
360 | } | ||
361 | |||
362 | /* lock out background commit */ | ||
363 | down_read(&log->l_cilp->xc_ctx_lock); | ||
364 | xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
365 | |||
366 | /* check we didn't blow the reservation */ | ||
367 | if (tp->t_ticket->t_curr_res < 0) | ||
368 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
369 | |||
370 | /* attach the transaction to the CIL if it has any busy extents */ | ||
371 | if (!list_empty(&tp->t_busy)) { | ||
372 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
373 | list_splice_init(&tp->t_busy, | ||
374 | &log->l_cilp->xc_ctx->busy_extents); | ||
375 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
376 | } | ||
377 | |||
378 | tp->t_commit_lsn = *commit_lsn; | ||
379 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
380 | xfs_trans_unreserve_and_mod_sb(tp); | ||
381 | |||
382 | /* check for background commit before unlock */ | ||
383 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
384 | push = 1; | ||
385 | up_read(&log->l_cilp->xc_ctx_lock); | ||
386 | |||
387 | /* | ||
388 | * We need to push the CIL every so often so we don't cache more than we | ||
389 | * can fit in the log. The limit really is that a checkpoint can't be | ||
390 | * more than half the log (the current checkpoint is not allowed to | ||
391 | * overwrite the previous checkpoint), but commit latency and memory | ||
392 | * usage limit this to a smaller size in most cases. | ||
393 | */ | ||
394 | if (push) | ||
395 | xlog_cil_push(log, 0); | ||
396 | return 0; | ||
397 | } | ||
398 | |||
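
One ordering detail in the commit path above is easy to miss: the background-push threshold is sampled while the context lock is still held shared, but the push itself is only issued after the lock is dropped, because xlog_cil_push() takes the same lock exclusive. A runnable pthread model of that check/unlock/push ordering (the struct and function names are stand-ins, not kernel code):

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-in CIL with just the fields the tail of the commit path needs. */
    struct cil {
            pthread_rwlock_t ctx_lock;
            int space_used;
            int space_limit;
    };

    static void cil_push_background(struct cil *cil)
    {
            printf("background push triggered at %d bytes\n", cil->space_used);
    }

    /*
     * Sample the threshold under the shared lock, but issue the push only
     * after dropping it: the push takes the lock exclusive, so calling it
     * with the shared lock held would deadlock.
     */
    static void commit_tail(struct cil *cil)
    {
            int push = 0;

            if (cil->space_used > cil->space_limit)
                    push = 1;
            pthread_rwlock_unlock(&cil->ctx_lock);
            if (push)
                    cil_push_background(cil);
    }

    int main(void)
    {
            struct cil cil = { .space_used = 9 << 20, .space_limit = 8 << 20 };

            pthread_rwlock_init(&cil.ctx_lock, NULL);
            pthread_rwlock_rdlock(&cil.ctx_lock);
            commit_tail(&cil);      /* build with -lpthread */
            return 0;
    }
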
399 | /* | ||
400 | * Mark all items committed and clear busy extents. We free the log vector | ||
401 | * chains in a separate pass so that we unpin the log items as quickly as | ||
402 | * possible. | ||
403 | */ | ||
404 | static void | ||
405 | xlog_cil_committed( | ||
406 | void *args, | ||
407 | int abort) | ||
408 | { | ||
409 | struct xfs_cil_ctx *ctx = args; | ||
410 | struct xfs_log_vec *lv; | ||
411 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
412 | struct xfs_busy_extent *busyp, *n; | ||
413 | |||
414 | /* unpin all the log items */ | ||
415 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next) { | ||
416 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
417 | abortflag); | ||
418 | } | ||
419 | |||
420 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | ||
421 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | ||
422 | |||
423 | spin_lock(&ctx->cil->xc_cil_lock); | ||
424 | list_del(&ctx->committing); | ||
425 | spin_unlock(&ctx->cil->xc_cil_lock); | ||
426 | |||
427 | xlog_cil_free_logvec(ctx->lv_chain); | ||
428 | kmem_free(ctx); | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Push the Committed Item List to the log. If the push_now flag is not set, | ||
433 | * then it is a background flush and so we can choose to ignore it. | ||
434 | */ | ||
435 | int | ||
436 | xlog_cil_push( | ||
437 | struct log *log, | ||
438 | int push_now) | ||
439 | { | ||
440 | struct xfs_cil *cil = log->l_cilp; | ||
441 | struct xfs_log_vec *lv; | ||
442 | struct xfs_cil_ctx *ctx; | ||
443 | struct xfs_cil_ctx *new_ctx; | ||
444 | struct xlog_in_core *commit_iclog; | ||
445 | struct xlog_ticket *tic; | ||
446 | int num_lv; | ||
447 | int num_iovecs; | ||
448 | int len; | ||
449 | int error = 0; | ||
450 | struct xfs_trans_header thdr; | ||
451 | struct xfs_log_iovec lhdr; | ||
452 | struct xfs_log_vec lvhdr = { NULL }; | ||
453 | xfs_lsn_t commit_lsn; | ||
454 | |||
455 | if (!cil) | ||
456 | return 0; | ||
457 | |||
458 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | ||
459 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | ||
460 | |||
461 | /* lock out transaction commit, but don't block on background push */ | ||
462 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | ||
463 | if (!push_now) | ||
464 | goto out_free_ticket; | ||
465 | down_write(&cil->xc_ctx_lock); | ||
466 | } | ||
467 | ctx = cil->xc_ctx; | ||
468 | |||
469 | /* check if we've anything to push */ | ||
470 | if (list_empty(&cil->xc_cil)) | ||
471 | goto out_skip; | ||
472 | |||
473 | /* check for spurious background flush */ | ||
474 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | ||
475 | goto out_skip; | ||
476 | |||
477 | /* | ||
478 | * pull all the log vectors off the items in the CIL, and | ||
479 | * remove the items from the CIL. We don't need the CIL lock | ||
480 | * here because it's only needed on the transaction commit | ||
481 | * side which is currently locked out by the flush lock. | ||
482 | */ | ||
483 | lv = NULL; | ||
484 | num_lv = 0; | ||
485 | num_iovecs = 0; | ||
486 | len = 0; | ||
487 | while (!list_empty(&cil->xc_cil)) { | ||
488 | struct xfs_log_item *item; | ||
489 | int i; | ||
490 | |||
491 | item = list_first_entry(&cil->xc_cil, | ||
492 | struct xfs_log_item, li_cil); | ||
493 | list_del_init(&item->li_cil); | ||
494 | if (!ctx->lv_chain) | ||
495 | ctx->lv_chain = item->li_lv; | ||
496 | else | ||
497 | lv->lv_next = item->li_lv; | ||
498 | lv = item->li_lv; | ||
499 | item->li_lv = NULL; | ||
500 | |||
501 | num_lv++; | ||
502 | num_iovecs += lv->lv_niovecs; | ||
503 | for (i = 0; i < lv->lv_niovecs; i++) | ||
504 | len += lv->lv_iovecp[i].i_len; | ||
505 | } | ||
506 | |||
507 | /* | ||
508 | * initialise the new context and attach it to the CIL. Then attach | ||
509 | * the current context to the CIL committing list so it can be found | ||
510 | * during log forces to extract the commit lsn of the sequence that | ||
511 | * needs to be forced. | ||
512 | */ | ||
513 | INIT_LIST_HEAD(&new_ctx->committing); | ||
514 | INIT_LIST_HEAD(&new_ctx->busy_extents); | ||
515 | new_ctx->sequence = ctx->sequence + 1; | ||
516 | new_ctx->cil = cil; | ||
517 | cil->xc_ctx = new_ctx; | ||
518 | |||
519 | /* | ||
520 | * The switch is now done, so we can drop the context lock and move out | ||
521 | * of a shared context. We can't just go straight to the commit record, | ||
522 | * though - we need to synchronise with previous and future commits so | ||
523 | * that the commit records are correctly ordered in the log to ensure | ||
524 | * that we process items during log IO completion in the correct order. | ||
525 | * | ||
526 | * For example, if we get an EFI in one checkpoint and the EFD in the | ||
527 | * next (e.g. due to log forces), we do not want the checkpoint with | ||
528 | * the EFD to be committed before the checkpoint with the EFI. Hence | ||
529 | * we must strictly order the commit records of the checkpoints so | ||
530 | * that: a) the checkpoint callbacks are attached to the iclogs in the | ||
531 | * correct order; and b) the checkpoints are replayed in correct order | ||
532 | * in log recovery. | ||
533 | * | ||
534 | * Hence we need to add this context to the committing context list so | ||
535 | * that higher sequences will wait for us to write out a commit record | ||
536 | * before they do. | ||
537 | */ | ||
538 | spin_lock(&cil->xc_cil_lock); | ||
539 | list_add(&ctx->committing, &cil->xc_committing); | ||
540 | spin_unlock(&cil->xc_cil_lock); | ||
541 | up_write(&cil->xc_ctx_lock); | ||
542 | |||
543 | /* | ||
544 | * Build a checkpoint transaction header and write it to the log to | ||
545 | * begin the transaction. We need to account for the space used by the | ||
546 | * transaction header here as it is not accounted for in xlog_write(). | ||
547 | * | ||
548 | * The LSN we need to pass to the log items on transaction commit is | ||
549 | * the LSN reported by the first log vector write. If we use the commit | ||
550 | * record lsn then we can move the tail beyond the grant write head. | ||
551 | */ | ||
552 | tic = ctx->ticket; | ||
553 | thdr.th_magic = XFS_TRANS_HEADER_MAGIC; | ||
554 | thdr.th_type = XFS_TRANS_CHECKPOINT; | ||
555 | thdr.th_tid = tic->t_tid; | ||
556 | thdr.th_num_items = num_iovecs; | ||
557 | lhdr.i_addr = (xfs_caddr_t)&thdr; | ||
558 | lhdr.i_len = sizeof(xfs_trans_header_t); | ||
559 | lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; | ||
560 | tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); | ||
561 | |||
562 | lvhdr.lv_niovecs = 1; | ||
563 | lvhdr.lv_iovecp = &lhdr; | ||
564 | lvhdr.lv_next = ctx->lv_chain; | ||
565 | |||
566 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); | ||
567 | if (error) | ||
568 | goto out_abort; | ||
569 | |||
570 | /* | ||
571 | * now that we've written the checkpoint into the log, strictly | ||
572 | * order the commit records so replay will get them in the right order. | ||
573 | */ | ||
574 | restart: | ||
575 | spin_lock(&cil->xc_cil_lock); | ||
576 | list_for_each_entry(new_ctx, &cil->xc_committing, committing) { | ||
577 | /* | ||
578 | * Higher sequences will wait for this one so skip them. | ||
579 | * Don't wait for our own sequence, either. | ||
580 | */ | ||
581 | if (new_ctx->sequence >= ctx->sequence) | ||
582 | continue; | ||
583 | if (!new_ctx->commit_lsn) { | ||
584 | /* | ||
585 | * It is still being pushed! Wait for the push to | ||
586 | * complete, then start again from the beginning. | ||
587 | */ | ||
588 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | ||
589 | goto restart; | ||
590 | } | ||
591 | } | ||
592 | spin_unlock(&cil->xc_cil_lock); | ||
593 | |||
594 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); | ||
595 | if (error || commit_lsn == -1) | ||
596 | goto out_abort; | ||
597 | |||
598 | /* attach all the transactions w/ busy extents to iclog */ | ||
599 | ctx->log_cb.cb_func = xlog_cil_committed; | ||
600 | ctx->log_cb.cb_arg = ctx; | ||
601 | error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb); | ||
602 | if (error) | ||
603 | goto out_abort; | ||
604 | |||
605 | /* | ||
606 | * now the checkpoint commit is complete and we've attached the | ||
607 | * callbacks to the iclog we can assign the commit LSN to the context | ||
608 | * and wake up anyone who is waiting for the commit to complete. | ||
609 | */ | ||
610 | spin_lock(&cil->xc_cil_lock); | ||
611 | ctx->commit_lsn = commit_lsn; | ||
612 | sv_broadcast(&cil->xc_commit_wait); | ||
613 | spin_unlock(&cil->xc_cil_lock); | ||
614 | |||
615 | /* release the hounds! */ | ||
616 | return xfs_log_release_iclog(log->l_mp, commit_iclog); | ||
617 | |||
618 | out_skip: | ||
619 | up_write(&cil->xc_ctx_lock); | ||
620 | out_free_ticket: | ||
621 | xfs_log_ticket_put(new_ctx->ticket); | ||
622 | kmem_free(new_ctx); | ||
623 | return 0; | ||
624 | |||
625 | out_abort: | ||
626 | xlog_cil_committed(ctx, XFS_LI_ABORTED); | ||
627 | return XFS_ERROR(EIO); | ||
628 | } | ||
629 | |||
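
The restart loop above implements the ordering rule from the long comment earlier in xlog_cil_push(): a checkpoint must not write its commit record until every earlier sequence on the committing list has recorded one. Stripped of the spinlock and sleep/wakeup machinery, the predicate being waited on looks roughly like this (the linked list stands in for the kernel list_head usage):

    struct ckpt {
            long sequence;
            long commit_lsn;        /* 0 until the commit record is written */
            struct ckpt *next;
    };

    /*
     * Returns 1 when all lower sequences have a commit LSN, i.e. when it
     * is safe for my_seq to write its own commit record. The real code
     * sleeps on xc_commit_wait and rescans rather than polling.
     */
    static int may_write_commit_record(const struct ckpt *committing, long my_seq)
    {
            const struct ckpt *c;

            for (c = committing; c; c = c->next) {
                    if (c->sequence >= my_seq)
                            continue;       /* our own or later: they wait for us */
                    if (!c->commit_lsn)
                            return 0;       /* earlier push still in flight */
            }
            return 1;
    }

    int main(void)
    {
            struct ckpt seq1 = { 1, 0, 0 };
            struct ckpt seq2 = { 2, 0, &seq1 };
            int blocked, clear;

            blocked = !may_write_commit_record(&seq2, 2);   /* seq 1 pending */
            seq1.commit_lsn = 0x100;
            clear = may_write_commit_record(&seq2, 2);      /* now allowed */
            return (blocked && clear) ? 0 : 1;
    }
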
630 | /* | ||
631 | * Conditionally push the CIL based on the sequence passed in. | ||
632 | * | ||
633 | * We only need to push if we haven't already pushed the sequence | ||
634 | * number given. Hence the only time we will trigger a push here is | ||
635 | * if the push sequence is the same as the current context. | ||
636 | * | ||
637 | * We return the current commit lsn to allow the callers to determine if an | ||
638 | * iclog flush is necessary following this call. | ||
639 | * | ||
640 | * XXX: Initially, just push the CIL unconditionally and return whatever | ||
641 | * commit lsn is there. It'll be empty, so this is broken for now. | ||
642 | */ | ||
643 | xfs_lsn_t | ||
644 | xlog_cil_push_lsn( | ||
645 | struct log *log, | ||
646 | xfs_lsn_t push_seq) | ||
647 | { | ||
648 | struct xfs_cil *cil = log->l_cilp; | ||
649 | struct xfs_cil_ctx *ctx; | ||
650 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; | ||
651 | |||
652 | restart: | ||
653 | down_write(&cil->xc_ctx_lock); | ||
654 | ASSERT(push_seq <= cil->xc_ctx->sequence); | ||
655 | |||
656 | /* check to see if we need to force out the current context */ | ||
657 | if (push_seq == cil->xc_ctx->sequence) { | ||
658 | up_write(&cil->xc_ctx_lock); | ||
659 | xlog_cil_push(log, 1); | ||
660 | goto restart; | ||
661 | } | ||
662 | |||
663 | /* | ||
664 | * See if we can find a previous sequence still committing. | ||
665 | * We can drop the flush lock as soon as we have the cil lock | ||
666 | * because we are now only comparing contexts protected by | ||
667 | * the cil lock. | ||
668 | * | ||
669 | * We need to wait for all previous sequence commits to complete | ||
670 | * before allowing the force of push_seq to go ahead. Hence block | ||
671 | * on commits for those as well. | ||
672 | */ | ||
673 | spin_lock(&cil->xc_cil_lock); | ||
674 | up_write(&cil->xc_ctx_lock); | ||
675 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | ||
676 | if (ctx->sequence > push_seq) | ||
677 | continue; | ||
678 | if (!ctx->commit_lsn) { | ||
679 | /* | ||
680 | * It is still being pushed! Wait for the push to | ||
681 | * complete, then start again from the beginning. | ||
682 | */ | ||
683 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | ||
684 | goto restart; | ||
685 | } | ||
686 | if (ctx->sequence != push_seq) | ||
687 | continue; | ||
688 | /* found it! */ | ||
689 | commit_lsn = ctx->commit_lsn; | ||
690 | } | ||
691 | spin_unlock(&cil->xc_cil_lock); | ||
692 | return commit_lsn; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Check if the current log item was first committed in this sequence. | ||
697 | * We can't rely on just the log item being in the CIL, we have to check | ||
698 | * the recorded commit sequence number. | ||
699 | * | ||
700 | * Note: for this to be used in a non-racy manner, it has to be called with | ||
701 | * CIL flushing locked out. As a result, it should only be used during the | ||
702 | * transaction commit process when deciding what to format into the item. | ||
703 | */ | ||
704 | bool | ||
705 | xfs_log_item_in_current_chkpt( | ||
706 | struct xfs_log_item *lip) | ||
707 | { | ||
708 | struct xfs_cil_ctx *ctx; | ||
709 | |||
710 | if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
711 | return false; | ||
712 | if (list_empty(&lip->li_cil)) | ||
713 | return false; | ||
714 | |||
715 | ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; | ||
716 | |||
717 | /* | ||
718 | * li_seq is written on the first commit of a log item to record the | ||
719 | * first checkpoint it is written to. Hence if it is different to the | ||
720 | * current sequence, we're in a new checkpoint. | ||
721 | */ | ||
722 | if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) | ||
723 | return false; | ||
724 | return true; | ||
725 | } | ||
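
xfs_log_item_in_current_chkpt() above relies on li_seq being stamped exactly once, on the item's first insertion into the CIL (the "if (!item->li_seq)" branch of xlog_cil_insert()). A tiny model of the two halves of that handshake as they appear in this patch:

    struct item { long li_seq; };

    /* First insertion into the CIL records the checkpoint sequence. */
    static void record_first_commit(struct item *ip, long ctx_sequence)
    {
            if (!ip->li_seq)
                    ip->li_seq = ctx_sequence;
    }

    /* A later commit compares against the running context to decide
     * whether the item was first committed in the current checkpoint. */
    static int in_current_chkpt(const struct item *ip, long ctx_sequence)
    {
            return ip->li_seq == ctx_sequence;
    }

    int main(void)
    {
            struct item ip = { 0 };

            record_first_commit(&ip, 5);    /* enters the CIL in sequence 5 */
            record_first_commit(&ip, 6);    /* relogged later: seq unchanged */
            return (in_current_chkpt(&ip, 5) && !in_current_chkpt(&ip, 6)) ? 0 : 1;
    }
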
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 9cf695154451..8c072618965c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -152,8 +152,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
154 | shutdown */ | 154 | shutdown */ |
155 | typedef __uint32_t xlog_tid_t; | ||
156 | |||
157 | 155 | ||
158 | #ifdef __KERNEL__ | 156 | #ifdef __KERNEL__ |
159 | /* | 157 | /* |
@@ -379,6 +377,99 @@ typedef struct xlog_in_core { | |||
379 | } xlog_in_core_t; | 377 | } xlog_in_core_t; |
380 | 378 | ||
381 | /* | 379 | /* |
380 | * The CIL context is used to aggregate per-transaction details as well as be | ||
381 | * passed to the iclog for checkpoint post-commit processing. After being | ||
382 | * passed to the iclog, another context needs to be allocated for tracking the | ||
383 | * next set of transactions to be aggregated into a checkpoint. | ||
384 | */ | ||
385 | struct xfs_cil; | ||
386 | |||
387 | struct xfs_cil_ctx { | ||
388 | struct xfs_cil *cil; | ||
389 | xfs_lsn_t sequence; /* chkpt sequence # */ | ||
390 | xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ | ||
391 | xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ | ||
392 | struct xlog_ticket *ticket; /* chkpt ticket */ | ||
393 | int nvecs; /* number of regions */ | ||
394 | int space_used; /* aggregate size of regions */ | ||
395 | struct list_head busy_extents; /* busy extents in chkpt */ | ||
396 | struct xfs_log_vec *lv_chain; /* logvecs being pushed */ | ||
397 | xfs_log_callback_t log_cb; /* completion callback hook. */ | ||
398 | struct list_head committing; /* ctx committing list */ | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * Committed Item List structure | ||
403 | * | ||
404 | * This structure is used to track log items that have been committed but not | ||
405 | * yet written into the log. It is used only when the delayed logging mount | ||
406 | * option is enabled. | ||
407 | * | ||
408 | * This structure tracks the list of committing checkpoint contexts so | ||
409 | * we can avoid the problem of having to hold out new transactions during a | ||
410 | * flush until we have the commit record LSN of the checkpoint. We can | ||
411 | * traverse the list of committing contexts in xlog_cil_push_lsn() to find a | ||
412 | * sequence match and extract the commit LSN directly from there. If the | ||
413 | * checkpoint is still in the process of committing, we can block waiting for | ||
414 | * the commit LSN to be determined as well. This should make synchronous | ||
415 | * operations almost as efficient as the old logging methods. | ||
416 | */ | ||
417 | struct xfs_cil { | ||
418 | struct log *xc_log; | ||
419 | struct list_head xc_cil; | ||
420 | spinlock_t xc_cil_lock; | ||
421 | struct xfs_cil_ctx *xc_ctx; | ||
422 | struct rw_semaphore xc_ctx_lock; | ||
423 | struct list_head xc_committing; | ||
424 | sv_t xc_commit_wait; | ||
425 | }; | ||
426 | |||
427 | /* | ||
428 | * The amount of log space we should allow the CIL to aggregate is difficult | ||
429 | * to size. Whatever we choose, we have to make sure we can reserve the log | ||
430 | * space effectively: it must be large enough to capture sufficient relogging | ||
431 | * to reduce log buffer IO significantly, but not so large for the log that it | ||
432 | * induces too much latency when writing out through the iclogs. We track both | ||
433 | * space consumed and the number of vectors in the checkpoint context, so we | ||
434 | * need to decide which to use for limiting. | ||
435 | * | ||
436 | * Every log buffer we write out during a push needs a header reserved, which | ||
437 | * is at least one sector and more for v2 logs. Hence we need a reservation of | ||
438 | * at least 512 bytes per 32k of log space just for the LR headers. That means | ||
439 | * 16KB of reservation per megabyte of delayed logging space we will consume, | ||
440 | * plus various headers. The number of headers will vary based on the num of | ||
441 | * io vectors, so limiting on a specific number of vectors is going to result | ||
442 | * in transactions of varying size. IOWs, it is more consistent to track and | ||
443 | * limit space consumed in the log rather than by the number of objects being | ||
444 | * logged in order to prevent checkpoint ticket overruns. | ||
445 | * | ||
446 | * Further, use of static reservations through the log grant mechanism is | ||
447 | * problematic. It introduces a lot of complexity (e.g. reserve grant vs write | ||
448 | * grant) and a significant deadlock potential because regranting write space | ||
449 | * can block on log pushes. Hence if we have to regrant log space during a log | ||
450 | * push, we can deadlock. | ||
451 | * | ||
452 | * However, we can avoid this by use of a dynamic "reservation stealing" | ||
453 | * technique during transaction commit whereby unused reservation space in the | ||
454 | * transaction ticket is transferred to the CIL ctx commit ticket to cover the | ||
455 | * space needed by the checkpoint transaction. This means that we never need to | ||
456 | * specifically reserve space for the CIL checkpoint transaction, nor do we | ||
457 | * need to regrant space once the checkpoint completes. This also means the | ||
458 | * checkpoint transaction ticket is specific to the checkpoint context, rather | ||
459 | * than the CIL itself. | ||
460 | * | ||
461 | * With dynamic reservations, we can basically make up arbitrary limits for the | ||
462 | * checkpoint size so long as they don't violate any other size rules. Hence | ||
463 | * the initial maximum size for the checkpoint transaction will be set to a | ||
464 | * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit | ||
465 | * right now based on the latency of writing out a large amount of data through | ||
466 | * the circular iclog buffers. | ||
467 | */ | ||
468 | |||
469 | #define XLOG_CIL_SPACE_LIMIT(log) \ | ||
470 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
471 | |||
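
The sizing numbers in the comment above check out, and the limit macro is easy to exercise on its own. A user-space restatement (MIN and the macro below are local stand-ins, not the kernel versions):

    #include <stdio.h>

    #define MIN(a, b)  ((a) < (b) ? (a) : (b))
    #define CIL_SPACE_LIMIT(logsize)  (MIN(((logsize) >> 2), (8 * 1024 * 1024)))

    int main(void)
    {
            /* 512 bytes of LR header per 32k: 1MB / 32k = 32 headers = 16KB/MB */
            printf("header reservation per MB: %d bytes\n",
                   (1024 * 1024 / (32 * 1024)) * 512);

            /* a quarter of the log, capped at 8MB */
            printf("16MB log -> %d\n", CIL_SPACE_LIMIT(16 << 20));  /* 4MB */
            printf("64MB log -> %d\n", CIL_SPACE_LIMIT(64 << 20));  /* 8MB cap */
            return 0;
    }
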
472 | /* | ||
382 | * The reservation head lsn is not made up of a cycle number and block number. | 473 | * The reservation head lsn is not made up of a cycle number and block number. |
383 | * Instead, it uses a cycle number and byte number. Logs don't expect to | 474 | * Instead, it uses a cycle number and byte number. Logs don't expect to |
384 | * overflow 31 bits worth of byte offset, so using a byte number will mean | 475 | * overflow 31 bits worth of byte offset, so using a byte number will mean |
@@ -388,6 +479,7 @@ typedef struct log { | |||
388 | /* The following fields don't need locking */ | 479 | /* The following fields don't need locking */ |
389 | struct xfs_mount *l_mp; /* mount point */ | 480 | struct xfs_mount *l_mp; /* mount point */ |
390 | struct xfs_ail *l_ailp; /* AIL log is working with */ | 481 | struct xfs_ail *l_ailp; /* AIL log is working with */ |
482 | struct xfs_cil *l_cilp; /* CIL log is working with */ | ||
391 | struct xfs_buf *l_xbuf; /* extra buffer for log | 483 | struct xfs_buf *l_xbuf; /* extra buffer for log |
392 | * wrapping */ | 484 | * wrapping */ |
393 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 485 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
@@ -438,14 +530,17 @@ typedef struct log { | |||
438 | 530 | ||
439 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 531 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
440 | 532 | ||
441 | |||
442 | /* common routines */ | 533 | /* common routines */ |
443 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | 534 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
444 | extern int xlog_recover(xlog_t *log); | 535 | extern int xlog_recover(xlog_t *log); |
445 | extern int xlog_recover_finish(xlog_t *log); | 536 | extern int xlog_recover_finish(xlog_t *log); |
446 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 537 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
447 | 538 | ||
448 | extern kmem_zone_t *xfs_log_ticket_zone; | 539 | extern kmem_zone_t *xfs_log_ticket_zone; |
540 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, | ||
541 | int count, char client, uint xflags, | ||
542 | int alloc_flags); | ||
543 | |||
449 | 544 | ||
450 | static inline void | 545 | static inline void |
451 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | 546 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) |
@@ -455,6 +550,21 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | |||
455 | *off += bytes; | 550 | *off += bytes; |
456 | } | 551 | } |
457 | 552 | ||
553 | void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); | ||
554 | int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | ||
555 | struct xlog_ticket *tic, xfs_lsn_t *start_lsn, | ||
556 | xlog_in_core_t **commit_iclog, uint flags); | ||
557 | |||
558 | /* | ||
559 | * Committed Item List interfaces | ||
560 | */ | ||
561 | int xlog_cil_init(struct log *log); | ||
562 | void xlog_cil_init_post_recovery(struct log *log); | ||
563 | void xlog_cil_destroy(struct log *log); | ||
564 | |||
565 | int xlog_cil_push(struct log *log, int push_now); | ||
566 | xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); | ||
567 | |||
458 | /* | 568 | /* |
459 | * Unmount record type is used as a pseudo transaction type for the ticket. | 569 | * Unmount record type is used as a pseudo transaction type for the ticket. |
460 | * Its value must be outside the range of XFS_TRANS_* values. | 570 | * Its value must be outside the range of XFS_TRANS_* values. |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0de08e366315..14a69aec2c0b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -1576,7 +1576,7 @@ xlog_recover_reorder_trans( | |||
1576 | 1576 | ||
1577 | switch (ITEM_TYPE(item)) { | 1577 | switch (ITEM_TYPE(item)) { |
1578 | case XFS_LI_BUF: | 1578 | case XFS_LI_BUF: |
1579 | if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { | 1579 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1580 | trace_xfs_log_recover_item_reorder_head(log, | 1580 | trace_xfs_log_recover_item_reorder_head(log, |
1581 | trans, item, pass); | 1581 | trans, item, pass); |
1582 | list_move(&item->ri_list, &trans->r_itemq); | 1582 | list_move(&item->ri_list, &trans->r_itemq); |
@@ -1638,7 +1638,7 @@ xlog_recover_do_buffer_pass1( | |||
1638 | /* | 1638 | /* |
1639 | * If this isn't a cancel buffer item, then just return. | 1639 | * If this isn't a cancel buffer item, then just return. |
1640 | */ | 1640 | */ |
1641 | if (!(flags & XFS_BLI_CANCEL)) { | 1641 | if (!(flags & XFS_BLF_CANCEL)) { |
1642 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1642 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
1643 | return; | 1643 | return; |
1644 | } | 1644 | } |
@@ -1696,7 +1696,7 @@ xlog_recover_do_buffer_pass1( | |||
1696 | * Check to see whether the buffer being recovered has a corresponding | 1696 | * Check to see whether the buffer being recovered has a corresponding |
1697 | * entry in the buffer cancel record table. If it does then return 1 | 1697 | * entry in the buffer cancel record table. If it does then return 1 |
1698 | * so that it will be cancelled, otherwise return 0. If the buffer is | 1698 | * so that it will be cancelled, otherwise return 0. If the buffer is |
1699 | * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement | 1699 | * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement |
1700 | * the refcount on the entry in the table and remove it from the table | 1700 | * the refcount on the entry in the table and remove it from the table |
1701 | * if this is the last reference. | 1701 | * if this is the last reference. |
1702 | * | 1702 | * |
@@ -1721,7 +1721,7 @@ xlog_check_buffer_cancelled( | |||
1721 | * There is nothing in the table built in pass one, | 1721 | * There is nothing in the table built in pass one, |
1722 | * so this buffer must not be cancelled. | 1722 | * so this buffer must not be cancelled. |
1723 | */ | 1723 | */ |
1724 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1724 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1725 | return 0; | 1725 | return 0; |
1726 | } | 1726 | } |
1727 | 1727 | ||
@@ -1733,7 +1733,7 @@ xlog_check_buffer_cancelled( | |||
1733 | * There is no corresponding entry in the table built | 1733 | * There is no corresponding entry in the table built |
1734 | * in pass one, so this buffer has not been cancelled. | 1734 | * in pass one, so this buffer has not been cancelled. |
1735 | */ | 1735 | */ |
1736 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1736 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1737 | return 0; | 1737 | return 0; |
1738 | } | 1738 | } |
1739 | 1739 | ||
@@ -1752,7 +1752,7 @@ xlog_check_buffer_cancelled( | |||
1752 | * one in the table and remove it if this is the | 1752 | * one in the table and remove it if this is the |
1753 | * last reference. | 1753 | * last reference. |
1754 | */ | 1754 | */ |
1755 | if (flags & XFS_BLI_CANCEL) { | 1755 | if (flags & XFS_BLF_CANCEL) { |
1756 | bcp->bc_refcount--; | 1756 | bcp->bc_refcount--; |
1757 | if (bcp->bc_refcount == 0) { | 1757 | if (bcp->bc_refcount == 0) { |
1758 | if (prevp == NULL) { | 1758 | if (prevp == NULL) { |
@@ -1772,7 +1772,7 @@ xlog_check_buffer_cancelled( | |||
1772 | * We didn't find a corresponding entry in the table, so | 1772 | * We didn't find a corresponding entry in the table, so |
1773 | * return 0 so that the buffer is NOT cancelled. | 1773 | * return 0 so that the buffer is NOT cancelled. |
1774 | */ | 1774 | */ |
1775 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1775 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1776 | return 0; | 1776 | return 0; |
1777 | } | 1777 | } |
1778 | 1778 | ||
@@ -1874,8 +1874,8 @@ xlog_recover_do_inode_buffer( | |||
1874 | nbits = xfs_contig_bits(data_map, map_size, | 1874 | nbits = xfs_contig_bits(data_map, map_size, |
1875 | bit); | 1875 | bit); |
1876 | ASSERT(nbits > 0); | 1876 | ASSERT(nbits > 0); |
1877 | reg_buf_offset = bit << XFS_BLI_SHIFT; | 1877 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1878 | reg_buf_bytes = nbits << XFS_BLI_SHIFT; | 1878 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
1879 | item_index++; | 1879 | item_index++; |
1880 | } | 1880 | } |
1881 | 1881 | ||
@@ -1889,7 +1889,7 @@ xlog_recover_do_inode_buffer( | |||
1889 | } | 1889 | } |
1890 | 1890 | ||
1891 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1891 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1892 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0); | 1892 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
1893 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); | 1893 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); |
1894 | 1894 | ||
1895 | /* | 1895 | /* |
@@ -1955,9 +1955,9 @@ xlog_recover_do_reg_buffer( | |||
1955 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1955 | nbits = xfs_contig_bits(data_map, map_size, bit); |
1956 | ASSERT(nbits > 0); | 1956 | ASSERT(nbits > 0); |
1957 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1957 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1958 | ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); | 1958 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
1959 | ASSERT(XFS_BUF_COUNT(bp) >= | 1959 | ASSERT(XFS_BUF_COUNT(bp) >= |
1960 | ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); | 1960 | ((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT)); |
1961 | 1961 | ||
1962 | /* | 1962 | /* |
1963 | * Do a sanity check if this is a dquot buffer. Just checking | 1963 | * Do a sanity check if this is a dquot buffer. Just checking |
@@ -1966,7 +1966,7 @@ xlog_recover_do_reg_buffer( | |||
1966 | */ | 1966 | */ |
1967 | error = 0; | 1967 | error = 0; |
1968 | if (buf_f->blf_flags & | 1968 | if (buf_f->blf_flags & |
1969 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 1969 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
1970 | if (item->ri_buf[i].i_addr == NULL) { | 1970 | if (item->ri_buf[i].i_addr == NULL) { |
1971 | cmn_err(CE_ALERT, | 1971 | cmn_err(CE_ALERT, |
1972 | "XFS: NULL dquot in %s.", __func__); | 1972 | "XFS: NULL dquot in %s.", __func__); |
@@ -1987,9 +1987,9 @@ xlog_recover_do_reg_buffer( | |||
1987 | } | 1987 | } |
1988 | 1988 | ||
1989 | memcpy(xfs_buf_offset(bp, | 1989 | memcpy(xfs_buf_offset(bp, |
1990 | (uint)bit << XFS_BLI_SHIFT), /* dest */ | 1990 | (uint)bit << XFS_BLF_SHIFT), /* dest */ |
1991 | item->ri_buf[i].i_addr, /* source */ | 1991 | item->ri_buf[i].i_addr, /* source */ |
1992 | nbits<<XFS_BLI_SHIFT); /* length */ | 1992 | nbits<<XFS_BLF_SHIFT); /* length */ |
1993 | next: | 1993 | next: |
1994 | i++; | 1994 | i++; |
1995 | bit += nbits; | 1995 | bit += nbits; |
@@ -2148,11 +2148,11 @@ xlog_recover_do_dquot_buffer( | |||
2148 | } | 2148 | } |
2149 | 2149 | ||
2150 | type = 0; | 2150 | type = 0; |
2151 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) | 2151 | if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) |
2152 | type |= XFS_DQ_USER; | 2152 | type |= XFS_DQ_USER; |
2153 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) | 2153 | if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) |
2154 | type |= XFS_DQ_PROJ; | 2154 | type |= XFS_DQ_PROJ; |
2155 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) | 2155 | if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) |
2156 | type |= XFS_DQ_GROUP; | 2156 | type |= XFS_DQ_GROUP; |
2157 | /* | 2157 | /* |
2158 | * This type of quotas was turned off, so ignore this buffer | 2158 | * This type of quotas was turned off, so ignore this buffer |
@@ -2173,7 +2173,7 @@ xlog_recover_do_dquot_buffer( | |||
2173 | * here which overlaps that may be stale. | 2173 | * here which overlaps that may be stale. |
2174 | * | 2174 | * |
2175 | * When meta-data buffers are freed at run time we log a buffer item | 2175 | * When meta-data buffers are freed at run time we log a buffer item |
2176 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies | 2176 | * with the XFS_BLF_CANCEL bit set to indicate that previous copies |
2177 | * of the buffer in the log should not be replayed at recovery time. | 2177 | * of the buffer in the log should not be replayed at recovery time. |
2178 | * This is so that if the blocks covered by the buffer are reused for | 2178 | * This is so that if the blocks covered by the buffer are reused for |
2179 | * file data before we crash we don't end up replaying old, freed | 2179 | * file data before we crash we don't end up replaying old, freed |
@@ -2207,7 +2207,7 @@ xlog_recover_do_buffer_trans( | |||
2207 | if (pass == XLOG_RECOVER_PASS1) { | 2207 | if (pass == XLOG_RECOVER_PASS1) { |
2208 | /* | 2208 | /* |
2209 | * In this pass we're only looking for buf items | 2209 | * In this pass we're only looking for buf items |
2210 | * with the XFS_BLI_CANCEL bit set. | 2210 | * with the XFS_BLF_CANCEL bit set. |
2211 | */ | 2211 | */ |
2212 | xlog_recover_do_buffer_pass1(log, buf_f); | 2212 | xlog_recover_do_buffer_pass1(log, buf_f); |
2213 | return 0; | 2213 | return 0; |
@@ -2244,7 +2244,7 @@ xlog_recover_do_buffer_trans( | |||
2244 | 2244 | ||
2245 | mp = log->l_mp; | 2245 | mp = log->l_mp; |
2246 | buf_flags = XBF_LOCK; | 2246 | buf_flags = XBF_LOCK; |
2247 | if (!(flags & XFS_BLI_INODE_BUF)) | 2247 | if (!(flags & XFS_BLF_INODE_BUF)) |
2248 | buf_flags |= XBF_MAPPED; | 2248 | buf_flags |= XBF_MAPPED; |
2249 | 2249 | ||
2250 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2250 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); |
@@ -2257,10 +2257,10 @@ xlog_recover_do_buffer_trans( | |||
2257 | } | 2257 | } |
2258 | 2258 | ||
2259 | error = 0; | 2259 | error = 0; |
2260 | if (flags & XFS_BLI_INODE_BUF) { | 2260 | if (flags & XFS_BLF_INODE_BUF) { |
2261 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2261 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2262 | } else if (flags & | 2262 | } else if (flags & |
2263 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 2263 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2264 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2264 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2265 | } else { | 2265 | } else { |
2266 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); | 2266 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h index 75d749207258..1c55ccbb379d 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/xfs_log_recover.h | |||
@@ -28,7 +28,7 @@ | |||
28 | #define XLOG_RHASH(tid) \ | 28 | #define XLOG_RHASH(tid) \ |
29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) | 29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) |
30 | 30 | ||
31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1) | 31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1) |
32 | 32 | ||
33 | 33 | ||
34 | /* | 34 | /* |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9ff48a16a7ee..1d2c7eed4eda 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -268,6 +268,7 @@ typedef struct xfs_mount { | |||
268 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops | 268 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops |
269 | must be synchronous except | 269 | must be synchronous except |
270 | for space allocations */ | 270 | for space allocations */ |
271 | #define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ | ||
271 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ | 272 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ |
272 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) | 273 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) |
273 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem | 274 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index be578ecb4af2..ce558efa2ea0 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "xfs_trans_priv.h" | 44 | #include "xfs_trans_priv.h" |
45 | #include "xfs_trans_space.h" | 45 | #include "xfs_trans_space.h" |
46 | #include "xfs_inode_item.h" | 46 | #include "xfs_inode_item.h" |
47 | #include "xfs_trace.h" | ||
47 | 48 | ||
48 | kmem_zone_t *xfs_trans_zone; | 49 | kmem_zone_t *xfs_trans_zone; |
49 | 50 | ||
@@ -243,9 +244,8 @@ _xfs_trans_alloc( | |||
243 | tp->t_type = type; | 244 | tp->t_type = type; |
244 | tp->t_mountp = mp; | 245 | tp->t_mountp = mp; |
245 | tp->t_items_free = XFS_LIC_NUM_SLOTS; | 246 | tp->t_items_free = XFS_LIC_NUM_SLOTS; |
246 | tp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
247 | xfs_lic_init(&(tp->t_items)); | 247 | xfs_lic_init(&(tp->t_items)); |
248 | XFS_LBC_INIT(&(tp->t_busy)); | 248 | INIT_LIST_HEAD(&tp->t_busy); |
249 | return tp; | 249 | return tp; |
250 | } | 250 | } |
251 | 251 | ||
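INIT_LIST_HEAD replaces the chunked busy-slot initializer with the standard kernel doubly linked list, where an empty list is a head pointing at itself. A user-space rendering of the idiom, with the list type re-declared for illustration (this is not the kernel's list.h):

    struct list_head { struct list_head *next, *prev; };

    void init_list_head(struct list_head *head)
    {
            head->next = head;   /* empty: head points at itself */
            head->prev = head;
    }

    int list_is_empty(const struct list_head *head)
    {
            return head->next == head;
    }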
@@ -255,8 +255,13 @@ _xfs_trans_alloc( | |||
255 | */ | 255 | */ |
256 | STATIC void | 256 | STATIC void |
257 | xfs_trans_free( | 257 | xfs_trans_free( |
258 | xfs_trans_t *tp) | 258 | struct xfs_trans *tp) |
259 | { | 259 | { |
260 | struct xfs_busy_extent *busyp, *n; | ||
261 | |||
262 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
263 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
264 | |||
260 | atomic_dec(&tp->t_mountp->m_active_trans); | 265 | atomic_dec(&tp->t_mountp->m_active_trans); |
261 | xfs_trans_free_dqinfo(tp); | 266 | xfs_trans_free_dqinfo(tp); |
262 | kmem_zone_free(xfs_trans_zone, tp); | 267 | kmem_zone_free(xfs_trans_zone, tp); |
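xfs_trans_free() now drains tp->t_busy destructively, so it uses the _safe list walker, which caches the next node before the current entry can be freed. A sketch of why a plain walk would be unsafe, with stand-in types:

    #include <stddef.h>
    #include <stdlib.h>

    struct list_head { struct list_head *next, *prev; };
    struct busy_stub { struct list_head list; /* extent fields elided */ };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    void drain_busy(struct list_head *head)
    {
            struct list_head *pos = head->next, *n;

            while (pos != head) {
                    n = pos->next;   /* cache before freeing pos */
                    free(container_of(pos, struct busy_stub, list));
                    pos = n;
            }
    }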
@@ -285,9 +290,8 @@ xfs_trans_dup( | |||
285 | ntp->t_type = tp->t_type; | 290 | ntp->t_type = tp->t_type; |
286 | ntp->t_mountp = tp->t_mountp; | 291 | ntp->t_mountp = tp->t_mountp; |
287 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; | 292 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; |
288 | ntp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
289 | xfs_lic_init(&(ntp->t_items)); | 293 | xfs_lic_init(&(ntp->t_items)); |
290 | XFS_LBC_INIT(&(ntp->t_busy)); | 294 | INIT_LIST_HEAD(&ntp->t_busy); |
291 | 295 | ||
292 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 296 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
293 | ASSERT(tp->t_ticket != NULL); | 297 | ASSERT(tp->t_ticket != NULL); |
@@ -423,7 +427,6 @@ undo_blocks: | |||
423 | return error; | 427 | return error; |
424 | } | 428 | } |
425 | 429 | ||
426 | |||
427 | /* | 430 | /* |
428 | * Record the indicated change to the given field for application | 431 | * Record the indicated change to the given field for application |
429 | * to the file system's superblock when the transaction commits. | 432 | * to the file system's superblock when the transaction commits. |
@@ -652,7 +655,7 @@ xfs_trans_apply_sb_deltas( | |||
652 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we | 655 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we |
653 | * still need to update the incore superblock with the changes. | 656 | * still need to update the incore superblock with the changes. |
654 | */ | 657 | */ |
655 | STATIC void | 658 | void |
656 | xfs_trans_unreserve_and_mod_sb( | 659 | xfs_trans_unreserve_and_mod_sb( |
657 | xfs_trans_t *tp) | 660 | xfs_trans_t *tp) |
658 | { | 661 | { |
@@ -880,7 +883,7 @@ xfs_trans_fill_vecs( | |||
880 | * they could be immediately flushed and we'd have to race with the flusher | 883 | * they could be immediately flushed and we'd have to race with the flusher |
881 | * trying to pull the item from the AIL as we add it. | 884 | * trying to pull the item from the AIL as we add it. |
882 | */ | 885 | */ |
883 | static void | 886 | void |
884 | xfs_trans_item_committed( | 887 | xfs_trans_item_committed( |
885 | struct xfs_log_item *lip, | 888 | struct xfs_log_item *lip, |
886 | xfs_lsn_t commit_lsn, | 889 | xfs_lsn_t commit_lsn, |
@@ -930,26 +933,6 @@ xfs_trans_item_committed( | |||
930 | IOP_UNPIN(lip); | 933 | IOP_UNPIN(lip); |
931 | } | 934 | } |
932 | 935 | ||
933 | /* Clear all the per-AG busy list items listed in this transaction */ | ||
934 | static void | ||
935 | xfs_trans_clear_busy_extents( | ||
936 | struct xfs_trans *tp) | ||
937 | { | ||
938 | xfs_log_busy_chunk_t *lbcp; | ||
939 | xfs_log_busy_slot_t *lbsp; | ||
940 | int i; | ||
941 | |||
942 | for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) { | ||
943 | i = 0; | ||
944 | for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) { | ||
945 | if (XFS_LBC_ISFREE(lbcp, i)) | ||
946 | continue; | ||
947 | xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx); | ||
948 | } | ||
949 | } | ||
950 | xfs_trans_free_busy(tp); | ||
951 | } | ||
952 | |||
953 | /* | 936 | /* |
954 | * This is typically called by the LM when a transaction has been fully | 937 | * This is typically called by the LM when a transaction has been fully |
955 | * committed to disk. It needs to unpin the items which have | 938 | * committed to disk. It needs to unpin the items which have |
@@ -984,7 +967,6 @@ xfs_trans_committed( | |||
984 | kmem_free(licp); | 967 | kmem_free(licp); |
985 | } | 968 | } |
986 | 969 | ||
987 | xfs_trans_clear_busy_extents(tp); | ||
988 | xfs_trans_free(tp); | 970 | xfs_trans_free(tp); |
989 | } | 971 | } |
990 | 972 | ||
@@ -1012,8 +994,7 @@ xfs_trans_uncommit( | |||
1012 | xfs_trans_unreserve_and_mod_sb(tp); | 994 | xfs_trans_unreserve_and_mod_sb(tp); |
1013 | xfs_trans_unreserve_and_mod_dquots(tp); | 995 | xfs_trans_unreserve_and_mod_dquots(tp); |
1014 | 996 | ||
1015 | xfs_trans_free_items(tp, flags); | 997 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
1016 | xfs_trans_free_busy(tp); | ||
1017 | xfs_trans_free(tp); | 998 | xfs_trans_free(tp); |
1018 | } | 999 | } |
1019 | 1000 | ||
@@ -1075,6 +1056,8 @@ xfs_trans_commit_iclog( | |||
1075 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); | 1056 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); |
1076 | 1057 | ||
1077 | tp->t_commit_lsn = *commit_lsn; | 1058 | tp->t_commit_lsn = *commit_lsn; |
1059 | trace_xfs_trans_commit_lsn(tp); | ||
1060 | |||
1078 | if (nvec > XFS_TRANS_LOGVEC_COUNT) | 1061 | if (nvec > XFS_TRANS_LOGVEC_COUNT) |
1079 | kmem_free(log_vector); | 1062 | kmem_free(log_vector); |
1080 | 1063 | ||
@@ -1161,6 +1144,93 @@ xfs_trans_commit_iclog( | |||
1161 | return xfs_log_release_iclog(mp, commit_iclog); | 1144 | return xfs_log_release_iclog(mp, commit_iclog); |
1162 | } | 1145 | } |
1163 | 1146 | ||
1147 | /* | ||
1148 | * Walk the log items and allocate log vector structures for | ||
1149 | * each item large enough to fit all the vectors they require. | ||
1150 | * Note that this format differs from the old log vector format in | ||
1151 | * that there is no transaction header in these log vectors. | ||
1152 | */ | ||
1153 | STATIC struct xfs_log_vec * | ||
1154 | xfs_trans_alloc_log_vecs( | ||
1155 | xfs_trans_t *tp) | ||
1156 | { | ||
1157 | xfs_log_item_desc_t *lidp; | ||
1158 | struct xfs_log_vec *lv = NULL; | ||
1159 | struct xfs_log_vec *ret_lv = NULL; | ||
1160 | |||
1161 | lidp = xfs_trans_first_item(tp); | ||
1162 | |||
1163 | /* Bail out if we didn't find a log item. */ | ||
1164 | if (!lidp) { | ||
1165 | ASSERT(0); | ||
1166 | return NULL; | ||
1167 | } | ||
1168 | |||
1169 | while (lidp != NULL) { | ||
1170 | struct xfs_log_vec *new_lv; | ||
1171 | |||
1172 | /* Skip items which aren't dirty in this transaction. */ | ||
1173 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { | ||
1174 | lidp = xfs_trans_next_item(tp, lidp); | ||
1175 | continue; | ||
1176 | } | ||
1177 | |||
1178 | /* Skip items that do not have any vectors for writing */ | ||
1179 | lidp->lid_size = IOP_SIZE(lidp->lid_item); | ||
1180 | if (!lidp->lid_size) { | ||
1181 | lidp = xfs_trans_next_item(tp, lidp); | ||
1182 | continue; | ||
1183 | } | ||
1184 | |||
1185 | new_lv = kmem_zalloc(sizeof(*new_lv) + | ||
1186 | lidp->lid_size * sizeof(struct xfs_log_iovec), | ||
1187 | KM_SLEEP); | ||
1188 | |||
1189 | /* The allocated iovec region lies beyond the log vector. */ | ||
1190 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; | ||
1191 | new_lv->lv_niovecs = lidp->lid_size; | ||
1192 | new_lv->lv_item = lidp->lid_item; | ||
1193 | if (!ret_lv) | ||
1194 | ret_lv = new_lv; | ||
1195 | else | ||
1196 | lv->lv_next = new_lv; | ||
1197 | lv = new_lv; | ||
1198 | lidp = xfs_trans_next_item(tp, lidp); | ||
1199 | } | ||
1200 | |||
1201 | return ret_lv; | ||
1202 | } | ||
1203 | |||
1204 | static int | ||
1205 | xfs_trans_commit_cil( | ||
1206 | struct xfs_mount *mp, | ||
1207 | struct xfs_trans *tp, | ||
1208 | xfs_lsn_t *commit_lsn, | ||
1209 | int flags) | ||
1210 | { | ||
1211 | struct xfs_log_vec *log_vector; | ||
1212 | int error; | ||
1213 | |||
1214 | /* | ||
1215 | * Get each log item to allocate a vector structure for | ||
1216 | * the log item to pass to the log write code. The | ||
1217 | * CIL commit code will format the vector and save it away. | ||
1218 | */ | ||
1219 | log_vector = xfs_trans_alloc_log_vecs(tp); | ||
1220 | if (!log_vector) | ||
1221 | return ENOMEM; | ||
1222 | |||
1223 | error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); | ||
1224 | if (error) | ||
1225 | return error; | ||
1226 | |||
1227 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
1228 | |||
1229 | /* xfs_trans_free_items() unlocks them first */ | ||
1230 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
1231 | xfs_trans_free(tp); | ||
1232 | return 0; | ||
1233 | } | ||
1164 | 1234 | ||
1165 | /* | 1235 | /* |
1166 | * xfs_trans_commit | 1236 | * xfs_trans_commit |
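xfs_trans_alloc_log_vecs() above builds its chain with a classic head/tail pair, ret_lv remembering the first vector and lv the last so each append is O(1), and it co-allocates the iovec array in the same block, pointing lv_iovecp just past the header. A compact sketch of both patterns with stand-in types:

    #include <stdlib.h>

    struct iovec_stub { void *base; int len; };
    struct log_vec {
            struct log_vec    *lv_next;
            int                lv_niovecs;
            struct iovec_stub *lv_iovecp;   /* lives in the same allocation */
    };

    struct log_vec *alloc_chain(const int sizes[], int n)
    {
            struct log_vec *head = NULL, *tail = NULL;

            for (int i = 0; i < n; i++) {
                    struct log_vec *lv = calloc(1, sizeof(*lv) +
                                    sizes[i] * sizeof(struct iovec_stub));
                    if (!lv)
                            return head;   /* caller frees partial chain */
                    lv->lv_iovecp = (struct iovec_stub *)&lv[1];
                    lv->lv_niovecs = sizes[i];
                    if (!head)
                            head = lv;     /* first node becomes the head */
                    else
                            tail->lv_next = lv;
                    tail = lv;
            }
            return head;
    }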
@@ -1221,7 +1291,11 @@ _xfs_trans_commit( | |||
1221 | xfs_trans_apply_sb_deltas(tp); | 1291 | xfs_trans_apply_sb_deltas(tp); |
1222 | xfs_trans_apply_dquot_deltas(tp); | 1292 | xfs_trans_apply_dquot_deltas(tp); |
1223 | 1293 | ||
1224 | error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); | 1294 | if (mp->m_flags & XFS_MOUNT_DELAYLOG) |
1295 | error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags); | ||
1296 | else | ||
1297 | error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); | ||
1298 | |||
1225 | if (error == ENOMEM) { | 1299 | if (error == ENOMEM) { |
1226 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 1300 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
1227 | error = XFS_ERROR(EIO); | 1301 | error = XFS_ERROR(EIO); |
@@ -1259,8 +1333,7 @@ out_unreserve: | |||
1259 | error = XFS_ERROR(EIO); | 1333 | error = XFS_ERROR(EIO); |
1260 | } | 1334 | } |
1261 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1335 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1262 | xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0); | 1336 | xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); |
1263 | xfs_trans_free_busy(tp); | ||
1264 | xfs_trans_free(tp); | 1337 | xfs_trans_free(tp); |
1265 | 1338 | ||
1266 | XFS_STATS_INC(xs_trans_empty); | 1339 | XFS_STATS_INC(xs_trans_empty); |
@@ -1338,8 +1411,7 @@ xfs_trans_cancel( | |||
1338 | /* mark this thread as no longer being in a transaction */ | 1411 | /* mark this thread as no longer being in a transaction */ |
1339 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1412 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1340 | 1413 | ||
1341 | xfs_trans_free_items(tp, flags); | 1414 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
1342 | xfs_trans_free_busy(tp); | ||
1343 | xfs_trans_free(tp); | 1415 | xfs_trans_free(tp); |
1344 | } | 1416 | } |
1345 | 1417 | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c62beee0921e..8c69e7824f68 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -106,7 +106,8 @@ typedef struct xfs_trans_header { | |||
106 | #define XFS_TRANS_GROWFSRT_FREE 39 | 106 | #define XFS_TRANS_GROWFSRT_FREE 39 |
107 | #define XFS_TRANS_SWAPEXT 40 | 107 | #define XFS_TRANS_SWAPEXT 40 |
108 | #define XFS_TRANS_SB_COUNT 41 | 108 | #define XFS_TRANS_SB_COUNT 41 |
109 | #define XFS_TRANS_TYPE_MAX 41 | 109 | #define XFS_TRANS_CHECKPOINT 42 |
110 | #define XFS_TRANS_TYPE_MAX 42 | ||
110 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 111 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
111 | 112 | ||
112 | #define XFS_TRANS_TYPES \ | 113 | #define XFS_TRANS_TYPES \ |
@@ -148,6 +149,7 @@ typedef struct xfs_trans_header { | |||
148 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ | 149 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ |
149 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ | 150 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ |
150 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ | 151 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ |
152 | { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ | ||
151 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ | 153 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ |
152 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ | 154 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ |
153 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } | 155 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } |
@@ -813,6 +815,7 @@ struct xfs_log_item_desc; | |||
813 | struct xfs_mount; | 815 | struct xfs_mount; |
814 | struct xfs_trans; | 816 | struct xfs_trans; |
815 | struct xfs_dquot_acct; | 817 | struct xfs_dquot_acct; |
818 | struct xfs_busy_extent; | ||
816 | 819 | ||
817 | typedef struct xfs_log_item { | 820 | typedef struct xfs_log_item { |
818 | struct list_head li_ail; /* AIL pointers */ | 821 | struct list_head li_ail; /* AIL pointers */ |
@@ -828,6 +831,11 @@ typedef struct xfs_log_item { | |||
828 | /* buffer item iodone */ | 831 | /* buffer item iodone */ |
829 | /* callback func */ | 832 | /* callback func */ |
830 | struct xfs_item_ops *li_ops; /* function list */ | 833 | struct xfs_item_ops *li_ops; /* function list */ |
834 | |||
835 | /* delayed logging */ | ||
836 | struct list_head li_cil; /* CIL pointers */ | ||
837 | struct xfs_log_vec *li_lv; /* active log vector */ | ||
838 | xfs_lsn_t li_seq; /* CIL commit seq */ | ||
831 | } xfs_log_item_t; | 839 | } xfs_log_item_t; |
832 | 840 | ||
833 | #define XFS_LI_IN_AIL 0x1 | 841 | #define XFS_LI_IN_AIL 0x1 |
@@ -872,34 +880,6 @@ typedef struct xfs_item_ops { | |||
872 | #define XFS_ITEM_PUSHBUF 3 | 880 | #define XFS_ITEM_PUSHBUF 3 |
873 | 881 | ||
874 | /* | 882 | /* |
875 | * This structure is used to maintain a list of block ranges that have been | ||
876 | * freed in the transaction. The ranges are listed in the perag[] busy list | ||
877 | * between when they're freed and the transaction is committed to disk. | ||
878 | */ | ||
879 | |||
880 | typedef struct xfs_log_busy_slot { | ||
881 | xfs_agnumber_t lbc_ag; | ||
882 | ushort lbc_idx; /* index in perag.busy[] */ | ||
883 | } xfs_log_busy_slot_t; | ||
884 | |||
885 | #define XFS_LBC_NUM_SLOTS 31 | ||
886 | typedef struct xfs_log_busy_chunk { | ||
887 | struct xfs_log_busy_chunk *lbc_next; | ||
888 | uint lbc_free; /* free slots bitmask */ | ||
889 | ushort lbc_unused; /* first unused */ | ||
890 | xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; | ||
891 | } xfs_log_busy_chunk_t; | ||
892 | |||
893 | #define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) | ||
894 | #define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) | ||
895 | |||
896 | #define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) | ||
897 | #define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) | ||
898 | #define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) | ||
899 | #define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) | ||
900 | #define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) | ||
901 | |||
902 | /* | ||
903 | * This is the type of function which can be given to xfs_trans_callback() | 883 | * This is the type of function which can be given to xfs_trans_callback() |
904 | * to be called upon the transaction's commit to disk. | 884 | * to be called upon the transaction's commit to disk. |
905 | */ | 885 | */ |
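For reference, the deleted chunk scheme tracked up to 31 busy slots per chunk through a free-slot bitmask: INIT sets all mask bits, CLAIM clears one, VACANCY and ISFREE test them. A worked example with the removed constants copied from the lines above:

    #include <assert.h>

    #define XFS_LBC_NUM_SLOTS 31
    #define XFS_LBC_FREEMASK  ((1U << XFS_LBC_NUM_SLOTS) - 1)  /* 0x7fffffff */

    int main(void)
    {
            unsigned int lbc_free = XFS_LBC_FREEMASK;  /* INIT: all free */

            lbc_free &= ~(1U << 0);                    /* CLAIM slot 0 */
            assert(!(lbc_free & (1U << 0)));           /* ISFREE(0) now false */
            assert(lbc_free & XFS_LBC_FREEMASK);       /* VACANCY: slots left */
            return 0;
    }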
@@ -950,8 +930,7 @@ typedef struct xfs_trans { | |||
950 | unsigned int t_items_free; /* log item descs free */ | 930 | unsigned int t_items_free; /* log item descs free */ |
951 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ | 931 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ |
952 | xfs_trans_header_t t_header; /* header for in-log trans */ | 932 | xfs_trans_header_t t_header; /* header for in-log trans */ |
953 | unsigned int t_busy_free; /* busy descs free */ | 933 | struct list_head t_busy; /* list of busy extents */ |
954 | xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ | ||
955 | unsigned long t_pflags; /* saved process flags state */ | 934 | unsigned long t_pflags; /* saved process flags state */ |
956 | } xfs_trans_t; | 935 | } xfs_trans_t; |
957 | 936 | ||
@@ -1025,9 +1004,6 @@ int _xfs_trans_commit(xfs_trans_t *, | |||
1025 | void xfs_trans_cancel(xfs_trans_t *, int); | 1004 | void xfs_trans_cancel(xfs_trans_t *, int); |
1026 | int xfs_trans_ail_init(struct xfs_mount *); | 1005 | int xfs_trans_ail_init(struct xfs_mount *); |
1027 | void xfs_trans_ail_destroy(struct xfs_mount *); | 1006 | void xfs_trans_ail_destroy(struct xfs_mount *); |
1028 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | ||
1029 | xfs_agnumber_t ag, | ||
1030 | xfs_extlen_t idx); | ||
1031 | 1007 | ||
1032 | extern kmem_zone_t *xfs_trans_zone; | 1008 | extern kmem_zone_t *xfs_trans_zone; |
1033 | 1009 | ||
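struct xfs_busy_extent is only forward-declared here; its definition lands elsewhere in the series. The one requirement visible in these hunks is an embedded list_head that xfs_trans_free() walks from tp->t_busy. A hypothetical minimal layout consistent with that usage (every field beyond the list member is an assumption):

    struct list_head { struct list_head *next, *prev; };  /* stand-in */

    struct busy_extent_sketch {
            struct list_head list;    /* linked on tp->t_busy */
            unsigned int     agno;    /* owning AG (assumed) */
            unsigned int     bno;     /* start block (assumed) */
            unsigned int     length;  /* extent length (assumed) */
    };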
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 9cd809025f3a..63d81a22f4fd 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -114,7 +114,7 @@ _xfs_trans_bjoin( | |||
114 | xfs_buf_item_init(bp, tp->t_mountp); | 114 | xfs_buf_item_init(bp, tp->t_mountp); |
115 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 115 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
116 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 116 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
117 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 117 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
118 | ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); | 118 | ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); |
119 | if (reset_recur) | 119 | if (reset_recur) |
120 | bip->bli_recur = 0; | 120 | bip->bli_recur = 0; |
@@ -511,7 +511,7 @@ xfs_trans_brelse(xfs_trans_t *tp, | |||
511 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 511 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
512 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); | 512 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); |
513 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 513 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
514 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 514 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
515 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 515 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
516 | 516 | ||
517 | /* | 517 | /* |
@@ -619,7 +619,7 @@ xfs_trans_bhold(xfs_trans_t *tp, | |||
619 | 619 | ||
620 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 620 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
621 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 621 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
622 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 622 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
623 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 623 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
624 | bip->bli_flags |= XFS_BLI_HOLD; | 624 | bip->bli_flags |= XFS_BLI_HOLD; |
625 | trace_xfs_trans_bhold(bip); | 625 | trace_xfs_trans_bhold(bip); |
@@ -641,7 +641,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp, | |||
641 | 641 | ||
642 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 642 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
643 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 643 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
644 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 644 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
645 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 645 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
646 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); | 646 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); |
647 | bip->bli_flags &= ~XFS_BLI_HOLD; | 647 | bip->bli_flags &= ~XFS_BLI_HOLD; |
@@ -704,7 +704,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
704 | bip->bli_flags &= ~XFS_BLI_STALE; | 704 | bip->bli_flags &= ~XFS_BLI_STALE; |
705 | ASSERT(XFS_BUF_ISSTALE(bp)); | 705 | ASSERT(XFS_BUF_ISSTALE(bp)); |
706 | XFS_BUF_UNSTALE(bp); | 706 | XFS_BUF_UNSTALE(bp); |
707 | bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL; | 707 | bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; |
708 | } | 708 | } |
709 | 709 | ||
710 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); | 710 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); |
@@ -762,8 +762,8 @@ xfs_trans_binval( | |||
762 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 762 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
763 | ASSERT(XFS_BUF_ISSTALE(bp)); | 763 | ASSERT(XFS_BUF_ISSTALE(bp)); |
764 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); | 764 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); |
765 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF)); | 765 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); |
766 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 766 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
767 | ASSERT(lidp->lid_flags & XFS_LID_DIRTY); | 767 | ASSERT(lidp->lid_flags & XFS_LID_DIRTY); |
768 | ASSERT(tp->t_flags & XFS_TRANS_DIRTY); | 768 | ASSERT(tp->t_flags & XFS_TRANS_DIRTY); |
769 | return; | 769 | return; |
@@ -774,7 +774,7 @@ xfs_trans_binval( | |||
774 | * in the buf log item. The STALE flag will be used in | 774 | * in the buf log item. The STALE flag will be used in |
775 | * xfs_buf_item_unpin() to determine if it should clean up | 775 | * xfs_buf_item_unpin() to determine if it should clean up |
776 | * when the last reference to the buf item is given up. | 776 | * when the last reference to the buf item is given up. |
777 | * We set the XFS_BLI_CANCEL flag in the buf log format structure | 777 | * We set the XFS_BLF_CANCEL flag in the buf log format structure |
778 | * and log the buf item. This will be used at recovery time | 778 | * and log the buf item. This will be used at recovery time |
779 | * to determine that copies of the buffer in the log before | 779 | * to determine that copies of the buffer in the log before |
780 | * this should not be replayed. | 780 | * this should not be replayed. |
@@ -792,9 +792,9 @@ xfs_trans_binval( | |||
792 | XFS_BUF_UNDELAYWRITE(bp); | 792 | XFS_BUF_UNDELAYWRITE(bp); |
793 | XFS_BUF_STALE(bp); | 793 | XFS_BUF_STALE(bp); |
794 | bip->bli_flags |= XFS_BLI_STALE; | 794 | bip->bli_flags |= XFS_BLI_STALE; |
795 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 795 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
796 | bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF; | 796 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
797 | bip->bli_format.blf_flags |= XFS_BLI_CANCEL; | 797 | bip->bli_format.blf_flags |= XFS_BLF_CANCEL; |
798 | memset((char *)(bip->bli_format.blf_data_map), 0, | 798 | memset((char *)(bip->bli_format.blf_data_map), 0, |
799 | (bip->bli_format.blf_map_size * sizeof(uint))); | 799 | (bip->bli_format.blf_map_size * sizeof(uint))); |
800 | lidp->lid_flags |= XFS_LID_DIRTY; | 800 | lidp->lid_flags |= XFS_LID_DIRTY; |
@@ -802,16 +802,16 @@ xfs_trans_binval( | |||
802 | } | 802 | } |
803 | 803 | ||
804 | /* | 804 | /* |
805 | * This call is used to indicate that the buffer contains on-disk | 805 | * This call is used to indicate that the buffer contains on-disk inodes which |
806 | * inodes which must be handled specially during recovery. They | 806 | * must be handled specially during recovery. They require special handling |
807 | * require special handling because only the di_next_unlinked from | 807 | * because only the di_next_unlinked from the inodes in the buffer should be |
808 | * the inodes in the buffer should be recovered. The rest of the | 808 | * recovered. The rest of the data in the buffer is logged via the inodes |
809 | * data in the buffer is logged via the inodes themselves. | 809 | * themselves. |
810 | * | 810 | * |
811 | * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log | 811 | * All we do is set the XFS_BLI_INODE_BUF flag in the item's flags so it can be |
812 | * format structure so that we'll know what to do at recovery time. | 812 | * transferred to the buffer's log format structure so that we'll know what to |
813 | * do at recovery time. | ||
813 | */ | 814 | */ |
814 | /* ARGSUSED */ | ||
815 | void | 815 | void |
816 | xfs_trans_inode_buf( | 816 | xfs_trans_inode_buf( |
817 | xfs_trans_t *tp, | 817 | xfs_trans_t *tp, |
@@ -826,7 +826,7 @@ xfs_trans_inode_buf( | |||
826 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 826 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
827 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 827 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
828 | 828 | ||
829 | bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF; | 829 | bip->bli_flags |= XFS_BLI_INODE_BUF; |
830 | } | 830 | } |
831 | 831 | ||
832 | /* | 832 | /* |
@@ -908,9 +908,9 @@ xfs_trans_dquot_buf( | |||
908 | ASSERT(XFS_BUF_ISBUSY(bp)); | 908 | ASSERT(XFS_BUF_ISBUSY(bp)); |
909 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); | 909 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); |
910 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 910 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); |
911 | ASSERT(type == XFS_BLI_UDQUOT_BUF || | 911 | ASSERT(type == XFS_BLF_UDQUOT_BUF || |
912 | type == XFS_BLI_PDQUOT_BUF || | 912 | type == XFS_BLF_PDQUOT_BUF || |
913 | type == XFS_BLI_GDQUOT_BUF); | 913 | type == XFS_BLF_GDQUOT_BUF); |
914 | 914 | ||
915 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 915 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
916 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 916 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
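The rename in this file separates two namespaces that used to share a prefix: XFS_BLI_* now names only in-memory buf-log-item state in bip->bli_flags, while XFS_BLF_* names bits in the on-disk log format word, bli_format.blf_flags. Per the rewritten comment above, xfs_trans_inode_buf() sets the in-memory bit, and the format bit is transferred later, when the item is formatted for the log. A sketch of that deferral with stand-in types and assumed bit values:

    #define XFS_BLI_INODE_BUF 0x10   /* in-memory bit (value assumed) */
    #define XFS_BLF_INODE_BUF 0x1    /* log-format bit (value assumed) */

    struct bli_stub {
            unsigned int bli_flags;                        /* in-memory */
            struct { unsigned int blf_flags; } bli_format; /* written to log */
    };

    void mark_inode_buf(struct bli_stub *bip)
    {
            bip->bli_flags |= XFS_BLI_INODE_BUF;   /* what the hunk now does */
    }

    void format_item(struct bli_stub *bip)
    {
            /* assumed to run when the item is formatted for the log */
            if (bip->bli_flags & XFS_BLI_INODE_BUF)
                    bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
    }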
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index eb3fc57f9eef..f11d37d06dcc 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c | |||
@@ -299,6 +299,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
299 | void | 299 | void |
300 | xfs_trans_free_items( | 300 | xfs_trans_free_items( |
301 | xfs_trans_t *tp, | 301 | xfs_trans_t *tp, |
302 | xfs_lsn_t commit_lsn, | ||
302 | int flags) | 303 | int flags) |
303 | { | 304 | { |
304 | xfs_log_item_chunk_t *licp; | 305 | xfs_log_item_chunk_t *licp; |
@@ -311,7 +312,7 @@ xfs_trans_free_items( | |||
311 | * Special case the embedded chunk so we don't free it below. | 312 | * Special case the embedded chunk so we don't free it below. |
312 | */ | 313 | */ |
313 | if (!xfs_lic_are_all_free(licp)) { | 314 | if (!xfs_lic_are_all_free(licp)) { |
314 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 315 | (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); |
315 | xfs_lic_all_free(licp); | 316 | xfs_lic_all_free(licp); |
316 | licp->lic_unused = 0; | 317 | licp->lic_unused = 0; |
317 | } | 318 | } |
@@ -322,7 +323,7 @@ xfs_trans_free_items( | |||
322 | */ | 323 | */ |
323 | while (licp != NULL) { | 324 | while (licp != NULL) { |
324 | ASSERT(!xfs_lic_are_all_free(licp)); | 325 | ASSERT(!xfs_lic_are_all_free(licp)); |
325 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 326 | (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); |
326 | next_licp = licp->lic_next; | 327 | next_licp = licp->lic_next; |
327 | kmem_free(licp); | 328 | kmem_free(licp); |
328 | licp = next_licp; | 329 | licp = next_licp; |
@@ -438,112 +439,3 @@ xfs_trans_unlock_chunk( | |||
438 | 439 | ||
439 | return freed; | 440 | return freed; |
440 | } | 441 | } |
441 | |||
442 | |||
443 | /* | ||
444 | * This is called to add the given busy item to the transaction's | ||
445 | * list of busy items. It must find a free busy item descriptor | ||
446 | * or allocate a new one and add the item to that descriptor. | ||
447 | * The function returns a pointer to busy descriptor used to point | ||
448 | * to the new busy entry. The log busy entry will now point to its new | ||
449 | * descriptor with its ???? field. | ||
450 | */ | ||
451 | xfs_log_busy_slot_t * | ||
452 | xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx) | ||
453 | { | ||
454 | xfs_log_busy_chunk_t *lbcp; | ||
455 | xfs_log_busy_slot_t *lbsp; | ||
456 | int i=0; | ||
457 | |||
458 | /* | ||
459 | * If there are no free descriptors, allocate a new chunk | ||
460 | * of them and put it at the front of the chunk list. | ||
461 | */ | ||
462 | if (tp->t_busy_free == 0) { | ||
463 | lbcp = (xfs_log_busy_chunk_t*) | ||
464 | kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP); | ||
465 | ASSERT(lbcp != NULL); | ||
466 | /* | ||
467 | * Initialize the chunk, and then | ||
468 | * claim the first slot in the newly allocated chunk. | ||
469 | */ | ||
470 | XFS_LBC_INIT(lbcp); | ||
471 | XFS_LBC_CLAIM(lbcp, 0); | ||
472 | lbcp->lbc_unused = 1; | ||
473 | lbsp = XFS_LBC_SLOT(lbcp, 0); | ||
474 | |||
475 | /* | ||
476 | * Link in the new chunk and update the free count. | ||
477 | */ | ||
478 | lbcp->lbc_next = tp->t_busy.lbc_next; | ||
479 | tp->t_busy.lbc_next = lbcp; | ||
480 | tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1; | ||
481 | |||
482 | /* | ||
483 | * Initialize the descriptor and the generic portion | ||
484 | * of the log item. | ||
485 | * | ||
486 | * Point the new slot at this item and return it. | ||
487 | * Also point the log item at its currently active | ||
488 | * descriptor and set the item's mount pointer. | ||
489 | */ | ||
490 | lbsp->lbc_ag = ag; | ||
491 | lbsp->lbc_idx = idx; | ||
492 | return lbsp; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Find the free descriptor. It is somewhere in the chunklist | ||
497 | * of descriptors. | ||
498 | */ | ||
499 | lbcp = &tp->t_busy; | ||
500 | while (lbcp != NULL) { | ||
501 | if (XFS_LBC_VACANCY(lbcp)) { | ||
502 | if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) { | ||
503 | i = lbcp->lbc_unused; | ||
504 | break; | ||
505 | } else { | ||
506 | /* out-of-order vacancy */ | ||
507 | cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp); | ||
508 | ASSERT(0); | ||
509 | } | ||
510 | } | ||
511 | lbcp = lbcp->lbc_next; | ||
512 | } | ||
513 | ASSERT(lbcp != NULL); | ||
514 | /* | ||
515 | * If we find a free descriptor, claim it, | ||
516 | * initialize it, and return it. | ||
517 | */ | ||
518 | XFS_LBC_CLAIM(lbcp, i); | ||
519 | if (lbcp->lbc_unused <= i) { | ||
520 | lbcp->lbc_unused = i + 1; | ||
521 | } | ||
522 | lbsp = XFS_LBC_SLOT(lbcp, i); | ||
523 | tp->t_busy_free--; | ||
524 | lbsp->lbc_ag = ag; | ||
525 | lbsp->lbc_idx = idx; | ||
526 | return lbsp; | ||
527 | } | ||
528 | |||
529 | |||
530 | /* | ||
531 | * xfs_trans_free_busy | ||
532 | * Free all of the busy lists from a transaction | ||
533 | */ | ||
534 | void | ||
535 | xfs_trans_free_busy(xfs_trans_t *tp) | ||
536 | { | ||
537 | xfs_log_busy_chunk_t *lbcp; | ||
538 | xfs_log_busy_chunk_t *lbcq; | ||
539 | |||
540 | lbcp = tp->t_busy.lbc_next; | ||
541 | while (lbcp != NULL) { | ||
542 | lbcq = lbcp->lbc_next; | ||
543 | kmem_free(lbcp); | ||
544 | lbcp = lbcq; | ||
545 | } | ||
546 | |||
547 | XFS_LBC_INIT(&tp->t_busy); | ||
548 | tp->t_busy.lbc_unused = 0; | ||
549 | } | ||
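With the busy-list helpers gone, the surviving change in this file threads a commit_lsn argument through xfs_trans_free_items() instead of hard-coding NULLCOMMITLSN: abort and cancel callers still pass NULLCOMMITLSN, while the new CIL commit path passes the real checkpoint LSN. A sketch of that call contract, with NULLCOMMITLSN's value assumed to be the conventional ((xfs_lsn_t)-1):

    typedef long long xfs_lsn_t;
    #define NULLCOMMITLSN ((xfs_lsn_t)-1)   /* assumed definition */

    void free_items(xfs_lsn_t commit_lsn, int flags)
    {
            /* each chunk is unlocked with commit_lsn, so a real LSN can
             * reach the items' unlock hooks; -1 means "no commit LSN" */
            (void)commit_lsn;
            (void)flags;
    }

    void commit_cil_path(xfs_lsn_t commit_lsn) { free_items(commit_lsn, 0); }
    void cancel_path(int flags)                { free_items(NULLCOMMITLSN, flags); }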
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 73e2ad397432..c6e4f2c8de6e 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -35,13 +35,14 @@ struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *, | |||
35 | struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); | 35 | struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); |
36 | struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, | 36 | struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, |
37 | struct xfs_log_item_desc *); | 37 | struct xfs_log_item_desc *); |
38 | void xfs_trans_free_items(struct xfs_trans *, int); | 38 | |
39 | void xfs_trans_unlock_items(struct xfs_trans *, | 39 | void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn); |
40 | xfs_lsn_t); | 40 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
41 | void xfs_trans_free_busy(xfs_trans_t *tp); | 41 | int flags); |
42 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | 42 | |
43 | xfs_agnumber_t ag, | 43 | void xfs_trans_item_committed(struct xfs_log_item *lip, |
44 | xfs_extlen_t idx); | 44 | xfs_lsn_t commit_lsn, int aborted); |
45 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | ||
45 | 46 | ||
46 | /* | 47 | /* |
47 | * AIL traversal cursor. | 48 | * AIL traversal cursor. |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index b09904555d07..320775295e32 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -75,6 +75,8 @@ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | |||
75 | 75 | ||
76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ | 76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ |
77 | 77 | ||
78 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
79 | |||
78 | /* | 80 | /* |
79 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 81 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
80 | * Disk based types: | 82 | * Disk based types: |