Diffstat (limited to 'fs')
269 files changed, 5546 insertions, 3607 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index ed835836e0dc..32ef4009d030 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,7 +40,9 @@
 extern struct file_system_type v9fs_fs_type;
 extern const struct address_space_operations v9fs_addr_operations;
 extern const struct file_operations v9fs_file_operations;
+extern const struct file_operations v9fs_file_operations_dotl;
 extern const struct file_operations v9fs_dir_operations;
+extern const struct file_operations v9fs_dir_operations_dotl;
 extern const struct dentry_operations v9fs_dentry_operations;
 extern const struct dentry_operations v9fs_cached_dentry_operations;
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0adfd64dfcee..d61e3b28ce37 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -203,3 +203,11 @@ const struct file_operations v9fs_dir_operations = {
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 };
+
+const struct file_operations v9fs_dir_operations_dotl = {
+	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
+	.readdir = v9fs_dir_readdir,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index df52d488d2a6..25b300e1c9d7 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -296,3 +296,14 @@ const struct file_operations v9fs_file_operations = {
 	.mmap = generic_file_readonly_mmap,
 	.fsync = v9fs_file_fsync,
 };
+
+const struct file_operations v9fs_file_operations_dotl = {
+	.llseek = generic_file_llseek,
+	.read = v9fs_file_read,
+	.write = v9fs_file_write,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+	.lock = v9fs_file_lock,
+	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync,
+};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f2434fc9d2c4..4331b3b5ee1c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -44,9 +44,12 @@
 #include "cache.h"
 
 static const struct inode_operations v9fs_dir_inode_operations;
-static const struct inode_operations v9fs_dir_inode_operations_ext;
+static const struct inode_operations v9fs_dir_inode_operations_dotu;
+static const struct inode_operations v9fs_dir_inode_operations_dotl;
 static const struct inode_operations v9fs_file_inode_operations;
+static const struct inode_operations v9fs_file_inode_operations_dotl;
 static const struct inode_operations v9fs_symlink_inode_operations;
+static const struct inode_operations v9fs_symlink_inode_operations_dotl;
 
 /**
  * unixmode2p9mode - convert unix mode bits to plan 9
@@ -253,9 +256,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	inode->i_mode = mode;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
+	inode_init_owner(inode, NULL, mode);
 	inode->i_blocks = 0;
 	inode->i_rdev = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -275,25 +276,44 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 		break;
 	case S_IFREG:
-		inode->i_op = &v9fs_file_inode_operations;
-		inode->i_fop = &v9fs_file_operations;
+		if (v9fs_proto_dotl(v9ses)) {
+			inode->i_op = &v9fs_file_inode_operations_dotl;
+			inode->i_fop = &v9fs_file_operations_dotl;
+		} else {
+			inode->i_op = &v9fs_file_inode_operations;
+			inode->i_fop = &v9fs_file_operations;
+		}
+
 		break;
+
 	case S_IFLNK:
-		if (!v9fs_proto_dotu(v9ses)) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "extended modes used w/o 9P2000.u\n");
+		if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
+						"legacy protocol.\n");
 			err = -EINVAL;
 			goto error;
 		}
-		inode->i_op = &v9fs_symlink_inode_operations;
+
+		if (v9fs_proto_dotl(v9ses))
+			inode->i_op = &v9fs_symlink_inode_operations_dotl;
+		else
+			inode->i_op = &v9fs_symlink_inode_operations;
+
 		break;
 	case S_IFDIR:
 		inc_nlink(inode);
-		if (v9fs_proto_dotu(v9ses))
-			inode->i_op = &v9fs_dir_inode_operations_ext;
+		if (v9fs_proto_dotl(v9ses))
+			inode->i_op = &v9fs_dir_inode_operations_dotl;
+		else if (v9fs_proto_dotu(v9ses))
+			inode->i_op = &v9fs_dir_inode_operations_dotu;
 		else
 			inode->i_op = &v9fs_dir_inode_operations;
-		inode->i_fop = &v9fs_dir_operations;
+
+		if (v9fs_proto_dotl(v9ses))
+			inode->i_fop = &v9fs_dir_operations_dotl;
+		else
+			inode->i_fop = &v9fs_dir_operations;
+
 		break;
 	default:
 		P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
@@ -434,14 +454,12 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 {
 	int retval;
 	struct inode *file_inode;
-	struct v9fs_session_info *v9ses;
 	struct p9_fid *v9fid;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
 		rmdir);
 
 	file_inode = file->d_inode;
-	v9ses = v9fs_inode2v9ses(file_inode);
 	v9fid = v9fs_fid_clone(file);
 	if (IS_ERR(v9fid))
 		return PTR_ERR(v9fid);
@@ -484,12 +502,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	ofid = NULL;
 	fid = NULL;
 	name = (char *) dentry->d_name.name;
-	dfid = v9fs_fid_clone(dentry->d_parent);
+	dfid = v9fs_fid_lookup(dentry->d_parent);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
-		P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
-		dfid = NULL;
-		goto error;
+		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+		return ERR_PTR(err);
 	}
 
 	/* clone a fid to use for creation */
@@ -497,8 +514,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
 		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-		ofid = NULL;
-		goto error;
+		return ERR_PTR(err);
 	}
 
 	err = p9_client_fcreate(ofid, name, perm, mode, extension);
@@ -508,14 +524,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	}
 
 	/* now walk from the parent so we can get unopened fid */
-	fid = p9_client_walk(dfid, 1, &name, 0);
+	fid = p9_client_walk(dfid, 1, &name, 1);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
 		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		fid = NULL;
 		goto error;
-	} else
-		dfid = NULL;
+	}
 
 	/* instantiate inode and assign the unopened fid to the dentry */
 	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
@@ -538,9 +553,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	return ofid;
 
 error:
-	if (dfid)
-		p9_client_clunk(dfid);
-
 	if (ofid)
 		p9_client_clunk(ofid);
 
@@ -675,8 +687,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(fid)) {
 		result = PTR_ERR(fid);
 		if (result == -ENOENT) {
-			d_add(dentry, NULL);
-			return NULL;
+			inode = NULL;
+			goto inst_out;
 		}
 
 		return ERR_PTR(result);
@@ -693,7 +705,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	if (result < 0)
 		goto error;
 
-	if ((fid->qid.version) && (v9ses->cache))
+inst_out:
+	if (v9ses->cache)
 		dentry->d_op = &v9fs_cached_dentry_operations;
 	else
 		dentry->d_op = &v9fs_dentry_operations;
@@ -772,6 +785,13 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto clunk_olddir;
 	}
 
+	if (v9fs_proto_dotl(v9ses)) {
+		retval = p9_client_rename(oldfid, newdirfid,
+					(char *) new_dentry->d_name.name);
+		if (retval != -ENOSYS)
+			goto clunk_newdir;
+	}
+
 	/* 9P can only handle file rename in the same directory */
 	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
@@ -1197,6 +1217,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 		sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
 	else if (S_ISFIFO(mode))
 		*name = 0;
+	else if (S_ISSOCK(mode))
+		*name = 0;
 	else {
 		__putname(name);
 		return -EINVAL;
@@ -1208,7 +1230,21 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	return retval;
 }
 
-static const struct inode_operations v9fs_dir_inode_operations_ext = {
+static const struct inode_operations v9fs_dir_inode_operations_dotu = {
+	.create = v9fs_vfs_create,
+	.lookup = v9fs_vfs_lookup,
+	.symlink = v9fs_vfs_symlink,
+	.link = v9fs_vfs_link,
+	.unlink = v9fs_vfs_unlink,
+	.mkdir = v9fs_vfs_mkdir,
+	.rmdir = v9fs_vfs_rmdir,
+	.mknod = v9fs_vfs_mknod,
+	.rename = v9fs_vfs_rename,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
 	.symlink = v9fs_vfs_symlink,
@@ -1239,6 +1275,11 @@ static const struct inode_operations v9fs_file_inode_operations = {
 	.setattr = v9fs_vfs_setattr,
 };
 
+static const struct inode_operations v9fs_file_inode_operations_dotl = {
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
 static const struct inode_operations v9fs_symlink_inode_operations = {
 	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link,
@@ -1246,3 +1287,11 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
 };
+
+static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
+	.readlink = generic_readlink,
+	.follow_link = v9fs_vfs_follow_link,
+	.put_link = v9fs_vfs_put_link,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
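v9fs_get_inode() now selects its inode, file, and symlink operation vectors by protocol level instead of assuming 9P2000.u. The v9fs_proto_dotu()/v9fs_proto_dotl() predicates it branches on are not part of this hunk; a minimal sketch of their likely shape, assuming flag bits on the session (the authoritative helpers live in fs/9p/v9fs.h and may differ):

	/* Sketch only: protocol-level tests assumed by the dispatch above. */
	static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
	{
		return v9ses->flags & V9FS_PROTO_2000U;	/* 9P2000.u extensions */
	}

	static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
	{
		return v9ses->flags & V9FS_PROTO_2000L;	/* 9P2000.L protocol */
	}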
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 806da5d3b3a0..be74d020436e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -38,6 +38,7 @@
 #include <linux/idr.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/statfs.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -45,7 +46,7 @@
 #include "v9fs_vfs.h"
 #include "fid.h"
 
-static const struct super_operations v9fs_super_ops;
+static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
 
 /**
  * v9fs_set_super - set the superblock
@@ -76,7 +77,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
 	sb->s_blocksize = 1 << sb->s_blocksize_bits;
 	sb->s_magic = V9FS_MAGIC;
-	sb->s_op = &v9fs_super_ops;
+	if (v9fs_proto_dotl(v9ses))
+		sb->s_op = &v9fs_super_ops_dotl;
+	else
+		sb->s_op = &v9fs_super_ops;
 	sb->s_bdi = &v9ses->bdi;
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
@@ -211,6 +215,42 @@ v9fs_umount_begin(struct super_block *sb)
 	v9fs_session_begin_cancel(v9ses);
 }
 
+static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_rstatfs rs;
+	int res;
+
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid)) {
+		res = PTR_ERR(fid);
+		goto done;
+	}
+
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	if (v9fs_proto_dotl(v9ses)) {
+		res = p9_client_statfs(fid, &rs);
+		if (res == 0) {
+			buf->f_type = rs.type;
+			buf->f_bsize = rs.bsize;
+			buf->f_blocks = rs.blocks;
+			buf->f_bfree = rs.bfree;
+			buf->f_bavail = rs.bavail;
+			buf->f_files = rs.files;
+			buf->f_ffree = rs.ffree;
+			buf->f_fsid.val[0] = rs.fsid & 0xFFFFFFFFUL;
+			buf->f_fsid.val[1] = (rs.fsid >> 32) & 0xFFFFFFFFUL;
+			buf->f_namelen = rs.namelen;
+		}
+		if (res != -ENOSYS)
+			goto done;
+	}
+	res = simple_statfs(dentry, buf);
+done:
+	return res;
+}
+
 static const struct super_operations v9fs_super_ops = {
 #ifdef CONFIG_9P_FSCACHE
 	.alloc_inode = v9fs_alloc_inode,
@@ -222,6 +262,17 @@ static const struct super_operations v9fs_super_ops = {
 	.umount_begin = v9fs_umount_begin,
 };
 
+static const struct super_operations v9fs_super_ops_dotl = {
+#ifdef CONFIG_9P_FSCACHE
+	.alloc_inode = v9fs_alloc_inode,
+	.destroy_inode = v9fs_destroy_inode,
+#endif
+	.statfs = v9fs_statfs,
+	.clear_inode = v9fs_clear_inode,
+	.show_options = generic_show_options,
+	.umount_begin = v9fs_umount_begin,
+};
+
 struct file_system_type v9fs_fs_type = {
 	.name = "9p",
 	.get_sb = v9fs_get_sb,
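The new v9fs_statfs() converts a 9P2000.L statfs reply into struct kstatfs and falls back to simple_statfs() when the server returns -ENOSYS. The reply structure itself is defined in the 9p protocol headers, not in this diff; a sketch of its shape, inferred purely from the fields consumed above:

	/* Sketch: field set inferred from v9fs_statfs(); the authoritative
	 * definition belongs in include/net/9p/9p.h and may differ. */
	struct p9_rstatfs {
		u32 type;	/* filesystem type */
		u32 bsize;	/* block size */
		u64 blocks;	/* total data blocks */
		u64 bfree;	/* free blocks */
		u64 bavail;	/* free blocks for unprivileged users */
		u64 files;	/* total file nodes */
		u64 ffree;	/* free file nodes */
		u64 fsid;	/* split into f_fsid.val[0] / val[1] above */
		u32 namelen;	/* maximum filename length */
	};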
diff --git a/fs/Makefile b/fs/Makefile
index 97f340f14ba2..e6ec1d309b1d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
-		stack.o fs_struct.o
+		stack.o fs_struct.o statfs.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index adc1cb771b57..b42d5cc1d6d2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -189,13 +189,9 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
 				     struct key *key)
 {
 	struct page *page;
-	struct file file = {
-		.private_data = key,
-	};
-
 	_enter("{%lu},%lu", dir->i_ino, index);
 
-	page = read_mapping_page(dir->i_mapping, index, &file);
+	page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
 	if (!IS_ERR(page)) {
 		kmap(page);
 		if (!PageChecked(page))
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0df9bc2b724d..14d89fa58fee 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -121,34 +121,19 @@
 #endif
 
 /*
- * AFS read page from file, directory or symlink
+ * read page from file, directory or symlink, given a key to use
 */
-static int afs_readpage(struct file *file, struct page *page)
+int afs_page_filler(void *data, struct page *page)
 {
-	struct afs_vnode *vnode;
-	struct inode *inode;
-	struct key *key;
+	struct inode *inode = page->mapping->host;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct key *key = data;
 	size_t len;
 	off_t offset;
 	int ret;
 
-	inode = page->mapping->host;
-
-	if (file) {
-		key = file->private_data;
-		ASSERT(key != NULL);
-	} else {
-		key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
-		if (IS_ERR(key)) {
-			ret = PTR_ERR(key);
-			goto error_nokey;
-		}
-	}
-
 	_enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
 
-	vnode = AFS_FS_I(inode);
-
 	BUG_ON(!PageLocked(page));
 
 	ret = -ESTALE;
@@ -214,31 +199,56 @@ static int afs_readpage(struct file *file, struct page *page)
 		unlock_page(page);
 	}
 
-	if (!file)
-		key_put(key);
 	_leave(" = 0");
 	return 0;
 
 error:
 	SetPageError(page);
 	unlock_page(page);
-	if (!file)
-		key_put(key);
-error_nokey:
 	_leave(" = %d", ret);
 	return ret;
 }
 
 /*
+ * read page from file, directory or symlink, given a file to nominate the key
+ * to be used
+ */
+static int afs_readpage(struct file *file, struct page *page)
+{
+	struct key *key;
+	int ret;
+
+	if (file) {
+		key = file->private_data;
+		ASSERT(key != NULL);
+		ret = afs_page_filler(key, page);
+	} else {
+		struct inode *inode = page->mapping->host;
+		key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
+		if (IS_ERR(key)) {
+			ret = PTR_ERR(key);
+		} else {
+			ret = afs_page_filler(key, page);
+			key_put(key);
+		}
+	}
+	return ret;
+}
+
+/*
 * read a set of pages
 */
 static int afs_readpages(struct file *file, struct address_space *mapping,
			 struct list_head *pages, unsigned nr_pages)
 {
+	struct key *key = file->private_data;
 	struct afs_vnode *vnode;
 	int ret = 0;
 
-	_enter(",{%lu},,%d", mapping->host->i_ino, nr_pages);
+	_enter("{%d},{%lu},,%d",
+	       key_serial(key), mapping->host->i_ino, nr_pages);
+
+	ASSERT(key != NULL);
 
 	vnode = AFS_FS_I(mapping->host);
 	if (vnode->flags & AFS_VNODE_DELETED) {
@@ -279,7 +289,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 	}
 
 	/* load the missing pages from the network */
-	ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file);
+	ret = read_cache_pages(mapping, pages, afs_page_filler, key);
 
 	_leave(" = %d [netting]", ret);
 	return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a10f2582844f..807f284cc75e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -494,6 +494,7 @@ extern const struct file_operations afs_file_operations;
 
 extern int afs_open(struct inode *, struct file *);
 extern int afs_release(struct inode *, struct file *);
+extern int afs_page_filler(void *, struct page *);
 
 /*
 * flock.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index b3feddc4f7d6..a9e23039ea34 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -49,9 +49,6 @@ static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
 */
 int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
 {
-	struct file file = {
-		.private_data = key,
-	};
 	struct page *page;
 	size_t size;
 	char *buf;
@@ -61,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
 	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
 
 	/* read the contents of the symlink into the pagecache */
-	page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file);
+	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
+			       afs_page_filler, key);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
 		goto out;
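Both AFS call sites above stop faking a struct file just to smuggle the key into ->readpage() and instead pass it through read_cache_page()'s opaque data argument. The contract being relied on, sketched with the filler signature of this era (an assumption, not shown in the diff):

	/* Sketch: read_cache_page() returns the uptodate page at @index,
	 * invoking @filler(@data, page) on a cache miss; the filler must
	 * read the page and unlock it, exactly as afs_page_filler() does. */
	struct page *read_cache_page(struct address_space *mapping,
				     pgoff_t index,
				     int (*filler)(void *, struct page *),
				     void *data);

	page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);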
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e4b75d6eda83..9bd4b3876c99 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void)
 	 * that it already _is_ on the dirty list.
 	 */
 	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	inode->i_flags |= S_PRIVATE;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e8e5e63ac950..db4117ed7803 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,13 +18,14 @@
 #include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
+#include <linux/smp_lock.h>
 #include "autofs_i.h"
 
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
-static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
+static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -38,7 +39,7 @@ const struct file_operations autofs4_root_operations = {
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
 	.llseek		= dcache_dir_lseek,
-	.ioctl		= autofs4_root_ioctl,
+	.unlocked_ioctl	= autofs4_root_ioctl,
 };
 
 const struct file_operations autofs4_dir_operations = {
@@ -902,8 +903,8 @@ int is_autofs4_dentry(struct dentry *dentry)
 * ioctl()'s on the root directory is the chief method for the daemon to
 * generate kernel reactions
 */
-static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
-			      unsigned int cmd, unsigned long arg)
+static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
+				       unsigned int cmd, unsigned long arg)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb);
 	void __user *p = (void __user *)arg;
@@ -947,3 +948,16 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
 		return -ENOSYS;
 	}
 }
+
+static long autofs4_root_ioctl(struct file *filp,
+			       unsigned int cmd, unsigned long arg)
+{
+	long ret;
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	lock_kernel();
+	ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 1e41aadb1068..8f73841fc974 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -105,14 +105,12 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	}
 	set_bit(ino, info->si_imap);
 	info->si_freei--;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
+	inode_init_owner(inode, dir, mode);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
 	inode->i_blocks = 0;
 	inode->i_op = &bfs_file_inops;
 	inode->i_fop = &bfs_file_operations;
 	inode->i_mapping->a_ops = &bfs_aops;
-	inode->i_mode = mode;
 	inode->i_ino = ino;
 	BFS_I(inode)->i_dsk_ino = ino;
 	BFS_I(inode)->i_sblock = 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6dcee88c2e5d..26e5f5026620 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -245,37 +245,14 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	sb = get_active_super(bdev);
 	if (!sb)
 		goto out;
-	if (sb->s_flags & MS_RDONLY) {
-		sb->s_frozen = SB_FREEZE_TRANS;
-		up_write(&sb->s_umount);
+	error = freeze_super(sb);
+	if (error) {
+		deactivate_super(sb);
+		bdev->bd_fsfreeze_count--;
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return sb;
-	}
-
-	sb->s_frozen = SB_FREEZE_WRITE;
-	smp_wmb();
-
-	sync_filesystem(sb);
-
-	sb->s_frozen = SB_FREEZE_TRANS;
-	smp_wmb();
-
-	sync_blockdev(sb->s_bdev);
-
-	if (sb->s_op->freeze_fs) {
-		error = sb->s_op->freeze_fs(sb);
-		if (error) {
-			printk(KERN_ERR
-				"VFS:Filesystem freeze failed\n");
-			sb->s_frozen = SB_UNFROZEN;
-			deactivate_locked_super(sb);
-			bdev->bd_fsfreeze_count--;
-			mutex_unlock(&bdev->bd_fsfreeze_mutex);
-			return ERR_PTR(error);
-		}
-	}
-	up_write(&sb->s_umount);
-
+		return ERR_PTR(error);
+	}
+	deactivate_super(sb);
 out:
 	sync_blockdev(bdev);
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -296,40 +273,22 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 
 	mutex_lock(&bdev->bd_fsfreeze_mutex);
 	if (!bdev->bd_fsfreeze_count)
-		goto out_unlock;
+		goto out;
 
 	error = 0;
 	if (--bdev->bd_fsfreeze_count > 0)
-		goto out_unlock;
+		goto out;
 
 	if (!sb)
-		goto out_unlock;
-
-	BUG_ON(sb->s_bdev != bdev);
-	down_write(&sb->s_umount);
-	if (sb->s_flags & MS_RDONLY)
-		goto out_unfrozen;
-
-	if (sb->s_op->unfreeze_fs) {
-		error = sb->s_op->unfreeze_fs(sb);
-		if (error) {
-			printk(KERN_ERR
-				"VFS:Filesystem thaw failed\n");
-			sb->s_frozen = SB_FREEZE_TRANS;
-			bdev->bd_fsfreeze_count++;
-			mutex_unlock(&bdev->bd_fsfreeze_mutex);
-			return error;
-		}
-	}
-
-out_unfrozen:
-	sb->s_frozen = SB_UNFROZEN;
-	smp_wmb();
-	wake_up(&sb->s_wait_unfrozen);
+		goto out;
 
-	if (sb)
-		deactivate_locked_super(sb);
-out_unlock:
+	error = thaw_super(sb);
+	if (error) {
+		bdev->bd_fsfreeze_count++;
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return error;
+	}
+out:
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return 0;
 }
@@ -417,7 +376,7 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	 */
 	mutex_unlock(&bd_inode->i_mutex);
 
-	error = blkdev_issue_flush(bdev, NULL);
+	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
 	if (error == -EOPNOTSUPP)
 		error = 0;
 
@@ -668,41 +627,209 @@ void bd_forget(struct inode *inode)
 		iput(bdev->bd_inode);
 }
 
-int bd_claim(struct block_device *bdev, void *holder)
+/**
+ * bd_may_claim - test whether a block device can be claimed
+ * @bdev: block device of interest
+ * @whole: whole block device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Test whether @bdev can be claimed by @holder.
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).
+ *
+ * RETURNS:
+ * %true if @bdev can be claimed, %false otherwise.
+ */
+static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
+			 void *holder)
 {
-	int res;
-	spin_lock(&bdev_lock);
-
-	/* first decide result */
 	if (bdev->bd_holder == holder)
-		res = 0;	 /* already a holder */
+		return true;	 /* already a holder */
 	else if (bdev->bd_holder != NULL)
-		res = -EBUSY;	 /* held by someone else */
+		return false;	 /* held by someone else */
 	else if (bdev->bd_contains == bdev)
-		res = 0;	 /* is a whole device which isn't held */
+		return true;	 /* is a whole device which isn't held */
 
-	else if (bdev->bd_contains->bd_holder == bd_claim)
-		res = 0;	 /* is a partition of a device that is being partitioned */
-	else if (bdev->bd_contains->bd_holder != NULL)
-		res = -EBUSY;	 /* is a partition of a held device */
+	else if (whole->bd_holder == bd_claim)
+		return true;	 /* is a partition of a device that is being partitioned */
+	else if (whole->bd_holder != NULL)
+		return false;	 /* is a partition of a held device */
 	else
-		res = 0;	 /* is a partition of an un-held device */
+		return true;	 /* is a partition of an un-held device */
+}
+
+/**
+ * bd_prepare_to_claim - prepare to claim a block device
+ * @bdev: block device of interest
+ * @whole: the whole device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Prepare to claim @bdev.  This function fails if @bdev is already
+ * claimed by another holder and waits if another claiming is in
+ * progress.  This function doesn't actually claim.  On successful
+ * return, the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
+ * it multiple times.
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY otherwise.
+ */
+static int bd_prepare_to_claim(struct block_device *bdev,
+			       struct block_device *whole, void *holder)
+{
+retry:
+	/* if someone else claimed, fail */
+	if (!bd_may_claim(bdev, whole, holder))
+		return -EBUSY;
+
+	/* if someone else is claiming, wait for it to finish */
+	if (whole->bd_claiming && whole->bd_claiming != holder) {
+		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock(&bdev_lock);
+		schedule();
+		finish_wait(wq, &wait);
+		spin_lock(&bdev_lock);
+		goto retry;
+	}
+
+	/* yay, all mine */
+	return 0;
+}
 
-	/* now impose change */
-	if (res==0) {
+/**
+ * bd_start_claiming - start claiming a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * @bdev is about to be opened exclusively.  Check @bdev can be opened
+ * exclusively and mark that an exclusive open is in progress.  Each
+ * successful call to this function must be matched with a call to
+ * either bd_claim() or bd_abort_claiming().  If this function
+ * succeeds, the matching bd_claim() is guaranteed to succeed.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to the block device containing @bdev on success, ERR_PTR()
+ * value on failure.
+ */
+static struct block_device *bd_start_claiming(struct block_device *bdev,
+					      void *holder)
+{
+	struct gendisk *disk;
+	struct block_device *whole;
+	int partno, err;
+
+	might_sleep();
+
+	/*
+	 * @bdev might not have been initialized properly yet, look up
+	 * and grab the outer block device the hard way.
+	 */
+	disk = get_gendisk(bdev->bd_dev, &partno);
+	if (!disk)
+		return ERR_PTR(-ENXIO);
+
+	whole = bdget_disk(disk, 0);
+	put_disk(disk);
+	if (!whole)
+		return ERR_PTR(-ENOMEM);
+
+	/* prepare to claim, if successful, mark claiming in progress */
+	spin_lock(&bdev_lock);
+
+	err = bd_prepare_to_claim(bdev, whole, holder);
+	if (err == 0) {
+		whole->bd_claiming = holder;
+		spin_unlock(&bdev_lock);
+		return whole;
+	} else {
+		spin_unlock(&bdev_lock);
+		bdput(whole);
+		return ERR_PTR(err);
+	}
+}
+
+/* releases bdev_lock */
+static void __bd_abort_claiming(struct block_device *whole, void *holder)
+{
+	BUG_ON(whole->bd_claiming != holder);
+	whole->bd_claiming = NULL;
+	wake_up_bit(&whole->bd_claiming, 0);
+
+	spin_unlock(&bdev_lock);
+	bdput(whole);
+}
+
+/**
+ * bd_abort_claiming - abort claiming a block device
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Abort claiming of a block device started by bd_start_claiming().  Note that
+ * @whole is not the block device to be claimed but the whole device
+ * returned by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_abort_claiming(struct block_device *whole, void *holder)
+{
+	spin_lock(&bdev_lock);
+	__bd_abort_claiming(whole, holder);	/* releases bdev_lock */
+}
+
+/**
+ * bd_claim - claim a block device
+ * @bdev: block device to claim
+ * @holder: holder trying to claim @bdev
+ *
+ * Try to claim @bdev which must have been opened successfully.  This
+ * function may be called with or without preceding
+ * bd_start_claiming().  In the former case, this function is always
+ * successful and terminates the claiming block.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 if successful, -EBUSY if @bdev is already claimed.
+ */
+int bd_claim(struct block_device *bdev, void *holder)
+{
+	struct block_device *whole = bdev->bd_contains;
+	int res;
+
+	might_sleep();
+
+	spin_lock(&bdev_lock);
+
+	res = bd_prepare_to_claim(bdev, whole, holder);
+	if (res == 0) {
 		/* note that for a whole device bd_holders
 		 * will be incremented twice, and bd_holder will
 		 * be set to bd_claim before being set to holder
 		 */
-		bdev->bd_contains->bd_holders ++;
-		bdev->bd_contains->bd_holder = bd_claim;
+		whole->bd_holders++;
+		whole->bd_holder = bd_claim;
 		bdev->bd_holders++;
 		bdev->bd_holder = holder;
 	}
-	spin_unlock(&bdev_lock);
+
+	if (whole->bd_claiming)
+		__bd_abort_claiming(whole, holder);	/* releases bdev_lock */
+	else
+		spin_unlock(&bdev_lock);
+
 	return res;
 }
-
 EXPORT_SYMBOL(bd_claim);
 
 void bd_release(struct block_device *bdev)
@@ -1316,6 +1443,7 @@ EXPORT_SYMBOL(blkdev_get);
 
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
+	struct block_device *whole = NULL;
 	struct block_device *bdev;
 	int res;
 
@@ -1338,22 +1466,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	if (bdev == NULL)
 		return -ENOMEM;
 
+	if (filp->f_mode & FMODE_EXCL) {
+		whole = bd_start_claiming(bdev, filp);
+		if (IS_ERR(whole)) {
+			bdput(bdev);
+			return PTR_ERR(whole);
+		}
+	}
+
 	filp->f_mapping = bdev->bd_inode->i_mapping;
 
 	res = blkdev_get(bdev, filp->f_mode);
-	if (res)
-		return res;
 
-	if (filp->f_mode & FMODE_EXCL) {
-		res = bd_claim(bdev, filp);
-		if (res)
-			goto out_blkdev_put;
+	if (whole) {
+		if (res == 0)
+			BUG_ON(bd_claim(bdev, filp) != 0);
+		else
+			bd_abort_claiming(whole, filp);
 	}
 
-	return 0;
-
-out_blkdev_put:
-	blkdev_put(bdev, filp->f_mode);
 	return res;
 }
 
@@ -1564,27 +1695,34 @@ EXPORT_SYMBOL(lookup_bdev);
 */
 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
 {
-	struct block_device *bdev;
-	int error = 0;
+	struct block_device *bdev, *whole;
+	int error;
 
 	bdev = lookup_bdev(path);
 	if (IS_ERR(bdev))
 		return bdev;
 
+	whole = bd_start_claiming(bdev, holder);
+	if (IS_ERR(whole)) {
+		bdput(bdev);
+		return whole;
+	}
+
 	error = blkdev_get(bdev, mode);
 	if (error)
-		return ERR_PTR(error);
+		goto out_abort_claiming;
+
 	error = -EACCES;
 	if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
-		goto blkdev_put;
-	error = bd_claim(bdev, holder);
-	if (error)
-		goto blkdev_put;
+		goto out_blkdev_put;
 
+	BUG_ON(bd_claim(bdev, holder) != 0);
 	return bdev;
 
-blkdev_put:
+out_blkdev_put:
 	blkdev_put(bdev, mode);
+out_abort_claiming:
+	bd_abort_claiming(whole, holder);
 	return ERR_PTR(error);
 }
 
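The freeze/thaw rewrite at the top of this file delegates all per-filesystem work to freeze_super() and thaw_super(); their bodies are not in this excerpt (they land in fs/super.c in the same series), so the sketch below only records the calling convention that the new freeze_bdev()/thaw_bdev() rely on. Reference handling via get_active_super()/deactivate_super() is elided:

	/* Sketch, not part of this patch: superblock-level helpers assumed
	 * by the new freeze_bdev()/thaw_bdev().  Both return 0 or -errno. */
	int freeze_super(struct super_block *sb);	/* sync + ->freeze_fs() */
	int thaw_super(struct super_block *sb);	/* ->unfreeze_fs(), unblock writers */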
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ef7b26724ec..8d432cd9d580 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -282,14 +282,14 @@ int btrfs_acl_chmod(struct inode *inode)
 	return ret;
 }
 
-struct xattr_handler btrfs_xattr_acl_default_handler = {
+const struct xattr_handler btrfs_xattr_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.flags = ACL_TYPE_DEFAULT,
 	.get = btrfs_xattr_acl_get,
 	.set = btrfs_xattr_acl_set,
 };
 
-struct xattr_handler btrfs_xattr_acl_access_handler = {
+const struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.flags = ACL_TYPE_ACCESS,
 	.get = btrfs_xattr_acl_get,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b34d32fdaaec..c6a4f459ad76 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			     DISCARD_FL_BARRIER);
+			     BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bfdc641d4e3..d601629b85d1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4121,16 +4121,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	if (ret != 0)
 		goto fail;
 
-	inode->i_uid = current_fsuid();
-
-	if (dir && (dir->i_mode & S_ISGID)) {
-		inode->i_gid = dir->i_gid;
-		if (S_ISDIR(mode))
-			mode |= S_ISGID;
-	} else
-		inode->i_gid = current_fsgid();
-
-	inode->i_mode = mode;
+	inode_init_owner(inode, dir, mode);
 	inode->i_ino = objectid;
 	inode_set_bytes(inode, 0);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
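This btrfs hunk, like the bfs and 9p ones earlier, folds open-coded owner setup into the new inode_init_owner() helper. Its body is not part of this excerpt, but the removed lines above spell out what it consolidates; reconstructed as a sketch:

	/* Sketch of inode_init_owner(), reconstructed from the open-coded
	 * logic it replaces above (the real helper lives in fs/inode.c). */
	void inode_init_owner(struct inode *inode, const struct inode *dir,
			      mode_t mode)
	{
		inode->i_uid = current_fsuid();
		if (dir && dir->i_mode & S_ISGID) {
			inode->i_gid = dir->i_gid;
			if (S_ISDIR(mode))
				mode |= S_ISGID;	/* new dirs inherit setgid */
		} else
			inode->i_gid = current_fsgid();
		inode->i_mode = mode;
	}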
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 193b58f7d3f3..59acd3eb288a 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -282,7 +282,7 @@ err:
 * List of handlers for synthetic system.* attributes. All real ondisk
 * attributes are handled directly.
 */
-struct xattr_handler *btrfs_xattr_handlers[] = {
+const struct xattr_handler *btrfs_xattr_handlers[] = {
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 	&btrfs_xattr_acl_access_handler,
 	&btrfs_xattr_acl_default_handler,
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 721efa0346e0..7a43fd640bbb 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -21,9 +21,9 @@
 
 #include <linux/xattr.h>
 
-extern struct xattr_handler btrfs_xattr_acl_access_handler;
-extern struct xattr_handler btrfs_xattr_acl_default_handler;
-extern struct xattr_handler *btrfs_xattr_handlers[];
+extern const struct xattr_handler btrfs_xattr_acl_access_handler;
+extern const struct xattr_handler btrfs_xattr_acl_default_handler;
+extern const struct xattr_handler *btrfs_xattr_handlers[];
 
 extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
 				void *buffer, size_t size);
diff --git a/fs/buffer.c b/fs/buffer.c
index c9c266db0624..e8aa7081d25c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -275,6 +275,7 @@ void invalidate_bdev(struct block_device *bdev)
 		return;
 
 	invalidate_bh_lrus();
+	lru_add_drain_all();	/* make sure all lru add caches are flushed */
 	invalidate_mapping_pages(mapping, 0, -1);
 }
 EXPORT_SYMBOL(invalidate_bdev);
@@ -560,26 +561,17 @@ repeat:
 	return err;
 }
 
-static void do_thaw_all(struct work_struct *work)
+static void do_thaw_one(struct super_block *sb, void *unused)
 {
-	struct super_block *sb;
 	char b[BDEVNAME_SIZE];
+	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+		printk(KERN_WARNING "Emergency Thaw on %s\n",
+		       bdevname(sb->s_bdev, b));
+}
 
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
-			printk(KERN_WARNING "Emergency Thaw on %s\n",
-			       bdevname(sb->s_bdev, b));
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
+static void do_thaw_all(struct work_struct *work)
+{
+	iterate_supers(do_thaw_one, NULL);
 	kfree(work);
 	printk(KERN_WARNING "Emergency Thaw complete\n");
 }
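do_thaw_all() now iterates superblocks through iterate_supers() instead of open-coding the s_count/s_umount dance shown in the removed lines. A sketch of the contract assumed here:

	/* Sketch: iterate_supers() (fs/super.c, same series) invokes
	 * fn(sb, arg) for each live superblock, handling sb_lock, the
	 * reference count and s_umount internally -- everything the
	 * deleted restart loop above did by hand. */
	void iterate_supers(void (*fn)(struct super_block *, void *), void *arg);

	iterate_supers(do_thaw_one, NULL);	/* as in the new do_thaw_all() */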
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a9005d862ed4..d9c60b84949a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
274 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 274 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; |
275 | int rc = 0; | 275 | int rc = 0; |
276 | struct page **pages; | 276 | struct page **pages; |
277 | struct pagevec pvec; | ||
278 | loff_t offset; | 277 | loff_t offset; |
279 | u64 len; | 278 | u64 len; |
280 | 279 | ||
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
297 | if (rc < 0) | 296 | if (rc < 0) |
298 | goto out; | 297 | goto out; |
299 | 298 | ||
300 | /* set uptodate and add to lru in pagevec-sized chunks */ | ||
301 | pagevec_init(&pvec, 0); | ||
302 | for (; !list_empty(page_list) && len > 0; | 299 | for (; !list_empty(page_list) && len > 0; |
303 | rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { | 300 | rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { |
304 | struct page *page = | 301 | struct page *page = |
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
312 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
313 | } | 310 | } |
314 | 311 | ||
315 | if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { |
316 | page_cache_release(page); | 313 | page_cache_release(page); |
317 | dout("readpages %p add_to_page_cache failed %p\n", | 314 | dout("readpages %p add_to_page_cache failed %p\n", |
318 | inode, page); | 315 | inode, page); |
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
323 | flush_dcache_page(page); | 320 | flush_dcache_page(page); |
324 | SetPageUptodate(page); | 321 | SetPageUptodate(page); |
325 | unlock_page(page); | 322 | unlock_page(page); |
326 | if (pagevec_add(&pvec, page) == 0) | 323 | page_cache_release(page); |
327 | pagevec_lru_add_file(&pvec); /* add to lru */ | ||
328 | } | 324 | } |
329 | pagevec_lru_add_file(&pvec); | ||
330 | rc = 0; | 325 | rc = 0; |
331 | 326 | ||
332 | out: | 327 | out: |
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
568 | ceph_release_pages(req->r_pages, req->r_num_pages); | 563 | ceph_release_pages(req->r_pages, req->r_num_pages); |
569 | if (req->r_pages_from_pool) | 564 | if (req->r_pages_from_pool) |
570 | mempool_free(req->r_pages, | 565 | mempool_free(req->r_pages, |
571 | ceph_client(inode->i_sb)->wb_pagevec_pool); | 566 | ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); |
572 | else | 567 | else |
573 | kfree(req->r_pages); | 568 | kfree(req->r_pages); |
574 | ceph_osdc_put_request(req); | 569 | ceph_osdc_put_request(req); |
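Two cleanups meet in this file: the readpages path now inserts pages with add_to_page_cache_lru(), which puts each page on the LRU at insertion time and so makes the hand-rolled pagevec batching unnecessary, plus the ceph_client() to ceph_sb_to_client() accessor rename that recurs throughout this series. A condensed sketch of the new per-page step, using only calls visible in the hunk (the helper name is hypothetical):

    /* hypothetical helper: move one page from the readpages list into the cache */
    static int cache_one_page(struct address_space *mapping, struct page *page)
    {
            /* adds to the page cache and the LRU in a single call */
            if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) {
                    page_cache_release(page);   /* insertion failed, drop our ref */
                    return -ENOMEM;
            }
            flush_dcache_page(page);
            SetPageUptodate(page);
            unlock_page(page);
            page_cache_release(page);   /* cache and LRU hold it now, drop ours */
            return 0;
    }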
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 818afe72e6c7..9f46de2ba7a7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -150,7 +150,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac, | |||
150 | 150 | ||
151 | ret = ac->ops->build_request(ac, p + sizeof(u32), end); | 151 | ret = ac->ops->build_request(ac, p + sizeof(u32), end); |
152 | if (ret < 0) { | 152 | if (ret < 0) { |
153 | pr_err("error %d building request\n", ret); | 153 | pr_err("error %d building auth method %s request\n", ret, |
154 | ac->ops->name); | ||
154 | return ret; | 155 | return ret; |
155 | } | 156 | } |
156 | dout(" built request %d bytes\n", ret); | 157 | dout(" built request %d bytes\n", ret); |
@@ -216,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, | |||
216 | if (ac->protocol != protocol) { | 217 | if (ac->protocol != protocol) { |
217 | ret = ceph_auth_init_protocol(ac, protocol); | 218 | ret = ceph_auth_init_protocol(ac, protocol); |
218 | if (ret) { | 219 | if (ret) { |
219 | pr_err("error %d on auth protocol %d init\n", | 220 | pr_err("error %d on auth method %s init\n", |
220 | ret, protocol); | 221 | ret, ac->ops->name); |
221 | goto out; | 222 | goto out; |
222 | } | 223 | } |
223 | } | 224 | } |
@@ -229,7 +230,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, | |||
229 | if (ret == -EAGAIN) { | 230 | if (ret == -EAGAIN) { |
230 | return ceph_build_auth_request(ac, reply_buf, reply_len); | 231 | return ceph_build_auth_request(ac, reply_buf, reply_len); |
231 | } else if (ret) { | 232 | } else if (ret) { |
232 | pr_err("authentication error %d\n", ret); | 233 | pr_err("auth method '%s' error %d\n", ac->ops->name, ret); |
233 | return ret; | 234 | return ret; |
234 | } | 235 | } |
235 | return 0; | 236 | return 0; |
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index ca4f57cfb267..4429a707c021 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h | |||
@@ -15,6 +15,8 @@ struct ceph_auth_client; | |||
15 | struct ceph_authorizer; | 15 | struct ceph_authorizer; |
16 | 16 | ||
17 | struct ceph_auth_client_ops { | 17 | struct ceph_auth_client_ops { |
18 | const char *name; | ||
19 | |||
18 | /* | 20 | /* |
19 | * true if we are authenticated and can connect to | 21 | * true if we are authenticated and can connect to |
20 | * services. | 22 | * services. |
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 8cd9e3af07f7..24407c119291 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c | |||
@@ -94,6 +94,7 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, | |||
94 | } | 94 | } |
95 | 95 | ||
96 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { | 96 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { |
97 | .name = "none", | ||
97 | .reset = reset, | 98 | .reset = reset, |
98 | .destroy = destroy, | 99 | .destroy = destroy, |
99 | .is_authenticated = is_authenticated, | 100 | .is_authenticated = is_authenticated, |
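Giving the ops vtable a name field lets every auth error path identify the method in play without extra plumbing, as the pr_err() rewrites in auth.c above show. The pattern in a self-contained userspace sketch (all names hypothetical):

    #include <stdio.h>

    struct auth_ops {
            const char *name;                   /* used only for diagnostics */
            int (*build_request)(void *buf);
    };

    static int none_build(void *buf) { return 0; }

    static const struct auth_ops auth_none_ops = {
            .name          = "none",
            .build_request = none_build,
    };

    static void report_build_error(const struct auth_ops *ops, int err)
    {
            fprintf(stderr, "error %d building auth method %s request\n",
                    err, ops->name);
    }

    int main(void)
    {
            report_build_error(&auth_none_ops, -22);
            return 0;
    }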
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index fee5a08da881..7b206231566d 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -127,7 +127,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
127 | int ret; | 127 | int ret; |
128 | char *dbuf; | 128 | char *dbuf; |
129 | char *ticket_buf; | 129 | char *ticket_buf; |
130 | u8 struct_v; | 130 | u8 reply_struct_v; |
131 | 131 | ||
132 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); | 132 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); |
133 | if (!dbuf) | 133 | if (!dbuf) |
@@ -139,14 +139,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
139 | goto out_dbuf; | 139 | goto out_dbuf; |
140 | 140 | ||
141 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); | 141 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); |
142 | struct_v = ceph_decode_8(&p); | 142 | reply_struct_v = ceph_decode_8(&p); |
143 | if (struct_v != 1) | 143 | if (reply_struct_v != 1) |
144 | goto bad; | 144 | goto bad; |
145 | num = ceph_decode_32(&p); | 145 | num = ceph_decode_32(&p); |
146 | dout("%d tickets\n", num); | 146 | dout("%d tickets\n", num); |
147 | while (num--) { | 147 | while (num--) { |
148 | int type; | 148 | int type; |
149 | u8 struct_v; | 149 | u8 tkt_struct_v, blob_struct_v; |
150 | struct ceph_x_ticket_handler *th; | 150 | struct ceph_x_ticket_handler *th; |
151 | void *dp, *dend; | 151 | void *dp, *dend; |
152 | int dlen; | 152 | int dlen; |
@@ -165,8 +165,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
165 | type = ceph_decode_32(&p); | 165 | type = ceph_decode_32(&p); |
166 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); | 166 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); |
167 | 167 | ||
168 | struct_v = ceph_decode_8(&p); | 168 | tkt_struct_v = ceph_decode_8(&p); |
169 | if (struct_v != 1) | 169 | if (tkt_struct_v != 1) |
170 | goto bad; | 170 | goto bad; |
171 | 171 | ||
172 | th = get_ticket_handler(ac, type); | 172 | th = get_ticket_handler(ac, type); |
@@ -186,8 +186,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
186 | dend = dbuf + dlen; | 186 | dend = dbuf + dlen; |
187 | dp = dbuf; | 187 | dp = dbuf; |
188 | 188 | ||
189 | struct_v = ceph_decode_8(&dp); | 189 | tkt_struct_v = ceph_decode_8(&dp); |
190 | if (struct_v != 1) | 190 | if (tkt_struct_v != 1) |
191 | goto bad; | 191 | goto bad; |
192 | 192 | ||
193 | memcpy(&old_key, &th->session_key, sizeof(old_key)); | 193 | memcpy(&old_key, &th->session_key, sizeof(old_key)); |
@@ -224,7 +224,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
224 | tpend = tp + dlen; | 224 | tpend = tp + dlen; |
225 | dout(" ticket blob is %d bytes\n", dlen); | 225 | dout(" ticket blob is %d bytes\n", dlen); |
226 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); | 226 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); |
227 | struct_v = ceph_decode_8(&tp); | 227 | blob_struct_v = ceph_decode_8(&tp); |
228 | new_secret_id = ceph_decode_64(&tp); | 228 | new_secret_id = ceph_decode_64(&tp); |
229 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); | 229 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); |
230 | if (ret) | 230 | if (ret) |
@@ -618,6 +618,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | |||
618 | 618 | ||
619 | 619 | ||
620 | static const struct ceph_auth_client_ops ceph_x_ops = { | 620 | static const struct ceph_auth_client_ops ceph_x_ops = { |
621 | .name = "x", | ||
621 | .is_authenticated = ceph_x_is_authenticated, | 622 | .is_authenticated = ceph_x_is_authenticated, |
622 | .build_request = ceph_x_build_request, | 623 | .build_request = ceph_x_build_request, |
623 | .handle_reply = ceph_x_handle_reply, | 624 | .handle_reply = ceph_x_handle_reply, |
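The struct_v renames in this file remove a classic shadowing trap: the u8 struct_v declared inside the while body hid the one in the enclosing scope. The new names (reply_struct_v, tkt_struct_v, blob_struct_v) also make it easier to see that the blob version byte is decoded but never validated. The hazard in a minimal, compilable form:

    #include <stdio.h>

    int main(void)
    {
            int struct_v = 1;                   /* outer: reply version */
            {
                    int struct_v = 2;           /* inner declaration shadows it */
                    printf("inner sees %d\n", struct_v);    /* prints 2 */
            }
            printf("outer is still %d\n", struct_v);        /* prints 1 */
            return 0;            /* gcc -Wshadow would flag the inner one */
    }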
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d9400534b279..0dd0b81e64f7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
867 | { | 867 | { |
868 | struct ceph_mds_session *session = cap->session; | 868 | struct ceph_mds_session *session = cap->session; |
869 | struct ceph_inode_info *ci = cap->ci; | 869 | struct ceph_inode_info *ci = cap->ci; |
870 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 870 | struct ceph_mds_client *mdsc = |
871 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||
871 | int removed = 0; | 872 | int removed = 0; |
872 | 873 | ||
873 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 874 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session, | |||
937 | seq, issue_seq, mseq, follows, size, max_size, | 938 | seq, issue_seq, mseq, follows, size, max_size, |
938 | xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); | 939 | xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); |
939 | 940 | ||
940 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); | 941 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS); |
941 | if (IS_ERR(msg)) | 942 | if (!msg) |
942 | return PTR_ERR(msg); | 943 | return -ENOMEM; |
943 | 944 | ||
944 | msg->hdr.tid = cpu_to_le64(flush_tid); | 945 | msg->hdr.tid = cpu_to_le64(flush_tid); |
945 | 946 | ||
@@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1298 | */ | 1299 | */ |
1299 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1300 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
1300 | { | 1301 | { |
1301 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 1302 | struct ceph_mds_client *mdsc = |
1303 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||
1302 | struct inode *inode = &ci->vfs_inode; | 1304 | struct inode *inode = &ci->vfs_inode; |
1303 | int was = ci->i_dirty_caps; | 1305 | int was = ci->i_dirty_caps; |
1304 | int dirty = 0; | 1306 | int dirty = 0; |
@@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1336 | static int __mark_caps_flushing(struct inode *inode, | 1338 | static int __mark_caps_flushing(struct inode *inode, |
1337 | struct ceph_mds_session *session) | 1339 | struct ceph_mds_session *session) |
1338 | { | 1340 | { |
1339 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1341 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
1340 | struct ceph_inode_info *ci = ceph_inode(inode); | 1342 | struct ceph_inode_info *ci = ceph_inode(inode); |
1341 | int flushing; | 1343 | int flushing; |
1342 | 1344 | ||
@@ -1663,7 +1665,7 @@ ack: | |||
1663 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1665 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
1664 | unsigned *flush_tid) | 1666 | unsigned *flush_tid) |
1665 | { | 1667 | { |
1666 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1668 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
1667 | struct ceph_inode_info *ci = ceph_inode(inode); | 1669 | struct ceph_inode_info *ci = ceph_inode(inode); |
1668 | int unlock_session = session ? 0 : 1; | 1670 | int unlock_session = session ? 0 : 1; |
1669 | int flushing = 0; | 1671 | int flushing = 0; |
@@ -1716,10 +1718,9 @@ out_unlocked: | |||
1716 | static int caps_are_flushed(struct inode *inode, unsigned tid) | 1718 | static int caps_are_flushed(struct inode *inode, unsigned tid) |
1717 | { | 1719 | { |
1718 | struct ceph_inode_info *ci = ceph_inode(inode); | 1720 | struct ceph_inode_info *ci = ceph_inode(inode); |
1719 | int dirty, i, ret = 1; | 1721 | int i, ret = 1; |
1720 | 1722 | ||
1721 | spin_lock(&inode->i_lock); | 1723 | spin_lock(&inode->i_lock); |
1722 | dirty = __ceph_caps_dirty(ci); | ||
1723 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1724 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1724 | if ((ci->i_flushing_caps & (1 << i)) && | 1725 | if ((ci->i_flushing_caps & (1 << i)) && |
1725 | ci->i_cap_flush_tid[i] <= tid) { | 1726 | ci->i_cap_flush_tid[i] <= tid) { |
@@ -1829,7 +1830,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1829 | err = wait_event_interruptible(ci->i_cap_wq, | 1830 | err = wait_event_interruptible(ci->i_cap_wq, |
1830 | caps_are_flushed(inode, flush_tid)); | 1831 | caps_are_flushed(inode, flush_tid)); |
1831 | } else { | 1832 | } else { |
1832 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1833 | struct ceph_mds_client *mdsc = |
1834 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
1833 | 1835 | ||
1834 | spin_lock(&inode->i_lock); | 1836 | spin_lock(&inode->i_lock); |
1835 | if (__ceph_caps_dirty(ci)) | 1837 | if (__ceph_caps_dirty(ci)) |
@@ -2411,7 +2413,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2411 | __releases(inode->i_lock) | 2413 | __releases(inode->i_lock) |
2412 | { | 2414 | { |
2413 | struct ceph_inode_info *ci = ceph_inode(inode); | 2415 | struct ceph_inode_info *ci = ceph_inode(inode); |
2414 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 2416 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
2415 | unsigned seq = le32_to_cpu(m->seq); | 2417 | unsigned seq = le32_to_cpu(m->seq); |
2416 | int dirty = le32_to_cpu(m->dirty); | 2418 | int dirty = le32_to_cpu(m->dirty); |
2417 | int cleaned = 0; | 2419 | int cleaned = 0; |
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 0c2241ef3653..3b9eeed097b3 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -19,7 +19,7 @@ | |||
19 | * Ceph release version | 19 | * Ceph release version |
20 | */ | 20 | */ |
21 | #define CEPH_VERSION_MAJOR 0 | 21 | #define CEPH_VERSION_MAJOR 0 |
22 | #define CEPH_VERSION_MINOR 19 | 22 | #define CEPH_VERSION_MINOR 20 |
23 | #define CEPH_VERSION_PATCH 0 | 23 | #define CEPH_VERSION_PATCH 0 |
24 | 24 | ||
25 | #define _CEPH_STRINGIFY(x) #x | 25 | #define _CEPH_STRINGIFY(x) #x |
@@ -36,7 +36,7 @@ | |||
36 | * client-facing protocol. | 36 | * client-facing protocol. |
37 | */ | 37 | */ |
38 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ | 38 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ |
39 | #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ | 39 | #define CEPH_MDS_PROTOCOL 12 /* cluster internal */ |
40 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ | 40 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ |
41 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ | 41 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ |
42 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ | 42 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ |
@@ -53,8 +53,18 @@ | |||
53 | /* | 53 | /* |
54 | * feature bits | 54 | * feature bits |
55 | */ | 55 | */ |
56 | #define CEPH_FEATURE_SUPPORTED 0 | 56 | #define CEPH_FEATURE_UID 1 |
57 | #define CEPH_FEATURE_REQUIRED 0 | 57 | #define CEPH_FEATURE_NOSRCADDR 2 |
58 | #define CEPH_FEATURE_FLOCK 4 | ||
59 | |||
60 | #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
62 | #define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK | ||
63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
64 | #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
58 | 68 | ||
59 | 69 | ||
60 | /* | 70 | /* |
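Feature negotiation grows from one SUPPORTED/REQUIRED pair into per-role masks composed from individual bits. Note that the macro bodies above are bare a|b expressions with no surrounding parentheses, so call sites must add their own; a hedged sketch of a required-feature check (the helper is illustrative, not from this patch):

    #include <stdio.h>

    #define CEPH_FEATURE_UID        1
    #define CEPH_FEATURE_NOSRCADDR  2
    #define CEPH_FEATURE_FLOCK      4

    /* parenthesized locally, unlike the raw kernel macros */
    #define REQUIRED_MDS  (CEPH_FEATURE_UID)
    #define SUPPORTED_MDS (CEPH_FEATURE_UID | CEPH_FEATURE_NOSRCADDR | \
                           CEPH_FEATURE_FLOCK)

    static int peer_acceptable(unsigned long peer_features)
    {
            /* every required bit must appear in what the peer advertises */
            return (peer_features & REQUIRED_MDS) == REQUIRED_MDS;
    }

    int main(void)
    {
            printf("%d\n", peer_acceptable(CEPH_FEATURE_UID));        /* 1 */
            printf("%d\n", peer_acceptable(CEPH_FEATURE_NOSRCADDR));  /* 0 */
            return 0;
    }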
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
91 | #define CEPH_AUTH_NONE 0x1 | 101 | #define CEPH_AUTH_NONE 0x1 |
92 | #define CEPH_AUTH_CEPHX 0x2 | 102 | #define CEPH_AUTH_CEPHX 0x2 |
93 | 103 | ||
104 | #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) | ||
105 | |||
94 | 106 | ||
95 | /********************************************* | 107 | /********************************************* |
96 | * message layer | 108 | * message layer |
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
128 | #define CEPH_MSG_CLIENT_SNAP 0x312 | 140 | #define CEPH_MSG_CLIENT_SNAP 0x312 |
129 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 | 141 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 |
130 | 142 | ||
143 | /* pool ops */ | ||
144 | #define CEPH_MSG_POOLOP_REPLY 48 | ||
145 | #define CEPH_MSG_POOLOP 49 | ||
146 | |||
147 | |||
131 | /* osd */ | 148 | /* osd */ |
132 | #define CEPH_MSG_OSD_MAP 41 | 149 | #define CEPH_MSG_OSD_MAP 41 |
133 | #define CEPH_MSG_OSD_OP 42 | 150 | #define CEPH_MSG_OSD_OP 42 |
134 | #define CEPH_MSG_OSD_OPREPLY 43 | 151 | #define CEPH_MSG_OSD_OPREPLY 43 |
135 | 152 | ||
153 | /* pool operations */ | ||
154 | enum { | ||
155 | POOL_OP_CREATE = 0x01, | ||
156 | POOL_OP_DELETE = 0x02, | ||
157 | POOL_OP_AUID_CHANGE = 0x03, | ||
158 | POOL_OP_CREATE_SNAP = 0x11, | ||
159 | POOL_OP_DELETE_SNAP = 0x12, | ||
160 | POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, | ||
161 | POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, | ||
162 | }; | ||
163 | |||
136 | struct ceph_mon_request_header { | 164 | struct ceph_mon_request_header { |
137 | __le64 have_version; | 165 | __le64 have_version; |
138 | __le16 session_mon; | 166 | __le16 session_mon; |
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply { | |||
155 | struct ceph_statfs st; | 183 | struct ceph_statfs st; |
156 | } __attribute__ ((packed)); | 184 | } __attribute__ ((packed)); |
157 | 185 | ||
186 | const char *ceph_pool_op_name(int op); | ||
187 | |||
188 | struct ceph_mon_poolop { | ||
189 | struct ceph_mon_request_header monhdr; | ||
190 | struct ceph_fsid fsid; | ||
191 | __le32 pool; | ||
192 | __le32 op; | ||
193 | __le64 auid; | ||
194 | __le64 snapid; | ||
195 | __le32 name_len; | ||
196 | } __attribute__ ((packed)); | ||
197 | |||
198 | struct ceph_mon_poolop_reply { | ||
199 | struct ceph_mon_request_header monhdr; | ||
200 | struct ceph_fsid fsid; | ||
201 | __le32 reply_code; | ||
202 | __le32 epoch; | ||
203 | char has_data; | ||
204 | char data[0]; | ||
205 | } __attribute__ ((packed)); | ||
206 | |||
207 | struct ceph_mon_unmanaged_snap { | ||
208 | __le64 snapid; | ||
209 | } __attribute__ ((packed)); | ||
210 | |||
158 | struct ceph_osd_getmap { | 211 | struct ceph_osd_getmap { |
159 | struct ceph_mon_request_header monhdr; | 212 | struct ceph_mon_request_header monhdr; |
160 | struct ceph_fsid fsid; | 213 | struct ceph_fsid fsid; |
@@ -308,6 +361,7 @@ union ceph_mds_request_args { | |||
308 | struct { | 361 | struct { |
309 | __le32 frag; /* which dir fragment */ | 362 | __le32 frag; /* which dir fragment */ |
310 | __le32 max_entries; /* how many dentries to grab */ | 363 | __le32 max_entries; /* how many dentries to grab */ |
364 | __le32 max_bytes; | ||
311 | } __attribute__ ((packed)) readdir; | 365 | } __attribute__ ((packed)) readdir; |
312 | struct { | 366 | struct { |
313 | __le32 mode; | 367 | __le32 mode; |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 8e4be6a80c62..7503aee828ce 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type) | |||
10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | 10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; |
11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | 11 | case CEPH_ENTITY_TYPE_MON: return "mon"; |
12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | 12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; |
13 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
14 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | 13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; |
15 | default: return "unknown"; | 14 | default: return "unknown"; |
16 | } | 15 | } |
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op) | |||
45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | 44 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; |
46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | 45 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; |
47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | 46 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; |
47 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
48 | 48 | ||
49 | case CEPH_OSD_OP_PULL: return "pull"; | 49 | case CEPH_OSD_OP_PULL: return "pull"; |
50 | case CEPH_OSD_OP_PUSH: return "push"; | 50 | case CEPH_OSD_OP_PUSH: return "push"; |
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o) | |||
174 | } | 174 | } |
175 | return "???"; | 175 | return "???"; |
176 | } | 176 | } |
177 | |||
178 | const char *ceph_pool_op_name(int op) | ||
179 | { | ||
180 | switch (op) { | ||
181 | case POOL_OP_CREATE: return "create"; | ||
182 | case POOL_OP_DELETE: return "delete"; | ||
183 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
184 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
185 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
186 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
187 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
188 | } | ||
189 | return "???"; | ||
190 | } | ||
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f7048da92acc..3be33fb066cc 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p) | |||
113 | static int monc_show(struct seq_file *s, void *p) | 113 | static int monc_show(struct seq_file *s, void *p) |
114 | { | 114 | { |
115 | struct ceph_client *client = s->private; | 115 | struct ceph_client *client = s->private; |
116 | struct ceph_mon_statfs_request *req; | 116 | struct ceph_mon_generic_request *req; |
117 | struct ceph_mon_client *monc = &client->monc; | 117 | struct ceph_mon_client *monc = &client->monc; |
118 | struct rb_node *rp; | 118 | struct rb_node *rp; |
119 | 119 | ||
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p) | |||
126 | if (monc->want_next_osdmap) | 126 | if (monc->want_next_osdmap) |
127 | seq_printf(s, "want next osdmap\n"); | 127 | seq_printf(s, "want next osdmap\n"); |
128 | 128 | ||
129 | for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) { | 129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { |
130 | req = rb_entry(rp, struct ceph_mon_statfs_request, node); | 130 | __u16 op; |
131 | seq_printf(s, "%lld statfs\n", req->tid); | 131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); |
132 | op = le16_to_cpu(req->request->hdr.type); | ||
133 | if (op == CEPH_MSG_STATFS) | ||
134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
135 | else | ||
136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
132 | } | 137 | } |
133 | 138 | ||
134 | mutex_unlock(&monc->mutex); | 139 | mutex_unlock(&monc->mutex); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 650d2db5ed26..4fd30900eff7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry) | |||
51 | return -ENOMEM; /* oh well */ | 51 | return -ENOMEM; /* oh well */ |
52 | 52 | ||
53 | spin_lock(&dentry->d_lock); | 53 | spin_lock(&dentry->d_lock); |
54 | if (dentry->d_fsdata) /* lost a race */ | 54 | if (dentry->d_fsdata) { |
55 | /* lost a race */ | ||
56 | kmem_cache_free(ceph_dentry_cachep, di); | ||
55 | goto out_unlock; | 57 | goto out_unlock; |
58 | } | ||
56 | di->dentry = dentry; | 59 | di->dentry = dentry; |
57 | di->lease_session = NULL; | 60 | di->lease_session = NULL; |
58 | dentry->d_fsdata = di; | 61 | dentry->d_fsdata = di; |
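The ceph_init_dentry change plugs a leak in the optimistic-allocation idiom: allocate outside the lock, take the lock, and if another thread installed its object first, free the loser's copy before bailing out (the old code just jumped to out_unlock and leaked di). A userspace analog of the pattern (names hypothetical):

    #include <pthread.h>
    #include <stdlib.h>

    struct dentry {
            pthread_mutex_t lock;
            void *fsdata;
    };

    static int init_fsdata(struct dentry *d, size_t size)
    {
            void *di = calloc(1, size);     /* allocate before taking the lock */
            if (!di)
                    return -1;

            pthread_mutex_lock(&d->lock);
            if (d->fsdata) {                /* lost the race */
                    free(di);               /* the fix: release the unused copy */
                    pthread_mutex_unlock(&d->lock);
                    return 0;
            }
            d->fsdata = di;
            pthread_mutex_unlock(&d->lock);
            return 0;
    }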
@@ -125,7 +128,8 @@ more: | |||
125 | dentry = list_entry(p, struct dentry, d_u.d_child); | 128 | dentry = list_entry(p, struct dentry, d_u.d_child); |
126 | di = ceph_dentry(dentry); | 129 | di = ceph_dentry(dentry); |
127 | while (1) { | 130 | while (1) { |
128 | dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next, | 131 | dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, |
132 | d_unhashed(dentry) ? "!hashed" : "hashed", | ||
129 | parent->d_subdirs.prev, parent->d_subdirs.next); | 133 | parent->d_subdirs.prev, parent->d_subdirs.next); |
130 | if (p == &parent->d_subdirs) { | 134 | if (p == &parent->d_subdirs) { |
131 | fi->at_end = 1; | 135 | fi->at_end = 1; |
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
229 | u32 ftype; | 233 | u32 ftype; |
230 | struct ceph_mds_reply_info_parsed *rinfo; | 234 | struct ceph_mds_reply_info_parsed *rinfo; |
231 | const int max_entries = client->mount_args->max_readdir; | 235 | const int max_entries = client->mount_args->max_readdir; |
236 | const int max_bytes = client->mount_args->max_readdir_bytes; | ||
232 | 237 | ||
233 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 238 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
234 | if (fi->at_end) | 239 | if (fi->at_end) |
@@ -312,6 +317,7 @@ more: | |||
312 | req->r_readdir_offset = fi->next_offset; | 317 | req->r_readdir_offset = fi->next_offset; |
313 | req->r_args.readdir.frag = cpu_to_le32(frag); | 318 | req->r_args.readdir.frag = cpu_to_le32(frag); |
314 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | 319 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); |
320 | req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); | ||
315 | req->r_num_caps = max_entries + 1; | 321 | req->r_num_caps = max_entries + 1; |
316 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 322 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
317 | if (err < 0) { | 323 | if (err < 0) { |
@@ -335,7 +341,7 @@ more: | |||
335 | if (req->r_reply_info.dir_end) { | 341 | if (req->r_reply_info.dir_end) { |
336 | kfree(fi->last_name); | 342 | kfree(fi->last_name); |
337 | fi->last_name = NULL; | 343 | fi->last_name = NULL; |
338 | fi->next_offset = 0; | 344 | fi->next_offset = 2; |
339 | } else { | 345 | } else { |
340 | rinfo = &req->r_reply_info; | 346 | rinfo = &req->r_reply_info; |
341 | err = note_last_dentry(fi, | 347 | err = note_last_dentry(fi, |
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
478 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 484 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
479 | struct dentry *dentry, int err) | 485 | struct dentry *dentry, int err) |
480 | { | 486 | { |
481 | struct ceph_client *client = ceph_client(dentry->d_sb); | 487 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
482 | struct inode *parent = dentry->d_parent->d_inode; | 488 | struct inode *parent = dentry->d_parent->d_inode; |
483 | 489 | ||
484 | /* .snap dir? */ | 490 | /* .snap dir? */ |
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
568 | !is_root_ceph_dentry(dir, dentry) && | 574 | !is_root_ceph_dentry(dir, dentry) && |
569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 575 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
570 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { | 576 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { |
571 | di->offset = ci->i_max_offset++; | ||
572 | spin_unlock(&dir->i_lock); | 577 | spin_unlock(&dir->i_lock); |
573 | dout(" dir %p complete, -ENOENT\n", dir); | 578 | dout(" dir %p complete, -ENOENT\n", dir); |
574 | d_add(dentry, NULL); | 579 | d_add(dentry, NULL); |
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
888 | 893 | ||
889 | /* ensure target dentry is invalidated, despite | 894 | /* ensure target dentry is invalidated, despite |
890 | rehashing bug in vfs_rename_dir */ | 895 | rehashing bug in vfs_rename_dir */ |
891 | new_dentry->d_time = jiffies; | 896 | ceph_invalidate_dentry_lease(new_dentry); |
892 | ceph_dentry(new_dentry)->lease_shared_gen = 0; | ||
893 | } | 897 | } |
894 | ceph_mdsc_put_request(req); | 898 | ceph_mdsc_put_request(req); |
895 | return err; | 899 | return err; |
896 | } | 900 | } |
897 | 901 | ||
902 | /* | ||
903 | * Ensure a dentry lease will no longer revalidate. | ||
904 | */ | ||
905 | void ceph_invalidate_dentry_lease(struct dentry *dentry) | ||
906 | { | ||
907 | spin_lock(&dentry->d_lock); | ||
908 | dentry->d_time = jiffies; | ||
909 | ceph_dentry(dentry)->lease_shared_gen = 0; | ||
910 | spin_unlock(&dentry->d_lock); | ||
911 | } | ||
898 | 912 | ||
899 | /* | 913 | /* |
900 | * Check if dentry lease is valid. If not, delete the lease. Try to | 914 | * Check if dentry lease is valid. If not, delete the lease. Try to |
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
972 | { | 986 | { |
973 | struct inode *dir = dentry->d_parent->d_inode; | 987 | struct inode *dir = dentry->d_parent->d_inode; |
974 | 988 | ||
975 | dout("d_revalidate %p '%.*s' inode %p\n", dentry, | 989 | dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, |
976 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 990 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode, |
991 | ceph_dentry(dentry)->offset); | ||
977 | 992 | ||
978 | /* always trust cached snapped dentries, snapdir dentry */ | 993 | /* always trust cached snapped dentries, snapdir dentry */ |
979 | if (ceph_snap(dir) != CEPH_NOSNAP) { | 994 | if (ceph_snap(dir) != CEPH_NOSNAP) { |
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1050 | struct ceph_inode_info *ci = ceph_inode(inode); | 1065 | struct ceph_inode_info *ci = ceph_inode(inode); |
1051 | int left; | 1066 | int left; |
1052 | 1067 | ||
1053 | if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT)) | 1068 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1054 | return -EISDIR; | 1069 | return -EISDIR; |
1055 | 1070 | ||
1056 | if (!cf->dir_info) { | 1071 | if (!cf->dir_info) { |
@@ -1152,7 +1167,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
1152 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1167 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
1153 | dn->d_name.len, dn->d_name.name); | 1168 | dn->d_name.len, dn->d_name.name); |
1154 | if (di) { | 1169 | if (di) { |
1155 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1170 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1156 | spin_lock(&mdsc->dentry_lru_lock); | 1171 | spin_lock(&mdsc->dentry_lru_lock); |
1157 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1172 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
1158 | mdsc->num_dentry++; | 1173 | mdsc->num_dentry++; |
@@ -1165,10 +1180,10 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
1165 | struct ceph_dentry_info *di = ceph_dentry(dn); | 1180 | struct ceph_dentry_info *di = ceph_dentry(dn); |
1166 | struct ceph_mds_client *mdsc; | 1181 | struct ceph_mds_client *mdsc; |
1167 | 1182 | ||
1168 | dout("dentry_lru_touch %p %p '%.*s'\n", di, dn, | 1183 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
1169 | dn->d_name.len, dn->d_name.name); | 1184 | dn->d_name.len, dn->d_name.name, di->offset); |
1170 | if (di) { | 1185 | if (di) { |
1171 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1186 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1172 | spin_lock(&mdsc->dentry_lru_lock); | 1187 | spin_lock(&mdsc->dentry_lru_lock); |
1173 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1188 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
1174 | spin_unlock(&mdsc->dentry_lru_lock); | 1189 | spin_unlock(&mdsc->dentry_lru_lock); |
@@ -1183,7 +1198,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1183 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1198 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
1184 | dn->d_name.len, dn->d_name.name); | 1199 | dn->d_name.len, dn->d_name.name); |
1185 | if (di) { | 1200 | if (di) { |
1186 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1201 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1187 | spin_lock(&mdsc->dentry_lru_lock); | 1202 | spin_lock(&mdsc->dentry_lru_lock); |
1188 | list_del_init(&di->lru); | 1203 | list_del_init(&di->lru); |
1189 | mdsc->num_dentry--; | 1204 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9d67572fb328..17447644d675 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
93 | return ERR_PTR(-ESTALE); | 93 | return ERR_PTR(-ESTALE); |
94 | 94 | ||
95 | dentry = d_obtain_alias(inode); | 95 | dentry = d_obtain_alias(inode); |
96 | if (!dentry) { | 96 | if (IS_ERR(dentry)) { |
97 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", | 97 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", |
98 | fh->ino, inode); | 98 | fh->ino, inode); |
99 | iput(inode); | 99 | iput(inode); |
100 | return ERR_PTR(-ENOMEM); | 100 | return dentry; |
101 | } | 101 | } |
102 | err = ceph_init_dentry(dentry); | 102 | err = ceph_init_dentry(dentry); |
103 | 103 | ||
@@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
116 | struct ceph_nfs_confh *cfh) | 116 | struct ceph_nfs_confh *cfh) |
117 | { | 117 | { |
118 | struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc; | 118 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; |
119 | struct inode *inode; | 119 | struct inode *inode; |
120 | struct dentry *dentry; | 120 | struct dentry *dentry; |
121 | struct ceph_vino vino; | 121 | struct ceph_vino vino; |
@@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | dentry = d_obtain_alias(inode); | 151 | dentry = d_obtain_alias(inode); |
152 | if (!dentry) { | 152 | if (IS_ERR(dentry)) { |
153 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", | 153 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", |
154 | cfh->ino, inode); | 154 | cfh->ino, inode); |
155 | iput(inode); | 155 | iput(inode); |
156 | return ERR_PTR(-ENOMEM); | 156 | return dentry; |
157 | } | 157 | } |
158 | err = ceph_init_dentry(dentry); | 158 | err = ceph_init_dentry(dentry); |
159 | if (err < 0) { | 159 | if (err < 0) { |
@@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, | |||
202 | return ERR_PTR(-ESTALE); | 202 | return ERR_PTR(-ESTALE); |
203 | 203 | ||
204 | dentry = d_obtain_alias(inode); | 204 | dentry = d_obtain_alias(inode); |
205 | if (!dentry) { | 205 | if (IS_ERR(dentry)) { |
206 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", | 206 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", |
207 | cfh->ino, inode); | 207 | cfh->ino, inode); |
208 | iput(inode); | 208 | iput(inode); |
209 | return ERR_PTR(-ENOMEM); | 209 | return dentry; |
210 | } | 210 | } |
211 | err = ceph_init_dentry(dentry); | 211 | err = ceph_init_dentry(dentry); |
212 | if (err < 0) { | 212 | if (err < 0) { |
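All three export helpers made the same mistake: d_obtain_alias() reports failure as an ERR_PTR-encoded pointer and never returns NULL, so the old !dentry tests could never fire and an error pointer would escape. The fixed checks also propagate the real error instead of hard-coding -ENOMEM (the messages still say ENOMEM, which is now slightly imprecise). The convention in a self-contained sketch, mirroring the definitions in <linux/err.h>:

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long err)      { return (void *)err; }
    static inline long  PTR_ERR(const void *p) { return (long)p; }
    static inline int   IS_ERR(const void *p)
    {
            return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    static char real_dentry[] = "dentry";

    static void *obtain_alias(int fail)
    {
            return fail ? ERR_PTR(-ENOMEM) : real_dentry;  /* never NULL */
    }

    int main(void)
    {
            void *d = obtain_alias(1);
            if (IS_ERR(d))                          /* the corrected check */
                    printf("error %ld\n", PTR_ERR(d));
            return 0;
    }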
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ed6f19721d6e..6512b6701b9e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
317 | /* | 317 | /* |
318 | * allocate a vector of new pages | 318 | * allocate a vector of new pages |
319 | */ | 319 | */ |
320 | static struct page **alloc_page_vector(int num_pages) | 320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
321 | { | 321 | { |
322 | struct page **pages; | 322 | struct page **pages; |
323 | int i; | 323 | int i; |
324 | 324 | ||
325 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | 325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); |
326 | if (!pages) | 326 | if (!pages) |
327 | return ERR_PTR(-ENOMEM); | 327 | return ERR_PTR(-ENOMEM); |
328 | for (i = 0; i < num_pages; i++) { | 328 | for (i = 0; i < num_pages; i++) { |
329 | pages[i] = alloc_page(GFP_NOFS); | 329 | pages[i] = __page_cache_alloc(flags); |
330 | if (pages[i] == NULL) { | 330 | if (pages[i] == NULL) { |
331 | ceph_release_page_vector(pages, i); | 331 | ceph_release_page_vector(pages, i); |
332 | return ERR_PTR(-ENOMEM); | 332 | return ERR_PTR(-ENOMEM); |
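alloc_page_vector becomes ceph_alloc_page_vector with an explicit gfp_t, so the reclaim context is chosen at the call site rather than hard-wired to GFP_NOFS, and __page_cache_alloc(flags) threads the same flags down to the page allocator. The callers in this file keep GFP_NOFS, now stated explicitly; the condensed caller shape:

    struct page **pages;

    pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); /* fs path: no fs reentry */
    if (IS_ERR(pages))
            return PTR_ERR(pages);
    /* ... fill the pages via the osd client ... */
    ceph_release_page_vector(pages, num_pages);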
@@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
540 | * in sequence. | 540 | * in sequence. |
541 | */ | 541 | */ |
542 | } else { | 542 | } else { |
543 | pages = alloc_page_vector(num_pages); | 543 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
544 | } | 544 | } |
545 | if (IS_ERR(pages)) | 545 | if (IS_ERR(pages)) |
546 | return PTR_ERR(pages); | 546 | return PTR_ERR(pages); |
@@ -649,8 +649,8 @@ more: | |||
649 | do_sync, | 649 | do_sync, |
650 | ci->i_truncate_seq, ci->i_truncate_size, | 650 | ci->i_truncate_seq, ci->i_truncate_size, |
651 | &mtime, false, 2); | 651 | &mtime, false, 2); |
652 | if (IS_ERR(req)) | 652 | if (!req) |
653 | return PTR_ERR(req); | 653 | return -ENOMEM; |
654 | 654 | ||
655 | num_pages = calc_pages_for(pos, len); | 655 | num_pages = calc_pages_for(pos, len); |
656 | 656 | ||
@@ -668,7 +668,7 @@ more: | |||
668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
670 | } else { | 670 | } else { |
671 | pages = alloc_page_vector(num_pages); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
672 | if (IS_ERR(pages)) { | 672 | if (IS_ERR(pages)) { |
673 | ret = PTR_ERR(pages); | 673 | ret = PTR_ERR(pages); |
674 | goto out; | 674 | goto out; |
@@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
809 | struct file *file = iocb->ki_filp; | 809 | struct file *file = iocb->ki_filp; |
810 | struct inode *inode = file->f_dentry->d_inode; | 810 | struct inode *inode = file->f_dentry->d_inode; |
811 | struct ceph_inode_info *ci = ceph_inode(inode); | 811 | struct ceph_inode_info *ci = ceph_inode(inode); |
812 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
813 | loff_t endoff = pos + iov->iov_len; | 813 | loff_t endoff = pos + iov->iov_len; |
814 | int got = 0; | 814 | int got = 0; |
815 | int ret, err; | 815 | int ret, err; |
@@ -844,8 +844,7 @@ retry_snap: | |||
844 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | 844 | if ((ret >= 0 || ret == -EIOCBQUEUED) && |
845 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | 845 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) |
846 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | 846 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { |
847 | err = vfs_fsync_range(file, file->f_path.dentry, | 847 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); |
848 | pos, pos + ret - 1, 1); | ||
849 | if (err < 0) | 848 | if (err < 0) |
850 | ret = err; | 849 | ret = err; |
851 | } | 850 | } |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 85b4d2ffdeba..a81b8b662c7b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
384 | */ | 384 | */ |
385 | if (ci->i_snap_realm) { | 385 | if (ci->i_snap_realm) { |
386 | struct ceph_mds_client *mdsc = | 386 | struct ceph_mds_client *mdsc = |
387 | &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 388 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
389 | 389 | ||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | 390 | dout(" dropping residual ref to snap realm %p\n", realm); |
@@ -619,11 +619,12 @@ static int fill_inode(struct inode *inode, | |||
619 | memcpy(ci->i_xattrs.blob->vec.iov_base, | 619 | memcpy(ci->i_xattrs.blob->vec.iov_base, |
620 | iinfo->xattr_data, iinfo->xattr_len); | 620 | iinfo->xattr_data, iinfo->xattr_len); |
621 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); | 621 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); |
622 | xattr_blob = NULL; | ||
622 | } | 623 | } |
623 | 624 | ||
624 | inode->i_mapping->a_ops = &ceph_aops; | 625 | inode->i_mapping->a_ops = &ceph_aops; |
625 | inode->i_mapping->backing_dev_info = | 626 | inode->i_mapping->backing_dev_info = |
626 | &ceph_client(inode->i_sb)->backing_dev_info; | 627 | &ceph_sb_to_client(inode->i_sb)->backing_dev_info; |
627 | 628 | ||
628 | switch (inode->i_mode & S_IFMT) { | 629 | switch (inode->i_mode & S_IFMT) { |
629 | case S_IFIFO: | 630 | case S_IFIFO: |
@@ -674,14 +675,15 @@ static int fill_inode(struct inode *inode, | |||
674 | /* set dir completion flag? */ | 675 | /* set dir completion flag? */ |
675 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | 676 | if (ci->i_files == 0 && ci->i_subdirs == 0 && |
676 | ceph_snap(inode) == CEPH_NOSNAP && | 677 | ceph_snap(inode) == CEPH_NOSNAP && |
677 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) { | 678 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && |
679 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
678 | dout(" marking %p complete (empty)\n", inode); | 680 | dout(" marking %p complete (empty)\n", inode); |
679 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 681 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
680 | ci->i_max_offset = 2; | 682 | ci->i_max_offset = 2; |
681 | } | 683 | } |
682 | 684 | ||
683 | /* it may be better to set st_size in getattr instead? */ | 685 | /* it may be better to set st_size in getattr instead? */ |
684 | if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES)) | 686 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
685 | inode->i_size = ci->i_rbytes; | 687 | inode->i_size = ci->i_rbytes; |
686 | break; | 688 | break; |
687 | default: | 689 | default: |
@@ -802,6 +804,37 @@ out_unlock: | |||
802 | } | 804 | } |
803 | 805 | ||
804 | /* | 806 | /* |
807 | * Set dentry's directory position based on the current dir's max, and | ||
808 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
809 | */ | ||
810 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
811 | { | ||
812 | struct dentry *dir = dn->d_parent; | ||
813 | struct inode *inode = dn->d_parent->d_inode; | ||
814 | struct ceph_dentry_info *di; | ||
815 | |||
816 | BUG_ON(!inode); | ||
817 | |||
818 | di = ceph_dentry(dn); | ||
819 | |||
820 | spin_lock(&inode->i_lock); | ||
821 | if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
822 | spin_unlock(&inode->i_lock); | ||
823 | return; | ||
824 | } | ||
825 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
826 | spin_unlock(&inode->i_lock); | ||
827 | |||
828 | spin_lock(&dcache_lock); | ||
829 | spin_lock(&dn->d_lock); | ||
830 | list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); | ||
831 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
832 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
833 | spin_unlock(&dn->d_lock); | ||
834 | spin_unlock(&dcache_lock); | ||
835 | } | ||
836 | |||
837 | /* | ||
805 | * splice a dentry to an inode. | 838 | * splice a dentry to an inode. |
806 | * caller must hold directory i_mutex for this to be safe. | 839 | * caller must hold directory i_mutex for this to be safe. |
807 | * | 840 | * |
@@ -814,6 +847,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
814 | { | 847 | { |
815 | struct dentry *realdn; | 848 | struct dentry *realdn; |
816 | 849 | ||
850 | BUG_ON(dn->d_inode); | ||
851 | |||
817 | /* dn must be unhashed */ | 852 | /* dn must be unhashed */ |
818 | if (!d_unhashed(dn)) | 853 | if (!d_unhashed(dn)) |
819 | d_drop(dn); | 854 | d_drop(dn); |
@@ -835,44 +870,17 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
835 | dn = realdn; | 870 | dn = realdn; |
836 | } else { | 871 | } else { |
837 | BUG_ON(!ceph_dentry(dn)); | 872 | BUG_ON(!ceph_dentry(dn)); |
838 | |||
839 | dout("dn %p attached to %p ino %llx.%llx\n", | 873 | dout("dn %p attached to %p ino %llx.%llx\n", |
840 | dn, dn->d_inode, ceph_vinop(dn->d_inode)); | 874 | dn, dn->d_inode, ceph_vinop(dn->d_inode)); |
841 | } | 875 | } |
842 | if ((!prehash || *prehash) && d_unhashed(dn)) | 876 | if ((!prehash || *prehash) && d_unhashed(dn)) |
843 | d_rehash(dn); | 877 | d_rehash(dn); |
878 | ceph_set_dentry_offset(dn); | ||
844 | out: | 879 | out: |
845 | return dn; | 880 | return dn; |
846 | } | 881 | } |
847 | 882 | ||
848 | /* | 883 | /* |
849 | * Set dentry's directory position based on the current dir's max, and | ||
850 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
851 | */ | ||
852 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
853 | { | ||
854 | struct dentry *dir = dn->d_parent; | ||
855 | struct inode *inode = dn->d_parent->d_inode; | ||
856 | struct ceph_dentry_info *di; | ||
857 | |||
858 | BUG_ON(!inode); | ||
859 | |||
860 | di = ceph_dentry(dn); | ||
861 | |||
862 | spin_lock(&inode->i_lock); | ||
863 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
864 | spin_unlock(&inode->i_lock); | ||
865 | |||
866 | spin_lock(&dcache_lock); | ||
867 | spin_lock(&dn->d_lock); | ||
868 | list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); | ||
869 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
870 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
871 | spin_unlock(&dn->d_lock); | ||
872 | spin_unlock(&dcache_lock); | ||
873 | } | ||
874 | |||
875 | /* | ||
876 | * Incorporate results into the local cache. This is either just | 884 | * Incorporate results into the local cache. This is either just |
877 | * one inode, or a directory, dentry, and possibly linked-to inode (e.g., | 885 | * one inode, or a directory, dentry, and possibly linked-to inode (e.g., |
878 | * after a lookup). | 886 | * after a lookup). |
@@ -933,14 +941,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
933 | 941 | ||
934 | if (!rinfo->head->is_target && !rinfo->head->is_dentry) { | 942 | if (!rinfo->head->is_target && !rinfo->head->is_dentry) { |
935 | dout("fill_trace reply is empty!\n"); | 943 | dout("fill_trace reply is empty!\n"); |
936 | if (rinfo->head->result == 0 && req->r_locked_dir) { | 944 | if (rinfo->head->result == 0 && req->r_locked_dir) |
937 | struct ceph_inode_info *ci = | 945 | ceph_invalidate_dir_request(req); |
938 | ceph_inode(req->r_locked_dir); | ||
939 | dout(" clearing %p complete (empty trace)\n", | ||
940 | req->r_locked_dir); | ||
941 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
942 | ci->i_release_count++; | ||
943 | } | ||
944 | return 0; | 946 | return 0; |
945 | } | 947 | } |
946 | 948 | ||
@@ -1011,13 +1013,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1011 | req->r_old_dentry->d_name.len, | 1013 | req->r_old_dentry->d_name.len, |
1012 | req->r_old_dentry->d_name.name, | 1014 | req->r_old_dentry->d_name.name, |
1013 | dn, dn->d_name.len, dn->d_name.name); | 1015 | dn, dn->d_name.len, dn->d_name.name); |
1016 | |||
1014 | /* ensure target dentry is invalidated, despite | 1017 | /* ensure target dentry is invalidated, despite |
1015 | rehashing bug in vfs_rename_dir */ | 1018 | rehashing bug in vfs_rename_dir */ |
1016 | dn->d_time = jiffies; | 1019 | ceph_invalidate_dentry_lease(dn); |
1017 | ceph_dentry(dn)->lease_shared_gen = 0; | 1020 | |
1018 | /* take overwritten dentry's readdir offset */ | 1021 | /* take overwritten dentry's readdir offset */ |
1022 | dout("dn %p gets %p offset %lld (old offset %lld)\n", | ||
1023 | req->r_old_dentry, dn, ceph_dentry(dn)->offset, | ||
1024 | ceph_dentry(req->r_old_dentry)->offset); | ||
1019 | ceph_dentry(req->r_old_dentry)->offset = | 1025 | ceph_dentry(req->r_old_dentry)->offset = |
1020 | ceph_dentry(dn)->offset; | 1026 | ceph_dentry(dn)->offset; |
1027 | |||
1021 | dn = req->r_old_dentry; /* use old_dentry */ | 1028 | dn = req->r_old_dentry; /* use old_dentry */ |
1022 | in = dn->d_inode; | 1029 | in = dn->d_inode; |
1023 | } | 1030 | } |
@@ -1059,7 +1066,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1059 | goto done; | 1066 | goto done; |
1060 | } | 1067 | } |
1061 | req->r_dentry = dn; /* may have spliced */ | 1068 | req->r_dentry = dn; /* may have spliced */ |
1062 | ceph_set_dentry_offset(dn); | ||
1063 | igrab(in); | 1069 | igrab(in); |
1064 | } else if (ceph_ino(in) == vino.ino && | 1070 | } else if (ceph_ino(in) == vino.ino && |
1065 | ceph_snap(in) == vino.snap) { | 1071 | ceph_snap(in) == vino.snap) { |
@@ -1102,7 +1108,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1102 | err = PTR_ERR(dn); | 1108 | err = PTR_ERR(dn); |
1103 | goto done; | 1109 | goto done; |
1104 | } | 1110 | } |
1105 | ceph_set_dentry_offset(dn); | ||
1106 | req->r_dentry = dn; /* may have spliced */ | 1111 | req->r_dentry = dn; /* may have spliced */ |
1107 | igrab(in); | 1112 | igrab(in); |
1108 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | 1113 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ |
@@ -1429,7 +1434,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1429 | { | 1434 | { |
1430 | struct ceph_inode_info *ci = ceph_inode(inode); | 1435 | struct ceph_inode_info *ci = ceph_inode(inode); |
1431 | 1436 | ||
1432 | if (queue_work(ceph_client(inode->i_sb)->trunc_wq, | 1437 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, |
1433 | &ci->i_vmtruncate_work)) { | 1438 | &ci->i_vmtruncate_work)) { |
1434 | dout("ceph_queue_vmtruncate %p\n", inode); | 1439 | dout("ceph_queue_vmtruncate %p\n", inode); |
1435 | igrab(inode); | 1440 | igrab(inode); |
@@ -1518,7 +1523,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1518 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1523 | struct inode *parent_inode = dentry->d_parent->d_inode; |
1519 | const unsigned int ia_valid = attr->ia_valid; | 1524 | const unsigned int ia_valid = attr->ia_valid; |
1520 | struct ceph_mds_request *req; | 1525 | struct ceph_mds_request *req; |
1521 | struct ceph_mds_client *mdsc = &ceph_client(dentry->d_sb)->mdsc; | 1526 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; |
1522 | int issued; | 1527 | int issued; |
1523 | int release = 0, dirtied = 0; | 1528 | int release = 0, dirtied = 0; |
1524 | int mask = 0; | 1529 | int mask = 0; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8a5bcae62846..d085f07756b4 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -98,7 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
98 | struct ceph_ioctl_dataloc dl; | 98 | struct ceph_ioctl_dataloc dl; |
99 | struct inode *inode = file->f_dentry->d_inode; | 99 | struct inode *inode = file->f_dentry->d_inode; |
100 | struct ceph_inode_info *ci = ceph_inode(inode); | 100 | struct ceph_inode_info *ci = ceph_inode(inode); |
101 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
102 | u64 len = 1, olen; | 102 | u64 len = 1, olen; |
103 | u64 tmp; | 103 | u64 tmp; |
104 | struct ceph_object_layout ol; | 104 | struct ceph_object_layout ol; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 24561a557e01..885aa5710cfd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -40,7 +40,7 @@ | |||
40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 40 | static void __wake_requests(struct ceph_mds_client *mdsc, |
41 | struct list_head *head); | 41 | struct list_head *head); |
42 | 42 | ||
43 | const static struct ceph_connection_operations mds_con_ops; | 43 | static const struct ceph_connection_operations mds_con_ops; |
44 | 44 | ||
45 | 45 | ||
46 | /* | 46 | /* |
@@ -665,10 +665,10 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) | |||
665 | struct ceph_msg *msg; | 665 | struct ceph_msg *msg; |
666 | struct ceph_mds_session_head *h; | 666 | struct ceph_mds_session_head *h; |
667 | 667 | ||
668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); | 668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS); |
669 | if (IS_ERR(msg)) { | 669 | if (!msg) { |
670 | pr_err("create_session_msg ENOMEM creating msg\n"); | 670 | pr_err("create_session_msg ENOMEM creating msg\n"); |
671 | return ERR_PTR(PTR_ERR(msg)); | 671 | return NULL; |
672 | } | 672 | } |
673 | h = msg->front.iov_base; | 673 | h = msg->front.iov_base; |
674 | h->op = cpu_to_le32(op); | 674 | h->op = cpu_to_le32(op); |
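Alongside the new gfp_t argument, ceph_msg_new switches conventions: it now returns NULL on failure instead of an ERR_PTR. Since allocation failure is its only failure mode, callers throughout this file collapse to a plain NULL test; condensed from create_session_msg and __open_session above:

    struct ceph_msg *msg;

    msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS);
    if (!msg)                       /* single failure mode */
            return -ENOMEM;         /* no ERR_PTR/PTR_ERR plumbing needed */
    ceph_con_send(&session->s_con, msg);
    return 0;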
@@ -687,7 +687,6 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
687 | struct ceph_msg *msg; | 687 | struct ceph_msg *msg; |
688 | int mstate; | 688 | int mstate; |
689 | int mds = session->s_mds; | 689 | int mds = session->s_mds; |
690 | int err = 0; | ||
691 | 690 | ||
692 | /* wait for mds to go active? */ | 691 | /* wait for mds to go active? */ |
693 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); | 692 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); |
@@ -698,13 +697,9 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
698 | 697 | ||
699 | /* send connect message */ | 698 | /* send connect message */ |
700 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); | 699 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); |
701 | if (IS_ERR(msg)) { | 700 | if (!msg) |
702 | err = PTR_ERR(msg); | 701 | return -ENOMEM; |
703 | goto out; | ||
704 | } | ||
705 | ceph_con_send(&session->s_con, msg); | 702 | ceph_con_send(&session->s_con, msg); |
706 | |||
707 | out: | ||
708 | return 0; | 703 | return 0; |
709 | } | 704 | } |
710 | 705 | ||
@@ -804,12 +799,49 @@ out: | |||
804 | } | 799 | } |
805 | 800 | ||
806 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | 801 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, |
807 | void *arg) | 802 | void *arg) |
808 | { | 803 | { |
809 | struct ceph_inode_info *ci = ceph_inode(inode); | 804 | struct ceph_inode_info *ci = ceph_inode(inode); |
805 | int drop = 0; | ||
806 | |||
810 | dout("removing cap %p, ci is %p, inode is %p\n", | 807 | dout("removing cap %p, ci is %p, inode is %p\n", |
811 | cap, ci, &ci->vfs_inode); | 808 | cap, ci, &ci->vfs_inode); |
812 | ceph_remove_cap(cap); | 809 | spin_lock(&inode->i_lock); |
810 | __ceph_remove_cap(cap); | ||
811 | if (!__ceph_is_any_real_caps(ci)) { | ||
812 | struct ceph_mds_client *mdsc = | ||
813 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
814 | |||
815 | spin_lock(&mdsc->cap_dirty_lock); | ||
816 | if (!list_empty(&ci->i_dirty_item)) { | ||
817 | pr_info(" dropping dirty %s state for %p %lld\n", | ||
818 | ceph_cap_string(ci->i_dirty_caps), | ||
819 | inode, ceph_ino(inode)); | ||
820 | ci->i_dirty_caps = 0; | ||
821 | list_del_init(&ci->i_dirty_item); | ||
822 | drop = 1; | ||
823 | } | ||
824 | if (!list_empty(&ci->i_flushing_item)) { | ||
825 | pr_info(" dropping dirty+flushing %s state for %p %lld\n", | ||
826 | ceph_cap_string(ci->i_flushing_caps), | ||
827 | inode, ceph_ino(inode)); | ||
828 | ci->i_flushing_caps = 0; | ||
829 | list_del_init(&ci->i_flushing_item); | ||
830 | mdsc->num_cap_flushing--; | ||
831 | drop = 1; | ||
832 | } | ||
833 | if (drop && ci->i_wrbuffer_ref) { | ||
834 | pr_info(" dropping dirty data for %p %lld\n", | ||
835 | inode, ceph_ino(inode)); | ||
836 | ci->i_wrbuffer_ref = 0; | ||
837 | ci->i_wrbuffer_ref_head = 0; | ||
838 | drop++; | ||
839 | } | ||
840 | spin_unlock(&mdsc->cap_dirty_lock); | ||
841 | } | ||
842 | spin_unlock(&inode->i_lock); | ||
843 | while (drop--) | ||
844 | iput(inode); | ||
813 | return 0; | 845 | return 0; |
814 | } | 846 | } |
815 | 847 | ||
@@ -821,6 +853,7 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
821 | dout("remove_session_caps on %p\n", session); | 853 | dout("remove_session_caps on %p\n", session); |
822 | iterate_session_caps(session, remove_session_caps_cb, NULL); | 854 | iterate_session_caps(session, remove_session_caps_cb, NULL); |
823 | BUG_ON(session->s_nr_caps > 0); | 855 | BUG_ON(session->s_nr_caps > 0); |
856 | BUG_ON(!list_empty(&session->s_cap_flushing)); | ||
824 | cleanup_cap_releases(session); | 857 | cleanup_cap_releases(session); |
825 | } | 858 | } |
826 | 859 | ||
@@ -883,8 +916,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
883 | ceph_mds_state_name(state)); | 916 | ceph_mds_state_name(state)); |
884 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, | 917 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, |
885 | ++session->s_renew_seq); | 918 | ++session->s_renew_seq); |
886 | if (IS_ERR(msg)) | 919 | if (!msg) |
887 | return PTR_ERR(msg); | 920 | return -ENOMEM; |
888 | ceph_con_send(&session->s_con, msg); | 921 | ceph_con_send(&session->s_con, msg); |
889 | return 0; | 922 | return 0; |
890 | } | 923 | } |
@@ -931,17 +964,15 @@ static int request_close_session(struct ceph_mds_client *mdsc, | |||
931 | struct ceph_mds_session *session) | 964 | struct ceph_mds_session *session) |
932 | { | 965 | { |
933 | struct ceph_msg *msg; | 966 | struct ceph_msg *msg; |
934 | int err = 0; | ||
935 | 967 | ||
936 | dout("request_close_session mds%d state %s seq %lld\n", | 968 | dout("request_close_session mds%d state %s seq %lld\n", |
937 | session->s_mds, session_state_name(session->s_state), | 969 | session->s_mds, session_state_name(session->s_state), |
938 | session->s_seq); | 970 | session->s_seq); |
939 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); | 971 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); |
940 | if (IS_ERR(msg)) | 972 | if (!msg) |
941 | err = PTR_ERR(msg); | 973 | return -ENOMEM; |
942 | else | 974 | ceph_con_send(&session->s_con, msg); |
943 | ceph_con_send(&session->s_con, msg); | 975 | return 0; |
944 | return err; | ||
945 | } | 976 | } |
946 | 977 | ||
947 | /* | 978 | /* |
@@ -1059,7 +1090,7 @@ static int add_cap_releases(struct ceph_mds_client *mdsc, | |||
1059 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
1060 | spin_unlock(&session->s_cap_lock); | 1091 | spin_unlock(&session->s_cap_lock); |
1061 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
1062 | 0, 0, NULL); | 1093 | GFP_NOFS); |
1063 | if (!msg) | 1094 | if (!msg) |
1064 | goto out_unlocked; | 1095 | goto out_unlocked; |
1065 | dout("add_cap_releases %p msg %p now %d\n", session, msg, | 1096 | dout("add_cap_releases %p msg %p now %d\n", session, msg, |
@@ -1151,10 +1182,8 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
1151 | struct ceph_msg *msg; | 1182 | struct ceph_msg *msg; |
1152 | 1183 | ||
1153 | dout("send_cap_releases mds%d\n", session->s_mds); | 1184 | dout("send_cap_releases mds%d\n", session->s_mds); |
1154 | while (1) { | 1185 | spin_lock(&session->s_cap_lock); |
1155 | spin_lock(&session->s_cap_lock); | 1186 | while (!list_empty(&session->s_cap_releases_done)) { |
1156 | if (list_empty(&session->s_cap_releases_done)) | ||
1157 | break; | ||
1158 | msg = list_first_entry(&session->s_cap_releases_done, | 1187 | msg = list_first_entry(&session->s_cap_releases_done, |
1159 | struct ceph_msg, list_head); | 1188 | struct ceph_msg, list_head); |
1160 | list_del_init(&msg->list_head); | 1189 | list_del_init(&msg->list_head); |
@@ -1162,10 +1191,49 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
1162 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1191 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
1163 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); | 1192 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); |
1164 | ceph_con_send(&session->s_con, msg); | 1193 | ceph_con_send(&session->s_con, msg); |
1194 | spin_lock(&session->s_cap_lock); | ||
1165 | } | 1195 | } |
1166 | spin_unlock(&session->s_cap_lock); | 1196 | spin_unlock(&session->s_cap_lock); |
1167 | } | 1197 | } |
1168 | 1198 | ||
1199 | static void discard_cap_releases(struct ceph_mds_client *mdsc, | ||
1200 | struct ceph_mds_session *session) | ||
1201 | { | ||
1202 | struct ceph_msg *msg; | ||
1203 | struct ceph_mds_cap_release *head; | ||
1204 | unsigned num; | ||
1205 | |||
1206 | dout("discard_cap_releases mds%d\n", session->s_mds); | ||
1207 | spin_lock(&session->s_cap_lock); | ||
1208 | |||
1209 | /* zero out the in-progress message */ | ||
1210 | msg = list_first_entry(&session->s_cap_releases, | ||
1211 | struct ceph_msg, list_head); | ||
1212 | head = msg->front.iov_base; | ||
1213 | num = le32_to_cpu(head->num); | ||
1214 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | ||
1215 | head->num = cpu_to_le32(0); | ||
1216 | session->s_num_cap_releases += num; | ||
1217 | |||
1218 | /* requeue completed messages */ | ||
1219 | while (!list_empty(&session->s_cap_releases_done)) { | ||
1220 | msg = list_first_entry(&session->s_cap_releases_done, | ||
1221 | struct ceph_msg, list_head); | ||
1222 | list_del_init(&msg->list_head); | ||
1223 | |||
1224 | head = msg->front.iov_base; | ||
1225 | num = le32_to_cpu(head->num); | ||
1226 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, | ||
1227 | num); | ||
1228 | session->s_num_cap_releases += num; | ||
1229 | head->num = cpu_to_le32(0); | ||
1230 | msg->front.iov_len = sizeof(*head); | ||
1231 | list_add(&msg->list_head, &session->s_cap_releases); | ||
1232 | } | ||
1233 | |||
1234 | spin_unlock(&session->s_cap_lock); | ||
1235 | } | ||
1236 | |||
1169 | /* | 1237 | /* |
1170 | * requests | 1238 | * requests |
1171 | */ | 1239 | */ |
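send_cap_releases() is reshaped so s_cap_lock is held across the list-empty test and retaken after each send, and the new discard_cap_releases() requeues completed messages under the same lock before a reconnect re-derives the cap state. The drain shape, modeled in plain C with a pthread mutex standing in for the spinlock and a printf for ceph_con_send():

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *done_list;

static void send_one(struct node *n)	/* may block: lock must be dropped */
{
	printf("sending msg %d\n", n->id);
}

static void drain_done_list(void)
{
	pthread_mutex_lock(&list_lock);
	while (done_list) {
		struct node *n = done_list;

		done_list = n->next;		/* unlink under the lock */
		pthread_mutex_unlock(&list_lock);
		send_one(n);			/* blocking work, unlocked */
		pthread_mutex_lock(&list_lock);	/* retake before retest */
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct node b = { NULL, 2 };
	struct node a = { &b, 1 };

	done_list = &a;
	drain_done_list();
	return 0;
}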
@@ -1181,6 +1249,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1181 | if (!req) | 1249 | if (!req) |
1182 | return ERR_PTR(-ENOMEM); | 1250 | return ERR_PTR(-ENOMEM); |
1183 | 1251 | ||
1252 | mutex_init(&req->r_fill_mutex); | ||
1184 | req->r_started = jiffies; | 1253 | req->r_started = jiffies; |
1185 | req->r_resend_mds = -1; | 1254 | req->r_resend_mds = -1; |
1186 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
@@ -1251,7 +1320,7 @@ retry: | |||
1251 | len += 1 + temp->d_name.len; | 1320 | len += 1 + temp->d_name.len; |
1252 | temp = temp->d_parent; | 1321 | temp = temp->d_parent; |
1253 | if (temp == NULL) { | 1322 | if (temp == NULL) { |
1254 | pr_err("build_path_dentry corrupt dentry %p\n", dentry); | 1323 | pr_err("build_path corrupt dentry %p\n", dentry); |
1255 | return ERR_PTR(-EINVAL); | 1324 | return ERR_PTR(-EINVAL); |
1256 | } | 1325 | } |
1257 | } | 1326 | } |
@@ -1267,7 +1336,7 @@ retry: | |||
1267 | struct inode *inode = temp->d_inode; | 1336 | struct inode *inode = temp->d_inode; |
1268 | 1337 | ||
1269 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { | 1338 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { |
1270 | dout("build_path_dentry path+%d: %p SNAPDIR\n", | 1339 | dout("build_path path+%d: %p SNAPDIR\n", |
1271 | pos, temp); | 1340 | pos, temp); |
1272 | } else if (stop_on_nosnap && inode && | 1341 | } else if (stop_on_nosnap && inode && |
1273 | ceph_snap(inode) == CEPH_NOSNAP) { | 1342 | ceph_snap(inode) == CEPH_NOSNAP) { |
@@ -1278,20 +1347,18 @@ retry: | |||
1278 | break; | 1347 | break; |
1279 | strncpy(path + pos, temp->d_name.name, | 1348 | strncpy(path + pos, temp->d_name.name, |
1280 | temp->d_name.len); | 1349 | temp->d_name.len); |
1281 | dout("build_path_dentry path+%d: %p '%.*s'\n", | ||
1282 | pos, temp, temp->d_name.len, path + pos); | ||
1283 | } | 1350 | } |
1284 | if (pos) | 1351 | if (pos) |
1285 | path[--pos] = '/'; | 1352 | path[--pos] = '/'; |
1286 | temp = temp->d_parent; | 1353 | temp = temp->d_parent; |
1287 | if (temp == NULL) { | 1354 | if (temp == NULL) { |
1288 | pr_err("build_path_dentry corrupt dentry\n"); | 1355 | pr_err("build_path corrupt dentry\n"); |
1289 | kfree(path); | 1356 | kfree(path); |
1290 | return ERR_PTR(-EINVAL); | 1357 | return ERR_PTR(-EINVAL); |
1291 | } | 1358 | } |
1292 | } | 1359 | } |
1293 | if (pos != 0) { | 1360 | if (pos != 0) { |
1294 | pr_err("build_path_dentry did not end path lookup where " | 1361 | pr_err("build_path did not end path lookup where " |
1295 | "expected, namelen is %d, pos is %d\n", len, pos); | 1362 | "expected, namelen is %d, pos is %d\n", len, pos); |
1296 | /* presumably this is only possible if racing with a | 1363 | /* presumably this is only possible if racing with a |
1297 | rename of one of the parent directories (we can not | 1364 | rename of one of the parent directories (we can not |
@@ -1303,7 +1370,7 @@ retry: | |||
1303 | 1370 | ||
1304 | *base = ceph_ino(temp->d_inode); | 1371 | *base = ceph_ino(temp->d_inode); |
1305 | *plen = len; | 1372 | *plen = len; |
1306 | dout("build_path_dentry on %p %d built %llx '%.*s'\n", | 1373 | dout("build_path on %p %d built %llx '%.*s'\n", |
1307 | dentry, atomic_read(&dentry->d_count), *base, len, path); | 1374 | dentry, atomic_read(&dentry->d_count), *base, len, path); |
1308 | return path; | 1375 | return path; |
1309 | } | 1376 | } |
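The renamed build_path() sizes the buffer in one pass up the d_parent chain, then fills it right to left in a second pass, so no reversal is needed. A userspace model of the same walk (the dentry structure is a toy stand-in, and the kernel's retry path for a concurrent rename is omitted):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dent {
	const char *name;
	struct dent *parent;	/* NULL above the root */
};

static char *build_path(struct dent *leaf)
{
	struct dent *d;
	size_t len = 0, pos;
	char *path;

	for (d = leaf; d->parent; d = d->parent)
		len += 1 + strlen(d->name);	/* '/' plus component */

	path = malloc(len + 1);
	if (!path)
		return NULL;
	path[len] = '\0';

	pos = len;
	for (d = leaf; d->parent; d = d->parent) {
		size_t nlen = strlen(d->name);

		pos -= nlen;
		memcpy(path + pos, d->name, nlen);
		path[--pos] = '/';	/* same "path[--pos] = '/'" step */
	}
	return path;			/* e.g. "/a/b/c", pos is now 0 */
}

int main(void)
{
	struct dent root = { "", NULL };
	struct dent a = { "a", &root };
	struct dent b = { "b", &a };
	struct dent c = { "c", &b };
	char *p = build_path(&c);

	printf("%s\n", p ? p : "(alloc failed)");
	free(p);
	return 0;
}

The "pos != 0" check in the kernel version catches exactly the case the model ignores: a rename racing the walk can change component lengths between the two passes.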
@@ -1426,9 +1493,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1426 | if (req->r_old_dentry_drop) | 1493 | if (req->r_old_dentry_drop) |
1427 | len += req->r_old_dentry->d_name.len; | 1494 | len += req->r_old_dentry->d_name.len; |
1428 | 1495 | ||
1429 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); | 1496 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS); |
1430 | if (IS_ERR(msg)) | 1497 | if (!msg) { |
1498 | msg = ERR_PTR(-ENOMEM); | ||
1431 | goto out_free2; | 1499 | goto out_free2; |
1500 | } | ||
1432 | 1501 | ||
1433 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1502 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
1434 | 1503 | ||
@@ -1517,9 +1586,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1517 | } | 1586 | } |
1518 | msg = create_request_message(mdsc, req, mds); | 1587 | msg = create_request_message(mdsc, req, mds); |
1519 | if (IS_ERR(msg)) { | 1588 | if (IS_ERR(msg)) { |
1520 | req->r_reply = ERR_PTR(PTR_ERR(msg)); | 1589 | req->r_err = PTR_ERR(msg); |
1521 | complete_request(mdsc, req); | 1590 | complete_request(mdsc, req); |
1522 | return -PTR_ERR(msg); | 1591 | return PTR_ERR(msg); |
1523 | } | 1592 | } |
1524 | req->r_request = msg; | 1593 | req->r_request = msg; |
1525 | 1594 | ||
@@ -1552,7 +1621,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1552 | int mds = -1; | 1621 | int mds = -1; |
1553 | int err = -EAGAIN; | 1622 | int err = -EAGAIN; |
1554 | 1623 | ||
1555 | if (req->r_reply) | 1624 | if (req->r_err || req->r_got_result) |
1556 | goto out; | 1625 | goto out; |
1557 | 1626 | ||
1558 | if (req->r_timeout && | 1627 | if (req->r_timeout && |
@@ -1609,7 +1678,7 @@ out: | |||
1609 | return err; | 1678 | return err; |
1610 | 1679 | ||
1611 | finish: | 1680 | finish: |
1612 | req->r_reply = ERR_PTR(err); | 1681 | req->r_err = err; |
1613 | complete_request(mdsc, req); | 1682 | complete_request(mdsc, req); |
1614 | goto out; | 1683 | goto out; |
1615 | } | 1684 | } |
@@ -1630,10 +1699,9 @@ static void __wake_requests(struct ceph_mds_client *mdsc, | |||
1630 | 1699 | ||
1631 | /* | 1700 | /* |
1632 | * Wake up threads with requests pending for @mds, so that they can | 1701 | * Wake up threads with requests pending for @mds, so that they can |
1633 | * resubmit their requests to a possibly different mds. If @all is set, | 1702 | * resubmit their requests to a possibly different mds. |
1634 | * wake up if their requests has been forwarded to @mds, too. | ||
1635 | */ | 1703 | */ |
1636 | static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) | 1704 | static void kick_requests(struct ceph_mds_client *mdsc, int mds) |
1637 | { | 1705 | { |
1638 | struct ceph_mds_request *req; | 1706 | struct ceph_mds_request *req; |
1639 | struct rb_node *p; | 1707 | struct rb_node *p; |
@@ -1689,64 +1757,78 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
1689 | __register_request(mdsc, req, dir); | 1757 | __register_request(mdsc, req, dir); |
1690 | __do_request(mdsc, req); | 1758 | __do_request(mdsc, req); |
1691 | 1759 | ||
1692 | /* wait */ | 1760 | if (req->r_err) { |
1693 | if (!req->r_reply) { | 1761 | err = req->r_err; |
1694 | mutex_unlock(&mdsc->mutex); | 1762 | __unregister_request(mdsc, req); |
1695 | if (req->r_timeout) { | 1763 | dout("do_request early error %d\n", err); |
1696 | err = (long)wait_for_completion_interruptible_timeout( | 1764 | goto out; |
1697 | &req->r_completion, req->r_timeout); | ||
1698 | if (err == 0) | ||
1699 | req->r_reply = ERR_PTR(-EIO); | ||
1700 | else if (err < 0) | ||
1701 | req->r_reply = ERR_PTR(err); | ||
1702 | } else { | ||
1703 | err = wait_for_completion_interruptible( | ||
1704 | &req->r_completion); | ||
1705 | if (err) | ||
1706 | req->r_reply = ERR_PTR(err); | ||
1707 | } | ||
1708 | mutex_lock(&mdsc->mutex); | ||
1709 | } | 1765 | } |
1710 | 1766 | ||
1711 | if (IS_ERR(req->r_reply)) { | 1767 | /* wait */ |
1712 | err = PTR_ERR(req->r_reply); | 1768 | mutex_unlock(&mdsc->mutex); |
1713 | req->r_reply = NULL; | 1769 | dout("do_request waiting\n"); |
1770 | if (req->r_timeout) { | ||
1771 | err = (long)wait_for_completion_interruptible_timeout( | ||
1772 | &req->r_completion, req->r_timeout); | ||
1773 | if (err == 0) | ||
1774 | err = -EIO; | ||
1775 | } else { | ||
1776 | err = wait_for_completion_interruptible(&req->r_completion); | ||
1777 | } | ||
1778 | dout("do_request waited, got %d\n", err); | ||
1779 | mutex_lock(&mdsc->mutex); | ||
1714 | 1780 | ||
1715 | if (err == -ERESTARTSYS) { | 1781 | /* only abort if we didn't race with a real reply */ |
1716 | /* aborted */ | 1782 | if (req->r_got_result) { |
1717 | req->r_aborted = true; | 1783 | err = le32_to_cpu(req->r_reply_info.head->result); |
1784 | } else if (err < 0) { | ||
1785 | dout("aborted request %lld with %d\n", req->r_tid, err); | ||
1718 | 1786 | ||
1719 | if (req->r_locked_dir && | 1787 | /* |
1720 | (req->r_op & CEPH_MDS_OP_WRITE)) { | 1788 | * ensure we aren't running concurrently with |
1721 | struct ceph_inode_info *ci = | 1789 | * ceph_fill_trace or ceph_readdir_prepopulate, which |
1722 | ceph_inode(req->r_locked_dir); | 1790 | * rely on locks (dir mutex) held by our caller. |
1791 | */ | ||
1792 | mutex_lock(&req->r_fill_mutex); | ||
1793 | req->r_err = err; | ||
1794 | req->r_aborted = true; | ||
1795 | mutex_unlock(&req->r_fill_mutex); | ||
1723 | 1796 | ||
1724 | dout("aborted, clearing I_COMPLETE on %p\n", | 1797 | if (req->r_locked_dir && |
1725 | req->r_locked_dir); | 1798 | (req->r_op & CEPH_MDS_OP_WRITE)) |
1726 | spin_lock(&req->r_locked_dir->i_lock); | 1799 | ceph_invalidate_dir_request(req); |
1727 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1728 | ci->i_release_count++; | ||
1729 | spin_unlock(&req->r_locked_dir->i_lock); | ||
1730 | } | ||
1731 | } else { | ||
1732 | /* clean up this request */ | ||
1733 | __unregister_request(mdsc, req); | ||
1734 | if (!list_empty(&req->r_unsafe_item)) | ||
1735 | list_del_init(&req->r_unsafe_item); | ||
1736 | complete(&req->r_safe_completion); | ||
1737 | } | ||
1738 | } else if (req->r_err) { | ||
1739 | err = req->r_err; | ||
1740 | } else { | 1800 | } else { |
1741 | err = le32_to_cpu(req->r_reply_info.head->result); | 1801 | err = req->r_err; |
1742 | } | 1802 | } |
1743 | mutex_unlock(&mdsc->mutex); | ||
1744 | 1803 | ||
1804 | out: | ||
1805 | mutex_unlock(&mdsc->mutex); | ||
1745 | dout("do_request %p done, result %d\n", req, err); | 1806 | dout("do_request %p done, result %d\n", req, err); |
1746 | return err; | 1807 | return err; |
1747 | } | 1808 | } |
1748 | 1809 | ||
1749 | /* | 1810 | /* |
1811 | * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS | ||
1812 | * namespace request. | ||
1813 | */ | ||
1814 | void ceph_invalidate_dir_request(struct ceph_mds_request *req) | ||
1815 | { | ||
1816 | struct inode *inode = req->r_locked_dir; | ||
1817 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1818 | |||
1819 | dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode); | ||
1820 | spin_lock(&inode->i_lock); | ||
1821 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1822 | ci->i_release_count++; | ||
1823 | spin_unlock(&inode->i_lock); | ||
1824 | |||
1825 | if (req->r_dentry) | ||
1826 | ceph_invalidate_dentry_lease(req->r_dentry); | ||
1827 | if (req->r_old_dentry) | ||
1828 | ceph_invalidate_dentry_lease(req->r_old_dentry); | ||
1829 | } | ||
1830 | |||
1831 | /* | ||
1750 | * Handle mds reply. | 1832 | * Handle mds reply. |
1751 | * | 1833 | * |
1752 | * We take the session mutex and parse and process the reply immediately. | 1834 | * We take the session mutex and parse and process the reply immediately. |
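The rework above replaces r_reply-as-error-carrier with explicit r_err and r_got_result, and an interrupted waiter now takes r_fill_mutex before flagging r_aborted, so it cannot overlap ceph_fill_trace() or ceph_readdir_prepopulate(), which depend on directory locks held by the caller. A condensed pthread model of that handshake (everything but the one mutex that matters is elided):

#include <pthread.h>
#include <stdio.h>

struct toy_req {
	pthread_mutex_t fill_mutex;
	int aborted;
	int got_result;
};

/* waiter side: wait_for_completion was interrupted or timed out */
static void abort_request(struct toy_req *r)
{
	pthread_mutex_lock(&r->fill_mutex);
	r->aborted = 1;		/* reply path must now stay out */
	pthread_mutex_unlock(&r->fill_mutex);
}

/* reply side: fill caches only if the request is still live */
static void reply_arrived(struct toy_req *r)
{
	pthread_mutex_lock(&r->fill_mutex);
	if (!r->aborted)
		r->got_result = 1;	/* safe to fill the trace */
	else
		printf("reply arrived after abort; dropped\n");
	pthread_mutex_unlock(&r->fill_mutex);
}

int main(void)
{
	struct toy_req r = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	abort_request(&r);
	reply_arrived(&r);
	return 0;
}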
@@ -1797,6 +1879,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1797 | mutex_unlock(&mdsc->mutex); | 1879 | mutex_unlock(&mdsc->mutex); |
1798 | goto out; | 1880 | goto out; |
1799 | } | 1881 | } |
1882 | if (req->r_got_safe && !head->safe) { | ||
1883 | pr_warning("got unsafe after safe on %llu from mds%d\n", | ||
1884 | tid, mds); | ||
1885 | mutex_unlock(&mdsc->mutex); | ||
1886 | goto out; | ||
1887 | } | ||
1800 | 1888 | ||
1801 | result = le32_to_cpu(head->result); | 1889 | result = le32_to_cpu(head->result); |
1802 | 1890 | ||
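The new guard handles replies that arrive out of order: the MDS sends an unsafe (uncommitted) reply followed by a safe one, and an unsafe reply showing up after the safe one for the same tid is now warned about and dropped. A toy model of the ordering check:

#include <stdio.h>

struct toy_req {
	int got_unsafe;
	int got_safe;
};

static void reply(struct toy_req *r, int safe, long long tid)
{
	if (r->got_safe && !safe) {
		printf("got unsafe after safe on %lld; ignoring\n", tid);
		return;
	}
	if (safe)
		r->got_safe = 1;
	else
		r->got_unsafe = 1;	/* queued on the unsafe list */
}

int main(void)
{
	struct toy_req r = { 0, 0 };

	reply(&r, 0, 42);	/* unsafe first: the normal order */
	reply(&r, 1, 42);	/* safe commit */
	reply(&r, 0, 42);	/* stray unsafe: dropped by the guard */
	return 0;
}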
@@ -1838,11 +1926,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1838 | mutex_unlock(&mdsc->mutex); | 1926 | mutex_unlock(&mdsc->mutex); |
1839 | goto out; | 1927 | goto out; |
1840 | } | 1928 | } |
1841 | } | 1929 | } else { |
1842 | |||
1843 | BUG_ON(req->r_reply); | ||
1844 | |||
1845 | if (!head->safe) { | ||
1846 | req->r_got_unsafe = true; | 1930 | req->r_got_unsafe = true; |
1847 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); | 1931 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); |
1848 | } | 1932 | } |
@@ -1871,21 +1955,30 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1871 | } | 1955 | } |
1872 | 1956 | ||
1873 | /* insert trace into our cache */ | 1957 | /* insert trace into our cache */ |
1958 | mutex_lock(&req->r_fill_mutex); | ||
1874 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 1959 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); |
1875 | if (err == 0) { | 1960 | if (err == 0) { |
1876 | if (result == 0 && rinfo->dir_nr) | 1961 | if (result == 0 && rinfo->dir_nr) |
1877 | ceph_readdir_prepopulate(req, req->r_session); | 1962 | ceph_readdir_prepopulate(req, req->r_session); |
1878 | ceph_unreserve_caps(&req->r_caps_reservation); | 1963 | ceph_unreserve_caps(&req->r_caps_reservation); |
1879 | } | 1964 | } |
1965 | mutex_unlock(&req->r_fill_mutex); | ||
1880 | 1966 | ||
1881 | up_read(&mdsc->snap_rwsem); | 1967 | up_read(&mdsc->snap_rwsem); |
1882 | out_err: | 1968 | out_err: |
1883 | if (err) { | 1969 | mutex_lock(&mdsc->mutex); |
1884 | req->r_err = err; | 1970 | if (!req->r_aborted) { |
1971 | if (err) { | ||
1972 | req->r_err = err; | ||
1973 | } else { | ||
1974 | req->r_reply = msg; | ||
1975 | ceph_msg_get(msg); | ||
1976 | req->r_got_result = true; | ||
1977 | } | ||
1885 | } else { | 1978 | } else { |
1886 | req->r_reply = msg; | 1979 | dout("reply arrived after request %lld was aborted\n", tid); |
1887 | ceph_msg_get(msg); | ||
1888 | } | 1980 | } |
1981 | mutex_unlock(&mdsc->mutex); | ||
1889 | 1982 | ||
1890 | add_cap_releases(mdsc, req->r_session, -1); | 1983 | add_cap_releases(mdsc, req->r_session, -1); |
1891 | mutex_unlock(&session->s_mutex); | 1984 | mutex_unlock(&session->s_mutex); |
@@ -1984,6 +2077,8 @@ static void handle_session(struct ceph_mds_session *session, | |||
1984 | 2077 | ||
1985 | switch (op) { | 2078 | switch (op) { |
1986 | case CEPH_SESSION_OPEN: | 2079 | case CEPH_SESSION_OPEN: |
2080 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
2081 | pr_info("mds%d reconnect success\n", session->s_mds); | ||
1987 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2082 | session->s_state = CEPH_MDS_SESSION_OPEN; |
1988 | renewed_caps(mdsc, session, 0); | 2083 | renewed_caps(mdsc, session, 0); |
1989 | wake = 1; | 2084 | wake = 1; |
@@ -1997,10 +2092,12 @@ static void handle_session(struct ceph_mds_session *session, | |||
1997 | break; | 2092 | break; |
1998 | 2093 | ||
1999 | case CEPH_SESSION_CLOSE: | 2094 | case CEPH_SESSION_CLOSE: |
2095 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
2096 | pr_info("mds%d reconnect denied\n", session->s_mds); | ||
2000 | remove_session_caps(session); | 2097 | remove_session_caps(session); |
2001 | wake = 1; /* for good measure */ | 2098 | wake = 1; /* for good measure */ |
2002 | complete(&mdsc->session_close_waiters); | 2099 | complete(&mdsc->session_close_waiters); |
2003 | kick_requests(mdsc, mds, 0); /* cur only */ | 2100 | kick_requests(mdsc, mds); |
2004 | break; | 2101 | break; |
2005 | 2102 | ||
2006 | case CEPH_SESSION_STALE: | 2103 | case CEPH_SESSION_STALE: |
@@ -2132,54 +2229,44 @@ out: | |||
2132 | * | 2229 | * |
2133 | * called with mdsc->mutex held. | 2230 | * called with mdsc->mutex held. |
2134 | */ | 2231 | */ |
2135 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | 2232 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, |
2233 | struct ceph_mds_session *session) | ||
2136 | { | 2234 | { |
2137 | struct ceph_mds_session *session = NULL; | ||
2138 | struct ceph_msg *reply; | 2235 | struct ceph_msg *reply; |
2139 | struct rb_node *p; | 2236 | struct rb_node *p; |
2237 | int mds = session->s_mds; | ||
2140 | int err = -ENOMEM; | 2238 | int err = -ENOMEM; |
2141 | struct ceph_pagelist *pagelist; | 2239 | struct ceph_pagelist *pagelist; |
2142 | 2240 | ||
2143 | pr_info("reconnect to recovering mds%d\n", mds); | 2241 | pr_info("mds%d reconnect start\n", mds); |
2144 | 2242 | ||
2145 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); | 2243 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); |
2146 | if (!pagelist) | 2244 | if (!pagelist) |
2147 | goto fail_nopagelist; | 2245 | goto fail_nopagelist; |
2148 | ceph_pagelist_init(pagelist); | 2246 | ceph_pagelist_init(pagelist); |
2149 | 2247 | ||
2150 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); | 2248 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS); |
2151 | if (IS_ERR(reply)) { | 2249 | if (!reply) |
2152 | err = PTR_ERR(reply); | ||
2153 | goto fail_nomsg; | 2250 | goto fail_nomsg; |
2154 | } | ||
2155 | |||
2156 | /* find session */ | ||
2157 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
2158 | mutex_unlock(&mdsc->mutex); /* drop lock for duration */ | ||
2159 | 2251 | ||
2160 | if (session) { | 2252 | mutex_lock(&session->s_mutex); |
2161 | mutex_lock(&session->s_mutex); | 2253 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; |
2254 | session->s_seq = 0; | ||
2162 | 2255 | ||
2163 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; | 2256 | ceph_con_open(&session->s_con, |
2164 | session->s_seq = 0; | 2257 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); |
2165 | 2258 | ||
2166 | ceph_con_open(&session->s_con, | 2259 | /* replay unsafe requests */ |
2167 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); | 2260 | replay_unsafe_requests(mdsc, session); |
2168 | |||
2169 | /* replay unsafe requests */ | ||
2170 | replay_unsafe_requests(mdsc, session); | ||
2171 | } else { | ||
2172 | dout("no session for mds%d, will send short reconnect\n", | ||
2173 | mds); | ||
2174 | } | ||
2175 | 2261 | ||
2176 | down_read(&mdsc->snap_rwsem); | 2262 | down_read(&mdsc->snap_rwsem); |
2177 | 2263 | ||
2178 | if (!session) | ||
2179 | goto send; | ||
2180 | dout("session %p state %s\n", session, | 2264 | dout("session %p state %s\n", session, |
2181 | session_state_name(session->s_state)); | 2265 | session_state_name(session->s_state)); |
2182 | 2266 | ||
2267 | /* drop old cap expires; we're about to reestablish that state */ | ||
2268 | discard_cap_releases(mdsc, session); | ||
2269 | |||
2183 | /* traverse this session's caps */ | 2270 | /* traverse this session's caps */ |
2184 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2271 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
2185 | if (err) | 2272 | if (err) |
@@ -2208,36 +2295,29 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2208 | goto fail; | 2295 | goto fail; |
2209 | } | 2296 | } |
2210 | 2297 | ||
2211 | send: | ||
2212 | reply->pagelist = pagelist; | 2298 | reply->pagelist = pagelist; |
2213 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2299 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2300 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2215 | ceph_con_send(&session->s_con, reply); | 2301 | ceph_con_send(&session->s_con, reply); |
2216 | 2302 | ||
2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | ||
2218 | mutex_unlock(&session->s_mutex); | 2303 | mutex_unlock(&session->s_mutex); |
2219 | 2304 | ||
2220 | mutex_lock(&mdsc->mutex); | 2305 | mutex_lock(&mdsc->mutex); |
2221 | __wake_requests(mdsc, &session->s_waiting); | 2306 | __wake_requests(mdsc, &session->s_waiting); |
2222 | mutex_unlock(&mdsc->mutex); | 2307 | mutex_unlock(&mdsc->mutex); |
2223 | 2308 | ||
2224 | ceph_put_mds_session(session); | ||
2225 | |||
2226 | up_read(&mdsc->snap_rwsem); | 2309 | up_read(&mdsc->snap_rwsem); |
2227 | mutex_lock(&mdsc->mutex); | ||
2228 | return; | 2310 | return; |
2229 | 2311 | ||
2230 | fail: | 2312 | fail: |
2231 | ceph_msg_put(reply); | 2313 | ceph_msg_put(reply); |
2232 | up_read(&mdsc->snap_rwsem); | 2314 | up_read(&mdsc->snap_rwsem); |
2233 | mutex_unlock(&session->s_mutex); | 2315 | mutex_unlock(&session->s_mutex); |
2234 | ceph_put_mds_session(session); | ||
2235 | fail_nomsg: | 2316 | fail_nomsg: |
2236 | ceph_pagelist_release(pagelist); | 2317 | ceph_pagelist_release(pagelist); |
2237 | kfree(pagelist); | 2318 | kfree(pagelist); |
2238 | fail_nopagelist: | 2319 | fail_nopagelist: |
2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); | 2320 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
2240 | mutex_lock(&mdsc->mutex); | ||
2241 | return; | 2321 | return; |
2242 | } | 2322 | } |
2243 | 2323 | ||
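send_mds_reconnect() now takes the session from the caller instead of looking it up, and keeps the staged fail/fail_nomsg/fail_nopagelist unwind in which each label releases exactly what was set up before the failure point. The shape of that unwind in a self-contained sketch (the allocations and the encode step are placeholders):

#include <stdio.h>
#include <stdlib.h>

static int do_reconnect(int fail_encoding)
{
	int err = -1;
	char *pagelist, *reply;

	pagelist = malloc(64);
	if (!pagelist)
		goto fail_nopagelist;

	reply = malloc(64);
	if (!reply)
		goto fail_nomsg;

	if (fail_encoding)
		goto fail;		/* e.g. a pagelist encode error */

	/* ... send the reply, wake waiters ... */
	free(reply);
	free(pagelist);
	return 0;

fail:
	free(reply);			/* undo in reverse order of setup */
fail_nomsg:
	free(pagelist);
fail_nopagelist:
	fprintf(stderr, "error %d preparing reconnect\n", err);
	return err;
}

int main(void)
{
	return do_reconnect(0) ? 1 : 0;
}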
@@ -2290,7 +2370,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2290 | } | 2370 | } |
2291 | 2371 | ||
2292 | /* kick any requests waiting on the recovering mds */ | 2372 | /* kick any requests waiting on the recovering mds */ |
2293 | kick_requests(mdsc, i, 1); | 2373 | kick_requests(mdsc, i); |
2294 | } else if (oldstate == newstate) { | 2374 | } else if (oldstate == newstate) { |
2295 | continue; /* nothing new with this mds */ | 2375 | continue; /* nothing new with this mds */ |
2296 | } | 2376 | } |
@@ -2299,22 +2379,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2299 | * send reconnect? | 2379 | * send reconnect? |
2300 | */ | 2380 | */ |
2301 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && | 2381 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && |
2302 | newstate >= CEPH_MDS_STATE_RECONNECT) | 2382 | newstate >= CEPH_MDS_STATE_RECONNECT) { |
2303 | send_mds_reconnect(mdsc, i); | 2383 | mutex_unlock(&mdsc->mutex); |
2384 | send_mds_reconnect(mdsc, s); | ||
2385 | mutex_lock(&mdsc->mutex); | ||
2386 | } | ||
2304 | 2387 | ||
2305 | /* | 2388 | /* |
2306 | * kick requests on any mds that has gone active. | 2389 | * kick requests on any mds that has gone active. |
2307 | * | ||
2308 | * kick requests on cur or forwarder: we may have sent | ||
2309 | * the request to mds1, mds1 told us it forwarded it | ||
2310 | * to mds2, but then we learn mds1 failed and can't be | ||
2311 | * sure it successfully forwarded our request before | ||
2312 | * it died. | ||
2313 | */ | 2390 | */ |
2314 | if (oldstate < CEPH_MDS_STATE_ACTIVE && | 2391 | if (oldstate < CEPH_MDS_STATE_ACTIVE && |
2315 | newstate >= CEPH_MDS_STATE_ACTIVE) { | 2392 | newstate >= CEPH_MDS_STATE_ACTIVE) { |
2316 | pr_info("mds%d reconnect completed\n", s->s_mds); | 2393 | if (oldstate != CEPH_MDS_STATE_CREATING && |
2317 | kick_requests(mdsc, i, 1); | 2394 | oldstate != CEPH_MDS_STATE_STARTING) |
2395 | pr_info("mds%d recovery completed\n", s->s_mds); | ||
2396 | kick_requests(mdsc, i); | ||
2318 | ceph_kick_flushing_caps(mdsc, s); | 2397 | ceph_kick_flushing_caps(mdsc, s); |
2319 | wake_up_session_caps(s, 1); | 2398 | wake_up_session_caps(s, 1); |
2320 | } | 2399 | } |
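Note how check_new_map() now drops mdsc->mutex around the send_mds_reconnect() call: the callee sleeps and takes session->s_mutex, so holding both across the call would nest the locks in a questionable order. A minimal illustration of the unlock-call-relock discipline (mutex names are stand-ins):

#include <pthread.h>

static pthread_mutex_t mdsc_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t session_mutex = PTHREAD_MUTEX_INITIALIZER;

static void send_reconnect(void)
{
	pthread_mutex_lock(&session_mutex);
	/* ... build and send the reconnect; may block ... */
	pthread_mutex_unlock(&session_mutex);
}

static void check_map(void)
{
	pthread_mutex_lock(&mdsc_mutex);
	/* ... scan sessions against the new map ... */
	pthread_mutex_unlock(&mdsc_mutex);	/* drop before nesting */
	send_reconnect();
	pthread_mutex_lock(&mdsc_mutex);	/* retake, continue scan */
	pthread_mutex_unlock(&mdsc_mutex);
}

int main(void)
{
	check_map();
	return 0;
}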
@@ -2457,8 +2536,8 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
2457 | dnamelen = dentry->d_name.len; | 2536 | dnamelen = dentry->d_name.len; |
2458 | len += dnamelen; | 2537 | len += dnamelen; |
2459 | 2538 | ||
2460 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); | 2539 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS); |
2461 | if (IS_ERR(msg)) | 2540 | if (!msg) |
2462 | return; | 2541 | return; |
2463 | lease = msg->front.iov_base; | 2542 | lease = msg->front.iov_base; |
2464 | lease->action = action; | 2543 | lease->action = action; |
@@ -2603,7 +2682,9 @@ static void delayed_work(struct work_struct *work) | |||
2603 | else | 2682 | else |
2604 | ceph_con_keepalive(&s->s_con); | 2683 | ceph_con_keepalive(&s->s_con); |
2605 | add_cap_releases(mdsc, s, -1); | 2684 | add_cap_releases(mdsc, s, -1); |
2606 | send_cap_releases(mdsc, s); | 2685 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
2686 | s->s_state == CEPH_MDS_SESSION_HUNG) | ||
2687 | send_cap_releases(mdsc, s); | ||
2607 | mutex_unlock(&s->s_mutex); | 2688 | mutex_unlock(&s->s_mutex); |
2608 | ceph_put_mds_session(s); | 2689 | ceph_put_mds_session(s); |
2609 | 2690 | ||
@@ -2620,6 +2701,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2620 | mdsc->client = client; | 2701 | mdsc->client = client; |
2621 | mutex_init(&mdsc->mutex); | 2702 | mutex_init(&mdsc->mutex); |
2622 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2703 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
2704 | if (mdsc->mdsmap == NULL) | ||
2705 | return -ENOMEM; | ||
2706 | |||
2623 | init_completion(&mdsc->safe_umount_waiters); | 2707 | init_completion(&mdsc->safe_umount_waiters); |
2624 | init_completion(&mdsc->session_close_waiters); | 2708 | init_completion(&mdsc->session_close_waiters); |
2625 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2709 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
@@ -2645,6 +2729,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2645 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 2729 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
2646 | spin_lock_init(&mdsc->dentry_lru_lock); | 2730 | spin_lock_init(&mdsc->dentry_lru_lock); |
2647 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2731 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2732 | |||
2648 | return 0; | 2733 | return 0; |
2649 | } | 2734 | } |
2650 | 2735 | ||
@@ -2740,6 +2825,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
2740 | { | 2825 | { |
2741 | u64 want_tid, want_flush; | 2826 | u64 want_tid, want_flush; |
2742 | 2827 | ||
2828 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
2829 | return; | ||
2830 | |||
2743 | dout("sync\n"); | 2831 | dout("sync\n"); |
2744 | mutex_lock(&mdsc->mutex); | 2832 | mutex_lock(&mdsc->mutex); |
2745 | want_tid = mdsc->last_tid; | 2833 | want_tid = mdsc->last_tid; |
@@ -2922,9 +3010,10 @@ static void con_put(struct ceph_connection *con) | |||
2922 | static void peer_reset(struct ceph_connection *con) | 3010 | static void peer_reset(struct ceph_connection *con) |
2923 | { | 3011 | { |
2924 | struct ceph_mds_session *s = con->private; | 3012 | struct ceph_mds_session *s = con->private; |
3013 | struct ceph_mds_client *mdsc = s->s_mdsc; | ||
2925 | 3014 | ||
2926 | pr_err("mds%d gave us the boot. IMPLEMENT RECONNECT.\n", | 3015 | pr_warning("mds%d closed our session\n", s->s_mds); |
2927 | s->s_mds); | 3016 | send_mds_reconnect(mdsc, s); |
2928 | } | 3017 | } |
2929 | 3018 | ||
2930 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | 3019 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) |
@@ -3031,7 +3120,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
3031 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3120 | return ceph_monc_validate_auth(&mdsc->client->monc); |
3032 | } | 3121 | } |
3033 | 3122 | ||
3034 | const static struct ceph_connection_operations mds_con_ops = { | 3123 | static const struct ceph_connection_operations mds_con_ops = { |
3035 | .get = con_get, | 3124 | .get = con_get, |
3036 | .put = con_put, | 3125 | .put = con_put, |
3037 | .dispatch = dispatch, | 3126 | .dispatch = dispatch, |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 961cc6f65878..d9936c4f1212 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -165,6 +165,8 @@ struct ceph_mds_request { | |||
165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ | 165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ |
166 | struct inode *r_target_inode; /* resulting inode */ | 166 | struct inode *r_target_inode; /* resulting inode */ |
167 | 167 | ||
168 | struct mutex r_fill_mutex; | ||
169 | |||
168 | union ceph_mds_request_args r_args; | 170 | union ceph_mds_request_args r_args; |
169 | int r_fmode; /* file mode, if expecting cap */ | 171 | int r_fmode; /* file mode, if expecting cap */ |
170 | 172 | ||
@@ -213,7 +215,7 @@ struct ceph_mds_request { | |||
213 | struct completion r_safe_completion; | 215 | struct completion r_safe_completion; |
214 | ceph_mds_request_callback_t r_callback; | 216 | ceph_mds_request_callback_t r_callback; |
215 | struct list_head r_unsafe_item; /* per-session unsafe list item */ | 217 | struct list_head r_unsafe_item; /* per-session unsafe list item */ |
216 | bool r_got_unsafe, r_got_safe; | 218 | bool r_got_unsafe, r_got_safe, r_got_result; |
217 | 219 | ||
218 | bool r_did_prepopulate; | 220 | bool r_did_prepopulate; |
219 | u32 r_readdir_offset; | 221 | u32 r_readdir_offset; |
@@ -301,6 +303,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
301 | struct inode *inode, | 303 | struct inode *inode, |
302 | struct dentry *dn, int mask); | 304 | struct dentry *dn, int mask); |
303 | 305 | ||
306 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | ||
307 | |||
304 | extern struct ceph_mds_request * | 308 | extern struct ceph_mds_request * |
305 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 309 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
306 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 310 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index cd4fadb6491a..60b74839ebec 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -39,18 +39,6 @@ static void queue_con(struct ceph_connection *con); | |||
39 | static void con_work(struct work_struct *); | 39 | static void con_work(struct work_struct *); |
40 | static void ceph_fault(struct ceph_connection *con); | 40 | static void ceph_fault(struct ceph_connection *con); |
41 | 41 | ||
42 | const char *ceph_name_type_str(int t) | ||
43 | { | ||
44 | switch (t) { | ||
45 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
46 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
47 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
48 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
49 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
50 | default: return "???"; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* | 42 | /* |
55 | * nicely render a sockaddr as a string. | 43 | * nicely render a sockaddr as a string. |
56 | */ | 44 | */ |
@@ -340,6 +328,7 @@ static void reset_connection(struct ceph_connection *con) | |||
340 | ceph_msg_put(con->out_msg); | 328 | ceph_msg_put(con->out_msg); |
341 | con->out_msg = NULL; | 329 | con->out_msg = NULL; |
342 | } | 330 | } |
331 | con->out_keepalive_pending = false; | ||
343 | con->in_seq = 0; | 332 | con->in_seq = 0; |
344 | con->in_seq_acked = 0; | 333 | con->in_seq_acked = 0; |
345 | } | 334 | } |
@@ -357,6 +346,7 @@ void ceph_con_close(struct ceph_connection *con) | |||
357 | clear_bit(WRITE_PENDING, &con->state); | 346 | clear_bit(WRITE_PENDING, &con->state); |
358 | mutex_lock(&con->mutex); | 347 | mutex_lock(&con->mutex); |
359 | reset_connection(con); | 348 | reset_connection(con); |
349 | con->peer_global_seq = 0; | ||
360 | cancel_delayed_work(&con->work); | 350 | cancel_delayed_work(&con->work); |
361 | mutex_unlock(&con->mutex); | 351 | mutex_unlock(&con->mutex); |
362 | queue_con(con); | 352 | queue_con(con); |
@@ -661,7 +651,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
661 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 651 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
662 | con->connect_seq, global_seq, proto); | 652 | con->connect_seq, global_seq, proto); |
663 | 653 | ||
664 | con->out_connect.features = CEPH_FEATURE_SUPPORTED; | 654 | con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; |
665 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 655 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
666 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 656 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
667 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 657 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1124,8 +1114,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1124 | 1114 | ||
1125 | static int process_connect(struct ceph_connection *con) | 1115 | static int process_connect(struct ceph_connection *con) |
1126 | { | 1116 | { |
1127 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1117 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; |
1128 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1118 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; |
1129 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1119 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1130 | 1120 | ||
1131 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1121 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1233,6 +1223,7 @@ static int process_connect(struct ceph_connection *con) | |||
1233 | clear_bit(CONNECTING, &con->state); | 1223 | clear_bit(CONNECTING, &con->state); |
1234 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); | 1224 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); |
1235 | con->connect_seq++; | 1225 | con->connect_seq++; |
1226 | con->peer_features = server_feat; | ||
1236 | dout("process_connect got READY gseq %d cseq %d (%d)\n", | 1227 | dout("process_connect got READY gseq %d cseq %d (%d)\n", |
1237 | con->peer_global_seq, | 1228 | con->peer_global_seq, |
1238 | le32_to_cpu(con->in_reply.connect_seq), | 1229 | le32_to_cpu(con->in_reply.connect_seq), |
@@ -1402,19 +1393,17 @@ static int read_partial_message(struct ceph_connection *con) | |||
1402 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1393 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); |
1403 | if (skip) { | 1394 | if (skip) { |
1404 | /* skip this message */ | 1395 | /* skip this message */ |
1405 | dout("alloc_msg returned NULL, skipping message\n"); | 1396 | dout("alloc_msg said skip message\n"); |
1406 | con->in_base_pos = -front_len - middle_len - data_len - | 1397 | con->in_base_pos = -front_len - middle_len - data_len - |
1407 | sizeof(m->footer); | 1398 | sizeof(m->footer); |
1408 | con->in_tag = CEPH_MSGR_TAG_READY; | 1399 | con->in_tag = CEPH_MSGR_TAG_READY; |
1409 | con->in_seq++; | 1400 | con->in_seq++; |
1410 | return 0; | 1401 | return 0; |
1411 | } | 1402 | } |
1412 | if (IS_ERR(con->in_msg)) { | 1403 | if (!con->in_msg) { |
1413 | ret = PTR_ERR(con->in_msg); | ||
1414 | con->in_msg = NULL; | ||
1415 | con->error_msg = | 1404 | con->error_msg = |
1416 | "error allocating memory for incoming message"; | 1405 | "error allocating memory for incoming message"; |
1417 | return ret; | 1406 | return -ENOMEM; |
1418 | } | 1407 | } |
1419 | m = con->in_msg; | 1408 | m = con->in_msg; |
1420 | m->front.iov_len = 0; /* haven't read it yet */ | 1409 | m->front.iov_len = 0; /* haven't read it yet */ |
@@ -1514,14 +1503,14 @@ static void process_message(struct ceph_connection *con) | |||
1514 | 1503 | ||
1515 | /* if first message, set peer_name */ | 1504 | /* if first message, set peer_name */ |
1516 | if (con->peer_name.type == 0) | 1505 | if (con->peer_name.type == 0) |
1517 | con->peer_name = msg->hdr.src.name; | 1506 | con->peer_name = msg->hdr.src; |
1518 | 1507 | ||
1519 | con->in_seq++; | 1508 | con->in_seq++; |
1520 | mutex_unlock(&con->mutex); | 1509 | mutex_unlock(&con->mutex); |
1521 | 1510 | ||
1522 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", | 1511 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", |
1523 | msg, le64_to_cpu(msg->hdr.seq), | 1512 | msg, le64_to_cpu(msg->hdr.seq), |
1524 | ENTITY_NAME(msg->hdr.src.name), | 1513 | ENTITY_NAME(msg->hdr.src), |
1525 | le16_to_cpu(msg->hdr.type), | 1514 | le16_to_cpu(msg->hdr.type), |
1526 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), | 1515 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), |
1527 | le32_to_cpu(msg->hdr.front_len), | 1516 | le32_to_cpu(msg->hdr.front_len), |
@@ -1546,7 +1535,6 @@ static int try_write(struct ceph_connection *con) | |||
1546 | dout("try_write start %p state %lu nref %d\n", con, con->state, | 1535 | dout("try_write start %p state %lu nref %d\n", con, con->state, |
1547 | atomic_read(&con->nref)); | 1536 | atomic_read(&con->nref)); |
1548 | 1537 | ||
1549 | mutex_lock(&con->mutex); | ||
1550 | more: | 1538 | more: |
1551 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); | 1539 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); |
1552 | 1540 | ||
@@ -1639,7 +1627,6 @@ do_next: | |||
1639 | done: | 1627 | done: |
1640 | ret = 0; | 1628 | ret = 0; |
1641 | out: | 1629 | out: |
1642 | mutex_unlock(&con->mutex); | ||
1643 | dout("try_write done on %p\n", con); | 1630 | dout("try_write done on %p\n", con); |
1644 | return ret; | 1631 | return ret; |
1645 | } | 1632 | } |
@@ -1651,7 +1638,6 @@ out: | |||
1651 | */ | 1638 | */ |
1652 | static int try_read(struct ceph_connection *con) | 1639 | static int try_read(struct ceph_connection *con) |
1653 | { | 1640 | { |
1654 | struct ceph_messenger *msgr; | ||
1655 | int ret = -1; | 1641 | int ret = -1; |
1656 | 1642 | ||
1657 | if (!con->sock) | 1643 | if (!con->sock) |
@@ -1661,9 +1647,6 @@ static int try_read(struct ceph_connection *con) | |||
1661 | return 0; | 1647 | return 0; |
1662 | 1648 | ||
1663 | dout("try_read start on %p\n", con); | 1649 | dout("try_read start on %p\n", con); |
1664 | msgr = con->msgr; | ||
1665 | |||
1666 | mutex_lock(&con->mutex); | ||
1667 | 1650 | ||
1668 | more: | 1651 | more: |
1669 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | 1652 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, |
@@ -1758,7 +1741,6 @@ more: | |||
1758 | done: | 1741 | done: |
1759 | ret = 0; | 1742 | ret = 0; |
1760 | out: | 1743 | out: |
1761 | mutex_unlock(&con->mutex); | ||
1762 | dout("try_read done on %p\n", con); | 1744 | dout("try_read done on %p\n", con); |
1763 | return ret; | 1745 | return ret; |
1764 | 1746 | ||
@@ -1830,6 +1812,8 @@ more: | |||
1830 | dout("con_work %p start, clearing QUEUED\n", con); | 1812 | dout("con_work %p start, clearing QUEUED\n", con); |
1831 | clear_bit(QUEUED, &con->state); | 1813 | clear_bit(QUEUED, &con->state); |
1832 | 1814 | ||
1815 | mutex_lock(&con->mutex); | ||
1816 | |||
1833 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1817 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
1834 | dout("con_work CLOSED\n"); | 1818 | dout("con_work CLOSED\n"); |
1835 | con_close_socket(con); | 1819 | con_close_socket(con); |
@@ -1844,11 +1828,16 @@ more: | |||
1844 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || | 1828 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || |
1845 | try_read(con) < 0 || | 1829 | try_read(con) < 0 || |
1846 | try_write(con) < 0) { | 1830 | try_write(con) < 0) { |
1831 | mutex_unlock(&con->mutex); | ||
1847 | backoff = 1; | 1832 | backoff = 1; |
1848 | ceph_fault(con); /* error/fault path */ | 1833 | ceph_fault(con); /* error/fault path */ |
1834 | goto done_unlocked; | ||
1849 | } | 1835 | } |
1850 | 1836 | ||
1851 | done: | 1837 | done: |
1838 | mutex_unlock(&con->mutex); | ||
1839 | |||
1840 | done_unlocked: | ||
1852 | clear_bit(BUSY, &con->state); | 1841 | clear_bit(BUSY, &con->state); |
1853 | dout("con->state=%lu\n", con->state); | 1842 | dout("con->state=%lu\n", con->state); |
1854 | if (test_bit(QUEUED, &con->state)) { | 1843 | if (test_bit(QUEUED, &con->state)) { |
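The messenger hunks hoist con->mutex out of try_read() and try_write() into con_work(), so the worker holds it across both calls and the fault path runs only after an explicit unlock (the done/done_unlocked split above). A pthread sketch of the resulting control flow:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t con_mutex = PTHREAD_MUTEX_INITIALIZER;

static int try_io(void)		/* called with con_mutex held */
{
	return 0;		/* a negative return models a socket fault */
}

static void fault(void)		/* must run without con_mutex */
{
	fprintf(stderr, "connection fault, backing off\n");
}

static void con_work(void)
{
	pthread_mutex_lock(&con_mutex);
	if (try_io() < 0) {
		pthread_mutex_unlock(&con_mutex);
		fault();			/* error path, unlocked */
		goto done_unlocked;
	}
	pthread_mutex_unlock(&con_mutex);	/* normal "done" path */
done_unlocked:
	/* clear BUSY, requeue if more work was flagged */
	return;
}

int main(void)
{
	con_work();
	return 0;
}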
@@ -1947,7 +1936,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
1947 | 1936 | ||
1948 | /* the zero page is needed if a request is "canceled" while the message | 1937 | /* the zero page is needed if a request is "canceled" while the message |
1949 | * is being written over the socket */ | 1938 | * is being written over the socket */ |
1950 | msgr->zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 1939 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); |
1951 | if (!msgr->zero_page) { | 1940 | if (!msgr->zero_page) { |
1952 | kfree(msgr); | 1941 | kfree(msgr); |
1953 | return ERR_PTR(-ENOMEM); | 1942 | return ERR_PTR(-ENOMEM); |
@@ -1987,9 +1976,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
1987 | } | 1976 | } |
1988 | 1977 | ||
1989 | /* set src+dst */ | 1978 | /* set src+dst */ |
1990 | msg->hdr.src.name = con->msgr->inst.name; | 1979 | msg->hdr.src = con->msgr->inst.name; |
1991 | msg->hdr.src.addr = con->msgr->my_enc_addr; | ||
1992 | msg->hdr.orig_src = msg->hdr.src; | ||
1993 | 1980 | ||
1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1981 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
1995 | 1982 | ||
@@ -2083,12 +2070,11 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
2083 | * construct a new message with given type, size | 2070 | * construct a new message with given type, size |
2084 | * the new msg has a ref count of 1. | 2071 | * the new msg has a ref count of 1. |
2085 | */ | 2072 | */ |
2086 | struct ceph_msg *ceph_msg_new(int type, int front_len, | 2073 | struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) |
2087 | int page_len, int page_off, struct page **pages) | ||
2088 | { | 2074 | { |
2089 | struct ceph_msg *m; | 2075 | struct ceph_msg *m; |
2090 | 2076 | ||
2091 | m = kmalloc(sizeof(*m), GFP_NOFS); | 2077 | m = kmalloc(sizeof(*m), flags); |
2092 | if (m == NULL) | 2078 | if (m == NULL) |
2093 | goto out; | 2079 | goto out; |
2094 | kref_init(&m->kref); | 2080 | kref_init(&m->kref); |
@@ -2100,8 +2086,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2100 | m->hdr.version = 0; | 2086 | m->hdr.version = 0; |
2101 | m->hdr.front_len = cpu_to_le32(front_len); | 2087 | m->hdr.front_len = cpu_to_le32(front_len); |
2102 | m->hdr.middle_len = 0; | 2088 | m->hdr.middle_len = 0; |
2103 | m->hdr.data_len = cpu_to_le32(page_len); | 2089 | m->hdr.data_len = 0; |
2104 | m->hdr.data_off = cpu_to_le16(page_off); | 2090 | m->hdr.data_off = 0; |
2105 | m->hdr.reserved = 0; | 2091 | m->hdr.reserved = 0; |
2106 | m->footer.front_crc = 0; | 2092 | m->footer.front_crc = 0; |
2107 | m->footer.middle_crc = 0; | 2093 | m->footer.middle_crc = 0; |
@@ -2115,11 +2101,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2115 | /* front */ | 2101 | /* front */ |
2116 | if (front_len) { | 2102 | if (front_len) { |
2117 | if (front_len > PAGE_CACHE_SIZE) { | 2103 | if (front_len > PAGE_CACHE_SIZE) { |
2118 | m->front.iov_base = __vmalloc(front_len, GFP_NOFS, | 2104 | m->front.iov_base = __vmalloc(front_len, flags, |
2119 | PAGE_KERNEL); | 2105 | PAGE_KERNEL); |
2120 | m->front_is_vmalloc = true; | 2106 | m->front_is_vmalloc = true; |
2121 | } else { | 2107 | } else { |
2122 | m->front.iov_base = kmalloc(front_len, GFP_NOFS); | 2108 | m->front.iov_base = kmalloc(front_len, flags); |
2123 | } | 2109 | } |
2124 | if (m->front.iov_base == NULL) { | 2110 | if (m->front.iov_base == NULL) { |
2125 | pr_err("msg_new can't allocate %d bytes\n", | 2111 | pr_err("msg_new can't allocate %d bytes\n", |
@@ -2135,19 +2121,18 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2135 | m->middle = NULL; | 2121 | m->middle = NULL; |
2136 | 2122 | ||
2137 | /* data */ | 2123 | /* data */ |
2138 | m->nr_pages = calc_pages_for(page_off, page_len); | 2124 | m->nr_pages = 0; |
2139 | m->pages = pages; | 2125 | m->pages = NULL; |
2140 | m->pagelist = NULL; | 2126 | m->pagelist = NULL; |
2141 | 2127 | ||
2142 | dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, | 2128 | dout("ceph_msg_new %p front %d\n", m, front_len); |
2143 | m->nr_pages); | ||
2144 | return m; | 2129 | return m; |
2145 | 2130 | ||
2146 | out2: | 2131 | out2: |
2147 | ceph_msg_put(m); | 2132 | ceph_msg_put(m); |
2148 | out: | 2133 | out: |
2149 | pr_err("msg_new can't create type %d len %d\n", type, front_len); | 2134 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
2150 | return ERR_PTR(-ENOMEM); | 2135 | return NULL; |
2151 | } | 2136 | } |
2152 | 2137 | ||
2153 | /* | 2138 | /* |
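ceph_msg_new() loses its page parameters and gains a gfp_t, so the caller's allocation context threads through every internal allocation (most call sites above pass GFP_NOFS, which keeps memory reclaim from re-entering the filesystem), and failure now reports as NULL. A userspace model of that flag-threading shape (the gfp_t values are illustrative stand-ins):

#include <stdlib.h>

typedef unsigned gfp_t;
#define GFP_KERNEL	0x1
#define GFP_NOFS	0x2	/* "don't recurse into the fs" marker */

struct toy_msg {
	char *front;
	int front_len;
};

static void *alloc(size_t n, gfp_t flags)
{
	(void)flags;		/* a real allocator would honor these */
	return malloc(n);
}

static struct toy_msg *msg_new(int front_len, gfp_t flags)
{
	struct toy_msg *m = alloc(sizeof(*m), flags);

	if (!m)
		return NULL;	/* NULL, not ERR_PTR, on failure */
	m->front = alloc(front_len, flags);
	if (!m->front) {
		free(m);
		return NULL;
	}
	m->front_len = front_len;
	return m;
}

int main(void)
{
	struct toy_msg *m = msg_new(128, GFP_NOFS);

	if (!m)
		return 1;
	free(m->front);
	free(m);
	return 0;
}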
@@ -2190,29 +2175,25 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
2190 | mutex_unlock(&con->mutex); | 2175 | mutex_unlock(&con->mutex); |
2191 | msg = con->ops->alloc_msg(con, hdr, skip); | 2176 | msg = con->ops->alloc_msg(con, hdr, skip); |
2192 | mutex_lock(&con->mutex); | 2177 | mutex_lock(&con->mutex); |
2193 | if (IS_ERR(msg)) | 2178 | if (!msg || *skip) |
2194 | return msg; | ||
2195 | |||
2196 | if (*skip) | ||
2197 | return NULL; | 2179 | return NULL; |
2198 | } | 2180 | } |
2199 | if (!msg) { | 2181 | if (!msg) { |
2200 | *skip = 0; | 2182 | *skip = 0; |
2201 | msg = ceph_msg_new(type, front_len, 0, 0, NULL); | 2183 | msg = ceph_msg_new(type, front_len, GFP_NOFS); |
2202 | if (!msg) { | 2184 | if (!msg) { |
2203 | pr_err("unable to allocate msg type %d len %d\n", | 2185 | pr_err("unable to allocate msg type %d len %d\n", |
2204 | type, front_len); | 2186 | type, front_len); |
2205 | return ERR_PTR(-ENOMEM); | 2187 | return NULL; |
2206 | } | 2188 | } |
2207 | } | 2189 | } |
2208 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2190 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
2209 | 2191 | ||
2210 | if (middle_len) { | 2192 | if (middle_len && !msg->middle) { |
2211 | ret = ceph_alloc_middle(con, msg); | 2193 | ret = ceph_alloc_middle(con, msg); |
2212 | |||
2213 | if (ret < 0) { | 2194 | if (ret < 0) { |
2214 | ceph_msg_put(msg); | 2195 | ceph_msg_put(msg); |
2215 | return msg; | 2196 | return NULL; |
2216 | } | 2197 | } |
2217 | } | 2198 | } |
2218 | 2199 | ||
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a5caf91cc971..00a9430b1ffc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -49,10 +49,8 @@ struct ceph_connection_operations { | |||
49 | int *skip); | 49 | int *skip); |
50 | }; | 50 | }; |
51 | 51 | ||
52 | extern const char *ceph_name_type_str(int t); | ||
53 | |||
54 | /* use format string %s%d */ | 52 | /* use format string %s%d */ |
55 | #define ENTITY_NAME(n) ceph_name_type_str((n).type), le64_to_cpu((n).num) | 53 | #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) |
56 | 54 | ||
57 | struct ceph_messenger { | 55 | struct ceph_messenger { |
58 | struct ceph_entity_inst inst; /* my name+address */ | 56 | struct ceph_entity_inst inst; /* my name+address */ |
@@ -144,6 +142,7 @@ struct ceph_connection { | |||
144 | struct ceph_entity_addr peer_addr; /* peer address */ | 142 | struct ceph_entity_addr peer_addr; /* peer address */ |
145 | struct ceph_entity_name peer_name; /* peer name */ | 143 | struct ceph_entity_name peer_name; /* peer name */ |
146 | struct ceph_entity_addr peer_addr_for_me; | 144 | struct ceph_entity_addr peer_addr_for_me; |
145 | unsigned peer_features; | ||
147 | u32 connect_seq; /* identify the most recent connection | 146 | u32 connect_seq; /* identify the most recent connection |
148 | attempt for this connection, client */ | 147 | attempt for this connection, client */ |
149 | u32 peer_global_seq; /* peer's global seq for this connection */ | 148 | u32 peer_global_seq; /* peer's global seq for this connection */ |
@@ -158,7 +157,6 @@ struct ceph_connection { | |||
158 | struct list_head out_queue; | 157 | struct list_head out_queue; |
159 | struct list_head out_sent; /* sending or sent but unacked */ | 158 | struct list_head out_sent; /* sending or sent but unacked */ |
160 | u64 out_seq; /* last message queued for send */ | 159 | u64 out_seq; /* last message queued for send */ |
161 | u64 out_seq_sent; /* last message sent */ | ||
162 | bool out_keepalive_pending; | 160 | bool out_keepalive_pending; |
163 | 161 | ||
164 | u64 in_seq, in_seq_acked; /* last message received, acked */ | 162 | u64 in_seq, in_seq_acked; /* last message received, acked */ |
@@ -234,9 +232,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con); | |||
234 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); | 232 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); |
235 | extern void ceph_con_put(struct ceph_connection *con); | 233 | extern void ceph_con_put(struct ceph_connection *con); |
236 | 234 | ||
237 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, | 235 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); |
238 | int page_len, int page_off, | ||
239 | struct page **pages); | ||
240 | extern void ceph_msg_kfree(struct ceph_msg *m); | 236 | extern void ceph_msg_kfree(struct ceph_msg *m); |
241 | 237 | ||
242 | 238 | ||
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 8fdc011ca956..f6510a476e7e 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
@@ -28,7 +28,7 @@ | |||
28 | * resend any outstanding requests. | 28 | * resend any outstanding requests. |
29 | */ | 29 | */ |
30 | 30 | ||
31 | const static struct ceph_connection_operations mon_con_ops; | 31 | static const struct ceph_connection_operations mon_con_ops; |
32 | 32 | ||
33 | static int __validate_auth(struct ceph_mon_client *monc); | 33 | static int __validate_auth(struct ceph_mon_client *monc); |
34 | 34 | ||
@@ -104,6 +104,7 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | |||
104 | monc->pending_auth = 1; | 104 | monc->pending_auth = 1; |
105 | monc->m_auth->front.iov_len = len; | 105 | monc->m_auth->front.iov_len = len; |
106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); | 106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); |
107 | ceph_con_revoke(monc->con, monc->m_auth); | ||
107 | ceph_msg_get(monc->m_auth); /* keep our ref */ | 108 | ceph_msg_get(monc->m_auth); /* keep our ref */ |
108 | ceph_con_send(monc->con, monc->m_auth); | 109 | ceph_con_send(monc->con, monc->m_auth); |
109 | } | 110 | } |
@@ -187,16 +188,12 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
187 | monc->want_next_osdmap); | 188 | monc->want_next_osdmap); |
188 | if ((__sub_expired(monc) && !monc->sub_sent) || | 189 | if ((__sub_expired(monc) && !monc->sub_sent) || |
189 | monc->want_next_osdmap == 1) { | 190 | monc->want_next_osdmap == 1) { |
190 | struct ceph_msg *msg; | 191 | struct ceph_msg *msg = monc->m_subscribe; |
191 | struct ceph_mon_subscribe_item *i; | 192 | struct ceph_mon_subscribe_item *i; |
192 | void *p, *end; | 193 | void *p, *end; |
193 | 194 | ||
194 | msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL); | ||
195 | if (!msg) | ||
196 | return; | ||
197 | |||
198 | p = msg->front.iov_base; | 195 | p = msg->front.iov_base; |
199 | end = p + msg->front.iov_len; | 196 | end = p + msg->front_max; |
200 | 197 | ||
201 | dout("__send_subscribe to 'mdsmap' %u+\n", | 198 | dout("__send_subscribe to 'mdsmap' %u+\n", |
202 | (unsigned)monc->have_mdsmap); | 199 | (unsigned)monc->have_mdsmap); |
@@ -226,7 +223,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
226 | 223 | ||
227 | msg->front.iov_len = p - msg->front.iov_base; | 224 | msg->front.iov_len = p - msg->front.iov_base; |
228 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 225 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
229 | ceph_con_send(monc->con, msg); | 226 | ceph_con_revoke(monc->con, msg); |
227 | ceph_con_send(monc->con, ceph_msg_get(msg)); | ||
230 | 228 | ||
231 | monc->sub_sent = jiffies | 1; /* never 0 */ | 229 | monc->sub_sent = jiffies | 1; /* never 0 */ |
232 | } | 230 | } |
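m_subscribe is likewise allocated once and reused, so the encoding pass bounds itself by front_max — the buffer's capacity — rather than the iov_len left over from the previous send, then records the freshly encoded length in both the iovec and the wire header. A condensed sketch of the reuse pattern:

    void *p = msg->front.iov_base;
    void *end = p + msg->front_max;     /* capacity, not last send's length */

    /* ... encode the subscribe items between p and end ... */

    msg->front.iov_len = p - msg->front.iov_base;
    msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
    ceph_con_revoke(monc->con, msg);
    ceph_con_send(monc->con, ceph_msg_get(msg));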
@@ -353,14 +351,14 @@ out: | |||
353 | /* | 351 | /* |
354 | * statfs | 352 | * statfs |
355 | */ | 353 | */ |
356 | static struct ceph_mon_statfs_request *__lookup_statfs( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
357 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
358 | { | 356 | { |
359 | struct ceph_mon_statfs_request *req; | 357 | struct ceph_mon_generic_request *req; |
360 | struct rb_node *n = monc->statfs_request_tree.rb_node; | 358 | struct rb_node *n = monc->generic_request_tree.rb_node; |
361 | 359 | ||
362 | while (n) { | 360 | while (n) { |
363 | req = rb_entry(n, struct ceph_mon_statfs_request, node); | 361 | req = rb_entry(n, struct ceph_mon_generic_request, node); |
364 | if (tid < req->tid) | 362 | if (tid < req->tid) |
365 | n = n->rb_left; | 363 | n = n->rb_left; |
366 | else if (tid > req->tid) | 364 | else if (tid > req->tid) |
@@ -371,16 +369,16 @@ static struct ceph_mon_statfs_request *__lookup_statfs( | |||
371 | return NULL; | 369 | return NULL; |
372 | } | 370 | } |
373 | 371 | ||
374 | static void __insert_statfs(struct ceph_mon_client *monc, | 372 | static void __insert_generic_request(struct ceph_mon_client *monc, |
375 | struct ceph_mon_statfs_request *new) | 373 | struct ceph_mon_generic_request *new) |
376 | { | 374 | { |
377 | struct rb_node **p = &monc->statfs_request_tree.rb_node; | 375 | struct rb_node **p = &monc->generic_request_tree.rb_node; |
378 | struct rb_node *parent = NULL; | 376 | struct rb_node *parent = NULL; |
379 | struct ceph_mon_statfs_request *req = NULL; | 377 | struct ceph_mon_generic_request *req = NULL; |
380 | 378 | ||
381 | while (*p) { | 379 | while (*p) { |
382 | parent = *p; | 380 | parent = *p; |
383 | req = rb_entry(parent, struct ceph_mon_statfs_request, node); | 381 | req = rb_entry(parent, struct ceph_mon_generic_request, node); |
384 | if (new->tid < req->tid) | 382 | if (new->tid < req->tid) |
385 | p = &(*p)->rb_left; | 383 | p = &(*p)->rb_left; |
386 | else if (new->tid > req->tid) | 384 | else if (new->tid > req->tid) |
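Together these helpers give the monitor client a tid-keyed red-black index over all in-flight generic requests, so any reply type can be matched back to its waiter. Registration happens under monc->mutex — the same lock the reply path takes before lookup. The sequence below is a sketch of the steps ceph_monc_do_statfs() performs verbatim further down:

    mutex_lock(&monc->mutex);
    req->tid = ++monc->last_tid;                    /* unique, monotonic */
    req->request->hdr.tid = cpu_to_le64(req->tid);  /* echoed by the mon */
    __insert_generic_request(monc, req);
    monc->num_generic_requests++;
    mutex_unlock(&monc->mutex);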
@@ -390,113 +388,157 @@ static void __insert_statfs(struct ceph_mon_client *monc, | |||
390 | } | 388 | } |
391 | 389 | ||
392 | rb_link_node(&new->node, parent, p); | 390 | rb_link_node(&new->node, parent, p); |
393 | rb_insert_color(&new->node, &monc->statfs_request_tree); | 391 | rb_insert_color(&new->node, &monc->generic_request_tree); |
392 | } | ||
393 | |||
394 | static void release_generic_request(struct kref *kref) | ||
395 | { | ||
396 | struct ceph_mon_generic_request *req = | ||
397 | container_of(kref, struct ceph_mon_generic_request, kref); | ||
398 | |||
399 | if (req->reply) | ||
400 | ceph_msg_put(req->reply); | ||
401 | if (req->request) | ||
402 | ceph_msg_put(req->request); | ||
403 | } | ||
404 | |||
405 | static void put_generic_request(struct ceph_mon_generic_request *req) | ||
406 | { | ||
407 | kref_put(&req->kref, release_generic_request); | ||
408 | } | ||
409 | |||
410 | static void get_generic_request(struct ceph_mon_generic_request *req) | ||
411 | { | ||
412 | kref_get(&req->kref); | ||
413 | } | ||
414 | |||
415 | static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | ||
416 | struct ceph_msg_header *hdr, | ||
417 | int *skip) | ||
418 | { | ||
419 | struct ceph_mon_client *monc = con->private; | ||
420 | struct ceph_mon_generic_request *req; | ||
421 | u64 tid = le64_to_cpu(hdr->tid); | ||
422 | struct ceph_msg *m; | ||
423 | |||
424 | mutex_lock(&monc->mutex); | ||
425 | req = __lookup_generic_req(monc, tid); | ||
426 | if (!req) { | ||
427 | dout("get_generic_reply %lld dne\n", tid); | ||
428 | *skip = 1; | ||
429 | m = NULL; | ||
430 | } else { | ||
431 | dout("get_generic_reply %lld got %p\n", tid, req->reply); | ||
432 | m = ceph_msg_get(req->reply); | ||
433 | /* | ||
434 | * we don't need to track the connection reading into | ||
435 | * this reply because we only have one open connection | ||
436 | * at a time, ever. | ||
437 | */ | ||
438 | } | ||
439 | mutex_unlock(&monc->mutex); | ||
440 | return m; | ||
394 | } | 441 | } |
395 | 442 | ||
396 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 443 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
397 | struct ceph_msg *msg) | 444 | struct ceph_msg *msg) |
398 | { | 445 | { |
399 | struct ceph_mon_statfs_request *req; | 446 | struct ceph_mon_generic_request *req; |
400 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; | 447 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; |
401 | u64 tid; | 448 | u64 tid = le64_to_cpu(msg->hdr.tid); |
402 | 449 | ||
403 | if (msg->front.iov_len != sizeof(*reply)) | 450 | if (msg->front.iov_len != sizeof(*reply)) |
404 | goto bad; | 451 | goto bad; |
405 | tid = le64_to_cpu(msg->hdr.tid); | ||
406 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); | 452 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); |
407 | 453 | ||
408 | mutex_lock(&monc->mutex); | 454 | mutex_lock(&monc->mutex); |
409 | req = __lookup_statfs(monc, tid); | 455 | req = __lookup_generic_req(monc, tid); |
410 | if (req) { | 456 | if (req) { |
411 | *req->buf = reply->st; | 457 | *(struct ceph_statfs *)req->buf = reply->st; |
412 | req->result = 0; | 458 | req->result = 0; |
459 | get_generic_request(req); | ||
413 | } | 460 | } |
414 | mutex_unlock(&monc->mutex); | 461 | mutex_unlock(&monc->mutex); |
415 | if (req) | 462 | if (req) { |
416 | complete(&req->completion); | 463 | complete(&req->completion); |
464 | put_generic_request(req); | ||
465 | } | ||
417 | return; | 466 | return; |
418 | 467 | ||
419 | bad: | 468 | bad: |
420 | pr_err("corrupt statfs reply, no tid\n"); | 469 | pr_err("corrupt generic reply, no tid\n"); |
421 | ceph_msg_dump(msg); | 470 | ceph_msg_dump(msg); |
422 | } | 471 | } |
423 | 472 | ||
424 | /* | 473 | /* |
425 | * (re)send a statfs request | 474 | * Do a synchronous statfs(). |
426 | */ | 475 | */ |
427 | static int send_statfs(struct ceph_mon_client *monc, | 476 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) |
428 | struct ceph_mon_statfs_request *req) | ||
429 | { | 477 | { |
430 | struct ceph_msg *msg; | 478 | struct ceph_mon_generic_request *req; |
431 | struct ceph_mon_statfs *h; | 479 | struct ceph_mon_statfs *h; |
480 | int err; | ||
432 | 481 | ||
433 | dout("send_statfs tid %llu\n", req->tid); | 482 | req = kzalloc(sizeof(*req), GFP_NOFS); |
434 | msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); | 483 | if (!req) |
435 | if (IS_ERR(msg)) | 484 | return -ENOMEM; |
436 | return PTR_ERR(msg); | 485 | |
437 | req->request = msg; | 486 | kref_init(&req->kref); |
438 | msg->hdr.tid = cpu_to_le64(req->tid); | 487 | req->buf = buf; |
439 | h = msg->front.iov_base; | 488 | init_completion(&req->completion); |
489 | |||
490 | err = -ENOMEM; | ||
491 | req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); | ||
492 | if (!req->request) | ||
493 | goto out; | ||
494 | req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); | ||
495 | if (!req->reply) | ||
496 | goto out; | ||
497 | |||
498 | /* fill out request */ | ||
499 | h = req->request->front.iov_base; | ||
440 | h->monhdr.have_version = 0; | 500 | h->monhdr.have_version = 0; |
441 | h->monhdr.session_mon = cpu_to_le16(-1); | 501 | h->monhdr.session_mon = cpu_to_le16(-1); |
442 | h->monhdr.session_mon_tid = 0; | 502 | h->monhdr.session_mon_tid = 0; |
443 | h->fsid = monc->monmap->fsid; | 503 | h->fsid = monc->monmap->fsid; |
444 | ceph_con_send(monc->con, msg); | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * Do a synchronous statfs(). | ||
450 | */ | ||
451 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | ||
452 | { | ||
453 | struct ceph_mon_statfs_request req; | ||
454 | int err; | ||
455 | |||
456 | req.buf = buf; | ||
457 | init_completion(&req.completion); | ||
458 | |||
459 | /* allocate memory for reply */ | ||
460 | err = ceph_msgpool_resv(&monc->msgpool_statfs_reply, 1); | ||
461 | if (err) | ||
462 | return err; | ||
463 | 504 | ||
464 | /* register request */ | 505 | /* register request */ |
465 | mutex_lock(&monc->mutex); | 506 | mutex_lock(&monc->mutex); |
466 | req.tid = ++monc->last_tid; | 507 | req->tid = ++monc->last_tid; |
467 | req.last_attempt = jiffies; | 508 | req->request->hdr.tid = cpu_to_le64(req->tid); |
468 | req.delay = BASE_DELAY_INTERVAL; | 509 | __insert_generic_request(monc, req); |
469 | __insert_statfs(monc, &req); | 510 | monc->num_generic_requests++; |
470 | monc->num_statfs_requests++; | ||
471 | mutex_unlock(&monc->mutex); | 511 | mutex_unlock(&monc->mutex); |
472 | 512 | ||
473 | /* send request and wait */ | 513 | /* send request and wait */ |
474 | err = send_statfs(monc, &req); | 514 | ceph_con_send(monc->con, ceph_msg_get(req->request)); |
475 | if (!err) | 515 | err = wait_for_completion_interruptible(&req->completion); |
476 | err = wait_for_completion_interruptible(&req.completion); | ||
477 | 516 | ||
478 | mutex_lock(&monc->mutex); | 517 | mutex_lock(&monc->mutex); |
479 | rb_erase(&req.node, &monc->statfs_request_tree); | 518 | rb_erase(&req->node, &monc->generic_request_tree); |
480 | monc->num_statfs_requests--; | 519 | monc->num_generic_requests--; |
481 | ceph_msgpool_resv(&monc->msgpool_statfs_reply, -1); | ||
482 | mutex_unlock(&monc->mutex); | 520 | mutex_unlock(&monc->mutex); |
483 | 521 | ||
484 | if (!err) | 522 | if (!err) |
485 | err = req.result; | 523 | err = req->result; |
524 | |||
525 | out: | ||
526 | kref_put(&req->kref, release_generic_request); | ||
486 | return err; | 527 | return err; |
487 | } | 528 | } |
488 | 529 | ||
489 | /* | 530 | /* |
490 | * Resend pending statfs requests. | 531 | * Resend pending generic requests. |
491 | */ | 532 | */ |
492 | static void __resend_statfs(struct ceph_mon_client *monc) | 533 | static void __resend_generic_request(struct ceph_mon_client *monc) |
493 | { | 534 | { |
494 | struct ceph_mon_statfs_request *req; | 535 | struct ceph_mon_generic_request *req; |
495 | struct rb_node *p; | 536 | struct rb_node *p; |
496 | 537 | ||
497 | for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { | 538 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { |
498 | req = rb_entry(p, struct ceph_mon_statfs_request, node); | 539 | req = rb_entry(p, struct ceph_mon_generic_request, node); |
499 | send_statfs(monc, req); | 540 | ceph_con_revoke(monc->con, req->request); |
541 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
500 | } | 542 | } |
501 | } | 543 | } |
502 | 544 | ||
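The request now lives on the heap with a kref instead of on the caller's stack, because the preallocated reply buffer is handed to the messenger asynchronously through get_generic_reply() and must stay valid even if the waiting thread is interrupted. One thing worth flagging: ceph_monc_do_statfs() kzalloc()s the request, but release_generic_request() as shown only puts the two attached messages, so the struct itself appears to leak; a release that also frees the container — matching what later kernels do — would look like this sketch:

    static void release_generic_request(struct kref *kref)
    {
            struct ceph_mon_generic_request *req =
                    container_of(kref, struct ceph_mon_generic_request, kref);

            if (req->reply)
                    ceph_msg_put(req->reply);
            if (req->request)
                    ceph_msg_put(req->request);
            kfree(req);     /* not present in the hunk above */
    }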
@@ -586,26 +628,26 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
586 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | | 628 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | |
587 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; | 629 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; |
588 | 630 | ||
589 | /* msg pools */ | 631 | /* msgs */ |
590 | err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, | 632 | err = -ENOMEM; |
591 | sizeof(struct ceph_mon_subscribe_ack), 1, false); | 633 | monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, |
592 | if (err < 0) | 634 | sizeof(struct ceph_mon_subscribe_ack), |
635 | GFP_NOFS); | ||
636 | if (!monc->m_subscribe_ack) | ||
593 | goto out_monmap; | 637 | goto out_monmap; |
594 | err = ceph_msgpool_init(&monc->msgpool_statfs_reply, | 638 | |
595 | sizeof(struct ceph_mon_statfs_reply), 0, false); | 639 | monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); |
596 | if (err < 0) | 640 | if (!monc->m_subscribe) |
597 | goto out_pool1; | 641 | goto out_subscribe_ack; |
598 | err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); | 642 | |
599 | if (err < 0) | 643 | monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); |
600 | goto out_pool2; | 644 | if (!monc->m_auth_reply) |
601 | 645 | goto out_subscribe; | |
602 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); | 646 | |
647 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); | ||
603 | monc->pending_auth = 0; | 648 | monc->pending_auth = 0; |
604 | if (IS_ERR(monc->m_auth)) { | 649 | if (!monc->m_auth) |
605 | err = PTR_ERR(monc->m_auth); | 650 | goto out_auth_reply; |
606 | monc->m_auth = NULL; | ||
607 | goto out_pool3; | ||
608 | } | ||
609 | 651 | ||
610 | monc->cur_mon = -1; | 652 | monc->cur_mon = -1; |
611 | monc->hunting = true; | 653 | monc->hunting = true; |
@@ -613,8 +655,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
613 | monc->sub_sent = 0; | 655 | monc->sub_sent = 0; |
614 | 656 | ||
615 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); | 657 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); |
616 | monc->statfs_request_tree = RB_ROOT; | 658 | monc->generic_request_tree = RB_ROOT; |
617 | monc->num_statfs_requests = 0; | 659 | monc->num_generic_requests = 0; |
618 | monc->last_tid = 0; | 660 | monc->last_tid = 0; |
619 | 661 | ||
620 | monc->have_mdsmap = 0; | 662 | monc->have_mdsmap = 0; |
@@ -622,12 +664,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
622 | monc->want_next_osdmap = 1; | 664 | monc->want_next_osdmap = 1; |
623 | return 0; | 665 | return 0; |
624 | 666 | ||
625 | out_pool3: | 667 | out_auth_reply: |
626 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 668 | ceph_msg_put(monc->m_auth_reply); |
627 | out_pool2: | 669 | out_subscribe: |
628 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 670 | ceph_msg_put(monc->m_subscribe); |
629 | out_pool1: | 671 | out_subscribe_ack: |
630 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 672 | ceph_msg_put(monc->m_subscribe_ack); |
631 | out_monmap: | 673 | out_monmap: |
632 | kfree(monc->monmap); | 674 | kfree(monc->monmap); |
633 | out: | 675 | out: |
@@ -651,9 +693,9 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
651 | ceph_auth_destroy(monc->auth); | 693 | ceph_auth_destroy(monc->auth); |
652 | 694 | ||
653 | ceph_msg_put(monc->m_auth); | 695 | ceph_msg_put(monc->m_auth); |
654 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 696 | ceph_msg_put(monc->m_auth_reply); |
655 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 697 | ceph_msg_put(monc->m_subscribe); |
656 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 698 | ceph_msg_put(monc->m_subscribe_ack); |
657 | 699 | ||
658 | kfree(monc->monmap); | 700 | kfree(monc->monmap); |
659 | } | 701 | } |
@@ -681,7 +723,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
681 | monc->client->msgr->inst.name.num = monc->auth->global_id; | 723 | monc->client->msgr->inst.name.num = monc->auth->global_id; |
682 | 724 | ||
683 | __send_subscribe(monc); | 725 | __send_subscribe(monc); |
684 | __resend_statfs(monc); | 726 | __resend_generic_request(monc); |
685 | } | 727 | } |
686 | mutex_unlock(&monc->mutex); | 728 | mutex_unlock(&monc->mutex); |
687 | } | 729 | } |
@@ -770,18 +812,17 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
770 | 812 | ||
771 | switch (type) { | 813 | switch (type) { |
772 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 814 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
773 | m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); | 815 | m = ceph_msg_get(monc->m_subscribe_ack); |
774 | break; | 816 | break; |
775 | case CEPH_MSG_STATFS_REPLY: | 817 | case CEPH_MSG_STATFS_REPLY: |
776 | m = ceph_msgpool_get(&monc->msgpool_statfs_reply, front_len); | 818 | return get_generic_reply(con, hdr, skip); |
777 | break; | ||
778 | case CEPH_MSG_AUTH_REPLY: | 819 | case CEPH_MSG_AUTH_REPLY: |
779 | m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); | 820 | m = ceph_msg_get(monc->m_auth_reply); |
780 | break; | 821 | break; |
781 | case CEPH_MSG_MON_MAP: | 822 | case CEPH_MSG_MON_MAP: |
782 | case CEPH_MSG_MDS_MAP: | 823 | case CEPH_MSG_MDS_MAP: |
783 | case CEPH_MSG_OSD_MAP: | 824 | case CEPH_MSG_OSD_MAP: |
784 | m = ceph_msg_new(type, front_len, 0, 0, NULL); | 825 | m = ceph_msg_new(type, front_len, GFP_NOFS); |
785 | break; | 826 | break; |
786 | } | 827 | } |
787 | 828 | ||
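The alloc_msg hook now mixes three supply strategies: return a fresh reference to a preallocated singleton (subscribe ack, auth reply), defer to the tid-keyed lookup (statfs reply), or allocate on demand (the maps). The callback contract, sketched minimally: hand back a referenced message for the messenger to read into, or set *skip to discard the incoming frame:

    static struct ceph_msg *example_alloc_msg(struct ceph_connection *con,
                                              struct ceph_msg_header *hdr,
                                              int *skip)
    {
            int type = le16_to_cpu(hdr->type);
            int front_len = le32_to_cpu(hdr->front_len);

            *skip = 0;
            if (type == CEPH_MSG_MON_MAP)
                    return ceph_msg_new(type, front_len, GFP_NOFS);
            *skip = 1;              /* unexpected type: drop the frame */
            return NULL;
    }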
@@ -826,7 +867,7 @@ out: | |||
826 | mutex_unlock(&monc->mutex); | 867 | mutex_unlock(&monc->mutex); |
827 | } | 868 | } |
828 | 869 | ||
829 | const static struct ceph_connection_operations mon_con_ops = { | 870 | static const struct ceph_connection_operations mon_con_ops = { |
830 | .get = ceph_con_get, | 871 | .get = ceph_con_get, |
831 | .put = ceph_con_put, | 872 | .put = ceph_con_put, |
832 | .dispatch = dispatch, | 873 | .dispatch = dispatch, |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index b958ad5afa06..174d794321d0 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
@@ -2,10 +2,10 @@ | |||
2 | #define _FS_CEPH_MON_CLIENT_H | 2 | #define _FS_CEPH_MON_CLIENT_H |
3 | 3 | ||
4 | #include <linux/completion.h> | 4 | #include <linux/completion.h> |
5 | #include <linux/kref.h> | ||
5 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
6 | 7 | ||
7 | #include "messenger.h" | 8 | #include "messenger.h" |
8 | #include "msgpool.h" | ||
9 | 9 | ||
10 | struct ceph_client; | 10 | struct ceph_client; |
11 | struct ceph_mount_args; | 11 | struct ceph_mount_args; |
@@ -22,7 +22,7 @@ struct ceph_monmap { | |||
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct ceph_mon_client; | 24 | struct ceph_mon_client; |
25 | struct ceph_mon_statfs_request; | 25 | struct ceph_mon_generic_request; |
26 | 26 | ||
27 | 27 | ||
28 | /* | 28 | /* |
@@ -40,17 +40,19 @@ struct ceph_mon_request { | |||
40 | }; | 40 | }; |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * statfs() is done a bit differently because we need to get data back | 43 | * ceph_mon_generic_request is being used for the statfs and poolop requests |
44 | * which are being done a bit differently because we need to get data back | ||
44 | * to the caller | 45 | * to the caller |
45 | */ | 46 | */ |
46 | struct ceph_mon_statfs_request { | 47 | struct ceph_mon_generic_request { |
48 | struct kref kref; | ||
47 | u64 tid; | 49 | u64 tid; |
48 | struct rb_node node; | 50 | struct rb_node node; |
49 | int result; | 51 | int result; |
50 | struct ceph_statfs *buf; | 52 | void *buf; |
51 | struct completion completion; | 53 | struct completion completion; |
52 | unsigned long last_attempt, delay; /* jiffies */ | ||
53 | struct ceph_msg *request; /* original request */ | 54 | struct ceph_msg *request; /* original request */ |
55 | struct ceph_msg *reply; /* and reply */ | ||
54 | }; | 56 | }; |
55 | 57 | ||
56 | struct ceph_mon_client { | 58 | struct ceph_mon_client { |
@@ -61,7 +63,7 @@ struct ceph_mon_client { | |||
61 | struct delayed_work delayed_work; | 63 | struct delayed_work delayed_work; |
62 | 64 | ||
63 | struct ceph_auth_client *auth; | 65 | struct ceph_auth_client *auth; |
64 | struct ceph_msg *m_auth; | 66 | struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; |
65 | int pending_auth; | 67 | int pending_auth; |
66 | 68 | ||
67 | bool hunting; | 69 | bool hunting; |
@@ -70,14 +72,9 @@ struct ceph_mon_client { | |||
70 | struct ceph_connection *con; | 72 | struct ceph_connection *con; |
71 | bool have_fsid; | 73 | bool have_fsid; |
72 | 74 | ||
73 | /* msg pools */ | 75 | /* pending generic requests */ |
74 | struct ceph_msgpool msgpool_subscribe_ack; | 76 | struct rb_root generic_request_tree; |
75 | struct ceph_msgpool msgpool_statfs_reply; | 77 | int num_generic_requests; |
76 | struct ceph_msgpool msgpool_auth_reply; | ||
77 | |||
78 | /* pending statfs requests */ | ||
79 | struct rb_root statfs_request_tree; | ||
80 | int num_statfs_requests; | ||
81 | u64 last_tid; | 78 | u64 last_tid; |
82 | 79 | ||
83 | /* mds/osd map */ | 80 | /* mds/osd map */ |
diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ca3b44a89f2d..dd65a6438131 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c | |||
@@ -7,180 +7,58 @@ | |||
7 | 7 | ||
8 | #include "msgpool.h" | 8 | #include "msgpool.h" |
9 | 9 | ||
10 | /* | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
11 | * We use msg pools to preallocate memory for messages we expect to | 11 | { |
12 | * receive over the wire, to avoid getting ourselves into OOM | 12 | struct ceph_msgpool *pool = arg; |
13 | * conditions at unexpected times. We take use a few different | 13 | void *p; |
14 | * strategies: | ||
15 | * | ||
16 | * - for request/response type interactions, we preallocate the | ||
17 | * memory needed for the response when we generate the request. | ||
18 | * | ||
19 | * - for messages we can receive at any time from the MDS, we preallocate | ||
20 | * a pool of messages we can re-use. | ||
21 | * | ||
22 | * - for writeback, we preallocate some number of messages to use for | ||
23 | * requests and their replies, so that we always make forward | ||
24 | * progress. | ||
25 | * | ||
26 | * The msgpool behaves like a mempool_t, but keeps preallocated | ||
27 | * ceph_msgs strung together on a list_head instead of using a pointer | ||
28 | * vector. This avoids vector reallocation when we adjust the number | ||
29 | * of preallocated items (which happens frequently). | ||
30 | */ | ||
31 | 14 | ||
15 | p = ceph_msg_new(0, pool->front_len, gfp_mask); | ||
16 | if (!p) | ||
17 | pr_err("msgpool %s alloc failed\n", pool->name); | ||
18 | return p; | ||
19 | } | ||
32 | 20 | ||
33 | /* | 21 | static void free_fn(void *element, void *arg) |
34 | * Allocate or release as necessary to meet our target pool size. | ||
35 | */ | ||
36 | static int __fill_msgpool(struct ceph_msgpool *pool) | ||
37 | { | 22 | { |
38 | struct ceph_msg *msg; | 23 | ceph_msg_put(element); |
39 | |||
40 | while (pool->num < pool->min) { | ||
41 | dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, | ||
42 | pool->min); | ||
43 | spin_unlock(&pool->lock); | ||
44 | msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); | ||
45 | spin_lock(&pool->lock); | ||
46 | if (IS_ERR(msg)) | ||
47 | return PTR_ERR(msg); | ||
48 | msg->pool = pool; | ||
49 | list_add(&msg->list_head, &pool->msgs); | ||
50 | pool->num++; | ||
51 | } | ||
52 | while (pool->num > pool->min) { | ||
53 | msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); | ||
54 | dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, | ||
55 | pool->min, msg); | ||
56 | list_del_init(&msg->list_head); | ||
57 | pool->num--; | ||
58 | ceph_msg_kfree(msg); | ||
59 | } | ||
60 | return 0; | ||
61 | } | 24 | } |
62 | 25 | ||
63 | int ceph_msgpool_init(struct ceph_msgpool *pool, | 26 | int ceph_msgpool_init(struct ceph_msgpool *pool, |
64 | int front_len, int min, bool blocking) | 27 | int front_len, int size, bool blocking, const char *name) |
65 | { | 28 | { |
66 | int ret; | ||
67 | |||
68 | dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min); | ||
69 | spin_lock_init(&pool->lock); | ||
70 | pool->front_len = front_len; | 29 | pool->front_len = front_len; |
71 | INIT_LIST_HEAD(&pool->msgs); | 30 | pool->pool = mempool_create(size, alloc_fn, free_fn, pool); |
72 | pool->num = 0; | 31 | if (!pool->pool) |
73 | pool->min = min; | 32 | return -ENOMEM; |
74 | pool->blocking = blocking; | 33 | pool->name = name; |
75 | init_waitqueue_head(&pool->wait); | 34 | return 0; |
76 | |||
77 | spin_lock(&pool->lock); | ||
78 | ret = __fill_msgpool(pool); | ||
79 | spin_unlock(&pool->lock); | ||
80 | return ret; | ||
81 | } | 35 | } |
82 | 36 | ||
83 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) | 37 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) |
84 | { | 38 | { |
85 | dout("msgpool_destroy %p\n", pool); | 39 | mempool_destroy(pool->pool); |
86 | spin_lock(&pool->lock); | ||
87 | pool->min = 0; | ||
88 | __fill_msgpool(pool); | ||
89 | spin_unlock(&pool->lock); | ||
90 | } | 40 | } |
91 | 41 | ||
92 | int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) | 42 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, |
43 | int front_len) | ||
93 | { | 44 | { |
94 | int ret; | 45 | if (front_len > pool->front_len) { |
95 | 46 | pr_err("msgpool_get pool %s need front %d, pool size is %d\n", | |
96 | spin_lock(&pool->lock); | 47 | pool->name, front_len, pool->front_len); |
97 | dout("msgpool_resv %p delta %d\n", pool, delta); | ||
98 | pool->min += delta; | ||
99 | ret = __fill_msgpool(pool); | ||
100 | spin_unlock(&pool->lock); | ||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) | ||
105 | { | ||
106 | wait_queue_t wait; | ||
107 | struct ceph_msg *msg; | ||
108 | |||
109 | if (front_len && front_len > pool->front_len) { | ||
110 | pr_err("msgpool_get pool %p need front %d, pool size is %d\n", | ||
111 | pool, front_len, pool->front_len); | ||
112 | WARN_ON(1); | 48 | WARN_ON(1); |
113 | 49 | ||
114 | /* try to alloc a fresh message */ | 50 | /* try to alloc a fresh message */ |
115 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | 51 | return ceph_msg_new(0, front_len, GFP_NOFS); |
116 | if (!IS_ERR(msg)) | ||
117 | return msg; | ||
118 | } | ||
119 | |||
120 | if (!front_len) | ||
121 | front_len = pool->front_len; | ||
122 | |||
123 | if (pool->blocking) { | ||
124 | /* mempool_t behavior; first try to alloc */ | ||
125 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
126 | if (!IS_ERR(msg)) | ||
127 | return msg; | ||
128 | } | 52 | } |
129 | 53 | ||
130 | while (1) { | 54 | return mempool_alloc(pool->pool, GFP_NOFS); |
131 | spin_lock(&pool->lock); | ||
132 | if (likely(pool->num)) { | ||
133 | msg = list_entry(pool->msgs.next, struct ceph_msg, | ||
134 | list_head); | ||
135 | list_del_init(&msg->list_head); | ||
136 | pool->num--; | ||
137 | dout("msgpool_get %p got %p, now %d/%d\n", pool, msg, | ||
138 | pool->num, pool->min); | ||
139 | spin_unlock(&pool->lock); | ||
140 | return msg; | ||
141 | } | ||
142 | pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, | ||
143 | pool->min, pool->blocking ? "waiting" : "may fail"); | ||
144 | spin_unlock(&pool->lock); | ||
145 | |||
146 | if (!pool->blocking) { | ||
147 | WARN_ON(1); | ||
148 | |||
149 | /* maybe we can allocate it now? */ | ||
150 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
151 | if (!IS_ERR(msg)) | ||
152 | return msg; | ||
153 | |||
154 | pr_err("msgpool_get %p empty + alloc failed\n", pool); | ||
155 | return ERR_PTR(-ENOMEM); | ||
156 | } | ||
157 | |||
158 | init_wait(&wait); | ||
159 | prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
160 | schedule(); | ||
161 | finish_wait(&pool->wait, &wait); | ||
162 | } | ||
163 | } | 55 | } |
164 | 56 | ||
165 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) | 57 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) |
166 | { | 58 | { |
167 | spin_lock(&pool->lock); | 59 | /* reset msg front_len; user may have changed it */ |
168 | if (pool->num < pool->min) { | 60 | msg->front.iov_len = pool->front_len; |
169 | /* reset msg front_len; user may have changed it */ | 61 | msg->hdr.front_len = cpu_to_le32(pool->front_len); |
170 | msg->front.iov_len = pool->front_len; | ||
171 | msg->hdr.front_len = cpu_to_le32(pool->front_len); | ||
172 | 62 | ||
173 | kref_set(&msg->kref, 1); /* retake a single ref */ | 63 | kref_init(&msg->kref); /* retake single ref */ |
174 | list_add(&msg->list_head, &pool->msgs); | ||
175 | pool->num++; | ||
176 | dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, | ||
177 | pool->num, pool->min); | ||
178 | spin_unlock(&pool->lock); | ||
179 | wake_up(&pool->wait); | ||
180 | } else { | ||
181 | dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg, | ||
182 | pool->num, pool->min); | ||
183 | spin_unlock(&pool->lock); | ||
184 | ceph_msg_kfree(msg); | ||
185 | } | ||
186 | } | 64 | } |
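The hand-rolled pool — private spinlock, free list, wait queue — collapses into a thin wrapper over the kernel's mempool_t, which already guarantees that a minimum number of elements can always be allocated. A usage sketch with illustrative sizes (the pool name is new and exists purely for diagnostics):

    struct ceph_msgpool pool;
    struct ceph_msg *msg;

    /* keep 10 messages with 4096-byte fronts in reserve */
    if (ceph_msgpool_init(&pool, 4096, 10, true, "example") < 0)
            return -ENOMEM;

    msg = ceph_msgpool_get(&pool, 4096);    /* backed by mempool_alloc() */
    /* ... fill and send ... */
    ceph_msgpool_put(&pool, msg);           /* resets front_len, re-inits kref */

    ceph_msgpool_destroy(&pool);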
diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h index bc834bfcd720..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/fs/ceph/msgpool.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _FS_CEPH_MSGPOOL | 1 | #ifndef _FS_CEPH_MSGPOOL |
2 | #define _FS_CEPH_MSGPOOL | 2 | #define _FS_CEPH_MSGPOOL |
3 | 3 | ||
4 | #include <linux/mempool.h> | ||
4 | #include "messenger.h" | 5 | #include "messenger.h" |
5 | 6 | ||
6 | /* | 7 | /* |
@@ -8,18 +9,15 @@ | |||
8 | * avoid unexpected OOM conditions. | 9 | * avoid unexpected OOM conditions. |
9 | */ | 10 | */ |
10 | struct ceph_msgpool { | 11 | struct ceph_msgpool { |
11 | spinlock_t lock; | 12 | const char *name; |
13 | mempool_t *pool; | ||
12 | int front_len; /* preallocated payload size */ | 14 | int front_len; /* preallocated payload size */ |
13 | struct list_head msgs; /* msgs in the pool; each has 1 ref */ | ||
14 | int num, min; /* cur, min # msgs in the pool */ | ||
15 | bool blocking; | ||
16 | wait_queue_head_t wait; | ||
17 | }; | 15 | }; |
18 | 16 | ||
19 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, | 17 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, |
20 | int front_len, int size, bool blocking); | 18 | int front_len, int size, bool blocking, |
19 | const char *name); | ||
21 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); | 20 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); |
22 | extern int ceph_msgpool_resv(struct ceph_msgpool *, int delta); | ||
23 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, | 21 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, |
24 | int front_len); | 22 | int front_len); |
25 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); | 23 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 8aaab414f3f8..892a0298dfdf 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
@@ -50,7 +50,6 @@ struct ceph_entity_name { | |||
50 | #define CEPH_ENTITY_TYPE_MDS 0x02 | 50 | #define CEPH_ENTITY_TYPE_MDS 0x02 |
51 | #define CEPH_ENTITY_TYPE_OSD 0x04 | 51 | #define CEPH_ENTITY_TYPE_OSD 0x04 |
52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 | 52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 |
53 | #define CEPH_ENTITY_TYPE_ADMIN 0x10 | ||
54 | #define CEPH_ENTITY_TYPE_AUTH 0x20 | 53 | #define CEPH_ENTITY_TYPE_AUTH 0x20 |
55 | 54 | ||
56 | #define CEPH_ENTITY_TYPE_ANY 0xFF | 55 | #define CEPH_ENTITY_TYPE_ANY 0xFF |
@@ -120,7 +119,7 @@ struct ceph_msg_connect_reply { | |||
120 | /* | 119 | /* |
121 | * message header | 120 | * message header |
122 | */ | 121 | */ |
123 | struct ceph_msg_header { | 122 | struct ceph_msg_header_old { |
124 | __le64 seq; /* message seq# for this session */ | 123 | __le64 seq; /* message seq# for this session */ |
125 | __le64 tid; /* transaction id */ | 124 | __le64 tid; /* transaction id */ |
126 | __le16 type; /* message type */ | 125 | __le16 type; /* message type */ |
@@ -138,6 +137,24 @@ struct ceph_msg_header { | |||
138 | __le32 crc; /* header crc32c */ | 137 | __le32 crc; /* header crc32c */ |
139 | } __attribute__ ((packed)); | 138 | } __attribute__ ((packed)); |
140 | 139 | ||
140 | struct ceph_msg_header { | ||
141 | __le64 seq; /* message seq# for this session */ | ||
142 | __le64 tid; /* transaction id */ | ||
143 | __le16 type; /* message type */ | ||
144 | __le16 priority; /* priority. higher value == higher priority */ | ||
145 | __le16 version; /* version of message encoding */ | ||
146 | |||
147 | __le32 front_len; /* bytes in main payload */ | ||
148 | __le32 middle_len;/* bytes in middle payload */ | ||
149 | __le32 data_len; /* bytes of data payload */ | ||
150 | __le16 data_off; /* sender: include full offset; | ||
151 | receiver: mask against ~PAGE_MASK */ | ||
152 | |||
153 | struct ceph_entity_name src; | ||
154 | __le32 reserved; | ||
155 | __le32 crc; /* header crc32c */ | ||
156 | } __attribute__ ((packed)); | ||
157 | |||
141 | #define CEPH_MSG_PRIO_LOW 64 | 158 | #define CEPH_MSG_PRIO_LOW 64 |
142 | #define CEPH_MSG_PRIO_DEFAULT 127 | 159 | #define CEPH_MSG_PRIO_DEFAULT 127 |
143 | #define CEPH_MSG_PRIO_HIGH 196 | 160 | #define CEPH_MSG_PRIO_HIGH 196 |
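The rewritten header keeps crc as its final field, which matters because — assuming the messenger's usual convention of checksumming everything that precedes it, which is not shown in this hunk — the header CRC is computed over a prefix of the struct:

    hdr->crc = cpu_to_le32(crc32c(0, hdr,
                                  offsetof(struct ceph_msg_header, crc)));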
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 3514f71ff85f..afa7bb3895c4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #define OSD_OP_FRONT_LEN 4096 | 16 | #define OSD_OP_FRONT_LEN 4096 |
17 | #define OSD_OPREPLY_FRONT_LEN 512 | 17 | #define OSD_OPREPLY_FRONT_LEN 512 |
18 | 18 | ||
19 | const static struct ceph_connection_operations osd_con_ops; | 19 | static const struct ceph_connection_operations osd_con_ops; |
20 | static int __kick_requests(struct ceph_osd_client *osdc, | 20 | static int __kick_requests(struct ceph_osd_client *osdc, |
21 | struct ceph_osd *kickosd); | 21 | struct ceph_osd *kickosd); |
22 | 22 | ||
@@ -147,7 +147,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 147 | req = kzalloc(sizeof(*req), GFP_NOFS); |
148 | } | 148 | } |
149 | if (req == NULL) | 149 | if (req == NULL) |
150 | return ERR_PTR(-ENOMEM); | 150 | return NULL; |
151 | 151 | ||
152 | req->r_osdc = osdc; | 152 | req->r_osdc = osdc; |
153 | req->r_mempool = use_mempool; | 153 | req->r_mempool = use_mempool; |
@@ -164,10 +164,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
165 | else | 165 | else |
166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
167 | OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); | 167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); |
168 | if (IS_ERR(msg)) { | 168 | if (!msg) { |
169 | ceph_osdc_put_request(req); | 169 | ceph_osdc_put_request(req); |
170 | return ERR_PTR(PTR_ERR(msg)); | 170 | return NULL; |
171 | } | 171 | } |
172 | req->r_reply = msg; | 172 | req->r_reply = msg; |
173 | 173 | ||
@@ -178,10 +178,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
178 | if (use_mempool) | 178 | if (use_mempool) |
179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
180 | else | 180 | else |
181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); | 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); |
182 | if (IS_ERR(msg)) { | 182 | if (!msg) { |
183 | ceph_osdc_put_request(req); | 183 | ceph_osdc_put_request(req); |
184 | return ERR_PTR(PTR_ERR(msg)); | 184 | return NULL; |
185 | } | 185 | } |
186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
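ceph_osdc_new_request() now reports failure as plain NULL instead of an ERR_PTR-encoded error, collapsing every caller onto a single -ENOMEM path. A caller sketch, with the argument list borrowed from the readpages hunk below:

    struct ceph_osd_request *req;

    req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
                                CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
                                NULL, 0, truncate_seq, truncate_size,
                                NULL, false, 1);
    if (!req)
            return -ENOMEM;         /* was: IS_ERR(req) / PTR_ERR(req) */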
@@ -715,7 +715,7 @@ static void handle_timeout(struct work_struct *work) | |||
715 | * should mark the osd as failed and we should find out about | 715 | * should mark the osd as failed and we should find out about |
716 | * it from an updated osd map. | 716 | * it from an updated osd map. |
717 | */ | 717 | */ |
718 | while (!list_empty(&osdc->req_lru)) { | 718 | while (timeout && !list_empty(&osdc->req_lru)) { |
719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | 719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, |
720 | r_req_lru_item); | 720 | r_req_lru_item); |
721 | 721 | ||
@@ -1078,6 +1078,7 @@ done: | |||
1078 | if (newmap) | 1078 | if (newmap) |
1079 | kick_requests(osdc, NULL); | 1079 | kick_requests(osdc, NULL); |
1080 | up_read(&osdc->map_sem); | 1080 | up_read(&osdc->map_sem); |
1081 | wake_up(&osdc->client->auth_wq); | ||
1081 | return; | 1082 | return; |
1082 | 1083 | ||
1083 | bad: | 1084 | bad: |
@@ -1087,45 +1088,6 @@ bad: | |||
1087 | return; | 1088 | return; |
1088 | } | 1089 | } |
1089 | 1090 | ||
1090 | |||
1091 | /* | ||
1092 | * A read request prepares specific pages that data is to be read into. | ||
1093 | * When a message is being read off the wire, we call prepare_pages to | ||
1094 | * find those pages. | ||
1095 | * 0 = success, -1 failure. | ||
1096 | */ | ||
1097 | static int __prepare_pages(struct ceph_connection *con, | ||
1098 | struct ceph_msg_header *hdr, | ||
1099 | struct ceph_osd_request *req, | ||
1100 | u64 tid, | ||
1101 | struct ceph_msg *m) | ||
1102 | { | ||
1103 | struct ceph_osd *osd = con->private; | ||
1104 | struct ceph_osd_client *osdc; | ||
1105 | int ret = -1; | ||
1106 | int data_len = le32_to_cpu(hdr->data_len); | ||
1107 | unsigned data_off = le16_to_cpu(hdr->data_off); | ||
1108 | |||
1109 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); | ||
1110 | |||
1111 | if (!osd) | ||
1112 | return -1; | ||
1113 | |||
1114 | osdc = osd->o_osdc; | ||
1115 | |||
1116 | dout("__prepare_pages on msg %p tid %llu, has %d pages, want %d\n", m, | ||
1117 | tid, req->r_num_pages, want); | ||
1118 | if (unlikely(req->r_num_pages < want)) | ||
1119 | goto out; | ||
1120 | m->pages = req->r_pages; | ||
1121 | m->nr_pages = req->r_num_pages; | ||
1122 | ret = 0; /* success */ | ||
1123 | out: | ||
1124 | BUG_ON(ret < 0 || m->nr_pages < want); | ||
1125 | |||
1126 | return ret; | ||
1127 | } | ||
1128 | |||
1129 | /* | 1091 | /* |
1130 | * Register request, send initial attempt. | 1092 | * Register request, send initial attempt. |
1131 | */ | 1093 | */ |
@@ -1252,11 +1214,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
1252 | if (!osdc->req_mempool) | 1214 | if (!osdc->req_mempool) |
1253 | goto out; | 1215 | goto out; |
1254 | 1216 | ||
1255 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true); | 1217 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, |
1218 | "osd_op"); | ||
1256 | if (err < 0) | 1219 | if (err < 0) |
1257 | goto out_mempool; | 1220 | goto out_mempool; |
1258 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, | 1221 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, |
1259 | OSD_OPREPLY_FRONT_LEN, 10, true); | 1222 | OSD_OPREPLY_FRONT_LEN, 10, true, |
1223 | "osd_op_reply"); | ||
1260 | if (err < 0) | 1224 | if (err < 0) |
1261 | goto out_msgpool; | 1225 | goto out_msgpool; |
1262 | return 0; | 1226 | return 0; |
@@ -1302,8 +1266,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1302 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 1266 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
1303 | NULL, 0, truncate_seq, truncate_size, NULL, | 1267 | NULL, 0, truncate_seq, truncate_size, NULL, |
1304 | false, 1); | 1268 | false, 1); |
1305 | if (IS_ERR(req)) | 1269 | if (!req) |
1306 | return PTR_ERR(req); | 1270 | return -ENOMEM; |
1307 | 1271 | ||
1308 | /* it may be a short read due to an object boundary */ | 1272 | /* it may be a short read due to an object boundary */ |
1309 | req->r_pages = pages; | 1273 | req->r_pages = pages; |
@@ -1345,8 +1309,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1345 | snapc, do_sync, | 1309 | snapc, do_sync, |
1346 | truncate_seq, truncate_size, mtime, | 1310 | truncate_seq, truncate_size, mtime, |
1347 | nofail, 1); | 1311 | nofail, 1); |
1348 | if (IS_ERR(req)) | 1312 | if (!req) |
1349 | return PTR_ERR(req); | 1313 | return -ENOMEM; |
1350 | 1314 | ||
1351 | /* it may be a short write due to an object boundary */ | 1315 | /* it may be a short write due to an object boundary */ |
1352 | req->r_pages = pages; | 1316 | req->r_pages = pages; |
@@ -1394,7 +1358,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
1394 | } | 1358 | } |
1395 | 1359 | ||
1396 | /* | 1360 | /* |
1397 | * lookup and return message for incoming reply | 1361 | * lookup and return message for incoming reply. set up reply message |
1362 | * pages. | ||
1398 | */ | 1363 | */ |
1399 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 1364 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
1400 | struct ceph_msg_header *hdr, | 1365 | struct ceph_msg_header *hdr, |
@@ -1407,7 +1372,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1407 | int front = le32_to_cpu(hdr->front_len); | 1372 | int front = le32_to_cpu(hdr->front_len); |
1408 | int data_len = le32_to_cpu(hdr->data_len); | 1373 | int data_len = le32_to_cpu(hdr->data_len); |
1409 | u64 tid; | 1374 | u64 tid; |
1410 | int err; | ||
1411 | 1375 | ||
1412 | tid = le64_to_cpu(hdr->tid); | 1376 | tid = le64_to_cpu(hdr->tid); |
1413 | mutex_lock(&osdc->request_mutex); | 1377 | mutex_lock(&osdc->request_mutex); |
@@ -1425,13 +1389,14 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1425 | req->r_reply, req->r_con_filling_msg); | 1389 | req->r_reply, req->r_con_filling_msg); |
1426 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | 1390 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); |
1427 | ceph_con_put(req->r_con_filling_msg); | 1391 | ceph_con_put(req->r_con_filling_msg); |
1392 | req->r_con_filling_msg = NULL; | ||
1428 | } | 1393 | } |
1429 | 1394 | ||
1430 | if (front > req->r_reply->front.iov_len) { | 1395 | if (front > req->r_reply->front.iov_len) { |
1431 | pr_warning("get_reply front %d > preallocated %d\n", | 1396 | pr_warning("get_reply front %d > preallocated %d\n", |
1432 | front, (int)req->r_reply->front.iov_len); | 1397 | front, (int)req->r_reply->front.iov_len); |
1433 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); | 1398 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); |
1434 | if (IS_ERR(m)) | 1399 | if (!m) |
1435 | goto out; | 1400 | goto out; |
1436 | ceph_msg_put(req->r_reply); | 1401 | ceph_msg_put(req->r_reply); |
1437 | req->r_reply = m; | 1402 | req->r_reply = m; |
@@ -1439,12 +1404,19 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1439 | m = ceph_msg_get(req->r_reply); | 1404 | m = ceph_msg_get(req->r_reply); |
1440 | 1405 | ||
1441 | if (data_len > 0) { | 1406 | if (data_len > 0) { |
1442 | err = __prepare_pages(con, hdr, req, tid, m); | 1407 | unsigned data_off = le16_to_cpu(hdr->data_off); |
1443 | if (err < 0) { | 1408 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); |
1409 | |||
1410 | if (unlikely(req->r_num_pages < want)) { | ||
1411 | pr_warning("tid %lld reply %d > expected %d pages\n", | ||
1412 | tid, want, m->nr_pages); | ||
1444 | *skip = 1; | 1413 | *skip = 1; |
1445 | ceph_msg_put(m); | 1414 | ceph_msg_put(m); |
1446 | m = ERR_PTR(err); | 1415 | m = NULL; |
1416 | goto out; | ||
1447 | } | 1417 | } |
1418 | m->pages = req->r_pages; | ||
1419 | m->nr_pages = req->r_num_pages; | ||
1448 | } | 1420 | } |
1449 | *skip = 0; | 1421 | *skip = 0; |
1450 | req->r_con_filling_msg = ceph_con_get(con); | 1422 | req->r_con_filling_msg = ceph_con_get(con); |
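The page-count check that used to live in __prepare_pages() is inlined here. A worked instance, assuming 4 KiB pages: a reply carrying data_len = 8192 at data_off = 512 spans

    int want = calc_pages_for(512, 8192);   /* (512 + 8192 + 4095) >> 12 == 3 */

so the request must have at least three pages preallocated in r_pages, or the reply is skipped instead of being read into a too-small buffer.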
@@ -1466,7 +1438,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, | |||
1466 | 1438 | ||
1467 | switch (type) { | 1439 | switch (type) { |
1468 | case CEPH_MSG_OSD_MAP: | 1440 | case CEPH_MSG_OSD_MAP: |
1469 | return ceph_msg_new(type, front, 0, 0, NULL); | 1441 | return ceph_msg_new(type, front, GFP_NOFS); |
1470 | case CEPH_MSG_OSD_OPREPLY: | 1442 | case CEPH_MSG_OSD_OPREPLY: |
1471 | return get_reply(con, hdr, skip); | 1443 | return get_reply(con, hdr, skip); |
1472 | default: | 1444 | default: |
@@ -1552,7 +1524,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
1552 | return ceph_monc_validate_auth(&osdc->client->monc); | 1524 | return ceph_monc_validate_auth(&osdc->client->monc); |
1553 | } | 1525 | } |
1554 | 1526 | ||
1555 | const static struct ceph_connection_operations osd_con_ops = { | 1527 | static const struct ceph_connection_operations osd_con_ops = { |
1556 | .get = get_osd_con, | 1528 | .get = get_osd_con, |
1557 | .put = put_osd_con, | 1529 | .put = put_osd_con, |
1558 | .dispatch = dispatch, | 1530 | .dispatch = dispatch, |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index 5f8dbf7c745a..b6859f47d364 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c | |||
@@ -20,7 +20,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) | |||
20 | 20 | ||
21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | 21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) |
22 | { | 22 | { |
23 | struct page *page = alloc_page(GFP_NOFS); | 23 | struct page *page = __page_cache_alloc(GFP_NOFS); |
24 | if (!page) | 24 | if (!page) |
25 | return -ENOMEM; | 25 | return -ENOMEM; |
26 | pl->room += PAGE_SIZE; | 26 | pl->room += PAGE_SIZE; |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index fd56451a871f..8fcc023056c7 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -101,8 +101,8 @@ struct ceph_pg_pool { | |||
101 | __le64 snap_seq; /* seq for per-pool snapshot */ | 101 | __le64 snap_seq; /* seq for per-pool snapshot */ |
102 | __le32 snap_epoch; /* epoch of last snap */ | 102 | __le32 snap_epoch; /* epoch of last snap */ |
103 | __le32 num_snaps; | 103 | __le32 num_snaps; |
104 | __le32 num_removed_snap_intervals; | 104 | __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ |
105 | __le64 uid; | 105 | __le64 auid; /* who owns the pg */ |
106 | } __attribute__ ((packed)); | 106 | } __attribute__ ((packed)); |
107 | 107 | ||
108 | /* | 108 | /* |
@@ -208,6 +208,7 @@ enum { | |||
208 | /* read */ | 208 | /* read */ |
209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | 209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, |
210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, | 210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, |
211 | CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, | ||
211 | 212 | ||
212 | /* write */ | 213 | /* write */ |
213 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, | 214 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, |
@@ -305,6 +306,22 @@ enum { | |||
305 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | 306 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ |
306 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | 307 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ |
307 | 308 | ||
309 | /* xattr comparison */ | ||
310 | enum { | ||
311 | CEPH_OSD_CMPXATTR_OP_NOP = 0, | ||
312 | CEPH_OSD_CMPXATTR_OP_EQ = 1, | ||
313 | CEPH_OSD_CMPXATTR_OP_NE = 2, | ||
314 | CEPH_OSD_CMPXATTR_OP_GT = 3, | ||
315 | CEPH_OSD_CMPXATTR_OP_GTE = 4, | ||
316 | CEPH_OSD_CMPXATTR_OP_LT = 5, | ||
317 | CEPH_OSD_CMPXATTR_OP_LTE = 6 | ||
318 | }; | ||
319 | |||
320 | enum { | ||
321 | CEPH_OSD_CMPXATTR_MODE_STRING = 1, | ||
322 | CEPH_OSD_CMPXATTR_MODE_U64 = 2 | ||
323 | }; | ||
324 | |||
308 | /* | 325 | /* |
309 | * an individual object operation. each may be accompanied by some data | 326 | * an individual object operation. each may be accompanied by some data |
310 | * payload | 327 | * payload |
@@ -321,6 +338,8 @@ struct ceph_osd_op { | |||
321 | struct { | 338 | struct { |
322 | __le32 name_len; | 339 | __le32 name_len; |
323 | __le32 value_len; | 340 | __le32 value_len; |
341 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
342 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
324 | } __attribute__ ((packed)) xattr; | 343 | } __attribute__ ((packed)) xattr; |
325 | struct { | 344 | struct { |
326 | __u8 class_len; | 345 | __u8 class_len; |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index d5114db70453..c0b26b6badba 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -512,7 +512,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
512 | struct ceph_cap_snap *capsnap) | 512 | struct ceph_cap_snap *capsnap) |
513 | { | 513 | { |
514 | struct inode *inode = &ci->vfs_inode; | 514 | struct inode *inode = &ci->vfs_inode; |
515 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 515 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
516 | 516 | ||
517 | BUG_ON(capsnap->writing); | 517 | BUG_ON(capsnap->writing); |
518 | capsnap->size = inode->i_size; | 518 | capsnap->size = inode->i_size; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 110857ba9269..7c663d9b9f81 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -8,14 +8,11 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/parser.h> | 10 | #include <linux/parser.h> |
11 | #include <linux/rwsem.h> | ||
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
13 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
15 | #include <linux/statfs.h> | 14 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
17 | #include <linux/version.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | 16 | ||
20 | #include "decode.h" | 17 | #include "decode.h" |
21 | #include "super.h" | 18 | #include "super.h" |
@@ -107,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
107 | static int ceph_syncfs(struct super_block *sb, int wait) | 104 | static int ceph_syncfs(struct super_block *sb, int wait) |
108 | { | 105 | { |
109 | dout("sync_fs %d\n", wait); | 106 | dout("sync_fs %d\n", wait); |
110 | ceph_osdc_sync(&ceph_client(sb)->osdc); | 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
111 | ceph_mdsc_sync(&ceph_client(sb)->mdsc); | 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
112 | dout("sync_fs %d done\n", wait); | 109 | dout("sync_fs %d done\n", wait); |
113 | return 0; | 110 | return 0; |
114 | } | 111 | } |
115 | 112 | ||
113 | static int default_congestion_kb(void) | ||
114 | { | ||
115 | int congestion_kb; | ||
116 | |||
117 | /* | ||
118 | * Copied from NFS | ||
119 | * | ||
120 | * congestion size, scale with available memory. | ||
121 | * | ||
122 | * 64MB: 8192k | ||
123 | * 128MB: 11585k | ||
124 | * 256MB: 16384k | ||
125 | * 512MB: 23170k | ||
126 | * 1GB: 32768k | ||
127 | * 2GB: 46340k | ||
128 | * 4GB: 65536k | ||
129 | * 8GB: 92681k | ||
130 | * 16GB: 131072k | ||
131 | * | ||
132 | * This allows larger machines to have larger/more transfers. | ||
133 | * Limit the default to 256M | ||
134 | */ | ||
135 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
136 | if (congestion_kb > 256*1024) | ||
137 | congestion_kb = 256*1024; | ||
138 | |||
139 | return congestion_kb; | ||
140 | } | ||
116 | 141 | ||
117 | /** | 142 | /** |
118 | * ceph_show_options - Show mount options in /proc/mounts | 143 | * ceph_show_options - Show mount options in /proc/mounts |
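A worked instance of the scaling just added, assuming 4 KiB pages (PAGE_SHIFT = 12): a 1 GB machine has totalram_pages = 262144 and int_sqrt(262144) = 512, so

    congestion_kb = (16 * 512) << (12 - 10);    /* = 32768k, the 1GB row */

which reproduces the table above; the 256M cap only engages at roughly 64 GB of RAM and beyond.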
@@ -138,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
138 | seq_puts(m, ",nocrc"); | 163 | seq_puts(m, ",nocrc"); |
139 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | 164 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) |
140 | seq_puts(m, ",noasyncreaddir"); | 165 | seq_puts(m, ",noasyncreaddir"); |
166 | |||
167 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
168 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
169 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
170 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
171 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
172 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
173 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
174 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
175 | args->osd_keepalive_timeout); | ||
176 | if (args->wsize) | ||
177 | seq_printf(m, ",wsize=%d", args->wsize); | ||
178 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
179 | seq_printf(m, ",rsize=%d", args->rsize); | ||
180 | if (args->congestion_kb != default_congestion_kb()) | ||
181 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
182 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
183 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
184 | args->caps_wanted_delay_min); | ||
185 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
186 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
187 | args->caps_wanted_delay_max); | ||
188 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
189 | seq_printf(m, ",cap_release_safety=%d", | ||
190 | args->cap_release_safety); | ||
191 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
192 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
193 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
194 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
141 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 195 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
142 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | 196 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); |
143 | if (args->name) | 197 | if (args->name) |
@@ -161,35 +215,6 @@ static void ceph_inode_init_once(void *foo) | |||
161 | inode_init_once(&ci->vfs_inode); | 215 | inode_init_once(&ci->vfs_inode); |
162 | } | 216 | } |
163 | 217 | ||
164 | static int default_congestion_kb(void) | ||
165 | { | ||
166 | int congestion_kb; | ||
167 | |||
168 | /* | ||
169 | * Copied from NFS | ||
170 | * | ||
171 | * congestion size, scale with available memory. | ||
172 | * | ||
173 | * 64MB: 8192k | ||
174 | * 128MB: 11585k | ||
175 | * 256MB: 16384k | ||
176 | * 512MB: 23170k | ||
177 | * 1GB: 32768k | ||
178 | * 2GB: 46340k | ||
179 | * 4GB: 65536k | ||
180 | * 8GB: 92681k | ||
181 | * 16GB: 131072k | ||
182 | * | ||
183 | * This allows larger machines to have larger/more transfers. | ||
184 | * Limit the default to 256M | ||
185 | */ | ||
186 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
187 | if (congestion_kb > 256*1024) | ||
188 | congestion_kb = 256*1024; | ||
189 | |||
190 | return congestion_kb; | ||
191 | } | ||
192 | |||
193 | static int __init init_caches(void) | 218 | static int __init init_caches(void) |
194 | { | 219 | { |
195 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | 220 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
@@ -308,7 +333,9 @@ enum { | |||
308 | Opt_osd_idle_ttl, | 333 | Opt_osd_idle_ttl, |
309 | Opt_caps_wanted_delay_min, | 334 | Opt_caps_wanted_delay_min, |
310 | Opt_caps_wanted_delay_max, | 335 | Opt_caps_wanted_delay_max, |
336 | Opt_cap_release_safety, | ||
311 | Opt_readdir_max_entries, | 337 | Opt_readdir_max_entries, |
338 | Opt_readdir_max_bytes, | ||
312 | Opt_congestion_kb, | 339 | Opt_congestion_kb, |
313 | Opt_last_int, | 340 | Opt_last_int, |
314 | /* int args above */ | 341 | /* int args above */ |
@@ -339,7 +366,9 @@ static match_table_t arg_tokens = { | |||
339 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 366 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
340 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 367 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
341 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 368 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
369 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | ||
342 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 370 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | ||
343 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
344 | /* int args above */ | 373 | /* int args above */ |
345 | {Opt_snapdirname, "snapdirname=%s"}, | 374 | {Opt_snapdirname, "snapdirname=%s"}, |
@@ -388,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
388 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 417 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
389 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 418 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
390 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 419 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
391 | args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; | 420 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
392 | args->max_readdir = 1024; | 421 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
422 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
393 | args->congestion_kb = default_congestion_kb(); | 423 | args->congestion_kb = default_congestion_kb(); |
394 | 424 | ||
395 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 425 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ |
@@ -497,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
497 | case Opt_readdir_max_entries: | 527 | case Opt_readdir_max_entries: |
498 | args->max_readdir = intval; | 528 | args->max_readdir = intval; |
499 | break; | 529 | break; |
530 | case Opt_readdir_max_bytes: | ||
531 | args->max_readdir_bytes = intval; | ||
532 | break; | ||
500 | case Opt_congestion_kb: | 533 | case Opt_congestion_kb: |
501 | args->congestion_kb = intval; | 534 | args->congestion_kb = intval; |
502 | break; | 535 | break; |
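The new readdir_max_bytes token follows the same "%d" shape as the other integer options in the match_table_t above. A minimal user-space sketch of what the token match accomplishes (sscanf stands in for the kernel's token parser; the option string is illustrative):

#include <stdio.h>

int main(void)
{
	const char *opt = "readdir_max_bytes=524288";	/* illustrative */
	int max_readdir_bytes;

	if (sscanf(opt, "readdir_max_bytes=%d", &max_readdir_bytes) == 1)
		printf("max_readdir_bytes = %d\n", max_readdir_bytes);
	return 0;
}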
@@ -682,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
682 | /* | 715 | /* |
683 | * true if we have the mon and osd maps (and have thus joined the cluster) | 716 | * true if we have the mon and osd maps (and have thus joined the cluster) |
684 | */ | 717 | */ |
685 | static int have_mon_map(struct ceph_client *client) | 718 | static int have_mon_and_osd_map(struct ceph_client *client) |
686 | { | 719 | { |
687 | return client->monc.monmap && client->monc.monmap->epoch; | 720 | return client->monc.monmap && client->monc.monmap->epoch && |
721 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
688 | } | 722 | } |
689 | 723 | ||
690 | /* | 724 | /* |
@@ -762,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
762 | if (err < 0) | 796 | if (err < 0) |
763 | goto out; | 797 | goto out; |
764 | 798 | ||
765 | while (!have_mon_map(client)) { | 799 | while (!have_mon_and_osd_map(client)) { |
766 | err = -EIO; | 800 | err = -EIO; |
767 | if (timeout && time_after_eq(jiffies, started + timeout)) | 801 | if (timeout && time_after_eq(jiffies, started + timeout)) |
768 | goto out; | 802 | goto out; |
@@ -770,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
770 | /* wait */ | 804 | /* wait */ |
771 | dout("mount waiting for mon_map\n"); | 805 | dout("mount waiting for mon_map\n"); |
772 | err = wait_event_interruptible_timeout(client->auth_wq, | 806 | err = wait_event_interruptible_timeout(client->auth_wq, |
773 | have_mon_map(client) || (client->auth_err < 0), | 807 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
774 | timeout); | 808 | timeout); |
775 | if (err == -EINTR || err == -ERESTARTSYS) | 809 | if (err == -EINTR || err == -ERESTARTSYS) |
776 | goto out; | 810 | goto out; |
777 | if (client->auth_err < 0) { | 811 | if (client->auth_err < 0) { |
@@ -884,6 +918,8 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
884 | /* | 918 | /* |
885 | * construct our own bdi so we can control readahead, etc. | 919 | * construct our own bdi so we can control readahead, etc. |
886 | */ | 920 | */ |
921 | static atomic_long_t bdi_seq = ATOMIC_INIT(0); | ||
922 | |||
887 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 923 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) |
888 | { | 924 | { |
889 | int err; | 925 | int err; |
@@ -893,7 +929,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
893 | client->backing_dev_info.ra_pages = | 929 | client->backing_dev_info.ra_pages = |
894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 930 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
895 | >> PAGE_SHIFT; | 931 | >> PAGE_SHIFT; |
896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 932 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", |
933 | atomic_long_inc_return(&bdi_seq)); | ||
897 | if (!err) | 934 | if (!err) |
898 | sb->s_bdi = &client->backing_dev_info; | 935 | sb->s_bdi = &client->backing_dev_info; |
899 | return err; | 936 | return err; |
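Switching from bdi_register_dev() to bdi_register() means the BDI is no longer named after a device number, so a monotonically increasing counter supplies unique "ceph-%d" names instead. A hedged user-space sketch of that naming scheme, with C11 atomics standing in for atomic_long_inc_return():

#include <stdatomic.h>
#include <stdio.h>

static atomic_long bdi_seq;	/* static atomics start at zero */

static void register_one_bdi(void)
{
	char name[32];

	/* old value + 1 mimics atomic_long_inc_return() */
	snprintf(name, sizeof(name), "ceph-%ld",
		 atomic_fetch_add(&bdi_seq, 1) + 1);
	printf("registered %s\n", name);	/* ceph-1, ceph-2, ... */
}

int main(void)
{
	register_one_bdi();
	register_one_bdi();
	return 0;
}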
@@ -932,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
932 | goto out; | 969 | goto out; |
933 | } | 970 | } |
934 | 971 | ||
935 | if (ceph_client(sb) != client) { | 972 | if (ceph_sb_to_client(sb) != client) { |
936 | ceph_destroy_client(client); | 973 | ceph_destroy_client(client); |
937 | client = ceph_client(sb); | 974 | client = ceph_sb_to_client(sb); |
938 | dout("get_sb got existing client %p\n", client); | 975 | dout("get_sb got existing client %p\n", client); |
939 | } else { | 976 | } else { |
940 | dout("get_sb using new client %p\n", client); | 977 | dout("get_sb using new client %p\n", client); |
@@ -952,8 +989,7 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
952 | 989 | ||
953 | out_splat: | 990 | out_splat: |
954 | ceph_mdsc_close_sessions(&client->mdsc); | 991 | ceph_mdsc_close_sessions(&client->mdsc); |
955 | up_write(&sb->s_umount); | 992 | deactivate_locked_super(sb); |
956 | deactivate_super(sb); | ||
957 | goto out_final; | 993 | goto out_final; |
958 | 994 | ||
959 | out: | 995 | out: |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 13513b80d87f..3725c9ee9d08 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -52,24 +52,25 @@ | |||
52 | 52 | ||
53 | struct ceph_mount_args { | 53 | struct ceph_mount_args { |
54 | int sb_flags; | 54 | int sb_flags; |
55 | int flags; | ||
56 | struct ceph_fsid fsid; | ||
57 | struct ceph_entity_addr my_addr; | ||
55 | int num_mon; | 58 | int num_mon; |
56 | struct ceph_entity_addr *mon_addr; | 59 | struct ceph_entity_addr *mon_addr; |
57 | int flags; | ||
58 | int mount_timeout; | 60 | int mount_timeout; |
59 | int osd_idle_ttl; | 61 | int osd_idle_ttl; |
60 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
61 | struct ceph_fsid fsid; | ||
62 | struct ceph_entity_addr my_addr; | ||
63 | int wsize; | ||
64 | int rsize; /* max readahead */ | ||
65 | int max_readdir; /* max readdir size */ | ||
66 | int congestion_kb; /* max readdir size */ | ||
67 | int osd_timeout; | 62 | int osd_timeout; |
68 | int osd_keepalive_timeout; | 63 | int osd_keepalive_timeout; |
64 | int wsize; | ||
65 | int rsize; /* max readahead */ | ||
66 | int congestion_kb; /* max writeback in flight */ | ||
67 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
68 | int cap_release_safety; | ||
69 | int max_readdir; /* max readdir result (entries) */ | ||
70 | int max_readdir_bytes; /* max readdir result (bytes) */ | ||
69 | char *snapdir_name; /* default ".snap" */ | 71 | char *snapdir_name; /* default ".snap" */ |
70 | char *name; | 72 | char *name; |
71 | char *secret; | 73 | char *secret; |
72 | int cap_release_safety; | ||
73 | }; | 74 | }; |
74 | 75 | ||
75 | /* | 76 | /* |
@@ -80,13 +81,14 @@ struct ceph_mount_args { | |||
80 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | 81 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 |
81 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | 82 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 |
82 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | 83 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ |
84 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
85 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
83 | 86 | ||
84 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 87 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
85 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | 88 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) |
86 | 89 | ||
87 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | 90 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
88 | #define CEPH_AUTH_NAME_DEFAULT "guest" | 91 | #define CEPH_AUTH_NAME_DEFAULT "guest" |
89 | |||
90 | /* | 92 | /* |
91 | * Delay telling the MDS we no longer want caps, in case we reopen | 93 | * Delay telling the MDS we no longer want caps, in case we reopen |
92 | * the file. Delay a minimum amount of time, even if we send a cap | 94 | * the file. Delay a minimum amount of time, even if we send a cap |
@@ -96,6 +98,7 @@ struct ceph_mount_args { | |||
96 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | 98 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ |
97 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | 99 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ |
98 | 100 | ||
101 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
99 | 102 | ||
100 | /* mount state */ | 103 | /* mount state */ |
101 | enum { | 104 | enum { |
@@ -160,12 +163,6 @@ struct ceph_client { | |||
160 | #endif | 163 | #endif |
161 | }; | 164 | }; |
162 | 165 | ||
163 | static inline struct ceph_client *ceph_client(struct super_block *sb) | ||
164 | { | ||
165 | return sb->s_fs_info; | ||
166 | } | ||
167 | |||
168 | |||
169 | /* | 166 | /* |
170 | * File i/o capability. This tracks shared state with the metadata | 167 | * File i/o capability. This tracks shared state with the metadata |
171 | * server that allows us to cache or writeback attributes or to read | 168 | * server that allows us to cache or writeback attributes or to read |
@@ -871,6 +868,7 @@ extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
871 | extern void ceph_dentry_lru_add(struct dentry *dn); | 868 | extern void ceph_dentry_lru_add(struct dentry *dn); |
872 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 869 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
873 | extern void ceph_dentry_lru_del(struct dentry *dn); | 870 | extern void ceph_dentry_lru_del(struct dentry *dn); |
871 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | ||
874 | 872 | ||
875 | /* | 873 | /* |
876 | * our d_ops vary depending on whether the inode is live, | 874 | * our d_ops vary depending on whether the inode is live, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2845422907fc..68aeebc69681 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -7,7 +7,8 @@ | |||
7 | 7 | ||
8 | static bool ceph_is_valid_xattr(const char *name) | 8 | static bool ceph_is_valid_xattr(const char *name) |
9 | { | 9 | { |
10 | return !strncmp(name, XATTR_SECURITY_PREFIX, | 10 | return !strncmp(name, "ceph.", 5) || |
11 | !strncmp(name, XATTR_SECURITY_PREFIX, | ||
11 | XATTR_SECURITY_PREFIX_LEN) || | 12 | XATTR_SECURITY_PREFIX_LEN) || |
12 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 13 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || |
13 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | 14 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); |
@@ -76,14 +77,14 @@ static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, | |||
76 | } | 77 | } |
77 | 78 | ||
78 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { | 79 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { |
79 | { true, "user.ceph.dir.entries", ceph_vxattrcb_entries}, | 80 | { true, "ceph.dir.entries", ceph_vxattrcb_entries}, |
80 | { true, "user.ceph.dir.files", ceph_vxattrcb_files}, | 81 | { true, "ceph.dir.files", ceph_vxattrcb_files}, |
81 | { true, "user.ceph.dir.subdirs", ceph_vxattrcb_subdirs}, | 82 | { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, |
82 | { true, "user.ceph.dir.rentries", ceph_vxattrcb_rentries}, | 83 | { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, |
83 | { true, "user.ceph.dir.rfiles", ceph_vxattrcb_rfiles}, | 84 | { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, |
84 | { true, "user.ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, | 85 | { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, |
85 | { true, "user.ceph.dir.rbytes", ceph_vxattrcb_rbytes}, | 86 | { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, |
86 | { true, "user.ceph.dir.rctime", ceph_vxattrcb_rctime}, | 87 | { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, |
87 | { true, NULL, NULL } | 88 | { true, NULL, NULL } |
88 | }; | 89 | }; |
89 | 90 | ||
@@ -107,7 +108,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | |||
107 | } | 108 | } |
108 | 109 | ||
109 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { | 110 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { |
110 | { true, "user.ceph.layout", ceph_vxattrcb_layout}, | 111 | { true, "ceph.layout", ceph_vxattrcb_layout}, |
111 | { NULL, NULL } | 112 | { NULL, NULL } |
112 | }; | 113 | }; |
113 | 114 | ||
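With the "user." prefix dropped, the virtual xattrs now live in their own "ceph." namespace (which ceph_is_valid_xattr() above accepts explicitly). A hedged user-space sketch of reading one of the renamed attributes with the standard getxattr(2) call; the mount path is a placeholder:

#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	char buf[64];
	/* "ceph.dir.entries" replaces the old "user.ceph.dir.entries";
	 * the mount point below is a placeholder */
	ssize_t n = getxattr("/mnt/ceph/somedir", "ceph.dir.entries",
			     buf, sizeof(buf) - 1);

	if (n >= 0) {
		buf[n] = '\0';
		printf("entries: %s\n", buf);
	} else {
		perror("getxattr");
	}
	return 0;
}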
@@ -186,12 +187,6 @@ static int __set_xattr(struct ceph_inode_info *ci, | |||
186 | ci->i_xattrs.names_size -= xattr->name_len; | 187 | ci->i_xattrs.names_size -= xattr->name_len; |
187 | ci->i_xattrs.vals_size -= xattr->val_len; | 188 | ci->i_xattrs.vals_size -= xattr->val_len; |
188 | } | 189 | } |
189 | if (!xattr) { | ||
190 | pr_err("__set_xattr ENOMEM on %p %llx.%llx xattr %s=%s\n", | ||
191 | &ci->vfs_inode, ceph_vinop(&ci->vfs_inode), name, | ||
192 | xattr->val); | ||
193 | return -ENOMEM; | ||
194 | } | ||
195 | ci->i_xattrs.names_size += name_len; | 190 | ci->i_xattrs.names_size += name_len; |
196 | ci->i_xattrs.vals_size += val_len; | 191 | ci->i_xattrs.vals_size += val_len; |
197 | if (val) | 192 | if (val) |
@@ -574,7 +569,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
574 | ci->i_xattrs.version, ci->i_xattrs.index_version); | 569 | ci->i_xattrs.version, ci->i_xattrs.index_version); |
575 | 570 | ||
576 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 571 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
577 | (ci->i_xattrs.index_version > ci->i_xattrs.version)) { | 572 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
578 | goto list_xattr; | 573 | goto list_xattr; |
579 | } else { | 574 | } else { |
580 | spin_unlock(&inode->i_lock); | 575 | spin_unlock(&inode->i_lock); |
@@ -622,7 +617,7 @@ out: | |||
622 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 617 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
623 | const char *value, size_t size, int flags) | 618 | const char *value, size_t size, int flags) |
624 | { | 619 | { |
625 | struct ceph_client *client = ceph_client(dentry->d_sb); | 620 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
626 | struct inode *inode = dentry->d_inode; | 621 | struct inode *inode = dentry->d_inode; |
627 | struct ceph_inode_info *ci = ceph_inode(inode); | 622 | struct ceph_inode_info *ci = ceph_inode(inode); |
628 | struct inode *parent_inode = dentry->d_parent->d_inode; | 623 | struct inode *parent_inode = dentry->d_parent->d_inode; |
@@ -641,7 +636,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
641 | return -ENOMEM; | 636 | return -ENOMEM; |
642 | err = -ENOMEM; | 637 | err = -ENOMEM; |
643 | for (i = 0; i < nr_pages; i++) { | 638 | for (i = 0; i < nr_pages; i++) { |
644 | pages[i] = alloc_page(GFP_NOFS); | 639 | pages[i] = __page_cache_alloc(GFP_NOFS); |
645 | if (!pages[i]) { | 640 | if (!pages[i]) { |
646 | nr_pages = i; | 641 | nr_pages = i; |
647 | goto out; | 642 | goto out; |
@@ -779,7 +774,7 @@ out: | |||
779 | 774 | ||
780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 775 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
781 | { | 776 | { |
782 | struct ceph_client *client = ceph_client(dentry->d_sb); | 777 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
783 | struct ceph_mds_client *mdsc = &client->mdsc; | 778 | struct ceph_mds_client *mdsc = &client->mdsc; |
784 | struct inode *inode = dentry->d_inode; | 779 | struct inode *inode = dentry->d_inode; |
785 | struct inode *parent_inode = dentry->d_parent->d_inode; | 780 | struct inode *parent_inode = dentry->d_parent->d_inode; |
diff --git a/fs/coda/file.c b/fs/coda/file.c index 4c813f2cdc52..7196077b1688 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -217,7 +217,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) | |||
217 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | 217 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); |
218 | host_file = cfi->cfi_container; | 218 | host_file = cfi->cfi_container; |
219 | 219 | ||
220 | err = vfs_fsync(host_file, host_file->f_path.dentry, datasync); | 220 | err = vfs_fsync(host_file, datasync); |
221 | if ( !err && !datasync ) { | 221 | if ( !err && !datasync ) { |
222 | lock_kernel(); | 222 | lock_kernel(); |
223 | err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); | 223 | err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); |
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 773f2ce9aa06..ca25d96d45c9 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Pioctl operations for Coda. | 2 | * Pioctl operations for Coda. |
3 | * Original version: (C) 1996 Peter Braam | 3 | * Original version: (C) 1996 Peter Braam |
4 | * Rewritten for Linux 2.1: (C) 1997 Carnegie Mellon University | 4 | * Rewritten for Linux 2.1: (C) 1997 Carnegie Mellon University |
5 | * | 5 | * |
6 | * Carnegie Mellon encourages users of this code to contribute improvements | 6 | * Carnegie Mellon encourages users of this code to contribute improvements |
@@ -23,21 +23,22 @@ | |||
23 | #include <linux/coda_fs_i.h> | 23 | #include <linux/coda_fs_i.h> |
24 | #include <linux/coda_psdev.h> | 24 | #include <linux/coda_psdev.h> |
25 | 25 | ||
26 | #include <linux/smp_lock.h> | ||
27 | |||
26 | /* pioctl ops */ | 28 | /* pioctl ops */ |
27 | static int coda_ioctl_permission(struct inode *inode, int mask); | 29 | static int coda_ioctl_permission(struct inode *inode, int mask); |
28 | static int coda_pioctl(struct inode * inode, struct file * filp, | 30 | static long coda_pioctl(struct file *filp, unsigned int cmd, |
29 | unsigned int cmd, unsigned long user_data); | 31 | unsigned long user_data); |
30 | 32 | ||
31 | /* exported from this file */ | 33 | /* exported from this file */ |
32 | const struct inode_operations coda_ioctl_inode_operations = | 34 | const struct inode_operations coda_ioctl_inode_operations = { |
33 | { | ||
34 | .permission = coda_ioctl_permission, | 35 | .permission = coda_ioctl_permission, |
35 | .setattr = coda_setattr, | 36 | .setattr = coda_setattr, |
36 | }; | 37 | }; |
37 | 38 | ||
38 | const struct file_operations coda_ioctl_operations = { | 39 | const struct file_operations coda_ioctl_operations = { |
39 | .owner = THIS_MODULE, | 40 | .owner = THIS_MODULE, |
40 | .ioctl = coda_pioctl, | 41 | .unlocked_ioctl = coda_pioctl, |
41 | }; | 42 | }; |
42 | 43 | ||
43 | /* the coda pioctl inode ops */ | 44 | /* the coda pioctl inode ops */ |
@@ -46,48 +47,53 @@ static int coda_ioctl_permission(struct inode *inode, int mask) | |||
46 | return (mask & MAY_EXEC) ? -EACCES : 0; | 47 | return (mask & MAY_EXEC) ? -EACCES : 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static int coda_pioctl(struct inode * inode, struct file * filp, | 50 | static long coda_pioctl(struct file *filp, unsigned int cmd, |
50 | unsigned int cmd, unsigned long user_data) | 51 | unsigned long user_data) |
51 | { | 52 | { |
52 | struct path path; | 53 | struct path path; |
53 | int error; | 54 | int error; |
54 | struct PioctlData data; | 55 | struct PioctlData data; |
55 | struct inode *target_inode = NULL; | 56 | struct inode *inode = filp->f_dentry->d_inode; |
56 | struct coda_inode_info *cnp; | 57 | struct inode *target_inode = NULL; |
58 | struct coda_inode_info *cnp; | ||
57 | 59 | ||
58 | /* get the Pioctl data arguments from user space */ | 60 | lock_kernel(); |
59 | if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { | 61 | |
60 | return -EINVAL; | 62 | /* get the Pioctl data arguments from user space */ |
61 | } | 63 | if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { |
62 | 64 | error = -EINVAL; | |
63 | /* | 65 | goto out; |
64 | * Look up the pathname. Note that the pathname is in | ||
65 | * user memory, and namei takes care of this | ||
66 | */ | ||
67 | if (data.follow) { | ||
68 | error = user_path(data.path, &path); | ||
69 | } else { | ||
70 | error = user_lpath(data.path, &path); | ||
71 | } | 66 | } |
72 | 67 | ||
73 | if ( error ) { | 68 | /* |
74 | return error; | 69 | * Look up the pathname. Note that the pathname is in |
75 | } else { | 70 | * user memory, and namei takes care of this |
71 | */ | ||
72 | if (data.follow) | ||
73 | error = user_path(data.path, &path); | ||
74 | else | ||
75 | error = user_lpath(data.path, &path); | ||
76 | |||
77 | if (error) | ||
78 | goto out; | ||
79 | else | ||
76 | target_inode = path.dentry->d_inode; | 80 | target_inode = path.dentry->d_inode; |
77 | } | 81 | |
78 | |||
79 | /* return if it is not a Coda inode */ | 82 | /* return if it is not a Coda inode */ |
80 | if ( target_inode->i_sb != inode->i_sb ) { | 83 | if (target_inode->i_sb != inode->i_sb) { |
81 | path_put(&path); | 84 | path_put(&path); |
82 | return -EINVAL; | 85 | error = -EINVAL; |
86 | goto out; | ||
83 | } | 87 | } |
84 | 88 | ||
85 | /* now proceed to make the upcall */ | 89 | /* now proceed to make the upcall */ |
86 | cnp = ITOC(target_inode); | 90 | cnp = ITOC(target_inode); |
87 | 91 | ||
88 | error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); | 92 | error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); |
89 | 93 | ||
90 | path_put(&path); | 94 | path_put(&path); |
91 | return error; | ||
92 | } | ||
93 | 95 | ||
96 | out: | ||
97 | unlock_kernel(); | ||
98 | return error; | ||
99 | } | ||
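The conversion from .ioctl to .unlocked_ioctl moves BKL acquisition out of the VFS and into the handler itself, which is why coda_pioctl now brackets its body with lock_kernel()/unlock_kernel() and funnels every exit through one unlock path. A user-space sketch of the same pattern, with a pthread mutex standing in for the BKL (all names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* with .unlocked_ioctl the caller no longer serializes for us,
 * so the handler takes the lock itself */
static long my_unlocked_ioctl(unsigned int cmd, unsigned long arg)
{
	long error = 0;

	(void)cmd;
	(void)arg;
	pthread_mutex_lock(&big_lock);
	/* ... handler body; set error on failure ... */
	pthread_mutex_unlock(&big_lock);
	return error;
}

int main(void)
{
	printf("ioctl returned %ld\n", my_unlocked_ioctl(0, 0));
	return 0;
}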
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index be4392ca2098..66b9cf79c5ba 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -73,8 +73,7 @@ static unsigned int coda_psdev_poll(struct file *file, poll_table * wait) | |||
73 | return mask; | 73 | return mask; |
74 | } | 74 | } |
75 | 75 | ||
76 | static int coda_psdev_ioctl(struct inode * inode, struct file * filp, | 76 | static long coda_psdev_ioctl(struct file * filp, unsigned int cmd, unsigned long arg) |
77 | unsigned int cmd, unsigned long arg) | ||
78 | { | 77 | { |
79 | unsigned int data; | 78 | unsigned int data; |
80 | 79 | ||
@@ -344,7 +343,7 @@ static const struct file_operations coda_psdev_fops = { | |||
344 | .read = coda_psdev_read, | 343 | .read = coda_psdev_read, |
345 | .write = coda_psdev_write, | 344 | .write = coda_psdev_write, |
346 | .poll = coda_psdev_poll, | 345 | .poll = coda_psdev_poll, |
347 | .ioctl = coda_psdev_ioctl, | 346 | .unlocked_ioctl = coda_psdev_ioctl, |
348 | .open = coda_psdev_open, | 347 | .open = coda_psdev_open, |
349 | .release = coda_psdev_release, | 348 | .release = coda_psdev_release, |
350 | }; | 349 | }; |
diff --git a/fs/dcache.c b/fs/dcache.c index f1358e5c3a59..d96047b4a633 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -536,7 +536,7 @@ restart: | |||
536 | */ | 536 | */ |
537 | static void prune_dcache(int count) | 537 | static void prune_dcache(int count) |
538 | { | 538 | { |
539 | struct super_block *sb; | 539 | struct super_block *sb, *n; |
540 | int w_count; | 540 | int w_count; |
541 | int unused = dentry_stat.nr_unused; | 541 | int unused = dentry_stat.nr_unused; |
542 | int prune_ratio; | 542 | int prune_ratio; |
@@ -545,13 +545,14 @@ static void prune_dcache(int count) | |||
545 | if (unused == 0 || count == 0) | 545 | if (unused == 0 || count == 0) |
546 | return; | 546 | return; |
547 | spin_lock(&dcache_lock); | 547 | spin_lock(&dcache_lock); |
548 | restart: | ||
549 | if (count >= unused) | 548 | if (count >= unused) |
550 | prune_ratio = 1; | 549 | prune_ratio = 1; |
551 | else | 550 | else |
552 | prune_ratio = unused / count; | 551 | prune_ratio = unused / count; |
553 | spin_lock(&sb_lock); | 552 | spin_lock(&sb_lock); |
554 | list_for_each_entry(sb, &super_blocks, s_list) { | 553 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { |
554 | if (list_empty(&sb->s_instances)) | ||
555 | continue; | ||
555 | if (sb->s_nr_dentry_unused == 0) | 556 | if (sb->s_nr_dentry_unused == 0) |
556 | continue; | 557 | continue; |
557 | sb->s_count++; | 558 | sb->s_count++; |
@@ -590,14 +591,10 @@ restart: | |||
590 | } | 591 | } |
591 | spin_lock(&sb_lock); | 592 | spin_lock(&sb_lock); |
592 | count -= pruned; | 593 | count -= pruned; |
593 | /* | 594 | __put_super(sb); |
594 | * restart only when sb is no longer on the list and | 595 | /* more work left to do? */ |
595 | * we have more work to do. | 596 | if (count <= 0) |
596 | */ | 597 | break; |
597 | if (__put_super_and_need_restart(sb) && count > 0) { | ||
598 | spin_unlock(&sb_lock); | ||
599 | goto restart; | ||
600 | } | ||
601 | } | 598 | } |
602 | spin_unlock(&sb_lock); | 599 | spin_unlock(&sb_lock); |
603 | spin_unlock(&dcache_lock); | 600 | spin_unlock(&dcache_lock); |
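prune_dcache() now walks the list with list_for_each_entry_safe(), so the current superblock can be released via __put_super() (and potentially vanish from the list) without restarting the whole scan. The idiom in miniature, as a hedged user-space sketch: cache the next pointer before the current node can go away:

#include <stdio.h>
#include <stdlib.h>

struct node {
	int v;
	struct node *next;
};

int main(void)
{
	struct node *head = NULL, *n, *next;
	int i;

	for (i = 0; i < 3; i++) {
		n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->v = i;
		n->next = head;
		head = n;
	}
	/* "safe" walk: read ->next before the node is freed */
	for (n = head; n; n = next) {
		next = n->next;
		printf("freeing %d\n", n->v);
		free(n);
	}
	return 0;
}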
@@ -1529,6 +1526,7 @@ void d_delete(struct dentry * dentry) | |||
1529 | spin_lock(&dentry->d_lock); | 1526 | spin_lock(&dentry->d_lock); |
1530 | isdir = S_ISDIR(dentry->d_inode->i_mode); | 1527 | isdir = S_ISDIR(dentry->d_inode->i_mode); |
1531 | if (atomic_read(&dentry->d_count) == 1) { | 1528 | if (atomic_read(&dentry->d_count) == 1) { |
1529 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | ||
1532 | dentry_iput(dentry); | 1530 | dentry_iput(dentry); |
1533 | fsnotify_nameremove(dentry, isdir); | 1531 | fsnotify_nameremove(dentry, isdir); |
1534 | return; | 1532 | return; |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 0120247b41c0..8b3ffd5b5235 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -384,18 +384,15 @@ static int devpts_get_sb(struct file_system_type *fs_type, | |||
384 | s->s_flags |= MS_ACTIVE; | 384 | s->s_flags |= MS_ACTIVE; |
385 | } | 385 | } |
386 | 386 | ||
387 | simple_set_mnt(mnt, s); | ||
388 | |||
389 | memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts)); | 387 | memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts)); |
390 | 388 | ||
391 | error = mknod_ptmx(s); | 389 | error = mknod_ptmx(s); |
392 | if (error) | 390 | if (error) |
393 | goto out_dput; | 391 | goto out_undo_sget; |
394 | 392 | ||
395 | return 0; | 393 | simple_set_mnt(mnt, s); |
396 | 394 | ||
397 | out_dput: | 395 | return 0; |
398 | dput(s->s_root); /* undo dget() in simple_set_mnt() */ | ||
399 | 396 | ||
400 | out_undo_sget: | 397 | out_undo_sget: |
401 | deactivate_locked_super(s); | 398 | deactivate_locked_super(s); |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 31f4b0e6d72c..83c4f600786a 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -12,7 +12,7 @@ | |||
12 | /* A global variable is a bit ugly, but it keeps the code simple */ | 12 | /* A global variable is a bit ugly, but it keeps the code simple */ |
13 | int sysctl_drop_caches; | 13 | int sysctl_drop_caches; |
14 | 14 | ||
15 | static void drop_pagecache_sb(struct super_block *sb) | 15 | static void drop_pagecache_sb(struct super_block *sb, void *unused) |
16 | { | 16 | { |
17 | struct inode *inode, *toput_inode = NULL; | 17 | struct inode *inode, *toput_inode = NULL; |
18 | 18 | ||
@@ -33,26 +33,6 @@ static void drop_pagecache_sb(struct super_block *sb) | |||
33 | iput(toput_inode); | 33 | iput(toput_inode); |
34 | } | 34 | } |
35 | 35 | ||
36 | static void drop_pagecache(void) | ||
37 | { | ||
38 | struct super_block *sb; | ||
39 | |||
40 | spin_lock(&sb_lock); | ||
41 | restart: | ||
42 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
43 | sb->s_count++; | ||
44 | spin_unlock(&sb_lock); | ||
45 | down_read(&sb->s_umount); | ||
46 | if (sb->s_root) | ||
47 | drop_pagecache_sb(sb); | ||
48 | up_read(&sb->s_umount); | ||
49 | spin_lock(&sb_lock); | ||
50 | if (__put_super_and_need_restart(sb)) | ||
51 | goto restart; | ||
52 | } | ||
53 | spin_unlock(&sb_lock); | ||
54 | } | ||
55 | |||
56 | static void drop_slab(void) | 36 | static void drop_slab(void) |
57 | { | 37 | { |
58 | int nr_objects; | 38 | int nr_objects; |
@@ -68,7 +48,7 @@ int drop_caches_sysctl_handler(ctl_table *table, int write, | |||
68 | proc_dointvec_minmax(table, write, buffer, length, ppos); | 48 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
69 | if (write) { | 49 | if (write) { |
70 | if (sysctl_drop_caches & 1) | 50 | if (sysctl_drop_caches & 1) |
71 | drop_pagecache(); | 51 | iterate_supers(drop_pagecache_sb, NULL); |
72 | if (sysctl_drop_caches & 2) | 52 | if (sysctl_drop_caches & 2) |
73 | drop_slab(); | 53 | drop_slab(); |
74 | } | 54 | } |
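The open-coded super_blocks walk is replaced by iterate_supers(), which owns the list locking and hands each live superblock to a callback; the drop_pagecache_sb() signature gains the void * argument that the iterator threads through. A hedged user-space sketch of that callback shape (the iterator body here is illustrative; the real helper also handles s_umount and the superblock refcount):

#include <stdio.h>

struct super_block {
	const char *s_id;
};

/* illustrative stand-in for the kernel helper: hands each live
 * superblock to f (locking elided in this sketch) */
static void iterate_supers(void (*f)(struct super_block *, void *),
			   void *arg)
{
	static struct super_block sbs[] = { { "sda1" }, { "ceph-1" } };
	unsigned int i;

	for (i = 0; i < sizeof(sbs) / sizeof(sbs[0]); i++)
		f(&sbs[i], arg);
}

static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
	(void)unused;
	printf("dropping page cache on %s\n", sb->s_id);
}

int main(void)
{
	iterate_supers(drop_pagecache_sb, NULL);
	return 0;
}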
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index bfc2e0f78f00..0032a9f5a3a9 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -731,15 +731,14 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, | |||
731 | int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, | 731 | int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, |
732 | struct page *page_for_lower, | 732 | struct page *page_for_lower, |
733 | size_t offset_in_page, size_t size); | 733 | size_t offset_in_page, size_t size); |
734 | int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | 734 | int ecryptfs_write(struct inode *inode, char *data, loff_t offset, size_t size); |
735 | size_t size); | ||
736 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, | 735 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, |
737 | struct inode *ecryptfs_inode); | 736 | struct inode *ecryptfs_inode); |
738 | int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, | 737 | int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, |
739 | pgoff_t page_index, | 738 | pgoff_t page_index, |
740 | size_t offset_in_page, size_t size, | 739 | size_t offset_in_page, size_t size, |
741 | struct inode *ecryptfs_inode); | 740 | struct inode *ecryptfs_inode); |
742 | struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); | 741 | struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); |
743 | int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); | 742 | int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); |
744 | int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, | 743 | int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, |
745 | struct user_namespace *user_ns); | 744 | struct user_namespace *user_ns); |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index e7440a6f5ebf..3bdddbcc785f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -276,9 +276,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) | |||
276 | static int | 276 | static int |
277 | ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 277 | ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) |
278 | { | 278 | { |
279 | return vfs_fsync(ecryptfs_file_to_lower(file), | 279 | return vfs_fsync(ecryptfs_file_to_lower(file), datasync); |
280 | ecryptfs_dentry_to_lower(dentry), | ||
281 | datasync); | ||
282 | } | 280 | } |
283 | 281 | ||
284 | static int ecryptfs_fasync(int fd, struct file *file, int flag) | 282 | static int ecryptfs_fasync(int fd, struct file *file, int flag) |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2d4418affac..65dee2f336ae 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -142,19 +142,10 @@ out: | |||
142 | static int grow_file(struct dentry *ecryptfs_dentry) | 142 | static int grow_file(struct dentry *ecryptfs_dentry) |
143 | { | 143 | { |
144 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; | 144 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; |
145 | struct file fake_file; | ||
146 | struct ecryptfs_file_info tmp_file_info; | ||
147 | char zero_virt[] = { 0x00 }; | 145 | char zero_virt[] = { 0x00 }; |
148 | int rc = 0; | 146 | int rc = 0; |
149 | 147 | ||
150 | memset(&fake_file, 0, sizeof(fake_file)); | 148 | rc = ecryptfs_write(ecryptfs_inode, zero_virt, 0, 1); |
151 | fake_file.f_path.dentry = ecryptfs_dentry; | ||
152 | memset(&tmp_file_info, 0, sizeof(tmp_file_info)); | ||
153 | ecryptfs_set_file_private(&fake_file, &tmp_file_info); | ||
154 | ecryptfs_set_file_lower( | ||
155 | &fake_file, | ||
156 | ecryptfs_inode_to_private(ecryptfs_inode)->lower_file); | ||
157 | rc = ecryptfs_write(&fake_file, zero_virt, 0, 1); | ||
158 | i_size_write(ecryptfs_inode, 0); | 149 | i_size_write(ecryptfs_inode, 0); |
159 | rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); | 150 | rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); |
160 | ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |= | 151 | ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |= |
@@ -784,8 +775,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
784 | { | 775 | { |
785 | int rc = 0; | 776 | int rc = 0; |
786 | struct inode *inode = dentry->d_inode; | 777 | struct inode *inode = dentry->d_inode; |
787 | struct dentry *lower_dentry; | ||
788 | struct file fake_ecryptfs_file; | ||
789 | struct ecryptfs_crypt_stat *crypt_stat; | 778 | struct ecryptfs_crypt_stat *crypt_stat; |
790 | loff_t i_size = i_size_read(inode); | 779 | loff_t i_size = i_size_read(inode); |
791 | loff_t lower_size_before_truncate; | 780 | loff_t lower_size_before_truncate; |
@@ -796,23 +785,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
796 | goto out; | 785 | goto out; |
797 | } | 786 | } |
798 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | 787 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; |
799 | /* Set up a fake ecryptfs file, this is used to interface with | ||
800 | * the file in the underlying filesystem so that the | ||
801 | * truncation has an effect there as well. */ | ||
802 | memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file)); | ||
803 | fake_ecryptfs_file.f_path.dentry = dentry; | ||
804 | /* Released at out_free: label */ | ||
805 | ecryptfs_set_file_private(&fake_ecryptfs_file, | ||
806 | kmem_cache_alloc(ecryptfs_file_info_cache, | ||
807 | GFP_KERNEL)); | ||
808 | if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) { | ||
809 | rc = -ENOMEM; | ||
810 | goto out; | ||
811 | } | ||
812 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
813 | ecryptfs_set_file_lower( | ||
814 | &fake_ecryptfs_file, | ||
815 | ecryptfs_inode_to_private(dentry->d_inode)->lower_file); | ||
816 | /* Switch on growing or shrinking file */ | 788 | /* Switch on growing or shrinking file */ |
817 | if (ia->ia_size > i_size) { | 789 | if (ia->ia_size > i_size) { |
818 | char zero[] = { 0x00 }; | 790 | char zero[] = { 0x00 }; |
@@ -822,7 +794,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
822 | * this triggers code that will fill in 0's throughout | 794 | * this triggers code that will fill in 0's throughout |
823 | * the intermediate portion of the previous end of the | 795 | * the intermediate portion of the previous end of the |
824 | * file and the new end of the file */ | 796 | * file and the new end of the file */ |
825 | rc = ecryptfs_write(&fake_ecryptfs_file, zero, | 797 | rc = ecryptfs_write(inode, zero, |
826 | (ia->ia_size - 1), 1); | 798 | (ia->ia_size - 1), 1); |
827 | } else { /* ia->ia_size < i_size_read(inode) */ | 799 | } else { /* ia->ia_size < i_size_read(inode) */ |
828 | /* We're chopping off all the pages down to the page | 800 | /* We're chopping off all the pages down to the page |
@@ -835,10 +807,10 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
835 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 807 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { |
836 | rc = vmtruncate(inode, ia->ia_size); | 808 | rc = vmtruncate(inode, ia->ia_size); |
837 | if (rc) | 809 | if (rc) |
838 | goto out_free; | 810 | goto out; |
839 | lower_ia->ia_size = ia->ia_size; | 811 | lower_ia->ia_size = ia->ia_size; |
840 | lower_ia->ia_valid |= ATTR_SIZE; | 812 | lower_ia->ia_valid |= ATTR_SIZE; |
841 | goto out_free; | 813 | goto out; |
842 | } | 814 | } |
843 | if (num_zeros) { | 815 | if (num_zeros) { |
844 | char *zeros_virt; | 816 | char *zeros_virt; |
@@ -846,16 +818,16 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
846 | zeros_virt = kzalloc(num_zeros, GFP_KERNEL); | 818 | zeros_virt = kzalloc(num_zeros, GFP_KERNEL); |
847 | if (!zeros_virt) { | 819 | if (!zeros_virt) { |
848 | rc = -ENOMEM; | 820 | rc = -ENOMEM; |
849 | goto out_free; | 821 | goto out; |
850 | } | 822 | } |
851 | rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, | 823 | rc = ecryptfs_write(inode, zeros_virt, |
852 | ia->ia_size, num_zeros); | 824 | ia->ia_size, num_zeros); |
853 | kfree(zeros_virt); | 825 | kfree(zeros_virt); |
854 | if (rc) { | 826 | if (rc) { |
855 | printk(KERN_ERR "Error attempting to zero out " | 827 | printk(KERN_ERR "Error attempting to zero out " |
856 | "the remainder of the end page on " | 828 | "the remainder of the end page on " |
857 | "reducing truncate; rc = [%d]\n", rc); | 829 | "reducing truncate; rc = [%d]\n", rc); |
858 | goto out_free; | 830 | goto out; |
859 | } | 831 | } |
860 | } | 832 | } |
861 | vmtruncate(inode, ia->ia_size); | 833 | vmtruncate(inode, ia->ia_size); |
@@ -864,7 +836,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
864 | printk(KERN_ERR "Problem with " | 836 | printk(KERN_ERR "Problem with " |
865 | "ecryptfs_write_inode_size_to_metadata; " | 837 | "ecryptfs_write_inode_size_to_metadata; " |
866 | "rc = [%d]\n", rc); | 838 | "rc = [%d]\n", rc); |
867 | goto out_free; | 839 | goto out; |
868 | } | 840 | } |
869 | /* We are reducing the size of the ecryptfs file, and need to | 841 | /* We are reducing the size of the ecryptfs file, and need to |
870 | * know if we need to reduce the size of the lower file. */ | 842 | * know if we need to reduce the size of the lower file. */ |
@@ -878,10 +850,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
878 | } else | 850 | } else |
879 | lower_ia->ia_valid &= ~ATTR_SIZE; | 851 | lower_ia->ia_valid &= ~ATTR_SIZE; |
880 | } | 852 | } |
881 | out_free: | ||
882 | if (ecryptfs_file_to_private(&fake_ecryptfs_file)) | ||
883 | kmem_cache_free(ecryptfs_file_info_cache, | ||
884 | ecryptfs_file_to_private(&fake_ecryptfs_file)); | ||
885 | out: | 853 | out: |
886 | return rc; | 854 | return rc; |
887 | } | 855 | } |
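Both the grow and truncate paths previously built a zeroed fake struct file (plus a fake ecryptfs_file_info) just so ecryptfs_write() could reach the inode; passing the inode directly removes all of that scaffolding and the out_free error label with it. A small user-space sketch of the interface-narrowing idea (names illustrative):

#include <stdio.h>

struct inode { long i_size; };
struct file { struct inode *f_inode; };

/* before: took a struct file only to reach the inode, forcing
 * callers without a real file to fabricate one */
static long size_via_file(struct file *f)
{
	return f->f_inode->i_size;
}

/* after: takes exactly what it uses */
static long size_via_inode(struct inode *inode)
{
	return inode->i_size;
}

int main(void)
{
	struct inode i = { .i_size = 4096 };
	struct file f = { .f_inode = &i };

	printf("%ld %ld\n", size_via_file(&f), size_via_inode(&i));
	return 0;
}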
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 760983d0f25e..cbd4e18adb20 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -281,7 +281,7 @@ static void ecryptfs_init_mount_crypt_stat( | |||
281 | * | 281 | * |
282 | * Returns zero on success; non-zero on error | 282 | * Returns zero on success; non-zero on error |
283 | */ | 283 | */ |
284 | static int ecryptfs_parse_options(struct super_block *sb, char *options) | 284 | static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) |
285 | { | 285 | { |
286 | char *p; | 286 | char *p; |
287 | int rc = 0; | 287 | int rc = 0; |
@@ -293,7 +293,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) | |||
293 | int fn_cipher_key_bytes; | 293 | int fn_cipher_key_bytes; |
294 | int fn_cipher_key_bytes_set = 0; | 294 | int fn_cipher_key_bytes_set = 0; |
295 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | 295 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = |
296 | &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; | 296 | &sbi->mount_crypt_stat; |
297 | substring_t args[MAX_OPT_ARGS]; | 297 | substring_t args[MAX_OPT_ARGS]; |
298 | int token; | 298 | int token; |
299 | char *sig_src; | 299 | char *sig_src; |
@@ -483,68 +483,7 @@ out: | |||
483 | } | 483 | } |
484 | 484 | ||
485 | struct kmem_cache *ecryptfs_sb_info_cache; | 485 | struct kmem_cache *ecryptfs_sb_info_cache; |
486 | 486 | static struct file_system_type ecryptfs_fs_type; | |
487 | /** | ||
488 | * ecryptfs_fill_super | ||
489 | * @sb: The ecryptfs super block | ||
490 | * @raw_data: The options passed to mount | ||
491 | * @silent: Not used but required by function prototype | ||
492 | * | ||
493 | * Sets up what we can of the sb, rest is done in ecryptfs_read_super | ||
494 | * | ||
495 | * Returns zero on success; non-zero otherwise | ||
496 | */ | ||
497 | static int | ||
498 | ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) | ||
499 | { | ||
500 | struct ecryptfs_sb_info *esi; | ||
501 | int rc = 0; | ||
502 | |||
503 | /* Released in ecryptfs_put_super() */ | ||
504 | ecryptfs_set_superblock_private(sb, | ||
505 | kmem_cache_zalloc(ecryptfs_sb_info_cache, | ||
506 | GFP_KERNEL)); | ||
507 | esi = ecryptfs_superblock_to_private(sb); | ||
508 | if (!esi) { | ||
509 | ecryptfs_printk(KERN_WARNING, "Out of memory\n"); | ||
510 | rc = -ENOMEM; | ||
511 | goto out; | ||
512 | } | ||
513 | |||
514 | rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); | ||
515 | if (rc) | ||
516 | goto out; | ||
517 | |||
518 | sb->s_bdi = &esi->bdi; | ||
519 | sb->s_op = &ecryptfs_sops; | ||
520 | /* Released through deactivate_super(sb) from get_sb_nodev */ | ||
521 | sb->s_root = d_alloc(NULL, &(const struct qstr) { | ||
522 | .hash = 0,.name = "/",.len = 1}); | ||
523 | if (!sb->s_root) { | ||
524 | ecryptfs_printk(KERN_ERR, "d_alloc failed\n"); | ||
525 | rc = -ENOMEM; | ||
526 | goto out; | ||
527 | } | ||
528 | sb->s_root->d_op = &ecryptfs_dops; | ||
529 | sb->s_root->d_sb = sb; | ||
530 | sb->s_root->d_parent = sb->s_root; | ||
531 | /* Released in d_release when dput(sb->s_root) is called */ | ||
532 | /* through deactivate_super(sb) from get_sb_nodev() */ | ||
533 | ecryptfs_set_dentry_private(sb->s_root, | ||
534 | kmem_cache_zalloc(ecryptfs_dentry_info_cache, | ||
535 | GFP_KERNEL)); | ||
536 | if (!ecryptfs_dentry_to_private(sb->s_root)) { | ||
537 | ecryptfs_printk(KERN_ERR, | ||
538 | "dentry_info_cache alloc failed\n"); | ||
539 | rc = -ENOMEM; | ||
540 | goto out; | ||
541 | } | ||
542 | rc = 0; | ||
543 | out: | ||
544 | /* Should be able to rely on deactivate_super called from | ||
545 | * get_sb_nodev */ | ||
546 | return rc; | ||
547 | } | ||
548 | 487 | ||
549 | /** | 488 | /** |
550 | * ecryptfs_read_super | 489 | * ecryptfs_read_super |
@@ -565,6 +504,13 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) | |||
565 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); | 504 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); |
566 | goto out; | 505 | goto out; |
567 | } | 506 | } |
507 | if (path.dentry->d_sb->s_type == &ecryptfs_fs_type) { | ||
508 | rc = -EINVAL; | ||
509 | printk(KERN_ERR "Mount on filesystem of type " | ||
510 | "eCryptfs explicitly disallowed due to " | ||
511 | "known incompatibilities\n"); | ||
512 | goto out_free; | ||
513 | } | ||
568 | ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); | 514 | ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); |
569 | sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 515 | sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
570 | sb->s_blocksize = path.dentry->d_sb->s_blocksize; | 516 | sb->s_blocksize = path.dentry->d_sb->s_blocksize; |
@@ -588,11 +534,8 @@ out: | |||
588 | * @dev_name: The path to mount over | 534 | * @dev_name: The path to mount over |
589 | * @raw_data: The options passed into the kernel | 535 | * @raw_data: The options passed into the kernel |
590 | * | 536 | * |
591 | * The whole ecryptfs_get_sb process is broken into 4 functions: | 537 | * The whole ecryptfs_get_sb process is broken into 3 functions: |
592 | * ecryptfs_parse_options(): handle options passed to ecryptfs, if any | 538 | * ecryptfs_parse_options(): handle options passed to ecryptfs, if any |
593 | * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block | ||
594 | * with as much information as it can before needing | ||
595 | * the lower filesystem. | ||
596 | * ecryptfs_read_super(): this accesses the lower filesystem and uses | 539 | * ecryptfs_read_super(): this accesses the lower filesystem and uses |
597 | * ecryptfs_interpose to perform most of the linking | 540 | * ecryptfs_interpose to perform most of the linking |
598 | * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) | 541 | * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) |
@@ -601,30 +544,78 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, | |||
601 | const char *dev_name, void *raw_data, | 544 | const char *dev_name, void *raw_data, |
602 | struct vfsmount *mnt) | 545 | struct vfsmount *mnt) |
603 | { | 546 | { |
547 | struct super_block *s; | ||
548 | struct ecryptfs_sb_info *sbi; | ||
549 | struct ecryptfs_dentry_info *root_info; | ||
550 | const char *err = "Getting sb failed"; | ||
604 | int rc; | 551 | int rc; |
605 | struct super_block *sb; | ||
606 | 552 | ||
607 | rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt); | 553 | sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); |
608 | if (rc < 0) { | 554 | if (!sbi) { |
609 | printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc); | 555 | rc = -ENOMEM; |
610 | goto out; | 556 | goto out; |
611 | } | 557 | } |
612 | sb = mnt->mnt_sb; | 558 | |
613 | rc = ecryptfs_parse_options(sb, raw_data); | 559 | rc = ecryptfs_parse_options(sbi, raw_data); |
614 | if (rc) { | 560 | if (rc) { |
615 | printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc); | 561 | err = "Error parsing options"; |
616 | goto out_abort; | 562 | goto out; |
563 | } | ||
564 | |||
565 | s = sget(fs_type, NULL, set_anon_super, NULL); | ||
566 | if (IS_ERR(s)) { | ||
567 | rc = PTR_ERR(s); | ||
568 | goto out; | ||
617 | } | 569 | } |
618 | rc = ecryptfs_read_super(sb, dev_name); | 570 | |
571 | s->s_flags = flags; | ||
572 | rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); | ||
619 | if (rc) { | 573 | if (rc) { |
620 | printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc); | 574 | deactivate_locked_super(s); |
621 | goto out_abort; | 575 | goto out; |
622 | } | 576 | } |
623 | goto out; | 577 | |
624 | out_abort: | 578 | ecryptfs_set_superblock_private(s, sbi); |
625 | dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */ | 579 | s->s_bdi = &sbi->bdi; |
626 | deactivate_locked_super(sb); | 580 | |
581 | /* ->kill_sb() will take care of sbi after that point */ | ||
582 | sbi = NULL; | ||
583 | s->s_op = &ecryptfs_sops; | ||
584 | |||
585 | rc = -ENOMEM; | ||
586 | s->s_root = d_alloc(NULL, &(const struct qstr) { | ||
587 | .hash = 0,.name = "/",.len = 1}); | ||
588 | if (!s->s_root) { | ||
589 | deactivate_locked_super(s); | ||
590 | goto out; | ||
591 | } | ||
592 | s->s_root->d_op = &ecryptfs_dops; | ||
593 | s->s_root->d_sb = s; | ||
594 | s->s_root->d_parent = s->s_root; | ||
595 | |||
596 | root_info = kmem_cache_zalloc(ecryptfs_dentry_info_cache, GFP_KERNEL); | ||
597 | if (!root_info) { | ||
598 | deactivate_locked_super(s); | ||
599 | goto out; | ||
600 | } | ||
601 | /* ->kill_sb() will take care of root_info */ | ||
602 | ecryptfs_set_dentry_private(s->s_root, root_info); | ||
603 | s->s_flags |= MS_ACTIVE; | ||
604 | rc = ecryptfs_read_super(s, dev_name); | ||
605 | if (rc) { | ||
606 | deactivate_locked_super(s); | ||
607 | err = "Reading sb failed"; | ||
608 | goto out; | ||
609 | } | ||
610 | simple_set_mnt(mnt, s); | ||
611 | return 0; | ||
612 | |||
627 | out: | 613 | out: |
614 | if (sbi) { | ||
615 | ecryptfs_destroy_mount_crypt_stat(&sbi->mount_crypt_stat); | ||
616 | kmem_cache_free(ecryptfs_sb_info_cache, sbi); | ||
617 | } | ||
618 | printk(KERN_ERR "%s; rc = [%d]\n", err, rc); | ||
628 | return rc; | 619 | return rc; |
629 | } | 620 | } |
630 | 621 | ||
@@ -633,11 +624,16 @@ out: | |||
633 | * @sb: The ecryptfs super block | 624 | * @sb: The ecryptfs super block |
634 | * | 625 | * |
635 | * Used to bring the superblock down and free the private data. | 626 | * Used to bring the superblock down and free the private data. |
636 | * Private data is free'd in ecryptfs_put_super() | ||
637 | */ | 627 | */ |
638 | static void ecryptfs_kill_block_super(struct super_block *sb) | 628 | static void ecryptfs_kill_block_super(struct super_block *sb) |
639 | { | 629 | { |
640 | generic_shutdown_super(sb); | 630 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); |
631 | kill_anon_super(sb); | ||
632 | if (!sb_info) | ||
633 | return; | ||
634 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); | ||
635 | bdi_destroy(&sb_info->bdi); | ||
636 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | ||
641 | } | 637 | } |
642 | 638 | ||
643 | static struct file_system_type ecryptfs_fs_type = { | 639 | static struct file_system_type ecryptfs_fs_type = { |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 2ee9a3a7b68c..b1d82756544b 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -44,17 +44,9 @@ | |||
44 | * Returns locked and up-to-date page (if ok), with increased | 44 | * Returns locked and up-to-date page (if ok), with increased |
45 | * refcnt. | 45 | * refcnt. |
46 | */ | 46 | */ |
47 | struct page *ecryptfs_get_locked_page(struct file *file, loff_t index) | 47 | struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index) |
48 | { | 48 | { |
49 | struct dentry *dentry; | 49 | struct page *page = read_mapping_page(inode->i_mapping, index, NULL); |
50 | struct inode *inode; | ||
51 | struct address_space *mapping; | ||
52 | struct page *page; | ||
53 | |||
54 | dentry = file->f_path.dentry; | ||
55 | inode = dentry->d_inode; | ||
56 | mapping = inode->i_mapping; | ||
57 | page = read_mapping_page(mapping, index, (void *)file); | ||
58 | if (!IS_ERR(page)) | 50 | if (!IS_ERR(page)) |
59 | lock_page(page); | 51 | lock_page(page); |
60 | return page; | 52 | return page; |
@@ -198,7 +190,7 @@ out: | |||
198 | static int ecryptfs_readpage(struct file *file, struct page *page) | 190 | static int ecryptfs_readpage(struct file *file, struct page *page) |
199 | { | 191 | { |
200 | struct ecryptfs_crypt_stat *crypt_stat = | 192 | struct ecryptfs_crypt_stat *crypt_stat = |
201 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; | 193 | &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat; |
202 | int rc = 0; | 194 | int rc = 0; |
203 | 195 | ||
204 | if (!crypt_stat | 196 | if (!crypt_stat |
@@ -300,8 +292,7 @@ static int ecryptfs_write_begin(struct file *file, | |||
300 | 292 | ||
301 | if (!PageUptodate(page)) { | 293 | if (!PageUptodate(page)) { |
302 | struct ecryptfs_crypt_stat *crypt_stat = | 294 | struct ecryptfs_crypt_stat *crypt_stat = |
303 | &ecryptfs_inode_to_private( | 295 | &ecryptfs_inode_to_private(mapping->host)->crypt_stat; |
304 | file->f_path.dentry->d_inode)->crypt_stat; | ||
305 | 296 | ||
306 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) | 297 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) |
307 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { | 298 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { |
@@ -487,7 +478,7 @@ static int ecryptfs_write_end(struct file *file, | |||
487 | unsigned to = from + copied; | 478 | unsigned to = from + copied; |
488 | struct inode *ecryptfs_inode = mapping->host; | 479 | struct inode *ecryptfs_inode = mapping->host; |
489 | struct ecryptfs_crypt_stat *crypt_stat = | 480 | struct ecryptfs_crypt_stat *crypt_stat = |
490 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; | 481 | &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; |
491 | int rc; | 482 | int rc; |
492 | 483 | ||
493 | if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { | 484 | if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { |
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 0cc4fafd6552..db184ef15d3d 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -93,7 +93,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, | |||
93 | 93 | ||
94 | /** | 94 | /** |
95 | * ecryptfs_write | 95 | * ecryptfs_write |
96 | * @ecryptfs_file: The eCryptfs file into which to write | 96 | * @ecryptfs_inode: The eCryptfs file into which to write |
97 | * @data: Virtual address where data to write is located | 97 | * @data: Virtual address where data to write is located |
98 | * @offset: Offset in the eCryptfs file at which to begin writing the | 98 | * @offset: Offset in the eCryptfs file at which to begin writing the |
99 | * data from @data | 99 | * data from @data |
@@ -109,12 +109,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, | |||
109 | * | 109 | * |
110 | * Returns zero on success; non-zero otherwise | 110 | * Returns zero on success; non-zero otherwise |
111 | */ | 111 | */ |
112 | int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | 112 | int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, |
113 | size_t size) | 113 | size_t size) |
114 | { | 114 | { |
115 | struct page *ecryptfs_page; | 115 | struct page *ecryptfs_page; |
116 | struct ecryptfs_crypt_stat *crypt_stat; | 116 | struct ecryptfs_crypt_stat *crypt_stat; |
117 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
118 | char *ecryptfs_page_virt; | 117 | char *ecryptfs_page_virt; |
119 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); | 118 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); |
120 | loff_t data_offset = 0; | 119 | loff_t data_offset = 0; |
@@ -145,7 +144,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
145 | if (num_bytes > total_remaining_zeros) | 144 | if (num_bytes > total_remaining_zeros) |
146 | num_bytes = total_remaining_zeros; | 145 | num_bytes = total_remaining_zeros; |
147 | } | 146 | } |
148 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, | 147 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, |
149 | ecryptfs_page_idx); | 148 | ecryptfs_page_idx); |
150 | if (IS_ERR(ecryptfs_page)) { | 149 | if (IS_ERR(ecryptfs_page)) { |
151 | rc = PTR_ERR(ecryptfs_page); | 150 | rc = PTR_ERR(ecryptfs_page); |
@@ -302,10 +301,10 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, | |||
302 | int ecryptfs_read(char *data, loff_t offset, size_t size, | 301 | int ecryptfs_read(char *data, loff_t offset, size_t size, |
303 | struct file *ecryptfs_file) | 302 | struct file *ecryptfs_file) |
304 | { | 303 | { |
304 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
305 | struct page *ecryptfs_page; | 305 | struct page *ecryptfs_page; |
306 | char *ecryptfs_page_virt; | 306 | char *ecryptfs_page_virt; |
307 | loff_t ecryptfs_file_size = | 307 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); |
308 | i_size_read(ecryptfs_file->f_dentry->d_inode); | ||
309 | loff_t data_offset = 0; | 308 | loff_t data_offset = 0; |
310 | loff_t pos; | 309 | loff_t pos; |
311 | int rc = 0; | 310 | int rc = 0; |
@@ -327,7 +326,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, | |||
327 | 326 | ||
328 | if (num_bytes > total_remaining_bytes) | 327 | if (num_bytes > total_remaining_bytes) |
329 | num_bytes = total_remaining_bytes; | 328 | num_bytes = total_remaining_bytes; |
330 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, | 329 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, |
331 | ecryptfs_page_idx); | 330 | ecryptfs_page_idx); |
332 | if (IS_ERR(ecryptfs_page)) { | 331 | if (IS_ERR(ecryptfs_page)) { |
333 | rc = PTR_ERR(ecryptfs_page); | 332 | rc = PTR_ERR(ecryptfs_page); |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 0c0ae491d231..0435886e4a9f 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -109,27 +109,6 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) | |||
109 | } | 109 | } |
110 | 110 | ||
111 | /** | 111 | /** |
112 | * ecryptfs_put_super | ||
113 | * @sb: Pointer to the ecryptfs super block | ||
114 | * | ||
115 | * Final actions when unmounting a file system. | ||
116 | * This will handle deallocation and release of our private data. | ||
117 | */ | ||
118 | static void ecryptfs_put_super(struct super_block *sb) | ||
119 | { | ||
120 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); | ||
121 | |||
122 | lock_kernel(); | ||
123 | |||
124 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); | ||
125 | bdi_destroy(&sb_info->bdi); | ||
126 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | ||
127 | ecryptfs_set_superblock_private(sb, NULL); | ||
128 | |||
129 | unlock_kernel(); | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * ecryptfs_statfs | 112 | * ecryptfs_statfs |
134 | * @sb: The ecryptfs super block | 113 | * @sb: The ecryptfs super block |
135 | * @buf: The struct kstatfs to fill in with stats | 114 | * @buf: The struct kstatfs to fill in with stats |
@@ -203,7 +182,6 @@ const struct super_operations ecryptfs_sops = { | |||
203 | .alloc_inode = ecryptfs_alloc_inode, | 182 | .alloc_inode = ecryptfs_alloc_inode, |
204 | .destroy_inode = ecryptfs_destroy_inode, | 183 | .destroy_inode = ecryptfs_destroy_inode, |
205 | .drop_inode = generic_delete_inode, | 184 | .drop_inode = generic_delete_inode, |
206 | .put_super = ecryptfs_put_super, | ||
207 | .statfs = ecryptfs_statfs, | 185 | .statfs = ecryptfs_statfs, |
208 | .remount_fs = NULL, | 186 | .remount_fs = NULL, |
209 | .clear_inode = ecryptfs_clear_inode, | 187 | .clear_inode = ecryptfs_clear_inode, |
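With ->put_super gone, the mount_crypt_stat/bdi/sb_info teardown it performed still has to happen once the superblock is shut down; the natural landing spot is the filesystem's ->kill_sb() callback, which runs after generic teardown and needs no lock_kernel(), fitting the BKL-removal theme of the surrounding changes. A hedged sketch of that pattern (the function name and the use of kill_anon_super() are assumptions, not taken from this diff):

static void ecryptfs_kill_block_super(struct super_block *sb)
{
	struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);

	kill_anon_super(sb);		/* generic shutdown first */
	if (!sb_info)
		return;
	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
	bdi_destroy(&sb_info->bdi);
	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
}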
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -242,9 +242,10 @@ static int __bprm_mm_init(struct linux_binprm *bprm) | |||
242 | * use STACK_TOP because that can depend on attributes which aren't | 242 | * use STACK_TOP because that can depend on attributes which aren't |
243 | * configured yet. | 243 | * configured yet. |
244 | */ | 244 | */ |
245 | BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); | ||
245 | vma->vm_end = STACK_TOP_MAX; | 246 | vma->vm_end = STACK_TOP_MAX; |
246 | vma->vm_start = vma->vm_end - PAGE_SIZE; | 247 | vma->vm_start = vma->vm_end - PAGE_SIZE; |
247 | vma->vm_flags = VM_STACK_FLAGS; | 248 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; |
248 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 249 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
249 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 250 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
250 | err = insert_vm_struct(mm, vma); | 251 | err = insert_vm_struct(mm, vma); |
@@ -616,6 +617,7 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
616 | else if (executable_stack == EXSTACK_DISABLE_X) | 617 | else if (executable_stack == EXSTACK_DISABLE_X) |
617 | vm_flags &= ~VM_EXEC; | 618 | vm_flags &= ~VM_EXEC; |
618 | vm_flags |= mm->def_flags; | 619 | vm_flags |= mm->def_flags; |
620 | vm_flags |= VM_STACK_INCOMPLETE_SETUP; | ||
619 | 621 | ||
620 | ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, | 622 | ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, |
621 | vm_flags); | 623 | vm_flags); |
@@ -630,6 +632,9 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
630 | goto out_unlock; | 632 | goto out_unlock; |
631 | } | 633 | } |
632 | 634 | ||
635 | /* mprotect_fixup is overkill to remove the temporary stack flags */ | ||
636 | vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP; | ||
637 | |||
633 | stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ | 638 | stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ |
634 | stack_size = vma->vm_end - vma->vm_start; | 639 | stack_size = vma->vm_end - vma->vm_start; |
635 | /* | 640 | /* |
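These two hunks mark the temporary stack VMA with VM_STACK_INCOMPLETE_SETUP while __bprm_mm_init() and setup_arg_pages() are still moving it into place, and clear the bit once the final mprotect_fixup() has succeeded. The point is to give rmap walkers a way to skip a stack whose page tables are mid-flight. A sketch of the kind of predicate such a walker would use, under the stated assumption that the bit is only ever set on exec-time stacks:

static bool vma_is_temporary_stack(struct vm_area_struct *vma)
{
	/* Only stack-like VMAs can carry the temporary-setup marking. */
	if (!(vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP)))
		return false;

	return (vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
		VM_STACK_INCOMPLETE_SETUP;
}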
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 4cfab1cc75c0..d91e9d829bc1 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -608,7 +608,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent) | |||
608 | de->inode_no = cpu_to_le64(parent->i_ino); | 608 | de->inode_no = cpu_to_le64(parent->i_ino); |
609 | memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); | 609 | memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); |
610 | exofs_set_de_type(de, inode); | 610 | exofs_set_de_type(de, inode); |
611 | kunmap_atomic(page, KM_USER0); | 611 | kunmap_atomic(kaddr, KM_USER0); |
612 | err = exofs_commit_chunk(page, 0, chunk_size); | 612 | err = exofs_commit_chunk(page, 0, chunk_size); |
613 | fail: | 613 | fail: |
614 | page_cache_release(page); | 614 | page_cache_release(page); |
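The exofs one-liner is a classic kmap pairing bug: kunmap_atomic() takes the kernel virtual address that kmap_atomic() returned, not the struct page. Because the parameter is a void *, passing the page compiled silently. The correct shape, for reference:

void *kaddr = kmap_atomic(page, KM_USER0);
/* ... fill in directory entries through kaddr ... */
kunmap_atomic(kaddr, KM_USER0);	/* the mapping address, never the page */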
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 76d2a79ef93e..4bb6ef822e46 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -755,6 +755,21 @@ static int exofs_write_end(struct file *file, struct address_space *mapping, | |||
755 | return ret; | 755 | return ret; |
756 | } | 756 | } |
757 | 757 | ||
758 | static int exofs_releasepage(struct page *page, gfp_t gfp) | ||
759 | { | ||
760 | EXOFS_DBGMSG("page 0x%lx\n", page->index); | ||
761 | WARN_ON(1); | ||
762 | return try_to_free_buffers(page); | ||
763 | } | ||
764 | |||
765 | static void exofs_invalidatepage(struct page *page, unsigned long offset) | ||
766 | { | ||
767 | EXOFS_DBGMSG("page_has_buffers=>%d\n", page_has_buffers(page)); | ||
768 | WARN_ON(1); | ||
769 | |||
770 | block_invalidatepage(page, offset); | ||
771 | } | ||
772 | |||
758 | const struct address_space_operations exofs_aops = { | 773 | const struct address_space_operations exofs_aops = { |
759 | .readpage = exofs_readpage, | 774 | .readpage = exofs_readpage, |
760 | .readpages = exofs_readpages, | 775 | .readpages = exofs_readpages, |
@@ -762,6 +777,21 @@ const struct address_space_operations exofs_aops = { | |||
762 | .writepages = exofs_writepages, | 777 | .writepages = exofs_writepages, |
763 | .write_begin = exofs_write_begin_export, | 778 | .write_begin = exofs_write_begin_export, |
764 | .write_end = exofs_write_end, | 779 | .write_end = exofs_write_end, |
780 | .releasepage = exofs_releasepage, | ||
781 | .set_page_dirty = __set_page_dirty_nobuffers, | ||
782 | .invalidatepage = exofs_invalidatepage, | ||
783 | |||
784 | /* Not implemented Yet */ | ||
785 | .bmap = NULL, /* TODO: use osd's OSD_ACT_READ_MAP */ | ||
786 | .direct_IO = NULL, /* TODO: Should be trivial to do */ | ||
787 | |||
788 | /* With these NULL has special meaning or default is not exported */ | ||
789 | .sync_page = NULL, | ||
790 | .get_xip_mem = NULL, | ||
791 | .migratepage = NULL, | ||
792 | .launder_page = NULL, | ||
793 | .is_partially_uptodate = NULL, | ||
794 | .error_remove_page = NULL, | ||
765 | }; | 795 | }; |
766 | 796 | ||
767 | /****************************************************************************** | 797 | /****************************************************************************** |
@@ -1123,16 +1153,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1123 | sbi = sb->s_fs_info; | 1153 | sbi = sb->s_fs_info; |
1124 | 1154 | ||
1125 | sb->s_dirt = 1; | 1155 | sb->s_dirt = 1; |
1126 | inode->i_uid = current->cred->fsuid; | 1156 | inode_init_owner(inode, dir, mode); |
1127 | if (dir->i_mode & S_ISGID) { | ||
1128 | inode->i_gid = dir->i_gid; | ||
1129 | if (S_ISDIR(mode)) | ||
1130 | mode |= S_ISGID; | ||
1131 | } else { | ||
1132 | inode->i_gid = current->cred->fsgid; | ||
1133 | } | ||
1134 | inode->i_mode = mode; | ||
1135 | |||
1136 | inode->i_ino = sbi->s_nextid++; | 1157 | inode->i_ino = sbi->s_nextid++; |
1137 | inode->i_blkbits = EXOFS_BLKSHIFT; | 1158 | inode->i_blkbits = EXOFS_BLKSHIFT; |
1138 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1159 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
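The exofs_new_inode() hunk replaces the open-coded owner setup with inode_init_owner(), the same substitution made in the ext2 and ext3 ialloc hunks further down. The helper's behavior matches the deleted lines; roughly (a paraphrase of the generic helper, not quoted from this patch):

void inode_init_owner(struct inode *inode, const struct inode *dir,
		      mode_t mode)
{
	inode->i_uid = current_fsuid();
	if (dir && dir->i_mode & S_ISGID) {
		/* Setgid directory: the new inode inherits the group, and
		 * a new subdirectory inherits the setgid bit itself. */
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current_fsgid();
	inode->i_mode = mode;
}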
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index a99e54318c3d..ca7e2a0ed98a 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -420,7 +420,7 @@ release_and_out: | |||
420 | return error; | 420 | return error; |
421 | } | 421 | } |
422 | 422 | ||
423 | struct xattr_handler ext2_xattr_acl_access_handler = { | 423 | const struct xattr_handler ext2_xattr_acl_access_handler = { |
424 | .prefix = POSIX_ACL_XATTR_ACCESS, | 424 | .prefix = POSIX_ACL_XATTR_ACCESS, |
425 | .flags = ACL_TYPE_ACCESS, | 425 | .flags = ACL_TYPE_ACCESS, |
426 | .list = ext2_xattr_list_acl_access, | 426 | .list = ext2_xattr_list_acl_access, |
@@ -428,7 +428,7 @@ struct xattr_handler ext2_xattr_acl_access_handler = { | |||
428 | .set = ext2_xattr_set_acl, | 428 | .set = ext2_xattr_set_acl, |
429 | }; | 429 | }; |
430 | 430 | ||
431 | struct xattr_handler ext2_xattr_acl_default_handler = { | 431 | const struct xattr_handler ext2_xattr_acl_default_handler = { |
432 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 432 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
433 | .flags = ACL_TYPE_DEFAULT, | 433 | .flags = ACL_TYPE_DEFAULT, |
434 | .list = ext2_xattr_list_acl_default, | 434 | .list = ext2_xattr_list_acl_default, |
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 3cf038c055d7..e8766a396776 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c | |||
@@ -1332,6 +1332,12 @@ retry_alloc: | |||
1332 | 1332 | ||
1333 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | 1333 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); |
1334 | /* | 1334 | /* |
1335 | * skip this group (and avoid loading bitmap) if there | ||
1336 | * are no free blocks | ||
1337 | */ | ||
1338 | if (!free_blocks) | ||
1339 | continue; | ||
1340 | /* | ||
1335 | * skip this group if the number of | 1341 | * skip this group if the number of |
1336 | * free blocks is less than half of the reservation | 1342 | * free blocks is less than half of the reservation |
1337 | * window size. | 1343 | * window size. |
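The group descriptor already records the free-block count, so a zero count lets the allocator skip the group without reading its block bitmap at all, potentially saving disk I/O. The loop shape after the change, condensed (identifiers borrowed from the surrounding ext2 code):

for (bgi = 0; bgi < ngroups; bgi++, group_no++) {
	gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	if (!free_blocks)
		continue;	/* empty group: don't touch the bitmap */
	/* ... load the bitmap and try to allocate ... */
}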
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index ad7d572ee8dc..938dbc739d00 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -106,7 +106,7 @@ void ext2_free_inode (struct inode * inode) | |||
106 | struct super_block * sb = inode->i_sb; | 106 | struct super_block * sb = inode->i_sb; |
107 | int is_directory; | 107 | int is_directory; |
108 | unsigned long ino; | 108 | unsigned long ino; |
109 | struct buffer_head *bitmap_bh = NULL; | 109 | struct buffer_head *bitmap_bh; |
110 | unsigned long block_group; | 110 | unsigned long block_group; |
111 | unsigned long bit; | 111 | unsigned long bit; |
112 | struct ext2_super_block * es; | 112 | struct ext2_super_block * es; |
@@ -135,14 +135,13 @@ void ext2_free_inode (struct inode * inode) | |||
135 | ino > le32_to_cpu(es->s_inodes_count)) { | 135 | ino > le32_to_cpu(es->s_inodes_count)) { |
136 | ext2_error (sb, "ext2_free_inode", | 136 | ext2_error (sb, "ext2_free_inode", |
137 | "reserved or nonexistent inode %lu", ino); | 137 | "reserved or nonexistent inode %lu", ino); |
138 | goto error_return; | 138 | return; |
139 | } | 139 | } |
140 | block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); | 140 | block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); |
141 | bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); | 141 | bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); |
142 | brelse(bitmap_bh); | ||
143 | bitmap_bh = read_inode_bitmap(sb, block_group); | 142 | bitmap_bh = read_inode_bitmap(sb, block_group); |
144 | if (!bitmap_bh) | 143 | if (!bitmap_bh) |
145 | goto error_return; | 144 | return; |
146 | 145 | ||
147 | /* Ok, now we can actually update the inode bitmaps.. */ | 146 | /* Ok, now we can actually update the inode bitmaps.. */ |
148 | if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), | 147 | if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), |
@@ -154,7 +153,7 @@ void ext2_free_inode (struct inode * inode) | |||
154 | mark_buffer_dirty(bitmap_bh); | 153 | mark_buffer_dirty(bitmap_bh); |
155 | if (sb->s_flags & MS_SYNCHRONOUS) | 154 | if (sb->s_flags & MS_SYNCHRONOUS) |
156 | sync_dirty_buffer(bitmap_bh); | 155 | sync_dirty_buffer(bitmap_bh); |
157 | error_return: | 156 | |
158 | brelse(bitmap_bh); | 157 | brelse(bitmap_bh); |
159 | } | 158 | } |
160 | 159 | ||
@@ -550,16 +549,12 @@ got: | |||
550 | 549 | ||
551 | sb->s_dirt = 1; | 550 | sb->s_dirt = 1; |
552 | mark_buffer_dirty(bh2); | 551 | mark_buffer_dirty(bh2); |
553 | inode->i_uid = current_fsuid(); | 552 | if (test_opt(sb, GRPID)) { |
554 | if (test_opt (sb, GRPID)) | 553 | inode->i_mode = mode; |
555 | inode->i_gid = dir->i_gid; | 554 | inode->i_uid = current_fsuid(); |
556 | else if (dir->i_mode & S_ISGID) { | ||
557 | inode->i_gid = dir->i_gid; | 555 | inode->i_gid = dir->i_gid; |
558 | if (S_ISDIR(mode)) | ||
559 | mode |= S_ISGID; | ||
560 | } else | 556 | } else |
561 | inode->i_gid = current_fsgid(); | 557 | inode_init_owner(inode, dir, mode); |
562 | inode->i_mode = mode; | ||
563 | 558 | ||
564 | inode->i_ino = ino; | 559 | inode->i_ino = ino; |
565 | inode->i_blocks = 0; | 560 | inode->i_blocks = 0; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fc13cc119aad..527c46d9bc1f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -22,7 +22,6 @@ | |||
22 | * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 | 22 | * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/smp_lock.h> | ||
26 | #include <linux/time.h> | 25 | #include <linux/time.h> |
27 | #include <linux/highuid.h> | 26 | #include <linux/highuid.h> |
28 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
@@ -1406,11 +1405,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) | |||
1406 | /* If this is the first large file | 1405 | /* If this is the first large file |
1407 | * created, add a flag to the superblock. | 1406 | * created, add a flag to the superblock. |
1408 | */ | 1407 | */ |
1409 | lock_kernel(); | 1408 | spin_lock(&EXT2_SB(sb)->s_lock); |
1410 | ext2_update_dynamic_rev(sb); | 1409 | ext2_update_dynamic_rev(sb); |
1411 | EXT2_SET_RO_COMPAT_FEATURE(sb, | 1410 | EXT2_SET_RO_COMPAT_FEATURE(sb, |
1412 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE); | 1411 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE); |
1413 | unlock_kernel(); | 1412 | spin_unlock(&EXT2_SB(sb)->s_lock); |
1414 | ext2_write_super(sb); | 1413 | ext2_write_super(sb); |
1415 | } | 1414 | } |
1416 | } | 1415 | } |
@@ -1467,7 +1466,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) | |||
1467 | if (error) | 1466 | if (error) |
1468 | return error; | 1467 | return error; |
1469 | 1468 | ||
1470 | if (iattr->ia_valid & ATTR_SIZE) | 1469 | if (is_quota_modification(inode, iattr)) |
1471 | dquot_initialize(inode); | 1470 | dquot_initialize(inode); |
1472 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | 1471 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || |
1473 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { | 1472 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { |
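Previously ext2_setattr() primed the quota machinery only for size changes; chown/chgrp also transfers quota usage between ids, so the test widens to any quota-relevant modification. A sketch of the predicate's assumed semantics (hedged; inferred from how the setattr paths here use it):

static inline int is_quota_modification(struct inode *inode, struct iattr *ia)
{
	return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
		(ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
		(ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
}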
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 42e4a303b675..71e9eb1fa696 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/random.h> | 26 | #include <linux/random.h> |
27 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
28 | #include <linux/exportfs.h> | 28 | #include <linux/exportfs.h> |
29 | #include <linux/smp_lock.h> | ||
30 | #include <linux/vfs.h> | 29 | #include <linux/vfs.h> |
31 | #include <linux/seq_file.h> | 30 | #include <linux/seq_file.h> |
32 | #include <linux/mount.h> | 31 | #include <linux/mount.h> |
@@ -39,7 +38,7 @@ | |||
39 | #include "xip.h" | 38 | #include "xip.h" |
40 | 39 | ||
41 | static void ext2_sync_super(struct super_block *sb, | 40 | static void ext2_sync_super(struct super_block *sb, |
42 | struct ext2_super_block *es); | 41 | struct ext2_super_block *es, int wait); |
43 | static int ext2_remount (struct super_block * sb, int * flags, char * data); | 42 | static int ext2_remount (struct super_block * sb, int * flags, char * data); |
44 | static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); | 43 | static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); |
45 | static int ext2_sync_fs(struct super_block *sb, int wait); | 44 | static int ext2_sync_fs(struct super_block *sb, int wait); |
@@ -52,9 +51,11 @@ void ext2_error (struct super_block * sb, const char * function, | |||
52 | struct ext2_super_block *es = sbi->s_es; | 51 | struct ext2_super_block *es = sbi->s_es; |
53 | 52 | ||
54 | if (!(sb->s_flags & MS_RDONLY)) { | 53 | if (!(sb->s_flags & MS_RDONLY)) { |
54 | spin_lock(&sbi->s_lock); | ||
55 | sbi->s_mount_state |= EXT2_ERROR_FS; | 55 | sbi->s_mount_state |= EXT2_ERROR_FS; |
56 | es->s_state |= cpu_to_le16(EXT2_ERROR_FS); | 56 | es->s_state |= cpu_to_le16(EXT2_ERROR_FS); |
57 | ext2_sync_super(sb, es); | 57 | spin_unlock(&sbi->s_lock); |
58 | ext2_sync_super(sb, es, 1); | ||
58 | } | 59 | } |
59 | 60 | ||
60 | va_start(args, fmt); | 61 | va_start(args, fmt); |
@@ -84,6 +85,9 @@ void ext2_msg(struct super_block *sb, const char *prefix, | |||
84 | va_end(args); | 85 | va_end(args); |
85 | } | 86 | } |
86 | 87 | ||
88 | /* | ||
89 | * This must be called with sbi->s_lock held. | ||
90 | */ | ||
87 | void ext2_update_dynamic_rev(struct super_block *sb) | 91 | void ext2_update_dynamic_rev(struct super_block *sb) |
88 | { | 92 | { |
89 | struct ext2_super_block *es = EXT2_SB(sb)->s_es; | 93 | struct ext2_super_block *es = EXT2_SB(sb)->s_es; |
@@ -115,8 +119,6 @@ static void ext2_put_super (struct super_block * sb) | |||
115 | int i; | 119 | int i; |
116 | struct ext2_sb_info *sbi = EXT2_SB(sb); | 120 | struct ext2_sb_info *sbi = EXT2_SB(sb); |
117 | 121 | ||
118 | lock_kernel(); | ||
119 | |||
120 | if (sb->s_dirt) | 122 | if (sb->s_dirt) |
121 | ext2_write_super(sb); | 123 | ext2_write_super(sb); |
122 | 124 | ||
@@ -124,8 +126,10 @@ static void ext2_put_super (struct super_block * sb) | |||
124 | if (!(sb->s_flags & MS_RDONLY)) { | 126 | if (!(sb->s_flags & MS_RDONLY)) { |
125 | struct ext2_super_block *es = sbi->s_es; | 127 | struct ext2_super_block *es = sbi->s_es; |
126 | 128 | ||
129 | spin_lock(&sbi->s_lock); | ||
127 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 130 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
128 | ext2_sync_super(sb, es); | 131 | spin_unlock(&sbi->s_lock); |
132 | ext2_sync_super(sb, es, 1); | ||
129 | } | 133 | } |
130 | db_count = sbi->s_gdb_count; | 134 | db_count = sbi->s_gdb_count; |
131 | for (i = 0; i < db_count; i++) | 135 | for (i = 0; i < db_count; i++) |
@@ -140,8 +144,6 @@ static void ext2_put_super (struct super_block * sb) | |||
140 | sb->s_fs_info = NULL; | 144 | sb->s_fs_info = NULL; |
141 | kfree(sbi->s_blockgroup_lock); | 145 | kfree(sbi->s_blockgroup_lock); |
142 | kfree(sbi); | 146 | kfree(sbi); |
143 | |||
144 | unlock_kernel(); | ||
145 | } | 147 | } |
146 | 148 | ||
147 | static struct kmem_cache * ext2_inode_cachep; | 149 | static struct kmem_cache * ext2_inode_cachep; |
@@ -209,6 +211,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
209 | struct ext2_super_block *es = sbi->s_es; | 211 | struct ext2_super_block *es = sbi->s_es; |
210 | unsigned long def_mount_opts; | 212 | unsigned long def_mount_opts; |
211 | 213 | ||
214 | spin_lock(&sbi->s_lock); | ||
212 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 215 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
213 | 216 | ||
214 | if (sbi->s_sb_block != 1) | 217 | if (sbi->s_sb_block != 1) |
@@ -281,6 +284,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
281 | if (!test_opt(sb, RESERVATION)) | 284 | if (!test_opt(sb, RESERVATION)) |
282 | seq_puts(seq, ",noreservation"); | 285 | seq_puts(seq, ",noreservation"); |
283 | 286 | ||
287 | spin_unlock(&sbi->s_lock); | ||
284 | return 0; | 288 | return 0; |
285 | } | 289 | } |
286 | 290 | ||
@@ -606,7 +610,6 @@ static int ext2_setup_super (struct super_block * sb, | |||
606 | if (!le16_to_cpu(es->s_max_mnt_count)) | 610 | if (!le16_to_cpu(es->s_max_mnt_count)) |
607 | es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); | 611 | es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); |
608 | le16_add_cpu(&es->s_mnt_count, 1); | 612 | le16_add_cpu(&es->s_mnt_count, 1); |
609 | ext2_write_super(sb); | ||
610 | if (test_opt (sb, DEBUG)) | 613 | if (test_opt (sb, DEBUG)) |
611 | ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " | 614 | ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " |
612 | "bpg=%lu, ipg=%lu, mo=%04lx]", | 615 | "bpg=%lu, ipg=%lu, mo=%04lx]", |
@@ -767,6 +770,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
767 | sb->s_fs_info = sbi; | 770 | sb->s_fs_info = sbi; |
768 | sbi->s_sb_block = sb_block; | 771 | sbi->s_sb_block = sb_block; |
769 | 772 | ||
773 | spin_lock_init(&sbi->s_lock); | ||
774 | |||
770 | /* | 775 | /* |
771 | * See what the current blocksize for the device is, and | 776 | * See what the current blocksize for the device is, and |
772 | * use that as the blocksize. Otherwise (or if the blocksize | 777 | * use that as the blocksize. Otherwise (or if the blocksize |
@@ -1079,7 +1084,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
1079 | if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) | 1084 | if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) |
1080 | ext2_msg(sb, KERN_WARNING, | 1085 | ext2_msg(sb, KERN_WARNING, |
1081 | "warning: mounting ext3 filesystem as ext2"); | 1086 | "warning: mounting ext3 filesystem as ext2"); |
1082 | ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); | 1087 | if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY)) |
1088 | sb->s_flags |= MS_RDONLY; | ||
1089 | ext2_write_super(sb); | ||
1083 | return 0; | 1090 | return 0; |
1084 | 1091 | ||
1085 | cantfind_ext2: | 1092 | cantfind_ext2: |
@@ -1120,30 +1127,26 @@ static void ext2_clear_super_error(struct super_block *sb) | |||
1120 | * be remapped. Nothing we can do but to retry the | 1127 | * be remapped. Nothing we can do but to retry the |
1121 | * write and hope for the best. | 1128 | * write and hope for the best. |
1122 | */ | 1129 | */ |
1123 | printk(KERN_ERR "EXT2-fs: %s previous I/O error to " | 1130 | ext2_msg(sb, KERN_ERR, |
1124 | "superblock detected", sb->s_id); | 1131 | "previous I/O error to superblock detected\n"); |
1125 | clear_buffer_write_io_error(sbh); | 1132 | clear_buffer_write_io_error(sbh); |
1126 | set_buffer_uptodate(sbh); | 1133 | set_buffer_uptodate(sbh); |
1127 | } | 1134 | } |
1128 | } | 1135 | } |
1129 | 1136 | ||
1130 | static void ext2_commit_super (struct super_block * sb, | 1137 | static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, |
1131 | struct ext2_super_block * es) | 1138 | int wait) |
1132 | { | ||
1133 | ext2_clear_super_error(sb); | ||
1134 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
1135 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); | ||
1136 | sb->s_dirt = 0; | ||
1137 | } | ||
1138 | |||
1139 | static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) | ||
1140 | { | 1139 | { |
1141 | ext2_clear_super_error(sb); | 1140 | ext2_clear_super_error(sb); |
1141 | spin_lock(&EXT2_SB(sb)->s_lock); | ||
1142 | es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); | 1142 | es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); |
1143 | es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); | 1143 | es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); |
1144 | es->s_wtime = cpu_to_le32(get_seconds()); | 1144 | es->s_wtime = cpu_to_le32(get_seconds()); |
1145 | /* unlock before we do IO */ | ||
1146 | spin_unlock(&EXT2_SB(sb)->s_lock); | ||
1145 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); | 1147 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); |
1146 | sync_dirty_buffer(EXT2_SB(sb)->s_sbh); | 1148 | if (wait) |
1149 | sync_dirty_buffer(EXT2_SB(sb)->s_sbh); | ||
1147 | sb->s_dirt = 0; | 1150 | sb->s_dirt = 0; |
1148 | } | 1151 | } |
1149 | 1152 | ||
@@ -1157,43 +1160,18 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) | |||
1157 | * may have been checked while mounted and e2fsck may have | 1160 | * may have been checked while mounted and e2fsck may have |
1158 | * set s_state to EXT2_VALID_FS after some corrections. | 1161 | * set s_state to EXT2_VALID_FS after some corrections. |
1159 | */ | 1162 | */ |
1160 | |||
1161 | static int ext2_sync_fs(struct super_block *sb, int wait) | 1163 | static int ext2_sync_fs(struct super_block *sb, int wait) |
1162 | { | 1164 | { |
1165 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
1163 | struct ext2_super_block *es = EXT2_SB(sb)->s_es; | 1166 | struct ext2_super_block *es = EXT2_SB(sb)->s_es; |
1164 | struct buffer_head *sbh = EXT2_SB(sb)->s_sbh; | ||
1165 | |||
1166 | lock_kernel(); | ||
1167 | if (buffer_write_io_error(sbh)) { | ||
1168 | /* | ||
1169 | * Oh, dear. A previous attempt to write the | ||
1170 | * superblock failed. This could happen because the | ||
1171 | * USB device was yanked out. Or it could happen to | ||
1172 | * be a transient write error and maybe the block will | ||
1173 | * be remapped. Nothing we can do but to retry the | ||
1174 | * write and hope for the best. | ||
1175 | */ | ||
1176 | ext2_msg(sb, KERN_ERR, | ||
1177 | "previous I/O error to superblock detected\n"); | ||
1178 | clear_buffer_write_io_error(sbh); | ||
1179 | set_buffer_uptodate(sbh); | ||
1180 | } | ||
1181 | 1167 | ||
1168 | spin_lock(&sbi->s_lock); | ||
1182 | if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { | 1169 | if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { |
1183 | ext2_debug("setting valid to 0\n"); | 1170 | ext2_debug("setting valid to 0\n"); |
1184 | es->s_state &= cpu_to_le16(~EXT2_VALID_FS); | 1171 | es->s_state &= cpu_to_le16(~EXT2_VALID_FS); |
1185 | es->s_free_blocks_count = | ||
1186 | cpu_to_le32(ext2_count_free_blocks(sb)); | ||
1187 | es->s_free_inodes_count = | ||
1188 | cpu_to_le32(ext2_count_free_inodes(sb)); | ||
1189 | es->s_mtime = cpu_to_le32(get_seconds()); | ||
1190 | ext2_sync_super(sb, es); | ||
1191 | } else { | ||
1192 | ext2_commit_super(sb, es); | ||
1193 | } | 1172 | } |
1194 | sb->s_dirt = 0; | 1173 | spin_unlock(&sbi->s_lock); |
1195 | unlock_kernel(); | 1174 | ext2_sync_super(sb, es, wait); |
1196 | |||
1197 | return 0; | 1175 | return 0; |
1198 | } | 1176 | } |
1199 | 1177 | ||
@@ -1215,7 +1193,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1215 | unsigned long old_sb_flags; | 1193 | unsigned long old_sb_flags; |
1216 | int err; | 1194 | int err; |
1217 | 1195 | ||
1218 | lock_kernel(); | 1196 | spin_lock(&sbi->s_lock); |
1219 | 1197 | ||
1220 | /* Store the old options */ | 1198 | /* Store the old options */ |
1221 | old_sb_flags = sb->s_flags; | 1199 | old_sb_flags = sb->s_flags; |
@@ -1254,13 +1232,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1254 | sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; | 1232 | sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; |
1255 | } | 1233 | } |
1256 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { | 1234 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { |
1257 | unlock_kernel(); | 1235 | spin_unlock(&sbi->s_lock); |
1258 | return 0; | 1236 | return 0; |
1259 | } | 1237 | } |
1260 | if (*flags & MS_RDONLY) { | 1238 | if (*flags & MS_RDONLY) { |
1261 | if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || | 1239 | if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || |
1262 | !(sbi->s_mount_state & EXT2_VALID_FS)) { | 1240 | !(sbi->s_mount_state & EXT2_VALID_FS)) { |
1263 | unlock_kernel(); | 1241 | spin_unlock(&sbi->s_lock); |
1264 | return 0; | 1242 | return 0; |
1265 | } | 1243 | } |
1266 | /* | 1244 | /* |
@@ -1269,6 +1247,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1269 | */ | 1247 | */ |
1270 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 1248 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
1271 | es->s_mtime = cpu_to_le32(get_seconds()); | 1249 | es->s_mtime = cpu_to_le32(get_seconds()); |
1250 | spin_unlock(&sbi->s_lock); | ||
1251 | ext2_sync_super(sb, es, 1); | ||
1272 | } else { | 1252 | } else { |
1273 | __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, | 1253 | __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, |
1274 | ~EXT2_FEATURE_RO_COMPAT_SUPP); | 1254 | ~EXT2_FEATURE_RO_COMPAT_SUPP); |
@@ -1288,16 +1268,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1288 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 1268 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
1289 | if (!ext2_setup_super (sb, es, 0)) | 1269 | if (!ext2_setup_super (sb, es, 0)) |
1290 | sb->s_flags &= ~MS_RDONLY; | 1270 | sb->s_flags &= ~MS_RDONLY; |
1271 | spin_unlock(&sbi->s_lock); | ||
1272 | ext2_write_super(sb); | ||
1291 | } | 1273 | } |
1292 | ext2_sync_super(sb, es); | ||
1293 | unlock_kernel(); | ||
1294 | return 0; | 1274 | return 0; |
1295 | restore_opts: | 1275 | restore_opts: |
1296 | sbi->s_mount_opt = old_opts.s_mount_opt; | 1276 | sbi->s_mount_opt = old_opts.s_mount_opt; |
1297 | sbi->s_resuid = old_opts.s_resuid; | 1277 | sbi->s_resuid = old_opts.s_resuid; |
1298 | sbi->s_resgid = old_opts.s_resgid; | 1278 | sbi->s_resgid = old_opts.s_resgid; |
1299 | sb->s_flags = old_sb_flags; | 1279 | sb->s_flags = old_sb_flags; |
1300 | unlock_kernel(); | 1280 | spin_unlock(&sbi->s_lock); |
1301 | return err; | 1281 | return err; |
1302 | } | 1282 | } |
1303 | 1283 | ||
@@ -1308,6 +1288,8 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
1308 | struct ext2_super_block *es = sbi->s_es; | 1288 | struct ext2_super_block *es = sbi->s_es; |
1309 | u64 fsid; | 1289 | u64 fsid; |
1310 | 1290 | ||
1291 | spin_lock(&sbi->s_lock); | ||
1292 | |||
1311 | if (test_opt (sb, MINIX_DF)) | 1293 | if (test_opt (sb, MINIX_DF)) |
1312 | sbi->s_overhead_last = 0; | 1294 | sbi->s_overhead_last = 0; |
1313 | else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { | 1295 | else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
@@ -1362,6 +1344,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
1362 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 1344 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
1363 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; | 1345 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; |
1364 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; | 1346 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; |
1347 | spin_unlock(&sbi->s_lock); | ||
1365 | return 0; | 1348 | return 0; |
1366 | } | 1349 | } |
1367 | 1350 | ||
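The pattern running through all of these ext2 hunks: sbi->s_lock, a plain spinlock, replaces the BKL for in-memory superblock fields, and it is always dropped before buffer I/O, since mark_buffer_dirty()/sync_dirty_buffer() may block. Condensed from the hunks above:

spin_lock(&sbi->s_lock);
es->s_state = cpu_to_le16(sbi->s_mount_state);	/* mutate fields only */
es->s_wtime = cpu_to_le32(get_seconds());
spin_unlock(&sbi->s_lock);			/* unlock before we do IO */

mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
if (wait)
	sync_dirty_buffer(EXT2_SB(sb)->s_sbh);	/* may sleep */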
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index e44dc92609be..7c3915780b19 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
@@ -101,7 +101,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *, | |||
101 | 101 | ||
102 | static struct mb_cache *ext2_xattr_cache; | 102 | static struct mb_cache *ext2_xattr_cache; |
103 | 103 | ||
104 | static struct xattr_handler *ext2_xattr_handler_map[] = { | 104 | static const struct xattr_handler *ext2_xattr_handler_map[] = { |
105 | [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, | 105 | [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, |
106 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | 106 | #ifdef CONFIG_EXT2_FS_POSIX_ACL |
107 | [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, | 107 | [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, |
@@ -113,7 +113,7 @@ static struct xattr_handler *ext2_xattr_handler_map[] = { | |||
113 | #endif | 113 | #endif |
114 | }; | 114 | }; |
115 | 115 | ||
116 | struct xattr_handler *ext2_xattr_handlers[] = { | 116 | const struct xattr_handler *ext2_xattr_handlers[] = { |
117 | &ext2_xattr_user_handler, | 117 | &ext2_xattr_user_handler, |
118 | &ext2_xattr_trusted_handler, | 118 | &ext2_xattr_trusted_handler, |
119 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | 119 | #ifdef CONFIG_EXT2_FS_POSIX_ACL |
@@ -126,10 +126,10 @@ struct xattr_handler *ext2_xattr_handlers[] = { | |||
126 | NULL | 126 | NULL |
127 | }; | 127 | }; |
128 | 128 | ||
129 | static inline struct xattr_handler * | 129 | static inline const struct xattr_handler * |
130 | ext2_xattr_handler(int name_index) | 130 | ext2_xattr_handler(int name_index) |
131 | { | 131 | { |
132 | struct xattr_handler *handler = NULL; | 132 | const struct xattr_handler *handler = NULL; |
133 | 133 | ||
134 | if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) | 134 | if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) |
135 | handler = ext2_xattr_handler_map[name_index]; | 135 | handler = ext2_xattr_handler_map[name_index]; |
@@ -298,7 +298,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", | |||
298 | /* list the attribute names */ | 298 | /* list the attribute names */ |
299 | for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); | 299 | for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); |
300 | entry = EXT2_XATTR_NEXT(entry)) { | 300 | entry = EXT2_XATTR_NEXT(entry)) { |
301 | struct xattr_handler *handler = | 301 | const struct xattr_handler *handler = |
302 | ext2_xattr_handler(entry->e_name_index); | 302 | ext2_xattr_handler(entry->e_name_index); |
303 | 303 | ||
304 | if (handler) { | 304 | if (handler) { |
@@ -345,7 +345,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb) | |||
345 | if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) | 345 | if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) |
346 | return; | 346 | return; |
347 | 347 | ||
348 | spin_lock(&EXT2_SB(sb)->s_lock); | ||
348 | EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); | 349 | EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); |
350 | spin_unlock(&EXT2_SB(sb)->s_lock); | ||
349 | sb->s_dirt = 1; | 351 | sb->s_dirt = 1; |
350 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); | 352 | mark_buffer_dirty(EXT2_SB(sb)->s_sbh); |
351 | } | 353 | } |
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index bf8175b2ced9..a1a1c2184616 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h | |||
@@ -55,11 +55,11 @@ struct ext2_xattr_entry { | |||
55 | 55 | ||
56 | # ifdef CONFIG_EXT2_FS_XATTR | 56 | # ifdef CONFIG_EXT2_FS_XATTR |
57 | 57 | ||
58 | extern struct xattr_handler ext2_xattr_user_handler; | 58 | extern const struct xattr_handler ext2_xattr_user_handler; |
59 | extern struct xattr_handler ext2_xattr_trusted_handler; | 59 | extern const struct xattr_handler ext2_xattr_trusted_handler; |
60 | extern struct xattr_handler ext2_xattr_acl_access_handler; | 60 | extern const struct xattr_handler ext2_xattr_acl_access_handler; |
61 | extern struct xattr_handler ext2_xattr_acl_default_handler; | 61 | extern const struct xattr_handler ext2_xattr_acl_default_handler; |
62 | extern struct xattr_handler ext2_xattr_security_handler; | 62 | extern const struct xattr_handler ext2_xattr_security_handler; |
63 | 63 | ||
64 | extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); | 64 | extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); |
65 | 65 | ||
@@ -72,7 +72,7 @@ extern void ext2_xattr_put_super(struct super_block *); | |||
72 | extern int init_ext2_xattr(void); | 72 | extern int init_ext2_xattr(void); |
73 | extern void exit_ext2_xattr(void); | 73 | extern void exit_ext2_xattr(void); |
74 | 74 | ||
75 | extern struct xattr_handler *ext2_xattr_handlers[]; | 75 | extern const struct xattr_handler *ext2_xattr_handlers[]; |
76 | 76 | ||
77 | # else /* CONFIG_EXT2_FS_XATTR */ | 77 | # else /* CONFIG_EXT2_FS_XATTR */ |
78 | 78 | ||
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index b118c6383c6d..3004e15d5da5 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c | |||
@@ -67,7 +67,7 @@ ext2_init_security(struct inode *inode, struct inode *dir) | |||
67 | return err; | 67 | return err; |
68 | } | 68 | } |
69 | 69 | ||
70 | struct xattr_handler ext2_xattr_security_handler = { | 70 | const struct xattr_handler ext2_xattr_security_handler = { |
71 | .prefix = XATTR_SECURITY_PREFIX, | 71 | .prefix = XATTR_SECURITY_PREFIX, |
72 | .list = ext2_xattr_security_list, | 72 | .list = ext2_xattr_security_list, |
73 | .get = ext2_xattr_security_get, | 73 | .get = ext2_xattr_security_get, |
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 2a26d71f4771..667e46a8d62d 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c | |||
@@ -50,7 +50,7 @@ ext2_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
50 | value, size, flags); | 50 | value, size, flags); |
51 | } | 51 | } |
52 | 52 | ||
53 | struct xattr_handler ext2_xattr_trusted_handler = { | 53 | const struct xattr_handler ext2_xattr_trusted_handler = { |
54 | .prefix = XATTR_TRUSTED_PREFIX, | 54 | .prefix = XATTR_TRUSTED_PREFIX, |
55 | .list = ext2_xattr_trusted_list, | 55 | .list = ext2_xattr_trusted_list, |
56 | .get = ext2_xattr_trusted_get, | 56 | .get = ext2_xattr_trusted_get, |
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 3f6caf3684b4..099d20f47163 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c | |||
@@ -54,7 +54,7 @@ ext2_xattr_user_set(struct dentry *dentry, const char *name, | |||
54 | name, value, size, flags); | 54 | name, value, size, flags); |
55 | } | 55 | } |
56 | 56 | ||
57 | struct xattr_handler ext2_xattr_user_handler = { | 57 | const struct xattr_handler ext2_xattr_user_handler = { |
58 | .prefix = XATTR_USER_PREFIX, | 58 | .prefix = XATTR_USER_PREFIX, |
59 | .list = ext2_xattr_user_list, | 59 | .list = ext2_xattr_user_list, |
60 | .get = ext2_xattr_user_get, | 60 | .get = ext2_xattr_user_get, |
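The constification running through these xattr files is mechanical but worth naming: the handler objects and the arrays that dispatch on them are write-once data, so making both const lets them live in read-only memory and keeps the function pointers out of writable sections. The resulting shape (hypothetical names, mirroring the hunks):

static const struct xattr_handler *my_xattr_handler_map[] = {
	[MY_XATTR_INDEX_USER] = &my_xattr_user_handler,
};

const struct xattr_handler *my_xattr_handlers[] = {
	&my_xattr_user_handler,
	NULL,			/* sentinel the VFS iterates up to */
};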
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 82ba34158661..01552abbca3c 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -456,7 +456,7 @@ release_and_out: | |||
456 | return error; | 456 | return error; |
457 | } | 457 | } |
458 | 458 | ||
459 | struct xattr_handler ext3_xattr_acl_access_handler = { | 459 | const struct xattr_handler ext3_xattr_acl_access_handler = { |
460 | .prefix = POSIX_ACL_XATTR_ACCESS, | 460 | .prefix = POSIX_ACL_XATTR_ACCESS, |
461 | .flags = ACL_TYPE_ACCESS, | 461 | .flags = ACL_TYPE_ACCESS, |
462 | .list = ext3_xattr_list_acl_access, | 462 | .list = ext3_xattr_list_acl_access, |
@@ -464,7 +464,7 @@ struct xattr_handler ext3_xattr_acl_access_handler = { | |||
464 | .set = ext3_xattr_set_acl, | 464 | .set = ext3_xattr_set_acl, |
465 | }; | 465 | }; |
466 | 466 | ||
467 | struct xattr_handler ext3_xattr_acl_default_handler = { | 467 | const struct xattr_handler ext3_xattr_acl_default_handler = { |
468 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 468 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
469 | .flags = ACL_TYPE_DEFAULT, | 469 | .flags = ACL_TYPE_DEFAULT, |
470 | .list = ext3_xattr_list_acl_default, | 470 | .list = ext3_xattr_list_acl_default, |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a177122a1b25..4a32511f4ded 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -1584,6 +1584,12 @@ retry_alloc: | |||
1584 | goto io_error; | 1584 | goto io_error; |
1585 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | 1585 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); |
1586 | /* | 1586 | /* |
1587 | * skip this group (and avoid loading bitmap) if there | ||
1588 | * are no free blocks | ||
1589 | */ | ||
1590 | if (!free_blocks) | ||
1591 | continue; | ||
1592 | /* | ||
1587 | * skip this group if the number of | 1593 | * skip this group if the number of |
1588 | * free blocks is less than half of the reservation | 1594 | * free blocks is less than half of the reservation |
1589 | * window size. | 1595 | * window size. |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 8209f266e9ad..fcf7487734b6 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -48,7 +48,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
48 | struct inode *inode = dentry->d_inode; | 48 | struct inode *inode = dentry->d_inode; |
49 | struct ext3_inode_info *ei = EXT3_I(inode); | 49 | struct ext3_inode_info *ei = EXT3_I(inode); |
50 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; |
51 | int ret = 0; | 51 | int ret, needs_barrier = 0; |
52 | tid_t commit_tid; | 52 | tid_t commit_tid; |
53 | 53 | ||
54 | if (inode->i_sb->s_flags & MS_RDONLY) | 54 | if (inode->i_sb->s_flags & MS_RDONLY) |
@@ -70,28 +70,27 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
70 | * (they were dirtied by commit). But that's OK - the blocks are | 70 | * (they were dirtied by commit). But that's OK - the blocks are |
71 | * safe in-journal, which is all fsync() needs to ensure. | 71 | * safe in-journal, which is all fsync() needs to ensure. |
72 | */ | 72 | */ |
73 | if (ext3_should_journal_data(inode)) { | 73 | if (ext3_should_journal_data(inode)) |
74 | ret = ext3_force_commit(inode->i_sb); | 74 | return ext3_force_commit(inode->i_sb); |
75 | goto out; | ||
76 | } | ||
77 | 75 | ||
78 | if (datasync) | 76 | if (datasync) |
79 | commit_tid = atomic_read(&ei->i_datasync_tid); | 77 | commit_tid = atomic_read(&ei->i_datasync_tid); |
80 | else | 78 | else |
81 | commit_tid = atomic_read(&ei->i_sync_tid); | 79 | commit_tid = atomic_read(&ei->i_sync_tid); |
82 | 80 | ||
83 | if (log_start_commit(journal, commit_tid)) { | 81 | if (test_opt(inode->i_sb, BARRIER) && |
84 | log_wait_commit(journal, commit_tid); | 82 | !journal_trans_will_send_data_barrier(journal, commit_tid)) |
85 | goto out; | 83 | needs_barrier = 1; |
86 | } | 84 | log_start_commit(journal, commit_tid); |
85 | ret = log_wait_commit(journal, commit_tid); | ||
87 | 86 | ||
88 | /* | 87 | /* |
89 | * In case we didn't commit a transaction, we have to flush | 88 | * In case we didn't commit a transaction, we have to flush |
90 | * disk caches manually so that data really is on persistent | 89 | * disk caches manually so that data really is on persistent |
91 | * storage | 90 | * storage |
92 | */ | 91 | */ |
93 | if (test_opt(inode->i_sb, BARRIER)) | 92 | if (needs_barrier) |
94 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 93 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
95 | out: | 94 | BLKDEV_IFL_WAIT); |
96 | return ret; | 95 | return ret; |
97 | } | 96 | } |
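The rewritten ext3_sync_file() tail avoids issuing a redundant cache flush: when the journal commit being waited on will itself emit a barrier, fsync gets its ordering for free. Condensed decision flow, using the names from the hunk above:

needs_barrier = test_opt(inode->i_sb, BARRIER) &&
		!journal_trans_will_send_data_barrier(journal, commit_tid);
log_start_commit(journal, commit_tid);
ret = log_wait_commit(journal, commit_tid);
if (needs_barrier)	/* no commit sent a barrier on our behalf */
	blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
			   BLKDEV_IFL_WAIT);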
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 0d0e97ed3ff6..498021eb88fb 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -538,16 +538,13 @@ got: | |||
538 | if (S_ISDIR(mode)) | 538 | if (S_ISDIR(mode)) |
539 | percpu_counter_inc(&sbi->s_dirs_counter); | 539 | percpu_counter_inc(&sbi->s_dirs_counter); |
540 | 540 | ||
541 | inode->i_uid = current_fsuid(); | 541 | |
542 | if (test_opt (sb, GRPID)) | 542 | if (test_opt(sb, GRPID)) { |
543 | inode->i_gid = dir->i_gid; | 543 | inode->i_mode = mode; |
544 | else if (dir->i_mode & S_ISGID) { | 544 | inode->i_uid = current_fsuid(); |
545 | inode->i_gid = dir->i_gid; | 545 | inode->i_gid = dir->i_gid; |
546 | if (S_ISDIR(mode)) | ||
547 | mode |= S_ISGID; | ||
548 | } else | 546 | } else |
549 | inode->i_gid = current_fsgid(); | 547 | inode_init_owner(inode, dir, mode); |
550 | inode->i_mode = mode; | ||
551 | 548 | ||
552 | inode->i_ino = ino; | 549 | inode->i_ino = ino; |
553 | /* This is the optimal IO size (for stat), not the fs block size */ | 550 | /* This is the optimal IO size (for stat), not the fs block size */ |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ea33bdf0a300..735f0190ec2a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -3151,7 +3151,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) | |||
3151 | if (error) | 3151 | if (error) |
3152 | return error; | 3152 | return error; |
3153 | 3153 | ||
3154 | if (ia_valid & ATTR_SIZE) | 3154 | if (is_quota_modification(inode, attr)) |
3155 | dquot_initialize(inode); | 3155 | dquot_initialize(inode); |
3156 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 3156 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
3157 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 3157 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 1bee604cc6cd..0fc1293d0e96 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -653,8 +653,12 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
653 | seq_printf(seq, ",commit=%u", | 653 | seq_printf(seq, ",commit=%u", |
654 | (unsigned) (sbi->s_commit_interval / HZ)); | 654 | (unsigned) (sbi->s_commit_interval / HZ)); |
655 | } | 655 | } |
656 | if (test_opt(sb, BARRIER)) | 656 | |
657 | seq_puts(seq, ",barrier=1"); | 657 | /* |
658 | * Always display barrier state so it's clear what the status is. | ||
659 | */ | ||
660 | seq_puts(seq, ",barrier="); | ||
661 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); | ||
658 | if (test_opt(sb, NOBH)) | 662 | if (test_opt(sb, NOBH)) |
659 | seq_puts(seq, ",nobh"); | 663 | seq_puts(seq, ",nobh"); |
660 | 664 | ||
@@ -810,8 +814,8 @@ enum { | |||
810 | Opt_data_err_abort, Opt_data_err_ignore, | 814 | Opt_data_err_abort, Opt_data_err_ignore, |
811 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 815 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
812 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 816 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
813 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, | 817 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, |
814 | Opt_usrquota, Opt_grpquota | 818 | Opt_resize, Opt_usrquota, Opt_grpquota |
815 | }; | 819 | }; |
816 | 820 | ||
817 | static const match_table_t tokens = { | 821 | static const match_table_t tokens = { |
@@ -865,6 +869,8 @@ static const match_table_t tokens = { | |||
865 | {Opt_quota, "quota"}, | 869 | {Opt_quota, "quota"}, |
866 | {Opt_usrquota, "usrquota"}, | 870 | {Opt_usrquota, "usrquota"}, |
867 | {Opt_barrier, "barrier=%u"}, | 871 | {Opt_barrier, "barrier=%u"}, |
872 | {Opt_barrier, "barrier"}, | ||
873 | {Opt_nobarrier, "nobarrier"}, | ||
868 | {Opt_resize, "resize"}, | 874 | {Opt_resize, "resize"}, |
869 | {Opt_err, NULL}, | 875 | {Opt_err, NULL}, |
870 | }; | 876 | }; |
@@ -967,7 +973,11 @@ static int parse_options (char *options, struct super_block *sb, | |||
967 | int token; | 973 | int token; |
968 | if (!*p) | 974 | if (!*p) |
969 | continue; | 975 | continue; |
970 | 976 | /* | |
977 | * Initialize args struct so we know whether arg was | ||
978 | * found; some options take optional arguments. | ||
979 | */ | ||
980 | args[0].to = args[0].from = 0; | ||
971 | token = match_token(p, tokens, args); | 981 | token = match_token(p, tokens, args); |
972 | switch (token) { | 982 | switch (token) { |
973 | case Opt_bsd_df: | 983 | case Opt_bsd_df: |
@@ -1215,9 +1225,15 @@ set_qf_format: | |||
1215 | case Opt_abort: | 1225 | case Opt_abort: |
1216 | set_opt(sbi->s_mount_opt, ABORT); | 1226 | set_opt(sbi->s_mount_opt, ABORT); |
1217 | break; | 1227 | break; |
1228 | case Opt_nobarrier: | ||
1229 | clear_opt(sbi->s_mount_opt, BARRIER); | ||
1230 | break; | ||
1218 | case Opt_barrier: | 1231 | case Opt_barrier: |
1219 | if (match_int(&args[0], &option)) | 1232 | if (args[0].from) { |
1220 | return 0; | 1233 | if (match_int(&args[0], &option)) |
1234 | return 0; | ||
1235 | } else | ||
1236 | option = 1; /* No argument, default to 1 */ | ||
1221 | if (option) | 1237 | if (option) |
1222 | set_opt(sbi->s_mount_opt, BARRIER); | 1238 | set_opt(sbi->s_mount_opt, BARRIER); |
1223 | else | 1239 | else |
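Registering both "barrier" and "barrier=%u" means match_token() can return Opt_barrier with or without a captured argument, and the args[0].to = args[0].from = 0 initialization earlier in this hunk is what makes the two cases distinguishable. Put together (condensed from the hunks above):

args[0].to = args[0].from = 0;	/* so "no argument" is detectable */
token = match_token(p, tokens, args);
switch (token) {
case Opt_nobarrier:
	clear_opt(sbi->s_mount_opt, BARRIER);
	break;
case Opt_barrier:
	if (args[0].from) {	/* matched "barrier=%u" */
		if (match_int(&args[0], &option))
			return 0;
	} else			/* bare "barrier" defaults to on */
		option = 1;
	if (option)
		set_opt(sbi->s_mount_opt, BARRIER);
	else
		clear_opt(sbi->s_mount_opt, BARRIER);
	break;
}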
@@ -1890,21 +1906,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1890 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 1906 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
1891 | spin_lock_init(&sbi->s_next_gen_lock); | 1907 | spin_lock_init(&sbi->s_next_gen_lock); |
1892 | 1908 | ||
1893 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
1894 | ext3_count_free_blocks(sb)); | ||
1895 | if (!err) { | ||
1896 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
1897 | ext3_count_free_inodes(sb)); | ||
1898 | } | ||
1899 | if (!err) { | ||
1900 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
1901 | ext3_count_dirs(sb)); | ||
1902 | } | ||
1903 | if (err) { | ||
1904 | ext3_msg(sb, KERN_ERR, "error: insufficient memory"); | ||
1905 | goto failed_mount3; | ||
1906 | } | ||
1907 | |||
1908 | /* per fileystem reservation list head & lock */ | 1909 | /* per fileystem reservation list head & lock */ |
1909 | spin_lock_init(&sbi->s_rsv_window_lock); | 1910 | spin_lock_init(&sbi->s_rsv_window_lock); |
1910 | sbi->s_rsv_window_root = RB_ROOT; | 1911 | sbi->s_rsv_window_root = RB_ROOT; |
@@ -1945,15 +1946,29 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1945 | if (!test_opt(sb, NOLOAD) && | 1946 | if (!test_opt(sb, NOLOAD) && |
1946 | EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { | 1947 | EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { |
1947 | if (ext3_load_journal(sb, es, journal_devnum)) | 1948 | if (ext3_load_journal(sb, es, journal_devnum)) |
1948 | goto failed_mount3; | 1949 | goto failed_mount2; |
1949 | } else if (journal_inum) { | 1950 | } else if (journal_inum) { |
1950 | if (ext3_create_journal(sb, es, journal_inum)) | 1951 | if (ext3_create_journal(sb, es, journal_inum)) |
1951 | goto failed_mount3; | 1952 | goto failed_mount2; |
1952 | } else { | 1953 | } else { |
1953 | if (!silent) | 1954 | if (!silent) |
1954 | ext3_msg(sb, KERN_ERR, | 1955 | ext3_msg(sb, KERN_ERR, |
1955 | "error: no journal found. " | 1956 | "error: no journal found. " |
1956 | "mounting ext3 over ext2?"); | 1957 | "mounting ext3 over ext2?"); |
1958 | goto failed_mount2; | ||
1959 | } | ||
1960 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
1961 | ext3_count_free_blocks(sb)); | ||
1962 | if (!err) { | ||
1963 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
1964 | ext3_count_free_inodes(sb)); | ||
1965 | } | ||
1966 | if (!err) { | ||
1967 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
1968 | ext3_count_dirs(sb)); | ||
1969 | } | ||
1970 | if (err) { | ||
1971 | ext3_msg(sb, KERN_ERR, "error: insufficient memory"); | ||
1957 | goto failed_mount3; | 1972 | goto failed_mount3; |
1958 | } | 1973 | } |
1959 | 1974 | ||
@@ -1978,7 +1993,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1978 | ext3_msg(sb, KERN_ERR, | 1993 | ext3_msg(sb, KERN_ERR, |
1979 | "error: journal does not support " | 1994 | "error: journal does not support " |
1980 | "requested data journaling mode"); | 1995 | "requested data journaling mode"); |
1981 | goto failed_mount4; | 1996 | goto failed_mount3; |
1982 | } | 1997 | } |
1983 | default: | 1998 | default: |
1984 | break; | 1999 | break; |
@@ -2001,19 +2016,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
2001 | if (IS_ERR(root)) { | 2016 | if (IS_ERR(root)) { |
2002 | ext3_msg(sb, KERN_ERR, "error: get root inode failed"); | 2017 | ext3_msg(sb, KERN_ERR, "error: get root inode failed"); |
2003 | ret = PTR_ERR(root); | 2018 | ret = PTR_ERR(root); |
2004 | goto failed_mount4; | 2019 | goto failed_mount3; |
2005 | } | 2020 | } |
2006 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 2021 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
2007 | iput(root); | 2022 | iput(root); |
2008 | ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); | 2023 | ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); |
2009 | goto failed_mount4; | 2024 | goto failed_mount3; |
2010 | } | 2025 | } |
2011 | sb->s_root = d_alloc_root(root); | 2026 | sb->s_root = d_alloc_root(root); |
2012 | if (!sb->s_root) { | 2027 | if (!sb->s_root) { |
2013 | ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); | 2028 | ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); |
2014 | iput(root); | 2029 | iput(root); |
2015 | ret = -ENOMEM; | 2030 | ret = -ENOMEM; |
2016 | goto failed_mount4; | 2031 | goto failed_mount3; |
2017 | } | 2032 | } |
2018 | 2033 | ||
2019 | ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); | 2034 | ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); |
@@ -2039,12 +2054,11 @@ cantfind_ext3: | |||
2039 | sb->s_id); | 2054 | sb->s_id); |
2040 | goto failed_mount; | 2055 | goto failed_mount; |
2041 | 2056 | ||
2042 | failed_mount4: | ||
2043 | journal_destroy(sbi->s_journal); | ||
2044 | failed_mount3: | 2057 | failed_mount3: |
2045 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2058 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2046 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2059 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2047 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2060 | percpu_counter_destroy(&sbi->s_dirs_counter); |
2061 | journal_destroy(sbi->s_journal); | ||
2048 | failed_mount2: | 2062 | failed_mount2: |
2049 | for (i = 0; i < db_count; i++) | 2063 | for (i = 0; i < db_count; i++) |
2050 | brelse(sbi->s_group_desc[i]); | 2064 | brelse(sbi->s_group_desc[i]); |
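Moving the percpu counter setup after journal initialization keeps the setup order and the failed_mountN unwind labels in strict reverse correspondence, which is what lets the failed_mount4 label disappear above. The general shape of the pattern (hypothetical helper names, not from this patch):

err = load_journal(sb);			/* step A */
if (err)
	goto failed_A;			/* nothing of A or B to undo */
err = init_counters(sb);		/* step B */
if (err)
	goto failed_B;
return 0;

failed_B:
	destroy_counters(sb);
	destroy_journal(sb);		/* undo in reverse order of setup */
failed_A:
	/* release whatever preceded step A */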
@@ -2278,6 +2292,9 @@ static int ext3_load_journal(struct super_block *sb, | |||
2278 | return -EINVAL; | 2292 | return -EINVAL; |
2279 | } | 2293 | } |
2280 | 2294 | ||
2295 | if (!(journal->j_flags & JFS_BARRIER)) | ||
2296 | printk(KERN_INFO "EXT3-fs: barriers not enabled\n"); | ||
2297 | |||
2281 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 2298 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
2282 | err = journal_update_format(journal); | 2299 | err = journal_update_format(journal); |
2283 | if (err) { | 2300 | if (err) { |
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 534a94c3a933..71fb8d65e54c 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c | |||
@@ -104,7 +104,7 @@ static int ext3_xattr_list(struct dentry *dentry, char *buffer, | |||
104 | 104 | ||
105 | static struct mb_cache *ext3_xattr_cache; | 105 | static struct mb_cache *ext3_xattr_cache; |
106 | 106 | ||
107 | static struct xattr_handler *ext3_xattr_handler_map[] = { | 107 | static const struct xattr_handler *ext3_xattr_handler_map[] = { |
108 | [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, | 108 | [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, |
109 | #ifdef CONFIG_EXT3_FS_POSIX_ACL | 109 | #ifdef CONFIG_EXT3_FS_POSIX_ACL |
110 | [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, | 110 | [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, |
@@ -116,7 +116,7 @@ static struct xattr_handler *ext3_xattr_handler_map[] = { | |||
116 | #endif | 116 | #endif |
117 | }; | 117 | }; |
118 | 118 | ||
119 | struct xattr_handler *ext3_xattr_handlers[] = { | 119 | const struct xattr_handler *ext3_xattr_handlers[] = { |
120 | &ext3_xattr_user_handler, | 120 | &ext3_xattr_user_handler, |
121 | &ext3_xattr_trusted_handler, | 121 | &ext3_xattr_trusted_handler, |
122 | #ifdef CONFIG_EXT3_FS_POSIX_ACL | 122 | #ifdef CONFIG_EXT3_FS_POSIX_ACL |
@@ -129,10 +129,10 @@ struct xattr_handler *ext3_xattr_handlers[] = { | |||
129 | NULL | 129 | NULL |
130 | }; | 130 | }; |
131 | 131 | ||
132 | static inline struct xattr_handler * | 132 | static inline const struct xattr_handler * |
133 | ext3_xattr_handler(int name_index) | 133 | ext3_xattr_handler(int name_index) |
134 | { | 134 | { |
135 | struct xattr_handler *handler = NULL; | 135 | const struct xattr_handler *handler = NULL; |
136 | 136 | ||
137 | if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) | 137 | if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) |
138 | handler = ext3_xattr_handler_map[name_index]; | 138 | handler = ext3_xattr_handler_map[name_index]; |
@@ -338,7 +338,7 @@ ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry, | |||
338 | size_t rest = buffer_size; | 338 | size_t rest = buffer_size; |
339 | 339 | ||
340 | for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { | 340 | for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { |
341 | struct xattr_handler *handler = | 341 | const struct xattr_handler *handler = |
342 | ext3_xattr_handler(entry->e_name_index); | 342 | ext3_xattr_handler(entry->e_name_index); |
343 | 343 | ||
344 | if (handler) { | 344 | if (handler) { |
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h index 148a4dfc82ab..377fe7201169 100644 --- a/fs/ext3/xattr.h +++ b/fs/ext3/xattr.h | |||
@@ -58,11 +58,11 @@ struct ext3_xattr_entry { | |||
58 | 58 | ||
59 | # ifdef CONFIG_EXT3_FS_XATTR | 59 | # ifdef CONFIG_EXT3_FS_XATTR |
60 | 60 | ||
61 | extern struct xattr_handler ext3_xattr_user_handler; | 61 | extern const struct xattr_handler ext3_xattr_user_handler; |
62 | extern struct xattr_handler ext3_xattr_trusted_handler; | 62 | extern const struct xattr_handler ext3_xattr_trusted_handler; |
63 | extern struct xattr_handler ext3_xattr_acl_access_handler; | 63 | extern const struct xattr_handler ext3_xattr_acl_access_handler; |
64 | extern struct xattr_handler ext3_xattr_acl_default_handler; | 64 | extern const struct xattr_handler ext3_xattr_acl_default_handler; |
65 | extern struct xattr_handler ext3_xattr_security_handler; | 65 | extern const struct xattr_handler ext3_xattr_security_handler; |
66 | 66 | ||
67 | extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); | 67 | extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); |
68 | 68 | ||
@@ -76,7 +76,7 @@ extern void ext3_xattr_put_super(struct super_block *); | |||
76 | extern int init_ext3_xattr(void); | 76 | extern int init_ext3_xattr(void); |
77 | extern void exit_ext3_xattr(void); | 77 | extern void exit_ext3_xattr(void); |
78 | 78 | ||
79 | extern struct xattr_handler *ext3_xattr_handlers[]; | 79 | extern const struct xattr_handler *ext3_xattr_handlers[]; |
80 | 80 | ||
81 | # else /* CONFIG_EXT3_FS_XATTR */ | 81 | # else /* CONFIG_EXT3_FS_XATTR */ |
82 | 82 | ||
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 3af91f476dff..03a99bfc59f9 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c | |||
@@ -69,7 +69,7 @@ ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) | |||
69 | return err; | 69 | return err; |
70 | } | 70 | } |
71 | 71 | ||
72 | struct xattr_handler ext3_xattr_security_handler = { | 72 | const struct xattr_handler ext3_xattr_security_handler = { |
73 | .prefix = XATTR_SECURITY_PREFIX, | 73 | .prefix = XATTR_SECURITY_PREFIX, |
74 | .list = ext3_xattr_security_list, | 74 | .list = ext3_xattr_security_list, |
75 | .get = ext3_xattr_security_get, | 75 | .get = ext3_xattr_security_get, |
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index e5562845ed96..dc8edda9ffe0 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c | |||
@@ -51,7 +51,7 @@ ext3_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
51 | value, size, flags); | 51 | value, size, flags); |
52 | } | 52 | } |
53 | 53 | ||
54 | struct xattr_handler ext3_xattr_trusted_handler = { | 54 | const struct xattr_handler ext3_xattr_trusted_handler = { |
55 | .prefix = XATTR_TRUSTED_PREFIX, | 55 | .prefix = XATTR_TRUSTED_PREFIX, |
56 | .list = ext3_xattr_trusted_list, | 56 | .list = ext3_xattr_trusted_list, |
57 | .get = ext3_xattr_trusted_get, | 57 | .get = ext3_xattr_trusted_get, |
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 3bcfe9ee0a68..7a321974d584 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c | |||
@@ -54,7 +54,7 @@ ext3_xattr_user_set(struct dentry *dentry, const char *name, | |||
54 | name, value, size, flags); | 54 | name, value, size, flags); |
55 | } | 55 | } |
56 | 56 | ||
57 | struct xattr_handler ext3_xattr_user_handler = { | 57 | const struct xattr_handler ext3_xattr_user_handler = { |
58 | .prefix = XATTR_USER_PREFIX, | 58 | .prefix = XATTR_USER_PREFIX, |
59 | .list = ext3_xattr_user_list, | 59 | .list = ext3_xattr_user_list, |
60 | .get = ext3_xattr_user_get, | 60 | .get = ext3_xattr_user_get, |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 8a2a29d35a6f..feaf498feaa6 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -454,7 +454,7 @@ release_and_out: | |||
454 | return error; | 454 | return error; |
455 | } | 455 | } |
456 | 456 | ||
457 | struct xattr_handler ext4_xattr_acl_access_handler = { | 457 | const struct xattr_handler ext4_xattr_acl_access_handler = { |
458 | .prefix = POSIX_ACL_XATTR_ACCESS, | 458 | .prefix = POSIX_ACL_XATTR_ACCESS, |
459 | .flags = ACL_TYPE_ACCESS, | 459 | .flags = ACL_TYPE_ACCESS, |
460 | .list = ext4_xattr_list_acl_access, | 460 | .list = ext4_xattr_list_acl_access, |
@@ -462,7 +462,7 @@ struct xattr_handler ext4_xattr_acl_access_handler = { | |||
462 | .set = ext4_xattr_set_acl, | 462 | .set = ext4_xattr_set_acl, |
463 | }; | 463 | }; |
464 | 464 | ||
465 | struct xattr_handler ext4_xattr_acl_default_handler = { | 465 | const struct xattr_handler ext4_xattr_acl_default_handler = { |
466 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 466 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
467 | .flags = ACL_TYPE_DEFAULT, | 467 | .flags = ACL_TYPE_DEFAULT, |
468 | .list = ext4_xattr_list_acl_default, | 468 | .list = ext4_xattr_list_acl_default, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0d0c3239c1cd..ef3d980e67cb 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
100 | if (ext4_should_writeback_data(inode) && | 100 | if (ext4_should_writeback_data(inode) && |
101 | (journal->j_fs_dev != journal->j_dev) && | 101 | (journal->j_fs_dev != journal->j_dev) && |
102 | (journal->j_flags & JBD2_BARRIER)) | 102 | (journal->j_flags & JBD2_BARRIER)) |
103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 103 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
104 | NULL, BLKDEV_IFL_WAIT); | ||
104 | jbd2_log_wait_commit(journal, commit_tid); | 105 | jbd2_log_wait_commit(journal, commit_tid); |
105 | } else if (journal->j_flags & JBD2_BARRIER) | 106 | } else if (journal->j_flags & JBD2_BARRIER) |
106 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 107 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
108 | BLKDEV_IFL_WAIT); | ||
107 | return ret; | 109 | return ret; |
108 | } | 110 | } |
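The flush call above gains a gfp mask and a flags word in this series; a hedged sketch of a caller using the new signature shown in the hunk (example_flush_cache is illustrative):

#include <linux/blkdev.h>

/* Flush the backing device's write cache and wait for completion. */
static int example_flush_cache(struct super_block *sb)
{
	return blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL,
				  BLKDEV_IFL_WAIT);
}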
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 57f6eef6ccd6..1a0e183a2f04 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -979,16 +979,12 @@ got: | |||
979 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); | 979 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); |
980 | } | 980 | } |
981 | 981 | ||
982 | inode->i_uid = current_fsuid(); | 982 | if (test_opt(sb, GRPID)) { |
983 | if (test_opt(sb, GRPID)) | 983 | inode->i_mode = mode; |
984 | inode->i_uid = current_fsuid(); | ||
984 | inode->i_gid = dir->i_gid; | 985 | inode->i_gid = dir->i_gid; |
985 | else if (dir->i_mode & S_ISGID) { | ||
986 | inode->i_gid = dir->i_gid; | ||
987 | if (S_ISDIR(mode)) | ||
988 | mode |= S_ISGID; | ||
989 | } else | 986 | } else |
990 | inode->i_gid = current_fsgid(); | 987 | inode_init_owner(inode, dir, mode); |
991 | inode->i_mode = mode; | ||
992 | 988 | ||
993 | inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); | 989 | inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); |
994 | /* This is the optimal IO size (for stat), not the fs block size */ | 990 | /* This is the optimal IO size (for stat), not the fs block size */ |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 81d605412844..3e0f6af9d08d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -5425,7 +5425,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5425 | if (error) | 5425 | if (error) |
5426 | return error; | 5426 | return error; |
5427 | 5427 | ||
5428 | if (ia_valid & ATTR_SIZE) | 5428 | if (is_quota_modification(inode, attr)) |
5429 | dquot_initialize(inode); | 5429 | dquot_initialize(inode); |
5430 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 5430 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
5431 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 5431 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b4c5aa8489d8..2de0e9515089 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -97,7 +97,7 @@ static int ext4_xattr_list(struct dentry *dentry, char *buffer, | |||
97 | 97 | ||
98 | static struct mb_cache *ext4_xattr_cache; | 98 | static struct mb_cache *ext4_xattr_cache; |
99 | 99 | ||
100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static const struct xattr_handler *ext4_xattr_handler_map[] = { |
101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
@@ -109,7 +109,7 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
109 | #endif | 109 | #endif |
110 | }; | 110 | }; |
111 | 111 | ||
112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | const struct xattr_handler *ext4_xattr_handlers[] = { |
113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
@@ -122,10 +122,10 @@ struct xattr_handler *ext4_xattr_handlers[] = { | |||
122 | NULL | 122 | NULL |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static inline struct xattr_handler * | 125 | static inline const struct xattr_handler * |
126 | ext4_xattr_handler(int name_index) | 126 | ext4_xattr_handler(int name_index) |
127 | { | 127 | { |
128 | struct xattr_handler *handler = NULL; | 128 | const struct xattr_handler *handler = NULL; |
129 | 129 | ||
130 | if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) | 130 | if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) |
131 | handler = ext4_xattr_handler_map[name_index]; | 131 | handler = ext4_xattr_handler_map[name_index]; |
@@ -332,7 +332,7 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, | |||
332 | size_t rest = buffer_size; | 332 | size_t rest = buffer_size; |
333 | 333 | ||
334 | for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { | 334 | for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { |
335 | struct xattr_handler *handler = | 335 | const struct xattr_handler *handler = |
336 | ext4_xattr_handler(entry->e_name_index); | 336 | ext4_xattr_handler(entry->e_name_index); |
337 | 337 | ||
338 | if (handler) { | 338 | if (handler) { |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 8ede88b18c29..518e96e43905 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -65,11 +65,11 @@ struct ext4_xattr_entry { | |||
65 | 65 | ||
66 | # ifdef CONFIG_EXT4_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
67 | 67 | ||
68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern const struct xattr_handler ext4_xattr_user_handler; |
69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern const struct xattr_handler ext4_xattr_trusted_handler; |
70 | extern struct xattr_handler ext4_xattr_acl_access_handler; | 70 | extern const struct xattr_handler ext4_xattr_acl_access_handler; |
71 | extern struct xattr_handler ext4_xattr_acl_default_handler; | 71 | extern const struct xattr_handler ext4_xattr_acl_default_handler; |
72 | extern struct xattr_handler ext4_xattr_security_handler; | 72 | extern const struct xattr_handler ext4_xattr_security_handler; |
73 | 73 | ||
74 | extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); | 74 | extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); |
75 | 75 | ||
@@ -86,7 +86,7 @@ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
86 | extern int init_ext4_xattr(void); | 86 | extern int init_ext4_xattr(void); |
87 | extern void exit_ext4_xattr(void); | 87 | extern void exit_ext4_xattr(void); |
88 | 88 | ||
89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
91 | # else /* CONFIG_EXT4_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
92 | 92 | ||
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 8b145e98df07..9b21268e121c 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -69,7 +69,7 @@ ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) | |||
69 | return err; | 69 | return err; |
70 | } | 70 | } |
71 | 71 | ||
72 | struct xattr_handler ext4_xattr_security_handler = { | 72 | const struct xattr_handler ext4_xattr_security_handler = { |
73 | .prefix = XATTR_SECURITY_PREFIX, | 73 | .prefix = XATTR_SECURITY_PREFIX, |
74 | .list = ext4_xattr_security_list, | 74 | .list = ext4_xattr_security_list, |
75 | .get = ext4_xattr_security_get, | 75 | .get = ext4_xattr_security_get, |
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 15b50edc6587..37e6ebca2cc3 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c | |||
@@ -51,7 +51,7 @@ ext4_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
51 | name, value, size, flags); | 51 | name, value, size, flags); |
52 | } | 52 | } |
53 | 53 | ||
54 | struct xattr_handler ext4_xattr_trusted_handler = { | 54 | const struct xattr_handler ext4_xattr_trusted_handler = { |
55 | .prefix = XATTR_TRUSTED_PREFIX, | 55 | .prefix = XATTR_TRUSTED_PREFIX, |
56 | .list = ext4_xattr_trusted_list, | 56 | .list = ext4_xattr_trusted_list, |
57 | .get = ext4_xattr_trusted_get, | 57 | .get = ext4_xattr_trusted_get, |
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index c4ce05746ce1..98c375352d0e 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c | |||
@@ -54,7 +54,7 @@ ext4_xattr_user_set(struct dentry *dentry, const char *name, | |||
54 | name, value, size, flags); | 54 | name, value, size, flags); |
55 | } | 55 | } |
56 | 56 | ||
57 | struct xattr_handler ext4_xattr_user_handler = { | 57 | const struct xattr_handler ext4_xattr_user_handler = { |
58 | .prefix = XATTR_USER_PREFIX, | 58 | .prefix = XATTR_USER_PREFIX, |
59 | .list = ext4_xattr_user_list, | 59 | .list = ext4_xattr_user_list, |
60 | .get = ext4_xattr_user_get, | 60 | .get = ext4_xattr_user_get, |
diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 113f0a1e565d..ae8200f84e39 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c | |||
@@ -242,9 +242,10 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) | |||
242 | while (*fclus < cluster) { | 242 | while (*fclus < cluster) { |
243 | /* prevent the infinite loop of cluster chain */ | 243 | /* prevent the infinite loop of cluster chain */ |
244 | if (*fclus > limit) { | 244 | if (*fclus > limit) { |
245 | fat_fs_error(sb, "%s: detected the cluster chain loop" | 245 | fat_fs_error_ratelimit(sb, |
246 | " (i_pos %lld)", __func__, | 246 | "%s: detected the cluster chain loop" |
247 | MSDOS_I(inode)->i_pos); | 247 | " (i_pos %lld)", __func__, |
248 | MSDOS_I(inode)->i_pos); | ||
248 | nr = -EIO; | 249 | nr = -EIO; |
249 | goto out; | 250 | goto out; |
250 | } | 251 | } |
@@ -253,9 +254,9 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) | |||
253 | if (nr < 0) | 254 | if (nr < 0) |
254 | goto out; | 255 | goto out; |
255 | else if (nr == FAT_ENT_FREE) { | 256 | else if (nr == FAT_ENT_FREE) { |
256 | fat_fs_error(sb, "%s: invalid cluster chain" | 257 | fat_fs_error_ratelimit(sb, "%s: invalid cluster chain" |
257 | " (i_pos %lld)", __func__, | 258 | " (i_pos %lld)", __func__, |
258 | MSDOS_I(inode)->i_pos); | 259 | MSDOS_I(inode)->i_pos); |
259 | nr = -EIO; | 260 | nr = -EIO; |
260 | goto out; | 261 | goto out; |
261 | } else if (nr == FAT_ENT_EOF) { | 262 | } else if (nr == FAT_ENT_EOF) { |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 530b4ca01510..ee42b9e0b16a 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/buffer_head.h> | 19 | #include <linux/buffer_head.h> |
20 | #include <linux/compat.h> | 20 | #include <linux/compat.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | #include <linux/kernel.h> | ||
22 | #include "fat.h" | 23 | #include "fat.h" |
23 | 24 | ||
24 | /* | 25 | /* |
@@ -140,28 +141,22 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, | |||
140 | { | 141 | { |
141 | const wchar_t *ip; | 142 | const wchar_t *ip; |
142 | wchar_t ec; | 143 | wchar_t ec; |
143 | unsigned char *op, nc; | 144 | unsigned char *op; |
144 | int charlen; | 145 | int charlen; |
145 | int k; | ||
146 | 146 | ||
147 | ip = uni; | 147 | ip = uni; |
148 | op = ascii; | 148 | op = ascii; |
149 | 149 | ||
150 | while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { | 150 | while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { |
151 | ec = *ip++; | 151 | ec = *ip++; |
152 | if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { | 152 | if ((charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { |
153 | op += charlen; | 153 | op += charlen; |
154 | len -= charlen; | 154 | len -= charlen; |
155 | } else { | 155 | } else { |
156 | if (uni_xlate == 1) { | 156 | if (uni_xlate == 1) { |
157 | *op = ':'; | 157 | *op++ = ':'; |
158 | for (k = 4; k > 0; k--) { | 158 | op = pack_hex_byte(op, ec >> 8); |
159 | nc = ec & 0xF; | 159 | op = pack_hex_byte(op, ec); |
160 | op[k] = nc > 9 ? nc + ('a' - 10) | ||
161 | : nc + '0'; | ||
162 | ec >>= 4; | ||
163 | } | ||
164 | op += 5; | ||
165 | len -= 5; | 160 | len -= 5; |
166 | } else { | 161 | } else { |
167 | *op++ = '?'; | 162 | *op++ = '?'; |
@@ -758,9 +753,10 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp, | |||
758 | return ret; | 753 | return ret; |
759 | } | 754 | } |
760 | 755 | ||
761 | static int fat_dir_ioctl(struct inode *inode, struct file *filp, | 756 | static long fat_dir_ioctl(struct file *filp, unsigned int cmd, |
762 | unsigned int cmd, unsigned long arg) | 757 | unsigned long arg) |
763 | { | 758 | { |
759 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
764 | struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; | 760 | struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; |
765 | int short_only, both; | 761 | int short_only, both; |
766 | 762 | ||
@@ -774,7 +770,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp, | |||
774 | both = 1; | 770 | both = 1; |
775 | break; | 771 | break; |
776 | default: | 772 | default: |
777 | return fat_generic_ioctl(inode, filp, cmd, arg); | 773 | return fat_generic_ioctl(filp, cmd, arg); |
778 | } | 774 | } |
779 | 775 | ||
780 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) | 776 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) |
@@ -814,7 +810,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, | |||
814 | both = 1; | 810 | both = 1; |
815 | break; | 811 | break; |
816 | default: | 812 | default: |
817 | return -ENOIOCTLCMD; | 813 | return fat_generic_ioctl(filp, cmd, (unsigned long)arg); |
818 | } | 814 | } |
819 | 815 | ||
820 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) | 816 | if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) |
@@ -836,7 +832,7 @@ const struct file_operations fat_dir_operations = { | |||
836 | .llseek = generic_file_llseek, | 832 | .llseek = generic_file_llseek, |
837 | .read = generic_read_dir, | 833 | .read = generic_read_dir, |
838 | .readdir = fat_readdir, | 834 | .readdir = fat_readdir, |
839 | .ioctl = fat_dir_ioctl, | 835 | .unlocked_ioctl = fat_dir_ioctl, |
840 | #ifdef CONFIG_COMPAT | 836 | #ifdef CONFIG_COMPAT |
841 | .compat_ioctl = fat_compat_dir_ioctl, | 837 | .compat_ioctl = fat_compat_dir_ioctl, |
842 | #endif | 838 | #endif |
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e6efdfa0f6db..53dba57b49a1 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/nls.h> | 6 | #include <linux/nls.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/mutex.h> | 8 | #include <linux/mutex.h> |
9 | #include <linux/ratelimit.h> | ||
9 | #include <linux/msdos_fs.h> | 10 | #include <linux/msdos_fs.h> |
10 | 11 | ||
11 | /* | 12 | /* |
@@ -82,6 +83,8 @@ struct msdos_sb_info { | |||
82 | struct fatent_operations *fatent_ops; | 83 | struct fatent_operations *fatent_ops; |
83 | struct inode *fat_inode; | 84 | struct inode *fat_inode; |
84 | 85 | ||
86 | struct ratelimit_state ratelimit; | ||
87 | |||
85 | spinlock_t inode_hash_lock; | 88 | spinlock_t inode_hash_lock; |
86 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; | 89 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; |
87 | }; | 90 | }; |
@@ -298,8 +301,8 @@ extern int fat_free_clusters(struct inode *inode, int cluster); | |||
298 | extern int fat_count_free_clusters(struct super_block *sb); | 301 | extern int fat_count_free_clusters(struct super_block *sb); |
299 | 302 | ||
300 | /* fat/file.c */ | 303 | /* fat/file.c */ |
301 | extern int fat_generic_ioctl(struct inode *inode, struct file *filp, | 304 | extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, |
302 | unsigned int cmd, unsigned long arg); | 305 | unsigned long arg); |
303 | extern const struct file_operations fat_file_operations; | 306 | extern const struct file_operations fat_file_operations; |
304 | extern const struct inode_operations fat_file_inode_operations; | 307 | extern const struct inode_operations fat_file_inode_operations; |
305 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); | 308 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); |
@@ -322,8 +325,13 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
322 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, | 325 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, |
323 | struct inode *i2); | 326 | struct inode *i2); |
324 | /* fat/misc.c */ | 327 | /* fat/misc.c */ |
325 | extern void fat_fs_error(struct super_block *s, const char *fmt, ...) | 328 | extern void |
326 | __attribute__ ((format (printf, 2, 3))) __cold; | 329 | __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) |
330 | __attribute__ ((format (printf, 3, 4))) __cold; | ||
331 | #define fat_fs_error(s, fmt, args...) \ | ||
332 | __fat_fs_error(s, 1, fmt , ## args) | ||
333 | #define fat_fs_error_ratelimit(s, fmt, args...) \ | ||
334 | __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args) | ||
327 | extern int fat_clusters_flush(struct super_block *sb); | 335 | extern int fat_clusters_flush(struct super_block *sb); |
328 | extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); | 336 | extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); |
329 | extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, | 337 | extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, |
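The fat_fs_error_ratelimit() macro above passes __ratelimit()'s verdict as the report argument, so the message body can be suppressed while the errors= policy still runs. A hedged sketch of that split (example_report is illustrative):

#include <linux/ratelimit.h>

static void example_report(struct super_block *sb, const char *msg)
{
	/* Nonzero while the interval/burst budget still allows printing. */
	int report = __ratelimit(&MSDOS_SB(sb)->ratelimit);

	if (report)
		printk(KERN_ERR "FAT: %s (dev %s)\n", msg, sb->s_id);
	/* The remount-ro/panic handling would still run here even
	 * when the message itself was rate-limited away. */
}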
diff --git a/fs/fat/file.c b/fs/fat/file.c index e8c159de236b..a14c2f6a489e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/compat.h> | ||
11 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
12 | #include <linux/time.h> | 13 | #include <linux/time.h> |
13 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
@@ -114,9 +115,9 @@ out: | |||
114 | return err; | 115 | return err; |
115 | } | 116 | } |
116 | 117 | ||
117 | int fat_generic_ioctl(struct inode *inode, struct file *filp, | 118 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
118 | unsigned int cmd, unsigned long arg) | ||
119 | { | 119 | { |
120 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
120 | u32 __user *user_attr = (u32 __user *)arg; | 121 | u32 __user *user_attr = (u32 __user *)arg; |
121 | 122 | ||
122 | switch (cmd) { | 123 | switch (cmd) { |
@@ -129,6 +130,15 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, | |||
129 | } | 130 | } |
130 | } | 131 | } |
131 | 132 | ||
133 | #ifdef CONFIG_COMPAT | ||
134 | static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd, | ||
135 | unsigned long arg) | ||
136 | |||
137 | { | ||
138 | return fat_generic_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); | ||
139 | } | ||
140 | #endif | ||
141 | |||
132 | static int fat_file_release(struct inode *inode, struct file *filp) | 142 | static int fat_file_release(struct inode *inode, struct file *filp) |
133 | { | 143 | { |
134 | if ((filp->f_mode & FMODE_WRITE) && | 144 | if ((filp->f_mode & FMODE_WRITE) && |
@@ -159,7 +169,10 @@ const struct file_operations fat_file_operations = { | |||
159 | .aio_write = generic_file_aio_write, | 169 | .aio_write = generic_file_aio_write, |
160 | .mmap = generic_file_mmap, | 170 | .mmap = generic_file_mmap, |
161 | .release = fat_file_release, | 171 | .release = fat_file_release, |
162 | .ioctl = fat_generic_ioctl, | 172 | .unlocked_ioctl = fat_generic_ioctl, |
173 | #ifdef CONFIG_COMPAT | ||
174 | .compat_ioctl = fat_generic_compat_ioctl, | ||
175 | #endif | ||
163 | .fsync = fat_file_fsync, | 176 | .fsync = fat_file_fsync, |
164 | .splice_read = generic_file_splice_read, | 177 | .splice_read = generic_file_splice_read, |
165 | }; | 178 | }; |
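The conversion above is the standard ->ioctl to ->unlocked_ioctl recipe: the inode parameter disappears from the prototype, so the handler recovers it from the file, and the compat entry just widens the 32-bit user pointer. A hedged sketch of the shape (example_* names are illustrative):

#include <linux/compat.h>
#include <linux/fs.h>

static long example_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
	/* No inode argument any more; derive it from the file. */
	struct inode *inode = filp->f_path.dentry->d_inode;

	switch (cmd) {
	/* ... per-command handling using inode ... */
	default:
		return -ENOTTY;
	}
}

#ifdef CONFIG_COMPAT
static long example_compat_ioctl(struct file *filp, unsigned int cmd,
				 unsigned long arg)
{
	/* 32-bit user pointers must go through compat_ptr(). */
	return example_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
}
#endif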
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0ce143bd7d56..ed33904926ee 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -1250,6 +1250,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
1250 | sb->s_op = &fat_sops; | 1250 | sb->s_op = &fat_sops; |
1251 | sb->s_export_op = &fat_export_ops; | 1251 | sb->s_export_op = &fat_export_ops; |
1252 | sbi->dir_ops = fs_dir_inode_ops; | 1252 | sbi->dir_ops = fs_dir_inode_ops; |
1253 | ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, | ||
1254 | DEFAULT_RATELIMIT_BURST); | ||
1253 | 1255 | ||
1254 | error = parse_options(data, isvfat, silent, &debug, &sbi->options); | 1256 | error = parse_options(data, isvfat, silent, &debug, &sbi->options); |
1255 | if (error) | 1257 | if (error) |
@@ -1497,10 +1499,8 @@ out_fail: | |||
1497 | iput(fat_inode); | 1499 | iput(fat_inode); |
1498 | if (root_inode) | 1500 | if (root_inode) |
1499 | iput(root_inode); | 1501 | iput(root_inode); |
1500 | if (sbi->nls_io) | 1502 | unload_nls(sbi->nls_io); |
1501 | unload_nls(sbi->nls_io); | 1503 | unload_nls(sbi->nls_disk); |
1502 | if (sbi->nls_disk) | ||
1503 | unload_nls(sbi->nls_disk); | ||
1504 | if (sbi->options.iocharset != fat_default_iocharset) | 1504 | if (sbi->options.iocharset != fat_default_iocharset) |
1505 | kfree(sbi->options.iocharset); | 1505 | kfree(sbi->options.iocharset); |
1506 | sb->s_fs_info = NULL; | 1506 | sb->s_fs_info = NULL; |
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index d3da05f26465..1fa23f6ffba5 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -20,27 +20,29 @@ | |||
20 | * In case the file system is remounted read-only, it can be made writable | 20 | * In case the file system is remounted read-only, it can be made writable |
21 | * again by remounting it. | 21 | * again by remounting it. |
22 | */ | 22 | */ |
23 | void fat_fs_error(struct super_block *s, const char *fmt, ...) | 23 | void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) |
24 | { | 24 | { |
25 | struct fat_mount_options *opts = &MSDOS_SB(s)->options; | 25 | struct fat_mount_options *opts = &MSDOS_SB(s)->options; |
26 | va_list args; | 26 | va_list args; |
27 | 27 | ||
28 | printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); | 28 | if (report) { |
29 | printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); | ||
29 | 30 | ||
30 | printk(KERN_ERR " "); | 31 | printk(KERN_ERR " "); |
31 | va_start(args, fmt); | 32 | va_start(args, fmt); |
32 | vprintk(fmt, args); | 33 | vprintk(fmt, args); |
33 | va_end(args); | 34 | va_end(args); |
34 | printk("\n"); | 35 | printk("\n"); |
36 | } | ||
35 | 37 | ||
36 | if (opts->errors == FAT_ERRORS_PANIC) | 38 | if (opts->errors == FAT_ERRORS_PANIC) |
37 | panic(" FAT fs panic from previous error\n"); | 39 | panic("FAT: fs panic from previous error\n"); |
38 | else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { | 40 | else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { |
39 | s->s_flags |= MS_RDONLY; | 41 | s->s_flags |= MS_RDONLY; |
40 | printk(KERN_ERR " File system has been set read-only\n"); | 42 | printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); |
41 | } | 43 | } |
42 | } | 44 | } |
43 | EXPORT_SYMBOL_GPL(fat_fs_error); | 45 | EXPORT_SYMBOL_GPL(__fat_fs_error); |
44 | 46 | ||
45 | /* Flushes the number of free clusters on FAT32 */ | 47 | /* Flushes the number of free clusters on FAT32 */ |
46 | /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ | 48 | /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 0a140741b39e..f74d270ba155 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/pipe_fs_i.h> | ||
17 | #include <linux/security.h> | 18 | #include <linux/security.h> |
18 | #include <linux/ptrace.h> | 19 | #include <linux/ptrace.h> |
19 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
@@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
412 | case F_NOTIFY: | 413 | case F_NOTIFY: |
413 | err = fcntl_dirnotify(fd, filp, arg); | 414 | err = fcntl_dirnotify(fd, filp, arg); |
414 | break; | 415 | break; |
416 | case F_SETPIPE_SZ: | ||
417 | case F_GETPIPE_SZ: | ||
418 | err = pipe_fcntl(filp, cmd, arg); | ||
419 | break; | ||
415 | default: | 420 | default: |
416 | break; | 421 | break; |
417 | } | 422 | } |
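From userspace, the two new commands resize and query a pipe's buffer; a hedged usage sketch (assumes _GNU_SOURCE exposes the F_*PIPE_SZ definitions; error handling elided):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fds[2];

	pipe(fds);
	/* Request a 1 MiB buffer; the kernel may round the size up. */
	fcntl(fds[1], F_SETPIPE_SZ, 1 << 20);
	printf("pipe buffer: %d bytes\n", fcntl(fds[1], F_GETPIPE_SZ));
	return 0;
}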
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4b37f7cea4dd..ea8592b90696 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -42,9 +42,10 @@ struct wb_writeback_args { | |||
42 | long nr_pages; | 42 | long nr_pages; |
43 | struct super_block *sb; | 43 | struct super_block *sb; |
44 | enum writeback_sync_modes sync_mode; | 44 | enum writeback_sync_modes sync_mode; |
45 | int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
46 | int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
47 | int for_background:1; | 47 | unsigned int for_background:1; |
48 | unsigned int sb_pinned:1; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | /* | 51 | /* |
@@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) | |||
192 | } | 193 | } |
193 | 194 | ||
194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | 195 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, |
195 | struct wb_writeback_args *args) | 196 | struct wb_writeback_args *args, |
197 | int wait) | ||
196 | { | 198 | { |
197 | struct bdi_work *work; | 199 | struct bdi_work *work; |
198 | 200 | ||
@@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | |||
204 | if (work) { | 206 | if (work) { |
205 | bdi_work_init(work, args); | 207 | bdi_work_init(work, args); |
206 | bdi_queue_work(bdi, work); | 208 | bdi_queue_work(bdi, work); |
209 | if (wait) | ||
210 | bdi_wait_on_work_clear(work); | ||
207 | } else { | 211 | } else { |
208 | struct bdi_writeback *wb = &bdi->wb; | 212 | struct bdi_writeback *wb = &bdi->wb; |
209 | 213 | ||
@@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
230 | .sync_mode = WB_SYNC_ALL, | 234 | .sync_mode = WB_SYNC_ALL, |
231 | .nr_pages = LONG_MAX, | 235 | .nr_pages = LONG_MAX, |
232 | .range_cyclic = 0, | 236 | .range_cyclic = 0, |
237 | /* | ||
238 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
239 | * lets make it explicitly clear. | ||
240 | */ | ||
241 | .sb_pinned = 1, | ||
233 | }; | 242 | }; |
234 | struct bdi_work work; | 243 | struct bdi_work work; |
235 | 244 | ||
@@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
245 | * @bdi: the backing device to write from | 254 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | 255 | * @sb: write inodes from this super_block |
247 | * @nr_pages: the number of pages to write | 256 | * @nr_pages: the number of pages to write |
257 | * @sb_locked: caller already holds sb umount sem. | ||
248 | * | 258 | * |
249 | * Description: | 259 | * Description: |
250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 260 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
251 | * started when this function returns, we make no guarantees on | 261 | * started when this function returns, we make no guarantees on |
252 | * completion. Caller need not hold sb s_umount semaphore. | 262 | * completion. Caller specifies whether sb umount sem is held already or not. |
253 | * | 263 | * |
254 | */ | 264 | */ |
255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 265 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
256 | long nr_pages) | 266 | long nr_pages, int sb_locked) |
257 | { | 267 | { |
258 | struct wb_writeback_args args = { | 268 | struct wb_writeback_args args = { |
259 | .sb = sb, | 269 | .sb = sb, |
260 | .sync_mode = WB_SYNC_NONE, | 270 | .sync_mode = WB_SYNC_NONE, |
261 | .nr_pages = nr_pages, | 271 | .nr_pages = nr_pages, |
262 | .range_cyclic = 1, | 272 | .range_cyclic = 1, |
273 | .sb_pinned = sb_locked, | ||
263 | }; | 274 | }; |
264 | 275 | ||
265 | /* | 276 | /* |
@@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | |||
271 | args.for_background = 1; | 282 | args.for_background = 1; |
272 | } | 283 | } |
273 | 284 | ||
274 | bdi_alloc_queue_work(bdi, &args); | 285 | bdi_alloc_queue_work(bdi, &args, sb_locked); |
275 | } | 286 | } |
276 | 287 | ||
277 | /* | 288 | /* |
@@ -398,11 +409,11 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
398 | wait_queue_head_t *wqh; | 409 | wait_queue_head_t *wqh; |
399 | 410 | ||
400 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 411 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
401 | do { | 412 | while (inode->i_state & I_SYNC) { |
402 | spin_unlock(&inode_lock); | 413 | spin_unlock(&inode_lock); |
403 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 414 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
404 | spin_lock(&inode_lock); | 415 | spin_lock(&inode_lock); |
405 | } while (inode->i_state & I_SYNC); | 416 | } |
406 | } | 417 | } |
407 | 418 | ||
408 | /* | 419 | /* |
@@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
452 | 463 | ||
453 | BUG_ON(inode->i_state & I_SYNC); | 464 | BUG_ON(inode->i_state & I_SYNC); |
454 | 465 | ||
455 | /* Set I_SYNC, reset I_DIRTY */ | 466 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
456 | dirty = inode->i_state & I_DIRTY; | ||
457 | inode->i_state |= I_SYNC; | 467 | inode->i_state |= I_SYNC; |
458 | inode->i_state &= ~I_DIRTY; | 468 | inode->i_state &= ~I_DIRTY_PAGES; |
459 | |||
460 | spin_unlock(&inode_lock); | 469 | spin_unlock(&inode_lock); |
461 | 470 | ||
462 | ret = do_writepages(mapping, wbc); | 471 | ret = do_writepages(mapping, wbc); |
@@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
472 | ret = err; | 481 | ret = err; |
473 | } | 482 | } |
474 | 483 | ||
484 | /* | ||
485 | * Some filesystems may redirty the inode during the writeback | ||
486 | * due to delalloc, clear dirty metadata flags right before | ||
487 | * write_inode() | ||
488 | */ | ||
489 | spin_lock(&inode_lock); | ||
490 | dirty = inode->i_state & I_DIRTY; | ||
491 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | ||
492 | spin_unlock(&inode_lock); | ||
475 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 493 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
476 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 494 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
477 | int err = write_inode(inode, wbc); | 495 | int err = write_inode(inode, wbc); |
@@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
577 | /* | 595 | /* |
578 | * Caller must already hold the ref for this | 596 | * Caller must already hold the ref for this |
579 | */ | 597 | */ |
580 | if (wbc->sync_mode == WB_SYNC_ALL) { | 598 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { |
581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 599 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
582 | return SB_NOT_PINNED; | 600 | return SB_NOT_PINNED; |
583 | } | 601 | } |
@@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
751 | .for_kupdate = args->for_kupdate, | 769 | .for_kupdate = args->for_kupdate, |
752 | .for_background = args->for_background, | 770 | .for_background = args->for_background, |
753 | .range_cyclic = args->range_cyclic, | 771 | .range_cyclic = args->range_cyclic, |
772 | .sb_pinned = args->sb_pinned, | ||
754 | }; | 773 | }; |
755 | unsigned long oldest_jif; | 774 | unsigned long oldest_jif; |
756 | long wrote = 0; | 775 | long wrote = 0; |
@@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
852 | unsigned long expired; | 871 | unsigned long expired; |
853 | long nr_pages; | 872 | long nr_pages; |
854 | 873 | ||
874 | /* | ||
875 | * When set to zero, disable periodic writeback | ||
876 | */ | ||
877 | if (!dirty_writeback_interval) | ||
878 | return 0; | ||
879 | |||
855 | expired = wb->last_old_flush + | 880 | expired = wb->last_old_flush + |
856 | msecs_to_jiffies(dirty_writeback_interval * 10); | 881 | msecs_to_jiffies(dirty_writeback_interval * 10); |
857 | if (time_before(jiffies, expired)) | 882 | if (time_before(jiffies, expired)) |
@@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
887 | 912 | ||
888 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 913 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
889 | struct wb_writeback_args args = work->args; | 914 | struct wb_writeback_args args = work->args; |
915 | int post_clear; | ||
890 | 916 | ||
891 | /* | 917 | /* |
892 | * Override sync mode, in case we must wait for completion | 918 | * Override sync mode, in case we must wait for completion |
@@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
894 | if (force_wait) | 920 | if (force_wait) |
895 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 921 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; |
896 | 922 | ||
923 | post_clear = (args.sync_mode == WB_SYNC_ALL) || args.sb_pinned; | ||
924 | |||
897 | /* | 925 | /* |
898 | * If this isn't a data integrity operation, just notify | 926 | * If this isn't a data integrity operation, just notify |
899 | * that we have seen this work and we are now starting it. | 927 | * that we have seen this work and we are now starting it. |
900 | */ | 928 | */ |
901 | if (args.sync_mode == WB_SYNC_NONE) | 929 | if (!post_clear) |
902 | wb_clear_pending(wb, work); | 930 | wb_clear_pending(wb, work); |
903 | 931 | ||
904 | wrote += wb_writeback(wb, &args); | 932 | wrote += wb_writeback(wb, &args); |
@@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
907 | * This is a data integrity writeback, so only do the | 935 | * This is a data integrity writeback, so only do the |
908 | * notification when we have completed the work. | 936 | * notification when we have completed the work. |
909 | */ | 937 | */ |
910 | if (args.sync_mode == WB_SYNC_ALL) | 938 | if (post_clear) |
911 | wb_clear_pending(wb, work); | 939 | wb_clear_pending(wb, work); |
912 | } | 940 | } |
913 | 941 | ||
@@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
947 | break; | 975 | break; |
948 | } | 976 | } |
949 | 977 | ||
950 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 978 | if (dirty_writeback_interval) { |
951 | schedule_timeout_interruptible(wait_jiffies); | 979 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); |
980 | schedule_timeout_interruptible(wait_jiffies); | ||
981 | } else { | ||
982 | set_current_state(TASK_INTERRUPTIBLE); | ||
983 | if (list_empty_careful(&wb->bdi->work_list) && | ||
984 | !kthread_should_stop()) | ||
985 | schedule(); | ||
986 | __set_current_state(TASK_RUNNING); | ||
987 | } | ||
988 | |||
952 | try_to_freeze(); | 989 | try_to_freeze(); |
953 | } | 990 | } |
954 | 991 | ||
@@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) | |||
974 | if (!bdi_has_dirty_io(bdi)) | 1011 | if (!bdi_has_dirty_io(bdi)) |
975 | continue; | 1012 | continue; |
976 | 1013 | ||
977 | bdi_alloc_queue_work(bdi, &args); | 1014 | bdi_alloc_queue_work(bdi, &args, 0); |
978 | } | 1015 | } |
979 | 1016 | ||
980 | rcu_read_unlock(); | 1017 | rcu_read_unlock(); |
@@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1183 | iput(old_inode); | 1220 | iput(old_inode); |
1184 | } | 1221 | } |
1185 | 1222 | ||
1223 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
1224 | { | ||
1225 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1226 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1227 | long nr_to_write; | ||
1228 | |||
1229 | nr_to_write = nr_dirty + nr_unstable + | ||
1230 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1231 | |||
1232 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
1233 | } | ||
1234 | |||
1186 | /** | 1235 | /** |
1187 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1236 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
1188 | * @sb: the superblock | 1237 | * @sb: the superblock |
@@ -1194,18 +1243,23 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1194 | */ | 1243 | */ |
1195 | void writeback_inodes_sb(struct super_block *sb) | 1244 | void writeback_inodes_sb(struct super_block *sb) |
1196 | { | 1245 | { |
1197 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1246 | __writeback_inodes_sb(sb, 0); |
1198 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1199 | long nr_to_write; | ||
1200 | |||
1201 | nr_to_write = nr_dirty + nr_unstable + | ||
1202 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1203 | |||
1204 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | ||
1205 | } | 1247 | } |
1206 | EXPORT_SYMBOL(writeback_inodes_sb); | 1248 | EXPORT_SYMBOL(writeback_inodes_sb); |
1207 | 1249 | ||
1208 | /** | 1250 | /** |
1251 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | ||
1252 | * @sb: the superblock | ||
1253 | * | ||
1254 | * Like writeback_inodes_sb(), except the caller already holds the | ||
1255 | * sb umount sem. | ||
1256 | */ | ||
1257 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
1258 | { | ||
1259 | __writeback_inodes_sb(sb, 1); | ||
1260 | } | ||
1261 | |||
1262 | /** | ||
1209 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1263 | * writeback_inodes_sb_if_idle - start writeback if none underway |
1210 | * @sb: the superblock | 1264 | * @sb: the superblock |
1211 | * | 1265 | * |
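The int -> unsigned int change on the bitfields near the top of this file avoids the signed-bitfield trap: with gcc's default, a plain int one-bit field holds 0 or -1, so comparing it against 1 silently fails. A minimal illustration:

struct example_flags {
	int		signed_bit:1;	/* stores 0 or -1 */
	unsigned int	unsigned_bit:1;	/* stores 0 or 1 */
};

static struct example_flags f = { .signed_bit = 1, .unsigned_bit = 1 };
/* Here f.signed_bit == 1 is false (it reads back as -1), while
 * f.unsigned_bit == 1 is true. */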
diff --git a/fs/generic_acl.c b/fs/generic_acl.c index fe5df5457656..99800e564157 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c | |||
@@ -201,7 +201,7 @@ generic_check_acl(struct inode *inode, int mask) | |||
201 | return -EAGAIN; | 201 | return -EAGAIN; |
202 | } | 202 | } |
203 | 203 | ||
204 | struct xattr_handler generic_acl_access_handler = { | 204 | const struct xattr_handler generic_acl_access_handler = { |
205 | .prefix = POSIX_ACL_XATTR_ACCESS, | 205 | .prefix = POSIX_ACL_XATTR_ACCESS, |
206 | .flags = ACL_TYPE_ACCESS, | 206 | .flags = ACL_TYPE_ACCESS, |
207 | .list = generic_acl_list, | 207 | .list = generic_acl_list, |
@@ -209,7 +209,7 @@ struct xattr_handler generic_acl_access_handler = { | |||
209 | .set = generic_acl_set, | 209 | .set = generic_acl_set, |
210 | }; | 210 | }; |
211 | 211 | ||
212 | struct xattr_handler generic_acl_default_handler = { | 212 | const struct xattr_handler generic_acl_default_handler = { |
213 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 213 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
214 | .flags = ACL_TYPE_DEFAULT, | 214 | .flags = ACL_TYPE_DEFAULT, |
215 | .list = generic_acl_list, | 215 | .list = generic_acl_list, |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index ca991d776592..48171f4c943d 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -339,7 +339,7 @@ out: | |||
339 | return error; | 339 | return error; |
340 | } | 340 | } |
341 | 341 | ||
342 | struct xattr_handler gfs2_xattr_system_handler = { | 342 | const struct xattr_handler gfs2_xattr_system_handler = { |
343 | .prefix = XATTR_SYSTEM_PREFIX, | 343 | .prefix = XATTR_SYSTEM_PREFIX, |
344 | .flags = GFS2_EATYPE_SYS, | 344 | .flags = GFS2_EATYPE_SYS, |
345 | .get = gfs2_xattr_system_get, | 345 | .get = gfs2_xattr_system_get, |
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 9306a2e6620c..b522b0cb39ea 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h | |||
@@ -19,6 +19,6 @@ | |||
19 | extern int gfs2_check_acl(struct inode *inode, int mask); | 19 | extern int gfs2_check_acl(struct inode *inode, int mask); |
20 | extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); | 20 | extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); |
21 | extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); | 21 | extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); |
22 | extern struct xattr_handler gfs2_xattr_system_handler; | 22 | extern const struct xattr_handler gfs2_xattr_system_handler; |
23 | 23 | ||
24 | #endif /* __ACL_DOT_H__ */ | 24 | #endif /* __ACL_DOT_H__ */ |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index d5f4661287f9..49667d68769e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -1476,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb, | |||
1476 | return 0; | 1476 | return 0; |
1477 | } | 1477 | } |
1478 | 1478 | ||
1479 | static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, | 1479 | static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, |
1480 | struct fs_disk_quota *fdq) | 1480 | struct fs_disk_quota *fdq) |
1481 | { | 1481 | { |
1482 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1482 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1483 | struct gfs2_quota_lvb *qlvb; | 1483 | struct gfs2_quota_lvb *qlvb; |
@@ -1521,8 +1521,8 @@ out: | |||
1521 | /* GFS2 only supports a subset of the XFS fields */ | 1521 | /* GFS2 only supports a subset of the XFS fields */ |
1522 | #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) | 1522 | #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) |
1523 | 1523 | ||
1524 | static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, | 1524 | static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, |
1525 | struct fs_disk_quota *fdq) | 1525 | struct fs_disk_quota *fdq) |
1526 | { | 1526 | { |
1527 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1527 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1528 | struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); | 1528 | struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); |
@@ -1629,7 +1629,7 @@ out_put: | |||
1629 | const struct quotactl_ops gfs2_quotactl_ops = { | 1629 | const struct quotactl_ops gfs2_quotactl_ops = { |
1630 | .quota_sync = gfs2_quota_sync, | 1630 | .quota_sync = gfs2_quota_sync, |
1631 | .get_xstate = gfs2_quota_get_xstate, | 1631 | .get_xstate = gfs2_quota_get_xstate, |
1632 | .get_xquota = gfs2_xquota_get, | 1632 | .get_dqblk = gfs2_get_dqblk, |
1633 | .set_xquota = gfs2_xquota_set, | 1633 | .set_dqblk = gfs2_set_dqblk, |
1634 | }; | 1634 | }; |
1635 | 1635 | ||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6daf4c65a3c8..171a744f8e45 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
854 | if ((start + nr_sects) != blk) { | 854 | if ((start + nr_sects) != blk) { |
855 | rv = blkdev_issue_discard(bdev, start, | 855 | rv = blkdev_issue_discard(bdev, start, |
856 | nr_sects, GFP_NOFS, | 856 | nr_sects, GFP_NOFS, |
857 | DISCARD_FL_BARRIER); | 857 | BLKDEV_IFL_WAIT | |
858 | BLKDEV_IFL_BARRIER); | ||
858 | if (rv) | 859 | if (rv) |
859 | goto fail; | 860 | goto fail; |
860 | nr_sects = 0; | 861 | nr_sects = 0; |
@@ -869,7 +870,7 @@ start_new_extent: | |||
869 | } | 870 | } |
870 | if (nr_sects) { | 871 | if (nr_sects) { |
871 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, | 872 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, |
872 | DISCARD_FL_BARRIER); | 873 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
873 | if (rv) | 874 | if (rv) |
874 | goto fail; | 875 | goto fail; |
875 | } | 876 | } |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 3df60f2d84e3..a0464680af0b 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -54,7 +54,7 @@ extern struct file_system_type gfs2meta_fs_type; | |||
54 | extern const struct export_operations gfs2_export_ops; | 54 | extern const struct export_operations gfs2_export_ops; |
55 | extern const struct super_operations gfs2_super_ops; | 55 | extern const struct super_operations gfs2_super_ops; |
56 | extern const struct dentry_operations gfs2_dops; | 56 | extern const struct dentry_operations gfs2_dops; |
57 | extern struct xattr_handler *gfs2_xattr_handlers[]; | 57 | extern const struct xattr_handler *gfs2_xattr_handlers[]; |
58 | 58 | ||
59 | #endif /* __SUPER_DOT_H__ */ | 59 | #endif /* __SUPER_DOT_H__ */ |
60 | 60 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index c2ebdf2c01d4..82f93da00d1b 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -1535,21 +1535,21 @@ out_alloc: | |||
1535 | return error; | 1535 | return error; |
1536 | } | 1536 | } |
1537 | 1537 | ||
1538 | static struct xattr_handler gfs2_xattr_user_handler = { | 1538 | static const struct xattr_handler gfs2_xattr_user_handler = { |
1539 | .prefix = XATTR_USER_PREFIX, | 1539 | .prefix = XATTR_USER_PREFIX, |
1540 | .flags = GFS2_EATYPE_USR, | 1540 | .flags = GFS2_EATYPE_USR, |
1541 | .get = gfs2_xattr_get, | 1541 | .get = gfs2_xattr_get, |
1542 | .set = gfs2_xattr_set, | 1542 | .set = gfs2_xattr_set, |
1543 | }; | 1543 | }; |
1544 | 1544 | ||
1545 | static struct xattr_handler gfs2_xattr_security_handler = { | 1545 | static const struct xattr_handler gfs2_xattr_security_handler = { |
1546 | .prefix = XATTR_SECURITY_PREFIX, | 1546 | .prefix = XATTR_SECURITY_PREFIX, |
1547 | .flags = GFS2_EATYPE_SECURITY, | 1547 | .flags = GFS2_EATYPE_SECURITY, |
1548 | .get = gfs2_xattr_get, | 1548 | .get = gfs2_xattr_get, |
1549 | .set = gfs2_xattr_set, | 1549 | .set = gfs2_xattr_set, |
1550 | }; | 1550 | }; |
1551 | 1551 | ||
1552 | struct xattr_handler *gfs2_xattr_handlers[] = { | 1552 | const struct xattr_handler *gfs2_xattr_handlers[] = { |
1553 | &gfs2_xattr_user_handler, | 1553 | &gfs2_xattr_user_handler, |
1554 | &gfs2_xattr_security_handler, | 1554 | &gfs2_xattr_security_handler, |
1555 | &gfs2_xattr_system_handler, | 1555 | &gfs2_xattr_system_handler, |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 5f4023678251..764fd1bdca88 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -494,7 +494,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { | |||
494 | const struct file_operations hfsplus_dir_operations = { | 494 | const struct file_operations hfsplus_dir_operations = { |
495 | .read = generic_read_dir, | 495 | .read = generic_read_dir, |
496 | .readdir = hfsplus_readdir, | 496 | .readdir = hfsplus_readdir, |
497 | .ioctl = hfsplus_ioctl, | 497 | .unlocked_ioctl = hfsplus_ioctl, |
498 | .llseek = generic_file_llseek, | 498 | .llseek = generic_file_llseek, |
499 | .release = hfsplus_dir_release, | 499 | .release = hfsplus_dir_release, |
500 | }; | 500 | }; |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 5c10d803d9df..6505c30ad965 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -337,8 +337,7 @@ struct inode *hfsplus_new_inode(struct super_block *, int); | |||
337 | void hfsplus_delete_inode(struct inode *); | 337 | void hfsplus_delete_inode(struct inode *); |
338 | 338 | ||
339 | /* ioctl.c */ | 339 | /* ioctl.c */ |
340 | int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 340 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
341 | unsigned long arg); | ||
342 | int hfsplus_setxattr(struct dentry *dentry, const char *name, | 341 | int hfsplus_setxattr(struct dentry *dentry, const char *name, |
343 | const void *value, size_t size, int flags); | 342 | const void *value, size_t size, int flags); |
344 | ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | 343 | ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 1bcf597c0562..9bbb82924a22 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -285,7 +285,7 @@ static const struct file_operations hfsplus_file_operations = { | |||
285 | .fsync = file_fsync, | 285 | .fsync = file_fsync, |
286 | .open = hfsplus_file_open, | 286 | .open = hfsplus_file_open, |
287 | .release = hfsplus_file_release, | 287 | .release = hfsplus_file_release, |
288 | .ioctl = hfsplus_ioctl, | 288 | .unlocked_ioctl = hfsplus_ioctl, |
289 | }; | 289 | }; |
290 | 290 | ||
291 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 291 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index f457d2ca51ab..ac405f099026 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
@@ -17,14 +17,16 @@ | |||
17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
20 | #include <linux/smp_lock.h> | ||
20 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
21 | #include "hfsplus_fs.h" | 22 | #include "hfsplus_fs.h" |
22 | 23 | ||
23 | int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 24 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
24 | unsigned long arg) | ||
25 | { | 25 | { |
26 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
26 | unsigned int flags; | 27 | unsigned int flags; |
27 | 28 | ||
29 | lock_kernel(); | ||
28 | switch (cmd) { | 30 | switch (cmd) { |
29 | case HFSPLUS_IOC_EXT2_GETFLAGS: | 31 | case HFSPLUS_IOC_EXT2_GETFLAGS: |
30 | flags = 0; | 32 | flags = 0; |
@@ -38,8 +40,10 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
38 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | 40 | case HFSPLUS_IOC_EXT2_SETFLAGS: { |
39 | int err = 0; | 41 | int err = 0; |
40 | err = mnt_want_write(filp->f_path.mnt); | 42 | err = mnt_want_write(filp->f_path.mnt); |
41 | if (err) | 43 | if (err) { |
44 | unlock_kernel(); | ||
42 | return err; | 45 | return err; |
46 | } | ||
43 | 47 | ||
44 | if (!is_owner_or_cap(inode)) { | 48 | if (!is_owner_or_cap(inode)) { |
45 | err = -EACCES; | 49 | err = -EACCES; |
@@ -85,9 +89,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
85 | mark_inode_dirty(inode); | 89 | mark_inode_dirty(inode); |
86 | setflags_out: | 90 | setflags_out: |
87 | mnt_drop_write(filp->f_path.mnt); | 91 | mnt_drop_write(filp->f_path.mnt); |
92 | unlock_kernel(); | ||
88 | return err; | 93 | return err; |
89 | } | 94 | } |
90 | default: | 95 | default: |
96 | unlock_kernel(); | ||
91 | return -ENOTTY; | 97 | return -ENOTTY; |
92 | } | 98 | } |
93 | } | 99 | } |
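The hfsplus handler keeps its old locking by taking the BKL explicitly, since ->unlocked_ioctl no longer does so implicitly; a hedged sketch of the pushdown pattern (example_* names are illustrative):

#include <linux/smp_lock.h>

static long example_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
	long ret;

	lock_kernel();		/* what the old ->ioctl path took for us */
	ret = example_do_ioctl(filp, cmd, arg);
	unlock_kernel();	/* must be dropped on every return path */
	return ret;
}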
diff --git a/fs/inode.c b/fs/inode.c index 258ec22bb298..2bee20ae3d65 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -286,11 +286,9 @@ static void init_once(void *foo) | |||
286 | */ | 286 | */ |
287 | void __iget(struct inode *inode) | 287 | void __iget(struct inode *inode) |
288 | { | 288 | { |
289 | if (atomic_read(&inode->i_count)) { | 289 | if (atomic_inc_return(&inode->i_count) != 1) |
290 | atomic_inc(&inode->i_count); | ||
291 | return; | 290 | return; |
292 | } | 291 | |
293 | atomic_inc(&inode->i_count); | ||
294 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 292 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
295 | list_move(&inode->i_list, &inode_in_use); | 293 | list_move(&inode->i_list, &inode_in_use); |
296 | inodes_stat.nr_unused--; | 294 | inodes_stat.nr_unused--; |
@@ -1608,3 +1606,23 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1608 | inode->i_ino); | 1606 | inode->i_ino); |
1609 | } | 1607 | } |
1610 | EXPORT_SYMBOL(init_special_inode); | 1608 | EXPORT_SYMBOL(init_special_inode); |
1609 | |||
1610 | /** | ||
1611 | * inode_init_owner - init uid, gid, mode for new inode according to POSIX standards | ||
1612 | * @inode: New inode | ||
1613 | * @dir: Directory inode | ||
1614 | * @mode: mode of the new inode | ||
1615 | */ | ||
1616 | void inode_init_owner(struct inode *inode, const struct inode *dir, | ||
1617 | mode_t mode) | ||
1618 | { | ||
1619 | inode->i_uid = current_fsuid(); | ||
1620 | if (dir && dir->i_mode & S_ISGID) { | ||
1621 | inode->i_gid = dir->i_gid; | ||
1622 | if (S_ISDIR(mode)) | ||
1623 | mode |= S_ISGID; | ||
1624 | } else | ||
1625 | inode->i_gid = current_fsgid(); | ||
1626 | inode->i_mode = mode; | ||
1627 | } | ||
1628 | EXPORT_SYMBOL(inode_init_owner); | ||
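With the helper exported above, filesystems can drop their open-coded ownership logic, as the ext4 ialloc hunk earlier in this series does. A hedged sketch of a caller in an inode-allocation path:

struct inode *inode = new_inode(sb);

if (!inode)
	return ERR_PTR(-ENOMEM);
/* Sets i_uid from fsuid and fills i_gid/i_mode, propagating the
 * parent directory's setgid bit per POSIX. */
inode_init_owner(inode, dir, mode);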
diff --git a/fs/internal.h b/fs/internal.h index 8a03a5447bdf..6b706bc60a66 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -87,6 +87,8 @@ extern struct file *get_empty_filp(void); | |||
87 | * super.c | 87 | * super.c |
88 | */ | 88 | */ |
89 | extern int do_remount_sb(struct super_block *, int, void *, int); | 89 | extern int do_remount_sb(struct super_block *, int, void *, int); |
90 | extern void __put_super(struct super_block *sb); | ||
91 | extern void put_super(struct super_block *sb); | ||
90 | 92 | ||
91 | /* | 93 | /* |
92 | * open.c | 94 | * open.c |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7faefb4da939..2d140a713861 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -525,15 +525,8 @@ static int ioctl_fsfreeze(struct file *filp) | |||
525 | if (sb->s_op->freeze_fs == NULL) | 525 | if (sb->s_op->freeze_fs == NULL) |
526 | return -EOPNOTSUPP; | 526 | return -EOPNOTSUPP; |
527 | 527 | ||
528 | /* If a blockdevice-backed filesystem isn't specified, return. */ | ||
529 | if (sb->s_bdev == NULL) | ||
530 | return -EINVAL; | ||
531 | |||
532 | /* Freeze */ | 528 | /* Freeze */ |
533 | sb = freeze_bdev(sb->s_bdev); | 529 | return freeze_super(sb); |
534 | if (IS_ERR(sb)) | ||
535 | return PTR_ERR(sb); | ||
536 | return 0; | ||
537 | } | 530 | } |
538 | 531 | ||
539 | static int ioctl_fsthaw(struct file *filp) | 532 | static int ioctl_fsthaw(struct file *filp) |
@@ -543,12 +536,8 @@ static int ioctl_fsthaw(struct file *filp) | |||
543 | if (!capable(CAP_SYS_ADMIN)) | 536 | if (!capable(CAP_SYS_ADMIN)) |
544 | return -EPERM; | 537 | return -EPERM; |
545 | 538 | ||
546 | /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */ | ||
547 | if (sb->s_bdev == NULL) | ||
548 | return -EINVAL; | ||
549 | |||
550 | /* Thaw */ | 539 | /* Thaw */ |
551 | return thaw_bdev(sb->s_bdev, sb); | 540 | return thaw_super(sb); |
552 | } | 541 | } |
553 | 542 | ||
554 | /* | 543 | /* |
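
FIFREEZE/FITHAW stop special-casing block-device-backed filesystems: freeze_super()/thaw_super() operate on the superblock directly, so the sb->s_bdev NULL checks go away and network or virtual filesystems that implement ->freeze_fs can be frozen too. The resulting shape of the freeze path, as a sketch:

    #include <linux/fs.h>

    static int fsfreeze_example(struct super_block *sb)
    {
            if (sb->s_op->freeze_fs == NULL)
                    return -EOPNOTSUPP;
            /* no sb->s_bdev requirement; returns 0 or -errno directly */
            return freeze_super(sb);
    }
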
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index ecb44c94ba8d..28a9ddaa0c49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -786,6 +786,12 @@ wait_for_iobuf: | |||
786 | 786 | ||
787 | jbd_debug(3, "JBD: commit phase 6\n"); | 787 | jbd_debug(3, "JBD: commit phase 6\n"); |
788 | 788 | ||
789 | /* All metadata is written, now write commit record and do cleanup */ | ||
790 | spin_lock(&journal->j_state_lock); | ||
791 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | ||
792 | commit_transaction->t_state = T_COMMIT_RECORD; | ||
793 | spin_unlock(&journal->j_state_lock); | ||
794 | |||
789 | if (journal_write_commit_record(journal, commit_transaction)) | 795 | if (journal_write_commit_record(journal, commit_transaction)) |
790 | err = -EIO; | 796 | err = -EIO; |
791 | 797 | ||
@@ -923,7 +929,7 @@ restart_loop: | |||
923 | 929 | ||
924 | jbd_debug(3, "JBD: commit phase 8\n"); | 930 | jbd_debug(3, "JBD: commit phase 8\n"); |
925 | 931 | ||
926 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 932 | J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD); |
927 | 933 | ||
928 | commit_transaction->t_state = T_FINISHED; | 934 | commit_transaction->t_state = T_FINISHED; |
929 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 935 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
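
jbd grows a T_COMMIT_RECORD state marking the window after all metadata blocks have been written but before the commit record goes out; the phase-8 assertion is updated to match. Presumed ordering of the relevant states (a sketch; include/linux/jbd.h is authoritative):

    /*
     *   ... -> T_COMMIT           metadata writeout in progress
     *       -> T_COMMIT_RECORD    metadata done, commit block not yet written
     *       -> T_FINISHED         commit record on disk, cleanup done
     */
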
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index bd224eec9b07..93d1e47647bd 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -565,6 +565,38 @@ int log_wait_commit(journal_t *journal, tid_t tid) | |||
565 | } | 565 | } |
566 | 566 | ||
567 | /* | 567 | /* |
568 | * Return 1 if a given transaction has not yet sent barrier request | ||
569 | * connected with a transaction commit. If 0 is returned, transaction | ||
570 | * may or may not have sent the barrier. Used to avoid sending barrier | ||
571 | * twice in common cases. | ||
572 | */ | ||
573 | int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid) | ||
574 | { | ||
575 | int ret = 0; | ||
576 | transaction_t *commit_trans; | ||
577 | |||
578 | if (!(journal->j_flags & JFS_BARRIER)) | ||
579 | return 0; | ||
580 | spin_lock(&journal->j_state_lock); | ||
581 | /* Transaction already committed? */ | ||
582 | if (tid_geq(journal->j_commit_sequence, tid)) | ||
583 | goto out; | ||
584 | /* | ||
585 | * Transaction is being committed and we already proceeded to | ||
586 | * writing commit record? | ||
587 | */ | ||
588 | commit_trans = journal->j_committing_transaction; | ||
589 | if (commit_trans && commit_trans->t_tid == tid && | ||
590 | commit_trans->t_state >= T_COMMIT_RECORD) | ||
591 | goto out; | ||
592 | ret = 1; | ||
593 | out: | ||
594 | spin_unlock(&journal->j_state_lock); | ||
595 | return ret; | ||
596 | } | ||
597 | EXPORT_SYMBOL(journal_trans_will_send_data_barrier); | ||
598 | |||
599 | /* | ||
568 | * Log buffer allocation routines: | 600 | * Log buffer allocation routines: |
569 | */ | 601 | */ |
570 | 602 | ||
@@ -1157,6 +1189,7 @@ int journal_destroy(journal_t *journal) | |||
1157 | { | 1189 | { |
1158 | int err = 0; | 1190 | int err = 0; |
1159 | 1191 | ||
1192 | |||
1160 | /* Wait for the commit thread to wake up and die. */ | 1193 | /* Wait for the commit thread to wake up and die. */ |
1161 | journal_kill_thread(journal); | 1194 | journal_kill_thread(journal); |
1162 | 1195 | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 30beb11ef928..076d1cc44f95 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
530 | */ | 530 | */ |
531 | if ((journal->j_fs_dev != journal->j_dev) && | 531 | if ((journal->j_fs_dev != journal->j_dev) && |
532 | (journal->j_flags & JBD2_BARRIER)) | 532 | (journal->j_flags & JBD2_BARRIER)) |
533 | blkdev_issue_flush(journal->j_fs_dev, NULL); | 533 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, |
534 | BLKDEV_IFL_WAIT); | ||
534 | if (!(journal->j_flags & JBD2_ABORT)) | 535 | if (!(journal->j_flags & JBD2_ABORT)) |
535 | jbd2_journal_update_superblock(journal, 1); | 536 | jbd2_journal_update_superblock(journal, 1); |
536 | return 0; | 537 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 671da7fb7ffd..75716d3d2be0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -717,7 +717,8 @@ start_journal_io: | |||
717 | if (commit_transaction->t_flushed_data_blocks && | 717 | if (commit_transaction->t_flushed_data_blocks && |
718 | (journal->j_fs_dev != journal->j_dev) && | 718 | (journal->j_fs_dev != journal->j_dev) && |
719 | (journal->j_flags & JBD2_BARRIER)) | 719 | (journal->j_flags & JBD2_BARRIER)) |
720 | blkdev_issue_flush(journal->j_fs_dev, NULL); | 720 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, |
721 | BLKDEV_IFL_WAIT); | ||
721 | 722 | ||
722 | /* Done it all: now write the commit record asynchronously. */ | 723 | /* Done it all: now write the commit record asynchronously. */ |
723 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 724 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -727,7 +728,8 @@ start_journal_io: | |||
727 | if (err) | 728 | if (err) |
728 | __jbd2_journal_abort_hard(journal); | 729 | __jbd2_journal_abort_hard(journal); |
729 | if (journal->j_flags & JBD2_BARRIER) | 730 | if (journal->j_flags & JBD2_BARRIER) |
730 | blkdev_issue_flush(journal->j_dev, NULL); | 731 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, |
732 | BLKDEV_IFL_WAIT); | ||
731 | } | 733 | } |
732 | 734 | ||
733 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 735 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
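
Both jbd2 call sites pick up the new blkdev_issue_flush() signature, which adds a gfp mask and a BLKDEV_IFL_* flags word; BLKDEV_IFL_WAIT keeps the old wait-for-completion behaviour. The prototype these hunks assume (parameter names are a sketch; see include/linux/blkdev.h):

    /*
     *   int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
     *                          sector_t *error_sector, unsigned long flags);
     *
     * flags is a mask of BLKDEV_IFL_* bits, e.g. BLKDEV_IFL_WAIT,
     * BLKDEV_IFL_BARRIER.
     */
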
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 7cdc3196476a..a33aab6b5e68 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -419,7 +419,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, | |||
419 | return rc; | 419 | return rc; |
420 | } | 420 | } |
421 | 421 | ||
422 | struct xattr_handler jffs2_acl_access_xattr_handler = { | 422 | const struct xattr_handler jffs2_acl_access_xattr_handler = { |
423 | .prefix = POSIX_ACL_XATTR_ACCESS, | 423 | .prefix = POSIX_ACL_XATTR_ACCESS, |
424 | .flags = ACL_TYPE_DEFAULT, | 424 | .flags = ACL_TYPE_DEFAULT, |
425 | .list = jffs2_acl_access_listxattr, | 425 | .list = jffs2_acl_access_listxattr, |
@@ -427,7 +427,7 @@ struct xattr_handler jffs2_acl_access_xattr_handler = { | |||
427 | .set = jffs2_acl_setxattr, | 427 | .set = jffs2_acl_setxattr, |
428 | }; | 428 | }; |
429 | 429 | ||
430 | struct xattr_handler jffs2_acl_default_xattr_handler = { | 430 | const struct xattr_handler jffs2_acl_default_xattr_handler = { |
431 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 431 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
432 | .flags = ACL_TYPE_DEFAULT, | 432 | .flags = ACL_TYPE_DEFAULT, |
433 | .list = jffs2_acl_default_listxattr, | 433 | .list = jffs2_acl_default_listxattr, |
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index f0ba63e3c36b..5e42de8d9541 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h | |||
@@ -31,8 +31,8 @@ extern int jffs2_acl_chmod(struct inode *); | |||
31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); | 31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); |
32 | extern int jffs2_init_acl_post(struct inode *); | 32 | extern int jffs2_init_acl_post(struct inode *); |
33 | 33 | ||
34 | extern struct xattr_handler jffs2_acl_access_xattr_handler; | 34 | extern const struct xattr_handler jffs2_acl_access_xattr_handler; |
35 | extern struct xattr_handler jffs2_acl_default_xattr_handler; | 35 | extern const struct xattr_handler jffs2_acl_default_xattr_handler; |
36 | 36 | ||
37 | #else | 37 | #else |
38 | 38 | ||
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index eaccee058583..239f51216a68 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c | |||
@@ -77,7 +77,7 @@ static size_t jffs2_security_listxattr(struct dentry *dentry, char *list, | |||
77 | return retlen; | 77 | return retlen; |
78 | } | 78 | } |
79 | 79 | ||
80 | struct xattr_handler jffs2_security_xattr_handler = { | 80 | const struct xattr_handler jffs2_security_xattr_handler = { |
81 | .prefix = XATTR_SECURITY_PREFIX, | 81 | .prefix = XATTR_SECURITY_PREFIX, |
82 | .list = jffs2_security_listxattr, | 82 | .list = jffs2_security_listxattr, |
83 | .set = jffs2_security_setxattr, | 83 | .set = jffs2_security_setxattr, |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 9e75c62c85d6..a2d58c96f1b4 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -904,7 +904,7 @@ struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, | |||
904 | * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags) | 904 | * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags) |
905 | * is an implementation of setxattr handler on jffs2. | 905 | * is an implementation of setxattr handler on jffs2. |
906 | * -------------------------------------------------- */ | 906 | * -------------------------------------------------- */ |
907 | struct xattr_handler *jffs2_xattr_handlers[] = { | 907 | const struct xattr_handler *jffs2_xattr_handlers[] = { |
908 | &jffs2_user_xattr_handler, | 908 | &jffs2_user_xattr_handler, |
909 | #ifdef CONFIG_JFFS2_FS_SECURITY | 909 | #ifdef CONFIG_JFFS2_FS_SECURITY |
910 | &jffs2_security_xattr_handler, | 910 | &jffs2_security_xattr_handler, |
@@ -917,8 +917,8 @@ struct xattr_handler *jffs2_xattr_handlers[] = { | |||
917 | NULL | 917 | NULL |
918 | }; | 918 | }; |
919 | 919 | ||
920 | static struct xattr_handler *xprefix_to_handler(int xprefix) { | 920 | static const struct xattr_handler *xprefix_to_handler(int xprefix) { |
921 | struct xattr_handler *ret; | 921 | const struct xattr_handler *ret; |
922 | 922 | ||
923 | switch (xprefix) { | 923 | switch (xprefix) { |
924 | case JFFS2_XPREFIX_USER: | 924 | case JFFS2_XPREFIX_USER: |
@@ -955,7 +955,7 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
955 | struct jffs2_inode_cache *ic = f->inocache; | 955 | struct jffs2_inode_cache *ic = f->inocache; |
956 | struct jffs2_xattr_ref *ref, **pref; | 956 | struct jffs2_xattr_ref *ref, **pref; |
957 | struct jffs2_xattr_datum *xd; | 957 | struct jffs2_xattr_datum *xd; |
958 | struct xattr_handler *xhandle; | 958 | const struct xattr_handler *xhandle; |
959 | ssize_t len, rc; | 959 | ssize_t len, rc; |
960 | int retry = 0; | 960 | int retry = 0; |
961 | 961 | ||
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h index 6e3b5ddfb7ab..cf4f5759b42b 100644 --- a/fs/jffs2/xattr.h +++ b/fs/jffs2/xattr.h | |||
@@ -93,9 +93,9 @@ extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname | |||
93 | extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, | 93 | extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, |
94 | const char *buffer, size_t size, int flags); | 94 | const char *buffer, size_t size, int flags); |
95 | 95 | ||
96 | extern struct xattr_handler *jffs2_xattr_handlers[]; | 96 | extern const struct xattr_handler *jffs2_xattr_handlers[]; |
97 | extern struct xattr_handler jffs2_user_xattr_handler; | 97 | extern const struct xattr_handler jffs2_user_xattr_handler; |
98 | extern struct xattr_handler jffs2_trusted_xattr_handler; | 98 | extern const struct xattr_handler jffs2_trusted_xattr_handler; |
99 | 99 | ||
100 | extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); | 100 | extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); |
101 | #define jffs2_getxattr generic_getxattr | 101 | #define jffs2_getxattr generic_getxattr |
@@ -122,7 +122,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); | |||
122 | 122 | ||
123 | #ifdef CONFIG_JFFS2_FS_SECURITY | 123 | #ifdef CONFIG_JFFS2_FS_SECURITY |
124 | extern int jffs2_init_security(struct inode *inode, struct inode *dir); | 124 | extern int jffs2_init_security(struct inode *inode, struct inode *dir); |
125 | extern struct xattr_handler jffs2_security_xattr_handler; | 125 | extern const struct xattr_handler jffs2_security_xattr_handler; |
126 | #else | 126 | #else |
127 | #define jffs2_init_security(inode,dir) (0) | 127 | #define jffs2_init_security(inode,dir) (0) |
128 | #endif /* CONFIG_JFFS2_FS_SECURITY */ | 128 | #endif /* CONFIG_JFFS2_FS_SECURITY */ |
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index 3e5a5e356e05..1c868194c504 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c | |||
@@ -47,7 +47,7 @@ static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list, | |||
47 | return retlen; | 47 | return retlen; |
48 | } | 48 | } |
49 | 49 | ||
50 | struct xattr_handler jffs2_trusted_xattr_handler = { | 50 | const struct xattr_handler jffs2_trusted_xattr_handler = { |
51 | .prefix = XATTR_TRUSTED_PREFIX, | 51 | .prefix = XATTR_TRUSTED_PREFIX, |
52 | .list = jffs2_trusted_listxattr, | 52 | .list = jffs2_trusted_listxattr, |
53 | .set = jffs2_trusted_setxattr, | 53 | .set = jffs2_trusted_setxattr, |
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index 8544af67dffe..916b5c966039 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c | |||
@@ -47,7 +47,7 @@ static size_t jffs2_user_listxattr(struct dentry *dentry, char *list, | |||
47 | return retlen; | 47 | return retlen; |
48 | } | 48 | } |
49 | 49 | ||
50 | struct xattr_handler jffs2_user_xattr_handler = { | 50 | const struct xattr_handler jffs2_user_xattr_handler = { |
51 | .prefix = XATTR_USER_PREFIX, | 51 | .prefix = XATTR_USER_PREFIX, |
52 | .list = jffs2_user_listxattr, | 52 | .list = jffs2_user_listxattr, |
53 | .set = jffs2_user_setxattr, | 53 | .set = jffs2_user_setxattr, |
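
The jffs2 changes are part of a tree-wide const-ification of struct xattr_handler: the handler objects and the NULL-terminated tables that reference them are read-only data, so both the definitions and every extern or local pointer gain const. The resulting shape, sketched with hypothetical myfs handlers (callback prototypes omitted):

    static const struct xattr_handler myfs_user_xattr_handler = {
            .prefix = XATTR_USER_PREFIX,
            .list   = myfs_user_listxattr,
            .get    = myfs_user_getxattr,
            .set    = myfs_user_setxattr,
    };

    /* table of pointers-to-const, NULL terminated */
    const struct xattr_handler *myfs_xattr_handlers[] = {
            &myfs_user_xattr_handler,
            NULL
    };
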
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 14ba982b3f24..85d9ec659225 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -98,7 +98,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
98 | if (rc) | 98 | if (rc) |
99 | return rc; | 99 | return rc; |
100 | 100 | ||
101 | if (iattr->ia_valid & ATTR_SIZE) | 101 | if (is_quota_modification(inode, iattr)) |
102 | dquot_initialize(inode); | 102 | dquot_initialize(inode); |
103 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | 103 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || |
104 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { | 104 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { |
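
jfs now keys dquot_initialize() off is_quota_modification() instead of just ATTR_SIZE, so uid/gid changes initialize quota too. The helper lives in include/linux/quotaops.h and reads roughly as follows (treat the exact body as an assumption):

    static inline int is_quota_modification(struct inode *inode,
                                            struct iattr *ia)
    {
            return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
                   (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
                   (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
    }
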
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 829921b67765..2686531e235a 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c | |||
@@ -98,14 +98,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
98 | goto fail_unlock; | 98 | goto fail_unlock; |
99 | } | 99 | } |
100 | 100 | ||
101 | inode->i_uid = current_fsuid(); | 101 | inode_init_owner(inode, parent, mode); |
102 | if (parent->i_mode & S_ISGID) { | ||
103 | inode->i_gid = parent->i_gid; | ||
104 | if (S_ISDIR(mode)) | ||
105 | mode |= S_ISGID; | ||
106 | } else | ||
107 | inode->i_gid = current_fsgid(); | ||
108 | |||
109 | /* | 102 | /* |
110 | * New inodes need to save sane values on disk when | 103 | * New inodes need to save sane values on disk when |
111 | * uid & gid mount options are used | 104 | * uid & gid mount options are used |
@@ -121,7 +114,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
121 | if (rc) | 114 | if (rc) |
122 | goto fail_drop; | 115 | goto fail_drop; |
123 | 116 | ||
124 | inode->i_mode = mode; | ||
125 | /* inherit flags from parent */ | 117 | /* inherit flags from parent */ |
126 | jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; | 118 | jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; |
127 | 119 | ||
@@ -134,7 +126,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
134 | if (S_ISLNK(mode)) | 126 | if (S_ISLNK(mode)) |
135 | jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); | 127 | jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); |
136 | } | 128 | } |
137 | jfs_inode->mode2 |= mode; | 129 | jfs_inode->mode2 |= inode->i_mode; |
138 | 130 | ||
139 | inode->i_blocks = 0; | 131 | inode->i_blocks = 0; |
140 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 132 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 755a92e8daa7..f602e230e162 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -358,14 +358,7 @@ struct inode *logfs_new_inode(struct inode *dir, int mode) | |||
358 | inode->i_mode = mode; | 358 | inode->i_mode = mode; |
359 | logfs_set_ino_generation(sb, inode); | 359 | logfs_set_ino_generation(sb, inode); |
360 | 360 | ||
361 | inode->i_uid = current_fsuid(); | 361 | inode_init_owner(inode, dir, mode); |
362 | inode->i_gid = current_fsgid(); | ||
363 | if (dir->i_mode & S_ISGID) { | ||
364 | inode->i_gid = dir->i_gid; | ||
365 | if (S_ISDIR(mode)) | ||
366 | inode->i_mode |= S_ISGID; | ||
367 | } | ||
368 | |||
369 | logfs_inode_setops(inode); | 362 | logfs_inode_setops(inode); |
370 | insert_inode_hash(inode); | 363 | insert_inode_hash(inode); |
371 | 364 | ||
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 6ac693faae49..482779fe4e7c 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c | |||
@@ -221,7 +221,7 @@ void minix_free_inode(struct inode * inode) | |||
221 | clear_inode(inode); /* clear in-memory copy */ | 221 | clear_inode(inode); /* clear in-memory copy */ |
222 | } | 222 | } |
223 | 223 | ||
224 | struct inode * minix_new_inode(const struct inode * dir, int * error) | 224 | struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) |
225 | { | 225 | { |
226 | struct super_block *sb = dir->i_sb; | 226 | struct super_block *sb = dir->i_sb; |
227 | struct minix_sb_info *sbi = minix_sb(sb); | 227 | struct minix_sb_info *sbi = minix_sb(sb); |
@@ -263,8 +263,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) | |||
263 | iput(inode); | 263 | iput(inode); |
264 | return NULL; | 264 | return NULL; |
265 | } | 265 | } |
266 | inode->i_uid = current_fsuid(); | 266 | inode_init_owner(inode, dir, mode); |
267 | inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid(); | ||
268 | inode->i_ino = j; | 267 | inode->i_ino = j; |
269 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 268 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
270 | inode->i_blocks = 0; | 269 | inode->i_blocks = 0; |
diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 9dcf95b42116..111f34ee9e3b 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h | |||
@@ -46,7 +46,7 @@ struct minix_sb_info { | |||
46 | extern struct inode *minix_iget(struct super_block *, unsigned long); | 46 | extern struct inode *minix_iget(struct super_block *, unsigned long); |
47 | extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); | 47 | extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); |
48 | extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); | 48 | extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); |
49 | extern struct inode * minix_new_inode(const struct inode * dir, int * error); | 49 | extern struct inode * minix_new_inode(const struct inode *, int, int *); |
50 | extern void minix_free_inode(struct inode * inode); | 50 | extern void minix_free_inode(struct inode * inode); |
51 | extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); | 51 | extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); |
52 | extern int minix_new_block(struct inode * inode); | 52 | extern int minix_new_block(struct inode * inode); |
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 32b131cd6121..e20ee85955d1 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -46,10 +46,9 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_ | |||
46 | if (!old_valid_dev(rdev)) | 46 | if (!old_valid_dev(rdev)) |
47 | return -EINVAL; | 47 | return -EINVAL; |
48 | 48 | ||
49 | inode = minix_new_inode(dir, &error); | 49 | inode = minix_new_inode(dir, mode, &error); |
50 | 50 | ||
51 | if (inode) { | 51 | if (inode) { |
52 | inode->i_mode = mode; | ||
53 | minix_set_inode(inode, rdev); | 52 | minix_set_inode(inode, rdev); |
54 | mark_inode_dirty(inode); | 53 | mark_inode_dirty(inode); |
55 | error = add_nondir(dentry, inode); | 54 | error = add_nondir(dentry, inode); |
@@ -73,11 +72,10 @@ static int minix_symlink(struct inode * dir, struct dentry *dentry, | |||
73 | if (i > dir->i_sb->s_blocksize) | 72 | if (i > dir->i_sb->s_blocksize) |
74 | goto out; | 73 | goto out; |
75 | 74 | ||
76 | inode = minix_new_inode(dir, &err); | 75 | inode = minix_new_inode(dir, S_IFLNK | 0777, &err); |
77 | if (!inode) | 76 | if (!inode) |
78 | goto out; | 77 | goto out; |
79 | 78 | ||
80 | inode->i_mode = S_IFLNK | 0777; | ||
81 | minix_set_inode(inode, 0); | 79 | minix_set_inode(inode, 0); |
82 | err = page_symlink(inode, symname, i); | 80 | err = page_symlink(inode, symname, i); |
83 | if (err) | 81 | if (err) |
@@ -117,13 +115,10 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) | |||
117 | 115 | ||
118 | inode_inc_link_count(dir); | 116 | inode_inc_link_count(dir); |
119 | 117 | ||
120 | inode = minix_new_inode(dir, &err); | 118 | inode = minix_new_inode(dir, mode, &err); |
121 | if (!inode) | 119 | if (!inode) |
122 | goto out_dir; | 120 | goto out_dir; |
123 | 121 | ||
124 | inode->i_mode = S_IFDIR | mode; | ||
125 | if (dir->i_mode & S_ISGID) | ||
126 | inode->i_mode |= S_ISGID; | ||
127 | minix_set_inode(inode, 0); | 122 | minix_set_inode(inode, 0); |
128 | 123 | ||
129 | inode_inc_link_count(inode); | 124 | inode_inc_link_count(inode); |
diff --git a/fs/namei.c b/fs/namei.c index b86b96fe1dc3..48e1f60520ea 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -523,9 +523,10 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) | |||
523 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | 523 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) |
524 | { | 524 | { |
525 | dput(nd->path.dentry); | 525 | dput(nd->path.dentry); |
526 | if (nd->path.mnt != path->mnt) | 526 | if (nd->path.mnt != path->mnt) { |
527 | mntput(nd->path.mnt); | 527 | mntput(nd->path.mnt); |
528 | nd->path.mnt = path->mnt; | 528 | nd->path.mnt = path->mnt; |
529 | } | ||
529 | nd->path.dentry = path->dentry; | 530 | nd->path.dentry = path->dentry; |
530 | } | 531 | } |
531 | 532 | ||
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 7edfcd4d5e52..92dde6f8d893 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -51,7 +51,7 @@ const struct file_operations ncp_dir_operations = | |||
51 | { | 51 | { |
52 | .read = generic_read_dir, | 52 | .read = generic_read_dir, |
53 | .readdir = ncp_readdir, | 53 | .readdir = ncp_readdir, |
54 | .ioctl = ncp_ioctl, | 54 | .unlocked_ioctl = ncp_ioctl, |
55 | #ifdef CONFIG_COMPAT | 55 | #ifdef CONFIG_COMPAT |
56 | .compat_ioctl = ncp_compat_ioctl, | 56 | .compat_ioctl = ncp_compat_ioctl, |
57 | #endif | 57 | #endif |
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 1daabb90e0a5..b93870892892 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
@@ -295,7 +295,7 @@ const struct file_operations ncp_file_operations = | |||
295 | .llseek = ncp_remote_llseek, | 295 | .llseek = ncp_remote_llseek, |
296 | .read = ncp_file_read, | 296 | .read = ncp_file_read, |
297 | .write = ncp_file_write, | 297 | .write = ncp_file_write, |
298 | .ioctl = ncp_ioctl, | 298 | .unlocked_ioctl = ncp_ioctl, |
299 | #ifdef CONFIG_COMPAT | 299 | #ifdef CONFIG_COMPAT |
300 | .compat_ioctl = ncp_compat_ioctl, | 300 | .compat_ioctl = ncp_compat_ioctl, |
301 | #endif | 301 | #endif |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 60a5e2864ea8..023c03d02070 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/smp_lock.h> | 20 | #include <linux/smp_lock.h> |
21 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/smp_lock.h> | ||
23 | 24 | ||
24 | #include <linux/ncp_fs.h> | 25 | #include <linux/ncp_fs.h> |
25 | 26 | ||
@@ -261,9 +262,9 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
261 | } | 262 | } |
262 | #endif /* CONFIG_NCPFS_NLS */ | 263 | #endif /* CONFIG_NCPFS_NLS */ |
263 | 264 | ||
264 | static int __ncp_ioctl(struct inode *inode, struct file *filp, | 265 | static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
265 | unsigned int cmd, unsigned long arg) | ||
266 | { | 266 | { |
267 | struct inode *inode = filp->f_dentry->d_inode; | ||
267 | struct ncp_server *server = NCP_SERVER(inode); | 268 | struct ncp_server *server = NCP_SERVER(inode); |
268 | int result; | 269 | int result; |
269 | struct ncp_ioctl_request request; | 270 | struct ncp_ioctl_request request; |
@@ -841,11 +842,11 @@ static int ncp_ioctl_need_write(unsigned int cmd) | |||
841 | } | 842 | } |
842 | } | 843 | } |
843 | 844 | ||
844 | int ncp_ioctl(struct inode *inode, struct file *filp, | 845 | long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
845 | unsigned int cmd, unsigned long arg) | ||
846 | { | 846 | { |
847 | int ret; | 847 | long ret; |
848 | 848 | ||
849 | lock_kernel(); | ||
849 | if (ncp_ioctl_need_write(cmd)) { | 850 | if (ncp_ioctl_need_write(cmd)) { |
850 | /* | 851 | /* |
851 | * inside the ioctl(), any failures which | 852 | * inside the ioctl(), any failures which |
@@ -853,24 +854,28 @@ int ncp_ioctl(struct inode *inode, struct file *filp, | |||
853 | * -EACCESS, so it seems consistent to keep | 854 | * -EACCESS, so it seems consistent to keep |
854 | * that here. | 855 | * that here. |
855 | */ | 856 | */ |
856 | if (mnt_want_write(filp->f_path.mnt)) | 857 | if (mnt_want_write(filp->f_path.mnt)) { |
857 | return -EACCES; | 858 | ret = -EACCES; |
859 | goto out; | ||
860 | } | ||
858 | } | 861 | } |
859 | ret = __ncp_ioctl(inode, filp, cmd, arg); | 862 | ret = __ncp_ioctl(filp, cmd, arg); |
860 | if (ncp_ioctl_need_write(cmd)) | 863 | if (ncp_ioctl_need_write(cmd)) |
861 | mnt_drop_write(filp->f_path.mnt); | 864 | mnt_drop_write(filp->f_path.mnt); |
865 | |||
866 | out: | ||
867 | unlock_kernel(); | ||
862 | return ret; | 868 | return ret; |
863 | } | 869 | } |
864 | 870 | ||
865 | #ifdef CONFIG_COMPAT | 871 | #ifdef CONFIG_COMPAT |
866 | long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 872 | long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
867 | { | 873 | { |
868 | struct inode *inode = file->f_path.dentry->d_inode; | 874 | long ret; |
869 | int ret; | ||
870 | 875 | ||
871 | lock_kernel(); | 876 | lock_kernel(); |
872 | arg = (unsigned long) compat_ptr(arg); | 877 | arg = (unsigned long) compat_ptr(arg); |
873 | ret = ncp_ioctl(inode, file, cmd, arg); | 878 | ret = ncp_ioctl(file, cmd, arg); |
874 | unlock_kernel(); | 879 | unlock_kernel(); |
875 | return ret; | 880 | return ret; |
876 | } | 881 | } |
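
ncpfs gets the same unlocked_ioctl treatment as hfsplus above, with the twist that its compat path simply widens the pointer argument and calls the native handler. (The hunk also adds a second #include <linux/smp_lock.h> even though one is already present three lines up; header guards make that harmless but redundant.) The generic shape of such a wrapper, as a sketch reusing the hypothetical example_unlocked_ioctl from earlier:

    #include <linux/compat.h>
    #include <linux/fs.h>

    #ifdef CONFIG_COMPAT
    static long example_compat_ioctl(struct file *file, unsigned int cmd,
                                     unsigned long arg)
    {
            /* 32-bit userspace passes 32-bit pointers; widen before reuse */
            return example_unlocked_ioctl(file, cmd,
                                          (unsigned long)compat_ptr(arg));
    }
    #endif
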
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2f8b1157daa2..04214fc5c304 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1060,7 +1060,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1060 | goto out_nomem; | 1060 | goto out_nomem; |
1061 | rc = strict_strtoul(string, 10, &option); | 1061 | rc = strict_strtoul(string, 10, &option); |
1062 | kfree(string); | 1062 | kfree(string); |
1063 | if (rc != 0 || option > USHORT_MAX) | 1063 | if (rc != 0 || option > USHRT_MAX) |
1064 | goto out_invalid_value; | 1064 | goto out_invalid_value; |
1065 | mnt->nfs_server.port = option; | 1065 | mnt->nfs_server.port = option; |
1066 | break; | 1066 | break; |
@@ -1181,7 +1181,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1181 | goto out_nomem; | 1181 | goto out_nomem; |
1182 | rc = strict_strtoul(string, 10, &option); | 1182 | rc = strict_strtoul(string, 10, &option); |
1183 | kfree(string); | 1183 | kfree(string); |
1184 | if (rc != 0 || option > USHORT_MAX) | 1184 | if (rc != 0 || option > USHRT_MAX) |
1185 | goto out_invalid_value; | 1185 | goto out_invalid_value; |
1186 | mnt->mount_server.port = option; | 1186 | mnt->mount_server.port = option; |
1187 | break; | 1187 | break; |
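
USHORT_MAX was renamed to USHRT_MAX kernel-wide to match the C limits.h spelling; the value is unchanged (assumed definition in include/linux/kernel.h: #define USHRT_MAX ((u16)(~0U)), i.e. 65535). The port validations keep rejecting anything that does not fit in 16 bits, roughly:

    #include <linux/kernel.h>

    /* sketch: the shape of the range check used for port options */
    static int example_valid_port(unsigned long option)
    {
            return option != 0 && option <= USHRT_MAX;  /* fits in a u16 */
    }
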
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 7a9ae3254a4b..7e26caab2a26 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -44,8 +44,7 @@ | |||
44 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 44 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
45 | 45 | ||
46 | /* Globals */ | 46 | /* Globals */ |
47 | static struct path rec_dir; | 47 | static struct file *rec_file; |
48 | static int rec_dir_init = 0; | ||
49 | 48 | ||
50 | static int | 49 | static int |
51 | nfs4_save_creds(const struct cred **original_creds) | 50 | nfs4_save_creds(const struct cred **original_creds) |
@@ -117,33 +116,28 @@ out_no_tfm: | |||
117 | return status; | 116 | return status; |
118 | } | 117 | } |
119 | 118 | ||
120 | static void | ||
121 | nfsd4_sync_rec_dir(void) | ||
122 | { | ||
123 | vfs_fsync(NULL, rec_dir.dentry, 0); | ||
124 | } | ||
125 | |||
126 | int | 119 | int |
127 | nfsd4_create_clid_dir(struct nfs4_client *clp) | 120 | nfsd4_create_clid_dir(struct nfs4_client *clp) |
128 | { | 121 | { |
129 | const struct cred *original_cred; | 122 | const struct cred *original_cred; |
130 | char *dname = clp->cl_recdir; | 123 | char *dname = clp->cl_recdir; |
131 | struct dentry *dentry; | 124 | struct dentry *dir, *dentry; |
132 | int status; | 125 | int status; |
133 | 126 | ||
134 | dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); | 127 | dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); |
135 | 128 | ||
136 | if (!rec_dir_init || clp->cl_firststate) | 129 | if (!rec_file || clp->cl_firststate) |
137 | return 0; | 130 | return 0; |
138 | 131 | ||
139 | status = nfs4_save_creds(&original_cred); | 132 | status = nfs4_save_creds(&original_cred); |
140 | if (status < 0) | 133 | if (status < 0) |
141 | return status; | 134 | return status; |
142 | 135 | ||
136 | dir = rec_file->f_path.dentry; | ||
143 | /* lock the parent */ | 137 | /* lock the parent */ |
144 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); | 138 | mutex_lock(&dir->d_inode->i_mutex); |
145 | 139 | ||
146 | dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); | 140 | dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); |
147 | if (IS_ERR(dentry)) { | 141 | if (IS_ERR(dentry)) { |
148 | status = PTR_ERR(dentry); | 142 | status = PTR_ERR(dentry); |
149 | goto out_unlock; | 143 | goto out_unlock; |
@@ -153,18 +147,18 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
153 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); | 147 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); |
154 | goto out_put; | 148 | goto out_put; |
155 | } | 149 | } |
156 | status = mnt_want_write(rec_dir.mnt); | 150 | status = mnt_want_write(rec_file->f_path.mnt); |
157 | if (status) | 151 | if (status) |
158 | goto out_put; | 152 | goto out_put; |
159 | status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); | 153 | status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); |
160 | mnt_drop_write(rec_dir.mnt); | 154 | mnt_drop_write(rec_file->f_path.mnt); |
161 | out_put: | 155 | out_put: |
162 | dput(dentry); | 156 | dput(dentry); |
163 | out_unlock: | 157 | out_unlock: |
164 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | 158 | mutex_unlock(&dir->d_inode->i_mutex); |
165 | if (status == 0) { | 159 | if (status == 0) { |
166 | clp->cl_firststate = 1; | 160 | clp->cl_firststate = 1; |
167 | nfsd4_sync_rec_dir(); | 161 | vfs_fsync(rec_file, 0); |
168 | } | 162 | } |
169 | nfs4_reset_creds(original_cred); | 163 | nfs4_reset_creds(original_cred); |
170 | dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); | 164 | dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); |
@@ -206,14 +200,14 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
206 | struct dentry *dentry; | 200 | struct dentry *dentry; |
207 | int status; | 201 | int status; |
208 | 202 | ||
209 | if (!rec_dir_init) | 203 | if (!rec_file) |
210 | return 0; | 204 | return 0; |
211 | 205 | ||
212 | status = nfs4_save_creds(&original_cred); | 206 | status = nfs4_save_creds(&original_cred); |
213 | if (status < 0) | 207 | if (status < 0) |
214 | return status; | 208 | return status; |
215 | 209 | ||
216 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, | 210 | filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY, |
217 | current_cred()); | 211 | current_cred()); |
218 | status = PTR_ERR(filp); | 212 | status = PTR_ERR(filp); |
219 | if (IS_ERR(filp)) | 213 | if (IS_ERR(filp)) |
@@ -250,13 +244,14 @@ out: | |||
250 | static int | 244 | static int |
251 | nfsd4_unlink_clid_dir(char *name, int namlen) | 245 | nfsd4_unlink_clid_dir(char *name, int namlen) |
252 | { | 246 | { |
253 | struct dentry *dentry; | 247 | struct dentry *dir, *dentry; |
254 | int status; | 248 | int status; |
255 | 249 | ||
256 | dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); | 250 | dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); |
257 | 251 | ||
258 | mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 252 | dir = rec_file->f_path.dentry; |
259 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); | 253 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
254 | dentry = lookup_one_len(name, dir, namlen); | ||
260 | if (IS_ERR(dentry)) { | 255 | if (IS_ERR(dentry)) { |
261 | status = PTR_ERR(dentry); | 256 | status = PTR_ERR(dentry); |
262 | goto out_unlock; | 257 | goto out_unlock; |
@@ -264,11 +259,11 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
264 | status = -ENOENT; | 259 | status = -ENOENT; |
265 | if (!dentry->d_inode) | 260 | if (!dentry->d_inode) |
266 | goto out; | 261 | goto out; |
267 | status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); | 262 | status = vfs_rmdir(dir->d_inode, dentry); |
268 | out: | 263 | out: |
269 | dput(dentry); | 264 | dput(dentry); |
270 | out_unlock: | 265 | out_unlock: |
271 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | 266 | mutex_unlock(&dir->d_inode->i_mutex); |
272 | return status; | 267 | return status; |
273 | } | 268 | } |
274 | 269 | ||
@@ -278,10 +273,10 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
278 | const struct cred *original_cred; | 273 | const struct cred *original_cred; |
279 | int status; | 274 | int status; |
280 | 275 | ||
281 | if (!rec_dir_init || !clp->cl_firststate) | 276 | if (!rec_file || !clp->cl_firststate) |
282 | return; | 277 | return; |
283 | 278 | ||
284 | status = mnt_want_write(rec_dir.mnt); | 279 | status = mnt_want_write(rec_file->f_path.mnt); |
285 | if (status) | 280 | if (status) |
286 | goto out; | 281 | goto out; |
287 | clp->cl_firststate = 0; | 282 | clp->cl_firststate = 0; |
@@ -293,8 +288,8 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
293 | status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); | 288 | status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); |
294 | nfs4_reset_creds(original_cred); | 289 | nfs4_reset_creds(original_cred); |
295 | if (status == 0) | 290 | if (status == 0) |
296 | nfsd4_sync_rec_dir(); | 291 | vfs_fsync(rec_file, 0); |
297 | mnt_drop_write(rec_dir.mnt); | 292 | mnt_drop_write(rec_file->f_path.mnt); |
298 | out: | 293 | out: |
299 | if (status) | 294 | if (status) |
300 | printk("NFSD: Failed to remove expired client state directory" | 295 | printk("NFSD: Failed to remove expired client state directory" |
@@ -323,19 +318,19 @@ void | |||
323 | nfsd4_recdir_purge_old(void) { | 318 | nfsd4_recdir_purge_old(void) { |
324 | int status; | 319 | int status; |
325 | 320 | ||
326 | if (!rec_dir_init) | 321 | if (!rec_file) |
327 | return; | 322 | return; |
328 | status = mnt_want_write(rec_dir.mnt); | 323 | status = mnt_want_write(rec_file->f_path.mnt); |
329 | if (status) | 324 | if (status) |
330 | goto out; | 325 | goto out; |
331 | status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); | 326 | status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old); |
332 | if (status == 0) | 327 | if (status == 0) |
333 | nfsd4_sync_rec_dir(); | 328 | vfs_fsync(rec_file, 0); |
334 | mnt_drop_write(rec_dir.mnt); | 329 | mnt_drop_write(rec_file->f_path.mnt); |
335 | out: | 330 | out: |
336 | if (status) | 331 | if (status) |
337 | printk("nfsd4: failed to purge old clients from recovery" | 332 | printk("nfsd4: failed to purge old clients from recovery" |
338 | " directory %s\n", rec_dir.dentry->d_name.name); | 333 | " directory %s\n", rec_file->f_path.dentry->d_name.name); |
339 | } | 334 | } |
340 | 335 | ||
341 | static int | 336 | static int |
@@ -355,10 +350,13 @@ int | |||
355 | nfsd4_recdir_load(void) { | 350 | nfsd4_recdir_load(void) { |
356 | int status; | 351 | int status; |
357 | 352 | ||
358 | status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); | 353 | if (!rec_file) |
354 | return 0; | ||
355 | |||
356 | status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir); | ||
359 | if (status) | 357 | if (status) |
360 | printk("nfsd4: failed loading clients from recovery" | 358 | printk("nfsd4: failed loading clients from recovery" |
361 | " directory %s\n", rec_dir.dentry->d_name.name); | 359 | " directory %s\n", rec_file->f_path.dentry->d_name.name); |
362 | return status; | 360 | return status; |
363 | } | 361 | } |
364 | 362 | ||
@@ -375,7 +373,7 @@ nfsd4_init_recdir(char *rec_dirname) | |||
375 | printk("NFSD: Using %s as the NFSv4 state recovery directory\n", | 373 | printk("NFSD: Using %s as the NFSv4 state recovery directory\n", |
376 | rec_dirname); | 374 | rec_dirname); |
377 | 375 | ||
378 | BUG_ON(rec_dir_init); | 376 | BUG_ON(rec_file); |
379 | 377 | ||
380 | status = nfs4_save_creds(&original_cred); | 378 | status = nfs4_save_creds(&original_cred); |
381 | if (status < 0) { | 379 | if (status < 0) { |
@@ -385,22 +383,21 @@ nfsd4_init_recdir(char *rec_dirname) | |||
385 | return; | 383 | return; |
386 | } | 384 | } |
387 | 385 | ||
388 | status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, | 386 | rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0); |
389 | &rec_dir); | 387 | if (IS_ERR(rec_file)) { |
390 | if (status) | ||
391 | printk("NFSD: unable to find recovery directory %s\n", | 388 | printk("NFSD: unable to find recovery directory %s\n", |
392 | rec_dirname); | 389 | rec_dirname); |
390 | rec_file = NULL; | ||
391 | } | ||
393 | 392 | ||
394 | if (!status) | ||
395 | rec_dir_init = 1; | ||
396 | nfs4_reset_creds(original_cred); | 393 | nfs4_reset_creds(original_cred); |
397 | } | 394 | } |
398 | 395 | ||
399 | void | 396 | void |
400 | nfsd4_shutdown_recdir(void) | 397 | nfsd4_shutdown_recdir(void) |
401 | { | 398 | { |
402 | if (!rec_dir_init) | 399 | if (!rec_file) |
403 | return; | 400 | return; |
404 | rec_dir_init = 0; | 401 | fput(rec_file); |
405 | path_put(&rec_dir); | 402 | rec_file = NULL; |
406 | } | 403 | } |
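
The nfs4recover rewrite replaces the rec_dir path plus the rec_dir_init flag with a single struct file pinned by filp_open() for the lifetime of the recovery code: the dentry and vfsmount fall out of rec_file->f_path, the NULL-ness of rec_file doubles as the init flag, and syncing the directory becomes an ordinary vfs_fsync() on that open file. The pinning idiom, sketched with a hypothetical pin_dir():

    #include <linux/fs.h>
    #include <linux/err.h>

    static struct file *pin_dir(const char *name)
    {
            struct file *f = filp_open(name, O_RDONLY | O_DIRECTORY, 0);

            if (IS_ERR(f))
                    return NULL;
            /* f->f_path.dentry and f->f_path.mnt stay pinned until fput(f) */
            return f;
    }
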
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bc3194ea01f5..508941c23af7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -998,7 +998,7 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
998 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) | 998 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) |
999 | return -EINVAL; | 999 | return -EINVAL; |
1000 | 1000 | ||
1001 | if (port < 1 || port > USHORT_MAX) | 1001 | if (port < 1 || port > USHRT_MAX) |
1002 | return -EINVAL; | 1002 | return -EINVAL; |
1003 | 1003 | ||
1004 | err = nfsd_create_serv(); | 1004 | err = nfsd_create_serv(); |
@@ -1040,7 +1040,7 @@ static ssize_t __write_ports_delxprt(char *buf) | |||
1040 | if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) | 1040 | if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) |
1041 | return -EINVAL; | 1041 | return -EINVAL; |
1042 | 1042 | ||
1043 | if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL) | 1043 | if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) |
1044 | return -EINVAL; | 1044 | return -EINVAL; |
1045 | 1045 | ||
1046 | xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); | 1046 | xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23c06f77f4ca..ebbf3b6b2457 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -999,7 +999,7 @@ static int wait_for_concurrent_writes(struct file *file) | |||
999 | 999 | ||
1000 | if (inode->i_state & I_DIRTY) { | 1000 | if (inode->i_state & I_DIRTY) { |
1001 | dprintk("nfsd: write sync %d\n", task_pid_nr(current)); | 1001 | dprintk("nfsd: write sync %d\n", task_pid_nr(current)); |
1002 | err = vfs_fsync(file, file->f_path.dentry, 0); | 1002 | err = vfs_fsync(file, 0); |
1003 | } | 1003 | } |
1004 | last_ino = inode->i_ino; | 1004 | last_ino = inode->i_ino; |
1005 | last_dev = inode->i_sb->s_dev; | 1005 | last_dev = inode->i_sb->s_dev; |
@@ -1175,8 +1175,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1175 | if (err) | 1175 | if (err) |
1176 | goto out; | 1176 | goto out; |
1177 | if (EX_ISSYNC(fhp->fh_export)) { | 1177 | if (EX_ISSYNC(fhp->fh_export)) { |
1178 | int err2 = vfs_fsync_range(file, file->f_path.dentry, | 1178 | int err2 = vfs_fsync_range(file, offset, end, 0); |
1179 | offset, end, 0); | ||
1180 | 1179 | ||
1181 | if (err2 != -EINVAL) | 1180 | if (err2 != -EINVAL) |
1182 | err = nfserrno(err2); | 1181 | err = nfserrno(err2); |
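
Both nfsd hunks reflect vfs_fsync()/vfs_fsync_range() dropping their dentry parameter; the file alone is enough now (and a NULL file is no longer accepted, which is why nfs4recover above had to keep a real struct file around). The prototypes these callers assume:

    /* sketch of the assumed prototypes (see include/linux/fs.h):
     *
     *   int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
     *                       int datasync);
     *   int vfs_fsync(struct file *file, int datasync);
     */
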
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 5e226d4b41d3..39e038ac8fcb 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -280,16 +280,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) | |||
280 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ | 280 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
281 | 281 | ||
282 | atomic_inc(&sbi->s_inodes_count); | 282 | atomic_inc(&sbi->s_inodes_count); |
283 | 283 | inode_init_owner(inode, dir, mode); | |
284 | inode->i_uid = current_fsuid(); | ||
285 | if (dir->i_mode & S_ISGID) { | ||
286 | inode->i_gid = dir->i_gid; | ||
287 | if (S_ISDIR(mode)) | ||
288 | mode |= S_ISGID; | ||
289 | } else | ||
290 | inode->i_gid = current_fsgid(); | ||
291 | |||
292 | inode->i_mode = mode; | ||
293 | inode->i_ino = ino; | 284 | inode->i_ino = ino; |
294 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 285 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
295 | 286 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a756168a21c2..8c1097327abc 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -674,7 +674,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
674 | start * sects_per_block, | 674 | start * sects_per_block, |
675 | nblocks * sects_per_block, | 675 | nblocks * sects_per_block, |
676 | GFP_NOFS, | 676 | GFP_NOFS, |
677 | DISCARD_FL_BARRIER); | 677 | BLKDEV_IFL_BARRIER); |
678 | if (ret < 0) | 678 | if (ret < 0) |
679 | return ret; | 679 | return ret; |
680 | nblocks = 0; | 680 | nblocks = 0; |
@@ -684,7 +684,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
684 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 684 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
685 | start * sects_per_block, | 685 | start * sects_per_block, |
686 | nblocks * sects_per_block, | 686 | nblocks * sects_per_block, |
687 | GFP_NOFS, DISCARD_FL_BARRIER); | 687 | GFP_NOFS, BLKDEV_IFL_BARRIER); |
688 | return ret; | 688 | return ret; |
689 | } | 689 | } |
690 | 690 | ||
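
Same flag-namespace consolidation as the jbd2 flush calls earlier: blkdev_issue_discard() now takes the shared BLKDEV_IFL_* flags, so nilfs2's DISCARD_FL_BARRIER becomes BLKDEV_IFL_BARRIER with no behavioural change intended. Assumed prototype, as a sketch:

    /*
     *   int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
     *                            sector_t nr_sects, gfp_t gfp_mask,
     *                            unsigned long flags);
     */
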
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 40b1cf914ccb..27b75ebc7460 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c | |||
@@ -110,14 +110,10 @@ EXPORT_SYMBOL_GPL(get_inotify_watch); | |||
110 | int pin_inotify_watch(struct inotify_watch *watch) | 110 | int pin_inotify_watch(struct inotify_watch *watch) |
111 | { | 111 | { |
112 | struct super_block *sb = watch->inode->i_sb; | 112 | struct super_block *sb = watch->inode->i_sb; |
113 | spin_lock(&sb_lock); | 113 | if (atomic_inc_not_zero(&sb->s_active)) { |
114 | if (sb->s_count >= S_BIAS) { | ||
115 | atomic_inc(&sb->s_active); | ||
116 | spin_unlock(&sb_lock); | ||
117 | atomic_inc(&watch->count); | 114 | atomic_inc(&watch->count); |
118 | return 1; | 115 | return 1; |
119 | } | 116 | } |
120 | spin_unlock(&sb_lock); | ||
121 | return 0; | 117 | return 0; |
122 | } | 118 | } |
123 | 119 | ||
@@ -515,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); | |||
515 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | 511 | * done. Cleanup is just deactivate_super(). However, that leaves a messy |
516 | * case - what if we *are* racing with umount() and active references to | 512 | * case - what if we *are* racing with umount() and active references to |
517 | * superblock can't be acquired anymore? We can bump ->s_count, grab | 513 | * superblock can't be acquired anymore? We can bump ->s_count, grab |
518 | * ->s_umount, which will almost certainly wait until the superblock is shut | 514 | * ->s_umount, which will wait until the superblock is shut down and the |
519 | * down and the watch in question is pining for fjords. That's fine, but | 515 | * watch in question is pining for fjords. |
520 | * there is a problem - we might have hit the window between ->s_active | ||
521 | * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock | ||
522 | * is past the point of no return and is heading for shutdown) and the | ||
523 | * moment when deactivate_super() acquires ->s_umount. We could just do | ||
524 | * drop_super() yield() and retry, but that's rather antisocial and this | ||
525 | * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having | ||
526 | * found that we'd got there first (i.e. that ->s_root is non-NULL) we know | ||
527 | * that we won't race with inotify_umount_inodes(). So we could grab a | ||
528 | * reference to watch and do the rest as above, just with drop_super() instead | ||
529 | * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we | ||
530 | * could grab ->s_umount. So the watch could've been gone already. | ||
531 | * | ||
532 | * That still can be dealt with - we need to save watch->wd, do idr_find() | ||
533 | * and compare its result with our pointer. If they match, we either have | ||
534 | * the damn thing still alive or we'd lost not one but two races at once, | ||
535 | * the watch had been killed and a new one got created with the same ->wd | ||
536 | * at the same address. That couldn't have happened in inotify_destroy(), | ||
537 | * but inotify_rm_wd() could run into that. Still, "new one got created" | ||
538 | * is not a problem - we have every right to kill it or leave it alone, | ||
539 | * whatever's more convenient. | ||
540 | * | ||
541 | * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as | ||
542 | * "grab it and kill it" check. If it's been our original watch, we are | ||
543 | * fine, if it's a newcomer - nevermind, just pretend that we'd won the | ||
544 | * race and kill the fscker anyway; we are safe since we know that its | ||
545 | * superblock won't be going away. | ||
546 | * | 516 | * |
547 | * And yes, this is far beyond mere "not very pretty"; so's the entire | 517 | * And yes, this is far beyond mere "not very pretty"; so's the entire |
548 | * concept of inotify to start with. | 518 | * concept of inotify to start with. |
@@ -556,57 +526,31 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); | |||
556 | * Called with ih->mutex held, drops it. Possible return values: | 526 | * Called with ih->mutex held, drops it. Possible return values: |
557 | * 0 - nothing to do, it has died | 527 | * 0 - nothing to do, it has died |
558 | * 1 - remove it, drop the reference and deactivate_super() | 528 | * 1 - remove it, drop the reference and deactivate_super() |
559 | * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid | ||
560 | * that variant, since it involved a lot of PITA, but that's the best that | ||
561 | * could've been done. | ||
562 | */ | 529 | */ |
563 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | 530 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) |
564 | { | 531 | { |
565 | struct super_block *sb = watch->inode->i_sb; | 532 | struct super_block *sb = watch->inode->i_sb; |
566 | s32 wd = watch->wd; | ||
567 | 533 | ||
568 | spin_lock(&sb_lock); | 534 | if (atomic_inc_not_zero(&sb->s_active)) { |
569 | if (sb->s_count >= S_BIAS) { | ||
570 | atomic_inc(&sb->s_active); | ||
571 | spin_unlock(&sb_lock); | ||
572 | get_inotify_watch(watch); | 535 | get_inotify_watch(watch); |
573 | mutex_unlock(&ih->mutex); | 536 | mutex_unlock(&ih->mutex); |
574 | return 1; /* the best outcome */ | 537 | return 1; /* the best outcome */ |
575 | } | 538 | } |
539 | spin_lock(&sb_lock); | ||
576 | sb->s_count++; | 540 | sb->s_count++; |
577 | spin_unlock(&sb_lock); | 541 | spin_unlock(&sb_lock); |
578 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | 542 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ |
579 | down_read(&sb->s_umount); | 543 | down_read(&sb->s_umount); |
580 | if (likely(!sb->s_root)) { | 544 | /* fs is already shut down; the watch is dead */ |
581 | /* fs is already shut down; the watch is dead */ | 545 | drop_super(sb); |
582 | drop_super(sb); | 546 | return 0; |
583 | return 0; | ||
584 | } | ||
585 | /* raced with the final deactivate_super() */ | ||
586 | mutex_lock(&ih->mutex); | ||
587 | if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { | ||
588 | /* the watch is dead */ | ||
589 | mutex_unlock(&ih->mutex); | ||
590 | drop_super(sb); | ||
591 | return 0; | ||
592 | } | ||
593 | /* still alive or freed and reused with the same sb and wd; kill */ | ||
594 | get_inotify_watch(watch); | ||
595 | mutex_unlock(&ih->mutex); | ||
596 | return 2; | ||
597 | } | 547 | } |
598 | 548 | ||
599 | static void unpin_and_kill(struct inotify_watch *watch, int how) | 549 | static void unpin_and_kill(struct inotify_watch *watch) |
600 | { | 550 | { |
601 | struct super_block *sb = watch->inode->i_sb; | 551 | struct super_block *sb = watch->inode->i_sb; |
602 | put_inotify_watch(watch); | 552 | put_inotify_watch(watch); |
603 | switch (how) { | 553 | deactivate_super(sb); |
604 | case 1: | ||
605 | deactivate_super(sb); | ||
606 | break; | ||
607 | case 2: | ||
608 | drop_super(sb); | ||
609 | } | ||
610 | } | 554 | } |
611 | 555 | ||
612 | /** | 556 | /** |
@@ -628,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih) | |||
628 | struct list_head *watches; | 572 | struct list_head *watches; |
629 | struct super_block *sb; | 573 | struct super_block *sb; |
630 | struct inode *inode; | 574 | struct inode *inode; |
631 | int how; | ||
632 | 575 | ||
633 | mutex_lock(&ih->mutex); | 576 | mutex_lock(&ih->mutex); |
634 | watches = &ih->watches; | 577 | watches = &ih->watches; |
@@ -638,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
638 | } | 581 | } |
639 | watch = list_first_entry(watches, struct inotify_watch, h_list); | 582 | watch = list_first_entry(watches, struct inotify_watch, h_list); |
640 | sb = watch->inode->i_sb; | 583 | sb = watch->inode->i_sb; |
641 | how = pin_to_kill(ih, watch); | 584 | if (!pin_to_kill(ih, watch)) |
642 | if (!how) | ||
643 | continue; | 585 | continue; |
644 | 586 | ||
645 | inode = watch->inode; | 587 | inode = watch->inode; |
@@ -654,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
654 | 596 | ||
655 | mutex_unlock(&ih->mutex); | 597 | mutex_unlock(&ih->mutex); |
656 | mutex_unlock(&inode->inotify_mutex); | 598 | mutex_unlock(&inode->inotify_mutex); |
657 | unpin_and_kill(watch, how); | 599 | unpin_and_kill(watch); |
658 | } | 600 | } |
659 | 601 | ||
660 | /* free this handle: the put matching the get in inotify_init() */ | 602 | /* free this handle: the put matching the get in inotify_init() */ |
@@ -857,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
857 | struct inotify_watch *watch; | 799 | struct inotify_watch *watch; |
858 | struct super_block *sb; | 800 | struct super_block *sb; |
859 | struct inode *inode; | 801 | struct inode *inode; |
860 | int how; | ||
861 | 802 | ||
862 | mutex_lock(&ih->mutex); | 803 | mutex_lock(&ih->mutex); |
863 | watch = idr_find(&ih->idr, wd); | 804 | watch = idr_find(&ih->idr, wd); |
@@ -866,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
866 | return -EINVAL; | 807 | return -EINVAL; |
867 | } | 808 | } |
868 | sb = watch->inode->i_sb; | 809 | sb = watch->inode->i_sb; |
869 | how = pin_to_kill(ih, watch); | 810 | if (!pin_to_kill(ih, watch)) |
870 | if (!how) | ||
871 | return 0; | 811 | return 0; |
872 | 812 | ||
873 | inode = watch->inode; | 813 | inode = watch->inode; |
@@ -881,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
881 | 821 | ||
882 | mutex_unlock(&ih->mutex); | 822 | mutex_unlock(&ih->mutex); |
883 | mutex_unlock(&inode->inotify_mutex); | 823 | mutex_unlock(&inode->inotify_mutex); |
884 | unpin_and_kill(watch, how); | 824 | unpin_and_kill(watch); |
885 | 825 | ||
886 | return 0; | 826 | return 0; |
887 | } | 827 | } |
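
With s_active now a plain refcount where zero means the superblock is going away, pin_to_kill() collapses to a single lockless atomic_inc_not_zero(): success pins the filesystem, failure means umount won the race and the fallback path only has to wait on ->s_umount to confirm the watch is dead. The two-outcome return (and unpin_and_kill()'s switch) disappear with the old S_BIAS dance. The idiom, as a sketch:

    #include <linux/fs.h>

    /* returns nonzero and pins sb iff the fs is not already shutting down;
     * release the pin with deactivate_super(sb) */
    static int example_pin_sb(struct super_block *sb)
    {
            return atomic_inc_not_zero(&sb->s_active);
    }
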
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8804f093ba75..a1924a0d2ab0 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -98,9 +98,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) | |||
98 | * the page at all. For a more detailed explanation see ntfs_truncate() in | 98 | * the page at all. For a more detailed explanation see ntfs_truncate() in |
99 | * fs/ntfs/inode.c. | 99 | * fs/ntfs/inode.c. |
100 | * | 100 | * |
101 | * @cached_page and @lru_pvec are just optimizations for dealing with multiple | ||
102 | * pages. | ||
103 | * | ||
104 | * Return 0 on success and -errno on error. In the case that an error is | 101 | * Return 0 on success and -errno on error. In the case that an error is |
105 | * encountered it is possible that the initialized size will already have been | 102 | * encountered it is possible that the initialized size will already have been |
106 | * incremented some way towards @new_init_size but it is guaranteed that if | 103 | * incremented some way towards @new_init_size but it is guaranteed that if |
@@ -110,8 +107,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) | |||
110 | * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be | 107 | * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be |
111 | * held by the caller. | 108 | * held by the caller. |
112 | */ | 109 | */ |
113 | static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, | 110 | static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) |
114 | struct page **cached_page, struct pagevec *lru_pvec) | ||
115 | { | 111 | { |
116 | s64 old_init_size; | 112 | s64 old_init_size; |
117 | loff_t old_i_size; | 113 | loff_t old_i_size; |
@@ -403,18 +399,13 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, | |||
403 | * Obtain @nr_pages locked page cache pages from the mapping @mapping and | 399 | * Obtain @nr_pages locked page cache pages from the mapping @mapping and |
404 | * starting at index @index. | 400 | * starting at index @index. |
405 | * | 401 | * |
406 | * If a page is newly created, increment its refcount and add it to the | 402 | * If a page is newly created, add it to the LRU list |
407 | * caller's lru-buffering pagevec @lru_pvec. | ||
408 | * | ||
409 | * This is the same as mm/filemap.c::__grab_cache_page(), except that @nr_pages | ||
410 | * are obtained at once instead of just one page and that 0 is returned on | ||
411 | * success and -errno on error. | ||
412 | * | 403 | * |
413 | * Note, the page locks are obtained in ascending page index order. | 404 | * Note, the page locks are obtained in ascending page index order. |
414 | */ | 405 | */ |
415 | static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | 406 | static inline int __ntfs_grab_cache_pages(struct address_space *mapping, |
416 | pgoff_t index, const unsigned nr_pages, struct page **pages, | 407 | pgoff_t index, const unsigned nr_pages, struct page **pages, |
417 | struct page **cached_page, struct pagevec *lru_pvec) | 408 | struct page **cached_page) |
418 | { | 409 | { |
419 | int err, nr; | 410 | int err, nr; |
420 | 411 | ||
@@ -430,7 +421,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
430 | goto err_out; | 421 | goto err_out; |
431 | } | 422 | } |
432 | } | 423 | } |
433 | err = add_to_page_cache(*cached_page, mapping, index, | 424 | err = add_to_page_cache_lru(*cached_page, mapping, index, |
434 | GFP_KERNEL); | 425 | GFP_KERNEL); |
435 | if (unlikely(err)) { | 426 | if (unlikely(err)) { |
436 | if (err == -EEXIST) | 427 | if (err == -EEXIST) |
@@ -438,9 +429,6 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
438 | goto err_out; | 429 | goto err_out; |
439 | } | 430 | } |
440 | pages[nr] = *cached_page; | 431 | pages[nr] = *cached_page; |
441 | page_cache_get(*cached_page); | ||
442 | if (unlikely(!pagevec_add(lru_pvec, *cached_page))) | ||
443 | __pagevec_lru_add_file(lru_pvec); | ||
444 | *cached_page = NULL; | 432 | *cached_page = NULL; |
445 | } | 433 | } |
446 | index++; | 434 | index++; |
@@ -1800,7 +1788,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
1800 | ssize_t status, written; | 1788 | ssize_t status, written; |
1801 | unsigned nr_pages; | 1789 | unsigned nr_pages; |
1802 | int err; | 1790 | int err; |
1803 | struct pagevec lru_pvec; | ||
1804 | 1791 | ||
1805 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " | 1792 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " |
1806 | "pos 0x%llx, count 0x%lx.", | 1793 | "pos 0x%llx, count 0x%lx.", |
@@ -1912,7 +1899,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
1912 | } | 1899 | } |
1913 | } | 1900 | } |
1914 | } | 1901 | } |
1915 | pagevec_init(&lru_pvec, 0); | ||
1916 | written = 0; | 1902 | written = 0; |
1917 | /* | 1903 | /* |
1918 | * If the write starts beyond the initialized size, extend it up to the | 1904 | * If the write starts beyond the initialized size, extend it up to the |
@@ -1925,8 +1911,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
1925 | ll = ni->initialized_size; | 1911 | ll = ni->initialized_size; |
1926 | read_unlock_irqrestore(&ni->size_lock, flags); | 1912 | read_unlock_irqrestore(&ni->size_lock, flags); |
1927 | if (pos > ll) { | 1913 | if (pos > ll) { |
1928 | err = ntfs_attr_extend_initialized(ni, pos, &cached_page, | 1914 | err = ntfs_attr_extend_initialized(ni, pos); |
1929 | &lru_pvec); | ||
1930 | if (err < 0) { | 1915 | if (err < 0) { |
1931 | ntfs_error(vol->sb, "Cannot perform write to inode " | 1916 | ntfs_error(vol->sb, "Cannot perform write to inode " |
1932 | "0x%lx, attribute type 0x%x, because " | 1917 | "0x%lx, attribute type 0x%x, because " |
@@ -2012,7 +1997,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
2012 | ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); | 1997 | ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); |
2013 | /* Get and lock @do_pages starting at index @start_idx. */ | 1998 | /* Get and lock @do_pages starting at index @start_idx. */ |
2014 | status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, | 1999 | status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, |
2015 | pages, &cached_page, &lru_pvec); | 2000 | pages, &cached_page); |
2016 | if (unlikely(status)) | 2001 | if (unlikely(status)) |
2017 | break; | 2002 | break; |
2018 | /* | 2003 | /* |
@@ -2077,7 +2062,6 @@ err_out: | |||
2077 | *ppos = pos; | 2062 | *ppos = pos; |
2078 | if (cached_page) | 2063 | if (cached_page) |
2079 | page_cache_release(cached_page); | 2064 | page_cache_release(cached_page); |
2080 | pagevec_lru_add_file(&lru_pvec); | ||
2081 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 2065 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
2082 | written ? "written" : "status", (unsigned long)written, | 2066 | written ? "written" : "status", (unsigned long)written, |
2083 | (long)status); | 2067 | (long)status); |
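
The ntfs_file_buffered_write() hunks delete the caller-maintained struct pagevec because add_to_page_cache_lru() folds the LRU bookkeeping into the page-cache insertion itself: the extra page_cache_get(), pagevec_add() and __pagevec_lru_add_file() calls become redundant. A compilable user-space model of that consolidation (all names are illustrative stand-ins, not the kernel interfaces):

#include <stdio.h>

struct page { int in_cache, on_lru; };

static int add_to_page_cache(struct page *p) { p->in_cache = 1; return 0; }
static void lru_add_file(struct page *p)     { p->on_lru = 1; }

/* Models add_to_page_cache_lru(): one call both inserts the page into
 * the mapping and queues it for the LRU, so callers no longer buffer
 * new pages in a pagevec and flush them to the LRU themselves. */
static int add_to_page_cache_lru_model(struct page *p)
{
	int err = add_to_page_cache(p);

	if (!err)
		lru_add_file(p);
	return err;
}

int main(void)
{
	struct page p = { 0, 0 };

	add_to_page_cache_lru_model(&p);
	printf("in_cache=%d on_lru=%d\n", p.in_cache, p.on_lru);
	return 0;
}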
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index e13fc9e8fcdc..da702294d7e7 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -489,7 +489,7 @@ cleanup: | |||
489 | return ret; | 489 | return ret; |
490 | } | 490 | } |
491 | 491 | ||
492 | struct xattr_handler ocfs2_xattr_acl_access_handler = { | 492 | const struct xattr_handler ocfs2_xattr_acl_access_handler = { |
493 | .prefix = POSIX_ACL_XATTR_ACCESS, | 493 | .prefix = POSIX_ACL_XATTR_ACCESS, |
494 | .flags = ACL_TYPE_ACCESS, | 494 | .flags = ACL_TYPE_ACCESS, |
495 | .list = ocfs2_xattr_list_acl_access, | 495 | .list = ocfs2_xattr_list_acl_access, |
@@ -497,7 +497,7 @@ struct xattr_handler ocfs2_xattr_acl_access_handler = { | |||
497 | .set = ocfs2_xattr_set_acl, | 497 | .set = ocfs2_xattr_set_acl, |
498 | }; | 498 | }; |
499 | 499 | ||
500 | struct xattr_handler ocfs2_xattr_acl_default_handler = { | 500 | const struct xattr_handler ocfs2_xattr_acl_default_handler = { |
501 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 501 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
502 | .flags = ACL_TYPE_DEFAULT, | 502 | .flags = ACL_TYPE_DEFAULT, |
503 | .list = ocfs2_xattr_list_acl_default, | 503 | .list = ocfs2_xattr_list_acl_default, |
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index b7428c5d0d3b..ec6d12339593 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -403,7 +403,7 @@ void ocfs2_block_check_compute(void *data, size_t blocksize, | |||
403 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no | 403 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no |
404 | * larger than 16 bits. | 404 | * larger than 16 bits. |
405 | */ | 405 | */ |
406 | BUG_ON(ecc > USHORT_MAX); | 406 | BUG_ON(ecc > USHRT_MAX); |
407 | 407 | ||
408 | bc->bc_crc32e = cpu_to_le32(crc); | 408 | bc->bc_crc32e = cpu_to_le32(crc); |
409 | bc->bc_ecc = cpu_to_le16((u16)ecc); | 409 | bc->bc_ecc = cpu_to_le16((u16)ecc); |
@@ -508,7 +508,7 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, | |||
508 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no | 508 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no |
509 | * larger than 16 bits. | 509 | * larger than 16 bits. |
510 | */ | 510 | */ |
511 | BUG_ON(ecc > USHORT_MAX); | 511 | BUG_ON(ecc > USHRT_MAX); |
512 | 512 | ||
513 | bc->bc_crc32e = cpu_to_le32(crc); | 513 | bc->bc_crc32e = cpu_to_le32(crc); |
514 | bc->bc_ecc = cpu_to_le16((u16)ecc); | 514 | bc->bc_ecc = cpu_to_le16((u16)ecc); |
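
The blockcheck.c hunks are part of a tree-wide rename of USHORT_MAX to USHRT_MAX (matching the traditional <limits.h> spelling); the guarded truncation itself is unchanged. For reference, a tiny user-space version of the guard:

#include <assert.h>
#include <limits.h>   /* user-space USHRT_MAX == 65535, the same bound */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ecc = 0x1234;            /* computed ECC, must fit 16 bits */

	assert(ecc <= USHRT_MAX);         /* mirrors BUG_ON(ecc > USHRT_MAX) */
	uint16_t on_disk = (uint16_t)ecc; /* now a safe cast for cpu_to_le16() */
	printf("ecc on disk: 0x%04x\n", on_disk);
	return 0;
}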
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 50c4ee805da4..39eb16ac5f98 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -3897,7 +3897,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) | |||
3897 | oinfo->dqi_gi.dqi_free_entry = | 3897 | oinfo->dqi_gi.dqi_free_entry = |
3898 | be32_to_cpu(lvb->lvb_free_entry); | 3898 | be32_to_cpu(lvb->lvb_free_entry); |
3899 | } else { | 3899 | } else { |
3900 | status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); | 3900 | status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, |
3901 | oinfo->dqi_giblk, &bh); | ||
3901 | if (status) { | 3902 | if (status) { |
3902 | mlog_errno(status); | 3903 | mlog_errno(status); |
3903 | goto bail; | 3904 | goto bail; |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f74f1400eccd..97e54b9e654b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -933,9 +933,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
933 | struct ocfs2_super *osb = OCFS2_SB(sb); | 933 | struct ocfs2_super *osb = OCFS2_SB(sb); |
934 | struct buffer_head *bh = NULL; | 934 | struct buffer_head *bh = NULL; |
935 | handle_t *handle = NULL; | 935 | handle_t *handle = NULL; |
936 | int qtype; | ||
937 | struct dquot *transfer_from[MAXQUOTAS] = { }; | ||
938 | struct dquot *transfer_to[MAXQUOTAS] = { }; | 936 | struct dquot *transfer_to[MAXQUOTAS] = { }; |
937 | int qtype; | ||
939 | 938 | ||
940 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 939 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
941 | dentry->d_name.len, dentry->d_name.name); | 940 | dentry->d_name.len, dentry->d_name.name); |
@@ -966,10 +965,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
966 | if (status) | 965 | if (status) |
967 | return status; | 966 | return status; |
968 | 967 | ||
968 | if (is_quota_modification(inode, attr)) | ||
969 | dquot_initialize(inode); | ||
969 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; | 970 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; |
970 | if (size_change) { | 971 | if (size_change) { |
971 | dquot_initialize(inode); | ||
972 | |||
973 | status = ocfs2_rw_lock(inode, 1); | 972 | status = ocfs2_rw_lock(inode, 1); |
974 | if (status < 0) { | 973 | if (status < 0) { |
975 | mlog_errno(status); | 974 | mlog_errno(status); |
@@ -1019,9 +1018,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1019 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { | 1018 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { |
1020 | transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, | 1019 | transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, |
1021 | USRQUOTA); | 1020 | USRQUOTA); |
1022 | transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, | 1021 | if (!transfer_to[USRQUOTA]) { |
1023 | USRQUOTA); | ||
1024 | if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { | ||
1025 | status = -ESRCH; | 1022 | status = -ESRCH; |
1026 | goto bail_unlock; | 1023 | goto bail_unlock; |
1027 | } | 1024 | } |
@@ -1031,9 +1028,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1031 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { | 1028 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { |
1032 | transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, | 1029 | transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, |
1033 | GRPQUOTA); | 1030 | GRPQUOTA); |
1034 | transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, | 1031 | if (!transfer_to[GRPQUOTA]) { |
1035 | GRPQUOTA); | ||
1036 | if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { | ||
1037 | status = -ESRCH; | 1032 | status = -ESRCH; |
1038 | goto bail_unlock; | 1033 | goto bail_unlock; |
1039 | } | 1034 | } |
@@ -1045,7 +1040,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1045 | mlog_errno(status); | 1040 | mlog_errno(status); |
1046 | goto bail_unlock; | 1041 | goto bail_unlock; |
1047 | } | 1042 | } |
1048 | status = dquot_transfer(inode, attr); | 1043 | status = __dquot_transfer(inode, transfer_to); |
1049 | if (status < 0) | 1044 | if (status < 0) |
1050 | goto bail_commit; | 1045 | goto bail_commit; |
1051 | } else { | 1046 | } else { |
@@ -1085,10 +1080,8 @@ bail: | |||
1085 | brelse(bh); | 1080 | brelse(bh); |
1086 | 1081 | ||
1087 | /* Release quota pointers in case we acquired them */ | 1082 | /* Release quota pointers in case we acquired them */ |
1088 | for (qtype = 0; qtype < MAXQUOTAS; qtype++) { | 1083 | for (qtype = 0; qtype < MAXQUOTAS; qtype++) |
1089 | dqput(transfer_to[qtype]); | 1084 | dqput(transfer_to[qtype]); |
1090 | dqput(transfer_from[qtype]); | ||
1091 | } | ||
1092 | 1085 | ||
1093 | if (!status && attr->ia_valid & ATTR_MODE) { | 1086 | if (!status && attr->ia_valid & ATTR_MODE) { |
1094 | status = ocfs2_acl_chmod(inode); | 1087 | status = ocfs2_acl_chmod(inode); |
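
In the ocfs2_setattr() hunks the transfer_from[] array disappears because __dquot_transfer() takes only the destination dquots and derives the source side from the inode internally, so the caller no longer dqget()s the old owner. A user-space model of that narrowed interface, with hypothetical stand-in types (not the real quota API):

#include <stdio.h>

#define MAXQUOTAS 2
enum { USRQUOTA, GRPQUOTA };

struct dquot { unsigned id; };
struct inode { struct dquot *i_dquot[MAXQUOTAS]; };

/* Models __dquot_transfer(inode, transfer_to): the "from" dquots come
 * from the inode itself, which is why the deleted dqget() calls and the
 * transfer_from[] array above are no longer needed. */
static int dquot_transfer_model(struct inode *inode,
				struct dquot *transfer_to[MAXQUOTAS])
{
	for (int q = 0; q < MAXQUOTAS; q++) {
		if (!transfer_to[q])
			continue;	/* this quota type is unchanged */
		struct dquot *from = inode->i_dquot[q];

		printf("type %d: id %u -> id %u\n", q, from->id,
		       transfer_to[q]->id);
		inode->i_dquot[q] = transfer_to[q];
	}
	return 0;
}

int main(void)
{
	struct dquot old_usr = { 1000 }, new_usr = { 1001 };
	struct inode ino = { { &old_usr, NULL } };
	struct dquot *to[MAXQUOTAS] = { &new_usr, NULL };

	return dquot_transfer_model(&ino, to);
}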
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index db5dd3ed4df4..f171b51a74f7 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -204,14 +204,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) | |||
204 | inode->i_nlink = 2; | 204 | inode->i_nlink = 2; |
205 | else | 205 | else |
206 | inode->i_nlink = 1; | 206 | inode->i_nlink = 1; |
207 | inode->i_uid = current_fsuid(); | 207 | inode_init_owner(inode, dir, mode); |
208 | if (dir->i_mode & S_ISGID) { | ||
209 | inode->i_gid = dir->i_gid; | ||
210 | if (S_ISDIR(mode)) | ||
211 | mode |= S_ISGID; | ||
212 | } else | ||
213 | inode->i_gid = current_fsgid(); | ||
214 | inode->i_mode = mode; | ||
215 | dquot_initialize(inode); | 208 | dquot_initialize(inode); |
216 | return inode; | 209 | return inode; |
217 | } | 210 | } |
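
The seven deleted lines in ocfs2_get_init_inode() are exactly the owner-initialization boilerplate that the new inode_init_owner() helper centralizes: uid from the creating task, gid inherited from a setgid parent directory (propagating S_ISGID to new subdirectories), otherwise the task's fsgid. A user-space model of that logic, with illustrative types:

#include <stdio.h>
#include <sys/stat.h>

struct inode_model { unsigned uid, gid, mode; };

static void inode_init_owner_model(struct inode_model *inode,
				   const struct inode_model *dir,
				   unsigned mode,
				   unsigned fsuid, unsigned fsgid)
{
	inode->uid = fsuid;                   /* current_fsuid() */
	if (dir && (dir->mode & S_ISGID)) {
		inode->gid = dir->gid;        /* inherit group from parent */
		if (S_ISDIR(mode))
			mode |= S_ISGID;      /* keep setgid on new subdirs */
	} else {
		inode->gid = fsgid;           /* current_fsgid() */
	}
	inode->mode = mode;
}

int main(void)
{
	struct inode_model dir = { 0, 100, S_IFDIR | S_ISGID | 0775 };
	struct inode_model child;

	inode_init_owner_model(&child, &dir, S_IFDIR | 0755, 1000, 1000);
	printf("gid=%u setgid=%d\n", child.gid, !!(child.mode & S_ISGID));
	return 0;
}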
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 123bc520a2c0..196fcb52d95d 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -23,6 +23,7 @@ | |||
23 | struct ocfs2_dquot { | 23 | struct ocfs2_dquot { |
24 | struct dquot dq_dquot; /* Generic VFS dquot */ | 24 | struct dquot dq_dquot; /* Generic VFS dquot */ |
25 | loff_t dq_local_off; /* Offset in the local quota file */ | 25 | loff_t dq_local_off; /* Offset in the local quota file */ |
26 | u64 dq_local_phys_blk; /* Physical block carrying quota structure */ | ||
26 | struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ | 27 | struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ |
27 | unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ | 28 | unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ |
28 | s64 dq_origspace; /* Last globally synced space usage */ | 29 | s64 dq_origspace; /* Last globally synced space usage */ |
@@ -51,8 +52,9 @@ struct ocfs2_mem_dqinfo { | |||
51 | struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ | 52 | struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ |
52 | struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ | 53 | struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ |
53 | int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ | 54 | int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ |
55 | u64 dqi_giblk; /* Block number of the global information header */ | ||
54 | struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ | 56 | struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ |
55 | struct buffer_head *dqi_ibh; /* Buffer with information header */ | 57 | struct buffer_head *dqi_libh; /* Buffer with local information header */ |
56 | struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ | 58 | struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ |
57 | struct delayed_work dqi_sync_work; /* Work for syncing dquots */ | 59 | struct delayed_work dqi_sync_work; /* Work for syncing dquots */ |
58 | struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery | 60 | struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery |
@@ -102,8 +104,12 @@ static inline int ocfs2_global_release_dquot(struct dquot *dquot) | |||
102 | 104 | ||
103 | int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); | 105 | int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); |
104 | void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); | 106 | void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); |
105 | int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | 107 | int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh); |
106 | struct buffer_head **bh); | 108 | int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, |
109 | struct buffer_head **bh); | ||
110 | int ocfs2_create_local_dquot(struct dquot *dquot); | ||
111 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); | ||
112 | int ocfs2_local_write_dquot(struct dquot *dquot); | ||
107 | 113 | ||
108 | extern const struct dquot_operations ocfs2_quota_operations; | 114 | extern const struct dquot_operations ocfs2_quota_operations; |
109 | extern struct quota_format_type ocfs2_quota_format; | 115 | extern struct quota_format_type ocfs2_quota_format; |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 04ae76d8c6ab..2bb35fe00511 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -25,8 +25,44 @@ | |||
25 | #include "dlmglue.h" | 25 | #include "dlmglue.h" |
26 | #include "uptodate.h" | 26 | #include "uptodate.h" |
27 | #include "super.h" | 27 | #include "super.h" |
28 | #include "buffer_head_io.h" | ||
28 | #include "quota.h" | 29 | #include "quota.h" |
29 | 30 | ||
31 | /* | ||
32 | * Locking of quotas with OCFS2 is rather complex. Here are rules that | ||
33 | * should be obeyed by all the functions: | ||
34 | * - any write of quota structure (either to local or global file) is protected | ||
35 | * by dqio_mutex or dquot->dq_lock. | ||
36 | * - any modification of global quota file holds inode cluster lock, i_mutex, | ||
37 | * and ip_alloc_sem of the global quota file (achieved by | ||
38 | * ocfs2_lock_global_qf). It also has to hold qinfo_lock. | ||
39 | * - an allocation of new blocks for local quota file is protected by | ||
40 | * its ip_alloc_sem | ||
41 | * | ||
42 | * A rough sketch of locking dependencies (lf = local file, gf = global file): | ||
43 | * Normal filesystem operation: | ||
44 | * start_trans -> dqio_mutex -> write to lf | ||
45 | * Syncing of local and global file: | ||
46 | * ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock -> | ||
47 | * write to gf | ||
48 | * -> write to lf | ||
49 | * Acquire dquot for the first time: | ||
50 | * dq_lock -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf | ||
51 | * -> alloc space for gf | ||
52 | * -> start_trans -> qinfo_lock -> write to gf | ||
53 | * -> ip_alloc_sem of lf -> alloc space for lf | ||
54 | * -> write to lf | ||
55 | * Release last reference to dquot: | ||
56 | * dq_lock -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf | ||
57 | * -> write to lf | ||
58 | * Note that all the above operations also hold the inode cluster lock of lf. | ||
59 | * Recovery: | ||
60 | * inode cluster lock of recovered lf | ||
61 | * -> read bitmaps -> ip_alloc_sem of lf | ||
62 | * -> ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock -> | ||
63 | * write to gf | ||
64 | */ | ||
65 | |||
30 | static struct workqueue_struct *ocfs2_quota_wq = NULL; | 66 | static struct workqueue_struct *ocfs2_quota_wq = NULL; |
31 | 67 | ||
32 | static void qsync_work_fn(struct work_struct *work); | 68 | static void qsync_work_fn(struct work_struct *work); |
@@ -91,8 +127,7 @@ struct qtree_fmt_operations ocfs2_global_ops = { | |||
91 | .is_id = ocfs2_global_is_id, | 127 | .is_id = ocfs2_global_is_id, |
92 | }; | 128 | }; |
93 | 129 | ||
94 | static int ocfs2_validate_quota_block(struct super_block *sb, | 130 | int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh) |
95 | struct buffer_head *bh) | ||
96 | { | 131 | { |
97 | struct ocfs2_disk_dqtrailer *dqt = | 132 | struct ocfs2_disk_dqtrailer *dqt = |
98 | ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); | 133 | ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); |
@@ -110,54 +145,19 @@ static int ocfs2_validate_quota_block(struct super_block *sb, | |||
110 | return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); | 145 | return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); |
111 | } | 146 | } |
112 | 147 | ||
113 | int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | 148 | int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, |
114 | struct buffer_head **bh) | 149 | struct buffer_head **bhp) |
115 | { | 150 | { |
116 | int rc = 0; | 151 | int rc; |
117 | struct buffer_head *tmp = *bh; | 152 | |
118 | 153 | *bhp = NULL; | |
119 | if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { | 154 | rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, 1, bhp, 0, |
120 | ocfs2_error(inode->i_sb, | 155 | ocfs2_validate_quota_block); |
121 | "Quota file %llu is probably corrupted! Requested " | ||
122 | "to read block %Lu but file has size only %Lu\n", | ||
123 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
124 | (unsigned long long)v_block, | ||
125 | (unsigned long long)i_size_read(inode)); | ||
126 | return -EIO; | ||
127 | } | ||
128 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, | ||
129 | ocfs2_validate_quota_block); | ||
130 | if (rc) | 156 | if (rc) |
131 | mlog_errno(rc); | 157 | mlog_errno(rc); |
132 | |||
133 | /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ | ||
134 | if (!rc && !*bh) | ||
135 | *bh = tmp; | ||
136 | |||
137 | return rc; | 158 | return rc; |
138 | } | 159 | } |
139 | 160 | ||
140 | static int ocfs2_get_quota_block(struct inode *inode, int block, | ||
141 | struct buffer_head **bh) | ||
142 | { | ||
143 | u64 pblock, pcount; | ||
144 | int err; | ||
145 | |||
146 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
147 | err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL); | ||
148 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
149 | if (err) { | ||
150 | mlog_errno(err); | ||
151 | return err; | ||
152 | } | ||
153 | *bh = sb_getblk(inode->i_sb, pblock); | ||
154 | if (!*bh) { | ||
155 | err = -EIO; | ||
156 | mlog_errno(err); | ||
157 | } | ||
158 | return err; | ||
159 | } | ||
160 | |||
161 | /* Read data from global quotafile - avoid pagecache and such because we cannot | 161 | /* Read data from global quotafile - avoid pagecache and such because we cannot |
162 | * afford acquiring the locks... We use quota cluster lock to serialize | 162 | * afford acquiring the locks... We use quota cluster lock to serialize |
163 | * operations. Caller is responsible for acquiring it. */ | 163 | * operations. Caller is responsible for acquiring it. */ |
@@ -172,6 +172,7 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, | |||
172 | int err = 0; | 172 | int err = 0; |
173 | struct buffer_head *bh; | 173 | struct buffer_head *bh; |
174 | size_t toread, tocopy; | 174 | size_t toread, tocopy; |
175 | u64 pblock = 0, pcount = 0; | ||
175 | 176 | ||
176 | if (off > i_size) | 177 | if (off > i_size) |
177 | return 0; | 178 | return 0; |
@@ -180,8 +181,19 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, | |||
180 | toread = len; | 181 | toread = len; |
181 | while (toread > 0) { | 182 | while (toread > 0) { |
182 | tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); | 183 | tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); |
184 | if (!pcount) { | ||
185 | err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, | ||
186 | &pcount, NULL); | ||
187 | if (err) { | ||
188 | mlog_errno(err); | ||
189 | return err; | ||
190 | } | ||
191 | } else { | ||
192 | pcount--; | ||
193 | pblock++; | ||
194 | } | ||
183 | bh = NULL; | 195 | bh = NULL; |
184 | err = ocfs2_read_quota_block(gqinode, blk, &bh); | 196 | err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh); |
185 | if (err) { | 197 | if (err) { |
186 | mlog_errno(err); | 198 | mlog_errno(err); |
187 | return err; | 199 | return err; |
@@ -209,6 +221,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
209 | int err = 0, new = 0, ja_type; | 221 | int err = 0, new = 0, ja_type; |
210 | struct buffer_head *bh = NULL; | 222 | struct buffer_head *bh = NULL; |
211 | handle_t *handle = journal_current_handle(); | 223 | handle_t *handle = journal_current_handle(); |
224 | u64 pblock, pcount; | ||
212 | 225 | ||
213 | if (!handle) { | 226 | if (!handle) { |
214 | mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " | 227 | mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " |
@@ -221,12 +234,11 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
221 | len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; | 234 | len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; |
222 | } | 235 | } |
223 | 236 | ||
224 | mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); | ||
225 | if (gqinode->i_size < off + len) { | 237 | if (gqinode->i_size < off + len) { |
226 | loff_t rounded_end = | 238 | loff_t rounded_end = |
227 | ocfs2_align_bytes_to_blocks(sb, off + len); | 239 | ocfs2_align_bytes_to_blocks(sb, off + len); |
228 | 240 | ||
229 | /* Space is already allocated in ocfs2_global_read_dquot() */ | 241 | /* Space is already allocated in ocfs2_acquire_dquot() */ |
230 | err = ocfs2_simple_size_update(gqinode, | 242 | err = ocfs2_simple_size_update(gqinode, |
231 | oinfo->dqi_gqi_bh, | 243 | oinfo->dqi_gqi_bh, |
232 | rounded_end); | 244 | rounded_end); |
@@ -234,13 +246,20 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
234 | goto out; | 246 | goto out; |
235 | new = 1; | 247 | new = 1; |
236 | } | 248 | } |
249 | err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, &pcount, NULL); | ||
250 | if (err) { | ||
251 | mlog_errno(err); | ||
252 | goto out; | ||
253 | } | ||
237 | /* Not rewriting whole block? */ | 254 | /* Not rewriting whole block? */ |
238 | if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && | 255 | if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && |
239 | !new) { | 256 | !new) { |
240 | err = ocfs2_read_quota_block(gqinode, blk, &bh); | 257 | err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh); |
241 | ja_type = OCFS2_JOURNAL_ACCESS_WRITE; | 258 | ja_type = OCFS2_JOURNAL_ACCESS_WRITE; |
242 | } else { | 259 | } else { |
243 | err = ocfs2_get_quota_block(gqinode, blk, &bh); | 260 | bh = sb_getblk(sb, pblock); |
261 | if (!bh) | ||
262 | err = -ENOMEM; | ||
244 | ja_type = OCFS2_JOURNAL_ACCESS_CREATE; | 263 | ja_type = OCFS2_JOURNAL_ACCESS_CREATE; |
245 | } | 264 | } |
246 | if (err) { | 265 | if (err) { |
@@ -265,13 +284,11 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
265 | brelse(bh); | 284 | brelse(bh); |
266 | out: | 285 | out: |
267 | if (err) { | 286 | if (err) { |
268 | mutex_unlock(&gqinode->i_mutex); | ||
269 | mlog_errno(err); | 287 | mlog_errno(err); |
270 | return err; | 288 | return err; |
271 | } | 289 | } |
272 | gqinode->i_version++; | 290 | gqinode->i_version++; |
273 | ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); | 291 | ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); |
274 | mutex_unlock(&gqinode->i_mutex); | ||
275 | return len; | 292 | return len; |
276 | } | 293 | } |
277 | 294 | ||
@@ -289,11 +306,23 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) | |||
289 | else | 306 | else |
290 | WARN_ON(bh != oinfo->dqi_gqi_bh); | 307 | WARN_ON(bh != oinfo->dqi_gqi_bh); |
291 | spin_unlock(&dq_data_lock); | 308 | spin_unlock(&dq_data_lock); |
309 | if (ex) { | ||
310 | mutex_lock(&oinfo->dqi_gqinode->i_mutex); | ||
311 | down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); | ||
312 | } else { | ||
313 | down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); | ||
314 | } | ||
292 | return 0; | 315 | return 0; |
293 | } | 316 | } |
294 | 317 | ||
295 | void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) | 318 | void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) |
296 | { | 319 | { |
320 | if (ex) { | ||
321 | up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); | ||
322 | mutex_unlock(&oinfo->dqi_gqinode->i_mutex); | ||
323 | } else { | ||
324 | up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); | ||
325 | } | ||
297 | ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); | 326 | ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); |
298 | brelse(oinfo->dqi_gqi_bh); | 327 | brelse(oinfo->dqi_gqi_bh); |
299 | spin_lock(&dq_data_lock); | 328 | spin_lock(&dq_data_lock); |
@@ -311,6 +340,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type) | |||
311 | struct ocfs2_global_disk_dqinfo dinfo; | 340 | struct ocfs2_global_disk_dqinfo dinfo; |
312 | struct mem_dqinfo *info = sb_dqinfo(sb, type); | 341 | struct mem_dqinfo *info = sb_dqinfo(sb, type); |
313 | struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; | 342 | struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; |
343 | u64 pcount; | ||
314 | int status; | 344 | int status; |
315 | 345 | ||
316 | mlog_entry_void(); | 346 | mlog_entry_void(); |
@@ -337,9 +367,19 @@ int ocfs2_global_read_info(struct super_block *sb, int type) | |||
337 | mlog_errno(status); | 367 | mlog_errno(status); |
338 | goto out_err; | 368 | goto out_err; |
339 | } | 369 | } |
370 | |||
371 | status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk, | ||
372 | &pcount, NULL); | ||
373 | if (status < 0) | ||
374 | goto out_unlock; | ||
375 | |||
376 | status = ocfs2_qinfo_lock(oinfo, 0); | ||
377 | if (status < 0) | ||
378 | goto out_unlock; | ||
340 | status = sb->s_op->quota_read(sb, type, (char *)&dinfo, | 379 | status = sb->s_op->quota_read(sb, type, (char *)&dinfo, |
341 | sizeof(struct ocfs2_global_disk_dqinfo), | 380 | sizeof(struct ocfs2_global_disk_dqinfo), |
342 | OCFS2_GLOBAL_INFO_OFF); | 381 | OCFS2_GLOBAL_INFO_OFF); |
382 | ocfs2_qinfo_unlock(oinfo, 0); | ||
343 | ocfs2_unlock_global_qf(oinfo, 0); | 383 | ocfs2_unlock_global_qf(oinfo, 0); |
344 | if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { | 384 | if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { |
345 | mlog(ML_ERROR, "Cannot read global quota info (%d).\n", | 385 | mlog(ML_ERROR, "Cannot read global quota info (%d).\n", |
@@ -366,6 +406,10 @@ int ocfs2_global_read_info(struct super_block *sb, int type) | |||
366 | out_err: | 406 | out_err: |
367 | mlog_exit(status); | 407 | mlog_exit(status); |
368 | return status; | 408 | return status; |
409 | out_unlock: | ||
410 | ocfs2_unlock_global_qf(oinfo, 0); | ||
411 | mlog_errno(status); | ||
412 | goto out_err; | ||
369 | } | 413 | } |
370 | 414 | ||
371 | /* Write information to global quota file. Expects exclusive lock on quota | 415 |
@@ -424,78 +468,10 @@ static int ocfs2_global_qinit_alloc(struct super_block *sb, int type) | |||
424 | 468 | ||
425 | static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) | 469 | static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) |
426 | { | 470 | { |
427 | /* We modify all the allocated blocks, tree root, and info block */ | 471 | /* We modify all the allocated blocks, tree root, info block and |
472 | * the inode */ | ||
428 | return (ocfs2_global_qinit_alloc(sb, type) + 2) * | 473 | return (ocfs2_global_qinit_alloc(sb, type) + 2) * |
429 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; | 474 | OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + 1; |
430 | } | ||
431 | |||
432 | /* Read in information from global quota file and acquire a reference to it. | ||
433 | * dquot_acquire() has already started the transaction and locked quota file */ | ||
434 | int ocfs2_global_read_dquot(struct dquot *dquot) | ||
435 | { | ||
436 | int err, err2, ex = 0; | ||
437 | struct super_block *sb = dquot->dq_sb; | ||
438 | int type = dquot->dq_type; | ||
439 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; | ||
440 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
441 | struct inode *gqinode = info->dqi_gqinode; | ||
442 | int need_alloc = ocfs2_global_qinit_alloc(sb, type); | ||
443 | handle_t *handle = NULL; | ||
444 | |||
445 | err = ocfs2_qinfo_lock(info, 0); | ||
446 | if (err < 0) | ||
447 | goto out; | ||
448 | err = qtree_read_dquot(&info->dqi_gi, dquot); | ||
449 | if (err < 0) | ||
450 | goto out_qlock; | ||
451 | OCFS2_DQUOT(dquot)->dq_use_count++; | ||
452 | OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; | ||
453 | OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; | ||
454 | ocfs2_qinfo_unlock(info, 0); | ||
455 | |||
456 | if (!dquot->dq_off) { /* No real quota entry? */ | ||
457 | ex = 1; | ||
458 | /* | ||
459 | * Add blocks to quota file before we start a transaction since | ||
460 | * locking allocators ranks above a transaction start | ||
461 | */ | ||
462 | WARN_ON(journal_current_handle()); | ||
463 | down_write(&OCFS2_I(gqinode)->ip_alloc_sem); | ||
464 | err = ocfs2_extend_no_holes(gqinode, | ||
465 | gqinode->i_size + (need_alloc << sb->s_blocksize_bits), | ||
466 | gqinode->i_size); | ||
467 | up_write(&OCFS2_I(gqinode)->ip_alloc_sem); | ||
468 | if (err < 0) | ||
469 | goto out; | ||
470 | } | ||
471 | |||
472 | handle = ocfs2_start_trans(osb, | ||
473 | ocfs2_calc_global_qinit_credits(sb, type)); | ||
474 | if (IS_ERR(handle)) { | ||
475 | err = PTR_ERR(handle); | ||
476 | goto out; | ||
477 | } | ||
478 | err = ocfs2_qinfo_lock(info, ex); | ||
479 | if (err < 0) | ||
480 | goto out_trans; | ||
481 | err = qtree_write_dquot(&info->dqi_gi, dquot); | ||
482 | if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { | ||
483 | err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); | ||
484 | if (!err) | ||
485 | err = err2; | ||
486 | } | ||
487 | out_qlock: | ||
488 | if (ex) | ||
489 | ocfs2_qinfo_unlock(info, 1); | ||
490 | else | ||
491 | ocfs2_qinfo_unlock(info, 0); | ||
492 | out_trans: | ||
493 | if (handle) | ||
494 | ocfs2_commit_trans(osb, handle); | ||
495 | out: | ||
496 | if (err < 0) | ||
497 | mlog_errno(err); | ||
498 | return err; | ||
499 | } | 475 | } |
500 | 476 | ||
501 | /* Sync local information about quota modifications with global quota file. | 477 | /* Sync local information about quota modifications with global quota file. |
@@ -636,14 +612,13 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) | |||
636 | } | 612 | } |
637 | mutex_lock(&sb_dqopt(sb)->dqio_mutex); | 613 | mutex_lock(&sb_dqopt(sb)->dqio_mutex); |
638 | status = ocfs2_sync_dquot(dquot); | 614 | status = ocfs2_sync_dquot(dquot); |
639 | mutex_unlock(&sb_dqopt(sb)->dqio_mutex); | ||
640 | if (status < 0) | 615 | if (status < 0) |
641 | mlog_errno(status); | 616 | mlog_errno(status); |
642 | /* We have to write local structure as well... */ | 617 | /* We have to write local structure as well... */ |
643 | dquot_mark_dquot_dirty(dquot); | 618 | status = ocfs2_local_write_dquot(dquot); |
644 | status = dquot_commit(dquot); | ||
645 | if (status < 0) | 619 | if (status < 0) |
646 | mlog_errno(status); | 620 | mlog_errno(status); |
621 | mutex_unlock(&sb_dqopt(sb)->dqio_mutex); | ||
647 | ocfs2_commit_trans(osb, handle); | 622 | ocfs2_commit_trans(osb, handle); |
648 | out_ilock: | 623 | out_ilock: |
649 | ocfs2_unlock_global_qf(oinfo, 1); | 624 | ocfs2_unlock_global_qf(oinfo, 1); |
@@ -682,7 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot) | |||
682 | mlog_errno(status); | 657 | mlog_errno(status); |
683 | goto out; | 658 | goto out; |
684 | } | 659 | } |
685 | status = dquot_commit(dquot); | 660 | mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex); |
661 | status = ocfs2_local_write_dquot(dquot); | ||
662 | mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex); | ||
686 | ocfs2_commit_trans(osb, handle); | 663 | ocfs2_commit_trans(osb, handle); |
687 | out: | 664 | out: |
688 | mlog_exit(status); | 665 | mlog_exit(status); |
@@ -713,6 +690,10 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
713 | 690 | ||
714 | mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); | 691 | mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); |
715 | 692 | ||
693 | mutex_lock(&dquot->dq_lock); | ||
694 | /* Check whether we are not racing with some other dqget() */ | ||
695 | if (atomic_read(&dquot->dq_count) > 1) | ||
696 | goto out; | ||
716 | status = ocfs2_lock_global_qf(oinfo, 1); | 697 | status = ocfs2_lock_global_qf(oinfo, 1); |
717 | if (status < 0) | 698 | if (status < 0) |
718 | goto out; | 699 | goto out; |
@@ -723,30 +704,113 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
723 | mlog_errno(status); | 704 | mlog_errno(status); |
724 | goto out_ilock; | 705 | goto out_ilock; |
725 | } | 706 | } |
726 | status = dquot_release(dquot); | 707 | |
708 | status = ocfs2_global_release_dquot(dquot); | ||
709 | if (status < 0) { | ||
710 | mlog_errno(status); | ||
711 | goto out_trans; | ||
712 | } | ||
713 | status = ocfs2_local_release_dquot(handle, dquot); | ||
714 | /* | ||
715 | * If we fail here, we cannot do much as global structure is | ||
716 | * already released. So just complain... | ||
717 | */ | ||
718 | if (status < 0) | ||
719 | mlog_errno(status); | ||
720 | clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); | ||
721 | out_trans: | ||
727 | ocfs2_commit_trans(osb, handle); | 722 | ocfs2_commit_trans(osb, handle); |
728 | out_ilock: | 723 | out_ilock: |
729 | ocfs2_unlock_global_qf(oinfo, 1); | 724 | ocfs2_unlock_global_qf(oinfo, 1); |
730 | out: | 725 | out: |
726 | mutex_unlock(&dquot->dq_lock); | ||
731 | mlog_exit(status); | 727 | mlog_exit(status); |
732 | return status; | 728 | return status; |
733 | } | 729 | } |
734 | 730 | ||
731 | /* | ||
732 | * Read global dquot structure from disk or create it if it does | ||
733 | * not exist. Also update use count of the global structure and | ||
734 | * create structure in node-local quota file. | ||
735 | */ | ||
735 | static int ocfs2_acquire_dquot(struct dquot *dquot) | 736 | static int ocfs2_acquire_dquot(struct dquot *dquot) |
736 | { | 737 | { |
737 | struct ocfs2_mem_dqinfo *oinfo = | 738 | int status = 0, err; |
738 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 739 | int ex = 0; |
739 | int status = 0; | 740 | struct super_block *sb = dquot->dq_sb; |
741 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
742 | int type = dquot->dq_type; | ||
743 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; | ||
744 | struct inode *gqinode = info->dqi_gqinode; | ||
745 | int need_alloc = ocfs2_global_qinit_alloc(sb, type); | ||
746 | handle_t *handle; | ||
740 | 747 | ||
741 | mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); | 748 | mlog_entry("id=%u, type=%d", dquot->dq_id, type); |
742 | /* We need an exclusive lock, because we're going to update use count | 749 | mutex_lock(&dquot->dq_lock); |
743 | * and instantiate possibly new dquot structure */ | 750 | /* |
744 | status = ocfs2_lock_global_qf(oinfo, 1); | 751 | * We need an exclusive lock, because we're going to update use count |
752 | * and instantiate possibly new dquot structure | ||
753 | */ | ||
754 | status = ocfs2_lock_global_qf(info, 1); | ||
745 | if (status < 0) | 755 | if (status < 0) |
746 | goto out; | 756 | goto out; |
747 | status = dquot_acquire(dquot); | 757 | if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { |
748 | ocfs2_unlock_global_qf(oinfo, 1); | 758 | status = ocfs2_qinfo_lock(info, 0); |
759 | if (status < 0) | ||
760 | goto out_dq; | ||
761 | status = qtree_read_dquot(&info->dqi_gi, dquot); | ||
762 | ocfs2_qinfo_unlock(info, 0); | ||
763 | if (status < 0) | ||
764 | goto out_dq; | ||
765 | } | ||
766 | set_bit(DQ_READ_B, &dquot->dq_flags); | ||
767 | |||
768 | OCFS2_DQUOT(dquot)->dq_use_count++; | ||
769 | OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; | ||
770 | OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; | ||
771 | if (!dquot->dq_off) { /* No real quota entry? */ | ||
772 | ex = 1; | ||
773 | /* | ||
774 | * Add blocks to quota file before we start a transaction since | ||
775 | * locking allocators ranks above a transaction start | ||
776 | */ | ||
777 | WARN_ON(journal_current_handle()); | ||
778 | status = ocfs2_extend_no_holes(gqinode, | ||
779 | gqinode->i_size + (need_alloc << sb->s_blocksize_bits), | ||
780 | gqinode->i_size); | ||
781 | if (status < 0) | ||
782 | goto out_dq; | ||
783 | } | ||
784 | |||
785 | handle = ocfs2_start_trans(osb, | ||
786 | ocfs2_calc_global_qinit_credits(sb, type)); | ||
787 | if (IS_ERR(handle)) { | ||
788 | status = PTR_ERR(handle); | ||
789 | goto out_dq; | ||
790 | } | ||
791 | status = ocfs2_qinfo_lock(info, ex); | ||
792 | if (status < 0) | ||
793 | goto out_trans; | ||
794 | status = qtree_write_dquot(&info->dqi_gi, dquot); | ||
795 | if (ex && info_dirty(sb_dqinfo(sb, type))) { | ||
796 | err = __ocfs2_global_write_info(sb, type); | ||
797 | if (!status) | ||
798 | status = err; | ||
799 | } | ||
800 | ocfs2_qinfo_unlock(info, ex); | ||
801 | out_trans: | ||
802 | ocfs2_commit_trans(osb, handle); | ||
803 | out_dq: | ||
804 | ocfs2_unlock_global_qf(info, 1); | ||
805 | if (status < 0) | ||
806 | goto out; | ||
807 | |||
808 | status = ocfs2_create_local_dquot(dquot); | ||
809 | if (status < 0) | ||
810 | goto out; | ||
811 | set_bit(DQ_ACTIVE_B, &dquot->dq_flags); | ||
749 | out: | 812 | out: |
813 | mutex_unlock(&dquot->dq_lock); | ||
750 | mlog_exit(status); | 814 | mlog_exit(status); |
751 | return status; | 815 | return status; |
752 | } | 816 | } |
@@ -768,7 +832,6 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) | |||
768 | struct ocfs2_super *osb = OCFS2_SB(sb); | 832 | struct ocfs2_super *osb = OCFS2_SB(sb); |
769 | 833 | ||
770 | mlog_entry("id=%u, type=%d", dquot->dq_id, type); | 834 | mlog_entry("id=%u, type=%d", dquot->dq_id, type); |
771 | dquot_mark_dquot_dirty(dquot); | ||
772 | 835 | ||
773 | /* In case user set some limits, sync dquot immediately to global | 836 | /* In case user set some limits, sync dquot immediately to global |
774 | * quota file so that information propagates quicker */ | 837 | * quota file so that information propagates quicker */ |
@@ -791,14 +854,16 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) | |||
791 | mlog_errno(status); | 854 | mlog_errno(status); |
792 | goto out_ilock; | 855 | goto out_ilock; |
793 | } | 856 | } |
857 | mutex_lock(&sb_dqopt(sb)->dqio_mutex); | ||
794 | status = ocfs2_sync_dquot(dquot); | 858 | status = ocfs2_sync_dquot(dquot); |
795 | if (status < 0) { | 859 | if (status < 0) { |
796 | mlog_errno(status); | 860 | mlog_errno(status); |
797 | goto out_trans; | 861 | goto out_dlock; |
798 | } | 862 | } |
799 | /* Now write updated local dquot structure */ | 863 | /* Now write updated local dquot structure */ |
800 | status = dquot_commit(dquot); | 864 | status = ocfs2_local_write_dquot(dquot); |
801 | out_trans: | 865 | out_dlock: |
866 | mutex_unlock(&sb_dqopt(sb)->dqio_mutex); | ||
802 | ocfs2_commit_trans(osb, handle); | 867 | ocfs2_commit_trans(osb, handle); |
803 | out_ilock: | 868 | out_ilock: |
804 | ocfs2_unlock_global_qf(oinfo, 1); | 869 | ocfs2_unlock_global_qf(oinfo, 1); |
@@ -850,7 +915,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) | |||
850 | } | 915 | } |
851 | 916 | ||
852 | const struct dquot_operations ocfs2_quota_operations = { | 917 | const struct dquot_operations ocfs2_quota_operations = { |
853 | .write_dquot = ocfs2_write_dquot, | 918 | /* We never make dquot dirty so .write_dquot is never called */ |
854 | .acquire_dquot = ocfs2_acquire_dquot, | 919 | .acquire_dquot = ocfs2_acquire_dquot, |
855 | .release_dquot = ocfs2_release_dquot, | 920 | .release_dquot = ocfs2_release_dquot, |
856 | .mark_dirty = ocfs2_mark_dquot_dirty, | 921 | .mark_dirty = ocfs2_mark_dquot_dirty, |
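
The rewritten ocfs2_acquire_dquot() above (which absorbs the deleted ocfs2_global_read_dquot()) follows the ordering rules from the comment block at the top of quota_global.c: dq_lock first, then the global quota file lock, with qinfo_lock taken innermost around each access to the global file. A compilable sketch of that discipline using plain pthread mutexes as stand-ins for the cluster locks (illustrative only, not the kernel locking primitives):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dq_lock    = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t global_qf  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t qinfo_lock = PTHREAD_MUTEX_INITIALIZER;

static int acquire_dquot_model(void)
{
	pthread_mutex_lock(&dq_lock);      /* 1: mutex_lock(&dquot->dq_lock) */
	pthread_mutex_lock(&global_qf);    /* 2: ocfs2_lock_global_qf(info, 1) */

	pthread_mutex_lock(&qinfo_lock);   /* 3: innermost, per access */
	puts("qtree_read_dquot(...)");
	pthread_mutex_unlock(&qinfo_lock);

	/* extend file, start transaction, retake qinfo_lock, write back */

	pthread_mutex_unlock(&global_qf);
	puts("ocfs2_create_local_dquot(...)");
	pthread_mutex_unlock(&dq_lock);
	return 0;
}

int main(void)
{
	return acquire_dquot_model();
}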
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 884b641f199e..8bd70d4d184d 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "dlmglue.h" | 22 | #include "dlmglue.h" |
23 | #include "quota.h" | 23 | #include "quota.h" |
24 | #include "uptodate.h" | 24 | #include "uptodate.h" |
25 | #include "super.h" | ||
25 | 26 | ||
26 | /* Number of local quota structures per block */ | 27 | /* Number of local quota structures per block */ |
27 | static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) | 28 | static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) |
@@ -129,6 +130,39 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, | |||
129 | return 0; | 130 | return 0; |
130 | } | 131 | } |
131 | 132 | ||
133 | /* | ||
134 | * Read quota block from a given logical offset. | ||
135 | * | ||
136 | * This function acquires ip_alloc_sem and thus it must not be called with a | ||
137 | * transaction started. | ||
138 | */ | ||
139 | static int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | ||
140 | struct buffer_head **bh) | ||
141 | { | ||
142 | int rc = 0; | ||
143 | struct buffer_head *tmp = *bh; | ||
144 | |||
145 | if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { | ||
146 | ocfs2_error(inode->i_sb, | ||
147 | "Quota file %llu is probably corrupted! Requested " | ||
148 | "to read block %Lu but file has size only %Lu\n", | ||
149 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
150 | (unsigned long long)v_block, | ||
151 | (unsigned long long)i_size_read(inode)); | ||
152 | return -EIO; | ||
153 | } | ||
154 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, | ||
155 | ocfs2_validate_quota_block); | ||
156 | if (rc) | ||
157 | mlog_errno(rc); | ||
158 | |||
159 | /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ | ||
160 | if (!rc && !*bh) | ||
161 | *bh = tmp; | ||
162 | |||
163 | return rc; | ||
164 | } | ||
165 | |||
132 | /* Check whether we understand format of quota files */ | 166 | /* Check whether we understand format of quota files */ |
133 | static int ocfs2_local_check_quota_file(struct super_block *sb, int type) | 167 | static int ocfs2_local_check_quota_file(struct super_block *sb, int type) |
134 | { | 168 | { |
@@ -671,7 +705,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) | |||
671 | INIT_LIST_HEAD(&oinfo->dqi_chunk); | 705 | INIT_LIST_HEAD(&oinfo->dqi_chunk); |
672 | oinfo->dqi_rec = NULL; | 706 | oinfo->dqi_rec = NULL; |
673 | oinfo->dqi_lqi_bh = NULL; | 707 | oinfo->dqi_lqi_bh = NULL; |
674 | oinfo->dqi_ibh = NULL; | 708 | oinfo->dqi_libh = NULL; |
675 | 709 | ||
676 | status = ocfs2_global_read_info(sb, type); | 710 | status = ocfs2_global_read_info(sb, type); |
677 | if (status < 0) | 711 | if (status < 0) |
@@ -697,7 +731,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) | |||
697 | info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); | 731 | info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); |
698 | oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); | 732 | oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); |
699 | oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); | 733 | oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); |
700 | oinfo->dqi_ibh = bh; | 734 | oinfo->dqi_libh = bh; |
701 | 735 | ||
702 | /* We crashed when using local quota file? */ | 736 | /* We crashed when using local quota file? */ |
703 | if (!(info->dqi_flags & OLQF_CLEAN)) { | 737 | if (!(info->dqi_flags & OLQF_CLEAN)) { |
@@ -759,7 +793,7 @@ static int ocfs2_local_write_info(struct super_block *sb, int type) | |||
759 | { | 793 | { |
760 | struct mem_dqinfo *info = sb_dqinfo(sb, type); | 794 | struct mem_dqinfo *info = sb_dqinfo(sb, type); |
761 | struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv) | 795 | struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv) |
762 | ->dqi_ibh; | 796 | ->dqi_libh; |
763 | int status; | 797 | int status; |
764 | 798 | ||
765 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info, | 799 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info, |
@@ -782,10 +816,6 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) | |||
782 | int mark_clean = 1, len; | 816 | int mark_clean = 1, len; |
783 | int status; | 817 | int status; |
784 | 818 | ||
785 | /* At this point we know there are no more dquots and thus | ||
786 | * even if there's some sync in the pdflush queue, it won't | ||
787 | * find any dquots and return without doing anything */ | ||
788 | cancel_delayed_work_sync(&oinfo->dqi_sync_work); | ||
789 | iput(oinfo->dqi_gqinode); | 819 | iput(oinfo->dqi_gqinode); |
790 | ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); | 820 | ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); |
791 | ocfs2_lock_res_free(&oinfo->dqi_gqlock); | 821 | ocfs2_lock_res_free(&oinfo->dqi_gqlock); |
@@ -820,7 +850,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) | |||
820 | /* Mark local file as clean */ | 850 | /* Mark local file as clean */ |
821 | info->dqi_flags |= OLQF_CLEAN; | 851 | info->dqi_flags |= OLQF_CLEAN; |
822 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], | 852 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], |
823 | oinfo->dqi_ibh, | 853 | oinfo->dqi_libh, |
824 | olq_update_info, | 854 | olq_update_info, |
825 | info); | 855 | info); |
826 | if (status < 0) { | 856 | if (status < 0) { |
@@ -830,7 +860,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) | |||
830 | 860 | ||
831 | out: | 861 | out: |
832 | ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); | 862 | ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); |
833 | brelse(oinfo->dqi_ibh); | 863 | brelse(oinfo->dqi_libh); |
834 | brelse(oinfo->dqi_lqi_bh); | 864 | brelse(oinfo->dqi_lqi_bh); |
835 | kfree(oinfo); | 865 | kfree(oinfo); |
836 | return 0; | 866 | return 0; |
@@ -858,22 +888,21 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) | |||
858 | } | 888 | } |
859 | 889 | ||
860 | /* Write dquot to local quota file */ | 890 | /* Write dquot to local quota file */ |
861 | static int ocfs2_local_write_dquot(struct dquot *dquot) | 891 | int ocfs2_local_write_dquot(struct dquot *dquot) |
862 | { | 892 | { |
863 | struct super_block *sb = dquot->dq_sb; | 893 | struct super_block *sb = dquot->dq_sb; |
864 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); | 894 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); |
865 | struct buffer_head *bh = NULL; | 895 | struct buffer_head *bh; |
896 | struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type]; | ||
866 | int status; | 897 | int status; |
867 | 898 | ||
868 | status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], | 899 | status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk, |
869 | ol_dqblk_file_block(sb, od->dq_local_off), | 900 | &bh); |
870 | &bh); | ||
871 | if (status) { | 901 | if (status) { |
872 | mlog_errno(status); | 902 | mlog_errno(status); |
873 | goto out; | 903 | goto out; |
874 | } | 904 | } |
875 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh, | 905 | status = ocfs2_modify_bh(lqinode, bh, olq_set_dquot, od); |
876 | olq_set_dquot, od); | ||
877 | if (status < 0) { | 906 | if (status < 0) { |
878 | mlog_errno(status); | 907 | mlog_errno(status); |
879 | goto out; | 908 | goto out; |
@@ -973,10 +1002,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
973 | } | 1002 | } |
974 | 1003 | ||
975 | /* Initialize chunk header */ | 1004 | /* Initialize chunk header */ |
976 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
977 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, | 1005 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, |
978 | &p_blkno, NULL, NULL); | 1006 | &p_blkno, NULL, NULL); |
979 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
980 | if (status < 0) { | 1007 | if (status < 0) { |
981 | mlog_errno(status); | 1008 | mlog_errno(status); |
982 | goto out_trans; | 1009 | goto out_trans; |
@@ -1004,10 +1031,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
1004 | ocfs2_journal_dirty(handle, bh); | 1031 | ocfs2_journal_dirty(handle, bh); |
1005 | 1032 | ||
1006 | /* Initialize new block with structures */ | 1033 | /* Initialize new block with structures */ |
1007 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1008 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, | 1034 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, |
1009 | &p_blkno, NULL, NULL); | 1035 | &p_blkno, NULL, NULL); |
1010 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1011 | if (status < 0) { | 1036 | if (status < 0) { |
1012 | mlog_errno(status); | 1037 | mlog_errno(status); |
1013 | goto out_trans; | 1038 | goto out_trans; |
@@ -1104,10 +1129,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1104 | } | 1129 | } |
1105 | 1130 | ||
1106 | /* Get buffer from the just added block */ | 1131 | /* Get buffer from the just added block */ |
1107 | down_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1108 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, | 1132 | status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, |
1109 | &p_blkno, NULL, NULL); | 1133 | &p_blkno, NULL, NULL); |
1110 | up_read(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1111 | if (status < 0) { | 1134 | if (status < 0) { |
1112 | mlog_errno(status); | 1135 | mlog_errno(status); |
1113 | goto out; | 1136 | goto out; |
@@ -1188,7 +1211,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private) | |||
1188 | } | 1211 | } |
1189 | 1212 | ||
1190 | /* Create dquot in the local file for given id */ | 1213 | /* Create dquot in the local file for given id */ |
1191 | static int ocfs2_create_local_dquot(struct dquot *dquot) | 1214 | int ocfs2_create_local_dquot(struct dquot *dquot) |
1192 | { | 1215 | { |
1193 | struct super_block *sb = dquot->dq_sb; | 1216 | struct super_block *sb = dquot->dq_sb; |
1194 | int type = dquot->dq_type; | 1217 | int type = dquot->dq_type; |
@@ -1197,17 +1220,27 @@ static int ocfs2_create_local_dquot(struct dquot *dquot) | |||
1197 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); | 1220 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); |
1198 | int offset; | 1221 | int offset; |
1199 | int status; | 1222 | int status; |
1223 | u64 pcount; | ||
1200 | 1224 | ||
1225 | down_write(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1201 | chunk = ocfs2_find_free_entry(sb, type, &offset); | 1226 | chunk = ocfs2_find_free_entry(sb, type, &offset); |
1202 | if (!chunk) { | 1227 | if (!chunk) { |
1203 | chunk = ocfs2_extend_local_quota_file(sb, type, &offset); | 1228 | chunk = ocfs2_extend_local_quota_file(sb, type, &offset); |
1204 | if (IS_ERR(chunk)) | 1229 | if (IS_ERR(chunk)) { |
1205 | return PTR_ERR(chunk); | 1230 | status = PTR_ERR(chunk); |
1231 | goto out; | ||
1232 | } | ||
1206 | } else if (IS_ERR(chunk)) { | 1233 | } else if (IS_ERR(chunk)) { |
1207 | return PTR_ERR(chunk); | 1234 | status = PTR_ERR(chunk); |
1235 | goto out; | ||
1208 | } | 1236 | } |
1209 | od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); | 1237 | od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); |
1210 | od->dq_chunk = chunk; | 1238 | od->dq_chunk = chunk; |
1239 | status = ocfs2_extent_map_get_blocks(lqinode, | ||
1240 | ol_dqblk_block(sb, chunk->qc_num, offset), | ||
1241 | &od->dq_local_phys_blk, | ||
1242 | &pcount, | ||
1243 | NULL); | ||
1211 | 1244 | ||
1212 | /* Initialize dquot structure on disk */ | 1245 | /* Initialize dquot structure on disk */ |
1213 | status = ocfs2_local_write_dquot(dquot); | 1246 | status = ocfs2_local_write_dquot(dquot); |
@@ -1224,39 +1257,15 @@ static int ocfs2_create_local_dquot(struct dquot *dquot) | |||
1224 | goto out; | 1257 | goto out; |
1225 | } | 1258 | } |
1226 | out: | 1259 | out: |
1260 | up_write(&OCFS2_I(lqinode)->ip_alloc_sem); | ||
1227 | return status; | 1261 | return status; |
1228 | } | 1262 | } |
1229 | 1263 | ||
1230 | /* Create entry in local file for dquot, load data from the global file */ | 1264 | /* |
1231 | static int ocfs2_local_read_dquot(struct dquot *dquot) | 1265 | * Release dquot structure from local quota file. ocfs2_release_dquot() has |
1232 | { | 1266 | * already started a transaction and written all changes to global quota file |
1233 | int status; | 1267 | */ |
1234 | 1268 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) | |
1235 | mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type); | ||
1236 | |||
1237 | status = ocfs2_global_read_dquot(dquot); | ||
1238 | if (status < 0) { | ||
1239 | mlog_errno(status); | ||
1240 | goto out_err; | ||
1241 | } | ||
1242 | |||
1243 | /* Now create entry in the local quota file */ | ||
1244 | status = ocfs2_create_local_dquot(dquot); | ||
1245 | if (status < 0) { | ||
1246 | mlog_errno(status); | ||
1247 | goto out_err; | ||
1248 | } | ||
1249 | mlog_exit(0); | ||
1250 | return 0; | ||
1251 | out_err: | ||
1252 | mlog_exit(status); | ||
1253 | return status; | ||
1254 | } | ||
1255 | |||
1256 | /* Release dquot structure from local quota file. ocfs2_release_dquot() has | ||
1257 | * already started a transaction and obtained exclusive lock for global | ||
1258 | * quota file. */ | ||
1259 | static int ocfs2_local_release_dquot(struct dquot *dquot) | ||
1260 | { | 1269 | { |
1261 | int status; | 1270 | int status; |
1262 | int type = dquot->dq_type; | 1271 | int type = dquot->dq_type; |
@@ -1264,15 +1273,6 @@ static int ocfs2_local_release_dquot(struct dquot *dquot) | |||
1264 | struct super_block *sb = dquot->dq_sb; | 1273 | struct super_block *sb = dquot->dq_sb; |
1265 | struct ocfs2_local_disk_chunk *dchunk; | 1274 | struct ocfs2_local_disk_chunk *dchunk; |
1266 | int offset; | 1275 | int offset; |
1267 | handle_t *handle = journal_current_handle(); | ||
1268 | |||
1269 | BUG_ON(!handle); | ||
1270 | /* First write all local changes to global file */ | ||
1271 | status = ocfs2_global_release_dquot(dquot); | ||
1272 | if (status < 0) { | ||
1273 | mlog_errno(status); | ||
1274 | goto out; | ||
1275 | } | ||
1276 | 1276 | ||
1277 | status = ocfs2_journal_access_dq(handle, | 1277 | status = ocfs2_journal_access_dq(handle, |
1278 | INODE_CACHE(sb_dqopt(sb)->files[type]), | 1278 | INODE_CACHE(sb_dqopt(sb)->files[type]), |
@@ -1305,9 +1305,6 @@ static const struct quota_format_ops ocfs2_format_ops = { | |||
1305 | .read_file_info = ocfs2_local_read_info, | 1305 | .read_file_info = ocfs2_local_read_info, |
1306 | .write_file_info = ocfs2_global_write_info, | 1306 | .write_file_info = ocfs2_global_write_info, |
1307 | .free_file_info = ocfs2_local_free_info, | 1307 | .free_file_info = ocfs2_local_free_info, |
1308 | .read_dqblk = ocfs2_local_read_dquot, | ||
1309 | .commit_dqblk = ocfs2_local_write_dquot, | ||
1310 | .release_dqblk = ocfs2_local_release_dquot, | ||
1311 | }; | 1308 | }; |
1312 | 1309 | ||
1313 | struct quota_format_type ocfs2_quota_format = { | 1310 | struct quota_format_type ocfs2_quota_format = { |
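The quota_local.c hunks above trade the fine-grained ip_alloc_sem use for one write-locked section around the whole create path, and cache the dquot's physical block in od->dq_local_phys_blk while the lock is held. A minimal sketch of what that caching buys, using the field names from the diff but a hypothetical helper (example_write_cached_dquot is not ocfs2 code): a later write running inside a transaction can map the dquot without re-taking ip_alloc_sem.

/* Hedged sketch: with the physical block cached at create time,
 * a transaction-context writer can go straight to sb_getblk()
 * instead of calling ocfs2_extent_map_get_blocks() under
 * ip_alloc_sem. */
static int example_write_cached_dquot(struct super_block *sb,
				      struct ocfs2_dquot *od)
{
	struct buffer_head *bh = sb_getblk(sb, od->dq_local_phys_blk);

	if (!bh)
		return -ENOMEM;
	lock_buffer(bh);
	/* ... update the on-disk dquot image at od->dq_local_off ... */
	unlock_buffer(bh);
	brelse(bh);
	return 0;
}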
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1c2c39f6f0b6..2c26ce251cb3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -938,12 +938,16 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
938 | int type; | 938 | int type; |
939 | struct inode *inode; | 939 | struct inode *inode; |
940 | struct super_block *sb = osb->sb; | 940 | struct super_block *sb = osb->sb; |
941 | struct ocfs2_mem_dqinfo *oinfo; | ||
941 | 942 | ||
942 | /* We mostly ignore errors in this function because there's not much | 943 | /* We mostly ignore errors in this function because there's not much |
943 | * we can do when we see them */ | 944 | * we can do when we see them */ |
944 | for (type = 0; type < MAXQUOTAS; type++) { | 945 | for (type = 0; type < MAXQUOTAS; type++) { |
945 | if (!sb_has_quota_loaded(sb, type)) | 946 | if (!sb_has_quota_loaded(sb, type)) |
946 | continue; | 947 | continue; |
948 | /* Cancel periodic syncing before we grab dqonoff_mutex */ | ||
949 | oinfo = sb_dqinfo(sb, type)->dqi_priv; | ||
950 | cancel_delayed_work_sync(&oinfo->dqi_sync_work); | ||
947 | inode = igrab(sb->s_dquot.files[type]); | 951 | inode = igrab(sb->s_dquot.files[type]); |
948 | /* Turn off quotas. This will remove all dquot structures from | 952 | /* Turn off quotas. This will remove all dquot structures from |
949 | * memory and so they will be automatically synced to global | 953 | * memory and so they will be automatically synced to global |
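cancel_delayed_work_sync() both cancels a pending instance and waits out a running one, so once it returns the worker can no longer touch dqi_priv. A minimal sketch of the self-rearming sync worker this shutdown ordering presumes (function name and interval are illustrative, not ocfs2's actual qsync code):

#include <linux/workqueue.h>

static void example_qsync_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	/* ... push local quota usage to the global quota file ... */
	schedule_delayed_work(dwork, 10 * HZ);	/* re-arm */
}

/* Teardown: after this returns the worker is neither running nor
 * pending, so its backing structures may be freed. */
static void example_stop_qsync(struct delayed_work *dwork)
{
	cancel_delayed_work_sync(dwork);
}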
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 98ee6c44102d..e97b34842cfe 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -97,7 +97,7 @@ static struct ocfs2_xattr_def_value_root def_xv = { | |||
97 | .xv.xr_list.l_count = cpu_to_le16(1), | 97 | .xv.xr_list.l_count = cpu_to_le16(1), |
98 | }; | 98 | }; |
99 | 99 | ||
100 | struct xattr_handler *ocfs2_xattr_handlers[] = { | 100 | const struct xattr_handler *ocfs2_xattr_handlers[] = { |
101 | &ocfs2_xattr_user_handler, | 101 | &ocfs2_xattr_user_handler, |
102 | &ocfs2_xattr_acl_access_handler, | 102 | &ocfs2_xattr_acl_access_handler, |
103 | &ocfs2_xattr_acl_default_handler, | 103 | &ocfs2_xattr_acl_default_handler, |
@@ -106,7 +106,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = { | |||
106 | NULL | 106 | NULL |
107 | }; | 107 | }; |
108 | 108 | ||
109 | static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { | 109 | static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { |
110 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, | 110 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, |
111 | [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] | 111 | [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] |
112 | = &ocfs2_xattr_acl_access_handler, | 112 | = &ocfs2_xattr_acl_access_handler, |
@@ -540,7 +540,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, | |||
540 | 540 | ||
541 | static inline const char *ocfs2_xattr_prefix(int name_index) | 541 | static inline const char *ocfs2_xattr_prefix(int name_index) |
542 | { | 542 | { |
543 | struct xattr_handler *handler = NULL; | 543 | const struct xattr_handler *handler = NULL; |
544 | 544 | ||
545 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) | 545 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) |
546 | handler = ocfs2_xattr_handler_map[name_index]; | 546 | handler = ocfs2_xattr_handler_map[name_index]; |
@@ -7213,7 +7213,7 @@ int ocfs2_init_security_set(handle_t *handle, | |||
7213 | xattr_ac, data_ac); | 7213 | xattr_ac, data_ac); |
7214 | } | 7214 | } |
7215 | 7215 | ||
7216 | struct xattr_handler ocfs2_xattr_security_handler = { | 7216 | const struct xattr_handler ocfs2_xattr_security_handler = { |
7217 | .prefix = XATTR_SECURITY_PREFIX, | 7217 | .prefix = XATTR_SECURITY_PREFIX, |
7218 | .list = ocfs2_xattr_security_list, | 7218 | .list = ocfs2_xattr_security_list, |
7219 | .get = ocfs2_xattr_security_get, | 7219 | .get = ocfs2_xattr_security_get, |
@@ -7257,7 +7257,7 @@ static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
7257 | name, value, size, flags); | 7257 | name, value, size, flags); |
7258 | } | 7258 | } |
7259 | 7259 | ||
7260 | struct xattr_handler ocfs2_xattr_trusted_handler = { | 7260 | const struct xattr_handler ocfs2_xattr_trusted_handler = { |
7261 | .prefix = XATTR_TRUSTED_PREFIX, | 7261 | .prefix = XATTR_TRUSTED_PREFIX, |
7262 | .list = ocfs2_xattr_trusted_list, | 7262 | .list = ocfs2_xattr_trusted_list, |
7263 | .get = ocfs2_xattr_trusted_get, | 7263 | .get = ocfs2_xattr_trusted_get, |
@@ -7313,7 +7313,7 @@ static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, | |||
7313 | name, value, size, flags); | 7313 | name, value, size, flags); |
7314 | } | 7314 | } |
7315 | 7315 | ||
7316 | struct xattr_handler ocfs2_xattr_user_handler = { | 7316 | const struct xattr_handler ocfs2_xattr_user_handler = { |
7317 | .prefix = XATTR_USER_PREFIX, | 7317 | .prefix = XATTR_USER_PREFIX, |
7318 | .list = ocfs2_xattr_user_list, | 7318 | .list = ocfs2_xattr_user_list, |
7319 | .get = ocfs2_xattr_user_get, | 7319 | .get = ocfs2_xattr_user_get, |
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index abd72a47f520..aa64bb37a65b 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h | |||
@@ -37,12 +37,12 @@ struct ocfs2_security_xattr_info { | |||
37 | size_t value_len; | 37 | size_t value_len; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | extern struct xattr_handler ocfs2_xattr_user_handler; | 40 | extern const struct xattr_handler ocfs2_xattr_user_handler; |
41 | extern struct xattr_handler ocfs2_xattr_trusted_handler; | 41 | extern const struct xattr_handler ocfs2_xattr_trusted_handler; |
42 | extern struct xattr_handler ocfs2_xattr_security_handler; | 42 | extern const struct xattr_handler ocfs2_xattr_security_handler; |
43 | extern struct xattr_handler ocfs2_xattr_acl_access_handler; | 43 | extern const struct xattr_handler ocfs2_xattr_acl_access_handler; |
44 | extern struct xattr_handler ocfs2_xattr_acl_default_handler; | 44 | extern const struct xattr_handler ocfs2_xattr_acl_default_handler; |
45 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | 45 | extern const struct xattr_handler *ocfs2_xattr_handlers[]; |
46 | 46 | ||
47 | ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); | 47 | ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); |
48 | int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, | 48 | int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, |
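With both the individual handlers and the dispatch table constified, the whole structure can live in rodata. A minimal sketch of the resulting shape, assuming the dentry-based handler prototype this tree uses (all names below are illustrative):

static int example_get(struct dentry *dentry, const char *name,
		       void *buffer, size_t size, int handler_flags)
{
	return -EOPNOTSUPP;	/* stub */
}

static const struct xattr_handler example_user_handler = {
	.prefix	= XATTR_USER_PREFIX,
	.get	= example_get,
};

/* a NULL-terminated table of pointers to const handlers */
const struct xattr_handler *example_xattr_handlers[] = {
	&example_user_handler,
	NULL
};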
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index b44bb835e8ea..089839a6cc64 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -37,9 +37,7 @@ struct inode *omfs_new_inode(struct inode *dir, int mode) | |||
37 | goto fail; | 37 | goto fail; |
38 | 38 | ||
39 | inode->i_ino = new_block; | 39 | inode->i_ino = new_block; |
40 | inode->i_mode = mode; | 40 | inode_init_owner(inode, NULL, mode); |
41 | inode->i_uid = current_fsuid(); | ||
42 | inode->i_gid = current_fsgid(); | ||
43 | inode->i_mapping->a_ops = &omfs_aops; | 41 | inode->i_mapping->a_ops = &omfs_aops; |
44 | 42 | ||
45 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 43 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
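inode_init_owner() folds the open-coded uid/gid/mode assignments into one helper and, unlike the code it replaces, also handles setgid directories; omfs passes a NULL dir, so only the fallback branch applies there. A simplified sketch of what the helper does (the real one lives in fs/inode.c):

static void example_init_owner(struct inode *inode,
			       const struct inode *dir, int mode)
{
	inode->i_uid = current_fsuid();
	if (dir && dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;	/* inherit the group */
		if (S_ISDIR(mode))
			mode |= S_ISGID;	/* propagate setgid */
	} else
		inode->i_gid = current_fsgid();
	inode->i_mode = mode;
}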
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/securebits.h> | 17 | #include <linux/securebits.h> |
18 | #include <linux/security.h> | 18 | #include <linux/security.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/vfs.h> | ||
21 | #include <linux/fcntl.h> | 20 | #include <linux/fcntl.h> |
22 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
23 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
@@ -33,171 +32,6 @@ | |||
33 | 32 | ||
34 | #include "internal.h" | 33 | #include "internal.h" |
35 | 34 | ||
36 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
37 | { | ||
38 | int retval = -ENODEV; | ||
39 | |||
40 | if (dentry) { | ||
41 | retval = -ENOSYS; | ||
42 | if (dentry->d_sb->s_op->statfs) { | ||
43 | memset(buf, 0, sizeof(*buf)); | ||
44 | retval = security_sb_statfs(dentry); | ||
45 | if (retval) | ||
46 | return retval; | ||
47 | retval = dentry->d_sb->s_op->statfs(dentry, buf); | ||
48 | if (retval == 0 && buf->f_frsize == 0) | ||
49 | buf->f_frsize = buf->f_bsize; | ||
50 | } | ||
51 | } | ||
52 | return retval; | ||
53 | } | ||
54 | |||
55 | EXPORT_SYMBOL(vfs_statfs); | ||
56 | |||
57 | static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) | ||
58 | { | ||
59 | struct kstatfs st; | ||
60 | int retval; | ||
61 | |||
62 | retval = vfs_statfs(dentry, &st); | ||
63 | if (retval) | ||
64 | return retval; | ||
65 | |||
66 | if (sizeof(*buf) == sizeof(st)) | ||
67 | memcpy(buf, &st, sizeof(st)); | ||
68 | else { | ||
69 | if (sizeof buf->f_blocks == 4) { | ||
70 | if ((st.f_blocks | st.f_bfree | st.f_bavail | | ||
71 | st.f_bsize | st.f_frsize) & | ||
72 | 0xffffffff00000000ULL) | ||
73 | return -EOVERFLOW; | ||
74 | /* | ||
75 | * f_files and f_ffree may be -1; it's okay to stuff | ||
76 | * that into 32 bits | ||
77 | */ | ||
78 | if (st.f_files != -1 && | ||
79 | (st.f_files & 0xffffffff00000000ULL)) | ||
80 | return -EOVERFLOW; | ||
81 | if (st.f_ffree != -1 && | ||
82 | (st.f_ffree & 0xffffffff00000000ULL)) | ||
83 | return -EOVERFLOW; | ||
84 | } | ||
85 | |||
86 | buf->f_type = st.f_type; | ||
87 | buf->f_bsize = st.f_bsize; | ||
88 | buf->f_blocks = st.f_blocks; | ||
89 | buf->f_bfree = st.f_bfree; | ||
90 | buf->f_bavail = st.f_bavail; | ||
91 | buf->f_files = st.f_files; | ||
92 | buf->f_ffree = st.f_ffree; | ||
93 | buf->f_fsid = st.f_fsid; | ||
94 | buf->f_namelen = st.f_namelen; | ||
95 | buf->f_frsize = st.f_frsize; | ||
96 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
97 | } | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) | ||
102 | { | ||
103 | struct kstatfs st; | ||
104 | int retval; | ||
105 | |||
106 | retval = vfs_statfs(dentry, &st); | ||
107 | if (retval) | ||
108 | return retval; | ||
109 | |||
110 | if (sizeof(*buf) == sizeof(st)) | ||
111 | memcpy(buf, &st, sizeof(st)); | ||
112 | else { | ||
113 | buf->f_type = st.f_type; | ||
114 | buf->f_bsize = st.f_bsize; | ||
115 | buf->f_blocks = st.f_blocks; | ||
116 | buf->f_bfree = st.f_bfree; | ||
117 | buf->f_bavail = st.f_bavail; | ||
118 | buf->f_files = st.f_files; | ||
119 | buf->f_ffree = st.f_ffree; | ||
120 | buf->f_fsid = st.f_fsid; | ||
121 | buf->f_namelen = st.f_namelen; | ||
122 | buf->f_frsize = st.f_frsize; | ||
123 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
124 | } | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) | ||
129 | { | ||
130 | struct path path; | ||
131 | int error; | ||
132 | |||
133 | error = user_path(pathname, &path); | ||
134 | if (!error) { | ||
135 | struct statfs tmp; | ||
136 | error = vfs_statfs_native(path.dentry, &tmp); | ||
137 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
138 | error = -EFAULT; | ||
139 | path_put(&path); | ||
140 | } | ||
141 | return error; | ||
142 | } | ||
143 | |||
144 | SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) | ||
145 | { | ||
146 | struct path path; | ||
147 | long error; | ||
148 | |||
149 | if (sz != sizeof(*buf)) | ||
150 | return -EINVAL; | ||
151 | error = user_path(pathname, &path); | ||
152 | if (!error) { | ||
153 | struct statfs64 tmp; | ||
154 | error = vfs_statfs64(path.dentry, &tmp); | ||
155 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
156 | error = -EFAULT; | ||
157 | path_put(&path); | ||
158 | } | ||
159 | return error; | ||
160 | } | ||
161 | |||
162 | SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) | ||
163 | { | ||
164 | struct file * file; | ||
165 | struct statfs tmp; | ||
166 | int error; | ||
167 | |||
168 | error = -EBADF; | ||
169 | file = fget(fd); | ||
170 | if (!file) | ||
171 | goto out; | ||
172 | error = vfs_statfs_native(file->f_path.dentry, &tmp); | ||
173 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
174 | error = -EFAULT; | ||
175 | fput(file); | ||
176 | out: | ||
177 | return error; | ||
178 | } | ||
179 | |||
180 | SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) | ||
181 | { | ||
182 | struct file * file; | ||
183 | struct statfs64 tmp; | ||
184 | int error; | ||
185 | |||
186 | if (sz != sizeof(*buf)) | ||
187 | return -EINVAL; | ||
188 | |||
189 | error = -EBADF; | ||
190 | file = fget(fd); | ||
191 | if (!file) | ||
192 | goto out; | ||
193 | error = vfs_statfs64(file->f_path.dentry, &tmp); | ||
194 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
195 | error = -EFAULT; | ||
196 | fput(file); | ||
197 | out: | ||
198 | return error; | ||
199 | } | ||
200 | |||
201 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | 35 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, |
202 | struct file *filp) | 36 | struct file *filp) |
203 | { | 37 | { |
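The statfs helpers and the four statfs syscalls leave fs/open.c here, presumably rehomed elsewhere in the series (mainline moved them to fs/statfs.c) rather than dropped. The subtle piece worth restating is the 32-bit overflow rule from the deleted vfs_statfs_native(), sketched below as a standalone predicate (the name is illustrative):

/* A value fits a 32-bit struct statfs field only if its high 32
 * bits are clear; -1 stays allowed as the "unknown" marker for
 * f_files/f_ffree. */
static int example_fits_statfs32(const struct kstatfs *st)
{
	if ((st->f_blocks | st->f_bfree | st->f_bavail |
	     st->f_bsize | st->f_frsize) & 0xffffffff00000000ULL)
		return 0;
	if (st->f_files != -1 && (st->f_files & 0xffffffff00000000ULL))
		return 0;
	if (st->f_ffree != -1 && (st->f_ffree & 0xffffffff00000000ULL))
		return 0;
	return 1;	/* safe to copy into a 32-bit struct statfs */
}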
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index a97b477ac0fc..6921e7890be6 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
@@ -70,14 +70,14 @@ struct riscix_record { | |||
70 | 70 | ||
71 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | 71 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ |
72 | defined(CONFIG_ACORN_PARTITION_ADFS) | 72 | defined(CONFIG_ACORN_PARTITION_ADFS) |
73 | static int | 73 | static int riscix_partition(struct parsed_partitions *state, |
74 | riscix_partition(struct parsed_partitions *state, struct block_device *bdev, | 74 | unsigned long first_sect, int slot, |
75 | unsigned long first_sect, int slot, unsigned long nr_sects) | 75 | unsigned long nr_sects) |
76 | { | 76 | { |
77 | Sector sect; | 77 | Sector sect; |
78 | struct riscix_record *rr; | 78 | struct riscix_record *rr; |
79 | 79 | ||
80 | rr = (struct riscix_record *)read_dev_sector(bdev, first_sect, §); | 80 | rr = read_part_sector(state, first_sect, §); |
81 | if (!rr) | 81 | if (!rr) |
82 | return -1; | 82 | return -1; |
83 | 83 | ||
@@ -123,9 +123,9 @@ struct linux_part { | |||
123 | 123 | ||
124 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | 124 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ |
125 | defined(CONFIG_ACORN_PARTITION_ADFS) | 125 | defined(CONFIG_ACORN_PARTITION_ADFS) |
126 | static int | 126 | static int linux_partition(struct parsed_partitions *state, |
127 | linux_partition(struct parsed_partitions *state, struct block_device *bdev, | 127 | unsigned long first_sect, int slot, |
128 | unsigned long first_sect, int slot, unsigned long nr_sects) | 128 | unsigned long nr_sects) |
129 | { | 129 | { |
130 | Sector sect; | 130 | Sector sect; |
131 | struct linux_part *linuxp; | 131 | struct linux_part *linuxp; |
@@ -135,7 +135,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
135 | 135 | ||
136 | put_partition(state, slot++, first_sect, size); | 136 | put_partition(state, slot++, first_sect, size); |
137 | 137 | ||
138 | linuxp = (struct linux_part *)read_dev_sector(bdev, first_sect, §); | 138 | linuxp = read_part_sector(state, first_sect, §); |
139 | if (!linuxp) | 139 | if (!linuxp) |
140 | return -1; | 140 | return -1; |
141 | 141 | ||
@@ -157,8 +157,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
157 | #endif | 157 | #endif |
158 | 158 | ||
159 | #ifdef CONFIG_ACORN_PARTITION_CUMANA | 159 | #ifdef CONFIG_ACORN_PARTITION_CUMANA |
160 | int | 160 | int adfspart_check_CUMANA(struct parsed_partitions *state) |
161 | adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev) | ||
162 | { | 161 | { |
163 | unsigned long first_sector = 0; | 162 | unsigned long first_sector = 0; |
164 | unsigned int start_blk = 0; | 163 | unsigned int start_blk = 0; |
@@ -185,7 +184,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev | |||
185 | struct adfs_discrecord *dr; | 184 | struct adfs_discrecord *dr; |
186 | unsigned int nr_sects; | 185 | unsigned int nr_sects; |
187 | 186 | ||
188 | data = read_dev_sector(bdev, start_blk * 2 + 6, §); | 187 | data = read_part_sector(state, start_blk * 2 + 6, §); |
189 | if (!data) | 188 | if (!data) |
190 | return -1; | 189 | return -1; |
191 | 190 | ||
@@ -217,14 +216,14 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev | |||
217 | #ifdef CONFIG_ACORN_PARTITION_RISCIX | 216 | #ifdef CONFIG_ACORN_PARTITION_RISCIX |
218 | case PARTITION_RISCIX_SCSI: | 217 | case PARTITION_RISCIX_SCSI: |
219 | /* RISCiX - we don't know how to find the next one. */ | 218 | /* RISCiX - we don't know how to find the next one. */ |
220 | slot = riscix_partition(state, bdev, first_sector, | 219 | slot = riscix_partition(state, first_sector, slot, |
221 | slot, nr_sects); | 220 | nr_sects); |
222 | break; | 221 | break; |
223 | #endif | 222 | #endif |
224 | 223 | ||
225 | case PARTITION_LINUX: | 224 | case PARTITION_LINUX: |
226 | slot = linux_partition(state, bdev, first_sector, | 225 | slot = linux_partition(state, first_sector, slot, |
227 | slot, nr_sects); | 226 | nr_sects); |
228 | break; | 227 | break; |
229 | } | 228 | } |
230 | put_dev_sector(sect); | 229 | put_dev_sector(sect); |
@@ -249,8 +248,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev | |||
249 | * hda1 = ADFS partition on first drive. | 248 | * hda1 = ADFS partition on first drive. |
250 | * hda2 = non-ADFS partition. | 249 | * hda2 = non-ADFS partition. |
251 | */ | 250 | */ |
252 | int | 251 | int adfspart_check_ADFS(struct parsed_partitions *state) |
253 | adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | ||
254 | { | 252 | { |
255 | unsigned long start_sect, nr_sects, sectscyl, heads; | 253 | unsigned long start_sect, nr_sects, sectscyl, heads; |
256 | Sector sect; | 254 | Sector sect; |
@@ -259,7 +257,7 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | |||
259 | unsigned char id; | 257 | unsigned char id; |
260 | int slot = 1; | 258 | int slot = 1; |
261 | 259 | ||
262 | data = read_dev_sector(bdev, 6, §); | 260 | data = read_part_sector(state, 6, §); |
263 | if (!data) | 261 | if (!data) |
264 | return -1; | 262 | return -1; |
265 | 263 | ||
@@ -278,21 +276,21 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | |||
278 | /* | 276 | /* |
279 | * Work out start of non-adfs partition. | 277 | * Work out start of non-adfs partition. |
280 | */ | 278 | */ |
281 | nr_sects = (bdev->bd_inode->i_size >> 9) - start_sect; | 279 | nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect; |
282 | 280 | ||
283 | if (start_sect) { | 281 | if (start_sect) { |
284 | switch (id) { | 282 | switch (id) { |
285 | #ifdef CONFIG_ACORN_PARTITION_RISCIX | 283 | #ifdef CONFIG_ACORN_PARTITION_RISCIX |
286 | case PARTITION_RISCIX_SCSI: | 284 | case PARTITION_RISCIX_SCSI: |
287 | case PARTITION_RISCIX_MFM: | 285 | case PARTITION_RISCIX_MFM: |
288 | slot = riscix_partition(state, bdev, start_sect, | 286 | slot = riscix_partition(state, start_sect, slot, |
289 | slot, nr_sects); | 287 | nr_sects); |
290 | break; | 288 | break; |
291 | #endif | 289 | #endif |
292 | 290 | ||
293 | case PARTITION_LINUX: | 291 | case PARTITION_LINUX: |
294 | slot = linux_partition(state, bdev, start_sect, | 292 | slot = linux_partition(state, start_sect, slot, |
295 | slot, nr_sects); | 293 | nr_sects); |
296 | break; | 294 | break; |
297 | } | 295 | } |
298 | } | 296 | } |
@@ -308,10 +306,11 @@ struct ics_part { | |||
308 | __le32 size; | 306 | __le32 size; |
309 | }; | 307 | }; |
310 | 308 | ||
311 | static int adfspart_check_ICSLinux(struct block_device *bdev, unsigned long block) | 309 | static int adfspart_check_ICSLinux(struct parsed_partitions *state, |
310 | unsigned long block) | ||
312 | { | 311 | { |
313 | Sector sect; | 312 | Sector sect; |
314 | unsigned char *data = read_dev_sector(bdev, block, §); | 313 | unsigned char *data = read_part_sector(state, block, §); |
315 | int result = 0; | 314 | int result = 0; |
316 | 315 | ||
317 | if (data) { | 316 | if (data) { |
@@ -349,8 +348,7 @@ static inline int valid_ics_sector(const unsigned char *data) | |||
349 | * hda2 = ADFS partition 1 on first drive. | 348 | * hda2 = ADFS partition 1 on first drive. |
350 | * ..etc.. | 349 | * ..etc.. |
351 | */ | 350 | */ |
352 | int | 351 | int adfspart_check_ICS(struct parsed_partitions *state) |
353 | adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) | ||
354 | { | 352 | { |
355 | const unsigned char *data; | 353 | const unsigned char *data; |
356 | const struct ics_part *p; | 354 | const struct ics_part *p; |
@@ -360,7 +358,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) | |||
360 | /* | 358 | /* |
361 | * Try ICS style partitions - sector 0 contains partition info. | 359 | * Try ICS style partitions - sector 0 contains partition info. |
362 | */ | 360 | */ |
363 | data = read_dev_sector(bdev, 0, §); | 361 | data = read_part_sector(state, 0, §); |
364 | if (!data) | 362 | if (!data) |
365 | return -1; | 363 | return -1; |
366 | 364 | ||
@@ -392,7 +390,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) | |||
392 | * partition is. We must not make this visible | 390 | * partition is. We must not make this visible |
393 | * to the filesystem. | 391 | * to the filesystem. |
394 | */ | 392 | */ |
395 | if (size > 1 && adfspart_check_ICSLinux(bdev, start)) { | 393 | if (size > 1 && adfspart_check_ICSLinux(state, start)) { |
396 | start += 1; | 394 | start += 1; |
397 | size -= 1; | 395 | size -= 1; |
398 | } | 396 | } |
@@ -446,8 +444,7 @@ static inline int valid_ptec_sector(const unsigned char *data) | |||
446 | * hda2 = ADFS partition 1 on first drive. | 444 | * hda2 = ADFS partition 1 on first drive. |
447 | * ..etc.. | 445 | * ..etc.. |
448 | */ | 446 | */ |
449 | int | 447 | int adfspart_check_POWERTEC(struct parsed_partitions *state) |
450 | adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev) | ||
451 | { | 448 | { |
452 | Sector sect; | 449 | Sector sect; |
453 | const unsigned char *data; | 450 | const unsigned char *data; |
@@ -455,7 +452,7 @@ adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bd | |||
455 | int slot = 1; | 452 | int slot = 1; |
456 | int i; | 453 | int i; |
457 | 454 | ||
458 | data = read_dev_sector(bdev, 0, §); | 455 | data = read_part_sector(state, 0, §); |
459 | if (!data) | 456 | if (!data) |
460 | return -1; | 457 | return -1; |
461 | 458 | ||
@@ -508,8 +505,7 @@ static const char eesox_name[] = { | |||
508 | * 1. The individual ADFS boot block entries that are placed on the disk. | 505 | * 1. The individual ADFS boot block entries that are placed on the disk. |
509 | * 2. The start address of the next entry. | 506 | * 2. The start address of the next entry. |
510 | */ | 507 | */ |
511 | int | 508 | int adfspart_check_EESOX(struct parsed_partitions *state) |
512 | adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) | ||
513 | { | 509 | { |
514 | Sector sect; | 510 | Sector sect; |
515 | const unsigned char *data; | 511 | const unsigned char *data; |
@@ -518,7 +514,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) | |||
518 | sector_t start = 0; | 514 | sector_t start = 0; |
519 | int i, slot = 1; | 515 | int i, slot = 1; |
520 | 516 | ||
521 | data = read_dev_sector(bdev, 7, §); | 517 | data = read_part_sector(state, 7, §); |
522 | if (!data) | 518 | if (!data) |
523 | return -1; | 519 | return -1; |
524 | 520 | ||
@@ -545,7 +541,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) | |||
545 | if (i != 0) { | 541 | if (i != 0) { |
546 | sector_t size; | 542 | sector_t size; |
547 | 543 | ||
548 | size = get_capacity(bdev->bd_disk); | 544 | size = get_capacity(state->bdev->bd_disk); |
549 | put_partition(state, slot++, start, size - start); | 545 | put_partition(state, slot++, start, size - start); |
550 | printk("\n"); | 546 | printk("\n"); |
551 | } | 547 | } |
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h index 81fd50ecc080..ede828529692 100644 --- a/fs/partitions/acorn.h +++ b/fs/partitions/acorn.h | |||
@@ -7,8 +7,8 @@ | |||
7 | * format, and everyone stick to it? | 7 | * format, and everyone stick to it? |
8 | */ | 8 | */ |
9 | 9 | ||
10 | int adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev); | 10 | int adfspart_check_CUMANA(struct parsed_partitions *state); |
11 | int adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev); | 11 | int adfspart_check_ADFS(struct parsed_partitions *state); |
12 | int adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev); | 12 | int adfspart_check_ICS(struct parsed_partitions *state); |
13 | int adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev); | 13 | int adfspart_check_POWERTEC(struct parsed_partitions *state); |
14 | int adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev); | 14 | int adfspart_check_EESOX(struct parsed_partitions *state); |
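The acorn conversion is the template repeated by every parser below: the block_device argument folds into struct parsed_partitions, and raw read_dev_sector() calls become bounds-checked read_part_sector() calls. A minimal sketch of the new parser shape (the parser name is illustrative):

static int example_partition(struct parsed_partitions *state)
{
	Sector sect;
	unsigned char *data;

	/* was: read_dev_sector(bdev, 0, &sect) */
	data = read_part_sector(state, 0, &sect);
	if (!data)
		return -1;	/* may also have set state->access_beyond_eod */
	/* ... decode the sector, put_partition(state, ...) ... */
	put_dev_sector(sect);
	return 1;	/* table recognized */
}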
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index 9917a8c360f2..ba443d4229f8 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c | |||
@@ -23,8 +23,7 @@ checksum_block(__be32 *m, int size) | |||
23 | return sum; | 23 | return sum; |
24 | } | 24 | } |
25 | 25 | ||
26 | int | 26 | int amiga_partition(struct parsed_partitions *state) |
27 | amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | ||
28 | { | 27 | { |
29 | Sector sect; | 28 | Sector sect; |
30 | unsigned char *data; | 29 | unsigned char *data; |
@@ -38,11 +37,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
38 | for (blk = 0; ; blk++, put_dev_sector(sect)) { | 37 | for (blk = 0; ; blk++, put_dev_sector(sect)) { |
39 | if (blk == RDB_ALLOCATION_LIMIT) | 38 | if (blk == RDB_ALLOCATION_LIMIT) |
40 | goto rdb_done; | 39 | goto rdb_done; |
41 | data = read_dev_sector(bdev, blk, §); | 40 | data = read_part_sector(state, blk, §); |
42 | if (!data) { | 41 | if (!data) { |
43 | if (warn_no_part) | 42 | if (warn_no_part) |
44 | printk("Dev %s: unable to read RDB block %d\n", | 43 | printk("Dev %s: unable to read RDB block %d\n", |
45 | bdevname(bdev, b), blk); | 44 | bdevname(state->bdev, b), blk); |
46 | res = -1; | 45 | res = -1; |
47 | goto rdb_done; | 46 | goto rdb_done; |
48 | } | 47 | } |
@@ -64,7 +63,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
64 | } | 63 | } |
65 | 64 | ||
66 | printk("Dev %s: RDB in block %d has bad checksum\n", | 65 | printk("Dev %s: RDB in block %d has bad checksum\n", |
67 | bdevname(bdev, b), blk); | 66 | bdevname(state->bdev, b), blk); |
68 | } | 67 | } |
69 | 68 | ||
70 | /* blksize is blocks per 512 byte standard block */ | 69 | /* blksize is blocks per 512 byte standard block */ |
@@ -75,11 +74,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
75 | put_dev_sector(sect); | 74 | put_dev_sector(sect); |
76 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { | 75 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { |
77 | blk *= blksize; /* Read in terms partition table understands */ | 76 | blk *= blksize; /* Read in terms partition table understands */ |
78 | data = read_dev_sector(bdev, blk, §); | 77 | data = read_part_sector(state, blk, §); |
79 | if (!data) { | 78 | if (!data) { |
80 | if (warn_no_part) | 79 | if (warn_no_part) |
81 | printk("Dev %s: unable to read partition block %d\n", | 80 | printk("Dev %s: unable to read partition block %d\n", |
82 | bdevname(bdev, b), blk); | 81 | bdevname(state->bdev, b), blk); |
83 | res = -1; | 82 | res = -1; |
84 | goto rdb_done; | 83 | goto rdb_done; |
85 | } | 84 | } |
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h index 2f3e9ce22d53..d094585cadaa 100644 --- a/fs/partitions/amiga.h +++ b/fs/partitions/amiga.h | |||
@@ -2,5 +2,5 @@ | |||
2 | * fs/partitions/amiga.h | 2 | * fs/partitions/amiga.h |
3 | */ | 3 | */ |
4 | 4 | ||
5 | int amiga_partition(struct parsed_partitions *state, struct block_device *bdev); | 5 | int amiga_partition(struct parsed_partitions *state); |
6 | 6 | ||
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 1f3572d5b755..4439ff1b6cec 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c | |||
@@ -30,7 +30,7 @@ static inline int OK_id(char *s) | |||
30 | memcmp (s, "RAW", 3) == 0 ; | 30 | memcmp (s, "RAW", 3) == 0 ; |
31 | } | 31 | } |
32 | 32 | ||
33 | int atari_partition(struct parsed_partitions *state, struct block_device *bdev) | 33 | int atari_partition(struct parsed_partitions *state) |
34 | { | 34 | { |
35 | Sector sect; | 35 | Sector sect; |
36 | struct rootsector *rs; | 36 | struct rootsector *rs; |
@@ -42,12 +42,12 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
42 | int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ | 42 | int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | rs = (struct rootsector *) read_dev_sector(bdev, 0, §); | 45 | rs = read_part_sector(state, 0, §); |
46 | if (!rs) | 46 | if (!rs) |
47 | return -1; | 47 | return -1; |
48 | 48 | ||
49 | /* Verify this is an Atari rootsector: */ | 49 | /* Verify this is an Atari rootsector: */ |
50 | hd_size = bdev->bd_inode->i_size >> 9; | 50 | hd_size = state->bdev->bd_inode->i_size >> 9; |
51 | if (!VALID_PARTITION(&rs->part[0], hd_size) && | 51 | if (!VALID_PARTITION(&rs->part[0], hd_size) && |
52 | !VALID_PARTITION(&rs->part[1], hd_size) && | 52 | !VALID_PARTITION(&rs->part[1], hd_size) && |
53 | !VALID_PARTITION(&rs->part[2], hd_size) && | 53 | !VALID_PARTITION(&rs->part[2], hd_size) && |
@@ -84,7 +84,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
84 | printk(" XGM<"); | 84 | printk(" XGM<"); |
85 | partsect = extensect = be32_to_cpu(pi->st); | 85 | partsect = extensect = be32_to_cpu(pi->st); |
86 | while (1) { | 86 | while (1) { |
87 | xrs = (struct rootsector *)read_dev_sector(bdev, partsect, §2); | 87 | xrs = read_part_sector(state, partsect, §2); |
88 | if (!xrs) { | 88 | if (!xrs) { |
89 | printk (" block %ld read failed\n", partsect); | 89 | printk (" block %ld read failed\n", partsect); |
90 | put_dev_sector(sect); | 90 | put_dev_sector(sect); |
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h index 63186b00e135..fe2d32a89f36 100644 --- a/fs/partitions/atari.h +++ b/fs/partitions/atari.h | |||
@@ -31,4 +31,4 @@ struct rootsector | |||
31 | u16 checksum; /* checksum for bootable disks */ | 31 | u16 checksum; /* checksum for bootable disks */ |
32 | } __attribute__((__packed__)); | 32 | } __attribute__((__packed__)); |
33 | 33 | ||
34 | int atari_partition(struct parsed_partitions *state, struct block_device *bdev); | 34 | int atari_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e238ab23a9e7..5dcd4b0c5533 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -45,7 +45,7 @@ extern void md_autodetect_dev(dev_t dev); | |||
45 | 45 | ||
46 | int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ | 46 | int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ |
47 | 47 | ||
48 | static int (*check_part[])(struct parsed_partitions *, struct block_device *) = { | 48 | static int (*check_part[])(struct parsed_partitions *) = { |
49 | /* | 49 | /* |
50 | * Probe partition formats with tables at disk address 0 | 50 | * Probe partition formats with tables at disk address 0 |
51 | * that also have an ADFS boot block at 0xdc0. | 51 | * that also have an ADFS boot block at 0xdc0. |
@@ -161,10 +161,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
161 | struct parsed_partitions *state; | 161 | struct parsed_partitions *state; |
162 | int i, res, err; | 162 | int i, res, err; |
163 | 163 | ||
164 | state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL); | 164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); |
165 | if (!state) | 165 | if (!state) |
166 | return NULL; | 166 | return NULL; |
167 | 167 | ||
168 | state->bdev = bdev; | ||
168 | disk_name(hd, 0, state->name); | 169 | disk_name(hd, 0, state->name); |
169 | printk(KERN_INFO " %s:", state->name); | 170 | printk(KERN_INFO " %s:", state->name); |
170 | if (isdigit(state->name[strlen(state->name)-1])) | 171 | if (isdigit(state->name[strlen(state->name)-1])) |
@@ -174,7 +175,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
174 | i = res = err = 0; | 175 | i = res = err = 0; |
175 | while (!res && check_part[i]) { | 176 | while (!res && check_part[i]) { |
176 | memset(&state->parts, 0, sizeof(state->parts)); | 177 | memset(&state->parts, 0, sizeof(state->parts)); |
177 | res = check_part[i++](state, bdev); | 178 | res = check_part[i++](state); |
178 | if (res < 0) { | 179 | if (res < 0) { |
179 | /* We have hit an I/O error which we don't report now. | 180 | /* We have hit an I/O error which we don't report now. |
180 | * But record it, and let the others do their job. | 181 | * But record it, and let the others do their job. |
@@ -186,6 +187,8 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
186 | } | 187 | } |
187 | if (res > 0) | 188 | if (res > 0) |
188 | return state; | 189 | return state; |
190 | if (state->access_beyond_eod) | ||
191 | err = -ENOSPC; | ||
189 | if (err) | 192 | if (err) |
190 | /* The partition is unrecognized. So report I/O errors if there were any */ | 193 | /* The partition is unrecognized. So report I/O errors if there were any */ |
191 | res = err; | 194 | res = err; |
@@ -538,12 +541,33 @@ exit: | |||
538 | disk_part_iter_exit(&piter); | 541 | disk_part_iter_exit(&piter); |
539 | } | 542 | } |
540 | 543 | ||
544 | static bool disk_unlock_native_capacity(struct gendisk *disk) | ||
545 | { | ||
546 | const struct block_device_operations *bdops = disk->fops; | ||
547 | |||
548 | if (bdops->unlock_native_capacity && | ||
549 | !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { | ||
550 | printk(KERN_CONT "enabling native capacity\n"); | ||
551 | bdops->unlock_native_capacity(disk); | ||
552 | disk->flags |= GENHD_FL_NATIVE_CAPACITY; | ||
553 | return true; | ||
554 | } else { | ||
555 | printk(KERN_CONT "truncated\n"); | ||
556 | return false; | ||
557 | } | ||
558 | } | ||
559 | |||
541 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 560 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
542 | { | 561 | { |
562 | struct parsed_partitions *state = NULL; | ||
543 | struct disk_part_iter piter; | 563 | struct disk_part_iter piter; |
544 | struct hd_struct *part; | 564 | struct hd_struct *part; |
545 | struct parsed_partitions *state; | ||
546 | int p, highest, res; | 565 | int p, highest, res; |
566 | rescan: | ||
567 | if (state && !IS_ERR(state)) { | ||
568 | kfree(state); | ||
569 | state = NULL; | ||
570 | } | ||
547 | 571 | ||
548 | if (bdev->bd_part_count) | 572 | if (bdev->bd_part_count) |
549 | return -EBUSY; | 573 | return -EBUSY; |
@@ -562,8 +586,32 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
562 | bdev->bd_invalidated = 0; | 586 | bdev->bd_invalidated = 0; |
563 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 587 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
564 | return 0; | 588 | return 0; |
565 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 589 | if (IS_ERR(state)) { |
590 | /* | ||
591 | * I/O error reading the partition table. If any | ||
592 | * partition code tried to read beyond EOD, retry | ||
593 | * after unlocking native capacity. | ||
594 | */ | ||
595 | if (PTR_ERR(state) == -ENOSPC) { | ||
596 | printk(KERN_WARNING "%s: partition table beyond EOD, ", | ||
597 | disk->disk_name); | ||
598 | if (disk_unlock_native_capacity(disk)) | ||
599 | goto rescan; | ||
600 | } | ||
566 | return -EIO; | 601 | return -EIO; |
602 | } | ||
603 | /* | ||
604 | * If any partition code tried to read beyond EOD, try | ||
605 | * unlocking native capacity even if partition table is | ||
606 | * successfully read as we could be missing some partitions. | ||
607 | */ | ||
608 | if (state->access_beyond_eod) { | ||
609 | printk(KERN_WARNING | ||
610 | "%s: partition table partially beyond EOD, ", | ||
611 | disk->disk_name); | ||
612 | if (disk_unlock_native_capacity(disk)) | ||
613 | goto rescan; | ||
614 | } | ||
567 | 615 | ||
568 | /* tell userspace that the media / partition table may have changed */ | 616 | /* tell userspace that the media / partition table may have changed */ |
569 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); | 617 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
@@ -581,7 +629,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
581 | /* add partitions */ | 629 | /* add partitions */ |
582 | for (p = 1; p < state->limit; p++) { | 630 | for (p = 1; p < state->limit; p++) { |
583 | sector_t size, from; | 631 | sector_t size, from; |
584 | try_scan: | 632 | |
585 | size = state->parts[p].size; | 633 | size = state->parts[p].size; |
586 | if (!size) | 634 | if (!size) |
587 | continue; | 635 | continue; |
@@ -589,30 +637,21 @@ try_scan: | |||
589 | from = state->parts[p].from; | 637 | from = state->parts[p].from; |
590 | if (from >= get_capacity(disk)) { | 638 | if (from >= get_capacity(disk)) { |
591 | printk(KERN_WARNING | 639 | printk(KERN_WARNING |
592 | "%s: p%d ignored, start %llu is behind the end of the disk\n", | 640 | "%s: p%d start %llu is beyond EOD, ", |
593 | disk->disk_name, p, (unsigned long long) from); | 641 | disk->disk_name, p, (unsigned long long) from); |
642 | if (disk_unlock_native_capacity(disk)) | ||
643 | goto rescan; | ||
594 | continue; | 644 | continue; |
595 | } | 645 | } |
596 | 646 | ||
597 | if (from + size > get_capacity(disk)) { | 647 | if (from + size > get_capacity(disk)) { |
598 | const struct block_device_operations *bdops = disk->fops; | ||
599 | unsigned long long capacity; | ||
600 | |||
601 | printk(KERN_WARNING | 648 | printk(KERN_WARNING |
602 | "%s: p%d size %llu exceeds device capacity, ", | 649 | "%s: p%d size %llu extends beyond EOD, ", |
603 | disk->disk_name, p, (unsigned long long) size); | 650 | disk->disk_name, p, (unsigned long long) size); |
604 | 651 | ||
605 | if (bdops->set_capacity && | 652 | if (disk_unlock_native_capacity(disk)) { |
606 | (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { | 653 | /* free state and restart */ |
607 | printk(KERN_CONT "enabling native capacity\n"); | 654 | goto rescan; |
608 | capacity = bdops->set_capacity(disk, ~0ULL); | ||
609 | disk->flags |= GENHD_FL_NATIVE_CAPACITY; | ||
610 | if (capacity > get_capacity(disk)) { | ||
611 | set_capacity(disk, capacity); | ||
612 | check_disk_size_change(disk, bdev); | ||
613 | bdev->bd_invalidated = 0; | ||
614 | } | ||
615 | goto try_scan; | ||
616 | } else { | 655 | } else { |
617 | /* | 656 | /* |
618 | * we can not ignore partitions of broken tables | 657 | * we can not ignore partitions of broken tables |
@@ -620,7 +659,6 @@ try_scan: | |||
620 | * we limit them to the end of the disk to avoid | 659 | * we limit them to the end of the disk to avoid |
621 | * creating invalid block devices | 660 | * creating invalid block devices |
622 | */ | 661 | */ |
623 | printk(KERN_CONT "limited to end of disk\n"); | ||
624 | size = get_capacity(disk) - from; | 662 | size = get_capacity(disk) - from; |
625 | } | 663 | } |
626 | } | 664 | } |
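disk_unlock_native_capacity() centralizes the retry policy: a read beyond EOD or an oversized partition triggers at most one attempt to unlock the device's full capacity, then a complete rescan. Drivers opt in through the new hook in block_device_operations, replacing the old ->set_capacity(disk, ~0ULL) protocol; a minimal sketch of the driver side, with the body left as a comment since the details are driver-specific:

static void example_unlock_native_capacity(struct gendisk *disk)
{
	/* e.g. disable a Host Protected Area clamp, then
	 * set_capacity(disk, native_sectors); the partition code
	 * rescans by itself afterwards. */
}

static const struct block_device_operations example_fops = {
	.owner			= THIS_MODULE,
	.unlock_native_capacity	= example_unlock_native_capacity,
};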
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 98dbe1a84528..52f8bd399396 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -6,6 +6,7 @@ | |||
6 | * description. | 6 | * description. |
7 | */ | 7 | */ |
8 | struct parsed_partitions { | 8 | struct parsed_partitions { |
9 | struct block_device *bdev; | ||
9 | char name[BDEVNAME_SIZE]; | 10 | char name[BDEVNAME_SIZE]; |
10 | struct { | 11 | struct { |
11 | sector_t from; | 12 | sector_t from; |
@@ -14,8 +15,19 @@ struct parsed_partitions { | |||
14 | } parts[DISK_MAX_PARTS]; | 15 | } parts[DISK_MAX_PARTS]; |
15 | int next; | 16 | int next; |
16 | int limit; | 17 | int limit; |
18 | bool access_beyond_eod; | ||
17 | }; | 19 | }; |
18 | 20 | ||
21 | static inline void *read_part_sector(struct parsed_partitions *state, | ||
22 | sector_t n, Sector *p) | ||
23 | { | ||
24 | if (n >= get_capacity(state->bdev->bd_disk)) { | ||
25 | state->access_beyond_eod = true; | ||
26 | return NULL; | ||
27 | } | ||
28 | return read_dev_sector(state->bdev, n, p); | ||
29 | } | ||
30 | |||
19 | static inline void | 31 | static inline void |
20 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) | 32 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) |
21 | { | 33 | { |
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 91babdae7587..9efb2cfe2410 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
@@ -140,8 +140,7 @@ efi_crc32(const void *buf, unsigned long len) | |||
140 | * the part[0] entry for this disk, and is the number of | 140 | * the part[0] entry for this disk, and is the number of |
141 | * physical sectors available on the disk. | 141 | * physical sectors available on the disk. |
142 | */ | 142 | */ |
143 | static u64 | 143 | static u64 last_lba(struct block_device *bdev) |
144 | last_lba(struct block_device *bdev) | ||
145 | { | 144 | { |
146 | if (!bdev || !bdev->bd_inode) | 145 | if (!bdev || !bdev->bd_inode) |
147 | return 0; | 146 | return 0; |
@@ -181,27 +180,28 @@ is_pmbr_valid(legacy_mbr *mbr) | |||
181 | 180 | ||
182 | /** | 181 | /** |
183 | * read_lba(): Read bytes from disk, starting at given LBA | 182 | * read_lba(): Read bytes from disk, starting at given LBA |
184 | * @bdev | 183 | * @state |
185 | * @lba | 184 | * @lba |
186 | * @buffer | 185 | * @buffer |
187 | * @size_t | 186 | * @size_t |
188 | * | 187 | * |
189 | * Description: Reads @count bytes from @bdev into @buffer. | 188 | * Description: Reads @count bytes from @state->bdev into @buffer. |
190 | * Returns number of bytes read on success, 0 on error. | 189 | * Returns number of bytes read on success, 0 on error. |
191 | */ | 190 | */ |
192 | static size_t | 191 | static size_t read_lba(struct parsed_partitions *state, |
193 | read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) | 192 | u64 lba, u8 *buffer, size_t count) |
194 | { | 193 | { |
195 | size_t totalreadcount = 0; | 194 | size_t totalreadcount = 0; |
195 | struct block_device *bdev = state->bdev; | ||
196 | sector_t n = lba * (bdev_logical_block_size(bdev) / 512); | 196 | sector_t n = lba * (bdev_logical_block_size(bdev) / 512); |
197 | 197 | ||
198 | if (!bdev || !buffer || lba > last_lba(bdev)) | 198 | if (!buffer || lba > last_lba(bdev)) |
199 | return 0; | 199 | return 0; |
200 | 200 | ||
201 | while (count) { | 201 | while (count) { |
202 | int copied = 512; | 202 | int copied = 512; |
203 | Sector sect; | 203 | Sector sect; |
204 | unsigned char *data = read_dev_sector(bdev, n++, §); | 204 | unsigned char *data = read_part_sector(state, n++, §); |
205 | if (!data) | 205 | if (!data) |
206 | break; | 206 | break; |
207 | if (copied > count) | 207 | if (copied > count) |
@@ -217,19 +217,20 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) | |||
217 | 217 | ||
218 | /** | 218 | /** |
219 | * alloc_read_gpt_entries(): reads partition entries from disk | 219 | * alloc_read_gpt_entries(): reads partition entries from disk |
220 | * @bdev | 220 | * @state |
221 | * @gpt - GPT header | 221 | * @gpt - GPT header |
222 | * | 222 | * |
223 | * Description: Returns ptes on success, NULL on error. | 223 | * Description: Returns ptes on success, NULL on error. |
224 | * Allocates space for PTEs based on information found in @gpt. | 224 | * Allocates space for PTEs based on information found in @gpt. |
225 | * Notes: remember to free pte when you're done! | 225 | * Notes: remember to free pte when you're done! |
226 | */ | 226 | */ |
227 | static gpt_entry * | 227 | static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, |
228 | alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) | 228 | gpt_header *gpt) |
229 | { | 229 | { |
230 | size_t count; | 230 | size_t count; |
231 | gpt_entry *pte; | 231 | gpt_entry *pte; |
232 | if (!bdev || !gpt) | 232 | |
233 | if (!gpt) | ||
233 | return NULL; | 234 | return NULL; |
234 | 235 | ||
235 | count = le32_to_cpu(gpt->num_partition_entries) * | 236 | count = le32_to_cpu(gpt->num_partition_entries) * |
@@ -240,7 +241,7 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) | |||
240 | if (!pte) | 241 | if (!pte) |
241 | return NULL; | 242 | return NULL; |
242 | 243 | ||
243 | if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba), | 244 | if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba), |
244 | (u8 *) pte, | 245 | (u8 *) pte, |
245 | count) < count) { | 246 | count) < count) { |
246 | kfree(pte); | 247 | kfree(pte); |
@@ -252,27 +253,24 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) | |||
252 | 253 | ||
253 | /** | 254 | /** |
254 | * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk | 255 | * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk |
255 | * @bdev | 256 | * @state |
256 | * @lba is the Logical Block Address of the partition table | 257 | * @lba is the Logical Block Address of the partition table |
257 | * | 258 | * |
258 | * Description: returns GPT header on success, NULL on error. Allocates | 259 | * Description: returns GPT header on success, NULL on error. Allocates |
259 | * and fills a GPT header starting at @ from @bdev. | 260 | * and fills a GPT header starting at @ from @state->bdev. |
260 | * Note: remember to free gpt when finished with it. | 261 | * Note: remember to free gpt when finished with it. |
261 | */ | 262 | */ |
262 | static gpt_header * | 263 | static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, |
263 | alloc_read_gpt_header(struct block_device *bdev, u64 lba) | 264 | u64 lba) |
264 | { | 265 | { |
265 | gpt_header *gpt; | 266 | gpt_header *gpt; |
266 | unsigned ssz = bdev_logical_block_size(bdev); | 267 | unsigned ssz = bdev_logical_block_size(state->bdev); |
267 | |||
268 | if (!bdev) | ||
269 | return NULL; | ||
270 | 268 | ||
271 | gpt = kzalloc(ssz, GFP_KERNEL); | 269 | gpt = kzalloc(ssz, GFP_KERNEL); |
272 | if (!gpt) | 270 | if (!gpt) |
273 | return NULL; | 271 | return NULL; |
274 | 272 | ||
275 | if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) { | 273 | if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) { |
276 | kfree(gpt); | 274 | kfree(gpt); |
277 | gpt=NULL; | 275 | gpt=NULL; |
278 | return NULL; | 276 | return NULL; |
@@ -283,7 +281,7 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba) | |||
283 | 281 | ||
284 | /** | 282 | /** |
285 | * is_gpt_valid() - tests one GPT header and PTEs for validity | 283 | * is_gpt_valid() - tests one GPT header and PTEs for validity |
286 | * @bdev | 284 | * @state |
287 | * @lba is the logical block address of the GPT header to test | 285 | * @lba is the logical block address of the GPT header to test |
288 | * @gpt is a GPT header ptr, filled on return. | 286 | * @gpt is a GPT header ptr, filled on return. |
289 | * @ptes is a PTEs ptr, filled on return. | 287 | * @ptes is a PTEs ptr, filled on return. |
@@ -291,16 +289,15 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba) | |||
291 | * Description: returns 1 if valid, 0 on error. | 289 | * Description: returns 1 if valid, 0 on error. |
292 | * If valid, returns pointers to newly allocated GPT header and PTEs. | 290 | * If valid, returns pointers to newly allocated GPT header and PTEs. |
293 | */ | 291 | */ |
294 | static int | 292 | static int is_gpt_valid(struct parsed_partitions *state, u64 lba, |
295 | is_gpt_valid(struct block_device *bdev, u64 lba, | 293 | gpt_header **gpt, gpt_entry **ptes) |
296 | gpt_header **gpt, gpt_entry **ptes) | ||
297 | { | 294 | { |
298 | u32 crc, origcrc; | 295 | u32 crc, origcrc; |
299 | u64 lastlba; | 296 | u64 lastlba; |
300 | 297 | ||
301 | if (!bdev || !gpt || !ptes) | 298 | if (!ptes) |
302 | return 0; | 299 | return 0; |
303 | if (!(*gpt = alloc_read_gpt_header(bdev, lba))) | 300 | if (!(*gpt = alloc_read_gpt_header(state, lba))) |
304 | return 0; | 301 | return 0; |
305 | 302 | ||
306 | /* Check the GUID Partition Table signature */ | 303 | /* Check the GUID Partition Table signature */ |
@@ -336,7 +333,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, | |||
336 | /* Check the first_usable_lba and last_usable_lba are | 333 | /* Check the first_usable_lba and last_usable_lba are |
337 | * within the disk. | 334 | * within the disk. |
338 | */ | 335 | */ |
339 | lastlba = last_lba(bdev); | 336 | lastlba = last_lba(state->bdev); |
340 | if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { | 337 | if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { |
341 | pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", | 338 | pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", |
342 | (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), | 339 | (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), |
@@ -350,7 +347,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, | |||
350 | goto fail; | 347 | goto fail; |
351 | } | 348 | } |
352 | 349 | ||
353 | if (!(*ptes = alloc_read_gpt_entries(bdev, *gpt))) | 350 | if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) |
354 | goto fail; | 351 | goto fail; |
355 | 352 | ||
356 | /* Check the GUID Partition Entry Array CRC */ | 353 | /* Check the GUID Partition Entry Array CRC */ |
@@ -495,7 +492,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
495 | 492 | ||
496 | /** | 493 | /** |
497 | * find_valid_gpt() - Search disk for valid GPT headers and PTEs | 494 | * find_valid_gpt() - Search disk for valid GPT headers and PTEs |
498 | * @bdev | 495 | * @state |
499 | * @gpt is a GPT header ptr, filled on return. | 496 | * @gpt is a GPT header ptr, filled on return. |
500 | * @ptes is a PTEs ptr, filled on return. | 497 | * @ptes is a PTEs ptr, filled on return. |
501 | * Description: Returns 1 if valid, 0 on error. | 498 | * Description: Returns 1 if valid, 0 on error. |
@@ -508,24 +505,25 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
508 | * This protects against devices which misreport their size, and forces | 505 | * This protects against devices which misreport their size, and forces |
509 | * the user to decide to use the Alternate GPT. | 506 | * the user to decide to use the Alternate GPT. |
510 | */ | 507 | */ |
511 | static int | 508 | static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, |
512 | find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | 509 | gpt_entry **ptes) |
513 | { | 510 | { |
514 | int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; | 511 | int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; |
515 | gpt_header *pgpt = NULL, *agpt = NULL; | 512 | gpt_header *pgpt = NULL, *agpt = NULL; |
516 | gpt_entry *pptes = NULL, *aptes = NULL; | 513 | gpt_entry *pptes = NULL, *aptes = NULL; |
517 | legacy_mbr *legacymbr; | 514 | legacy_mbr *legacymbr; |
518 | u64 lastlba; | 515 | u64 lastlba; |
519 | if (!bdev || !gpt || !ptes) | 516 | |
517 | if (!ptes) | ||
520 | return 0; | 518 | return 0; |
521 | 519 | ||
522 | lastlba = last_lba(bdev); | 520 | lastlba = last_lba(state->bdev); |
523 | if (!force_gpt) { | 521 | if (!force_gpt) { |
524 | /* This will be added to the EFI Spec. per Intel after v1.02. */ | 522 | /* This will be added to the EFI Spec. per Intel after v1.02. */ |
525 | legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); | 523 | legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); |
526 | if (legacymbr) { | 524 | if (legacymbr) { |
527 | read_lba(bdev, 0, (u8 *) legacymbr, | 525 | read_lba(state, 0, (u8 *) legacymbr, |
528 | sizeof (*legacymbr)); | 526 | sizeof (*legacymbr)); |
529 | good_pmbr = is_pmbr_valid(legacymbr); | 527 | good_pmbr = is_pmbr_valid(legacymbr); |
530 | kfree(legacymbr); | 528 | kfree(legacymbr); |
531 | } | 529 | } |
@@ -533,15 +531,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
533 | goto fail; | 531 | goto fail; |
534 | } | 532 | } |
535 | 533 | ||
536 | good_pgpt = is_gpt_valid(bdev, GPT_PRIMARY_PARTITION_TABLE_LBA, | 534 | good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA, |
537 | &pgpt, &pptes); | 535 | &pgpt, &pptes); |
538 | if (good_pgpt) | 536 | if (good_pgpt) |
539 | good_agpt = is_gpt_valid(bdev, | 537 | good_agpt = is_gpt_valid(state, |
540 | le64_to_cpu(pgpt->alternate_lba), | 538 | le64_to_cpu(pgpt->alternate_lba), |
541 | &agpt, &aptes); | 539 | &agpt, &aptes); |
542 | if (!good_agpt && force_gpt) | 540 | if (!good_agpt && force_gpt) |
543 | good_agpt = is_gpt_valid(bdev, lastlba, | 541 | good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes); |
544 | &agpt, &aptes); | ||
545 | 542 | ||
546 | /* The obviously unsuccessful case */ | 543 | /* The obviously unsuccessful case */ |
547 | if (!good_pgpt && !good_agpt) | 544 | if (!good_pgpt && !good_agpt) |
@@ -583,9 +580,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
583 | } | 580 | } |
584 | 581 | ||
585 | /** | 582 | /** |
586 | * efi_partition(struct parsed_partitions *state, struct block_device *bdev) | 583 | * efi_partition(struct parsed_partitions *state) |
587 | * @state | 584 | * @state |
588 | * @bdev | ||
589 | * | 585 | * |
590 | * Description: called from check.c, if the disk contains GPT | 586 | * Description: called from check.c, if the disk contains GPT |
591 | * partitions, sets up partition entries in the kernel. | 587 | * partitions, sets up partition entries in the kernel. |
@@ -602,15 +598,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
602 | * 1 if successful | 598 | * 1 if successful |
603 | * | 599 | * |
604 | */ | 600 | */ |
605 | int | 601 | int efi_partition(struct parsed_partitions *state) |
606 | efi_partition(struct parsed_partitions *state, struct block_device *bdev) | ||
607 | { | 602 | { |
608 | gpt_header *gpt = NULL; | 603 | gpt_header *gpt = NULL; |
609 | gpt_entry *ptes = NULL; | 604 | gpt_entry *ptes = NULL; |
610 | u32 i; | 605 | u32 i; |
611 | unsigned ssz = bdev_logical_block_size(bdev) / 512; | 606 | unsigned ssz = bdev_logical_block_size(state->bdev) / 512; |
612 | 607 | ||
613 | if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { | 608 | if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { |
614 | kfree(gpt); | 609 | kfree(gpt); |
615 | kfree(ptes); | 610 | kfree(ptes); |
616 | return 0; | 611 | return 0; |
@@ -623,7 +618,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
623 | u64 size = le64_to_cpu(ptes[i].ending_lba) - | 618 | u64 size = le64_to_cpu(ptes[i].ending_lba) - |
624 | le64_to_cpu(ptes[i].starting_lba) + 1ULL; | 619 | le64_to_cpu(ptes[i].starting_lba) + 1ULL; |
625 | 620 | ||
626 | if (!is_pte_valid(&ptes[i], last_lba(bdev))) | 621 | if (!is_pte_valid(&ptes[i], last_lba(state->bdev))) |
627 | continue; | 622 | continue; |
628 | 623 | ||
629 | put_partition(state, i+1, start * ssz, size * ssz); | 624 | put_partition(state, i+1, start * ssz, size * ssz); |
@@ -631,7 +626,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
631 | /* If this is a RAID volume, tell md */ | 626 | /* If this is a RAID volume, tell md */ |
632 | if (!efi_guidcmp(ptes[i].partition_type_guid, | 627 | if (!efi_guidcmp(ptes[i].partition_type_guid, |
633 | PARTITION_LINUX_RAID_GUID)) | 628 | PARTITION_LINUX_RAID_GUID)) |
634 | state->parts[i+1].flags = 1; | 629 | state->parts[i + 1].flags = ADDPART_FLAG_RAID; |
635 | } | 630 | } |
636 | kfree(ptes); | 631 | kfree(ptes); |
637 | kfree(gpt); | 632 | kfree(gpt); |
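The conversion above is the pattern repeated across every parser in this series: the struct block_device argument is dropped and the device is reached through state->bdev instead. The read_part_sector() helper the parsers now call is added to fs/partitions/check.h elsewhere in the series (not shown in this section); a minimal sketch of what such a wrapper presumably looks like, assuming parsed_partitions carries the bdev pointer:

    /* Sketch of the assumed helper in fs/partitions/check.h: forward a
     * sector read to read_dev_sector() using the device cached in the
     * parse state. */
    static inline unsigned char *read_part_sector(struct parsed_partitions *state,
                                                  sector_t n, Sector *p)
    {
            return read_dev_sector(state->bdev, n, p);
    }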
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h index 6998b589abf9..b69ab729558f 100644 --- a/fs/partitions/efi.h +++ b/fs/partitions/efi.h | |||
@@ -110,7 +110,7 @@ typedef struct _legacy_mbr { | |||
110 | } __attribute__ ((packed)) legacy_mbr; | 110 | } __attribute__ ((packed)) legacy_mbr; |
111 | 111 | ||
112 | /* Functions */ | 112 | /* Functions */ |
113 | extern int efi_partition(struct parsed_partitions *state, struct block_device *bdev); | 113 | extern int efi_partition(struct parsed_partitions *state); |
114 | 114 | ||
115 | #endif | 115 | #endif |
116 | 116 | ||
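efi_partition() registers partitions in 512-byte units while GPT entries are expressed in logical blocks, so ssz = bdev_logical_block_size(state->bdev) / 512 is the scale factor between the two. A self-contained sketch of the arithmetic, with made-up example values:

    #include <stdio.h>

    /* Illustration of the LBA-to-512-byte-sector scaling done by
     * efi_partition(); the sizes below are example values only. */
    int main(void)
    {
            unsigned long long start_lba = 256, end_lba = 511; /* GPT entry */
            unsigned lbs = 4096;                    /* e.g. a 4Kn drive */
            unsigned ssz = lbs / 512;               /* scale factor: 8 */
            unsigned long long size = end_lba - start_lba + 1ULL;

            /* prints: start=2048 size=2048 (512-byte sectors) */
            printf("start=%llu size=%llu (512-byte sectors)\n",
                   start_lba * ssz, size * ssz);
            return 0;
    }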
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index fc71aab08460..3e73de5967ff 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -58,9 +58,9 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { | |||
58 | 58 | ||
59 | /* | 59 | /* |
60 | */ | 60 | */ |
61 | int | 61 | int ibm_partition(struct parsed_partitions *state) |
62 | ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | ||
63 | { | 62 | { |
63 | struct block_device *bdev = state->bdev; | ||
64 | int blocksize, res; | 64 | int blocksize, res; |
65 | loff_t i_size, offset, size, fmt_size; | 65 | loff_t i_size, offset, size, fmt_size; |
66 | dasd_information2_t *info; | 66 | dasd_information2_t *info; |
@@ -100,7 +100,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
100 | /* | 100 | /* |
101 | * Get volume label, extract name and type. | 101 | * Get volume label, extract name and type. |
102 | */ | 102 | */ |
103 | data = read_dev_sector(bdev, info->label_block*(blocksize/512), &sect); | 103 | data = read_part_sector(state, info->label_block*(blocksize/512), |
104 | &sect); | ||
104 | if (data == NULL) | 105 | if (data == NULL) |
105 | goto out_readerr; | 106 | goto out_readerr; |
106 | 107 | ||
@@ -193,8 +194,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
193 | */ | 194 | */ |
194 | blk = cchhb2blk(&label->vol.vtoc, geo) + 1; | 195 | blk = cchhb2blk(&label->vol.vtoc, geo) + 1; |
195 | counter = 0; | 196 | counter = 0; |
196 | data = read_dev_sector(bdev, blk * (blocksize/512), | 197 | data = read_part_sector(state, blk * (blocksize/512), |
197 | &sect); | 198 | &sect); |
198 | while (data != NULL) { | 199 | while (data != NULL) { |
199 | struct vtoc_format1_label f1; | 200 | struct vtoc_format1_label f1; |
200 | 201 | ||
@@ -208,9 +209,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
208 | || f1.DS1FMTID == _ascebc['7'] | 209 | || f1.DS1FMTID == _ascebc['7'] |
209 | || f1.DS1FMTID == _ascebc['9']) { | 210 | || f1.DS1FMTID == _ascebc['9']) { |
210 | blk++; | 211 | blk++; |
211 | data = read_dev_sector(bdev, blk * | 212 | data = read_part_sector(state, |
212 | (blocksize/512), | 213 | blk * (blocksize/512), &sect); |
213 | &sect); | ||
214 | continue; | 214 | continue; |
215 | } | 215 | } |
216 | 216 | ||
@@ -230,9 +230,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
230 | size * (blocksize >> 9)); | 230 | size * (blocksize >> 9)); |
231 | counter++; | 231 | counter++; |
232 | blk++; | 232 | blk++; |
233 | data = read_dev_sector(bdev, | 233 | data = read_part_sector(state, |
234 | blk * (blocksize/512), | 234 | blk * (blocksize/512), &sect); |
235 | &sect); | ||
236 | } | 235 | } |
237 | 236 | ||
238 | if (!data) | 237 | if (!data) |
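The recurring expression info->label_block * (blocksize/512) in ibm.c does the analogous unit conversion for DASD devices, whose block size can exceed 512 bytes: it turns a device-block index into the 512-byte sector index that the read helper expects, so with a 4096-byte block size the label at device block 2 lands at sector 16. A hypothetical helper (not in the patch) that makes the conversion explicit:

    /* Hypothetical helper for illustration only: device-block index to
     * 512-byte sector index for a device with the given block size. */
    static inline unsigned long blk_to_sector(unsigned long blk, int blocksize)
    {
            return blk * (blocksize / 512);
    }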
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h index 31f85a6ac459..08fb0804a812 100644 --- a/fs/partitions/ibm.h +++ b/fs/partitions/ibm.h | |||
@@ -1 +1 @@ | |||
int ibm_partition(struct parsed_partitions *, struct block_device *); | int ibm_partition(struct parsed_partitions *); | ||
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c index 176d89bcf123..1cc928bb762f 100644 --- a/fs/partitions/karma.c +++ b/fs/partitions/karma.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "check.h" | 9 | #include "check.h" |
10 | #include "karma.h" | 10 | #include "karma.h" |
11 | 11 | ||
12 | int karma_partition(struct parsed_partitions *state, struct block_device *bdev) | 12 | int karma_partition(struct parsed_partitions *state) |
13 | { | 13 | { |
14 | int i; | 14 | int i; |
15 | int slot = 1; | 15 | int slot = 1; |
@@ -29,7 +29,7 @@ int karma_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
29 | } __attribute__((packed)) *label; | 29 | } __attribute__((packed)) *label; |
30 | struct d_partition *p; | 30 | struct d_partition *p; |
31 | 31 | ||
32 | data = read_dev_sector(bdev, 0, &sect); | 32 | data = read_part_sector(state, 0, &sect); |
33 | if (!data) | 33 | if (!data) |
34 | return -1; | 34 | return -1; |
35 | 35 | ||
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h index ecf7d3f2a3d8..c764b2e9df21 100644 --- a/fs/partitions/karma.h +++ b/fs/partitions/karma.h | |||
@@ -4,5 +4,5 @@ | |||
4 | 4 | ||
5 | #define KARMA_LABEL_MAGIC 0xAB56 | 5 | #define KARMA_LABEL_MAGIC 0xAB56 |
6 | 6 | ||
7 | int karma_partition(struct parsed_partitions *state, struct block_device *bdev); | 7 | int karma_partition(struct parsed_partitions *state); |
8 | 8 | ||
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 8652fb99e962..648c9d8f3357 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
28 | #include <linux/stringify.h> | 28 | #include <linux/stringify.h> |
29 | #include <linux/kernel.h> | ||
29 | #include "ldm.h" | 30 | #include "ldm.h" |
30 | #include "check.h" | 31 | #include "check.h" |
31 | #include "msdos.h" | 32 | #include "msdos.h" |
@@ -77,17 +78,16 @@ static int ldm_parse_hexbyte (const u8 *src) | |||
77 | int h; | 78 | int h; |
78 | 79 | ||
79 | /* high part */ | 80 | /* high part */ |
80 | if ((x = src[0] - '0') <= '9'-'0') h = x; | 81 | x = h = hex_to_bin(src[0]); |
81 | else if ((x = src[0] - 'a') <= 'f'-'a') h = x+10; | 82 | if (h < 0) |
82 | else if ((x = src[0] - 'A') <= 'F'-'A') h = x+10; | 83 | return -1; |
83 | else return -1; | ||
84 | h <<= 4; | ||
85 | 84 | ||
86 | /* low part */ | 85 | /* low part */ |
87 | if ((x = src[1] - '0') <= '9'-'0') return h | x; | 86 | h = hex_to_bin(src[1]); |
88 | if ((x = src[1] - 'a') <= 'f'-'a') return h | (x+10); | 87 | if (h < 0) |
89 | if ((x = src[1] - 'A') <= 'F'-'A') return h | (x+10); | 88 | return -1; |
90 | return -1; | 89 | |
90 | return (x << 4) + h; | ||
91 | } | 91 | } |
92 | 92 | ||
93 | /** | 93 | /** |
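The rewritten ldm_parse_hexbyte() keeps the high nibble in x (note the double assignment x = h = hex_to_bin(src[0])), reuses h for the low nibble, and combines them as (x << 4) + h; hex_to_bin() returns a negative value for a non-hex character, which both branches turn into the function's -1 error return. A runnable userspace mirror of the same logic, with a stand-in for the kernel's hex_to_bin():

    #include <ctype.h>
    #include <stdio.h>

    /* Stand-in for hex_to_bin() from <linux/kernel.h>. */
    static int hex_to_bin(char ch)
    {
            if (ch >= '0' && ch <= '9')
                    return ch - '0';
            ch = tolower(ch);
            if (ch >= 'a' && ch <= 'f')
                    return ch - 'a' + 10;
            return -1;
    }

    /* Same structure as the patched ldm_parse_hexbyte(). */
    static int parse_hexbyte(const char *src)
    {
            int x, h;

            x = h = hex_to_bin(src[0]);     /* high part */
            if (h < 0)
                    return -1;
            h = hex_to_bin(src[1]);         /* low part */
            if (h < 0)
                    return -1;
            return (x << 4) + h;
    }

    int main(void)
    {
            printf("%d\n", parse_hexbyte("4f"));    /* prints 79 */
            return 0;
    }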
@@ -309,7 +309,7 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1, | |||
309 | 309 | ||
310 | /** | 310 | /** |
311 | * ldm_validate_privheads - Compare the primary privhead with its backups | 311 | * ldm_validate_privheads - Compare the primary privhead with its backups |
312 | * @bdev: Device holding the LDM Database | 312 | * @state: Partition check state including device holding the LDM Database |
313 | * @ph1: Memory struct to fill with ph contents | 313 | * @ph1: Memory struct to fill with ph contents |
314 | * | 314 | * |
315 | * Read and compare all three privheads from disk. | 315 | * Read and compare all three privheads from disk. |
@@ -321,8 +321,8 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1, | |||
321 | * Return: 'true' Success | 321 | * Return: 'true' Success |
322 | * 'false' Error | 322 | * 'false' Error |
323 | */ | 323 | */ |
324 | static bool ldm_validate_privheads (struct block_device *bdev, | 324 | static bool ldm_validate_privheads(struct parsed_partitions *state, |
325 | struct privhead *ph1) | 325 | struct privhead *ph1) |
326 | { | 326 | { |
327 | static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; | 327 | static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; |
328 | struct privhead *ph[3] = { ph1 }; | 328 | struct privhead *ph[3] = { ph1 }; |
@@ -332,7 +332,7 @@ static bool ldm_validate_privheads (struct block_device *bdev, | |||
332 | long num_sects; | 332 | long num_sects; |
333 | int i; | 333 | int i; |
334 | 334 | ||
335 | BUG_ON (!bdev || !ph1); | 335 | BUG_ON (!state || !ph1); |
336 | 336 | ||
337 | ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); | 337 | ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); |
338 | ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); | 338 | ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); |
@@ -346,8 +346,8 @@ static bool ldm_validate_privheads (struct block_device *bdev, | |||
346 | 346 | ||
347 | /* Read and parse privheads */ | 347 | /* Read and parse privheads */ |
348 | for (i = 0; i < 3; i++) { | 348 | for (i = 0; i < 3; i++) { |
349 | data = read_dev_sector (bdev, | 349 | data = read_part_sector(state, ph[0]->config_start + off[i], |
350 | ph[0]->config_start + off[i], &sect); | 350 | &sect); |

351 | if (!data) { | 351 | if (!data) { |
352 | ldm_crit ("Disk read failed."); | 352 | ldm_crit ("Disk read failed."); |
353 | goto out; | 353 | goto out; |
@@ -363,7 +363,7 @@ static bool ldm_validate_privheads (struct block_device *bdev, | |||
363 | } | 363 | } |
364 | } | 364 | } |
365 | 365 | ||
366 | num_sects = bdev->bd_inode->i_size >> 9; | 366 | num_sects = state->bdev->bd_inode->i_size >> 9; |
367 | 367 | ||
368 | if ((ph[0]->config_start > num_sects) || | 368 | if ((ph[0]->config_start > num_sects) || |
369 | ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { | 369 | ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { |
@@ -397,20 +397,20 @@ out: | |||
397 | 397 | ||
398 | /** | 398 | /** |
399 | * ldm_validate_tocblocks - Validate the table of contents and its backups | 399 | * ldm_validate_tocblocks - Validate the table of contents and its backups |
400 | * @bdev: Device holding the LDM Database | 400 | * @state: Partition check state including device holding the LDM Database |
401 | * @base: Offset, into @bdev, of the database | 401 | * @base: Offset, into @state->bdev, of the database |
402 | * @ldb: Cache of the database structures | 402 | * @ldb: Cache of the database structures |
403 | * | 403 | * |
404 | * Find and compare the four tables of contents of the LDM Database stored on | 404 | * Find and compare the four tables of contents of the LDM Database stored on |
405 | * @bdev and return the parsed information into @toc1. | 405 | * @state->bdev and return the parsed information into @toc1. |
406 | * | 406 | * |
407 | * The offsets and sizes of the configs are range-checked against a privhead. | 407 | * The offsets and sizes of the configs are range-checked against a privhead. |
408 | * | 408 | * |
409 | * Return: 'true' @toc1 contains validated TOCBLOCK info | 409 | * Return: 'true' @toc1 contains validated TOCBLOCK info |
410 | * 'false' @toc1 contents are undefined | 410 | * 'false' @toc1 contents are undefined |
411 | */ | 411 | */ |
412 | static bool ldm_validate_tocblocks(struct block_device *bdev, | 412 | static bool ldm_validate_tocblocks(struct parsed_partitions *state, |
413 | unsigned long base, struct ldmdb *ldb) | 413 | unsigned long base, struct ldmdb *ldb) |
414 | { | 414 | { |
415 | static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; | 415 | static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; |
416 | struct tocblock *tb[4]; | 416 | struct tocblock *tb[4]; |
@@ -420,7 +420,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev, | |||
420 | int i, nr_tbs; | 420 | int i, nr_tbs; |
421 | bool result = false; | 421 | bool result = false; |
422 | 422 | ||
423 | BUG_ON(!bdev || !ldb); | 423 | BUG_ON(!state || !ldb); |
424 | ph = &ldb->ph; | 424 | ph = &ldb->ph; |
425 | tb[0] = &ldb->toc; | 425 | tb[0] = &ldb->toc; |
426 | tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); | 426 | tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); |
@@ -437,7 +437,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev, | |||
437 | * skip any that fail as long as we get at least one valid TOCBLOCK. | 437 | * skip any that fail as long as we get at least one valid TOCBLOCK. |
438 | */ | 438 | */ |
439 | for (nr_tbs = i = 0; i < 4; i++) { | 439 | for (nr_tbs = i = 0; i < 4; i++) { |
440 | data = read_dev_sector(bdev, base + off[i], &sect); | 440 | data = read_part_sector(state, base + off[i], &sect); |
441 | if (!data) { | 441 | if (!data) { |
442 | ldm_error("Disk read failed for TOCBLOCK %d.", i); | 442 | ldm_error("Disk read failed for TOCBLOCK %d.", i); |
443 | continue; | 443 | continue; |
@@ -473,7 +473,7 @@ err: | |||
473 | 473 | ||
474 | /** | 474 | /** |
475 | * ldm_validate_vmdb - Read the VMDB and validate it | 475 | * ldm_validate_vmdb - Read the VMDB and validate it |
476 | * @bdev: Device holding the LDM Database | 476 | * @state: Partition check state including device holding the LDM Database |
477 | * @base: Offset, into @bdev, of the database | 477 | * @base: Offset, into @bdev, of the database |
478 | * @ldb: Cache of the database structures | 478 | * @ldb: Cache of the database structures |
479 | * | 479 | * |
@@ -483,8 +483,8 @@ err: | |||
483 | * Return: 'true' @ldb contains validated VBDB info | 483 | * Return: 'true' @ldb contains validated VBDB info |
484 | * 'false' @ldb contents are undefined | 484 | * 'false' @ldb contents are undefined |
485 | */ | 485 | */ |
486 | static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, | 486 | static bool ldm_validate_vmdb(struct parsed_partitions *state, |
487 | struct ldmdb *ldb) | 487 | unsigned long base, struct ldmdb *ldb) |
488 | { | 488 | { |
489 | Sector sect; | 489 | Sector sect; |
490 | u8 *data; | 490 | u8 *data; |
@@ -492,12 +492,12 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, | |||
492 | struct vmdb *vm; | 492 | struct vmdb *vm; |
493 | struct tocblock *toc; | 493 | struct tocblock *toc; |
494 | 494 | ||
495 | BUG_ON (!bdev || !ldb); | 495 | BUG_ON (!state || !ldb); |
496 | 496 | ||
497 | vm = &ldb->vm; | 497 | vm = &ldb->vm; |
498 | toc = &ldb->toc; | 498 | toc = &ldb->toc; |
499 | 499 | ||
500 | data = read_dev_sector (bdev, base + OFF_VMDB, &sect); | 500 | data = read_part_sector(state, base + OFF_VMDB, &sect); |
501 | if (!data) { | 501 | if (!data) { |
502 | ldm_crit ("Disk read failed."); | 502 | ldm_crit ("Disk read failed."); |
503 | return false; | 503 | return false; |
@@ -534,21 +534,21 @@ out: | |||
534 | 534 | ||
535 | /** | 535 | /** |
536 | * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk | 536 | * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk |
537 | * @bdev: Device holding the LDM Database | 537 | * @state: Partition check state including device holding the LDM Database |
538 | * | 538 | * |
539 | * This function provides a weak test to decide whether the device is a dynamic | 539 | * This function provides a weak test to decide whether the device is a dynamic |
540 | * disk or not. It looks for an MS-DOS-style partition table containing at | 540 | * disk or not. It looks for an MS-DOS-style partition table containing at |
541 | * least one partition of type 0x42 (formerly SFS, now used by Windows for | 541 | * least one partition of type 0x42 (formerly SFS, now used by Windows for |
542 | * dynamic disks). | 542 | * dynamic disks). |
543 | * | 543 | * |
544 | * N.B. The only possible error can come from the read_dev_sector and that is | 544 | * N.B. The only possible error can come from the read_part_sector and that is |
545 | * only likely to happen if the underlying device is strange. If that IS | 545 | * only likely to happen if the underlying device is strange. If that IS |
546 | * the case we should return zero to let someone else try. | 546 | * the case we should return zero to let someone else try. |
547 | * | 547 | * |
548 | * Return: 'true' @bdev is a dynamic disk | 548 | * Return: 'true' @state->bdev is a dynamic disk |
549 | * 'false' @bdev is not a dynamic disk, or an error occurred | 549 | * 'false' @state->bdev is not a dynamic disk, or an error occurred |
550 | */ | 550 | */ |
551 | static bool ldm_validate_partition_table (struct block_device *bdev) | 551 | static bool ldm_validate_partition_table(struct parsed_partitions *state) |
552 | { | 552 | { |
553 | Sector sect; | 553 | Sector sect; |
554 | u8 *data; | 554 | u8 *data; |
@@ -556,9 +556,9 @@ static bool ldm_validate_partition_table (struct block_device *bdev) | |||
556 | int i; | 556 | int i; |
557 | bool result = false; | 557 | bool result = false; |
558 | 558 | ||
559 | BUG_ON (!bdev); | 559 | BUG_ON(!state); |
560 | 560 | ||
561 | data = read_dev_sector (bdev, 0, &sect); | 561 | data = read_part_sector(state, 0, &sect); |
562 | if (!data) { | 562 | if (!data) { |
563 | ldm_crit ("Disk read failed."); | 563 | ldm_crit ("Disk read failed."); |
564 | return false; | 564 | return false; |
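The "weak test" described in the comment only has to spot an MS-DOS partition table with at least one type-0x42 entry; all heavier validation is deferred to the privhead, TOCBLOCK, and VMDB checks that follow. A sketch of the scan, assuming the conventional 0x1be table offset and 16-byte entries (this is an illustration, not the patch's code):

    /* Sketch only: scan the four primary MS-DOS entries in a boot
     * sector for the dynamic-disk type byte (0x42). */
    static int looks_like_dynamic_disk(const unsigned char *sector)
    {
            const unsigned char *entry = sector + 0x1be;
            int i;

            for (i = 0; i < 4; i++, entry += 16)
                    if (entry[4] == 0x42)   /* system indicator byte */
                            return 1;
            return 0;
    }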
@@ -1391,8 +1391,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) | |||
1391 | 1391 | ||
1392 | /** | 1392 | /** |
1393 | * ldm_get_vblks - Read the on-disk database of VBLKs into memory | 1393 | * ldm_get_vblks - Read the on-disk database of VBLKs into memory |
1394 | * @bdev: Device holding the LDM Database | 1394 | * @state: Partition check state including device holding the LDM Database |
1395 | * @base: Offset, into @bdev, of the database | 1395 | * @base: Offset, into @state->bdev, of the database |
1396 | * @ldb: Cache of the database structures | 1396 | * @ldb: Cache of the database structures |
1397 | * | 1397 | * |
1398 | * To use the information from the VBLKs, they need to be read from the disk, | 1398 | * To use the information from the VBLKs, they need to be read from the disk, |
@@ -1401,8 +1401,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) | |||
1401 | * Return: 'true' All the VBLKs were read successfully | 1401 | * Return: 'true' All the VBLKs were read successfully |
1402 | * 'false' An error occurred | 1402 | * 'false' An error occurred |
1403 | */ | 1403 | */ |
1404 | static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, | 1404 | static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base, |
1405 | struct ldmdb *ldb) | 1405 | struct ldmdb *ldb) |
1406 | { | 1406 | { |
1407 | int size, perbuf, skip, finish, s, v, recs; | 1407 | int size, perbuf, skip, finish, s, v, recs; |
1408 | u8 *data = NULL; | 1408 | u8 *data = NULL; |
@@ -1410,7 +1410,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, | |||
1410 | bool result = false; | 1410 | bool result = false; |
1411 | LIST_HEAD (frags); | 1411 | LIST_HEAD (frags); |
1412 | 1412 | ||
1413 | BUG_ON (!bdev || !ldb); | 1413 | BUG_ON(!state || !ldb); |
1414 | 1414 | ||
1415 | size = ldb->vm.vblk_size; | 1415 | size = ldb->vm.vblk_size; |
1416 | perbuf = 512 / size; | 1416 | perbuf = 512 / size; |
@@ -1418,7 +1418,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, | |||
1418 | finish = (size * ldb->vm.last_vblk_seq) >> 9; | 1418 | finish = (size * ldb->vm.last_vblk_seq) >> 9; |
1419 | 1419 | ||
1420 | for (s = skip; s < finish; s++) { /* For each sector */ | 1420 | for (s = skip; s < finish; s++) { /* For each sector */ |
1421 | data = read_dev_sector (bdev, base + OFF_VMDB + s, &sect); | 1421 | data = read_part_sector(state, base + OFF_VMDB + s, &sect); |
1422 | if (!data) { | 1422 | if (!data) { |
1423 | ldm_crit ("Disk read failed."); | 1423 | ldm_crit ("Disk read failed."); |
1424 | goto out; | 1424 | goto out; |
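ldm_get_vblks() walks the VBLK area one 512-byte sector at a time: with size = ldb->vm.vblk_size, perbuf = 512 / size is how many VBLKs fit in a sector, and finish = (size * last_vblk_seq) >> 9 is the sector just past the last VBLK. Worked numbers for the common vblk_size of 128: perbuf = 4, and last_vblk_seq = 20 gives finish = (128 * 20) >> 9 = 5. A standalone restatement:

    #include <stdio.h>

    /* The sector-window arithmetic from ldm_get_vblks(), with example
     * values (vblk_size and last_vblk_seq come from the VMDB on disk). */
    int main(void)
    {
            int size = 128, last_vblk_seq = 20;
            int perbuf = 512 / size;                    /* VBLKs per sector: 4 */
            int finish = (size * last_vblk_seq) >> 9;   /* 5 */

            printf("perbuf=%d finish=%d\n", perbuf, finish);
            return 0;
    }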
@@ -1474,8 +1474,7 @@ static void ldm_free_vblks (struct list_head *lh) | |||
1474 | 1474 | ||
1475 | /** | 1475 | /** |
1476 | * ldm_partition - Find out whether a device is a dynamic disk and handle it | 1476 | * ldm_partition - Find out whether a device is a dynamic disk and handle it |
1477 | * @pp: List of the partitions parsed so far | 1477 | * @state: Partition check state including device holding the LDM Database |
1478 | * @bdev: Device holding the LDM Database | ||
1479 | * | 1478 | * |
1480 | * This determines whether the device @bdev is a dynamic disk and if so creates | 1479 | * This determines whether the device @bdev is a dynamic disk and if so creates |
1481 | * the partitions necessary in the gendisk structure pointed to by @hd. | 1480 | * the partitions necessary in the gendisk structure pointed to by @hd. |
@@ -1485,21 +1484,21 @@ static void ldm_free_vblks (struct list_head *lh) | |||
1485 | * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, | 1484 | * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, |
1486 | * and so on: the actual data containing partitions. | 1485 | * and so on: the actual data containing partitions. |
1487 | * | 1486 | * |
1488 | * Return: 1 Success, @bdev is a dynamic disk and we handled it | 1487 | * Return: 1 Success, @state->bdev is a dynamic disk and we handled it |
1489 | * 0 Success, @bdev is not a dynamic disk | 1488 | * 0 Success, @state->bdev is not a dynamic disk |
1490 | * -1 An error occurred before enough information had been read | 1489 | * -1 An error occurred before enough information had been read |
1491 | * Or @bdev is a dynamic disk, but it may be corrupted | 1490 | * Or @state->bdev is a dynamic disk, but it may be corrupted |
1492 | */ | 1491 | */ |
1493 | int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) | 1492 | int ldm_partition(struct parsed_partitions *state) |
1494 | { | 1493 | { |
1495 | struct ldmdb *ldb; | 1494 | struct ldmdb *ldb; |
1496 | unsigned long base; | 1495 | unsigned long base; |
1497 | int result = -1; | 1496 | int result = -1; |
1498 | 1497 | ||
1499 | BUG_ON (!pp || !bdev); | 1498 | BUG_ON(!state); |
1500 | 1499 | ||
1501 | /* Look for signs of a Dynamic Disk */ | 1500 | /* Look for signs of a Dynamic Disk */ |
1502 | if (!ldm_validate_partition_table (bdev)) | 1501 | if (!ldm_validate_partition_table(state)) |
1503 | return 0; | 1502 | return 0; |
1504 | 1503 | ||
1505 | ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); | 1504 | ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); |
@@ -1509,15 +1508,15 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) | |||
1509 | } | 1508 | } |
1510 | 1509 | ||
1511 | /* Parse and check privheads. */ | 1510 | /* Parse and check privheads. */ |
1512 | if (!ldm_validate_privheads (bdev, &ldb->ph)) | 1511 | if (!ldm_validate_privheads(state, &ldb->ph)) |
1513 | goto out; /* Already logged */ | 1512 | goto out; /* Already logged */ |
1514 | 1513 | ||
1515 | /* All further references are relative to base (database start). */ | 1514 | /* All further references are relative to base (database start). */ |
1516 | base = ldb->ph.config_start; | 1515 | base = ldb->ph.config_start; |
1517 | 1516 | ||
1518 | /* Parse and check tocs and vmdb. */ | 1517 | /* Parse and check tocs and vmdb. */ |
1519 | if (!ldm_validate_tocblocks (bdev, base, ldb) || | 1518 | if (!ldm_validate_tocblocks(state, base, ldb) || |
1520 | !ldm_validate_vmdb (bdev, base, ldb)) | 1519 | !ldm_validate_vmdb(state, base, ldb)) |
1521 | goto out; /* Already logged */ | 1520 | goto out; /* Already logged */ |
1522 | 1521 | ||
1523 | /* Initialize vblk lists in ldmdb struct */ | 1522 | /* Initialize vblk lists in ldmdb struct */ |
@@ -1527,13 +1526,13 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) | |||
1527 | INIT_LIST_HEAD (&ldb->v_comp); | 1526 | INIT_LIST_HEAD (&ldb->v_comp); |
1528 | INIT_LIST_HEAD (&ldb->v_part); | 1527 | INIT_LIST_HEAD (&ldb->v_part); |
1529 | 1528 | ||
1530 | if (!ldm_get_vblks (bdev, base, ldb)) { | 1529 | if (!ldm_get_vblks(state, base, ldb)) { |
1531 | ldm_crit ("Failed to read the VBLKs from the database."); | 1530 | ldm_crit ("Failed to read the VBLKs from the database."); |
1532 | goto cleanup; | 1531 | goto cleanup; |
1533 | } | 1532 | } |
1534 | 1533 | ||
1535 | /* Finally, create the data partition devices. */ | 1534 | /* Finally, create the data partition devices. */ |
1536 | if (ldm_create_data_partitions (pp, ldb)) { | 1535 | if (ldm_create_data_partitions(state, ldb)) { |
1537 | ldm_debug ("Parsed LDM database successfully."); | 1536 | ldm_debug ("Parsed LDM database successfully."); |
1538 | result = 1; | 1537 | result = 1; |
1539 | } | 1538 | } |
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 30e08e809c1d..d1fb50b28d86 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h | |||
@@ -209,7 +209,7 @@ struct ldmdb { /* Cache of the database */ | |||
209 | struct list_head v_part; | 209 | struct list_head v_part; |
210 | }; | 210 | }; |
211 | 211 | ||
212 | int ldm_partition (struct parsed_partitions *state, struct block_device *bdev); | 212 | int ldm_partition(struct parsed_partitions *state); |
213 | 213 | ||
214 | #endif /* _FS_PT_LDM_H_ */ | 214 | #endif /* _FS_PT_LDM_H_ */ |
215 | 215 | ||
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index d4a0fad3563b..74465ff7c263 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c | |||
@@ -27,7 +27,7 @@ static inline void mac_fix_string(char *stg, int len) | |||
27 | stg[i] = 0; | 27 | stg[i] = 0; |
28 | } | 28 | } |
29 | 29 | ||
30 | int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | 30 | int mac_partition(struct parsed_partitions *state) |
31 | { | 31 | { |
32 | int slot = 1; | 32 | int slot = 1; |
33 | Sector sect; | 33 | Sector sect; |
@@ -42,7 +42,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
42 | struct mac_driver_desc *md; | 42 | struct mac_driver_desc *md; |
43 | 43 | ||
44 | /* Get 0th block and look at the first partition map entry. */ | 44 | /* Get 0th block and look at the first partition map entry. */ |
45 | md = (struct mac_driver_desc *) read_dev_sector(bdev, 0, &sect); | 45 | md = read_part_sector(state, 0, &sect); |
46 | if (!md) | 46 | if (!md) |
47 | return -1; | 47 | return -1; |
48 | if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { | 48 | if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { |
@@ -51,7 +51,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
51 | } | 51 | } |
52 | secsize = be16_to_cpu(md->block_size); | 52 | secsize = be16_to_cpu(md->block_size); |
53 | put_dev_sector(sect); | 53 | put_dev_sector(sect); |
54 | data = read_dev_sector(bdev, secsize/512, &sect); | 54 | data = read_part_sector(state, secsize/512, &sect); |
55 | if (!data) | 55 | if (!data) |
56 | return -1; | 56 | return -1; |
57 | part = (struct mac_partition *) (data + secsize%512); | 57 | part = (struct mac_partition *) (data + secsize%512); |
@@ -64,7 +64,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
64 | for (blk = 1; blk <= blocks_in_map; ++blk) { | 64 | for (blk = 1; blk <= blocks_in_map; ++blk) { |
65 | int pos = blk * secsize; | 65 | int pos = blk * secsize; |
66 | put_dev_sector(sect); | 66 | put_dev_sector(sect); |
67 | data = read_dev_sector(bdev, pos/512, &sect); | 67 | data = read_part_sector(state, pos/512, &sect); |
68 | if (!data) | 68 | if (!data) |
69 | return -1; | 69 | return -1; |
70 | part = (struct mac_partition *) (data + pos%512); | 70 | part = (struct mac_partition *) (data + pos%512); |
@@ -75,7 +75,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
75 | be32_to_cpu(part->block_count) * (secsize/512)); | 75 | be32_to_cpu(part->block_count) * (secsize/512)); |
76 | 76 | ||
77 | if (!strnicmp(part->type, "Linux_RAID", 10)) | 77 | if (!strnicmp(part->type, "Linux_RAID", 10)) |
78 | state->parts[slot].flags = 1; | 78 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
79 | #ifdef CONFIG_PPC_PMAC | 79 | #ifdef CONFIG_PPC_PMAC |
80 | /* | 80 | /* |
81 | * If this is the first bootable partition, tell the | 81 | * If this is the first bootable partition, tell the |
@@ -123,7 +123,8 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
123 | } | 123 | } |
124 | #ifdef CONFIG_PPC_PMAC | 124 | #ifdef CONFIG_PPC_PMAC |
125 | if (found_root_goodness) | 125 | if (found_root_goodness) |
126 | note_bootable_part(bdev->bd_dev, found_root, found_root_goodness); | 126 | note_bootable_part(state->bdev->bd_dev, found_root, |
127 | found_root_goodness); | ||
127 | #endif | 128 | #endif |
128 | 129 | ||
129 | put_dev_sector(sect); | 130 | put_dev_sector(sect); |
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h index bbf26e1386fa..3c7d98436380 100644 --- a/fs/partitions/mac.h +++ b/fs/partitions/mac.h | |||
@@ -41,4 +41,4 @@ struct mac_driver_desc { | |||
41 | /* ... more stuff */ | 41 | /* ... more stuff */ |
42 | }; | 42 | }; |
43 | 43 | ||
44 | int mac_partition(struct parsed_partitions *state, struct block_device *bdev); | 44 | int mac_partition(struct parsed_partitions *state); |
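The Mac partition map is indexed in native blocks of secsize bytes (taken from the driver descriptor), so entry blk sits at byte offset pos = blk * secsize; mac_partition() reads the enclosing 512-byte sector at pos/512 and finds the entry at pos%512 within it. For the usual secsize values (512, 2048) the inner offset is zero; the %512 term guards against block sizes that are not sector multiples. A runnable illustration of the split:

    #include <stdio.h>

    /* Offset math used by mac_partition(): locate map entry 'blk' of a
     * map with 'secsize'-byte blocks inside 512-byte device sectors. */
    int main(void)
    {
            unsigned secsize = 2048, blk = 3;
            unsigned pos = blk * secsize;   /* 6144 */

            /* prints: sector 12, offset 0 */
            printf("sector %u, offset %u\n", pos / 512, pos % 512);
            return 0;
    }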
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 90be97f1f5a8..15bfb7b1e044 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c | |||
@@ -64,7 +64,7 @@ msdos_magic_present(unsigned char *p) | |||
64 | #define AIX_LABEL_MAGIC2 0xC2 | 64 | #define AIX_LABEL_MAGIC2 0xC2 |
65 | #define AIX_LABEL_MAGIC3 0xD4 | 65 | #define AIX_LABEL_MAGIC3 0xD4 |
66 | #define AIX_LABEL_MAGIC4 0xC1 | 66 | #define AIX_LABEL_MAGIC4 0xC1 |
67 | static int aix_magic_present(unsigned char *p, struct block_device *bdev) | 67 | static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) |
68 | { | 68 | { |
69 | struct partition *pt = (struct partition *) (p + 0x1be); | 69 | struct partition *pt = (struct partition *) (p + 0x1be); |
70 | Sector sect; | 70 | Sector sect; |
@@ -85,7 +85,7 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) | |||
85 | is_extended_partition(pt)) | 85 | is_extended_partition(pt)) |
86 | return 0; | 86 | return 0; |
87 | } | 87 | } |
88 | d = read_dev_sector(bdev, 7, &sect); | 88 | d = read_part_sector(state, 7, &sect); |
89 | if (d) { | 89 | if (d) { |
90 | if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') | 90 | if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') |
91 | ret = 1; | 91 | ret = 1; |
@@ -105,15 +105,14 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) | |||
105 | * only for the actual data partitions. | 105 | * only for the actual data partitions. |
106 | */ | 106 | */ |
107 | 107 | ||
108 | static void | 108 | static void parse_extended(struct parsed_partitions *state, |
109 | parse_extended(struct parsed_partitions *state, struct block_device *bdev, | 109 | sector_t first_sector, sector_t first_size) |
110 | sector_t first_sector, sector_t first_size) | ||
111 | { | 110 | { |
112 | struct partition *p; | 111 | struct partition *p; |
113 | Sector sect; | 112 | Sector sect; |
114 | unsigned char *data; | 113 | unsigned char *data; |
115 | sector_t this_sector, this_size; | 114 | sector_t this_sector, this_size; |
116 | sector_t sector_size = bdev_logical_block_size(bdev) / 512; | 115 | sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; |
117 | int loopct = 0; /* number of links followed | 116 | int loopct = 0; /* number of links followed |
118 | without finding a data partition */ | 117 | without finding a data partition */ |
119 | int i; | 118 | int i; |
@@ -126,7 +125,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, | |||
126 | return; | 125 | return; |
127 | if (state->next == state->limit) | 126 | if (state->next == state->limit) |
128 | return; | 127 | return; |
129 | data = read_dev_sector(bdev, this_sector, &sect); | 128 | data = read_part_sector(state, this_sector, &sect); |
130 | if (!data) | 129 | if (!data) |
131 | return; | 130 | return; |
132 | 131 | ||
@@ -198,9 +197,8 @@ done: | |||
198 | /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also | 197 | /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also |
199 | indicates linux swap. Be careful before believing this is Solaris. */ | 198 | indicates linux swap. Be careful before believing this is Solaris. */ |
200 | 199 | ||
201 | static void | 200 | static void parse_solaris_x86(struct parsed_partitions *state, |
202 | parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, | 201 | sector_t offset, sector_t size, int origin) |
203 | sector_t offset, sector_t size, int origin) | ||
204 | { | 202 | { |
205 | #ifdef CONFIG_SOLARIS_X86_PARTITION | 203 | #ifdef CONFIG_SOLARIS_X86_PARTITION |
206 | Sector sect; | 204 | Sector sect; |
@@ -208,7 +206,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, | |||
208 | int i; | 206 | int i; |
209 | short max_nparts; | 207 | short max_nparts; |
210 | 208 | ||
211 | v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, &sect); | 209 | v = read_part_sector(state, offset + 1, &sect); |
212 | if (!v) | 210 | if (!v) |
213 | return; | 211 | return; |
214 | if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { | 212 | if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { |
@@ -245,16 +243,15 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, | |||
245 | * Create devices for BSD partitions listed in a disklabel, under a | 243 | * Create devices for BSD partitions listed in a disklabel, under a |
246 | * dos-like partition. See parse_extended() for more information. | 244 | * dos-like partition. See parse_extended() for more information. |
247 | */ | 245 | */ |
248 | static void | 246 | static void parse_bsd(struct parsed_partitions *state, |
249 | parse_bsd(struct parsed_partitions *state, struct block_device *bdev, | 247 | sector_t offset, sector_t size, int origin, char *flavour, |
250 | sector_t offset, sector_t size, int origin, char *flavour, | 248 | int max_partitions) |
251 | int max_partitions) | ||
252 | { | 249 | { |
253 | Sector sect; | 250 | Sector sect; |
254 | struct bsd_disklabel *l; | 251 | struct bsd_disklabel *l; |
255 | struct bsd_partition *p; | 252 | struct bsd_partition *p; |
256 | 253 | ||
257 | l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, &sect); | 254 | l = read_part_sector(state, offset + 1, &sect); |
258 | if (!l) | 255 | if (!l) |
259 | return; | 256 | return; |
260 | if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { | 257 | if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { |
@@ -291,33 +288,28 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev, | |||
291 | } | 288 | } |
292 | #endif | 289 | #endif |
293 | 290 | ||
294 | static void | 291 | static void parse_freebsd(struct parsed_partitions *state, |
295 | parse_freebsd(struct parsed_partitions *state, struct block_device *bdev, | 292 | sector_t offset, sector_t size, int origin) |
296 | sector_t offset, sector_t size, int origin) | ||
297 | { | 293 | { |
298 | #ifdef CONFIG_BSD_DISKLABEL | 294 | #ifdef CONFIG_BSD_DISKLABEL |
299 | parse_bsd(state, bdev, offset, size, origin, | 295 | parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS); |
300 | "bsd", BSD_MAXPARTITIONS); | ||
301 | #endif | 296 | #endif |
302 | } | 297 | } |
303 | 298 | ||
304 | static void | 299 | static void parse_netbsd(struct parsed_partitions *state, |
305 | parse_netbsd(struct parsed_partitions *state, struct block_device *bdev, | 300 | sector_t offset, sector_t size, int origin) |
306 | sector_t offset, sector_t size, int origin) | ||
307 | { | 301 | { |
308 | #ifdef CONFIG_BSD_DISKLABEL | 302 | #ifdef CONFIG_BSD_DISKLABEL |
309 | parse_bsd(state, bdev, offset, size, origin, | 303 | parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS); |
310 | "netbsd", BSD_MAXPARTITIONS); | ||
311 | #endif | 304 | #endif |
312 | } | 305 | } |
313 | 306 | ||
314 | static void | 307 | static void parse_openbsd(struct parsed_partitions *state, |
315 | parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, | 308 | sector_t offset, sector_t size, int origin) |
316 | sector_t offset, sector_t size, int origin) | ||
317 | { | 309 | { |
318 | #ifdef CONFIG_BSD_DISKLABEL | 310 | #ifdef CONFIG_BSD_DISKLABEL |
319 | parse_bsd(state, bdev, offset, size, origin, | 311 | parse_bsd(state, offset, size, origin, "openbsd", |
320 | "openbsd", OPENBSD_MAXPARTITIONS); | 312 | OPENBSD_MAXPARTITIONS); |
321 | #endif | 313 | #endif |
322 | } | 314 | } |
323 | 315 | ||
@@ -325,16 +317,15 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, | |||
325 | * Create devices for Unixware partitions listed in a disklabel, under a | 317 | * Create devices for Unixware partitions listed in a disklabel, under a |
326 | * dos-like partition. See parse_extended() for more information. | 318 | * dos-like partition. See parse_extended() for more information. |
327 | */ | 319 | */ |
328 | static void | 320 | static void parse_unixware(struct parsed_partitions *state, |
329 | parse_unixware(struct parsed_partitions *state, struct block_device *bdev, | 321 | sector_t offset, sector_t size, int origin) |
330 | sector_t offset, sector_t size, int origin) | ||
331 | { | 322 | { |
332 | #ifdef CONFIG_UNIXWARE_DISKLABEL | 323 | #ifdef CONFIG_UNIXWARE_DISKLABEL |
333 | Sector sect; | 324 | Sector sect; |
334 | struct unixware_disklabel *l; | 325 | struct unixware_disklabel *l; |
335 | struct unixware_slice *p; | 326 | struct unixware_slice *p; |
336 | 327 | ||
337 | l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, &sect); | 328 | l = read_part_sector(state, offset + 29, &sect); |
338 | if (!l) | 329 | if (!l) |
339 | return; | 330 | return; |
340 | if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || | 331 | if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || |
@@ -365,9 +356,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev, | |||
365 | * Anand Krishnamurthy <anandk@wiproge.med.ge.com> | 356 | * Anand Krishnamurthy <anandk@wiproge.med.ge.com> |
366 | * Rajeev V. Pillai <rajeevvp@yahoo.com> | 357 | * Rajeev V. Pillai <rajeevvp@yahoo.com> |
367 | */ | 358 | */ |
368 | static void | 359 | static void parse_minix(struct parsed_partitions *state, |
369 | parse_minix(struct parsed_partitions *state, struct block_device *bdev, | 360 | sector_t offset, sector_t size, int origin) |
370 | sector_t offset, sector_t size, int origin) | ||
371 | { | 361 | { |
372 | #ifdef CONFIG_MINIX_SUBPARTITION | 362 | #ifdef CONFIG_MINIX_SUBPARTITION |
373 | Sector sect; | 363 | Sector sect; |
@@ -375,7 +365,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, | |||
375 | struct partition *p; | 365 | struct partition *p; |
376 | int i; | 366 | int i; |
377 | 367 | ||
378 | data = read_dev_sector(bdev, offset, &sect); | 368 | data = read_part_sector(state, offset, &sect); |
379 | if (!data) | 369 | if (!data) |
380 | return; | 370 | return; |
381 | 371 | ||
@@ -404,8 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, | |||
404 | 394 | ||
405 | static struct { | 395 | static struct { |
406 | unsigned char id; | 396 | unsigned char id; |
407 | void (*parse)(struct parsed_partitions *, struct block_device *, | 397 | void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); |
408 | sector_t, sector_t, int); | ||
409 | } subtypes[] = { | 398 | } subtypes[] = { |
410 | {FREEBSD_PARTITION, parse_freebsd}, | 399 | {FREEBSD_PARTITION, parse_freebsd}, |
411 | {NETBSD_PARTITION, parse_netbsd}, | 400 | {NETBSD_PARTITION, parse_netbsd}, |
@@ -417,16 +406,16 @@ static struct { | |||
417 | {0, NULL}, | 406 | {0, NULL}, |
418 | }; | 407 | }; |
419 | 408 | ||
420 | int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | 409 | int msdos_partition(struct parsed_partitions *state) |
421 | { | 410 | { |
422 | sector_t sector_size = bdev_logical_block_size(bdev) / 512; | 411 | sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; |
423 | Sector sect; | 412 | Sector sect; |
424 | unsigned char *data; | 413 | unsigned char *data; |
425 | struct partition *p; | 414 | struct partition *p; |
426 | struct fat_boot_sector *fb; | 415 | struct fat_boot_sector *fb; |
427 | int slot; | 416 | int slot; |
428 | 417 | ||
429 | data = read_dev_sector(bdev, 0, &sect); | 418 | data = read_part_sector(state, 0, &sect); |
430 | if (!data) | 419 | if (!data) |
431 | return -1; | 420 | return -1; |
432 | if (!msdos_magic_present(data + 510)) { | 421 | if (!msdos_magic_present(data + 510)) { |
@@ -434,7 +423,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
434 | return 0; | 423 | return 0; |
435 | } | 424 | } |
436 | 425 | ||
437 | if (aix_magic_present(data, bdev)) { | 426 | if (aix_magic_present(state, data)) { |
438 | put_dev_sector(sect); | 427 | put_dev_sector(sect); |
439 | printk( " [AIX]"); | 428 | printk( " [AIX]"); |
440 | return 0; | 429 | return 0; |
@@ -503,13 +492,13 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
503 | put_partition(state, slot, start, n); | 492 | put_partition(state, slot, start, n); |
504 | 493 | ||
505 | printk(" <"); | 494 | printk(" <"); |
506 | parse_extended(state, bdev, start, size); | 495 | parse_extended(state, start, size); |
507 | printk(" >"); | 496 | printk(" >"); |
508 | continue; | 497 | continue; |
509 | } | 498 | } |
510 | put_partition(state, slot, start, size); | 499 | put_partition(state, slot, start, size); |
511 | if (SYS_IND(p) == LINUX_RAID_PARTITION) | 500 | if (SYS_IND(p) == LINUX_RAID_PARTITION) |
512 | state->parts[slot].flags = 1; | 501 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
513 | if (SYS_IND(p) == DM6_PARTITION) | 502 | if (SYS_IND(p) == DM6_PARTITION) |
514 | printk("[DM]"); | 503 | printk("[DM]"); |
515 | if (SYS_IND(p) == EZD_PARTITION) | 504 | if (SYS_IND(p) == EZD_PARTITION) |
@@ -532,8 +521,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
532 | 521 | ||
533 | if (!subtypes[n].parse) | 522 | if (!subtypes[n].parse) |
534 | continue; | 523 | continue; |
535 | subtypes[n].parse(state, bdev, start_sect(p)*sector_size, | 524 | subtypes[n].parse(state, start_sect(p) * sector_size, |
536 | nr_sects(p)*sector_size, slot); | 525 | nr_sects(p) * sector_size, slot); |
537 | } | 526 | } |
538 | put_dev_sector(sect); | 527 | put_dev_sector(sect); |
539 | return 1; | 528 | return 1; |
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h index 01e5e0b6902d..38c781c490b3 100644 --- a/fs/partitions/msdos.h +++ b/fs/partitions/msdos.h | |||
@@ -4,5 +4,5 @@ | |||
4 | 4 | ||
5 | #define MSDOS_LABEL_MAGIC 0xAA55 | 5 | #define MSDOS_LABEL_MAGIC 0xAA55 |
6 | 6 | ||
7 | int msdos_partition(struct parsed_partitions *state, struct block_device *bdev); | 7 | int msdos_partition(struct parsed_partitions *state); |
8 | 8 | ||
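The subtypes[] table in msdos.c is a small dispatcher: each nested-label type byte maps to a parse callback, so dropping the block_device argument from the function-pointer type is what forces every sub-parser above onto the new signature in one sweep. A minimal standalone sketch of the pattern (the entry and types here are illustrative, not the kernel's):

    #include <stdio.h>

    struct parsed_partitions;       /* opaque for this sketch */

    /* Hypothetical callback standing in for parse_freebsd() and friends. */
    static void parse_demo(struct parsed_partitions *state,
                           unsigned long offset, unsigned long size, int origin)
    {
            printf("would parse sub-label at %lu (+%lu)\n", offset, size);
    }

    static struct {
            unsigned char id;
            void (*parse)(struct parsed_partitions *, unsigned long,
                          unsigned long, int);
    } subtypes[] = {
            {0xa5, parse_demo},     /* e.g. the FreeBSD type byte */
            {0, NULL},
    };

    int main(void)
    {
            int n;

            for (n = 0; subtypes[n].parse; n++)
                    if (subtypes[n].id == 0xa5)
                            subtypes[n].parse(NULL, 63, 1024, 1);
            return 0;
    }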
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index c05c17bc5df3..fc22b85d436a 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include "check.h" | 10 | #include "check.h" |
11 | #include "osf.h" | 11 | #include "osf.h" |
12 | 12 | ||
13 | int osf_partition(struct parsed_partitions *state, struct block_device *bdev) | 13 | int osf_partition(struct parsed_partitions *state) |
14 | { | 14 | { |
15 | int i; | 15 | int i; |
16 | int slot = 1; | 16 | int slot = 1; |
@@ -49,7 +49,7 @@ int osf_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
49 | } * label; | 49 | } * label; |
50 | struct d_partition * partition; | 50 | struct d_partition * partition; |
51 | 51 | ||
52 | data = read_dev_sector(bdev, 0, &sect); | 52 | data = read_part_sector(state, 0, &sect); |
53 | if (!data) | 53 | if (!data) |
54 | return -1; | 54 | return -1; |
55 | 55 | ||
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h index 427b8eab314b..20ed2315ec16 100644 --- a/fs/partitions/osf.h +++ b/fs/partitions/osf.h | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | #define DISKLABELMAGIC (0x82564557UL) | 5 | #define DISKLABELMAGIC (0x82564557UL) |
6 | 6 | ||
7 | int osf_partition(struct parsed_partitions *state, struct block_device *bdev); | 7 | int osf_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c index ed5ac83fe83a..43b1df9aa16c 100644 --- a/fs/partitions/sgi.c +++ b/fs/partitions/sgi.c | |||
@@ -27,7 +27,7 @@ struct sgi_disklabel { | |||
27 | __be32 _unused1; /* Padding */ | 27 | __be32 _unused1; /* Padding */ |
28 | }; | 28 | }; |
29 | 29 | ||
30 | int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) | 30 | int sgi_partition(struct parsed_partitions *state) |
31 | { | 31 | { |
32 | int i, csum; | 32 | int i, csum; |
33 | __be32 magic; | 33 | __be32 magic; |
@@ -39,7 +39,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
39 | struct sgi_partition *p; | 39 | struct sgi_partition *p; |
40 | char b[BDEVNAME_SIZE]; | 40 | char b[BDEVNAME_SIZE]; |
41 | 41 | ||
42 | label = (struct sgi_disklabel *) read_dev_sector(bdev, 0, &sect); | 42 | label = read_part_sector(state, 0, &sect); |
43 | if (!label) | 43 | if (!label) |
44 | return -1; | 44 | return -1; |
45 | p = &label->partitions[0]; | 45 | p = &label->partitions[0]; |
@@ -57,7 +57,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
57 | } | 57 | } |
58 | if(csum) { | 58 | if(csum) { |
59 | printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", | 59 | printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", |
60 | bdevname(bdev, b)); | 60 | bdevname(state->bdev, b)); |
61 | put_dev_sector(sect); | 61 | put_dev_sector(sect); |
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h index 5d5595c09928..b9553ebdd5a9 100644 --- a/fs/partitions/sgi.h +++ b/fs/partitions/sgi.h | |||
@@ -2,7 +2,7 @@ | |||
2 | * fs/partitions/sgi.h | 2 | * fs/partitions/sgi.h |
3 | */ | 3 | */ |
4 | 4 | ||
5 | extern int sgi_partition(struct parsed_partitions *state, struct block_device *bdev); | 5 | extern int sgi_partition(struct parsed_partitions *state); |
6 | 6 | ||
7 | #define SGI_LABEL_MAGIC 0x0be5a941 | 7 | #define SGI_LABEL_MAGIC 0x0be5a941 |
8 | 8 | ||
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index c95e6a62c01d..a32660e25f7f 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include "check.h" | 10 | #include "check.h" |
11 | #include "sun.h" | 11 | #include "sun.h" |
12 | 12 | ||
13 | int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | 13 | int sun_partition(struct parsed_partitions *state) |
14 | { | 14 | { |
15 | int i; | 15 | int i; |
16 | __be16 csum; | 16 | __be16 csum; |
@@ -61,7 +61,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
61 | int use_vtoc; | 61 | int use_vtoc; |
62 | int nparts; | 62 | int nparts; |
63 | 63 | ||
64 | label = (struct sun_disklabel *)read_dev_sector(bdev, 0, &sect); | 64 | label = read_part_sector(state, 0, &sect); |
65 | if (!label) | 65 | if (!label) |
66 | return -1; | 66 | return -1; |
67 | 67 | ||
@@ -78,7 +78,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
78 | csum ^= *ush--; | 78 | csum ^= *ush--; |
79 | if (csum) { | 79 | if (csum) { |
80 | printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", | 80 | printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", |
81 | bdevname(bdev, b)); | 81 | bdevname(state->bdev, b)); |
82 | put_dev_sector(sect); | 82 | put_dev_sector(sect); |
83 | return 0; | 83 | return 0; |
84 | } | 84 | } |
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h index 7f864d1f86d4..2424baa8319f 100644 --- a/fs/partitions/sun.h +++ b/fs/partitions/sun.h | |||
@@ -5,4 +5,4 @@ | |||
5 | #define SUN_LABEL_MAGIC 0xDABE | 5 | #define SUN_LABEL_MAGIC 0xDABE |
6 | #define SUN_VTOC_SANITY 0x600DDEEE | 6 | #define SUN_VTOC_SANITY 0x600DDEEE |
7 | 7 | ||
8 | int sun_partition(struct parsed_partitions *state, struct block_device *bdev); | 8 | int sun_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c index 4eba27b78643..9030c864428e 100644 --- a/fs/partitions/sysv68.c +++ b/fs/partitions/sysv68.c | |||
@@ -46,7 +46,7 @@ struct slice { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | 48 | ||
49 | int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) | 49 | int sysv68_partition(struct parsed_partitions *state) |
50 | { | 50 | { |
51 | int i, slices; | 51 | int i, slices; |
52 | int slot = 1; | 52 | int slot = 1; |
@@ -55,7 +55,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
55 | struct dkblk0 *b; | 55 | struct dkblk0 *b; |
56 | struct slice *slice; | 56 | struct slice *slice; |
57 | 57 | ||
58 | data = read_dev_sector(bdev, 0, &sect); | 58 | data = read_part_sector(state, 0, &sect); |
59 | if (!data) | 59 | if (!data) |
60 | return -1; | 60 | return -1; |
61 | 61 | ||
@@ -68,7 +68,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
68 | i = be32_to_cpu(b->dk_ios.ios_slcblk); | 68 | i = be32_to_cpu(b->dk_ios.ios_slcblk); |
69 | put_dev_sector(sect); | 69 | put_dev_sector(sect); |
70 | 70 | ||
71 | data = read_dev_sector(bdev, i, &sect); | 71 | data = read_part_sector(state, i, &sect); |
72 | if (!data) | 72 | if (!data) |
73 | return -1; | 73 | return -1; |
74 | 74 | ||
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h index fa733f68431b..bf2f5ffa97ac 100644 --- a/fs/partitions/sysv68.h +++ b/fs/partitions/sysv68.h | |||
@@ -1 +1 @@ | |||
extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev); | extern int sysv68_partition(struct parsed_partitions *state); | ||
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c index ec852c11dce4..db9eef260364 100644 --- a/fs/partitions/ultrix.c +++ b/fs/partitions/ultrix.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "check.h" | 9 | #include "check.h" |
10 | #include "ultrix.h" | 10 | #include "ultrix.h" |
11 | 11 | ||
12 | int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) | 12 | int ultrix_partition(struct parsed_partitions *state) |
13 | { | 13 | { |
14 | int i; | 14 | int i; |
15 | Sector sect; | 15 | Sector sect; |
@@ -26,7 +26,7 @@ int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
26 | #define PT_MAGIC 0x032957 /* Partition magic number */ | 26 | #define PT_MAGIC 0x032957 /* Partition magic number */ |
27 | #define PT_VALID 1 /* Indicates if struct is valid */ | 27 | #define PT_VALID 1 /* Indicates if struct is valid */ |
28 | 28 | ||
29 | data = read_dev_sector(bdev, (16384 - sizeof(*label))/512, &sect); | 29 | data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect); |
30 | if (!data) | 30 | if (!data) |
31 | return -1; | 31 | return -1; |
32 | 32 | ||
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h index a74bf8e2d370..a3cc00b2bded 100644 --- a/fs/partitions/ultrix.h +++ b/fs/partitions/ultrix.h | |||
@@ -2,4 +2,4 @@ | |||
2 | * fs/partitions/ultrix.h | 2 | * fs/partitions/ultrix.h |
3 | */ | 3 | */ |
4 | 4 | ||
5 | int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev); | 5 | int ultrix_partition(struct parsed_partitions *state); |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/log2.h> | ||
14 | #include <linux/mount.h> | 15 | #include <linux/mount.h> |
15 | #include <linux/pipe_fs_i.h> | 16 | #include <linux/pipe_fs_i.h> |
16 | #include <linux/uio.h> | 17 | #include <linux/uio.h> |
@@ -18,11 +19,18 @@ | |||
18 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
19 | #include <linux/audit.h> | 20 | #include <linux/audit.h> |
20 | #include <linux/syscalls.h> | 21 | #include <linux/syscalls.h> |
22 | #include <linux/fcntl.h> | ||
21 | 23 | ||
22 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
23 | #include <asm/ioctls.h> | 25 | #include <asm/ioctls.h> |
24 | 26 | ||
25 | /* | 27 | /* |
28 | * The max size that a non-root user is allowed to grow the pipe. Can | ||
29 | * be set by root in /proc/sys/fs/pipe-max-pages | ||
30 | */ | ||
31 | unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; | ||
32 | |||
33 | /* | ||
26 | * We use a start+len construction, which provides full use of the | 34 | * We use a start+len construction, which provides full use of the |
27 | * allocated memory. | 35 | * allocated memory. |
28 | * -- Florian Coosmann (FGC) | 36 | * -- Florian Coosmann (FGC) |
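pipe_max_pages caps how far an unprivileged user may grow a pipe (root can raise the cap through the sysctl named in the comment above). Assuming PIPE_DEF_BUFFERS is 16, as in <linux/pipe_fs_i.h> of this era, the default cap works out to 16 * 16 = 256 pages, i.e. 1 MiB with 4 KiB pages.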
@@ -390,7 +398,7 @@ redo: | |||
390 | if (!buf->len) { | 398 | if (!buf->len) { |
391 | buf->ops = NULL; | 399 | buf->ops = NULL; |
392 | ops->release(pipe, buf); | 400 | ops->release(pipe, buf); |
393 | curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); | 401 | curbuf = (curbuf + 1) & (pipe->buffers - 1); |
394 | pipe->curbuf = curbuf; | 402 | pipe->curbuf = curbuf; |
395 | pipe->nrbufs = --bufs; | 403 | pipe->nrbufs = --bufs; |
396 | do_wakeup = 1; | 404 | do_wakeup = 1; |
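All ring-buffer indexing in pipe.c advances the slot with index = (index + 1) & (pipe->buffers - 1), which is why pipe_set_size() further down insists the buffer count stays a power of two: the mask then performs the wrap-around for free. A runnable illustration:

    #include <stdio.h>

    /* Power-of-two ring index wrap, as used with pipe->buffers. */
    int main(void)
    {
            unsigned buffers = 16, curbuf = 15;

            curbuf = (curbuf + 1) & (buffers - 1);
            printf("%u\n", curbuf); /* prints 0: wrapped around */
            return 0;
    }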
@@ -472,7 +480,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
472 | chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ | 480 | chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ |
473 | if (pipe->nrbufs && chars != 0) { | 481 | if (pipe->nrbufs && chars != 0) { |
474 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & | 482 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & |
475 | (PIPE_BUFFERS-1); | 483 | (pipe->buffers - 1); |
476 | struct pipe_buffer *buf = pipe->bufs + lastbuf; | 484 | struct pipe_buffer *buf = pipe->bufs + lastbuf; |
477 | const struct pipe_buf_operations *ops = buf->ops; | 485 | const struct pipe_buf_operations *ops = buf->ops; |
478 | int offset = buf->offset + buf->len; | 486 | int offset = buf->offset + buf->len; |
@@ -518,8 +526,8 @@ redo1: | |||
518 | break; | 526 | break; |
519 | } | 527 | } |
520 | bufs = pipe->nrbufs; | 528 | bufs = pipe->nrbufs; |
521 | if (bufs < PIPE_BUFFERS) { | 529 | if (bufs < pipe->buffers) { |
522 | int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); | 530 | int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); |
523 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 531 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
524 | struct page *page = pipe->tmp_page; | 532 | struct page *page = pipe->tmp_page; |
525 | char *src; | 533 | char *src; |
@@ -580,7 +588,7 @@ redo2: | |||
580 | if (!total_len) | 588 | if (!total_len) |
581 | break; | 589 | break; |
582 | } | 590 | } |
583 | if (bufs < PIPE_BUFFERS) | 591 | if (bufs < pipe->buffers) |
584 | continue; | 592 | continue; |
585 | if (filp->f_flags & O_NONBLOCK) { | 593 | if (filp->f_flags & O_NONBLOCK) { |
586 | if (!ret) | 594 | if (!ret) |
@@ -640,7 +648,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
640 | nrbufs = pipe->nrbufs; | 648 | nrbufs = pipe->nrbufs; |
641 | while (--nrbufs >= 0) { | 649 | while (--nrbufs >= 0) { |
642 | count += pipe->bufs[buf].len; | 650 | count += pipe->bufs[buf].len; |
643 | buf = (buf+1) & (PIPE_BUFFERS-1); | 651 | buf = (buf+1) & (pipe->buffers - 1); |
644 | } | 652 | } |
645 | mutex_unlock(&inode->i_mutex); | 653 | mutex_unlock(&inode->i_mutex); |
646 | 654 | ||
@@ -671,7 +679,7 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
671 | } | 679 | } |
672 | 680 | ||
673 | if (filp->f_mode & FMODE_WRITE) { | 681 | if (filp->f_mode & FMODE_WRITE) { |
674 | mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; | 682 | mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0; |
675 | /* | 683 | /* |
676 | * Most Unices do not set POLLERR for FIFOs but on Linux they | 684 | * Most Unices do not set POLLERR for FIFOs but on Linux they |
677 | * behave exactly like pipes for poll(). | 685 | * behave exactly like pipes for poll(). |
@@ -877,25 +885,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
877 | 885 | ||
878 | pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); | 886 | pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); |
879 | if (pipe) { | 887 | if (pipe) { |
880 | init_waitqueue_head(&pipe->wait); | 888 | pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL); |
881 | pipe->r_counter = pipe->w_counter = 1; | 889 | if (pipe->bufs) { |
882 | pipe->inode = inode; | 890 | init_waitqueue_head(&pipe->wait); |
891 | pipe->r_counter = pipe->w_counter = 1; | ||
892 | pipe->inode = inode; | ||
893 | pipe->buffers = PIPE_DEF_BUFFERS; | ||
894 | return pipe; | ||
895 | } | ||
896 | kfree(pipe); | ||
883 | } | 897 | } |
884 | 898 | ||
885 | return pipe; | 899 | return NULL; |
886 | } | 900 | } |
887 | 901 | ||
888 | void __free_pipe_info(struct pipe_inode_info *pipe) | 902 | void __free_pipe_info(struct pipe_inode_info *pipe) |
889 | { | 903 | { |
890 | int i; | 904 | int i; |
891 | 905 | ||
892 | for (i = 0; i < PIPE_BUFFERS; i++) { | 906 | for (i = 0; i < pipe->buffers; i++) { |
893 | struct pipe_buffer *buf = pipe->bufs + i; | 907 | struct pipe_buffer *buf = pipe->bufs + i; |
894 | if (buf->ops) | 908 | if (buf->ops) |
895 | buf->ops->release(pipe, buf); | 909 | buf->ops->release(pipe, buf); |
896 | } | 910 | } |
897 | if (pipe->tmp_page) | 911 | if (pipe->tmp_page) |
898 | __free_page(pipe->tmp_page); | 912 | __free_page(pipe->tmp_page); |
913 | kfree(pipe->bufs); | ||
899 | kfree(pipe); | 914 | kfree(pipe); |
900 | } | 915 | } |
901 | 916 | ||
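
alloc_pipe_info() now carries two allocations, and the rewritten body frees the outer structure when the buffer array cannot be allocated, so callers still see either a fully constructed pipe or NULL. A sketch of the same unwind idiom in plain C, names illustrative:

#include <stdlib.h>

struct thing {
	int *ring;
};

static struct thing *thing_alloc(size_t n)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (t) {
		t->ring = calloc(n, sizeof(*t->ring));
		if (t->ring)
			return t;	/* fully constructed */
		free(t);		/* inner allocation failed: unwind */
	}
	return NULL;
}
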
@@ -1094,6 +1109,89 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
1094 | } | 1109 | } |
1095 | 1110 | ||
1096 | /* | 1111 | /* |
1112 | * Allocate a new array of pipe buffers and copy the info over. Returns the | ||
1113 | * pipe size if successful, or -ERROR on error. | ||
1114 | */ | ||
1115 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | ||
1116 | { | ||
1117 | struct pipe_buffer *bufs; | ||
1118 | |||
1119 | /* | ||
1120 | * Must be a power-of-2 currently | ||
1121 | */ | ||
1122 | if (!is_power_of_2(arg)) | ||
1123 | return -EINVAL; | ||
1124 | |||
1125 | /* | ||
1126 | * We can shrink the pipe if arg >= pipe->nrbufs. Since we don't | ||
1127 | * expect a lot of shrink+grow operations, just free and allocate | ||
1128 | * again like we would do for growing. If the pipe currently | ||
1129 | * contains more buffers than arg, then return busy. | ||
1130 | */ | ||
1131 | if (arg < pipe->nrbufs) | ||
1132 | return -EBUSY; | ||
1133 | |||
1134 | bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); | ||
1135 | if (unlikely(!bufs)) | ||
1136 | return -ENOMEM; | ||
1137 | |||
1138 | /* | ||
1139 | * The pipe array wraps around, so just start the new one at zero | ||
1140 | * and adjust the indexes. | ||
1141 | */ | ||
1142 | if (pipe->nrbufs) { | ||
1143 | const unsigned int tail = (pipe->curbuf + pipe->nrbufs <= pipe->buffers) ? 0 : ((pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1)); | ||
1144 | const unsigned int head = pipe->nrbufs - tail; | ||
1145 | |||
1146 | if (head) | ||
1147 | memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); | ||
1148 | if (tail) | ||
1149 | memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); | ||
1150 | } | ||
1151 | |||
1152 | pipe->curbuf = 0; | ||
1153 | kfree(pipe->bufs); | ||
1154 | pipe->bufs = bufs; | ||
1155 | pipe->buffers = arg; | ||
1156 | return arg; | ||
1157 | } | ||
1158 | |||
1159 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | ||
1160 | { | ||
1161 | struct pipe_inode_info *pipe; | ||
1162 | long ret; | ||
1163 | |||
1164 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
1165 | if (!pipe) | ||
1166 | return -EBADF; | ||
1167 | |||
1168 | if (cmd == F_SETPIPE_SZ) { | ||
1169 | if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) | ||
1170 | return -EINVAL; | ||
1171 | /* The pipe needs to be at least 2 pages large to guarantee POSIX behaviour */ | ||
1172 | if (arg < 2) | ||
1173 | return -EINVAL; | ||
1174 | } | ||
1175 | |||
1176 | mutex_lock(&pipe->inode->i_mutex); | ||
1177 | |||
1178 | switch (cmd) { | ||
1179 | case F_SETPIPE_SZ: | ||
1180 | ret = pipe_set_size(pipe, arg); | ||
1181 | break; | ||
1182 | case F_GETPIPE_SZ: | ||
1183 | ret = pipe->buffers; | ||
1184 | break; | ||
1185 | default: | ||
1186 | ret = -EINVAL; | ||
1187 | break; | ||
1188 | } | ||
1189 | |||
1190 | mutex_unlock(&pipe->inode->i_mutex); | ||
1191 | return ret; | ||
1192 | } | ||
1193 | |||
1194 | /* | ||
1097 | * pipefs should _never_ be mounted by userland - too much of security hassle, | 1195 | * pipefs should _never_ be mounted by userland - too much of security hassle, |
1098 | * no real gain from having the whole whorehouse mounted. So we don't need | 1196 | * no real gain from having the whole whorehouse mounted. So we don't need |
1099 | * any operations on the root directory. However, we need a non-trivial | 1197 | * any operations on the root directory. However, we need a non-trivial |
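
Taken together, pipe_set_size() and pipe_fcntl() expose the new F_SETPIPE_SZ/F_GETPIPE_SZ fcntls. A hedged userspace sketch of how they would be driven; note that in this version of the interface the argument counts pipe buffers (pages) and must be a power of two, while later kernels switched to a byte count, so the values below are illustrative of this patch only. The fallback constants match the F_LINUX_SPECIFIC_BASE definitions in linux/fcntl.h.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef F_SETPIPE_SZ			/* from linux/fcntl.h on newer systems */
#define F_SETPIPE_SZ	(1024 + 7)
#define F_GETPIPE_SZ	(1024 + 8)
#endif

int main(void)
{
	int fds[2];

	if (pipe(fds) < 0) {
		perror("pipe");
		return 1;
	}
	/* Grow the pipe from the default 16 buffers to 64. */
	if (fcntl(fds[0], F_SETPIPE_SZ, 64) < 0)
		perror("F_SETPIPE_SZ");
	printf("pipe size: %d buffers\n", fcntl(fds[0], F_GETPIPE_SZ));
	return 0;
}

Worth noting as a design choice: pipe_set_size() allocates the new ring with kcalloc() rather than an open-coded kzalloc(n * size), which gets multiplication-overflow checking on the user-supplied count for free.
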
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 47f5b145f56e..aea1d3f1ffb5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -634,6 +634,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
634 | return err; | 634 | return err; |
635 | } | 635 | } |
636 | 636 | ||
637 | #ifdef CONFIG_HUGETLB_PAGE | ||
637 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) | 638 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) |
638 | { | 639 | { |
639 | u64 pme = 0; | 640 | u64 pme = 0; |
@@ -664,6 +665,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
664 | 665 | ||
665 | return err; | 666 | return err; |
666 | } | 667 | } |
668 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
667 | 669 | ||
668 | /* | 670 | /* |
669 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 671 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
@@ -733,7 +735,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
733 | 735 | ||
734 | pagemap_walk.pmd_entry = pagemap_pte_range; | 736 | pagemap_walk.pmd_entry = pagemap_pte_range; |
735 | pagemap_walk.pte_hole = pagemap_pte_hole; | 737 | pagemap_walk.pte_hole = pagemap_pte_hole; |
738 | #ifdef CONFIG_HUGETLB_PAGE | ||
736 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | 739 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; |
740 | #endif | ||
737 | pagemap_walk.mm = mm; | 741 | pagemap_walk.mm = mm; |
738 | pagemap_walk.private = &pm; | 742 | pagemap_walk.private = &pm; |
739 | 743 | ||
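
The task_mmu.c change compiles the hugetlb walk callback in and out under the same CONFIG_HUGETLB_PAGE guard that covers its definition, so non-hugetlb builds never reference the symbol. A kernel-style sketch of the pattern with illustrative names; it assumes the callback member itself is only present under the same option:

struct walker {
	int (*pte_entry)(unsigned long addr, void *priv);
#ifdef CONFIG_HUGETLB_PAGE
	int (*hugetlb_entry)(unsigned long addr, void *priv);
#endif
};

#ifdef CONFIG_HUGETLB_PAGE
static int my_hugetlb_entry(unsigned long addr, void *priv)
{
	return 0;	/* handle a huge page mapping */
}
#endif

static void setup_walk(struct walker *w)
{
	w->pte_entry = 0;
#ifdef CONFIG_HUGETLB_PAGE
	w->hugetlb_entry = my_hugetlb_entry;	/* guard matches the definition */
#endif
}
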
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 788b5802a7ce..655a4c52b8c3 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -82,7 +82,7 @@ | |||
82 | 82 | ||
83 | /* | 83 | /* |
84 | * There are three quota SMP locks. dq_list_lock protects all lists with quotas | 84 | * There are three quota SMP locks. dq_list_lock protects all lists with quotas |
85 | * and quota formats, dqstats structure containing statistics about the lists | 85 | * and quota formats. |
86 | * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and | 86 | * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and |
87 | * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. | 87 | * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. |
88 | * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly | 88 | * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly |
@@ -132,7 +132,9 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); | |||
132 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); | 132 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); |
133 | EXPORT_SYMBOL(dq_data_lock); | 133 | EXPORT_SYMBOL(dq_data_lock); |
134 | 134 | ||
135 | #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) | ||
135 | static char *quotatypes[] = INITQFNAMES; | 136 | static char *quotatypes[] = INITQFNAMES; |
137 | #endif | ||
136 | static struct quota_format_type *quota_formats; /* List of registered formats */ | 138 | static struct quota_format_type *quota_formats; /* List of registered formats */ |
137 | static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; | 139 | static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; |
138 | 140 | ||
@@ -226,6 +228,10 @@ static struct hlist_head *dquot_hash; | |||
226 | 228 | ||
227 | struct dqstats dqstats; | 229 | struct dqstats dqstats; |
228 | EXPORT_SYMBOL(dqstats); | 230 | EXPORT_SYMBOL(dqstats); |
231 | #ifdef CONFIG_SMP | ||
232 | struct dqstats *dqstats_pcpu; | ||
233 | EXPORT_SYMBOL(dqstats_pcpu); | ||
234 | #endif | ||
229 | 235 | ||
230 | static qsize_t inode_get_rsv_space(struct inode *inode); | 236 | static qsize_t inode_get_rsv_space(struct inode *inode); |
231 | static void __dquot_initialize(struct inode *inode, int type); | 237 | static void __dquot_initialize(struct inode *inode, int type); |
@@ -273,7 +279,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, | |||
273 | static inline void put_dquot_last(struct dquot *dquot) | 279 | static inline void put_dquot_last(struct dquot *dquot) |
274 | { | 280 | { |
275 | list_add_tail(&dquot->dq_free, &free_dquots); | 281 | list_add_tail(&dquot->dq_free, &free_dquots); |
276 | dqstats.free_dquots++; | 282 | dqstats_inc(DQST_FREE_DQUOTS); |
277 | } | 283 | } |
278 | 284 | ||
279 | static inline void remove_free_dquot(struct dquot *dquot) | 285 | static inline void remove_free_dquot(struct dquot *dquot) |
@@ -281,7 +287,7 @@ static inline void remove_free_dquot(struct dquot *dquot) | |||
281 | if (list_empty(&dquot->dq_free)) | 287 | if (list_empty(&dquot->dq_free)) |
282 | return; | 288 | return; |
283 | list_del_init(&dquot->dq_free); | 289 | list_del_init(&dquot->dq_free); |
284 | dqstats.free_dquots--; | 290 | dqstats_dec(DQST_FREE_DQUOTS); |
285 | } | 291 | } |
286 | 292 | ||
287 | static inline void put_inuse(struct dquot *dquot) | 293 | static inline void put_inuse(struct dquot *dquot) |
@@ -289,12 +295,12 @@ static inline void put_inuse(struct dquot *dquot) | |||
289 | /* We add to the back of inuse list so we don't have to restart | 295 | /* We add to the back of inuse list so we don't have to restart |
290 | * when traversing this list and we block */ | 296 | * when traversing this list and we block */ |
291 | list_add_tail(&dquot->dq_inuse, &inuse_list); | 297 | list_add_tail(&dquot->dq_inuse, &inuse_list); |
292 | dqstats.allocated_dquots++; | 298 | dqstats_inc(DQST_ALLOC_DQUOTS); |
293 | } | 299 | } |
294 | 300 | ||
295 | static inline void remove_inuse(struct dquot *dquot) | 301 | static inline void remove_inuse(struct dquot *dquot) |
296 | { | 302 | { |
297 | dqstats.allocated_dquots--; | 303 | dqstats_dec(DQST_ALLOC_DQUOTS); |
298 | list_del(&dquot->dq_inuse); | 304 | list_del(&dquot->dq_inuse); |
299 | } | 305 | } |
300 | /* | 306 | /* |
@@ -317,14 +323,23 @@ static inline int mark_dquot_dirty(struct dquot *dquot) | |||
317 | return dquot->dq_sb->dq_op->mark_dirty(dquot); | 323 | return dquot->dq_sb->dq_op->mark_dirty(dquot); |
318 | } | 324 | } |
319 | 325 | ||
326 | /* Mark dquot dirty in atomic manner, and return its old dirty flag state */ | ||
320 | int dquot_mark_dquot_dirty(struct dquot *dquot) | 327 | int dquot_mark_dquot_dirty(struct dquot *dquot) |
321 | { | 328 | { |
329 | int ret = 1; | ||
330 | |||
331 | /* If quota is dirty already, we don't have to acquire dq_list_lock */ | ||
332 | if (test_bit(DQ_MOD_B, &dquot->dq_flags)) | ||
333 | return 1; | ||
334 | |||
322 | spin_lock(&dq_list_lock); | 335 | spin_lock(&dq_list_lock); |
323 | if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) | 336 | if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { |
324 | list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> | 337 | list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> |
325 | info[dquot->dq_type].dqi_dirty_list); | 338 | info[dquot->dq_type].dqi_dirty_list); |
339 | ret = 0; | ||
340 | } | ||
326 | spin_unlock(&dq_list_lock); | 341 | spin_unlock(&dq_list_lock); |
327 | return 0; | 342 | return ret; |
328 | } | 343 | } |
329 | EXPORT_SYMBOL(dquot_mark_dquot_dirty); | 344 | EXPORT_SYMBOL(dquot_mark_dquot_dirty); |
330 | 345 | ||
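
Two things change in dquot_mark_dquot_dirty(): a lockless test_bit() fast path skips dq_list_lock entirely when the dquot is already dirty, and the function now reports the previous dirty state instead of always returning 0. The authoritative test_and_set_bit() still runs under the lock, so two racing markers serialize there and exactly one of them observes "was clean". A hedged userspace sketch of the same check-then-lock idiom, types illustrative:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct object {
	atomic_bool dirty;
	pthread_mutex_t list_lock;
	/* ... dirty-list linkage ... */
};

/* Returns the old dirty state, like dquot_mark_dquot_dirty() now does. */
static bool mark_dirty(struct object *obj)
{
	bool old = true;

	if (atomic_load(&obj->dirty))	/* lockless fast path */
		return true;

	pthread_mutex_lock(&obj->list_lock);
	if (!atomic_exchange(&obj->dirty, true)) {
		/* first marker: add obj to the dirty list here */
		old = false;
	}
	pthread_mutex_unlock(&obj->list_lock);
	return old;
}
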
@@ -550,8 +565,8 @@ int dquot_scan_active(struct super_block *sb, | |||
550 | continue; | 565 | continue; |
551 | /* Now we have active dquot so we can just increase use count */ | 566 | /* Now we have active dquot so we can just increase use count */ |
552 | atomic_inc(&dquot->dq_count); | 567 | atomic_inc(&dquot->dq_count); |
553 | dqstats.lookups++; | ||
554 | spin_unlock(&dq_list_lock); | 568 | spin_unlock(&dq_list_lock); |
569 | dqstats_inc(DQST_LOOKUPS); | ||
555 | dqput(old_dquot); | 570 | dqput(old_dquot); |
556 | old_dquot = dquot; | 571 | old_dquot = dquot; |
557 | ret = fn(dquot, priv); | 572 | ret = fn(dquot, priv); |
@@ -596,8 +611,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) | |||
596 | * holding reference so we can safely just increase | 611 | * holding reference so we can safely just increase |
597 | * use count */ | 612 | * use count */ |
598 | atomic_inc(&dquot->dq_count); | 613 | atomic_inc(&dquot->dq_count); |
599 | dqstats.lookups++; | ||
600 | spin_unlock(&dq_list_lock); | 614 | spin_unlock(&dq_list_lock); |
615 | dqstats_inc(DQST_LOOKUPS); | ||
601 | sb->dq_op->write_dquot(dquot); | 616 | sb->dq_op->write_dquot(dquot); |
602 | dqput(dquot); | 617 | dqput(dquot); |
603 | spin_lock(&dq_list_lock); | 618 | spin_lock(&dq_list_lock); |
@@ -609,9 +624,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) | |||
609 | if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) | 624 | if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) |
610 | && info_dirty(&dqopt->info[cnt])) | 625 | && info_dirty(&dqopt->info[cnt])) |
611 | sb->dq_op->write_info(sb, cnt); | 626 | sb->dq_op->write_info(sb, cnt); |
612 | spin_lock(&dq_list_lock); | 627 | dqstats_inc(DQST_SYNCS); |
613 | dqstats.syncs++; | ||
614 | spin_unlock(&dq_list_lock); | ||
615 | mutex_unlock(&dqopt->dqonoff_mutex); | 628 | mutex_unlock(&dqopt->dqonoff_mutex); |
616 | 629 | ||
617 | if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) | 630 | if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) |
@@ -663,6 +676,22 @@ static void prune_dqcache(int count) | |||
663 | } | 676 | } |
664 | } | 677 | } |
665 | 678 | ||
679 | static int dqstats_read(unsigned int type) | ||
680 | { | ||
681 | int count = 0; | ||
682 | #ifdef CONFIG_SMP | ||
683 | int cpu; | ||
684 | for_each_possible_cpu(cpu) | ||
685 | count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type]; | ||
686 | /* Statistics reading is racy, but absolute accuracy isn't required */ | ||
687 | if (count < 0) | ||
688 | count = 0; | ||
689 | #else | ||
690 | count = dqstats.stat[type]; | ||
691 | #endif | ||
692 | return count; | ||
693 | } | ||
694 | |||
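
dqstats_read() folds the per-CPU copies into one value on SMP. The matching dqstats_inc()/dqstats_dec() helpers this patch uses everywhere are defined in the quota headers and not shown in this hunk; a sketch of the shape they presumably take (an assumption, not the actual definition), ignoring preemption-safety details:

#ifdef CONFIG_SMP
static inline void dqstats_inc(unsigned int type)
{
	/* bump this CPU's slot; hot paths never touch a shared counter */
	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
}
#else
static inline void dqstats_inc(unsigned int type)
{
	dqstats.stat[type]++;
}
#endif
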
666 | /* | 695 | /* |
667 | * This is called from kswapd when we think we need some | 696 | * This is called from kswapd when we think we need some |
668 | * more memory | 697 | * more memory |
@@ -675,7 +704,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) | |||
675 | prune_dqcache(nr); | 704 | prune_dqcache(nr); |
676 | spin_unlock(&dq_list_lock); | 705 | spin_unlock(&dq_list_lock); |
677 | } | 706 | } |
678 | return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; | 707 | return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure; |
679 | } | 708 | } |
680 | 709 | ||
681 | static struct shrinker dqcache_shrinker = { | 710 | static struct shrinker dqcache_shrinker = { |
@@ -703,10 +732,7 @@ void dqput(struct dquot *dquot) | |||
703 | BUG(); | 732 | BUG(); |
704 | } | 733 | } |
705 | #endif | 734 | #endif |
706 | 735 | dqstats_inc(DQST_DROPS); | |
707 | spin_lock(&dq_list_lock); | ||
708 | dqstats.drops++; | ||
709 | spin_unlock(&dq_list_lock); | ||
710 | we_slept: | 736 | we_slept: |
711 | spin_lock(&dq_list_lock); | 737 | spin_lock(&dq_list_lock); |
712 | if (atomic_read(&dquot->dq_count) > 1) { | 738 | if (atomic_read(&dquot->dq_count) > 1) { |
@@ -823,15 +849,15 @@ we_slept: | |||
823 | put_inuse(dquot); | 849 | put_inuse(dquot); |
824 | /* hash it first so it can be found */ | 850 | /* hash it first so it can be found */ |
825 | insert_dquot_hash(dquot); | 851 | insert_dquot_hash(dquot); |
826 | dqstats.lookups++; | ||
827 | spin_unlock(&dq_list_lock); | 852 | spin_unlock(&dq_list_lock); |
853 | dqstats_inc(DQST_LOOKUPS); | ||
828 | } else { | 854 | } else { |
829 | if (!atomic_read(&dquot->dq_count)) | 855 | if (!atomic_read(&dquot->dq_count)) |
830 | remove_free_dquot(dquot); | 856 | remove_free_dquot(dquot); |
831 | atomic_inc(&dquot->dq_count); | 857 | atomic_inc(&dquot->dq_count); |
832 | dqstats.cache_hits++; | ||
833 | dqstats.lookups++; | ||
834 | spin_unlock(&dq_list_lock); | 858 | spin_unlock(&dq_list_lock); |
859 | dqstats_inc(DQST_CACHE_HITS); | ||
860 | dqstats_inc(DQST_LOOKUPS); | ||
835 | } | 861 | } |
836 | /* Wait for dq_lock - after this we know that either dquot_release() is | 862 | /* Wait for dq_lock - after this we know that either dquot_release() is |
837 | * already finished or it will be canceled due to dq_count > 1 test */ | 863 | * already finished or it will be canceled due to dq_count > 1 test */ |
@@ -1677,16 +1703,19 @@ EXPORT_SYMBOL(dquot_free_inode); | |||
1677 | 1703 | ||
1678 | /* | 1704 | /* |
1679 | * Transfer the number of inode and blocks from one diskquota to an other. | 1705 | * Transfer the number of inode and blocks from one diskquota to an other. |
1706 | * On success, dquot references in transfer_to are consumed and references | ||
1707 | * to original dquots that need to be released are placed there. On failure, | ||
1708 | * references are kept untouched. | ||
1680 | * | 1709 | * |
1681 | * This operation can block, but only after everything is updated | 1710 | * This operation can block, but only after everything is updated |
1682 | * A transaction must be started when entering this function. | 1711 | * A transaction must be started when entering this function. |
1712 | * | ||
1683 | */ | 1713 | */ |
1684 | static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) | 1714 | int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) |
1685 | { | 1715 | { |
1686 | qsize_t space, cur_space; | 1716 | qsize_t space, cur_space; |
1687 | qsize_t rsv_space = 0; | 1717 | qsize_t rsv_space = 0; |
1688 | struct dquot *transfer_from[MAXQUOTAS]; | 1718 | struct dquot *transfer_from[MAXQUOTAS] = {}; |
1689 | struct dquot *transfer_to[MAXQUOTAS]; | ||
1690 | int cnt, ret = 0; | 1719 | int cnt, ret = 0; |
1691 | char warntype_to[MAXQUOTAS]; | 1720 | char warntype_to[MAXQUOTAS]; |
1692 | char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; | 1721 | char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; |
@@ -1696,19 +1725,12 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask | |||
1696 | if (IS_NOQUOTA(inode)) | 1725 | if (IS_NOQUOTA(inode)) |
1697 | return 0; | 1726 | return 0; |
1698 | /* Initialize the arrays */ | 1727 | /* Initialize the arrays */ |
1699 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1728 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1700 | transfer_from[cnt] = NULL; | ||
1701 | transfer_to[cnt] = NULL; | ||
1702 | warntype_to[cnt] = QUOTA_NL_NOWARN; | 1729 | warntype_to[cnt] = QUOTA_NL_NOWARN; |
1703 | } | ||
1704 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||
1705 | if (mask & (1 << cnt)) | ||
1706 | transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt); | ||
1707 | } | ||
1708 | down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1730 | down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1709 | if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ | 1731 | if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ |
1710 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1732 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1711 | goto put_all; | 1733 | return 0; |
1712 | } | 1734 | } |
1713 | spin_lock(&dq_data_lock); | 1735 | spin_lock(&dq_data_lock); |
1714 | cur_space = inode_get_bytes(inode); | 1736 | cur_space = inode_get_bytes(inode); |
@@ -1760,47 +1782,41 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask | |||
1760 | 1782 | ||
1761 | mark_all_dquot_dirty(transfer_from); | 1783 | mark_all_dquot_dirty(transfer_from); |
1762 | mark_all_dquot_dirty(transfer_to); | 1784 | mark_all_dquot_dirty(transfer_to); |
1763 | /* The reference we got is transferred to the inode */ | 1785 | /* Pass back references to put */ |
1764 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1786 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1765 | transfer_to[cnt] = NULL; | 1787 | transfer_to[cnt] = transfer_from[cnt]; |
1766 | warn_put_all: | 1788 | warn: |
1767 | flush_warnings(transfer_to, warntype_to); | 1789 | flush_warnings(transfer_to, warntype_to); |
1768 | flush_warnings(transfer_from, warntype_from_inodes); | 1790 | flush_warnings(transfer_from, warntype_from_inodes); |
1769 | flush_warnings(transfer_from, warntype_from_space); | 1791 | flush_warnings(transfer_from, warntype_from_space); |
1770 | put_all: | ||
1771 | dqput_all(transfer_from); | ||
1772 | dqput_all(transfer_to); | ||
1773 | return ret; | 1792 | return ret; |
1774 | over_quota: | 1793 | over_quota: |
1775 | spin_unlock(&dq_data_lock); | 1794 | spin_unlock(&dq_data_lock); |
1776 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1795 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1777 | /* Clear dquot pointers we don't want to dqput() */ | 1796 | goto warn; |
1778 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | ||
1779 | transfer_from[cnt] = NULL; | ||
1780 | goto warn_put_all; | ||
1781 | } | 1797 | } |
1798 | EXPORT_SYMBOL(__dquot_transfer); | ||
1782 | 1799 | ||
1783 | /* Wrapper for transferring ownership of an inode for uid/gid only | 1800 | /* Wrapper for transferring ownership of an inode for uid/gid only |
1784 | * Called from FSXXX_setattr() | 1801 | * Called from FSXXX_setattr() |
1785 | */ | 1802 | */ |
1786 | int dquot_transfer(struct inode *inode, struct iattr *iattr) | 1803 | int dquot_transfer(struct inode *inode, struct iattr *iattr) |
1787 | { | 1804 | { |
1788 | qid_t chid[MAXQUOTAS]; | 1805 | struct dquot *transfer_to[MAXQUOTAS] = {}; |
1789 | unsigned long mask = 0; | 1806 | struct super_block *sb = inode->i_sb; |
1807 | int ret; | ||
1790 | 1808 | ||
1791 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) { | 1809 | if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode)) |
1792 | mask |= 1 << USRQUOTA; | 1810 | return 0; |
1793 | chid[USRQUOTA] = iattr->ia_uid; | 1811 | |
1794 | } | 1812 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) |
1795 | if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) { | 1813 | transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); |
1796 | mask |= 1 << GRPQUOTA; | 1814 | if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) |
1797 | chid[GRPQUOTA] = iattr->ia_gid; | 1815 | transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); |
1798 | } | 1816 | |
1799 | if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { | 1817 | ret = __dquot_transfer(inode, transfer_to); |
1800 | dquot_initialize(inode); | 1818 | dqput_all(transfer_to); |
1801 | return __dquot_transfer(inode, chid, mask); | 1819 | return ret; |
1802 | } | ||
1803 | return 0; | ||
1804 | } | 1820 | } |
1805 | EXPORT_SYMBOL(dquot_transfer); | 1821 | EXPORT_SYMBOL(dquot_transfer); |
1806 | 1822 | ||
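
With this refactor, __dquot_transfer() takes ready-made dquot references (so a filesystem such as OCFS2 can pass in its own), while dquot_transfer() keeps the old iattr-driven behaviour for FSXXX_setattr() callers. A hedged sketch of the caller side; everything except dquot_transfer() and inode_change_ok() is illustrative:

static int examplefs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	/* Move the charged blocks/inodes before committing new ownership. */
	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
		error = dquot_transfer(inode, attr);
		if (error)
			return error;
	}

	/* ... apply the remaining attribute changes ... */
	mark_inode_dirty(inode);
	return 0;
}
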
@@ -2275,25 +2291,30 @@ static inline qsize_t stoqb(qsize_t space) | |||
2275 | } | 2291 | } |
2276 | 2292 | ||
2277 | /* Generic routine for getting common part of quota structure */ | 2293 | /* Generic routine for getting common part of quota structure */ |
2278 | static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) | 2294 | static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) |
2279 | { | 2295 | { |
2280 | struct mem_dqblk *dm = &dquot->dq_dqb; | 2296 | struct mem_dqblk *dm = &dquot->dq_dqb; |
2281 | 2297 | ||
2298 | memset(di, 0, sizeof(*di)); | ||
2299 | di->d_version = FS_DQUOT_VERSION; | ||
2300 | di->d_flags = dquot->dq_type == USRQUOTA ? | ||
2301 | XFS_USER_QUOTA : XFS_GROUP_QUOTA; | ||
2302 | di->d_id = dquot->dq_id; | ||
2303 | |||
2282 | spin_lock(&dq_data_lock); | 2304 | spin_lock(&dq_data_lock); |
2283 | di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit); | 2305 | di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); |
2284 | di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit); | 2306 | di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); |
2285 | di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace; | 2307 | di->d_ino_hardlimit = dm->dqb_ihardlimit; |
2286 | di->dqb_ihardlimit = dm->dqb_ihardlimit; | 2308 | di->d_ino_softlimit = dm->dqb_isoftlimit; |
2287 | di->dqb_isoftlimit = dm->dqb_isoftlimit; | 2309 | di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace; |
2288 | di->dqb_curinodes = dm->dqb_curinodes; | 2310 | di->d_icount = dm->dqb_curinodes; |
2289 | di->dqb_btime = dm->dqb_btime; | 2311 | di->d_btimer = dm->dqb_btime; |
2290 | di->dqb_itime = dm->dqb_itime; | 2312 | di->d_itimer = dm->dqb_itime; |
2291 | di->dqb_valid = QIF_ALL; | ||
2292 | spin_unlock(&dq_data_lock); | 2313 | spin_unlock(&dq_data_lock); |
2293 | } | 2314 | } |
2294 | 2315 | ||
2295 | int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, | 2316 | int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, |
2296 | struct if_dqblk *di) | 2317 | struct fs_disk_quota *di) |
2297 | { | 2318 | { |
2298 | struct dquot *dquot; | 2319 | struct dquot *dquot; |
2299 | 2320 | ||
@@ -2307,51 +2328,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, | |||
2307 | } | 2328 | } |
2308 | EXPORT_SYMBOL(vfs_get_dqblk); | 2329 | EXPORT_SYMBOL(vfs_get_dqblk); |
2309 | 2330 | ||
2331 | #define VFS_FS_DQ_MASK \ | ||
2332 | (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ | ||
2333 | FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \ | ||
2334 | FS_DQ_BTIMER | FS_DQ_ITIMER) | ||
2335 | |||
2310 | /* Generic routine for setting common part of quota structure */ | 2336 | /* Generic routine for setting common part of quota structure */ |
2311 | static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) | 2337 | static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) |
2312 | { | 2338 | { |
2313 | struct mem_dqblk *dm = &dquot->dq_dqb; | 2339 | struct mem_dqblk *dm = &dquot->dq_dqb; |
2314 | int check_blim = 0, check_ilim = 0; | 2340 | int check_blim = 0, check_ilim = 0; |
2315 | struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; | 2341 | struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; |
2316 | 2342 | ||
2317 | if ((di->dqb_valid & QIF_BLIMITS && | 2343 | if (di->d_fieldmask & ~VFS_FS_DQ_MASK) |
2318 | (di->dqb_bhardlimit > dqi->dqi_maxblimit || | 2344 | return -EINVAL; |
2319 | di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || | 2345 | |
2320 | (di->dqb_valid & QIF_ILIMITS && | 2346 | if (((di->d_fieldmask & FS_DQ_BSOFT) && |
2321 | (di->dqb_ihardlimit > dqi->dqi_maxilimit || | 2347 | (di->d_blk_softlimit > dqi->dqi_maxblimit)) || |
2322 | di->dqb_isoftlimit > dqi->dqi_maxilimit))) | 2348 | ((di->d_fieldmask & FS_DQ_BHARD) && |
2349 | (di->d_blk_hardlimit > dqi->dqi_maxblimit)) || | ||
2350 | ((di->d_fieldmask & FS_DQ_ISOFT) && | ||
2351 | (di->d_ino_softlimit > dqi->dqi_maxilimit)) || | ||
2352 | ((di->d_fieldmask & FS_DQ_IHARD) && | ||
2353 | (di->d_ino_hardlimit > dqi->dqi_maxilimit))) | ||
2323 | return -ERANGE; | 2354 | return -ERANGE; |
2324 | 2355 | ||
2325 | spin_lock(&dq_data_lock); | 2356 | spin_lock(&dq_data_lock); |
2326 | if (di->dqb_valid & QIF_SPACE) { | 2357 | if (di->d_fieldmask & FS_DQ_BCOUNT) { |
2327 | dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace; | 2358 | dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; |
2328 | check_blim = 1; | 2359 | check_blim = 1; |
2329 | set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); | 2360 | set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); |
2330 | } | 2361 | } |
2331 | if (di->dqb_valid & QIF_BLIMITS) { | 2362 | |
2332 | dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); | 2363 | if (di->d_fieldmask & FS_DQ_BSOFT) |
2333 | dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); | 2364 | dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit); |
2365 | if (di->d_fieldmask & FS_DQ_BHARD) | ||
2366 | dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit); | ||
2367 | if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) { | ||
2334 | check_blim = 1; | 2368 | check_blim = 1; |
2335 | set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); | 2369 | set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); |
2336 | } | 2370 | } |
2337 | if (di->dqb_valid & QIF_INODES) { | 2371 | |
2338 | dm->dqb_curinodes = di->dqb_curinodes; | 2372 | if (di->d_fieldmask & FS_DQ_ICOUNT) { |
2373 | dm->dqb_curinodes = di->d_icount; | ||
2339 | check_ilim = 1; | 2374 | check_ilim = 1; |
2340 | set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); | 2375 | set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); |
2341 | } | 2376 | } |
2342 | if (di->dqb_valid & QIF_ILIMITS) { | 2377 | |
2343 | dm->dqb_isoftlimit = di->dqb_isoftlimit; | 2378 | if (di->d_fieldmask & FS_DQ_ISOFT) |
2344 | dm->dqb_ihardlimit = di->dqb_ihardlimit; | 2379 | dm->dqb_isoftlimit = di->d_ino_softlimit; |
2380 | if (di->d_fieldmask & FS_DQ_IHARD) | ||
2381 | dm->dqb_ihardlimit = di->d_ino_hardlimit; | ||
2382 | if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) { | ||
2345 | check_ilim = 1; | 2383 | check_ilim = 1; |
2346 | set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); | 2384 | set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); |
2347 | } | 2385 | } |
2348 | if (di->dqb_valid & QIF_BTIME) { | 2386 | |
2349 | dm->dqb_btime = di->dqb_btime; | 2387 | if (di->d_fieldmask & FS_DQ_BTIMER) { |
2388 | dm->dqb_btime = di->d_btimer; | ||
2350 | check_blim = 1; | 2389 | check_blim = 1; |
2351 | set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); | 2390 | set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); |
2352 | } | 2391 | } |
2353 | if (di->dqb_valid & QIF_ITIME) { | 2392 | |
2354 | dm->dqb_itime = di->dqb_itime; | 2393 | if (di->d_fieldmask & FS_DQ_ITIMER) { |
2394 | dm->dqb_itime = di->d_itimer; | ||
2355 | check_ilim = 1; | 2395 | check_ilim = 1; |
2356 | set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); | 2396 | set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); |
2357 | } | 2397 | } |
@@ -2361,7 +2401,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) | |||
2361 | dm->dqb_curspace < dm->dqb_bsoftlimit) { | 2401 | dm->dqb_curspace < dm->dqb_bsoftlimit) { |
2362 | dm->dqb_btime = 0; | 2402 | dm->dqb_btime = 0; |
2363 | clear_bit(DQ_BLKS_B, &dquot->dq_flags); | 2403 | clear_bit(DQ_BLKS_B, &dquot->dq_flags); |
2364 | } else if (!(di->dqb_valid & QIF_BTIME)) | 2404 | } else if (!(di->d_fieldmask & FS_DQ_BTIMER)) |
2365 | /* Set grace only if user hasn't provided his own... */ | 2405 | /* Set grace only if user hasn't provided his own... */ |
2366 | dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; | 2406 | dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; |
2367 | } | 2407 | } |
@@ -2370,7 +2410,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) | |||
2370 | dm->dqb_curinodes < dm->dqb_isoftlimit) { | 2410 | dm->dqb_curinodes < dm->dqb_isoftlimit) { |
2371 | dm->dqb_itime = 0; | 2411 | dm->dqb_itime = 0; |
2372 | clear_bit(DQ_INODES_B, &dquot->dq_flags); | 2412 | clear_bit(DQ_INODES_B, &dquot->dq_flags); |
2373 | } else if (!(di->dqb_valid & QIF_ITIME)) | 2413 | } else if (!(di->d_fieldmask & FS_DQ_ITIMER)) |
2374 | /* Set grace only if user hasn't provided his own... */ | 2414 | /* Set grace only if user hasn't provided his own... */ |
2375 | dm->dqb_itime = get_seconds() + dqi->dqi_igrace; | 2415 | dm->dqb_itime = get_seconds() + dqi->dqi_igrace; |
2376 | } | 2416 | } |
@@ -2386,7 +2426,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) | |||
2386 | } | 2426 | } |
2387 | 2427 | ||
2388 | int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, | 2428 | int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, |
2389 | struct if_dqblk *di) | 2429 | struct fs_disk_quota *di) |
2390 | { | 2430 | { |
2391 | struct dquot *dquot; | 2431 | struct dquot *dquot; |
2392 | int rc; | 2432 | int rc; |
@@ -2465,62 +2505,74 @@ const struct quotactl_ops vfs_quotactl_ops = { | |||
2465 | .set_dqblk = vfs_set_dqblk | 2505 | .set_dqblk = vfs_set_dqblk |
2466 | }; | 2506 | }; |
2467 | 2507 | ||
2508 | |||
2509 | static int do_proc_dqstats(struct ctl_table *table, int write, | ||
2510 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2511 | { | ||
2512 | #ifdef CONFIG_SMP | ||
2513 | /* Update global table */ | ||
2514 | unsigned int type = (int *)table->data - dqstats.stat; | ||
2515 | dqstats.stat[type] = dqstats_read(type); | ||
2516 | #endif | ||
2517 | return proc_dointvec(table, write, buffer, lenp, ppos); | ||
2518 | } | ||
2519 | |||
2468 | static ctl_table fs_dqstats_table[] = { | 2520 | static ctl_table fs_dqstats_table[] = { |
2469 | { | 2521 | { |
2470 | .procname = "lookups", | 2522 | .procname = "lookups", |
2471 | .data = &dqstats.lookups, | 2523 | .data = &dqstats.stat[DQST_LOOKUPS], |
2472 | .maxlen = sizeof(int), | 2524 | .maxlen = sizeof(int), |
2473 | .mode = 0444, | 2525 | .mode = 0444, |
2474 | .proc_handler = proc_dointvec, | 2526 | .proc_handler = do_proc_dqstats, |
2475 | }, | 2527 | }, |
2476 | { | 2528 | { |
2477 | .procname = "drops", | 2529 | .procname = "drops", |
2478 | .data = &dqstats.drops, | 2530 | .data = &dqstats.stat[DQST_DROPS], |
2479 | .maxlen = sizeof(int), | 2531 | .maxlen = sizeof(int), |
2480 | .mode = 0444, | 2532 | .mode = 0444, |
2481 | .proc_handler = proc_dointvec, | 2533 | .proc_handler = do_proc_dqstats, |
2482 | }, | 2534 | }, |
2483 | { | 2535 | { |
2484 | .procname = "reads", | 2536 | .procname = "reads", |
2485 | .data = &dqstats.reads, | 2537 | .data = &dqstats.stat[DQST_READS], |
2486 | .maxlen = sizeof(int), | 2538 | .maxlen = sizeof(int), |
2487 | .mode = 0444, | 2539 | .mode = 0444, |
2488 | .proc_handler = proc_dointvec, | 2540 | .proc_handler = do_proc_dqstats, |
2489 | }, | 2541 | }, |
2490 | { | 2542 | { |
2491 | .procname = "writes", | 2543 | .procname = "writes", |
2492 | .data = &dqstats.writes, | 2544 | .data = &dqstats.stat[DQST_WRITES], |
2493 | .maxlen = sizeof(int), | 2545 | .maxlen = sizeof(int), |
2494 | .mode = 0444, | 2546 | .mode = 0444, |
2495 | .proc_handler = proc_dointvec, | 2547 | .proc_handler = do_proc_dqstats, |
2496 | }, | 2548 | }, |
2497 | { | 2549 | { |
2498 | .procname = "cache_hits", | 2550 | .procname = "cache_hits", |
2499 | .data = &dqstats.cache_hits, | 2551 | .data = &dqstats.stat[DQST_CACHE_HITS], |
2500 | .maxlen = sizeof(int), | 2552 | .maxlen = sizeof(int), |
2501 | .mode = 0444, | 2553 | .mode = 0444, |
2502 | .proc_handler = proc_dointvec, | 2554 | .proc_handler = do_proc_dqstats, |
2503 | }, | 2555 | }, |
2504 | { | 2556 | { |
2505 | .procname = "allocated_dquots", | 2557 | .procname = "allocated_dquots", |
2506 | .data = &dqstats.allocated_dquots, | 2558 | .data = &dqstats.stat[DQST_ALLOC_DQUOTS], |
2507 | .maxlen = sizeof(int), | 2559 | .maxlen = sizeof(int), |
2508 | .mode = 0444, | 2560 | .mode = 0444, |
2509 | .proc_handler = proc_dointvec, | 2561 | .proc_handler = do_proc_dqstats, |
2510 | }, | 2562 | }, |
2511 | { | 2563 | { |
2512 | .procname = "free_dquots", | 2564 | .procname = "free_dquots", |
2513 | .data = &dqstats.free_dquots, | 2565 | .data = &dqstats.stat[DQST_FREE_DQUOTS], |
2514 | .maxlen = sizeof(int), | 2566 | .maxlen = sizeof(int), |
2515 | .mode = 0444, | 2567 | .mode = 0444, |
2516 | .proc_handler = proc_dointvec, | 2568 | .proc_handler = do_proc_dqstats, |
2517 | }, | 2569 | }, |
2518 | { | 2570 | { |
2519 | .procname = "syncs", | 2571 | .procname = "syncs", |
2520 | .data = &dqstats.syncs, | 2572 | .data = &dqstats.stat[DQST_SYNCS], |
2521 | .maxlen = sizeof(int), | 2573 | .maxlen = sizeof(int), |
2522 | .mode = 0444, | 2574 | .mode = 0444, |
2523 | .proc_handler = proc_dointvec, | 2575 | .proc_handler = do_proc_dqstats, |
2524 | }, | 2576 | }, |
2525 | #ifdef CONFIG_PRINT_QUOTA_WARNING | 2577 | #ifdef CONFIG_PRINT_QUOTA_WARNING |
2526 | { | 2578 | { |
@@ -2572,6 +2624,13 @@ static int __init dquot_init(void) | |||
2572 | if (!dquot_hash) | 2624 | if (!dquot_hash) |
2573 | panic("Cannot create dquot hash table"); | 2625 | panic("Cannot create dquot hash table"); |
2574 | 2626 | ||
2627 | #ifdef CONFIG_SMP | ||
2628 | dqstats_pcpu = alloc_percpu(struct dqstats); | ||
2629 | if (!dqstats_pcpu) | ||
2630 | panic("Cannot create dquot stats table"); | ||
2631 | #endif | ||
2632 | memset(&dqstats, 0, sizeof(struct dqstats)); | ||
2633 | |||
2575 | /* Find power-of-two hlist_heads which can fit into allocation */ | 2634 | /* Find power-of-two hlist_heads which can fit into allocation */ |
2576 | nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); | 2635 | nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); |
2577 | dq_hash_bits = 0; | 2636 | dq_hash_bits = 0; |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95388f9b7356..ce3dfd066f59 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -45,36 +45,22 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, | |||
45 | return security_quotactl(cmd, type, id, sb); | 45 | return security_quotactl(cmd, type, id, sb); |
46 | } | 46 | } |
47 | 47 | ||
48 | static void quota_sync_one(struct super_block *sb, void *arg) | ||
49 | { | ||
50 | if (sb->s_qcop && sb->s_qcop->quota_sync) | ||
51 | sb->s_qcop->quota_sync(sb, *(int *)arg, 1); | ||
52 | } | ||
53 | |||
48 | static int quota_sync_all(int type) | 54 | static int quota_sync_all(int type) |
49 | { | 55 | { |
50 | struct super_block *sb; | ||
51 | int ret; | 56 | int ret; |
52 | 57 | ||
53 | if (type >= MAXQUOTAS) | 58 | if (type >= MAXQUOTAS) |
54 | return -EINVAL; | 59 | return -EINVAL; |
55 | ret = security_quotactl(Q_SYNC, type, 0, NULL); | 60 | ret = security_quotactl(Q_SYNC, type, 0, NULL); |
56 | if (ret) | 61 | if (!ret) |
57 | return ret; | 62 | iterate_supers(quota_sync_one, &type); |
58 | 63 | return ret; | |
59 | spin_lock(&sb_lock); | ||
60 | restart: | ||
61 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
62 | if (!sb->s_qcop || !sb->s_qcop->quota_sync) | ||
63 | continue; | ||
64 | |||
65 | sb->s_count++; | ||
66 | spin_unlock(&sb_lock); | ||
67 | down_read(&sb->s_umount); | ||
68 | if (sb->s_root) | ||
69 | sb->s_qcop->quota_sync(sb, type, 1); | ||
70 | up_read(&sb->s_umount); | ||
71 | spin_lock(&sb_lock); | ||
72 | if (__put_super_and_need_restart(sb)) | ||
73 | goto restart; | ||
74 | } | ||
75 | spin_unlock(&sb_lock); | ||
76 | |||
77 | return 0; | ||
78 | } | 64 | } |
79 | 65 | ||
80 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, | 66 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, |
@@ -113,8 +99,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr) | |||
113 | struct if_dqinfo info; | 99 | struct if_dqinfo info; |
114 | int ret; | 100 | int ret; |
115 | 101 | ||
116 | if (!sb_has_quota_active(sb, type)) | ||
117 | return -ESRCH; | ||
118 | if (!sb->s_qcop->get_info) | 102 | if (!sb->s_qcop->get_info) |
119 | return -ENOSYS; | 103 | return -ENOSYS; |
120 | ret = sb->s_qcop->get_info(sb, type, &info); | 104 | ret = sb->s_qcop->get_info(sb, type, &info); |
@@ -129,43 +113,80 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) | |||
129 | 113 | ||
130 | if (copy_from_user(&info, addr, sizeof(info))) | 114 | if (copy_from_user(&info, addr, sizeof(info))) |
131 | return -EFAULT; | 115 | return -EFAULT; |
132 | if (!sb_has_quota_active(sb, type)) | ||
133 | return -ESRCH; | ||
134 | if (!sb->s_qcop->set_info) | 116 | if (!sb->s_qcop->set_info) |
135 | return -ENOSYS; | 117 | return -ENOSYS; |
136 | return sb->s_qcop->set_info(sb, type, &info); | 118 | return sb->s_qcop->set_info(sb, type, &info); |
137 | } | 119 | } |
138 | 120 | ||
121 | static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) | ||
122 | { | ||
123 | dst->dqb_bhardlimit = src->d_blk_hardlimit; | ||
124 | dst->dqb_bsoftlimit = src->d_blk_softlimit; | ||
125 | dst->dqb_curspace = src->d_bcount; | ||
126 | dst->dqb_ihardlimit = src->d_ino_hardlimit; | ||
127 | dst->dqb_isoftlimit = src->d_ino_softlimit; | ||
128 | dst->dqb_curinodes = src->d_icount; | ||
129 | dst->dqb_btime = src->d_btimer; | ||
130 | dst->dqb_itime = src->d_itimer; | ||
131 | dst->dqb_valid = QIF_ALL; | ||
132 | } | ||
133 | |||
139 | static int quota_getquota(struct super_block *sb, int type, qid_t id, | 134 | static int quota_getquota(struct super_block *sb, int type, qid_t id, |
140 | void __user *addr) | 135 | void __user *addr) |
141 | { | 136 | { |
137 | struct fs_disk_quota fdq; | ||
142 | struct if_dqblk idq; | 138 | struct if_dqblk idq; |
143 | int ret; | 139 | int ret; |
144 | 140 | ||
145 | if (!sb_has_quota_active(sb, type)) | ||
146 | return -ESRCH; | ||
147 | if (!sb->s_qcop->get_dqblk) | 141 | if (!sb->s_qcop->get_dqblk) |
148 | return -ENOSYS; | 142 | return -ENOSYS; |
149 | ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); | 143 | ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); |
150 | if (ret) | 144 | if (ret) |
151 | return ret; | 145 | return ret; |
146 | copy_to_if_dqblk(&idq, &fdq); | ||
152 | if (copy_to_user(addr, &idq, sizeof(idq))) | 147 | if (copy_to_user(addr, &idq, sizeof(idq))) |
153 | return -EFAULT; | 148 | return -EFAULT; |
154 | return 0; | 149 | return 0; |
155 | } | 150 | } |
156 | 151 | ||
152 | static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) | ||
153 | { | ||
154 | dst->d_blk_hardlimit = src->dqb_bhardlimit; | ||
155 | dst->d_blk_softlimit = src->dqb_bsoftlimit; | ||
156 | dst->d_bcount = src->dqb_curspace; | ||
157 | dst->d_ino_hardlimit = src->dqb_ihardlimit; | ||
158 | dst->d_ino_softlimit = src->dqb_isoftlimit; | ||
159 | dst->d_icount = src->dqb_curinodes; | ||
160 | dst->d_btimer = src->dqb_btime; | ||
161 | dst->d_itimer = src->dqb_itime; | ||
162 | |||
163 | dst->d_fieldmask = 0; | ||
164 | if (src->dqb_valid & QIF_BLIMITS) | ||
165 | dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; | ||
166 | if (src->dqb_valid & QIF_SPACE) | ||
167 | dst->d_fieldmask |= FS_DQ_BCOUNT; | ||
168 | if (src->dqb_valid & QIF_ILIMITS) | ||
169 | dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; | ||
170 | if (src->dqb_valid & QIF_INODES) | ||
171 | dst->d_fieldmask |= FS_DQ_ICOUNT; | ||
172 | if (src->dqb_valid & QIF_BTIME) | ||
173 | dst->d_fieldmask |= FS_DQ_BTIMER; | ||
174 | if (src->dqb_valid & QIF_ITIME) | ||
175 | dst->d_fieldmask |= FS_DQ_ITIMER; | ||
176 | } | ||
177 | |||
157 | static int quota_setquota(struct super_block *sb, int type, qid_t id, | 178 | static int quota_setquota(struct super_block *sb, int type, qid_t id, |
158 | void __user *addr) | 179 | void __user *addr) |
159 | { | 180 | { |
181 | struct fs_disk_quota fdq; | ||
160 | struct if_dqblk idq; | 182 | struct if_dqblk idq; |
161 | 183 | ||
162 | if (copy_from_user(&idq, addr, sizeof(idq))) | 184 | if (copy_from_user(&idq, addr, sizeof(idq))) |
163 | return -EFAULT; | 185 | return -EFAULT; |
164 | if (!sb_has_quota_active(sb, type)) | ||
165 | return -ESRCH; | ||
166 | if (!sb->s_qcop->set_dqblk) | 186 | if (!sb->s_qcop->set_dqblk) |
167 | return -ENOSYS; | 187 | return -ENOSYS; |
168 | return sb->s_qcop->set_dqblk(sb, type, id, &idq); | 188 | copy_from_if_dqblk(&fdq, &idq); |
189 | return sb->s_qcop->set_dqblk(sb, type, id, &fdq); | ||
169 | } | 190 | } |
170 | 191 | ||
171 | static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) | 192 | static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) |
@@ -199,9 +220,9 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, | |||
199 | 220 | ||
200 | if (copy_from_user(&fdq, addr, sizeof(fdq))) | 221 | if (copy_from_user(&fdq, addr, sizeof(fdq))) |
201 | return -EFAULT; | 222 | return -EFAULT; |
202 | if (!sb->s_qcop->set_xquota) | 223 | if (!sb->s_qcop->set_dqblk) |
203 | return -ENOSYS; | 224 | return -ENOSYS; |
204 | return sb->s_qcop->set_xquota(sb, type, id, &fdq); | 225 | return sb->s_qcop->set_dqblk(sb, type, id, &fdq); |
205 | } | 226 | } |
206 | 227 | ||
207 | static int quota_getxquota(struct super_block *sb, int type, qid_t id, | 228 | static int quota_getxquota(struct super_block *sb, int type, qid_t id, |
@@ -210,9 +231,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, | |||
210 | struct fs_disk_quota fdq; | 231 | struct fs_disk_quota fdq; |
211 | int ret; | 232 | int ret; |
212 | 233 | ||
213 | if (!sb->s_qcop->get_xquota) | 234 | if (!sb->s_qcop->get_dqblk) |
214 | return -ENOSYS; | 235 | return -ENOSYS; |
215 | ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); | 236 | ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); |
216 | if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) | 237 | if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) |
217 | return -EFAULT; | 238 | return -EFAULT; |
218 | return ret; | 239 | return ret; |
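
After this conversion the generic quotactl path speaks fs_disk_quota internally and translates at the edge, so userspace continues to see if_dqblk unchanged. A minimal userspace sketch of that unchanged ABI; the device path is illustrative:

#include <stdio.h>
#include <sys/quota.h>
#include <unistd.h>

int main(void)
{
	struct dqblk q;

	if (quotactl(QCMD(Q_GETQUOTA, USRQUOTA), "/dev/sda1", getuid(),
		     (caddr_t)&q) < 0) {
		perror("quotactl");
		return 1;
	}
	printf("space in use: %llu bytes, soft limit: %llu blocks\n",
	       (unsigned long long)q.dqb_curspace,
	       (unsigned long long)q.dqb_bsoftlimit);
	return 0;
}
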
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index f81f4bcfb178..24f03407eeb5 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c | |||
@@ -60,9 +60,17 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) | |||
60 | static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) | 60 | static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) |
61 | { | 61 | { |
62 | struct super_block *sb = info->dqi_sb; | 62 | struct super_block *sb = info->dqi_sb; |
63 | ssize_t ret; | ||
63 | 64 | ||
64 | return sb->s_op->quota_write(sb, info->dqi_type, buf, | 65 | ret = sb->s_op->quota_write(sb, info->dqi_type, buf, |
65 | info->dqi_usable_bs, blk << info->dqi_blocksize_bits); | 66 | info->dqi_usable_bs, blk << info->dqi_blocksize_bits); |
67 | if (ret != info->dqi_usable_bs) { | ||
68 | q_warn(KERN_WARNING "VFS: dquota write failed on " | ||
69 | "dev %s\n", sb->s_id); | ||
70 | if (ret >= 0) | ||
71 | ret = -EIO; | ||
72 | } | ||
73 | return ret; | ||
66 | } | 74 | } |
67 | 75 | ||
68 | /* Remove empty block from list and return it */ | 76 | /* Remove empty block from list and return it */ |
@@ -152,7 +160,7 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf, | |||
152 | dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); | 160 | dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); |
153 | /* No matter whether write succeeds block is out of list */ | 161 | /* No matter whether write succeeds block is out of list */ |
154 | if (write_blk(info, blk, buf) < 0) | 162 | if (write_blk(info, blk, buf) < 0) |
155 | printk(KERN_ERR | 163 | q_warn(KERN_ERR |
156 | "VFS: Can't write block (%u) with free entries.\n", | 164 | "VFS: Can't write block (%u) with free entries.\n", |
157 | blk); | 165 | blk); |
158 | return 0; | 166 | return 0; |
@@ -244,7 +252,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, | |||
244 | if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { | 252 | if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { |
245 | *err = remove_free_dqentry(info, buf, blk); | 253 | *err = remove_free_dqentry(info, buf, blk); |
246 | if (*err < 0) { | 254 | if (*err < 0) { |
247 | printk(KERN_ERR "VFS: find_free_dqentry(): Can't " | 255 | q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't " |
248 | "remove block (%u) from entry free list.\n", | 256 | "remove block (%u) from entry free list.\n", |
249 | blk); | 257 | blk); |
250 | goto out_buf; | 258 | goto out_buf; |
@@ -268,7 +276,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, | |||
268 | #endif | 276 | #endif |
269 | *err = write_blk(info, blk, buf); | 277 | *err = write_blk(info, blk, buf); |
270 | if (*err < 0) { | 278 | if (*err < 0) { |
271 | printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " | 279 | q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " |
272 | "data block %u.\n", blk); | 280 | "data block %u.\n", blk); |
273 | goto out_buf; | 281 | goto out_buf; |
274 | } | 282 | } |
@@ -303,7 +311,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
303 | } else { | 311 | } else { |
304 | ret = read_blk(info, *treeblk, buf); | 312 | ret = read_blk(info, *treeblk, buf); |
305 | if (ret < 0) { | 313 | if (ret < 0) { |
306 | printk(KERN_ERR "VFS: Can't read tree quota block " | 314 | q_warn(KERN_ERR "VFS: Can't read tree quota block " |
307 | "%u.\n", *treeblk); | 315 | "%u.\n", *treeblk); |
308 | goto out_buf; | 316 | goto out_buf; |
309 | } | 317 | } |
@@ -365,7 +373,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
365 | if (!dquot->dq_off) { | 373 | if (!dquot->dq_off) { |
366 | ret = dq_insert_tree(info, dquot); | 374 | ret = dq_insert_tree(info, dquot); |
367 | if (ret < 0) { | 375 | if (ret < 0) { |
368 | printk(KERN_ERR "VFS: Error %zd occurred while " | 376 | q_warn(KERN_ERR "VFS: Error %zd occurred while " |
369 | "creating quota.\n", ret); | 377 | "creating quota.\n", ret); |
370 | kfree(ddquot); | 378 | kfree(ddquot); |
371 | return ret; | 379 | return ret; |
@@ -377,14 +385,14 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
377 | ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, | 385 | ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, |
378 | dquot->dq_off); | 386 | dquot->dq_off); |
379 | if (ret != info->dqi_entry_size) { | 387 | if (ret != info->dqi_entry_size) { |
380 | printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", | 388 | q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n", |
381 | sb->s_id); | 389 | sb->s_id); |
382 | if (ret >= 0) | 390 | if (ret >= 0) |
383 | ret = -ENOSPC; | 391 | ret = -ENOSPC; |
384 | } else { | 392 | } else { |
385 | ret = 0; | 393 | ret = 0; |
386 | } | 394 | } |
387 | dqstats.writes++; | 395 | dqstats_inc(DQST_WRITES); |
388 | kfree(ddquot); | 396 | kfree(ddquot); |
389 | 397 | ||
390 | return ret; | 398 | return ret; |
@@ -402,14 +410,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
402 | if (!buf) | 410 | if (!buf) |
403 | return -ENOMEM; | 411 | return -ENOMEM; |
404 | if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { | 412 | if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { |
405 | printk(KERN_ERR "VFS: Quota structure has offset to other " | 413 | q_warn(KERN_ERR "VFS: Quota structure has offset to other " |
406 | "block (%u) than it should (%u).\n", blk, | 414 | "block (%u) than it should (%u).\n", blk, |
407 | (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); | 415 | (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); |
408 | goto out_buf; | 416 | goto out_buf; |
409 | } | 417 | } |
410 | ret = read_blk(info, blk, buf); | 418 | ret = read_blk(info, blk, buf); |
411 | if (ret < 0) { | 419 | if (ret < 0) { |
412 | printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); | 420 | q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk); |
413 | goto out_buf; | 421 | goto out_buf; |
414 | } | 422 | } |
415 | dh = (struct qt_disk_dqdbheader *)buf; | 423 | dh = (struct qt_disk_dqdbheader *)buf; |
@@ -419,7 +427,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
419 | if (ret >= 0) | 427 | if (ret >= 0) |
420 | ret = put_free_dqblk(info, buf, blk); | 428 | ret = put_free_dqblk(info, buf, blk); |
421 | if (ret < 0) { | 429 | if (ret < 0) { |
422 | printk(KERN_ERR "VFS: Can't move quota data block (%u) " | 430 | q_warn(KERN_ERR "VFS: Can't move quota data block (%u) " |
423 | "to free list.\n", blk); | 431 | "to free list.\n", blk); |
424 | goto out_buf; | 432 | goto out_buf; |
425 | } | 433 | } |
@@ -432,14 +440,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
432 | /* Insert will write block itself */ | 440 | /* Insert will write block itself */ |
433 | ret = insert_free_dqentry(info, buf, blk); | 441 | ret = insert_free_dqentry(info, buf, blk); |
434 | if (ret < 0) { | 442 | if (ret < 0) { |
435 | printk(KERN_ERR "VFS: Can't insert quota data " | 443 | q_warn(KERN_ERR "VFS: Can't insert quota data " |
436 | "block (%u) to free entry list.\n", blk); | 444 | "block (%u) to free entry list.\n", blk); |
437 | goto out_buf; | 445 | goto out_buf; |
438 | } | 446 | } |
439 | } else { | 447 | } else { |
440 | ret = write_blk(info, blk, buf); | 448 | ret = write_blk(info, blk, buf); |
441 | if (ret < 0) { | 449 | if (ret < 0) { |
442 | printk(KERN_ERR "VFS: Can't write quota data " | 450 | q_warn(KERN_ERR "VFS: Can't write quota data " |
443 | "block %u\n", blk); | 451 | "block %u\n", blk); |
444 | goto out_buf; | 452 | goto out_buf; |
445 | } | 453 | } |
@@ -464,7 +472,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
464 | return -ENOMEM; | 472 | return -ENOMEM; |
465 | ret = read_blk(info, *blk, buf); | 473 | ret = read_blk(info, *blk, buf); |
466 | if (ret < 0) { | 474 | if (ret < 0) { |
467 | printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); | 475 | q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); |
468 | goto out_buf; | 476 | goto out_buf; |
469 | } | 477 | } |
470 | newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); | 478 | newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); |
@@ -488,7 +496,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
488 | } else { | 496 | } else { |
489 | ret = write_blk(info, *blk, buf); | 497 | ret = write_blk(info, *blk, buf); |
490 | if (ret < 0) | 498 | if (ret < 0) |
491 | printk(KERN_ERR "VFS: Can't write quota tree " | 499 | q_warn(KERN_ERR "VFS: Can't write quota tree " |
492 | "block %u.\n", *blk); | 500 | "block %u.\n", *blk); |
493 | } | 501 | } |
494 | } | 502 | } |
@@ -521,7 +529,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, | |||
521 | return -ENOMEM; | 529 | return -ENOMEM; |
522 | ret = read_blk(info, blk, buf); | 530 | ret = read_blk(info, blk, buf); |
523 | if (ret < 0) { | 531 | if (ret < 0) { |
524 | printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); | 532 | q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); |
525 | goto out_buf; | 533 | goto out_buf; |
526 | } | 534 | } |
527 | ddquot = buf + sizeof(struct qt_disk_dqdbheader); | 535 | ddquot = buf + sizeof(struct qt_disk_dqdbheader); |
@@ -531,7 +539,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, | |||
531 | ddquot += info->dqi_entry_size; | 539 | ddquot += info->dqi_entry_size; |
532 | } | 540 | } |
533 | if (i == qtree_dqstr_in_blk(info)) { | 541 | if (i == qtree_dqstr_in_blk(info)) { |
534 | printk(KERN_ERR "VFS: Quota for id %u referenced " | 542 | q_warn(KERN_ERR "VFS: Quota for id %u referenced " |
535 | "but not present.\n", dquot->dq_id); | 543 | "but not present.\n", dquot->dq_id); |
536 | ret = -EIO; | 544 | ret = -EIO; |
537 | goto out_buf; | 545 | goto out_buf; |
@@ -556,7 +564,7 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, | |||
556 | return -ENOMEM; | 564 | return -ENOMEM; |
557 | ret = read_blk(info, blk, buf); | 565 | ret = read_blk(info, blk, buf); |
558 | if (ret < 0) { | 566 | if (ret < 0) { |
559 | printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); | 567 | q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); |
560 | goto out_buf; | 568 | goto out_buf; |
561 | } | 569 | } |
562 | ret = 0; | 570 | ret = 0; |
@@ -599,7 +607,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
599 | offset = find_dqentry(info, dquot); | 607 | offset = find_dqentry(info, dquot); |
600 | if (offset <= 0) { /* Entry not present? */ | 608 | if (offset <= 0) { /* Entry not present? */ |
601 | if (offset < 0) | 609 | if (offset < 0) |
602 | printk(KERN_ERR "VFS: Can't read quota " | 610 | q_warn(KERN_ERR "VFS: Can't read quota " |
603 | "structure for id %u.\n", dquot->dq_id); | 611 | "structure for id %u.\n", dquot->dq_id); |
604 | dquot->dq_off = 0; | 612 | dquot->dq_off = 0; |
605 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 613 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
@@ -617,7 +625,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
617 | if (ret != info->dqi_entry_size) { | 625 | if (ret != info->dqi_entry_size) { |
618 | if (ret >= 0) | 626 | if (ret >= 0) |
619 | ret = -EIO; | 627 | ret = -EIO; |
620 | printk(KERN_ERR "VFS: Error while reading quota " | 628 | q_warn(KERN_ERR "VFS: Error while reading quota " |
621 | "structure for id %u.\n", dquot->dq_id); | 629 | "structure for id %u.\n", dquot->dq_id); |
622 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 630 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
623 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); | 631 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); |
@@ -634,7 +642,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
634 | spin_unlock(&dq_data_lock); | 642 | spin_unlock(&dq_data_lock); |
635 | kfree(ddquot); | 643 | kfree(ddquot); |
636 | out: | 644 | out: |
637 | dqstats.reads++; | 645 | dqstats_inc(DQST_READS); |
638 | return ret; | 646 | return ret; |
639 | } | 647 | } |
640 | EXPORT_SYMBOL(qtree_read_dquot); | 648 | EXPORT_SYMBOL(qtree_read_dquot); |
diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h index a1ab8db81a51..ccc3e71fb1d8 100644 --- a/fs/quota/quota_tree.h +++ b/fs/quota/quota_tree.h | |||
@@ -22,4 +22,10 @@ struct qt_disk_dqdbheader { | |||
22 | 22 | ||
23 | #define QT_TREEOFF 1 /* Offset of tree in file in blocks */ | 23 | #define QT_TREEOFF 1 /* Offset of tree in file in blocks */ |
24 | 24 | ||
25 | #define q_warn(fmt, args...) \ | ||
26 | do { \ | ||
27 | if (printk_ratelimit()) \ | ||
28 | printk(fmt, ## args); \ | ||
29 | } while (0) | ||
30 | |||
25 | #endif /* _LINUX_QUOTAIO_TREE_H */ | 31 | #endif /* _LINUX_QUOTAIO_TREE_H */ |
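
Annotation: the q_warn() macro added above wraps printk() in a printk_ratelimit() check, so a corrupted quota file that trips the same error on every access can no longer flood the kernel log. A minimal userspace sketch of the same rate-limiting pattern; the ratelimit() helper and its burst budget are illustrative assumptions, not kernel API (builds with gcc/clang, which support ##__VA_ARGS__):

	#include <stdio.h>
	#include <time.h>

	/* Stand-in for printk_ratelimit(): allow at most BURST messages
	 * per one-second window and silently drop the rest. */
	static int ratelimit(void)
	{
		enum { BURST = 5 };
		static time_t window;
		static int count;
		time_t now = time(NULL);

		if (now != window) {
			window = now;
			count = 0;
		}
		return count++ < BURST;
	}

	#define q_warn(fmt, ...)					\
		do {							\
			if (ratelimit())				\
				fprintf(stderr, fmt, ##__VA_ARGS__);	\
		} while (0)

	int main(void)
	{
		for (int i = 0; i < 100; i++)
			q_warn("quota warning %d\n", i); /* only a short burst prints */
		return 0;
	}
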
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 2ae757e9c008..4af344c5852a 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c | |||
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot) | |||
71 | dquot->dq_dqb.dqb_ihardlimit == 0 && | 71 | dquot->dq_dqb.dqb_ihardlimit == 0 && |
72 | dquot->dq_dqb.dqb_isoftlimit == 0) | 72 | dquot->dq_dqb.dqb_isoftlimit == 0) |
73 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 73 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
74 | dqstats.reads++; | 74 | dqstats_inc(DQST_READS); |
75 | 75 | ||
76 | return 0; | 76 | return 0; |
77 | } | 77 | } |
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot) | |||
104 | ret = 0; | 104 | ret = 0; |
105 | 105 | ||
106 | out: | 106 | out: |
107 | dqstats.writes++; | 107 | dqstats_inc(DQST_WRITES); |
108 | 108 | ||
109 | return ret; | 109 | return ret; |
110 | } | 110 | } |
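
Annotation: both quota_v1.c hunks swap a plain field increment for dqstats_inc(). Hiding the counters behind an accessor lets the representation change without touching callers; the likely motivation is lockless statistics, since the old dqstats.reads++ on a shared struct is a data race under concurrent quota I/O. A rough userspace analogue of such an accessor using C11 relaxed atomics — the enum names mirror the diff, everything else is an assumed shape, not the kernel's implementation (which may well use per-CPU counters instead):

	#include <stdatomic.h>
	#include <stdio.h>

	enum { DQST_READS, DQST_WRITES, _DQST_COUNT };

	static atomic_ulong dqstats[_DQST_COUNT];

	/* Userspace analogue of dqstats_inc(): relaxed ordering is enough
	 * for counters that are only summed later for reporting. */
	static inline void dqstats_inc(int type)
	{
		atomic_fetch_add_explicit(&dqstats[type], 1, memory_order_relaxed);
	}

	int main(void)
	{
		dqstats_inc(DQST_READS);
		dqstats_inc(DQST_READS);
		dqstats_inc(DQST_WRITES);
		printf("reads=%lu writes=%lu\n",
		       atomic_load(&dqstats[DQST_READS]),
		       atomic_load(&dqstats[DQST_WRITES]));
		return 0;
	}
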
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index e3da02f4986f..135206af1458 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c | |||
@@ -63,7 +63,7 @@ static int v2_read_header(struct super_block *sb, int type, | |||
63 | size = sb->s_op->quota_read(sb, type, (char *)dqhead, | 63 | size = sb->s_op->quota_read(sb, type, (char *)dqhead, |
64 | sizeof(struct v2_disk_dqheader), 0); | 64 | sizeof(struct v2_disk_dqheader), 0); |
65 | if (size != sizeof(struct v2_disk_dqheader)) { | 65 | if (size != sizeof(struct v2_disk_dqheader)) { |
66 | printk(KERN_WARNING "quota_v2: Failed header read:" | 66 | q_warn(KERN_WARNING "quota_v2: Failed header read:" |
67 | " expected=%zd got=%zd\n", | 67 | " expected=%zd got=%zd\n", |
68 | sizeof(struct v2_disk_dqheader), size); | 68 | sizeof(struct v2_disk_dqheader), size); |
69 | return 0; | 69 | return 0; |
@@ -106,7 +106,7 @@ static int v2_read_file_info(struct super_block *sb, int type) | |||
106 | size = sb->s_op->quota_read(sb, type, (char *)&dinfo, | 106 | size = sb->s_op->quota_read(sb, type, (char *)&dinfo, |
107 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); | 107 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); |
108 | if (size != sizeof(struct v2_disk_dqinfo)) { | 108 | if (size != sizeof(struct v2_disk_dqinfo)) { |
109 | printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", | 109 | q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", |
110 | sb->s_id); | 110 | sb->s_id); |
111 | return -1; | 111 | return -1; |
112 | } | 112 | } |
@@ -167,7 +167,7 @@ static int v2_write_file_info(struct super_block *sb, int type) | |||
167 | size = sb->s_op->quota_write(sb, type, (char *)&dinfo, | 167 | size = sb->s_op->quota_write(sb, type, (char *)&dinfo, |
168 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); | 168 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); |
169 | if (size != sizeof(struct v2_disk_dqinfo)) { | 169 | if (size != sizeof(struct v2_disk_dqinfo)) { |
170 | printk(KERN_WARNING "Can't write info structure on device %s.\n", | 170 | q_warn(KERN_WARNING "Can't write info structure on device %s.\n", |
171 | sb->s_id); | 171 | sb->s_id); |
172 | return -1; | 172 | return -1; |
173 | } | 173 | } |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index c94853473ca9..a5ebae70dc6d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -52,14 +52,13 @@ static struct backing_dev_info ramfs_backing_dev_info = { | |||
52 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, | 52 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, |
53 | }; | 53 | }; |
54 | 54 | ||
55 | struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | 55 | struct inode *ramfs_get_inode(struct super_block *sb, |
56 | const struct inode *dir, int mode, dev_t dev) | ||
56 | { | 57 | { |
57 | struct inode * inode = new_inode(sb); | 58 | struct inode * inode = new_inode(sb); |
58 | 59 | ||
59 | if (inode) { | 60 | if (inode) { |
60 | inode->i_mode = mode; | 61 | inode_init_owner(inode, dir, mode); |
61 | inode->i_uid = current_fsuid(); | ||
62 | inode->i_gid = current_fsgid(); | ||
63 | inode->i_mapping->a_ops = &ramfs_aops; | 62 | inode->i_mapping->a_ops = &ramfs_aops; |
64 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 63 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
65 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | 64 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
@@ -95,15 +94,10 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
95 | static int | 94 | static int |
96 | ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | 95 | ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) |
97 | { | 96 | { |
98 | struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); | 97 | struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev); |
99 | int error = -ENOSPC; | 98 | int error = -ENOSPC; |
100 | 99 | ||
101 | if (inode) { | 100 | if (inode) { |
102 | if (dir->i_mode & S_ISGID) { | ||
103 | inode->i_gid = dir->i_gid; | ||
104 | if (S_ISDIR(mode)) | ||
105 | inode->i_mode |= S_ISGID; | ||
106 | } | ||
107 | d_instantiate(dentry, inode); | 101 | d_instantiate(dentry, inode); |
108 | dget(dentry); /* Extra count - pin the dentry in core */ | 102 | dget(dentry); /* Extra count - pin the dentry in core */ |
109 | error = 0; | 103 | error = 0; |
@@ -130,13 +124,11 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * | |||
130 | struct inode *inode; | 124 | struct inode *inode; |
131 | int error = -ENOSPC; | 125 | int error = -ENOSPC; |
132 | 126 | ||
133 | inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); | 127 | inode = ramfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0); |
134 | if (inode) { | 128 | if (inode) { |
135 | int l = strlen(symname)+1; | 129 | int l = strlen(symname)+1; |
136 | error = page_symlink(inode, symname, l); | 130 | error = page_symlink(inode, symname, l); |
137 | if (!error) { | 131 | if (!error) { |
138 | if (dir->i_mode & S_ISGID) | ||
139 | inode->i_gid = dir->i_gid; | ||
140 | d_instantiate(dentry, inode); | 132 | d_instantiate(dentry, inode); |
141 | dget(dentry); | 133 | dget(dentry); |
142 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 134 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
@@ -214,7 +206,7 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts) | |||
214 | return 0; | 206 | return 0; |
215 | } | 207 | } |
216 | 208 | ||
217 | static int ramfs_fill_super(struct super_block * sb, void * data, int silent) | 209 | int ramfs_fill_super(struct super_block *sb, void *data, int silent) |
218 | { | 210 | { |
219 | struct ramfs_fs_info *fsi; | 211 | struct ramfs_fs_info *fsi; |
220 | struct inode *inode = NULL; | 212 | struct inode *inode = NULL; |
@@ -241,7 +233,7 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) | |||
241 | sb->s_op = &ramfs_ops; | 233 | sb->s_op = &ramfs_ops; |
242 | sb->s_time_gran = 1; | 234 | sb->s_time_gran = 1; |
243 | 235 | ||
244 | inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); | 236 | inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); |
245 | if (!inode) { | 237 | if (!inode) { |
246 | err = -ENOMEM; | 238 | err = -ENOMEM; |
247 | goto fail; | 239 | goto fail; |
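
Annotation: the ramfs change above (and the reiserfs new_inode_init() change further down) folds duplicated owner and setgid-directory logic into inode_init_owner(), which also explains the new dir argument threaded through ramfs_get_inode(). Reconstructed from the lines each caller deletes, the helper plausibly looks like the sketch below; treat it as a summary of the removed code, assuming the usual kernel headers, not a quote of fs/inode.c:

	/* Sketch: set uid/gid/mode of a new inode, honouring the setgid
	 * bit on the parent directory (dir may be NULL for a root inode,
	 * as in the ramfs_fill_super() call above). */
	void inode_init_owner(struct inode *inode, const struct inode *dir, int mode)
	{
		inode->i_uid = current_fsuid();
		if (dir && dir->i_mode & S_ISGID) {
			inode->i_gid = dir->i_gid;
			/* new directories inherit the setgid bit itself */
			if (S_ISDIR(mode))
				mode |= S_ISGID;
		} else
			inode->i_gid = current_fsgid();
		inode->i_mode = mode;
	}
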
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1d9c12714c5c..9977df9f3a54 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp, | |||
147 | barrier_done = reiserfs_commit_for_inode(inode); | 147 | barrier_done = reiserfs_commit_for_inode(inode); |
148 | reiserfs_write_unlock(inode->i_sb); | 148 | reiserfs_write_unlock(inode->i_sb); |
149 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) | 149 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) |
150 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 150 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
151 | BLKDEV_IFL_WAIT); | ||
151 | if (barrier_done < 0) | 152 | if (barrier_done < 0) |
152 | return barrier_done; | 153 | return barrier_done; |
153 | return (err < 0) ? -EIO : 0; | 154 | return (err < 0) ? -EIO : 0; |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index dc2c65e04853..0f22fdaf54ac 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -3076,9 +3076,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3076 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); | 3076 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); |
3077 | 3077 | ||
3078 | depth = reiserfs_write_lock_once(inode->i_sb); | 3078 | depth = reiserfs_write_lock_once(inode->i_sb); |
3079 | if (attr->ia_valid & ATTR_SIZE) { | 3079 | if (is_quota_modification(inode, attr)) |
3080 | dquot_initialize(inode); | 3080 | dquot_initialize(inode); |
3081 | 3081 | ||
3082 | if (attr->ia_valid & ATTR_SIZE) { | ||
3082 | /* version 2 items will be caught by the s_maxbytes check | 3083 | /* version 2 items will be caught by the s_maxbytes check |
3083 | ** done for us in vmtruncate | 3084 | ** done for us in vmtruncate |
3084 | */ | 3085 | */ |
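
Annotation: the reiserfs_setattr() hunk broadens when quota is initialized — not just on truncate, but whenever the attribute change can move quota charges. is_quota_modification() is presumably a test of roughly this shape, assumed from the call site (the real definition lives in the quota headers):

	/* Sketch: does this setattr() affect anything quota accounts for?
	 * A size change moves block usage; a uid/gid change moves the
	 * charge to a different owner. */
	static inline int is_quota_modification(struct inode *inode, struct iattr *ia)
	{
		return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
		       (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
		       (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
	}
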
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index d0c43cb99ffc..ee78d4a0086a 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -561,23 +561,13 @@ static int drop_new_inode(struct inode *inode) | |||
561 | */ | 561 | */ |
562 | static int new_inode_init(struct inode *inode, struct inode *dir, int mode) | 562 | static int new_inode_init(struct inode *inode, struct inode *dir, int mode) |
563 | { | 563 | { |
564 | |||
565 | /* the quota init calls have to know who to charge the quota to, so | ||
566 | ** we have to set uid and gid here | ||
567 | */ | ||
568 | inode->i_uid = current_fsuid(); | ||
569 | inode->i_mode = mode; | ||
570 | /* Make inode invalid - just in case we are going to drop it before | 564 | /* Make inode invalid - just in case we are going to drop it before |
571 | * the initialization happens */ | 565 | * the initialization happens */ |
572 | INODE_PKEY(inode)->k_objectid = 0; | 566 | INODE_PKEY(inode)->k_objectid = 0; |
573 | 567 | /* the quota init calls have to know who to charge the quota to, so | |
574 | if (dir->i_mode & S_ISGID) { | 568 | ** we have to set uid and gid here |
575 | inode->i_gid = dir->i_gid; | 569 | */ |
576 | if (S_ISDIR(mode)) | 570 | inode_init_owner(inode, dir, mode); |
577 | inode->i_mode |= S_ISGID; | ||
578 | } else { | ||
579 | inode->i_gid = current_fsgid(); | ||
580 | } | ||
581 | dquot_initialize(inode); | 571 | dquot_initialize(inode); |
582 | return 0; | 572 | return 0; |
583 | } | 573 | } |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e7cc00e636dc..8c4cf273c672 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -723,11 +723,11 @@ out: | |||
723 | (handler) = *(handlers)++) | 723 | (handler) = *(handlers)++) |
724 | 724 | ||
725 | /* This is the implementation for the xattr plugin infrastructure */ | 725 | /* This is the implementation for the xattr plugin infrastructure */ |
726 | static inline struct xattr_handler * | 726 | static inline const struct xattr_handler * |
727 | find_xattr_handler_prefix(struct xattr_handler **handlers, | 727 | find_xattr_handler_prefix(const struct xattr_handler **handlers, |
728 | const char *name) | 728 | const char *name) |
729 | { | 729 | { |
730 | struct xattr_handler *xah; | 730 | const struct xattr_handler *xah; |
731 | 731 | ||
732 | if (!handlers) | 732 | if (!handlers) |
733 | return NULL; | 733 | return NULL; |
@@ -748,7 +748,7 @@ ssize_t | |||
748 | reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, | 748 | reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, |
749 | size_t size) | 749 | size_t size) |
750 | { | 750 | { |
751 | struct xattr_handler *handler; | 751 | const struct xattr_handler *handler; |
752 | 752 | ||
753 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); | 753 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); |
754 | 754 | ||
@@ -767,7 +767,7 @@ int | |||
767 | reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 767 | reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
768 | size_t size, int flags) | 768 | size_t size, int flags) |
769 | { | 769 | { |
770 | struct xattr_handler *handler; | 770 | const struct xattr_handler *handler; |
771 | 771 | ||
772 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); | 772 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); |
773 | 773 | ||
@@ -784,7 +784,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
784 | */ | 784 | */ |
785 | int reiserfs_removexattr(struct dentry *dentry, const char *name) | 785 | int reiserfs_removexattr(struct dentry *dentry, const char *name) |
786 | { | 786 | { |
787 | struct xattr_handler *handler; | 787 | const struct xattr_handler *handler; |
788 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); | 788 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); |
789 | 789 | ||
790 | if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) | 790 | if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) |
@@ -807,7 +807,7 @@ static int listxattr_filler(void *buf, const char *name, int namelen, | |||
807 | size_t size; | 807 | size_t size; |
808 | if (name[0] != '.' || | 808 | if (name[0] != '.' || |
809 | (namelen != 1 && (name[1] != '.' || namelen != 2))) { | 809 | (namelen != 1 && (name[1] != '.' || namelen != 2))) { |
810 | struct xattr_handler *handler; | 810 | const struct xattr_handler *handler; |
811 | handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, | 811 | handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, |
812 | name); | 812 | name); |
813 | if (!handler) /* Unsupported xattr name */ | 813 | if (!handler) /* Unsupported xattr name */ |
@@ -920,7 +920,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } | |||
920 | #endif | 920 | #endif |
921 | 921 | ||
922 | /* Actual operations that are exported to VFS-land */ | 922 | /* Actual operations that are exported to VFS-land */ |
923 | struct xattr_handler *reiserfs_xattr_handlers[] = { | 923 | const struct xattr_handler *reiserfs_xattr_handlers[] = { |
924 | #ifdef CONFIG_REISERFS_FS_XATTR | 924 | #ifdef CONFIG_REISERFS_FS_XATTR |
925 | &reiserfs_xattr_user_handler, | 925 | &reiserfs_xattr_user_handler, |
926 | &reiserfs_xattr_trusted_handler, | 926 | &reiserfs_xattr_trusted_handler, |
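
Annotation: the remaining reiserfs xattr changes are pure constification. Handler structs and the tables pointing at them are initialized once and only ever read, so marking them const lets them live in read-only data and turns any accidental write into a compile error. The resulting shape, with made-up handler names (a standalone sketch, not the reiserfs definitions):

	#include <stddef.h>

	struct xattr_handler {
		const char *prefix;
		int (*get)(const char *name, void *buf, size_t size);
		int (*set)(const char *name, const void *buf, size_t size);
	};

	/* Sketch of a read-only ops table entry. */
	static const struct xattr_handler user_handler = {
		.prefix = "user.",
		/* .get and .set would point at real implementations */
	};

	/* Array of pointers-to-const, matching the diff: the table itself
	 * stays writable here, but no entry can be modified through it. */
	const struct xattr_handler *example_handlers[] = {
		&user_handler,
		NULL,	/* sentinel */
	};
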
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 9cdb759645a9..536d697a8a28 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -500,7 +500,7 @@ static size_t posix_acl_access_list(struct dentry *dentry, char *list, | |||
500 | return size; | 500 | return size; |
501 | } | 501 | } |
502 | 502 | ||
503 | struct xattr_handler reiserfs_posix_acl_access_handler = { | 503 | const struct xattr_handler reiserfs_posix_acl_access_handler = { |
504 | .prefix = POSIX_ACL_XATTR_ACCESS, | 504 | .prefix = POSIX_ACL_XATTR_ACCESS, |
505 | .flags = ACL_TYPE_ACCESS, | 505 | .flags = ACL_TYPE_ACCESS, |
506 | .get = posix_acl_get, | 506 | .get = posix_acl_get, |
@@ -520,7 +520,7 @@ static size_t posix_acl_default_list(struct dentry *dentry, char *list, | |||
520 | return size; | 520 | return size; |
521 | } | 521 | } |
522 | 522 | ||
523 | struct xattr_handler reiserfs_posix_acl_default_handler = { | 523 | const struct xattr_handler reiserfs_posix_acl_default_handler = { |
524 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 524 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
525 | .flags = ACL_TYPE_DEFAULT, | 525 | .flags = ACL_TYPE_DEFAULT, |
526 | .get = posix_acl_get, | 526 | .get = posix_acl_get, |
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 7271a477c041..237c6928d3c6 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c | |||
@@ -111,7 +111,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec) | |||
111 | sec->value = NULL; | 111 | sec->value = NULL; |
112 | } | 112 | } |
113 | 113 | ||
114 | struct xattr_handler reiserfs_xattr_security_handler = { | 114 | const struct xattr_handler reiserfs_xattr_security_handler = { |
115 | .prefix = XATTR_SECURITY_PREFIX, | 115 | .prefix = XATTR_SECURITY_PREFIX, |
116 | .get = security_get, | 116 | .get = security_get, |
117 | .set = security_set, | 117 | .set = security_set, |
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 5b08aaca3daf..9883736ce3ec 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c | |||
@@ -48,7 +48,7 @@ static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size, | |||
48 | return len; | 48 | return len; |
49 | } | 49 | } |
50 | 50 | ||
51 | struct xattr_handler reiserfs_xattr_trusted_handler = { | 51 | const struct xattr_handler reiserfs_xattr_trusted_handler = { |
52 | .prefix = XATTR_TRUSTED_PREFIX, | 52 | .prefix = XATTR_TRUSTED_PREFIX, |
53 | .get = trusted_get, | 53 | .get = trusted_get, |
54 | .set = trusted_set, | 54 | .set = trusted_set, |
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 75d59c49b911..45ae1a00013a 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c | |||
@@ -44,7 +44,7 @@ static size_t user_list(struct dentry *dentry, char *list, size_t list_size, | |||
44 | return len; | 44 | return len; |
45 | } | 45 | } |
46 | 46 | ||
47 | struct xattr_handler reiserfs_xattr_user_handler = { | 47 | const struct xattr_handler reiserfs_xattr_user_handler = { |
48 | .prefix = XATTR_USER_PREFIX, | 48 | .prefix = XATTR_USER_PREFIX, |
49 | .get = user_get, | 49 | .get = user_get, |
50 | .set = user_set, | 50 | .set = user_set, |
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 3e4803b4427e..6c978428892d 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c | |||
@@ -39,7 +39,7 @@ const struct file_operations smb_dir_operations = | |||
39 | { | 39 | { |
40 | .read = generic_read_dir, | 40 | .read = generic_read_dir, |
41 | .readdir = smb_readdir, | 41 | .readdir = smb_readdir, |
42 | .ioctl = smb_ioctl, | 42 | .unlocked_ioctl = smb_ioctl, |
43 | .open = smb_dir_open, | 43 | .open = smb_dir_open, |
44 | }; | 44 | }; |
45 | 45 | ||
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index dbf6548bbf06..84ecf0e43f91 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c | |||
@@ -437,7 +437,7 @@ const struct file_operations smb_file_operations = | |||
437 | .aio_read = smb_file_aio_read, | 437 | .aio_read = smb_file_aio_read, |
438 | .write = do_sync_write, | 438 | .write = do_sync_write, |
439 | .aio_write = smb_file_aio_write, | 439 | .aio_write = smb_file_aio_write, |
440 | .ioctl = smb_ioctl, | 440 | .unlocked_ioctl = smb_ioctl, |
441 | .mmap = smb_file_mmap, | 441 | .mmap = smb_file_mmap, |
442 | .open = smb_file_open, | 442 | .open = smb_file_open, |
443 | .release = smb_file_release, | 443 | .release = smb_file_release, |
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c index dbae1f8ea26f..07215312ad39 100644 --- a/fs/smbfs/ioctl.c +++ b/fs/smbfs/ioctl.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/highuid.h> | 15 | #include <linux/highuid.h> |
16 | #include <linux/smp_lock.h> | ||
16 | #include <linux/net.h> | 17 | #include <linux/net.h> |
17 | 18 | ||
18 | #include <linux/smb_fs.h> | 19 | #include <linux/smb_fs.h> |
@@ -22,14 +23,14 @@ | |||
22 | 23 | ||
23 | #include "proto.h" | 24 | #include "proto.h" |
24 | 25 | ||
25 | int | 26 | long |
26 | smb_ioctl(struct inode *inode, struct file *filp, | 27 | smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
27 | unsigned int cmd, unsigned long arg) | ||
28 | { | 28 | { |
29 | struct smb_sb_info *server = server_from_inode(inode); | 29 | struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode); |
30 | struct smb_conn_opt opt; | 30 | struct smb_conn_opt opt; |
31 | int result = -EINVAL; | 31 | int result = -EINVAL; |
32 | 32 | ||
33 | lock_kernel(); | ||
33 | switch (cmd) { | 34 | switch (cmd) { |
34 | uid16_t uid16; | 35 | uid16_t uid16; |
35 | uid_t uid32; | 36 | uid_t uid32; |
@@ -62,6 +63,7 @@ smb_ioctl(struct inode *inode, struct file *filp, | |||
62 | default: | 63 | default: |
63 | break; | 64 | break; |
64 | } | 65 | } |
66 | unlock_kernel(); | ||
65 | 67 | ||
66 | return result; | 68 | return result; |
67 | } | 69 | } |
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index 03f456c1b7d4..05939a6f43e6 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h | |||
@@ -67,7 +67,7 @@ extern const struct address_space_operations smb_file_aops; | |||
67 | extern const struct file_operations smb_file_operations; | 67 | extern const struct file_operations smb_file_operations; |
68 | extern const struct inode_operations smb_file_inode_operations; | 68 | extern const struct inode_operations smb_file_inode_operations; |
69 | /* ioctl.c */ | 69 | /* ioctl.c */ |
70 | extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); | 70 | extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
71 | /* smbiod.c */ | 71 | /* smbiod.c */ |
72 | extern void smbiod_wake_up(void); | 72 | extern void smbiod_wake_up(void); |
73 | extern int smbiod_register_server(struct smb_sb_info *server); | 73 | extern int smbiod_register_server(struct smb_sb_info *server); |
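
Annotation: the smbfs hunks (dir.c, file.c, ioctl.c, proto.h) are a textbook Big Kernel Lock push-down. The legacy .ioctl hook ran with the BKL held and received the inode explicitly; .unlocked_ioctl receives only the file and must serialize for itself, so the conversion derives the inode from filp and brackets the old body with lock_kernel()/unlock_kernel(). The recipe in miniature, with placeholder names and assuming kernel context (a sketch of the pattern, not smbfs code):

	/* Old prototype, called under the BKL:
	 *   int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long);
	 * New prototype, called with no locks held: */
	static long foo_unlocked_ioctl(struct file *filp, unsigned int cmd,
				       unsigned long arg)
	{
		struct inode *inode = filp->f_path.dentry->d_inode;
		long ret = -EINVAL;

		lock_kernel();	/* keep the old serialization until it can
				 * be replaced with a private mutex */
		switch (cmd) {
		/* ... per-command handling, now using the derived inode ... */
		default:
			break;
		}
		unlock_kernel();
		return ret;
	}
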
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c index 54350b59046b..00b2909bd469 100644 --- a/fs/smbfs/symlink.c +++ b/fs/smbfs/symlink.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/pagemap.h> | 15 | #include <linux/pagemap.h> |
16 | #include <linux/net.h> | 16 | #include <linux/net.h> |
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/slab.h> | ||
19 | 18 | ||
20 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
21 | #include <asm/system.h> | 20 | #include <asm/system.h> |
diff --git a/fs/splice.c b/fs/splice.c index 9313b6124a2e..ac22b00d86c3 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
193 | break; | 193 | break; |
194 | } | 194 | } |
195 | 195 | ||
196 | if (pipe->nrbufs < PIPE_BUFFERS) { | 196 | if (pipe->nrbufs < pipe->buffers) { |
197 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); | 197 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); |
198 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 198 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
199 | 199 | ||
200 | buf->page = spd->pages[page_nr]; | 200 | buf->page = spd->pages[page_nr]; |
@@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
214 | 214 | ||
215 | if (!--spd->nr_pages) | 215 | if (!--spd->nr_pages) |
216 | break; | 216 | break; |
217 | if (pipe->nrbufs < PIPE_BUFFERS) | 217 | if (pipe->nrbufs < pipe->buffers) |
218 | continue; | 218 | continue; |
219 | 219 | ||
220 | break; | 220 | break; |
@@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) | |||
265 | page_cache_release(spd->pages[i]); | 265 | page_cache_release(spd->pages[i]); |
266 | } | 266 | } |
267 | 267 | ||
268 | /* | ||
269 | * Check if we need to grow the arrays holding pages and partial page | ||
270 | * descriptions. | ||
271 | */ | ||
272 | int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) | ||
273 | { | ||
274 | if (pipe->buffers <= PIPE_DEF_BUFFERS) | ||
275 | return 0; | ||
276 | |||
277 | spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); | ||
278 | spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); | ||
279 | |||
280 | if (spd->pages && spd->partial) | ||
281 | return 0; | ||
282 | |||
283 | kfree(spd->pages); | ||
284 | kfree(spd->partial); | ||
285 | return -ENOMEM; | ||
286 | } | ||
287 | |||
288 | void splice_shrink_spd(struct pipe_inode_info *pipe, | ||
289 | struct splice_pipe_desc *spd) | ||
290 | { | ||
291 | if (pipe->buffers <= PIPE_DEF_BUFFERS) | ||
292 | return; | ||
293 | |||
294 | kfree(spd->pages); | ||
295 | kfree(spd->partial); | ||
296 | } | ||
297 | |||
268 | static int | 298 | static int |
269 | __generic_file_splice_read(struct file *in, loff_t *ppos, | 299 | __generic_file_splice_read(struct file *in, loff_t *ppos, |
270 | struct pipe_inode_info *pipe, size_t len, | 300 | struct pipe_inode_info *pipe, size_t len, |
@@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
272 | { | 302 | { |
273 | struct address_space *mapping = in->f_mapping; | 303 | struct address_space *mapping = in->f_mapping; |
274 | unsigned int loff, nr_pages, req_pages; | 304 | unsigned int loff, nr_pages, req_pages; |
275 | struct page *pages[PIPE_BUFFERS]; | 305 | struct page *pages[PIPE_DEF_BUFFERS]; |
276 | struct partial_page partial[PIPE_BUFFERS]; | 306 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
277 | struct page *page; | 307 | struct page *page; |
278 | pgoff_t index, end_index; | 308 | pgoff_t index, end_index; |
279 | loff_t isize; | 309 | loff_t isize; |
@@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
286 | .spd_release = spd_release_page, | 316 | .spd_release = spd_release_page, |
287 | }; | 317 | }; |
288 | 318 | ||
319 | if (splice_grow_spd(pipe, &spd)) | ||
320 | return -ENOMEM; | ||
321 | |||
289 | index = *ppos >> PAGE_CACHE_SHIFT; | 322 | index = *ppos >> PAGE_CACHE_SHIFT; |
290 | loff = *ppos & ~PAGE_CACHE_MASK; | 323 | loff = *ppos & ~PAGE_CACHE_MASK; |
291 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 324 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
292 | nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); | 325 | nr_pages = min(req_pages, pipe->buffers); |
293 | 326 | ||
294 | /* | 327 | /* |
295 | * Lookup the (hopefully) full range of pages we need. | 328 | * Lookup the (hopefully) full range of pages we need. |
296 | */ | 329 | */ |
297 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); | 330 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); |
298 | index += spd.nr_pages; | 331 | index += spd.nr_pages; |
299 | 332 | ||
300 | /* | 333 | /* |
@@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
335 | unlock_page(page); | 368 | unlock_page(page); |
336 | } | 369 | } |
337 | 370 | ||
338 | pages[spd.nr_pages++] = page; | 371 | spd.pages[spd.nr_pages++] = page; |
339 | index++; | 372 | index++; |
340 | } | 373 | } |
341 | 374 | ||
@@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
356 | * this_len is the max we'll use from this page | 389 | * this_len is the max we'll use from this page |
357 | */ | 390 | */ |
358 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); | 391 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); |
359 | page = pages[page_nr]; | 392 | page = spd.pages[page_nr]; |
360 | 393 | ||
361 | if (PageReadahead(page)) | 394 | if (PageReadahead(page)) |
362 | page_cache_async_readahead(mapping, &in->f_ra, in, | 395 | page_cache_async_readahead(mapping, &in->f_ra, in, |
@@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
393 | error = -ENOMEM; | 426 | error = -ENOMEM; |
394 | break; | 427 | break; |
395 | } | 428 | } |
396 | page_cache_release(pages[page_nr]); | 429 | page_cache_release(spd.pages[page_nr]); |
397 | pages[page_nr] = page; | 430 | spd.pages[page_nr] = page; |
398 | } | 431 | } |
399 | /* | 432 | /* |
400 | * page was already under io and is now done, great | 433 | * page was already under io and is now done, great |
@@ -451,8 +484,8 @@ fill_it: | |||
451 | len = this_len; | 484 | len = this_len; |
452 | } | 485 | } |
453 | 486 | ||
454 | partial[page_nr].offset = loff; | 487 | spd.partial[page_nr].offset = loff; |
455 | partial[page_nr].len = this_len; | 488 | spd.partial[page_nr].len = this_len; |
456 | len -= this_len; | 489 | len -= this_len; |
457 | loff = 0; | 490 | loff = 0; |
458 | spd.nr_pages++; | 491 | spd.nr_pages++; |
@@ -464,12 +497,13 @@ fill_it: | |||
464 | * we got, 'nr_pages' is how many pages are in the map. | 497 | * we got, 'nr_pages' is how many pages are in the map. |
465 | */ | 498 | */ |
466 | while (page_nr < nr_pages) | 499 | while (page_nr < nr_pages) |
467 | page_cache_release(pages[page_nr++]); | 500 | page_cache_release(spd.pages[page_nr++]); |
468 | in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 501 | in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
469 | 502 | ||
470 | if (spd.nr_pages) | 503 | if (spd.nr_pages) |
471 | return splice_to_pipe(pipe, &spd); | 504 | error = splice_to_pipe(pipe, &spd); |
472 | 505 | ||
506 | splice_shrink_spd(pipe, &spd); | ||
473 | return error; | 507 | return error; |
474 | } | 508 | } |
475 | 509 | ||
@@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
560 | unsigned int nr_pages; | 594 | unsigned int nr_pages; |
561 | unsigned int nr_freed; | 595 | unsigned int nr_freed; |
562 | size_t offset; | 596 | size_t offset; |
563 | struct page *pages[PIPE_BUFFERS]; | 597 | struct page *pages[PIPE_DEF_BUFFERS]; |
564 | struct partial_page partial[PIPE_BUFFERS]; | 598 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
565 | struct iovec vec[PIPE_BUFFERS]; | 599 | struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; |
566 | pgoff_t index; | 600 | pgoff_t index; |
567 | ssize_t res; | 601 | ssize_t res; |
568 | size_t this_len; | 602 | size_t this_len; |
@@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
576 | .spd_release = spd_release_page, | 610 | .spd_release = spd_release_page, |
577 | }; | 611 | }; |
578 | 612 | ||
613 | if (splice_grow_spd(pipe, &spd)) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | res = -ENOMEM; | ||
617 | vec = __vec; | ||
618 | if (pipe->buffers > PIPE_DEF_BUFFERS) { | ||
619 | vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); | ||
620 | if (!vec) | ||
621 | goto shrink_ret; | ||
622 | } | ||
623 | |||
579 | index = *ppos >> PAGE_CACHE_SHIFT; | 624 | index = *ppos >> PAGE_CACHE_SHIFT; |
580 | offset = *ppos & ~PAGE_CACHE_MASK; | 625 | offset = *ppos & ~PAGE_CACHE_MASK; |
581 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 626 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
582 | 627 | ||
583 | for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { | 628 | for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { |
584 | struct page *page; | 629 | struct page *page; |
585 | 630 | ||
586 | page = alloc_page(GFP_USER); | 631 | page = alloc_page(GFP_USER); |
@@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
591 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); | 636 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); |
592 | vec[i].iov_base = (void __user *) page_address(page); | 637 | vec[i].iov_base = (void __user *) page_address(page); |
593 | vec[i].iov_len = this_len; | 638 | vec[i].iov_len = this_len; |
594 | pages[i] = page; | 639 | spd.pages[i] = page; |
595 | spd.nr_pages++; | 640 | spd.nr_pages++; |
596 | len -= this_len; | 641 | len -= this_len; |
597 | offset = 0; | 642 | offset = 0; |
@@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
610 | nr_freed = 0; | 655 | nr_freed = 0; |
611 | for (i = 0; i < spd.nr_pages; i++) { | 656 | for (i = 0; i < spd.nr_pages; i++) { |
612 | this_len = min_t(size_t, vec[i].iov_len, res); | 657 | this_len = min_t(size_t, vec[i].iov_len, res); |
613 | partial[i].offset = 0; | 658 | spd.partial[i].offset = 0; |
614 | partial[i].len = this_len; | 659 | spd.partial[i].len = this_len; |
615 | if (!this_len) { | 660 | if (!this_len) { |
616 | __free_page(pages[i]); | 661 | __free_page(spd.pages[i]); |
617 | pages[i] = NULL; | 662 | spd.pages[i] = NULL; |
618 | nr_freed++; | 663 | nr_freed++; |
619 | } | 664 | } |
620 | res -= this_len; | 665 | res -= this_len; |
@@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
625 | if (res > 0) | 670 | if (res > 0) |
626 | *ppos += res; | 671 | *ppos += res; |
627 | 672 | ||
673 | shrink_ret: | ||
674 | if (vec != __vec) | ||
675 | kfree(vec); | ||
676 | splice_shrink_spd(pipe, &spd); | ||
628 | return res; | 677 | return res; |
629 | 678 | ||
630 | err: | 679 | err: |
631 | for (i = 0; i < spd.nr_pages; i++) | 680 | for (i = 0; i < spd.nr_pages; i++) |
632 | __free_page(pages[i]); | 681 | __free_page(spd.pages[i]); |
633 | 682 | ||
634 | return error; | 683 | res = error; |
684 | goto shrink_ret; | ||
635 | } | 685 | } |
636 | EXPORT_SYMBOL(default_file_splice_read); | 686 | EXPORT_SYMBOL(default_file_splice_read); |
637 | 687 | ||
@@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |||
784 | if (!buf->len) { | 834 | if (!buf->len) { |
785 | buf->ops = NULL; | 835 | buf->ops = NULL; |
786 | ops->release(pipe, buf); | 836 | ops->release(pipe, buf); |
787 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | 837 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
788 | pipe->nrbufs--; | 838 | pipe->nrbufs--; |
789 | if (pipe->inode) | 839 | if (pipe->inode) |
790 | sd->need_wakeup = true; | 840 | sd->need_wakeup = true; |
@@ -1211,7 +1261,7 @@ out_release: | |||
1211 | * If we did an incomplete transfer we must release | 1261 | * If we did an incomplete transfer we must release |
1212 | * the pipe buffers in question: | 1262 | * the pipe buffers in question: |
1213 | */ | 1263 | */ |
1214 | for (i = 0; i < PIPE_BUFFERS; i++) { | 1264 | for (i = 0; i < pipe->buffers; i++) { |
1215 | struct pipe_buffer *buf = pipe->bufs + i; | 1265 | struct pipe_buffer *buf = pipe->bufs + i; |
1216 | 1266 | ||
1217 | if (buf->ops) { | 1267 | if (buf->ops) { |
@@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1371 | */ | 1421 | */ |
1372 | static int get_iovec_page_array(const struct iovec __user *iov, | 1422 | static int get_iovec_page_array(const struct iovec __user *iov, |
1373 | unsigned int nr_vecs, struct page **pages, | 1423 | unsigned int nr_vecs, struct page **pages, |
1374 | struct partial_page *partial, int aligned) | 1424 | struct partial_page *partial, int aligned, |
1425 | unsigned int pipe_buffers) | ||
1375 | { | 1426 | { |
1376 | int buffers = 0, error = 0; | 1427 | int buffers = 0, error = 0; |
1377 | 1428 | ||
@@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1414 | break; | 1465 | break; |
1415 | 1466 | ||
1416 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1467 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1417 | if (npages > PIPE_BUFFERS - buffers) | 1468 | if (npages > pipe_buffers - buffers) |
1418 | npages = PIPE_BUFFERS - buffers; | 1469 | npages = pipe_buffers - buffers; |
1419 | 1470 | ||
1420 | error = get_user_pages_fast((unsigned long)base, npages, | 1471 | error = get_user_pages_fast((unsigned long)base, npages, |
1421 | 0, &pages[buffers]); | 1472 | 0, &pages[buffers]); |
@@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1450 | * or if we mapped the max number of pages that we have | 1501 | * or if we mapped the max number of pages that we have |
1451 | * room for. | 1502 | * room for. |
1452 | */ | 1503 | */ |
1453 | if (error < npages || buffers == PIPE_BUFFERS) | 1504 | if (error < npages || buffers == pipe_buffers) |
1454 | break; | 1505 | break; |
1455 | 1506 | ||
1456 | nr_vecs--; | 1507 | nr_vecs--; |
@@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1593 | unsigned long nr_segs, unsigned int flags) | 1644 | unsigned long nr_segs, unsigned int flags) |
1594 | { | 1645 | { |
1595 | struct pipe_inode_info *pipe; | 1646 | struct pipe_inode_info *pipe; |
1596 | struct page *pages[PIPE_BUFFERS]; | 1647 | struct page *pages[PIPE_DEF_BUFFERS]; |
1597 | struct partial_page partial[PIPE_BUFFERS]; | 1648 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
1598 | struct splice_pipe_desc spd = { | 1649 | struct splice_pipe_desc spd = { |
1599 | .pages = pages, | 1650 | .pages = pages, |
1600 | .partial = partial, | 1651 | .partial = partial, |
@@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1602 | .ops = &user_page_pipe_buf_ops, | 1653 | .ops = &user_page_pipe_buf_ops, |
1603 | .spd_release = spd_release_page, | 1654 | .spd_release = spd_release_page, |
1604 | }; | 1655 | }; |
1656 | long ret; | ||
1605 | 1657 | ||
1606 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1658 | pipe = pipe_info(file->f_path.dentry->d_inode); |
1607 | if (!pipe) | 1659 | if (!pipe) |
1608 | return -EBADF; | 1660 | return -EBADF; |
1609 | 1661 | ||
1610 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, | 1662 | if (splice_grow_spd(pipe, &spd)) |
1611 | flags & SPLICE_F_GIFT); | 1663 | return -ENOMEM; |
1664 | |||
1665 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, | ||
1666 | spd.partial, flags & SPLICE_F_GIFT, | ||
1667 | pipe->buffers); | ||
1612 | if (spd.nr_pages <= 0) | 1668 | if (spd.nr_pages <= 0) |
1613 | return spd.nr_pages; | 1669 | ret = spd.nr_pages; |
1670 | else | ||
1671 | ret = splice_to_pipe(pipe, &spd); | ||
1614 | 1672 | ||
1615 | return splice_to_pipe(pipe, &spd); | 1673 | splice_shrink_spd(pipe, &spd); |
1674 | return ret; | ||
1616 | } | 1675 | } |
1617 | 1676 | ||
1618 | /* | 1677 | /* |
@@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1738 | * Check ->nrbufs without the inode lock first. This function | 1797 | * Check ->nrbufs without the inode lock first. This function |
1739 | * is speculative anyway, so missing one is ok. | 1798 | * is speculative anyway, so missing one is ok. |
1740 | */ | 1799 | */ |
1741 | if (pipe->nrbufs < PIPE_BUFFERS) | 1800 | if (pipe->nrbufs < pipe->buffers) |
1742 | return 0; | 1801 | return 0; |
1743 | 1802 | ||
1744 | ret = 0; | 1803 | ret = 0; |
1745 | pipe_lock(pipe); | 1804 | pipe_lock(pipe); |
1746 | 1805 | ||
1747 | while (pipe->nrbufs >= PIPE_BUFFERS) { | 1806 | while (pipe->nrbufs >= pipe->buffers) { |
1748 | if (!pipe->readers) { | 1807 | if (!pipe->readers) { |
1749 | send_sig(SIGPIPE, current, 0); | 1808 | send_sig(SIGPIPE, current, 0); |
1750 | ret = -EPIPE; | 1809 | ret = -EPIPE; |
@@ -1810,7 +1869,7 @@ retry: | |||
1810 | * Cannot make any progress, because either the input | 1869 | * Cannot make any progress, because either the input |
1811 | * pipe is empty or the output pipe is full. | 1870 | * pipe is empty or the output pipe is full. |
1812 | */ | 1871 | */ |
1813 | if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { | 1872 | if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { |
1814 | /* Already processed some buffers, break */ | 1873 | /* Already processed some buffers, break */ |
1815 | if (ret) | 1874 | if (ret) |
1816 | break; | 1875 | break; |
@@ -1831,7 +1890,7 @@ retry: | |||
1831 | } | 1890 | } |
1832 | 1891 | ||
1833 | ibuf = ipipe->bufs + ipipe->curbuf; | 1892 | ibuf = ipipe->bufs + ipipe->curbuf; |
1834 | nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; | 1893 | nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); |
1835 | obuf = opipe->bufs + nbuf; | 1894 | obuf = opipe->bufs + nbuf; |
1836 | 1895 | ||
1837 | if (len >= ibuf->len) { | 1896 | if (len >= ibuf->len) { |
@@ -1841,7 +1900,7 @@ retry: | |||
1841 | *obuf = *ibuf; | 1900 | *obuf = *ibuf; |
1842 | ibuf->ops = NULL; | 1901 | ibuf->ops = NULL; |
1843 | opipe->nrbufs++; | 1902 | opipe->nrbufs++; |
1844 | ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; | 1903 | ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); |
1845 | ipipe->nrbufs--; | 1904 | ipipe->nrbufs--; |
1846 | input_wakeup = true; | 1905 | input_wakeup = true; |
1847 | } else { | 1906 | } else { |
@@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1914 | * If we have iterated all input buffers or ran out of | 1973 | * If we have iterated all input buffers or ran out of |
1915 | * output room, break. | 1974 | * output room, break. |
1916 | */ | 1975 | */ |
1917 | if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) | 1976 | if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) |
1918 | break; | 1977 | break; |
1919 | 1978 | ||
1920 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | 1979 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); |
1921 | nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | 1980 | nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); |
1922 | 1981 | ||
1923 | /* | 1982 | /* |
1924 | * Get a reference to this pipe buffer, | 1983 | * Get a reference to this pipe buffer, |
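
Annotation: a pattern worth calling out in the splice.c conversion: the compile-time PIPE_BUFFERS constant becomes the per-pipe pipe->buffers field, the on-stack arrays shrink to PIPE_DEF_BUFFERS with splice_grow_spd()/splice_shrink_spd() allocating larger ones on demand, and every `% PIPE_BUFFERS` becomes `& (buffers - 1)`. The mask form equals the modulo only for power-of-two sizes, so the pipe resizing path must keep pipe->buffers a power of two — an assumption of this reading, verifiable with a few lines of C:

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int buffers = 16;	/* any power of two */

		/* power-of-two test: exactly one bit set */
		assert(buffers && (buffers & (buffers - 1)) == 0);

		for (unsigned int curbuf = 0; curbuf < buffers; curbuf++) {
			unsigned int nrbufs = 5;	/* arbitrary occupancy */
			assert(((curbuf + nrbufs) % buffers) ==
			       ((curbuf + nrbufs) & (buffers - 1)));
		}
		printf("mask and modulo agree for power-of-two ring sizes\n");
		return 0;
	}
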
diff --git a/fs/statfs.c b/fs/statfs.c new file mode 100644 index 000000000000..4ef021f3b612 --- /dev/null +++ b/fs/statfs.c | |||
@@ -0,0 +1,196 @@ | |||
1 | #include <linux/syscalls.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/file.h> | ||
5 | #include <linux/namei.h> | ||
6 | #include <linux/statfs.h> | ||
7 | #include <linux/security.h> | ||
8 | #include <linux/uaccess.h> | ||
9 | |||
10 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
11 | { | ||
12 | int retval = -ENODEV; | ||
13 | |||
14 | if (dentry) { | ||
15 | retval = -ENOSYS; | ||
16 | if (dentry->d_sb->s_op->statfs) { | ||
17 | memset(buf, 0, sizeof(*buf)); | ||
18 | retval = security_sb_statfs(dentry); | ||
19 | if (retval) | ||
20 | return retval; | ||
21 | retval = dentry->d_sb->s_op->statfs(dentry, buf); | ||
22 | if (retval == 0 && buf->f_frsize == 0) | ||
23 | buf->f_frsize = buf->f_bsize; | ||
24 | } | ||
25 | } | ||
26 | return retval; | ||
27 | } | ||
28 | |||
29 | EXPORT_SYMBOL(vfs_statfs); | ||
30 | |||
31 | static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) | ||
32 | { | ||
33 | struct kstatfs st; | ||
34 | int retval; | ||
35 | |||
36 | retval = vfs_statfs(dentry, &st); | ||
37 | if (retval) | ||
38 | return retval; | ||
39 | |||
40 | if (sizeof(*buf) == sizeof(st)) | ||
41 | memcpy(buf, &st, sizeof(st)); | ||
42 | else { | ||
43 | if (sizeof buf->f_blocks == 4) { | ||
44 | if ((st.f_blocks | st.f_bfree | st.f_bavail | | ||
45 | st.f_bsize | st.f_frsize) & | ||
46 | 0xffffffff00000000ULL) | ||
47 | return -EOVERFLOW; | ||
48 | /* | ||
49 | * f_files and f_ffree may be -1; it's okay to stuff | ||
50 | * that into 32 bits | ||
51 | */ | ||
52 | if (st.f_files != -1 && | ||
53 | (st.f_files & 0xffffffff00000000ULL)) | ||
54 | return -EOVERFLOW; | ||
55 | if (st.f_ffree != -1 && | ||
56 | (st.f_ffree & 0xffffffff00000000ULL)) | ||
57 | return -EOVERFLOW; | ||
58 | } | ||
59 | |||
60 | buf->f_type = st.f_type; | ||
61 | buf->f_bsize = st.f_bsize; | ||
62 | buf->f_blocks = st.f_blocks; | ||
63 | buf->f_bfree = st.f_bfree; | ||
64 | buf->f_bavail = st.f_bavail; | ||
65 | buf->f_files = st.f_files; | ||
66 | buf->f_ffree = st.f_ffree; | ||
67 | buf->f_fsid = st.f_fsid; | ||
68 | buf->f_namelen = st.f_namelen; | ||
69 | buf->f_frsize = st.f_frsize; | ||
70 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
71 | } | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) | ||
76 | { | ||
77 | struct kstatfs st; | ||
78 | int retval; | ||
79 | |||
80 | retval = vfs_statfs(dentry, &st); | ||
81 | if (retval) | ||
82 | return retval; | ||
83 | |||
84 | if (sizeof(*buf) == sizeof(st)) | ||
85 | memcpy(buf, &st, sizeof(st)); | ||
86 | else { | ||
87 | buf->f_type = st.f_type; | ||
88 | buf->f_bsize = st.f_bsize; | ||
89 | buf->f_blocks = st.f_blocks; | ||
90 | buf->f_bfree = st.f_bfree; | ||
91 | buf->f_bavail = st.f_bavail; | ||
92 | buf->f_files = st.f_files; | ||
93 | buf->f_ffree = st.f_ffree; | ||
94 | buf->f_fsid = st.f_fsid; | ||
95 | buf->f_namelen = st.f_namelen; | ||
96 | buf->f_frsize = st.f_frsize; | ||
97 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) | ||
103 | { | ||
104 | struct path path; | ||
105 | int error; | ||
106 | |||
107 | error = user_path(pathname, &path); | ||
108 | if (!error) { | ||
109 | struct statfs tmp; | ||
110 | error = vfs_statfs_native(path.dentry, &tmp); | ||
111 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
112 | error = -EFAULT; | ||
113 | path_put(&path); | ||
114 | } | ||
115 | return error; | ||
116 | } | ||
117 | |||
118 | SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) | ||
119 | { | ||
120 | struct path path; | ||
121 | long error; | ||
122 | |||
123 | if (sz != sizeof(*buf)) | ||
124 | return -EINVAL; | ||
125 | error = user_path(pathname, &path); | ||
126 | if (!error) { | ||
127 | struct statfs64 tmp; | ||
128 | error = vfs_statfs64(path.dentry, &tmp); | ||
129 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
130 | error = -EFAULT; | ||
131 | path_put(&path); | ||
132 | } | ||
133 | return error; | ||
134 | } | ||
135 | |||
136 | SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) | ||
137 | { | ||
138 | struct file *file; | ||
139 | struct statfs tmp; | ||
140 | int error; | ||
141 | |||
142 | error = -EBADF; | ||
143 | file = fget(fd); | ||
144 | if (!file) | ||
145 | goto out; | ||
146 | error = vfs_statfs_native(file->f_path.dentry, &tmp); | ||
147 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
148 | error = -EFAULT; | ||
149 | fput(file); | ||
150 | out: | ||
151 | return error; | ||
152 | } | ||
153 | |||
154 | SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) | ||
155 | { | ||
156 | struct file *file; | ||
157 | struct statfs64 tmp; | ||
158 | int error; | ||
159 | |||
160 | if (sz != sizeof(*buf)) | ||
161 | return -EINVAL; | ||
162 | |||
163 | error = -EBADF; | ||
164 | file = fget(fd); | ||
165 | if (!file) | ||
166 | goto out; | ||
167 | error = vfs_statfs64(file->f_path.dentry, &tmp); | ||
168 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
169 | error = -EFAULT; | ||
170 | fput(file); | ||
171 | out: | ||
172 | return error; | ||
173 | } | ||
174 | |||
175 | SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) | ||
176 | { | ||
177 | struct super_block *s; | ||
178 | struct ustat tmp; | ||
179 | struct kstatfs sbuf; | ||
180 | int err; | ||
181 | |||
182 | s = user_get_super(new_decode_dev(dev)); | ||
183 | if (!s) | ||
184 | return -EINVAL; | ||
185 | |||
186 | err = vfs_statfs(s->s_root, &sbuf); | ||
187 | drop_super(s); | ||
188 | if (err) | ||
189 | return err; | ||
190 | |||
191 | memset(&tmp, 0, sizeof(struct ustat)); | ||
192 | tmp.f_tfree = sbuf.f_bfree; | ||
193 | tmp.f_tinode = sbuf.f_ffree; | ||
194 | |||
195 | return copy_to_user(ubuf, &tmp, sizeof(struct ustat)) ? -EFAULT : 0; | ||
196 | } | ||
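
Annotation: the core of the new fs/statfs.c is the overflow policy in vfs_statfs_native(). When the 32-bit struct statfs cannot hold a 64-bit count, the syscall fails with EOVERFLOW rather than silently truncating, except that f_files/f_ffree equal to -1 means "unknown" and is allowed through. The check in isolation, as a runnable userspace demonstration (stand-in types; the real code gates this on sizeof(buf->f_blocks) == 4):

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in for the kernel check: a 64-bit count fits a 32-bit
	 * field iff its high word is clear; (uint64_t)-1 means "not
	 * provided" and may wrap harmlessly. */
	static int fits_in_32(uint64_t v, int minus_one_ok)
	{
		if (minus_one_ok && v == (uint64_t)-1)
			return 1;
		return (v & 0xffffffff00000000ULL) == 0;
	}

	int main(void)
	{
		printf("%d\n", fits_in_32(1ULL << 40, 0));	/* 0 -> EOVERFLOW */
		printf("%d\n", fits_in_32(12345, 0));		/* 1 -> copied out */
		printf("%d\n", fits_in_32((uint64_t)-1, 1));	/* 1 -> "unknown" ok */
		return 0;
	}
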
diff --git a/fs/super.c b/fs/super.c index 1527e6a0ee35..69688b15f1fa 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -22,23 +22,15 @@ | |||
22 | 22 | ||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/init.h> | ||
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/acct.h> | 25 | #include <linux/acct.h> |
28 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
29 | #include <linux/quotaops.h> | 27 | #include <linux/quotaops.h> |
30 | #include <linux/namei.h> | ||
31 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
32 | #include <linux/security.h> | 29 | #include <linux/security.h> |
33 | #include <linux/syscalls.h> | ||
34 | #include <linux/vfs.h> | ||
35 | #include <linux/writeback.h> /* for the emergency remount stuff */ | 30 | #include <linux/writeback.h> /* for the emergency remount stuff */ |
36 | #include <linux/idr.h> | 31 | #include <linux/idr.h> |
37 | #include <linux/kobject.h> | ||
38 | #include <linux/mutex.h> | 32 | #include <linux/mutex.h> |
39 | #include <linux/file.h> | ||
40 | #include <linux/backing-dev.h> | 33 | #include <linux/backing-dev.h> |
41 | #include <asm/uaccess.h> | ||
42 | #include "internal.h" | 34 | #include "internal.h" |
43 | 35 | ||
44 | 36 | ||
@@ -93,9 +85,10 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
93 | * subclass. | 85 | * subclass. |
94 | */ | 86 | */ |
95 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); | 87 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); |
96 | s->s_count = S_BIAS; | 88 | s->s_count = 1; |
97 | atomic_set(&s->s_active, 1); | 89 | atomic_set(&s->s_active, 1); |
98 | mutex_init(&s->s_vfs_rename_mutex); | 90 | mutex_init(&s->s_vfs_rename_mutex); |
91 | lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); | ||
99 | mutex_init(&s->s_dquot.dqio_mutex); | 92 | mutex_init(&s->s_dquot.dqio_mutex); |
100 | mutex_init(&s->s_dquot.dqonoff_mutex); | 93 | mutex_init(&s->s_dquot.dqonoff_mutex); |
101 | init_rwsem(&s->s_dquot.dqptr_sem); | 94 | init_rwsem(&s->s_dquot.dqptr_sem); |
@@ -127,39 +120,14 @@ static inline void destroy_super(struct super_block *s) | |||
127 | /* Superblock refcounting */ | 120 | /* Superblock refcounting */ |
128 | 121 | ||
129 | /* | 122 | /* |
130 | * Drop a superblock's refcount. Returns non-zero if the superblock was | 123 | * Drop a superblock's refcount. The caller must hold sb_lock. |
131 | * destroyed. The caller must hold sb_lock. | ||
132 | */ | 124 | */ |
133 | static int __put_super(struct super_block *sb) | 125 | void __put_super(struct super_block *sb) |
134 | { | 126 | { |
135 | int ret = 0; | ||
136 | |||
137 | if (!--sb->s_count) { | 127 | if (!--sb->s_count) { |
128 | list_del_init(&sb->s_list); | ||
138 | destroy_super(sb); | 129 | destroy_super(sb); |
139 | ret = 1; | ||
140 | } | 130 | } |
141 | return ret; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Drop a superblock's refcount. | ||
146 | * Returns non-zero if the superblock is about to be destroyed and | ||
147 | * at least is already removed from super_blocks list, so if we are | ||
148 | * making a loop through super blocks then we need to restart. | ||
149 | * The caller must hold sb_lock. | ||
150 | */ | ||
151 | int __put_super_and_need_restart(struct super_block *sb) | ||
152 | { | ||
153 | /* check for race with generic_shutdown_super() */ | ||
154 | if (list_empty(&sb->s_list)) { | ||
155 | /* super block is removed, need to restart... */ | ||
156 | __put_super(sb); | ||
157 | return 1; | ||
158 | } | ||
159 | /* can't be the last, since s_list is still in use */ | ||
160 | sb->s_count--; | ||
161 | BUG_ON(sb->s_count == 0); | ||
162 | return 0; | ||
163 | } | 131 | } |
164 | 132 | ||
165 | /** | 133 | /** |
@@ -178,57 +146,48 @@ void put_super(struct super_block *sb) | |||
178 | 146 | ||
179 | 147 | ||
180 | /** | 148 | /** |
181 | * deactivate_super - drop an active reference to superblock | 149 | * deactivate_locked_super - drop an active reference to superblock |
182 | * @s: superblock to deactivate | 150 | * @s: superblock to deactivate |
183 | * | 151 | * |
184 | * Drops an active reference to superblock, acquiring a temporary one if | 152 | * Drops an active reference to superblock, converting it into a temporary |
185 | * there is no active references left. In that case we lock superblock, | 153 | * one if there is no other active references left. In that case we |
186 | * tell fs driver to shut it down and drop the temporary reference we | 154 | * tell fs driver to shut it down and drop the temporary reference we |
187 | * had just acquired. | 155 | * had just acquired. |
156 | * | ||
157 | * Caller holds exclusive lock on superblock; that lock is released. | ||
188 | */ | 158 | */ |
189 | void deactivate_super(struct super_block *s) | 159 | void deactivate_locked_super(struct super_block *s) |
190 | { | 160 | { |
191 | struct file_system_type *fs = s->s_type; | 161 | struct file_system_type *fs = s->s_type; |
192 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 162 | if (atomic_dec_and_test(&s->s_active)) { |
193 | s->s_count -= S_BIAS-1; | ||
194 | spin_unlock(&sb_lock); | ||
195 | vfs_dq_off(s, 0); | 163 | vfs_dq_off(s, 0); |
196 | down_write(&s->s_umount); | ||
197 | fs->kill_sb(s); | 164 | fs->kill_sb(s); |
198 | put_filesystem(fs); | 165 | put_filesystem(fs); |
199 | put_super(s); | 166 | put_super(s); |
167 | } else { | ||
168 | up_write(&s->s_umount); | ||
200 | } | 169 | } |
201 | } | 170 | } |
202 | 171 | ||
203 | EXPORT_SYMBOL(deactivate_super); | 172 | EXPORT_SYMBOL(deactivate_locked_super); |
204 | 173 | ||
205 | /** | 174 | /** |
206 | * deactivate_locked_super - drop an active reference to superblock | 175 | * deactivate_super - drop an active reference to superblock |
207 | * @s: superblock to deactivate | 176 | * @s: superblock to deactivate |
208 | * | 177 | * |
209 | * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that | 178 | * Variant of deactivate_locked_super(), except that superblock is *not* |
210 | * it does not unlock it until it's all over. As the result, it's safe to | 179 | * locked by caller. If we are going to drop the final active reference, |
211 | * use to dispose of new superblock on ->get_sb() failure exits - nobody | 180 | * lock will be acquired prior to that. |
212 | * will see the sucker until it's all over. Equivalent using up_write + | ||
213 | * deactivate_super is safe for that purpose only if superblock is either | ||
214 | * safe to use or has NULL ->s_root when we unlock. | ||
215 | */ | 181 | */ |
216 | void deactivate_locked_super(struct super_block *s) | 182 | void deactivate_super(struct super_block *s) |
217 | { | 183 | { |
218 | struct file_system_type *fs = s->s_type; | 184 | if (!atomic_add_unless(&s->s_active, -1, 1)) { |
219 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 185 | down_write(&s->s_umount); |
220 | s->s_count -= S_BIAS-1; | 186 | deactivate_locked_super(s); |
221 | spin_unlock(&sb_lock); | ||
222 | vfs_dq_off(s, 0); | ||
223 | fs->kill_sb(s); | ||
224 | put_filesystem(fs); | ||
225 | put_super(s); | ||
226 | } else { | ||
227 | up_write(&s->s_umount); | ||
228 | } | 187 | } |
229 | } | 188 | } |
230 | 189 | ||
231 | EXPORT_SYMBOL(deactivate_locked_super); | 190 | EXPORT_SYMBOL(deactivate_super); |
232 | 191 | ||
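With sget() below now returning the superblock locked, deactivate_locked_super() is the natural error path for a ->get_sb() instance. A hedged sketch, modelled on the sysfs_get_sb() rewrite later in this diff; example_fill_super() is a hypothetical fill callback.

	static int example_get_sb(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *data,
				  struct vfsmount *mnt)
	{
		struct super_block *sb;
		int err;

		sb = sget(fs_type, NULL, set_anon_super, NULL);
		if (IS_ERR(sb))
			return PTR_ERR(sb);

		if (!sb->s_root) {
			sb->s_flags = flags;
			err = example_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
			if (err) {
				/* still locked: nobody has seen this sb yet */
				deactivate_locked_super(sb);
				return err;
			}
			sb->s_flags |= MS_ACTIVE;
		}
		simple_set_mnt(mnt, sb);
		return 0;
	}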
233 | /** | 192 | /** |
234 | * grab_super - acquire an active reference | 193 | * grab_super - acquire an active reference |
@@ -243,22 +202,17 @@ EXPORT_SYMBOL(deactivate_locked_super); | |||
243 | */ | 202 | */ |
244 | static int grab_super(struct super_block *s) __releases(sb_lock) | 203 | static int grab_super(struct super_block *s) __releases(sb_lock) |
245 | { | 204 | { |
205 | if (atomic_inc_not_zero(&s->s_active)) { | ||
206 | spin_unlock(&sb_lock); | ||
207 | return 1; | ||
208 | } | ||
209 | /* it's going away */ | ||
246 | s->s_count++; | 210 | s->s_count++; |
247 | spin_unlock(&sb_lock); | 211 | spin_unlock(&sb_lock); |
212 | /* wait for it to die */ | ||
248 | down_write(&s->s_umount); | 213 | down_write(&s->s_umount); |
249 | if (s->s_root) { | ||
250 | spin_lock(&sb_lock); | ||
251 | if (s->s_count > S_BIAS) { | ||
252 | atomic_inc(&s->s_active); | ||
253 | s->s_count--; | ||
254 | spin_unlock(&sb_lock); | ||
255 | return 1; | ||
256 | } | ||
257 | spin_unlock(&sb_lock); | ||
258 | } | ||
259 | up_write(&s->s_umount); | 214 | up_write(&s->s_umount); |
260 | put_super(s); | 215 | put_super(s); |
261 | yield(); | ||
262 | return 0; | 216 | return 0; |
263 | } | 217 | } |
264 | 218 | ||
@@ -321,8 +275,7 @@ void generic_shutdown_super(struct super_block *sb) | |||
321 | } | 275 | } |
322 | spin_lock(&sb_lock); | 276 | spin_lock(&sb_lock); |
323 | /* should be initialized for __put_super_and_need_restart() */ | 277 | /* should be initialized for __put_super_and_need_restart() */ |
324 | list_del_init(&sb->s_list); | 278 | list_del_init(&sb->s_instances); |
325 | list_del(&sb->s_instances); | ||
326 | spin_unlock(&sb_lock); | 279 | spin_unlock(&sb_lock); |
327 | up_write(&sb->s_umount); | 280 | up_write(&sb->s_umount); |
328 | } | 281 | } |
@@ -357,6 +310,7 @@ retry: | |||
357 | up_write(&s->s_umount); | 310 | up_write(&s->s_umount); |
358 | destroy_super(s); | 311 | destroy_super(s); |
359 | } | 312 | } |
313 | down_write(&old->s_umount); | ||
360 | return old; | 314 | return old; |
361 | } | 315 | } |
362 | } | 316 | } |
@@ -408,11 +362,12 @@ EXPORT_SYMBOL(drop_super); | |||
408 | */ | 362 | */ |
409 | void sync_supers(void) | 363 | void sync_supers(void) |
410 | { | 364 | { |
411 | struct super_block *sb; | 365 | struct super_block *sb, *n; |
412 | 366 | ||
413 | spin_lock(&sb_lock); | 367 | spin_lock(&sb_lock); |
414 | restart: | 368 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { |
415 | list_for_each_entry(sb, &super_blocks, s_list) { | 369 | if (list_empty(&sb->s_instances)) |
370 | continue; | ||
416 | if (sb->s_op->write_super && sb->s_dirt) { | 371 | if (sb->s_op->write_super && sb->s_dirt) { |
417 | sb->s_count++; | 372 | sb->s_count++; |
418 | spin_unlock(&sb_lock); | 373 | spin_unlock(&sb_lock); |
@@ -423,14 +378,43 @@ restart: | |||
423 | up_read(&sb->s_umount); | 378 | up_read(&sb->s_umount); |
424 | 379 | ||
425 | spin_lock(&sb_lock); | 380 | spin_lock(&sb_lock); |
426 | if (__put_super_and_need_restart(sb)) | 381 | __put_super(sb); |
427 | goto restart; | ||
428 | } | 382 | } |
429 | } | 383 | } |
430 | spin_unlock(&sb_lock); | 384 | spin_unlock(&sb_lock); |
431 | } | 385 | } |
432 | 386 | ||
433 | /** | 387 | /** |
388 | * iterate_supers - call function for all active superblocks | ||
389 | * @f: function to call | ||
390 | * @arg: argument to pass to it | ||
391 | * | ||
392 | * Scans the superblock list and calls the given function, passing it | ||
393 | * a locked superblock and the given argument. | ||
394 | */ | ||
395 | void iterate_supers(void (*f)(struct super_block *, void *), void *arg) | ||
396 | { | ||
397 | struct super_block *sb, *n; | ||
398 | |||
399 | spin_lock(&sb_lock); | ||
400 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { | ||
401 | if (list_empty(&sb->s_instances)) | ||
402 | continue; | ||
403 | sb->s_count++; | ||
404 | spin_unlock(&sb_lock); | ||
405 | |||
406 | down_read(&sb->s_umount); | ||
407 | if (sb->s_root) | ||
408 | f(sb, arg); | ||
409 | up_read(&sb->s_umount); | ||
410 | |||
411 | spin_lock(&sb_lock); | ||
412 | __put_super(sb); | ||
413 | } | ||
414 | spin_unlock(&sb_lock); | ||
415 | } | ||
416 | |||
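iterate_supers() replaces a pattern every walker used to open-code. A hedged usage sketch follows; the callback and the counter it maintains are illustrative only.

	/* The callback runs with sb->s_umount held for reading and a
	 * temporary s_count reference, so sb cannot be freed under it. */
	static void count_rw_sb(struct super_block *sb, void *arg)
	{
		if (!(sb->s_flags & MS_RDONLY))
			(*(int *)arg)++;
	}

	static int count_rw_filesystems(void)
	{
		int n = 0;

		iterate_supers(count_rw_sb, &n);
		return n;
	}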
417 | /** | ||
434 | * get_super - get the superblock of a device | 418 | * get_super - get the superblock of a device |
435 | * @bdev: device to get the superblock for | 419 | * @bdev: device to get the superblock for |
436 | * | 420 | * |
@@ -438,7 +422,7 @@ restart: | |||
438 | * mounted on the device given. %NULL is returned if no match is found. | 422 | * mounted on the device given. %NULL is returned if no match is found. |
439 | */ | 423 | */ |
440 | 424 | ||
441 | struct super_block * get_super(struct block_device *bdev) | 425 | struct super_block *get_super(struct block_device *bdev) |
442 | { | 426 | { |
443 | struct super_block *sb; | 427 | struct super_block *sb; |
444 | 428 | ||
@@ -448,17 +432,20 @@ struct super_block * get_super(struct block_device *bdev) | |||
448 | spin_lock(&sb_lock); | 432 | spin_lock(&sb_lock); |
449 | rescan: | 433 | rescan: |
450 | list_for_each_entry(sb, &super_blocks, s_list) { | 434 | list_for_each_entry(sb, &super_blocks, s_list) { |
435 | if (list_empty(&sb->s_instances)) | ||
436 | continue; | ||
451 | if (sb->s_bdev == bdev) { | 437 | if (sb->s_bdev == bdev) { |
452 | sb->s_count++; | 438 | sb->s_count++; |
453 | spin_unlock(&sb_lock); | 439 | spin_unlock(&sb_lock); |
454 | down_read(&sb->s_umount); | 440 | down_read(&sb->s_umount); |
441 | /* still alive? */ | ||
455 | if (sb->s_root) | 442 | if (sb->s_root) |
456 | return sb; | 443 | return sb; |
457 | up_read(&sb->s_umount); | 444 | up_read(&sb->s_umount); |
458 | /* restart only when sb is no longer on the list */ | 445 | /* nope, got unmounted */ |
459 | spin_lock(&sb_lock); | 446 | spin_lock(&sb_lock); |
460 | if (__put_super_and_need_restart(sb)) | 447 | __put_super(sb); |
461 | goto rescan; | 448 | goto rescan; |
462 | } | 449 | } |
463 | } | 450 | } |
464 | spin_unlock(&sb_lock); | 451 | spin_unlock(&sb_lock); |
@@ -473,7 +460,7 @@ EXPORT_SYMBOL(get_super); | |||
473 | * | 460 | * |
474 | * Scans the superblock list and finds the superblock of the file system | 461 | * Scans the superblock list and finds the superblock of the file system |
475 | * mounted on the device given. Returns the superblock with an active | 462 | * mounted on the device given. Returns the superblock with an active |
476 | * reference and s_umount held exclusively or %NULL if none was found. | 463 | * reference or %NULL if none was found. |
477 | */ | 464 | */ |
478 | struct super_block *get_active_super(struct block_device *bdev) | 465 | struct super_block *get_active_super(struct block_device *bdev) |
479 | { | 466 | { |
@@ -482,81 +469,49 @@ struct super_block *get_active_super(struct block_device *bdev) | |||
482 | if (!bdev) | 469 | if (!bdev) |
483 | return NULL; | 470 | return NULL; |
484 | 471 | ||
472 | restart: | ||
485 | spin_lock(&sb_lock); | 473 | spin_lock(&sb_lock); |
486 | list_for_each_entry(sb, &super_blocks, s_list) { | 474 | list_for_each_entry(sb, &super_blocks, s_list) { |
487 | if (sb->s_bdev != bdev) | 475 | if (list_empty(&sb->s_instances)) |
488 | continue; | 476 | continue; |
489 | 477 | if (sb->s_bdev == bdev) { | |
490 | sb->s_count++; | 478 | if (grab_super(sb)) /* drops sb_lock */ |
491 | spin_unlock(&sb_lock); | ||
492 | down_write(&sb->s_umount); | ||
493 | if (sb->s_root) { | ||
494 | spin_lock(&sb_lock); | ||
495 | if (sb->s_count > S_BIAS) { | ||
496 | atomic_inc(&sb->s_active); | ||
497 | sb->s_count--; | ||
498 | spin_unlock(&sb_lock); | ||
499 | return sb; | 479 | return sb; |
500 | } | 480 | else |
501 | spin_unlock(&sb_lock); | 481 | goto restart; |
502 | } | 482 | } |
503 | up_write(&sb->s_umount); | ||
504 | put_super(sb); | ||
505 | yield(); | ||
506 | spin_lock(&sb_lock); | ||
507 | } | 483 | } |
508 | spin_unlock(&sb_lock); | 484 | spin_unlock(&sb_lock); |
509 | return NULL; | 485 | return NULL; |
510 | } | 486 | } |
511 | 487 | ||
512 | struct super_block * user_get_super(dev_t dev) | 488 | struct super_block *user_get_super(dev_t dev) |
513 | { | 489 | { |
514 | struct super_block *sb; | 490 | struct super_block *sb; |
515 | 491 | ||
516 | spin_lock(&sb_lock); | 492 | spin_lock(&sb_lock); |
517 | rescan: | 493 | rescan: |
518 | list_for_each_entry(sb, &super_blocks, s_list) { | 494 | list_for_each_entry(sb, &super_blocks, s_list) { |
495 | if (list_empty(&sb->s_instances)) | ||
496 | continue; | ||
519 | if (sb->s_dev == dev) { | 497 | if (sb->s_dev == dev) { |
520 | sb->s_count++; | 498 | sb->s_count++; |
521 | spin_unlock(&sb_lock); | 499 | spin_unlock(&sb_lock); |
522 | down_read(&sb->s_umount); | 500 | down_read(&sb->s_umount); |
501 | /* still alive? */ | ||
523 | if (sb->s_root) | 502 | if (sb->s_root) |
524 | return sb; | 503 | return sb; |
525 | up_read(&sb->s_umount); | 504 | up_read(&sb->s_umount); |
526 | /* restart only when sb is no longer on the list */ | 505 | /* nope, got unmounted */ |
527 | spin_lock(&sb_lock); | 506 | spin_lock(&sb_lock); |
528 | if (__put_super_and_need_restart(sb)) | 507 | __put_super(sb); |
529 | goto rescan; | 508 | goto rescan; |
530 | } | 509 | } |
531 | } | 510 | } |
532 | spin_unlock(&sb_lock); | 511 | spin_unlock(&sb_lock); |
533 | return NULL; | 512 | return NULL; |
534 | } | 513 | } |
535 | 514 | ||
536 | SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) | ||
537 | { | ||
538 | struct super_block *s; | ||
539 | struct ustat tmp; | ||
540 | struct kstatfs sbuf; | ||
541 | int err = -EINVAL; | ||
542 | |||
543 | s = user_get_super(new_decode_dev(dev)); | ||
544 | if (s == NULL) | ||
545 | goto out; | ||
546 | err = vfs_statfs(s->s_root, &sbuf); | ||
547 | drop_super(s); | ||
548 | if (err) | ||
549 | goto out; | ||
550 | |||
551 | memset(&tmp,0,sizeof(struct ustat)); | ||
552 | tmp.f_tfree = sbuf.f_bfree; | ||
553 | tmp.f_tinode = sbuf.f_ffree; | ||
554 | |||
555 | err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0; | ||
556 | out: | ||
557 | return err; | ||
558 | } | ||
559 | |||
560 | /** | 515 | /** |
561 | * do_remount_sb - asks filesystem to change mount options. | 516 | * do_remount_sb - asks filesystem to change mount options. |
562 | * @sb: superblock in question | 517 | * @sb: superblock in question |
@@ -622,24 +577,24 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
622 | 577 | ||
623 | static void do_emergency_remount(struct work_struct *work) | 578 | static void do_emergency_remount(struct work_struct *work) |
624 | { | 579 | { |
625 | struct super_block *sb; | 580 | struct super_block *sb, *n; |
626 | 581 | ||
627 | spin_lock(&sb_lock); | 582 | spin_lock(&sb_lock); |
628 | list_for_each_entry(sb, &super_blocks, s_list) { | 583 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { |
584 | if (list_empty(&sb->s_instances)) | ||
585 | continue; | ||
629 | sb->s_count++; | 586 | sb->s_count++; |
630 | spin_unlock(&sb_lock); | 587 | spin_unlock(&sb_lock); |
631 | down_write(&sb->s_umount); | 588 | down_write(&sb->s_umount); |
632 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { | 589 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { |
633 | /* | 590 | /* |
634 | * ->remount_fs needs lock_kernel(). | ||
635 | * | ||
636 | * What lock protects sb->s_flags?? | 591 | * What lock protects sb->s_flags?? |
637 | */ | 592 | */ |
638 | do_remount_sb(sb, MS_RDONLY, NULL, 1); | 593 | do_remount_sb(sb, MS_RDONLY, NULL, 1); |
639 | } | 594 | } |
640 | up_write(&sb->s_umount); | 595 | up_write(&sb->s_umount); |
641 | put_super(sb); | ||
642 | spin_lock(&sb_lock); | 596 | spin_lock(&sb_lock); |
597 | __put_super(sb); | ||
643 | } | 598 | } |
644 | spin_unlock(&sb_lock); | 599 | spin_unlock(&sb_lock); |
645 | kfree(work); | 600 | kfree(work); |
@@ -990,6 +945,96 @@ out: | |||
990 | 945 | ||
991 | EXPORT_SYMBOL_GPL(vfs_kern_mount); | 946 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
992 | 947 | ||
948 | /** | ||
949 | * freeze_super - lock the filesystem and force it into a consistent state | ||
950 | * @super: the super to lock | ||
951 | * | ||
952 | * Syncs the super to make sure the filesystem is consistent and calls the fs's | ||
953 | * freeze_fs. Subsequent calls to this without first thawing the fs will return | ||
954 | * -EBUSY. | ||
955 | */ | ||
956 | int freeze_super(struct super_block *sb) | ||
957 | { | ||
958 | int ret; | ||
959 | |||
960 | atomic_inc(&sb->s_active); | ||
961 | down_write(&sb->s_umount); | ||
962 | if (sb->s_frozen) { | ||
963 | deactivate_locked_super(sb); | ||
964 | return -EBUSY; | ||
965 | } | ||
966 | |||
967 | if (sb->s_flags & MS_RDONLY) { | ||
968 | sb->s_frozen = SB_FREEZE_TRANS; | ||
969 | smp_wmb(); | ||
970 | up_write(&sb->s_umount); | ||
971 | return 0; | ||
972 | } | ||
973 | |||
974 | sb->s_frozen = SB_FREEZE_WRITE; | ||
975 | smp_wmb(); | ||
976 | |||
977 | sync_filesystem(sb); | ||
978 | |||
979 | sb->s_frozen = SB_FREEZE_TRANS; | ||
980 | smp_wmb(); | ||
981 | |||
982 | sync_blockdev(sb->s_bdev); | ||
983 | if (sb->s_op->freeze_fs) { | ||
984 | ret = sb->s_op->freeze_fs(sb); | ||
985 | if (ret) { | ||
986 | printk(KERN_ERR | ||
987 | "VFS:Filesystem freeze failed\n"); | ||
988 | sb->s_frozen = SB_UNFROZEN; | ||
989 | deactivate_locked_super(sb); | ||
990 | return ret; | ||
991 | } | ||
992 | } | ||
993 | up_write(&sb->s_umount); | ||
994 | return 0; | ||
995 | } | ||
996 | EXPORT_SYMBOL(freeze_super); | ||
997 | |||
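freeze_super() walks s_frozen through SB_FREEZE_WRITE (stop new writers, sync) and then SB_FREEZE_TRANS (stop transactions), with an smp_wmb() after each transition. On the other side, write paths of this era block on the same field; a hedged sketch of that check, with the vfs_check_frozen() expansion quoted from memory rather than from this diff:

	/* A writer entering the filesystem waits until the frozen level
	 * drops below the one it cares about. */
	vfs_check_frozen(sb, SB_FREEZE_WRITE);
	/* ... which expands to roughly: */
	wait_event(sb->s_wait_unfrozen, sb->s_frozen < SB_FREEZE_WRITE);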
998 | /** | ||
999 | * thaw_super - unlock the filesystem | ||
1000 | * @sb: the super to thaw | ||
1001 | * | ||
1002 | * Unlocks the filesystem and marks it writeable again after freeze_super(). | ||
1003 | */ | ||
1004 | int thaw_super(struct super_block *sb) | ||
1005 | { | ||
1006 | int error; | ||
1007 | |||
1008 | down_write(&sb->s_umount); | ||
1009 | if (sb->s_frozen == SB_UNFROZEN) { | ||
1010 | up_write(&sb->s_umount); | ||
1011 | return -EINVAL; | ||
1012 | } | ||
1013 | |||
1014 | if (sb->s_flags & MS_RDONLY) | ||
1015 | goto out; | ||
1016 | |||
1017 | if (sb->s_op->unfreeze_fs) { | ||
1018 | error = sb->s_op->unfreeze_fs(sb); | ||
1019 | if (error) { | ||
1020 | printk(KERN_ERR | ||
1021 | "VFS:Filesystem thaw failed\n"); | ||
1022 | sb->s_frozen = SB_FREEZE_TRANS; | ||
1023 | up_write(&sb->s_umount); | ||
1024 | return error; | ||
1025 | } | ||
1026 | } | ||
1027 | |||
1028 | out: | ||
1029 | sb->s_frozen = SB_UNFROZEN; | ||
1030 | smp_wmb(); | ||
1031 | wake_up(&sb->s_wait_unfrozen); | ||
1032 | deactivate_locked_super(sb); | ||
1033 | |||
1034 | return 0; | ||
1035 | } | ||
1036 | EXPORT_SYMBOL(thaw_super); | ||
1037 | |||
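Pairing the two entry points, in the style of the FIFREEZE/FITHAW ioctl handlers; a hedged sketch, with the snapshot step purely illustrative:

	static int example_snapshot(struct super_block *sb)
	{
		int err;

		err = freeze_super(sb);		/* -EBUSY if already frozen */
		if (err)
			return err;

		/* ... take a block-level snapshot of sb->s_bdev ... */

		return thaw_super(sb);		/* drops the ref freeze_super took */
	}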
993 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | 1038 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) |
994 | { | 1039 | { |
995 | int err; | 1040 | int err; |
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
42 | if (wait) | 42 | if (wait) |
43 | sync_inodes_sb(sb); | 43 | sync_inodes_sb(sb); |
44 | else | 44 | else |
45 | writeback_inodes_sb(sb); | 45 | writeback_inodes_sb_locked(sb); |
46 | 46 | ||
47 | if (sb->s_op->sync_fs) | 47 | if (sb->s_op->sync_fs) |
48 | sb->s_op->sync_fs(sb, wait); | 48 | sb->s_op->sync_fs(sb, wait); |
@@ -77,50 +77,18 @@ int sync_filesystem(struct super_block *sb) | |||
77 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(sync_filesystem); | 78 | EXPORT_SYMBOL_GPL(sync_filesystem); |
79 | 79 | ||
80 | static void sync_one_sb(struct super_block *sb, void *arg) | ||
81 | { | ||
82 | if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi) | ||
83 | __sync_filesystem(sb, *(int *)arg); | ||
84 | } | ||
80 | /* | 85 | /* |
81 | * Sync all the data for all the filesystems (called by sys_sync() and | 86 | * Sync all the data for all the filesystems (called by sys_sync() and |
82 | * emergency sync) | 87 | * emergency sync) |
83 | * | ||
84 | * This operation is careful to avoid the livelock which could easily happen | ||
85 | * if two or more filesystems are being continuously dirtied. s_need_sync | ||
86 | * is used only here. We set it against all filesystems and then clear it as | ||
87 | * we sync them. So redirtied filesystems are skipped. | ||
88 | * | ||
89 | * But if process A is currently running sync_filesystems and then process B | ||
90 | * calls sync_filesystems as well, process B will set all the s_need_sync | ||
91 | * flags again, which will cause process A to resync everything. Fix that with | ||
92 | * a local mutex. | ||
93 | */ | 88 | */ |
94 | static void sync_filesystems(int wait) | 89 | static void sync_filesystems(int wait) |
95 | { | 90 | { |
96 | struct super_block *sb; | 91 | iterate_supers(sync_one_sb, &wait); |
97 | static DEFINE_MUTEX(mutex); | ||
98 | |||
99 | mutex_lock(&mutex); /* Could be down_interruptible */ | ||
100 | spin_lock(&sb_lock); | ||
101 | list_for_each_entry(sb, &super_blocks, s_list) | ||
102 | sb->s_need_sync = 1; | ||
103 | |||
104 | restart: | ||
105 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
106 | if (!sb->s_need_sync) | ||
107 | continue; | ||
108 | sb->s_need_sync = 0; | ||
109 | sb->s_count++; | ||
110 | spin_unlock(&sb_lock); | ||
111 | |||
112 | down_read(&sb->s_umount); | ||
113 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi) | ||
114 | __sync_filesystem(sb, wait); | ||
115 | up_read(&sb->s_umount); | ||
116 | |||
117 | /* restart only when sb is no longer on the list */ | ||
118 | spin_lock(&sb_lock); | ||
119 | if (__put_super_and_need_restart(sb)) | ||
120 | goto restart; | ||
121 | } | ||
122 | spin_unlock(&sb_lock); | ||
123 | mutex_unlock(&mutex); | ||
124 | } | 92 | } |
125 | 93 | ||
126 | /* | 94 | /* |
@@ -190,7 +158,6 @@ EXPORT_SYMBOL(file_fsync); | |||
190 | /** | 158 | /** |
191 | * vfs_fsync_range - helper to sync a range of data & metadata to disk | 159 | * vfs_fsync_range - helper to sync a range of data & metadata to disk |
192 | * @file: file to sync | 160 | * @file: file to sync |
193 | * @dentry: dentry of @file | ||
194 | * @start: offset in bytes of the beginning of data range to sync | 161 | * @start: offset in bytes of the beginning of data range to sync |
195 | * @end: offset in bytes of the end of data range (inclusive) | 162 | * @end: offset in bytes of the end of data range (inclusive) |
196 | * @datasync: perform only datasync | 163 | * @datasync: perform only datasync |
@@ -198,32 +165,13 @@ EXPORT_SYMBOL(file_fsync); | |||
198 | * Write back data in range @start..@end and metadata for @file to disk. If | 165 | * Write back data in range @start..@end and metadata for @file to disk. If |
199 | * @datasync is set only metadata needed to access modified file data is | 166 | * @datasync is set only metadata needed to access modified file data is |
200 | * written. | 167 | * written. |
201 | * | ||
202 | * In case this function is called from nfsd @file may be %NULL and | ||
203 | * only @dentry is set. This can only happen when the filesystem | ||
204 | * implements the export_operations API. | ||
205 | */ | 168 | */ |
206 | int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, | 169 | int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) |
207 | loff_t end, int datasync) | ||
208 | { | 170 | { |
209 | const struct file_operations *fop; | 171 | struct address_space *mapping = file->f_mapping; |
210 | struct address_space *mapping; | ||
211 | int err, ret; | 172 | int err, ret; |
212 | 173 | ||
213 | /* | 174 | if (!file->f_op || !file->f_op->fsync) { |
214 | * Get mapping and operations from the file in case we have | ||
215 | * as file, or get the default values for them in case we | ||
216 | * don't have a struct file available. Damn nfsd.. | ||
217 | */ | ||
218 | if (file) { | ||
219 | mapping = file->f_mapping; | ||
220 | fop = file->f_op; | ||
221 | } else { | ||
222 | mapping = dentry->d_inode->i_mapping; | ||
223 | fop = dentry->d_inode->i_fop; | ||
224 | } | ||
225 | |||
226 | if (!fop || !fop->fsync) { | ||
227 | ret = -EINVAL; | 175 | ret = -EINVAL; |
228 | goto out; | 176 | goto out; |
229 | } | 177 | } |
@@ -235,7 +183,7 @@ int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, | |||
235 | * livelocks in fsync_buffers_list(). | 183 | * livelocks in fsync_buffers_list(). |
236 | */ | 184 | */ |
237 | mutex_lock(&mapping->host->i_mutex); | 185 | mutex_lock(&mapping->host->i_mutex); |
238 | err = fop->fsync(file, dentry, datasync); | 186 | err = file->f_op->fsync(file, file->f_path.dentry, datasync); |
239 | if (!ret) | 187 | if (!ret) |
240 | ret = err; | 188 | ret = err; |
241 | mutex_unlock(&mapping->host->i_mutex); | 189 | mutex_unlock(&mapping->host->i_mutex); |
@@ -248,19 +196,14 @@ EXPORT_SYMBOL(vfs_fsync_range); | |||
248 | /** | 196 | /** |
249 | * vfs_fsync - perform a fsync or fdatasync on a file | 197 | * vfs_fsync - perform a fsync or fdatasync on a file |
250 | * @file: file to sync | 198 | * @file: file to sync |
251 | * @dentry: dentry of @file | ||
252 | * @datasync: only perform a fdatasync operation | 199 | * @datasync: only perform a fdatasync operation |
253 | * | 200 | * |
254 | * Write back data and metadata for @file to disk. If @datasync is | 201 | * Write back data and metadata for @file to disk. If @datasync is |
255 | * set only metadata needed to access modified file data is written. | 202 | * set only metadata needed to access modified file data is written. |
256 | * | ||
257 | * In case this function is called from nfsd @file may be %NULL and | ||
258 | * only @dentry is set. This can only happen when the filesystem | ||
259 | * implements the export_operations API. | ||
260 | */ | 203 | */ |
261 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 204 | int vfs_fsync(struct file *file, int datasync) |
262 | { | 205 | { |
263 | return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync); | 206 | return vfs_fsync_range(file, 0, LLONG_MAX, datasync); |
264 | } | 207 | } |
265 | EXPORT_SYMBOL(vfs_fsync); | 208 | EXPORT_SYMBOL(vfs_fsync); |
266 | 209 | ||
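With the dentry argument gone, in-kernel callers pass just the open file and the datasync flag. A hedged sketch of both entry points; the range is illustrative:

	static int example_sync_file(struct file *filp, loff_t pos, loff_t len)
	{
		int err;

		/* full fsync of the whole file */
		err = vfs_fsync(filp, 0);
		if (err)
			return err;

		/* fdatasync of just the bytes [pos, pos + len - 1] */
		return vfs_fsync_range(filp, pos, pos + len - 1, 1);
	}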
@@ -271,7 +214,7 @@ static int do_fsync(unsigned int fd, int datasync) | |||
271 | 214 | ||
272 | file = fget(fd); | 215 | file = fget(fd); |
273 | if (file) { | 216 | if (file) { |
274 | ret = vfs_fsync(file, file->f_path.dentry, datasync); | 217 | ret = vfs_fsync(file, datasync); |
275 | fput(file); | 218 | fput(file); |
276 | } | 219 | } |
277 | return ret; | 220 | return ret; |
@@ -299,8 +242,7 @@ int generic_write_sync(struct file *file, loff_t pos, loff_t count) | |||
299 | { | 242 | { |
300 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) | 243 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) |
301 | return 0; | 244 | return 0; |
302 | return vfs_fsync_range(file, file->f_path.dentry, pos, | 245 | return vfs_fsync_range(file, pos, pos + count - 1, |
303 | pos + count - 1, | ||
304 | (file->f_flags & __O_SYNC) ? 0 : 1); | 246 | (file->f_flags & __O_SYNC) ? 0 : 1); |
305 | } | 247 | } |
306 | EXPORT_SYMBOL(generic_write_sync); | 248 | EXPORT_SYMBOL(generic_write_sync); |
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index e9d293593e52..4e321f7353fa 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -46,9 +46,9 @@ struct bin_buffer { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | static int | 48 | static int |
49 | fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) | 49 | fill_read(struct file *file, char *buffer, loff_t off, size_t count) |
50 | { | 50 | { |
51 | struct sysfs_dirent *attr_sd = dentry->d_fsdata; | 51 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
52 | struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; | 52 | struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; |
53 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 53 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; |
54 | int rc; | 54 | int rc; |
@@ -59,7 +59,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) | |||
59 | 59 | ||
60 | rc = -EIO; | 60 | rc = -EIO; |
61 | if (attr->read) | 61 | if (attr->read) |
62 | rc = attr->read(kobj, attr, buffer, off, count); | 62 | rc = attr->read(file, kobj, attr, buffer, off, count); |
63 | 63 | ||
64 | sysfs_put_active(attr_sd); | 64 | sysfs_put_active(attr_sd); |
65 | 65 | ||
@@ -70,8 +70,7 @@ static ssize_t | |||
70 | read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | 70 | read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) |
71 | { | 71 | { |
72 | struct bin_buffer *bb = file->private_data; | 72 | struct bin_buffer *bb = file->private_data; |
73 | struct dentry *dentry = file->f_path.dentry; | 73 | int size = file->f_path.dentry->d_inode->i_size; |
74 | int size = dentry->d_inode->i_size; | ||
75 | loff_t offs = *off; | 74 | loff_t offs = *off; |
76 | int count = min_t(size_t, bytes, PAGE_SIZE); | 75 | int count = min_t(size_t, bytes, PAGE_SIZE); |
77 | char *temp; | 76 | char *temp; |
@@ -92,7 +91,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | |||
92 | 91 | ||
93 | mutex_lock(&bb->mutex); | 92 | mutex_lock(&bb->mutex); |
94 | 93 | ||
95 | count = fill_read(dentry, bb->buffer, offs, count); | 94 | count = fill_read(file, bb->buffer, offs, count); |
96 | if (count < 0) { | 95 | if (count < 0) { |
97 | mutex_unlock(&bb->mutex); | 96 | mutex_unlock(&bb->mutex); |
98 | goto out_free; | 97 | goto out_free; |
@@ -117,9 +116,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | |||
117 | } | 116 | } |
118 | 117 | ||
119 | static int | 118 | static int |
120 | flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) | 119 | flush_write(struct file *file, char *buffer, loff_t offset, size_t count) |
121 | { | 120 | { |
122 | struct sysfs_dirent *attr_sd = dentry->d_fsdata; | 121 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
123 | struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; | 122 | struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; |
124 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 123 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; |
125 | int rc; | 124 | int rc; |
@@ -130,7 +129,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) | |||
130 | 129 | ||
131 | rc = -EIO; | 130 | rc = -EIO; |
132 | if (attr->write) | 131 | if (attr->write) |
133 | rc = attr->write(kobj, attr, buffer, offset, count); | 132 | rc = attr->write(file, kobj, attr, buffer, offset, count); |
134 | 133 | ||
135 | sysfs_put_active(attr_sd); | 134 | sysfs_put_active(attr_sd); |
136 | 135 | ||
@@ -141,8 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
141 | size_t bytes, loff_t *off) | 140 | size_t bytes, loff_t *off) |
142 | { | 141 | { |
143 | struct bin_buffer *bb = file->private_data; | 142 | struct bin_buffer *bb = file->private_data; |
144 | struct dentry *dentry = file->f_path.dentry; | 143 | int size = file->f_path.dentry->d_inode->i_size; |
145 | int size = dentry->d_inode->i_size; | ||
146 | loff_t offs = *off; | 144 | loff_t offs = *off; |
147 | int count = min_t(size_t, bytes, PAGE_SIZE); | 145 | int count = min_t(size_t, bytes, PAGE_SIZE); |
148 | char *temp; | 146 | char *temp; |
@@ -165,7 +163,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
165 | 163 | ||
166 | memcpy(bb->buffer, temp, count); | 164 | memcpy(bb->buffer, temp, count); |
167 | 165 | ||
168 | count = flush_write(dentry, bb->buffer, offs, count); | 166 | count = flush_write(file, bb->buffer, offs, count); |
169 | mutex_unlock(&bb->mutex); | 167 | mutex_unlock(&bb->mutex); |
170 | 168 | ||
171 | if (count > 0) | 169 | if (count > 0) |
@@ -363,7 +361,7 @@ static int mmap(struct file *file, struct vm_area_struct *vma) | |||
363 | if (!attr->mmap) | 361 | if (!attr->mmap) |
364 | goto out_put; | 362 | goto out_put; |
365 | 363 | ||
366 | rc = attr->mmap(kobj, attr, vma); | 364 | rc = attr->mmap(file, kobj, attr, vma); |
367 | if (rc) | 365 | if (rc) |
368 | goto out_put; | 366 | goto out_put; |
369 | 367 | ||
@@ -501,7 +499,7 @@ int sysfs_create_bin_file(struct kobject *kobj, | |||
501 | void sysfs_remove_bin_file(struct kobject *kobj, | 499 | void sysfs_remove_bin_file(struct kobject *kobj, |
502 | const struct bin_attribute *attr) | 500 | const struct bin_attribute *attr) |
503 | { | 501 | { |
504 | sysfs_hash_and_remove(kobj->sd, attr->attr.name); | 502 | sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name); |
505 | } | 503 | } |
506 | 504 | ||
507 | EXPORT_SYMBOL_GPL(sysfs_create_bin_file); | 505 | EXPORT_SYMBOL_GPL(sysfs_create_bin_file); |
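The bin_attribute callbacks now receive the opening struct file as their first argument, so handlers can reach per-open state. A hedged sketch of an attribute under the new prototypes; the name and size are illustrative:

	static ssize_t example_bin_read(struct file *file, struct kobject *kobj,
					struct bin_attribute *attr,
					char *buf, loff_t off, size_t count)
	{
		/* file->private_data is now reachable from here */
		if (off >= attr->size)
			return 0;
		if (off + count > attr->size)
			count = attr->size - off;
		memset(buf, 0, count);	/* stand-in for real content */
		return count;
	}

	static struct bin_attribute example_bin_attr = {
		.attr	= { .name = "example", .mode = S_IRUGO },
		.size	= PAGE_SIZE,
		.read	= example_bin_read,
	};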
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 590717861c7a..7e54bac8c4b0 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -380,7 +380,7 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) | |||
380 | { | 380 | { |
381 | struct sysfs_inode_attrs *ps_iattr; | 381 | struct sysfs_inode_attrs *ps_iattr; |
382 | 382 | ||
383 | if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) | 383 | if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) |
384 | return -EEXIST; | 384 | return -EEXIST; |
385 | 385 | ||
386 | sd->s_parent = sysfs_get(acxt->parent_sd); | 386 | sd->s_parent = sysfs_get(acxt->parent_sd); |
@@ -533,13 +533,17 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) | |||
533 | * Pointer to sysfs_dirent if found, NULL if not. | 533 | * Pointer to sysfs_dirent if found, NULL if not. |
534 | */ | 534 | */ |
535 | struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, | 535 | struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, |
536 | const void *ns, | ||
536 | const unsigned char *name) | 537 | const unsigned char *name) |
537 | { | 538 | { |
538 | struct sysfs_dirent *sd; | 539 | struct sysfs_dirent *sd; |
539 | 540 | ||
540 | for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) | 541 | for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) { |
542 | if (ns && sd->s_ns && (sd->s_ns != ns)) | ||
543 | continue; | ||
541 | if (!strcmp(sd->s_name, name)) | 544 | if (!strcmp(sd->s_name, name)) |
542 | return sd; | 545 | return sd; |
546 | } | ||
543 | return NULL; | 547 | return NULL; |
544 | } | 548 | } |
545 | 549 | ||
@@ -558,12 +562,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, | |||
558 | * Pointer to sysfs_dirent if found, NULL if not. | 562 | * Pointer to sysfs_dirent if found, NULL if not. |
559 | */ | 563 | */ |
560 | struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, | 564 | struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, |
565 | const void *ns, | ||
561 | const unsigned char *name) | 566 | const unsigned char *name) |
562 | { | 567 | { |
563 | struct sysfs_dirent *sd; | 568 | struct sysfs_dirent *sd; |
564 | 569 | ||
565 | mutex_lock(&sysfs_mutex); | 570 | mutex_lock(&sysfs_mutex); |
566 | sd = sysfs_find_dirent(parent_sd, name); | 571 | sd = sysfs_find_dirent(parent_sd, ns, name); |
567 | sysfs_get(sd); | 572 | sysfs_get(sd); |
568 | mutex_unlock(&sysfs_mutex); | 573 | mutex_unlock(&sysfs_mutex); |
569 | 574 | ||
@@ -572,7 +577,8 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, | |||
572 | EXPORT_SYMBOL_GPL(sysfs_get_dirent); | 577 | EXPORT_SYMBOL_GPL(sysfs_get_dirent); |
573 | 578 | ||
574 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, | 579 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, |
575 | const char *name, struct sysfs_dirent **p_sd) | 580 | enum kobj_ns_type type, const void *ns, const char *name, |
581 | struct sysfs_dirent **p_sd) | ||
576 | { | 582 | { |
577 | umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; | 583 | umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; |
578 | struct sysfs_addrm_cxt acxt; | 584 | struct sysfs_addrm_cxt acxt; |
@@ -583,6 +589,9 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, | |||
583 | sd = sysfs_new_dirent(name, mode, SYSFS_DIR); | 589 | sd = sysfs_new_dirent(name, mode, SYSFS_DIR); |
584 | if (!sd) | 590 | if (!sd) |
585 | return -ENOMEM; | 591 | return -ENOMEM; |
592 | |||
593 | sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT); | ||
594 | sd->s_ns = ns; | ||
586 | sd->s_dir.kobj = kobj; | 595 | sd->s_dir.kobj = kobj; |
587 | 596 | ||
588 | /* link in */ | 597 | /* link in */ |
@@ -601,7 +610,33 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, | |||
601 | int sysfs_create_subdir(struct kobject *kobj, const char *name, | 610 | int sysfs_create_subdir(struct kobject *kobj, const char *name, |
602 | struct sysfs_dirent **p_sd) | 611 | struct sysfs_dirent **p_sd) |
603 | { | 612 | { |
604 | return create_dir(kobj, kobj->sd, name, p_sd); | 613 | return create_dir(kobj, kobj->sd, |
614 | KOBJ_NS_TYPE_NONE, NULL, name, p_sd); | ||
615 | } | ||
616 | |||
617 | /** | ||
618 | * sysfs_read_ns_type - return the ns_type associated with a kobject | ||
619 | * @kobj: the kobject being queried | ||
620 | * | ||
621 | * Each kobject can be tagged with exactly one namespace type | ||
622 | * (e.g. network or user). Return the ns_type associated with | ||
623 | * this object, if any. | ||
624 | */ | ||
625 | static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) | ||
626 | { | ||
627 | const struct kobj_ns_type_operations *ops; | ||
628 | enum kobj_ns_type type; | ||
629 | |||
630 | ops = kobj_child_ns_ops(kobj); | ||
631 | if (!ops) | ||
632 | return KOBJ_NS_TYPE_NONE; | ||
633 | |||
634 | type = ops->type; | ||
635 | BUG_ON(type <= KOBJ_NS_TYPE_NONE); | ||
636 | BUG_ON(type >= KOBJ_NS_TYPES); | ||
637 | BUG_ON(!kobj_ns_type_registered(type)); | ||
638 | |||
639 | return type; | ||
605 | } | 640 | } |
606 | 641 | ||
607 | /** | 642 | /** |
@@ -610,7 +645,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, | |||
610 | */ | 645 | */ |
611 | int sysfs_create_dir(struct kobject * kobj) | 646 | int sysfs_create_dir(struct kobject * kobj) |
612 | { | 647 | { |
648 | enum kobj_ns_type type; | ||
613 | struct sysfs_dirent *parent_sd, *sd; | 649 | struct sysfs_dirent *parent_sd, *sd; |
650 | const void *ns = NULL; | ||
614 | int error = 0; | 651 | int error = 0; |
615 | 652 | ||
616 | BUG_ON(!kobj); | 653 | BUG_ON(!kobj); |
@@ -620,7 +657,11 @@ int sysfs_create_dir(struct kobject * kobj) | |||
620 | else | 657 | else |
621 | parent_sd = &sysfs_root; | 658 | parent_sd = &sysfs_root; |
622 | 659 | ||
623 | error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); | 660 | if (sysfs_ns_type(parent_sd)) |
661 | ns = kobj->ktype->namespace(kobj); | ||
662 | type = sysfs_read_ns_type(kobj); | ||
663 | |||
664 | error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd); | ||
624 | if (!error) | 665 | if (!error) |
625 | kobj->sd = sd; | 666 | kobj->sd = sd; |
626 | return error; | 667 | return error; |
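The tag for a new directory comes from a ktype hook introduced by the kobject side of this series. A hedged sketch of what a participating ktype supplies; all example_* names are placeholders:

	static const void *example_namespace(struct kobject *kobj)
	{
		/* return the tag this object lives under, e.g. its netns */
		return example_kobj_to_ns(kobj);
	}

	static struct kobj_type example_ktype = {
		.release	= example_release,
		.sysfs_ops	= &example_sysfs_ops,
		.namespace	= example_namespace,
	};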
@@ -630,13 +671,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, | |||
630 | struct nameidata *nd) | 671 | struct nameidata *nd) |
631 | { | 672 | { |
632 | struct dentry *ret = NULL; | 673 | struct dentry *ret = NULL; |
633 | struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata; | 674 | struct dentry *parent = dentry->d_parent; |
675 | struct sysfs_dirent *parent_sd = parent->d_fsdata; | ||
634 | struct sysfs_dirent *sd; | 676 | struct sysfs_dirent *sd; |
635 | struct inode *inode; | 677 | struct inode *inode; |
678 | enum kobj_ns_type type; | ||
679 | const void *ns; | ||
636 | 680 | ||
637 | mutex_lock(&sysfs_mutex); | 681 | mutex_lock(&sysfs_mutex); |
638 | 682 | ||
639 | sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); | 683 | type = sysfs_ns_type(parent_sd); |
684 | ns = sysfs_info(dir->i_sb)->ns[type]; | ||
685 | |||
686 | sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name); | ||
640 | 687 | ||
641 | /* no such entry */ | 688 | /* no such entry */ |
642 | if (!sd) { | 689 | if (!sd) { |
@@ -735,7 +782,8 @@ void sysfs_remove_dir(struct kobject * kobj) | |||
735 | } | 782 | } |
736 | 783 | ||
737 | int sysfs_rename(struct sysfs_dirent *sd, | 784 | int sysfs_rename(struct sysfs_dirent *sd, |
738 | struct sysfs_dirent *new_parent_sd, const char *new_name) | 785 | struct sysfs_dirent *new_parent_sd, const void *new_ns, |
786 | const char *new_name) | ||
739 | { | 787 | { |
740 | const char *dup_name = NULL; | 788 | const char *dup_name = NULL; |
741 | int error; | 789 | int error; |
@@ -743,12 +791,12 @@ int sysfs_rename(struct sysfs_dirent *sd, | |||
743 | mutex_lock(&sysfs_mutex); | 791 | mutex_lock(&sysfs_mutex); |
744 | 792 | ||
745 | error = 0; | 793 | error = 0; |
746 | if ((sd->s_parent == new_parent_sd) && | 794 | if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) && |
747 | (strcmp(sd->s_name, new_name) == 0)) | 795 | (strcmp(sd->s_name, new_name) == 0)) |
748 | goto out; /* nothing to rename */ | 796 | goto out; /* nothing to rename */ |
749 | 797 | ||
750 | error = -EEXIST; | 798 | error = -EEXIST; |
751 | if (sysfs_find_dirent(new_parent_sd, new_name)) | 799 | if (sysfs_find_dirent(new_parent_sd, new_ns, new_name)) |
752 | goto out; | 800 | goto out; |
753 | 801 | ||
754 | /* rename sysfs_dirent */ | 802 | /* rename sysfs_dirent */ |
@@ -770,6 +818,7 @@ int sysfs_rename(struct sysfs_dirent *sd, | |||
770 | sd->s_parent = new_parent_sd; | 818 | sd->s_parent = new_parent_sd; |
771 | sysfs_link_sibling(sd); | 819 | sysfs_link_sibling(sd); |
772 | } | 820 | } |
821 | sd->s_ns = new_ns; | ||
773 | 822 | ||
774 | error = 0; | 823 | error = 0; |
775 | out: | 824 | out: |
@@ -780,19 +829,28 @@ int sysfs_rename(struct sysfs_dirent *sd, | |||
780 | 829 | ||
781 | int sysfs_rename_dir(struct kobject *kobj, const char *new_name) | 830 | int sysfs_rename_dir(struct kobject *kobj, const char *new_name) |
782 | { | 831 | { |
783 | return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name); | 832 | struct sysfs_dirent *parent_sd = kobj->sd->s_parent; |
833 | const void *new_ns = NULL; | ||
834 | |||
835 | if (sysfs_ns_type(parent_sd)) | ||
836 | new_ns = kobj->ktype->namespace(kobj); | ||
837 | |||
838 | return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name); | ||
784 | } | 839 | } |
785 | 840 | ||
786 | int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) | 841 | int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) |
787 | { | 842 | { |
788 | struct sysfs_dirent *sd = kobj->sd; | 843 | struct sysfs_dirent *sd = kobj->sd; |
789 | struct sysfs_dirent *new_parent_sd; | 844 | struct sysfs_dirent *new_parent_sd; |
845 | const void *new_ns = NULL; | ||
790 | 846 | ||
791 | BUG_ON(!sd->s_parent); | 847 | BUG_ON(!sd->s_parent); |
848 | if (sysfs_ns_type(sd->s_parent)) | ||
849 | new_ns = kobj->ktype->namespace(kobj); | ||
792 | new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? | 850 | new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? |
793 | new_parent_kobj->sd : &sysfs_root; | 851 | new_parent_kobj->sd : &sysfs_root; |
794 | 852 | ||
795 | return sysfs_rename(sd, new_parent_sd, sd->s_name); | 853 | return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name); |
796 | } | 854 | } |
797 | 855 | ||
798 | /* Relationship between s_mode and the DT_xxx types */ | 856 | /* Relationship between s_mode and the DT_xxx types */ |
@@ -807,32 +865,35 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp) | |||
807 | return 0; | 865 | return 0; |
808 | } | 866 | } |
809 | 867 | ||
810 | static struct sysfs_dirent *sysfs_dir_pos(struct sysfs_dirent *parent_sd, | 868 | static struct sysfs_dirent *sysfs_dir_pos(const void *ns, |
811 | ino_t ino, struct sysfs_dirent *pos) | 869 | struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) |
812 | { | 870 | { |
813 | if (pos) { | 871 | if (pos) { |
814 | int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && | 872 | int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && |
815 | pos->s_parent == parent_sd && | 873 | pos->s_parent == parent_sd && |
816 | ino == pos->s_ino; | 874 | ino == pos->s_ino; |
817 | sysfs_put(pos); | 875 | sysfs_put(pos); |
818 | if (valid) | 876 | if (!valid) |
819 | return pos; | 877 | pos = NULL; |
820 | } | 878 | } |
821 | pos = NULL; | 879 | if (!pos && (ino > 1) && (ino < INT_MAX)) { |
822 | if ((ino > 1) && (ino < INT_MAX)) { | ||
823 | pos = parent_sd->s_dir.children; | 880 | pos = parent_sd->s_dir.children; |
824 | while (pos && (ino > pos->s_ino)) | 881 | while (pos && (ino > pos->s_ino)) |
825 | pos = pos->s_sibling; | 882 | pos = pos->s_sibling; |
826 | } | 883 | } |
884 | while (pos && pos->s_ns && pos->s_ns != ns) | ||
885 | pos = pos->s_sibling; | ||
827 | return pos; | 886 | return pos; |
828 | } | 887 | } |
829 | 888 | ||
830 | static struct sysfs_dirent *sysfs_dir_next_pos(struct sysfs_dirent *parent_sd, | 889 | static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, |
831 | ino_t ino, struct sysfs_dirent *pos) | 890 | struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) |
832 | { | 891 | { |
833 | pos = sysfs_dir_pos(parent_sd, ino, pos); | 892 | pos = sysfs_dir_pos(ns, parent_sd, ino, pos); |
834 | if (pos) | 893 | if (pos) |
835 | pos = pos->s_sibling; | 894 | pos = pos->s_sibling; |
895 | while (pos && pos->s_ns && pos->s_ns != ns) | ||
896 | pos = pos->s_sibling; | ||
836 | return pos; | 897 | return pos; |
837 | } | 898 | } |
838 | 899 | ||
@@ -841,8 +902,13 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
841 | struct dentry *dentry = filp->f_path.dentry; | 902 | struct dentry *dentry = filp->f_path.dentry; |
842 | struct sysfs_dirent * parent_sd = dentry->d_fsdata; | 903 | struct sysfs_dirent * parent_sd = dentry->d_fsdata; |
843 | struct sysfs_dirent *pos = filp->private_data; | 904 | struct sysfs_dirent *pos = filp->private_data; |
905 | enum kobj_ns_type type; | ||
906 | const void *ns; | ||
844 | ino_t ino; | 907 | ino_t ino; |
845 | 908 | ||
909 | type = sysfs_ns_type(parent_sd); | ||
910 | ns = sysfs_info(dentry->d_sb)->ns[type]; | ||
911 | |||
846 | if (filp->f_pos == 0) { | 912 | if (filp->f_pos == 0) { |
847 | ino = parent_sd->s_ino; | 913 | ino = parent_sd->s_ino; |
848 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) | 914 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) |
@@ -857,9 +923,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
857 | filp->f_pos++; | 923 | filp->f_pos++; |
858 | } | 924 | } |
859 | mutex_lock(&sysfs_mutex); | 925 | mutex_lock(&sysfs_mutex); |
860 | for (pos = sysfs_dir_pos(parent_sd, filp->f_pos, pos); | 926 | for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); |
861 | pos; | 927 | pos; |
862 | pos = sysfs_dir_next_pos(parent_sd, filp->f_pos, pos)) { | 928 | pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { |
863 | const char * name; | 929 | const char * name; |
864 | unsigned int type; | 930 | unsigned int type; |
865 | int len, ret; | 931 | int len, ret; |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index e222b2582746..1beaa739d0a6 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -478,9 +478,12 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) | |||
478 | mutex_lock(&sysfs_mutex); | 478 | mutex_lock(&sysfs_mutex); |
479 | 479 | ||
480 | if (sd && dir) | 480 | if (sd && dir) |
481 | sd = sysfs_find_dirent(sd, dir); | 481 | /* Only directories are tagged, so no need to pass |
482 | * a tag explicitly. | ||
483 | */ | ||
484 | sd = sysfs_find_dirent(sd, NULL, dir); | ||
482 | if (sd && attr) | 485 | if (sd && attr) |
483 | sd = sysfs_find_dirent(sd, attr); | 486 | sd = sysfs_find_dirent(sd, NULL, attr); |
484 | if (sd) | 487 | if (sd) |
485 | sysfs_notify_dirent(sd); | 488 | sysfs_notify_dirent(sd); |
486 | 489 | ||
@@ -569,7 +572,7 @@ int sysfs_add_file_to_group(struct kobject *kobj, | |||
569 | int error; | 572 | int error; |
570 | 573 | ||
571 | if (group) | 574 | if (group) |
572 | dir_sd = sysfs_get_dirent(kobj->sd, group); | 575 | dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); |
573 | else | 576 | else |
574 | dir_sd = sysfs_get(kobj->sd); | 577 | dir_sd = sysfs_get(kobj->sd); |
575 | 578 | ||
@@ -599,7 +602,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) | |||
599 | mutex_lock(&sysfs_mutex); | 602 | mutex_lock(&sysfs_mutex); |
600 | 603 | ||
601 | rc = -ENOENT; | 604 | rc = -ENOENT; |
602 | sd = sysfs_find_dirent(kobj->sd, attr->name); | 605 | sd = sysfs_find_dirent(kobj->sd, NULL, attr->name); |
603 | if (!sd) | 606 | if (!sd) |
604 | goto out; | 607 | goto out; |
605 | 608 | ||
@@ -624,7 +627,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); | |||
624 | 627 | ||
625 | void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) | 628 | void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) |
626 | { | 629 | { |
627 | sysfs_hash_and_remove(kobj->sd, attr->name); | 630 | sysfs_hash_and_remove(kobj->sd, NULL, attr->name); |
628 | } | 631 | } |
629 | 632 | ||
630 | void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) | 633 | void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) |
@@ -646,11 +649,11 @@ void sysfs_remove_file_from_group(struct kobject *kobj, | |||
646 | struct sysfs_dirent *dir_sd; | 649 | struct sysfs_dirent *dir_sd; |
647 | 650 | ||
648 | if (group) | 651 | if (group) |
649 | dir_sd = sysfs_get_dirent(kobj->sd, group); | 652 | dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); |
650 | else | 653 | else |
651 | dir_sd = sysfs_get(kobj->sd); | 654 | dir_sd = sysfs_get(kobj->sd); |
652 | if (dir_sd) { | 655 | if (dir_sd) { |
653 | sysfs_hash_and_remove(dir_sd, attr->name); | 656 | sysfs_hash_and_remove(dir_sd, NULL, attr->name); |
654 | sysfs_put(dir_sd); | 657 | sysfs_put(dir_sd); |
655 | } | 658 | } |
656 | } | 659 | } |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index fe611949a7f7..23c1e598792a 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | |||
23 | int i; | 23 | int i; |
24 | 24 | ||
25 | for (i = 0, attr = grp->attrs; *attr; i++, attr++) | 25 | for (i = 0, attr = grp->attrs; *attr; i++, attr++) |
26 | sysfs_hash_and_remove(dir_sd, (*attr)->name); | 26 | sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); |
27 | } | 27 | } |
28 | 28 | ||
29 | static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | 29 | static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, |
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | |||
39 | * visibility. Do this by first removing then | 39 | * visibility. Do this by first removing then |
40 | * re-adding (if required) the file */ | 40 | * re-adding (if required) the file */ |
41 | if (update) | 41 | if (update) |
42 | sysfs_hash_and_remove(dir_sd, (*attr)->name); | 42 | sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); |
43 | if (grp->is_visible) { | 43 | if (grp->is_visible) { |
44 | mode = grp->is_visible(kobj, *attr, i); | 44 | mode = grp->is_visible(kobj, *attr, i); |
45 | if (!mode) | 45 | if (!mode) |
@@ -132,7 +132,7 @@ void sysfs_remove_group(struct kobject * kobj, | |||
132 | struct sysfs_dirent *sd; | 132 | struct sysfs_dirent *sd; |
133 | 133 | ||
134 | if (grp->name) { | 134 | if (grp->name) { |
135 | sd = sysfs_get_dirent(dir_sd, grp->name); | 135 | sd = sysfs_get_dirent(dir_sd, NULL, grp->name); |
136 | if (!sd) { | 136 | if (!sd) { |
137 | WARN(!sd, KERN_WARNING "sysfs group %p not found for " | 137 | WARN(!sd, KERN_WARNING "sysfs group %p not found for " |
138 | "kobject '%s'\n", grp, kobject_name(kobj)); | 138 | "kobject '%s'\n", grp, kobject_name(kobj)); |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index a4a0a9419711..bbd77e95cf7f 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -324,7 +324,7 @@ void sysfs_delete_inode(struct inode *inode) | |||
324 | sysfs_put(sd); | 324 | sysfs_put(sd); |
325 | } | 325 | } |
326 | 326 | ||
327 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) | 327 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name) |
328 | { | 328 | { |
329 | struct sysfs_addrm_cxt acxt; | 329 | struct sysfs_addrm_cxt acxt; |
330 | struct sysfs_dirent *sd; | 330 | struct sysfs_dirent *sd; |
@@ -334,7 +334,9 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) | |||
334 | 334 | ||
335 | sysfs_addrm_start(&acxt, dir_sd); | 335 | sysfs_addrm_start(&acxt, dir_sd); |
336 | 336 | ||
337 | sd = sysfs_find_dirent(dir_sd, name); | 337 | sd = sysfs_find_dirent(dir_sd, ns, name); |
338 | if (sd && (sd->s_ns != ns)) | ||
339 | sd = NULL; | ||
338 | if (sd) | 340 | if (sd) |
339 | sysfs_remove_one(&acxt, sd); | 341 | sysfs_remove_one(&acxt, sd); |
340 | 342 | ||
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 776137828dca..281c0c9bc39f 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -35,7 +35,7 @@ static const struct super_operations sysfs_ops = { | |||
35 | struct sysfs_dirent sysfs_root = { | 35 | struct sysfs_dirent sysfs_root = { |
36 | .s_name = "", | 36 | .s_name = "", |
37 | .s_count = ATOMIC_INIT(1), | 37 | .s_count = ATOMIC_INIT(1), |
38 | .s_flags = SYSFS_DIR, | 38 | .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), |
39 | .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, | 39 | .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, |
40 | .s_ino = 1, | 40 | .s_ino = 1, |
41 | }; | 41 | }; |
@@ -72,18 +72,107 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) | |||
72 | return 0; | 72 | return 0; |
73 | } | 73 | } |
74 | 74 | ||
75 | static int sysfs_test_super(struct super_block *sb, void *data) | ||
76 | { | ||
77 | struct sysfs_super_info *sb_info = sysfs_info(sb); | ||
78 | struct sysfs_super_info *info = data; | ||
79 | enum kobj_ns_type type; | ||
80 | int found = 1; | ||
81 | |||
82 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { | ||
83 | if (sb_info->ns[type] != info->ns[type]) | ||
84 | found = 0; | ||
85 | } | ||
86 | return found; | ||
87 | } | ||
88 | |||
89 | static int sysfs_set_super(struct super_block *sb, void *data) | ||
90 | { | ||
91 | int error; | ||
92 | error = set_anon_super(sb, data); | ||
93 | if (!error) | ||
94 | sb->s_fs_info = data; | ||
95 | return error; | ||
96 | } | ||
97 | |||
75 | static int sysfs_get_sb(struct file_system_type *fs_type, | 98 | static int sysfs_get_sb(struct file_system_type *fs_type, |
76 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 99 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
77 | { | 100 | { |
78 | return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt); | 101 | struct sysfs_super_info *info; |
102 | enum kobj_ns_type type; | ||
103 | struct super_block *sb; | ||
104 | int error; | ||
105 | |||
106 | error = -ENOMEM; | ||
107 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
108 | if (!info) | ||
109 | goto out; | ||
110 | |||
111 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | ||
112 | info->ns[type] = kobj_ns_current(type); | ||
113 | |||
114 | sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); | ||
115 | if (IS_ERR(sb) || sb->s_fs_info != info) | ||
116 | kfree(info); | ||
117 | if (IS_ERR(sb)) { | ||
118 | error = PTR_ERR(sb); | ||
119 | goto out; | ||
120 | } | ||
121 | if (!sb->s_root) { | ||
122 | sb->s_flags = flags; | ||
123 | error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); | ||
124 | if (error) { | ||
125 | deactivate_locked_super(sb); | ||
126 | goto out; | ||
127 | } | ||
128 | sb->s_flags |= MS_ACTIVE; | ||
129 | } | ||
130 | |||
131 | simple_set_mnt(mnt, sb); | ||
132 | error = 0; | ||
133 | out: | ||
134 | return error; | ||
135 | } | ||
136 | |||
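The test/set pair above is what makes sysfs superblocks per-namespace: a mount reuses an existing superblock only when every namespace tag matches. For reference, the per-super info the callbacks compare, as declared by this series in fs/sysfs/sysfs.h (reproduced here, not new code):

	struct sysfs_super_info {
		const void *ns[KOBJ_NS_TYPES];
	};

Two mounts issued from the same set of namespaces therefore satisfy sysfs_test_super() and share one superblock, while a mount from, say, another network namespace allocates a fresh one.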
137 | static void sysfs_kill_sb(struct super_block *sb) | ||
138 | { | ||
139 | struct sysfs_super_info *info = sysfs_info(sb); | ||
140 | |||
141 | /* Remove the superblock from fs_supers/s_instances | ||
142 | * so it can't be found before its sysfs_super_info is freed. | ||
143 | */ | ||
144 | kill_anon_super(sb); | ||
145 | kfree(info); | ||
79 | } | 146 | } |
80 | 147 | ||
81 | static struct file_system_type sysfs_fs_type = { | 148 | static struct file_system_type sysfs_fs_type = { |
82 | .name = "sysfs", | 149 | .name = "sysfs", |
83 | .get_sb = sysfs_get_sb, | 150 | .get_sb = sysfs_get_sb, |
84 | .kill_sb = kill_anon_super, | 151 | .kill_sb = sysfs_kill_sb, |
85 | }; | 152 | }; |
86 | 153 | ||
154 | void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) | ||
155 | { | ||
156 | struct super_block *sb; | ||
157 | |||
158 | mutex_lock(&sysfs_mutex); | ||
159 | spin_lock(&sb_lock); | ||
160 | list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { | ||
161 | struct sysfs_super_info *info = sysfs_info(sb); | ||
162 | /* | ||
163 | * If we see a superblock on the fs_supers/s_instances | ||
164 | * list the unmount has not completed and sb->s_fs_info | ||
165 | * points to a valid struct sysfs_super_info. | ||
166 | */ | ||
167 | /* Ignore superblocks with the wrong ns */ | ||
168 | if (info->ns[type] != ns) | ||
169 | continue; | ||
170 | info->ns[type] = NULL; | ||
171 | } | ||
172 | spin_unlock(&sb_lock); | ||
173 | mutex_unlock(&sysfs_mutex); | ||
174 | } | ||
175 | |||
87 | int __init sysfs_init(void) | 176 | int __init sysfs_init(void) |
88 | { | 177 | { |
89 | int err = -ENOMEM; | 178 | int err = -ENOMEM; |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 942f239a2132..f71246bebfe4 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -58,6 +58,8 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, | |||
58 | if (!sd) | 58 | if (!sd) |
59 | goto out_put; | 59 | goto out_put; |
60 | 60 | ||
61 | if (sysfs_ns_type(parent_sd)) | ||
62 | sd->s_ns = target->ktype->namespace(target); | ||
61 | sd->s_symlink.target_sd = target_sd; | 63 | sd->s_symlink.target_sd = target_sd; |
62 | target_sd = NULL; /* reference is now owned by the symlink */ | 64 | target_sd = NULL; /* reference is now owned by the symlink */ |
63 | 65 | ||
@@ -107,6 +109,26 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, | |||
107 | } | 109 | } |
108 | 110 | ||
109 | /** | 111 | /** |
112 | * sysfs_delete_link - remove symlink in object's directory. | ||
113 | * @kobj: object we're acting for. | ||
114 | * @targ: object we're pointing to. | ||
115 | * @name: name of the symlink to remove. | ||
116 | * | ||
117 | * Unlike sysfs_remove_link sysfs_delete_link has enough information | ||
118 | * to successfully delete symlinks in tagged directories. | ||
119 | */ | ||
120 | void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, | ||
121 | const char *name) | ||
122 | { | ||
123 | const void *ns = NULL; | ||
124 | spin_lock(&sysfs_assoc_lock); | ||
125 | if (targ->sd) | ||
126 | ns = targ->sd->s_ns; | ||
127 | spin_unlock(&sysfs_assoc_lock); | ||
128 | sysfs_hash_and_remove(kobj->sd, ns, name); | ||
129 | } | ||
130 | |||
131 | /** | ||
110 | * sysfs_remove_link - remove symlink in object's directory. | 132 | * sysfs_remove_link - remove symlink in object's directory. |
111 | * @kobj: object we're acting for. | 133 | * @kobj: object we're acting for. |
112 | * @name: name of the symlink to remove. | 134 | * @name: name of the symlink to remove. |
@@ -121,7 +143,7 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) | |||
121 | else | 143 | else |
122 | parent_sd = kobj->sd; | 144 | parent_sd = kobj->sd; |
123 | 145 | ||
124 | sysfs_hash_and_remove(parent_sd, name); | 146 | sysfs_hash_and_remove(parent_sd, NULL, name); |
125 | } | 147 | } |
126 | 148 | ||
127 | /** | 149 | /** |
@@ -137,6 +159,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, | |||
137 | const char *old, const char *new) | 159 | const char *old, const char *new) |
138 | { | 160 | { |
139 | struct sysfs_dirent *parent_sd, *sd = NULL; | 161 | struct sysfs_dirent *parent_sd, *sd = NULL; |
162 | const void *old_ns = NULL, *new_ns = NULL; | ||
140 | int result; | 163 | int result; |
141 | 164 | ||
142 | if (!kobj) | 165 | if (!kobj) |
@@ -144,8 +167,11 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, | |||
144 | else | 167 | else |
145 | parent_sd = kobj->sd; | 168 | parent_sd = kobj->sd; |
146 | 169 | ||
170 | if (targ->sd) | ||
171 | old_ns = targ->sd->s_ns; | ||
172 | |||
147 | result = -ENOENT; | 173 | result = -ENOENT; |
148 | sd = sysfs_get_dirent(parent_sd, old); | 174 | sd = sysfs_get_dirent(parent_sd, old_ns, old); |
149 | if (!sd) | 175 | if (!sd) |
150 | goto out; | 176 | goto out; |
151 | 177 | ||
@@ -155,7 +181,10 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, | |||
155 | if (sd->s_symlink.target_sd->s_dir.kobj != targ) | 181 | if (sd->s_symlink.target_sd->s_dir.kobj != targ) |
156 | goto out; | 182 | goto out; |
157 | 183 | ||
158 | result = sysfs_rename(sd, parent_sd, new); | 184 | if (sysfs_ns_type(parent_sd)) |
185 | new_ns = targ->ktype->namespace(targ); | ||
186 | |||
187 | result = sysfs_rename(sd, parent_sd, new_ns, new); | ||
159 | 188 | ||
160 | out: | 189 | out: |
161 | sysfs_put(sd); | 190 | sysfs_put(sd); |
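sysfs_delete_link() exists because sysfs_remove_link() passes a NULL namespace to sysfs_hash_and_remove() and so can never match a tagged dirent. A sketch of the tag-aware lookup this implies, assuming the s_ns field added in the sysfs.h hunk below and the sibling-list child layout of this era:

	/* Sketch only: a child matches when both tag and name match. */
	static struct sysfs_dirent *find_dirent_sketch(struct sysfs_dirent *parent_sd,
						       const void *ns, const char *name)
	{
		struct sysfs_dirent *sd;

		for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
			if (sd->s_ns != ns)
				continue;	/* tagged for another namespace */
			if (!strcmp(sd->s_name, name))
				return sd;
		}
		return NULL;
	}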
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 30f5a44fb5d3..6a13105b5594 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -58,6 +58,7 @@ struct sysfs_dirent { | |||
58 | struct sysfs_dirent *s_sibling; | 58 | struct sysfs_dirent *s_sibling; |
59 | const char *s_name; | 59 | const char *s_name; |
60 | 60 | ||
61 | const void *s_ns; /* namespace tag */ | ||
61 | union { | 62 | union { |
62 | struct sysfs_elem_dir s_dir; | 63 | struct sysfs_elem_dir s_dir; |
63 | struct sysfs_elem_symlink s_symlink; | 64 | struct sysfs_elem_symlink s_symlink; |
@@ -81,14 +82,27 @@ struct sysfs_dirent { | |||
81 | #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) | 82 | #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) |
82 | #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) | 83 | #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) |
83 | 84 | ||
84 | #define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK | 85 | /* identify any namespace tag on sysfs_dirents */ |
85 | #define SYSFS_FLAG_REMOVED 0x0200 | 86 | #define SYSFS_NS_TYPE_MASK 0xff00 |
87 | #define SYSFS_NS_TYPE_SHIFT 8 | ||
88 | |||
89 | #define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) | ||
90 | #define SYSFS_FLAG_REMOVED 0x020000 | ||
86 | 91 | ||
87 | static inline unsigned int sysfs_type(struct sysfs_dirent *sd) | 92 | static inline unsigned int sysfs_type(struct sysfs_dirent *sd) |
88 | { | 93 | { |
89 | return sd->s_flags & SYSFS_TYPE_MASK; | 94 | return sd->s_flags & SYSFS_TYPE_MASK; |
90 | } | 95 | } |
91 | 96 | ||
97 | /* | ||
98 | * Return any namespace tags on this dirent. | ||
99 | * enum kobj_ns_type is defined in linux/kobject.h | ||
100 | */ | ||
101 | static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) | ||
102 | { | ||
103 | return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT; | ||
104 | } | ||
105 | |||
92 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 106 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
93 | #define sysfs_dirent_init_lockdep(sd) \ | 107 | #define sysfs_dirent_init_lockdep(sd) \ |
94 | do { \ | 108 | do { \ |
@@ -114,6 +128,16 @@ struct sysfs_addrm_cxt { | |||
114 | /* | 128 | /* |
115 | * mount.c | 129 | * mount.c |
116 | */ | 130 | */ |
131 | |||
132 | /* | ||
133 | * Each sb is associated with a set of namespace tags (i.e. | ||
134 | * the network namespace of the task which mounted this sysfs | ||
135 | * instance). | ||
136 | */ | ||
137 | struct sysfs_super_info { | ||
138 | const void *ns[KOBJ_NS_TYPES]; | ||
139 | }; | ||
140 | #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) | ||
117 | extern struct sysfs_dirent sysfs_root; | 141 | extern struct sysfs_dirent sysfs_root; |
118 | extern struct kmem_cache *sysfs_dir_cachep; | 142 | extern struct kmem_cache *sysfs_dir_cachep; |
119 | 143 | ||
@@ -137,8 +161,10 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); | |||
137 | void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); | 161 | void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); |
138 | 162 | ||
139 | struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, | 163 | struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, |
164 | const void *ns, | ||
140 | const unsigned char *name); | 165 | const unsigned char *name); |
141 | struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, | 166 | struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, |
167 | const void *ns, | ||
142 | const unsigned char *name); | 168 | const unsigned char *name); |
143 | struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); | 169 | struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); |
144 | 170 | ||
@@ -149,7 +175,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, | |||
149 | void sysfs_remove_subdir(struct sysfs_dirent *sd); | 175 | void sysfs_remove_subdir(struct sysfs_dirent *sd); |
150 | 176 | ||
151 | int sysfs_rename(struct sysfs_dirent *sd, | 177 | int sysfs_rename(struct sysfs_dirent *sd, |
152 | struct sysfs_dirent *new_parent_sd, const char *new_name); | 178 | struct sysfs_dirent *new_parent_sd, const void *ns, const char *new_name); |
153 | 179 | ||
154 | static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) | 180 | static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) |
155 | { | 181 | { |
@@ -179,7 +205,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); | |||
179 | int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | 205 | int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); |
180 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 206 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
181 | size_t size, int flags); | 207 | size_t size, int flags); |
182 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); | 208 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name); |
183 | int sysfs_inode_init(void); | 209 | int sysfs_inode_init(void); |
184 | 210 | ||
185 | /* | 211 | /* |
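Carving SYSFS_NS_TYPE_MASK out of s_flags is why SYSFS_FLAG_REMOVED moves from 0x0200 to 0x020000: the second byte now holds the namespace type. A sketch of the encoding that sysfs_ns_type() reverses (enum values per linux/kobject.h, as the comment above notes):

	/* Low byte: dirent type.  Next byte: kobj_ns_type.  Upper bits: flags. */
	static inline int sd_flags_sketch(int type, enum kobj_ns_type ns_type)
	{
		return (ns_type << SYSFS_NS_TYPE_SHIFT) | type;
	}
	/* decode: (s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT */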
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 241e9765cfad..bbd69bdb0fa8 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c | |||
@@ -159,15 +159,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) | |||
159 | *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count); | 159 | *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count); |
160 | fs16_add(sbi, sbi->s_sb_total_free_inodes, -1); | 160 | fs16_add(sbi, sbi->s_sb_total_free_inodes, -1); |
161 | dirty_sb(sb); | 161 | dirty_sb(sb); |
162 | 162 | inode_init_owner(inode, dir, mode); | |
163 | if (dir->i_mode & S_ISGID) { | ||
164 | inode->i_gid = dir->i_gid; | ||
165 | if (S_ISDIR(mode)) | ||
166 | mode |= S_ISGID; | ||
167 | } else | ||
168 | inode->i_gid = current_fsgid(); | ||
169 | |||
170 | inode->i_uid = current_fsuid(); | ||
171 | inode->i_ino = fs16_to_cpu(sbi, ino); | 163 | inode->i_ino = fs16_to_cpu(sbi, ino); |
172 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 164 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
173 | inode->i_blocks = 0; | 165 | inode->i_blocks = 0; |
@@ -176,7 +168,6 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) | |||
176 | insert_inode_hash(inode); | 168 | insert_inode_hash(inode); |
177 | mark_inode_dirty(inode); | 169 | mark_inode_dirty(inode); |
178 | 170 | ||
179 | inode->i_mode = mode; /* for sysv_write_inode() */ | ||
180 | sysv_write_inode(inode, 0); /* ensure inode not allocated again */ | 171 | sysv_write_inode(inode, 0); /* ensure inode not allocated again */ |
181 | mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ | 172 | mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ |
182 | /* That's it. */ | 173 | /* That's it. */ |
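This hunk, and the matching ones in ubifs, udf and ufs below, replace the same open-coded ownership block with the new VFS helper. A sketch of what inode_init_owner() consolidates, mirroring the logic deleted here (fs/inode.c carries the authoritative version):

	void inode_init_owner(struct inode *inode, const struct inode *dir,
			      mode_t mode)
	{
		inode->i_uid = current_fsuid();
		if (dir && dir->i_mode & S_ISGID) {
			inode->i_gid = dir->i_gid;	/* inherit group */
			if (S_ISDIR(mode))
				mode |= S_ISGID;	/* propagate setgid */
		} else
			inode->i_gid = current_fsgid();
		inode->i_mode = mode;
	}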
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 401e503d44a1..87ebcce72213 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -104,14 +104,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, | |||
104 | */ | 104 | */ |
105 | inode->i_flags |= (S_NOCMTIME); | 105 | inode->i_flags |= (S_NOCMTIME); |
106 | 106 | ||
107 | inode->i_uid = current_fsuid(); | 107 | inode_init_owner(inode, dir, mode); |
108 | if (dir->i_mode & S_ISGID) { | ||
109 | inode->i_gid = dir->i_gid; | ||
110 | if (S_ISDIR(mode)) | ||
111 | mode |= S_ISGID; | ||
112 | } else | ||
113 | inode->i_gid = current_fsgid(); | ||
114 | inode->i_mode = mode; | ||
115 | inode->i_mtime = inode->i_atime = inode->i_ctime = | 108 | inode->i_mtime = inode->i_atime = inode->i_ctime = |
116 | ubifs_current_time(inode); | 109 | ubifs_current_time(inode); |
117 | inode->i_mapping->nrpages = 0; | 110 | inode->i_mapping->nrpages = 0; |
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index f0f2a436251e..3a84455c2a77 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -209,6 +209,6 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
209 | const struct file_operations udf_dir_operations = { | 209 | const struct file_operations udf_dir_operations = { |
210 | .read = generic_read_dir, | 210 | .read = generic_read_dir, |
211 | .readdir = udf_readdir, | 211 | .readdir = udf_readdir, |
212 | .ioctl = udf_ioctl, | 212 | .unlocked_ioctl = udf_ioctl, |
213 | .fsync = simple_fsync, | 213 | .fsync = simple_fsync, |
214 | }; | 214 | }; |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 4b6a46ccbf46..baae3a723946 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
38 | #include <linux/buffer_head.h> | 38 | #include <linux/buffer_head.h> |
39 | #include <linux/aio.h> | 39 | #include <linux/aio.h> |
40 | #include <linux/smp_lock.h> | ||
40 | 41 | ||
41 | #include "udf_i.h" | 42 | #include "udf_i.h" |
42 | #include "udf_sb.h" | 43 | #include "udf_sb.h" |
@@ -144,50 +145,60 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
144 | return retval; | 145 | return retval; |
145 | } | 146 | } |
146 | 147 | ||
147 | int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 148 | long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
148 | unsigned long arg) | ||
149 | { | 149 | { |
150 | struct inode *inode = filp->f_dentry->d_inode; | ||
150 | long old_block, new_block; | 151 | long old_block, new_block; |
151 | int result = -EINVAL; | 152 | int result = -EINVAL; |
152 | 153 | ||
154 | lock_kernel(); | ||
155 | |||
153 | if (file_permission(filp, MAY_READ) != 0) { | 156 | if (file_permission(filp, MAY_READ) != 0) { |
154 | udf_debug("no permission to access inode %lu\n", | 157 | udf_debug("no permission to access inode %lu\n", inode->i_ino); |
155 | inode->i_ino); | 158 | result = -EPERM; |
156 | return -EPERM; | 159 | goto out; |
157 | } | 160 | } |
158 | 161 | ||
159 | if (!arg) { | 162 | if (!arg) { |
160 | udf_debug("invalid argument to udf_ioctl\n"); | 163 | udf_debug("invalid argument to udf_ioctl\n"); |
161 | return -EINVAL; | 164 | result = -EINVAL; |
165 | goto out; | ||
162 | } | 166 | } |
163 | 167 | ||
164 | switch (cmd) { | 168 | switch (cmd) { |
165 | case UDF_GETVOLIDENT: | 169 | case UDF_GETVOLIDENT: |
166 | if (copy_to_user((char __user *)arg, | 170 | if (copy_to_user((char __user *)arg, |
167 | UDF_SB(inode->i_sb)->s_volume_ident, 32)) | 171 | UDF_SB(inode->i_sb)->s_volume_ident, 32)) |
168 | return -EFAULT; | 172 | result = -EFAULT; |
169 | else | 173 | else |
170 | return 0; | 174 | result = 0; |
175 | goto out; | ||
171 | case UDF_RELOCATE_BLOCKS: | 176 | case UDF_RELOCATE_BLOCKS: |
172 | if (!capable(CAP_SYS_ADMIN)) | 177 | if (!capable(CAP_SYS_ADMIN)) { |
173 | return -EACCES; | 178 | result = -EACCES; |
174 | if (get_user(old_block, (long __user *)arg)) | 179 | goto out; |
175 | return -EFAULT; | 180 | } |
181 | if (get_user(old_block, (long __user *)arg)) { | ||
182 | result = -EFAULT; | ||
183 | goto out; | ||
184 | } | ||
176 | result = udf_relocate_blocks(inode->i_sb, | 185 | result = udf_relocate_blocks(inode->i_sb, |
177 | old_block, &new_block); | 186 | old_block, &new_block); |
178 | if (result == 0) | 187 | if (result == 0) |
179 | result = put_user(new_block, (long __user *)arg); | 188 | result = put_user(new_block, (long __user *)arg); |
180 | return result; | 189 | goto out; |
181 | case UDF_GETEASIZE: | 190 | case UDF_GETEASIZE: |
182 | result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg); | 191 | result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg); |
183 | break; | 192 | goto out; |
184 | case UDF_GETEABLOCK: | 193 | case UDF_GETEABLOCK: |
185 | result = copy_to_user((char __user *)arg, | 194 | result = copy_to_user((char __user *)arg, |
186 | UDF_I(inode)->i_ext.i_data, | 195 | UDF_I(inode)->i_ext.i_data, |
187 | UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0; | 196 | UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0; |
188 | break; | 197 | goto out; |
189 | } | 198 | } |
190 | 199 | ||
200 | out: | ||
201 | unlock_kernel(); | ||
191 | return result; | 202 | return result; |
192 | } | 203 | } |
193 | 204 | ||
@@ -207,7 +218,7 @@ static int udf_release_file(struct inode *inode, struct file *filp) | |||
207 | const struct file_operations udf_file_operations = { | 218 | const struct file_operations udf_file_operations = { |
208 | .read = do_sync_read, | 219 | .read = do_sync_read, |
209 | .aio_read = generic_file_aio_read, | 220 | .aio_read = generic_file_aio_read, |
210 | .ioctl = udf_ioctl, | 221 | .unlocked_ioctl = udf_ioctl, |
211 | .open = dquot_file_open, | 222 | .open = dquot_file_open, |
212 | .mmap = generic_file_mmap, | 223 | .mmap = generic_file_mmap, |
213 | .write = do_sync_write, | 224 | .write = do_sync_write, |
@@ -227,7 +238,7 @@ int udf_setattr(struct dentry *dentry, struct iattr *iattr) | |||
227 | if (error) | 238 | if (error) |
228 | return error; | 239 | return error; |
229 | 240 | ||
230 | if (iattr->ia_valid & ATTR_SIZE) | 241 | if (is_quota_modification(inode, iattr)) |
231 | dquot_initialize(inode); | 242 | dquot_initialize(inode); |
232 | 243 | ||
233 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | 244 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || |
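The udf_ioctl() conversion above is the standard .ioctl to .unlocked_ioctl pushdown: the inode is recovered from the file, lock_kernel()/unlock_kernel() bracket the body, and every early return funnels through one exit label. The general shape, as a sketch (foo_do_ioctl is a hypothetical placeholder):

	long foo_unlocked_ioctl(struct file *filp, unsigned int cmd,
				unsigned long arg)
	{
		struct inode *inode = filp->f_dentry->d_inode;
		long ret;

		lock_kernel();	/* formerly taken by the ->ioctl() wrapper */
		ret = foo_do_ioctl(inode, cmd, arg);
		unlock_kernel();
		return ret;
	}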
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index fb68c9cd0c3e..2b5586c7f02a 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -124,15 +124,8 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
124 | udf_updated_lvid(sb); | 124 | udf_updated_lvid(sb); |
125 | } | 125 | } |
126 | mutex_unlock(&sbi->s_alloc_mutex); | 126 | mutex_unlock(&sbi->s_alloc_mutex); |
127 | inode->i_mode = mode; | 127 | |
128 | inode->i_uid = current_fsuid(); | 128 | inode_init_owner(inode, dir, mode); |
129 | if (dir->i_mode & S_ISGID) { | ||
130 | inode->i_gid = dir->i_gid; | ||
131 | if (S_ISDIR(mode)) | ||
132 | mode |= S_ISGID; | ||
133 | } else { | ||
134 | inode->i_gid = current_fsgid(); | ||
135 | } | ||
136 | 129 | ||
137 | iinfo->i_location.logicalBlockNum = block; | 130 | iinfo->i_location.logicalBlockNum = block; |
138 | iinfo->i_location.partitionReferenceNum = | 131 | iinfo->i_location.partitionReferenceNum = |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 75816025f95f..585f733615dc 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -579,7 +579,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, | |||
579 | inode->i_data.a_ops = &udf_aops; | 579 | inode->i_data.a_ops = &udf_aops; |
580 | inode->i_op = &udf_file_inode_operations; | 580 | inode->i_op = &udf_file_inode_operations; |
581 | inode->i_fop = &udf_file_operations; | 581 | inode->i_fop = &udf_file_operations; |
582 | inode->i_mode = mode; | ||
583 | mark_inode_dirty(inode); | 582 | mark_inode_dirty(inode); |
584 | 583 | ||
585 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 584 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
@@ -627,7 +626,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
627 | goto out; | 626 | goto out; |
628 | 627 | ||
629 | iinfo = UDF_I(inode); | 628 | iinfo = UDF_I(inode); |
630 | inode->i_uid = current_fsuid(); | ||
631 | init_special_inode(inode, mode, rdev); | 629 | init_special_inode(inode, mode, rdev); |
632 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 630 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
633 | if (!fi) { | 631 | if (!fi) { |
@@ -674,7 +672,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
674 | goto out; | 672 | goto out; |
675 | 673 | ||
676 | err = -EIO; | 674 | err = -EIO; |
677 | inode = udf_new_inode(dir, S_IFDIR, &err); | 675 | inode = udf_new_inode(dir, S_IFDIR | mode, &err); |
678 | if (!inode) | 676 | if (!inode) |
679 | goto out; | 677 | goto out; |
680 | 678 | ||
@@ -697,9 +695,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
697 | FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; | 695 | FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; |
698 | udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); | 696 | udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); |
699 | brelse(fibh.sbh); | 697 | brelse(fibh.sbh); |
700 | inode->i_mode = S_IFDIR | mode; | ||
701 | if (dir->i_mode & S_ISGID) | ||
702 | inode->i_mode |= S_ISGID; | ||
703 | mark_inode_dirty(inode); | 698 | mark_inode_dirty(inode); |
704 | 699 | ||
705 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 700 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
@@ -912,7 +907,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
912 | dquot_initialize(dir); | 907 | dquot_initialize(dir); |
913 | 908 | ||
914 | lock_kernel(); | 909 | lock_kernel(); |
915 | inode = udf_new_inode(dir, S_IFLNK, &err); | 910 | inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); |
916 | if (!inode) | 911 | if (!inode) |
917 | goto out; | 912 | goto out; |
918 | 913 | ||
@@ -923,7 +918,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
923 | } | 918 | } |
924 | 919 | ||
925 | iinfo = UDF_I(inode); | 920 | iinfo = UDF_I(inode); |
926 | inode->i_mode = S_IFLNK | S_IRWXUGO; | ||
927 | inode->i_data.a_ops = &udf_symlink_aops; | 921 | inode->i_data.a_ops = &udf_symlink_aops; |
928 | inode->i_op = &udf_symlink_inode_operations; | 922 | inode->i_op = &udf_symlink_inode_operations; |
929 | 923 | ||
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 702a1148e702..9079ff7d6255 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -130,8 +130,7 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, | |||
130 | uint8_t *, uint8_t *); | 130 | uint8_t *, uint8_t *); |
131 | 131 | ||
132 | /* file.c */ | 132 | /* file.c */ |
133 | extern int udf_ioctl(struct inode *, struct file *, unsigned int, | 133 | extern long udf_ioctl(struct file *, unsigned int, unsigned long); |
134 | unsigned long); | ||
135 | extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); | 134 | extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); |
136 | /* inode.c */ | 135 | /* inode.c */ |
137 | extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); | 136 | extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 230ecf608026..3a959d55084d 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -303,15 +303,7 @@ cg_found: | |||
303 | sb->s_dirt = 1; | 303 | sb->s_dirt = 1; |
304 | 304 | ||
305 | inode->i_ino = cg * uspi->s_ipg + bit; | 305 | inode->i_ino = cg * uspi->s_ipg + bit; |
306 | inode->i_mode = mode; | 306 | inode_init_owner(inode, dir, mode); |
307 | inode->i_uid = current_fsuid(); | ||
308 | if (dir->i_mode & S_ISGID) { | ||
309 | inode->i_gid = dir->i_gid; | ||
310 | if (S_ISDIR(mode)) | ||
311 | inode->i_mode |= S_ISGID; | ||
312 | } else | ||
313 | inode->i_gid = current_fsgid(); | ||
314 | |||
315 | inode->i_blocks = 0; | 307 | inode->i_blocks = 0; |
316 | inode->i_generation = 0; | 308 | inode->i_generation = 0; |
317 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 309 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 80b68c3702d1..cffa756f1047 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -603,7 +603,7 @@ static void ufs_set_inode_ops(struct inode *inode) | |||
603 | if (!inode->i_blocks) | 603 | if (!inode->i_blocks) |
604 | inode->i_op = &ufs_fast_symlink_inode_operations; | 604 | inode->i_op = &ufs_fast_symlink_inode_operations; |
605 | else { | 605 | else { |
606 | inode->i_op = &page_symlink_inode_operations; | 606 | inode->i_op = &ufs_symlink_inode_operations; |
607 | inode->i_mapping->a_ops = &ufs_aops; | 607 | inode->i_mapping->a_ops = &ufs_aops; |
608 | } | 608 | } |
609 | } else | 609 | } else |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 118556243e7a..eabc02eb1294 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -148,7 +148,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, | |||
148 | 148 | ||
149 | if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { | 149 | if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { |
150 | /* slow symlink */ | 150 | /* slow symlink */ |
151 | inode->i_op = &page_symlink_inode_operations; | 151 | inode->i_op = &ufs_symlink_inode_operations; |
152 | inode->i_mapping->a_ops = &ufs_aops; | 152 | inode->i_mapping->a_ops = &ufs_aops; |
153 | err = page_symlink(inode, symname, l); | 153 | err = page_symlink(inode, symname, l); |
154 | if (err) | 154 | if (err) |
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index c0156eda44bc..d283628b4778 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c | |||
@@ -42,4 +42,12 @@ static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
42 | const struct inode_operations ufs_fast_symlink_inode_operations = { | 42 | const struct inode_operations ufs_fast_symlink_inode_operations = { |
43 | .readlink = generic_readlink, | 43 | .readlink = generic_readlink, |
44 | .follow_link = ufs_follow_link, | 44 | .follow_link = ufs_follow_link, |
45 | .setattr = ufs_setattr, | ||
46 | }; | ||
47 | |||
48 | const struct inode_operations ufs_symlink_inode_operations = { | ||
49 | .readlink = generic_readlink, | ||
50 | .follow_link = page_follow_link_light, | ||
51 | .put_link = page_put_link, | ||
52 | .setattr = ufs_setattr, | ||
45 | }; | 53 | }; |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index d3b6270cb377..f294c44577dc 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -508,7 +508,7 @@ out: | |||
508 | * - there is no way to know old size | 508 | * - there is no way to know old size |
509 | * - there is no way to inform user about error, if it happens in `truncate' | 509 | * - there is no way to inform user about error, if it happens in `truncate' |
510 | */ | 510 | */ |
511 | static int ufs_setattr(struct dentry *dentry, struct iattr *attr) | 511 | int ufs_setattr(struct dentry *dentry, struct iattr *attr) |
512 | { | 512 | { |
513 | struct inode *inode = dentry->d_inode; | 513 | struct inode *inode = dentry->d_inode; |
514 | unsigned int ia_valid = attr->ia_valid; | 514 | unsigned int ia_valid = attr->ia_valid; |
@@ -518,18 +518,18 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr) | |||
518 | if (error) | 518 | if (error) |
519 | return error; | 519 | return error; |
520 | 520 | ||
521 | if (is_quota_modification(inode, attr)) | ||
522 | dquot_initialize(inode); | ||
523 | |||
521 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 524 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
522 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 525 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
523 | error = dquot_transfer(inode, attr); | 526 | error = dquot_transfer(inode, attr); |
524 | if (error) | 527 | if (error) |
525 | return error; | 528 | return error; |
526 | } | 529 | } |
527 | if (ia_valid & ATTR_SIZE && | 530 | if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { |
528 | attr->ia_size != i_size_read(inode)) { | ||
529 | loff_t old_i_size = inode->i_size; | 531 | loff_t old_i_size = inode->i_size; |
530 | 532 | ||
531 | dquot_initialize(inode); | ||
532 | |||
533 | error = vmtruncate(inode, attr->ia_size); | 533 | error = vmtruncate(inode, attr->ia_size); |
534 | if (error) | 534 | if (error) |
535 | return error; | 535 | return error; |
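The udf and ufs setattr hunks above switch from an ATTR_SIZE-only test to is_quota_modification(), so ownership changes also get dquot_initialize() before dquot_transfer() runs. A sketch matching those call sites (the helper lives in include/linux/quotaops.h):

	static inline int is_quota_modification(struct inode *inode,
						struct iattr *ia)
	{
		return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
		       (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
		       (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
	}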
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 43f9f5d5670e..179ae6b3180a 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h | |||
@@ -122,9 +122,11 @@ extern void ufs_panic (struct super_block *, const char *, const char *, ...) __ | |||
122 | 122 | ||
123 | /* symlink.c */ | 123 | /* symlink.c */ |
124 | extern const struct inode_operations ufs_fast_symlink_inode_operations; | 124 | extern const struct inode_operations ufs_fast_symlink_inode_operations; |
125 | extern const struct inode_operations ufs_symlink_inode_operations; | ||
125 | 126 | ||
126 | /* truncate.c */ | 127 | /* truncate.c */ |
127 | extern int ufs_truncate (struct inode *, loff_t); | 128 | extern int ufs_truncate (struct inode *, loff_t); |
129 | extern int ufs_setattr(struct dentry *dentry, struct iattr *attr); | ||
128 | 130 | ||
129 | static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) | 131 | static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) |
130 | { | 132 | { |
diff --git a/fs/xattr.c b/fs/xattr.c index 46f87e828b48..01bb8135e14a 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -590,10 +590,10 @@ strcmp_prefix(const char *a, const char *a_prefix) | |||
590 | /* | 590 | /* |
591 | * Find the xattr_handler with the matching prefix. | 591 | * Find the xattr_handler with the matching prefix. |
592 | */ | 592 | */ |
593 | static struct xattr_handler * | 593 | static const struct xattr_handler * |
594 | xattr_resolve_name(struct xattr_handler **handlers, const char **name) | 594 | xattr_resolve_name(const struct xattr_handler **handlers, const char **name) |
595 | { | 595 | { |
596 | struct xattr_handler *handler; | 596 | const struct xattr_handler *handler; |
597 | 597 | ||
598 | if (!*name) | 598 | if (!*name) |
599 | return NULL; | 599 | return NULL; |
@@ -614,7 +614,7 @@ xattr_resolve_name(struct xattr_handler **handlers, const char **name) | |||
614 | ssize_t | 614 | ssize_t |
615 | generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) | 615 | generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) |
616 | { | 616 | { |
617 | struct xattr_handler *handler; | 617 | const struct xattr_handler *handler; |
618 | 618 | ||
619 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); | 619 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); |
620 | if (!handler) | 620 | if (!handler) |
@@ -629,7 +629,7 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s | |||
629 | ssize_t | 629 | ssize_t |
630 | generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | 630 | generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) |
631 | { | 631 | { |
632 | struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; | 632 | const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; |
633 | unsigned int size = 0; | 633 | unsigned int size = 0; |
634 | 634 | ||
635 | if (!buffer) { | 635 | if (!buffer) { |
@@ -659,7 +659,7 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
659 | int | 659 | int |
660 | generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) | 660 | generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) |
661 | { | 661 | { |
662 | struct xattr_handler *handler; | 662 | const struct xattr_handler *handler; |
663 | 663 | ||
664 | if (size == 0) | 664 | if (size == 0) |
665 | value = ""; /* empty EA, do not remove */ | 665 | value = ""; /* empty EA, do not remove */ |
@@ -676,7 +676,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz | |||
676 | int | 676 | int |
677 | generic_removexattr(struct dentry *dentry, const char *name) | 677 | generic_removexattr(struct dentry *dentry, const char *name) |
678 | { | 678 | { |
679 | struct xattr_handler *handler; | 679 | const struct xattr_handler *handler; |
680 | 680 | ||
681 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); | 681 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); |
682 | if (!handler) | 682 | if (!handler) |
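The constification above runs through the generic_*xattr() dispatchers, which all resolve a handler by name prefix. A minimal sketch of that prefix match, mirroring strcmp_prefix() from the first hunk of this file:

	/* Returns the suffix after a matching prefix, or NULL on mismatch. */
	static const char *strcmp_prefix_sketch(const char *a, const char *a_prefix)
	{
		while (*a_prefix && *a == *a_prefix) {
			a++;
			a_prefix++;
		}
		return *a_prefix ? NULL : a;
	}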
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index b4769e40e8bc..c8fb13f83b3f 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -77,6 +77,7 @@ xfs-y += xfs_alloc.o \ | |||
77 | xfs_itable.o \ | 77 | xfs_itable.o \ |
78 | xfs_dfrag.o \ | 78 | xfs_dfrag.o \ |
79 | xfs_log.o \ | 79 | xfs_log.o \ |
80 | xfs_log_cil.o \ | ||
80 | xfs_log_recover.o \ | 81 | xfs_log_recover.o \ |
81 | xfs_mount.o \ | 82 | xfs_mount.o \ |
82 | xfs_mru_cache.o \ | 83 | xfs_mru_cache.o \ |
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index a7bc925c4d60..9f769b5b38fc 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c | |||
@@ -440,14 +440,14 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
440 | return error; | 440 | return error; |
441 | } | 441 | } |
442 | 442 | ||
443 | struct xattr_handler xfs_xattr_acl_access_handler = { | 443 | const struct xattr_handler xfs_xattr_acl_access_handler = { |
444 | .prefix = POSIX_ACL_XATTR_ACCESS, | 444 | .prefix = POSIX_ACL_XATTR_ACCESS, |
445 | .flags = ACL_TYPE_ACCESS, | 445 | .flags = ACL_TYPE_ACCESS, |
446 | .get = xfs_xattr_acl_get, | 446 | .get = xfs_xattr_acl_get, |
447 | .set = xfs_xattr_acl_set, | 447 | .set = xfs_xattr_acl_set, |
448 | }; | 448 | }; |
449 | 449 | ||
450 | struct xattr_handler xfs_xattr_acl_default_handler = { | 450 | const struct xattr_handler xfs_xattr_acl_default_handler = { |
451 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 451 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
452 | .flags = ACL_TYPE_DEFAULT, | 452 | .flags = ACL_TYPE_DEFAULT, |
453 | .get = xfs_xattr_acl_get, | 453 | .get = xfs_xattr_acl_get, |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f01de3c55c43..649ade8ef598 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #include "xfs_sb.h" | 38 | #include "xfs_sb.h" |
39 | #include "xfs_inum.h" | 39 | #include "xfs_inum.h" |
40 | #include "xfs_log.h" | ||
40 | #include "xfs_ag.h" | 41 | #include "xfs_ag.h" |
41 | #include "xfs_dmapi.h" | 42 | #include "xfs_dmapi.h" |
42 | #include "xfs_mount.h" | 43 | #include "xfs_mount.h" |
@@ -850,6 +851,12 @@ xfs_buf_lock_value( | |||
850 | * Note that this in no way locks the underlying pages, so it is only | 851 | * Note that this in no way locks the underlying pages, so it is only |
851 | * useful for synchronizing concurrent use of buffer objects, not for | 852 | * useful for synchronizing concurrent use of buffer objects, not for |
852 | * synchronizing independent access to the underlying pages. | 853 | * synchronizing independent access to the underlying pages. |
854 | * | ||
855 | * If we come across a stale, pinned, locked buffer, we know that we | ||
856 | * are being asked to lock a buffer that has been reallocated. Because | ||
857 | * it is pinned, we know that the log has not been pushed to disk and | ||
858 | * hence it will still be locked. Rather than sleeping until someone | ||
859 | * else pushes the log, push it ourselves before trying to get the lock. | ||
853 | */ | 860 | */ |
854 | void | 861 | void |
855 | xfs_buf_lock( | 862 | xfs_buf_lock( |
@@ -857,6 +864,8 @@ xfs_buf_lock( | |||
857 | { | 864 | { |
858 | trace_xfs_buf_lock(bp, _RET_IP_); | 865 | trace_xfs_buf_lock(bp, _RET_IP_); |
859 | 866 | ||
867 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
868 | xfs_log_force(bp->b_mount, 0); | ||
860 | if (atomic_read(&bp->b_io_remaining)) | 869 | if (atomic_read(&bp->b_io_remaining)) |
861 | blk_run_address_space(bp->b_target->bt_mapping); | 870 | blk_run_address_space(bp->b_target->bt_mapping); |
862 | down(&bp->b_sema); | 871 | down(&bp->b_sema); |
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 1947514ce1ad..9ac8aea91529 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "xfs_dmapi.h" | 19 | #include "xfs_dmapi.h" |
20 | #include "xfs_sb.h" | 20 | #include "xfs_sb.h" |
21 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
22 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
23 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
24 | #include "xfs_quota.h" | 25 | #include "xfs_quota.h" |
@@ -97,7 +98,7 @@ xfs_fs_set_xstate( | |||
97 | } | 98 | } |
98 | 99 | ||
99 | STATIC int | 100 | STATIC int |
100 | xfs_fs_get_xquota( | 101 | xfs_fs_get_dqblk( |
101 | struct super_block *sb, | 102 | struct super_block *sb, |
102 | int type, | 103 | int type, |
103 | qid_t id, | 104 | qid_t id, |
@@ -114,7 +115,7 @@ xfs_fs_get_xquota( | |||
114 | } | 115 | } |
115 | 116 | ||
116 | STATIC int | 117 | STATIC int |
117 | xfs_fs_set_xquota( | 118 | xfs_fs_set_dqblk( |
118 | struct super_block *sb, | 119 | struct super_block *sb, |
119 | int type, | 120 | int type, |
120 | qid_t id, | 121 | qid_t id, |
@@ -135,6 +136,6 @@ xfs_fs_set_xquota( | |||
135 | const struct quotactl_ops xfs_quotactl_operations = { | 136 | const struct quotactl_ops xfs_quotactl_operations = { |
136 | .get_xstate = xfs_fs_get_xstate, | 137 | .get_xstate = xfs_fs_get_xstate, |
137 | .set_xstate = xfs_fs_set_xstate, | 138 | .set_xstate = xfs_fs_set_xstate, |
138 | .get_xquota = xfs_fs_get_xquota, | 139 | .get_dqblk = xfs_fs_get_dqblk, |
139 | .set_xquota = xfs_fs_set_xquota, | 140 | .set_dqblk = xfs_fs_set_dqblk, |
140 | }; | 141 | }; |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e9002513e08f..f2d1718c9165 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -119,6 +119,8 @@ mempool_t *xfs_ioend_pool; | |||
119 | #define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ | 119 | #define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ |
120 | #define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ | 120 | #define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ |
121 | #define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ | 121 | #define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ |
122 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ | ||
123 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ | ||
122 | 124 | ||
123 | /* | 125 | /* |
124 | * Table driven mount option parser. | 126 | * Table driven mount option parser. |
@@ -374,6 +376,13 @@ xfs_parseargs( | |||
374 | mp->m_flags |= XFS_MOUNT_DMAPI; | 376 | mp->m_flags |= XFS_MOUNT_DMAPI; |
375 | } else if (!strcmp(this_char, MNTOPT_DMI)) { | 377 | } else if (!strcmp(this_char, MNTOPT_DMI)) { |
376 | mp->m_flags |= XFS_MOUNT_DMAPI; | 378 | mp->m_flags |= XFS_MOUNT_DMAPI; |
379 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | ||
380 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | ||
381 | cmn_err(CE_WARN, | ||
382 | "Enabling EXPERIMENTAL delayed logging feature " | ||
383 | "- use at your own risk.\n"); | ||
384 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | ||
385 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | ||
377 | } else if (!strcmp(this_char, "ihashsize")) { | 386 | } else if (!strcmp(this_char, "ihashsize")) { |
378 | cmn_err(CE_WARN, | 387 | cmn_err(CE_WARN, |
379 | "XFS: ihashsize no longer used, option is deprecated."); | 388 | "XFS: ihashsize no longer used, option is deprecated."); |
@@ -535,6 +544,7 @@ xfs_showargs( | |||
535 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, | 544 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, |
536 | { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, | 545 | { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, |
537 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, | 546 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, |
547 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, | ||
538 | { 0, NULL } | 548 | { 0, NULL } |
539 | }; | 549 | }; |
540 | static struct proc_xfs_info xfs_info_unset[] = { | 550 | static struct proc_xfs_info xfs_info_unset[] = { |
@@ -725,7 +735,8 @@ void | |||
725 | xfs_blkdev_issue_flush( | 735 | xfs_blkdev_issue_flush( |
726 | xfs_buftarg_t *buftarg) | 736 | xfs_buftarg_t *buftarg) |
727 | { | 737 | { |
728 | blkdev_issue_flush(buftarg->bt_bdev, NULL); | 738 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, |
739 | BLKDEV_IFL_WAIT); | ||
729 | } | 740 | } |
730 | 741 | ||
731 | STATIC void | 742 | STATIC void |
@@ -1754,7 +1765,7 @@ xfs_init_zones(void) | |||
1754 | * but it is much faster. | 1765 | * but it is much faster. |
1755 | */ | 1766 | */ |
1756 | xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + | 1767 | xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + |
1757 | (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / | 1768 | (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / |
1758 | NBWORD) * sizeof(int))), "xfs_buf_item"); | 1769 | NBWORD) * sizeof(int))), "xfs_buf_item"); |
1759 | if (!xfs_buf_item_zone) | 1770 | if (!xfs_buf_item_zone) |
1760 | goto out_destroy_trans_zone; | 1771 | goto out_destroy_trans_zone; |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 233d4b9881b1..519618e9279e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -85,7 +85,7 @@ extern __uint64_t xfs_max_file_offset(unsigned int); | |||
85 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | 85 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); |
86 | 86 | ||
87 | extern const struct export_operations xfs_export_operations; | 87 | extern const struct export_operations xfs_export_operations; |
88 | extern struct xattr_handler *xfs_xattr_handlers[]; | 88 | extern const struct xattr_handler *xfs_xattr_handlers[]; |
89 | extern const struct quotactl_ops xfs_quotactl_operations; | 89 | extern const struct quotactl_ops xfs_quotactl_operations; |
90 | 90 | ||
91 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 91 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 8a319cfd2901..ff6bc797baf2 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1059,83 +1059,112 @@ TRACE_EVENT(xfs_bunmap, | |||
1059 | 1059 | ||
1060 | ); | 1060 | ); |
1061 | 1061 | ||
1062 | #define XFS_BUSY_SYNC \ | ||
1063 | { 0, "async" }, \ | ||
1064 | { 1, "sync" } | ||
1065 | |||
1062 | TRACE_EVENT(xfs_alloc_busy, | 1066 | TRACE_EVENT(xfs_alloc_busy, |
1063 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1067 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, |
1064 | xfs_extlen_t len, int slot), | 1068 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), |
1065 | TP_ARGS(mp, agno, agbno, len, slot), | 1069 | TP_ARGS(trans, agno, agbno, len, sync), |
1066 | TP_STRUCT__entry( | 1070 | TP_STRUCT__entry( |
1067 | __field(dev_t, dev) | 1071 | __field(dev_t, dev) |
1072 | __field(struct xfs_trans *, tp) | ||
1073 | __field(int, tid) | ||
1068 | __field(xfs_agnumber_t, agno) | 1074 | __field(xfs_agnumber_t, agno) |
1069 | __field(xfs_agblock_t, agbno) | 1075 | __field(xfs_agblock_t, agbno) |
1070 | __field(xfs_extlen_t, len) | 1076 | __field(xfs_extlen_t, len) |
1071 | __field(int, slot) | 1077 | __field(int, sync) |
1072 | ), | 1078 | ), |
1073 | TP_fast_assign( | 1079 | TP_fast_assign( |
1074 | __entry->dev = mp->m_super->s_dev; | 1080 | __entry->dev = trans->t_mountp->m_super->s_dev; |
1081 | __entry->tp = trans; | ||
1082 | __entry->tid = trans->t_ticket->t_tid; | ||
1075 | __entry->agno = agno; | 1083 | __entry->agno = agno; |
1076 | __entry->agbno = agbno; | 1084 | __entry->agbno = agbno; |
1077 | __entry->len = len; | 1085 | __entry->len = len; |
1078 | __entry->slot = slot; | 1086 | __entry->sync = sync; |
1079 | ), | 1087 | ), |
1080 | TP_printk("dev %d:%d agno %u agbno %u len %u slot %d", | 1088 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", |
1081 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1089 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1090 | __entry->tp, | ||
1091 | __entry->tid, | ||
1082 | __entry->agno, | 1092 | __entry->agno, |
1083 | __entry->agbno, | 1093 | __entry->agbno, |
1084 | __entry->len, | 1094 | __entry->len, |
1085 | __entry->slot) | 1095 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) |
1086 | 1096 | ||
1087 | ); | 1097 | ); |
1088 | 1098 | ||
1089 | #define XFS_BUSY_STATES \ | ||
1090 | { 0, "found" }, \ | ||
1091 | { 1, "missing" } | ||
1092 | |||
1093 | TRACE_EVENT(xfs_alloc_unbusy, | 1099 | TRACE_EVENT(xfs_alloc_unbusy, |
1094 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1100 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1095 | int slot, int found), | 1101 | xfs_agblock_t agbno, xfs_extlen_t len), |
1096 | TP_ARGS(mp, agno, slot, found), | 1102 | TP_ARGS(mp, agno, agbno, len), |
1097 | TP_STRUCT__entry( | 1103 | TP_STRUCT__entry( |
1098 | __field(dev_t, dev) | 1104 | __field(dev_t, dev) |
1099 | __field(xfs_agnumber_t, agno) | 1105 | __field(xfs_agnumber_t, agno) |
1100 | __field(int, slot) | 1106 | __field(xfs_agblock_t, agbno) |
1101 | __field(int, found) | 1107 | __field(xfs_extlen_t, len) |
1102 | ), | 1108 | ), |
1103 | TP_fast_assign( | 1109 | TP_fast_assign( |
1104 | __entry->dev = mp->m_super->s_dev; | 1110 | __entry->dev = mp->m_super->s_dev; |
1105 | __entry->agno = agno; | 1111 | __entry->agno = agno; |
1106 | __entry->slot = slot; | 1112 | __entry->agbno = agbno; |
1107 | __entry->found = found; | 1113 | __entry->len = len; |
1108 | ), | 1114 | ), |
1109 | TP_printk("dev %d:%d agno %u slot %d %s", | 1115 | TP_printk("dev %d:%d agno %u agbno %u len %u", |
1110 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1116 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1111 | __entry->agno, | 1117 | __entry->agno, |
1112 | __entry->slot, | 1118 | __entry->agbno, |
1113 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1119 | __entry->len) |
1114 | ); | 1120 | ); |
1115 | 1121 | ||
1122 | #define XFS_BUSY_STATES \ | ||
1123 | { 0, "missing" }, \ | ||
1124 | { 1, "found" } | ||
1125 | |||
1116 | TRACE_EVENT(xfs_alloc_busysearch, | 1126 | TRACE_EVENT(xfs_alloc_busysearch, |
1117 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1127 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1118 | xfs_extlen_t len, xfs_lsn_t lsn), | 1128 | xfs_agblock_t agbno, xfs_extlen_t len, int found), |
1119 | TP_ARGS(mp, agno, agbno, len, lsn), | 1129 | TP_ARGS(mp, agno, agbno, len, found), |
1120 | TP_STRUCT__entry( | 1130 | TP_STRUCT__entry( |
1121 | __field(dev_t, dev) | 1131 | __field(dev_t, dev) |
1122 | __field(xfs_agnumber_t, agno) | 1132 | __field(xfs_agnumber_t, agno) |
1123 | __field(xfs_agblock_t, agbno) | 1133 | __field(xfs_agblock_t, agbno) |
1124 | __field(xfs_extlen_t, len) | 1134 | __field(xfs_extlen_t, len) |
1125 | __field(xfs_lsn_t, lsn) | 1135 | __field(int, found) |
1126 | ), | 1136 | ), |
1127 | TP_fast_assign( | 1137 | TP_fast_assign( |
1128 | __entry->dev = mp->m_super->s_dev; | 1138 | __entry->dev = mp->m_super->s_dev; |
1129 | __entry->agno = agno; | 1139 | __entry->agno = agno; |
1130 | __entry->agbno = agbno; | 1140 | __entry->agbno = agbno; |
1131 | __entry->len = len; | 1141 | __entry->len = len; |
1132 | __entry->lsn = lsn; | 1142 | __entry->found = found; |
1133 | ), | 1143 | ), |
1134 | TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", | 1144 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", |
1135 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1145 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1136 | __entry->agno, | 1146 | __entry->agno, |
1137 | __entry->agbno, | 1147 | __entry->agbno, |
1138 | __entry->len, | 1148 | __entry->len, |
1149 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | ||
1150 | ); | ||
1151 | |||
1152 | TRACE_EVENT(xfs_trans_commit_lsn, | ||
1153 | TP_PROTO(struct xfs_trans *trans), | ||
1154 | TP_ARGS(trans), | ||
1155 | TP_STRUCT__entry( | ||
1156 | __field(dev_t, dev) | ||
1157 | __field(struct xfs_trans *, tp) | ||
1158 | __field(xfs_lsn_t, lsn) | ||
1159 | ), | ||
1160 | TP_fast_assign( | ||
1161 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1162 | __entry->tp = trans; | ||
1163 | __entry->lsn = trans->t_commit_lsn; | ||
1164 | ), | ||
1165 | TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", | ||
1166 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1167 | __entry->tp, | ||
1139 | __entry->lsn) | 1168 | __entry->lsn) |
1140 | ); | 1169 | ); |
1141 | 1170 | ||
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index fa01b9daba6b..87d3e03878c8 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c | |||
@@ -72,28 +72,28 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, | |||
72 | (void *)value, size, xflags); | 72 | (void *)value, size, xflags); |
73 | } | 73 | } |
74 | 74 | ||
75 | static struct xattr_handler xfs_xattr_user_handler = { | 75 | static const struct xattr_handler xfs_xattr_user_handler = { |
76 | .prefix = XATTR_USER_PREFIX, | 76 | .prefix = XATTR_USER_PREFIX, |
77 | .flags = 0, /* no flags implies user namespace */ | 77 | .flags = 0, /* no flags implies user namespace */ |
78 | .get = xfs_xattr_get, | 78 | .get = xfs_xattr_get, |
79 | .set = xfs_xattr_set, | 79 | .set = xfs_xattr_set, |
80 | }; | 80 | }; |
81 | 81 | ||
82 | static struct xattr_handler xfs_xattr_trusted_handler = { | 82 | static const struct xattr_handler xfs_xattr_trusted_handler = { |
83 | .prefix = XATTR_TRUSTED_PREFIX, | 83 | .prefix = XATTR_TRUSTED_PREFIX, |
84 | .flags = ATTR_ROOT, | 84 | .flags = ATTR_ROOT, |
85 | .get = xfs_xattr_get, | 85 | .get = xfs_xattr_get, |
86 | .set = xfs_xattr_set, | 86 | .set = xfs_xattr_set, |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static struct xattr_handler xfs_xattr_security_handler = { | 89 | static const struct xattr_handler xfs_xattr_security_handler = { |
90 | .prefix = XATTR_SECURITY_PREFIX, | 90 | .prefix = XATTR_SECURITY_PREFIX, |
91 | .flags = ATTR_SECURE, | 91 | .flags = ATTR_SECURE, |
92 | .get = xfs_xattr_get, | 92 | .get = xfs_xattr_get, |
93 | .set = xfs_xattr_set, | 93 | .set = xfs_xattr_set, |
94 | }; | 94 | }; |
95 | 95 | ||
96 | struct xattr_handler *xfs_xattr_handlers[] = { | 96 | const struct xattr_handler *xfs_xattr_handlers[] = { |
97 | &xfs_xattr_user_handler, | 97 | &xfs_xattr_user_handler, |
98 | &xfs_xattr_trusted_handler, | 98 | &xfs_xattr_trusted_handler, |
99 | &xfs_xattr_security_handler, | 99 | &xfs_xattr_security_handler, |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index b89ec5df0129..585e7633dfc7 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -344,9 +344,9 @@ xfs_qm_init_dquot_blk( | |||
344 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) | 344 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) |
345 | xfs_qm_dqinit_core(curid, type, d); | 345 | xfs_qm_dqinit_core(curid, type, d); |
346 | xfs_trans_dquot_buf(tp, bp, | 346 | xfs_trans_dquot_buf(tp, bp, |
347 | (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : | 347 | (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : |
348 | ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : | 348 | ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : |
349 | XFS_BLI_GDQUOT_BUF))); | 349 | XFS_BLF_GDQUOT_BUF))); |
350 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); | 350 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); |
351 | } | 351 | } |
352 | 352 | ||
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 26fa43140f2e..92b002f1805f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -448,6 +448,9 @@ xfs_qm_scall_getqstat( | |||
448 | return 0; | 448 | return 0; |
449 | } | 449 | } |
450 | 450 | ||
451 | #define XFS_DQ_MASK \ | ||
452 | (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) | ||
453 | |||
451 | /* | 454 | /* |
452 | * Adjust quota limits, and start/stop timers accordingly. | 455 | * Adjust quota limits, and start/stop timers accordingly. |
453 | */ | 456 | */ |
@@ -465,9 +468,10 @@ xfs_qm_scall_setqlim( | |||
465 | int error; | 468 | int error; |
466 | xfs_qcnt_t hard, soft; | 469 | xfs_qcnt_t hard, soft; |
467 | 470 | ||
468 | if ((newlim->d_fieldmask & | 471 | if (newlim->d_fieldmask & ~XFS_DQ_MASK) |
469 | (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) | 472 | return EINVAL; |
470 | return (0); | 473 | if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) |
474 | return 0; | ||
471 | 475 | ||
472 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); | 476 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); |
473 | if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, | 477 | if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, |
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index d13eeba2c8f8..0135e2a669d7 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -49,8 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode); | |||
49 | extern int posix_acl_access_exists(struct inode *inode); | 49 | extern int posix_acl_access_exists(struct inode *inode); |
50 | extern int posix_acl_default_exists(struct inode *inode); | 50 | extern int posix_acl_default_exists(struct inode *inode); |
51 | 51 | ||
52 | extern struct xattr_handler xfs_xattr_acl_access_handler; | 52 | extern const struct xattr_handler xfs_xattr_acl_access_handler; |
53 | extern struct xattr_handler xfs_xattr_acl_default_handler; | 53 | extern const struct xattr_handler xfs_xattr_acl_default_handler; |
54 | #else | 54 | #else |
55 | # define xfs_check_acl NULL | 55 | # define xfs_check_acl NULL |
56 | # define xfs_get_acl(inode, type) NULL | 56 | # define xfs_get_acl(inode, type) NULL |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index abb8222b88c9..401f364ad36c 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -175,14 +175,20 @@ typedef struct xfs_agfl { | |||
175 | } xfs_agfl_t; | 175 | } xfs_agfl_t; |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * Busy block/extent entry. Used in perag to mark blocks that have been freed | 178 | * Busy block/extent entry. Indexed by an rbtree in perag to mark blocks that |
179 | * but whose transactions aren't committed to disk yet. | 179 | * have been freed but whose transactions aren't committed to disk yet. |
180 | * | ||
181 | * Note that we use the transaction ID to record the transaction, not the | ||
182 | * transaction structure itself. See xfs_alloc_busy_insert() for details. | ||
180 | */ | 183 | */ |
181 | typedef struct xfs_perag_busy { | 184 | struct xfs_busy_extent { |
182 | xfs_agblock_t busy_start; | 185 | struct rb_node rb_node; /* ag by-bno indexed search tree */ |
183 | xfs_extlen_t busy_length; | 186 | struct list_head list; /* transaction busy extent list */ |
184 | struct xfs_trans *busy_tp; /* transaction that did the free */ | 187 | xfs_agnumber_t agno; |
185 | } xfs_perag_busy_t; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | ||
190 | xlog_tid_t tid; /* transaction that created this */ | ||
191 | }; | ||
186 | 192 | ||
187 | /* | 193 | /* |
188 | * Per-ag incore structure, copies of information in agf and agi, | 194 | * Per-ag incore structure, copies of information in agf and agi, |
@@ -216,7 +222,8 @@ typedef struct xfs_perag { | |||
216 | xfs_agino_t pagl_leftrec; | 222 | xfs_agino_t pagl_leftrec; |
217 | xfs_agino_t pagl_rightrec; | 223 | xfs_agino_t pagl_rightrec; |
218 | #ifdef __KERNEL__ | 224 | #ifdef __KERNEL__ |
219 | spinlock_t pagb_lock; /* lock for pagb_list */ | 225 | spinlock_t pagb_lock; /* lock for pagb_tree */ |
226 | struct rb_root pagb_tree; /* ordered tree of busy extents */ | ||
220 | 227 | ||
221 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
222 | 229 | ||
@@ -226,7 +233,6 @@ typedef struct xfs_perag { | |||
226 | int pag_ici_reclaimable; /* reclaimable inodes */ | 233 | int pag_ici_reclaimable; /* reclaimable inodes */ |
227 | #endif | 234 | #endif |
228 | int pagb_count; /* pagb slots in use */ | 235 | int pagb_count; /* pagb slots in use */ |
229 | xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ | ||
230 | } xfs_perag_t; | 236 | } xfs_perag_t; |
231 | 237 | ||
232 | /* | 238 | /* |
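struct xfs_busy_extent replaces the fixed XFS_PAGB_NUM_SLOTS array with an rbtree keyed by start block plus a per-transaction list. A sketch of the by-bno insert using the standard linux/rbtree.h API; the real xfs_alloc_busy_insert() in the xfs_alloc.c hunks below additionally handles overlaps and duplicate start blocks:

	static void busy_insert_sketch(struct rb_root *root,
				       struct xfs_busy_extent *new)
	{
		struct rb_node **p = &root->rb_node, *parent = NULL;

		while (*p) {
			struct xfs_busy_extent *be =
				rb_entry(*p, struct xfs_busy_extent, rb_node);

			parent = *p;
			if (new->bno < be->bno)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
		rb_link_node(&new->rb_node, parent, p);
		rb_insert_color(&new->rb_node, root);
	}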
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 94cddbfb2560..a7fbe8a99b12 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -46,11 +46,9 @@ | |||
46 | #define XFSA_FIXUP_BNO_OK 1 | 46 | #define XFSA_FIXUP_BNO_OK 1 |
47 | #define XFSA_FIXUP_CNT_OK 2 | 47 | #define XFSA_FIXUP_CNT_OK 2 |
48 | 48 | ||
49 | STATIC void | 49 | static int |
50 | xfs_alloc_search_busy(xfs_trans_t *tp, | 50 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
51 | xfs_agnumber_t agno, | 51 | xfs_agblock_t bno, xfs_extlen_t len); |
52 | xfs_agblock_t bno, | ||
53 | xfs_extlen_t len); | ||
54 | 52 | ||
55 | /* | 53 | /* |
56 | * Prototypes for per-ag allocation routines | 54 | * Prototypes for per-ag allocation routines |
@@ -540,9 +538,16 @@ xfs_alloc_ag_vextent( | |||
540 | be32_to_cpu(agf->agf_length)); | 538 | be32_to_cpu(agf->agf_length)); |
541 | xfs_alloc_log_agf(args->tp, args->agbp, | 539 | xfs_alloc_log_agf(args->tp, args->agbp, |
542 | XFS_AGF_FREEBLKS); | 540 | XFS_AGF_FREEBLKS); |
543 | /* search the busylist for these blocks */ | 541 | /* |
544 | xfs_alloc_search_busy(args->tp, args->agno, | 542 | * Search the busylist for these blocks and mark the |
545 | args->agbno, args->len); | 543 | * transaction as synchronous if blocks are found. This |
544 | * avoids the need to block due to a synchronous log | ||
545 | * force to ensure correct ordering as the synchronous | ||
546 | * transaction will guarantee that for us. | ||
547 | */ | ||
548 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
549 | args->agbno, args->len)) | ||
550 | xfs_trans_set_sync(args->tp); | ||
546 | } | 551 | } |
547 | if (!args->isfl) | 552 | if (!args->isfl) |
548 | xfs_trans_mod_sb(args->tp, | 553 | xfs_trans_mod_sb(args->tp, |
@@ -1693,7 +1698,7 @@ xfs_free_ag_extent( | |||
1693 | * when the iclog commits to disk. If a busy block is allocated, | 1698 | * when the iclog commits to disk. If a busy block is allocated, |
1694 | * the iclog is pushed up to the LSN that freed the block. | 1699 | * the iclog is pushed up to the LSN that freed the block. |
1695 | */ | 1700 | */ |
1696 | xfs_alloc_mark_busy(tp, agno, bno, len); | 1701 | xfs_alloc_busy_insert(tp, agno, bno, len); |
1697 | return 0; | 1702 | return 0; |
1698 | 1703 | ||
1699 | error0: | 1704 | error0: |
@@ -1989,14 +1994,20 @@ xfs_alloc_get_freelist( | |||
1989 | *bnop = bno; | 1994 | *bnop = bno; |
1990 | 1995 | ||
1991 | /* | 1996 | /* |
1992 | * As blocks are freed, they are added to the per-ag busy list | 1997 | * As blocks are freed, they are added to the per-ag busy list and |
1993 | * and remain there until the freeing transaction is committed to | 1998 | * remain there until the freeing transaction is committed to disk. |
1994 | * disk. Now that we have allocated blocks, this list must be | 1999 | * Now that we have allocated blocks, this list must be searched to see |
1995 | * searched to see if a block is being reused. If one is, then | 2000 | * if a block is being reused. If one is, then the freeing transaction |
1996 | * the freeing transaction must be pushed to disk NOW by forcing | 2001 | * must be pushed to disk before this transaction. |
1997 | * to disk all iclogs up that transaction's LSN. | 2002 | * |
2003 | * We do this by setting the current transaction to a sync transaction | ||
2004 | * which guarantees that the freeing transaction is on disk before this | ||
2005 | * transaction. This is done instead of a synchronous log force here so | ||
2006 | * that we don't sit and wait with the AGF locked in the transaction | ||
2007 | * during the log force. | ||
1998 | */ | 2008 | */ |
1999 | xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 2009 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) |
2010 | xfs_trans_set_sync(tp); | ||
2000 | return 0; | 2011 | return 0; |
2001 | } | 2012 | } |
2002 | 2013 | ||
@@ -2201,7 +2212,7 @@ xfs_alloc_read_agf( | |||
2201 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); | 2212 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); |
2202 | spin_lock_init(&pag->pagb_lock); | 2213 | spin_lock_init(&pag->pagb_lock); |
2203 | pag->pagb_count = 0; | 2214 | pag->pagb_count = 0; |
2204 | memset(pag->pagb_list, 0, sizeof(pag->pagb_list)); | 2215 | pag->pagb_tree = RB_ROOT; |
2205 | pag->pagf_init = 1; | 2216 | pag->pagf_init = 1; |
2206 | } | 2217 | } |
2207 | #ifdef DEBUG | 2218 | #ifdef DEBUG |
@@ -2479,127 +2490,263 @@ error0: | |||
2479 | * list is reused, the transaction that freed it must be forced to disk | 2490 | * list is reused, the transaction that freed it must be forced to disk |
2480 | * before continuing to use the block. | 2491 | * before continuing to use the block. |
2481 | * | 2492 | * |
2482 | * xfs_alloc_mark_busy - add to the per-ag busy list | 2493 | * xfs_alloc_busy_insert - add to the per-ag busy list |
2483 | * xfs_alloc_clear_busy - remove an item from the per-ag busy list | 2494 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list |
2495 | * xfs_alloc_busy_search - search for a busy extent | ||
2496 | */ | ||
2497 | |||
2498 | /* | ||
2499 | * Insert a new extent into the busy tree. | ||
2500 | * | ||
2501 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2502 | * There can be multiple overlapping ranges in the busy extent tree, but only | ||
2503 | * ever one entry at a given start block. The reason for this is that | ||
2504 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2505 | * allocated and freed again before the first transaction commit is on disk. | ||
2506 | * If the exact same start block is freed a second time, we have to wait for | ||
2507 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2508 | * There are two main cases we have to handle here. | ||
2509 | * | ||
2510 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2511 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2512 | * to the freelist, then allocated from the free list, then freed again. In | ||
2513 | * this case, the second extent free is what triggers the duplicate and as | ||
2514 | * such the transaction IDs should match. Because the extent was allocated in | ||
2515 | * this transaction, the transaction must be marked as synchronous. This is | ||
2516 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2517 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2518 | * This serves to catch violations of the second case quite effectively. | ||
2519 | * | ||
2520 | * The second case is where the free/alloc/free occur in different | ||
2521 | * transactions. In this case, the thread freeing the extent the second time | ||
2522 | * can't mark the extent busy immediately because it is already tracked in a | ||
2523 | * transaction that may be committing. When the log commit for the existing | ||
2524 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2525 | * allow the second busy insert to continue using that busy extent structure, | ||
2526 | * it can be freed before this transaction is safely in the log. Hence our | ||
2527 | * only option in this case is to force the log to remove the existing busy | ||
2528 | * extent from the list before we insert the new one with the current | ||
2529 | * transaction ID. | ||
2530 | * | ||
2531 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2532 | * transactions is most easily described with a timeline: | ||
2533 | * | ||
2534 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2535 | * xact alloc | ||
2536 | * free X | ||
2537 | * mark busy | ||
2538 | * commit xact | ||
2539 | * free xact | ||
2540 | * xact alloc | ||
2541 | * alloc X | ||
2542 | * busy search | ||
2543 | * mark xact sync | ||
2544 | * commit xact | ||
2545 | * free xact | ||
2546 | * force log | ||
2547 | * checkpoint starts | ||
2548 | * .... | ||
2549 | * xact alloc | ||
2550 | * free X | ||
2551 | * mark busy | ||
2552 | * finds match | ||
2553 | * *** KABOOM! *** | ||
2554 | * .... | ||
2555 | * log IO completes | ||
2556 | * unbusy X | ||
2557 | * checkpoint completes | ||
2558 | * | ||
2559 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2560 | * the checkpoint completes, and the busy extent it matched will have been | ||
2561 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2562 | * | ||
2563 | * However, to ensure this matching process is robust, we need to use the | ||
2564 | * transaction ID for identifying the transaction, as delayed logging results in | ||
2565 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
2566 | * extent is active for a lot longer than the transaction. Hence the | ||
2567 | * transaction structure can be freed and reallocated, then used to mark the | ||
2568 | * same extent busy again in a new transaction. In this case the new transaction | ||
2569 | * will have a different tid but can have the same address, and hence we need | ||
2570 | * to check against the tid. | ||
2571 | * | ||
2572 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2573 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2574 | * ability to pin the current checkpoint in memory until this transaction | ||
2575 | * commits to ensure that both the original free and the current one combine | ||
2576 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2577 | * different, however, we still need to wait on a log force. | ||
2484 | */ | 2578 | */ |
2485 | void | 2579 | void |
2486 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 2580 | xfs_alloc_busy_insert( |
2487 | xfs_agnumber_t agno, | 2581 | struct xfs_trans *tp, |
2488 | xfs_agblock_t bno, | 2582 | xfs_agnumber_t agno, |
2489 | xfs_extlen_t len) | 2583 | xfs_agblock_t bno, |
2584 | xfs_extlen_t len) | ||
2490 | { | 2585 | { |
2491 | xfs_perag_busy_t *bsy; | 2586 | struct xfs_busy_extent *new; |
2587 | struct xfs_busy_extent *busyp; | ||
2492 | struct xfs_perag *pag; | 2588 | struct xfs_perag *pag; |
2493 | int n; | 2589 | struct rb_node **rbp; |
2590 | struct rb_node *parent; | ||
2591 | int match; | ||
2494 | 2592 | ||
2495 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2496 | spin_lock(&pag->pagb_lock); | ||
2497 | 2593 | ||
2498 | /* search pagb_list for an open slot */ | 2594 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2499 | for (bsy = pag->pagb_list, n = 0; | 2595 | if (!new) { |
2500 | n < XFS_PAGB_NUM_SLOTS; | 2596 | /* |
2501 | bsy++, n++) { | 2597 | * No Memory! Since it is now not possible to track the free |
2502 | if (bsy->busy_tp == NULL) { | 2598 | * block, make this a synchronous transaction to ensure that |
2503 | break; | 2599 | * the block is not reused before this transaction commits. |
2504 | } | 2600 | */ |
2601 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | ||
2602 | xfs_trans_set_sync(tp); | ||
2603 | return; | ||
2505 | } | 2604 | } |
2506 | 2605 | ||
2507 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n); | 2606 | new->agno = agno; |
2607 | new->bno = bno; | ||
2608 | new->length = len; | ||
2609 | new->tid = xfs_log_get_trans_ident(tp); | ||
2508 | 2610 | ||
2509 | if (n < XFS_PAGB_NUM_SLOTS) { | 2611 | INIT_LIST_HEAD(&new->list); |
2510 | bsy = &pag->pagb_list[n]; | 2612 | |
2511 | pag->pagb_count++; | 2613 | /* trace before insert to be able to see failed inserts */ |
2512 | bsy->busy_start = bno; | 2614 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); |
2513 | bsy->busy_length = len; | 2615 | |
2514 | bsy->busy_tp = tp; | 2616 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2515 | xfs_trans_add_busy(tp, agno, n); | 2617 | restart: |
2516 | } else { | 2618 | spin_lock(&pag->pagb_lock); |
2619 | rbp = &pag->pagb_tree.rb_node; | ||
2620 | parent = NULL; | ||
2621 | busyp = NULL; | ||
2622 | match = 0; | ||
2623 | while (*rbp && match >= 0) { | ||
2624 | parent = *rbp; | ||
2625 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | ||
2626 | |||
2627 | if (new->bno < busyp->bno) { | ||
2628 | /* may overlap, but exact start block is lower */ | ||
2629 | rbp = &(*rbp)->rb_left; | ||
2630 | if (new->bno + new->length > busyp->bno) | ||
2631 | match = busyp->tid == new->tid ? 1 : -1; | ||
2632 | } else if (new->bno > busyp->bno) { | ||
2633 | /* may overlap, but exact start block is higher */ | ||
2634 | rbp = &(*rbp)->rb_right; | ||
2635 | if (bno < busyp->bno + busyp->length) | ||
2636 | match = busyp->tid == new->tid ? 1 : -1; | ||
2637 | } else { | ||
2638 | match = busyp->tid == new->tid ? 1 : -1; | ||
2639 | break; | ||
2640 | } | ||
2641 | } | ||
2642 | if (match < 0) { | ||
2643 | /* overlap marked busy in different transaction */ | ||
2644 | spin_unlock(&pag->pagb_lock); | ||
2645 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2646 | goto restart; | ||
2647 | } | ||
2648 | if (match > 0) { | ||
2517 | /* | 2649 | /* |
2518 | * The busy list is full! Since it is now not possible to | 2650 | * overlap marked busy in same transaction. Update if exact |
2519 | * track the free block, make this a synchronous transaction | 2651 | * start block match, otherwise combine the busy extents into |
2520 | * to insure that the block is not reused before this | 2652 | * a single range. |
2521 | * transaction commits. | ||
2522 | */ | 2653 | */ |
2523 | xfs_trans_set_sync(tp); | 2654 | if (busyp->bno == new->bno) { |
2524 | } | 2655 | busyp->length = max(busyp->length, new->length); |
2656 | spin_unlock(&pag->pagb_lock); | ||
2657 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2658 | xfs_perag_put(pag); | ||
2659 | kmem_free(new); | ||
2660 | return; | ||
2661 | } | ||
2662 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2663 | new->length = max(busyp->bno + busyp->length, | ||
2664 | new->bno + new->length) - | ||
2665 | min(busyp->bno, new->bno); | ||
2666 | new->bno = min(busyp->bno, new->bno); | ||
2667 | } else | ||
2668 | busyp = NULL; | ||
2525 | 2669 | ||
2670 | rb_link_node(&new->rb_node, parent, rbp); | ||
2671 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | ||
2672 | |||
2673 | list_add(&new->list, &tp->t_busy); | ||
2526 | spin_unlock(&pag->pagb_lock); | 2674 | spin_unlock(&pag->pagb_lock); |
2527 | xfs_perag_put(pag); | 2675 | xfs_perag_put(pag); |
2676 | kmem_free(busyp); | ||
2528 | } | 2677 | } |
2529 | 2678 | ||
2530 | void | 2679 | /* |
2531 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 2680 | * Search for a busy extent within the range of the extent we are about to |
2532 | xfs_agnumber_t agno, | 2681 | * allocate. You need to be holding the busy extent tree lock when calling |
2533 | int idx) | 2682 | * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy |
2683 | * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact | ||
2684 | * match. This is done so that a non-zero return indicates an overlap that | ||
2685 | * will require a synchronous transaction, but it can still be | ||
2686 | * used to distinguish between a partial or exact match. | ||
2687 | */ | ||
2688 | static int | ||
2689 | xfs_alloc_busy_search( | ||
2690 | struct xfs_mount *mp, | ||
2691 | xfs_agnumber_t agno, | ||
2692 | xfs_agblock_t bno, | ||
2693 | xfs_extlen_t len) | ||
2534 | { | 2694 | { |
2535 | struct xfs_perag *pag; | 2695 | struct xfs_perag *pag; |
2536 | xfs_perag_busy_t *list; | 2696 | struct rb_node *rbp; |
2697 | struct xfs_busy_extent *busyp; | ||
2698 | int match = 0; | ||
2537 | 2699 | ||
2538 | ASSERT(idx < XFS_PAGB_NUM_SLOTS); | 2700 | pag = xfs_perag_get(mp, agno); |
2539 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2540 | spin_lock(&pag->pagb_lock); | 2701 | spin_lock(&pag->pagb_lock); |
2541 | list = pag->pagb_list; | ||
2542 | 2702 | ||
2543 | trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp); | 2703 | rbp = pag->pagb_tree.rb_node; |
2544 | 2704 | ||
2545 | if (list[idx].busy_tp == tp) { | 2705 | /* find closest start bno overlap */ |
2546 | list[idx].busy_tp = NULL; | 2706 | while (rbp) { |
2547 | pag->pagb_count--; | 2707 | busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node); |
2708 | if (bno < busyp->bno) { | ||
2709 | /* may overlap, but exact start block is lower */ | ||
2710 | if (bno + len > busyp->bno) | ||
2711 | match = -1; | ||
2712 | rbp = rbp->rb_left; | ||
2713 | } else if (bno > busyp->bno) { | ||
2714 | /* may overlap, but exact start block is higher */ | ||
2715 | if (bno < busyp->bno + busyp->length) | ||
2716 | match = -1; | ||
2717 | rbp = rbp->rb_right; | ||
2718 | } else { | ||
2719 | /* bno matches busyp, length determines exact match */ | ||
2720 | match = (busyp->length == len) ? 1 : -1; | ||
2721 | break; | ||
2722 | } | ||
2548 | } | 2723 | } |
2549 | |||
2550 | spin_unlock(&pag->pagb_lock); | 2724 | spin_unlock(&pag->pagb_lock); |
2725 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2551 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
2727 | return match; | ||
2552 | } | 2728 | } |
2553 | 2729 | ||
2554 | 2730 | void | |
2555 | /* | 2731 | xfs_alloc_busy_clear( |
2556 | * If we find the extent in the busy list, force the log out to get the | 2732 | struct xfs_mount *mp, |
2557 | * extent out of the busy list so the caller can use it straight away. | 2733 | struct xfs_busy_extent *busyp) |
2558 | */ | ||
2559 | STATIC void | ||
2560 | xfs_alloc_search_busy(xfs_trans_t *tp, | ||
2561 | xfs_agnumber_t agno, | ||
2562 | xfs_agblock_t bno, | ||
2563 | xfs_extlen_t len) | ||
2564 | { | 2734 | { |
2565 | struct xfs_perag *pag; | 2735 | struct xfs_perag *pag; |
2566 | xfs_perag_busy_t *bsy; | ||
2567 | xfs_agblock_t uend, bend; | ||
2568 | xfs_lsn_t lsn = 0; | ||
2569 | int cnt; | ||
2570 | 2736 | ||
2571 | pag = xfs_perag_get(tp->t_mountp, agno); | 2737 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, |
2572 | spin_lock(&pag->pagb_lock); | 2738 | busyp->length); |
2573 | cnt = pag->pagb_count; | ||
2574 | 2739 | ||
2575 | /* | 2740 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, |
2576 | * search pagb_list for this slot, skipping open slots. We have to | 2741 | busyp->length) == 1); |
2577 | * search the entire array as there may be multiple overlaps and | ||
2578 | * we have to get the most recent LSN for the log force to push out | ||
2579 | * all the transactions that span the range. | ||
2580 | */ | ||
2581 | uend = bno + len - 1; | ||
2582 | for (cnt = 0; cnt < pag->pagb_count; cnt++) { | ||
2583 | bsy = &pag->pagb_list[cnt]; | ||
2584 | if (!bsy->busy_tp) | ||
2585 | continue; | ||
2586 | 2742 | ||
2587 | bend = bsy->busy_start + bsy->busy_length - 1; | 2743 | list_del_init(&busyp->list); |
2588 | if (bno > bend || uend < bsy->busy_start) | ||
2589 | continue; | ||
2590 | 2744 | ||
2591 | /* (start1,length1) within (start2, length2) */ | 2745 | pag = xfs_perag_get(mp, busyp->agno); |
2592 | if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) | 2746 | spin_lock(&pag->pagb_lock); |
2593 | lsn = bsy->busy_tp->t_commit_lsn; | 2747 | rb_erase(&busyp->rb_node, &pag->pagb_tree); |
2594 | } | ||
2595 | spin_unlock(&pag->pagb_lock); | 2748 | spin_unlock(&pag->pagb_lock); |
2596 | xfs_perag_put(pag); | 2749 | xfs_perag_put(pag); |
2597 | trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); | ||
2598 | 2750 | ||
2599 | /* | 2751 | kmem_free(busyp); |
2600 | * If a block was found, force the log through the LSN of the | ||
2601 | * transaction that freed the block | ||
2602 | */ | ||
2603 | if (lsn) | ||
2604 | xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC); | ||
2605 | } | 2752 | } |
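The rewritten xfs_alloc_busy_search() classifies overlap three ways: 0 for no overlapping busy extent, -1 for a partial overlap, and 1 for an exact match. A minimal userspace sketch of just that classification, using invented stand-in types rather than the kernel's:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t agblock_t;	/* stand-in for xfs_agblock_t */
typedef uint32_t extlen_t;	/* stand-in for xfs_extlen_t */

/*
 * Classify the overlap between a candidate extent and one busy extent:
 * 0 = disjoint, -1 = partial overlap, 1 = exact match, mirroring the
 * match values used by xfs_alloc_busy_search() above.
 */
static int busy_overlap(agblock_t bno, extlen_t len,
			agblock_t busy_bno, extlen_t busy_len)
{
	if (bno < busy_bno)
		return (bno + len > busy_bno) ? -1 : 0;
	if (bno > busy_bno)
		return (bno < busy_bno + busy_len) ? -1 : 0;
	return (len == busy_len) ? 1 : -1;
}

int main(void)
{
	printf("%d\n", busy_overlap(10, 5, 20, 4));	/* 0: disjoint */
	printf("%d\n", busy_overlap(10, 15, 20, 4));	/* -1: partial */
	printf("%d\n", busy_overlap(20, 4, 20, 4));	/* 1: exact */
	return 0;
}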
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 599bffa39784..6d05199b667c 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -22,6 +22,7 @@ struct xfs_buf; | |||
22 | struct xfs_mount; | 22 | struct xfs_mount; |
23 | struct xfs_perag; | 23 | struct xfs_perag; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_busy_extent; | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. | 28 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. |
@@ -119,15 +120,13 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
119 | #ifdef __KERNEL__ | 120 | #ifdef __KERNEL__ |
120 | 121 | ||
121 | void | 122 | void |
122 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 123 | xfs_alloc_busy_insert(xfs_trans_t *tp, |
123 | xfs_agnumber_t agno, | 124 | xfs_agnumber_t agno, |
124 | xfs_agblock_t bno, | 125 | xfs_agblock_t bno, |
125 | xfs_extlen_t len); | 126 | xfs_extlen_t len); |
126 | 127 | ||
127 | void | 128 | void |
128 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); |
129 | xfs_agnumber_t ag, | ||
130 | int idx); | ||
131 | 130 | ||
132 | #endif /* __KERNEL__ */ | 131 | #endif /* __KERNEL__ */ |
133 | 132 | ||
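With the header change, only insert and clear are exported; the search helper stays private to xfs_alloc.c. A stubbed sketch (all names hypothetical) of the behavioural shift on the allocation side, where a busy hit now marks the transaction synchronous instead of forcing the log with the AGF locked:

#include <stdio.h>

/* Stub standing in for xfs_alloc_busy_search(); always reports a hit. */
static int busy_search(unsigned agno, unsigned bno, unsigned len)
{
	(void)agno; (void)bno; (void)len;
	return 1;
}

/* Stub standing in for xfs_trans_set_sync(). */
static void trans_set_sync(void)
{
	puts("transaction marked synchronous");
}

int main(void)
{
	/*
	 * Allocation side: a busy hit no longer issues a synchronous
	 * log force while the AGF is held; making the allocating
	 * transaction synchronous provides the same ordering guarantee
	 * at commit time without blocking here.
	 */
	if (busy_search(0, 20, 4))
		trans_set_sync();
	return 0;
}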
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b726e10d2c1c..83f494218759 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -134,7 +134,7 @@ xfs_allocbt_free_block( | |||
134 | * disk. If a busy block is allocated, the iclog is pushed up to the | 134 | * disk. If a busy block is allocated, the iclog is pushed up to the |
135 | * LSN that freed the block. | 135 | * LSN that freed the block. |
136 | */ | 136 | */ |
137 | xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 137 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 240340a4727b..02a80984aa05 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -64,7 +64,7 @@ xfs_buf_item_log_debug( | |||
64 | nbytes = last - first + 1; | 64 | nbytes = last - first + 1; |
65 | bfset(bip->bli_logged, first, nbytes); | 65 | bfset(bip->bli_logged, first, nbytes); |
66 | for (x = 0; x < nbytes; x++) { | 66 | for (x = 0; x < nbytes; x++) { |
67 | chunk_num = byte >> XFS_BLI_SHIFT; | 67 | chunk_num = byte >> XFS_BLF_SHIFT; |
68 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; | 68 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; |
69 | bit_num = chunk_num & (NBWORD - 1); | 69 | bit_num = chunk_num & (NBWORD - 1); |
70 | wordp = &(bip->bli_format.blf_data_map[word_num]); | 70 | wordp = &(bip->bli_format.blf_data_map[word_num]); |
@@ -166,7 +166,7 @@ xfs_buf_item_size( | |||
166 | * cancel flag in it. | 166 | * cancel flag in it. |
167 | */ | 167 | */ |
168 | trace_xfs_buf_item_size_stale(bip); | 168 | trace_xfs_buf_item_size_stale(bip); |
169 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 169 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
170 | return 1; | 170 | return 1; |
171 | } | 171 | } |
172 | 172 | ||
@@ -197,9 +197,9 @@ xfs_buf_item_size( | |||
197 | } else if (next_bit != last_bit + 1) { | 197 | } else if (next_bit != last_bit + 1) { |
198 | last_bit = next_bit; | 198 | last_bit = next_bit; |
199 | nvecs++; | 199 | nvecs++; |
200 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLI_CHUNK) != | 200 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) != |
201 | (xfs_buf_offset(bp, last_bit * XFS_BLI_CHUNK) + | 201 | (xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) + |
202 | XFS_BLI_CHUNK)) { | 202 | XFS_BLF_CHUNK)) { |
203 | last_bit = next_bit; | 203 | last_bit = next_bit; |
204 | nvecs++; | 204 | nvecs++; |
205 | } else { | 205 | } else { |
@@ -254,6 +254,20 @@ xfs_buf_item_format( | |||
254 | vecp++; | 254 | vecp++; |
255 | nvecs = 1; | 255 | nvecs = 1; |
256 | 256 | ||
257 | /* | ||
258 | * If it is an inode buffer, transfer the in-memory state to the | ||
259 | * format flags and clear the in-memory state. We do not transfer | ||
260 | * this state if the inode buffer allocation has not yet been committed | ||
261 | * to the log as setting the XFS_BLI_INODE_BUF flag will prevent | ||
262 | * correct replay of the inode allocation. | ||
263 | */ | ||
264 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { | ||
265 | if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | ||
266 | xfs_log_item_in_current_chkpt(&bip->bli_item))) | ||
267 | bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; | ||
268 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; | ||
269 | } | ||
270 | |||
257 | if (bip->bli_flags & XFS_BLI_STALE) { | 271 | if (bip->bli_flags & XFS_BLI_STALE) { |
258 | /* | 272 | /* |
259 | * The buffer is stale, so all we need to log | 273 | * The buffer is stale, so all we need to log |
@@ -261,7 +275,7 @@ xfs_buf_item_format( | |||
261 | * cancel flag in it. | 275 | * cancel flag in it. |
262 | */ | 276 | */ |
263 | trace_xfs_buf_item_format_stale(bip); | 277 | trace_xfs_buf_item_format_stale(bip); |
264 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 278 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
265 | bip->bli_format.blf_size = nvecs; | 279 | bip->bli_format.blf_size = nvecs; |
266 | return; | 280 | return; |
267 | } | 281 | } |
@@ -294,28 +308,28 @@ xfs_buf_item_format( | |||
294 | * keep counting and scanning. | 308 | * keep counting and scanning. |
295 | */ | 309 | */ |
296 | if (next_bit == -1) { | 310 | if (next_bit == -1) { |
297 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 311 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
298 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 312 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
299 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 313 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
300 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 314 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
301 | nvecs++; | 315 | nvecs++; |
302 | break; | 316 | break; |
303 | } else if (next_bit != last_bit + 1) { | 317 | } else if (next_bit != last_bit + 1) { |
304 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 318 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
305 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 319 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
306 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 320 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
307 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 321 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
308 | nvecs++; | 322 | nvecs++; |
309 | vecp++; | 323 | vecp++; |
310 | first_bit = next_bit; | 324 | first_bit = next_bit; |
311 | last_bit = next_bit; | 325 | last_bit = next_bit; |
312 | nbits = 1; | 326 | nbits = 1; |
313 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLI_SHIFT) != | 327 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) != |
314 | (xfs_buf_offset(bp, last_bit << XFS_BLI_SHIFT) + | 328 | (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) + |
315 | XFS_BLI_CHUNK)) { | 329 | XFS_BLF_CHUNK)) { |
316 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 330 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
317 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 331 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
318 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 332 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
319 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 333 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
320 | /* You would think we need to bump the nvecs here too, but we do not | 334 | /* You would think we need to bump the nvecs here too, but we do not |
321 | * this number is used by recovery, and it gets confused by the boundary | 335 | * this number is used by recovery, and it gets confused by the boundary |
@@ -341,10 +355,15 @@ xfs_buf_item_format( | |||
341 | } | 355 | } |
342 | 356 | ||
343 | /* | 357 | /* |
344 | * This is called to pin the buffer associated with the buf log | 358 | * This is called to pin the buffer associated with the buf log item in memory |
345 | * item in memory so it cannot be written out. Simply call bpin() | 359 | * so it cannot be written out. Simply call bpin() on the buffer to do this. |
346 | * on the buffer to do this. | 360 | * |
361 | * We also always take a reference to the buffer log item here so that the bli | ||
362 | * is held while the item is pinned in memory. This means that we can | ||
363 | * unconditionally drop the reference count a transaction holds when the | ||
364 | * transaction is completed. | ||
347 | */ | 365 | */ |
366 | |||
348 | STATIC void | 367 | STATIC void |
349 | xfs_buf_item_pin( | 368 | xfs_buf_item_pin( |
350 | xfs_buf_log_item_t *bip) | 369 | xfs_buf_log_item_t *bip) |
@@ -356,6 +375,7 @@ xfs_buf_item_pin( | |||
356 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 375 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
357 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 376 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
358 | (bip->bli_flags & XFS_BLI_STALE)); | 377 | (bip->bli_flags & XFS_BLI_STALE)); |
378 | atomic_inc(&bip->bli_refcount); | ||
359 | trace_xfs_buf_item_pin(bip); | 379 | trace_xfs_buf_item_pin(bip); |
360 | xfs_bpin(bp); | 380 | xfs_bpin(bp); |
361 | } | 381 | } |
@@ -393,7 +413,7 @@ xfs_buf_item_unpin( | |||
393 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 413 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
394 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 414 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
395 | ASSERT(XFS_BUF_ISSTALE(bp)); | 415 | ASSERT(XFS_BUF_ISSTALE(bp)); |
396 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 416 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
397 | trace_xfs_buf_item_unpin_stale(bip); | 417 | trace_xfs_buf_item_unpin_stale(bip); |
398 | 418 | ||
399 | /* | 419 | /* |
@@ -489,20 +509,23 @@ xfs_buf_item_trylock( | |||
489 | } | 509 | } |
490 | 510 | ||
491 | /* | 511 | /* |
492 | * Release the buffer associated with the buf log item. | 512 | * Release the buffer associated with the buf log item. If there is no dirty |
493 | * If there is no dirty logged data associated with the | 513 | * logged data associated with the buffer recorded in the buf log item, then |
494 | * buffer recorded in the buf log item, then free the | 514 | * free the buf log item and remove the reference to it in the buffer. |
495 | * buf log item and remove the reference to it in the | 515 | * |
496 | * buffer. | 516 | * This call ignores the recursion count. It is only called when the buffer |
517 | * should REALLY be unlocked, regardless of the recursion count. | ||
497 | * | 518 | * |
498 | * This call ignores the recursion count. It is only called | 519 | * We unconditionally drop the transaction's reference to the log item. If the |
499 | * when the buffer should REALLY be unlocked, regardless | 520 | * item was logged, then another reference was taken when it was pinned, so we |
500 | * of the recursion count. | 521 | * can safely drop the transaction reference now. This also allows us to avoid |
522 | * potential races with the unpin code freeing the bli by not referencing the | ||
523 | * bli after we've dropped the reference count. | ||
501 | * | 524 | * |
502 | * If the XFS_BLI_HOLD flag is set in the buf log item, then | 525 | * If the XFS_BLI_HOLD flag is set in the buf log item, then free the log item |
503 | * free the log item if necessary but do not unlock the buffer. | 526 | * if necessary but do not unlock the buffer. This is for support of |
504 | * This is for support of xfs_trans_bhold(). Make sure the | 527 | * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't |
505 | * XFS_BLI_HOLD field is cleared if we don't free the item. | 528 | * free the item. |
506 | */ | 529 | */ |
507 | STATIC void | 530 | STATIC void |
508 | xfs_buf_item_unlock( | 531 | xfs_buf_item_unlock( |
@@ -514,73 +537,54 @@ xfs_buf_item_unlock( | |||
514 | 537 | ||
515 | bp = bip->bli_buf; | 538 | bp = bip->bli_buf; |
516 | 539 | ||
517 | /* | 540 | /* Clear the buffer's association with this transaction. */ |
518 | * Clear the buffer's association with this transaction. | ||
519 | */ | ||
520 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); | 541 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); |
521 | 542 | ||
522 | /* | 543 | /* |
523 | * If this is a transaction abort, don't return early. | 544 | * If this is a transaction abort, don't return early. Instead, allow |
524 | * Instead, allow the brelse to happen. | 545 | * the brelse to happen. Normally it would be done for stale |
525 | * Normally it would be done for stale (cancelled) buffers | 546 | * (cancelled) buffers at unpin time, but we'll never go through the |
526 | * at unpin time, but we'll never go through the pin/unpin | 547 | * pin/unpin cycle if we abort inside commit. |
527 | * cycle if we abort inside commit. | ||
528 | */ | 548 | */ |
529 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; | 549 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; |
530 | 550 | ||
531 | /* | 551 | /* |
532 | * If the buf item is marked stale, then don't do anything. | 552 | * Before possibly freeing the buf item, determine if we should |
533 | * We'll unlock the buffer and free the buf item when the | 553 | * release the buffer at the end of this routine. |
534 | * buffer is unpinned for the last time. | ||
535 | */ | 554 | */ |
536 | if (bip->bli_flags & XFS_BLI_STALE) { | 555 | hold = bip->bli_flags & XFS_BLI_HOLD; |
537 | bip->bli_flags &= ~XFS_BLI_LOGGED; | 556 | |
538 | trace_xfs_buf_item_unlock_stale(bip); | 557 | /* Clear the per transaction state. */ |
539 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 558 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); |
540 | if (!aborted) | ||
541 | return; | ||
542 | } | ||
543 | 559 | ||
544 | /* | 560 | /* |
545 | * Drop the transaction's reference to the log item if | 561 | * If the buf item is marked stale, then don't do anything. We'll |
546 | * it was not logged as part of the transaction. Otherwise | 562 | * unlock the buffer and free the buf item when the buffer is unpinned |
547 | * we'll drop the reference in xfs_buf_item_unpin() when | 563 | * for the last time. |
548 | * the transaction is really through with the buffer. | ||
549 | */ | 564 | */ |
550 | if (!(bip->bli_flags & XFS_BLI_LOGGED)) { | 565 | if (bip->bli_flags & XFS_BLI_STALE) { |
551 | atomic_dec(&bip->bli_refcount); | 566 | trace_xfs_buf_item_unlock_stale(bip); |
552 | } else { | 567 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
553 | /* | 568 | if (!aborted) { |
554 | * Clear the logged flag since this is per | 569 | atomic_dec(&bip->bli_refcount); |
555 | * transaction state. | 570 | return; |
556 | */ | 571 | } |
557 | bip->bli_flags &= ~XFS_BLI_LOGGED; | ||
558 | } | 572 | } |
559 | 573 | ||
560 | /* | ||
561 | * Before possibly freeing the buf item, determine if we should | ||
562 | * release the buffer at the end of this routine. | ||
563 | */ | ||
564 | hold = bip->bli_flags & XFS_BLI_HOLD; | ||
565 | trace_xfs_buf_item_unlock(bip); | 574 | trace_xfs_buf_item_unlock(bip); |
566 | 575 | ||
567 | /* | 576 | /* |
568 | * If the buf item isn't tracking any data, free it. | 577 | * If the buf item isn't tracking any data, free it, otherwise drop the |
569 | * Otherwise, if XFS_BLI_HOLD is set clear it. | 578 | * reference we hold to it. |
570 | */ | 579 | */ |
571 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, | 580 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, |
572 | bip->bli_format.blf_map_size)) { | 581 | bip->bli_format.blf_map_size)) |
573 | xfs_buf_item_relse(bp); | 582 | xfs_buf_item_relse(bp); |
574 | } else if (hold) { | 583 | else |
575 | bip->bli_flags &= ~XFS_BLI_HOLD; | 584 | atomic_dec(&bip->bli_refcount); |
576 | } | ||
577 | 585 | ||
578 | /* | 586 | if (!hold) |
579 | * Release the buffer if XFS_BLI_HOLD was not set. | ||
580 | */ | ||
581 | if (!hold) { | ||
582 | xfs_buf_relse(bp); | 587 | xfs_buf_relse(bp); |
583 | } | ||
584 | } | 588 | } |
585 | 589 | ||
586 | /* | 590 | /* |
@@ -717,12 +721,12 @@ xfs_buf_item_init( | |||
717 | } | 721 | } |
718 | 722 | ||
719 | /* | 723 | /* |
720 | * chunks is the number of XFS_BLI_CHUNK size pieces | 724 | * chunks is the number of XFS_BLF_CHUNK size pieces |
721 | * the buffer can be divided into. Make sure not to | 725 | * the buffer can be divided into. Make sure not to |
722 | * truncate any pieces. map_size is the size of the | 726 | * truncate any pieces. map_size is the size of the |
723 | * bitmap needed to describe the chunks of the buffer. | 727 | * bitmap needed to describe the chunks of the buffer. |
724 | */ | 728 | */ |
725 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLI_CHUNK - 1)) >> XFS_BLI_SHIFT); | 729 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT); |
726 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); | 730 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); |
727 | 731 | ||
728 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, | 732 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, |
@@ -790,8 +794,8 @@ xfs_buf_item_log( | |||
790 | /* | 794 | /* |
791 | * Convert byte offsets to bit numbers. | 795 | * Convert byte offsets to bit numbers. |
792 | */ | 796 | */ |
793 | first_bit = first >> XFS_BLI_SHIFT; | 797 | first_bit = first >> XFS_BLF_SHIFT; |
794 | last_bit = last >> XFS_BLI_SHIFT; | 798 | last_bit = last >> XFS_BLF_SHIFT; |
795 | 799 | ||
796 | /* | 800 | /* |
797 | * Calculate the total number of bits to be set. | 801 | * Calculate the total number of bits to be set. |
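The state transfer added to xfs_buf_item_format() above only sets the on-disk replay flag when the inode allocation is not in the current checkpoint. A compilable sketch of that decision, with the flag values copied from xfs_buf_item.h below and the checkpoint test reduced to a boolean (everything else is invented):

#include <stdio.h>
#include <stdbool.h>

#define BLF_INODE_BUF		0x1	/* on-disk format flag */
#define BLI_INODE_ALLOC_BUF	0x10	/* in-memory bli state */
#define BLI_INODE_BUF		0x40	/* in-memory bli state */

/*
 * Mirror of the transfer logic: move the in-memory INODE_BUF state to
 * the format flags unless the allocation sits in the current
 * checkpoint, and always clear the in-memory bit afterwards.
 */
static unsigned int transfer_inode_flag(unsigned int *bli_flags,
					bool in_current_chkpt)
{
	unsigned int blf_flags = 0;

	if (*bli_flags & BLI_INODE_BUF) {
		if (!((*bli_flags & BLI_INODE_ALLOC_BUF) && in_current_chkpt))
			blf_flags |= BLF_INODE_BUF;
		*bli_flags &= ~BLI_INODE_BUF;
	}
	return blf_flags;
}

int main(void)
{
	unsigned int bli = BLI_INODE_BUF | BLI_INODE_ALLOC_BUF;

	printf("in chkpt:  blf=%#x\n", transfer_inode_flag(&bli, true));
	bli = BLI_INODE_BUF | BLI_INODE_ALLOC_BUF;
	printf("committed: blf=%#x\n", transfer_inode_flag(&bli, false));
	return 0;
}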
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index df4454511f73..f20bb472d582 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -41,22 +41,22 @@ typedef struct xfs_buf_log_format { | |||
41 | * This flag indicates that the buffer contains on disk inodes | 41 | * This flag indicates that the buffer contains on disk inodes |
42 | * and requires special recovery handling. | 42 | * and requires special recovery handling. |
43 | */ | 43 | */ |
44 | #define XFS_BLI_INODE_BUF 0x1 | 44 | #define XFS_BLF_INODE_BUF 0x1 |
45 | /* | 45 | /* |
46 | * This flag indicates that the buffer should not be replayed | 46 | * This flag indicates that the buffer should not be replayed |
47 | * during recovery because its blocks are being freed. | 47 | * during recovery because its blocks are being freed. |
48 | */ | 48 | */ |
49 | #define XFS_BLI_CANCEL 0x2 | 49 | #define XFS_BLF_CANCEL 0x2 |
50 | /* | 50 | /* |
51 | * This flag indicates that the buffer contains on disk | 51 | * This flag indicates that the buffer contains on disk |
52 | * user or group dquots and may require special recovery handling. | 52 | * user or group dquots and may require special recovery handling. |
53 | */ | 53 | */ |
54 | #define XFS_BLI_UDQUOT_BUF 0x4 | 54 | #define XFS_BLF_UDQUOT_BUF 0x4 |
55 | #define XFS_BLI_PDQUOT_BUF 0x8 | 55 | #define XFS_BLF_PDQUOT_BUF 0x8 |
56 | #define XFS_BLI_GDQUOT_BUF 0x10 | 56 | #define XFS_BLF_GDQUOT_BUF 0x10 |
57 | 57 | ||
58 | #define XFS_BLI_CHUNK 128 | 58 | #define XFS_BLF_CHUNK 128 |
59 | #define XFS_BLI_SHIFT 7 | 59 | #define XFS_BLF_SHIFT 7 |
60 | #define BIT_TO_WORD_SHIFT 5 | 60 | #define BIT_TO_WORD_SHIFT 5 |
61 | #define NBWORD (NBBY * sizeof(unsigned int)) | 61 | #define NBWORD (NBBY * sizeof(unsigned int)) |
62 | 62 | ||
@@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format { | |||
69 | #define XFS_BLI_LOGGED 0x08 | 69 | #define XFS_BLI_LOGGED 0x08 |
70 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 | 70 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 |
71 | #define XFS_BLI_STALE_INODE 0x20 | 71 | #define XFS_BLI_STALE_INODE 0x20 |
72 | #define XFS_BLI_INODE_BUF 0x40 | ||
72 | 73 | ||
73 | #define XFS_BLI_FLAGS \ | 74 | #define XFS_BLI_FLAGS \ |
74 | { XFS_BLI_HOLD, "HOLD" }, \ | 75 | { XFS_BLI_HOLD, "HOLD" }, \ |
@@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format { | |||
76 | { XFS_BLI_STALE, "STALE" }, \ | 77 | { XFS_BLI_STALE, "STALE" }, \ |
77 | { XFS_BLI_LOGGED, "LOGGED" }, \ | 78 | { XFS_BLI_LOGGED, "LOGGED" }, \ |
78 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ | 79 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ |
79 | { XFS_BLI_STALE_INODE, "STALE_INODE" } | 80 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ |
81 | { XFS_BLI_INODE_BUF, "INODE_BUF" } | ||
80 | 82 | ||
81 | 83 | ||
82 | #ifdef __KERNEL__ | 84 | #ifdef __KERNEL__ |
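The renamed XFS_BLF_CHUNK/XFS_BLF_SHIFT constants drive the byte-offset-to-dirty-bit conversion used throughout xfs_buf_item.c. A quick standalone check of that arithmetic, with the constants copied from the header and everything else invented:

#include <stdio.h>

#define BLF_CHUNK 128	/* bytes covered by one dirty-map bit */
#define BLF_SHIFT 7	/* log2(BLF_CHUNK) */

int main(void)
{
	unsigned int first = 200, last = 700;	/* dirty byte range */
	unsigned int first_bit = first >> BLF_SHIFT;
	unsigned int last_bit = last >> BLF_SHIFT;

	/* Bits 1..5 cover bytes 128..767, enclosing the dirty range. */
	printf("bits %u..%u cover bytes %u..%u\n",
	       first_bit, last_bit,
	       first_bit * BLF_CHUNK, (last_bit + 1) * BLF_CHUNK - 1);
	return 0;
}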
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ef96175c0744..047b8a8e5c29 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -170,7 +170,7 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | |||
170 | va_list ap; | 170 | va_list ap; |
171 | 171 | ||
172 | #ifdef DEBUG | 172 | #ifdef DEBUG |
173 | xfs_panic_mask |= XFS_PTAG_SHUTDOWN_CORRUPT; | 173 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); |
174 | #endif | 174 | #endif |
175 | 175 | ||
176 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) | 176 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3038dd52c72a..5215abc8023a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -54,9 +54,6 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
54 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 54 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); |
55 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 55 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
56 | STATIC void xlog_dealloc_log(xlog_t *log); | 56 | STATIC void xlog_dealloc_log(xlog_t *log); |
57 | STATIC int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | ||
58 | struct xlog_ticket *tic, xfs_lsn_t *start_lsn, | ||
59 | xlog_in_core_t **commit_iclog, uint flags); | ||
60 | 57 | ||
61 | /* local state machine functions */ | 58 | /* local state machine functions */ |
62 | STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); | 59 | STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); |
@@ -86,14 +83,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log, | |||
86 | STATIC void xlog_ungrant_log_space(xlog_t *log, | 83 | STATIC void xlog_ungrant_log_space(xlog_t *log, |
87 | xlog_ticket_t *ticket); | 84 | xlog_ticket_t *ticket); |
88 | 85 | ||
89 | |||
90 | /* local ticket functions */ | ||
91 | STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, | ||
92 | int unit_bytes, | ||
93 | int count, | ||
94 | char clientid, | ||
95 | uint flags); | ||
96 | |||
97 | #if defined(DEBUG) | 86 | #if defined(DEBUG) |
98 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); | 87 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
99 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 88 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); |
@@ -360,6 +349,15 @@ xfs_log_reserve( | |||
360 | ASSERT(flags & XFS_LOG_PERM_RESERV); | 349 | ASSERT(flags & XFS_LOG_PERM_RESERV); |
361 | internal_ticket = *ticket; | 350 | internal_ticket = *ticket; |
362 | 351 | ||
352 | /* | ||
353 | * this is a new transaction on the ticket, so we need to | ||
354 | * change the transaction ID so that the next transaction has a | ||
355 | * different TID in the log. Just add one to the existing tid | ||
356 | * so that we can see chains of rolling transactions in the log | ||
357 | * easily. | ||
358 | */ | ||
359 | internal_ticket->t_tid++; | ||
360 | |||
363 | trace_xfs_log_reserve(log, internal_ticket); | 361 | trace_xfs_log_reserve(log, internal_ticket); |
364 | 362 | ||
365 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 363 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); |
@@ -367,7 +365,8 @@ xfs_log_reserve( | |||
367 | } else { | 365 | } else { |
368 | /* may sleep if need to allocate more tickets */ | 366 | /* may sleep if need to allocate more tickets */ |
369 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, | 367 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, |
370 | client, flags); | 368 | client, flags, |
369 | KM_SLEEP|KM_MAYFAIL); | ||
371 | if (!internal_ticket) | 370 | if (!internal_ticket) |
372 | return XFS_ERROR(ENOMEM); | 371 | return XFS_ERROR(ENOMEM); |
373 | internal_ticket->t_trans_type = t_type; | 372 | internal_ticket->t_trans_type = t_type; |
@@ -452,6 +451,13 @@ xfs_log_mount( | |||
452 | /* Normal transactions can now occur */ | 451 | /* Normal transactions can now occur */ |
453 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; | 452 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; |
454 | 453 | ||
454 | /* | ||
455 | * Now the log has been fully initialised and we know where our | ||
456 | * space grant counters are, we can initialise the permanent ticket | ||
457 | * needed for delayed logging to work. | ||
458 | */ | ||
459 | xlog_cil_init_post_recovery(mp->m_log); | ||
460 | |||
455 | return 0; | 461 | return 0; |
456 | 462 | ||
457 | out_destroy_ail: | 463 | out_destroy_ail: |
@@ -658,6 +664,10 @@ xfs_log_item_init( | |||
658 | item->li_ailp = mp->m_ail; | 664 | item->li_ailp = mp->m_ail; |
659 | item->li_type = type; | 665 | item->li_type = type; |
660 | item->li_ops = ops; | 666 | item->li_ops = ops; |
667 | item->li_lv = NULL; | ||
668 | |||
669 | INIT_LIST_HEAD(&item->li_ail); | ||
670 | INIT_LIST_HEAD(&item->li_cil); | ||
661 | } | 671 | } |
662 | 672 | ||
663 | /* | 673 | /* |
@@ -1168,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1168 | *iclogp = log->l_iclog; /* complete ring */ | 1178 | *iclogp = log->l_iclog; /* complete ring */ |
1169 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ | 1179 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ |
1170 | 1180 | ||
1181 | error = xlog_cil_init(log); | ||
1182 | if (error) | ||
1183 | goto out_free_iclog; | ||
1171 | return log; | 1184 | return log; |
1172 | 1185 | ||
1173 | out_free_iclog: | 1186 | out_free_iclog: |
@@ -1494,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log) | |||
1494 | xlog_in_core_t *iclog, *next_iclog; | 1507 | xlog_in_core_t *iclog, *next_iclog; |
1495 | int i; | 1508 | int i; |
1496 | 1509 | ||
1510 | xlog_cil_destroy(log); | ||
1511 | |||
1497 | iclog = log->l_iclog; | 1512 | iclog = log->l_iclog; |
1498 | for (i=0; i<log->l_iclog_bufs; i++) { | 1513 | for (i=0; i<log->l_iclog_bufs; i++) { |
1499 | sv_destroy(&iclog->ic_force_wait); | 1514 | sv_destroy(&iclog->ic_force_wait); |
@@ -1536,8 +1551,10 @@ xlog_state_finish_copy(xlog_t *log, | |||
1536 | * print out info relating to regions written which consume | 1551 | * print out info relating to regions written which consume |
1537 | * the reservation | 1552 | * the reservation |
1538 | */ | 1553 | */ |
1539 | STATIC void | 1554 | void |
1540 | xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | 1555 | xlog_print_tic_res( |
1556 | struct xfs_mount *mp, | ||
1557 | struct xlog_ticket *ticket) | ||
1541 | { | 1558 | { |
1542 | uint i; | 1559 | uint i; |
1543 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); | 1560 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); |
@@ -1637,6 +1654,10 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | |||
1637 | "bad-rtype" : res_type_str[r_type-1]), | 1654 | "bad-rtype" : res_type_str[r_type-1]), |
1638 | ticket->t_res_arr[i].r_len); | 1655 | ticket->t_res_arr[i].r_len); |
1639 | } | 1656 | } |
1657 | |||
1658 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, | ||
1659 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1660 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
1640 | } | 1661 | } |
1641 | 1662 | ||
1642 | /* | 1663 | /* |
@@ -1865,7 +1886,7 @@ xlog_write_copy_finish( | |||
1865 | * we don't update ic_offset until the end when we know exactly how many | 1886 | * we don't update ic_offset until the end when we know exactly how many |
1866 | * bytes have been written out. | 1887 | * bytes have been written out. |
1867 | */ | 1888 | */ |
1868 | STATIC int | 1889 | int |
1869 | xlog_write( | 1890 | xlog_write( |
1870 | struct log *log, | 1891 | struct log *log, |
1871 | struct xfs_log_vec *log_vector, | 1892 | struct xfs_log_vec *log_vector, |
@@ -1889,22 +1910,26 @@ xlog_write( | |||
1889 | *start_lsn = 0; | 1910 | *start_lsn = 0; |
1890 | 1911 | ||
1891 | len = xlog_write_calc_vec_length(ticket, log_vector); | 1912 | len = xlog_write_calc_vec_length(ticket, log_vector); |
1892 | if (ticket->t_curr_res < len) { | 1913 | if (log->l_cilp) { |
1893 | xlog_print_tic_res(log->l_mp, ticket); | 1914 | /* |
1894 | #ifdef DEBUG | 1915 | * Region headers and bytes are already accounted for. |
1895 | xlog_panic( | 1916 | * We only need to take into account start records and |
1896 | "xfs_log_write: reservation ran out. Need to up reservation"); | 1917 | * split regions in this function. |
1897 | #else | 1918 | */ |
1898 | /* Customer configurable panic */ | 1919 | if (ticket->t_flags & XLOG_TIC_INITED) |
1899 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp, | 1920 | ticket->t_curr_res -= sizeof(xlog_op_header_t); |
1900 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1901 | 1921 | ||
1902 | /* If we did not panic, shutdown the filesystem */ | 1922 | /* |
1903 | xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE); | 1923 | * Commit record headers need to be accounted for. These |
1904 | #endif | 1924 | * come in as separate writes so are easy to detect. |
1905 | } | 1925 | */ |
1926 | if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) | ||
1927 | ticket->t_curr_res -= sizeof(xlog_op_header_t); | ||
1928 | } else | ||
1929 | ticket->t_curr_res -= len; | ||
1906 | 1930 | ||
1907 | ticket->t_curr_res -= len; | 1931 | if (ticket->t_curr_res < 0) |
1932 | xlog_print_tic_res(log->l_mp, ticket); | ||
1908 | 1933 | ||
1909 | index = 0; | 1934 | index = 0; |
1910 | lv = log_vector; | 1935 | lv = log_vector; |
@@ -3000,6 +3025,8 @@ _xfs_log_force( | |||
3000 | 3025 | ||
3001 | XFS_STATS_INC(xs_log_force); | 3026 | XFS_STATS_INC(xs_log_force); |
3002 | 3027 | ||
3028 | xlog_cil_push(log, 1); | ||
3029 | |||
3003 | spin_lock(&log->l_icloglock); | 3030 | spin_lock(&log->l_icloglock); |
3004 | 3031 | ||
3005 | iclog = log->l_iclog; | 3032 | iclog = log->l_iclog; |
@@ -3149,6 +3176,12 @@ _xfs_log_force_lsn( | |||
3149 | 3176 | ||
3150 | XFS_STATS_INC(xs_log_force); | 3177 | XFS_STATS_INC(xs_log_force); |
3151 | 3178 | ||
3179 | if (log->l_cilp) { | ||
3180 | lsn = xlog_cil_push_lsn(log, lsn); | ||
3181 | if (lsn == NULLCOMMITLSN) | ||
3182 | return 0; | ||
3183 | } | ||
3184 | |||
3152 | try_again: | 3185 | try_again: |
3153 | spin_lock(&log->l_icloglock); | 3186 | spin_lock(&log->l_icloglock); |
3154 | iclog = log->l_iclog; | 3187 | iclog = log->l_iclog; |
@@ -3313,22 +3346,30 @@ xfs_log_ticket_get( | |||
3313 | return ticket; | 3346 | return ticket; |
3314 | } | 3347 | } |
3315 | 3348 | ||
3349 | xlog_tid_t | ||
3350 | xfs_log_get_trans_ident( | ||
3351 | struct xfs_trans *tp) | ||
3352 | { | ||
3353 | return tp->t_ticket->t_tid; | ||
3354 | } | ||
3355 | |||
3316 | /* | 3356 | /* |
3317 | * Allocate and initialise a new log ticket. | 3357 | * Allocate and initialise a new log ticket. |
3318 | */ | 3358 | */ |
3319 | STATIC xlog_ticket_t * | 3359 | xlog_ticket_t * |
3320 | xlog_ticket_alloc( | 3360 | xlog_ticket_alloc( |
3321 | struct log *log, | 3361 | struct log *log, |
3322 | int unit_bytes, | 3362 | int unit_bytes, |
3323 | int cnt, | 3363 | int cnt, |
3324 | char client, | 3364 | char client, |
3325 | uint xflags) | 3365 | uint xflags, |
3366 | int alloc_flags) | ||
3326 | { | 3367 | { |
3327 | struct xlog_ticket *tic; | 3368 | struct xlog_ticket *tic; |
3328 | uint num_headers; | 3369 | uint num_headers; |
3329 | int iclog_space; | 3370 | int iclog_space; |
3330 | 3371 | ||
3331 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); | 3372 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags); |
3332 | if (!tic) | 3373 | if (!tic) |
3333 | return NULL; | 3374 | return NULL; |
3334 | 3375 | ||
@@ -3647,6 +3688,11 @@ xlog_state_ioerror( | |||
3647 | * c. nothing new gets queued up after (a) and (b) are done. | 3688 | * c. nothing new gets queued up after (a) and (b) are done. |
3648 | * d. if !logerror, flush the iclogs to disk, then seal them off | 3689 | * d. if !logerror, flush the iclogs to disk, then seal them off |
3649 | * for business. | 3690 | * for business. |
3691 | * | ||
3692 | * Note: for delayed logging the !logerror case needs to flush the regions | ||
3693 | * held in memory out to the iclogs before flushing them to disk. This needs | ||
3694 | * to be done before the log is marked as shutdown, otherwise the flush to the | ||
3695 | * iclogs will fail. | ||
3650 | */ | 3696 | */ |
3651 | int | 3697 | int |
3652 | xfs_log_force_umount( | 3698 | xfs_log_force_umount( |
@@ -3680,6 +3726,16 @@ xfs_log_force_umount( | |||
3680 | return 1; | 3726 | return 1; |
3681 | } | 3727 | } |
3682 | retval = 0; | 3728 | retval = 0; |
3729 | |||
3730 | /* | ||
3731 | * Flush the in memory commit item list before marking the log as | ||
3732 | * being shut down. We need to do it in this order to ensure all the | ||
3733 | * completed transactions are flushed to disk with the xfs_log_force() | ||
3734 | * call below. | ||
3735 | */ | ||
3736 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
3737 | xlog_cil_push(log, 1); | ||
3738 | |||
3683 | /* | 3739 | /* |
3684 | * We must hold both the GRANT lock and the LOG lock, | 3740 | * We must hold both the GRANT lock and the LOG lock, |
3685 | * before we mark the filesystem SHUTDOWN and wake | 3741 | * before we mark the filesystem SHUTDOWN and wake |
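With a CIL present, xlog_write() no longer charges the full vector length to the ticket, since region payloads and headers were already accounted at commit time; only the start record and an eventual commit record are deducted here. A compilable sketch of that accounting split (the header size and all names are placeholders, not kernel values):

#include <stdio.h>

#define OP_HDR_SIZE 12	/* placeholder; not sizeof(xlog_op_header_t) */

/*
 * Sketch of the new ticket accounting in xlog_write(): with a CIL,
 * deduct only start/commit record headers; without one, deduct the
 * whole formatted vector length.
 */
static int charge_ticket(int curr_res, int len, int have_cil,
			 int inited, int is_commit)
{
	if (have_cil) {
		if (inited)
			curr_res -= OP_HDR_SIZE;	/* start record */
		if (is_commit)
			curr_res -= OP_HDR_SIZE;	/* commit record */
	} else {
		curr_res -= len;
	}
	return curr_res;	/* < 0 triggers xlog_print_tic_res() */
}

int main(void)
{
	printf("%d\n", charge_ticket(100, 64, 1, 1, 0));	/* 88 */
	printf("%d\n", charge_ticket(100, 64, 0, 0, 0));	/* 36 */
	return 0;
}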
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 229d1f36ba9a..04c78e642cc8 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #define __XFS_LOG_H__ | 19 | #define __XFS_LOG_H__ |
20 | 20 | ||
21 | /* get lsn fields */ | 21 | /* get lsn fields */ |
22 | |||
23 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) | 22 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) |
24 | #define BLOCK_LSN(lsn) ((uint)(lsn)) | 23 | #define BLOCK_LSN(lsn) ((uint)(lsn)) |
25 | 24 | ||
@@ -114,6 +113,9 @@ struct xfs_log_vec { | |||
114 | struct xfs_log_vec *lv_next; /* next lv in build list */ | 113 | struct xfs_log_vec *lv_next; /* next lv in build list */ |
115 | int lv_niovecs; /* number of iovecs in lv */ | 114 | int lv_niovecs; /* number of iovecs in lv */ |
116 | struct xfs_log_iovec *lv_iovecp; /* iovec array */ | 115 | struct xfs_log_iovec *lv_iovecp; /* iovec array */ |
116 | struct xfs_log_item *lv_item; /* owner */ | ||
117 | char *lv_buf; /* formatted buffer */ | ||
118 | int lv_buf_len; /* size of formatted buffer */ | ||
117 | }; | 119 | }; |
118 | 120 | ||
119 | /* | 121 | /* |
@@ -134,6 +136,7 @@ struct xlog_in_core; | |||
134 | struct xlog_ticket; | 136 | struct xlog_ticket; |
135 | struct xfs_log_item; | 137 | struct xfs_log_item; |
136 | struct xfs_item_ops; | 138 | struct xfs_item_ops; |
139 | struct xfs_trans; | ||
137 | 140 | ||
138 | void xfs_log_item_init(struct xfs_mount *mp, | 141 | void xfs_log_item_init(struct xfs_mount *mp, |
139 | struct xfs_log_item *item, | 142 | struct xfs_log_item *item, |
@@ -187,9 +190,16 @@ int xfs_log_need_covered(struct xfs_mount *mp); | |||
187 | 190 | ||
188 | void xlog_iodone(struct xfs_buf *); | 191 | void xlog_iodone(struct xfs_buf *); |
189 | 192 | ||
190 | struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); | 193 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
191 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 194 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
192 | 195 | ||
196 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | ||
197 | |||
198 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | ||
199 | struct xfs_log_vec *log_vector, | ||
200 | xfs_lsn_t *commit_lsn, int flags); | ||
201 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | ||
202 | |||
193 | #endif | 203 | #endif |
194 | 204 | ||
195 | 205 | ||
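xfs_log_get_trans_ident() exists so the busy-extent code can key duplicate detection on the ticket tid rather than the transaction pointer, which delayed logging can recycle. A toy illustration of the two duplicate cases described in the xfs_alloc_busy_insert() comment earlier (types and strings invented):

#include <stdio.h>

typedef unsigned int xlog_tid_t;

/*
 * Toy version of the duplicate-busy-extent decision in
 * xfs_alloc_busy_insert(), keyed on the tid returned by the new
 * xfs_log_get_trans_ident() helper.
 */
static const char *duplicate_action(xlog_tid_t existing, xlog_tid_t incoming)
{
	if (existing == incoming)
		return "same transaction: merge extents (must be sync)";
	return "different transaction: force the log and retry";
}

int main(void)
{
	printf("%s\n", duplicate_action(42, 42));
	printf("%s\n", duplicate_action(42, 43));
	return 0;
}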
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c new file mode 100644 index 000000000000..bb17cc044bf3 --- /dev/null +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -0,0 +1,725 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_trans_priv.h" | ||
26 | #include "xfs_log_priv.h" | ||
27 | #include "xfs_sb.h" | ||
28 | #include "xfs_ag.h" | ||
29 | #include "xfs_dir2.h" | ||
30 | #include "xfs_dmapi.h" | ||
31 | #include "xfs_mount.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_alloc.h" | ||
34 | |||
35 | /* | ||
36 | * Perform initial CIL structure initialisation. If the CIL is not | ||
37 | * enabled in this filesystem, ensure the log->l_cilp is null so | ||
38 | * we can check this conditional to determine if we are doing delayed | ||
39 | * logging or not. | ||
40 | */ | ||
41 | int | ||
42 | xlog_cil_init( | ||
43 | struct log *log) | ||
44 | { | ||
45 | struct xfs_cil *cil; | ||
46 | struct xfs_cil_ctx *ctx; | ||
47 | |||
48 | log->l_cilp = NULL; | ||
49 | if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
50 | return 0; | ||
51 | |||
52 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
53 | if (!cil) | ||
54 | return ENOMEM; | ||
55 | |||
56 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
57 | if (!ctx) { | ||
58 | kmem_free(cil); | ||
59 | return ENOMEM; | ||
60 | } | ||
61 | |||
62 | INIT_LIST_HEAD(&cil->xc_cil); | ||
63 | INIT_LIST_HEAD(&cil->xc_committing); | ||
64 | spin_lock_init(&cil->xc_cil_lock); | ||
65 | init_rwsem(&cil->xc_ctx_lock); | ||
66 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | ||
67 | |||
68 | INIT_LIST_HEAD(&ctx->committing); | ||
69 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
70 | ctx->sequence = 1; | ||
71 | ctx->cil = cil; | ||
72 | cil->xc_ctx = ctx; | ||
73 | |||
74 | cil->xc_log = log; | ||
75 | log->l_cilp = cil; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | void | ||
80 | xlog_cil_destroy( | ||
81 | struct log *log) | ||
82 | { | ||
83 | if (!log->l_cilp) | ||
84 | return; | ||
85 | |||
86 | if (log->l_cilp->xc_ctx) { | ||
87 | if (log->l_cilp->xc_ctx->ticket) | ||
88 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
89 | kmem_free(log->l_cilp->xc_ctx); | ||
90 | } | ||
91 | |||
92 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
93 | kmem_free(log->l_cilp); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to | ||
98 | * recover, so we don't allow failure here. Also, we allocate in a context that | ||
99 | * we don't want to be issuing transactions from, so we need to tell the | ||
100 | * allocation code this as well. | ||
101 | * | ||
102 | * We don't reserve any space for the ticket - we are going to steal whatever | ||
103 | * space we require from transactions as they commit. To ensure we reserve all | ||
104 | * the space required, we need to set the current reservation of the ticket to | ||
105 | * zero so that we know to steal the initial transaction overhead from the | ||
106 | * first transaction commit. | ||
107 | */ | ||
108 | static struct xlog_ticket * | ||
109 | xlog_cil_ticket_alloc( | ||
110 | struct log *log) | ||
111 | { | ||
112 | struct xlog_ticket *tic; | ||
113 | |||
114 | tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, | ||
115 | KM_SLEEP|KM_NOFS); | ||
116 | tic->t_trans_type = XFS_TRANS_CHECKPOINT; | ||
117 | |||
118 | /* | ||
119 | * set the current reservation to zero so we know to steal the basic | ||
120 | * transaction overhead reservation from the first transaction commit. | ||
121 | */ | ||
122 | tic->t_curr_res = 0; | ||
123 | return tic; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * After the first stage of log recovery is done, we know where the head and | ||
128 | * tail of the log are. We need this log initialisation done before we can | ||
129 | * initialise the first CIL checkpoint context. | ||
130 | * | ||
131 | * Here we allocate a log ticket to track space usage during a CIL push. This | ||
132 | * ticket is passed to xlog_write() directly so that we don't slowly leak log | ||
133 | * space by failing to account for space used by log headers and additional | ||
134 | * region headers for split regions. | ||
135 | */ | ||
136 | void | ||
137 | xlog_cil_init_post_recovery( | ||
138 | struct log *log) | ||
139 | { | ||
140 | if (!log->l_cilp) | ||
141 | return; | ||
142 | |||
143 | log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); | ||
144 | log->l_cilp->xc_ctx->sequence = 1; | ||
145 | log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, | ||
146 | log->l_curr_block); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Insert the log item into the CIL and calculate the difference in space | ||
151 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
152 | * if the change requires additional log metadata. If it does, take that space | ||
153 | * as well. Remove the amount of space we added to the checkpoint ticket from | ||
154 | * the current transaction ticket so that the accounting works out correctly. | ||
155 | * | ||
156 | * If this is the first time the item is being placed into the CIL in this | ||
157 | * context, pin it so it can't be written to disk until the CIL is flushed to | ||
158 | * the iclog and the iclog written to disk. | ||
159 | */ | ||
160 | static void | ||
161 | xlog_cil_insert( | ||
162 | struct log *log, | ||
163 | struct xlog_ticket *ticket, | ||
164 | struct xfs_log_item *item, | ||
165 | struct xfs_log_vec *lv) | ||
166 | { | ||
167 | struct xfs_cil *cil = log->l_cilp; | ||
168 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
169 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | ||
170 | int len; | ||
171 | int diff_iovecs; | ||
172 | int iclog_space; | ||
173 | |||
174 | if (old) { | ||
175 | /* existing lv on log item, space used is a delta */ | ||
176 | ASSERT(!list_empty(&item->li_cil)); | ||
177 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
178 | |||
179 | len = lv->lv_buf_len - old->lv_buf_len; | ||
180 | diff_iovecs = lv->lv_niovecs - old->lv_niovecs; | ||
181 | kmem_free(old->lv_buf); | ||
182 | kmem_free(old); | ||
183 | } else { | ||
184 | /* new lv, must pin the log item */ | ||
185 | ASSERT(!lv->lv_item->li_lv); | ||
186 | ASSERT(list_empty(&item->li_cil)); | ||
187 | |||
188 | len = lv->lv_buf_len; | ||
189 | diff_iovecs = lv->lv_niovecs; | ||
190 | IOP_PIN(lv->lv_item); | ||
191 | |||
192 | } | ||
193 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
194 | |||
195 | /* attach new log vector to log item */ | ||
196 | lv->lv_item->li_lv = lv; | ||
197 | |||
198 | spin_lock(&cil->xc_cil_lock); | ||
199 | list_move_tail(&item->li_cil, &cil->xc_cil); | ||
200 | ctx->nvecs += diff_iovecs; | ||
201 | |||
202 | /* | ||
203 | * If this is the first time the item is being committed to the CIL, | ||
204 | * store the sequence number on the log item so we can tell | ||
205 | * in future commits whether this is the first checkpoint the item is | ||
206 | * being committed into. | ||
207 | */ | ||
208 | if (!item->li_seq) | ||
209 | item->li_seq = ctx->sequence; | ||
210 | |||
211 | /* | ||
212 | * Now transfer enough transaction reservation to the context ticket | ||
213 | * for the checkpoint. The context ticket is special - the unit | ||
214 | * reservation has to grow as well as the current reservation as we | ||
215 | * steal from tickets so we can correctly determine the space used | ||
216 | * during the transaction commit. | ||
217 | */ | ||
218 | if (ctx->ticket->t_curr_res == 0) { | ||
219 | /* first commit in checkpoint, steal the header reservation */ | ||
220 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
221 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
222 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
223 | } | ||
224 | |||
225 | /* do we need space for more log record headers? */ | ||
226 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
227 | if (len > 0 && (ctx->space_used / iclog_space != | ||
228 | (ctx->space_used + len) / iclog_space)) { | ||
229 | int hdrs; | ||
230 | |||
231 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
232 | /* need to take into account split region headers, too */ | ||
233 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
234 | ctx->ticket->t_unit_res += hdrs; | ||
235 | ctx->ticket->t_curr_res += hdrs; | ||
236 | ticket->t_curr_res -= hdrs; | ||
237 | ASSERT(ticket->t_curr_res >= len); | ||
238 | } | ||
239 | ticket->t_curr_res -= len; | ||
240 | ctx->space_used += len; | ||
241 | |||
242 | spin_unlock(&cil->xc_cil_lock); | ||
243 | } | ||
244 | |||
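The iclog-header top-up at the end of xlog_cil_insert() only fires when adding this commit's bytes moves the checkpoint across a log buffer boundary. A worked, stand-alone version of that arithmetic (the sizes are illustrative assumptions, not the kernel's actual l_iclog_size/l_iclog_hsize values):

	#include <stdio.h>

	int main(void)
	{
		int iclog_size  = 32 * 1024;	/* assumed log buffer size */
		int iclog_hsize = 512;		/* assumed record header size */
		int op_hsize    = 12;		/* assumed op header size */
		int iclog_space = iclog_size - iclog_hsize;

		int space_used = 30 * 1024;	/* checkpoint bytes so far */
		int len        = 8 * 1024;	/* this commit's formatted bytes */

		if (len > 0 && (space_used / iclog_space !=
				(space_used + len) / iclog_space)) {
			/* one record header + op header per extra log buffer */
			int hdrs = (len + iclog_space - 1) / iclog_space;

			hdrs *= iclog_hsize + op_hsize;
			printf("crosses an iclog boundary: steal %d header bytes\n",
			       hdrs);
		} else {
			printf("fits in the current iclog: no extra headers\n");
		}
		return 0;
	}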
245 | /* | ||
246 | * Format log items into flat buffers | ||
247 | * | ||
248 | * For delayed logging, we need to hold a formatted buffer containing all the | ||
249 | * changes on the log item. This enables us to relog the item in memory and | ||
250 | * write it out asynchronously without needing to relock the object that was | ||
251 | * modified at the time it gets written into the iclog. | ||
252 | * | ||
253 | * This function builds a vector for the changes in each log item in the | ||
254 | * transaction. It then works out the length of the buffer needed for each log | ||
255 | * item, allocates them and formats the vector for the item into the buffer. | ||
256 | * The buffer is then attached to the log item, which is then inserted into the | ||
257 | * Committed Item List for tracking until the next checkpoint is written out. | ||
258 | * | ||
259 | * We don't set up region headers during this process; we simply copy the | ||
260 | * regions into the flat buffer. We can do this because we still have to do a | ||
261 | * formatting step to write the regions into the iclog buffer. Writing the | ||
262 | * ophdrs during the iclog write means that we can support splitting large | ||
263 | * regions across iclog boundaries without needing a change in the format of the | ||
264 | * item/region encapsulation. | ||
265 | * | ||
266 | * Hence what we need to do now is rewrite the vector array to point | ||
267 | * to the copied region inside the buffer we just allocated. This allows us to | ||
268 | * format the regions into the iclog as though they are being formatted | ||
269 | * directly out of the objects themselves. | ||
270 | */ | ||
271 | static void | ||
272 | xlog_cil_format_items( | ||
273 | struct log *log, | ||
274 | struct xfs_log_vec *log_vector, | ||
275 | struct xlog_ticket *ticket, | ||
276 | xfs_lsn_t *start_lsn) | ||
277 | { | ||
278 | struct xfs_log_vec *lv; | ||
279 | |||
280 | if (start_lsn) | ||
281 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
282 | |||
283 | ASSERT(log_vector); | ||
284 | for (lv = log_vector; lv; lv = lv->lv_next) { | ||
285 | void *ptr; | ||
286 | int index; | ||
287 | int len = 0; | ||
288 | |||
289 | /* build the vector array and calculate its length */ | ||
290 | IOP_FORMAT(lv->lv_item, lv->lv_iovecp); | ||
291 | for (index = 0; index < lv->lv_niovecs; index++) | ||
292 | len += lv->lv_iovecp[index].i_len; | ||
293 | |||
294 | lv->lv_buf_len = len; | ||
295 | lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); | ||
296 | ptr = lv->lv_buf; | ||
297 | |||
298 | for (index = 0; index < lv->lv_niovecs; index++) { | ||
299 | struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; | ||
300 | |||
301 | memcpy(ptr, vec->i_addr, vec->i_len); | ||
302 | vec->i_addr = ptr; | ||
303 | ptr += vec->i_len; | ||
304 | } | ||
305 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); | ||
306 | |||
307 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | ||
308 | } | ||
309 | } | ||
310 | |||
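The loop above flattens each item's regions into one buffer and then repoints the vector array into that buffer, so the later iclog formatting step can run without relocking the modified object. The same flatten-and-repoint idea in isolation (user-space C with a hypothetical iovec type):

	#include <assert.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct iovec_ { void *addr; int len; };

	/* Flatten n regions into one buffer and repoint each vector at its
	 * copy, mirroring the loop in xlog_cil_format_items(). */
	static char *flatten(struct iovec_ *vec, int n)
	{
		int len = 0, i;
		char *buf, *ptr;

		for (i = 0; i < n; i++)
			len += vec[i].len;

		buf = calloc(1, len);
		assert(buf);
		ptr = buf;

		for (i = 0; i < n; i++) {
			memcpy(ptr, vec[i].addr, vec[i].len);
			vec[i].addr = ptr;	/* vector now points into buf */
			ptr += vec[i].len;
		}
		assert(ptr == buf + len);
		return buf;
	}

	int main(void)
	{
		char a[] = "hdr", b[] = "payload";
		struct iovec_ v[2] = { { a, 3 }, { b, 7 } };
		char *buf = flatten(v, 2);

		printf("%.10s\n", buf);	/* prints "hdrpayload" */
		free(buf);
		return 0;
	}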
311 | static void | ||
312 | xlog_cil_free_logvec( | ||
313 | struct xfs_log_vec *log_vector) | ||
314 | { | ||
315 | struct xfs_log_vec *lv; | ||
316 | |||
317 | for (lv = log_vector; lv; ) { | ||
318 | struct xfs_log_vec *next = lv->lv_next; | ||
319 | kmem_free(lv->lv_buf); | ||
320 | kmem_free(lv); | ||
321 | lv = next; | ||
322 | } | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Commit a transaction with the given vector to the Committed Item List. | ||
327 | * | ||
328 | * To do this, we need to format the item, pin it in memory if required and | ||
329 | * account for the space used by the transaction. Once we have done that we | ||
330 | * need to release the unused reservation for the transaction, attach the | ||
331 | * transaction to the checkpoint context so we carry the busy extents through | ||
332 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
333 | * | ||
334 | * For more specific information about the order of operations in | ||
335 | * xfs_log_commit_cil() please refer to the comments in | ||
336 | * xfs_trans_commit_iclog(). | ||
337 | * | ||
338 | * Called with the context lock already held in read mode to lock out | ||
339 | * background commit, returns without it held once background commits are | ||
340 | * allowed again. | ||
341 | */ | ||
342 | int | ||
343 | xfs_log_commit_cil( | ||
344 | struct xfs_mount *mp, | ||
345 | struct xfs_trans *tp, | ||
346 | struct xfs_log_vec *log_vector, | ||
347 | xfs_lsn_t *commit_lsn, | ||
348 | int flags) | ||
349 | { | ||
350 | struct log *log = mp->m_log; | ||
351 | int log_flags = 0; | ||
352 | int push = 0; | ||
353 | |||
354 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
355 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
356 | |||
357 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
358 | xlog_cil_free_logvec(log_vector); | ||
359 | return XFS_ERROR(EIO); | ||
360 | } | ||
361 | |||
362 | /* lock out background commit */ | ||
363 | down_read(&log->l_cilp->xc_ctx_lock); | ||
364 | xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
365 | |||
366 | /* check we didn't blow the reservation */ | ||
367 | if (tp->t_ticket->t_curr_res < 0) | ||
368 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
369 | |||
370 | /* attach the transaction to the CIL if it has any busy extents */ | ||
371 | if (!list_empty(&tp->t_busy)) { | ||
372 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
373 | list_splice_init(&tp->t_busy, | ||
374 | &log->l_cilp->xc_ctx->busy_extents); | ||
375 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
376 | } | ||
377 | |||
378 | tp->t_commit_lsn = *commit_lsn; | ||
379 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
380 | xfs_trans_unreserve_and_mod_sb(tp); | ||
381 | |||
382 | /* check for background commit before unlock */ | ||
383 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
384 | push = 1; | ||
385 | up_read(&log->l_cilp->xc_ctx_lock); | ||
386 | |||
387 | /* | ||
388 | * We need to push CIL every so often so we don't cache more than we | ||
389 | * can fit in the log. The limit really is that a checkpoint can't be | ||
390 | * more than half the log (the current checkpoint is not allowed to | ||
391 | * overwrite the previous checkpoint), but commit latency and memory | ||
392 | * usage limit this to a smaller size in most cases. | ||
393 | */ | ||
394 | if (push) | ||
395 | xlog_cil_push(log, 0); | ||
396 | return 0; | ||
397 | } | ||
398 | |||
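Note the pattern at the end of xfs_log_commit_cil(): the push decision is sampled while the lock is held, but the push itself is issued only after unlocking, since xlog_cil_push() needs to take those locks itself. A compact user-space model of that decide-under-lock, act-after-unlock idiom (pthreads; names and the threshold are illustrative):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t cil_lock = PTHREAD_MUTEX_INITIALIZER;
	static int space_used;			/* protected by cil_lock */
	static const int space_limit = 1024;	/* illustrative threshold */

	static void push(void)
	{
		printf("pushing the CIL\n");	/* would take cil_lock itself */
	}

	/* Sample the threshold under the lock, act on it after unlocking. */
	static void commit(int len)
	{
		int do_push = 0;

		pthread_mutex_lock(&cil_lock);
		space_used += len;		/* ...accounting happens here... */
		if (space_used > space_limit)
			do_push = 1;		/* record the decision */
		pthread_mutex_unlock(&cil_lock);

		if (do_push)
			push();			/* safe: no locks held now */
	}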
399 | /* | ||
400 | * Mark all items committed and clear busy extents. We free the log vector | ||
401 | * chains in a separate pass so that we unpin the log items as quickly as | ||
402 | * possible. | ||
403 | */ | ||
404 | static void | ||
405 | xlog_cil_committed( | ||
406 | void *args, | ||
407 | int abort) | ||
408 | { | ||
409 | struct xfs_cil_ctx *ctx = args; | ||
410 | struct xfs_log_vec *lv; | ||
411 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
412 | struct xfs_busy_extent *busyp, *n; | ||
413 | |||
414 | /* unpin all the log items */ | ||
415 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next) { | ||
416 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
417 | abortflag); | ||
418 | } | ||
419 | |||
420 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | ||
421 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | ||
422 | |||
423 | spin_lock(&ctx->cil->xc_cil_lock); | ||
424 | list_del(&ctx->committing); | ||
425 | spin_unlock(&ctx->cil->xc_cil_lock); | ||
426 | |||
427 | xlog_cil_free_logvec(ctx->lv_chain); | ||
428 | kmem_free(ctx); | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Push the Committed Item List to the log. If the push_now flag is not set, | ||
433 | * then it is a background flush and so we can choose to ignore it. | ||
434 | */ | ||
435 | int | ||
436 | xlog_cil_push( | ||
437 | struct log *log, | ||
438 | int push_now) | ||
439 | { | ||
440 | struct xfs_cil *cil = log->l_cilp; | ||
441 | struct xfs_log_vec *lv; | ||
442 | struct xfs_cil_ctx *ctx; | ||
443 | struct xfs_cil_ctx *new_ctx; | ||
444 | struct xlog_in_core *commit_iclog; | ||
445 | struct xlog_ticket *tic; | ||
446 | int num_lv; | ||
447 | int num_iovecs; | ||
448 | int len; | ||
449 | int error = 0; | ||
450 | struct xfs_trans_header thdr; | ||
451 | struct xfs_log_iovec lhdr; | ||
452 | struct xfs_log_vec lvhdr = { NULL }; | ||
453 | xfs_lsn_t commit_lsn; | ||
454 | |||
455 | if (!cil) | ||
456 | return 0; | ||
457 | |||
458 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | ||
459 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | ||
460 | |||
461 | /* lock out transaction commit, but don't block on background push */ | ||
462 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | ||
463 | if (!push_now) | ||
464 | goto out_free_ticket; | ||
465 | down_write(&cil->xc_ctx_lock); | ||
466 | } | ||
467 | ctx = cil->xc_ctx; | ||
468 | |||
469 | /* check if we've anything to push */ | ||
470 | if (list_empty(&cil->xc_cil)) | ||
471 | goto out_skip; | ||
472 | |||
473 | /* check for spurious background flush */ | ||
474 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | ||
475 | goto out_skip; | ||
476 | |||
477 | /* | ||
478 | * pull all the log vectors off the items in the CIL, and | ||
479 | * remove the items from the CIL. We don't need the CIL lock | ||
480 | * here because it's only needed on the transaction commit | ||
481 | * side which is currently locked out by the flush lock. | ||
482 | */ | ||
483 | lv = NULL; | ||
484 | num_lv = 0; | ||
485 | num_iovecs = 0; | ||
486 | len = 0; | ||
487 | while (!list_empty(&cil->xc_cil)) { | ||
488 | struct xfs_log_item *item; | ||
489 | int i; | ||
490 | |||
491 | item = list_first_entry(&cil->xc_cil, | ||
492 | struct xfs_log_item, li_cil); | ||
493 | list_del_init(&item->li_cil); | ||
494 | if (!ctx->lv_chain) | ||
495 | ctx->lv_chain = item->li_lv; | ||
496 | else | ||
497 | lv->lv_next = item->li_lv; | ||
498 | lv = item->li_lv; | ||
499 | item->li_lv = NULL; | ||
500 | |||
501 | num_lv++; | ||
502 | num_iovecs += lv->lv_niovecs; | ||
503 | for (i = 0; i < lv->lv_niovecs; i++) | ||
504 | len += lv->lv_iovecp[i].i_len; | ||
505 | } | ||
506 | |||
507 | /* | ||
508 | * initialise the new context and attach it to the CIL. Then attach | ||
509 | * the current context to the CIL committing list so it can be found | ||
510 | * during log forces to extract the commit lsn of the sequence that | ||
511 | * needs to be forced. | ||
512 | */ | ||
513 | INIT_LIST_HEAD(&new_ctx->committing); | ||
514 | INIT_LIST_HEAD(&new_ctx->busy_extents); | ||
515 | new_ctx->sequence = ctx->sequence + 1; | ||
516 | new_ctx->cil = cil; | ||
517 | cil->xc_ctx = new_ctx; | ||
518 | |||
519 | /* | ||
520 | * The switch is now done, so we can drop the context lock and move out | ||
521 | * of a shared context. We can't just go straight to the commit record, | ||
522 | * though - we need to synchronise with previous and future commits so | ||
523 | * that the commit records are correctly ordered in the log to ensure | ||
524 | * that we process items during log IO completion in the correct order. | ||
525 | * | ||
526 | * For example, if we get an EFI in one checkpoint and the EFD in the | ||
527 | * next (e.g. due to log forces), we do not want the checkpoint with | ||
528 | * the EFD to be committed before the checkpoint with the EFI. Hence | ||
529 | * we must strictly order the commit records of the checkpoints so | ||
530 | * that: a) the checkpoint callbacks are attached to the iclogs in the | ||
531 | * correct order; and b) the checkpoints are replayed in correct order | ||
532 | * in log recovery. | ||
533 | * | ||
534 | * Hence we need to add this context to the committing context list so | ||
535 | * that higher sequences will wait for us to write out a commit record | ||
536 | * before they do. | ||
537 | */ | ||
538 | spin_lock(&cil->xc_cil_lock); | ||
539 | list_add(&ctx->committing, &cil->xc_committing); | ||
540 | spin_unlock(&cil->xc_cil_lock); | ||
541 | up_write(&cil->xc_ctx_lock); | ||
542 | |||
543 | /* | ||
544 | * Build a checkpoint transaction header and write it to the log to | ||
545 | * begin the transaction. We need to account for the space used by the | ||
546 | * transaction header here as it is not accounted for in xlog_write(). | ||
547 | * | ||
548 | * The LSN we need to pass to the log items on transaction commit is | ||
549 | * the LSN reported by the first log vector write. If we use the commit | ||
550 | * record lsn then we can move the tail beyond the grant write head. | ||
551 | */ | ||
552 | tic = ctx->ticket; | ||
553 | thdr.th_magic = XFS_TRANS_HEADER_MAGIC; | ||
554 | thdr.th_type = XFS_TRANS_CHECKPOINT; | ||
555 | thdr.th_tid = tic->t_tid; | ||
556 | thdr.th_num_items = num_iovecs; | ||
557 | lhdr.i_addr = (xfs_caddr_t)&thdr; | ||
558 | lhdr.i_len = sizeof(xfs_trans_header_t); | ||
559 | lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; | ||
560 | tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); | ||
561 | |||
562 | lvhdr.lv_niovecs = 1; | ||
563 | lvhdr.lv_iovecp = &lhdr; | ||
564 | lvhdr.lv_next = ctx->lv_chain; | ||
565 | |||
566 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); | ||
567 | if (error) | ||
568 | goto out_abort; | ||
569 | |||
570 | /* | ||
571 | * now that we've written the checkpoint into the log, strictly | ||
572 | * order the commit records so replay will get them in the right order. | ||
573 | */ | ||
574 | restart: | ||
575 | spin_lock(&cil->xc_cil_lock); | ||
576 | list_for_each_entry(new_ctx, &cil->xc_committing, committing) { | ||
577 | /* | ||
578 | * Higher sequences will wait for this one so skip them. | ||
579 | * Don't wait for our own sequence, either. | ||
580 | */ | ||
581 | if (new_ctx->sequence >= ctx->sequence) | ||
582 | continue; | ||
583 | if (!new_ctx->commit_lsn) { | ||
584 | /* | ||
585 | * It is still being pushed! Wait for the push to | ||
586 | * complete, then start again from the beginning. | ||
587 | */ | ||
588 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | ||
589 | goto restart; | ||
590 | } | ||
591 | } | ||
592 | spin_unlock(&cil->xc_cil_lock); | ||
593 | |||
594 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); | ||
595 | if (error || commit_lsn == -1) | ||
596 | goto out_abort; | ||
597 | |||
598 | /* attach all the transactions w/ busy extents to iclog */ | ||
599 | ctx->log_cb.cb_func = xlog_cil_committed; | ||
600 | ctx->log_cb.cb_arg = ctx; | ||
601 | error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb); | ||
602 | if (error) | ||
603 | goto out_abort; | ||
604 | |||
605 | /* | ||
606 | * now the checkpoint commit is complete and we've attached the | ||
607 | * callbacks to the iclog we can assign the commit LSN to the context | ||
608 | * and wake up anyone who is waiting for the commit to complete. | ||
609 | */ | ||
610 | spin_lock(&cil->xc_cil_lock); | ||
611 | ctx->commit_lsn = commit_lsn; | ||
612 | sv_broadcast(&cil->xc_commit_wait); | ||
613 | spin_unlock(&cil->xc_cil_lock); | ||
614 | |||
615 | /* release the hounds! */ | ||
616 | return xfs_log_release_iclog(log->l_mp, commit_iclog); | ||
617 | |||
618 | out_skip: | ||
619 | up_write(&cil->xc_ctx_lock); | ||
620 | out_free_ticket: | ||
621 | xfs_log_ticket_put(new_ctx->ticket); | ||
622 | kmem_free(new_ctx); | ||
623 | return 0; | ||
624 | |||
625 | out_abort: | ||
626 | xlog_cil_committed(ctx, XFS_LI_ABORTED); | ||
627 | return XFS_ERROR(EIO); | ||
628 | } | ||
629 | |||
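The restart loop in xlog_cil_push() is the classic "sleep on a condition, then rescan the list from the top" idiom; sv_wait() drops the spinlock while sleeping, so the list may have changed by wakeup. A user-space equivalent using a pthread condition variable (a sketch of the idiom with simplified list and ctx types, not the kernel primitives):

	#include <pthread.h>

	struct ctx { long sequence; long commit_lsn; struct ctx *next; };

	static pthread_mutex_t cil_lock    = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  commit_wait = PTHREAD_COND_INITIALIZER;
	static struct ctx *committing;	/* list head, protected by cil_lock */

	/* Wait until every earlier sequence has published a commit LSN,
	 * rescanning the list from scratch after each wakeup. */
	void wait_for_earlier_commits(long my_seq)
	{
		struct ctx *c;

		pthread_mutex_lock(&cil_lock);
	restart:
		for (c = committing; c; c = c->next) {
			if (c->sequence >= my_seq)
				continue;	/* later, or our own, sequence */
			if (!c->commit_lsn) {
				/* still being pushed: sleep, then rescan,
				 * because the list may have changed */
				pthread_cond_wait(&commit_wait, &cil_lock);
				goto restart;
			}
		}
		pthread_mutex_unlock(&cil_lock);
	}

	/* The pusher publishes its LSN and wakes all waiters. */
	void publish_commit_lsn(struct ctx *c, long lsn)
	{
		pthread_mutex_lock(&cil_lock);
		c->commit_lsn = lsn;
		pthread_cond_broadcast(&commit_wait);
		pthread_mutex_unlock(&cil_lock);
	}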
630 | /* | ||
631 | * Conditionally push the CIL based on the sequence passed in. | ||
632 | * | ||
633 | * We only need to push if we haven't already pushed the sequence | ||
634 | * number given. Hence the only time we will trigger a push here is | ||
635 | * if the push sequence is the same as the current context. | ||
636 | * | ||
637 | * We return the current commit lsn to allow the callers to determine if an | ||
638 | * iclog flush is necessary following this call. | ||
639 | * | ||
640 | * XXX: Initially, just push the CIL unconditionally and return whatever | ||
641 | * commit lsn is there. It'll be empty, so this is broken for now. | ||
642 | */ | ||
643 | xfs_lsn_t | ||
644 | xlog_cil_push_lsn( | ||
645 | struct log *log, | ||
646 | xfs_lsn_t push_seq) | ||
647 | { | ||
648 | struct xfs_cil *cil = log->l_cilp; | ||
649 | struct xfs_cil_ctx *ctx; | ||
650 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; | ||
651 | |||
652 | restart: | ||
653 | down_write(&cil->xc_ctx_lock); | ||
654 | ASSERT(push_seq <= cil->xc_ctx->sequence); | ||
655 | |||
656 | /* check to see if we need to force out the current context */ | ||
657 | if (push_seq == cil->xc_ctx->sequence) { | ||
658 | up_write(&cil->xc_ctx_lock); | ||
659 | xlog_cil_push(log, 1); | ||
660 | goto restart; | ||
661 | } | ||
662 | |||
663 | /* | ||
664 | * See if we can find a previous sequence still committing. | ||
665 | * We can drop the flush lock as soon as we have the cil lock | ||
666 | * because we are now only comparing contexts protected by | ||
667 | * the cil lock. | ||
668 | * | ||
669 | * We need to wait for all previous sequence commits to complete | ||
670 | * before allowing the force of push_seq to go ahead. Hence block | ||
671 | * on commits for those as well. | ||
672 | */ | ||
673 | spin_lock(&cil->xc_cil_lock); | ||
674 | up_write(&cil->xc_ctx_lock); | ||
675 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | ||
676 | if (ctx->sequence > push_seq) | ||
677 | continue; | ||
678 | if (!ctx->commit_lsn) { | ||
679 | /* | ||
680 | * It is still being pushed! Wait for the push to | ||
681 | * complete, then start again from the beginning. | ||
682 | */ | ||
683 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | ||
684 | goto restart; | ||
685 | } | ||
686 | if (ctx->sequence != push_seq) | ||
687 | continue; | ||
688 | /* found it! */ | ||
689 | commit_lsn = ctx->commit_lsn; | ||
690 | } | ||
691 | spin_unlock(&cil->xc_cil_lock); | ||
692 | return commit_lsn; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Check if the current log item was first committed in this sequence. | ||
697 | * We can't rely on just the log item being in the CIL, we have to check | ||
698 | * the recorded commit sequence number. | ||
699 | * | ||
700 | * Note: for this to be used in a non-racy manner, it has to be called with | ||
701 | * CIL flushing locked out. As a result, it should only be used during the | ||
702 | * transaction commit process when deciding what to format into the item. | ||
703 | */ | ||
704 | bool | ||
705 | xfs_log_item_in_current_chkpt( | ||
706 | struct xfs_log_item *lip) | ||
707 | { | ||
708 | struct xfs_cil_ctx *ctx; | ||
709 | |||
710 | if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
711 | return false; | ||
712 | if (list_empty(&lip->li_cil)) | ||
713 | return false; | ||
714 | |||
715 | ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; | ||
716 | |||
717 | /* | ||
718 | * li_seq is written on the first commit of a log item to record the | ||
719 | * first checkpoint it is written to. Hence if it is different to the | ||
720 | * current sequence, we're in a new checkpoint. | ||
721 | */ | ||
722 | if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) | ||
723 | return false; | ||
724 | return true; | ||
725 | } | ||
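The membership test works because xlog_cil_insert() stamps li_seq only on the item's first insertion into a checkpoint. Reduced to its essentials (user-space C, hypothetical types):

	#include <stdbool.h>

	struct item { long seq; };	/* 0 means "never committed" */

	/* Stamp the item with the current checkpoint sequence on its
	 * first insertion only... */
	static void stamp(struct item *it, long current_seq)
	{
		if (!it->seq)
			it->seq = current_seq;
	}

	/* ...so later commits can tell whether that first insertion
	 * happened in the checkpoint that is still being built. */
	static bool in_current_chkpt(const struct item *it, long current_seq)
	{
		return it->seq == current_seq;
	}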
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 9cf695154451..8c072618965c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -152,8 +152,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
154 | shutdown */ | 154 | shutdown */ |
155 | typedef __uint32_t xlog_tid_t; | ||
156 | |||
157 | 155 | ||
158 | #ifdef __KERNEL__ | 156 | #ifdef __KERNEL__ |
159 | /* | 157 | /* |
@@ -379,6 +377,99 @@ typedef struct xlog_in_core { | |||
379 | } xlog_in_core_t; | 377 | } xlog_in_core_t; |
380 | 378 | ||
381 | /* | 379 | /* |
380 | * The CIL context is used to aggregate per-transaction details as well as be | ||
381 | * passed to the iclog for checkpoint post-commit processing. After being | ||
382 | * passed to the iclog, another context needs to be allocated for tracking the | ||
383 | * next set of transactions to be aggregated into a checkpoint. | ||
384 | */ | ||
385 | struct xfs_cil; | ||
386 | |||
387 | struct xfs_cil_ctx { | ||
388 | struct xfs_cil *cil; | ||
389 | xfs_lsn_t sequence; /* chkpt sequence # */ | ||
390 | xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ | ||
391 | xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ | ||
392 | struct xlog_ticket *ticket; /* chkpt ticket */ | ||
393 | int nvecs; /* number of regions */ | ||
394 | int space_used; /* aggregate size of regions */ | ||
395 | struct list_head busy_extents; /* busy extents in chkpt */ | ||
396 | struct xfs_log_vec *lv_chain; /* logvecs being pushed */ | ||
397 | xfs_log_callback_t log_cb; /* completion callback hook. */ | ||
398 | struct list_head committing; /* ctx committing list */ | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * Committed Item List structure | ||
403 | * | ||
404 | * This structure is used to track log items that have been committed but not | ||
405 | * yet written into the log. It is used only when the delayed logging mount | ||
406 | * option is enabled. | ||
407 | * | ||
408 | * This structure tracks the list of committing checkpoint contexts so | ||
409 | * we can avoid the problem of having to hold out new transactions during a | ||
410 | * flush until we have the commit record LSN of the checkpoint. We can | ||
411 | * traverse the list of committing contexts in xlog_cil_push_lsn() to find a | ||
412 | * sequence match and extract the commit LSN directly from there. If the | ||
413 | * checkpoint is still in the process of committing, we can block waiting for | ||
414 | * the commit LSN to be determined as well. This should make synchronous | ||
415 | * operations almost as efficient as the old logging methods. | ||
416 | */ | ||
417 | struct xfs_cil { | ||
418 | struct log *xc_log; | ||
419 | struct list_head xc_cil; | ||
420 | spinlock_t xc_cil_lock; | ||
421 | struct xfs_cil_ctx *xc_ctx; | ||
422 | struct rw_semaphore xc_ctx_lock; | ||
423 | struct list_head xc_committing; | ||
424 | sv_t xc_commit_wait; | ||
425 | }; | ||
426 | |||
427 | /* | ||
428 | * The amount of log space we allow the CIL to aggregate is difficult to size. | ||
429 | * Whatever we choose, we have to make sure we can get a reservation for the | ||
430 | * log space effectively: large enough to capture sufficient relogging to | ||
431 | * reduce log buffer IO significantly, but not so large that it overruns the | ||
432 | * log or induces too much latency when writing out through the iclogs. We track both | ||
433 | * space consumed and the number of vectors in the checkpoint context, so we | ||
434 | * need to decide which to use for limiting. | ||
435 | * | ||
436 | * Every log buffer we write out during a push needs a header reserved, which | ||
437 | * is at least one sector and more for v2 logs. Hence we need a reservation of | ||
438 | * at least 512 bytes per 32k of log space just for the LR headers. That means | ||
439 | * 16KB of reservation per megabyte of delayed logging space we will consume, | ||
440 | * plus various headers. The number of headers will vary based on the number | ||
441 | * of I/O vectors, so limiting on a specific number of vectors is going to result | ||
442 | * in transactions of varying size. IOWs, it is more consistent to track and | ||
443 | * limit space consumed in the log rather than by the number of objects being | ||
444 | * logged in order to prevent checkpoint ticket overruns. | ||
445 | * | ||
446 | * Further, use of static reservations through the log grant mechanism is | ||
447 | * problematic. It introduces a lot of complexity (e.g. reserve grant vs write | ||
448 | * grant) and a significant deadlock potential because regranting write space | ||
449 | * can block on log pushes. Hence if we have to regrant log space during a log | ||
450 | * push, we can deadlock. | ||
451 | * | ||
452 | * However, we can avoid this by use of a dynamic "reservation stealing" | ||
453 | * technique during transaction commit whereby unused reservation space in the | ||
454 | * transaction ticket is transferred to the CIL ctx commit ticket to cover the | ||
455 | * space needed by the checkpoint transaction. This means that we never need to | ||
456 | * specifically reserve space for the CIL checkpoint transaction, nor do we | ||
457 | * need to regrant space once the checkpoint completes. This also means the | ||
458 | * checkpoint transaction ticket is specific to the checkpoint context, rather | ||
459 | * than the CIL itself. | ||
460 | * | ||
461 | * With dynamic reservations, we can basically make up arbitrary limits for the | ||
462 | * checkpoint size so long as they don't violate any other size rules. Hence | ||
463 | * the initial maximum size for the checkpoint transaction will be set to a | ||
464 | * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit | ||
465 | * right now based on the latency of writing out a large amount of data through | ||
466 | * the circular iclog buffers. | ||
467 | */ | ||
468 | |||
469 | #define XLOG_CIL_SPACE_LIMIT(log) \ | ||
470 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
471 | |||
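Two worked numbers behind the comment and macro above, computed in stand-alone C (the 32k/512-byte figures are the illustrative sizes the comment itself uses):

	#include <stdio.h>

	#define CIL_SPACE_LIMIT(logsize) \
		((logsize) / 4 < 8 * 1024 * 1024 ? (logsize) / 4 : 8 * 1024 * 1024)

	int main(void)
	{
		/* ~512 bytes of record header per 32k of log space works out
		 * to 16k of header reservation per megabyte of checkpoint. */
		printf("per-MB header overhead: %d bytes\n",
		       1024 * 1024 / (32 * 1024) * 512);

		/* push threshold: a quarter of the log, capped at 8MB */
		printf("16MB log  -> limit %d bytes\n",
		       CIL_SPACE_LIMIT(16 * 1024 * 1024));
		printf("128MB log -> limit %d bytes\n",
		       CIL_SPACE_LIMIT(128 * 1024 * 1024));
		return 0;
	}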
472 | /* | ||
382 | * The reservation head lsn is not made up of a cycle number and block number. | 473 | * The reservation head lsn is not made up of a cycle number and block number. |
383 | * Instead, it uses a cycle number and byte number. Logs don't expect to | 474 | * Instead, it uses a cycle number and byte number. Logs don't expect to |
384 | * overflow 31 bits worth of byte offset, so using a byte number will mean | 475 | * overflow 31 bits worth of byte offset, so using a byte number will mean |
@@ -388,6 +479,7 @@ typedef struct log { | |||
388 | /* The following fields don't need locking */ | 479 | /* The following fields don't need locking */ |
389 | struct xfs_mount *l_mp; /* mount point */ | 480 | struct xfs_mount *l_mp; /* mount point */ |
390 | struct xfs_ail *l_ailp; /* AIL log is working with */ | 481 | struct xfs_ail *l_ailp; /* AIL log is working with */ |
482 | struct xfs_cil *l_cilp; /* CIL log is working with */ | ||
391 | struct xfs_buf *l_xbuf; /* extra buffer for log | 483 | struct xfs_buf *l_xbuf; /* extra buffer for log |
392 | * wrapping */ | 484 | * wrapping */ |
393 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 485 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
@@ -438,14 +530,17 @@ typedef struct log { | |||
438 | 530 | ||
439 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 531 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
440 | 532 | ||
441 | |||
442 | /* common routines */ | 533 | /* common routines */ |
443 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | 534 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
444 | extern int xlog_recover(xlog_t *log); | 535 | extern int xlog_recover(xlog_t *log); |
445 | extern int xlog_recover_finish(xlog_t *log); | 536 | extern int xlog_recover_finish(xlog_t *log); |
446 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 537 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
447 | 538 | ||
448 | extern kmem_zone_t *xfs_log_ticket_zone; | 539 | extern kmem_zone_t *xfs_log_ticket_zone; |
540 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, | ||
541 | int count, char client, uint xflags, | ||
542 | int alloc_flags); | ||
543 | |||
449 | 544 | ||
450 | static inline void | 545 | static inline void |
451 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | 546 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) |
@@ -455,6 +550,21 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | |||
455 | *off += bytes; | 550 | *off += bytes; |
456 | } | 551 | } |
457 | 552 | ||
553 | void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); | ||
554 | int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | ||
555 | struct xlog_ticket *tic, xfs_lsn_t *start_lsn, | ||
556 | xlog_in_core_t **commit_iclog, uint flags); | ||
557 | |||
558 | /* | ||
559 | * Committed Item List interfaces | ||
560 | */ | ||
561 | int xlog_cil_init(struct log *log); | ||
562 | void xlog_cil_init_post_recovery(struct log *log); | ||
563 | void xlog_cil_destroy(struct log *log); | ||
564 | |||
565 | int xlog_cil_push(struct log *log, int push_now); | ||
566 | xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); | ||
567 | |||
458 | /* | 568 | /* |
459 | * Unmount record type is used as a pseudo transaction type for the ticket. | 569 | * Unmount record type is used as a pseudo transaction type for the ticket. |
460 | * It's value must be outside the range of XFS_TRANS_* values. | 570 | * It's value must be outside the range of XFS_TRANS_* values. |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0de08e366315..14a69aec2c0b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -1576,7 +1576,7 @@ xlog_recover_reorder_trans( | |||
1576 | 1576 | ||
1577 | switch (ITEM_TYPE(item)) { | 1577 | switch (ITEM_TYPE(item)) { |
1578 | case XFS_LI_BUF: | 1578 | case XFS_LI_BUF: |
1579 | if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { | 1579 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1580 | trace_xfs_log_recover_item_reorder_head(log, | 1580 | trace_xfs_log_recover_item_reorder_head(log, |
1581 | trans, item, pass); | 1581 | trans, item, pass); |
1582 | list_move(&item->ri_list, &trans->r_itemq); | 1582 | list_move(&item->ri_list, &trans->r_itemq); |
@@ -1638,7 +1638,7 @@ xlog_recover_do_buffer_pass1( | |||
1638 | /* | 1638 | /* |
1639 | * If this isn't a cancel buffer item, then just return. | 1639 | * If this isn't a cancel buffer item, then just return. |
1640 | */ | 1640 | */ |
1641 | if (!(flags & XFS_BLI_CANCEL)) { | 1641 | if (!(flags & XFS_BLF_CANCEL)) { |
1642 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1642 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
1643 | return; | 1643 | return; |
1644 | } | 1644 | } |
@@ -1696,7 +1696,7 @@ xlog_recover_do_buffer_pass1( | |||
1696 | * Check to see whether the buffer being recovered has a corresponding | 1696 | * Check to see whether the buffer being recovered has a corresponding |
1697 | * entry in the buffer cancel record table. If it does then return 1 | 1697 | * entry in the buffer cancel record table. If it does then return 1 |
1698 | * so that it will be cancelled, otherwise return 0. If the buffer is | 1698 | * so that it will be cancelled, otherwise return 0. If the buffer is |
1699 | * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement | 1699 | * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement |
1700 | * the refcount on the entry in the table and remove it from the table | 1700 | * the refcount on the entry in the table and remove it from the table |
1701 | * if this is the last reference. | 1701 | * if this is the last reference. |
1702 | * | 1702 | * |
@@ -1721,7 +1721,7 @@ xlog_check_buffer_cancelled( | |||
1721 | * There is nothing in the table built in pass one, | 1721 | * There is nothing in the table built in pass one, |
1722 | * so this buffer must not be cancelled. | 1722 | * so this buffer must not be cancelled. |
1723 | */ | 1723 | */ |
1724 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1724 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1725 | return 0; | 1725 | return 0; |
1726 | } | 1726 | } |
1727 | 1727 | ||
@@ -1733,7 +1733,7 @@ xlog_check_buffer_cancelled( | |||
1733 | * There is no corresponding entry in the table built | 1733 | * There is no corresponding entry in the table built |
1734 | * in pass one, so this buffer has not been cancelled. | 1734 | * in pass one, so this buffer has not been cancelled. |
1735 | */ | 1735 | */ |
1736 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1736 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1737 | return 0; | 1737 | return 0; |
1738 | } | 1738 | } |
1739 | 1739 | ||
@@ -1752,7 +1752,7 @@ xlog_check_buffer_cancelled( | |||
1752 | * one in the table and remove it if this is the | 1752 | * one in the table and remove it if this is the |
1753 | * last reference. | 1753 | * last reference. |
1754 | */ | 1754 | */ |
1755 | if (flags & XFS_BLI_CANCEL) { | 1755 | if (flags & XFS_BLF_CANCEL) { |
1756 | bcp->bc_refcount--; | 1756 | bcp->bc_refcount--; |
1757 | if (bcp->bc_refcount == 0) { | 1757 | if (bcp->bc_refcount == 0) { |
1758 | if (prevp == NULL) { | 1758 | if (prevp == NULL) { |
@@ -1772,7 +1772,7 @@ xlog_check_buffer_cancelled( | |||
1772 | * We didn't find a corresponding entry in the table, so | 1772 | * We didn't find a corresponding entry in the table, so |
1773 | * return 0 so that the buffer is NOT cancelled. | 1773 | * return 0 so that the buffer is NOT cancelled. |
1774 | */ | 1774 | */ |
1775 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1775 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1776 | return 0; | 1776 | return 0; |
1777 | } | 1777 | } |
1778 | 1778 | ||
@@ -1874,8 +1874,8 @@ xlog_recover_do_inode_buffer( | |||
1874 | nbits = xfs_contig_bits(data_map, map_size, | 1874 | nbits = xfs_contig_bits(data_map, map_size, |
1875 | bit); | 1875 | bit); |
1876 | ASSERT(nbits > 0); | 1876 | ASSERT(nbits > 0); |
1877 | reg_buf_offset = bit << XFS_BLI_SHIFT; | 1877 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1878 | reg_buf_bytes = nbits << XFS_BLI_SHIFT; | 1878 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
1879 | item_index++; | 1879 | item_index++; |
1880 | } | 1880 | } |
1881 | 1881 | ||
@@ -1889,7 +1889,7 @@ xlog_recover_do_inode_buffer( | |||
1889 | } | 1889 | } |
1890 | 1890 | ||
1891 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1891 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1892 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0); | 1892 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
1893 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); | 1893 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); |
1894 | 1894 | ||
1895 | /* | 1895 | /* |
@@ -1955,9 +1955,9 @@ xlog_recover_do_reg_buffer( | |||
1955 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1955 | nbits = xfs_contig_bits(data_map, map_size, bit); |
1956 | ASSERT(nbits > 0); | 1956 | ASSERT(nbits > 0); |
1957 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1957 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1958 | ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); | 1958 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
1959 | ASSERT(XFS_BUF_COUNT(bp) >= | 1959 | ASSERT(XFS_BUF_COUNT(bp) >= |
1960 | ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); | 1960 | ((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT)); |
1961 | 1961 | ||
1962 | /* | 1962 | /* |
1963 | * Do a sanity check if this is a dquot buffer. Just checking | 1963 | * Do a sanity check if this is a dquot buffer. Just checking |
@@ -1966,7 +1966,7 @@ xlog_recover_do_reg_buffer( | |||
1966 | */ | 1966 | */ |
1967 | error = 0; | 1967 | error = 0; |
1968 | if (buf_f->blf_flags & | 1968 | if (buf_f->blf_flags & |
1969 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 1969 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
1970 | if (item->ri_buf[i].i_addr == NULL) { | 1970 | if (item->ri_buf[i].i_addr == NULL) { |
1971 | cmn_err(CE_ALERT, | 1971 | cmn_err(CE_ALERT, |
1972 | "XFS: NULL dquot in %s.", __func__); | 1972 | "XFS: NULL dquot in %s.", __func__); |
@@ -1987,9 +1987,9 @@ xlog_recover_do_reg_buffer( | |||
1987 | } | 1987 | } |
1988 | 1988 | ||
1989 | memcpy(xfs_buf_offset(bp, | 1989 | memcpy(xfs_buf_offset(bp, |
1990 | (uint)bit << XFS_BLI_SHIFT), /* dest */ | 1990 | (uint)bit << XFS_BLF_SHIFT), /* dest */ |
1991 | item->ri_buf[i].i_addr, /* source */ | 1991 | item->ri_buf[i].i_addr, /* source */ |
1992 | nbits<<XFS_BLI_SHIFT); /* length */ | 1992 | nbits<<XFS_BLF_SHIFT); /* length */ |
1993 | next: | 1993 | next: |
1994 | i++; | 1994 | i++; |
1995 | bit += nbits; | 1995 | bit += nbits; |
@@ -2148,11 +2148,11 @@ xlog_recover_do_dquot_buffer( | |||
2148 | } | 2148 | } |
2149 | 2149 | ||
2150 | type = 0; | 2150 | type = 0; |
2151 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) | 2151 | if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) |
2152 | type |= XFS_DQ_USER; | 2152 | type |= XFS_DQ_USER; |
2153 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) | 2153 | if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) |
2154 | type |= XFS_DQ_PROJ; | 2154 | type |= XFS_DQ_PROJ; |
2155 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) | 2155 | if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) |
2156 | type |= XFS_DQ_GROUP; | 2156 | type |= XFS_DQ_GROUP; |
2157 | /* | 2157 | /* |
2158 | * This type of quotas was turned off, so ignore this buffer | 2158 | * This type of quotas was turned off, so ignore this buffer |
@@ -2173,7 +2173,7 @@ xlog_recover_do_dquot_buffer( | |||
2173 | * here which overlaps that may be stale. | 2173 | * here which overlaps that may be stale. |
2174 | * | 2174 | * |
2175 | * When meta-data buffers are freed at run time we log a buffer item | 2175 | * When meta-data buffers are freed at run time we log a buffer item |
2176 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies | 2176 | * with the XFS_BLF_CANCEL bit set to indicate that previous copies |
2177 | * of the buffer in the log should not be replayed at recovery time. | 2177 | * of the buffer in the log should not be replayed at recovery time. |
2178 | * This is so that if the blocks covered by the buffer are reused for | 2178 | * This is so that if the blocks covered by the buffer are reused for |
2179 | * file data before we crash we don't end up replaying old, freed | 2179 | * file data before we crash we don't end up replaying old, freed |
@@ -2207,7 +2207,7 @@ xlog_recover_do_buffer_trans( | |||
2207 | if (pass == XLOG_RECOVER_PASS1) { | 2207 | if (pass == XLOG_RECOVER_PASS1) { |
2208 | /* | 2208 | /* |
2209 | * In this pass we're only looking for buf items | 2209 | * In this pass we're only looking for buf items |
2210 | * with the XFS_BLI_CANCEL bit set. | 2210 | * with the XFS_BLF_CANCEL bit set. |
2211 | */ | 2211 | */ |
2212 | xlog_recover_do_buffer_pass1(log, buf_f); | 2212 | xlog_recover_do_buffer_pass1(log, buf_f); |
2213 | return 0; | 2213 | return 0; |
@@ -2244,7 +2244,7 @@ xlog_recover_do_buffer_trans( | |||
2244 | 2244 | ||
2245 | mp = log->l_mp; | 2245 | mp = log->l_mp; |
2246 | buf_flags = XBF_LOCK; | 2246 | buf_flags = XBF_LOCK; |
2247 | if (!(flags & XFS_BLI_INODE_BUF)) | 2247 | if (!(flags & XFS_BLF_INODE_BUF)) |
2248 | buf_flags |= XBF_MAPPED; | 2248 | buf_flags |= XBF_MAPPED; |
2249 | 2249 | ||
2250 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2250 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); |
@@ -2257,10 +2257,10 @@ xlog_recover_do_buffer_trans( | |||
2257 | } | 2257 | } |
2258 | 2258 | ||
2259 | error = 0; | 2259 | error = 0; |
2260 | if (flags & XFS_BLI_INODE_BUF) { | 2260 | if (flags & XFS_BLF_INODE_BUF) { |
2261 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2261 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2262 | } else if (flags & | 2262 | } else if (flags & |
2263 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 2263 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2264 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2264 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2265 | } else { | 2265 | } else { |
2266 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); | 2266 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h index 75d749207258..1c55ccbb379d 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/xfs_log_recover.h | |||
@@ -28,7 +28,7 @@ | |||
28 | #define XLOG_RHASH(tid) \ | 28 | #define XLOG_RHASH(tid) \ |
29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) | 29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) |
30 | 30 | ||
31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1) | 31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1) |
32 | 32 | ||
33 | 33 | ||
34 | /* | 34 | /* |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9ff48a16a7ee..1d2c7eed4eda 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -268,6 +268,7 @@ typedef struct xfs_mount { | |||
268 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops | 268 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops |
269 | must be synchronous except | 269 | must be synchronous except |
270 | for space allocations */ | 270 | for space allocations */ |
271 | #define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ | ||
271 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ | 272 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ |
272 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) | 273 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) |
273 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem | 274 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index be578ecb4af2..ce558efa2ea0 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "xfs_trans_priv.h" | 44 | #include "xfs_trans_priv.h" |
45 | #include "xfs_trans_space.h" | 45 | #include "xfs_trans_space.h" |
46 | #include "xfs_inode_item.h" | 46 | #include "xfs_inode_item.h" |
47 | #include "xfs_trace.h" | ||
47 | 48 | ||
48 | kmem_zone_t *xfs_trans_zone; | 49 | kmem_zone_t *xfs_trans_zone; |
49 | 50 | ||
@@ -243,9 +244,8 @@ _xfs_trans_alloc( | |||
243 | tp->t_type = type; | 244 | tp->t_type = type; |
244 | tp->t_mountp = mp; | 245 | tp->t_mountp = mp; |
245 | tp->t_items_free = XFS_LIC_NUM_SLOTS; | 246 | tp->t_items_free = XFS_LIC_NUM_SLOTS; |
246 | tp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
247 | xfs_lic_init(&(tp->t_items)); | 247 | xfs_lic_init(&(tp->t_items)); |
248 | XFS_LBC_INIT(&(tp->t_busy)); | 248 | INIT_LIST_HEAD(&tp->t_busy); |
249 | return tp; | 249 | return tp; |
250 | } | 250 | } |
251 | 251 | ||
@@ -255,8 +255,13 @@ _xfs_trans_alloc( | |||
255 | */ | 255 | */ |
256 | STATIC void | 256 | STATIC void |
257 | xfs_trans_free( | 257 | xfs_trans_free( |
258 | xfs_trans_t *tp) | 258 | struct xfs_trans *tp) |
259 | { | 259 | { |
260 | struct xfs_busy_extent *busyp, *n; | ||
261 | |||
262 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
263 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
264 | |||
260 | atomic_dec(&tp->t_mountp->m_active_trans); | 265 | atomic_dec(&tp->t_mountp->m_active_trans); |
261 | xfs_trans_free_dqinfo(tp); | 266 | xfs_trans_free_dqinfo(tp); |
262 | kmem_zone_free(xfs_trans_zone, tp); | 267 | kmem_zone_free(xfs_trans_zone, tp); |
@@ -285,9 +290,8 @@ xfs_trans_dup( | |||
285 | ntp->t_type = tp->t_type; | 290 | ntp->t_type = tp->t_type; |
286 | ntp->t_mountp = tp->t_mountp; | 291 | ntp->t_mountp = tp->t_mountp; |
287 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; | 292 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; |
288 | ntp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
289 | xfs_lic_init(&(ntp->t_items)); | 293 | xfs_lic_init(&(ntp->t_items)); |
290 | XFS_LBC_INIT(&(ntp->t_busy)); | 294 | INIT_LIST_HEAD(&ntp->t_busy); |
291 | 295 | ||
292 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 296 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
293 | ASSERT(tp->t_ticket != NULL); | 297 | ASSERT(tp->t_ticket != NULL); |
@@ -423,7 +427,6 @@ undo_blocks: | |||
423 | return error; | 427 | return error; |
424 | } | 428 | } |
425 | 429 | ||
426 | |||
427 | /* | 430 | /* |
428 | * Record the indicated change to the given field for application | 431 | * Record the indicated change to the given field for application |
429 | * to the file system's superblock when the transaction commits. | 432 | * to the file system's superblock when the transaction commits. |
@@ -652,7 +655,7 @@ xfs_trans_apply_sb_deltas( | |||
652 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we | 655 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we |
653 | * still need to update the incore superblock with the changes. | 656 | * still need to update the incore superblock with the changes. |
654 | */ | 657 | */ |
655 | STATIC void | 658 | void |
656 | xfs_trans_unreserve_and_mod_sb( | 659 | xfs_trans_unreserve_and_mod_sb( |
657 | xfs_trans_t *tp) | 660 | xfs_trans_t *tp) |
658 | { | 661 | { |
@@ -880,7 +883,7 @@ xfs_trans_fill_vecs( | |||
880 | * they could be immediately flushed and we'd have to race with the flusher | 883 | * they could be immediately flushed and we'd have to race with the flusher |
881 | * trying to pull the item from the AIL as we add it. | 884 | * trying to pull the item from the AIL as we add it. |
882 | */ | 885 | */ |
883 | static void | 886 | void |
884 | xfs_trans_item_committed( | 887 | xfs_trans_item_committed( |
885 | struct xfs_log_item *lip, | 888 | struct xfs_log_item *lip, |
886 | xfs_lsn_t commit_lsn, | 889 | xfs_lsn_t commit_lsn, |
@@ -930,26 +933,6 @@ xfs_trans_item_committed( | |||
930 | IOP_UNPIN(lip); | 933 | IOP_UNPIN(lip); |
931 | } | 934 | } |
932 | 935 | ||
933 | /* Clear all the per-AG busy list items listed in this transaction */ | ||
934 | static void | ||
935 | xfs_trans_clear_busy_extents( | ||
936 | struct xfs_trans *tp) | ||
937 | { | ||
938 | xfs_log_busy_chunk_t *lbcp; | ||
939 | xfs_log_busy_slot_t *lbsp; | ||
940 | int i; | ||
941 | |||
942 | for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) { | ||
943 | i = 0; | ||
944 | for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) { | ||
945 | if (XFS_LBC_ISFREE(lbcp, i)) | ||
946 | continue; | ||
947 | xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx); | ||
948 | } | ||
949 | } | ||
950 | xfs_trans_free_busy(tp); | ||
951 | } | ||
952 | |||
953 | /* | 936 | /* |
954 | * This is typically called by the LM when a transaction has been fully | 937 | * This is typically called by the LM when a transaction has been fully |
955 | * committed to disk. It needs to unpin the items which have | 938 | * committed to disk. It needs to unpin the items which have |
@@ -984,7 +967,6 @@ xfs_trans_committed( | |||
984 | kmem_free(licp); | 967 | kmem_free(licp); |
985 | } | 968 | } |
986 | 969 | ||
987 | xfs_trans_clear_busy_extents(tp); | ||
988 | xfs_trans_free(tp); | 970 | xfs_trans_free(tp); |
989 | } | 971 | } |
990 | 972 | ||
@@ -1012,8 +994,7 @@ xfs_trans_uncommit( | |||
1012 | xfs_trans_unreserve_and_mod_sb(tp); | 994 | xfs_trans_unreserve_and_mod_sb(tp); |
1013 | xfs_trans_unreserve_and_mod_dquots(tp); | 995 | xfs_trans_unreserve_and_mod_dquots(tp); |
1014 | 996 | ||
1015 | xfs_trans_free_items(tp, flags); | 997 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
1016 | xfs_trans_free_busy(tp); | ||
1017 | xfs_trans_free(tp); | 998 | xfs_trans_free(tp); |
1018 | } | 999 | } |
1019 | 1000 | ||
@@ -1075,6 +1056,8 @@ xfs_trans_commit_iclog( | |||
1075 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); | 1056 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); |
1076 | 1057 | ||
1077 | tp->t_commit_lsn = *commit_lsn; | 1058 | tp->t_commit_lsn = *commit_lsn; |
1059 | trace_xfs_trans_commit_lsn(tp); | ||
1060 | |||
1078 | if (nvec > XFS_TRANS_LOGVEC_COUNT) | 1061 | if (nvec > XFS_TRANS_LOGVEC_COUNT) |
1079 | kmem_free(log_vector); | 1062 | kmem_free(log_vector); |
1080 | 1063 | ||
@@ -1161,6 +1144,93 @@ xfs_trans_commit_iclog( | |||
1161 | return xfs_log_release_iclog(mp, commit_iclog); | 1144 | return xfs_log_release_iclog(mp, commit_iclog); |
1162 | } | 1145 | } |
1163 | 1146 | ||
1147 | /* | ||
1148 | * Walk the log items and allocate log vector structures for | ||
1149 | * each item large enough to fit all the vectors they require. | ||
1150 | * Note that this format differs from the old log vector format in | ||
1151 | * that there is no transaction header in these log vectors. | ||
1152 | */ | ||
1153 | STATIC struct xfs_log_vec * | ||
1154 | xfs_trans_alloc_log_vecs( | ||
1155 | xfs_trans_t *tp) | ||
1156 | { | ||
1157 | xfs_log_item_desc_t *lidp; | ||
1158 | struct xfs_log_vec *lv = NULL; | ||
1159 | struct xfs_log_vec *ret_lv = NULL; | ||
1160 | |||
1161 | lidp = xfs_trans_first_item(tp); | ||
1162 | |||
1163 | /* Bail out if we didn't find a log item. */ | ||
1164 | if (!lidp) { | ||
1165 | ASSERT(0); | ||
1166 | return NULL; | ||
1167 | } | ||
1168 | |||
1169 | while (lidp != NULL) { | ||
1170 | struct xfs_log_vec *new_lv; | ||
1171 | |||
1172 | /* Skip items which aren't dirty in this transaction. */ | ||
1173 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { | ||
1174 | lidp = xfs_trans_next_item(tp, lidp); | ||
1175 | continue; | ||
1176 | } | ||
1177 | |||
1178 | /* Skip items that do not have any vectors for writing */ | ||
1179 | lidp->lid_size = IOP_SIZE(lidp->lid_item); | ||
1180 | if (!lidp->lid_size) { | ||
1181 | lidp = xfs_trans_next_item(tp, lidp); | ||
1182 | continue; | ||
1183 | } | ||
1184 | |||
1185 | new_lv = kmem_zalloc(sizeof(*new_lv) + | ||
1186 | lidp->lid_size * sizeof(struct xfs_log_iovec), | ||
1187 | KM_SLEEP); | ||
1188 | |||
1189 | /* The allocated iovec region lies beyond the log vector. */ | ||
1190 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; | ||
1191 | new_lv->lv_niovecs = lidp->lid_size; | ||
1192 | new_lv->lv_item = lidp->lid_item; | ||
1193 | if (!ret_lv) | ||
1194 | ret_lv = new_lv; | ||
1195 | else | ||
1196 | lv->lv_next = new_lv; | ||
1197 | lv = new_lv; | ||
1198 | lidp = xfs_trans_next_item(tp, lidp); | ||
1199 | } | ||
1200 | |||
1201 | return ret_lv; | ||
1202 | } | ||
1203 | |||
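xfs_trans_alloc_log_vecs() above sizes a single allocation to hold both the vector header and its iovec array, then points lv_iovecp just past the struct. The same single-allocation trick in isolation (user-space C; type names are placeholders):

	#include <stdio.h>
	#include <stdlib.h>

	struct iovec_ { void *addr; int len; };

	struct log_vec {
		struct log_vec *next;
		struct iovec_  *iovecp;	/* points into the same allocation */
		int		niovecs;
	};

	/* One allocation covers the header and its iovec array; the array
	 * lives immediately after the struct, so a single free releases
	 * both. */
	static struct log_vec *alloc_log_vec(int niovecs)
	{
		struct log_vec *lv;

		lv = calloc(1, sizeof(*lv) + niovecs * sizeof(struct iovec_));
		if (!lv)
			return NULL;
		lv->iovecp = (struct iovec_ *)&lv[1];
		lv->niovecs = niovecs;
		return lv;
	}

	int main(void)
	{
		struct log_vec *lv = alloc_log_vec(4);

		printf("array starts %td bytes after the header\n",
		       (char *)lv->iovecp - (char *)lv);
		free(lv);	/* frees header and array together */
		return 0;
	}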
1204 | static int | ||
1205 | xfs_trans_commit_cil( | ||
1206 | struct xfs_mount *mp, | ||
1207 | struct xfs_trans *tp, | ||
1208 | xfs_lsn_t *commit_lsn, | ||
1209 | int flags) | ||
1210 | { | ||
1211 | struct xfs_log_vec *log_vector; | ||
1212 | int error; | ||
1213 | |||
1214 | /* | ||
1215 | * Get each log item to allocate a vector structure for | ||
1216 | * the log item to pass to the log write code. The | ||
1217 | * CIL commit code will format the vector and save it away. | ||
1218 | */ | ||
1219 | log_vector = xfs_trans_alloc_log_vecs(tp); | ||
1220 | if (!log_vector) | ||
1221 | return ENOMEM; | ||
1222 | |||
1223 | error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); | ||
1224 | if (error) | ||
1225 | return error; | ||
1226 | |||
1227 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
1228 | |||
1229 | /* xfs_trans_free_items() unlocks them first */ | ||
1230 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
1231 | xfs_trans_free(tp); | ||
1232 | return 0; | ||
1233 | } | ||
1164 | 1234 | ||
1165 | /* | 1235 | /* |
1166 | * xfs_trans_commit | 1236 | * xfs_trans_commit |
@@ -1221,7 +1291,11 @@ _xfs_trans_commit( | |||
1221 | xfs_trans_apply_sb_deltas(tp); | 1291 | xfs_trans_apply_sb_deltas(tp); |
1222 | xfs_trans_apply_dquot_deltas(tp); | 1292 | xfs_trans_apply_dquot_deltas(tp); |
1223 | 1293 | ||
1224 | error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); | 1294 | if (mp->m_flags & XFS_MOUNT_DELAYLOG) |
1295 | error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags); | ||
1296 | else | ||
1297 | error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); | ||
1298 | |||
1225 | if (error == ENOMEM) { | 1299 | if (error == ENOMEM) { |
1226 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 1300 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
1227 | error = XFS_ERROR(EIO); | 1301 | error = XFS_ERROR(EIO); |
@@ -1259,8 +1333,7 @@ out_unreserve: | |||
1259 | error = XFS_ERROR(EIO); | 1333 | error = XFS_ERROR(EIO); |
1260 | } | 1334 | } |
1261 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1335 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1262 | xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0); | 1336 | xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); |
1263 | xfs_trans_free_busy(tp); | ||
1264 | xfs_trans_free(tp); | 1337 | xfs_trans_free(tp); |
1265 | 1338 | ||
1266 | XFS_STATS_INC(xs_trans_empty); | 1339 | XFS_STATS_INC(xs_trans_empty); |
@@ -1338,8 +1411,7 @@ xfs_trans_cancel( | |||
1338 | /* mark this thread as no longer being in a transaction */ | 1411 | /* mark this thread as no longer being in a transaction */ |
1339 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1412 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1340 | 1413 | ||
1341 | xfs_trans_free_items(tp, flags); | 1414 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
1342 | xfs_trans_free_busy(tp); | ||
1343 | xfs_trans_free(tp); | 1415 | xfs_trans_free(tp); |
1344 | } | 1416 | } |
1345 | 1417 | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c62beee0921e..8c69e7824f68 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -106,7 +106,8 @@ typedef struct xfs_trans_header { | |||
106 | #define XFS_TRANS_GROWFSRT_FREE 39 | 106 | #define XFS_TRANS_GROWFSRT_FREE 39 |
107 | #define XFS_TRANS_SWAPEXT 40 | 107 | #define XFS_TRANS_SWAPEXT 40 |
108 | #define XFS_TRANS_SB_COUNT 41 | 108 | #define XFS_TRANS_SB_COUNT 41 |
109 | #define XFS_TRANS_TYPE_MAX 41 | 109 | #define XFS_TRANS_CHECKPOINT 42 |
110 | #define XFS_TRANS_TYPE_MAX 42 | ||
110 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 111 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
111 | 112 | ||
112 | #define XFS_TRANS_TYPES \ | 113 | #define XFS_TRANS_TYPES \ |
@@ -148,6 +149,7 @@ typedef struct xfs_trans_header { | |||
148 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ | 149 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ |
149 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ | 150 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ |
150 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ | 151 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ |
152 | { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ | ||
151 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ | 153 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ |
152 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ | 154 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ |
153 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } | 155 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } |
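
Editorial note: the numeric defines above and this string table must grow in lockstep (and xfs_logprint(8) as well, per the comment). A hypothetical compile-time check — not present in the kernel source — could expand the macro into a real array and compare sizes:

    #include <linux/kernel.h>
    #include <linux/bug.h>

    /* hypothetical: instantiate XFS_TRANS_TYPES so its entry count
     * can be checked against XFS_TRANS_TYPE_MAX at build time */
    static const struct {
            int             type;
            const char      *name;
    } xfs_trans_type_names[] = { XFS_TRANS_TYPES };

    static inline void xfs_check_trans_types(void)
    {
            /* the table also carries DUMMY1/2 and UNMOUNT entries,
             * so it must have at least TYPE_MAX entries */
            BUILD_BUG_ON(ARRAY_SIZE(xfs_trans_type_names) <
                         XFS_TRANS_TYPE_MAX);
    }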
@@ -813,6 +815,7 @@ struct xfs_log_item_desc; | |||
813 | struct xfs_mount; | 815 | struct xfs_mount; |
814 | struct xfs_trans; | 816 | struct xfs_trans; |
815 | struct xfs_dquot_acct; | 817 | struct xfs_dquot_acct; |
818 | struct xfs_busy_extent; | ||
816 | 819 | ||
817 | typedef struct xfs_log_item { | 820 | typedef struct xfs_log_item { |
818 | struct list_head li_ail; /* AIL pointers */ | 821 | struct list_head li_ail; /* AIL pointers */ |
@@ -828,6 +831,11 @@ typedef struct xfs_log_item { | |||
828 | /* buffer item iodone */ | 831 | /* buffer item iodone */ |
829 | /* callback func */ | 832 | /* callback func */ |
830 | struct xfs_item_ops *li_ops; /* function list */ | 833 | struct xfs_item_ops *li_ops; /* function list */ |
834 | |||
835 | /* delayed logging */ | ||
836 | struct list_head li_cil; /* CIL pointers */ | ||
837 | struct xfs_log_vec *li_lv; /* active log vector */ | ||
838 | xfs_lsn_t li_seq; /* CIL commit seq */ | ||
831 | } xfs_log_item_t; | 839 | } xfs_log_item_t; |
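
Editorial note: the three new fields give every log item a home in the Committed Item List: li_cil links it onto the CIL, li_lv caches its formatted vector, and li_seq records the checkpoint sequence it belongs to. A hedged sketch of insertion — xc_cil as the name of the CIL's list head is an assumption here, and the real CIL code does more:

    #include <linux/list.h>

    /* sketch: attach a freshly formatted item to the CIL */
    static void cil_insert_item(struct list_head *xc_cil,
                                struct xfs_log_item *lip,
                                struct xfs_log_vec *lv,
                                xfs_lsn_t seq)
    {
            lip->li_lv = lv;        /* remember the active vector */
            lip->li_seq = seq;      /* checkpoint this item belongs to */
            if (list_empty(&lip->li_cil))
                    list_add_tail(&lip->li_cil, xc_cil);  /* once only */
    }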
832 | 840 | ||
833 | #define XFS_LI_IN_AIL 0x1 | 841 | #define XFS_LI_IN_AIL 0x1 |
@@ -872,34 +880,6 @@ typedef struct xfs_item_ops { | |||
872 | #define XFS_ITEM_PUSHBUF 3 | 880 | #define XFS_ITEM_PUSHBUF 3 |
873 | 881 | ||
874 | /* | 882 | /* |
875 | * This structure is used to maintain a list of block ranges that have been | ||
876 | * freed in the transaction. The ranges are listed in the perag[] busy list | ||
877 | * between when they're freed and the transaction is committed to disk. | ||
878 | */ | ||
879 | |||
880 | typedef struct xfs_log_busy_slot { | ||
881 | xfs_agnumber_t lbc_ag; | ||
882 | ushort lbc_idx; /* index in perag.busy[] */ | ||
883 | } xfs_log_busy_slot_t; | ||
884 | |||
885 | #define XFS_LBC_NUM_SLOTS 31 | ||
886 | typedef struct xfs_log_busy_chunk { | ||
887 | struct xfs_log_busy_chunk *lbc_next; | ||
888 | uint lbc_free; /* free slots bitmask */ | ||
889 | ushort lbc_unused; /* first unused */ | ||
890 | xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; | ||
891 | } xfs_log_busy_chunk_t; | ||
892 | |||
893 | #define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) | ||
894 | #define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) | ||
895 | |||
896 | #define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) | ||
897 | #define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) | ||
898 | #define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) | ||
899 | #define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) | ||
900 | #define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) | ||
901 | |||
902 | /* | ||
903 | * This is the type of function which can be given to xfs_trans_callback() | 883 | * This is the type of function which can be given to xfs_trans_callback() |
904 | * to be called upon the transaction's commit to disk. | 884 | * to be called upon the transaction's commit to disk. |
905 | */ | 885 | */ |
@@ -950,8 +930,7 @@ typedef struct xfs_trans { | |||
950 | unsigned int t_items_free; /* log item descs free */ | 930 | unsigned int t_items_free; /* log item descs free */ |
951 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ | 931 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ |
952 | xfs_trans_header_t t_header; /* header for in-log trans */ | 932 | xfs_trans_header_t t_header; /* header for in-log trans */ |
953 | unsigned int t_busy_free; /* busy descs free */ | 933 | struct list_head t_busy; /* list of busy extents */ |
954 | xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ | ||
955 | unsigned long t_pflags; /* saved process flags state */ | 934 | unsigned long t_pflags; /* saved process flags state */ |
956 | } xfs_trans_t; | 935 | } xfs_trans_t; |
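
Editorial note: with t_busy now a plain list_head, the fixed-size busy-slot chunks (deleted below in this header and in xfs_trans_item.c) give way to standard kernel list handling. Assuming struct xfs_busy_extent carries its own list member — its exact layout is defined elsewhere in this series, so the shape below is an assumption — the bookkeeping reduces to:

    #include <linux/list.h>

    /* assumed shape of the new busy-extent record */
    struct xfs_busy_extent {
            struct list_head        list;   /* anchored on tp->t_busy */
            xfs_agnumber_t          agno;
            xfs_agblock_t           bno;
            xfs_extlen_t            length;
    };

    /* at transaction allocation time */
    static void xfs_trans_init_busy(struct xfs_trans *tp)
    {
            INIT_LIST_HEAD(&tp->t_busy);
    }

    /* tracking a freed extent until the transaction commits */
    static void xfs_trans_track_busy(struct xfs_trans *tp,
                                     struct xfs_busy_extent *busyp)
    {
            list_add(&busyp->list, &tp->t_busy);
    }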
957 | 936 | ||
@@ -1025,9 +1004,6 @@ int _xfs_trans_commit(xfs_trans_t *, | |||
1025 | void xfs_trans_cancel(xfs_trans_t *, int); | 1004 | void xfs_trans_cancel(xfs_trans_t *, int); |
1026 | int xfs_trans_ail_init(struct xfs_mount *); | 1005 | int xfs_trans_ail_init(struct xfs_mount *); |
1027 | void xfs_trans_ail_destroy(struct xfs_mount *); | 1006 | void xfs_trans_ail_destroy(struct xfs_mount *); |
1028 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | ||
1029 | xfs_agnumber_t ag, | ||
1030 | xfs_extlen_t idx); | ||
1031 | 1007 | ||
1032 | extern kmem_zone_t *xfs_trans_zone; | 1008 | extern kmem_zone_t *xfs_trans_zone; |
1033 | 1009 | ||
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 9cd809025f3a..63d81a22f4fd 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -114,7 +114,7 @@ _xfs_trans_bjoin( | |||
114 | xfs_buf_item_init(bp, tp->t_mountp); | 114 | xfs_buf_item_init(bp, tp->t_mountp); |
115 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 115 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
116 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 116 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
117 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 117 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
118 | ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); | 118 | ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); |
119 | if (reset_recur) | 119 | if (reset_recur) |
120 | bip->bli_recur = 0; | 120 | bip->bli_recur = 0; |
@@ -511,7 +511,7 @@ xfs_trans_brelse(xfs_trans_t *tp, | |||
511 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 511 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
512 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); | 512 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); |
513 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 513 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
514 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 514 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
515 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 515 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
516 | 516 | ||
517 | /* | 517 | /* |
@@ -619,7 +619,7 @@ xfs_trans_bhold(xfs_trans_t *tp, | |||
619 | 619 | ||
620 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 620 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
621 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 621 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
622 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 622 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
623 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 623 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
624 | bip->bli_flags |= XFS_BLI_HOLD; | 624 | bip->bli_flags |= XFS_BLI_HOLD; |
625 | trace_xfs_trans_bhold(bip); | 625 | trace_xfs_trans_bhold(bip); |
@@ -641,7 +641,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp, | |||
641 | 641 | ||
642 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 642 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
643 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 643 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
644 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 644 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
645 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 645 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
646 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); | 646 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); |
647 | bip->bli_flags &= ~XFS_BLI_HOLD; | 647 | bip->bli_flags &= ~XFS_BLI_HOLD; |
@@ -704,7 +704,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
704 | bip->bli_flags &= ~XFS_BLI_STALE; | 704 | bip->bli_flags &= ~XFS_BLI_STALE; |
705 | ASSERT(XFS_BUF_ISSTALE(bp)); | 705 | ASSERT(XFS_BUF_ISSTALE(bp)); |
706 | XFS_BUF_UNSTALE(bp); | 706 | XFS_BUF_UNSTALE(bp); |
707 | bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL; | 707 | bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; |
708 | } | 708 | } |
709 | 709 | ||
710 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); | 710 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); |
@@ -762,8 +762,8 @@ xfs_trans_binval( | |||
762 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 762 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
763 | ASSERT(XFS_BUF_ISSTALE(bp)); | 763 | ASSERT(XFS_BUF_ISSTALE(bp)); |
764 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); | 764 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); |
765 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF)); | 765 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); |
766 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 766 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
767 | ASSERT(lidp->lid_flags & XFS_LID_DIRTY); | 767 | ASSERT(lidp->lid_flags & XFS_LID_DIRTY); |
768 | ASSERT(tp->t_flags & XFS_TRANS_DIRTY); | 768 | ASSERT(tp->t_flags & XFS_TRANS_DIRTY); |
769 | return; | 769 | return; |
@@ -774,7 +774,7 @@ xfs_trans_binval( | |||
774 | * in the buf log item. The STALE flag will be used in | 774 | * in the buf log item. The STALE flag will be used in |
775 | * xfs_buf_item_unpin() to determine if it should clean up | 775 | * xfs_buf_item_unpin() to determine if it should clean up |
776 | * when the last reference to the buf item is given up. | 776 | * when the last reference to the buf item is given up. |
777 | * We set the XFS_BLI_CANCEL flag in the buf log format structure | 777 | * We set the XFS_BLF_CANCEL flag in the buf log format structure |
778 | * and log the buf item. This will be used at recovery time | 778 | * and log the buf item. This will be used at recovery time |
779 | * to determine that copies of the buffer in the log before | 779 | * to determine that copies of the buffer in the log before |
780 | * this should not be replayed. | 780 | * this should not be replayed. |
@@ -792,9 +792,9 @@ xfs_trans_binval( | |||
792 | XFS_BUF_UNDELAYWRITE(bp); | 792 | XFS_BUF_UNDELAYWRITE(bp); |
793 | XFS_BUF_STALE(bp); | 793 | XFS_BUF_STALE(bp); |
794 | bip->bli_flags |= XFS_BLI_STALE; | 794 | bip->bli_flags |= XFS_BLI_STALE; |
795 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 795 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
796 | bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF; | 796 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
797 | bip->bli_format.blf_flags |= XFS_BLI_CANCEL; | 797 | bip->bli_format.blf_flags |= XFS_BLF_CANCEL; |
798 | memset((char *)(bip->bli_format.blf_data_map), 0, | 798 | memset((char *)(bip->bli_format.blf_data_map), 0, |
799 | (bip->bli_format.blf_map_size * sizeof(uint))); | 799 | (bip->bli_format.blf_map_size * sizeof(uint))); |
800 | lidp->lid_flags |= XFS_LID_DIRTY; | 800 | lidp->lid_flags |= XFS_LID_DIRTY; |
@@ -802,16 +802,16 @@ xfs_trans_binval( | |||
802 | } | 802 | } |
803 | 803 | ||
804 | /* | 804 | /* |
805 | * This call is used to indicate that the buffer contains on-disk | 805 | * This call is used to indicate that the buffer contains on-disk inodes which |
806 | * inodes which must be handled specially during recovery. They | 806 | * must be handled specially during recovery. They require special handling |
807 | * require special handling because only the di_next_unlinked from | 807 | * because only the di_next_unlinked from the inodes in the buffer should be |
808 | * the inodes in the buffer should be recovered. The rest of the | 808 | * recovered. The rest of the data in the buffer is logged via the inodes |
809 | * data in the buffer is logged via the inodes themselves. | 809 | * themselves. |
810 | * | 810 | * |
811 | * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log | 811 | * All we do is set the XFS_BLI_INODE_BUF flag in the item's flags so it can be
812 | * format structure so that we'll know what to do at recovery time. | 812 | * transferred to the buffer's log format structure so that we'll know what to |
813 | * do at recovery time. | ||
813 | */ | 814 | */ |
814 | /* ARGSUSED */ | ||
815 | void | 815 | void |
816 | xfs_trans_inode_buf( | 816 | xfs_trans_inode_buf( |
817 | xfs_trans_t *tp, | 817 | xfs_trans_t *tp, |
@@ -826,7 +826,7 @@ xfs_trans_inode_buf( | |||
826 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 826 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
827 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 827 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
828 | 828 | ||
829 | bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF; | 829 | bip->bli_flags |= XFS_BLI_INODE_BUF; |
830 | } | 830 | } |
831 | 831 | ||
832 | /* | 832 | /* |
@@ -908,9 +908,9 @@ xfs_trans_dquot_buf( | |||
908 | ASSERT(XFS_BUF_ISBUSY(bp)); | 908 | ASSERT(XFS_BUF_ISBUSY(bp)); |
909 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); | 909 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); |
910 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 910 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); |
911 | ASSERT(type == XFS_BLI_UDQUOT_BUF || | 911 | ASSERT(type == XFS_BLF_UDQUOT_BUF || |
912 | type == XFS_BLI_PDQUOT_BUF || | 912 | type == XFS_BLF_PDQUOT_BUF || |
913 | type == XFS_BLI_GDQUOT_BUF); | 913 | type == XFS_BLF_GDQUOT_BUF); |
914 | 914 | ||
915 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 915 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
916 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 916 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index eb3fc57f9eef..f11d37d06dcc 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c | |||
@@ -299,6 +299,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
299 | void | 299 | void |
300 | xfs_trans_free_items( | 300 | xfs_trans_free_items( |
301 | xfs_trans_t *tp, | 301 | xfs_trans_t *tp, |
302 | xfs_lsn_t commit_lsn, | ||
302 | int flags) | 303 | int flags) |
303 | { | 304 | { |
304 | xfs_log_item_chunk_t *licp; | 305 | xfs_log_item_chunk_t *licp; |
@@ -311,7 +312,7 @@ xfs_trans_free_items( | |||
311 | * Special case the embedded chunk so we don't free it below. | 312 | * Special case the embedded chunk so we don't free it below. |
312 | */ | 313 | */ |
313 | if (!xfs_lic_are_all_free(licp)) { | 314 | if (!xfs_lic_are_all_free(licp)) { |
314 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 315 | (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); |
315 | xfs_lic_all_free(licp); | 316 | xfs_lic_all_free(licp); |
316 | licp->lic_unused = 0; | 317 | licp->lic_unused = 0; |
317 | } | 318 | } |
@@ -322,7 +323,7 @@ xfs_trans_free_items( | |||
322 | */ | 323 | */ |
323 | while (licp != NULL) { | 324 | while (licp != NULL) { |
324 | ASSERT(!xfs_lic_are_all_free(licp)); | 325 | ASSERT(!xfs_lic_are_all_free(licp)); |
325 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 326 | (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); |
326 | next_licp = licp->lic_next; | 327 | next_licp = licp->lic_next; |
327 | kmem_free(licp); | 328 | kmem_free(licp); |
328 | licp = next_licp; | 329 | licp = next_licp; |
@@ -438,112 +439,3 @@ xfs_trans_unlock_chunk( | |||
438 | 439 | ||
439 | return freed; | 440 | return freed; |
440 | } | 441 | } |
441 | |||
442 | |||
443 | /* | ||
444 | * This is called to add the given busy item to the transaction's | ||
445 | * list of busy items. It must find a free busy item descriptor | ||
446 | * or allocate a new one and add the item to that descriptor. | ||
447 | * The function returns a pointer to busy descriptor used to point | ||
448 | * to the new busy entry. The log busy entry will now point to its new | ||
449 | * descriptor with its ???? field. | ||
450 | */ | ||
451 | xfs_log_busy_slot_t * | ||
452 | xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx) | ||
453 | { | ||
454 | xfs_log_busy_chunk_t *lbcp; | ||
455 | xfs_log_busy_slot_t *lbsp; | ||
456 | int i=0; | ||
457 | |||
458 | /* | ||
459 | * If there are no free descriptors, allocate a new chunk | ||
460 | * of them and put it at the front of the chunk list. | ||
461 | */ | ||
462 | if (tp->t_busy_free == 0) { | ||
463 | lbcp = (xfs_log_busy_chunk_t*) | ||
464 | kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP); | ||
465 | ASSERT(lbcp != NULL); | ||
466 | /* | ||
467 | * Initialize the chunk, and then | ||
468 | * claim the first slot in the newly allocated chunk. | ||
469 | */ | ||
470 | XFS_LBC_INIT(lbcp); | ||
471 | XFS_LBC_CLAIM(lbcp, 0); | ||
472 | lbcp->lbc_unused = 1; | ||
473 | lbsp = XFS_LBC_SLOT(lbcp, 0); | ||
474 | |||
475 | /* | ||
476 | * Link in the new chunk and update the free count. | ||
477 | */ | ||
478 | lbcp->lbc_next = tp->t_busy.lbc_next; | ||
479 | tp->t_busy.lbc_next = lbcp; | ||
480 | tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1; | ||
481 | |||
482 | /* | ||
483 | * Initialize the descriptor and the generic portion | ||
484 | * of the log item. | ||
485 | * | ||
486 | * Point the new slot at this item and return it. | ||
487 | * Also point the log item at its currently active | ||
488 | * descriptor and set the item's mount pointer. | ||
489 | */ | ||
490 | lbsp->lbc_ag = ag; | ||
491 | lbsp->lbc_idx = idx; | ||
492 | return lbsp; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Find the free descriptor. It is somewhere in the chunklist | ||
497 | * of descriptors. | ||
498 | */ | ||
499 | lbcp = &tp->t_busy; | ||
500 | while (lbcp != NULL) { | ||
501 | if (XFS_LBC_VACANCY(lbcp)) { | ||
502 | if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) { | ||
503 | i = lbcp->lbc_unused; | ||
504 | break; | ||
505 | } else { | ||
506 | /* out-of-order vacancy */ | ||
507 | cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp); | ||
508 | ASSERT(0); | ||
509 | } | ||
510 | } | ||
511 | lbcp = lbcp->lbc_next; | ||
512 | } | ||
513 | ASSERT(lbcp != NULL); | ||
514 | /* | ||
515 | * If we find a free descriptor, claim it, | ||
516 | * initialize it, and return it. | ||
517 | */ | ||
518 | XFS_LBC_CLAIM(lbcp, i); | ||
519 | if (lbcp->lbc_unused <= i) { | ||
520 | lbcp->lbc_unused = i + 1; | ||
521 | } | ||
522 | lbsp = XFS_LBC_SLOT(lbcp, i); | ||
523 | tp->t_busy_free--; | ||
524 | lbsp->lbc_ag = ag; | ||
525 | lbsp->lbc_idx = idx; | ||
526 | return lbsp; | ||
527 | } | ||
528 | |||
529 | |||
530 | /* | ||
531 | * xfs_trans_free_busy | ||
532 | * Free all of the busy lists from a transaction | ||
533 | */ | ||
534 | void | ||
535 | xfs_trans_free_busy(xfs_trans_t *tp) | ||
536 | { | ||
537 | xfs_log_busy_chunk_t *lbcp; | ||
538 | xfs_log_busy_chunk_t *lbcq; | ||
539 | |||
540 | lbcp = tp->t_busy.lbc_next; | ||
541 | while (lbcp != NULL) { | ||
542 | lbcq = lbcp->lbc_next; | ||
543 | kmem_free(lbcp); | ||
544 | lbcp = lbcq; | ||
545 | } | ||
546 | |||
547 | XFS_LBC_INIT(&tp->t_busy); | ||
548 | tp->t_busy.lbc_unused = 0; | ||
549 | } | ||
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 73e2ad397432..c6e4f2c8de6e 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -35,13 +35,14 @@ struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *, | |||
35 | struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); | 35 | struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); |
36 | struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, | 36 | struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, |
37 | struct xfs_log_item_desc *); | 37 | struct xfs_log_item_desc *); |
38 | void xfs_trans_free_items(struct xfs_trans *, int); | 38 | |
39 | void xfs_trans_unlock_items(struct xfs_trans *, | 39 | void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn); |
40 | xfs_lsn_t); | 40 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
41 | void xfs_trans_free_busy(xfs_trans_t *tp); | 41 | int flags); |
42 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | 42 | |
43 | xfs_agnumber_t ag, | 43 | void xfs_trans_item_committed(struct xfs_log_item *lip, |
44 | xfs_extlen_t idx); | 44 | xfs_lsn_t commit_lsn, int aborted); |
45 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | ||
45 | 46 | ||
46 | /* | 47 | /* |
47 | * AIL traversal cursor. | 48 | * AIL traversal cursor. |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index b09904555d07..320775295e32 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -75,6 +75,8 @@ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | |||
75 | 75 | ||
76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ | 76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ |
77 | 77 | ||
78 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
79 | |||
78 | /* | 80 | /* |
79 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 81 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
80 | * Disk based types: | 82 | * Disk based types: |