diff options
Diffstat (limited to 'fs')
229 files changed, 8641 insertions, 6891 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 10b7d3c9dba8..8c92a9ba8330 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -259,7 +259,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
259 | if (v9fs_proto_dotl(v9ses)) { | 259 | if (v9fs_proto_dotl(v9ses)) { |
260 | res = p9_client_statfs(fid, &rs); | 260 | res = p9_client_statfs(fid, &rs); |
261 | if (res == 0) { | 261 | if (res == 0) { |
262 | buf->f_type = V9FS_MAGIC; | 262 | buf->f_type = rs.type; |
263 | buf->f_bsize = rs.bsize; | 263 | buf->f_bsize = rs.bsize; |
264 | buf->f_blocks = rs.blocks; | 264 | buf->f_blocks = rs.blocks; |
265 | buf->f_bfree = rs.bfree; | 265 | buf->f_bfree = rs.bfree; |
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
14 | #include <linux/time.h> | 14 | #include <linux/time.h> |
15 | #include <linux/aio_abi.h> | 15 | #include <linux/aio_abi.h> |
16 | #include <linux/module.h> | 16 | #include <linux/export.h> |
17 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
18 | #include <linux/backing-dev.h> | 18 | #include <linux/backing-dev.h> |
19 | #include <linux/uio.h> | 19 | #include <linux/uio.h> |
@@ -5,7 +5,7 @@ | |||
5 | * changes by Thomas Schoebel-Theuer | 5 | * changes by Thomas Schoebel-Theuer |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/export.h> |
9 | #include <linux/time.h> | 9 | #include <linux/time.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 22e9a78872ff..37268c5bb98b 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c | |||
@@ -9,7 +9,7 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include <linux/module.h> | 12 | #include <linux/export.h> |
13 | #include <linux/stat.h> | 13 | #include <linux/stat.h> |
14 | #include <linux/time.h> | 14 | #include <linux/time.h> |
15 | #include <linux/namei.h> | 15 | #include <linux/namei.h> |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 18276531f7c6..7d7ff206cdcb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1094,6 +1094,29 @@ out: | |||
1094 | */ | 1094 | */ |
1095 | 1095 | ||
1096 | /* | 1096 | /* |
1097 | * The purpose of always_dump_vma() is to make sure that special kernel mappings | ||
1098 | * that are useful for post-mortem analysis are included in every core dump. | ||
1099 | * In that way we ensure that the core dump is fully interpretable later | ||
1100 | * without matching up the same kernel and hardware config to see what PC values | ||
1101 | * meant. These special mappings include - vDSO, vsyscall, and other | ||
1102 | * architecture specific mappings | ||
1103 | */ | ||
1104 | static bool always_dump_vma(struct vm_area_struct *vma) | ||
1105 | { | ||
1106 | /* Any vsyscall mappings? */ | ||
1107 | if (vma == get_gate_vma(vma->vm_mm)) | ||
1108 | return true; | ||
1109 | /* | ||
1110 | * arch_vma_name() returns non-NULL for special architecture mappings, | ||
1111 | * such as vDSO sections. | ||
1112 | */ | ||
1113 | if (arch_vma_name(vma)) | ||
1114 | return true; | ||
1115 | |||
1116 | return false; | ||
1117 | } | ||
1118 | |||
1119 | /* | ||
1097 | * Decide what to dump of a segment, part, all or none. | 1120 | * Decide what to dump of a segment, part, all or none. |
1098 | */ | 1121 | */ |
1099 | static unsigned long vma_dump_size(struct vm_area_struct *vma, | 1122 | static unsigned long vma_dump_size(struct vm_area_struct *vma, |
@@ -1101,10 +1124,13 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, | |||
1101 | { | 1124 | { |
1102 | #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) | 1125 | #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) |
1103 | 1126 | ||
1104 | /* The vma can be set up to tell us the answer directly. */ | 1127 | /* always dump the vdso and vsyscall sections */ |
1105 | if (vma->vm_flags & VM_ALWAYSDUMP) | 1128 | if (always_dump_vma(vma)) |
1106 | goto whole; | 1129 | goto whole; |
1107 | 1130 | ||
1131 | if (vma->vm_flags & VM_NODUMP) | ||
1132 | return 0; | ||
1133 | |||
1108 | /* Hugetlb memory check */ | 1134 | /* Hugetlb memory check */ |
1109 | if (vma->vm_flags & VM_HUGETLB) { | 1135 | if (vma->vm_flags & VM_HUGETLB) { |
1110 | if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) | 1136 | if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 4e4017c08887..024d20ee3ca3 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -15,7 +15,7 @@ | |||
15 | * JAN/99 -- coded full program relocation (gerg@snapgear.com) | 15 | * JAN/99 -- coded full program relocation (gerg@snapgear.com) |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/module.h> | 18 | #include <linux/export.h> |
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 1ffb60355cae..613aa0618235 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/magic.h> | ||
22 | #include <linux/binfmts.h> | 23 | #include <linux/binfmts.h> |
23 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
24 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
@@ -699,7 +700,7 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent) | |||
699 | [3] = {"register", &bm_register_operations, S_IWUSR}, | 700 | [3] = {"register", &bm_register_operations, S_IWUSR}, |
700 | /* last one */ {""} | 701 | /* last one */ {""} |
701 | }; | 702 | }; |
702 | int err = simple_fill_super(sb, 0x42494e4d, bm_files); | 703 | int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); |
703 | if (!err) | 704 | if (!err) |
704 | sb->s_op = &s_ops; | 705 | sb->s_op = &s_ops; |
705 | return err; | 706 | return err; |
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/module.h> | 25 | #include <linux/export.h> |
26 | #include <linux/mempool.h> | 26 | #include <linux/mempool.h> |
27 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
28 | #include <scsi/sg.h> /* for struct sg_iovec */ | 28 | #include <scsi/sg.h> /* for struct sg_iovec */ |
diff --git a/fs/block_dev.c b/fs/block_dev.c index a9ff3000b83d..e08f6a20a5bb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/blkpg.h> | 18 | #include <linux/blkpg.h> |
19 | #include <linux/magic.h> | ||
19 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
20 | #include <linux/swap.h> | 21 | #include <linux/swap.h> |
21 | #include <linux/pagevec.h> | 22 | #include <linux/pagevec.h> |
@@ -506,7 +507,7 @@ static const struct super_operations bdev_sops = { | |||
506 | static struct dentry *bd_mount(struct file_system_type *fs_type, | 507 | static struct dentry *bd_mount(struct file_system_type *fs_type, |
507 | int flags, const char *dev_name, void *data) | 508 | int flags, const char *dev_name, void *data) |
508 | { | 509 | { |
509 | return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576); | 510 | return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); |
510 | } | 511 | } |
511 | 512 | ||
512 | static struct file_system_type bd_type = { | 513 | static struct file_system_type bd_type = { |
diff --git a/fs/buffer.c b/fs/buffer.c index 1a30db77af32..70e2017edd70 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <linux/file.h> | 29 | #include <linux/file.h> |
30 | #include <linux/quotaops.h> | 30 | #include <linux/quotaops.h> |
31 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
32 | #include <linux/module.h> | 32 | #include <linux/export.h> |
33 | #include <linux/writeback.h> | 33 | #include <linux/writeback.h> |
34 | #include <linux/hash.h> | 34 | #include <linux/hash.h> |
35 | #include <linux/suspend.h> | 35 | #include <linux/suspend.h> |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2c489378b4cd..9fff9f3b17e4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -677,18 +677,19 @@ static int fill_inode(struct inode *inode, | |||
677 | case S_IFLNK: | 677 | case S_IFLNK: |
678 | inode->i_op = &ceph_symlink_iops; | 678 | inode->i_op = &ceph_symlink_iops; |
679 | if (!ci->i_symlink) { | 679 | if (!ci->i_symlink) { |
680 | int symlen = iinfo->symlink_len; | 680 | u32 symlen = iinfo->symlink_len; |
681 | char *sym; | 681 | char *sym; |
682 | 682 | ||
683 | BUG_ON(symlen != inode->i_size); | ||
684 | spin_unlock(&ci->i_ceph_lock); | 683 | spin_unlock(&ci->i_ceph_lock); |
685 | 684 | ||
685 | err = -EINVAL; | ||
686 | if (WARN_ON(symlen != inode->i_size)) | ||
687 | goto out; | ||
688 | |||
686 | err = -ENOMEM; | 689 | err = -ENOMEM; |
687 | sym = kmalloc(symlen+1, GFP_NOFS); | 690 | sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS); |
688 | if (!sym) | 691 | if (!sym) |
689 | goto out; | 692 | goto out; |
690 | memcpy(sym, iinfo->symlink, symlen); | ||
691 | sym[symlen] = 0; | ||
692 | 693 | ||
693 | spin_lock(&ci->i_ceph_lock); | 694 | spin_lock(&ci->i_ceph_lock); |
694 | if (!ci->i_symlink) | 695 | if (!ci->i_symlink) |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 866e8d7ca37d..89971e137aab 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -402,7 +402,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
402 | 402 | ||
403 | spin_lock_init(&s->s_gen_ttl_lock); | 403 | spin_lock_init(&s->s_gen_ttl_lock); |
404 | s->s_cap_gen = 0; | 404 | s->s_cap_gen = 0; |
405 | s->s_cap_ttl = 0; | 405 | s->s_cap_ttl = jiffies - 1; |
406 | 406 | ||
407 | spin_lock_init(&s->s_cap_lock); | 407 | spin_lock_init(&s->s_cap_lock); |
408 | s->s_renew_requested = 0; | 408 | s->s_renew_requested = 0; |
@@ -1083,8 +1083,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc, | |||
1083 | int wake = 0; | 1083 | int wake = 0; |
1084 | 1084 | ||
1085 | spin_lock(&session->s_cap_lock); | 1085 | spin_lock(&session->s_cap_lock); |
1086 | was_stale = is_renew && (session->s_cap_ttl == 0 || | 1086 | was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl); |
1087 | time_after_eq(jiffies, session->s_cap_ttl)); | ||
1088 | 1087 | ||
1089 | session->s_cap_ttl = session->s_renew_requested + | 1088 | session->s_cap_ttl = session->s_renew_requested + |
1090 | mdsc->mdsmap->m_session_timeout*HZ; | 1089 | mdsc->mdsmap->m_session_timeout*HZ; |
@@ -2332,7 +2331,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
2332 | session->s_mds); | 2331 | session->s_mds); |
2333 | spin_lock(&session->s_gen_ttl_lock); | 2332 | spin_lock(&session->s_gen_ttl_lock); |
2334 | session->s_cap_gen++; | 2333 | session->s_cap_gen++; |
2335 | session->s_cap_ttl = 0; | 2334 | session->s_cap_ttl = jiffies - 1; |
2336 | spin_unlock(&session->s_gen_ttl_lock); | 2335 | spin_unlock(&session->s_gen_ttl_lock); |
2337 | send_renew_caps(mdsc, session); | 2336 | send_renew_caps(mdsc, session); |
2338 | break; | 2337 | break; |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index a559c80f127a..f04c0961f993 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -331,7 +331,7 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
331 | 331 | ||
332 | /* alloc new snap context */ | 332 | /* alloc new snap context */ |
333 | err = -ENOMEM; | 333 | err = -ENOMEM; |
334 | if (num > ULONG_MAX / sizeof(u64) - sizeof(*snapc)) | 334 | if (num > (ULONG_MAX - sizeof(*snapc)) / sizeof(u64)) |
335 | goto fail; | 335 | goto fail; |
336 | snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS); | 336 | snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS); |
337 | if (!snapc) | 337 | if (!snapc) |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 256f85221926..1e67dd7305a4 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -130,10 +130,12 @@ enum { | |||
130 | Opt_nodirstat, | 130 | Opt_nodirstat, |
131 | Opt_rbytes, | 131 | Opt_rbytes, |
132 | Opt_norbytes, | 132 | Opt_norbytes, |
133 | Opt_asyncreaddir, | ||
133 | Opt_noasyncreaddir, | 134 | Opt_noasyncreaddir, |
134 | Opt_dcache, | 135 | Opt_dcache, |
135 | Opt_nodcache, | 136 | Opt_nodcache, |
136 | Opt_ino32, | 137 | Opt_ino32, |
138 | Opt_noino32, | ||
137 | }; | 139 | }; |
138 | 140 | ||
139 | static match_table_t fsopt_tokens = { | 141 | static match_table_t fsopt_tokens = { |
@@ -153,10 +155,12 @@ static match_table_t fsopt_tokens = { | |||
153 | {Opt_nodirstat, "nodirstat"}, | 155 | {Opt_nodirstat, "nodirstat"}, |
154 | {Opt_rbytes, "rbytes"}, | 156 | {Opt_rbytes, "rbytes"}, |
155 | {Opt_norbytes, "norbytes"}, | 157 | {Opt_norbytes, "norbytes"}, |
158 | {Opt_asyncreaddir, "asyncreaddir"}, | ||
156 | {Opt_noasyncreaddir, "noasyncreaddir"}, | 159 | {Opt_noasyncreaddir, "noasyncreaddir"}, |
157 | {Opt_dcache, "dcache"}, | 160 | {Opt_dcache, "dcache"}, |
158 | {Opt_nodcache, "nodcache"}, | 161 | {Opt_nodcache, "nodcache"}, |
159 | {Opt_ino32, "ino32"}, | 162 | {Opt_ino32, "ino32"}, |
163 | {Opt_noino32, "noino32"}, | ||
160 | {-1, NULL} | 164 | {-1, NULL} |
161 | }; | 165 | }; |
162 | 166 | ||
@@ -232,6 +236,9 @@ static int parse_fsopt_token(char *c, void *private) | |||
232 | case Opt_norbytes: | 236 | case Opt_norbytes: |
233 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; | 237 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; |
234 | break; | 238 | break; |
239 | case Opt_asyncreaddir: | ||
240 | fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; | ||
241 | break; | ||
235 | case Opt_noasyncreaddir: | 242 | case Opt_noasyncreaddir: |
236 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | 243 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; |
237 | break; | 244 | break; |
@@ -244,6 +251,9 @@ static int parse_fsopt_token(char *c, void *private) | |||
244 | case Opt_ino32: | 251 | case Opt_ino32: |
245 | fsopt->flags |= CEPH_MOUNT_OPT_INO32; | 252 | fsopt->flags |= CEPH_MOUNT_OPT_INO32; |
246 | break; | 253 | break; |
254 | case Opt_noino32: | ||
255 | fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; | ||
256 | break; | ||
247 | default: | 257 | default: |
248 | BUG_ON(token); | 258 | BUG_ON(token); |
249 | } | 259 | } |
@@ -334,10 +344,12 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
334 | *path += 2; | 344 | *path += 2; |
335 | dout("server path '%s'\n", *path); | 345 | dout("server path '%s'\n", *path); |
336 | 346 | ||
337 | err = ceph_parse_options(popt, options, dev_name, dev_name_end, | 347 | *popt = ceph_parse_options(options, dev_name, dev_name_end, |
338 | parse_fsopt_token, (void *)fsopt); | 348 | parse_fsopt_token, (void *)fsopt); |
339 | if (err) | 349 | if (IS_ERR(*popt)) { |
350 | err = PTR_ERR(*popt); | ||
340 | goto out; | 351 | goto out; |
352 | } | ||
341 | 353 | ||
342 | /* success */ | 354 | /* success */ |
343 | *pfsopt = fsopt; | 355 | *pfsopt = fsopt; |
@@ -926,6 +938,7 @@ static int __init init_ceph(void) | |||
926 | if (ret) | 938 | if (ret) |
927 | goto out; | 939 | goto out; |
928 | 940 | ||
941 | ceph_xattr_init(); | ||
929 | ret = register_filesystem(&ceph_fs_type); | 942 | ret = register_filesystem(&ceph_fs_type); |
930 | if (ret) | 943 | if (ret) |
931 | goto out_icache; | 944 | goto out_icache; |
@@ -935,6 +948,7 @@ static int __init init_ceph(void) | |||
935 | return 0; | 948 | return 0; |
936 | 949 | ||
937 | out_icache: | 950 | out_icache: |
951 | ceph_xattr_exit(); | ||
938 | destroy_caches(); | 952 | destroy_caches(); |
939 | out: | 953 | out: |
940 | return ret; | 954 | return ret; |
@@ -944,6 +958,7 @@ static void __exit exit_ceph(void) | |||
944 | { | 958 | { |
945 | dout("exit_ceph\n"); | 959 | dout("exit_ceph\n"); |
946 | unregister_filesystem(&ceph_fs_type); | 960 | unregister_filesystem(&ceph_fs_type); |
961 | ceph_xattr_exit(); | ||
947 | destroy_caches(); | 962 | destroy_caches(); |
948 | } | 963 | } |
949 | 964 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1421f3d875a2..fc35036d258d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -367,7 +367,7 @@ static inline u32 ceph_ino_to_ino32(__u64 vino) | |||
367 | u32 ino = vino & 0xffffffff; | 367 | u32 ino = vino & 0xffffffff; |
368 | ino ^= vino >> 32; | 368 | ino ^= vino >> 32; |
369 | if (!ino) | 369 | if (!ino) |
370 | ino = 1; | 370 | ino = 2; |
371 | return ino; | 371 | return ino; |
372 | } | 372 | } |
373 | 373 | ||
@@ -733,6 +733,8 @@ extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); | |||
733 | extern int ceph_removexattr(struct dentry *, const char *); | 733 | extern int ceph_removexattr(struct dentry *, const char *); |
734 | extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); | 734 | extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); |
735 | extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); | 735 | extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); |
736 | extern void __init ceph_xattr_init(void); | ||
737 | extern void ceph_xattr_exit(void); | ||
736 | 738 | ||
737 | /* caps.c */ | 739 | /* caps.c */ |
738 | extern const char *ceph_cap_string(int c); | 740 | extern const char *ceph_cap_string(int c); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a76f697303d9..35b86331d8a5 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -8,9 +8,12 @@ | |||
8 | #include <linux/xattr.h> | 8 | #include <linux/xattr.h> |
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | 10 | ||
11 | #define XATTR_CEPH_PREFIX "ceph." | ||
12 | #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1) | ||
13 | |||
11 | static bool ceph_is_valid_xattr(const char *name) | 14 | static bool ceph_is_valid_xattr(const char *name) |
12 | { | 15 | { |
13 | return !strncmp(name, "ceph.", 5) || | 16 | return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || |
14 | !strncmp(name, XATTR_SECURITY_PREFIX, | 17 | !strncmp(name, XATTR_SECURITY_PREFIX, |
15 | XATTR_SECURITY_PREFIX_LEN) || | 18 | XATTR_SECURITY_PREFIX_LEN) || |
16 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 19 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || |
@@ -21,79 +24,91 @@ static bool ceph_is_valid_xattr(const char *name) | |||
21 | * These define virtual xattrs exposing the recursive directory | 24 | * These define virtual xattrs exposing the recursive directory |
22 | * statistics and layout metadata. | 25 | * statistics and layout metadata. |
23 | */ | 26 | */ |
24 | struct ceph_vxattr_cb { | 27 | struct ceph_vxattr { |
25 | bool readonly; | ||
26 | char *name; | 28 | char *name; |
29 | size_t name_size; /* strlen(name) + 1 (for '\0') */ | ||
27 | size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, | 30 | size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, |
28 | size_t size); | 31 | size_t size); |
32 | bool readonly; | ||
29 | }; | 33 | }; |
30 | 34 | ||
31 | /* directories */ | 35 | /* directories */ |
32 | 36 | ||
33 | static size_t ceph_vxattrcb_entries(struct ceph_inode_info *ci, char *val, | 37 | static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, |
34 | size_t size) | 38 | size_t size) |
35 | { | 39 | { |
36 | return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); | 40 | return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); |
37 | } | 41 | } |
38 | 42 | ||
39 | static size_t ceph_vxattrcb_files(struct ceph_inode_info *ci, char *val, | 43 | static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val, |
40 | size_t size) | 44 | size_t size) |
41 | { | 45 | { |
42 | return snprintf(val, size, "%lld", ci->i_files); | 46 | return snprintf(val, size, "%lld", ci->i_files); |
43 | } | 47 | } |
44 | 48 | ||
45 | static size_t ceph_vxattrcb_subdirs(struct ceph_inode_info *ci, char *val, | 49 | static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val, |
46 | size_t size) | 50 | size_t size) |
47 | { | 51 | { |
48 | return snprintf(val, size, "%lld", ci->i_subdirs); | 52 | return snprintf(val, size, "%lld", ci->i_subdirs); |
49 | } | 53 | } |
50 | 54 | ||
51 | static size_t ceph_vxattrcb_rentries(struct ceph_inode_info *ci, char *val, | 55 | static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val, |
52 | size_t size) | 56 | size_t size) |
53 | { | 57 | { |
54 | return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); | 58 | return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); |
55 | } | 59 | } |
56 | 60 | ||
57 | static size_t ceph_vxattrcb_rfiles(struct ceph_inode_info *ci, char *val, | 61 | static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val, |
58 | size_t size) | 62 | size_t size) |
59 | { | 63 | { |
60 | return snprintf(val, size, "%lld", ci->i_rfiles); | 64 | return snprintf(val, size, "%lld", ci->i_rfiles); |
61 | } | 65 | } |
62 | 66 | ||
63 | static size_t ceph_vxattrcb_rsubdirs(struct ceph_inode_info *ci, char *val, | 67 | static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val, |
64 | size_t size) | 68 | size_t size) |
65 | { | 69 | { |
66 | return snprintf(val, size, "%lld", ci->i_rsubdirs); | 70 | return snprintf(val, size, "%lld", ci->i_rsubdirs); |
67 | } | 71 | } |
68 | 72 | ||
69 | static size_t ceph_vxattrcb_rbytes(struct ceph_inode_info *ci, char *val, | 73 | static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, |
70 | size_t size) | 74 | size_t size) |
71 | { | 75 | { |
72 | return snprintf(val, size, "%lld", ci->i_rbytes); | 76 | return snprintf(val, size, "%lld", ci->i_rbytes); |
73 | } | 77 | } |
74 | 78 | ||
75 | static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, | 79 | static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, |
76 | size_t size) | 80 | size_t size) |
77 | { | 81 | { |
78 | return snprintf(val, size, "%ld.%ld", (long)ci->i_rctime.tv_sec, | 82 | return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, |
79 | (long)ci->i_rctime.tv_nsec); | 83 | (long)ci->i_rctime.tv_nsec); |
80 | } | 84 | } |
81 | 85 | ||
82 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { | 86 | #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name |
83 | { true, "ceph.dir.entries", ceph_vxattrcb_entries}, | 87 | |
84 | { true, "ceph.dir.files", ceph_vxattrcb_files}, | 88 | #define XATTR_NAME_CEPH(_type, _name) \ |
85 | { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, | 89 | { \ |
86 | { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, | 90 | .name = CEPH_XATTR_NAME(_type, _name), \ |
87 | { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, | 91 | .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ |
88 | { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, | 92 | .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ |
89 | { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, | 93 | .readonly = true, \ |
90 | { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, | 94 | } |
91 | { true, NULL, NULL } | 95 | |
96 | static struct ceph_vxattr ceph_dir_vxattrs[] = { | ||
97 | XATTR_NAME_CEPH(dir, entries), | ||
98 | XATTR_NAME_CEPH(dir, files), | ||
99 | XATTR_NAME_CEPH(dir, subdirs), | ||
100 | XATTR_NAME_CEPH(dir, rentries), | ||
101 | XATTR_NAME_CEPH(dir, rfiles), | ||
102 | XATTR_NAME_CEPH(dir, rsubdirs), | ||
103 | XATTR_NAME_CEPH(dir, rbytes), | ||
104 | XATTR_NAME_CEPH(dir, rctime), | ||
105 | { 0 } /* Required table terminator */ | ||
92 | }; | 106 | }; |
107 | static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ | ||
93 | 108 | ||
94 | /* files */ | 109 | /* files */ |
95 | 110 | ||
96 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | 111 | static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, |
97 | size_t size) | 112 | size_t size) |
98 | { | 113 | { |
99 | int ret; | 114 | int ret; |
@@ -103,21 +118,32 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | |||
103 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | 118 | (unsigned long long)ceph_file_layout_su(ci->i_layout), |
104 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | 119 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), |
105 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); | 120 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); |
106 | if (ceph_file_layout_pg_preferred(ci->i_layout)) | 121 | |
107 | ret += snprintf(val + ret, size, "preferred_osd=%lld\n", | 122 | if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) { |
123 | val += ret; | ||
124 | size -= ret; | ||
125 | ret += snprintf(val, size, "preferred_osd=%lld\n", | ||
108 | (unsigned long long)ceph_file_layout_pg_preferred( | 126 | (unsigned long long)ceph_file_layout_pg_preferred( |
109 | ci->i_layout)); | 127 | ci->i_layout)); |
128 | } | ||
129 | |||
110 | return ret; | 130 | return ret; |
111 | } | 131 | } |
112 | 132 | ||
113 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { | 133 | static struct ceph_vxattr ceph_file_vxattrs[] = { |
114 | { true, "ceph.file.layout", ceph_vxattrcb_layout}, | 134 | XATTR_NAME_CEPH(file, layout), |
115 | /* The following extended attribute name is deprecated */ | 135 | /* The following extended attribute name is deprecated */ |
116 | { true, "ceph.layout", ceph_vxattrcb_layout}, | 136 | { |
117 | { true, NULL, NULL } | 137 | .name = XATTR_CEPH_PREFIX "layout", |
138 | .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), | ||
139 | .getxattr_cb = ceph_vxattrcb_file_layout, | ||
140 | .readonly = true, | ||
141 | }, | ||
142 | { 0 } /* Required table terminator */ | ||
118 | }; | 143 | }; |
144 | static size_t ceph_file_vxattrs_name_size; /* total size of all names */ | ||
119 | 145 | ||
120 | static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) | 146 | static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode) |
121 | { | 147 | { |
122 | if (S_ISDIR(inode->i_mode)) | 148 | if (S_ISDIR(inode->i_mode)) |
123 | return ceph_dir_vxattrs; | 149 | return ceph_dir_vxattrs; |
@@ -126,14 +152,59 @@ static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) | |||
126 | return NULL; | 152 | return NULL; |
127 | } | 153 | } |
128 | 154 | ||
129 | static struct ceph_vxattr_cb *ceph_match_vxattr(struct ceph_vxattr_cb *vxattr, | 155 | static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs) |
156 | { | ||
157 | if (vxattrs == ceph_dir_vxattrs) | ||
158 | return ceph_dir_vxattrs_name_size; | ||
159 | if (vxattrs == ceph_file_vxattrs) | ||
160 | return ceph_file_vxattrs_name_size; | ||
161 | BUG(); | ||
162 | |||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Compute the aggregate size (including terminating '\0') of all | ||
168 | * virtual extended attribute names in the given vxattr table. | ||
169 | */ | ||
170 | static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) | ||
171 | { | ||
172 | struct ceph_vxattr *vxattr; | ||
173 | size_t size = 0; | ||
174 | |||
175 | for (vxattr = vxattrs; vxattr->name; vxattr++) | ||
176 | size += vxattr->name_size; | ||
177 | |||
178 | return size; | ||
179 | } | ||
180 | |||
181 | /* Routines called at initialization and exit time */ | ||
182 | |||
183 | void __init ceph_xattr_init(void) | ||
184 | { | ||
185 | ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs); | ||
186 | ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs); | ||
187 | } | ||
188 | |||
189 | void ceph_xattr_exit(void) | ||
190 | { | ||
191 | ceph_dir_vxattrs_name_size = 0; | ||
192 | ceph_file_vxattrs_name_size = 0; | ||
193 | } | ||
194 | |||
195 | static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, | ||
130 | const char *name) | 196 | const char *name) |
131 | { | 197 | { |
132 | do { | 198 | struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode); |
133 | if (strcmp(vxattr->name, name) == 0) | 199 | |
134 | return vxattr; | 200 | if (vxattr) { |
135 | vxattr++; | 201 | while (vxattr->name) { |
136 | } while (vxattr->name); | 202 | if (!strcmp(vxattr->name, name)) |
203 | return vxattr; | ||
204 | vxattr++; | ||
205 | } | ||
206 | } | ||
207 | |||
137 | return NULL; | 208 | return NULL; |
138 | } | 209 | } |
139 | 210 | ||
@@ -502,17 +573,15 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, | |||
502 | { | 573 | { |
503 | struct inode *inode = dentry->d_inode; | 574 | struct inode *inode = dentry->d_inode; |
504 | struct ceph_inode_info *ci = ceph_inode(inode); | 575 | struct ceph_inode_info *ci = ceph_inode(inode); |
505 | struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); | ||
506 | int err; | 576 | int err; |
507 | struct ceph_inode_xattr *xattr; | 577 | struct ceph_inode_xattr *xattr; |
508 | struct ceph_vxattr_cb *vxattr = NULL; | 578 | struct ceph_vxattr *vxattr = NULL; |
509 | 579 | ||
510 | if (!ceph_is_valid_xattr(name)) | 580 | if (!ceph_is_valid_xattr(name)) |
511 | return -ENODATA; | 581 | return -ENODATA; |
512 | 582 | ||
513 | /* let's see if a virtual xattr was requested */ | 583 | /* let's see if a virtual xattr was requested */ |
514 | if (vxattrs) | 584 | vxattr = ceph_match_vxattr(inode, name); |
515 | vxattr = ceph_match_vxattr(vxattrs, name); | ||
516 | 585 | ||
517 | spin_lock(&ci->i_ceph_lock); | 586 | spin_lock(&ci->i_ceph_lock); |
518 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, | 587 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, |
@@ -568,7 +637,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
568 | { | 637 | { |
569 | struct inode *inode = dentry->d_inode; | 638 | struct inode *inode = dentry->d_inode; |
570 | struct ceph_inode_info *ci = ceph_inode(inode); | 639 | struct ceph_inode_info *ci = ceph_inode(inode); |
571 | struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); | 640 | struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); |
572 | u32 vir_namelen = 0; | 641 | u32 vir_namelen = 0; |
573 | u32 namelen; | 642 | u32 namelen; |
574 | int err; | 643 | int err; |
@@ -596,11 +665,12 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
596 | goto out; | 665 | goto out; |
597 | 666 | ||
598 | list_xattr: | 667 | list_xattr: |
599 | vir_namelen = 0; | 668 | /* |
600 | /* include virtual dir xattrs */ | 669 | * Start with virtual dir xattr names (if any) (including |
601 | if (vxattrs) | 670 | * terminating '\0' characters for each). |
602 | for (i = 0; vxattrs[i].name; i++) | 671 | */ |
603 | vir_namelen += strlen(vxattrs[i].name) + 1; | 672 | vir_namelen = ceph_vxattrs_name_size(vxattrs); |
673 | |||
604 | /* adding 1 byte per each variable due to the null termination */ | 674 | /* adding 1 byte per each variable due to the null termination */ |
605 | namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; | 675 | namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; |
606 | err = -ERANGE; | 676 | err = -ERANGE; |
@@ -698,17 +768,17 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
698 | const void *value, size_t size, int flags) | 768 | const void *value, size_t size, int flags) |
699 | { | 769 | { |
700 | struct inode *inode = dentry->d_inode; | 770 | struct inode *inode = dentry->d_inode; |
771 | struct ceph_vxattr *vxattr; | ||
701 | struct ceph_inode_info *ci = ceph_inode(inode); | 772 | struct ceph_inode_info *ci = ceph_inode(inode); |
702 | struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); | 773 | int issued; |
703 | int err; | 774 | int err; |
775 | int dirty; | ||
704 | int name_len = strlen(name); | 776 | int name_len = strlen(name); |
705 | int val_len = size; | 777 | int val_len = size; |
706 | char *newname = NULL; | 778 | char *newname = NULL; |
707 | char *newval = NULL; | 779 | char *newval = NULL; |
708 | struct ceph_inode_xattr *xattr = NULL; | 780 | struct ceph_inode_xattr *xattr = NULL; |
709 | int issued; | ||
710 | int required_blob_size; | 781 | int required_blob_size; |
711 | int dirty; | ||
712 | 782 | ||
713 | if (ceph_snap(inode) != CEPH_NOSNAP) | 783 | if (ceph_snap(inode) != CEPH_NOSNAP) |
714 | return -EROFS; | 784 | return -EROFS; |
@@ -716,12 +786,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
716 | if (!ceph_is_valid_xattr(name)) | 786 | if (!ceph_is_valid_xattr(name)) |
717 | return -EOPNOTSUPP; | 787 | return -EOPNOTSUPP; |
718 | 788 | ||
719 | if (vxattrs) { | 789 | vxattr = ceph_match_vxattr(inode, name); |
720 | struct ceph_vxattr_cb *vxattr = | 790 | if (vxattr && vxattr->readonly) |
721 | ceph_match_vxattr(vxattrs, name); | 791 | return -EOPNOTSUPP; |
722 | if (vxattr && vxattr->readonly) | ||
723 | return -EOPNOTSUPP; | ||
724 | } | ||
725 | 792 | ||
726 | /* preallocate memory for xattr name, value, index node */ | 793 | /* preallocate memory for xattr name, value, index node */ |
727 | err = -ENOMEM; | 794 | err = -ENOMEM; |
@@ -730,11 +797,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
730 | goto out; | 797 | goto out; |
731 | 798 | ||
732 | if (val_len) { | 799 | if (val_len) { |
733 | newval = kmalloc(val_len + 1, GFP_NOFS); | 800 | newval = kmemdup(value, val_len, GFP_NOFS); |
734 | if (!newval) | 801 | if (!newval) |
735 | goto out; | 802 | goto out; |
736 | memcpy(newval, value, val_len); | ||
737 | newval[val_len] = '\0'; | ||
738 | } | 803 | } |
739 | 804 | ||
740 | xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); | 805 | xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); |
@@ -744,6 +809,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
744 | spin_lock(&ci->i_ceph_lock); | 809 | spin_lock(&ci->i_ceph_lock); |
745 | retry: | 810 | retry: |
746 | issued = __ceph_caps_issued(ci, NULL); | 811 | issued = __ceph_caps_issued(ci, NULL); |
812 | dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); | ||
747 | if (!(issued & CEPH_CAP_XATTR_EXCL)) | 813 | if (!(issued & CEPH_CAP_XATTR_EXCL)) |
748 | goto do_sync; | 814 | goto do_sync; |
749 | __build_xattrs(inode); | 815 | __build_xattrs(inode); |
@@ -752,7 +818,7 @@ retry: | |||
752 | 818 | ||
753 | if (!ci->i_xattrs.prealloc_blob || | 819 | if (!ci->i_xattrs.prealloc_blob || |
754 | required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { | 820 | required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { |
755 | struct ceph_buffer *blob = NULL; | 821 | struct ceph_buffer *blob; |
756 | 822 | ||
757 | spin_unlock(&ci->i_ceph_lock); | 823 | spin_unlock(&ci->i_ceph_lock); |
758 | dout(" preaallocating new blob size=%d\n", required_blob_size); | 824 | dout(" preaallocating new blob size=%d\n", required_blob_size); |
@@ -766,12 +832,13 @@ retry: | |||
766 | goto retry; | 832 | goto retry; |
767 | } | 833 | } |
768 | 834 | ||
769 | dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); | ||
770 | err = __set_xattr(ci, newname, name_len, newval, | 835 | err = __set_xattr(ci, newname, name_len, newval, |
771 | val_len, 1, 1, 1, &xattr); | 836 | val_len, 1, 1, 1, &xattr); |
837 | |||
772 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); | 838 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); |
773 | ci->i_xattrs.dirty = true; | 839 | ci->i_xattrs.dirty = true; |
774 | inode->i_ctime = CURRENT_TIME; | 840 | inode->i_ctime = CURRENT_TIME; |
841 | |||
775 | spin_unlock(&ci->i_ceph_lock); | 842 | spin_unlock(&ci->i_ceph_lock); |
776 | if (dirty) | 843 | if (dirty) |
777 | __mark_inode_dirty(inode, dirty); | 844 | __mark_inode_dirty(inode, dirty); |
@@ -816,8 +883,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
816 | int ceph_removexattr(struct dentry *dentry, const char *name) | 883 | int ceph_removexattr(struct dentry *dentry, const char *name) |
817 | { | 884 | { |
818 | struct inode *inode = dentry->d_inode; | 885 | struct inode *inode = dentry->d_inode; |
886 | struct ceph_vxattr *vxattr; | ||
819 | struct ceph_inode_info *ci = ceph_inode(inode); | 887 | struct ceph_inode_info *ci = ceph_inode(inode); |
820 | struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); | ||
821 | int issued; | 888 | int issued; |
822 | int err; | 889 | int err; |
823 | int required_blob_size; | 890 | int required_blob_size; |
@@ -829,22 +896,19 @@ int ceph_removexattr(struct dentry *dentry, const char *name) | |||
829 | if (!ceph_is_valid_xattr(name)) | 896 | if (!ceph_is_valid_xattr(name)) |
830 | return -EOPNOTSUPP; | 897 | return -EOPNOTSUPP; |
831 | 898 | ||
832 | if (vxattrs) { | 899 | vxattr = ceph_match_vxattr(inode, name); |
833 | struct ceph_vxattr_cb *vxattr = | 900 | if (vxattr && vxattr->readonly) |
834 | ceph_match_vxattr(vxattrs, name); | 901 | return -EOPNOTSUPP; |
835 | if (vxattr && vxattr->readonly) | ||
836 | return -EOPNOTSUPP; | ||
837 | } | ||
838 | 902 | ||
839 | err = -ENOMEM; | 903 | err = -ENOMEM; |
840 | spin_lock(&ci->i_ceph_lock); | 904 | spin_lock(&ci->i_ceph_lock); |
841 | __build_xattrs(inode); | ||
842 | retry: | 905 | retry: |
843 | issued = __ceph_caps_issued(ci, NULL); | 906 | issued = __ceph_caps_issued(ci, NULL); |
844 | dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); | 907 | dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); |
845 | 908 | ||
846 | if (!(issued & CEPH_CAP_XATTR_EXCL)) | 909 | if (!(issued & CEPH_CAP_XATTR_EXCL)) |
847 | goto do_sync; | 910 | goto do_sync; |
911 | __build_xattrs(inode); | ||
848 | 912 | ||
849 | required_blob_size = __get_required_blob_size(ci, 0, 0); | 913 | required_blob_size = __get_required_blob_size(ci, 0, 0); |
850 | 914 | ||
@@ -865,10 +929,10 @@ retry: | |||
865 | } | 929 | } |
866 | 930 | ||
867 | err = __remove_xattr_by_name(ceph_inode(inode), name); | 931 | err = __remove_xattr_by_name(ceph_inode(inode), name); |
932 | |||
868 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); | 933 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); |
869 | ci->i_xattrs.dirty = true; | 934 | ci->i_xattrs.dirty = true; |
870 | inode->i_ctime = CURRENT_TIME; | 935 | inode->i_ctime = CURRENT_TIME; |
871 | |||
872 | spin_unlock(&ci->i_ceph_lock); | 936 | spin_unlock(&ci->i_ceph_lock); |
873 | if (dirty) | 937 | if (dirty) |
874 | __mark_inode_dirty(inode, dirty); | 938 | __mark_inode_dirty(inode, dirty); |
diff --git a/fs/cifs/README b/fs/cifs/README index 895da1dc1550..b7d782bab797 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -753,10 +753,6 @@ module loading or during the runtime by using the interface | |||
753 | 753 | ||
754 | i.e. echo "value" > /sys/module/cifs/parameters/<param> | 754 | i.e. echo "value" > /sys/module/cifs/parameters/<param> |
755 | 755 | ||
756 | 1. echo_retries - The number of echo attempts before giving up and | 756 | 1. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default. |
757 | reconnecting to the server. The default is 5. The value 0 | ||
758 | means never reconnect. | ||
759 | |||
760 | 2. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default. | ||
761 | [Y/y/1]. To disable use any of [N/n/0]. | 757 | [Y/y/1]. To disable use any of [N/n/0]. |
762 | 758 | ||
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 24b3dfc05282..573b899b5a5d 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -171,8 +171,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
171 | seq_printf(m, "TCP status: %d\n\tLocal Users To " | 171 | seq_printf(m, "TCP status: %d\n\tLocal Users To " |
172 | "Server: %d SecMode: 0x%x Req On Wire: %d", | 172 | "Server: %d SecMode: 0x%x Req On Wire: %d", |
173 | server->tcpStatus, server->srv_count, | 173 | server->tcpStatus, server->srv_count, |
174 | server->sec_mode, | 174 | server->sec_mode, in_flight(server)); |
175 | atomic_read(&server->inFlight)); | ||
176 | 175 | ||
177 | #ifdef CONFIG_CIFS_STATS2 | 176 | #ifdef CONFIG_CIFS_STATS2 |
178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", | 177 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 418fc42fb8b2..eee522c56ef0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -76,12 +76,7 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " | |||
76 | unsigned int cifs_max_pending = CIFS_MAX_REQ; | 76 | unsigned int cifs_max_pending = CIFS_MAX_REQ; |
77 | module_param(cifs_max_pending, int, 0444); | 77 | module_param(cifs_max_pending, int, 0444); |
78 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " | 78 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " |
79 | "Default: 50 Range: 2 to 256"); | 79 | "Default: 32767 Range: 2 to 32767."); |
80 | unsigned short echo_retries = 5; | ||
81 | module_param(echo_retries, ushort, 0644); | ||
82 | MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " | ||
83 | "reconnecting server. Default: 5. 0 means " | ||
84 | "never reconnect."); | ||
85 | module_param(enable_oplocks, bool, 0644); | 80 | module_param(enable_oplocks, bool, 0644); |
86 | MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:" | 81 | MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:" |
87 | "y/Y/1"); | 82 | "y/Y/1"); |
@@ -1111,9 +1106,9 @@ init_cifs(void) | |||
1111 | if (cifs_max_pending < 2) { | 1106 | if (cifs_max_pending < 2) { |
1112 | cifs_max_pending = 2; | 1107 | cifs_max_pending = 2; |
1113 | cFYI(1, "cifs_max_pending set to min of 2"); | 1108 | cFYI(1, "cifs_max_pending set to min of 2"); |
1114 | } else if (cifs_max_pending > 256) { | 1109 | } else if (cifs_max_pending > CIFS_MAX_REQ) { |
1115 | cifs_max_pending = 256; | 1110 | cifs_max_pending = CIFS_MAX_REQ; |
1116 | cFYI(1, "cifs_max_pending set to max of 256"); | 1111 | cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ); |
1117 | } | 1112 | } |
1118 | 1113 | ||
1119 | rc = cifs_fscache_register(); | 1114 | rc = cifs_fscache_register(); |
@@ -1175,11 +1170,8 @@ static void __exit | |||
1175 | exit_cifs(void) | 1170 | exit_cifs(void) |
1176 | { | 1171 | { |
1177 | cFYI(DBG2, "exit_cifs"); | 1172 | cFYI(DBG2, "exit_cifs"); |
1178 | cifs_proc_clean(); | 1173 | unregister_filesystem(&cifs_fs_type); |
1179 | cifs_fscache_unregister(); | ||
1180 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
1181 | cifs_dfs_release_automount_timer(); | 1174 | cifs_dfs_release_automount_timer(); |
1182 | #endif | ||
1183 | #ifdef CONFIG_CIFS_ACL | 1175 | #ifdef CONFIG_CIFS_ACL |
1184 | cifs_destroy_idmaptrees(); | 1176 | cifs_destroy_idmaptrees(); |
1185 | exit_cifs_idmap(); | 1177 | exit_cifs_idmap(); |
@@ -1187,10 +1179,11 @@ exit_cifs(void) | |||
1187 | #ifdef CONFIG_CIFS_UPCALL | 1179 | #ifdef CONFIG_CIFS_UPCALL |
1188 | unregister_key_type(&cifs_spnego_key_type); | 1180 | unregister_key_type(&cifs_spnego_key_type); |
1189 | #endif | 1181 | #endif |
1190 | unregister_filesystem(&cifs_fs_type); | ||
1191 | cifs_destroy_inodecache(); | ||
1192 | cifs_destroy_mids(); | ||
1193 | cifs_destroy_request_bufs(); | 1182 | cifs_destroy_request_bufs(); |
1183 | cifs_destroy_mids(); | ||
1184 | cifs_destroy_inodecache(); | ||
1185 | cifs_fscache_unregister(); | ||
1186 | cifs_proc_clean(); | ||
1194 | } | 1187 | } |
1195 | 1188 | ||
1196 | MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); | 1189 | MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 76e7d8b6da17..339ebe3ebc0d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -55,14 +55,9 @@ | |||
55 | 55 | ||
56 | /* | 56 | /* |
57 | * MAX_REQ is the maximum number of requests that WE will send | 57 | * MAX_REQ is the maximum number of requests that WE will send |
58 | * on one socket concurrently. It also matches the most common | 58 | * on one socket concurrently. |
59 | * value of max multiplex returned by servers. We may | ||
60 | * eventually want to use the negotiated value (in case | ||
61 | * future servers can handle more) when we are more confident that | ||
62 | * we will not have problems oveloading the socket with pending | ||
63 | * write data. | ||
64 | */ | 59 | */ |
65 | #define CIFS_MAX_REQ 50 | 60 | #define CIFS_MAX_REQ 32767 |
66 | 61 | ||
67 | #define RFC1001_NAME_LEN 15 | 62 | #define RFC1001_NAME_LEN 15 |
68 | #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) | 63 | #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) |
@@ -255,7 +250,9 @@ struct TCP_Server_Info { | |||
255 | bool noblocksnd; /* use blocking sendmsg */ | 250 | bool noblocksnd; /* use blocking sendmsg */ |
256 | bool noautotune; /* do not autotune send buf sizes */ | 251 | bool noautotune; /* do not autotune send buf sizes */ |
257 | bool tcp_nodelay; | 252 | bool tcp_nodelay; |
258 | atomic_t inFlight; /* number of requests on the wire to server */ | 253 | int credits; /* send no more requests at once */ |
254 | unsigned int in_flight; /* number of requests on the wire to server */ | ||
255 | spinlock_t req_lock; /* protect the two values above */ | ||
259 | struct mutex srv_mutex; | 256 | struct mutex srv_mutex; |
260 | struct task_struct *tsk; | 257 | struct task_struct *tsk; |
261 | char server_GUID[16]; | 258 | char server_GUID[16]; |
@@ -263,6 +260,7 @@ struct TCP_Server_Info { | |||
263 | bool session_estab; /* mark when very first sess is established */ | 260 | bool session_estab; /* mark when very first sess is established */ |
264 | u16 dialect; /* dialect index that server chose */ | 261 | u16 dialect; /* dialect index that server chose */ |
265 | enum securityEnum secType; | 262 | enum securityEnum secType; |
263 | bool oplocks:1; /* enable oplocks */ | ||
266 | unsigned int maxReq; /* Clients should submit no more */ | 264 | unsigned int maxReq; /* Clients should submit no more */ |
267 | /* than maxReq distinct unanswered SMBs to the server when using */ | 265 | /* than maxReq distinct unanswered SMBs to the server when using */ |
268 | /* multiplexed reads or writes */ | 266 | /* multiplexed reads or writes */ |
@@ -307,6 +305,36 @@ struct TCP_Server_Info { | |||
307 | #endif | 305 | #endif |
308 | }; | 306 | }; |
309 | 307 | ||
308 | static inline unsigned int | ||
309 | in_flight(struct TCP_Server_Info *server) | ||
310 | { | ||
311 | unsigned int num; | ||
312 | spin_lock(&server->req_lock); | ||
313 | num = server->in_flight; | ||
314 | spin_unlock(&server->req_lock); | ||
315 | return num; | ||
316 | } | ||
317 | |||
318 | static inline int* | ||
319 | get_credits_field(struct TCP_Server_Info *server) | ||
320 | { | ||
321 | /* | ||
322 | * This will change to switch statement when we reserve slots for echos | ||
323 | * and oplock breaks. | ||
324 | */ | ||
325 | return &server->credits; | ||
326 | } | ||
327 | |||
328 | static inline bool | ||
329 | has_credits(struct TCP_Server_Info *server, int *credits) | ||
330 | { | ||
331 | int num; | ||
332 | spin_lock(&server->req_lock); | ||
333 | num = *credits; | ||
334 | spin_unlock(&server->req_lock); | ||
335 | return num > 0; | ||
336 | } | ||
337 | |||
310 | /* | 338 | /* |
311 | * Macros to allow the TCP_Server_Info->net field and related code to drop out | 339 | * Macros to allow the TCP_Server_Info->net field and related code to drop out |
312 | * when CONFIG_NET_NS isn't set. | 340 | * when CONFIG_NET_NS isn't set. |
@@ -1010,9 +1038,6 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ | |||
1010 | GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ | 1038 | GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ |
1011 | GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ | 1039 | GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ |
1012 | 1040 | ||
1013 | /* reconnect after this many failed echo attempts */ | ||
1014 | GLOBAL_EXTERN unsigned short echo_retries; | ||
1015 | |||
1016 | #ifdef CONFIG_CIFS_ACL | 1041 | #ifdef CONFIG_CIFS_ACL |
1017 | GLOBAL_EXTERN struct rb_root uidtree; | 1042 | GLOBAL_EXTERN struct rb_root uidtree; |
1018 | GLOBAL_EXTERN struct rb_root gidtree; | 1043 | GLOBAL_EXTERN struct rb_root gidtree; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 6f4e243e0f62..503e73d8bdb7 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -88,6 +88,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid, | |||
88 | struct smb_hdr *in_buf , | 88 | struct smb_hdr *in_buf , |
89 | struct smb_hdr *out_buf, | 89 | struct smb_hdr *out_buf, |
90 | int *bytes_returned); | 90 | int *bytes_returned); |
91 | extern void cifs_add_credits(struct TCP_Server_Info *server, | ||
92 | const unsigned int add); | ||
93 | extern void cifs_set_credits(struct TCP_Server_Info *server, const int val); | ||
91 | extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); | 94 | extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); |
92 | extern bool is_valid_oplock_break(struct smb_hdr *smb, | 95 | extern bool is_valid_oplock_break(struct smb_hdr *smb, |
93 | struct TCP_Server_Info *); | 96 | struct TCP_Server_Info *); |
@@ -168,7 +171,13 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data, | |||
168 | const char *devname); | 171 | const char *devname); |
169 | extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); | 172 | extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); |
170 | extern void cifs_umount(struct cifs_sb_info *); | 173 | extern void cifs_umount(struct cifs_sb_info *); |
174 | |||
175 | #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) | ||
171 | extern void cifs_dfs_release_automount_timer(void); | 176 | extern void cifs_dfs_release_automount_timer(void); |
177 | #else /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */ | ||
178 | #define cifs_dfs_release_automount_timer() do { } while (0) | ||
179 | #endif /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */ | ||
180 | |||
172 | void cifs_proc_init(void); | 181 | void cifs_proc_init(void); |
173 | void cifs_proc_clean(void); | 182 | void cifs_proc_clean(void); |
174 | 183 | ||
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 8b7794c31591..70aac35c398f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -458,7 +458,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) | |||
458 | goto neg_err_exit; | 458 | goto neg_err_exit; |
459 | } | 459 | } |
460 | server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); | 460 | server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); |
461 | server->maxReq = le16_to_cpu(rsp->MaxMpxCount); | 461 | server->maxReq = min_t(unsigned int, |
462 | le16_to_cpu(rsp->MaxMpxCount), | ||
463 | cifs_max_pending); | ||
464 | cifs_set_credits(server, server->maxReq); | ||
462 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); | 465 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); |
463 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | 466 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); |
464 | /* even though we do not use raw we might as well set this | 467 | /* even though we do not use raw we might as well set this |
@@ -564,7 +567,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) | |||
564 | 567 | ||
565 | /* one byte, so no need to convert this or EncryptionKeyLen from | 568 | /* one byte, so no need to convert this or EncryptionKeyLen from |
566 | little endian */ | 569 | little endian */ |
567 | server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); | 570 | server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), |
571 | cifs_max_pending); | ||
572 | cifs_set_credits(server, server->maxReq); | ||
568 | /* probably no need to store and check maxvcs */ | 573 | /* probably no need to store and check maxvcs */ |
569 | server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); | 574 | server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); |
570 | server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); | 575 | server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); |
@@ -716,8 +721,7 @@ cifs_echo_callback(struct mid_q_entry *mid) | |||
716 | struct TCP_Server_Info *server = mid->callback_data; | 721 | struct TCP_Server_Info *server = mid->callback_data; |
717 | 722 | ||
718 | DeleteMidQEntry(mid); | 723 | DeleteMidQEntry(mid); |
719 | atomic_dec(&server->inFlight); | 724 | cifs_add_credits(server, 1); |
720 | wake_up(&server->request_q); | ||
721 | } | 725 | } |
722 | 726 | ||
723 | int | 727 | int |
@@ -1669,8 +1673,7 @@ cifs_readv_callback(struct mid_q_entry *mid) | |||
1669 | 1673 | ||
1670 | queue_work(system_nrt_wq, &rdata->work); | 1674 | queue_work(system_nrt_wq, &rdata->work); |
1671 | DeleteMidQEntry(mid); | 1675 | DeleteMidQEntry(mid); |
1672 | atomic_dec(&server->inFlight); | 1676 | cifs_add_credits(server, 1); |
1673 | wake_up(&server->request_q); | ||
1674 | } | 1677 | } |
1675 | 1678 | ||
1676 | /* cifs_async_readv - send an async write, and set up mid to handle result */ | 1679 | /* cifs_async_readv - send an async write, and set up mid to handle result */ |
@@ -2110,8 +2113,7 @@ cifs_writev_callback(struct mid_q_entry *mid) | |||
2110 | 2113 | ||
2111 | queue_work(system_nrt_wq, &wdata->work); | 2114 | queue_work(system_nrt_wq, &wdata->work); |
2112 | DeleteMidQEntry(mid); | 2115 | DeleteMidQEntry(mid); |
2113 | atomic_dec(&tcon->ses->server->inFlight); | 2116 | cifs_add_credits(tcon->ses->server, 1); |
2114 | wake_up(&tcon->ses->server->request_q); | ||
2115 | } | 2117 | } |
2116 | 2118 | ||
2117 | /* cifs_async_writev - send an async write, and set up mid to handle result */ | 2119 | /* cifs_async_writev - send an async write, and set up mid to handle result */ |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 602f77c304c9..5560e1d5e54b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -373,12 +373,22 @@ allocate_buffers(struct TCP_Server_Info *server) | |||
373 | static bool | 373 | static bool |
374 | server_unresponsive(struct TCP_Server_Info *server) | 374 | server_unresponsive(struct TCP_Server_Info *server) |
375 | { | 375 | { |
376 | if (echo_retries > 0 && server->tcpStatus == CifsGood && | 376 | /* |
377 | time_after(jiffies, server->lstrp + | 377 | * We need to wait 2 echo intervals to make sure we handle such |
378 | (echo_retries * SMB_ECHO_INTERVAL))) { | 378 | * situations right: |
379 | * 1s client sends a normal SMB request | ||
380 | * 2s client gets a response | ||
381 | * 30s echo workqueue job pops, and decides we got a response recently | ||
382 | * and don't need to send another | ||
383 | * ... | ||
384 | * 65s kernel_recvmsg times out, and we see that we haven't gotten | ||
385 | * a response in >60s. | ||
386 | */ | ||
387 | if (server->tcpStatus == CifsGood && | ||
388 | time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) { | ||
379 | cERROR(1, "Server %s has not responded in %d seconds. " | 389 | cERROR(1, "Server %s has not responded in %d seconds. " |
380 | "Reconnecting...", server->hostname, | 390 | "Reconnecting...", server->hostname, |
381 | (echo_retries * SMB_ECHO_INTERVAL / HZ)); | 391 | (2 * SMB_ECHO_INTERVAL) / HZ); |
382 | cifs_reconnect(server); | 392 | cifs_reconnect(server); |
383 | wake_up(&server->response_q); | 393 | wake_up(&server->response_q); |
384 | return true; | 394 | return true; |
@@ -642,19 +652,11 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) | |||
642 | spin_unlock(&GlobalMid_Lock); | 652 | spin_unlock(&GlobalMid_Lock); |
643 | wake_up_all(&server->response_q); | 653 | wake_up_all(&server->response_q); |
644 | 654 | ||
645 | /* | 655 | /* check if we have blocked requests that need to free */ |
646 | * Check if we have blocked requests that need to free. Note that | 656 | spin_lock(&server->req_lock); |
647 | * cifs_max_pending is normally 50, but can be set at module install | 657 | if (server->credits <= 0) |
648 | * time to as little as two. | 658 | server->credits = 1; |
649 | */ | 659 | spin_unlock(&server->req_lock); |
650 | spin_lock(&GlobalMid_Lock); | ||
651 | if (atomic_read(&server->inFlight) >= cifs_max_pending) | ||
652 | atomic_set(&server->inFlight, cifs_max_pending - 1); | ||
653 | /* | ||
654 | * We do not want to set the max_pending too low or we could end up | ||
655 | * with the counter going negative. | ||
656 | */ | ||
657 | spin_unlock(&GlobalMid_Lock); | ||
658 | /* | 660 | /* |
659 | * Although there should not be any requests blocked on this queue it | 661 | * Although there should not be any requests blocked on this queue it |
660 | * can not hurt to be paranoid and try to wake up requests that may | 662 | * can not hurt to be paranoid and try to wake up requests that may |
@@ -1909,7 +1911,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1909 | tcp_ses->noblocksnd = volume_info->noblocksnd; | 1911 | tcp_ses->noblocksnd = volume_info->noblocksnd; |
1910 | tcp_ses->noautotune = volume_info->noautotune; | 1912 | tcp_ses->noautotune = volume_info->noautotune; |
1911 | tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; | 1913 | tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; |
1912 | atomic_set(&tcp_ses->inFlight, 0); | 1914 | tcp_ses->in_flight = 0; |
1915 | tcp_ses->credits = 1; | ||
1913 | init_waitqueue_head(&tcp_ses->response_q); | 1916 | init_waitqueue_head(&tcp_ses->response_q); |
1914 | init_waitqueue_head(&tcp_ses->request_q); | 1917 | init_waitqueue_head(&tcp_ses->request_q); |
1915 | INIT_LIST_HEAD(&tcp_ses->pending_mid_q); | 1918 | INIT_LIST_HEAD(&tcp_ses->pending_mid_q); |
@@ -3371,7 +3374,7 @@ cifs_ra_pages(struct cifs_sb_info *cifs_sb) | |||
3371 | int | 3374 | int |
3372 | cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) | 3375 | cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) |
3373 | { | 3376 | { |
3374 | int rc = 0; | 3377 | int rc; |
3375 | int xid; | 3378 | int xid; |
3376 | struct cifs_ses *pSesInfo; | 3379 | struct cifs_ses *pSesInfo; |
3377 | struct cifs_tcon *tcon; | 3380 | struct cifs_tcon *tcon; |
@@ -3398,6 +3401,7 @@ try_mount_again: | |||
3398 | FreeXid(xid); | 3401 | FreeXid(xid); |
3399 | } | 3402 | } |
3400 | #endif | 3403 | #endif |
3404 | rc = 0; | ||
3401 | tcon = NULL; | 3405 | tcon = NULL; |
3402 | pSesInfo = NULL; | 3406 | pSesInfo = NULL; |
3403 | srvTcp = NULL; | 3407 | srvTcp = NULL; |
@@ -3759,9 +3763,11 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) | |||
3759 | if (server->maxBuf != 0) | 3763 | if (server->maxBuf != 0) |
3760 | return 0; | 3764 | return 0; |
3761 | 3765 | ||
3766 | cifs_set_credits(server, 1); | ||
3762 | rc = CIFSSMBNegotiate(xid, ses); | 3767 | rc = CIFSSMBNegotiate(xid, ses); |
3763 | if (rc == -EAGAIN) { | 3768 | if (rc == -EAGAIN) { |
3764 | /* retry only once on 1st time connection */ | 3769 | /* retry only once on 1st time connection */ |
3770 | cifs_set_credits(server, 1); | ||
3765 | rc = CIFSSMBNegotiate(xid, ses); | 3771 | rc = CIFSSMBNegotiate(xid, ses); |
3766 | if (rc == -EAGAIN) | 3772 | if (rc == -EAGAIN) |
3767 | rc = -EHOSTDOWN; | 3773 | rc = -EHOSTDOWN; |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index bc7e24420ac0..d172c8ed9017 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
171 | } | 171 | } |
172 | tcon = tlink_tcon(tlink); | 172 | tcon = tlink_tcon(tlink); |
173 | 173 | ||
174 | if (enable_oplocks) | 174 | if (tcon->ses->server->oplocks) |
175 | oplock = REQ_OPLOCK; | 175 | oplock = REQ_OPLOCK; |
176 | 176 | ||
177 | if (nd) | 177 | if (nd) |
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
492 | { | 492 | { |
493 | int xid; | 493 | int xid; |
494 | int rc = 0; /* to get around spurious gcc warning, set to zero here */ | 494 | int rc = 0; /* to get around spurious gcc warning, set to zero here */ |
495 | __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; | 495 | __u32 oplock; |
496 | __u16 fileHandle = 0; | 496 | __u16 fileHandle = 0; |
497 | bool posix_open = false; | 497 | bool posix_open = false; |
498 | struct cifs_sb_info *cifs_sb; | 498 | struct cifs_sb_info *cifs_sb; |
@@ -518,6 +518,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
518 | } | 518 | } |
519 | pTcon = tlink_tcon(tlink); | 519 | pTcon = tlink_tcon(tlink); |
520 | 520 | ||
521 | oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0; | ||
522 | |||
521 | /* | 523 | /* |
522 | * Don't allow the separator character in a path component. | 524 | * Don't allow the separator character in a path component. |
523 | * The VFS will not allow "/", but "\" is allowed by posix. | 525 | * The VFS will not allow "/", but "\" is allowed by posix. |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5e64748a2917..159fcc56dc2d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -380,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
380 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", | 380 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", |
381 | inode, file->f_flags, full_path); | 381 | inode, file->f_flags, full_path); |
382 | 382 | ||
383 | if (enable_oplocks) | 383 | if (tcon->ses->server->oplocks) |
384 | oplock = REQ_OPLOCK; | 384 | oplock = REQ_OPLOCK; |
385 | else | 385 | else |
386 | oplock = 0; | 386 | oplock = 0; |
@@ -505,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | |||
505 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", | 505 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", |
506 | inode, pCifsFile->f_flags, full_path); | 506 | inode, pCifsFile->f_flags, full_path); |
507 | 507 | ||
508 | if (enable_oplocks) | 508 | if (tcon->ses->server->oplocks) |
509 | oplock = REQ_OPLOCK; | 509 | oplock = REQ_OPLOCK; |
510 | else | 510 | else |
511 | oplock = 0; | 511 | oplock = 0; |
@@ -960,9 +960,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
960 | INIT_LIST_HEAD(&locks_to_send); | 960 | INIT_LIST_HEAD(&locks_to_send); |
961 | 961 | ||
962 | /* | 962 | /* |
963 | * Allocating count locks is enough because no locks can be added to | 963 | * Allocating count locks is enough because no FL_POSIX locks can be |
964 | * the list while we are holding cinode->lock_mutex that protects | 964 | * added to the list while we are holding cinode->lock_mutex that |
965 | * locking operations of this inode. | 965 | * protects locking operations of this inode. |
966 | */ | 966 | */ |
967 | for (; i < count; i++) { | 967 | for (; i < count; i++) { |
968 | lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); | 968 | lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); |
@@ -973,18 +973,20 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
973 | list_add_tail(&lck->llist, &locks_to_send); | 973 | list_add_tail(&lck->llist, &locks_to_send); |
974 | } | 974 | } |
975 | 975 | ||
976 | i = 0; | ||
977 | el = locks_to_send.next; | 976 | el = locks_to_send.next; |
978 | lock_flocks(); | 977 | lock_flocks(); |
979 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | 978 | cifs_for_each_lock(cfile->dentry->d_inode, before) { |
979 | flock = *before; | ||
980 | if ((flock->fl_flags & FL_POSIX) == 0) | ||
981 | continue; | ||
980 | if (el == &locks_to_send) { | 982 | if (el == &locks_to_send) { |
981 | /* something is really wrong */ | 983 | /* |
984 | * The list ended. We don't have enough allocated | ||
985 | * structures - something is really wrong. | ||
986 | */ | ||
982 | cERROR(1, "Can't push all brlocks!"); | 987 | cERROR(1, "Can't push all brlocks!"); |
983 | break; | 988 | break; |
984 | } | 989 | } |
985 | flock = *before; | ||
986 | if ((flock->fl_flags & FL_POSIX) == 0) | ||
987 | continue; | ||
988 | length = 1 + flock->fl_end - flock->fl_start; | 990 | length = 1 + flock->fl_end - flock->fl_start; |
989 | if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) | 991 | if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) |
990 | type = CIFS_RDLCK; | 992 | type = CIFS_RDLCK; |
@@ -996,7 +998,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
996 | lck->length = length; | 998 | lck->length = length; |
997 | lck->type = type; | 999 | lck->type = type; |
998 | lck->offset = flock->fl_start; | 1000 | lck->offset = flock->fl_start; |
999 | i++; | ||
1000 | el = el->next; | 1001 | el = el->next; |
1001 | } | 1002 | } |
1002 | unlock_flocks(); | 1003 | unlock_flocks(); |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 703ef5c6fdb1..c273c12de98e 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -690,3 +690,22 @@ backup_cred(struct cifs_sb_info *cifs_sb) | |||
690 | 690 | ||
691 | return false; | 691 | return false; |
692 | } | 692 | } |
693 | |||
694 | void | ||
695 | cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add) | ||
696 | { | ||
697 | spin_lock(&server->req_lock); | ||
698 | server->credits += add; | ||
699 | server->in_flight--; | ||
700 | spin_unlock(&server->req_lock); | ||
701 | wake_up(&server->request_q); | ||
702 | } | ||
703 | |||
704 | void | ||
705 | cifs_set_credits(struct TCP_Server_Info *server, const int val) | ||
706 | { | ||
707 | spin_lock(&server->req_lock); | ||
708 | server->credits = val; | ||
709 | server->oplocks = val > 1 ? enable_oplocks : false; | ||
710 | spin_unlock(&server->req_lock); | ||
711 | } | ||
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 0cc9584f5889..310918b6fcb4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -254,44 +254,60 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, | |||
254 | return smb_sendv(server, &iov, 1); | 254 | return smb_sendv(server, &iov, 1); |
255 | } | 255 | } |
256 | 256 | ||
257 | static int wait_for_free_request(struct TCP_Server_Info *server, | 257 | static int |
258 | const int long_op) | 258 | wait_for_free_credits(struct TCP_Server_Info *server, const int optype, |
259 | int *credits) | ||
259 | { | 260 | { |
260 | if (long_op == CIFS_ASYNC_OP) { | 261 | int rc; |
262 | |||
263 | spin_lock(&server->req_lock); | ||
264 | if (optype == CIFS_ASYNC_OP) { | ||
261 | /* oplock breaks must not be held up */ | 265 | /* oplock breaks must not be held up */ |
262 | atomic_inc(&server->inFlight); | 266 | server->in_flight++; |
267 | *credits -= 1; | ||
268 | spin_unlock(&server->req_lock); | ||
263 | return 0; | 269 | return 0; |
264 | } | 270 | } |
265 | 271 | ||
266 | spin_lock(&GlobalMid_Lock); | ||
267 | while (1) { | 272 | while (1) { |
268 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { | 273 | if (*credits <= 0) { |
269 | spin_unlock(&GlobalMid_Lock); | 274 | spin_unlock(&server->req_lock); |
270 | cifs_num_waiters_inc(server); | 275 | cifs_num_waiters_inc(server); |
271 | wait_event(server->request_q, | 276 | rc = wait_event_killable(server->request_q, |
272 | atomic_read(&server->inFlight) | 277 | has_credits(server, credits)); |
273 | < cifs_max_pending); | ||
274 | cifs_num_waiters_dec(server); | 278 | cifs_num_waiters_dec(server); |
275 | spin_lock(&GlobalMid_Lock); | 279 | if (rc) |
280 | return rc; | ||
281 | spin_lock(&server->req_lock); | ||
276 | } else { | 282 | } else { |
277 | if (server->tcpStatus == CifsExiting) { | 283 | if (server->tcpStatus == CifsExiting) { |
278 | spin_unlock(&GlobalMid_Lock); | 284 | spin_unlock(&server->req_lock); |
279 | return -ENOENT; | 285 | return -ENOENT; |
280 | } | 286 | } |
281 | 287 | ||
282 | /* can not count locking commands against total | 288 | /* |
283 | as they are allowed to block on server */ | 289 | * Can not count locking commands against total |
290 | * as they are allowed to block on server. | ||
291 | */ | ||
284 | 292 | ||
285 | /* update # of requests on the wire to server */ | 293 | /* update # of requests on the wire to server */ |
286 | if (long_op != CIFS_BLOCKING_OP) | 294 | if (optype != CIFS_BLOCKING_OP) { |
287 | atomic_inc(&server->inFlight); | 295 | *credits -= 1; |
288 | spin_unlock(&GlobalMid_Lock); | 296 | server->in_flight++; |
297 | } | ||
298 | spin_unlock(&server->req_lock); | ||
289 | break; | 299 | break; |
290 | } | 300 | } |
291 | } | 301 | } |
292 | return 0; | 302 | return 0; |
293 | } | 303 | } |
294 | 304 | ||
305 | static int | ||
306 | wait_for_free_request(struct TCP_Server_Info *server, const int optype) | ||
307 | { | ||
308 | return wait_for_free_credits(server, optype, get_credits_field(server)); | ||
309 | } | ||
310 | |||
295 | static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, | 311 | static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, |
296 | struct mid_q_entry **ppmidQ) | 312 | struct mid_q_entry **ppmidQ) |
297 | { | 313 | { |
@@ -359,7 +375,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
359 | mid = AllocMidQEntry(hdr, server); | 375 | mid = AllocMidQEntry(hdr, server); |
360 | if (mid == NULL) { | 376 | if (mid == NULL) { |
361 | mutex_unlock(&server->srv_mutex); | 377 | mutex_unlock(&server->srv_mutex); |
362 | atomic_dec(&server->inFlight); | 378 | cifs_add_credits(server, 1); |
363 | wake_up(&server->request_q); | 379 | wake_up(&server->request_q); |
364 | return -ENOMEM; | 380 | return -ENOMEM; |
365 | } | 381 | } |
@@ -392,7 +408,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
392 | return rc; | 408 | return rc; |
393 | out_err: | 409 | out_err: |
394 | delete_mid(mid); | 410 | delete_mid(mid); |
395 | atomic_dec(&server->inFlight); | 411 | cifs_add_credits(server, 1); |
396 | wake_up(&server->request_q); | 412 | wake_up(&server->request_q); |
397 | return rc; | 413 | return rc; |
398 | } | 414 | } |
@@ -564,8 +580,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
564 | mutex_unlock(&ses->server->srv_mutex); | 580 | mutex_unlock(&ses->server->srv_mutex); |
565 | cifs_small_buf_release(in_buf); | 581 | cifs_small_buf_release(in_buf); |
566 | /* Update # of requests on wire to server */ | 582 | /* Update # of requests on wire to server */ |
567 | atomic_dec(&ses->server->inFlight); | 583 | cifs_add_credits(ses->server, 1); |
568 | wake_up(&ses->server->request_q); | ||
569 | return rc; | 584 | return rc; |
570 | } | 585 | } |
571 | rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); | 586 | rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); |
@@ -601,8 +616,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
601 | midQ->callback = DeleteMidQEntry; | 616 | midQ->callback = DeleteMidQEntry; |
602 | spin_unlock(&GlobalMid_Lock); | 617 | spin_unlock(&GlobalMid_Lock); |
603 | cifs_small_buf_release(in_buf); | 618 | cifs_small_buf_release(in_buf); |
604 | atomic_dec(&ses->server->inFlight); | 619 | cifs_add_credits(ses->server, 1); |
605 | wake_up(&ses->server->request_q); | ||
606 | return rc; | 620 | return rc; |
607 | } | 621 | } |
608 | spin_unlock(&GlobalMid_Lock); | 622 | spin_unlock(&GlobalMid_Lock); |
@@ -612,8 +626,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
612 | 626 | ||
613 | rc = cifs_sync_mid_result(midQ, ses->server); | 627 | rc = cifs_sync_mid_result(midQ, ses->server); |
614 | if (rc != 0) { | 628 | if (rc != 0) { |
615 | atomic_dec(&ses->server->inFlight); | 629 | cifs_add_credits(ses->server, 1); |
616 | wake_up(&ses->server->request_q); | ||
617 | return rc; | 630 | return rc; |
618 | } | 631 | } |
619 | 632 | ||
@@ -637,8 +650,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
637 | midQ->resp_buf = NULL; | 650 | midQ->resp_buf = NULL; |
638 | out: | 651 | out: |
639 | delete_mid(midQ); | 652 | delete_mid(midQ); |
640 | atomic_dec(&ses->server->inFlight); | 653 | cifs_add_credits(ses->server, 1); |
641 | wake_up(&ses->server->request_q); | ||
642 | 654 | ||
643 | return rc; | 655 | return rc; |
644 | } | 656 | } |
@@ -688,8 +700,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
688 | if (rc) { | 700 | if (rc) { |
689 | mutex_unlock(&ses->server->srv_mutex); | 701 | mutex_unlock(&ses->server->srv_mutex); |
690 | /* Update # of requests on wire to server */ | 702 | /* Update # of requests on wire to server */ |
691 | atomic_dec(&ses->server->inFlight); | 703 | cifs_add_credits(ses->server, 1); |
692 | wake_up(&ses->server->request_q); | ||
693 | return rc; | 704 | return rc; |
694 | } | 705 | } |
695 | 706 | ||
@@ -721,8 +732,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
721 | /* no longer considered to be "in-flight" */ | 732 | /* no longer considered to be "in-flight" */ |
722 | midQ->callback = DeleteMidQEntry; | 733 | midQ->callback = DeleteMidQEntry; |
723 | spin_unlock(&GlobalMid_Lock); | 734 | spin_unlock(&GlobalMid_Lock); |
724 | atomic_dec(&ses->server->inFlight); | 735 | cifs_add_credits(ses->server, 1); |
725 | wake_up(&ses->server->request_q); | ||
726 | return rc; | 736 | return rc; |
727 | } | 737 | } |
728 | spin_unlock(&GlobalMid_Lock); | 738 | spin_unlock(&GlobalMid_Lock); |
@@ -730,8 +740,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
730 | 740 | ||
731 | rc = cifs_sync_mid_result(midQ, ses->server); | 741 | rc = cifs_sync_mid_result(midQ, ses->server); |
732 | if (rc != 0) { | 742 | if (rc != 0) { |
733 | atomic_dec(&ses->server->inFlight); | 743 | cifs_add_credits(ses->server, 1); |
734 | wake_up(&ses->server->request_q); | ||
735 | return rc; | 744 | return rc; |
736 | } | 745 | } |
737 | 746 | ||
@@ -747,8 +756,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
747 | rc = cifs_check_receive(midQ, ses->server, 0); | 756 | rc = cifs_check_receive(midQ, ses->server, 0); |
748 | out: | 757 | out: |
749 | delete_mid(midQ); | 758 | delete_mid(midQ); |
750 | atomic_dec(&ses->server->inFlight); | 759 | cifs_add_credits(ses->server, 1); |
751 | wake_up(&ses->server->request_q); | ||
752 | 760 | ||
753 | return rc; | 761 | return rc; |
754 | } | 762 | } |
diff --git a/fs/compat.c b/fs/compat.c index 07880bae28a9..14483a715bbb 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/nfs4_mount.h> | 33 | #include <linux/nfs4_mount.h> |
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/ctype.h> | 35 | #include <linux/ctype.h> |
36 | #include <linux/module.h> | ||
37 | #include <linux/dirent.h> | 36 | #include <linux/dirent.h> |
38 | #include <linux/fsnotify.h> | 37 | #include <linux/fsnotify.h> |
39 | #include <linux/highuid.h> | 38 | #include <linux/highuid.h> |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 10d8cd90ca6f..debdfe0fc809 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include <linux/elevator.h> | 49 | #include <linux/elevator.h> |
50 | #include <linux/rtc.h> | 50 | #include <linux/rtc.h> |
51 | #include <linux/pci.h> | 51 | #include <linux/pci.h> |
52 | #include <linux/module.h> | ||
53 | #include <linux/serial.h> | 52 | #include <linux/serial.h> |
54 | #include <linux/if_tun.h> | 53 | #include <linux/if_tun.h> |
55 | #include <linux/ctype.h> | 54 | #include <linux/ctype.h> |
diff --git a/fs/dcache.c b/fs/dcache.c index 2b55bd0c1061..b60ddc41d783 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/hash.h> | 24 | #include <linux/hash.h> |
25 | #include <linux/cache.h> | 25 | #include <linux/cache.h> |
26 | #include <linux/module.h> | 26 | #include <linux/export.h> |
27 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
28 | #include <linux/file.h> | 28 | #include <linux/file.h> |
29 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
@@ -2404,6 +2404,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
2404 | if (d_ancestor(alias, dentry)) { | 2404 | if (d_ancestor(alias, dentry)) { |
2405 | /* Check for loops */ | 2405 | /* Check for loops */ |
2406 | actual = ERR_PTR(-ELOOP); | 2406 | actual = ERR_PTR(-ELOOP); |
2407 | spin_unlock(&inode->i_lock); | ||
2407 | } else if (IS_ROOT(alias)) { | 2408 | } else if (IS_ROOT(alias)) { |
2408 | /* Is this an anonymous mountpoint that we | 2409 | /* Is this an anonymous mountpoint that we |
2409 | * could splice into our tree? */ | 2410 | * could splice into our tree? */ |
@@ -2413,7 +2414,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
2413 | goto found; | 2414 | goto found; |
2414 | } else { | 2415 | } else { |
2415 | /* Nope, but we must(!) avoid directory | 2416 | /* Nope, but we must(!) avoid directory |
2416 | * aliasing */ | 2417 | * aliasing. This drops inode->i_lock */ |
2417 | actual = __d_unalias(inode, dentry, alias); | 2418 | actual = __d_unalias(inode, dentry, alias); |
2418 | } | 2419 | } |
2419 | write_sequnlock(&rename_lock); | 2420 | write_sequnlock(&rename_lock); |
diff --git a/fs/dcookies.c b/fs/dcookies.c index dda0dc702d1b..17c779967828 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c | |||
@@ -13,7 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
16 | #include <linux/module.h> | 16 | #include <linux/export.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/list.h> | 18 | #include <linux/list.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
diff --git a/fs/eventfd.c b/fs/eventfd.c index d9a591773919..dba15fecf23e 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/spinlock.h> | 16 | #include <linux/spinlock.h> |
17 | #include <linux/anon_inodes.h> | 17 | #include <linux/anon_inodes.h> |
18 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
19 | #include <linux/module.h> | 19 | #include <linux/export.h> |
20 | #include <linux/kref.h> | 20 | #include <linux/kref.h> |
21 | #include <linux/eventfd.h> | 21 | #include <linux/eventfd.h> |
22 | 22 | ||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2a7dcd6ddc09..739b0985b398 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -426,6 +426,31 @@ out_unlock: | |||
426 | return error; | 426 | return error; |
427 | } | 427 | } |
428 | 428 | ||
429 | /* | ||
430 | * As described in commit 0ccf831cb lockdep: annotate epoll | ||
431 | * the use of wait queues used by epoll is done in a very controlled | ||
432 | * manner. Wake ups can nest inside each other, but are never done | ||
433 | * with the same locking. For example: | ||
434 | * | ||
435 | * dfd = socket(...); | ||
436 | * efd1 = epoll_create(); | ||
437 | * efd2 = epoll_create(); | ||
438 | * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...); | ||
439 | * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...); | ||
440 | * | ||
441 | * When a packet arrives to the device underneath "dfd", the net code will | ||
442 | * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a | ||
443 | * callback wakeup entry on that queue, and the wake_up() performed by the | ||
444 | * "dfd" net code will end up in ep_poll_callback(). At this point epoll | ||
445 | * (efd1) notices that it may have some event ready, so it needs to wake up | ||
446 | * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() | ||
447 | * that ends up in another wake_up(), after having checked about the | ||
448 | * recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to | ||
449 | * avoid stack blasting. | ||
450 | * | ||
451 | * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle | ||
452 | * this special case of epoll. | ||
453 | */ | ||
429 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 454 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
430 | static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, | 455 | static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, |
431 | unsigned long events, int subclass) | 456 | unsigned long events, int subclass) |
@@ -698,9 +723,12 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, | |||
698 | void *priv) | 723 | void *priv) |
699 | { | 724 | { |
700 | struct epitem *epi, *tmp; | 725 | struct epitem *epi, *tmp; |
726 | poll_table pt; | ||
701 | 727 | ||
728 | init_poll_funcptr(&pt, NULL); | ||
702 | list_for_each_entry_safe(epi, tmp, head, rdllink) { | 729 | list_for_each_entry_safe(epi, tmp, head, rdllink) { |
703 | if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & | 730 | pt._key = epi->event.events; |
731 | if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & | ||
704 | epi->event.events) | 732 | epi->event.events) |
705 | return POLLIN | POLLRDNORM; | 733 | return POLLIN | POLLRDNORM; |
706 | else { | 734 | else { |
@@ -1048,13 +1076,11 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) | |||
1048 | */ | 1076 | */ |
1049 | static int reverse_path_check(void) | 1077 | static int reverse_path_check(void) |
1050 | { | 1078 | { |
1051 | int length = 0; | ||
1052 | int error = 0; | 1079 | int error = 0; |
1053 | struct file *current_file; | 1080 | struct file *current_file; |
1054 | 1081 | ||
1055 | /* let's call this for all tfiles */ | 1082 | /* let's call this for all tfiles */ |
1056 | list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { | 1083 | list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { |
1057 | length++; | ||
1058 | path_count_init(); | 1084 | path_count_init(); |
1059 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | 1085 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, |
1060 | reverse_path_check_proc, current_file, | 1086 | reverse_path_check_proc, current_file, |
@@ -1096,6 +1122,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1096 | /* Initialize the poll table using the queue callback */ | 1122 | /* Initialize the poll table using the queue callback */ |
1097 | epq.epi = epi; | 1123 | epq.epi = epi; |
1098 | init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); | 1124 | init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); |
1125 | epq.pt._key = event->events; | ||
1099 | 1126 | ||
1100 | /* | 1127 | /* |
1101 | * Attach the item to the poll hooks and get current event bits. | 1128 | * Attach the item to the poll hooks and get current event bits. |
@@ -1190,6 +1217,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
1190 | { | 1217 | { |
1191 | int pwake = 0; | 1218 | int pwake = 0; |
1192 | unsigned int revents; | 1219 | unsigned int revents; |
1220 | poll_table pt; | ||
1221 | |||
1222 | init_poll_funcptr(&pt, NULL); | ||
1193 | 1223 | ||
1194 | /* | 1224 | /* |
1195 | * Set the new event interest mask before calling f_op->poll(); | 1225 | * Set the new event interest mask before calling f_op->poll(); |
@@ -1197,13 +1227,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
1197 | * f_op->poll() call and the new event set registering. | 1227 | * f_op->poll() call and the new event set registering. |
1198 | */ | 1228 | */ |
1199 | epi->event.events = event->events; | 1229 | epi->event.events = event->events; |
1230 | pt._key = event->events; | ||
1200 | epi->event.data = event->data; /* protected by mtx */ | 1231 | epi->event.data = event->data; /* protected by mtx */ |
1201 | 1232 | ||
1202 | /* | 1233 | /* |
1203 | * Get current event bits. We can safely use the file* here because | 1234 | * Get current event bits. We can safely use the file* here because |
1204 | * its usage count has been increased by the caller of this function. | 1235 | * its usage count has been increased by the caller of this function. |
1205 | */ | 1236 | */ |
1206 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); | 1237 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt); |
1207 | 1238 | ||
1208 | /* | 1239 | /* |
1209 | * If the item is "hot" and it is not registered inside the ready | 1240 | * If the item is "hot" and it is not registered inside the ready |
@@ -1238,6 +1269,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, | |||
1238 | unsigned int revents; | 1269 | unsigned int revents; |
1239 | struct epitem *epi; | 1270 | struct epitem *epi; |
1240 | struct epoll_event __user *uevent; | 1271 | struct epoll_event __user *uevent; |
1272 | poll_table pt; | ||
1273 | |||
1274 | init_poll_funcptr(&pt, NULL); | ||
1241 | 1275 | ||
1242 | /* | 1276 | /* |
1243 | * We can loop without lock because we are passed a task private list. | 1277 | * We can loop without lock because we are passed a task private list. |
@@ -1250,7 +1284,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, | |||
1250 | 1284 | ||
1251 | list_del_init(&epi->rdllink); | 1285 | list_del_init(&epi->rdllink); |
1252 | 1286 | ||
1253 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & | 1287 | pt._key = epi->event.events; |
1288 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & | ||
1254 | epi->event.events; | 1289 | epi->event.events; |
1255 | 1290 | ||
1256 | /* | 1291 | /* |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a2038928f9a3..1e036b79384c 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -1743,8 +1743,11 @@ allocated: | |||
1743 | 1743 | ||
1744 | *errp = 0; | 1744 | *errp = 0; |
1745 | brelse(bitmap_bh); | 1745 | brelse(bitmap_bh); |
1746 | dquot_free_block(inode, *count-num); | 1746 | |
1747 | *count = num; | 1747 | if (num < *count) { |
1748 | dquot_free_block(inode, *count-num); | ||
1749 | *count = num; | ||
1750 | } | ||
1748 | 1751 | ||
1749 | trace_ext3_allocate_blocks(inode, goal, num, | 1752 | trace_ext3_allocate_blocks(inode, goal, num, |
1750 | (unsigned long long)ret_block); | 1753 | (unsigned long long)ret_block); |
@@ -1970,7 +1973,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, | |||
1970 | sbi = EXT3_SB(sb); | 1973 | sbi = EXT3_SB(sb); |
1971 | 1974 | ||
1972 | /* Walk through the whole group */ | 1975 | /* Walk through the whole group */ |
1973 | while (start < max) { | 1976 | while (start <= max) { |
1974 | start = bitmap_search_next_usable_block(start, bitmap_bh, max); | 1977 | start = bitmap_search_next_usable_block(start, bitmap_bh, max); |
1975 | if (start < 0) | 1978 | if (start < 0) |
1976 | break; | 1979 | break; |
@@ -1980,7 +1983,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, | |||
1980 | * Allocate contiguous free extents by setting bits in the | 1983 | * Allocate contiguous free extents by setting bits in the |
1981 | * block bitmap | 1984 | * block bitmap |
1982 | */ | 1985 | */ |
1983 | while (next < max | 1986 | while (next <= max |
1984 | && claim_block(sb_bgl_lock(sbi, group), | 1987 | && claim_block(sb_bgl_lock(sbi, group), |
1985 | next, bitmap_bh)) { | 1988 | next, bitmap_bh)) { |
1986 | next++; | 1989 | next++; |
@@ -2091,73 +2094,74 @@ err_out: | |||
2091 | */ | 2094 | */ |
2092 | int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) | 2095 | int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) |
2093 | { | 2096 | { |
2094 | ext3_grpblk_t last_block, first_block, free_blocks; | 2097 | ext3_grpblk_t last_block, first_block; |
2095 | unsigned long first_group, last_group; | 2098 | unsigned long group, first_group, last_group; |
2096 | unsigned long group, ngroups; | ||
2097 | struct ext3_group_desc *gdp; | 2099 | struct ext3_group_desc *gdp; |
2098 | struct ext3_super_block *es = EXT3_SB(sb)->s_es; | 2100 | struct ext3_super_block *es = EXT3_SB(sb)->s_es; |
2099 | uint64_t start, len, minlen, trimmed; | 2101 | uint64_t start, minlen, end, trimmed = 0; |
2102 | ext3_fsblk_t first_data_blk = | ||
2103 | le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); | ||
2100 | ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); | 2104 | ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); |
2101 | int ret = 0; | 2105 | int ret = 0; |
2102 | 2106 | ||
2103 | start = (range->start >> sb->s_blocksize_bits) + | 2107 | start = range->start >> sb->s_blocksize_bits; |
2104 | le32_to_cpu(es->s_first_data_block); | 2108 | end = start + (range->len >> sb->s_blocksize_bits) - 1; |
2105 | len = range->len >> sb->s_blocksize_bits; | ||
2106 | minlen = range->minlen >> sb->s_blocksize_bits; | 2109 | minlen = range->minlen >> sb->s_blocksize_bits; |
2107 | trimmed = 0; | ||
2108 | 2110 | ||
2109 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) | 2111 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) || |
2112 | unlikely(start >= max_blks)) | ||
2110 | return -EINVAL; | 2113 | return -EINVAL; |
2111 | if (start >= max_blks) | 2114 | if (end >= max_blks) |
2112 | return -EINVAL; | 2115 | end = max_blks - 1; |
2113 | if (start + len > max_blks) | 2116 | if (end <= first_data_blk) |
2114 | len = max_blks - start; | 2117 | goto out; |
2118 | if (start < first_data_blk) | ||
2119 | start = first_data_blk; | ||
2115 | 2120 | ||
2116 | ngroups = EXT3_SB(sb)->s_groups_count; | ||
2117 | smp_rmb(); | 2121 | smp_rmb(); |
2118 | 2122 | ||
2119 | /* Determine first and last group to examine based on start and len */ | 2123 | /* Determine first and last group to examine based on start and len */ |
2120 | ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, | 2124 | ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, |
2121 | &first_group, &first_block); | 2125 | &first_group, &first_block); |
2122 | ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len), | 2126 | ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end, |
2123 | &last_group, &last_block); | 2127 | &last_group, &last_block); |
2124 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
2125 | last_block = EXT3_BLOCKS_PER_GROUP(sb); | ||
2126 | 2128 | ||
2127 | if (first_group > last_group) | 2129 | /* end now represents the last block to discard in this group */ |
2128 | return -EINVAL; | 2130 | end = EXT3_BLOCKS_PER_GROUP(sb) - 1; |
2129 | 2131 | ||
2130 | for (group = first_group; group <= last_group; group++) { | 2132 | for (group = first_group; group <= last_group; group++) { |
2131 | gdp = ext3_get_group_desc(sb, group, NULL); | 2133 | gdp = ext3_get_group_desc(sb, group, NULL); |
2132 | if (!gdp) | 2134 | if (!gdp) |
2133 | break; | 2135 | break; |
2134 | 2136 | ||
2135 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
2136 | if (free_blocks < minlen) | ||
2137 | continue; | ||
2138 | |||
2139 | /* | 2137 | /* |
2140 | * For all the groups except the last one, last block will | 2138 | * For all the groups except the last one, last block will |
2141 | * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to | 2139 | * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to |
2142 | * change it for the last group in which case first_block + | 2140 | * change it for the last group, note that last_block is |
2143 | * len < EXT3_BLOCKS_PER_GROUP(sb). | 2141 | * already computed earlier by ext3_get_group_no_and_offset() |
2144 | */ | 2142 | */ |
2145 | if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb)) | 2143 | if (group == last_group) |
2146 | last_block = first_block + len; | 2144 | end = last_block; |
2147 | len -= last_block - first_block; | ||
2148 | 2145 | ||
2149 | ret = ext3_trim_all_free(sb, group, first_block, | 2146 | if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) { |
2150 | last_block, minlen); | 2147 | ret = ext3_trim_all_free(sb, group, first_block, |
2151 | if (ret < 0) | 2148 | end, minlen); |
2152 | break; | 2149 | if (ret < 0) |
2150 | break; | ||
2151 | trimmed += ret; | ||
2152 | } | ||
2153 | 2153 | ||
2154 | trimmed += ret; | 2154 | /* |
2155 | * For every group except the first one, we are sure | ||
2156 | * that the first block to discard will be block #0. | ||
2157 | */ | ||
2155 | first_block = 0; | 2158 | first_block = 0; |
2156 | } | 2159 | } |
2157 | 2160 | ||
2158 | if (ret >= 0) | 2161 | if (ret > 0) |
2159 | ret = 0; | 2162 | ret = 0; |
2160 | range->len = trimmed * sb->s_blocksize; | ||
2161 | 2163 | ||
2164 | out: | ||
2165 | range->len = trimmed * sb->s_blocksize; | ||
2162 | return ret; | 2166 | return ret; |
2163 | } | 2167 | } |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2d0afeca0b47..6d3418662b54 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -756,6 +756,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, | |||
756 | struct ext3_block_alloc_info *block_i; | 756 | struct ext3_block_alloc_info *block_i; |
757 | ext3_fsblk_t current_block; | 757 | ext3_fsblk_t current_block; |
758 | struct ext3_inode_info *ei = EXT3_I(inode); | 758 | struct ext3_inode_info *ei = EXT3_I(inode); |
759 | struct timespec now; | ||
759 | 760 | ||
760 | block_i = ei->i_block_alloc_info; | 761 | block_i = ei->i_block_alloc_info; |
761 | /* | 762 | /* |
@@ -795,9 +796,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, | |||
795 | } | 796 | } |
796 | 797 | ||
797 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 798 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
798 | 799 | now = CURRENT_TIME_SEC; | |
799 | inode->i_ctime = CURRENT_TIME_SEC; | 800 | if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) { |
800 | ext3_mark_inode_dirty(handle, inode); | 801 | inode->i_ctime = now; |
802 | ext3_mark_inode_dirty(handle, inode); | ||
803 | } | ||
801 | /* ext3_mark_inode_dirty already updated i_sync_tid */ | 804 | /* ext3_mark_inode_dirty already updated i_sync_tid */ |
802 | atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); | 805 | atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); |
803 | 806 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f9e2cd8cf711..4bbd07a6fa18 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -336,10 +336,10 @@ err_out: | |||
336 | * Return buffer_head on success or NULL in case of failure. | 336 | * Return buffer_head on success or NULL in case of failure. |
337 | */ | 337 | */ |
338 | struct buffer_head * | 338 | struct buffer_head * |
339 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 339 | ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) |
340 | { | 340 | { |
341 | struct ext4_group_desc *desc; | 341 | struct ext4_group_desc *desc; |
342 | struct buffer_head *bh = NULL; | 342 | struct buffer_head *bh; |
343 | ext4_fsblk_t bitmap_blk; | 343 | ext4_fsblk_t bitmap_blk; |
344 | 344 | ||
345 | desc = ext4_get_group_desc(sb, block_group, NULL); | 345 | desc = ext4_get_group_desc(sb, block_group, NULL); |
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
348 | bitmap_blk = ext4_block_bitmap(sb, desc); | 348 | bitmap_blk = ext4_block_bitmap(sb, desc); |
349 | bh = sb_getblk(sb, bitmap_blk); | 349 | bh = sb_getblk(sb, bitmap_blk); |
350 | if (unlikely(!bh)) { | 350 | if (unlikely(!bh)) { |
351 | ext4_error(sb, "Cannot read block bitmap - " | 351 | ext4_error(sb, "Cannot get buffer for block bitmap - " |
352 | "block_group = %u, block_bitmap = %llu", | 352 | "block_group = %u, block_bitmap = %llu", |
353 | block_group, bitmap_blk); | 353 | block_group, bitmap_blk); |
354 | return NULL; | 354 | return NULL; |
355 | } | 355 | } |
356 | 356 | ||
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
382 | return bh; | 382 | return bh; |
383 | } | 383 | } |
384 | /* | 384 | /* |
385 | * submit the buffer_head for read. We can | 385 | * submit the buffer_head for reading |
386 | * safely mark the bitmap as uptodate now. | ||
387 | * We do it here so the bitmap uptodate bit | ||
388 | * get set with buffer lock held. | ||
389 | */ | 386 | */ |
387 | set_buffer_new(bh); | ||
390 | trace_ext4_read_block_bitmap_load(sb, block_group); | 388 | trace_ext4_read_block_bitmap_load(sb, block_group); |
391 | set_bitmap_uptodate(bh); | 389 | bh->b_end_io = ext4_end_bitmap_read; |
392 | if (bh_submit_read(bh) < 0) { | 390 | get_bh(bh); |
393 | put_bh(bh); | 391 | submit_bh(READ, bh); |
392 | return bh; | ||
393 | } | ||
394 | |||
395 | /* Returns 0 on success, 1 on error */ | ||
396 | int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, | ||
397 | struct buffer_head *bh) | ||
398 | { | ||
399 | struct ext4_group_desc *desc; | ||
400 | |||
401 | if (!buffer_new(bh)) | ||
402 | return 0; | ||
403 | desc = ext4_get_group_desc(sb, block_group, NULL); | ||
404 | if (!desc) | ||
405 | return 1; | ||
406 | wait_on_buffer(bh); | ||
407 | if (!buffer_uptodate(bh)) { | ||
394 | ext4_error(sb, "Cannot read block bitmap - " | 408 | ext4_error(sb, "Cannot read block bitmap - " |
395 | "block_group = %u, block_bitmap = %llu", | 409 | "block_group = %u, block_bitmap = %llu", |
396 | block_group, bitmap_blk); | 410 | block_group, (unsigned long long) bh->b_blocknr); |
397 | return NULL; | 411 | return 1; |
398 | } | 412 | } |
413 | clear_buffer_new(bh); | ||
414 | /* Panic or remount fs read-only if block bitmap is invalid */ | ||
399 | ext4_valid_block_bitmap(sb, desc, block_group, bh); | 415 | ext4_valid_block_bitmap(sb, desc, block_group, bh); |
400 | /* | 416 | return 0; |
401 | * file system mounted not to panic on error, | 417 | } |
402 | * continue with corrupt bitmap | 418 | |
403 | */ | 419 | struct buffer_head * |
420 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | ||
421 | { | ||
422 | struct buffer_head *bh; | ||
423 | |||
424 | bh = ext4_read_block_bitmap_nowait(sb, block_group); | ||
425 | if (ext4_wait_block_bitmap(sb, block_group, bh)) { | ||
426 | put_bh(bh); | ||
427 | return NULL; | ||
428 | } | ||
404 | return bh; | 429 | return bh; |
405 | } | 430 | } |
406 | 431 | ||
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 164c56092e58..ad56866d729a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -91,17 +91,17 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, | |||
91 | return 0; | 91 | return 0; |
92 | 92 | ||
93 | if (filp) | 93 | if (filp) |
94 | ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0, | 94 | ext4_error_file(filp, function, line, bh->b_blocknr, |
95 | "bad entry in directory: %s - offset=%u(%u), " | 95 | "bad entry in directory: %s - offset=%u(%u), " |
96 | "inode=%u, rec_len=%d, name_len=%d", | 96 | "inode=%u, rec_len=%d, name_len=%d", |
97 | error_msg, (unsigned) (offset%bh->b_size), | 97 | error_msg, (unsigned) (offset % bh->b_size), |
98 | offset, le32_to_cpu(de->inode), | 98 | offset, le32_to_cpu(de->inode), |
99 | rlen, de->name_len); | 99 | rlen, de->name_len); |
100 | else | 100 | else |
101 | ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0, | 101 | ext4_error_inode(dir, function, line, bh->b_blocknr, |
102 | "bad entry in directory: %s - offset=%u(%u), " | 102 | "bad entry in directory: %s - offset=%u(%u), " |
103 | "inode=%u, rec_len=%d, name_len=%d", | 103 | "inode=%u, rec_len=%d, name_len=%d", |
104 | error_msg, (unsigned) (offset%bh->b_size), | 104 | error_msg, (unsigned) (offset % bh->b_size), |
105 | offset, le32_to_cpu(de->inode), | 105 | offset, le32_to_cpu(de->inode), |
106 | rlen, de->name_len); | 106 | rlen, de->name_len); |
107 | 107 | ||
@@ -425,8 +425,9 @@ static int call_filldir(struct file *filp, void *dirent, | |||
425 | sb = inode->i_sb; | 425 | sb = inode->i_sb; |
426 | 426 | ||
427 | if (!fname) { | 427 | if (!fname) { |
428 | printk(KERN_ERR "EXT4-fs: call_filldir: called with " | 428 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " |
429 | "null fname?!?\n"); | 429 | "called with null fname?!?", __func__, __LINE__, |
430 | inode->i_ino, current->comm); | ||
430 | return 0; | 431 | return 0; |
431 | } | 432 | } |
432 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 433 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 513004fc3d84..ded731ac8a32 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -53,7 +53,7 @@ | |||
53 | printk(KERN_DEBUG f, ## a); \ | 53 | printk(KERN_DEBUG f, ## a); \ |
54 | } while (0) | 54 | } while (0) |
55 | #else | 55 | #else |
56 | #define ext4_debug(f, a...) do {} while (0) | 56 | #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
@@ -184,6 +184,8 @@ struct mpage_da_data { | |||
184 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 184 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
185 | #define EXT4_IO_END_ERROR 0x0002 | 185 | #define EXT4_IO_END_ERROR 0x0002 |
186 | #define EXT4_IO_END_QUEUED 0x0004 | 186 | #define EXT4_IO_END_QUEUED 0x0004 |
187 | #define EXT4_IO_END_DIRECT 0x0008 | ||
188 | #define EXT4_IO_END_IN_FSYNC 0x0010 | ||
187 | 189 | ||
188 | struct ext4_io_page { | 190 | struct ext4_io_page { |
189 | struct page *p_page; | 191 | struct page *p_page; |
@@ -192,18 +194,25 @@ struct ext4_io_page { | |||
192 | 194 | ||
193 | #define MAX_IO_PAGES 128 | 195 | #define MAX_IO_PAGES 128 |
194 | 196 | ||
197 | /* | ||
198 | * For converting uninitialized extents on a work queue. | ||
199 | * | ||
200 | * 'page' is only used from the writepage() path; 'pages' is only used for | ||
201 | * buffered writes; they are used to keep page references until conversion | ||
202 | * takes place. For AIO/DIO, neither field is filled in. | ||
203 | */ | ||
195 | typedef struct ext4_io_end { | 204 | typedef struct ext4_io_end { |
196 | struct list_head list; /* per-file finished IO list */ | 205 | struct list_head list; /* per-file finished IO list */ |
197 | struct inode *inode; /* file being written to */ | 206 | struct inode *inode; /* file being written to */ |
198 | unsigned int flag; /* unwritten or not */ | 207 | unsigned int flag; /* unwritten or not */ |
199 | struct page *page; /* page struct for buffer write */ | 208 | struct page *page; /* for writepage() path */ |
200 | loff_t offset; /* offset in the file */ | 209 | loff_t offset; /* offset in the file */ |
201 | ssize_t size; /* size of the extent */ | 210 | ssize_t size; /* size of the extent */ |
202 | struct work_struct work; /* data work queue */ | 211 | struct work_struct work; /* data work queue */ |
203 | struct kiocb *iocb; /* iocb struct for AIO */ | 212 | struct kiocb *iocb; /* iocb struct for AIO */ |
204 | int result; /* error value for AIO */ | 213 | int result; /* error value for AIO */ |
205 | int num_io_pages; | 214 | int num_io_pages; /* for writepages() */ |
206 | struct ext4_io_page *pages[MAX_IO_PAGES]; | 215 | struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */ |
207 | } ext4_io_end_t; | 216 | } ext4_io_end_t; |
208 | 217 | ||
209 | struct ext4_io_submit { | 218 | struct ext4_io_submit { |
@@ -923,6 +932,7 @@ struct ext4_inode_info { | |||
923 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ | 932 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ |
924 | #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ | 933 | #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ |
925 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ | 934 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ |
935 | #define EXT4_MOUNT_ERRORS_MASK 0x00070 | ||
926 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ | 936 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ |
927 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ | 937 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ |
928 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ | 938 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ |
@@ -941,7 +951,6 @@ struct ext4_inode_info { | |||
941 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ | 951 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ |
942 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 952 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
943 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 953 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
944 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | ||
945 | #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ | 954 | #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ |
946 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 955 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
947 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 956 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
@@ -1142,6 +1151,7 @@ struct ext4_sb_info { | |||
1142 | unsigned int s_mount_opt; | 1151 | unsigned int s_mount_opt; |
1143 | unsigned int s_mount_opt2; | 1152 | unsigned int s_mount_opt2; |
1144 | unsigned int s_mount_flags; | 1153 | unsigned int s_mount_flags; |
1154 | unsigned int s_def_mount_opt; | ||
1145 | ext4_fsblk_t s_sb_block; | 1155 | ext4_fsblk_t s_sb_block; |
1146 | uid_t s_resuid; | 1156 | uid_t s_resuid; |
1147 | gid_t s_resgid; | 1157 | gid_t s_resgid; |
@@ -1420,8 +1430,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1420 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 | 1430 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 |
1421 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | 1431 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ |
1422 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | 1432 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ |
1423 | #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ | 1433 | #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ |
1424 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ | 1434 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ |
1435 | #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */ | ||
1425 | 1436 | ||
1426 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | 1437 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1427 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1438 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
@@ -1794,8 +1805,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
1794 | ext4_group_t block_group, | 1805 | ext4_group_t block_group, |
1795 | struct buffer_head ** bh); | 1806 | struct buffer_head ** bh); |
1796 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1807 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
1797 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | 1808 | |
1798 | ext4_group_t block_group); | 1809 | extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, |
1810 | ext4_group_t block_group); | ||
1811 | extern int ext4_wait_block_bitmap(struct super_block *sb, | ||
1812 | ext4_group_t block_group, | ||
1813 | struct buffer_head *bh); | ||
1814 | extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | ||
1815 | ext4_group_t block_group); | ||
1799 | extern void ext4_init_block_bitmap(struct super_block *sb, | 1816 | extern void ext4_init_block_bitmap(struct super_block *sb, |
1800 | struct buffer_head *bh, | 1817 | struct buffer_head *bh, |
1801 | ext4_group_t group, | 1818 | ext4_group_t group, |
@@ -1841,6 +1858,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *); | |||
1841 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | 1858 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
1842 | extern int ext4_init_inode_table(struct super_block *sb, | 1859 | extern int ext4_init_inode_table(struct super_block *sb, |
1843 | ext4_group_t group, int barrier); | 1860 | ext4_group_t group, int barrier); |
1861 | extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); | ||
1844 | 1862 | ||
1845 | /* mballoc.c */ | 1863 | /* mballoc.c */ |
1846 | extern long ext4_mb_stats; | 1864 | extern long ext4_mb_stats; |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a52db3a69a30..0f58b86e3a02 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -47,9 +47,9 @@ | |||
47 | */ | 47 | */ |
48 | #define EXT_DEBUG__ | 48 | #define EXT_DEBUG__ |
49 | #ifdef EXT_DEBUG | 49 | #ifdef EXT_DEBUG |
50 | #define ext_debug(a...) printk(a) | 50 | #define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) |
51 | #else | 51 | #else |
52 | #define ext_debug(a...) | 52 | #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | /* | 55 | /* |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5802fa1dab18..83b20fcf9400 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -104,6 +104,78 @@ | |||
104 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) | 104 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) |
105 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) | 105 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) |
106 | 106 | ||
107 | /** | ||
108 | * struct ext4_journal_cb_entry - Base structure for callback information. | ||
109 | * | ||
110 | * This struct is a 'seed' structure for a using with your own callback | ||
111 | * structs. If you are using callbacks you must allocate one of these | ||
112 | * or another struct of your own definition which has this struct | ||
113 | * as it's first element and pass it to ext4_journal_callback_add(). | ||
114 | */ | ||
115 | struct ext4_journal_cb_entry { | ||
116 | /* list information for other callbacks attached to the same handle */ | ||
117 | struct list_head jce_list; | ||
118 | |||
119 | /* Function to call with this callback structure */ | ||
120 | void (*jce_func)(struct super_block *sb, | ||
121 | struct ext4_journal_cb_entry *jce, int error); | ||
122 | |||
123 | /* user data goes here */ | ||
124 | }; | ||
125 | |||
126 | /** | ||
127 | * ext4_journal_callback_add: add a function to call after transaction commit | ||
128 | * @handle: active journal transaction handle to register callback on | ||
129 | * @func: callback function to call after the transaction has committed: | ||
130 | * @sb: superblock of current filesystem for transaction | ||
131 | * @jce: returned journal callback data | ||
132 | * @rc: journal state at commit (0 = transaction committed properly) | ||
133 | * @jce: journal callback data (internal and function private data struct) | ||
134 | * | ||
135 | * The registered function will be called in the context of the journal thread | ||
136 | * after the transaction for which the handle was created has completed. | ||
137 | * | ||
138 | * No locks are held when the callback function is called, so it is safe to | ||
139 | * call blocking functions from within the callback, but the callback should | ||
140 | * not block or run for too long, or the filesystem will be blocked waiting for | ||
141 | * the next transaction to commit. No journaling functions can be used, or | ||
142 | * there is a risk of deadlock. | ||
143 | * | ||
144 | * There is no guaranteed calling order of multiple registered callbacks on | ||
145 | * the same transaction. | ||
146 | */ | ||
147 | static inline void ext4_journal_callback_add(handle_t *handle, | ||
148 | void (*func)(struct super_block *sb, | ||
149 | struct ext4_journal_cb_entry *jce, | ||
150 | int rc), | ||
151 | struct ext4_journal_cb_entry *jce) | ||
152 | { | ||
153 | struct ext4_sb_info *sbi = | ||
154 | EXT4_SB(handle->h_transaction->t_journal->j_private); | ||
155 | |||
156 | /* Add the jce to transaction's private list */ | ||
157 | jce->jce_func = func; | ||
158 | spin_lock(&sbi->s_md_lock); | ||
159 | list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); | ||
160 | spin_unlock(&sbi->s_md_lock); | ||
161 | } | ||
162 | |||
163 | /** | ||
164 | * ext4_journal_callback_del: delete a registered callback | ||
165 | * @handle: active journal transaction handle on which callback was registered | ||
166 | * @jce: registered journal callback entry to unregister | ||
167 | */ | ||
168 | static inline void ext4_journal_callback_del(handle_t *handle, | ||
169 | struct ext4_journal_cb_entry *jce) | ||
170 | { | ||
171 | struct ext4_sb_info *sbi = | ||
172 | EXT4_SB(handle->h_transaction->t_journal->j_private); | ||
173 | |||
174 | spin_lock(&sbi->s_md_lock); | ||
175 | list_del_init(&jce->jce_list); | ||
176 | spin_unlock(&sbi->s_md_lock); | ||
177 | } | ||
178 | |||
107 | int | 179 | int |
108 | ext4_mark_iloc_dirty(handle_t *handle, | 180 | ext4_mark_iloc_dirty(handle_t *handle, |
109 | struct inode *inode, | 181 | struct inode *inode, |
@@ -261,43 +333,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, | |||
261 | /* super.c */ | 333 | /* super.c */ |
262 | int ext4_force_commit(struct super_block *sb); | 334 | int ext4_force_commit(struct super_block *sb); |
263 | 335 | ||
264 | static inline int ext4_should_journal_data(struct inode *inode) | 336 | /* |
337 | * Ext4 inode journal modes | ||
338 | */ | ||
339 | #define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ | ||
340 | #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ | ||
341 | #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ | ||
342 | |||
343 | static inline int ext4_inode_journal_mode(struct inode *inode) | ||
265 | { | 344 | { |
266 | if (EXT4_JOURNAL(inode) == NULL) | 345 | if (EXT4_JOURNAL(inode) == NULL) |
267 | return 0; | 346 | return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ |
268 | if (!S_ISREG(inode->i_mode)) | 347 | /* We do not support data journalling with delayed allocation */ |
269 | return 1; | 348 | if (!S_ISREG(inode->i_mode) || |
270 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 349 | test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
271 | return 1; | 350 | return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ |
272 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | 351 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && |
273 | return 1; | 352 | !test_opt(inode->i_sb, DELALLOC)) |
274 | return 0; | 353 | return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ |
354 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
355 | return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ | ||
356 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
357 | return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ | ||
358 | else | ||
359 | BUG(); | ||
360 | } | ||
361 | |||
362 | static inline int ext4_should_journal_data(struct inode *inode) | ||
363 | { | ||
364 | return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; | ||
275 | } | 365 | } |
276 | 366 | ||
277 | static inline int ext4_should_order_data(struct inode *inode) | 367 | static inline int ext4_should_order_data(struct inode *inode) |
278 | { | 368 | { |
279 | if (EXT4_JOURNAL(inode) == NULL) | 369 | return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; |
280 | return 0; | ||
281 | if (!S_ISREG(inode->i_mode)) | ||
282 | return 0; | ||
283 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | ||
284 | return 0; | ||
285 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
286 | return 1; | ||
287 | return 0; | ||
288 | } | 370 | } |
289 | 371 | ||
290 | static inline int ext4_should_writeback_data(struct inode *inode) | 372 | static inline int ext4_should_writeback_data(struct inode *inode) |
291 | { | 373 | { |
292 | if (EXT4_JOURNAL(inode) == NULL) | 374 | return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE; |
293 | return 1; | ||
294 | if (!S_ISREG(inode->i_mode)) | ||
295 | return 0; | ||
296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | ||
297 | return 0; | ||
298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
299 | return 1; | ||
300 | return 0; | ||
301 | } | 375 | } |
302 | 376 | ||
303 | /* | 377 | /* |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74f23c292e1b..1421938e6792 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,6 +44,14 @@ | |||
44 | 44 | ||
45 | #include <trace/events/ext4.h> | 45 | #include <trace/events/ext4.h> |
46 | 46 | ||
47 | /* | ||
48 | * used by extent splitting. | ||
49 | */ | ||
50 | #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ | ||
51 | due to ENOSPC */ | ||
52 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | ||
53 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | ||
54 | |||
47 | static int ext4_split_extent(handle_t *handle, | 55 | static int ext4_split_extent(handle_t *handle, |
48 | struct inode *inode, | 56 | struct inode *inode, |
49 | struct ext4_ext_path *path, | 57 | struct ext4_ext_path *path, |
@@ -51,6 +59,13 @@ static int ext4_split_extent(handle_t *handle, | |||
51 | int split_flag, | 59 | int split_flag, |
52 | int flags); | 60 | int flags); |
53 | 61 | ||
62 | static int ext4_split_extent_at(handle_t *handle, | ||
63 | struct inode *inode, | ||
64 | struct ext4_ext_path *path, | ||
65 | ext4_lblk_t split, | ||
66 | int split_flag, | ||
67 | int flags); | ||
68 | |||
54 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 69 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
55 | struct inode *inode, | 70 | struct inode *inode, |
56 | int needed) | 71 | int needed) |
@@ -300,6 +315,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
300 | ext4_fsblk_t block = ext4_ext_pblock(ext); | 315 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
301 | int len = ext4_ext_get_actual_len(ext); | 316 | int len = ext4_ext_get_actual_len(ext); |
302 | 317 | ||
318 | if (len == 0) | ||
319 | return 0; | ||
303 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 320 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
304 | } | 321 | } |
305 | 322 | ||
@@ -2308,7 +2325,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2308 | struct ext4_extent *ex; | 2325 | struct ext4_extent *ex; |
2309 | 2326 | ||
2310 | /* the header must be checked already in ext4_ext_remove_space() */ | 2327 | /* the header must be checked already in ext4_ext_remove_space() */ |
2311 | ext_debug("truncate since %u in leaf\n", start); | 2328 | ext_debug("truncate since %u in leaf to %u\n", start, end); |
2312 | if (!path[depth].p_hdr) | 2329 | if (!path[depth].p_hdr) |
2313 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); | 2330 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); |
2314 | eh = path[depth].p_hdr; | 2331 | eh = path[depth].p_hdr; |
@@ -2343,14 +2360,17 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2343 | ext_debug(" border %u:%u\n", a, b); | 2360 | ext_debug(" border %u:%u\n", a, b); |
2344 | 2361 | ||
2345 | /* If this extent is beyond the end of the hole, skip it */ | 2362 | /* If this extent is beyond the end of the hole, skip it */ |
2346 | if (end <= ex_ee_block) { | 2363 | if (end < ex_ee_block) { |
2347 | ex--; | 2364 | ex--; |
2348 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2365 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2349 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2366 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2350 | continue; | 2367 | continue; |
2351 | } else if (b != ex_ee_block + ex_ee_len - 1) { | 2368 | } else if (b != ex_ee_block + ex_ee_len - 1) { |
2352 | EXT4_ERROR_INODE(inode," bad truncate %u:%u\n", | 2369 | EXT4_ERROR_INODE(inode, |
2353 | start, end); | 2370 | "can not handle truncate %u:%u " |
2371 | "on extent %u:%u", | ||
2372 | start, end, ex_ee_block, | ||
2373 | ex_ee_block + ex_ee_len - 1); | ||
2354 | err = -EIO; | 2374 | err = -EIO; |
2355 | goto out; | 2375 | goto out; |
2356 | } else if (a != ex_ee_block) { | 2376 | } else if (a != ex_ee_block) { |
@@ -2482,7 +2502,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2482 | return 1; | 2502 | return 1; |
2483 | } | 2503 | } |
2484 | 2504 | ||
2485 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | 2505 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2506 | ext4_lblk_t end) | ||
2486 | { | 2507 | { |
2487 | struct super_block *sb = inode->i_sb; | 2508 | struct super_block *sb = inode->i_sb; |
2488 | int depth = ext_depth(inode); | 2509 | int depth = ext_depth(inode); |
@@ -2491,7 +2512,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2491 | handle_t *handle; | 2512 | handle_t *handle; |
2492 | int i, err; | 2513 | int i, err; |
2493 | 2514 | ||
2494 | ext_debug("truncate since %u\n", start); | 2515 | ext_debug("truncate since %u to %u\n", start, end); |
2495 | 2516 | ||
2496 | /* probably first extent we're gonna free will be last in block */ | 2517 | /* probably first extent we're gonna free will be last in block */ |
2497 | handle = ext4_journal_start(inode, depth + 1); | 2518 | handle = ext4_journal_start(inode, depth + 1); |
@@ -2504,6 +2525,61 @@ again: | |||
2504 | trace_ext4_ext_remove_space(inode, start, depth); | 2525 | trace_ext4_ext_remove_space(inode, start, depth); |
2505 | 2526 | ||
2506 | /* | 2527 | /* |
2528 | * Check if we are removing extents inside the extent tree. If that | ||
2529 | * is the case, we are going to punch a hole inside the extent tree | ||
2530 | * so we have to check whether we need to split the extent covering | ||
2531 | * the last block to remove so we can easily remove the part of it | ||
2532 | * in ext4_ext_rm_leaf(). | ||
2533 | */ | ||
2534 | if (end < EXT_MAX_BLOCKS - 1) { | ||
2535 | struct ext4_extent *ex; | ||
2536 | ext4_lblk_t ee_block; | ||
2537 | |||
2538 | /* find extent for this block */ | ||
2539 | path = ext4_ext_find_extent(inode, end, NULL); | ||
2540 | if (IS_ERR(path)) { | ||
2541 | ext4_journal_stop(handle); | ||
2542 | return PTR_ERR(path); | ||
2543 | } | ||
2544 | depth = ext_depth(inode); | ||
2545 | ex = path[depth].p_ext; | ||
2546 | if (!ex) | ||
2547 | goto cont; | ||
2548 | |||
2549 | ee_block = le32_to_cpu(ex->ee_block); | ||
2550 | |||
2551 | /* | ||
2552 | * See if the last block is inside the extent, if so split | ||
2553 | * the extent at 'end' block so we can easily remove the | ||
2554 | * tail of the first part of the split extent in | ||
2555 | * ext4_ext_rm_leaf(). | ||
2556 | */ | ||
2557 | if (end >= ee_block && | ||
2558 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { | ||
2559 | int split_flag = 0; | ||
2560 | |||
2561 | if (ext4_ext_is_uninitialized(ex)) | ||
2562 | split_flag = EXT4_EXT_MARK_UNINIT1 | | ||
2563 | EXT4_EXT_MARK_UNINIT2; | ||
2564 | |||
2565 | /* | ||
2566 | * Split the extent in two so that 'end' is the last | ||
2567 | * block in the first new extent | ||
2568 | */ | ||
2569 | err = ext4_split_extent_at(handle, inode, path, | ||
2570 | end + 1, split_flag, | ||
2571 | EXT4_GET_BLOCKS_PRE_IO | | ||
2572 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | ||
2573 | |||
2574 | if (err < 0) | ||
2575 | goto out; | ||
2576 | } | ||
2577 | ext4_ext_drop_refs(path); | ||
2578 | kfree(path); | ||
2579 | } | ||
2580 | cont: | ||
2581 | |||
2582 | /* | ||
2507 | * We start scanning from right side, freeing all the blocks | 2583 | * We start scanning from right side, freeing all the blocks |
2508 | * after i_size and walking into the tree depth-wise. | 2584 | * after i_size and walking into the tree depth-wise. |
2509 | */ | 2585 | */ |
@@ -2515,6 +2591,7 @@ again: | |||
2515 | } | 2591 | } |
2516 | path[0].p_depth = depth; | 2592 | path[0].p_depth = depth; |
2517 | path[0].p_hdr = ext_inode_hdr(inode); | 2593 | path[0].p_hdr = ext_inode_hdr(inode); |
2594 | |||
2518 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2595 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { |
2519 | err = -EIO; | 2596 | err = -EIO; |
2520 | goto out; | 2597 | goto out; |
@@ -2526,7 +2603,7 @@ again: | |||
2526 | /* this is leaf block */ | 2603 | /* this is leaf block */ |
2527 | err = ext4_ext_rm_leaf(handle, inode, path, | 2604 | err = ext4_ext_rm_leaf(handle, inode, path, |
2528 | &partial_cluster, start, | 2605 | &partial_cluster, start, |
2529 | EXT_MAX_BLOCKS - 1); | 2606 | end); |
2530 | /* root level has p_bh == NULL, brelse() eats this */ | 2607 | /* root level has p_bh == NULL, brelse() eats this */ |
2531 | brelse(path[i].p_bh); | 2608 | brelse(path[i].p_bh); |
2532 | path[i].p_bh = NULL; | 2609 | path[i].p_bh = NULL; |
@@ -2651,17 +2728,17 @@ void ext4_ext_init(struct super_block *sb) | |||
2651 | 2728 | ||
2652 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | 2729 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { |
2653 | #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) | 2730 | #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) |
2654 | printk(KERN_INFO "EXT4-fs: file extents enabled"); | 2731 | printk(KERN_INFO "EXT4-fs: file extents enabled" |
2655 | #ifdef AGGRESSIVE_TEST | 2732 | #ifdef AGGRESSIVE_TEST |
2656 | printk(", aggressive tests"); | 2733 | ", aggressive tests" |
2657 | #endif | 2734 | #endif |
2658 | #ifdef CHECK_BINSEARCH | 2735 | #ifdef CHECK_BINSEARCH |
2659 | printk(", check binsearch"); | 2736 | ", check binsearch" |
2660 | #endif | 2737 | #endif |
2661 | #ifdef EXTENTS_STATS | 2738 | #ifdef EXTENTS_STATS |
2662 | printk(", stats"); | 2739 | ", stats" |
2663 | #endif | 2740 | #endif |
2664 | printk("\n"); | 2741 | "\n"); |
2665 | #endif | 2742 | #endif |
2666 | #ifdef EXTENTS_STATS | 2743 | #ifdef EXTENTS_STATS |
2667 | spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); | 2744 | spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); |
@@ -2709,14 +2786,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2709 | } | 2786 | } |
2710 | 2787 | ||
2711 | /* | 2788 | /* |
2712 | * used by extent splitting. | ||
2713 | */ | ||
2714 | #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ | ||
2715 | due to ENOSPC */ | ||
2716 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | ||
2717 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | ||
2718 | |||
2719 | /* | ||
2720 | * ext4_split_extent_at() splits an extent at given block. | 2789 | * ext4_split_extent_at() splits an extent at given block. |
2721 | * | 2790 | * |
2722 | * @handle: the journal handle | 2791 | * @handle: the journal handle |
@@ -3224,11 +3293,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
3224 | depth = ext_depth(inode); | 3293 | depth = ext_depth(inode); |
3225 | eh = path[depth].p_hdr; | 3294 | eh = path[depth].p_hdr; |
3226 | 3295 | ||
3227 | if (unlikely(!eh->eh_entries)) { | 3296 | /* |
3228 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | 3297 | * We're going to remove EOFBLOCKS_FL entirely in future so we |
3229 | "EOFBLOCKS_FL set"); | 3298 | * do not care for this case anymore. Simply remove the flag |
3230 | return -EIO; | 3299 | * if there are no extents. |
3231 | } | 3300 | */ |
3301 | if (unlikely(!eh->eh_entries)) | ||
3302 | goto out; | ||
3232 | last_ex = EXT_LAST_EXTENT(eh); | 3303 | last_ex = EXT_LAST_EXTENT(eh); |
3233 | /* | 3304 | /* |
3234 | * We should clear the EOFBLOCKS_FL flag if we are writing the | 3305 | * We should clear the EOFBLOCKS_FL flag if we are writing the |
@@ -3252,6 +3323,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
3252 | for (i = depth-1; i >= 0; i--) | 3323 | for (i = depth-1; i >= 0; i--) |
3253 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | 3324 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) |
3254 | return 0; | 3325 | return 0; |
3326 | out: | ||
3255 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 3327 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
3256 | return ext4_mark_inode_dirty(handle, inode); | 3328 | return ext4_mark_inode_dirty(handle, inode); |
3257 | } | 3329 | } |
@@ -3710,8 +3782,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3710 | int free_on_err = 0, err = 0, depth, ret; | 3782 | int free_on_err = 0, err = 0, depth, ret; |
3711 | unsigned int allocated = 0, offset = 0; | 3783 | unsigned int allocated = 0, offset = 0; |
3712 | unsigned int allocated_clusters = 0; | 3784 | unsigned int allocated_clusters = 0; |
3713 | unsigned int punched_out = 0; | ||
3714 | unsigned int result = 0; | ||
3715 | struct ext4_allocation_request ar; | 3785 | struct ext4_allocation_request ar; |
3716 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3786 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3717 | ext4_lblk_t cluster_offset; | 3787 | ext4_lblk_t cluster_offset; |
@@ -3721,8 +3791,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3721 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 3791 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
3722 | 3792 | ||
3723 | /* check in cache */ | 3793 | /* check in cache */ |
3724 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && | 3794 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { |
3725 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { | ||
3726 | if (!newex.ee_start_lo && !newex.ee_start_hi) { | 3795 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
3727 | if ((sbi->s_cluster_ratio > 1) && | 3796 | if ((sbi->s_cluster_ratio > 1) && |
3728 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) | 3797 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) |
@@ -3790,113 +3859,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3790 | 3859 | ||
3791 | /* if found extent covers block, simply return it */ | 3860 | /* if found extent covers block, simply return it */ |
3792 | if (in_range(map->m_lblk, ee_block, ee_len)) { | 3861 | if (in_range(map->m_lblk, ee_block, ee_len)) { |
3793 | struct ext4_map_blocks punch_map; | ||
3794 | ext4_fsblk_t partial_cluster = 0; | ||
3795 | |||
3796 | newblock = map->m_lblk - ee_block + ee_start; | 3862 | newblock = map->m_lblk - ee_block + ee_start; |
3797 | /* number of remaining blocks in the extent */ | 3863 | /* number of remaining blocks in the extent */ |
3798 | allocated = ee_len - (map->m_lblk - ee_block); | 3864 | allocated = ee_len - (map->m_lblk - ee_block); |
3799 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 3865 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
3800 | ee_block, ee_len, newblock); | 3866 | ee_block, ee_len, newblock); |
3801 | 3867 | ||
3802 | if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { | ||
3803 | /* | ||
3804 | * Do not put uninitialized extent | ||
3805 | * in the cache | ||
3806 | */ | ||
3807 | if (!ext4_ext_is_uninitialized(ex)) { | ||
3808 | ext4_ext_put_in_cache(inode, ee_block, | ||
3809 | ee_len, ee_start); | ||
3810 | goto out; | ||
3811 | } | ||
3812 | ret = ext4_ext_handle_uninitialized_extents( | ||
3813 | handle, inode, map, path, flags, | ||
3814 | allocated, newblock); | ||
3815 | return ret; | ||
3816 | } | ||
3817 | |||
3818 | /* | ||
3819 | * Punch out the map length, but only to the | ||
3820 | * end of the extent | ||
3821 | */ | ||
3822 | punched_out = allocated < map->m_len ? | ||
3823 | allocated : map->m_len; | ||
3824 | |||
3825 | /* | 3868 | /* |
3826 | * Sense extents need to be converted to | 3869 | * Do not put uninitialized extent |
3827 | * uninitialized, they must fit in an | 3870 | * in the cache |
3828 | * uninitialized extent | ||
3829 | */ | 3871 | */ |
3830 | if (punched_out > EXT_UNINIT_MAX_LEN) | 3872 | if (!ext4_ext_is_uninitialized(ex)) { |
3831 | punched_out = EXT_UNINIT_MAX_LEN; | 3873 | ext4_ext_put_in_cache(inode, ee_block, |
3832 | 3874 | ee_len, ee_start); | |
3833 | punch_map.m_lblk = map->m_lblk; | 3875 | goto out; |
3834 | punch_map.m_pblk = newblock; | ||
3835 | punch_map.m_len = punched_out; | ||
3836 | punch_map.m_flags = 0; | ||
3837 | |||
3838 | /* Check to see if the extent needs to be split */ | ||
3839 | if (punch_map.m_len != ee_len || | ||
3840 | punch_map.m_lblk != ee_block) { | ||
3841 | |||
3842 | ret = ext4_split_extent(handle, inode, | ||
3843 | path, &punch_map, 0, | ||
3844 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
3845 | EXT4_GET_BLOCKS_PRE_IO); | ||
3846 | |||
3847 | if (ret < 0) { | ||
3848 | err = ret; | ||
3849 | goto out2; | ||
3850 | } | ||
3851 | /* | ||
3852 | * find extent for the block at | ||
3853 | * the start of the hole | ||
3854 | */ | ||
3855 | ext4_ext_drop_refs(path); | ||
3856 | kfree(path); | ||
3857 | |||
3858 | path = ext4_ext_find_extent(inode, | ||
3859 | map->m_lblk, NULL); | ||
3860 | if (IS_ERR(path)) { | ||
3861 | err = PTR_ERR(path); | ||
3862 | path = NULL; | ||
3863 | goto out2; | ||
3864 | } | ||
3865 | |||
3866 | depth = ext_depth(inode); | ||
3867 | ex = path[depth].p_ext; | ||
3868 | ee_len = ext4_ext_get_actual_len(ex); | ||
3869 | ee_block = le32_to_cpu(ex->ee_block); | ||
3870 | ee_start = ext4_ext_pblock(ex); | ||
3871 | |||
3872 | } | ||
3873 | |||
3874 | ext4_ext_mark_uninitialized(ex); | ||
3875 | |||
3876 | ext4_ext_invalidate_cache(inode); | ||
3877 | |||
3878 | err = ext4_ext_rm_leaf(handle, inode, path, | ||
3879 | &partial_cluster, map->m_lblk, | ||
3880 | map->m_lblk + punched_out); | ||
3881 | |||
3882 | if (!err && path->p_hdr->eh_entries == 0) { | ||
3883 | /* | ||
3884 | * Punch hole freed all of this sub tree, | ||
3885 | * so we need to correct eh_depth | ||
3886 | */ | ||
3887 | err = ext4_ext_get_access(handle, inode, path); | ||
3888 | if (err == 0) { | ||
3889 | ext_inode_hdr(inode)->eh_depth = 0; | ||
3890 | ext_inode_hdr(inode)->eh_max = | ||
3891 | cpu_to_le16(ext4_ext_space_root( | ||
3892 | inode, 0)); | ||
3893 | |||
3894 | err = ext4_ext_dirty( | ||
3895 | handle, inode, path); | ||
3896 | } | ||
3897 | } | 3876 | } |
3898 | 3877 | ret = ext4_ext_handle_uninitialized_extents( | |
3899 | goto out2; | 3878 | handle, inode, map, path, flags, |
3879 | allocated, newblock); | ||
3880 | return ret; | ||
3900 | } | 3881 | } |
3901 | } | 3882 | } |
3902 | 3883 | ||
@@ -4165,13 +4146,11 @@ out2: | |||
4165 | ext4_ext_drop_refs(path); | 4146 | ext4_ext_drop_refs(path); |
4166 | kfree(path); | 4147 | kfree(path); |
4167 | } | 4148 | } |
4168 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? | ||
4169 | punched_out : allocated; | ||
4170 | 4149 | ||
4171 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | 4150 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, |
4172 | newblock, map->m_len, err ? err : result); | 4151 | newblock, map->m_len, err ? err : allocated); |
4173 | 4152 | ||
4174 | return err ? err : result; | 4153 | return err ? err : allocated; |
4175 | } | 4154 | } |
4176 | 4155 | ||
4177 | void ext4_ext_truncate(struct inode *inode) | 4156 | void ext4_ext_truncate(struct inode *inode) |
@@ -4228,7 +4207,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
4228 | 4207 | ||
4229 | last_block = (inode->i_size + sb->s_blocksize - 1) | 4208 | last_block = (inode->i_size + sb->s_blocksize - 1) |
4230 | >> EXT4_BLOCK_SIZE_BITS(sb); | 4209 | >> EXT4_BLOCK_SIZE_BITS(sb); |
4231 | err = ext4_ext_remove_space(inode, last_block); | 4210 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
4232 | 4211 | ||
4233 | /* In a multi-transaction truncate, we only make the final | 4212 | /* In a multi-transaction truncate, we only make the final |
4234 | * transaction synchronous. | 4213 | * transaction synchronous. |
@@ -4436,10 +4415,11 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4436 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 4415 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
4437 | if (ret <= 0) { | 4416 | if (ret <= 0) { |
4438 | WARN_ON(ret <= 0); | 4417 | WARN_ON(ret <= 0); |
4439 | printk(KERN_ERR "%s: ext4_ext_map_blocks " | 4418 | ext4_msg(inode->i_sb, KERN_ERR, |
4440 | "returned error inode#%lu, block=%u, " | 4419 | "%s:%d: inode #%lu: block %u: len %u: " |
4441 | "max_blocks=%u", __func__, | 4420 | "ext4_ext_map_blocks returned %d", |
4442 | inode->i_ino, map.m_lblk, map.m_len); | 4421 | __func__, __LINE__, inode->i_ino, map.m_lblk, |
4422 | map.m_len, ret); | ||
4443 | } | 4423 | } |
4444 | ext4_mark_inode_dirty(handle, inode); | 4424 | ext4_mark_inode_dirty(handle, inode); |
4445 | ret2 = ext4_journal_stop(handle); | 4425 | ret2 = ext4_journal_stop(handle); |
@@ -4705,14 +4685,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4705 | { | 4685 | { |
4706 | struct inode *inode = file->f_path.dentry->d_inode; | 4686 | struct inode *inode = file->f_path.dentry->d_inode; |
4707 | struct super_block *sb = inode->i_sb; | 4687 | struct super_block *sb = inode->i_sb; |
4708 | struct ext4_ext_cache cache_ex; | 4688 | ext4_lblk_t first_block, stop_block; |
4709 | ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; | ||
4710 | struct address_space *mapping = inode->i_mapping; | 4689 | struct address_space *mapping = inode->i_mapping; |
4711 | struct ext4_map_blocks map; | ||
4712 | handle_t *handle; | 4690 | handle_t *handle; |
4713 | loff_t first_page, last_page, page_len; | 4691 | loff_t first_page, last_page, page_len; |
4714 | loff_t first_page_offset, last_page_offset; | 4692 | loff_t first_page_offset, last_page_offset; |
4715 | int ret, credits, blocks_released, err = 0; | 4693 | int credits, err = 0; |
4716 | 4694 | ||
4717 | /* No need to punch hole beyond i_size */ | 4695 | /* No need to punch hole beyond i_size */ |
4718 | if (offset >= inode->i_size) | 4696 | if (offset >= inode->i_size) |
@@ -4728,10 +4706,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4728 | offset; | 4706 | offset; |
4729 | } | 4707 | } |
4730 | 4708 | ||
4731 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4732 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4733 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4734 | |||
4735 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 4709 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
4736 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 4710 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; |
4737 | 4711 | ||
@@ -4810,7 +4784,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4810 | } | 4784 | } |
4811 | } | 4785 | } |
4812 | 4786 | ||
4813 | |||
4814 | /* | 4787 | /* |
4815 | * If i_size is contained in the last page, we need to | 4788 | * If i_size is contained in the last page, we need to |
4816 | * unmap and zero the partial page after i_size | 4789 | * unmap and zero the partial page after i_size |
@@ -4830,73 +4803,22 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4830 | } | 4803 | } |
4831 | } | 4804 | } |
4832 | 4805 | ||
4806 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4807 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4808 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4809 | |||
4833 | /* If there are no blocks to remove, return now */ | 4810 | /* If there are no blocks to remove, return now */ |
4834 | if (first_block >= last_block) | 4811 | if (first_block >= stop_block) |
4835 | goto out; | 4812 | goto out; |
4836 | 4813 | ||
4837 | down_write(&EXT4_I(inode)->i_data_sem); | 4814 | down_write(&EXT4_I(inode)->i_data_sem); |
4838 | ext4_ext_invalidate_cache(inode); | 4815 | ext4_ext_invalidate_cache(inode); |
4839 | ext4_discard_preallocations(inode); | 4816 | ext4_discard_preallocations(inode); |
4840 | 4817 | ||
4841 | /* | 4818 | err = ext4_ext_remove_space(inode, first_block, stop_block - 1); |
4842 | * Loop over all the blocks and identify blocks | ||
4843 | * that need to be punched out | ||
4844 | */ | ||
4845 | iblock = first_block; | ||
4846 | blocks_released = 0; | ||
4847 | while (iblock < last_block) { | ||
4848 | max_blocks = last_block - iblock; | ||
4849 | num_blocks = 1; | ||
4850 | memset(&map, 0, sizeof(map)); | ||
4851 | map.m_lblk = iblock; | ||
4852 | map.m_len = max_blocks; | ||
4853 | ret = ext4_ext_map_blocks(handle, inode, &map, | ||
4854 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | ||
4855 | |||
4856 | if (ret > 0) { | ||
4857 | blocks_released += ret; | ||
4858 | num_blocks = ret; | ||
4859 | } else if (ret == 0) { | ||
4860 | /* | ||
4861 | * If map blocks could not find the block, | ||
4862 | * then it is in a hole. If the hole was | ||
4863 | * not already cached, then map blocks should | ||
4864 | * put it in the cache. So we can get the hole | ||
4865 | * out of the cache | ||
4866 | */ | ||
4867 | memset(&cache_ex, 0, sizeof(cache_ex)); | ||
4868 | if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && | ||
4869 | !cache_ex.ec_start) { | ||
4870 | |||
4871 | /* The hole is cached */ | ||
4872 | num_blocks = cache_ex.ec_block + | ||
4873 | cache_ex.ec_len - iblock; | ||
4874 | |||
4875 | } else { | ||
4876 | /* The block could not be identified */ | ||
4877 | err = -EIO; | ||
4878 | break; | ||
4879 | } | ||
4880 | } else { | ||
4881 | /* Map blocks error */ | ||
4882 | err = ret; | ||
4883 | break; | ||
4884 | } | ||
4885 | |||
4886 | if (num_blocks == 0) { | ||
4887 | /* This condition should never happen */ | ||
4888 | ext_debug("Block lookup failed"); | ||
4889 | err = -EIO; | ||
4890 | break; | ||
4891 | } | ||
4892 | |||
4893 | iblock += num_blocks; | ||
4894 | } | ||
4895 | 4819 | ||
4896 | if (blocks_released > 0) { | 4820 | ext4_ext_invalidate_cache(inode); |
4897 | ext4_ext_invalidate_cache(inode); | 4821 | ext4_discard_preallocations(inode); |
4898 | ext4_discard_preallocations(inode); | ||
4899 | } | ||
4900 | 4822 | ||
4901 | if (IS_SYNC(inode)) | 4823 | if (IS_SYNC(inode)) |
4902 | ext4_handle_sync(handle); | 4824 | ext4_handle_sync(handle); |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 00a2cb753efd..bb6c7d811313 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode) | |||
89 | io = list_entry(ei->i_completed_io_list.next, | 89 | io = list_entry(ei->i_completed_io_list.next, |
90 | ext4_io_end_t, list); | 90 | ext4_io_end_t, list); |
91 | list_del_init(&io->list); | 91 | list_del_init(&io->list); |
92 | io->flag |= EXT4_IO_END_IN_FSYNC; | ||
92 | /* | 93 | /* |
93 | * Calling ext4_end_io_nolock() to convert completed | 94 | * Calling ext4_end_io_nolock() to convert completed |
94 | * IO to written. | 95 | * IO to written. |
@@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode) | |||
108 | if (ret < 0) | 109 | if (ret < 0) |
109 | ret2 = ret; | 110 | ret2 = ret; |
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 111 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
112 | io->flag &= ~EXT4_IO_END_IN_FSYNC; | ||
111 | } | 113 | } |
112 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 114 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
113 | return (ret2 < 0) ? ret2 : 0; | 115 | return (ret2 < 0) ? ret2 : 0; |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25d8c9781ad9..409c2ee7750a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
92 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
93 | } | 93 | } |
94 | 94 | ||
95 | void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) | ||
96 | { | ||
97 | if (uptodate) { | ||
98 | set_buffer_uptodate(bh); | ||
99 | set_bitmap_uptodate(bh); | ||
100 | } | ||
101 | unlock_buffer(bh); | ||
102 | put_bh(bh); | ||
103 | } | ||
104 | |||
95 | /* | 105 | /* |
96 | * Read the inode allocation bitmap for a given block_group, reading | 106 | * Read the inode allocation bitmap for a given block_group, reading |
97 | * into the specified slot in the superblock's bitmap cache. | 107 | * into the specified slot in the superblock's bitmap cache. |
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
147 | return bh; | 157 | return bh; |
148 | } | 158 | } |
149 | /* | 159 | /* |
150 | * submit the buffer_head for read. We can | 160 | * submit the buffer_head for reading |
151 | * safely mark the bitmap as uptodate now. | ||
152 | * We do it here so the bitmap uptodate bit | ||
153 | * get set with buffer lock held. | ||
154 | */ | 161 | */ |
155 | trace_ext4_load_inode_bitmap(sb, block_group); | 162 | trace_ext4_load_inode_bitmap(sb, block_group); |
156 | set_bitmap_uptodate(bh); | 163 | bh->b_end_io = ext4_end_bitmap_read; |
157 | if (bh_submit_read(bh) < 0) { | 164 | get_bh(bh); |
165 | submit_bh(READ, bh); | ||
166 | wait_on_buffer(bh); | ||
167 | if (!buffer_uptodate(bh)) { | ||
158 | put_bh(bh); | 168 | put_bh(bh); |
159 | ext4_error(sb, "Cannot read inode bitmap - " | 169 | ext4_error(sb, "Cannot read inode bitmap - " |
160 | "block_group = %u, inode_bitmap = %llu", | 170 | "block_group = %u, inode_bitmap = %llu", |
161 | block_group, bitmap_blk); | 171 | block_group, bitmap_blk); |
162 | return NULL; | 172 | return NULL; |
163 | } | 173 | } |
164 | return bh; | 174 | return bh; |
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
194 | struct ext4_sb_info *sbi; | 204 | struct ext4_sb_info *sbi; |
195 | int fatal = 0, err, count, cleared; | 205 | int fatal = 0, err, count, cleared; |
196 | 206 | ||
197 | if (atomic_read(&inode->i_count) > 1) { | 207 | if (!sb) { |
198 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", | 208 | printk(KERN_ERR "EXT4-fs: %s:%d: inode on " |
199 | atomic_read(&inode->i_count)); | 209 | "nonexistent device\n", __func__, __LINE__); |
200 | return; | 210 | return; |
201 | } | 211 | } |
202 | if (inode->i_nlink) { | 212 | if (atomic_read(&inode->i_count) > 1) { |
203 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", | 213 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", |
204 | inode->i_nlink); | 214 | __func__, __LINE__, inode->i_ino, |
215 | atomic_read(&inode->i_count)); | ||
205 | return; | 216 | return; |
206 | } | 217 | } |
207 | if (!sb) { | 218 | if (inode->i_nlink) { |
208 | printk(KERN_ERR "ext4_free_inode: inode on " | 219 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n", |
209 | "nonexistent device\n"); | 220 | __func__, __LINE__, inode->i_ino, inode->i_nlink); |
210 | return; | 221 | return; |
211 | } | 222 | } |
212 | sbi = EXT4_SB(sb); | 223 | sbi = EXT4_SB(sb); |
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
593 | } | 604 | } |
594 | 605 | ||
595 | /* | 606 | /* |
596 | * claim the inode from the inode bitmap. If the group | ||
597 | * is uninit we need to take the groups's ext4_group_lock | ||
598 | * and clear the uninit flag. The inode bitmap update | ||
599 | * and group desc uninit flag clear should be done | ||
600 | * after holding ext4_group_lock so that ext4_read_inode_bitmap | ||
601 | * doesn't race with the ext4_claim_inode | ||
602 | */ | ||
603 | static int ext4_claim_inode(struct super_block *sb, | ||
604 | struct buffer_head *inode_bitmap_bh, | ||
605 | unsigned long ino, ext4_group_t group, umode_t mode) | ||
606 | { | ||
607 | int free = 0, retval = 0, count; | ||
608 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
609 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
610 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | ||
611 | |||
612 | /* | ||
613 | * We have to be sure that new inode allocation does not race with | ||
614 | * inode table initialization, because otherwise we may end up | ||
615 | * allocating and writing new inode right before sb_issue_zeroout | ||
616 | * takes place and overwriting our new inode with zeroes. So we | ||
617 | * take alloc_sem to prevent it. | ||
618 | */ | ||
619 | down_read(&grp->alloc_sem); | ||
620 | ext4_lock_group(sb, group); | ||
621 | if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { | ||
622 | /* not a free inode */ | ||
623 | retval = 1; | ||
624 | goto err_ret; | ||
625 | } | ||
626 | ino++; | ||
627 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | ||
628 | ino > EXT4_INODES_PER_GROUP(sb)) { | ||
629 | ext4_unlock_group(sb, group); | ||
630 | up_read(&grp->alloc_sem); | ||
631 | ext4_error(sb, "reserved inode or inode > inodes count - " | ||
632 | "block_group = %u, inode=%lu", group, | ||
633 | ino + group * EXT4_INODES_PER_GROUP(sb)); | ||
634 | return 1; | ||
635 | } | ||
636 | /* If we didn't allocate from within the initialized part of the inode | ||
637 | * table then we need to initialize up to this inode. */ | ||
638 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
639 | |||
640 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | ||
641 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); | ||
642 | /* When marking the block group with | ||
643 | * ~EXT4_BG_INODE_UNINIT we don't want to depend | ||
644 | * on the value of bg_itable_unused even though | ||
645 | * mke2fs could have initialized the same for us. | ||
646 | * Instead we calculated the value below | ||
647 | */ | ||
648 | |||
649 | free = 0; | ||
650 | } else { | ||
651 | free = EXT4_INODES_PER_GROUP(sb) - | ||
652 | ext4_itable_unused_count(sb, gdp); | ||
653 | } | ||
654 | |||
655 | /* | ||
656 | * Check the relative inode number against the last used | ||
657 | * relative inode number in this group. if it is greater | ||
658 | * we need to update the bg_itable_unused count | ||
659 | * | ||
660 | */ | ||
661 | if (ino > free) | ||
662 | ext4_itable_unused_set(sb, gdp, | ||
663 | (EXT4_INODES_PER_GROUP(sb) - ino)); | ||
664 | } | ||
665 | count = ext4_free_inodes_count(sb, gdp) - 1; | ||
666 | ext4_free_inodes_set(sb, gdp, count); | ||
667 | if (S_ISDIR(mode)) { | ||
668 | count = ext4_used_dirs_count(sb, gdp) + 1; | ||
669 | ext4_used_dirs_set(sb, gdp, count); | ||
670 | if (sbi->s_log_groups_per_flex) { | ||
671 | ext4_group_t f = ext4_flex_group(sbi, group); | ||
672 | |||
673 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); | ||
674 | } | ||
675 | } | ||
676 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
677 | err_ret: | ||
678 | ext4_unlock_group(sb, group); | ||
679 | up_read(&grp->alloc_sem); | ||
680 | return retval; | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * There are two policies for allocating an inode. If the new inode is | 607 | * There are two policies for allocating an inode. If the new inode is |
685 | * a directory, then a forward search is made for a block group with both | 608 | * a directory, then a forward search is made for a block group with both |
686 | * free space and a low directory-to-inode ratio; if that fails, then of | 609 | * free space and a low directory-to-inode ratio; if that fails, then of |
@@ -741,6 +664,11 @@ got_group: | |||
741 | if (ret2 == -1) | 664 | if (ret2 == -1) |
742 | goto out; | 665 | goto out; |
743 | 666 | ||
667 | /* | ||
668 | * Normally we will only go through one pass of this loop, | ||
669 | * unless we get unlucky and it turns out the group we selected | ||
670 | * had its last inode grabbed by someone else. | ||
671 | */ | ||
744 | for (i = 0; i < ngroups; i++, ino = 0) { | 672 | for (i = 0; i < ngroups; i++, ino = 0) { |
745 | err = -EIO; | 673 | err = -EIO; |
746 | 674 | ||
@@ -757,51 +685,24 @@ repeat_in_this_group: | |||
757 | ino = ext4_find_next_zero_bit((unsigned long *) | 685 | ino = ext4_find_next_zero_bit((unsigned long *) |
758 | inode_bitmap_bh->b_data, | 686 | inode_bitmap_bh->b_data, |
759 | EXT4_INODES_PER_GROUP(sb), ino); | 687 | EXT4_INODES_PER_GROUP(sb), ino); |
760 | 688 | if (ino >= EXT4_INODES_PER_GROUP(sb)) { | |
761 | if (ino < EXT4_INODES_PER_GROUP(sb)) { | 689 | if (++group == ngroups) |
762 | 690 | group = 0; | |
763 | BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); | 691 | continue; |
764 | err = ext4_journal_get_write_access(handle, | ||
765 | inode_bitmap_bh); | ||
766 | if (err) | ||
767 | goto fail; | ||
768 | |||
769 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
770 | err = ext4_journal_get_write_access(handle, | ||
771 | group_desc_bh); | ||
772 | if (err) | ||
773 | goto fail; | ||
774 | if (!ext4_claim_inode(sb, inode_bitmap_bh, | ||
775 | ino, group, mode)) { | ||
776 | /* we won it */ | ||
777 | BUFFER_TRACE(inode_bitmap_bh, | ||
778 | "call ext4_handle_dirty_metadata"); | ||
779 | err = ext4_handle_dirty_metadata(handle, | ||
780 | NULL, | ||
781 | inode_bitmap_bh); | ||
782 | if (err) | ||
783 | goto fail; | ||
784 | /* zero bit is inode number 1*/ | ||
785 | ino++; | ||
786 | goto got; | ||
787 | } | ||
788 | /* we lost it */ | ||
789 | ext4_handle_release_buffer(handle, inode_bitmap_bh); | ||
790 | ext4_handle_release_buffer(handle, group_desc_bh); | ||
791 | |||
792 | if (++ino < EXT4_INODES_PER_GROUP(sb)) | ||
793 | goto repeat_in_this_group; | ||
794 | } | 692 | } |
795 | 693 | if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | |
796 | /* | 694 | ext4_error(sb, "reserved inode found cleared - " |
797 | * This case is possible in concurrent environment. It is very | 695 | "inode=%lu", ino + 1); |
798 | * rare. We cannot repeat the find_group_xxx() call because | 696 | continue; |
799 | * that will simply return the same blockgroup, because the | 697 | } |
800 | * group descriptor metadata has not yet been updated. | 698 | ext4_lock_group(sb, group); |
801 | * So we just go onto the next blockgroup. | 699 | ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); |
802 | */ | 700 | ext4_unlock_group(sb, group); |
803 | if (++group == ngroups) | 701 | ino++; /* the inode bitmap is zero-based */ |
804 | group = 0; | 702 | if (!ret2) |
703 | goto got; /* we grabbed the inode! */ | ||
704 | if (ino < EXT4_INODES_PER_GROUP(sb)) | ||
705 | goto repeat_in_this_group; | ||
805 | } | 706 | } |
806 | err = -ENOSPC; | 707 | err = -ENOSPC; |
807 | goto out; | 708 | goto out; |
@@ -838,6 +739,59 @@ got: | |||
838 | if (err) | 739 | if (err) |
839 | goto fail; | 740 | goto fail; |
840 | } | 741 | } |
742 | |||
743 | BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); | ||
744 | err = ext4_journal_get_write_access(handle, inode_bitmap_bh); | ||
745 | if (err) | ||
746 | goto fail; | ||
747 | |||
748 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
749 | err = ext4_journal_get_write_access(handle, group_desc_bh); | ||
750 | if (err) | ||
751 | goto fail; | ||
752 | |||
753 | /* Update the relevant bg descriptor fields */ | ||
754 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
755 | int free; | ||
756 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
757 | |||
758 | down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ | ||
759 | ext4_lock_group(sb, group); /* while we modify the bg desc */ | ||
760 | free = EXT4_INODES_PER_GROUP(sb) - | ||
761 | ext4_itable_unused_count(sb, gdp); | ||
762 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | ||
763 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); | ||
764 | free = 0; | ||
765 | } | ||
766 | /* | ||
767 | * Check the relative inode number against the last used | ||
768 | * relative inode number in this group. if it is greater | ||
769 | * we need to update the bg_itable_unused count | ||
770 | */ | ||
771 | if (ino > free) | ||
772 | ext4_itable_unused_set(sb, gdp, | ||
773 | (EXT4_INODES_PER_GROUP(sb) - ino)); | ||
774 | up_read(&grp->alloc_sem); | ||
775 | } | ||
776 | ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); | ||
777 | if (S_ISDIR(mode)) { | ||
778 | ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); | ||
779 | if (sbi->s_log_groups_per_flex) { | ||
780 | ext4_group_t f = ext4_flex_group(sbi, group); | ||
781 | |||
782 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); | ||
783 | } | ||
784 | } | ||
785 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
787 | ext4_unlock_group(sb, group); | ||
788 | } | ||
789 | |||
790 | BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); | ||
791 | err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); | ||
792 | if (err) | ||
793 | goto fail; | ||
794 | |||
841 | BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); | 795 | BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); |
842 | err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); | 796 | err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); |
843 | if (err) | 797 | if (err) |
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1101 | * where it is called from on active part of filesystem is ext4lazyinit | 1055 | * where it is called from on active part of filesystem is ext4lazyinit |
1102 | * thread, so we do not need any special locks, however we have to prevent | 1056 | * thread, so we do not need any special locks, however we have to prevent |
1103 | * inode allocation from the current group, so we take alloc_sem lock, to | 1057 | * inode allocation from the current group, so we take alloc_sem lock, to |
1104 | * block ext4_claim_inode until we are finished. | 1058 | * block ext4_new_inode() until we are finished. |
1105 | */ | 1059 | */ |
1106 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | 1060 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, |
1107 | int barrier) | 1061 | int barrier) |
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | |||
1149 | sbi->s_inodes_per_block); | 1103 | sbi->s_inodes_per_block); |
1150 | 1104 | ||
1151 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | 1105 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { |
1152 | ext4_error(sb, "Something is wrong with group %u\n" | 1106 | ext4_error(sb, "Something is wrong with group %u: " |
1153 | "Used itable blocks: %d" | 1107 | "used itable blocks: %d; " |
1154 | "itable unused count: %u\n", | 1108 | "itable unused count: %u", |
1155 | group, used_blks, | 1109 | group, used_blks, |
1156 | ext4_itable_unused_count(sb, gdp)); | 1110 | ext4_itable_unused_count(sb, gdp)); |
1157 | ret = 1; | 1111 | ret = 1; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index feaa82fe629d..c77b0bd2c711 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -272,7 +272,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
272 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); | 272 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
273 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 273 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
274 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 274 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
275 | "with only %d reserved data blocks\n", | 275 | "with only %d reserved data blocks", |
276 | __func__, inode->i_ino, used, | 276 | __func__, inode->i_ino, used, |
277 | ei->i_reserved_data_blocks); | 277 | ei->i_reserved_data_blocks); |
278 | WARN_ON(1); | 278 | WARN_ON(1); |
@@ -1165,7 +1165,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1165 | */ | 1165 | */ |
1166 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " | 1166 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " |
1167 | "ino %lu, to_free %d with only %d reserved " | 1167 | "ino %lu, to_free %d with only %d reserved " |
1168 | "data blocks\n", inode->i_ino, to_free, | 1168 | "data blocks", inode->i_ino, to_free, |
1169 | ei->i_reserved_data_blocks); | 1169 | ei->i_reserved_data_blocks); |
1170 | WARN_ON(1); | 1170 | WARN_ON(1); |
1171 | to_free = ei->i_reserved_data_blocks; | 1171 | to_free = ei->i_reserved_data_blocks; |
@@ -1428,20 +1428,22 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
1428 | static void ext4_print_free_blocks(struct inode *inode) | 1428 | static void ext4_print_free_blocks(struct inode *inode) |
1429 | { | 1429 | { |
1430 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1430 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1431 | printk(KERN_CRIT "Total free blocks count %lld\n", | 1431 | struct super_block *sb = inode->i_sb; |
1432 | |||
1433 | ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", | ||
1432 | EXT4_C2B(EXT4_SB(inode->i_sb), | 1434 | EXT4_C2B(EXT4_SB(inode->i_sb), |
1433 | ext4_count_free_clusters(inode->i_sb))); | 1435 | ext4_count_free_clusters(inode->i_sb))); |
1434 | printk(KERN_CRIT "Free/Dirty block details\n"); | 1436 | ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); |
1435 | printk(KERN_CRIT "free_blocks=%lld\n", | 1437 | ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", |
1436 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), | 1438 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1437 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | 1439 | percpu_counter_sum(&sbi->s_freeclusters_counter))); |
1438 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 1440 | ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", |
1439 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), | 1441 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1440 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | 1442 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); |
1441 | printk(KERN_CRIT "Block reservation details\n"); | 1443 | ext4_msg(sb, KERN_CRIT, "Block reservation details"); |
1442 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 1444 | ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", |
1443 | EXT4_I(inode)->i_reserved_data_blocks); | 1445 | EXT4_I(inode)->i_reserved_data_blocks); |
1444 | printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", | 1446 | ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u", |
1445 | EXT4_I(inode)->i_reserved_meta_blocks); | 1447 | EXT4_I(inode)->i_reserved_meta_blocks); |
1446 | return; | 1448 | return; |
1447 | } | 1449 | } |
@@ -2482,13 +2484,14 @@ static int ext4_da_write_end(struct file *file, | |||
2482 | int write_mode = (int)(unsigned long)fsdata; | 2484 | int write_mode = (int)(unsigned long)fsdata; |
2483 | 2485 | ||
2484 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 2486 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
2485 | if (ext4_should_order_data(inode)) { | 2487 | switch (ext4_inode_journal_mode(inode)) { |
2488 | case EXT4_INODE_ORDERED_DATA_MODE: | ||
2486 | return ext4_ordered_write_end(file, mapping, pos, | 2489 | return ext4_ordered_write_end(file, mapping, pos, |
2487 | len, copied, page, fsdata); | 2490 | len, copied, page, fsdata); |
2488 | } else if (ext4_should_writeback_data(inode)) { | 2491 | case EXT4_INODE_WRITEBACK_DATA_MODE: |
2489 | return ext4_writeback_write_end(file, mapping, pos, | 2492 | return ext4_writeback_write_end(file, mapping, pos, |
2490 | len, copied, page, fsdata); | 2493 | len, copied, page, fsdata); |
2491 | } else { | 2494 | default: |
2492 | BUG(); | 2495 | BUG(); |
2493 | } | 2496 | } |
2494 | } | 2497 | } |
@@ -2763,7 +2766,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
2763 | goto out; | 2766 | goto out; |
2764 | 2767 | ||
2765 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 2768 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
2766 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 2769 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
2767 | iocb->private, io_end->inode->i_ino, iocb, offset, | 2770 | iocb->private, io_end->inode->i_ino, iocb, offset, |
2768 | size); | 2771 | size); |
2769 | 2772 | ||
@@ -2795,9 +2798,6 @@ out: | |||
2795 | 2798 | ||
2796 | /* queue the work to convert unwritten extents to written */ | 2799 | /* queue the work to convert unwritten extents to written */ |
2797 | queue_work(wq, &io_end->work); | 2800 | queue_work(wq, &io_end->work); |
2798 | |||
2799 | /* XXX: probably should move into the real I/O completion handler */ | ||
2800 | inode_dio_done(inode); | ||
2801 | } | 2801 | } |
2802 | 2802 | ||
2803 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | 2803 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) |
@@ -2811,8 +2811,9 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2811 | goto out; | 2811 | goto out; |
2812 | 2812 | ||
2813 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { | 2813 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { |
2814 | printk("sb umounted, discard end_io request for inode %lu\n", | 2814 | ext4_msg(io_end->inode->i_sb, KERN_INFO, |
2815 | io_end->inode->i_ino); | 2815 | "sb umounted, discard end_io request for inode %lu", |
2816 | io_end->inode->i_ino); | ||
2816 | ext4_free_io_end(io_end); | 2817 | ext4_free_io_end(io_end); |
2817 | goto out; | 2818 | goto out; |
2818 | } | 2819 | } |
@@ -2921,9 +2922,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2921 | iocb->private = NULL; | 2922 | iocb->private = NULL; |
2922 | EXT4_I(inode)->cur_aio_dio = NULL; | 2923 | EXT4_I(inode)->cur_aio_dio = NULL; |
2923 | if (!is_sync_kiocb(iocb)) { | 2924 | if (!is_sync_kiocb(iocb)) { |
2924 | iocb->private = ext4_init_io_end(inode, GFP_NOFS); | 2925 | ext4_io_end_t *io_end = |
2925 | if (!iocb->private) | 2926 | ext4_init_io_end(inode, GFP_NOFS); |
2927 | if (!io_end) | ||
2926 | return -ENOMEM; | 2928 | return -ENOMEM; |
2929 | io_end->flag |= EXT4_IO_END_DIRECT; | ||
2930 | iocb->private = io_end; | ||
2927 | /* | 2931 | /* |
2928 | * we save the io structure for current async | 2932 | * we save the io structure for current async |
2929 | * direct IO, so that later ext4_map_blocks() | 2933 | * direct IO, so that later ext4_map_blocks() |
@@ -2940,7 +2944,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2940 | ext4_get_block_write, | 2944 | ext4_get_block_write, |
2941 | ext4_end_io_dio, | 2945 | ext4_end_io_dio, |
2942 | NULL, | 2946 | NULL, |
2943 | DIO_LOCKING | DIO_SKIP_HOLES); | 2947 | DIO_LOCKING); |
2944 | if (iocb->private) | 2948 | if (iocb->private) |
2945 | EXT4_I(inode)->cur_aio_dio = NULL; | 2949 | EXT4_I(inode)->cur_aio_dio = NULL; |
2946 | /* | 2950 | /* |
@@ -3086,18 +3090,25 @@ static const struct address_space_operations ext4_da_aops = { | |||
3086 | 3090 | ||
3087 | void ext4_set_aops(struct inode *inode) | 3091 | void ext4_set_aops(struct inode *inode) |
3088 | { | 3092 | { |
3089 | if (ext4_should_order_data(inode) && | 3093 | switch (ext4_inode_journal_mode(inode)) { |
3090 | test_opt(inode->i_sb, DELALLOC)) | 3094 | case EXT4_INODE_ORDERED_DATA_MODE: |
3091 | inode->i_mapping->a_ops = &ext4_da_aops; | 3095 | if (test_opt(inode->i_sb, DELALLOC)) |
3092 | else if (ext4_should_order_data(inode)) | 3096 | inode->i_mapping->a_ops = &ext4_da_aops; |
3093 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 3097 | else |
3094 | else if (ext4_should_writeback_data(inode) && | 3098 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
3095 | test_opt(inode->i_sb, DELALLOC)) | 3099 | break; |
3096 | inode->i_mapping->a_ops = &ext4_da_aops; | 3100 | case EXT4_INODE_WRITEBACK_DATA_MODE: |
3097 | else if (ext4_should_writeback_data(inode)) | 3101 | if (test_opt(inode->i_sb, DELALLOC)) |
3098 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 3102 | inode->i_mapping->a_ops = &ext4_da_aops; |
3099 | else | 3103 | else |
3104 | inode->i_mapping->a_ops = &ext4_writeback_aops; | ||
3105 | break; | ||
3106 | case EXT4_INODE_JOURNAL_DATA_MODE: | ||
3100 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3107 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
3108 | break; | ||
3109 | default: | ||
3110 | BUG(); | ||
3111 | } | ||
3101 | } | 3112 | } |
3102 | 3113 | ||
3103 | 3114 | ||
@@ -3329,16 +3340,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3329 | { | 3340 | { |
3330 | struct inode *inode = file->f_path.dentry->d_inode; | 3341 | struct inode *inode = file->f_path.dentry->d_inode; |
3331 | if (!S_ISREG(inode->i_mode)) | 3342 | if (!S_ISREG(inode->i_mode)) |
3332 | return -ENOTSUPP; | 3343 | return -EOPNOTSUPP; |
3333 | 3344 | ||
3334 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 3345 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
3335 | /* TODO: Add support for non extent hole punching */ | 3346 | /* TODO: Add support for non extent hole punching */ |
3336 | return -ENOTSUPP; | 3347 | return -EOPNOTSUPP; |
3337 | } | 3348 | } |
3338 | 3349 | ||
3339 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | 3350 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { |
3340 | /* TODO: Add support for bigalloc file systems */ | 3351 | /* TODO: Add support for bigalloc file systems */ |
3341 | return -ENOTSUPP; | 3352 | return -EOPNOTSUPP; |
3342 | } | 3353 | } |
3343 | 3354 | ||
3344 | return ext4_ext_punch_hole(file, offset, length); | 3355 | return ext4_ext_punch_hole(file, offset, length); |
@@ -3924,10 +3935,8 @@ static int ext4_do_update_inode(handle_t *handle, | |||
3924 | ext4_update_dynamic_rev(sb); | 3935 | ext4_update_dynamic_rev(sb); |
3925 | EXT4_SET_RO_COMPAT_FEATURE(sb, | 3936 | EXT4_SET_RO_COMPAT_FEATURE(sb, |
3926 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 3937 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
3927 | sb->s_dirt = 1; | ||
3928 | ext4_handle_sync(handle); | 3938 | ext4_handle_sync(handle); |
3929 | err = ext4_handle_dirty_metadata(handle, NULL, | 3939 | err = ext4_handle_dirty_super(handle, sb); |
3930 | EXT4_SB(sb)->s_sbh); | ||
3931 | } | 3940 | } |
3932 | } | 3941 | } |
3933 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 3942 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
@@ -4152,11 +4161,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4152 | } | 4161 | } |
4153 | 4162 | ||
4154 | if (attr->ia_valid & ATTR_SIZE) { | 4163 | if (attr->ia_valid & ATTR_SIZE) { |
4155 | if (attr->ia_size != i_size_read(inode)) { | 4164 | if (attr->ia_size != i_size_read(inode)) |
4156 | truncate_setsize(inode, attr->ia_size); | 4165 | truncate_setsize(inode, attr->ia_size); |
4157 | ext4_truncate(inode); | 4166 | ext4_truncate(inode); |
4158 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
4159 | ext4_truncate(inode); | ||
4160 | } | 4167 | } |
4161 | 4168 | ||
4162 | if (!rc) { | 4169 | if (!rc) { |
@@ -4314,7 +4321,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
4314 | { | 4321 | { |
4315 | int err = 0; | 4322 | int err = 0; |
4316 | 4323 | ||
4317 | if (test_opt(inode->i_sb, I_VERSION)) | 4324 | if (IS_I_VERSION(inode)) |
4318 | inode_inc_iversion(inode); | 4325 | inode_inc_iversion(inode); |
4319 | 4326 | ||
4320 | /* the do_update_inode consumes one bh->b_count */ | 4327 | /* the do_update_inode consumes one bh->b_count */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cb990b21c698..99ab428bcfa0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -21,6 +21,7 @@ | |||
21 | * mballoc.c contains the multiblocks allocation routines | 21 | * mballoc.c contains the multiblocks allocation routines |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "ext4_jbd2.h" | ||
24 | #include "mballoc.h" | 25 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
26 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
@@ -339,7 +340,7 @@ | |||
339 | */ | 340 | */ |
340 | static struct kmem_cache *ext4_pspace_cachep; | 341 | static struct kmem_cache *ext4_pspace_cachep; |
341 | static struct kmem_cache *ext4_ac_cachep; | 342 | static struct kmem_cache *ext4_ac_cachep; |
342 | static struct kmem_cache *ext4_free_ext_cachep; | 343 | static struct kmem_cache *ext4_free_data_cachep; |
343 | 344 | ||
344 | /* We create slab caches for groupinfo data structures based on the | 345 | /* We create slab caches for groupinfo data structures based on the |
345 | * superblock block size. There will be one per mounted filesystem for | 346 | * superblock block size. There will be one per mounted filesystem for |
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
357 | ext4_group_t group); | 358 | ext4_group_t group); |
358 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 359 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
359 | ext4_group_t group); | 360 | ext4_group_t group); |
360 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | 361 | static void ext4_free_data_callback(struct super_block *sb, |
362 | struct ext4_journal_cb_entry *jce, int rc); | ||
361 | 363 | ||
362 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) | 364 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
363 | { | 365 | { |
@@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
425 | { | 427 | { |
426 | char *bb; | 428 | char *bb; |
427 | 429 | ||
428 | BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); | 430 | BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); |
429 | BUG_ON(max == NULL); | 431 | BUG_ON(max == NULL); |
430 | 432 | ||
431 | if (order > e4b->bd_blkbits + 1) { | 433 | if (order > e4b->bd_blkbits + 1) { |
@@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
436 | /* at order 0 we see each particular block */ | 438 | /* at order 0 we see each particular block */ |
437 | if (order == 0) { | 439 | if (order == 0) { |
438 | *max = 1 << (e4b->bd_blkbits + 3); | 440 | *max = 1 << (e4b->bd_blkbits + 3); |
439 | return EXT4_MB_BITMAP(e4b); | 441 | return e4b->bd_bitmap; |
440 | } | 442 | } |
441 | 443 | ||
442 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 444 | bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
443 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 445 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
444 | 446 | ||
445 | return bb; | 447 | return bb; |
@@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
588 | for (j = 0; j < (1 << order); j++) { | 590 | for (j = 0; j < (1 << order); j++) { |
589 | k = (i * (1 << order)) + j; | 591 | k = (i * (1 << order)) + j; |
590 | MB_CHECK_ASSERT( | 592 | MB_CHECK_ASSERT( |
591 | !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); | 593 | !mb_test_bit(k, e4b->bd_bitmap)); |
592 | } | 594 | } |
593 | count++; | 595 | count++; |
594 | } | 596 | } |
@@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
782 | int groups_per_page; | 784 | int groups_per_page; |
783 | int err = 0; | 785 | int err = 0; |
784 | int i; | 786 | int i; |
785 | ext4_group_t first_group; | 787 | ext4_group_t first_group, group; |
786 | int first_block; | 788 | int first_block; |
787 | struct super_block *sb; | 789 | struct super_block *sb; |
788 | struct buffer_head *bhs; | 790 | struct buffer_head *bhs; |
@@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
806 | 808 | ||
807 | /* allocate buffer_heads to read bitmaps */ | 809 | /* allocate buffer_heads to read bitmaps */ |
808 | if (groups_per_page > 1) { | 810 | if (groups_per_page > 1) { |
809 | err = -ENOMEM; | ||
810 | i = sizeof(struct buffer_head *) * groups_per_page; | 811 | i = sizeof(struct buffer_head *) * groups_per_page; |
811 | bh = kzalloc(i, GFP_NOFS); | 812 | bh = kzalloc(i, GFP_NOFS); |
812 | if (bh == NULL) | 813 | if (bh == NULL) { |
814 | err = -ENOMEM; | ||
813 | goto out; | 815 | goto out; |
816 | } | ||
814 | } else | 817 | } else |
815 | bh = &bhs; | 818 | bh = &bhs; |
816 | 819 | ||
817 | first_group = page->index * blocks_per_page / 2; | 820 | first_group = page->index * blocks_per_page / 2; |
818 | 821 | ||
819 | /* read all groups the page covers into the cache */ | 822 | /* read all groups the page covers into the cache */ |
820 | for (i = 0; i < groups_per_page; i++) { | 823 | for (i = 0, group = first_group; i < groups_per_page; i++, group++) { |
821 | struct ext4_group_desc *desc; | 824 | if (group >= ngroups) |
822 | |||
823 | if (first_group + i >= ngroups) | ||
824 | break; | 825 | break; |
825 | 826 | ||
826 | grinfo = ext4_get_group_info(sb, first_group + i); | 827 | grinfo = ext4_get_group_info(sb, group); |
827 | /* | 828 | /* |
828 | * If page is uptodate then we came here after online resize | 829 | * If page is uptodate then we came here after online resize |
829 | * which added some new uninitialized group info structs, so | 830 | * which added some new uninitialized group info structs, so |
@@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
834 | bh[i] = NULL; | 835 | bh[i] = NULL; |
835 | continue; | 836 | continue; |
836 | } | 837 | } |
837 | 838 | if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { | |
838 | err = -EIO; | 839 | err = -ENOMEM; |
839 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | ||
840 | if (desc == NULL) | ||
841 | goto out; | ||
842 | |||
843 | err = -ENOMEM; | ||
844 | bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); | ||
845 | if (bh[i] == NULL) | ||
846 | goto out; | 840 | goto out; |
847 | |||
848 | if (bitmap_uptodate(bh[i])) | ||
849 | continue; | ||
850 | |||
851 | lock_buffer(bh[i]); | ||
852 | if (bitmap_uptodate(bh[i])) { | ||
853 | unlock_buffer(bh[i]); | ||
854 | continue; | ||
855 | } | ||
856 | ext4_lock_group(sb, first_group + i); | ||
857 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
858 | ext4_init_block_bitmap(sb, bh[i], | ||
859 | first_group + i, desc); | ||
860 | set_bitmap_uptodate(bh[i]); | ||
861 | set_buffer_uptodate(bh[i]); | ||
862 | ext4_unlock_group(sb, first_group + i); | ||
863 | unlock_buffer(bh[i]); | ||
864 | continue; | ||
865 | } | 841 | } |
866 | ext4_unlock_group(sb, first_group + i); | 842 | mb_debug(1, "read bitmap for group %u\n", group); |
867 | if (buffer_uptodate(bh[i])) { | ||
868 | /* | ||
869 | * if not uninit if bh is uptodate, | ||
870 | * bitmap is also uptodate | ||
871 | */ | ||
872 | set_bitmap_uptodate(bh[i]); | ||
873 | unlock_buffer(bh[i]); | ||
874 | continue; | ||
875 | } | ||
876 | get_bh(bh[i]); | ||
877 | /* | ||
878 | * submit the buffer_head for read. We can | ||
879 | * safely mark the bitmap as uptodate now. | ||
880 | * We do it here so the bitmap uptodate bit | ||
881 | * get set with buffer lock held. | ||
882 | */ | ||
883 | set_bitmap_uptodate(bh[i]); | ||
884 | bh[i]->b_end_io = end_buffer_read_sync; | ||
885 | submit_bh(READ, bh[i]); | ||
886 | mb_debug(1, "read bitmap for group %u\n", first_group + i); | ||
887 | } | 843 | } |
888 | 844 | ||
889 | /* wait for I/O completion */ | 845 | /* wait for I/O completion */ |
890 | for (i = 0; i < groups_per_page; i++) | 846 | for (i = 0, group = first_group; i < groups_per_page; i++, group++) { |
891 | if (bh[i]) | 847 | if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) { |
892 | wait_on_buffer(bh[i]); | 848 | err = -EIO; |
893 | |||
894 | err = -EIO; | ||
895 | for (i = 0; i < groups_per_page; i++) | ||
896 | if (bh[i] && !buffer_uptodate(bh[i])) | ||
897 | goto out; | 849 | goto out; |
850 | } | ||
851 | } | ||
898 | 852 | ||
899 | err = 0; | ||
900 | first_block = page->index * blocks_per_page; | 853 | first_block = page->index * blocks_per_page; |
901 | for (i = 0; i < blocks_per_page; i++) { | 854 | for (i = 0; i < blocks_per_page; i++) { |
902 | int group; | 855 | int group; |
@@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | |||
1250 | int order = 1; | 1203 | int order = 1; |
1251 | void *bb; | 1204 | void *bb; |
1252 | 1205 | ||
1253 | BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); | 1206 | BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); |
1254 | BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); | 1207 | BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); |
1255 | 1208 | ||
1256 | bb = EXT4_MB_BUDDY(e4b); | 1209 | bb = e4b->bd_buddy; |
1257 | while (order <= e4b->bd_blkbits + 1) { | 1210 | while (order <= e4b->bd_blkbits + 1) { |
1258 | block = block >> 1; | 1211 | block = block >> 1; |
1259 | if (!mb_test_bit(block, bb)) { | 1212 | if (!mb_test_bit(block, bb)) { |
@@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1323 | 1276 | ||
1324 | /* let's maintain fragments counter */ | 1277 | /* let's maintain fragments counter */ |
1325 | if (first != 0) | 1278 | if (first != 0) |
1326 | block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); | 1279 | block = !mb_test_bit(first - 1, e4b->bd_bitmap); |
1327 | if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) | 1280 | if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) |
1328 | max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); | 1281 | max = !mb_test_bit(first + count, e4b->bd_bitmap); |
1329 | if (block && max) | 1282 | if (block && max) |
1330 | e4b->bd_info->bb_fragments--; | 1283 | e4b->bd_info->bb_fragments--; |
1331 | else if (!block && !max) | 1284 | else if (!block && !max) |
@@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1336 | block = first++; | 1289 | block = first++; |
1337 | order = 0; | 1290 | order = 0; |
1338 | 1291 | ||
1339 | if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { | 1292 | if (!mb_test_bit(block, e4b->bd_bitmap)) { |
1340 | ext4_fsblk_t blocknr; | 1293 | ext4_fsblk_t blocknr; |
1341 | 1294 | ||
1342 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1295 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
@@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1347 | "freeing already freed block " | 1300 | "freeing already freed block " |
1348 | "(bit %u)", block); | 1301 | "(bit %u)", block); |
1349 | } | 1302 | } |
1350 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1303 | mb_clear_bit(block, e4b->bd_bitmap); |
1351 | e4b->bd_info->bb_counters[order]++; | 1304 | e4b->bd_info->bb_counters[order]++; |
1352 | 1305 | ||
1353 | /* start of the buddy */ | 1306 | /* start of the buddy */ |
@@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1429 | break; | 1382 | break; |
1430 | 1383 | ||
1431 | next = (block + 1) * (1 << order); | 1384 | next = (block + 1) * (1 << order); |
1432 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) | 1385 | if (mb_test_bit(next, e4b->bd_bitmap)) |
1433 | break; | 1386 | break; |
1434 | 1387 | ||
1435 | order = mb_find_order_for_block(e4b, next); | 1388 | order = mb_find_order_for_block(e4b, next); |
@@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1466 | 1419 | ||
1467 | /* let's maintain fragments counter */ | 1420 | /* let's maintain fragments counter */ |
1468 | if (start != 0) | 1421 | if (start != 0) |
1469 | mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); | 1422 | mlen = !mb_test_bit(start - 1, e4b->bd_bitmap); |
1470 | if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) | 1423 | if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) |
1471 | max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); | 1424 | max = !mb_test_bit(start + len, e4b->bd_bitmap); |
1472 | if (mlen && max) | 1425 | if (mlen && max) |
1473 | e4b->bd_info->bb_fragments++; | 1426 | e4b->bd_info->bb_fragments++; |
1474 | else if (!mlen && !max) | 1427 | else if (!mlen && !max) |
@@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1511 | } | 1464 | } |
1512 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); | 1465 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); |
1513 | 1466 | ||
1514 | ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | 1467 | ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); |
1515 | mb_check_buddy(e4b); | 1468 | mb_check_buddy(e4b); |
1516 | 1469 | ||
1517 | return ret; | 1470 | return ret; |
@@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1810 | struct ext4_buddy *e4b) | 1763 | struct ext4_buddy *e4b) |
1811 | { | 1764 | { |
1812 | struct super_block *sb = ac->ac_sb; | 1765 | struct super_block *sb = ac->ac_sb; |
1813 | void *bitmap = EXT4_MB_BITMAP(e4b); | 1766 | void *bitmap = e4b->bd_bitmap; |
1814 | struct ext4_free_extent ex; | 1767 | struct ext4_free_extent ex; |
1815 | int i; | 1768 | int i; |
1816 | int free; | 1769 | int free; |
@@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1870 | { | 1823 | { |
1871 | struct super_block *sb = ac->ac_sb; | 1824 | struct super_block *sb = ac->ac_sb; |
1872 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1825 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1873 | void *bitmap = EXT4_MB_BITMAP(e4b); | 1826 | void *bitmap = e4b->bd_bitmap; |
1874 | struct ext4_free_extent ex; | 1827 | struct ext4_free_extent ex; |
1875 | ext4_fsblk_t first_group_block; | 1828 | ext4_fsblk_t first_group_block; |
1876 | ext4_fsblk_t a; | 1829 | ext4_fsblk_t a; |
@@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2224 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2177 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2225 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2178 | meta_group_info = kmalloc(metalen, GFP_KERNEL); |
2226 | if (meta_group_info == NULL) { | 2179 | if (meta_group_info == NULL) { |
2227 | ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " | 2180 | ext4_msg(sb, KERN_ERR, "can't allocate mem " |
2228 | "for a buddy group"); | 2181 | "for a buddy group"); |
2229 | goto exit_meta_group_info; | 2182 | goto exit_meta_group_info; |
2230 | } | 2183 | } |
@@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2238 | 2191 | ||
2239 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); | 2192 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2240 | if (meta_group_info[i] == NULL) { | 2193 | if (meta_group_info[i] == NULL) { |
2241 | ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); | 2194 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); |
2242 | goto exit_group_info; | 2195 | goto exit_group_info; |
2243 | } | 2196 | } |
2244 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | 2197 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); |
@@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2522 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2475 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2523 | &ext4_mb_seq_groups_fops, sb); | 2476 | &ext4_mb_seq_groups_fops, sb); |
2524 | 2477 | ||
2525 | if (sbi->s_journal) | ||
2526 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2527 | |||
2528 | return 0; | 2478 | return 0; |
2529 | 2479 | ||
2530 | out_free_locality_groups: | 2480 | out_free_locality_groups: |
@@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb, | |||
2637 | * This function is called by the jbd2 layer once the commit has finished, | 2587 | * This function is called by the jbd2 layer once the commit has finished, |
2638 | * so we know we can free the blocks that were released with that commit. | 2588 | * so we know we can free the blocks that were released with that commit. |
2639 | */ | 2589 | */ |
2640 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | 2590 | static void ext4_free_data_callback(struct super_block *sb, |
2591 | struct ext4_journal_cb_entry *jce, | ||
2592 | int rc) | ||
2641 | { | 2593 | { |
2642 | struct super_block *sb = journal->j_private; | 2594 | struct ext4_free_data *entry = (struct ext4_free_data *)jce; |
2643 | struct ext4_buddy e4b; | 2595 | struct ext4_buddy e4b; |
2644 | struct ext4_group_info *db; | 2596 | struct ext4_group_info *db; |
2645 | int err, count = 0, count2 = 0; | 2597 | int err, count = 0, count2 = 0; |
2646 | struct ext4_free_data *entry; | ||
2647 | struct list_head *l, *ltmp; | ||
2648 | 2598 | ||
2649 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2599 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2650 | entry = list_entry(l, struct ext4_free_data, list); | 2600 | entry->efd_count, entry->efd_group, entry); |
2651 | 2601 | ||
2652 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2602 | if (test_opt(sb, DISCARD)) |
2653 | entry->count, entry->group, entry); | 2603 | ext4_issue_discard(sb, entry->efd_group, |
2604 | entry->efd_start_cluster, entry->efd_count); | ||
2654 | 2605 | ||
2655 | if (test_opt(sb, DISCARD)) | 2606 | err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); |
2656 | ext4_issue_discard(sb, entry->group, | 2607 | /* we expect to find existing buddy because it's pinned */ |
2657 | entry->start_cluster, entry->count); | 2608 | BUG_ON(err != 0); |
2658 | 2609 | ||
2659 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | ||
2660 | /* we expect to find existing buddy because it's pinned */ | ||
2661 | BUG_ON(err != 0); | ||
2662 | 2610 | ||
2663 | db = e4b.bd_info; | 2611 | db = e4b.bd_info; |
2664 | /* there are blocks to put in buddy to make them really free */ | 2612 | /* there are blocks to put in buddy to make them really free */ |
2665 | count += entry->count; | 2613 | count += entry->efd_count; |
2666 | count2++; | 2614 | count2++; |
2667 | ext4_lock_group(sb, entry->group); | 2615 | ext4_lock_group(sb, entry->efd_group); |
2668 | /* Take it out of per group rb tree */ | 2616 | /* Take it out of per group rb tree */ |
2669 | rb_erase(&entry->node, &(db->bb_free_root)); | 2617 | rb_erase(&entry->efd_node, &(db->bb_free_root)); |
2670 | mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); | 2618 | mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); |
2671 | 2619 | ||
2672 | /* | 2620 | /* |
2673 | * Clear the trimmed flag for the group so that the next | 2621 | * Clear the trimmed flag for the group so that the next |
2674 | * ext4_trim_fs can trim it. | 2622 | * ext4_trim_fs can trim it. |
2675 | * If the volume is mounted with -o discard, online discard | 2623 | * If the volume is mounted with -o discard, online discard |
2676 | * is supported and the free blocks will be trimmed online. | 2624 | * is supported and the free blocks will be trimmed online. |
2677 | */ | 2625 | */ |
2678 | if (!test_opt(sb, DISCARD)) | 2626 | if (!test_opt(sb, DISCARD)) |
2679 | EXT4_MB_GRP_CLEAR_TRIMMED(db); | 2627 | EXT4_MB_GRP_CLEAR_TRIMMED(db); |
2680 | 2628 | ||
2681 | if (!db->bb_free_root.rb_node) { | 2629 | if (!db->bb_free_root.rb_node) { |
2682 | /* No more items in the per group rb tree | 2630 | /* No more items in the per group rb tree |
2683 | * balance refcounts from ext4_mb_free_metadata() | 2631 | * balance refcounts from ext4_mb_free_metadata() |
2684 | */ | 2632 | */ |
2685 | page_cache_release(e4b.bd_buddy_page); | 2633 | page_cache_release(e4b.bd_buddy_page); |
2686 | page_cache_release(e4b.bd_bitmap_page); | 2634 | page_cache_release(e4b.bd_bitmap_page); |
2687 | } | ||
2688 | ext4_unlock_group(sb, entry->group); | ||
2689 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2690 | ext4_mb_unload_buddy(&e4b); | ||
2691 | } | 2635 | } |
2636 | ext4_unlock_group(sb, entry->efd_group); | ||
2637 | kmem_cache_free(ext4_free_data_cachep, entry); | ||
2638 | ext4_mb_unload_buddy(&e4b); | ||
2692 | 2639 | ||
2693 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); | 2640 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
2694 | } | 2641 | } |
@@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void) | |||
2741 | return -ENOMEM; | 2688 | return -ENOMEM; |
2742 | } | 2689 | } |
2743 | 2690 | ||
2744 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, | 2691 | ext4_free_data_cachep = KMEM_CACHE(ext4_free_data, |
2745 | SLAB_RECLAIM_ACCOUNT); | 2692 | SLAB_RECLAIM_ACCOUNT); |
2746 | if (ext4_free_ext_cachep == NULL) { | 2693 | if (ext4_free_data_cachep == NULL) { |
2747 | kmem_cache_destroy(ext4_pspace_cachep); | 2694 | kmem_cache_destroy(ext4_pspace_cachep); |
2748 | kmem_cache_destroy(ext4_ac_cachep); | 2695 | kmem_cache_destroy(ext4_ac_cachep); |
2749 | return -ENOMEM; | 2696 | return -ENOMEM; |
@@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void) | |||
2761 | rcu_barrier(); | 2708 | rcu_barrier(); |
2762 | kmem_cache_destroy(ext4_pspace_cachep); | 2709 | kmem_cache_destroy(ext4_pspace_cachep); |
2763 | kmem_cache_destroy(ext4_ac_cachep); | 2710 | kmem_cache_destroy(ext4_ac_cachep); |
2764 | kmem_cache_destroy(ext4_free_ext_cachep); | 2711 | kmem_cache_destroy(ext4_free_data_cachep); |
2765 | ext4_groupinfo_destroy_slabs(); | 2712 | ext4_groupinfo_destroy_slabs(); |
2766 | ext4_remove_debugfs_entry(); | 2713 | ext4_remove_debugfs_entry(); |
2767 | } | 2714 | } |
@@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2815 | len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); | 2762 | len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
2816 | if (!ext4_data_block_valid(sbi, block, len)) { | 2763 | if (!ext4_data_block_valid(sbi, block, len)) { |
2817 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " | 2764 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " |
2818 | "fs metadata\n", block, block+len); | 2765 | "fs metadata", block, block+len); |
2819 | /* File system mounted not to panic on error | 2766 | /* File system mounted not to panic on error |
2820 | * Fix the bitmap and repeat the block allocation | 2767 | * Fix the bitmap and repeat the block allocation |
2821 | * We leak some of the blocks here. | 2768 | * We leak some of the blocks here. |
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2911 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 2858 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
2912 | int bsbits, max; | 2859 | int bsbits, max; |
2913 | ext4_lblk_t end; | 2860 | ext4_lblk_t end; |
2914 | loff_t size, orig_size, start_off; | 2861 | loff_t size, start_off; |
2862 | loff_t orig_size __maybe_unused; | ||
2915 | ext4_lblk_t start; | 2863 | ext4_lblk_t start; |
2916 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 2864 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
2917 | struct ext4_prealloc_space *pa; | 2865 | struct ext4_prealloc_space *pa; |
@@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3321 | n = rb_first(&(grp->bb_free_root)); | 3269 | n = rb_first(&(grp->bb_free_root)); |
3322 | 3270 | ||
3323 | while (n) { | 3271 | while (n) { |
3324 | entry = rb_entry(n, struct ext4_free_data, node); | 3272 | entry = rb_entry(n, struct ext4_free_data, efd_node); |
3325 | ext4_set_bits(bitmap, entry->start_cluster, entry->count); | 3273 | ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); |
3326 | n = rb_next(n); | 3274 | n = rb_next(n); |
3327 | } | 3275 | } |
3328 | return; | 3276 | return; |
@@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3916 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | 3864 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) |
3917 | return; | 3865 | return; |
3918 | 3866 | ||
3919 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" | 3867 | ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:" |
3920 | " Allocation context details:"); | 3868 | " Allocation context details:"); |
3921 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", | 3869 | ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d", |
3922 | ac->ac_status, ac->ac_flags); | 3870 | ac->ac_status, ac->ac_flags); |
3923 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " | 3871 | ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, " |
3924 | "goal %lu/%lu/%lu@%lu, " | 3872 | "goal %lu/%lu/%lu@%lu, " |
3925 | "best %lu/%lu/%lu@%lu cr %d", | 3873 | "best %lu/%lu/%lu@%lu cr %d", |
3926 | (unsigned long)ac->ac_o_ex.fe_group, | 3874 | (unsigned long)ac->ac_o_ex.fe_group, |
@@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3936 | (unsigned long)ac->ac_b_ex.fe_len, | 3884 | (unsigned long)ac->ac_b_ex.fe_len, |
3937 | (unsigned long)ac->ac_b_ex.fe_logical, | 3885 | (unsigned long)ac->ac_b_ex.fe_logical, |
3938 | (int)ac->ac_criteria); | 3886 | (int)ac->ac_criteria); |
3939 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", | 3887 | ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", |
3940 | ac->ac_ex_scanned, ac->ac_found); | 3888 | ac->ac_ex_scanned, ac->ac_found); |
3941 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); | 3889 | ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); |
3942 | ngroups = ext4_get_groups_count(sb); | 3890 | ngroups = ext4_get_groups_count(sb); |
3943 | for (i = 0; i < ngroups; i++) { | 3891 | for (i = 0; i < ngroups; i++) { |
3944 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); | 3892 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
@@ -4428,9 +4376,9 @@ out: | |||
4428 | static int can_merge(struct ext4_free_data *entry1, | 4376 | static int can_merge(struct ext4_free_data *entry1, |
4429 | struct ext4_free_data *entry2) | 4377 | struct ext4_free_data *entry2) |
4430 | { | 4378 | { |
4431 | if ((entry1->t_tid == entry2->t_tid) && | 4379 | if ((entry1->efd_tid == entry2->efd_tid) && |
4432 | (entry1->group == entry2->group) && | 4380 | (entry1->efd_group == entry2->efd_group) && |
4433 | ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) | 4381 | ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) |
4434 | return 1; | 4382 | return 1; |
4435 | return 0; | 4383 | return 0; |
4436 | } | 4384 | } |
@@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4452 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4400 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4453 | BUG_ON(e4b->bd_buddy_page == NULL); | 4401 | BUG_ON(e4b->bd_buddy_page == NULL); |
4454 | 4402 | ||
4455 | new_node = &new_entry->node; | 4403 | new_node = &new_entry->efd_node; |
4456 | cluster = new_entry->start_cluster; | 4404 | cluster = new_entry->efd_start_cluster; |
4457 | 4405 | ||
4458 | if (!*n) { | 4406 | if (!*n) { |
4459 | /* first free block exent. We need to | 4407 | /* first free block exent. We need to |
@@ -4466,10 +4414,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4466 | } | 4414 | } |
4467 | while (*n) { | 4415 | while (*n) { |
4468 | parent = *n; | 4416 | parent = *n; |
4469 | entry = rb_entry(parent, struct ext4_free_data, node); | 4417 | entry = rb_entry(parent, struct ext4_free_data, efd_node); |
4470 | if (cluster < entry->start_cluster) | 4418 | if (cluster < entry->efd_start_cluster) |
4471 | n = &(*n)->rb_left; | 4419 | n = &(*n)->rb_left; |
4472 | else if (cluster >= (entry->start_cluster + entry->count)) | 4420 | else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) |
4473 | n = &(*n)->rb_right; | 4421 | n = &(*n)->rb_right; |
4474 | else { | 4422 | else { |
4475 | ext4_grp_locked_error(sb, group, 0, | 4423 | ext4_grp_locked_error(sb, group, 0, |
@@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4486 | /* Now try to see the extent can be merged to left and right */ | 4434 | /* Now try to see the extent can be merged to left and right */ |
4487 | node = rb_prev(new_node); | 4435 | node = rb_prev(new_node); |
4488 | if (node) { | 4436 | if (node) { |
4489 | entry = rb_entry(node, struct ext4_free_data, node); | 4437 | entry = rb_entry(node, struct ext4_free_data, efd_node); |
4490 | if (can_merge(entry, new_entry)) { | 4438 | if (can_merge(entry, new_entry)) { |
4491 | new_entry->start_cluster = entry->start_cluster; | 4439 | new_entry->efd_start_cluster = entry->efd_start_cluster; |
4492 | new_entry->count += entry->count; | 4440 | new_entry->efd_count += entry->efd_count; |
4493 | rb_erase(node, &(db->bb_free_root)); | 4441 | rb_erase(node, &(db->bb_free_root)); |
4494 | spin_lock(&sbi->s_md_lock); | 4442 | ext4_journal_callback_del(handle, &entry->efd_jce); |
4495 | list_del(&entry->list); | 4443 | kmem_cache_free(ext4_free_data_cachep, entry); |
4496 | spin_unlock(&sbi->s_md_lock); | ||
4497 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4498 | } | 4444 | } |
4499 | } | 4445 | } |
4500 | 4446 | ||
4501 | node = rb_next(new_node); | 4447 | node = rb_next(new_node); |
4502 | if (node) { | 4448 | if (node) { |
4503 | entry = rb_entry(node, struct ext4_free_data, node); | 4449 | entry = rb_entry(node, struct ext4_free_data, efd_node); |
4504 | if (can_merge(new_entry, entry)) { | 4450 | if (can_merge(new_entry, entry)) { |
4505 | new_entry->count += entry->count; | 4451 | new_entry->efd_count += entry->efd_count; |
4506 | rb_erase(node, &(db->bb_free_root)); | 4452 | rb_erase(node, &(db->bb_free_root)); |
4507 | spin_lock(&sbi->s_md_lock); | 4453 | ext4_journal_callback_del(handle, &entry->efd_jce); |
4508 | list_del(&entry->list); | 4454 | kmem_cache_free(ext4_free_data_cachep, entry); |
4509 | spin_unlock(&sbi->s_md_lock); | ||
4510 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4511 | } | 4455 | } |
4512 | } | 4456 | } |
4513 | /* Add the extent to transaction's private list */ | 4457 | /* Add the extent to transaction's private list */ |
4514 | spin_lock(&sbi->s_md_lock); | 4458 | ext4_journal_callback_add(handle, ext4_free_data_callback, |
4515 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | 4459 | &new_entry->efd_jce); |
4516 | spin_unlock(&sbi->s_md_lock); | ||
4517 | return 0; | 4460 | return 0; |
4518 | } | 4461 | } |
4519 | 4462 | ||
@@ -4691,15 +4634,15 @@ do_more: | |||
4691 | * blocks being freed are metadata. these blocks shouldn't | 4634 | * blocks being freed are metadata. these blocks shouldn't |
4692 | * be used until this transaction is committed | 4635 | * be used until this transaction is committed |
4693 | */ | 4636 | */ |
4694 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | 4637 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); |
4695 | if (!new_entry) { | 4638 | if (!new_entry) { |
4696 | err = -ENOMEM; | 4639 | err = -ENOMEM; |
4697 | goto error_return; | 4640 | goto error_return; |
4698 | } | 4641 | } |
4699 | new_entry->start_cluster = bit; | 4642 | new_entry->efd_start_cluster = bit; |
4700 | new_entry->group = block_group; | 4643 | new_entry->efd_group = block_group; |
4701 | new_entry->count = count_clusters; | 4644 | new_entry->efd_count = count_clusters; |
4702 | new_entry->t_tid = handle->h_transaction->t_tid; | 4645 | new_entry->efd_tid = handle->h_transaction->t_tid; |
4703 | 4646 | ||
4704 | ext4_lock_group(sb, block_group); | 4647 | ext4_lock_group(sb, block_group); |
4705 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); | 4648 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
@@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4971 | start = (e4b.bd_info->bb_first_free > start) ? | 4914 | start = (e4b.bd_info->bb_first_free > start) ? |
4972 | e4b.bd_info->bb_first_free : start; | 4915 | e4b.bd_info->bb_first_free : start; |
4973 | 4916 | ||
4974 | while (start < max) { | 4917 | while (start <= max) { |
4975 | start = mb_find_next_zero_bit(bitmap, max, start); | 4918 | start = mb_find_next_zero_bit(bitmap, max + 1, start); |
4976 | if (start >= max) | 4919 | if (start > max) |
4977 | break; | 4920 | break; |
4978 | next = mb_find_next_bit(bitmap, max, start); | 4921 | next = mb_find_next_bit(bitmap, max + 1, start); |
4979 | 4922 | ||
4980 | if ((next - start) >= minblocks) { | 4923 | if ((next - start) >= minblocks) { |
4981 | ext4_trim_extent(sb, start, | 4924 | ext4_trim_extent(sb, start, |
@@ -5027,37 +4970,36 @@ out: | |||
5027 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | 4970 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) |
5028 | { | 4971 | { |
5029 | struct ext4_group_info *grp; | 4972 | struct ext4_group_info *grp; |
5030 | ext4_group_t first_group, last_group; | 4973 | ext4_group_t group, first_group, last_group; |
5031 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
5032 | ext4_grpblk_t cnt = 0, first_cluster, last_cluster; | 4974 | ext4_grpblk_t cnt = 0, first_cluster, last_cluster; |
5033 | uint64_t start, len, minlen, trimmed = 0; | 4975 | uint64_t start, end, minlen, trimmed = 0; |
5034 | ext4_fsblk_t first_data_blk = | 4976 | ext4_fsblk_t first_data_blk = |
5035 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 4977 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
4978 | ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); | ||
5036 | int ret = 0; | 4979 | int ret = 0; |
5037 | 4980 | ||
5038 | start = range->start >> sb->s_blocksize_bits; | 4981 | start = range->start >> sb->s_blocksize_bits; |
5039 | len = range->len >> sb->s_blocksize_bits; | 4982 | end = start + (range->len >> sb->s_blocksize_bits) - 1; |
5040 | minlen = range->minlen >> sb->s_blocksize_bits; | 4983 | minlen = range->minlen >> sb->s_blocksize_bits; |
5041 | 4984 | ||
5042 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) | 4985 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || |
4986 | unlikely(start >= max_blks)) | ||
5043 | return -EINVAL; | 4987 | return -EINVAL; |
5044 | if (start + len <= first_data_blk) | 4988 | if (end >= max_blks) |
4989 | end = max_blks - 1; | ||
4990 | if (end <= first_data_blk) | ||
5045 | goto out; | 4991 | goto out; |
5046 | if (start < first_data_blk) { | 4992 | if (start < first_data_blk) |
5047 | len -= first_data_blk - start; | ||
5048 | start = first_data_blk; | 4993 | start = first_data_blk; |
5049 | } | ||
5050 | 4994 | ||
5051 | /* Determine first and last group to examine based on start and len */ | 4995 | /* Determine first and last group to examine based on start and end */ |
5052 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | 4996 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, |
5053 | &first_group, &first_cluster); | 4997 | &first_group, &first_cluster); |
5054 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | 4998 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end, |
5055 | &last_group, &last_cluster); | 4999 | &last_group, &last_cluster); |
5056 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
5057 | last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); | ||
5058 | 5000 | ||
5059 | if (first_group > last_group) | 5001 | /* end now represents the last cluster to discard in this group */ |
5060 | return -EINVAL; | 5002 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
5061 | 5003 | ||
5062 | for (group = first_group; group <= last_group; group++) { | 5004 | for (group = first_group; group <= last_group; group++) { |
5063 | grp = ext4_get_group_info(sb, group); | 5005 | grp = ext4_get_group_info(sb, group); |
@@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
5069 | } | 5011 | } |
5070 | 5012 | ||
5071 | /* | 5013 | /* |
5072 | * For all the groups except the last one, last block will | 5014 | * For all the groups except the last one, last cluster will |
5073 | * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to | 5015 | * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to |
5074 | * change it for the last group in which case start + | 5016 | * change it for the last group, note that last_cluster is |
5075 | * len < EXT4_BLOCKS_PER_GROUP(sb). | 5017 | * already computed earlier by ext4_get_group_no_and_offset() |
5076 | */ | 5018 | */ |
5077 | if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) | 5019 | if (group == last_group) |
5078 | last_cluster = first_cluster + len; | 5020 | end = last_cluster; |
5079 | len -= last_cluster - first_cluster; | ||
5080 | 5021 | ||
5081 | if (grp->bb_free >= minlen) { | 5022 | if (grp->bb_free >= minlen) { |
5082 | cnt = ext4_trim_all_free(sb, group, first_cluster, | 5023 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
5083 | last_cluster, minlen); | 5024 | end, minlen); |
5084 | if (cnt < 0) { | 5025 | if (cnt < 0) { |
5085 | ret = cnt; | 5026 | ret = cnt; |
5086 | break; | 5027 | break; |
5087 | } | 5028 | } |
5029 | trimmed += cnt; | ||
5088 | } | 5030 | } |
5089 | trimmed += cnt; | 5031 | |
5032 | /* | ||
5033 | * For every group except the first one, we are sure | ||
5034 | * that the first cluster to discard will be cluster #0. | ||
5035 | */ | ||
5090 | first_cluster = 0; | 5036 | first_cluster = 0; |
5091 | } | 5037 | } |
5092 | range->len = trimmed * sb->s_blocksize; | ||
5093 | 5038 | ||
5094 | if (!ret) | 5039 | if (!ret) |
5095 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); | 5040 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
5096 | 5041 | ||
5097 | out: | 5042 | out: |
5043 | range->len = trimmed * sb->s_blocksize; | ||
5098 | return ret; | 5044 | return ret; |
5099 | } | 5045 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 47705f3285e3..c070618c21ce 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -96,21 +96,23 @@ extern u8 mb_enable_debug; | |||
96 | 96 | ||
97 | 97 | ||
98 | struct ext4_free_data { | 98 | struct ext4_free_data { |
99 | /* this links the free block information from group_info */ | 99 | /* MUST be the first member */ |
100 | struct rb_node node; | 100 | struct ext4_journal_cb_entry efd_jce; |
101 | |||
102 | /* ext4_free_data private data starts from here */ | ||
101 | 103 | ||
102 | /* this links the free block information from ext4_sb_info */ | 104 | /* this links the free block information from group_info */ |
103 | struct list_head list; | 105 | struct rb_node efd_node; |
104 | 106 | ||
105 | /* group which free block extent belongs */ | 107 | /* group which free block extent belongs */ |
106 | ext4_group_t group; | 108 | ext4_group_t efd_group; |
107 | 109 | ||
108 | /* free block extent */ | 110 | /* free block extent */ |
109 | ext4_grpblk_t start_cluster; | 111 | ext4_grpblk_t efd_start_cluster; |
110 | ext4_grpblk_t count; | 112 | ext4_grpblk_t efd_count; |
111 | 113 | ||
112 | /* transaction which freed this extent */ | 114 | /* transaction which freed this extent */ |
113 | tid_t t_tid; | 115 | tid_t efd_tid; |
114 | }; | 116 | }; |
115 | 117 | ||
116 | struct ext4_prealloc_space { | 118 | struct ext4_prealloc_space { |
@@ -210,8 +212,6 @@ struct ext4_buddy { | |||
210 | __u16 bd_blkbits; | 212 | __u16 bd_blkbits; |
211 | ext4_group_t bd_group; | 213 | ext4_group_t bd_group; |
212 | }; | 214 | }; |
213 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | ||
214 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | ||
215 | 215 | ||
216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
217 | struct ext4_free_extent *fex) | 217 | struct ext4_free_extent *fex) |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index e7d6bb0acfa6..f39f80f8f2c5 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -471,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
471 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, | 471 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, |
472 | S_IFREG, NULL, goal, owner); | 472 | S_IFREG, NULL, goal, owner); |
473 | if (IS_ERR(tmp_inode)) { | 473 | if (IS_ERR(tmp_inode)) { |
474 | retval = PTR_ERR(inode); | 474 | retval = PTR_ERR(tmp_inode); |
475 | ext4_journal_stop(handle); | 475 | ext4_journal_stop(handle); |
476 | return retval; | 476 | return retval; |
477 | } | 477 | } |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 7ea4ba4eff2a..ed6548d89165 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -257,8 +257,8 @@ int ext4_multi_mount_protect(struct super_block *sb, | |||
257 | * If check_interval in MMP block is larger, use that instead of | 257 | * If check_interval in MMP block is larger, use that instead of |
258 | * update_interval from the superblock. | 258 | * update_interval from the superblock. |
259 | */ | 259 | */ |
260 | if (mmp->mmp_check_interval > mmp_check_interval) | 260 | if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) |
261 | mmp_check_interval = mmp->mmp_check_interval; | 261 | mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); |
262 | 262 | ||
263 | seq = le32_to_cpu(mmp->mmp_seq); | 263 | seq = le32_to_cpu(mmp->mmp_seq); |
264 | if (seq == EXT4_MMP_SEQ_CLEAN) | 264 | if (seq == EXT4_MMP_SEQ_CLEAN) |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2043f482375d..349d7b3671c8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -468,7 +468,7 @@ fail2: | |||
468 | fail: | 468 | fail: |
469 | if (*err == ERR_BAD_DX_DIR) | 469 | if (*err == ERR_BAD_DX_DIR) |
470 | ext4_warning(dir->i_sb, | 470 | ext4_warning(dir->i_sb, |
471 | "Corrupt dir inode %ld, running e2fsck is " | 471 | "Corrupt dir inode %lu, running e2fsck is " |
472 | "recommended.", dir->i_ino); | 472 | "recommended.", dir->i_ino); |
473 | return NULL; | 473 | return NULL; |
474 | } | 474 | } |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 475851896518..74cd1f7f1f88 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -60,7 +60,6 @@ void ext4_ioend_wait(struct inode *inode) | |||
60 | static void put_io_page(struct ext4_io_page *io_page) | 60 | static void put_io_page(struct ext4_io_page *io_page) |
61 | { | 61 | { |
62 | if (atomic_dec_and_test(&io_page->p_count)) { | 62 | if (atomic_dec_and_test(&io_page->p_count)) { |
63 | end_page_writeback(io_page->p_page); | ||
64 | put_page(io_page->p_page); | 63 | put_page(io_page->p_page); |
65 | kmem_cache_free(io_page_cachep, io_page); | 64 | kmem_cache_free(io_page_cachep, io_page); |
66 | } | 65 | } |
@@ -110,6 +109,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
110 | if (io->iocb) | 109 | if (io->iocb) |
111 | aio_complete(io->iocb, io->result, 0); | 110 | aio_complete(io->iocb, io->result, 0); |
112 | 111 | ||
112 | if (io->flag & EXT4_IO_END_DIRECT) | ||
113 | inode_dio_done(inode); | ||
113 | /* Wake up anyone waiting on unwritten extent conversion */ | 114 | /* Wake up anyone waiting on unwritten extent conversion */ |
114 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) | 115 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) |
115 | wake_up_all(ext4_ioend_wq(io->inode)); | 116 | wake_up_all(ext4_ioend_wq(io->inode)); |
@@ -127,12 +128,18 @@ static void ext4_end_io_work(struct work_struct *work) | |||
127 | unsigned long flags; | 128 | unsigned long flags; |
128 | 129 | ||
129 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 130 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
131 | if (io->flag & EXT4_IO_END_IN_FSYNC) | ||
132 | goto requeue; | ||
130 | if (list_empty(&io->list)) { | 133 | if (list_empty(&io->list)) { |
131 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 134 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
132 | goto free; | 135 | goto free; |
133 | } | 136 | } |
134 | 137 | ||
135 | if (!mutex_trylock(&inode->i_mutex)) { | 138 | if (!mutex_trylock(&inode->i_mutex)) { |
139 | bool was_queued; | ||
140 | requeue: | ||
141 | was_queued = !!(io->flag & EXT4_IO_END_QUEUED); | ||
142 | io->flag |= EXT4_IO_END_QUEUED; | ||
136 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 143 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
137 | /* | 144 | /* |
138 | * Requeue the work instead of waiting so that the work | 145 | * Requeue the work instead of waiting so that the work |
@@ -145,9 +152,8 @@ static void ext4_end_io_work(struct work_struct *work) | |||
145 | * yield the cpu if it sees an end_io request that has already | 152 | * yield the cpu if it sees an end_io request that has already |
146 | * been requeued. | 153 | * been requeued. |
147 | */ | 154 | */ |
148 | if (io->flag & EXT4_IO_END_QUEUED) | 155 | if (was_queued) |
149 | yield(); | 156 | yield(); |
150 | io->flag |= EXT4_IO_END_QUEUED; | ||
151 | return; | 157 | return; |
152 | } | 158 | } |
153 | list_del_init(&io->list); | 159 | list_del_init(&io->list); |
@@ -227,9 +233,9 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
227 | } while (bh != head); | 233 | } while (bh != head); |
228 | } | 234 | } |
229 | 235 | ||
230 | put_io_page(io_end->pages[i]); | 236 | if (atomic_read(&io_end->pages[i]->p_count) == 1) |
237 | end_page_writeback(io_end->pages[i]->p_page); | ||
231 | } | 238 | } |
232 | io_end->num_io_pages = 0; | ||
233 | inode = io_end->inode; | 239 | inode = io_end->inode; |
234 | 240 | ||
235 | if (error) { | 241 | if (error) { |
@@ -421,6 +427,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
421 | * PageWriteback bit from the page to prevent the system from | 427 | * PageWriteback bit from the page to prevent the system from |
422 | * wedging later on. | 428 | * wedging later on. |
423 | */ | 429 | */ |
430 | if (atomic_read(&io_page->p_count) == 1) | ||
431 | end_page_writeback(page); | ||
424 | put_io_page(io_page); | 432 | put_io_page(io_page); |
425 | return ret; | 433 | return ret; |
426 | } | 434 | } |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f9d948f0eb86..59fa0be27251 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -1163,8 +1163,11 @@ static void ext4_update_super(struct super_block *sb, | |||
1163 | do_div(reserved_blocks, 100); | 1163 | do_div(reserved_blocks, 100); |
1164 | 1164 | ||
1165 | ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); | 1165 | ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); |
1166 | ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); | ||
1166 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * | 1167 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * |
1167 | flex_gd->count); | 1168 | flex_gd->count); |
1169 | le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * | ||
1170 | flex_gd->count); | ||
1168 | 1171 | ||
1169 | /* | 1172 | /* |
1170 | * We need to protect s_groups_count against other CPUs seeing | 1173 | * We need to protect s_groups_count against other CPUs seeing |
@@ -1465,6 +1468,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, | |||
1465 | } | 1468 | } |
1466 | 1469 | ||
1467 | ext4_blocks_count_set(es, o_blocks_count + add); | 1470 | ext4_blocks_count_set(es, o_blocks_count + add); |
1471 | ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); | ||
1468 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1472 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1469 | o_blocks_count + add); | 1473 | o_blocks_count + add); |
1470 | /* We add the blocks to the bitmap and set the group need init bit */ | 1474 | /* We add the blocks to the bitmap and set the group need init bit */ |
@@ -1512,16 +1516,17 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1512 | o_blocks_count = ext4_blocks_count(es); | 1516 | o_blocks_count = ext4_blocks_count(es); |
1513 | 1517 | ||
1514 | if (test_opt(sb, DEBUG)) | 1518 | if (test_opt(sb, DEBUG)) |
1515 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", | 1519 | ext4_msg(sb, KERN_DEBUG, |
1516 | o_blocks_count, n_blocks_count); | 1520 | "extending last group from %llu to %llu blocks", |
1521 | o_blocks_count, n_blocks_count); | ||
1517 | 1522 | ||
1518 | if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) | 1523 | if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) |
1519 | return 0; | 1524 | return 0; |
1520 | 1525 | ||
1521 | if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 1526 | if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { |
1522 | printk(KERN_ERR "EXT4-fs: filesystem on %s:" | 1527 | ext4_msg(sb, KERN_ERR, |
1523 | " too large to resize to %llu blocks safely\n", | 1528 | "filesystem too large to resize to %llu blocks safely", |
1524 | sb->s_id, n_blocks_count); | 1529 | n_blocks_count); |
1525 | if (sizeof(sector_t) < 8) | 1530 | if (sizeof(sector_t) < 8) |
1526 | ext4_warning(sb, "CONFIG_LBDAF not enabled"); | 1531 | ext4_warning(sb, "CONFIG_LBDAF not enabled"); |
1527 | return -EINVAL; | 1532 | return -EINVAL; |
@@ -1582,7 +1587,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1582 | ext4_fsblk_t o_blocks_count; | 1587 | ext4_fsblk_t o_blocks_count; |
1583 | ext4_group_t o_group; | 1588 | ext4_group_t o_group; |
1584 | ext4_group_t n_group; | 1589 | ext4_group_t n_group; |
1585 | ext4_grpblk_t offset; | 1590 | ext4_grpblk_t offset, add; |
1586 | unsigned long n_desc_blocks; | 1591 | unsigned long n_desc_blocks; |
1587 | unsigned long o_desc_blocks; | 1592 | unsigned long o_desc_blocks; |
1588 | unsigned long desc_blocks; | 1593 | unsigned long desc_blocks; |
@@ -1591,8 +1596,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1591 | o_blocks_count = ext4_blocks_count(es); | 1596 | o_blocks_count = ext4_blocks_count(es); |
1592 | 1597 | ||
1593 | if (test_opt(sb, DEBUG)) | 1598 | if (test_opt(sb, DEBUG)) |
1594 | printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " | 1599 | ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " |
1595 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); | 1600 | "to %llu blocks", o_blocks_count, n_blocks_count); |
1596 | 1601 | ||
1597 | if (n_blocks_count < o_blocks_count) { | 1602 | if (n_blocks_count < o_blocks_count) { |
1598 | /* On-line shrinking not supported */ | 1603 | /* On-line shrinking not supported */ |
@@ -1605,7 +1610,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1605 | return 0; | 1610 | return 0; |
1606 | 1611 | ||
1607 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); | 1612 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); |
1608 | ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); | 1613 | ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); |
1609 | 1614 | ||
1610 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / | 1615 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / |
1611 | EXT4_DESC_PER_BLOCK(sb); | 1616 | EXT4_DESC_PER_BLOCK(sb); |
@@ -1634,10 +1639,12 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1634 | } | 1639 | } |
1635 | brelse(bh); | 1640 | brelse(bh); |
1636 | 1641 | ||
1637 | if (offset != 0) { | 1642 | /* extend the last group */ |
1638 | /* extend the last group */ | 1643 | if (n_group == o_group) |
1639 | ext4_grpblk_t add; | 1644 | add = n_blocks_count - o_blocks_count; |
1640 | add = EXT4_BLOCKS_PER_GROUP(sb) - offset; | 1645 | else |
1646 | add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); | ||
1647 | if (add > 0) { | ||
1641 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); | 1648 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); |
1642 | if (err) | 1649 | if (err) |
1643 | goto out; | 1650 | goto out; |
@@ -1674,7 +1681,7 @@ out: | |||
1674 | 1681 | ||
1675 | iput(resize_inode); | 1682 | iput(resize_inode); |
1676 | if (test_opt(sb, DEBUG)) | 1683 | if (test_opt(sb, DEBUG)) |
1677 | printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " | 1684 | ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " |
1678 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); | 1685 | "upto %llu blocks", o_blocks_count, n_blocks_count); |
1679 | return err; | 1686 | return err; |
1680 | } | 1687 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 933900909ed0..ceebaf853beb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -62,6 +62,7 @@ static struct ext4_features *ext4_feat; | |||
62 | 62 | ||
63 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 63 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
64 | unsigned long journal_devnum); | 64 | unsigned long journal_devnum); |
65 | static int ext4_show_options(struct seq_file *seq, struct dentry *root); | ||
65 | static int ext4_commit_super(struct super_block *sb, int sync); | 66 | static int ext4_commit_super(struct super_block *sb, int sync); |
66 | static void ext4_mark_recovery_complete(struct super_block *sb, | 67 | static void ext4_mark_recovery_complete(struct super_block *sb, |
67 | struct ext4_super_block *es); | 68 | struct ext4_super_block *es); |
@@ -375,7 +376,7 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line, | |||
375 | if (is_handle_aborted(handle)) | 376 | if (is_handle_aborted(handle)) |
376 | return; | 377 | return; |
377 | 378 | ||
378 | printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", | 379 | printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", |
379 | caller, line, errstr, err_fn); | 380 | caller, line, errstr, err_fn); |
380 | 381 | ||
381 | jbd2_journal_abort_handle(handle); | 382 | jbd2_journal_abort_handle(handle); |
@@ -431,6 +432,22 @@ static int block_device_ejected(struct super_block *sb) | |||
431 | return bdi->dev == NULL; | 432 | return bdi->dev == NULL; |
432 | } | 433 | } |
433 | 434 | ||
435 | static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) | ||
436 | { | ||
437 | struct super_block *sb = journal->j_private; | ||
438 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
439 | int error = is_journal_aborted(journal); | ||
440 | struct ext4_journal_cb_entry *jce, *tmp; | ||
441 | |||
442 | spin_lock(&sbi->s_md_lock); | ||
443 | list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { | ||
444 | list_del_init(&jce->jce_list); | ||
445 | spin_unlock(&sbi->s_md_lock); | ||
446 | jce->jce_func(sb, jce, error); | ||
447 | spin_lock(&sbi->s_md_lock); | ||
448 | } | ||
449 | spin_unlock(&sbi->s_md_lock); | ||
450 | } | ||
434 | 451 | ||
435 | /* Deal with the reporting of failure conditions on a filesystem such as | 452 | /* Deal with the reporting of failure conditions on a filesystem such as |
436 | * inconsistencies detected or read IO failures. | 453 | * inconsistencies detected or read IO failures. |
@@ -498,11 +515,16 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
498 | va_start(args, fmt); | 515 | va_start(args, fmt); |
499 | vaf.fmt = fmt; | 516 | vaf.fmt = fmt; |
500 | vaf.va = &args; | 517 | vaf.va = &args; |
501 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", | ||
502 | inode->i_sb->s_id, function, line, inode->i_ino); | ||
503 | if (block) | 518 | if (block) |
504 | printk(KERN_CONT "block %llu: ", block); | 519 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " |
505 | printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); | 520 | "inode #%lu: block %llu: comm %s: %pV\n", |
521 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
522 | block, current->comm, &vaf); | ||
523 | else | ||
524 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " | ||
525 | "inode #%lu: comm %s: %pV\n", | ||
526 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
527 | current->comm, &vaf); | ||
506 | va_end(args); | 528 | va_end(args); |
507 | 529 | ||
508 | ext4_handle_error(inode->i_sb); | 530 | ext4_handle_error(inode->i_sb); |
@@ -524,15 +546,21 @@ void ext4_error_file(struct file *file, const char *function, | |||
524 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | 546 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); |
525 | if (IS_ERR(path)) | 547 | if (IS_ERR(path)) |
526 | path = "(unknown)"; | 548 | path = "(unknown)"; |
527 | printk(KERN_CRIT | ||
528 | "EXT4-fs error (device %s): %s:%d: inode #%lu: ", | ||
529 | inode->i_sb->s_id, function, line, inode->i_ino); | ||
530 | if (block) | ||
531 | printk(KERN_CONT "block %llu: ", block); | ||
532 | va_start(args, fmt); | 549 | va_start(args, fmt); |
533 | vaf.fmt = fmt; | 550 | vaf.fmt = fmt; |
534 | vaf.va = &args; | 551 | vaf.va = &args; |
535 | printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); | 552 | if (block) |
553 | printk(KERN_CRIT | ||
554 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " | ||
555 | "block %llu: comm %s: path %s: %pV\n", | ||
556 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
557 | block, current->comm, path, &vaf); | ||
558 | else | ||
559 | printk(KERN_CRIT | ||
560 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " | ||
561 | "comm %s: path %s: %pV\n", | ||
562 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
563 | current->comm, path, &vaf); | ||
536 | va_end(args); | 564 | va_end(args); |
537 | 565 | ||
538 | ext4_handle_error(inode->i_sb); | 566 | ext4_handle_error(inode->i_sb); |
@@ -808,9 +836,6 @@ static void ext4_put_super(struct super_block *sb) | |||
808 | destroy_workqueue(sbi->dio_unwritten_wq); | 836 | destroy_workqueue(sbi->dio_unwritten_wq); |
809 | 837 | ||
810 | lock_super(sb); | 838 | lock_super(sb); |
811 | if (sb->s_dirt) | ||
812 | ext4_commit_super(sb, 1); | ||
813 | |||
814 | if (sbi->s_journal) { | 839 | if (sbi->s_journal) { |
815 | err = jbd2_journal_destroy(sbi->s_journal); | 840 | err = jbd2_journal_destroy(sbi->s_journal); |
816 | sbi->s_journal = NULL; | 841 | sbi->s_journal = NULL; |
@@ -827,9 +852,12 @@ static void ext4_put_super(struct super_block *sb) | |||
827 | if (!(sb->s_flags & MS_RDONLY)) { | 852 | if (!(sb->s_flags & MS_RDONLY)) { |
828 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 853 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
829 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 854 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
830 | ext4_commit_super(sb, 1); | ||
831 | } | 855 | } |
856 | if (sb->s_dirt || !(sb->s_flags & MS_RDONLY)) | ||
857 | ext4_commit_super(sb, 1); | ||
858 | |||
832 | if (sbi->s_proc) { | 859 | if (sbi->s_proc) { |
860 | remove_proc_entry("options", sbi->s_proc); | ||
833 | remove_proc_entry(sb->s_id, ext4_proc_root); | 861 | remove_proc_entry(sb->s_id, ext4_proc_root); |
834 | } | 862 | } |
835 | kobject_del(&sbi->s_kobj); | 863 | kobject_del(&sbi->s_kobj); |
@@ -990,180 +1018,6 @@ void ext4_clear_inode(struct inode *inode) | |||
990 | } | 1018 | } |
991 | } | 1019 | } |
992 | 1020 | ||
993 | static inline void ext4_show_quota_options(struct seq_file *seq, | ||
994 | struct super_block *sb) | ||
995 | { | ||
996 | #if defined(CONFIG_QUOTA) | ||
997 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
998 | |||
999 | if (sbi->s_jquota_fmt) { | ||
1000 | char *fmtname = ""; | ||
1001 | |||
1002 | switch (sbi->s_jquota_fmt) { | ||
1003 | case QFMT_VFS_OLD: | ||
1004 | fmtname = "vfsold"; | ||
1005 | break; | ||
1006 | case QFMT_VFS_V0: | ||
1007 | fmtname = "vfsv0"; | ||
1008 | break; | ||
1009 | case QFMT_VFS_V1: | ||
1010 | fmtname = "vfsv1"; | ||
1011 | break; | ||
1012 | } | ||
1013 | seq_printf(seq, ",jqfmt=%s", fmtname); | ||
1014 | } | ||
1015 | |||
1016 | if (sbi->s_qf_names[USRQUOTA]) | ||
1017 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | ||
1018 | |||
1019 | if (sbi->s_qf_names[GRPQUOTA]) | ||
1020 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | ||
1021 | |||
1022 | if (test_opt(sb, USRQUOTA)) | ||
1023 | seq_puts(seq, ",usrquota"); | ||
1024 | |||
1025 | if (test_opt(sb, GRPQUOTA)) | ||
1026 | seq_puts(seq, ",grpquota"); | ||
1027 | #endif | ||
1028 | } | ||
1029 | |||
1030 | /* | ||
1031 | * Show an option if | ||
1032 | * - it's set to a non-default value OR | ||
1033 | * - if the per-sb default is different from the global default | ||
1034 | */ | ||
1035 | static int ext4_show_options(struct seq_file *seq, struct dentry *root) | ||
1036 | { | ||
1037 | int def_errors; | ||
1038 | unsigned long def_mount_opts; | ||
1039 | struct super_block *sb = root->d_sb; | ||
1040 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1041 | struct ext4_super_block *es = sbi->s_es; | ||
1042 | |||
1043 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | ||
1044 | def_errors = le16_to_cpu(es->s_errors); | ||
1045 | |||
1046 | if (sbi->s_sb_block != 1) | ||
1047 | seq_printf(seq, ",sb=%llu", sbi->s_sb_block); | ||
1048 | if (test_opt(sb, MINIX_DF)) | ||
1049 | seq_puts(seq, ",minixdf"); | ||
1050 | if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) | ||
1051 | seq_puts(seq, ",grpid"); | ||
1052 | if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) | ||
1053 | seq_puts(seq, ",nogrpid"); | ||
1054 | if (sbi->s_resuid != EXT4_DEF_RESUID || | ||
1055 | le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { | ||
1056 | seq_printf(seq, ",resuid=%u", sbi->s_resuid); | ||
1057 | } | ||
1058 | if (sbi->s_resgid != EXT4_DEF_RESGID || | ||
1059 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { | ||
1060 | seq_printf(seq, ",resgid=%u", sbi->s_resgid); | ||
1061 | } | ||
1062 | if (test_opt(sb, ERRORS_RO)) { | ||
1063 | if (def_errors == EXT4_ERRORS_PANIC || | ||
1064 | def_errors == EXT4_ERRORS_CONTINUE) { | ||
1065 | seq_puts(seq, ",errors=remount-ro"); | ||
1066 | } | ||
1067 | } | ||
1068 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) | ||
1069 | seq_puts(seq, ",errors=continue"); | ||
1070 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) | ||
1071 | seq_puts(seq, ",errors=panic"); | ||
1072 | if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) | ||
1073 | seq_puts(seq, ",nouid32"); | ||
1074 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) | ||
1075 | seq_puts(seq, ",debug"); | ||
1076 | #ifdef CONFIG_EXT4_FS_XATTR | ||
1077 | if (test_opt(sb, XATTR_USER)) | ||
1078 | seq_puts(seq, ",user_xattr"); | ||
1079 | if (!test_opt(sb, XATTR_USER)) | ||
1080 | seq_puts(seq, ",nouser_xattr"); | ||
1081 | #endif | ||
1082 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | ||
1083 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | ||
1084 | seq_puts(seq, ",acl"); | ||
1085 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | ||
1086 | seq_puts(seq, ",noacl"); | ||
1087 | #endif | ||
1088 | if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { | ||
1089 | seq_printf(seq, ",commit=%u", | ||
1090 | (unsigned) (sbi->s_commit_interval / HZ)); | ||
1091 | } | ||
1092 | if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { | ||
1093 | seq_printf(seq, ",min_batch_time=%u", | ||
1094 | (unsigned) sbi->s_min_batch_time); | ||
1095 | } | ||
1096 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { | ||
1097 | seq_printf(seq, ",max_batch_time=%u", | ||
1098 | (unsigned) sbi->s_max_batch_time); | ||
1099 | } | ||
1100 | |||
1101 | /* | ||
1102 | * We're changing the default of barrier mount option, so | ||
1103 | * let's always display its mount state so it's clear what its | ||
1104 | * status is. | ||
1105 | */ | ||
1106 | seq_puts(seq, ",barrier="); | ||
1107 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); | ||
1108 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) | ||
1109 | seq_puts(seq, ",journal_async_commit"); | ||
1110 | else if (test_opt(sb, JOURNAL_CHECKSUM)) | ||
1111 | seq_puts(seq, ",journal_checksum"); | ||
1112 | if (test_opt(sb, I_VERSION)) | ||
1113 | seq_puts(seq, ",i_version"); | ||
1114 | if (!test_opt(sb, DELALLOC) && | ||
1115 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | ||
1116 | seq_puts(seq, ",nodelalloc"); | ||
1117 | |||
1118 | if (!test_opt(sb, MBLK_IO_SUBMIT)) | ||
1119 | seq_puts(seq, ",nomblk_io_submit"); | ||
1120 | if (sbi->s_stripe) | ||
1121 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | ||
1122 | /* | ||
1123 | * journal mode get enabled in different ways | ||
1124 | * So just print the value even if we didn't specify it | ||
1125 | */ | ||
1126 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | ||
1127 | seq_puts(seq, ",data=journal"); | ||
1128 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
1129 | seq_puts(seq, ",data=ordered"); | ||
1130 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
1131 | seq_puts(seq, ",data=writeback"); | ||
1132 | |||
1133 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
1134 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
1135 | sbi->s_inode_readahead_blks); | ||
1136 | |||
1137 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
1138 | seq_puts(seq, ",data_err=abort"); | ||
1139 | |||
1140 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) | ||
1141 | seq_puts(seq, ",noauto_da_alloc"); | ||
1142 | |||
1143 | if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) | ||
1144 | seq_puts(seq, ",discard"); | ||
1145 | |||
1146 | if (test_opt(sb, NOLOAD)) | ||
1147 | seq_puts(seq, ",norecovery"); | ||
1148 | |||
1149 | if (test_opt(sb, DIOREAD_NOLOCK)) | ||
1150 | seq_puts(seq, ",dioread_nolock"); | ||
1151 | |||
1152 | if (test_opt(sb, BLOCK_VALIDITY) && | ||
1153 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | ||
1154 | seq_puts(seq, ",block_validity"); | ||
1155 | |||
1156 | if (!test_opt(sb, INIT_INODE_TABLE)) | ||
1157 | seq_puts(seq, ",noinit_itable"); | ||
1158 | else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) | ||
1159 | seq_printf(seq, ",init_itable=%u", | ||
1160 | (unsigned) sbi->s_li_wait_mult); | ||
1161 | |||
1162 | ext4_show_quota_options(seq, sb); | ||
1163 | |||
1164 | return 0; | ||
1165 | } | ||
1166 | |||
1167 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, | 1021 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
1168 | u64 ino, u32 generation) | 1022 | u64 ino, u32 generation) |
1169 | { | 1023 | { |
@@ -1316,18 +1170,17 @@ static const struct export_operations ext4_export_ops = { | |||
1316 | enum { | 1170 | enum { |
1317 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 1171 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
1318 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 1172 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
1319 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, | 1173 | Opt_nouid32, Opt_debug, Opt_removed, |
1320 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 1174 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
1321 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, | 1175 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, |
1322 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, | 1176 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, |
1323 | Opt_journal_update, Opt_journal_dev, | 1177 | Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, |
1324 | Opt_journal_checksum, Opt_journal_async_commit, | ||
1325 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1178 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1326 | Opt_data_err_abort, Opt_data_err_ignore, | 1179 | Opt_data_err_abort, Opt_data_err_ignore, |
1327 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1180 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1328 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1181 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1329 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, | 1182 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, |
1330 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, | 1183 | Opt_usrquota, Opt_grpquota, Opt_i_version, |
1331 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, | 1184 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, |
1332 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, | 1185 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, |
1333 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1186 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
@@ -1350,20 +1203,19 @@ static const match_table_t tokens = { | |||
1350 | {Opt_err_ro, "errors=remount-ro"}, | 1203 | {Opt_err_ro, "errors=remount-ro"}, |
1351 | {Opt_nouid32, "nouid32"}, | 1204 | {Opt_nouid32, "nouid32"}, |
1352 | {Opt_debug, "debug"}, | 1205 | {Opt_debug, "debug"}, |
1353 | {Opt_oldalloc, "oldalloc"}, | 1206 | {Opt_removed, "oldalloc"}, |
1354 | {Opt_orlov, "orlov"}, | 1207 | {Opt_removed, "orlov"}, |
1355 | {Opt_user_xattr, "user_xattr"}, | 1208 | {Opt_user_xattr, "user_xattr"}, |
1356 | {Opt_nouser_xattr, "nouser_xattr"}, | 1209 | {Opt_nouser_xattr, "nouser_xattr"}, |
1357 | {Opt_acl, "acl"}, | 1210 | {Opt_acl, "acl"}, |
1358 | {Opt_noacl, "noacl"}, | 1211 | {Opt_noacl, "noacl"}, |
1359 | {Opt_noload, "noload"}, | ||
1360 | {Opt_noload, "norecovery"}, | 1212 | {Opt_noload, "norecovery"}, |
1361 | {Opt_nobh, "nobh"}, | 1213 | {Opt_noload, "noload"}, |
1362 | {Opt_bh, "bh"}, | 1214 | {Opt_removed, "nobh"}, |
1215 | {Opt_removed, "bh"}, | ||
1363 | {Opt_commit, "commit=%u"}, | 1216 | {Opt_commit, "commit=%u"}, |
1364 | {Opt_min_batch_time, "min_batch_time=%u"}, | 1217 | {Opt_min_batch_time, "min_batch_time=%u"}, |
1365 | {Opt_max_batch_time, "max_batch_time=%u"}, | 1218 | {Opt_max_batch_time, "max_batch_time=%u"}, |
1366 | {Opt_journal_update, "journal=update"}, | ||
1367 | {Opt_journal_dev, "journal_dev=%u"}, | 1219 | {Opt_journal_dev, "journal_dev=%u"}, |
1368 | {Opt_journal_checksum, "journal_checksum"}, | 1220 | {Opt_journal_checksum, "journal_checksum"}, |
1369 | {Opt_journal_async_commit, "journal_async_commit"}, | 1221 | {Opt_journal_async_commit, "journal_async_commit"}, |
@@ -1389,7 +1241,6 @@ static const match_table_t tokens = { | |||
1389 | {Opt_nobarrier, "nobarrier"}, | 1241 | {Opt_nobarrier, "nobarrier"}, |
1390 | {Opt_i_version, "i_version"}, | 1242 | {Opt_i_version, "i_version"}, |
1391 | {Opt_stripe, "stripe=%u"}, | 1243 | {Opt_stripe, "stripe=%u"}, |
1392 | {Opt_resize, "resize"}, | ||
1393 | {Opt_delalloc, "delalloc"}, | 1244 | {Opt_delalloc, "delalloc"}, |
1394 | {Opt_nodelalloc, "nodelalloc"}, | 1245 | {Opt_nodelalloc, "nodelalloc"}, |
1395 | {Opt_mblk_io_submit, "mblk_io_submit"}, | 1246 | {Opt_mblk_io_submit, "mblk_io_submit"}, |
@@ -1408,6 +1259,11 @@ static const match_table_t tokens = { | |||
1408 | {Opt_init_itable, "init_itable=%u"}, | 1259 | {Opt_init_itable, "init_itable=%u"}, |
1409 | {Opt_init_itable, "init_itable"}, | 1260 | {Opt_init_itable, "init_itable"}, |
1410 | {Opt_noinit_itable, "noinit_itable"}, | 1261 | {Opt_noinit_itable, "noinit_itable"}, |
1262 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ | ||
1263 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ | ||
1264 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ | ||
1265 | {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ | ||
1266 | {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ | ||
1411 | {Opt_err, NULL}, | 1267 | {Opt_err, NULL}, |
1412 | }; | 1268 | }; |
1413 | 1269 | ||
@@ -1496,420 +1352,273 @@ static int clear_qf_name(struct super_block *sb, int qtype) | |||
1496 | } | 1352 | } |
1497 | #endif | 1353 | #endif |
1498 | 1354 | ||
1499 | static int parse_options(char *options, struct super_block *sb, | 1355 | #define MOPT_SET 0x0001 |
1500 | unsigned long *journal_devnum, | 1356 | #define MOPT_CLEAR 0x0002 |
1501 | unsigned int *journal_ioprio, | 1357 | #define MOPT_NOSUPPORT 0x0004 |
1502 | ext4_fsblk_t *n_blocks_count, int is_remount) | 1358 | #define MOPT_EXPLICIT 0x0008 |
1503 | { | 1359 | #define MOPT_CLEAR_ERR 0x0010 |
1504 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1360 | #define MOPT_GTE0 0x0020 |
1505 | char *p; | ||
1506 | substring_t args[MAX_OPT_ARGS]; | ||
1507 | int data_opt = 0; | ||
1508 | int option; | ||
1509 | #ifdef CONFIG_QUOTA | 1361 | #ifdef CONFIG_QUOTA |
1510 | int qfmt; | 1362 | #define MOPT_Q 0 |
1363 | #define MOPT_QFMT 0x0040 | ||
1364 | #else | ||
1365 | #define MOPT_Q MOPT_NOSUPPORT | ||
1366 | #define MOPT_QFMT MOPT_NOSUPPORT | ||
1511 | #endif | 1367 | #endif |
1512 | 1368 | #define MOPT_DATAJ 0x0080 | |
1513 | if (!options) | 1369 | |
1514 | return 1; | 1370 | static const struct mount_opts { |
1515 | 1371 | int token; | |
1516 | while ((p = strsep(&options, ",")) != NULL) { | 1372 | int mount_opt; |
1517 | int token; | 1373 | int flags; |
1518 | if (!*p) | 1374 | } ext4_mount_opts[] = { |
1519 | continue; | 1375 | {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, |
1520 | 1376 | {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, | |
1521 | /* | 1377 | {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, |
1522 | * Initialize args struct so we know whether arg was | 1378 | {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, |
1523 | * found; some options take optional arguments. | 1379 | {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET}, |
1524 | */ | 1380 | {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR}, |
1525 | args[0].to = args[0].from = NULL; | 1381 | {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, |
1526 | token = match_token(p, tokens, args); | 1382 | {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, |
1527 | switch (token) { | 1383 | {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, |
1528 | case Opt_bsd_df: | 1384 | {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, |
1529 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1385 | {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, |
1530 | clear_opt(sb, MINIX_DF); | 1386 | {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, |
1531 | break; | 1387 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, |
1532 | case Opt_minix_df: | 1388 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, |
1533 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1389 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, |
1534 | set_opt(sb, MINIX_DF); | 1390 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
1535 | 1391 | EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, | |
1536 | break; | 1392 | {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, |
1537 | case Opt_grpid: | 1393 | {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, |
1538 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1394 | {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, |
1539 | set_opt(sb, GRPID); | 1395 | {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, |
1540 | 1396 | {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, | |
1541 | break; | 1397 | {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, |
1542 | case Opt_nogrpid: | 1398 | {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, |
1543 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1399 | {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, |
1544 | clear_opt(sb, GRPID); | 1400 | {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, |
1545 | 1401 | {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, | |
1546 | break; | 1402 | {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, |
1547 | case Opt_resuid: | 1403 | {Opt_commit, 0, MOPT_GTE0}, |
1548 | if (match_int(&args[0], &option)) | 1404 | {Opt_max_batch_time, 0, MOPT_GTE0}, |
1549 | return 0; | 1405 | {Opt_min_batch_time, 0, MOPT_GTE0}, |
1550 | sbi->s_resuid = option; | 1406 | {Opt_inode_readahead_blks, 0, MOPT_GTE0}, |
1551 | break; | 1407 | {Opt_init_itable, 0, MOPT_GTE0}, |
1552 | case Opt_resgid: | 1408 | {Opt_stripe, 0, MOPT_GTE0}, |
1553 | if (match_int(&args[0], &option)) | 1409 | {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, |
1554 | return 0; | 1410 | {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, |
1555 | sbi->s_resgid = option; | 1411 | {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, |
1556 | break; | ||
1557 | case Opt_sb: | ||
1558 | /* handled by get_sb_block() instead of here */ | ||
1559 | /* *sb_block = match_int(&args[0]); */ | ||
1560 | break; | ||
1561 | case Opt_err_panic: | ||
1562 | clear_opt(sb, ERRORS_CONT); | ||
1563 | clear_opt(sb, ERRORS_RO); | ||
1564 | set_opt(sb, ERRORS_PANIC); | ||
1565 | break; | ||
1566 | case Opt_err_ro: | ||
1567 | clear_opt(sb, ERRORS_CONT); | ||
1568 | clear_opt(sb, ERRORS_PANIC); | ||
1569 | set_opt(sb, ERRORS_RO); | ||
1570 | break; | ||
1571 | case Opt_err_cont: | ||
1572 | clear_opt(sb, ERRORS_RO); | ||
1573 | clear_opt(sb, ERRORS_PANIC); | ||
1574 | set_opt(sb, ERRORS_CONT); | ||
1575 | break; | ||
1576 | case Opt_nouid32: | ||
1577 | set_opt(sb, NO_UID32); | ||
1578 | break; | ||
1579 | case Opt_debug: | ||
1580 | set_opt(sb, DEBUG); | ||
1581 | break; | ||
1582 | case Opt_oldalloc: | ||
1583 | ext4_msg(sb, KERN_WARNING, | ||
1584 | "Ignoring deprecated oldalloc option"); | ||
1585 | break; | ||
1586 | case Opt_orlov: | ||
1587 | ext4_msg(sb, KERN_WARNING, | ||
1588 | "Ignoring deprecated orlov option"); | ||
1589 | break; | ||
1590 | #ifdef CONFIG_EXT4_FS_XATTR | 1412 | #ifdef CONFIG_EXT4_FS_XATTR |
1591 | case Opt_user_xattr: | 1413 | {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, |
1592 | set_opt(sb, XATTR_USER); | 1414 | {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, |
1593 | break; | ||
1594 | case Opt_nouser_xattr: | ||
1595 | clear_opt(sb, XATTR_USER); | ||
1596 | break; | ||
1597 | #else | 1415 | #else |
1598 | case Opt_user_xattr: | 1416 | {Opt_user_xattr, 0, MOPT_NOSUPPORT}, |
1599 | case Opt_nouser_xattr: | 1417 | {Opt_nouser_xattr, 0, MOPT_NOSUPPORT}, |
1600 | ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); | ||
1601 | break; | ||
1602 | #endif | 1418 | #endif |
1603 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1419 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1604 | case Opt_acl: | 1420 | {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, |
1605 | set_opt(sb, POSIX_ACL); | 1421 | {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, |
1606 | break; | ||
1607 | case Opt_noacl: | ||
1608 | clear_opt(sb, POSIX_ACL); | ||
1609 | break; | ||
1610 | #else | 1422 | #else |
1611 | case Opt_acl: | 1423 | {Opt_acl, 0, MOPT_NOSUPPORT}, |
1612 | case Opt_noacl: | 1424 | {Opt_noacl, 0, MOPT_NOSUPPORT}, |
1613 | ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); | ||
1614 | break; | ||
1615 | #endif | 1425 | #endif |
1616 | case Opt_journal_update: | 1426 | {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, |
1617 | /* @@@ FIXME */ | 1427 | {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, |
1618 | /* Eventually we will want to be able to create | 1428 | {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, |
1619 | a journal file here. For now, only allow the | 1429 | {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, |
1620 | user to specify an existing inode to be the | 1430 | MOPT_SET | MOPT_Q}, |
1621 | journal file. */ | 1431 | {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, |
1622 | if (is_remount) { | 1432 | MOPT_SET | MOPT_Q}, |
1623 | ext4_msg(sb, KERN_ERR, | 1433 | {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | |
1624 | "Cannot specify journal on remount"); | 1434 | EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, |
1625 | return 0; | 1435 | {Opt_usrjquota, 0, MOPT_Q}, |
1626 | } | 1436 | {Opt_grpjquota, 0, MOPT_Q}, |
1627 | set_opt(sb, UPDATE_JOURNAL); | 1437 | {Opt_offusrjquota, 0, MOPT_Q}, |
1628 | break; | 1438 | {Opt_offgrpjquota, 0, MOPT_Q}, |
1629 | case Opt_journal_dev: | 1439 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, |
1630 | if (is_remount) { | 1440 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, |
1441 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, | ||
1442 | {Opt_err, 0, 0} | ||
1443 | }; | ||
1444 | |||
1445 | static int handle_mount_opt(struct super_block *sb, char *opt, int token, | ||
1446 | substring_t *args, unsigned long *journal_devnum, | ||
1447 | unsigned int *journal_ioprio, int is_remount) | ||
1448 | { | ||
1449 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1450 | const struct mount_opts *m; | ||
1451 | int arg = 0; | ||
1452 | |||
1453 | if (args->from && match_int(args, &arg)) | ||
1454 | return -1; | ||
1455 | switch (token) { | ||
1456 | case Opt_noacl: | ||
1457 | case Opt_nouser_xattr: | ||
1458 | ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); | ||
1459 | break; | ||
1460 | case Opt_sb: | ||
1461 | return 1; /* handled by get_sb_block() */ | ||
1462 | case Opt_removed: | ||
1463 | ext4_msg(sb, KERN_WARNING, | ||
1464 | "Ignoring removed %s option", opt); | ||
1465 | return 1; | ||
1466 | case Opt_resuid: | ||
1467 | sbi->s_resuid = arg; | ||
1468 | return 1; | ||
1469 | case Opt_resgid: | ||
1470 | sbi->s_resgid = arg; | ||
1471 | return 1; | ||
1472 | case Opt_abort: | ||
1473 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; | ||
1474 | return 1; | ||
1475 | case Opt_i_version: | ||
1476 | sb->s_flags |= MS_I_VERSION; | ||
1477 | return 1; | ||
1478 | case Opt_journal_dev: | ||
1479 | if (is_remount) { | ||
1480 | ext4_msg(sb, KERN_ERR, | ||
1481 | "Cannot specify journal on remount"); | ||
1482 | return -1; | ||
1483 | } | ||
1484 | *journal_devnum = arg; | ||
1485 | return 1; | ||
1486 | case Opt_journal_ioprio: | ||
1487 | if (arg < 0 || arg > 7) | ||
1488 | return -1; | ||
1489 | *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); | ||
1490 | return 1; | ||
1491 | } | ||
1492 | |||
1493 | for (m = ext4_mount_opts; m->token != Opt_err; m++) { | ||
1494 | if (token != m->token) | ||
1495 | continue; | ||
1496 | if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) | ||
1497 | return -1; | ||
1498 | if (m->flags & MOPT_EXPLICIT) | ||
1499 | set_opt2(sb, EXPLICIT_DELALLOC); | ||
1500 | if (m->flags & MOPT_CLEAR_ERR) | ||
1501 | clear_opt(sb, ERRORS_MASK); | ||
1502 | if (token == Opt_noquota && sb_any_quota_loaded(sb)) { | ||
1503 | ext4_msg(sb, KERN_ERR, "Cannot change quota " | ||
1504 | "options when quota turned on"); | ||
1505 | return -1; | ||
1506 | } | ||
1507 | |||
1508 | if (m->flags & MOPT_NOSUPPORT) { | ||
1509 | ext4_msg(sb, KERN_ERR, "%s option not supported", opt); | ||
1510 | } else if (token == Opt_commit) { | ||
1511 | if (arg == 0) | ||
1512 | arg = JBD2_DEFAULT_MAX_COMMIT_AGE; | ||
1513 | sbi->s_commit_interval = HZ * arg; | ||
1514 | } else if (token == Opt_max_batch_time) { | ||
1515 | if (arg == 0) | ||
1516 | arg = EXT4_DEF_MAX_BATCH_TIME; | ||
1517 | sbi->s_max_batch_time = arg; | ||
1518 | } else if (token == Opt_min_batch_time) { | ||
1519 | sbi->s_min_batch_time = arg; | ||
1520 | } else if (token == Opt_inode_readahead_blks) { | ||
1521 | if (arg > (1 << 30)) | ||
1522 | return -1; | ||
1523 | if (arg && !is_power_of_2(arg)) { | ||
1631 | ext4_msg(sb, KERN_ERR, | 1524 | ext4_msg(sb, KERN_ERR, |
1632 | "Cannot specify journal on remount"); | 1525 | "EXT4-fs: inode_readahead_blks" |
1633 | return 0; | 1526 | " must be a power of 2"); |
1527 | return -1; | ||
1634 | } | 1528 | } |
1635 | if (match_int(&args[0], &option)) | 1529 | sbi->s_inode_readahead_blks = arg; |
1636 | return 0; | 1530 | } else if (token == Opt_init_itable) { |
1637 | *journal_devnum = option; | 1531 | set_opt(sb, INIT_INODE_TABLE); |
1638 | break; | 1532 | if (!args->from) |
1639 | case Opt_journal_checksum: | 1533 | arg = EXT4_DEF_LI_WAIT_MULT; |
1640 | set_opt(sb, JOURNAL_CHECKSUM); | 1534 | sbi->s_li_wait_mult = arg; |
1641 | break; | 1535 | } else if (token == Opt_stripe) { |
1642 | case Opt_journal_async_commit: | 1536 | sbi->s_stripe = arg; |
1643 | set_opt(sb, JOURNAL_ASYNC_COMMIT); | 1537 | } else if (m->flags & MOPT_DATAJ) { |
1644 | set_opt(sb, JOURNAL_CHECKSUM); | ||
1645 | break; | ||
1646 | case Opt_noload: | ||
1647 | set_opt(sb, NOLOAD); | ||
1648 | break; | ||
1649 | case Opt_commit: | ||
1650 | if (match_int(&args[0], &option)) | ||
1651 | return 0; | ||
1652 | if (option < 0) | ||
1653 | return 0; | ||
1654 | if (option == 0) | ||
1655 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; | ||
1656 | sbi->s_commit_interval = HZ * option; | ||
1657 | break; | ||
1658 | case Opt_max_batch_time: | ||
1659 | if (match_int(&args[0], &option)) | ||
1660 | return 0; | ||
1661 | if (option < 0) | ||
1662 | return 0; | ||
1663 | if (option == 0) | ||
1664 | option = EXT4_DEF_MAX_BATCH_TIME; | ||
1665 | sbi->s_max_batch_time = option; | ||
1666 | break; | ||
1667 | case Opt_min_batch_time: | ||
1668 | if (match_int(&args[0], &option)) | ||
1669 | return 0; | ||
1670 | if (option < 0) | ||
1671 | return 0; | ||
1672 | sbi->s_min_batch_time = option; | ||
1673 | break; | ||
1674 | case Opt_data_journal: | ||
1675 | data_opt = EXT4_MOUNT_JOURNAL_DATA; | ||
1676 | goto datacheck; | ||
1677 | case Opt_data_ordered: | ||
1678 | data_opt = EXT4_MOUNT_ORDERED_DATA; | ||
1679 | goto datacheck; | ||
1680 | case Opt_data_writeback: | ||
1681 | data_opt = EXT4_MOUNT_WRITEBACK_DATA; | ||
1682 | datacheck: | ||
1683 | if (is_remount) { | 1538 | if (is_remount) { |
1684 | if (!sbi->s_journal) | 1539 | if (!sbi->s_journal) |
1685 | ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); | 1540 | ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); |
1686 | else if (test_opt(sb, DATA_FLAGS) != data_opt) { | 1541 | else if (test_opt(sb, DATA_FLAGS) != |
1542 | m->mount_opt) { | ||
1687 | ext4_msg(sb, KERN_ERR, | 1543 | ext4_msg(sb, KERN_ERR, |
1688 | "Cannot change data mode on remount"); | 1544 | "Cannot change data mode on remount"); |
1689 | return 0; | 1545 | return -1; |
1690 | } | 1546 | } |
1691 | } else { | 1547 | } else { |
1692 | clear_opt(sb, DATA_FLAGS); | 1548 | clear_opt(sb, DATA_FLAGS); |
1693 | sbi->s_mount_opt |= data_opt; | 1549 | sbi->s_mount_opt |= m->mount_opt; |
1694 | } | 1550 | } |
1695 | break; | ||
1696 | case Opt_data_err_abort: | ||
1697 | set_opt(sb, DATA_ERR_ABORT); | ||
1698 | break; | ||
1699 | case Opt_data_err_ignore: | ||
1700 | clear_opt(sb, DATA_ERR_ABORT); | ||
1701 | break; | ||
1702 | #ifdef CONFIG_QUOTA | 1551 | #ifdef CONFIG_QUOTA |
1703 | case Opt_usrjquota: | 1552 | } else if (token == Opt_usrjquota) { |
1704 | if (!set_qf_name(sb, USRQUOTA, &args[0])) | 1553 | if (!set_qf_name(sb, USRQUOTA, &args[0])) |
1705 | return 0; | 1554 | return -1; |
1706 | break; | 1555 | } else if (token == Opt_grpjquota) { |
1707 | case Opt_grpjquota: | ||
1708 | if (!set_qf_name(sb, GRPQUOTA, &args[0])) | 1556 | if (!set_qf_name(sb, GRPQUOTA, &args[0])) |
1709 | return 0; | 1557 | return -1; |
1710 | break; | 1558 | } else if (token == Opt_offusrjquota) { |
1711 | case Opt_offusrjquota: | ||
1712 | if (!clear_qf_name(sb, USRQUOTA)) | 1559 | if (!clear_qf_name(sb, USRQUOTA)) |
1713 | return 0; | 1560 | return -1; |
1714 | break; | 1561 | } else if (token == Opt_offgrpjquota) { |
1715 | case Opt_offgrpjquota: | ||
1716 | if (!clear_qf_name(sb, GRPQUOTA)) | 1562 | if (!clear_qf_name(sb, GRPQUOTA)) |
1717 | return 0; | 1563 | return -1; |
1718 | break; | 1564 | } else if (m->flags & MOPT_QFMT) { |
1719 | |||
1720 | case Opt_jqfmt_vfsold: | ||
1721 | qfmt = QFMT_VFS_OLD; | ||
1722 | goto set_qf_format; | ||
1723 | case Opt_jqfmt_vfsv0: | ||
1724 | qfmt = QFMT_VFS_V0; | ||
1725 | goto set_qf_format; | ||
1726 | case Opt_jqfmt_vfsv1: | ||
1727 | qfmt = QFMT_VFS_V1; | ||
1728 | set_qf_format: | ||
1729 | if (sb_any_quota_loaded(sb) && | 1565 | if (sb_any_quota_loaded(sb) && |
1730 | sbi->s_jquota_fmt != qfmt) { | 1566 | sbi->s_jquota_fmt != m->mount_opt) { |
1731 | ext4_msg(sb, KERN_ERR, "Cannot change " | 1567 | ext4_msg(sb, KERN_ERR, "Cannot " |
1732 | "journaled quota options when " | 1568 | "change journaled quota options " |
1733 | "quota turned on"); | 1569 | "when quota turned on"); |
1734 | return 0; | 1570 | return -1; |
1735 | } | ||
1736 | sbi->s_jquota_fmt = qfmt; | ||
1737 | break; | ||
1738 | case Opt_quota: | ||
1739 | case Opt_usrquota: | ||
1740 | set_opt(sb, QUOTA); | ||
1741 | set_opt(sb, USRQUOTA); | ||
1742 | break; | ||
1743 | case Opt_grpquota: | ||
1744 | set_opt(sb, QUOTA); | ||
1745 | set_opt(sb, GRPQUOTA); | ||
1746 | break; | ||
1747 | case Opt_noquota: | ||
1748 | if (sb_any_quota_loaded(sb)) { | ||
1749 | ext4_msg(sb, KERN_ERR, "Cannot change quota " | ||
1750 | "options when quota turned on"); | ||
1751 | return 0; | ||
1752 | } | 1571 | } |
1753 | clear_opt(sb, QUOTA); | 1572 | sbi->s_jquota_fmt = m->mount_opt; |
1754 | clear_opt(sb, USRQUOTA); | ||
1755 | clear_opt(sb, GRPQUOTA); | ||
1756 | break; | ||
1757 | #else | ||
1758 | case Opt_quota: | ||
1759 | case Opt_usrquota: | ||
1760 | case Opt_grpquota: | ||
1761 | ext4_msg(sb, KERN_ERR, | ||
1762 | "quota options not supported"); | ||
1763 | break; | ||
1764 | case Opt_usrjquota: | ||
1765 | case Opt_grpjquota: | ||
1766 | case Opt_offusrjquota: | ||
1767 | case Opt_offgrpjquota: | ||
1768 | case Opt_jqfmt_vfsold: | ||
1769 | case Opt_jqfmt_vfsv0: | ||
1770 | case Opt_jqfmt_vfsv1: | ||
1771 | ext4_msg(sb, KERN_ERR, | ||
1772 | "journaled quota options not supported"); | ||
1773 | break; | ||
1774 | case Opt_noquota: | ||
1775 | break; | ||
1776 | #endif | 1573 | #endif |
1777 | case Opt_abort: | 1574 | } else { |
1778 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; | 1575 | if (!args->from) |
1779 | break; | 1576 | arg = 1; |
1780 | case Opt_nobarrier: | 1577 | if (m->flags & MOPT_CLEAR) |
1781 | clear_opt(sb, BARRIER); | 1578 | arg = !arg; |
1782 | break; | 1579 | else if (unlikely(!(m->flags & MOPT_SET))) { |
1783 | case Opt_barrier: | 1580 | ext4_msg(sb, KERN_WARNING, |
1784 | if (args[0].from) { | 1581 | "buggy handling of option %s", opt); |
1785 | if (match_int(&args[0], &option)) | 1582 | WARN_ON(1); |
1786 | return 0; | 1583 | return -1; |
1787 | } else | ||
1788 | option = 1; /* No argument, default to 1 */ | ||
1789 | if (option) | ||
1790 | set_opt(sb, BARRIER); | ||
1791 | else | ||
1792 | clear_opt(sb, BARRIER); | ||
1793 | break; | ||
1794 | case Opt_ignore: | ||
1795 | break; | ||
1796 | case Opt_resize: | ||
1797 | if (!is_remount) { | ||
1798 | ext4_msg(sb, KERN_ERR, | ||
1799 | "resize option only available " | ||
1800 | "for remount"); | ||
1801 | return 0; | ||
1802 | } | ||
1803 | if (match_int(&args[0], &option) != 0) | ||
1804 | return 0; | ||
1805 | *n_blocks_count = option; | ||
1806 | break; | ||
1807 | case Opt_nobh: | ||
1808 | ext4_msg(sb, KERN_WARNING, | ||
1809 | "Ignoring deprecated nobh option"); | ||
1810 | break; | ||
1811 | case Opt_bh: | ||
1812 | ext4_msg(sb, KERN_WARNING, | ||
1813 | "Ignoring deprecated bh option"); | ||
1814 | break; | ||
1815 | case Opt_i_version: | ||
1816 | set_opt(sb, I_VERSION); | ||
1817 | sb->s_flags |= MS_I_VERSION; | ||
1818 | break; | ||
1819 | case Opt_nodelalloc: | ||
1820 | clear_opt(sb, DELALLOC); | ||
1821 | clear_opt2(sb, EXPLICIT_DELALLOC); | ||
1822 | break; | ||
1823 | case Opt_mblk_io_submit: | ||
1824 | set_opt(sb, MBLK_IO_SUBMIT); | ||
1825 | break; | ||
1826 | case Opt_nomblk_io_submit: | ||
1827 | clear_opt(sb, MBLK_IO_SUBMIT); | ||
1828 | break; | ||
1829 | case Opt_stripe: | ||
1830 | if (match_int(&args[0], &option)) | ||
1831 | return 0; | ||
1832 | if (option < 0) | ||
1833 | return 0; | ||
1834 | sbi->s_stripe = option; | ||
1835 | break; | ||
1836 | case Opt_delalloc: | ||
1837 | set_opt(sb, DELALLOC); | ||
1838 | set_opt2(sb, EXPLICIT_DELALLOC); | ||
1839 | break; | ||
1840 | case Opt_block_validity: | ||
1841 | set_opt(sb, BLOCK_VALIDITY); | ||
1842 | break; | ||
1843 | case Opt_noblock_validity: | ||
1844 | clear_opt(sb, BLOCK_VALIDITY); | ||
1845 | break; | ||
1846 | case Opt_inode_readahead_blks: | ||
1847 | if (match_int(&args[0], &option)) | ||
1848 | return 0; | ||
1849 | if (option < 0 || option > (1 << 30)) | ||
1850 | return 0; | ||
1851 | if (option && !is_power_of_2(option)) { | ||
1852 | ext4_msg(sb, KERN_ERR, | ||
1853 | "EXT4-fs: inode_readahead_blks" | ||
1854 | " must be a power of 2"); | ||
1855 | return 0; | ||
1856 | } | 1584 | } |
1857 | sbi->s_inode_readahead_blks = option; | 1585 | if (arg != 0) |
1858 | break; | 1586 | sbi->s_mount_opt |= m->mount_opt; |
1859 | case Opt_journal_ioprio: | ||
1860 | if (match_int(&args[0], &option)) | ||
1861 | return 0; | ||
1862 | if (option < 0 || option > 7) | ||
1863 | break; | ||
1864 | *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, | ||
1865 | option); | ||
1866 | break; | ||
1867 | case Opt_noauto_da_alloc: | ||
1868 | set_opt(sb, NO_AUTO_DA_ALLOC); | ||
1869 | break; | ||
1870 | case Opt_auto_da_alloc: | ||
1871 | if (args[0].from) { | ||
1872 | if (match_int(&args[0], &option)) | ||
1873 | return 0; | ||
1874 | } else | ||
1875 | option = 1; /* No argument, default to 1 */ | ||
1876 | if (option) | ||
1877 | clear_opt(sb, NO_AUTO_DA_ALLOC); | ||
1878 | else | 1587 | else |
1879 | set_opt(sb,NO_AUTO_DA_ALLOC); | 1588 | sbi->s_mount_opt &= ~m->mount_opt; |
1880 | break; | ||
1881 | case Opt_discard: | ||
1882 | set_opt(sb, DISCARD); | ||
1883 | break; | ||
1884 | case Opt_nodiscard: | ||
1885 | clear_opt(sb, DISCARD); | ||
1886 | break; | ||
1887 | case Opt_dioread_nolock: | ||
1888 | set_opt(sb, DIOREAD_NOLOCK); | ||
1889 | break; | ||
1890 | case Opt_dioread_lock: | ||
1891 | clear_opt(sb, DIOREAD_NOLOCK); | ||
1892 | break; | ||
1893 | case Opt_init_itable: | ||
1894 | set_opt(sb, INIT_INODE_TABLE); | ||
1895 | if (args[0].from) { | ||
1896 | if (match_int(&args[0], &option)) | ||
1897 | return 0; | ||
1898 | } else | ||
1899 | option = EXT4_DEF_LI_WAIT_MULT; | ||
1900 | if (option < 0) | ||
1901 | return 0; | ||
1902 | sbi->s_li_wait_mult = option; | ||
1903 | break; | ||
1904 | case Opt_noinit_itable: | ||
1905 | clear_opt(sb, INIT_INODE_TABLE); | ||
1906 | break; | ||
1907 | default: | ||
1908 | ext4_msg(sb, KERN_ERR, | ||
1909 | "Unrecognized mount option \"%s\" " | ||
1910 | "or missing value", p); | ||
1911 | return 0; | ||
1912 | } | 1589 | } |
1590 | return 1; | ||
1591 | } | ||
1592 | ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " | ||
1593 | "or missing value", opt); | ||
1594 | return -1; | ||
1595 | } | ||
1596 | |||
1597 | static int parse_options(char *options, struct super_block *sb, | ||
1598 | unsigned long *journal_devnum, | ||
1599 | unsigned int *journal_ioprio, | ||
1600 | int is_remount) | ||
1601 | { | ||
1602 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1603 | char *p; | ||
1604 | substring_t args[MAX_OPT_ARGS]; | ||
1605 | int token; | ||
1606 | |||
1607 | if (!options) | ||
1608 | return 1; | ||
1609 | |||
1610 | while ((p = strsep(&options, ",")) != NULL) { | ||
1611 | if (!*p) | ||
1612 | continue; | ||
1613 | /* | ||
1614 | * Initialize args struct so we know whether arg was | ||
1615 | * found; some options take optional arguments. | ||
1616 | */ | ||
1617 | args[0].to = args[0].from = 0; | ||
1618 | token = match_token(p, tokens, args); | ||
1619 | if (handle_mount_opt(sb, p, token, args, journal_devnum, | ||
1620 | journal_ioprio, is_remount) < 0) | ||
1621 | return 0; | ||
1913 | } | 1622 | } |
1914 | #ifdef CONFIG_QUOTA | 1623 | #ifdef CONFIG_QUOTA |
1915 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { | 1624 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
@@ -1942,6 +1651,160 @@ set_qf_format: | |||
1942 | return 1; | 1651 | return 1; |
1943 | } | 1652 | } |
1944 | 1653 | ||
1654 | static inline void ext4_show_quota_options(struct seq_file *seq, | ||
1655 | struct super_block *sb) | ||
1656 | { | ||
1657 | #if defined(CONFIG_QUOTA) | ||
1658 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1659 | |||
1660 | if (sbi->s_jquota_fmt) { | ||
1661 | char *fmtname = ""; | ||
1662 | |||
1663 | switch (sbi->s_jquota_fmt) { | ||
1664 | case QFMT_VFS_OLD: | ||
1665 | fmtname = "vfsold"; | ||
1666 | break; | ||
1667 | case QFMT_VFS_V0: | ||
1668 | fmtname = "vfsv0"; | ||
1669 | break; | ||
1670 | case QFMT_VFS_V1: | ||
1671 | fmtname = "vfsv1"; | ||
1672 | break; | ||
1673 | } | ||
1674 | seq_printf(seq, ",jqfmt=%s", fmtname); | ||
1675 | } | ||
1676 | |||
1677 | if (sbi->s_qf_names[USRQUOTA]) | ||
1678 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | ||
1679 | |||
1680 | if (sbi->s_qf_names[GRPQUOTA]) | ||
1681 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | ||
1682 | |||
1683 | if (test_opt(sb, USRQUOTA)) | ||
1684 | seq_puts(seq, ",usrquota"); | ||
1685 | |||
1686 | if (test_opt(sb, GRPQUOTA)) | ||
1687 | seq_puts(seq, ",grpquota"); | ||
1688 | #endif | ||
1689 | } | ||
1690 | |||
1691 | static const char *token2str(int token) | ||
1692 | { | ||
1693 | static const struct match_token *t; | ||
1694 | |||
1695 | for (t = tokens; t->token != Opt_err; t++) | ||
1696 | if (t->token == token && !strchr(t->pattern, '=')) | ||
1697 | break; | ||
1698 | return t->pattern; | ||
1699 | } | ||
1700 | |||
1701 | /* | ||
1702 | * Show an option if | ||
1703 | * - it's set to a non-default value OR | ||
1704 | * - if the per-sb default is different from the global default | ||
1705 | */ | ||
1706 | static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, | ||
1707 | int nodefs) | ||
1708 | { | ||
1709 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1710 | struct ext4_super_block *es = sbi->s_es; | ||
1711 | int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; | ||
1712 | const struct mount_opts *m; | ||
1713 | char sep = nodefs ? '\n' : ','; | ||
1714 | |||
1715 | #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) | ||
1716 | #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) | ||
1717 | |||
1718 | if (sbi->s_sb_block != 1) | ||
1719 | SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); | ||
1720 | |||
1721 | for (m = ext4_mount_opts; m->token != Opt_err; m++) { | ||
1722 | int want_set = m->flags & MOPT_SET; | ||
1723 | if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || | ||
1724 | (m->flags & MOPT_CLEAR_ERR)) | ||
1725 | continue; | ||
1726 | if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) | ||
1727 | continue; /* skip if same as the default */ | ||
1728 | if ((want_set && | ||
1729 | (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || | ||
1730 | (!want_set && (sbi->s_mount_opt & m->mount_opt))) | ||
1731 | continue; /* select Opt_noFoo vs Opt_Foo */ | ||
1732 | SEQ_OPTS_PRINT("%s", token2str(m->token)); | ||
1733 | } | ||
1734 | |||
1735 | if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID || | ||
1736 | le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) | ||
1737 | SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid); | ||
1738 | if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID || | ||
1739 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) | ||
1740 | SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid); | ||
1741 | def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); | ||
1742 | if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) | ||
1743 | SEQ_OPTS_PUTS("errors=remount-ro"); | ||
1744 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) | ||
1745 | SEQ_OPTS_PUTS("errors=continue"); | ||
1746 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) | ||
1747 | SEQ_OPTS_PUTS("errors=panic"); | ||
1748 | if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) | ||
1749 | SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); | ||
1750 | if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) | ||
1751 | SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); | ||
1752 | if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) | ||
1753 | SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); | ||
1754 | if (sb->s_flags & MS_I_VERSION) | ||
1755 | SEQ_OPTS_PUTS("i_version"); | ||
1756 | if (nodefs || sbi->s_stripe) | ||
1757 | SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); | ||
1758 | if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { | ||
1759 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | ||
1760 | SEQ_OPTS_PUTS("data=journal"); | ||
1761 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
1762 | SEQ_OPTS_PUTS("data=ordered"); | ||
1763 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
1764 | SEQ_OPTS_PUTS("data=writeback"); | ||
1765 | } | ||
1766 | if (nodefs || | ||
1767 | sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
1768 | SEQ_OPTS_PRINT("inode_readahead_blks=%u", | ||
1769 | sbi->s_inode_readahead_blks); | ||
1770 | |||
1771 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && | ||
1772 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) | ||
1773 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); | ||
1774 | |||
1775 | ext4_show_quota_options(seq, sb); | ||
1776 | return 0; | ||
1777 | } | ||
1778 | |||
1779 | static int ext4_show_options(struct seq_file *seq, struct dentry *root) | ||
1780 | { | ||
1781 | return _ext4_show_options(seq, root->d_sb, 0); | ||
1782 | } | ||
1783 | |||
1784 | static int options_seq_show(struct seq_file *seq, void *offset) | ||
1785 | { | ||
1786 | struct super_block *sb = seq->private; | ||
1787 | int rc; | ||
1788 | |||
1789 | seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); | ||
1790 | rc = _ext4_show_options(seq, sb, 1); | ||
1791 | seq_puts(seq, "\n"); | ||
1792 | return rc; | ||
1793 | } | ||
1794 | |||
1795 | static int options_open_fs(struct inode *inode, struct file *file) | ||
1796 | { | ||
1797 | return single_open(file, options_seq_show, PDE(inode)->data); | ||
1798 | } | ||
1799 | |||
1800 | static const struct file_operations ext4_seq_options_fops = { | ||
1801 | .owner = THIS_MODULE, | ||
1802 | .open = options_open_fs, | ||
1803 | .read = seq_read, | ||
1804 | .llseek = seq_lseek, | ||
1805 | .release = single_release, | ||
1806 | }; | ||
1807 | |||
1945 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | 1808 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, |
1946 | int read_only) | 1809 | int read_only) |
1947 | { | 1810 | { |
@@ -2945,7 +2808,7 @@ static int ext4_run_lazyinit_thread(void) | |||
2945 | ext4_clear_request_list(); | 2808 | ext4_clear_request_list(); |
2946 | kfree(ext4_li_info); | 2809 | kfree(ext4_li_info); |
2947 | ext4_li_info = NULL; | 2810 | ext4_li_info = NULL; |
2948 | printk(KERN_CRIT "EXT4: error %d creating inode table " | 2811 | printk(KERN_CRIT "EXT4-fs: error %d creating inode table " |
2949 | "initialization thread\n", | 2812 | "initialization thread\n", |
2950 | err); | 2813 | err); |
2951 | return err; | 2814 | return err; |
@@ -3183,11 +3046,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3183 | set_opt(sb, INIT_INODE_TABLE); | 3046 | set_opt(sb, INIT_INODE_TABLE); |
3184 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3047 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
3185 | set_opt(sb, DEBUG); | 3048 | set_opt(sb, DEBUG); |
3186 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { | 3049 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) |
3187 | ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", | ||
3188 | "2.6.38"); | ||
3189 | set_opt(sb, GRPID); | 3050 | set_opt(sb, GRPID); |
3190 | } | ||
3191 | if (def_mount_opts & EXT4_DEFM_UID16) | 3051 | if (def_mount_opts & EXT4_DEFM_UID16) |
3192 | set_opt(sb, NO_UID32); | 3052 | set_opt(sb, NO_UID32); |
3193 | /* xattr user namespace & acls are now defaulted on */ | 3053 | /* xattr user namespace & acls are now defaulted on */ |
@@ -3240,13 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3240 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; | 3100 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; |
3241 | 3101 | ||
3242 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | 3102 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, |
3243 | &journal_devnum, &journal_ioprio, NULL, 0)) { | 3103 | &journal_devnum, &journal_ioprio, 0)) { |
3244 | ext4_msg(sb, KERN_WARNING, | 3104 | ext4_msg(sb, KERN_WARNING, |
3245 | "failed to parse options in superblock: %s", | 3105 | "failed to parse options in superblock: %s", |
3246 | sbi->s_es->s_mount_opts); | 3106 | sbi->s_es->s_mount_opts); |
3247 | } | 3107 | } |
3108 | sbi->s_def_mount_opt = sbi->s_mount_opt; | ||
3248 | if (!parse_options((char *) data, sb, &journal_devnum, | 3109 | if (!parse_options((char *) data, sb, &journal_devnum, |
3249 | &journal_ioprio, NULL, 0)) | 3110 | &journal_ioprio, 0)) |
3250 | goto failed_mount; | 3111 | goto failed_mount; |
3251 | 3112 | ||
3252 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 3113 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
@@ -3416,7 +3277,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3416 | #else | 3277 | #else |
3417 | es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); | 3278 | es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); |
3418 | #endif | 3279 | #endif |
3419 | sb->s_dirt = 1; | ||
3420 | } | 3280 | } |
3421 | 3281 | ||
3422 | /* Handle clustersize */ | 3282 | /* Handle clustersize */ |
@@ -3540,6 +3400,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3540 | if (ext4_proc_root) | 3400 | if (ext4_proc_root) |
3541 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | 3401 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); |
3542 | 3402 | ||
3403 | if (sbi->s_proc) | ||
3404 | proc_create_data("options", S_IRUGO, sbi->s_proc, | ||
3405 | &ext4_seq_options_fops, sb); | ||
3406 | |||
3543 | bgl_lock_init(sbi->s_blockgroup_lock); | 3407 | bgl_lock_init(sbi->s_blockgroup_lock); |
3544 | 3408 | ||
3545 | for (i = 0; i < db_count; i++) { | 3409 | for (i = 0; i < db_count; i++) { |
@@ -3694,6 +3558,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3694 | } | 3558 | } |
3695 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 3559 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
3696 | 3560 | ||
3561 | sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; | ||
3562 | |||
3697 | /* | 3563 | /* |
3698 | * The journal may have updated the bg summary counts, so we | 3564 | * The journal may have updated the bg summary counts, so we |
3699 | * need to update the global counters. | 3565 | * need to update the global counters. |
@@ -3861,6 +3727,7 @@ failed_mount2: | |||
3861 | ext4_kvfree(sbi->s_group_desc); | 3727 | ext4_kvfree(sbi->s_group_desc); |
3862 | failed_mount: | 3728 | failed_mount: |
3863 | if (sbi->s_proc) { | 3729 | if (sbi->s_proc) { |
3730 | remove_proc_entry("options", sbi->s_proc); | ||
3864 | remove_proc_entry(sb->s_id, ext4_proc_root); | 3731 | remove_proc_entry(sb->s_id, ext4_proc_root); |
3865 | } | 3732 | } |
3866 | #ifdef CONFIG_QUOTA | 3733 | #ifdef CONFIG_QUOTA |
@@ -4090,15 +3957,6 @@ static int ext4_load_journal(struct super_block *sb, | |||
4090 | if (!(journal->j_flags & JBD2_BARRIER)) | 3957 | if (!(journal->j_flags & JBD2_BARRIER)) |
4091 | ext4_msg(sb, KERN_INFO, "barriers disabled"); | 3958 | ext4_msg(sb, KERN_INFO, "barriers disabled"); |
4092 | 3959 | ||
4093 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | ||
4094 | err = jbd2_journal_update_format(journal); | ||
4095 | if (err) { | ||
4096 | ext4_msg(sb, KERN_ERR, "error updating journal"); | ||
4097 | jbd2_journal_destroy(journal); | ||
4098 | return err; | ||
4099 | } | ||
4100 | } | ||
4101 | |||
4102 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) | 3960 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) |
4103 | err = jbd2_journal_wipe(journal, !really_read_only); | 3961 | err = jbd2_journal_wipe(journal, !really_read_only); |
4104 | if (!err) { | 3962 | if (!err) { |
@@ -4385,7 +4243,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4385 | { | 4243 | { |
4386 | struct ext4_super_block *es; | 4244 | struct ext4_super_block *es; |
4387 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4245 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4388 | ext4_fsblk_t n_blocks_count = 0; | ||
4389 | unsigned long old_sb_flags; | 4246 | unsigned long old_sb_flags; |
4390 | struct ext4_mount_options old_opts; | 4247 | struct ext4_mount_options old_opts; |
4391 | int enable_quota = 0; | 4248 | int enable_quota = 0; |
@@ -4418,8 +4275,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4418 | /* | 4275 | /* |
4419 | * Allow the "check" option to be passed as a remount option. | 4276 | * Allow the "check" option to be passed as a remount option. |
4420 | */ | 4277 | */ |
4421 | if (!parse_options(data, sb, NULL, &journal_ioprio, | 4278 | if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { |
4422 | &n_blocks_count, 1)) { | ||
4423 | err = -EINVAL; | 4279 | err = -EINVAL; |
4424 | goto restore_opts; | 4280 | goto restore_opts; |
4425 | } | 4281 | } |
@@ -4437,8 +4293,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4437 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 4293 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
4438 | } | 4294 | } |
4439 | 4295 | ||
4440 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || | 4296 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { |
4441 | n_blocks_count > ext4_blocks_count(es)) { | ||
4442 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { | 4297 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { |
4443 | err = -EROFS; | 4298 | err = -EROFS; |
4444 | goto restore_opts; | 4299 | goto restore_opts; |
@@ -4513,8 +4368,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4513 | if (sbi->s_journal) | 4368 | if (sbi->s_journal) |
4514 | ext4_clear_journal_err(sb, es); | 4369 | ext4_clear_journal_err(sb, es); |
4515 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 4370 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
4516 | if ((err = ext4_group_extend(sb, es, n_blocks_count))) | ||
4517 | goto restore_opts; | ||
4518 | if (!ext4_setup_super(sb, es, 0)) | 4371 | if (!ext4_setup_super(sb, es, 0)) |
4519 | sb->s_flags &= ~MS_RDONLY; | 4372 | sb->s_flags &= ~MS_RDONLY; |
4520 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | 4373 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 93a00d89a220..e88748e55c0f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -82,8 +82,8 @@ | |||
82 | printk("\n"); \ | 82 | printk("\n"); \ |
83 | } while (0) | 83 | } while (0) |
84 | #else | 84 | #else |
85 | # define ea_idebug(f...) | 85 | # define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
86 | # define ea_bdebug(f...) | 86 | # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
87 | #endif | 87 | #endif |
88 | 88 | ||
89 | static void ext4_xattr_cache_insert(struct buffer_head *); | 89 | static void ext4_xattr_cache_insert(struct buffer_head *); |
@@ -158,13 +158,10 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) | |||
158 | static inline int | 158 | static inline int |
159 | ext4_xattr_check_block(struct buffer_head *bh) | 159 | ext4_xattr_check_block(struct buffer_head *bh) |
160 | { | 160 | { |
161 | int error; | ||
162 | |||
163 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || | 161 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || |
164 | BHDR(bh)->h_blocks != cpu_to_le32(1)) | 162 | BHDR(bh)->h_blocks != cpu_to_le32(1)) |
165 | return -EIO; | 163 | return -EIO; |
166 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); | 164 | return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); |
167 | return error; | ||
168 | } | 165 | } |
169 | 166 | ||
170 | static inline int | 167 | static inline int |
@@ -220,7 +217,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
220 | error = -ENODATA; | 217 | error = -ENODATA; |
221 | if (!EXT4_I(inode)->i_file_acl) | 218 | if (!EXT4_I(inode)->i_file_acl) |
222 | goto cleanup; | 219 | goto cleanup; |
223 | ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); | 220 | ea_idebug(inode, "reading block %llu", |
221 | (unsigned long long)EXT4_I(inode)->i_file_acl); | ||
224 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 222 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
225 | if (!bh) | 223 | if (!bh) |
226 | goto cleanup; | 224 | goto cleanup; |
@@ -363,7 +361,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
363 | error = 0; | 361 | error = 0; |
364 | if (!EXT4_I(inode)->i_file_acl) | 362 | if (!EXT4_I(inode)->i_file_acl) |
365 | goto cleanup; | 363 | goto cleanup; |
366 | ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); | 364 | ea_idebug(inode, "reading block %llu", |
365 | (unsigned long long)EXT4_I(inode)->i_file_acl); | ||
367 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 366 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
368 | error = -EIO; | 367 | error = -EIO; |
369 | if (!bh) | 368 | if (!bh) |
@@ -487,18 +486,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
487 | ext4_free_blocks(handle, inode, bh, 0, 1, | 486 | ext4_free_blocks(handle, inode, bh, 0, 1, |
488 | EXT4_FREE_BLOCKS_METADATA | | 487 | EXT4_FREE_BLOCKS_METADATA | |
489 | EXT4_FREE_BLOCKS_FORGET); | 488 | EXT4_FREE_BLOCKS_FORGET); |
489 | unlock_buffer(bh); | ||
490 | } else { | 490 | } else { |
491 | le32_add_cpu(&BHDR(bh)->h_refcount, -1); | 491 | le32_add_cpu(&BHDR(bh)->h_refcount, -1); |
492 | if (ce) | ||
493 | mb_cache_entry_release(ce); | ||
494 | unlock_buffer(bh); | ||
492 | error = ext4_handle_dirty_metadata(handle, inode, bh); | 495 | error = ext4_handle_dirty_metadata(handle, inode, bh); |
493 | if (IS_SYNC(inode)) | 496 | if (IS_SYNC(inode)) |
494 | ext4_handle_sync(handle); | 497 | ext4_handle_sync(handle); |
495 | dquot_free_block(inode, 1); | 498 | dquot_free_block(inode, 1); |
496 | ea_bdebug(bh, "refcount now=%d; releasing", | 499 | ea_bdebug(bh, "refcount now=%d; releasing", |
497 | le32_to_cpu(BHDR(bh)->h_refcount)); | 500 | le32_to_cpu(BHDR(bh)->h_refcount)); |
498 | if (ce) | ||
499 | mb_cache_entry_release(ce); | ||
500 | } | 501 | } |
501 | unlock_buffer(bh); | ||
502 | out: | 502 | out: |
503 | ext4_std_error(inode->i_sb, error); | 503 | ext4_std_error(inode->i_sb, error); |
504 | return; | 504 | return; |
@@ -834,7 +834,8 @@ inserted: | |||
834 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 834 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
835 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | 835 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); |
836 | 836 | ||
837 | ea_idebug(inode, "creating block %d", block); | 837 | ea_idebug(inode, "creating block %llu", |
838 | (unsigned long long)block); | ||
838 | 839 | ||
839 | new_bh = sb_getblk(sb, block); | 840 | new_bh = sb_getblk(sb, block); |
840 | if (!new_bh) { | 841 | if (!new_bh) { |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a81eb2367d39..98ae804f5273 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -521,57 +521,46 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, | |||
521 | 521 | ||
522 | op = &outname[*outlen * sizeof(wchar_t)]; | 522 | op = &outname[*outlen * sizeof(wchar_t)]; |
523 | } else { | 523 | } else { |
524 | if (nls) { | 524 | for (i = 0, ip = name, op = outname, *outlen = 0; |
525 | for (i = 0, ip = name, op = outname, *outlen = 0; | 525 | i < len && *outlen < FAT_LFN_LEN; |
526 | i < len && *outlen <= FAT_LFN_LEN; | 526 | *outlen += 1) { |
527 | *outlen += 1) | 527 | if (escape && (*ip == ':')) { |
528 | { | 528 | if (i > len - 5) |
529 | if (escape && (*ip == ':')) { | 529 | return -EINVAL; |
530 | if (i > len - 5) | 530 | ec = 0; |
531 | return -EINVAL; | 531 | for (k = 1; k < 5; k++) { |
532 | ec = 0; | 532 | nc = ip[k]; |
533 | for (k = 1; k < 5; k++) { | 533 | ec <<= 4; |
534 | nc = ip[k]; | 534 | if (nc >= '0' && nc <= '9') { |
535 | ec <<= 4; | 535 | ec |= nc - '0'; |
536 | if (nc >= '0' && nc <= '9') { | 536 | continue; |
537 | ec |= nc - '0'; | ||
538 | continue; | ||
539 | } | ||
540 | if (nc >= 'a' && nc <= 'f') { | ||
541 | ec |= nc - ('a' - 10); | ||
542 | continue; | ||
543 | } | ||
544 | if (nc >= 'A' && nc <= 'F') { | ||
545 | ec |= nc - ('A' - 10); | ||
546 | continue; | ||
547 | } | ||
548 | return -EINVAL; | ||
549 | } | 537 | } |
550 | *op++ = ec & 0xFF; | 538 | if (nc >= 'a' && nc <= 'f') { |
551 | *op++ = ec >> 8; | 539 | ec |= nc - ('a' - 10); |
552 | ip += 5; | 540 | continue; |
553 | i += 5; | 541 | } |
554 | } else { | 542 | if (nc >= 'A' && nc <= 'F') { |
555 | if ((charlen = nls->char2uni(ip, len - i, (wchar_t *)op)) < 0) | 543 | ec |= nc - ('A' - 10); |
556 | return -EINVAL; | 544 | continue; |
557 | ip += charlen; | 545 | } |
558 | i += charlen; | 546 | return -EINVAL; |
559 | op += 2; | ||
560 | } | 547 | } |
548 | *op++ = ec & 0xFF; | ||
549 | *op++ = ec >> 8; | ||
550 | ip += 5; | ||
551 | i += 5; | ||
552 | } else { | ||
553 | charlen = nls->char2uni(ip, len - i, | ||
554 | (wchar_t *)op); | ||
555 | if (charlen < 0) | ||
556 | return -EINVAL; | ||
557 | ip += charlen; | ||
558 | i += charlen; | ||
559 | op += 2; | ||
561 | } | 560 | } |
562 | if (i < len) | ||
563 | return -ENAMETOOLONG; | ||
564 | } else { | ||
565 | for (i = 0, ip = name, op = outname, *outlen = 0; | ||
566 | i < len && *outlen <= FAT_LFN_LEN; | ||
567 | i++, *outlen += 1) | ||
568 | { | ||
569 | *op++ = *ip++; | ||
570 | *op++ = 0; | ||
571 | } | ||
572 | if (i < len) | ||
573 | return -ENAMETOOLONG; | ||
574 | } | 561 | } |
562 | if (i < len) | ||
563 | return -ENAMETOOLONG; | ||
575 | } | 564 | } |
576 | 565 | ||
577 | *longlen = *outlen; | 566 | *longlen = *outlen; |
@@ -6,7 +6,7 @@ | |||
6 | * Manage the dynamic fd arrays in the process files_struct. | 6 | * Manage the dynamic fd arrays in the process files_struct. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/mmzone.h> | 12 | #include <linux/mmzone.h> |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 77b535ac7136..539f36cf3e4a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -14,7 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/module.h> | 17 | #include <linux/export.h> |
18 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
@@ -256,7 +256,8 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) | |||
256 | } | 256 | } |
257 | 257 | ||
258 | /* | 258 | /* |
259 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. | 259 | * Move expired (dirtied after work->older_than_this) dirty inodes from |
260 | * @delaying_queue to @dispatch_queue. | ||
260 | */ | 261 | */ |
261 | static int move_expired_inodes(struct list_head *delaying_queue, | 262 | static int move_expired_inodes(struct list_head *delaying_queue, |
262 | struct list_head *dispatch_queue, | 263 | struct list_head *dispatch_queue, |
@@ -1148,23 +1149,6 @@ out_unlock_inode: | |||
1148 | } | 1149 | } |
1149 | EXPORT_SYMBOL(__mark_inode_dirty); | 1150 | EXPORT_SYMBOL(__mark_inode_dirty); |
1150 | 1151 | ||
1151 | /* | ||
1152 | * Write out a superblock's list of dirty inodes. A wait will be performed | ||
1153 | * upon no inodes, all inodes or the final one, depending upon sync_mode. | ||
1154 | * | ||
1155 | * If older_than_this is non-NULL, then only write out inodes which | ||
1156 | * had their first dirtying at a time earlier than *older_than_this. | ||
1157 | * | ||
1158 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | ||
1159 | * This function assumes that the blockdev superblock's inodes are backed by | ||
1160 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
1161 | * assume that all inodes are backed by the same queue. | ||
1162 | * | ||
1163 | * The inodes to be written are parked on bdi->b_io. They are moved back onto | ||
1164 | * bdi->b_dirty as they are selected for writing. This way, none can be missed | ||
1165 | * on the writer throttling path, and we get decent balancing between many | ||
1166 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
1167 | */ | ||
1168 | static void wait_sb_inodes(struct super_block *sb) | 1152 | static void wait_sb_inodes(struct super_block *sb) |
1169 | { | 1153 | { |
1170 | struct inode *inode, *old_inode = NULL; | 1154 | struct inode *inode, *old_inode = NULL; |
@@ -1364,8 +1348,6 @@ int write_inode_now(struct inode *inode, int sync) | |||
1364 | ret = writeback_single_inode(inode, wb, &wbc); | 1348 | ret = writeback_single_inode(inode, wb, &wbc); |
1365 | spin_unlock(&inode->i_lock); | 1349 | spin_unlock(&inode->i_lock); |
1366 | spin_unlock(&wb->list_lock); | 1350 | spin_unlock(&wb->list_lock); |
1367 | if (sync) | ||
1368 | inode_sync_wait(inode); | ||
1369 | return ret; | 1351 | return ret; |
1370 | } | 1352 | } |
1371 | EXPORT_SYMBOL(write_inode_now); | 1353 | EXPORT_SYMBOL(write_inode_now); |
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 6324c4274959..e159e682ad4c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/export.h> |
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/path.h> | 4 | #include <linux/path.h> |
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 3cbfa93cd782..1fe731337f07 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h | |||
@@ -67,7 +67,8 @@ extern int access_file(char *path, int r, int w, int x); | |||
67 | extern int open_file(char *path, int r, int w, int append); | 67 | extern int open_file(char *path, int r, int w, int append); |
68 | extern void *open_dir(char *path, int *err_out); | 68 | extern void *open_dir(char *path, int *err_out); |
69 | extern char *read_dir(void *stream, unsigned long long *pos, | 69 | extern char *read_dir(void *stream, unsigned long long *pos, |
70 | unsigned long long *ino_out, int *len_out); | 70 | unsigned long long *ino_out, int *len_out, |
71 | unsigned int *type_out); | ||
71 | extern void close_file(void *stream); | 72 | extern void close_file(void *stream); |
72 | extern int replace_file(int oldfd, int fd); | 73 | extern int replace_file(int oldfd, int fd); |
73 | extern void close_dir(void *stream); | 74 | extern void close_dir(void *stream); |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 588d45885a6f..07c516bfea76 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -283,6 +283,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
283 | char *name; | 283 | char *name; |
284 | unsigned long long next, ino; | 284 | unsigned long long next, ino; |
285 | int error, len; | 285 | int error, len; |
286 | unsigned int type; | ||
286 | 287 | ||
287 | name = dentry_name(file->f_path.dentry); | 288 | name = dentry_name(file->f_path.dentry); |
288 | if (name == NULL) | 289 | if (name == NULL) |
@@ -292,9 +293,9 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
292 | if (dir == NULL) | 293 | if (dir == NULL) |
293 | return -error; | 294 | return -error; |
294 | next = file->f_pos; | 295 | next = file->f_pos; |
295 | while ((name = read_dir(dir, &next, &ino, &len)) != NULL) { | 296 | while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { |
296 | error = (*filldir)(ent, name, len, file->f_pos, | 297 | error = (*filldir)(ent, name, len, file->f_pos, |
297 | ino, DT_UNKNOWN); | 298 | ino, type); |
298 | if (error) break; | 299 | if (error) break; |
299 | file->f_pos = next; | 300 | file->f_pos = next; |
300 | } | 301 | } |
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index dd7bc38a3825..a74ad0d371c2 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c | |||
@@ -98,7 +98,8 @@ void *open_dir(char *path, int *err_out) | |||
98 | } | 98 | } |
99 | 99 | ||
100 | char *read_dir(void *stream, unsigned long long *pos, | 100 | char *read_dir(void *stream, unsigned long long *pos, |
101 | unsigned long long *ino_out, int *len_out) | 101 | unsigned long long *ino_out, int *len_out, |
102 | unsigned int *type_out) | ||
102 | { | 103 | { |
103 | DIR *dir = stream; | 104 | DIR *dir = stream; |
104 | struct dirent *ent; | 105 | struct dirent *ent; |
@@ -109,6 +110,7 @@ char *read_dir(void *stream, unsigned long long *pos, | |||
109 | return NULL; | 110 | return NULL; |
110 | *len_out = strlen(ent->d_name); | 111 | *len_out = strlen(ent->d_name); |
111 | *ino_out = ent->d_ino; | 112 | *ino_out = ent->d_ino; |
113 | *type_out = ent->d_type; | ||
112 | *pos = telldir(dir); | 114 | *pos = telldir(dir); |
113 | return ent->d_name; | 115 | return ent->d_name; |
114 | } | 116 | } |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 066836e81848..29167bebe874 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/file.h> | 10 | #include <linux/file.h> |
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include <linux/security.h> | 12 | #include <linux/security.h> |
13 | #include <linux/module.h> | 13 | #include <linux/export.h> |
14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
15 | #include <linux/writeback.h> | 15 | #include <linux/writeback.h> |
16 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index d49d202903fb..c78841ee81cf 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh) | |||
88 | * whole transaction. | 88 | * whole transaction. |
89 | * | 89 | * |
90 | * Requires j_list_lock | 90 | * Requires j_list_lock |
91 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
92 | */ | 91 | */ |
93 | static int __try_to_free_cp_buf(struct journal_head *jh) | 92 | static int __try_to_free_cp_buf(struct journal_head *jh) |
94 | { | 93 | { |
95 | int ret = 0; | 94 | int ret = 0; |
96 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
97 | 96 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | 99 | /* |
101 | * Get our reference so that bh cannot be freed before | 100 | * Get our reference so that bh cannot be freed before |
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
104 | get_bh(bh); | 103 | get_bh(bh); |
105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
107 | jbd_unlock_bh_state(bh); | ||
108 | BUFFER_TRACE(bh, "release"); | 106 | BUFFER_TRACE(bh, "release"); |
109 | __brelse(bh); | 107 | __brelse(bh); |
110 | } else { | ||
111 | jbd_unlock_bh_state(bh); | ||
112 | } | 108 | } |
113 | return ret; | 109 | return ret; |
114 | } | 110 | } |
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
180 | } | 176 | } |
181 | 177 | ||
182 | /* | 178 | /* |
183 | * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. | ||
184 | * The caller must restart a list walk. Wait for someone else to run | ||
185 | * jbd_unlock_bh_state(). | ||
186 | */ | ||
187 | static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | ||
188 | __releases(journal->j_list_lock) | ||
189 | { | ||
190 | get_bh(bh); | ||
191 | spin_unlock(&journal->j_list_lock); | ||
192 | jbd_lock_bh_state(bh); | ||
193 | jbd_unlock_bh_state(bh); | ||
194 | put_bh(bh); | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Clean up transaction's list of buffers submitted for io. | 179 | * Clean up transaction's list of buffers submitted for io. |
199 | * We wait for any pending IO to complete and remove any clean | 180 | * We wait for any pending IO to complete and remove any clean |
200 | * buffers. Note that we take the buffers in the opposite ordering | 181 | * buffers. Note that we take the buffers in the opposite ordering |
@@ -222,15 +203,9 @@ restart: | |||
222 | while (!released && transaction->t_checkpoint_io_list) { | 203 | while (!released && transaction->t_checkpoint_io_list) { |
223 | jh = transaction->t_checkpoint_io_list; | 204 | jh = transaction->t_checkpoint_io_list; |
224 | bh = jh2bh(jh); | 205 | bh = jh2bh(jh); |
225 | if (!jbd_trylock_bh_state(bh)) { | ||
226 | jbd_sync_bh(journal, bh); | ||
227 | spin_lock(&journal->j_list_lock); | ||
228 | goto restart; | ||
229 | } | ||
230 | get_bh(bh); | 206 | get_bh(bh); |
231 | if (buffer_locked(bh)) { | 207 | if (buffer_locked(bh)) { |
232 | spin_unlock(&journal->j_list_lock); | 208 | spin_unlock(&journal->j_list_lock); |
233 | jbd_unlock_bh_state(bh); | ||
234 | wait_on_buffer(bh); | 209 | wait_on_buffer(bh); |
235 | /* the journal_head may have gone by now */ | 210 | /* the journal_head may have gone by now */ |
236 | BUFFER_TRACE(bh, "brelse"); | 211 | BUFFER_TRACE(bh, "brelse"); |
@@ -246,7 +221,6 @@ restart: | |||
246 | * it has been written out and so we can drop it from the list | 221 | * it has been written out and so we can drop it from the list |
247 | */ | 222 | */ |
248 | released = __jbd2_journal_remove_checkpoint(jh); | 223 | released = __jbd2_journal_remove_checkpoint(jh); |
249 | jbd_unlock_bh_state(bh); | ||
250 | __brelse(bh); | 224 | __brelse(bh); |
251 | } | 225 | } |
252 | 226 | ||
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
266 | 240 | ||
267 | for (i = 0; i < *batch_count; i++) { | 241 | for (i = 0; i < *batch_count; i++) { |
268 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; | 242 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
269 | clear_buffer_jwrite(bh); | ||
270 | BUFFER_TRACE(bh, "brelse"); | 243 | BUFFER_TRACE(bh, "brelse"); |
271 | __brelse(bh); | 244 | __brelse(bh); |
272 | } | 245 | } |
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
281 | * be written out. | 254 | * be written out. |
282 | * | 255 | * |
283 | * Called with j_list_lock held and drops it if 1 is returned | 256 | * Called with j_list_lock held and drops it if 1 is returned |
284 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
285 | */ | 257 | */ |
286 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | 258 | static int __process_buffer(journal_t *journal, struct journal_head *jh, |
287 | int *batch_count, transaction_t *transaction) | 259 | int *batch_count, transaction_t *transaction) |
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
292 | if (buffer_locked(bh)) { | 264 | if (buffer_locked(bh)) { |
293 | get_bh(bh); | 265 | get_bh(bh); |
294 | spin_unlock(&journal->j_list_lock); | 266 | spin_unlock(&journal->j_list_lock); |
295 | jbd_unlock_bh_state(bh); | ||
296 | wait_on_buffer(bh); | 267 | wait_on_buffer(bh); |
297 | /* the journal_head may have gone by now */ | 268 | /* the journal_head may have gone by now */ |
298 | BUFFER_TRACE(bh, "brelse"); | 269 | BUFFER_TRACE(bh, "brelse"); |
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
304 | 275 | ||
305 | transaction->t_chp_stats.cs_forced_to_close++; | 276 | transaction->t_chp_stats.cs_forced_to_close++; |
306 | spin_unlock(&journal->j_list_lock); | 277 | spin_unlock(&journal->j_list_lock); |
307 | jbd_unlock_bh_state(bh); | ||
308 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | 278 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
309 | /* | 279 | /* |
310 | * The journal thread is dead; so starting and | 280 | * The journal thread is dead; so starting and |
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
323 | if (unlikely(buffer_write_io_error(bh))) | 293 | if (unlikely(buffer_write_io_error(bh))) |
324 | ret = -EIO; | 294 | ret = -EIO; |
325 | get_bh(bh); | 295 | get_bh(bh); |
326 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | ||
327 | BUFFER_TRACE(bh, "remove from checkpoint"); | 296 | BUFFER_TRACE(bh, "remove from checkpoint"); |
328 | __jbd2_journal_remove_checkpoint(jh); | 297 | __jbd2_journal_remove_checkpoint(jh); |
329 | spin_unlock(&journal->j_list_lock); | 298 | spin_unlock(&journal->j_list_lock); |
330 | jbd_unlock_bh_state(bh); | ||
331 | __brelse(bh); | 299 | __brelse(bh); |
332 | } else { | 300 | } else { |
333 | /* | 301 | /* |
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
340 | BUFFER_TRACE(bh, "queue"); | 308 | BUFFER_TRACE(bh, "queue"); |
341 | get_bh(bh); | 309 | get_bh(bh); |
342 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | 310 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); |
343 | set_buffer_jwrite(bh); | ||
344 | journal->j_chkpt_bhs[*batch_count] = bh; | 311 | journal->j_chkpt_bhs[*batch_count] = bh; |
345 | __buffer_relink_io(jh); | 312 | __buffer_relink_io(jh); |
346 | jbd_unlock_bh_state(bh); | ||
347 | transaction->t_chp_stats.cs_written++; | 313 | transaction->t_chp_stats.cs_written++; |
348 | (*batch_count)++; | 314 | (*batch_count)++; |
349 | if (*batch_count == JBD2_NR_BATCH) { | 315 | if (*batch_count == JBD2_NR_BATCH) { |
@@ -407,15 +373,7 @@ restart: | |||
407 | int retry = 0, err; | 373 | int retry = 0, err; |
408 | 374 | ||
409 | while (!retry && transaction->t_checkpoint_list) { | 375 | while (!retry && transaction->t_checkpoint_list) { |
410 | struct buffer_head *bh; | ||
411 | |||
412 | jh = transaction->t_checkpoint_list; | 376 | jh = transaction->t_checkpoint_list; |
413 | bh = jh2bh(jh); | ||
414 | if (!jbd_trylock_bh_state(bh)) { | ||
415 | jbd_sync_bh(journal, bh); | ||
416 | retry = 1; | ||
417 | break; | ||
418 | } | ||
419 | retry = __process_buffer(journal, jh, &batch_count, | 377 | retry = __process_buffer(journal, jh, &batch_count, |
420 | transaction); | 378 | transaction); |
421 | if (retry < 0 && !result) | 379 | if (retry < 0 && !result) |
@@ -478,79 +436,28 @@ out: | |||
478 | 436 | ||
479 | int jbd2_cleanup_journal_tail(journal_t *journal) | 437 | int jbd2_cleanup_journal_tail(journal_t *journal) |
480 | { | 438 | { |
481 | transaction_t * transaction; | ||
482 | tid_t first_tid; | 439 | tid_t first_tid; |
483 | unsigned long blocknr, freed; | 440 | unsigned long blocknr; |
484 | 441 | ||
485 | if (is_journal_aborted(journal)) | 442 | if (is_journal_aborted(journal)) |
486 | return 1; | 443 | return 1; |
487 | 444 | ||
488 | /* OK, work out the oldest transaction remaining in the log, and | 445 | if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) |
489 | * the log block it starts at. | ||
490 | * | ||
491 | * If the log is now empty, we need to work out which is the | ||
492 | * next transaction ID we will write, and where it will | ||
493 | * start. */ | ||
494 | |||
495 | write_lock(&journal->j_state_lock); | ||
496 | spin_lock(&journal->j_list_lock); | ||
497 | transaction = journal->j_checkpoint_transactions; | ||
498 | if (transaction) { | ||
499 | first_tid = transaction->t_tid; | ||
500 | blocknr = transaction->t_log_start; | ||
501 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
502 | first_tid = transaction->t_tid; | ||
503 | blocknr = transaction->t_log_start; | ||
504 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
505 | first_tid = transaction->t_tid; | ||
506 | blocknr = journal->j_head; | ||
507 | } else { | ||
508 | first_tid = journal->j_transaction_sequence; | ||
509 | blocknr = journal->j_head; | ||
510 | } | ||
511 | spin_unlock(&journal->j_list_lock); | ||
512 | J_ASSERT(blocknr != 0); | ||
513 | |||
514 | /* If the oldest pinned transaction is at the tail of the log | ||
515 | already then there's not much we can do right now. */ | ||
516 | if (journal->j_tail_sequence == first_tid) { | ||
517 | write_unlock(&journal->j_state_lock); | ||
518 | return 1; | 446 | return 1; |
519 | } | 447 | J_ASSERT(blocknr != 0); |
520 | |||
521 | /* OK, update the superblock to recover the freed space. | ||
522 | * Physical blocks come first: have we wrapped beyond the end of | ||
523 | * the log? */ | ||
524 | freed = blocknr - journal->j_tail; | ||
525 | if (blocknr < journal->j_tail) | ||
526 | freed = freed + journal->j_last - journal->j_first; | ||
527 | |||
528 | trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
529 | jbd_debug(1, | ||
530 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
531 | "freeing %lu\n", | ||
532 | journal->j_tail_sequence, first_tid, blocknr, freed); | ||
533 | |||
534 | journal->j_free += freed; | ||
535 | journal->j_tail_sequence = first_tid; | ||
536 | journal->j_tail = blocknr; | ||
537 | write_unlock(&journal->j_state_lock); | ||
538 | 448 | ||
539 | /* | 449 | /* |
540 | * If there is an external journal, we need to make sure that | 450 | * We need to make sure that any blocks that were recently written out |
541 | * any data blocks that were recently written out --- perhaps | 451 | * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before |
542 | * by jbd2_log_do_checkpoint() --- are flushed out before we | 452 | * we drop the transactions from the journal. It's unlikely this will |
543 | * drop the transactions from the external journal. It's | 453 | * be necessary, especially with an appropriately sized journal, but we |
544 | * unlikely this will be necessary, especially with a | 454 | * need this to guarantee correctness. Fortunately |
545 | * appropriately sized journal, but we need this to guarantee | 455 | * jbd2_cleanup_journal_tail() doesn't get called all that often. |
546 | * correctness. Fortunately jbd2_cleanup_journal_tail() | ||
547 | * doesn't get called all that often. | ||
548 | */ | 456 | */ |
549 | if ((journal->j_fs_dev != journal->j_dev) && | 457 | if (journal->j_flags & JBD2_BARRIER) |
550 | (journal->j_flags & JBD2_BARRIER)) | ||
551 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 458 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
552 | if (!(journal->j_flags & JBD2_ABORT)) | 459 | |
553 | jbd2_journal_update_superblock(journal, 1); | 460 | __jbd2_update_log_tail(journal, first_tid, blocknr); |
554 | return 0; | 461 | return 0; |
555 | } | 462 | } |
556 | 463 | ||
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
582 | do { | 489 | do { |
583 | jh = next_jh; | 490 | jh = next_jh; |
584 | next_jh = jh->b_cpnext; | 491 | next_jh = jh->b_cpnext; |
585 | /* Use trylock because of the ranking */ | 492 | ret = __try_to_free_cp_buf(jh); |
586 | if (jbd_trylock_bh_state(jh2bh(jh))) { | 493 | if (ret) { |
587 | ret = __try_to_free_cp_buf(jh); | 494 | freed++; |
588 | if (ret) { | 495 | if (ret == 2) { |
589 | freed++; | 496 | *released = 1; |
590 | if (ret == 2) { | 497 | return freed; |
591 | *released = 1; | ||
592 | return freed; | ||
593 | } | ||
594 | } | 498 | } |
595 | } | 499 | } |
596 | /* | 500 | /* |
@@ -673,9 +577,7 @@ out: | |||
673 | * The function can free jh and bh. | 577 | * The function can free jh and bh. |
674 | * | 578 | * |
675 | * This function is called with j_list_lock held. | 579 | * This function is called with j_list_lock held. |
676 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | ||
677 | */ | 580 | */ |
678 | |||
679 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | 581 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) |
680 | { | 582 | { |
681 | struct transaction_chp_stats_s *stats; | 583 | struct transaction_chp_stats_s *stats; |
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
722 | transaction->t_tid, stats); | 624 | transaction->t_tid, stats); |
723 | 625 | ||
724 | __jbd2_journal_drop_transaction(journal, transaction); | 626 | __jbd2_journal_drop_transaction(journal, transaction); |
725 | kfree(transaction); | 627 | jbd2_journal_free_transaction(transaction); |
726 | 628 | ||
727 | /* Just in case anybody was waiting for more transactions to be | 629 | /* Just in case anybody was waiting for more transactions to be |
728 | checkpointed... */ | 630 | checkpointed... */ |
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
797 | J_ASSERT(journal->j_committing_transaction != transaction); | 699 | J_ASSERT(journal->j_committing_transaction != transaction); |
798 | J_ASSERT(journal->j_running_transaction != transaction); | 700 | J_ASSERT(journal->j_running_transaction != transaction); |
799 | 701 | ||
702 | trace_jbd2_drop_transaction(journal, transaction); | ||
703 | |||
800 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 704 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
801 | } | 705 | } |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 29853deee5ed..806525a7269c 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -330,6 +330,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
330 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 330 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
331 | __u32 crc32_sum = ~0; | 331 | __u32 crc32_sum = ~0; |
332 | struct blk_plug plug; | 332 | struct blk_plug plug; |
333 | /* Tail of the journal */ | ||
334 | unsigned long first_block; | ||
335 | tid_t first_tid; | ||
336 | int update_tail; | ||
333 | 337 | ||
334 | /* | 338 | /* |
335 | * First job: lock down the current transaction and wait for | 339 | * First job: lock down the current transaction and wait for |
@@ -339,7 +343,18 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
339 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ | 343 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ |
340 | if (journal->j_flags & JBD2_FLUSHED) { | 344 | if (journal->j_flags & JBD2_FLUSHED) { |
341 | jbd_debug(3, "super block updated\n"); | 345 | jbd_debug(3, "super block updated\n"); |
342 | jbd2_journal_update_superblock(journal, 1); | 346 | mutex_lock(&journal->j_checkpoint_mutex); |
347 | /* | ||
348 | * We hold j_checkpoint_mutex so tail cannot change under us. | ||
349 | * We don't need any special data guarantees for writing sb | ||
350 | * since journal is empty and it is ok for write to be | ||
351 | * flushed only with transaction commit. | ||
352 | */ | ||
353 | jbd2_journal_update_sb_log_tail(journal, | ||
354 | journal->j_tail_sequence, | ||
355 | journal->j_tail, | ||
356 | WRITE_SYNC); | ||
357 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
343 | } else { | 358 | } else { |
344 | jbd_debug(3, "superblock not updated\n"); | 359 | jbd_debug(3, "superblock not updated\n"); |
345 | } | 360 | } |
@@ -676,10 +691,30 @@ start_journal_io: | |||
676 | err = 0; | 691 | err = 0; |
677 | } | 692 | } |
678 | 693 | ||
694 | /* | ||
695 | * Get current oldest transaction in the log before we issue flush | ||
696 | * to the filesystem device. After the flush we can be sure that | ||
697 | * blocks of all older transactions are checkpointed to persistent | ||
698 | * storage and we will be safe to update journal start in the | ||
699 | * superblock with the numbers we get here. | ||
700 | */ | ||
701 | update_tail = | ||
702 | jbd2_journal_get_log_tail(journal, &first_tid, &first_block); | ||
703 | |||
679 | write_lock(&journal->j_state_lock); | 704 | write_lock(&journal->j_state_lock); |
705 | if (update_tail) { | ||
706 | long freed = first_block - journal->j_tail; | ||
707 | |||
708 | if (first_block < journal->j_tail) | ||
709 | freed += journal->j_last - journal->j_first; | ||
710 | /* Update tail only if we free significant amount of space */ | ||
711 | if (freed < journal->j_maxlen / 4) | ||
712 | update_tail = 0; | ||
713 | } | ||
680 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 714 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
681 | commit_transaction->t_state = T_COMMIT_DFLUSH; | 715 | commit_transaction->t_state = T_COMMIT_DFLUSH; |
682 | write_unlock(&journal->j_state_lock); | 716 | write_unlock(&journal->j_state_lock); |
717 | |||
683 | /* | 718 | /* |
684 | * If the journal is not located on the file system device, | 719 | * If the journal is not located on the file system device, |
685 | * then we must flush the file system device before we issue | 720 | * then we must flush the file system device before we issue |
@@ -830,6 +865,14 @@ wait_for_iobuf: | |||
830 | if (err) | 865 | if (err) |
831 | jbd2_journal_abort(journal, err); | 866 | jbd2_journal_abort(journal, err); |
832 | 867 | ||
868 | /* | ||
869 | * Now disk caches for filesystem device are flushed so we are safe to | ||
870 | * erase checkpointed transactions from the log by updating journal | ||
871 | * superblock. | ||
872 | */ | ||
873 | if (update_tail) | ||
874 | jbd2_update_log_tail(journal, first_tid, first_block); | ||
875 | |||
833 | /* End of a transaction! Finally, we can do checkpoint | 876 | /* End of a transaction! Finally, we can do checkpoint |
834 | processing: any buffers committed as a result of this | 877 | processing: any buffers committed as a result of this |
835 | transaction can be removed from any checkpoint list it was on | 878 | transaction can be removed from any checkpoint list it was on |
@@ -1047,7 +1090,7 @@ restart_loop: | |||
1047 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", | 1090 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", |
1048 | journal->j_commit_sequence, journal->j_tail_sequence); | 1091 | journal->j_commit_sequence, journal->j_tail_sequence); |
1049 | if (to_free) | 1092 | if (to_free) |
1050 | kfree(commit_transaction); | 1093 | jbd2_journal_free_transaction(commit_transaction); |
1051 | 1094 | ||
1052 | wake_up(&journal->j_wait_done_commit); | 1095 | wake_up(&journal->j_wait_done_commit); |
1053 | } | 1096 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c6d22745553f..1afb701622b0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -70,7 +70,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke); | |||
70 | 70 | ||
71 | EXPORT_SYMBOL(jbd2_journal_init_dev); | 71 | EXPORT_SYMBOL(jbd2_journal_init_dev); |
72 | EXPORT_SYMBOL(jbd2_journal_init_inode); | 72 | EXPORT_SYMBOL(jbd2_journal_init_inode); |
73 | EXPORT_SYMBOL(jbd2_journal_update_format); | ||
74 | EXPORT_SYMBOL(jbd2_journal_check_used_features); | 73 | EXPORT_SYMBOL(jbd2_journal_check_used_features); |
75 | EXPORT_SYMBOL(jbd2_journal_check_available_features); | 74 | EXPORT_SYMBOL(jbd2_journal_check_available_features); |
76 | EXPORT_SYMBOL(jbd2_journal_set_features); | 75 | EXPORT_SYMBOL(jbd2_journal_set_features); |
@@ -95,7 +94,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | |||
95 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | 94 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); |
96 | EXPORT_SYMBOL(jbd2_inode_cache); | 95 | EXPORT_SYMBOL(jbd2_inode_cache); |
97 | 96 | ||
98 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | ||
99 | static void __journal_abort_soft (journal_t *journal, int errno); | 97 | static void __journal_abort_soft (journal_t *journal, int errno); |
100 | static int jbd2_journal_create_slab(size_t slab_size); | 98 | static int jbd2_journal_create_slab(size_t slab_size); |
101 | 99 | ||
@@ -745,6 +743,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
745 | return jbd2_journal_add_journal_head(bh); | 743 | return jbd2_journal_add_journal_head(bh); |
746 | } | 744 | } |
747 | 745 | ||
746 | /* | ||
747 | * Return tid of the oldest transaction in the journal and block in the journal | ||
748 | * where the transaction starts. | ||
749 | * | ||
750 | * If the journal is now empty, return which will be the next transaction ID | ||
751 | * we will write and where will that transaction start. | ||
752 | * | ||
753 | * The return value is 0 if journal tail cannot be pushed any further, 1 if | ||
754 | * it can. | ||
755 | */ | ||
756 | int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, | ||
757 | unsigned long *block) | ||
758 | { | ||
759 | transaction_t *transaction; | ||
760 | int ret; | ||
761 | |||
762 | read_lock(&journal->j_state_lock); | ||
763 | spin_lock(&journal->j_list_lock); | ||
764 | transaction = journal->j_checkpoint_transactions; | ||
765 | if (transaction) { | ||
766 | *tid = transaction->t_tid; | ||
767 | *block = transaction->t_log_start; | ||
768 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
769 | *tid = transaction->t_tid; | ||
770 | *block = transaction->t_log_start; | ||
771 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
772 | *tid = transaction->t_tid; | ||
773 | *block = journal->j_head; | ||
774 | } else { | ||
775 | *tid = journal->j_transaction_sequence; | ||
776 | *block = journal->j_head; | ||
777 | } | ||
778 | ret = tid_gt(*tid, journal->j_tail_sequence); | ||
779 | spin_unlock(&journal->j_list_lock); | ||
780 | read_unlock(&journal->j_state_lock); | ||
781 | |||
782 | return ret; | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * Update information in journal structure and in on disk journal superblock | ||
787 | * about log tail. This function does not check whether information passed in | ||
788 | * really pushes log tail further. It's responsibility of the caller to make | ||
789 | * sure provided log tail information is valid (e.g. by holding | ||
790 | * j_checkpoint_mutex all the time between computing log tail and calling this | ||
791 | * function as is the case with jbd2_cleanup_journal_tail()). | ||
792 | * | ||
793 | * Requires j_checkpoint_mutex | ||
794 | */ | ||
795 | void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) | ||
796 | { | ||
797 | unsigned long freed; | ||
798 | |||
799 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
800 | |||
801 | /* | ||
802 | * We cannot afford for write to remain in drive's caches since as | ||
803 | * soon as we update j_tail, next transaction can start reusing journal | ||
804 | * space and if we lose sb update during power failure we'd replay | ||
805 | * old transaction with possibly newly overwritten data. | ||
806 | */ | ||
807 | jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); | ||
808 | write_lock(&journal->j_state_lock); | ||
809 | freed = block - journal->j_tail; | ||
810 | if (block < journal->j_tail) | ||
811 | freed += journal->j_last - journal->j_first; | ||
812 | |||
813 | trace_jbd2_update_log_tail(journal, tid, block, freed); | ||
814 | jbd_debug(1, | ||
815 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
816 | "freeing %lu\n", | ||
817 | journal->j_tail_sequence, tid, block, freed); | ||
818 | |||
819 | journal->j_free += freed; | ||
820 | journal->j_tail_sequence = tid; | ||
821 | journal->j_tail = block; | ||
822 | write_unlock(&journal->j_state_lock); | ||
823 | } | ||
824 | |||
825 | /* | ||
826 | * This is a variaon of __jbd2_update_log_tail which checks for validity of | ||
827 | * provided log tail and locks j_checkpoint_mutex. So it is safe against races | ||
828 | * with other threads updating log tail. | ||
829 | */ | ||
830 | void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) | ||
831 | { | ||
832 | mutex_lock(&journal->j_checkpoint_mutex); | ||
833 | if (tid_gt(tid, journal->j_tail_sequence)) | ||
834 | __jbd2_update_log_tail(journal, tid, block); | ||
835 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
836 | } | ||
837 | |||
748 | struct jbd2_stats_proc_session { | 838 | struct jbd2_stats_proc_session { |
749 | journal_t *journal; | 839 | journal_t *journal; |
750 | struct transaction_stats_s *stats; | 840 | struct transaction_stats_s *stats; |
@@ -1113,40 +1203,45 @@ static int journal_reset(journal_t *journal) | |||
1113 | 1203 | ||
1114 | journal->j_max_transaction_buffers = journal->j_maxlen / 4; | 1204 | journal->j_max_transaction_buffers = journal->j_maxlen / 4; |
1115 | 1205 | ||
1116 | /* Add the dynamic fields and write it to disk. */ | ||
1117 | jbd2_journal_update_superblock(journal, 1); | ||
1118 | return jbd2_journal_start_thread(journal); | ||
1119 | } | ||
1120 | |||
1121 | /** | ||
1122 | * void jbd2_journal_update_superblock() - Update journal sb on disk. | ||
1123 | * @journal: The journal to update. | ||
1124 | * @wait: Set to '0' if you don't want to wait for IO completion. | ||
1125 | * | ||
1126 | * Update a journal's dynamic superblock fields and write it to disk, | ||
1127 | * optionally waiting for the IO to complete. | ||
1128 | */ | ||
1129 | void jbd2_journal_update_superblock(journal_t *journal, int wait) | ||
1130 | { | ||
1131 | journal_superblock_t *sb = journal->j_superblock; | ||
1132 | struct buffer_head *bh = journal->j_sb_buffer; | ||
1133 | |||
1134 | /* | 1206 | /* |
1135 | * As a special case, if the on-disk copy is already marked as needing | 1207 | * As a special case, if the on-disk copy is already marked as needing |
1136 | * no recovery (s_start == 0) and there are no outstanding transactions | 1208 | * no recovery (s_start == 0), then we can safely defer the superblock |
1137 | * in the filesystem, then we can safely defer the superblock update | 1209 | * update until the next commit by setting JBD2_FLUSHED. This avoids |
1138 | * until the next commit by setting JBD2_FLUSHED. This avoids | ||
1139 | * attempting a write to a potential-readonly device. | 1210 | * attempting a write to a potential-readonly device. |
1140 | */ | 1211 | */ |
1141 | if (sb->s_start == 0 && journal->j_tail_sequence == | 1212 | if (sb->s_start == 0) { |
1142 | journal->j_transaction_sequence) { | ||
1143 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " | 1213 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " |
1144 | "(start %ld, seq %d, errno %d)\n", | 1214 | "(start %ld, seq %d, errno %d)\n", |
1145 | journal->j_tail, journal->j_tail_sequence, | 1215 | journal->j_tail, journal->j_tail_sequence, |
1146 | journal->j_errno); | 1216 | journal->j_errno); |
1147 | goto out; | 1217 | journal->j_flags |= JBD2_FLUSHED; |
1218 | } else { | ||
1219 | /* Lock here to make assertions happy... */ | ||
1220 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1221 | /* | ||
1222 | * Update log tail information. We use WRITE_FUA since new | ||
1223 | * transaction will start reusing journal space and so we | ||
1224 | * must make sure information about current log tail is on | ||
1225 | * disk before that. | ||
1226 | */ | ||
1227 | jbd2_journal_update_sb_log_tail(journal, | ||
1228 | journal->j_tail_sequence, | ||
1229 | journal->j_tail, | ||
1230 | WRITE_FUA); | ||
1231 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1148 | } | 1232 | } |
1233 | return jbd2_journal_start_thread(journal); | ||
1234 | } | ||
1149 | 1235 | ||
1236 | static void jbd2_write_superblock(journal_t *journal, int write_op) | ||
1237 | { | ||
1238 | struct buffer_head *bh = journal->j_sb_buffer; | ||
1239 | int ret; | ||
1240 | |||
1241 | trace_jbd2_write_superblock(journal, write_op); | ||
1242 | if (!(journal->j_flags & JBD2_BARRIER)) | ||
1243 | write_op &= ~(REQ_FUA | REQ_FLUSH); | ||
1244 | lock_buffer(bh); | ||
1150 | if (buffer_write_io_error(bh)) { | 1245 | if (buffer_write_io_error(bh)) { |
1151 | /* | 1246 | /* |
1152 | * Oh, dear. A previous attempt to write the journal | 1247 | * Oh, dear. A previous attempt to write the journal |
@@ -1162,48 +1257,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1162 | clear_buffer_write_io_error(bh); | 1257 | clear_buffer_write_io_error(bh); |
1163 | set_buffer_uptodate(bh); | 1258 | set_buffer_uptodate(bh); |
1164 | } | 1259 | } |
1260 | get_bh(bh); | ||
1261 | bh->b_end_io = end_buffer_write_sync; | ||
1262 | ret = submit_bh(write_op, bh); | ||
1263 | wait_on_buffer(bh); | ||
1264 | if (buffer_write_io_error(bh)) { | ||
1265 | clear_buffer_write_io_error(bh); | ||
1266 | set_buffer_uptodate(bh); | ||
1267 | ret = -EIO; | ||
1268 | } | ||
1269 | if (ret) { | ||
1270 | printk(KERN_ERR "JBD2: Error %d detected when updating " | ||
1271 | "journal superblock for %s.\n", ret, | ||
1272 | journal->j_devname); | ||
1273 | } | ||
1274 | } | ||
1275 | |||
1276 | /** | ||
1277 | * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. | ||
1278 | * @journal: The journal to update. | ||
1279 | * @tail_tid: TID of the new transaction at the tail of the log | ||
1280 | * @tail_block: The first block of the transaction at the tail of the log | ||
1281 | * @write_op: With which operation should we write the journal sb | ||
1282 | * | ||
1283 | * Update a journal's superblock information about log tail and write it to | ||
1284 | * disk, waiting for the IO to complete. | ||
1285 | */ | ||
1286 | void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, | ||
1287 | unsigned long tail_block, int write_op) | ||
1288 | { | ||
1289 | journal_superblock_t *sb = journal->j_superblock; | ||
1290 | |||
1291 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
1292 | jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", | ||
1293 | tail_block, tail_tid); | ||
1294 | |||
1295 | sb->s_sequence = cpu_to_be32(tail_tid); | ||
1296 | sb->s_start = cpu_to_be32(tail_block); | ||
1297 | |||
1298 | jbd2_write_superblock(journal, write_op); | ||
1299 | |||
1300 | /* Log is no longer empty */ | ||
1301 | write_lock(&journal->j_state_lock); | ||
1302 | WARN_ON(!sb->s_sequence); | ||
1303 | journal->j_flags &= ~JBD2_FLUSHED; | ||
1304 | write_unlock(&journal->j_state_lock); | ||
1305 | } | ||
1306 | |||
1307 | /** | ||
1308 | * jbd2_mark_journal_empty() - Mark on disk journal as empty. | ||
1309 | * @journal: The journal to update. | ||
1310 | * | ||
1311 | * Update a journal's dynamic superblock fields to show that journal is empty. | ||
1312 | * Write updated superblock to disk waiting for IO to complete. | ||
1313 | */ | ||
1314 | static void jbd2_mark_journal_empty(journal_t *journal) | ||
1315 | { | ||
1316 | journal_superblock_t *sb = journal->j_superblock; | ||
1165 | 1317 | ||
1318 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
1166 | read_lock(&journal->j_state_lock); | 1319 | read_lock(&journal->j_state_lock); |
1167 | jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", | 1320 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", |
1168 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1321 | journal->j_tail_sequence); |
1169 | 1322 | ||
1170 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1323 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
1171 | sb->s_start = cpu_to_be32(journal->j_tail); | 1324 | sb->s_start = cpu_to_be32(0); |
1172 | sb->s_errno = cpu_to_be32(journal->j_errno); | ||
1173 | read_unlock(&journal->j_state_lock); | 1325 | read_unlock(&journal->j_state_lock); |
1174 | 1326 | ||
1175 | BUFFER_TRACE(bh, "marking dirty"); | 1327 | jbd2_write_superblock(journal, WRITE_FUA); |
1176 | mark_buffer_dirty(bh); | ||
1177 | if (wait) { | ||
1178 | sync_dirty_buffer(bh); | ||
1179 | if (buffer_write_io_error(bh)) { | ||
1180 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1181 | "when updating journal superblock for %s.\n", | ||
1182 | journal->j_devname); | ||
1183 | clear_buffer_write_io_error(bh); | ||
1184 | set_buffer_uptodate(bh); | ||
1185 | } | ||
1186 | } else | ||
1187 | write_dirty_buffer(bh, WRITE); | ||
1188 | |||
1189 | out: | ||
1190 | /* If we have just flushed the log (by marking s_start==0), then | ||
1191 | * any future commit will have to be careful to update the | ||
1192 | * superblock again to re-record the true start of the log. */ | ||
1193 | 1328 | ||
1329 | /* Log is no longer empty */ | ||
1194 | write_lock(&journal->j_state_lock); | 1330 | write_lock(&journal->j_state_lock); |
1195 | if (sb->s_start) | 1331 | journal->j_flags |= JBD2_FLUSHED; |
1196 | journal->j_flags &= ~JBD2_FLUSHED; | ||
1197 | else | ||
1198 | journal->j_flags |= JBD2_FLUSHED; | ||
1199 | write_unlock(&journal->j_state_lock); | 1332 | write_unlock(&journal->j_state_lock); |
1200 | } | 1333 | } |
1201 | 1334 | ||
1335 | |||
1336 | /** | ||
1337 | * jbd2_journal_update_sb_errno() - Update error in the journal. | ||
1338 | * @journal: The journal to update. | ||
1339 | * | ||
1340 | * Update a journal's errno. Write updated superblock to disk waiting for IO | ||
1341 | * to complete. | ||
1342 | */ | ||
1343 | static void jbd2_journal_update_sb_errno(journal_t *journal) | ||
1344 | { | ||
1345 | journal_superblock_t *sb = journal->j_superblock; | ||
1346 | |||
1347 | read_lock(&journal->j_state_lock); | ||
1348 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | ||
1349 | journal->j_errno); | ||
1350 | sb->s_errno = cpu_to_be32(journal->j_errno); | ||
1351 | read_unlock(&journal->j_state_lock); | ||
1352 | |||
1353 | jbd2_write_superblock(journal, WRITE_SYNC); | ||
1354 | } | ||
1355 | |||
1202 | /* | 1356 | /* |
1203 | * Read the superblock for a given journal, performing initial | 1357 | * Read the superblock for a given journal, performing initial |
1204 | * validation of the format. | 1358 | * validation of the format. |
1205 | */ | 1359 | */ |
1206 | |||
1207 | static int journal_get_superblock(journal_t *journal) | 1360 | static int journal_get_superblock(journal_t *journal) |
1208 | { | 1361 | { |
1209 | struct buffer_head *bh; | 1362 | struct buffer_head *bh; |
@@ -1397,14 +1550,11 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1397 | 1550 | ||
1398 | if (journal->j_sb_buffer) { | 1551 | if (journal->j_sb_buffer) { |
1399 | if (!is_journal_aborted(journal)) { | 1552 | if (!is_journal_aborted(journal)) { |
1400 | /* We can now mark the journal as empty. */ | 1553 | mutex_lock(&journal->j_checkpoint_mutex); |
1401 | journal->j_tail = 0; | 1554 | jbd2_mark_journal_empty(journal); |
1402 | journal->j_tail_sequence = | 1555 | mutex_unlock(&journal->j_checkpoint_mutex); |
1403 | ++journal->j_transaction_sequence; | 1556 | } else |
1404 | jbd2_journal_update_superblock(journal, 1); | ||
1405 | } else { | ||
1406 | err = -EIO; | 1557 | err = -EIO; |
1407 | } | ||
1408 | brelse(journal->j_sb_buffer); | 1558 | brelse(journal->j_sb_buffer); |
1409 | } | 1559 | } |
1410 | 1560 | ||
@@ -1551,61 +1701,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, | |||
1551 | EXPORT_SYMBOL(jbd2_journal_clear_features); | 1701 | EXPORT_SYMBOL(jbd2_journal_clear_features); |
1552 | 1702 | ||
1553 | /** | 1703 | /** |
1554 | * int jbd2_journal_update_format () - Update on-disk journal structure. | ||
1555 | * @journal: Journal to act on. | ||
1556 | * | ||
1557 | * Given an initialised but unloaded journal struct, poke about in the | ||
1558 | * on-disk structure to update it to the most recent supported version. | ||
1559 | */ | ||
1560 | int jbd2_journal_update_format (journal_t *journal) | ||
1561 | { | ||
1562 | journal_superblock_t *sb; | ||
1563 | int err; | ||
1564 | |||
1565 | err = journal_get_superblock(journal); | ||
1566 | if (err) | ||
1567 | return err; | ||
1568 | |||
1569 | sb = journal->j_superblock; | ||
1570 | |||
1571 | switch (be32_to_cpu(sb->s_header.h_blocktype)) { | ||
1572 | case JBD2_SUPERBLOCK_V2: | ||
1573 | return 0; | ||
1574 | case JBD2_SUPERBLOCK_V1: | ||
1575 | return journal_convert_superblock_v1(journal, sb); | ||
1576 | default: | ||
1577 | break; | ||
1578 | } | ||
1579 | return -EINVAL; | ||
1580 | } | ||
1581 | |||
1582 | static int journal_convert_superblock_v1(journal_t *journal, | ||
1583 | journal_superblock_t *sb) | ||
1584 | { | ||
1585 | int offset, blocksize; | ||
1586 | struct buffer_head *bh; | ||
1587 | |||
1588 | printk(KERN_WARNING | ||
1589 | "JBD2: Converting superblock from version 1 to 2.\n"); | ||
1590 | |||
1591 | /* Pre-initialise new fields to zero */ | ||
1592 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); | ||
1593 | blocksize = be32_to_cpu(sb->s_blocksize); | ||
1594 | memset(&sb->s_feature_compat, 0, blocksize-offset); | ||
1595 | |||
1596 | sb->s_nr_users = cpu_to_be32(1); | ||
1597 | sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); | ||
1598 | journal->j_format_version = 2; | ||
1599 | |||
1600 | bh = journal->j_sb_buffer; | ||
1601 | BUFFER_TRACE(bh, "marking dirty"); | ||
1602 | mark_buffer_dirty(bh); | ||
1603 | sync_dirty_buffer(bh); | ||
1604 | return 0; | ||
1605 | } | ||
1606 | |||
1607 | |||
1608 | /** | ||
1609 | * int jbd2_journal_flush () - Flush journal | 1704 | * int jbd2_journal_flush () - Flush journal |
1610 | * @journal: Journal to act on. | 1705 | * @journal: Journal to act on. |
1611 | * | 1706 | * |
@@ -1618,7 +1713,6 @@ int jbd2_journal_flush(journal_t *journal) | |||
1618 | { | 1713 | { |
1619 | int err = 0; | 1714 | int err = 0; |
1620 | transaction_t *transaction = NULL; | 1715 | transaction_t *transaction = NULL; |
1621 | unsigned long old_tail; | ||
1622 | 1716 | ||
1623 | write_lock(&journal->j_state_lock); | 1717 | write_lock(&journal->j_state_lock); |
1624 | 1718 | ||
@@ -1653,6 +1747,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1653 | if (is_journal_aborted(journal)) | 1747 | if (is_journal_aborted(journal)) |
1654 | return -EIO; | 1748 | return -EIO; |
1655 | 1749 | ||
1750 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1656 | jbd2_cleanup_journal_tail(journal); | 1751 | jbd2_cleanup_journal_tail(journal); |
1657 | 1752 | ||
1658 | /* Finally, mark the journal as really needing no recovery. | 1753 | /* Finally, mark the journal as really needing no recovery. |
@@ -1660,14 +1755,9 @@ int jbd2_journal_flush(journal_t *journal) | |||
1660 | * the magic code for a fully-recovered superblock. Any future | 1755 | * the magic code for a fully-recovered superblock. Any future |
1661 | * commits of data to the journal will restore the current | 1756 | * commits of data to the journal will restore the current |
1662 | * s_start value. */ | 1757 | * s_start value. */ |
1758 | jbd2_mark_journal_empty(journal); | ||
1759 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1663 | write_lock(&journal->j_state_lock); | 1760 | write_lock(&journal->j_state_lock); |
1664 | old_tail = journal->j_tail; | ||
1665 | journal->j_tail = 0; | ||
1666 | write_unlock(&journal->j_state_lock); | ||
1667 | jbd2_journal_update_superblock(journal, 1); | ||
1668 | write_lock(&journal->j_state_lock); | ||
1669 | journal->j_tail = old_tail; | ||
1670 | |||
1671 | J_ASSERT(!journal->j_running_transaction); | 1761 | J_ASSERT(!journal->j_running_transaction); |
1672 | J_ASSERT(!journal->j_committing_transaction); | 1762 | J_ASSERT(!journal->j_committing_transaction); |
1673 | J_ASSERT(!journal->j_checkpoint_transactions); | 1763 | J_ASSERT(!journal->j_checkpoint_transactions); |
@@ -1707,8 +1797,12 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1707 | write ? "Clearing" : "Ignoring"); | 1797 | write ? "Clearing" : "Ignoring"); |
1708 | 1798 | ||
1709 | err = jbd2_journal_skip_recovery(journal); | 1799 | err = jbd2_journal_skip_recovery(journal); |
1710 | if (write) | 1800 | if (write) { |
1711 | jbd2_journal_update_superblock(journal, 1); | 1801 | /* Lock to make assertions happy... */ |
1802 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1803 | jbd2_mark_journal_empty(journal); | ||
1804 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1805 | } | ||
1712 | 1806 | ||
1713 | no_recovery: | 1807 | no_recovery: |
1714 | return err; | 1808 | return err; |
@@ -1758,7 +1852,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) | |||
1758 | __jbd2_journal_abort_hard(journal); | 1852 | __jbd2_journal_abort_hard(journal); |
1759 | 1853 | ||
1760 | if (errno) | 1854 | if (errno) |
1761 | jbd2_journal_update_superblock(journal, 1); | 1855 | jbd2_journal_update_sb_errno(journal); |
1762 | } | 1856 | } |
1763 | 1857 | ||
1764 | /** | 1858 | /** |
@@ -2016,7 +2110,7 @@ static struct kmem_cache *jbd2_journal_head_cache; | |||
2016 | static atomic_t nr_journal_heads = ATOMIC_INIT(0); | 2110 | static atomic_t nr_journal_heads = ATOMIC_INIT(0); |
2017 | #endif | 2111 | #endif |
2018 | 2112 | ||
2019 | static int journal_init_jbd2_journal_head_cache(void) | 2113 | static int jbd2_journal_init_journal_head_cache(void) |
2020 | { | 2114 | { |
2021 | int retval; | 2115 | int retval; |
2022 | 2116 | ||
@@ -2034,7 +2128,7 @@ static int journal_init_jbd2_journal_head_cache(void) | |||
2034 | return retval; | 2128 | return retval; |
2035 | } | 2129 | } |
2036 | 2130 | ||
2037 | static void jbd2_journal_destroy_jbd2_journal_head_cache(void) | 2131 | static void jbd2_journal_destroy_journal_head_cache(void) |
2038 | { | 2132 | { |
2039 | if (jbd2_journal_head_cache) { | 2133 | if (jbd2_journal_head_cache) { |
2040 | kmem_cache_destroy(jbd2_journal_head_cache); | 2134 | kmem_cache_destroy(jbd2_journal_head_cache); |
@@ -2322,7 +2416,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void) | |||
2322 | 2416 | ||
2323 | struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; | 2417 | struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; |
2324 | 2418 | ||
2325 | static int __init journal_init_handle_cache(void) | 2419 | static int __init jbd2_journal_init_handle_cache(void) |
2326 | { | 2420 | { |
2327 | jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); | 2421 | jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); |
2328 | if (jbd2_handle_cache == NULL) { | 2422 | if (jbd2_handle_cache == NULL) { |
@@ -2357,17 +2451,20 @@ static int __init journal_init_caches(void) | |||
2357 | 2451 | ||
2358 | ret = jbd2_journal_init_revoke_caches(); | 2452 | ret = jbd2_journal_init_revoke_caches(); |
2359 | if (ret == 0) | 2453 | if (ret == 0) |
2360 | ret = journal_init_jbd2_journal_head_cache(); | 2454 | ret = jbd2_journal_init_journal_head_cache(); |
2455 | if (ret == 0) | ||
2456 | ret = jbd2_journal_init_handle_cache(); | ||
2361 | if (ret == 0) | 2457 | if (ret == 0) |
2362 | ret = journal_init_handle_cache(); | 2458 | ret = jbd2_journal_init_transaction_cache(); |
2363 | return ret; | 2459 | return ret; |
2364 | } | 2460 | } |
2365 | 2461 | ||
2366 | static void jbd2_journal_destroy_caches(void) | 2462 | static void jbd2_journal_destroy_caches(void) |
2367 | { | 2463 | { |
2368 | jbd2_journal_destroy_revoke_caches(); | 2464 | jbd2_journal_destroy_revoke_caches(); |
2369 | jbd2_journal_destroy_jbd2_journal_head_cache(); | 2465 | jbd2_journal_destroy_journal_head_cache(); |
2370 | jbd2_journal_destroy_handle_cache(); | 2466 | jbd2_journal_destroy_handle_cache(); |
2467 | jbd2_journal_destroy_transaction_cache(); | ||
2371 | jbd2_journal_destroy_slabs(); | 2468 | jbd2_journal_destroy_slabs(); |
2372 | } | 2469 | } |
2373 | 2470 | ||
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index da6d7baf1390..c1a03354a22f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/crc32.h> | 23 | #include <linux/crc32.h> |
24 | #include <linux/blkdev.h> | ||
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal) | |||
265 | err2 = sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
266 | if (!err) | 267 | if (!err) |
267 | err = err2; | 268 | err = err2; |
268 | 269 | /* Make sure all replayed data is on permanent storage */ | |
270 | if (journal->j_flags & JBD2_BARRIER) | ||
271 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | ||
269 | return err; | 272 | return err; |
270 | } | 273 | } |
271 | 274 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 30b2867d6cc9..6973705d6a3d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void) | |||
208 | J_ASSERT(!jbd2_revoke_record_cache); | 208 | J_ASSERT(!jbd2_revoke_record_cache); |
209 | J_ASSERT(!jbd2_revoke_table_cache); | 209 | J_ASSERT(!jbd2_revoke_table_cache); |
210 | 210 | ||
211 | jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", | 211 | jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, |
212 | sizeof(struct jbd2_revoke_record_s), | 212 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); |
213 | 0, | ||
214 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
215 | NULL); | ||
216 | if (!jbd2_revoke_record_cache) | 213 | if (!jbd2_revoke_record_cache) |
217 | goto record_cache_failure; | 214 | goto record_cache_failure; |
218 | 215 | ||
219 | jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", | 216 | jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, |
220 | sizeof(struct jbd2_revoke_table_s), | 217 | SLAB_TEMPORARY); |
221 | 0, SLAB_TEMPORARY, NULL); | ||
222 | if (!jbd2_revoke_table_cache) | 218 | if (!jbd2_revoke_table_cache) |
223 | goto table_cache_failure; | 219 | goto table_cache_failure; |
224 | return 0; | 220 | return 0; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5aba56e1fd5..ddcd3549c6c2 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -33,6 +33,35 @@ | |||
33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
34 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | 34 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); |
35 | 35 | ||
36 | static struct kmem_cache *transaction_cache; | ||
37 | int __init jbd2_journal_init_transaction_cache(void) | ||
38 | { | ||
39 | J_ASSERT(!transaction_cache); | ||
40 | transaction_cache = kmem_cache_create("jbd2_transaction_s", | ||
41 | sizeof(transaction_t), | ||
42 | 0, | ||
43 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
44 | NULL); | ||
45 | if (transaction_cache) | ||
46 | return 0; | ||
47 | return -ENOMEM; | ||
48 | } | ||
49 | |||
50 | void jbd2_journal_destroy_transaction_cache(void) | ||
51 | { | ||
52 | if (transaction_cache) { | ||
53 | kmem_cache_destroy(transaction_cache); | ||
54 | transaction_cache = NULL; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | void jbd2_journal_free_transaction(transaction_t *transaction) | ||
59 | { | ||
60 | if (unlikely(ZERO_OR_NULL_PTR(transaction))) | ||
61 | return; | ||
62 | kmem_cache_free(transaction_cache, transaction); | ||
63 | } | ||
64 | |||
36 | /* | 65 | /* |
37 | * jbd2_get_transaction: obtain a new transaction_t object. | 66 | * jbd2_get_transaction: obtain a new transaction_t object. |
38 | * | 67 | * |
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
133 | 162 | ||
134 | alloc_transaction: | 163 | alloc_transaction: |
135 | if (!journal->j_running_transaction) { | 164 | if (!journal->j_running_transaction) { |
136 | new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); | 165 | new_transaction = kmem_cache_alloc(transaction_cache, |
166 | gfp_mask | __GFP_ZERO); | ||
137 | if (!new_transaction) { | 167 | if (!new_transaction) { |
138 | /* | 168 | /* |
139 | * If __GFP_FS is not present, then we may be | 169 | * If __GFP_FS is not present, then we may be |
@@ -162,7 +192,7 @@ repeat: | |||
162 | if (is_journal_aborted(journal) || | 192 | if (is_journal_aborted(journal) || |
163 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { | 193 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { |
164 | read_unlock(&journal->j_state_lock); | 194 | read_unlock(&journal->j_state_lock); |
165 | kfree(new_transaction); | 195 | jbd2_journal_free_transaction(new_transaction); |
166 | return -EROFS; | 196 | return -EROFS; |
167 | } | 197 | } |
168 | 198 | ||
@@ -284,7 +314,7 @@ repeat: | |||
284 | read_unlock(&journal->j_state_lock); | 314 | read_unlock(&journal->j_state_lock); |
285 | 315 | ||
286 | lock_map_acquire(&handle->h_lockdep_map); | 316 | lock_map_acquire(&handle->h_lockdep_map); |
287 | kfree(new_transaction); | 317 | jbd2_journal_free_transaction(new_transaction); |
288 | return 0; | 318 | return 0; |
289 | } | 319 | } |
290 | 320 | ||
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1549 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1579 | * of these pointers, it could go bad. Generally the caller needs to re-read |
1550 | * the pointer from the transaction_t. | 1580 | * the pointer from the transaction_t. |
1551 | * | 1581 | * |
1552 | * Called under j_list_lock. The journal may not be locked. | 1582 | * Called under j_list_lock. |
1553 | */ | 1583 | */ |
1554 | void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | 1584 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) |
1555 | { | 1585 | { |
1556 | struct journal_head **list = NULL; | 1586 | struct journal_head **list = NULL; |
1557 | transaction_t *transaction; | 1587 | transaction_t *transaction; |
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1646 | spin_lock(&journal->j_list_lock); | 1676 | spin_lock(&journal->j_list_lock); |
1647 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1677 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1648 | /* written-back checkpointed metadata buffer */ | 1678 | /* written-back checkpointed metadata buffer */ |
1649 | if (jh->b_jlist == BJ_None) { | 1679 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1650 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1680 | __jbd2_journal_remove_checkpoint(jh); |
1651 | __jbd2_journal_remove_checkpoint(jh); | ||
1652 | } | ||
1653 | } | 1681 | } |
1654 | spin_unlock(&journal->j_list_lock); | 1682 | spin_unlock(&journal->j_list_lock); |
1655 | out: | 1683 | out: |
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked: | |||
1949 | clear_buffer_mapped(bh); | 1977 | clear_buffer_mapped(bh); |
1950 | clear_buffer_req(bh); | 1978 | clear_buffer_req(bh); |
1951 | clear_buffer_new(bh); | 1979 | clear_buffer_new(bh); |
1980 | clear_buffer_delay(bh); | ||
1981 | clear_buffer_unwritten(bh); | ||
1952 | bh->b_bdev = NULL; | 1982 | bh->b_bdev = NULL; |
1953 | return may_free; | 1983 | return may_free; |
1954 | } | 1984 | } |
diff --git a/fs/libfs.c b/fs/libfs.c index 722e0d5ba182..4a0d1f06da57 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * Library for filesystems writers. | 3 | * Library for filesystems writers. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/module.h> | 6 | #include <linux/export.h> |
7 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index f848b52c67b1..3ddcbb1c0a43 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c | |||
@@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = { | |||
598 | PROC(GRANTED_RES, res, norep), | 598 | PROC(GRANTED_RES, res, norep), |
599 | }; | 599 | }; |
600 | 600 | ||
601 | struct rpc_version nlm_version4 = { | 601 | const struct rpc_version nlm_version4 = { |
602 | .number = 4, | 602 | .number = 4, |
603 | .nrprocs = ARRAY_SIZE(nlm4_procedures), | 603 | .nrprocs = ARRAY_SIZE(nlm4_procedures), |
604 | .procs = nlm4_procedures, | 604 | .procs = nlm4_procedures, |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8d4ea8351e3d..ba1dc2eebd1e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -62,7 +62,8 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) | |||
62 | 62 | ||
63 | host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, | 63 | host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, |
64 | nlm_init->protocol, nlm_version, | 64 | nlm_init->protocol, nlm_version, |
65 | nlm_init->hostname, nlm_init->noresvport); | 65 | nlm_init->hostname, nlm_init->noresvport, |
66 | nlm_init->net); | ||
66 | if (host == NULL) { | 67 | if (host == NULL) { |
67 | lockd_down(); | 68 | lockd_down(); |
68 | return ERR_PTR(-ENOLCK); | 69 | return ERR_PTR(-ENOLCK); |
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 180ac34feb9a..3d35e3e80c1c 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c | |||
@@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = { | |||
596 | PROC(GRANTED_RES, res, norep), | 596 | PROC(GRANTED_RES, res, norep), |
597 | }; | 597 | }; |
598 | 598 | ||
599 | static struct rpc_version nlm_version1 = { | 599 | static const struct rpc_version nlm_version1 = { |
600 | .number = 1, | 600 | .number = 1, |
601 | .nrprocs = ARRAY_SIZE(nlm_procedures), | 601 | .nrprocs = ARRAY_SIZE(nlm_procedures), |
602 | .procs = nlm_procedures, | 602 | .procs = nlm_procedures, |
603 | }; | 603 | }; |
604 | 604 | ||
605 | static struct rpc_version nlm_version3 = { | 605 | static const struct rpc_version nlm_version3 = { |
606 | .number = 3, | 606 | .number = 3, |
607 | .nrprocs = ARRAY_SIZE(nlm_procedures), | 607 | .nrprocs = ARRAY_SIZE(nlm_procedures), |
608 | .procs = nlm_procedures, | 608 | .procs = nlm_procedures, |
609 | }; | 609 | }; |
610 | 610 | ||
611 | static struct rpc_version *nlm_versions[] = { | 611 | static const struct rpc_version *nlm_versions[] = { |
612 | [1] = &nlm_version1, | 612 | [1] = &nlm_version1, |
613 | [3] = &nlm_version3, | 613 | [3] = &nlm_version3, |
614 | #ifdef CONFIG_LOCKD_V4 | 614 | #ifdef CONFIG_LOCKD_V4 |
@@ -618,7 +618,7 @@ static struct rpc_version *nlm_versions[] = { | |||
618 | 618 | ||
619 | static struct rpc_stat nlm_rpc_stats; | 619 | static struct rpc_stat nlm_rpc_stats; |
620 | 620 | ||
621 | struct rpc_program nlm_program = { | 621 | const struct rpc_program nlm_program = { |
622 | .name = "lockd", | 622 | .name = "lockd", |
623 | .number = NLM_PROGRAM, | 623 | .number = NLM_PROGRAM, |
624 | .nrvers = ARRAY_SIZE(nlm_versions), | 624 | .nrvers = ARRAY_SIZE(nlm_versions), |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 6f29836ec0cb..eb75ca7c2d6e 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -17,6 +17,8 @@ | |||
17 | #include <linux/lockd/lockd.h> | 17 | #include <linux/lockd/lockd.h> |
18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
19 | 19 | ||
20 | #include <linux/sunrpc/svc_xprt.h> | ||
21 | |||
20 | #include <net/ipv6.h> | 22 | #include <net/ipv6.h> |
21 | 23 | ||
22 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE | 24 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE |
@@ -54,6 +56,7 @@ struct nlm_lookup_host_info { | |||
54 | const char *hostname; /* remote's hostname */ | 56 | const char *hostname; /* remote's hostname */ |
55 | const size_t hostname_len; /* its length */ | 57 | const size_t hostname_len; /* its length */ |
56 | const int noresvport; /* use non-priv port */ | 58 | const int noresvport; /* use non-priv port */ |
59 | struct net *net; /* network namespace to bind */ | ||
57 | }; | 60 | }; |
58 | 61 | ||
59 | /* | 62 | /* |
@@ -155,6 +158,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, | |||
155 | INIT_LIST_HEAD(&host->h_reclaim); | 158 | INIT_LIST_HEAD(&host->h_reclaim); |
156 | host->h_nsmhandle = nsm; | 159 | host->h_nsmhandle = nsm; |
157 | host->h_addrbuf = nsm->sm_addrbuf; | 160 | host->h_addrbuf = nsm->sm_addrbuf; |
161 | host->net = ni->net; | ||
158 | 162 | ||
159 | out: | 163 | out: |
160 | return host; | 164 | return host; |
@@ -206,7 +210,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, | |||
206 | const unsigned short protocol, | 210 | const unsigned short protocol, |
207 | const u32 version, | 211 | const u32 version, |
208 | const char *hostname, | 212 | const char *hostname, |
209 | int noresvport) | 213 | int noresvport, |
214 | struct net *net) | ||
210 | { | 215 | { |
211 | struct nlm_lookup_host_info ni = { | 216 | struct nlm_lookup_host_info ni = { |
212 | .server = 0, | 217 | .server = 0, |
@@ -217,6 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, | |||
217 | .hostname = hostname, | 222 | .hostname = hostname, |
218 | .hostname_len = strlen(hostname), | 223 | .hostname_len = strlen(hostname), |
219 | .noresvport = noresvport, | 224 | .noresvport = noresvport, |
225 | .net = net, | ||
220 | }; | 226 | }; |
221 | struct hlist_head *chain; | 227 | struct hlist_head *chain; |
222 | struct hlist_node *pos; | 228 | struct hlist_node *pos; |
@@ -231,6 +237,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, | |||
231 | 237 | ||
232 | chain = &nlm_client_hosts[nlm_hash_address(sap)]; | 238 | chain = &nlm_client_hosts[nlm_hash_address(sap)]; |
233 | hlist_for_each_entry(host, pos, chain, h_hash) { | 239 | hlist_for_each_entry(host, pos, chain, h_hash) { |
240 | if (host->net != net) | ||
241 | continue; | ||
234 | if (!rpc_cmp_addr(nlm_addr(host), sap)) | 242 | if (!rpc_cmp_addr(nlm_addr(host), sap)) |
235 | continue; | 243 | continue; |
236 | 244 | ||
@@ -318,6 +326,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, | |||
318 | struct nsm_handle *nsm = NULL; | 326 | struct nsm_handle *nsm = NULL; |
319 | struct sockaddr *src_sap = svc_daddr(rqstp); | 327 | struct sockaddr *src_sap = svc_daddr(rqstp); |
320 | size_t src_len = rqstp->rq_daddrlen; | 328 | size_t src_len = rqstp->rq_daddrlen; |
329 | struct net *net = rqstp->rq_xprt->xpt_net; | ||
321 | struct nlm_lookup_host_info ni = { | 330 | struct nlm_lookup_host_info ni = { |
322 | .server = 1, | 331 | .server = 1, |
323 | .sap = svc_addr(rqstp), | 332 | .sap = svc_addr(rqstp), |
@@ -326,6 +335,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, | |||
326 | .version = rqstp->rq_vers, | 335 | .version = rqstp->rq_vers, |
327 | .hostname = hostname, | 336 | .hostname = hostname, |
328 | .hostname_len = hostname_len, | 337 | .hostname_len = hostname_len, |
338 | .net = net, | ||
329 | }; | 339 | }; |
330 | 340 | ||
331 | dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, | 341 | dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, |
@@ -339,6 +349,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, | |||
339 | 349 | ||
340 | chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; | 350 | chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; |
341 | hlist_for_each_entry(host, pos, chain, h_hash) { | 351 | hlist_for_each_entry(host, pos, chain, h_hash) { |
352 | if (host->net != net) | ||
353 | continue; | ||
342 | if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) | 354 | if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) |
343 | continue; | 355 | continue; |
344 | 356 | ||
@@ -431,7 +443,7 @@ nlm_bind_host(struct nlm_host *host) | |||
431 | .to_retries = 5U, | 443 | .to_retries = 5U, |
432 | }; | 444 | }; |
433 | struct rpc_create_args args = { | 445 | struct rpc_create_args args = { |
434 | .net = &init_net, | 446 | .net = host->net, |
435 | .protocol = host->h_proto, | 447 | .protocol = host->h_proto, |
436 | .address = nlm_addr(host), | 448 | .address = nlm_addr(host), |
437 | .addrsize = host->h_addrlen, | 449 | .addrsize = host->h_addrlen, |
@@ -553,12 +565,8 @@ void nlm_host_rebooted(const struct nlm_reboot *info) | |||
553 | nsm_release(nsm); | 565 | nsm_release(nsm); |
554 | } | 566 | } |
555 | 567 | ||
556 | /* | ||
557 | * Shut down the hosts module. | ||
558 | * Note that this routine is called only at server shutdown time. | ||
559 | */ | ||
560 | void | 568 | void |
561 | nlm_shutdown_hosts(void) | 569 | nlm_shutdown_hosts_net(struct net *net) |
562 | { | 570 | { |
563 | struct hlist_head *chain; | 571 | struct hlist_head *chain; |
564 | struct hlist_node *pos; | 572 | struct hlist_node *pos; |
@@ -570,6 +578,8 @@ nlm_shutdown_hosts(void) | |||
570 | /* First, make all hosts eligible for gc */ | 578 | /* First, make all hosts eligible for gc */ |
571 | dprintk("lockd: nuking all hosts...\n"); | 579 | dprintk("lockd: nuking all hosts...\n"); |
572 | for_each_host(host, pos, chain, nlm_server_hosts) { | 580 | for_each_host(host, pos, chain, nlm_server_hosts) { |
581 | if (net && host->net != net) | ||
582 | continue; | ||
573 | host->h_expires = jiffies - 1; | 583 | host->h_expires = jiffies - 1; |
574 | if (host->h_rpcclnt) { | 584 | if (host->h_rpcclnt) { |
575 | rpc_shutdown_client(host->h_rpcclnt); | 585 | rpc_shutdown_client(host->h_rpcclnt); |
@@ -580,15 +590,29 @@ nlm_shutdown_hosts(void) | |||
580 | /* Then, perform a garbage collection pass */ | 590 | /* Then, perform a garbage collection pass */ |
581 | nlm_gc_hosts(); | 591 | nlm_gc_hosts(); |
582 | mutex_unlock(&nlm_host_mutex); | 592 | mutex_unlock(&nlm_host_mutex); |
593 | } | ||
594 | |||
595 | /* | ||
596 | * Shut down the hosts module. | ||
597 | * Note that this routine is called only at server shutdown time. | ||
598 | */ | ||
599 | void | ||
600 | nlm_shutdown_hosts(void) | ||
601 | { | ||
602 | struct hlist_head *chain; | ||
603 | struct hlist_node *pos; | ||
604 | struct nlm_host *host; | ||
605 | |||
606 | nlm_shutdown_hosts_net(NULL); | ||
583 | 607 | ||
584 | /* complain if any hosts are left */ | 608 | /* complain if any hosts are left */ |
585 | if (nrhosts != 0) { | 609 | if (nrhosts != 0) { |
586 | printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); | 610 | printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); |
587 | dprintk("lockd: %lu hosts left:\n", nrhosts); | 611 | dprintk("lockd: %lu hosts left:\n", nrhosts); |
588 | for_each_host(host, pos, chain, nlm_server_hosts) { | 612 | for_each_host(host, pos, chain, nlm_server_hosts) { |
589 | dprintk(" %s (cnt %d use %d exp %ld)\n", | 613 | dprintk(" %s (cnt %d use %d exp %ld net %p)\n", |
590 | host->h_name, atomic_read(&host->h_count), | 614 | host->h_name, atomic_read(&host->h_count), |
591 | host->h_inuse, host->h_expires); | 615 | host->h_inuse, host->h_expires, host->net); |
592 | } | 616 | } |
593 | } | 617 | } |
594 | } | 618 | } |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 65ba36b80a9e..7ef14b3c5bee 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -47,7 +47,7 @@ struct nsm_res { | |||
47 | u32 state; | 47 | u32 state; |
48 | }; | 48 | }; |
49 | 49 | ||
50 | static struct rpc_program nsm_program; | 50 | static const struct rpc_program nsm_program; |
51 | static LIST_HEAD(nsm_handles); | 51 | static LIST_HEAD(nsm_handles); |
52 | static DEFINE_SPINLOCK(nsm_lock); | 52 | static DEFINE_SPINLOCK(nsm_lock); |
53 | 53 | ||
@@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) | |||
62 | return (struct sockaddr *)&nsm->sm_addr; | 62 | return (struct sockaddr *)&nsm->sm_addr; |
63 | } | 63 | } |
64 | 64 | ||
65 | static struct rpc_clnt *nsm_create(void) | 65 | static struct rpc_clnt *nsm_create(struct net *net) |
66 | { | 66 | { |
67 | struct sockaddr_in sin = { | 67 | struct sockaddr_in sin = { |
68 | .sin_family = AF_INET, | 68 | .sin_family = AF_INET, |
69 | .sin_addr.s_addr = htonl(INADDR_LOOPBACK), | 69 | .sin_addr.s_addr = htonl(INADDR_LOOPBACK), |
70 | }; | 70 | }; |
71 | struct rpc_create_args args = { | 71 | struct rpc_create_args args = { |
72 | .net = &init_net, | 72 | .net = net, |
73 | .protocol = XPRT_TRANSPORT_UDP, | 73 | .protocol = XPRT_TRANSPORT_UDP, |
74 | .address = (struct sockaddr *)&sin, | 74 | .address = (struct sockaddr *)&sin, |
75 | .addrsize = sizeof(sin), | 75 | .addrsize = sizeof(sin), |
@@ -83,7 +83,8 @@ static struct rpc_clnt *nsm_create(void) | |||
83 | return rpc_create(&args); | 83 | return rpc_create(&args); |
84 | } | 84 | } |
85 | 85 | ||
86 | static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) | 86 | static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, |
87 | struct net *net) | ||
87 | { | 88 | { |
88 | struct rpc_clnt *clnt; | 89 | struct rpc_clnt *clnt; |
89 | int status; | 90 | int status; |
@@ -99,7 +100,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) | |||
99 | .rpc_resp = res, | 100 | .rpc_resp = res, |
100 | }; | 101 | }; |
101 | 102 | ||
102 | clnt = nsm_create(); | 103 | clnt = nsm_create(net); |
103 | if (IS_ERR(clnt)) { | 104 | if (IS_ERR(clnt)) { |
104 | status = PTR_ERR(clnt); | 105 | status = PTR_ERR(clnt); |
105 | dprintk("lockd: failed to create NSM upcall transport, " | 106 | dprintk("lockd: failed to create NSM upcall transport, " |
@@ -149,7 +150,7 @@ int nsm_monitor(const struct nlm_host *host) | |||
149 | */ | 150 | */ |
150 | nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; | 151 | nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; |
151 | 152 | ||
152 | status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); | 153 | status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net); |
153 | if (unlikely(res.status != 0)) | 154 | if (unlikely(res.status != 0)) |
154 | status = -EIO; | 155 | status = -EIO; |
155 | if (unlikely(status < 0)) { | 156 | if (unlikely(status < 0)) { |
@@ -183,7 +184,7 @@ void nsm_unmonitor(const struct nlm_host *host) | |||
183 | && nsm->sm_monitored && !nsm->sm_sticky) { | 184 | && nsm->sm_monitored && !nsm->sm_sticky) { |
184 | dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); | 185 | dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); |
185 | 186 | ||
186 | status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res); | 187 | status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net); |
187 | if (res.status != 0) | 188 | if (res.status != 0) |
188 | status = -EIO; | 189 | status = -EIO; |
189 | if (status < 0) | 190 | if (status < 0) |
@@ -534,19 +535,19 @@ static struct rpc_procinfo nsm_procedures[] = { | |||
534 | }, | 535 | }, |
535 | }; | 536 | }; |
536 | 537 | ||
537 | static struct rpc_version nsm_version1 = { | 538 | static const struct rpc_version nsm_version1 = { |
538 | .number = 1, | 539 | .number = 1, |
539 | .nrprocs = ARRAY_SIZE(nsm_procedures), | 540 | .nrprocs = ARRAY_SIZE(nsm_procedures), |
540 | .procs = nsm_procedures | 541 | .procs = nsm_procedures |
541 | }; | 542 | }; |
542 | 543 | ||
543 | static struct rpc_version * nsm_version[] = { | 544 | static const struct rpc_version *nsm_version[] = { |
544 | [1] = &nsm_version1, | 545 | [1] = &nsm_version1, |
545 | }; | 546 | }; |
546 | 547 | ||
547 | static struct rpc_stat nsm_stats; | 548 | static struct rpc_stat nsm_stats; |
548 | 549 | ||
549 | static struct rpc_program nsm_program = { | 550 | static const struct rpc_program nsm_program = { |
550 | .name = "statd", | 551 | .name = "statd", |
551 | .number = NSM_PROGRAM, | 552 | .number = NSM_PROGRAM, |
552 | .nrvers = ARRAY_SIZE(nsm_version), | 553 | .nrvers = ARRAY_SIZE(nsm_version), |
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h new file mode 100644 index 000000000000..ce227e0fbc5c --- /dev/null +++ b/fs/lockd/netns.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef __LOCKD_NETNS_H__ | ||
2 | #define __LOCKD_NETNS_H__ | ||
3 | |||
4 | #include <net/netns/generic.h> | ||
5 | |||
6 | struct lockd_net { | ||
7 | unsigned int nlmsvc_users; | ||
8 | }; | ||
9 | |||
10 | extern int lockd_net_id; | ||
11 | |||
12 | #endif | ||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index c061b9aa7ddb..2774e1013b34 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <linux/lockd/lockd.h> | 35 | #include <linux/lockd/lockd.h> |
36 | #include <linux/nfs.h> | 36 | #include <linux/nfs.h> |
37 | 37 | ||
38 | #include "netns.h" | ||
39 | |||
38 | #define NLMDBG_FACILITY NLMDBG_SVC | 40 | #define NLMDBG_FACILITY NLMDBG_SVC |
39 | #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) | 41 | #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) |
40 | #define ALLOWED_SIGS (sigmask(SIGKILL)) | 42 | #define ALLOWED_SIGS (sigmask(SIGKILL)) |
@@ -50,6 +52,8 @@ static struct task_struct *nlmsvc_task; | |||
50 | static struct svc_rqst *nlmsvc_rqst; | 52 | static struct svc_rqst *nlmsvc_rqst; |
51 | unsigned long nlmsvc_timeout; | 53 | unsigned long nlmsvc_timeout; |
52 | 54 | ||
55 | int lockd_net_id; | ||
56 | |||
53 | /* | 57 | /* |
54 | * These can be set at insmod time (useful for NFS as root filesystem), | 58 | * These can be set at insmod time (useful for NFS as root filesystem), |
55 | * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 | 59 | * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 |
@@ -189,27 +193,29 @@ lockd(void *vrqstp) | |||
189 | } | 193 | } |
190 | 194 | ||
191 | static int create_lockd_listener(struct svc_serv *serv, const char *name, | 195 | static int create_lockd_listener(struct svc_serv *serv, const char *name, |
192 | const int family, const unsigned short port) | 196 | struct net *net, const int family, |
197 | const unsigned short port) | ||
193 | { | 198 | { |
194 | struct svc_xprt *xprt; | 199 | struct svc_xprt *xprt; |
195 | 200 | ||
196 | xprt = svc_find_xprt(serv, name, family, 0); | 201 | xprt = svc_find_xprt(serv, name, net, family, 0); |
197 | if (xprt == NULL) | 202 | if (xprt == NULL) |
198 | return svc_create_xprt(serv, name, &init_net, family, port, | 203 | return svc_create_xprt(serv, name, net, family, port, |
199 | SVC_SOCK_DEFAULTS); | 204 | SVC_SOCK_DEFAULTS); |
200 | svc_xprt_put(xprt); | 205 | svc_xprt_put(xprt); |
201 | return 0; | 206 | return 0; |
202 | } | 207 | } |
203 | 208 | ||
204 | static int create_lockd_family(struct svc_serv *serv, const int family) | 209 | static int create_lockd_family(struct svc_serv *serv, struct net *net, |
210 | const int family) | ||
205 | { | 211 | { |
206 | int err; | 212 | int err; |
207 | 213 | ||
208 | err = create_lockd_listener(serv, "udp", family, nlm_udpport); | 214 | err = create_lockd_listener(serv, "udp", net, family, nlm_udpport); |
209 | if (err < 0) | 215 | if (err < 0) |
210 | return err; | 216 | return err; |
211 | 217 | ||
212 | return create_lockd_listener(serv, "tcp", family, nlm_tcpport); | 218 | return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport); |
213 | } | 219 | } |
214 | 220 | ||
215 | /* | 221 | /* |
@@ -222,16 +228,16 @@ static int create_lockd_family(struct svc_serv *serv, const int family) | |||
222 | * Returns zero if all listeners are available; otherwise a | 228 | * Returns zero if all listeners are available; otherwise a |
223 | * negative errno value is returned. | 229 | * negative errno value is returned. |
224 | */ | 230 | */ |
225 | static int make_socks(struct svc_serv *serv) | 231 | static int make_socks(struct svc_serv *serv, struct net *net) |
226 | { | 232 | { |
227 | static int warned; | 233 | static int warned; |
228 | int err; | 234 | int err; |
229 | 235 | ||
230 | err = create_lockd_family(serv, PF_INET); | 236 | err = create_lockd_family(serv, net, PF_INET); |
231 | if (err < 0) | 237 | if (err < 0) |
232 | goto out_err; | 238 | goto out_err; |
233 | 239 | ||
234 | err = create_lockd_family(serv, PF_INET6); | 240 | err = create_lockd_family(serv, net, PF_INET6); |
235 | if (err < 0 && err != -EAFNOSUPPORT) | 241 | if (err < 0 && err != -EAFNOSUPPORT) |
236 | goto out_err; | 242 | goto out_err; |
237 | 243 | ||
@@ -245,6 +251,47 @@ out_err: | |||
245 | return err; | 251 | return err; |
246 | } | 252 | } |
247 | 253 | ||
254 | static int lockd_up_net(struct net *net) | ||
255 | { | ||
256 | struct lockd_net *ln = net_generic(net, lockd_net_id); | ||
257 | struct svc_serv *serv = nlmsvc_rqst->rq_server; | ||
258 | int error; | ||
259 | |||
260 | if (ln->nlmsvc_users) | ||
261 | return 0; | ||
262 | |||
263 | error = svc_rpcb_setup(serv, net); | ||
264 | if (error) | ||
265 | goto err_rpcb; | ||
266 | |||
267 | error = make_socks(serv, net); | ||
268 | if (error < 0) | ||
269 | goto err_socks; | ||
270 | return 0; | ||
271 | |||
272 | err_socks: | ||
273 | svc_rpcb_cleanup(serv, net); | ||
274 | err_rpcb: | ||
275 | return error; | ||
276 | } | ||
277 | |||
278 | static void lockd_down_net(struct net *net) | ||
279 | { | ||
280 | struct lockd_net *ln = net_generic(net, lockd_net_id); | ||
281 | struct svc_serv *serv = nlmsvc_rqst->rq_server; | ||
282 | |||
283 | if (ln->nlmsvc_users) { | ||
284 | if (--ln->nlmsvc_users == 0) { | ||
285 | nlm_shutdown_hosts_net(net); | ||
286 | svc_shutdown_net(serv, net); | ||
287 | } | ||
288 | } else { | ||
289 | printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", | ||
290 | nlmsvc_task, net); | ||
291 | BUG(); | ||
292 | } | ||
293 | } | ||
294 | |||
248 | /* | 295 | /* |
249 | * Bring up the lockd process if it's not already up. | 296 | * Bring up the lockd process if it's not already up. |
250 | */ | 297 | */ |
@@ -252,13 +299,16 @@ int lockd_up(void) | |||
252 | { | 299 | { |
253 | struct svc_serv *serv; | 300 | struct svc_serv *serv; |
254 | int error = 0; | 301 | int error = 0; |
302 | struct net *net = current->nsproxy->net_ns; | ||
255 | 303 | ||
256 | mutex_lock(&nlmsvc_mutex); | 304 | mutex_lock(&nlmsvc_mutex); |
257 | /* | 305 | /* |
258 | * Check whether we're already up and running. | 306 | * Check whether we're already up and running. |
259 | */ | 307 | */ |
260 | if (nlmsvc_rqst) | 308 | if (nlmsvc_rqst) { |
309 | error = lockd_up_net(net); | ||
261 | goto out; | 310 | goto out; |
311 | } | ||
262 | 312 | ||
263 | /* | 313 | /* |
264 | * Sanity check: if there's no pid, | 314 | * Sanity check: if there's no pid, |
@@ -275,7 +325,7 @@ int lockd_up(void) | |||
275 | goto out; | 325 | goto out; |
276 | } | 326 | } |
277 | 327 | ||
278 | error = make_socks(serv); | 328 | error = make_socks(serv, net); |
279 | if (error < 0) | 329 | if (error < 0) |
280 | goto destroy_and_out; | 330 | goto destroy_and_out; |
281 | 331 | ||
@@ -313,8 +363,12 @@ int lockd_up(void) | |||
313 | destroy_and_out: | 363 | destroy_and_out: |
314 | svc_destroy(serv); | 364 | svc_destroy(serv); |
315 | out: | 365 | out: |
316 | if (!error) | 366 | if (!error) { |
367 | struct lockd_net *ln = net_generic(net, lockd_net_id); | ||
368 | |||
369 | ln->nlmsvc_users++; | ||
317 | nlmsvc_users++; | 370 | nlmsvc_users++; |
371 | } | ||
318 | mutex_unlock(&nlmsvc_mutex); | 372 | mutex_unlock(&nlmsvc_mutex); |
319 | return error; | 373 | return error; |
320 | } | 374 | } |
@@ -328,8 +382,10 @@ lockd_down(void) | |||
328 | { | 382 | { |
329 | mutex_lock(&nlmsvc_mutex); | 383 | mutex_lock(&nlmsvc_mutex); |
330 | if (nlmsvc_users) { | 384 | if (nlmsvc_users) { |
331 | if (--nlmsvc_users) | 385 | if (--nlmsvc_users) { |
386 | lockd_down_net(current->nsproxy->net_ns); | ||
332 | goto out; | 387 | goto out; |
388 | } | ||
333 | } else { | 389 | } else { |
334 | printk(KERN_ERR "lockd_down: no users! task=%p\n", | 390 | printk(KERN_ERR "lockd_down: no users! task=%p\n", |
335 | nlmsvc_task); | 391 | nlmsvc_task); |
@@ -497,24 +553,55 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int, | |||
497 | module_param(nsm_use_hostnames, bool, 0644); | 553 | module_param(nsm_use_hostnames, bool, 0644); |
498 | module_param(nlm_max_connections, uint, 0644); | 554 | module_param(nlm_max_connections, uint, 0644); |
499 | 555 | ||
556 | static int lockd_init_net(struct net *net) | ||
557 | { | ||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static void lockd_exit_net(struct net *net) | ||
562 | { | ||
563 | } | ||
564 | |||
565 | static struct pernet_operations lockd_net_ops = { | ||
566 | .init = lockd_init_net, | ||
567 | .exit = lockd_exit_net, | ||
568 | .id = &lockd_net_id, | ||
569 | .size = sizeof(struct lockd_net), | ||
570 | }; | ||
571 | |||
572 | |||
500 | /* | 573 | /* |
501 | * Initialising and terminating the module. | 574 | * Initialising and terminating the module. |
502 | */ | 575 | */ |
503 | 576 | ||
504 | static int __init init_nlm(void) | 577 | static int __init init_nlm(void) |
505 | { | 578 | { |
579 | int err; | ||
580 | |||
506 | #ifdef CONFIG_SYSCTL | 581 | #ifdef CONFIG_SYSCTL |
582 | err = -ENOMEM; | ||
507 | nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); | 583 | nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); |
508 | return nlm_sysctl_table ? 0 : -ENOMEM; | 584 | if (nlm_sysctl_table == NULL) |
509 | #else | 585 | goto err_sysctl; |
586 | #endif | ||
587 | err = register_pernet_subsys(&lockd_net_ops); | ||
588 | if (err) | ||
589 | goto err_pernet; | ||
510 | return 0; | 590 | return 0; |
591 | |||
592 | err_pernet: | ||
593 | #ifdef CONFIG_SYSCTL | ||
594 | unregister_sysctl_table(nlm_sysctl_table); | ||
511 | #endif | 595 | #endif |
596 | err_sysctl: | ||
597 | return err; | ||
512 | } | 598 | } |
513 | 599 | ||
514 | static void __exit exit_nlm(void) | 600 | static void __exit exit_nlm(void) |
515 | { | 601 | { |
516 | /* FIXME: delete all NLM clients */ | 602 | /* FIXME: delete all NLM clients */ |
517 | nlm_shutdown_hosts(); | 603 | nlm_shutdown_hosts(); |
604 | unregister_pernet_subsys(&lockd_net_ops); | ||
518 | #ifdef CONFIG_SYSCTL | 605 | #ifdef CONFIG_SYSCTL |
519 | unregister_sysctl_table(nlm_sysctl_table); | 606 | unregister_sysctl_table(nlm_sysctl_table); |
520 | #endif | 607 | #endif |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index f0179c3745d2..e46353f41a42 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -46,7 +46,6 @@ static void nlmsvc_remove_block(struct nlm_block *block); | |||
46 | static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); | 46 | static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); |
47 | static void nlmsvc_freegrantargs(struct nlm_rqst *call); | 47 | static void nlmsvc_freegrantargs(struct nlm_rqst *call); |
48 | static const struct rpc_call_ops nlmsvc_grant_ops; | 48 | static const struct rpc_call_ops nlmsvc_grant_ops; |
49 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); | ||
50 | 49 | ||
51 | /* | 50 | /* |
52 | * The list of blocked locks to retry | 51 | * The list of blocked locks to retry |
@@ -54,6 +53,35 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); | |||
54 | static LIST_HEAD(nlm_blocked); | 53 | static LIST_HEAD(nlm_blocked); |
55 | static DEFINE_SPINLOCK(nlm_blocked_lock); | 54 | static DEFINE_SPINLOCK(nlm_blocked_lock); |
56 | 55 | ||
56 | #ifdef LOCKD_DEBUG | ||
57 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) | ||
58 | { | ||
59 | /* | ||
60 | * We can get away with a static buffer because we're only | ||
61 | * called with BKL held. | ||
62 | */ | ||
63 | static char buf[2*NLM_MAXCOOKIELEN+1]; | ||
64 | unsigned int i, len = sizeof(buf); | ||
65 | char *p = buf; | ||
66 | |||
67 | len--; /* allow for trailing \0 */ | ||
68 | if (len < 3) | ||
69 | return "???"; | ||
70 | for (i = 0 ; i < cookie->len ; i++) { | ||
71 | if (len < 2) { | ||
72 | strcpy(p-3, "..."); | ||
73 | break; | ||
74 | } | ||
75 | sprintf(p, "%02x", cookie->data[i]); | ||
76 | p += 2; | ||
77 | len -= 2; | ||
78 | } | ||
79 | *p = '\0'; | ||
80 | |||
81 | return buf; | ||
82 | } | ||
83 | #endif | ||
84 | |||
57 | /* | 85 | /* |
58 | * Insert a blocked lock into the global list | 86 | * Insert a blocked lock into the global list |
59 | */ | 87 | */ |
@@ -935,32 +963,3 @@ nlmsvc_retry_blocked(void) | |||
935 | 963 | ||
936 | return timeout; | 964 | return timeout; |
937 | } | 965 | } |
938 | |||
939 | #ifdef RPC_DEBUG | ||
940 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) | ||
941 | { | ||
942 | /* | ||
943 | * We can get away with a static buffer because we're only | ||
944 | * called with BKL held. | ||
945 | */ | ||
946 | static char buf[2*NLM_MAXCOOKIELEN+1]; | ||
947 | unsigned int i, len = sizeof(buf); | ||
948 | char *p = buf; | ||
949 | |||
950 | len--; /* allow for trailing \0 */ | ||
951 | if (len < 3) | ||
952 | return "???"; | ||
953 | for (i = 0 ; i < cookie->len ; i++) { | ||
954 | if (len < 2) { | ||
955 | strcpy(p-3, "..."); | ||
956 | break; | ||
957 | } | ||
958 | sprintf(p, "%02x", cookie->data[i]); | ||
959 | p += 2; | ||
960 | len -= 2; | ||
961 | } | ||
962 | *p = '\0'; | ||
963 | |||
964 | return buf; | ||
965 | } | ||
966 | #endif | ||
diff --git a/fs/mpage.c b/fs/mpage.c index 643e9f55ef29..0face1c4d4c6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -13,7 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/module.h> | 16 | #include <linux/export.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/kdev_t.h> | 18 | #include <linux/kdev_t.h> |
19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
diff --git a/fs/namei.c b/fs/namei.c index 73ec863a9896..e615ff37e27d 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -15,7 +15,7 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/module.h> | 18 | #include <linux/export.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/namei.h> | 21 | #include <linux/namei.h> |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index dbcd82126aed..2a0e6c599147 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -64,6 +64,7 @@ config NFS_V4 | |||
64 | bool "NFS client support for NFS version 4" | 64 | bool "NFS client support for NFS version 4" |
65 | depends on NFS_FS | 65 | depends on NFS_FS |
66 | select SUNRPC_GSS | 66 | select SUNRPC_GSS |
67 | select KEYS | ||
67 | help | 68 | help |
68 | This option enables support for version 4 of the NFS protocol | 69 | This option enables support for version 4 of the NFS protocol |
69 | (RFC 3530) in the kernel's NFS client. | 70 | (RFC 3530) in the kernel's NFS client. |
@@ -98,6 +99,18 @@ config PNFS_OBJLAYOUT | |||
98 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD | 99 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD |
99 | default m | 100 | default m |
100 | 101 | ||
102 | config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | ||
103 | string "NFSv4.1 Implementation ID Domain" | ||
104 | depends on NFS_V4_1 | ||
105 | default "kernel.org" | ||
106 | help | ||
107 | This option defines the domain portion of the implementation ID that | ||
108 | may be sent in the NFS exchange_id operation. The value must be in | ||
109 | the format of a DNS domain name and should be set to the DNS domain | ||
110 | name of the distribution. | ||
111 | If the NFS client is unchanged from the upstream kernel, this | ||
112 | option should be set to the default "kernel.org". | ||
113 | |||
101 | config ROOT_NFS | 114 | config ROOT_NFS |
102 | bool "Root file system on NFS" | 115 | bool "Root file system on NFS" |
103 | depends on NFS_FS=y && IP_PNP | 116 | depends on NFS_FS=y && IP_PNP |
@@ -130,16 +143,10 @@ config NFS_USE_KERNEL_DNS | |||
130 | bool | 143 | bool |
131 | depends on NFS_V4 && !NFS_USE_LEGACY_DNS | 144 | depends on NFS_V4 && !NFS_USE_LEGACY_DNS |
132 | select DNS_RESOLVER | 145 | select DNS_RESOLVER |
133 | select KEYS | ||
134 | default y | 146 | default y |
135 | 147 | ||
136 | config NFS_USE_NEW_IDMAPPER | 148 | config NFS_DEBUG |
137 | bool "Use the new idmapper upcall routine" | 149 | bool |
138 | depends on NFS_V4 && KEYS | 150 | depends on NFS_FS && SUNRPC_DEBUG |
139 | help | 151 | select CRC32 |
140 | Say Y here if you want NFS to use the new idmapper upcall functions. | 152 | default y |
141 | You will need /sbin/request-key (usually provided by the keyutils | ||
142 | package). For details, read | ||
143 | <file:Documentation/filesystems/nfs/idmapper.txt>. | ||
144 | |||
145 | If you are unsure, say N. | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 48cfac31f64c..9c94297bb70e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -46,9 +46,6 @@ MODULE_LICENSE("GPL"); | |||
46 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); | 46 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); |
47 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); | 47 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); |
48 | 48 | ||
49 | struct dentry *bl_device_pipe; | ||
50 | wait_queue_head_t bl_wq; | ||
51 | |||
52 | static void print_page(struct page *page) | 49 | static void print_page(struct page *page) |
53 | { | 50 | { |
54 | dprintk("PRINTPAGE page %p\n", page); | 51 | dprintk("PRINTPAGE page %p\n", page); |
@@ -236,12 +233,11 @@ bl_read_pagelist(struct nfs_read_data *rdata) | |||
236 | sector_t isect, extent_length = 0; | 233 | sector_t isect, extent_length = 0; |
237 | struct parallel_io *par; | 234 | struct parallel_io *par; |
238 | loff_t f_offset = rdata->args.offset; | 235 | loff_t f_offset = rdata->args.offset; |
239 | size_t count = rdata->args.count; | ||
240 | struct page **pages = rdata->args.pages; | 236 | struct page **pages = rdata->args.pages; |
241 | int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; | 237 | int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; |
242 | 238 | ||
243 | dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, | 239 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, |
244 | rdata->npages, f_offset, count); | 240 | rdata->npages, f_offset, (unsigned int)rdata->args.count); |
245 | 241 | ||
246 | par = alloc_parallel(rdata); | 242 | par = alloc_parallel(rdata); |
247 | if (!par) | 243 | if (!par) |
@@ -1025,10 +1021,128 @@ static const struct rpc_pipe_ops bl_upcall_ops = { | |||
1025 | .destroy_msg = bl_pipe_destroy_msg, | 1021 | .destroy_msg = bl_pipe_destroy_msg, |
1026 | }; | 1022 | }; |
1027 | 1023 | ||
1024 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
1025 | struct rpc_pipe *pipe) | ||
1026 | { | ||
1027 | struct dentry *dir, *dentry; | ||
1028 | |||
1029 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
1030 | if (dir == NULL) | ||
1031 | return ERR_PTR(-ENOENT); | ||
1032 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
1033 | dput(dir); | ||
1034 | return dentry; | ||
1035 | } | ||
1036 | |||
1037 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
1038 | struct rpc_pipe *pipe) | ||
1039 | { | ||
1040 | if (pipe->dentry) | ||
1041 | rpc_unlink(pipe->dentry); | ||
1042 | } | ||
1043 | |||
1044 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
1045 | void *ptr) | ||
1046 | { | ||
1047 | struct super_block *sb = ptr; | ||
1048 | struct net *net = sb->s_fs_info; | ||
1049 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1050 | struct dentry *dentry; | ||
1051 | int ret = 0; | ||
1052 | |||
1053 | if (!try_module_get(THIS_MODULE)) | ||
1054 | return 0; | ||
1055 | |||
1056 | if (nn->bl_device_pipe == NULL) { | ||
1057 | module_put(THIS_MODULE); | ||
1058 | return 0; | ||
1059 | } | ||
1060 | |||
1061 | switch (event) { | ||
1062 | case RPC_PIPEFS_MOUNT: | ||
1063 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
1064 | if (IS_ERR(dentry)) { | ||
1065 | ret = PTR_ERR(dentry); | ||
1066 | break; | ||
1067 | } | ||
1068 | nn->bl_device_pipe->dentry = dentry; | ||
1069 | break; | ||
1070 | case RPC_PIPEFS_UMOUNT: | ||
1071 | if (nn->bl_device_pipe->dentry) | ||
1072 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
1073 | break; | ||
1074 | default: | ||
1075 | ret = -ENOTSUPP; | ||
1076 | break; | ||
1077 | } | ||
1078 | module_put(THIS_MODULE); | ||
1079 | return ret; | ||
1080 | } | ||
1081 | |||
1082 | static struct notifier_block nfs4blocklayout_block = { | ||
1083 | .notifier_call = rpc_pipefs_event, | ||
1084 | }; | ||
1085 | |||
1086 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
1087 | struct rpc_pipe *pipe) | ||
1088 | { | ||
1089 | struct super_block *pipefs_sb; | ||
1090 | struct dentry *dentry; | ||
1091 | |||
1092 | pipefs_sb = rpc_get_sb_net(net); | ||
1093 | if (!pipefs_sb) | ||
1094 | return NULL; | ||
1095 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
1096 | rpc_put_sb_net(net); | ||
1097 | return dentry; | ||
1098 | } | ||
1099 | |||
1100 | static void nfs4blocklayout_unregister_net(struct net *net, | ||
1101 | struct rpc_pipe *pipe) | ||
1102 | { | ||
1103 | struct super_block *pipefs_sb; | ||
1104 | |||
1105 | pipefs_sb = rpc_get_sb_net(net); | ||
1106 | if (pipefs_sb) { | ||
1107 | nfs4blocklayout_unregister_sb(pipefs_sb, pipe); | ||
1108 | rpc_put_sb_net(net); | ||
1109 | } | ||
1110 | } | ||
1111 | |||
1112 | static int nfs4blocklayout_net_init(struct net *net) | ||
1113 | { | ||
1114 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1115 | struct dentry *dentry; | ||
1116 | |||
1117 | init_waitqueue_head(&nn->bl_wq); | ||
1118 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
1119 | if (IS_ERR(nn->bl_device_pipe)) | ||
1120 | return PTR_ERR(nn->bl_device_pipe); | ||
1121 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
1122 | if (IS_ERR(dentry)) { | ||
1123 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1124 | return PTR_ERR(dentry); | ||
1125 | } | ||
1126 | nn->bl_device_pipe->dentry = dentry; | ||
1127 | return 0; | ||
1128 | } | ||
1129 | |||
1130 | static void nfs4blocklayout_net_exit(struct net *net) | ||
1131 | { | ||
1132 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1133 | |||
1134 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
1135 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1136 | nn->bl_device_pipe = NULL; | ||
1137 | } | ||
1138 | |||
1139 | static struct pernet_operations nfs4blocklayout_net_ops = { | ||
1140 | .init = nfs4blocklayout_net_init, | ||
1141 | .exit = nfs4blocklayout_net_exit, | ||
1142 | }; | ||
1143 | |||
1028 | static int __init nfs4blocklayout_init(void) | 1144 | static int __init nfs4blocklayout_init(void) |
1029 | { | 1145 | { |
1030 | struct vfsmount *mnt; | ||
1031 | struct path path; | ||
1032 | int ret; | 1146 | int ret; |
1033 | 1147 | ||
1034 | dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); | 1148 | dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); |
@@ -1037,32 +1151,17 @@ static int __init nfs4blocklayout_init(void) | |||
1037 | if (ret) | 1151 | if (ret) |
1038 | goto out; | 1152 | goto out; |
1039 | 1153 | ||
1040 | init_waitqueue_head(&bl_wq); | 1154 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); |
1041 | 1155 | if (ret) | |
1042 | mnt = rpc_get_mount(); | ||
1043 | if (IS_ERR(mnt)) { | ||
1044 | ret = PTR_ERR(mnt); | ||
1045 | goto out_remove; | 1156 | goto out_remove; |
1046 | } | 1157 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); |
1047 | |||
1048 | ret = vfs_path_lookup(mnt->mnt_root, | ||
1049 | mnt, | ||
1050 | NFS_PIPE_DIRNAME, 0, &path); | ||
1051 | if (ret) | 1158 | if (ret) |
1052 | goto out_putrpc; | 1159 | goto out_notifier; |
1053 | |||
1054 | bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, | ||
1055 | &bl_upcall_ops, 0); | ||
1056 | path_put(&path); | ||
1057 | if (IS_ERR(bl_device_pipe)) { | ||
1058 | ret = PTR_ERR(bl_device_pipe); | ||
1059 | goto out_putrpc; | ||
1060 | } | ||
1061 | out: | 1160 | out: |
1062 | return ret; | 1161 | return ret; |
1063 | 1162 | ||
1064 | out_putrpc: | 1163 | out_notifier: |
1065 | rpc_put_mount(); | 1164 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); |
1066 | out_remove: | 1165 | out_remove: |
1067 | pnfs_unregister_layoutdriver(&blocklayout_type); | 1166 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1068 | return ret; | 1167 | return ret; |
@@ -1073,9 +1172,9 @@ static void __exit nfs4blocklayout_exit(void) | |||
1073 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", | 1172 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", |
1074 | __func__); | 1173 | __func__); |
1075 | 1174 | ||
1175 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
1176 | unregister_pernet_subsys(&nfs4blocklayout_net_ops); | ||
1076 | pnfs_unregister_layoutdriver(&blocklayout_type); | 1177 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1077 | rpc_unlink(bl_device_pipe); | ||
1078 | rpc_put_mount(); | ||
1079 | } | 1178 | } |
1080 | 1179 | ||
1081 | MODULE_ALIAS("nfs-layouttype4-3"); | 1180 | MODULE_ALIAS("nfs-layouttype4-3"); |
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index e31a2df28e70..03350690118e 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/sunrpc/rpc_pipe_fs.h> | 37 | #include <linux/sunrpc/rpc_pipe_fs.h> |
38 | 38 | ||
39 | #include "../pnfs.h" | 39 | #include "../pnfs.h" |
40 | #include "../netns.h" | ||
40 | 41 | ||
41 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) | 42 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) |
42 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | 43 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) |
@@ -50,6 +51,7 @@ struct pnfs_block_dev { | |||
50 | struct list_head bm_node; | 51 | struct list_head bm_node; |
51 | struct nfs4_deviceid bm_mdevid; /* associated devid */ | 52 | struct nfs4_deviceid bm_mdevid; /* associated devid */ |
52 | struct block_device *bm_mdev; /* meta device itself */ | 53 | struct block_device *bm_mdev; /* meta device itself */ |
54 | struct net *net; | ||
53 | }; | 55 | }; |
54 | 56 | ||
55 | enum exstate4 { | 57 | enum exstate4 { |
@@ -151,9 +153,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) | |||
151 | return BLK_LO2EXT(lseg->pls_layout); | 153 | return BLK_LO2EXT(lseg->pls_layout); |
152 | } | 154 | } |
153 | 155 | ||
154 | struct bl_dev_msg { | 156 | struct bl_pipe_msg { |
155 | int32_t status; | 157 | struct rpc_pipe_msg msg; |
156 | uint32_t major, minor; | 158 | wait_queue_head_t *bl_wq; |
157 | }; | 159 | }; |
158 | 160 | ||
159 | struct bl_msg_hdr { | 161 | struct bl_msg_hdr { |
@@ -161,9 +163,6 @@ struct bl_msg_hdr { | |||
161 | u16 totallen; /* length of entire message, including hdr itself */ | 163 | u16 totallen; /* length of entire message, including hdr itself */ |
162 | }; | 164 | }; |
163 | 165 | ||
164 | extern struct dentry *bl_device_pipe; | ||
165 | extern wait_queue_head_t bl_wq; | ||
166 | |||
167 | #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ | 166 | #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ |
168 | #define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ | 167 | #define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ |
169 | #define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ | 168 | #define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ |
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index d08ba9107fde..a5c88a554d92 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c | |||
@@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) | |||
46 | 46 | ||
47 | *rp = xdr_decode_hyper(*rp, &s); | 47 | *rp = xdr_decode_hyper(*rp, &s); |
48 | if (s & 0x1ff) { | 48 | if (s & 0x1ff) { |
49 | printk(KERN_WARNING "%s: sector not aligned\n", __func__); | 49 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); |
50 | return -1; | 50 | return -1; |
51 | } | 51 | } |
52 | *sp = s >> SECTOR_SHIFT; | 52 | *sp = s >> SECTOR_SHIFT; |
@@ -79,27 +79,30 @@ int nfs4_blkdev_put(struct block_device *bdev) | |||
79 | return blkdev_put(bdev, FMODE_READ); | 79 | return blkdev_put(bdev, FMODE_READ); |
80 | } | 80 | } |
81 | 81 | ||
82 | static struct bl_dev_msg bl_mount_reply; | ||
83 | |||
84 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | 82 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, |
85 | size_t mlen) | 83 | size_t mlen) |
86 | { | 84 | { |
85 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
86 | nfs_net_id); | ||
87 | |||
87 | if (mlen != sizeof (struct bl_dev_msg)) | 88 | if (mlen != sizeof (struct bl_dev_msg)) |
88 | return -EINVAL; | 89 | return -EINVAL; |
89 | 90 | ||
90 | if (copy_from_user(&bl_mount_reply, src, mlen) != 0) | 91 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) |
91 | return -EFAULT; | 92 | return -EFAULT; |
92 | 93 | ||
93 | wake_up(&bl_wq); | 94 | wake_up(&nn->bl_wq); |
94 | 95 | ||
95 | return mlen; | 96 | return mlen; |
96 | } | 97 | } |
97 | 98 | ||
98 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | 99 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) |
99 | { | 100 | { |
101 | struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); | ||
102 | |||
100 | if (msg->errno >= 0) | 103 | if (msg->errno >= 0) |
101 | return; | 104 | return; |
102 | wake_up(&bl_wq); | 105 | wake_up(bl_pipe_msg->bl_wq); |
103 | } | 106 | } |
104 | 107 | ||
105 | /* | 108 | /* |
@@ -111,29 +114,33 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
111 | { | 114 | { |
112 | struct pnfs_block_dev *rv; | 115 | struct pnfs_block_dev *rv; |
113 | struct block_device *bd = NULL; | 116 | struct block_device *bd = NULL; |
114 | struct rpc_pipe_msg msg; | 117 | struct bl_pipe_msg bl_pipe_msg; |
118 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
115 | struct bl_msg_hdr bl_msg = { | 119 | struct bl_msg_hdr bl_msg = { |
116 | .type = BL_DEVICE_MOUNT, | 120 | .type = BL_DEVICE_MOUNT, |
117 | .totallen = dev->mincount, | 121 | .totallen = dev->mincount, |
118 | }; | 122 | }; |
119 | uint8_t *dataptr; | 123 | uint8_t *dataptr; |
120 | DECLARE_WAITQUEUE(wq, current); | 124 | DECLARE_WAITQUEUE(wq, current); |
121 | struct bl_dev_msg *reply = &bl_mount_reply; | ||
122 | int offset, len, i, rc; | 125 | int offset, len, i, rc; |
126 | struct net *net = server->nfs_client->net; | ||
127 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
128 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
123 | 129 | ||
124 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | 130 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); |
125 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | 131 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, |
126 | dev->mincount); | 132 | dev->mincount); |
127 | 133 | ||
128 | memset(&msg, 0, sizeof(msg)); | 134 | bl_pipe_msg.bl_wq = &nn->bl_wq; |
129 | msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); | 135 | memset(msg, 0, sizeof(*msg)); |
130 | if (!msg.data) { | 136 | msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); |
137 | if (!msg->data) { | ||
131 | rv = ERR_PTR(-ENOMEM); | 138 | rv = ERR_PTR(-ENOMEM); |
132 | goto out; | 139 | goto out; |
133 | } | 140 | } |
134 | 141 | ||
135 | memcpy(msg.data, &bl_msg, sizeof(bl_msg)); | 142 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); |
136 | dataptr = (uint8_t *) msg.data; | 143 | dataptr = (uint8_t *) msg->data; |
137 | len = dev->mincount; | 144 | len = dev->mincount; |
138 | offset = sizeof(bl_msg); | 145 | offset = sizeof(bl_msg); |
139 | for (i = 0; len > 0; i++) { | 146 | for (i = 0; len > 0; i++) { |
@@ -142,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
142 | len -= PAGE_CACHE_SIZE; | 149 | len -= PAGE_CACHE_SIZE; |
143 | offset += PAGE_CACHE_SIZE; | 150 | offset += PAGE_CACHE_SIZE; |
144 | } | 151 | } |
145 | msg.len = sizeof(bl_msg) + dev->mincount; | 152 | msg->len = sizeof(bl_msg) + dev->mincount; |
146 | 153 | ||
147 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | 154 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); |
148 | add_wait_queue(&bl_wq, &wq); | 155 | add_wait_queue(&nn->bl_wq, &wq); |
149 | rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); | 156 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); |
150 | if (rc < 0) { | 157 | if (rc < 0) { |
151 | remove_wait_queue(&bl_wq, &wq); | 158 | remove_wait_queue(&nn->bl_wq, &wq); |
152 | rv = ERR_PTR(rc); | 159 | rv = ERR_PTR(rc); |
153 | goto out; | 160 | goto out; |
154 | } | 161 | } |
@@ -156,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
156 | set_current_state(TASK_UNINTERRUPTIBLE); | 163 | set_current_state(TASK_UNINTERRUPTIBLE); |
157 | schedule(); | 164 | schedule(); |
158 | __set_current_state(TASK_RUNNING); | 165 | __set_current_state(TASK_RUNNING); |
159 | remove_wait_queue(&bl_wq, &wq); | 166 | remove_wait_queue(&nn->bl_wq, &wq); |
160 | 167 | ||
161 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | 168 | if (reply->status != BL_DEVICE_REQUEST_PROC) { |
162 | dprintk("%s failed to open device: %d\n", | 169 | dprintk("%s failed to open device: %d\n", |
@@ -181,13 +188,14 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
181 | 188 | ||
182 | rv->bm_mdev = bd; | 189 | rv->bm_mdev = bd; |
183 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); | 190 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); |
191 | rv->net = net; | ||
184 | dprintk("%s Created device %s with bd_block_size %u\n", | 192 | dprintk("%s Created device %s with bd_block_size %u\n", |
185 | __func__, | 193 | __func__, |
186 | bd->bd_disk->disk_name, | 194 | bd->bd_disk->disk_name, |
187 | bd->bd_block_size); | 195 | bd->bd_block_size); |
188 | 196 | ||
189 | out: | 197 | out: |
190 | kfree(msg.data); | 198 | kfree(msg->data); |
191 | return rv; | 199 | return rv; |
192 | } | 200 | } |
193 | 201 | ||
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index d055c7558073..737d839bc17b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c | |||
@@ -38,9 +38,10 @@ | |||
38 | 38 | ||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
40 | 40 | ||
41 | static void dev_remove(dev_t dev) | 41 | static void dev_remove(struct net *net, dev_t dev) |
42 | { | 42 | { |
43 | struct rpc_pipe_msg msg; | 43 | struct bl_pipe_msg bl_pipe_msg; |
44 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
44 | struct bl_dev_msg bl_umount_request; | 45 | struct bl_dev_msg bl_umount_request; |
45 | struct bl_msg_hdr bl_msg = { | 46 | struct bl_msg_hdr bl_msg = { |
46 | .type = BL_DEVICE_UMOUNT, | 47 | .type = BL_DEVICE_UMOUNT, |
@@ -48,36 +49,38 @@ static void dev_remove(dev_t dev) | |||
48 | }; | 49 | }; |
49 | uint8_t *dataptr; | 50 | uint8_t *dataptr; |
50 | DECLARE_WAITQUEUE(wq, current); | 51 | DECLARE_WAITQUEUE(wq, current); |
52 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
51 | 53 | ||
52 | dprintk("Entering %s\n", __func__); | 54 | dprintk("Entering %s\n", __func__); |
53 | 55 | ||
54 | memset(&msg, 0, sizeof(msg)); | 56 | bl_pipe_msg.bl_wq = &nn->bl_wq; |
55 | msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); | 57 | memset(msg, 0, sizeof(*msg)); |
56 | if (!msg.data) | 58 | msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); |
59 | if (!msg->data) | ||
57 | goto out; | 60 | goto out; |
58 | 61 | ||
59 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); | 62 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); |
60 | bl_umount_request.major = MAJOR(dev); | 63 | bl_umount_request.major = MAJOR(dev); |
61 | bl_umount_request.minor = MINOR(dev); | 64 | bl_umount_request.minor = MINOR(dev); |
62 | 65 | ||
63 | memcpy(msg.data, &bl_msg, sizeof(bl_msg)); | 66 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); |
64 | dataptr = (uint8_t *) msg.data; | 67 | dataptr = (uint8_t *) msg->data; |
65 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | 68 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); |
66 | msg.len = sizeof(bl_msg) + bl_msg.totallen; | 69 | msg->len = sizeof(bl_msg) + bl_msg.totallen; |
67 | 70 | ||
68 | add_wait_queue(&bl_wq, &wq); | 71 | add_wait_queue(&nn->bl_wq, &wq); |
69 | if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { | 72 | if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { |
70 | remove_wait_queue(&bl_wq, &wq); | 73 | remove_wait_queue(&nn->bl_wq, &wq); |
71 | goto out; | 74 | goto out; |
72 | } | 75 | } |
73 | 76 | ||
74 | set_current_state(TASK_UNINTERRUPTIBLE); | 77 | set_current_state(TASK_UNINTERRUPTIBLE); |
75 | schedule(); | 78 | schedule(); |
76 | __set_current_state(TASK_RUNNING); | 79 | __set_current_state(TASK_RUNNING); |
77 | remove_wait_queue(&bl_wq, &wq); | 80 | remove_wait_queue(&nn->bl_wq, &wq); |
78 | 81 | ||
79 | out: | 82 | out: |
80 | kfree(msg.data); | 83 | kfree(msg->data); |
81 | } | 84 | } |
82 | 85 | ||
83 | /* | 86 | /* |
@@ -90,10 +93,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) | |||
90 | dprintk("%s Releasing\n", __func__); | 93 | dprintk("%s Releasing\n", __func__); |
91 | rv = nfs4_blkdev_put(bdev->bm_mdev); | 94 | rv = nfs4_blkdev_put(bdev->bm_mdev); |
92 | if (rv) | 95 | if (rv) |
93 | printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", | 96 | printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n", |
94 | __func__, rv); | 97 | __func__, rv); |
95 | 98 | ||
96 | dev_remove(bdev->bm_mdev->bd_dev); | 99 | dev_remove(bdev->net, bdev->bm_mdev->bd_dev); |
97 | } | 100 | } |
98 | 101 | ||
99 | void bl_free_block_dev(struct pnfs_block_dev *bdev) | 102 | void bl_free_block_dev(struct pnfs_block_dev *bdev) |
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1abac09f7cd5..1f9a6032796b 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks, | |||
147 | count = (int)(end - start) / (int)tree->mtt_step_size; | 147 | count = (int)(end - start) / (int)tree->mtt_step_size; |
148 | 148 | ||
149 | /* Pre-malloc what memory we might need */ | 149 | /* Pre-malloc what memory we might need */ |
150 | storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); | 150 | storage = kcalloc(count, sizeof(*storage), GFP_NOFS); |
151 | if (!storage) | 151 | if (!storage) |
152 | return -ENOMEM; | 152 | return -ENOMEM; |
153 | for (i = 0; i < count; i++) { | 153 | for (i = 0; i < count; i++) { |
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index c98b439332fc..dded26368111 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/sunrpc/cache.h> | 14 | #include <linux/sunrpc/cache.h> |
15 | #include <linux/sunrpc/rpc_pipe_fs.h> | 15 | #include <linux/sunrpc/rpc_pipe_fs.h> |
16 | #include <net/net_namespace.h> | ||
16 | 17 | ||
17 | #include "cache_lib.h" | 18 | #include "cache_lib.h" |
18 | 19 | ||
@@ -111,30 +112,54 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) | |||
111 | return 0; | 112 | return 0; |
112 | } | 113 | } |
113 | 114 | ||
114 | int nfs_cache_register(struct cache_detail *cd) | 115 | int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) |
115 | { | 116 | { |
116 | struct vfsmount *mnt; | ||
117 | struct path path; | ||
118 | int ret; | 117 | int ret; |
118 | struct dentry *dir; | ||
119 | 119 | ||
120 | mnt = rpc_get_mount(); | 120 | dir = rpc_d_lookup_sb(sb, "cache"); |
121 | if (IS_ERR(mnt)) | 121 | BUG_ON(dir == NULL); |
122 | return PTR_ERR(mnt); | 122 | ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd); |
123 | ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); | 123 | dput(dir); |
124 | if (ret) | ||
125 | goto err; | ||
126 | ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd); | ||
127 | path_put(&path); | ||
128 | if (!ret) | ||
129 | return ret; | ||
130 | err: | ||
131 | rpc_put_mount(); | ||
132 | return ret; | 124 | return ret; |
133 | } | 125 | } |
134 | 126 | ||
135 | void nfs_cache_unregister(struct cache_detail *cd) | 127 | int nfs_cache_register_net(struct net *net, struct cache_detail *cd) |
136 | { | 128 | { |
137 | sunrpc_cache_unregister_pipefs(cd); | 129 | struct super_block *pipefs_sb; |
138 | rpc_put_mount(); | 130 | int ret = 0; |
131 | |||
132 | pipefs_sb = rpc_get_sb_net(net); | ||
133 | if (pipefs_sb) { | ||
134 | ret = nfs_cache_register_sb(pipefs_sb, cd); | ||
135 | rpc_put_sb_net(net); | ||
136 | } | ||
137 | return ret; | ||
138 | } | ||
139 | |||
140 | void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) | ||
141 | { | ||
142 | if (cd->u.pipefs.dir) | ||
143 | sunrpc_cache_unregister_pipefs(cd); | ||
144 | } | ||
145 | |||
146 | void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) | ||
147 | { | ||
148 | struct super_block *pipefs_sb; | ||
149 | |||
150 | pipefs_sb = rpc_get_sb_net(net); | ||
151 | if (pipefs_sb) { | ||
152 | nfs_cache_unregister_sb(pipefs_sb, cd); | ||
153 | rpc_put_sb_net(net); | ||
154 | } | ||
155 | } | ||
156 | |||
157 | void nfs_cache_init(struct cache_detail *cd) | ||
158 | { | ||
159 | sunrpc_init_cache_detail(cd); | ||
139 | } | 160 | } |
140 | 161 | ||
162 | void nfs_cache_destroy(struct cache_detail *cd) | ||
163 | { | ||
164 | sunrpc_destroy_cache_detail(cd); | ||
165 | } | ||
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 7cf6cafcc007..317db95e37f8 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h | |||
@@ -23,5 +23,11 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); | |||
23 | extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); | 23 | extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); |
24 | extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); | 24 | extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); |
25 | 25 | ||
26 | extern int nfs_cache_register(struct cache_detail *cd); | 26 | extern void nfs_cache_init(struct cache_detail *cd); |
27 | extern void nfs_cache_unregister(struct cache_detail *cd); | 27 | extern void nfs_cache_destroy(struct cache_detail *cd); |
28 | extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); | ||
29 | extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); | ||
30 | extern int nfs_cache_register_sb(struct super_block *sb, | ||
31 | struct cache_detail *cd); | ||
32 | extern void nfs_cache_unregister_sb(struct super_block *sb, | ||
33 | struct cache_detail *cd); | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 516f3375e067..eb95f5091c1a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp) | |||
85 | } | 85 | } |
86 | if (err < 0) { | 86 | if (err < 0) { |
87 | if (err != preverr) { | 87 | if (err != preverr) { |
88 | printk(KERN_WARNING "%s: unexpected error " | 88 | printk(KERN_WARNING "NFS: %s: unexpected error " |
89 | "from svc_recv (%d)\n", __func__, err); | 89 | "from svc_recv (%d)\n", __func__, err); |
90 | preverr = err; | 90 | preverr = err; |
91 | } | 91 | } |
@@ -101,12 +101,12 @@ nfs4_callback_svc(void *vrqstp) | |||
101 | /* | 101 | /* |
102 | * Prepare to bring up the NFSv4 callback service | 102 | * Prepare to bring up the NFSv4 callback service |
103 | */ | 103 | */ |
104 | struct svc_rqst * | 104 | static struct svc_rqst * |
105 | nfs4_callback_up(struct svc_serv *serv) | 105 | nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) |
106 | { | 106 | { |
107 | int ret; | 107 | int ret; |
108 | 108 | ||
109 | ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, | 109 | ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, |
110 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | 110 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); |
111 | if (ret <= 0) | 111 | if (ret <= 0) |
112 | goto out_err; | 112 | goto out_err; |
@@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv) | |||
114 | dprintk("NFS: Callback listener port = %u (af %u)\n", | 114 | dprintk("NFS: Callback listener port = %u (af %u)\n", |
115 | nfs_callback_tcpport, PF_INET); | 115 | nfs_callback_tcpport, PF_INET); |
116 | 116 | ||
117 | ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, | 117 | ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, |
118 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | 118 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); |
119 | if (ret > 0) { | 119 | if (ret > 0) { |
120 | nfs_callback_tcpport6 = ret; | 120 | nfs_callback_tcpport6 = ret; |
@@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp) | |||
172 | /* | 172 | /* |
173 | * Bring up the NFSv4.1 callback service | 173 | * Bring up the NFSv4.1 callback service |
174 | */ | 174 | */ |
175 | struct svc_rqst * | 175 | static struct svc_rqst * |
176 | nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) | 176 | nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) |
177 | { | 177 | { |
178 | struct svc_rqst *rqstp; | 178 | struct svc_rqst *rqstp; |
@@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) | |||
183 | * fore channel connection. | 183 | * fore channel connection. |
184 | * Returns the input port (0) and sets the svc_serv bc_xprt on success | 184 | * Returns the input port (0) and sets the svc_serv bc_xprt on success |
185 | */ | 185 | */ |
186 | ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, | 186 | ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, |
187 | SVC_SOCK_ANONYMOUS); | 187 | SVC_SOCK_ANONYMOUS); |
188 | if (ret < 0) { | 188 | if (ret < 0) { |
189 | rqstp = ERR_PTR(ret); | 189 | rqstp = ERR_PTR(ret); |
@@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) | |||
269 | serv, xprt, &rqstp, &callback_svc); | 269 | serv, xprt, &rqstp, &callback_svc); |
270 | if (!minorversion_setup) { | 270 | if (!minorversion_setup) { |
271 | /* v4.0 callback setup */ | 271 | /* v4.0 callback setup */ |
272 | rqstp = nfs4_callback_up(serv); | 272 | rqstp = nfs4_callback_up(serv, xprt); |
273 | callback_svc = nfs4_callback_svc; | 273 | callback_svc = nfs4_callback_svc; |
274 | } | 274 | } |
275 | 275 | ||
@@ -332,7 +332,6 @@ void nfs_callback_down(int minorversion) | |||
332 | int | 332 | int |
333 | check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) | 333 | check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) |
334 | { | 334 | { |
335 | struct rpc_clnt *r = clp->cl_rpcclient; | ||
336 | char *p = svc_gss_principal(rqstp); | 335 | char *p = svc_gss_principal(rqstp); |
337 | 336 | ||
338 | if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) | 337 | if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) |
@@ -353,7 +352,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) | |||
353 | if (memcmp(p, "nfs@", 4) != 0) | 352 | if (memcmp(p, "nfs@", 4) != 0) |
354 | return 0; | 353 | return 0; |
355 | p += 4; | 354 | p += 4; |
356 | if (strcmp(p, r->cl_server) != 0) | 355 | if (strcmp(p, clp->cl_hostname) != 0) |
357 | return 0; | 356 | return 0; |
358 | return 1; | 357 | return 1; |
359 | } | 358 | } |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c89d3b9e483c..a5527c90a5aa 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -38,7 +38,8 @@ enum nfs4_callback_opnum { | |||
38 | struct cb_process_state { | 38 | struct cb_process_state { |
39 | __be32 drc_status; | 39 | __be32 drc_status; |
40 | struct nfs_client *clp; | 40 | struct nfs_client *clp; |
41 | int slotid; | 41 | u32 slotid; |
42 | struct net *net; | ||
42 | }; | 43 | }; |
43 | 44 | ||
44 | struct cb_compound_hdr_arg { | 45 | struct cb_compound_hdr_arg { |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 54cea8ad5a76..1b5d809a105e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/nfs4.h> | 8 | #include <linux/nfs4.h> |
9 | #include <linux/nfs_fs.h> | 9 | #include <linux/nfs_fs.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/rcupdate.h> | ||
11 | #include "nfs4_fs.h" | 12 | #include "nfs4_fs.h" |
12 | #include "callback.h" | 13 | #include "callback.h" |
13 | #include "delegation.h" | 14 | #include "delegation.h" |
@@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, | |||
33 | res->bitmap[0] = res->bitmap[1] = 0; | 34 | res->bitmap[0] = res->bitmap[1] = 0; |
34 | res->status = htonl(NFS4ERR_BADHANDLE); | 35 | res->status = htonl(NFS4ERR_BADHANDLE); |
35 | 36 | ||
36 | dprintk("NFS: GETATTR callback request from %s\n", | 37 | dprintk_rcu("NFS: GETATTR callback request from %s\n", |
37 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); | 38 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
38 | 39 | ||
39 | inode = nfs_delegation_find_inode(cps->clp, &args->fh); | 40 | inode = nfs_delegation_find_inode(cps->clp, &args->fh); |
@@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, | |||
73 | if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ | 74 | if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ |
74 | goto out; | 75 | goto out; |
75 | 76 | ||
76 | dprintk("NFS: RECALL callback request from %s\n", | 77 | dprintk_rcu("NFS: RECALL callback request from %s\n", |
77 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); | 78 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
78 | 79 | ||
79 | res = htonl(NFS4ERR_BADHANDLE); | 80 | res = htonl(NFS4ERR_BADHANDLE); |
@@ -86,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, | |||
86 | res = 0; | 87 | res = 0; |
87 | break; | 88 | break; |
88 | case -ENOENT: | 89 | case -ENOENT: |
89 | if (res != 0) | 90 | res = htonl(NFS4ERR_BAD_STATEID); |
90 | res = htonl(NFS4ERR_BAD_STATEID); | ||
91 | break; | 91 | break; |
92 | default: | 92 | default: |
93 | res = htonl(NFS4ERR_RESOURCE); | 93 | res = htonl(NFS4ERR_RESOURCE); |
@@ -98,52 +98,64 @@ out: | |||
98 | return res; | 98 | return res; |
99 | } | 99 | } |
100 | 100 | ||
101 | int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) | ||
102 | { | ||
103 | if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, | ||
104 | sizeof(delegation->stateid.data)) != 0) | ||
105 | return 0; | ||
106 | return 1; | ||
107 | } | ||
108 | |||
109 | #if defined(CONFIG_NFS_V4_1) | 101 | #if defined(CONFIG_NFS_V4_1) |
110 | 102 | ||
111 | static u32 initiate_file_draining(struct nfs_client *clp, | 103 | /* |
112 | struct cb_layoutrecallargs *args) | 104 | * Lookup a layout by filehandle. |
105 | * | ||
106 | * Note: gets a refcount on the layout hdr and on its respective inode. | ||
107 | * Caller must put the layout hdr and the inode. | ||
108 | * | ||
109 | * TODO: keep track of all layouts (and delegations) in a hash table | ||
110 | * hashed by filehandle. | ||
111 | */ | ||
112 | static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) | ||
113 | { | 113 | { |
114 | struct nfs_server *server; | 114 | struct nfs_server *server; |
115 | struct pnfs_layout_hdr *lo; | ||
116 | struct inode *ino; | 115 | struct inode *ino; |
117 | bool found = false; | 116 | struct pnfs_layout_hdr *lo; |
118 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | ||
119 | LIST_HEAD(free_me_list); | ||
120 | 117 | ||
121 | spin_lock(&clp->cl_lock); | ||
122 | rcu_read_lock(); | ||
123 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 118 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
124 | list_for_each_entry(lo, &server->layouts, plh_layouts) { | 119 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
125 | if (nfs_compare_fh(&args->cbl_fh, | 120 | if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) |
126 | &NFS_I(lo->plh_inode)->fh)) | ||
127 | continue; | 121 | continue; |
128 | ino = igrab(lo->plh_inode); | 122 | ino = igrab(lo->plh_inode); |
129 | if (!ino) | 123 | if (!ino) |
130 | continue; | 124 | continue; |
131 | found = true; | ||
132 | /* Without this, layout can be freed as soon | ||
133 | * as we release cl_lock. | ||
134 | */ | ||
135 | get_layout_hdr(lo); | 125 | get_layout_hdr(lo); |
136 | break; | 126 | return lo; |
137 | } | 127 | } |
138 | if (found) | ||
139 | break; | ||
140 | } | 128 | } |
129 | |||
130 | return NULL; | ||
131 | } | ||
132 | |||
133 | static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) | ||
134 | { | ||
135 | struct pnfs_layout_hdr *lo; | ||
136 | |||
137 | spin_lock(&clp->cl_lock); | ||
138 | rcu_read_lock(); | ||
139 | lo = get_layout_by_fh_locked(clp, fh); | ||
141 | rcu_read_unlock(); | 140 | rcu_read_unlock(); |
142 | spin_unlock(&clp->cl_lock); | 141 | spin_unlock(&clp->cl_lock); |
143 | 142 | ||
144 | if (!found) | 143 | return lo; |
144 | } | ||
145 | |||
146 | static u32 initiate_file_draining(struct nfs_client *clp, | ||
147 | struct cb_layoutrecallargs *args) | ||
148 | { | ||
149 | struct inode *ino; | ||
150 | struct pnfs_layout_hdr *lo; | ||
151 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | ||
152 | LIST_HEAD(free_me_list); | ||
153 | |||
154 | lo = get_layout_by_fh(clp, &args->cbl_fh); | ||
155 | if (!lo) | ||
145 | return NFS4ERR_NOMATCHING_LAYOUT; | 156 | return NFS4ERR_NOMATCHING_LAYOUT; |
146 | 157 | ||
158 | ino = lo->plh_inode; | ||
147 | spin_lock(&ino->i_lock); | 159 | spin_lock(&ino->i_lock); |
148 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 160 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
149 | mark_matching_lsegs_invalid(lo, &free_me_list, | 161 | mark_matching_lsegs_invalid(lo, &free_me_list, |
@@ -213,17 +225,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
213 | static u32 do_callback_layoutrecall(struct nfs_client *clp, | 225 | static u32 do_callback_layoutrecall(struct nfs_client *clp, |
214 | struct cb_layoutrecallargs *args) | 226 | struct cb_layoutrecallargs *args) |
215 | { | 227 | { |
216 | u32 res = NFS4ERR_DELAY; | 228 | u32 res; |
217 | 229 | ||
218 | dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); | 230 | dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); |
219 | if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) | ||
220 | goto out; | ||
221 | if (args->cbl_recall_type == RETURN_FILE) | 231 | if (args->cbl_recall_type == RETURN_FILE) |
222 | res = initiate_file_draining(clp, args); | 232 | res = initiate_file_draining(clp, args); |
223 | else | 233 | else |
224 | res = initiate_bulk_draining(clp, args); | 234 | res = initiate_bulk_draining(clp, args); |
225 | clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state); | ||
226 | out: | ||
227 | dprintk("%s returning %i\n", __func__, res); | 235 | dprintk("%s returning %i\n", __func__, res); |
228 | return res; | 236 | return res; |
229 | 237 | ||
@@ -303,21 +311,6 @@ out: | |||
303 | return res; | 311 | return res; |
304 | } | 312 | } |
305 | 313 | ||
306 | int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) | ||
307 | { | ||
308 | if (delegation == NULL) | ||
309 | return 0; | ||
310 | |||
311 | if (stateid->stateid.seqid != 0) | ||
312 | return 0; | ||
313 | if (memcmp(&delegation->stateid.stateid.other, | ||
314 | &stateid->stateid.other, | ||
315 | NFS4_STATEID_OTHER_SIZE)) | ||
316 | return 0; | ||
317 | |||
318 | return 1; | ||
319 | } | ||
320 | |||
321 | /* | 314 | /* |
322 | * Validate the sequenceID sent by the server. | 315 | * Validate the sequenceID sent by the server. |
323 | * Return success if the sequenceID is one more than what we last saw on | 316 | * Return success if the sequenceID is one more than what we last saw on |
@@ -441,7 +434,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
441 | int i; | 434 | int i; |
442 | __be32 status = htonl(NFS4ERR_BADSESSION); | 435 | __be32 status = htonl(NFS4ERR_BADSESSION); |
443 | 436 | ||
444 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); | 437 | clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); |
445 | if (clp == NULL) | 438 | if (clp == NULL) |
446 | goto out; | 439 | goto out; |
447 | 440 | ||
@@ -517,7 +510,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, | |||
517 | if (!cps->clp) /* set in cb_sequence */ | 510 | if (!cps->clp) /* set in cb_sequence */ |
518 | goto out; | 511 | goto out; |
519 | 512 | ||
520 | dprintk("NFS: RECALL_ANY callback request from %s\n", | 513 | dprintk_rcu("NFS: RECALL_ANY callback request from %s\n", |
521 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); | 514 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
522 | 515 | ||
523 | status = cpu_to_be32(NFS4ERR_INVAL); | 516 | status = cpu_to_be32(NFS4ERR_INVAL); |
@@ -552,7 +545,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, | |||
552 | if (!cps->clp) /* set in cb_sequence */ | 545 | if (!cps->clp) /* set in cb_sequence */ |
553 | goto out; | 546 | goto out; |
554 | 547 | ||
555 | dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", | 548 | dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", |
556 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), | 549 | rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), |
557 | args->crsa_target_max_slots); | 550 | args->crsa_target_max_slots); |
558 | 551 | ||
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d50b2742f23b..95bfc243992c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -9,6 +9,8 @@ | |||
9 | #include <linux/sunrpc/svc.h> | 9 | #include <linux/sunrpc/svc.h> |
10 | #include <linux/nfs4.h> | 10 | #include <linux/nfs4.h> |
11 | #include <linux/nfs_fs.h> | 11 | #include <linux/nfs_fs.h> |
12 | #include <linux/ratelimit.h> | ||
13 | #include <linux/printk.h> | ||
12 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
13 | #include <linux/sunrpc/bc_xprt.h> | 15 | #include <linux/sunrpc/bc_xprt.h> |
14 | #include "nfs4_fs.h" | 16 | #include "nfs4_fs.h" |
@@ -73,7 +75,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) | |||
73 | 75 | ||
74 | p = xdr_inline_decode(xdr, nbytes); | 76 | p = xdr_inline_decode(xdr, nbytes); |
75 | if (unlikely(p == NULL)) | 77 | if (unlikely(p == NULL)) |
76 | printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); | 78 | printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); |
77 | return p; | 79 | return p; |
78 | } | 80 | } |
79 | 81 | ||
@@ -138,10 +140,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | |||
138 | { | 140 | { |
139 | __be32 *p; | 141 | __be32 *p; |
140 | 142 | ||
141 | p = read_buf(xdr, 16); | 143 | p = read_buf(xdr, NFS4_STATEID_SIZE); |
142 | if (unlikely(p == NULL)) | 144 | if (unlikely(p == NULL)) |
143 | return htonl(NFS4ERR_RESOURCE); | 145 | return htonl(NFS4ERR_RESOURCE); |
144 | memcpy(stateid->data, p, 16); | 146 | memcpy(stateid, p, NFS4_STATEID_SIZE); |
145 | return 0; | 147 | return 0; |
146 | } | 148 | } |
147 | 149 | ||
@@ -155,7 +157,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound | |||
155 | return status; | 157 | return status; |
156 | /* We do not like overly long tags! */ | 158 | /* We do not like overly long tags! */ |
157 | if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { | 159 | if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { |
158 | printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", | 160 | printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", |
159 | __func__, hdr->taglen); | 161 | __func__, hdr->taglen); |
160 | return htonl(NFS4ERR_RESOURCE); | 162 | return htonl(NFS4ERR_RESOURCE); |
161 | } | 163 | } |
@@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound | |||
167 | if (hdr->minorversion <= 1) { | 169 | if (hdr->minorversion <= 1) { |
168 | hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ | 170 | hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ |
169 | } else { | 171 | } else { |
170 | printk(KERN_WARNING "%s: NFSv4 server callback with " | 172 | pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " |
171 | "illegal minor version %u!\n", | 173 | "illegal minor version %u!\n", |
172 | __func__, hdr->minorversion); | 174 | __func__, hdr->minorversion); |
173 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | 175 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); |
@@ -759,14 +761,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) | |||
759 | * Let the state manager know callback processing done. | 761 | * Let the state manager know callback processing done. |
760 | * A single slot, so highest used slotid is either 0 or -1 | 762 | * A single slot, so highest used slotid is either 0 or -1 |
761 | */ | 763 | */ |
762 | tbl->highest_used_slotid = -1; | 764 | tbl->highest_used_slotid = NFS4_NO_SLOT; |
763 | nfs4_check_drain_bc_complete(session); | 765 | nfs4_check_drain_bc_complete(session); |
764 | spin_unlock(&tbl->slot_tbl_lock); | 766 | spin_unlock(&tbl->slot_tbl_lock); |
765 | } | 767 | } |
766 | 768 | ||
767 | static void nfs4_cb_free_slot(struct cb_process_state *cps) | 769 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
768 | { | 770 | { |
769 | if (cps->slotid != -1) | 771 | if (cps->slotid != NFS4_NO_SLOT) |
770 | nfs4_callback_free_slot(cps->clp->cl_session); | 772 | nfs4_callback_free_slot(cps->clp->cl_session); |
771 | } | 773 | } |
772 | 774 | ||
@@ -860,7 +862,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
860 | struct cb_process_state cps = { | 862 | struct cb_process_state cps = { |
861 | .drc_status = 0, | 863 | .drc_status = 0, |
862 | .clp = NULL, | 864 | .clp = NULL, |
863 | .slotid = -1, | 865 | .slotid = NFS4_NO_SLOT, |
866 | .net = rqstp->rq_xprt->xpt_net, | ||
864 | }; | 867 | }; |
865 | unsigned int nops = 0; | 868 | unsigned int nops = 0; |
866 | 869 | ||
@@ -876,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
876 | return rpc_garbage_args; | 879 | return rpc_garbage_args; |
877 | 880 | ||
878 | if (hdr_arg.minorversion == 0) { | 881 | if (hdr_arg.minorversion == 0) { |
879 | cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); | 882 | cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); |
880 | if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) | 883 | if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) |
881 | return rpc_drop_reply; | 884 | return rpc_drop_reply; |
882 | } | 885 | } |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ad5565acbf3b..da7b5e4ff9ec 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -40,6 +40,8 @@ | |||
40 | #include <net/ipv6.h> | 40 | #include <net/ipv6.h> |
41 | #include <linux/nfs_xdr.h> | 41 | #include <linux/nfs_xdr.h> |
42 | #include <linux/sunrpc/bc_xprt.h> | 42 | #include <linux/sunrpc/bc_xprt.h> |
43 | #include <linux/nsproxy.h> | ||
44 | #include <linux/pid_namespace.h> | ||
43 | 45 | ||
44 | 46 | ||
45 | #include "nfs4_fs.h" | 47 | #include "nfs4_fs.h" |
@@ -49,15 +51,12 @@ | |||
49 | #include "internal.h" | 51 | #include "internal.h" |
50 | #include "fscache.h" | 52 | #include "fscache.h" |
51 | #include "pnfs.h" | 53 | #include "pnfs.h" |
54 | #include "netns.h" | ||
52 | 55 | ||
53 | #define NFSDBG_FACILITY NFSDBG_CLIENT | 56 | #define NFSDBG_FACILITY NFSDBG_CLIENT |
54 | 57 | ||
55 | static DEFINE_SPINLOCK(nfs_client_lock); | ||
56 | static LIST_HEAD(nfs_client_list); | ||
57 | static LIST_HEAD(nfs_volume_list); | ||
58 | static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); | 58 | static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); |
59 | #ifdef CONFIG_NFS_V4 | 59 | #ifdef CONFIG_NFS_V4 |
60 | static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ | ||
61 | 60 | ||
62 | /* | 61 | /* |
63 | * Get a unique NFSv4.0 callback identifier which will be used | 62 | * Get a unique NFSv4.0 callback identifier which will be used |
@@ -66,15 +65,16 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ | |||
66 | static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) | 65 | static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) |
67 | { | 66 | { |
68 | int ret = 0; | 67 | int ret = 0; |
68 | struct nfs_net *nn = net_generic(clp->net, nfs_net_id); | ||
69 | 69 | ||
70 | if (clp->rpc_ops->version != 4 || minorversion != 0) | 70 | if (clp->rpc_ops->version != 4 || minorversion != 0) |
71 | return ret; | 71 | return ret; |
72 | retry: | 72 | retry: |
73 | if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) | 73 | if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) |
74 | return -ENOMEM; | 74 | return -ENOMEM; |
75 | spin_lock(&nfs_client_lock); | 75 | spin_lock(&nn->nfs_client_lock); |
76 | ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); | 76 | ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); |
77 | spin_unlock(&nfs_client_lock); | 77 | spin_unlock(&nn->nfs_client_lock); |
78 | if (ret == -EAGAIN) | 78 | if (ret == -EAGAIN) |
79 | goto retry; | 79 | goto retry; |
80 | return ret; | 80 | return ret; |
@@ -89,7 +89,7 @@ static bool nfs4_disable_idmapping = true; | |||
89 | /* | 89 | /* |
90 | * RPC cruft for NFS | 90 | * RPC cruft for NFS |
91 | */ | 91 | */ |
92 | static struct rpc_version *nfs_version[5] = { | 92 | static const struct rpc_version *nfs_version[5] = { |
93 | [2] = &nfs_version2, | 93 | [2] = &nfs_version2, |
94 | #ifdef CONFIG_NFS_V3 | 94 | #ifdef CONFIG_NFS_V3 |
95 | [3] = &nfs_version3, | 95 | [3] = &nfs_version3, |
@@ -99,7 +99,7 @@ static struct rpc_version *nfs_version[5] = { | |||
99 | #endif | 99 | #endif |
100 | }; | 100 | }; |
101 | 101 | ||
102 | struct rpc_program nfs_program = { | 102 | const struct rpc_program nfs_program = { |
103 | .name = "nfs", | 103 | .name = "nfs", |
104 | .number = NFS_PROGRAM, | 104 | .number = NFS_PROGRAM, |
105 | .nrvers = ARRAY_SIZE(nfs_version), | 105 | .nrvers = ARRAY_SIZE(nfs_version), |
@@ -115,11 +115,11 @@ struct rpc_stat nfs_rpcstat = { | |||
115 | 115 | ||
116 | #ifdef CONFIG_NFS_V3_ACL | 116 | #ifdef CONFIG_NFS_V3_ACL |
117 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; | 117 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; |
118 | static struct rpc_version * nfsacl_version[] = { | 118 | static const struct rpc_version *nfsacl_version[] = { |
119 | [3] = &nfsacl_version3, | 119 | [3] = &nfsacl_version3, |
120 | }; | 120 | }; |
121 | 121 | ||
122 | struct rpc_program nfsacl_program = { | 122 | const struct rpc_program nfsacl_program = { |
123 | .name = "nfsacl", | 123 | .name = "nfsacl", |
124 | .number = NFS_ACL_PROGRAM, | 124 | .number = NFS_ACL_PROGRAM, |
125 | .nrvers = ARRAY_SIZE(nfsacl_version), | 125 | .nrvers = ARRAY_SIZE(nfsacl_version), |
@@ -135,6 +135,7 @@ struct nfs_client_initdata { | |||
135 | const struct nfs_rpc_ops *rpc_ops; | 135 | const struct nfs_rpc_ops *rpc_ops; |
136 | int proto; | 136 | int proto; |
137 | u32 minorversion; | 137 | u32 minorversion; |
138 | struct net *net; | ||
138 | }; | 139 | }; |
139 | 140 | ||
140 | /* | 141 | /* |
@@ -171,6 +172,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
171 | clp->cl_rpcclient = ERR_PTR(-EINVAL); | 172 | clp->cl_rpcclient = ERR_PTR(-EINVAL); |
172 | 173 | ||
173 | clp->cl_proto = cl_init->proto; | 174 | clp->cl_proto = cl_init->proto; |
175 | clp->net = get_net(cl_init->net); | ||
174 | 176 | ||
175 | #ifdef CONFIG_NFS_V4 | 177 | #ifdef CONFIG_NFS_V4 |
176 | err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); | 178 | err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); |
@@ -202,8 +204,11 @@ error_0: | |||
202 | #ifdef CONFIG_NFS_V4_1 | 204 | #ifdef CONFIG_NFS_V4_1 |
203 | static void nfs4_shutdown_session(struct nfs_client *clp) | 205 | static void nfs4_shutdown_session(struct nfs_client *clp) |
204 | { | 206 | { |
205 | if (nfs4_has_session(clp)) | 207 | if (nfs4_has_session(clp)) { |
208 | nfs4_deviceid_purge_client(clp); | ||
206 | nfs4_destroy_session(clp->cl_session); | 209 | nfs4_destroy_session(clp->cl_session); |
210 | } | ||
211 | |||
207 | } | 212 | } |
208 | #else /* CONFIG_NFS_V4_1 */ | 213 | #else /* CONFIG_NFS_V4_1 */ |
209 | static void nfs4_shutdown_session(struct nfs_client *clp) | 214 | static void nfs4_shutdown_session(struct nfs_client *clp) |
@@ -233,16 +238,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp) | |||
233 | } | 238 | } |
234 | 239 | ||
235 | /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ | 240 | /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ |
236 | void nfs_cleanup_cb_ident_idr(void) | 241 | void nfs_cleanup_cb_ident_idr(struct net *net) |
237 | { | 242 | { |
238 | idr_destroy(&cb_ident_idr); | 243 | struct nfs_net *nn = net_generic(net, nfs_net_id); |
244 | |||
245 | idr_destroy(&nn->cb_ident_idr); | ||
239 | } | 246 | } |
240 | 247 | ||
241 | /* nfs_client_lock held */ | 248 | /* nfs_client_lock held */ |
242 | static void nfs_cb_idr_remove_locked(struct nfs_client *clp) | 249 | static void nfs_cb_idr_remove_locked(struct nfs_client *clp) |
243 | { | 250 | { |
251 | struct nfs_net *nn = net_generic(clp->net, nfs_net_id); | ||
252 | |||
244 | if (clp->cl_cb_ident) | 253 | if (clp->cl_cb_ident) |
245 | idr_remove(&cb_ident_idr, clp->cl_cb_ident); | 254 | idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); |
246 | } | 255 | } |
247 | 256 | ||
248 | static void pnfs_init_server(struct nfs_server *server) | 257 | static void pnfs_init_server(struct nfs_server *server) |
@@ -260,7 +269,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) | |||
260 | { | 269 | { |
261 | } | 270 | } |
262 | 271 | ||
263 | void nfs_cleanup_cb_ident_idr(void) | 272 | void nfs_cleanup_cb_ident_idr(struct net *net) |
264 | { | 273 | { |
265 | } | 274 | } |
266 | 275 | ||
@@ -292,10 +301,10 @@ static void nfs_free_client(struct nfs_client *clp) | |||
292 | if (clp->cl_machine_cred != NULL) | 301 | if (clp->cl_machine_cred != NULL) |
293 | put_rpccred(clp->cl_machine_cred); | 302 | put_rpccred(clp->cl_machine_cred); |
294 | 303 | ||
295 | nfs4_deviceid_purge_client(clp); | 304 | put_net(clp->net); |
296 | |||
297 | kfree(clp->cl_hostname); | 305 | kfree(clp->cl_hostname); |
298 | kfree(clp->server_scope); | 306 | kfree(clp->server_scope); |
307 | kfree(clp->impl_id); | ||
299 | kfree(clp); | 308 | kfree(clp); |
300 | 309 | ||
301 | dprintk("<-- nfs_free_client()\n"); | 310 | dprintk("<-- nfs_free_client()\n"); |
@@ -306,15 +315,18 @@ static void nfs_free_client(struct nfs_client *clp) | |||
306 | */ | 315 | */ |
307 | void nfs_put_client(struct nfs_client *clp) | 316 | void nfs_put_client(struct nfs_client *clp) |
308 | { | 317 | { |
318 | struct nfs_net *nn; | ||
319 | |||
309 | if (!clp) | 320 | if (!clp) |
310 | return; | 321 | return; |
311 | 322 | ||
312 | dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); | 323 | dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); |
324 | nn = net_generic(clp->net, nfs_net_id); | ||
313 | 325 | ||
314 | if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { | 326 | if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { |
315 | list_del(&clp->cl_share_link); | 327 | list_del(&clp->cl_share_link); |
316 | nfs_cb_idr_remove_locked(clp); | 328 | nfs_cb_idr_remove_locked(clp); |
317 | spin_unlock(&nfs_client_lock); | 329 | spin_unlock(&nn->nfs_client_lock); |
318 | 330 | ||
319 | BUG_ON(!list_empty(&clp->cl_superblocks)); | 331 | BUG_ON(!list_empty(&clp->cl_superblocks)); |
320 | 332 | ||
@@ -392,6 +404,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, | |||
392 | (sin1->sin_port == sin2->sin_port); | 404 | (sin1->sin_port == sin2->sin_port); |
393 | } | 405 | } |
394 | 406 | ||
407 | #if defined(CONFIG_NFS_V4_1) | ||
395 | /* | 408 | /* |
396 | * Test if two socket addresses represent the same actual socket, | 409 | * Test if two socket addresses represent the same actual socket, |
397 | * by comparing (only) relevant fields, excluding the port number. | 410 | * by comparing (only) relevant fields, excluding the port number. |
@@ -410,6 +423,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, | |||
410 | } | 423 | } |
411 | return 0; | 424 | return 0; |
412 | } | 425 | } |
426 | #endif /* CONFIG_NFS_V4_1 */ | ||
413 | 427 | ||
414 | /* | 428 | /* |
415 | * Test if two socket addresses represent the same actual socket, | 429 | * Test if two socket addresses represent the same actual socket, |
@@ -430,10 +444,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, | |||
430 | return 0; | 444 | return 0; |
431 | } | 445 | } |
432 | 446 | ||
447 | #if defined(CONFIG_NFS_V4_1) | ||
433 | /* Common match routine for v4.0 and v4.1 callback services */ | 448 | /* Common match routine for v4.0 and v4.1 callback services */ |
434 | bool | 449 | static bool nfs4_cb_match_client(const struct sockaddr *addr, |
435 | nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, | 450 | struct nfs_client *clp, u32 minorversion) |
436 | u32 minorversion) | ||
437 | { | 451 | { |
438 | struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; | 452 | struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; |
439 | 453 | ||
@@ -453,6 +467,7 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, | |||
453 | 467 | ||
454 | return true; | 468 | return true; |
455 | } | 469 | } |
470 | #endif /* CONFIG_NFS_V4_1 */ | ||
456 | 471 | ||
457 | /* | 472 | /* |
458 | * Find an nfs_client on the list that matches the initialisation data | 473 | * Find an nfs_client on the list that matches the initialisation data |
@@ -462,8 +477,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat | |||
462 | { | 477 | { |
463 | struct nfs_client *clp; | 478 | struct nfs_client *clp; |
464 | const struct sockaddr *sap = data->addr; | 479 | const struct sockaddr *sap = data->addr; |
480 | struct nfs_net *nn = net_generic(data->net, nfs_net_id); | ||
465 | 481 | ||
466 | list_for_each_entry(clp, &nfs_client_list, cl_share_link) { | 482 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { |
467 | const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; | 483 | const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; |
468 | /* Don't match clients that failed to initialise properly */ | 484 | /* Don't match clients that failed to initialise properly */ |
469 | if (clp->cl_cons_state < 0) | 485 | if (clp->cl_cons_state < 0) |
@@ -501,13 +517,14 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, | |||
501 | { | 517 | { |
502 | struct nfs_client *clp, *new = NULL; | 518 | struct nfs_client *clp, *new = NULL; |
503 | int error; | 519 | int error; |
520 | struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); | ||
504 | 521 | ||
505 | dprintk("--> nfs_get_client(%s,v%u)\n", | 522 | dprintk("--> nfs_get_client(%s,v%u)\n", |
506 | cl_init->hostname ?: "", cl_init->rpc_ops->version); | 523 | cl_init->hostname ?: "", cl_init->rpc_ops->version); |
507 | 524 | ||
508 | /* see if the client already exists */ | 525 | /* see if the client already exists */ |
509 | do { | 526 | do { |
510 | spin_lock(&nfs_client_lock); | 527 | spin_lock(&nn->nfs_client_lock); |
511 | 528 | ||
512 | clp = nfs_match_client(cl_init); | 529 | clp = nfs_match_client(cl_init); |
513 | if (clp) | 530 | if (clp) |
@@ -515,7 +532,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, | |||
515 | if (new) | 532 | if (new) |
516 | goto install_client; | 533 | goto install_client; |
517 | 534 | ||
518 | spin_unlock(&nfs_client_lock); | 535 | spin_unlock(&nn->nfs_client_lock); |
519 | 536 | ||
520 | new = nfs_alloc_client(cl_init); | 537 | new = nfs_alloc_client(cl_init); |
521 | } while (!IS_ERR(new)); | 538 | } while (!IS_ERR(new)); |
@@ -526,8 +543,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, | |||
526 | /* install a new client and return with it unready */ | 543 | /* install a new client and return with it unready */ |
527 | install_client: | 544 | install_client: |
528 | clp = new; | 545 | clp = new; |
529 | list_add(&clp->cl_share_link, &nfs_client_list); | 546 | list_add(&clp->cl_share_link, &nn->nfs_client_list); |
530 | spin_unlock(&nfs_client_lock); | 547 | spin_unlock(&nn->nfs_client_lock); |
531 | 548 | ||
532 | error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, | 549 | error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, |
533 | authflavour, noresvport); | 550 | authflavour, noresvport); |
@@ -542,7 +559,7 @@ install_client: | |||
542 | * - make sure it's ready before returning | 559 | * - make sure it's ready before returning |
543 | */ | 560 | */ |
544 | found_client: | 561 | found_client: |
545 | spin_unlock(&nfs_client_lock); | 562 | spin_unlock(&nn->nfs_client_lock); |
546 | 563 | ||
547 | if (new) | 564 | if (new) |
548 | nfs_free_client(new); | 565 | nfs_free_client(new); |
@@ -642,7 +659,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, | |||
642 | { | 659 | { |
643 | struct rpc_clnt *clnt = NULL; | 660 | struct rpc_clnt *clnt = NULL; |
644 | struct rpc_create_args args = { | 661 | struct rpc_create_args args = { |
645 | .net = &init_net, | 662 | .net = clp->net, |
646 | .protocol = clp->cl_proto, | 663 | .protocol = clp->cl_proto, |
647 | .address = (struct sockaddr *)&clp->cl_addr, | 664 | .address = (struct sockaddr *)&clp->cl_addr, |
648 | .addrsize = clp->cl_addrlen, | 665 | .addrsize = clp->cl_addrlen, |
@@ -696,6 +713,7 @@ static int nfs_start_lockd(struct nfs_server *server) | |||
696 | .nfs_version = clp->rpc_ops->version, | 713 | .nfs_version = clp->rpc_ops->version, |
697 | .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? | 714 | .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? |
698 | 1 : 0, | 715 | 1 : 0, |
716 | .net = clp->net, | ||
699 | }; | 717 | }; |
700 | 718 | ||
701 | if (nlm_init.nfs_version > 3) | 719 | if (nlm_init.nfs_version > 3) |
@@ -831,6 +849,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
831 | .addrlen = data->nfs_server.addrlen, | 849 | .addrlen = data->nfs_server.addrlen, |
832 | .rpc_ops = &nfs_v2_clientops, | 850 | .rpc_ops = &nfs_v2_clientops, |
833 | .proto = data->nfs_server.protocol, | 851 | .proto = data->nfs_server.protocol, |
852 | .net = data->net, | ||
834 | }; | 853 | }; |
835 | struct rpc_timeout timeparms; | 854 | struct rpc_timeout timeparms; |
836 | struct nfs_client *clp; | 855 | struct nfs_client *clp; |
@@ -1029,25 +1048,30 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve | |||
1029 | static void nfs_server_insert_lists(struct nfs_server *server) | 1048 | static void nfs_server_insert_lists(struct nfs_server *server) |
1030 | { | 1049 | { |
1031 | struct nfs_client *clp = server->nfs_client; | 1050 | struct nfs_client *clp = server->nfs_client; |
1051 | struct nfs_net *nn = net_generic(clp->net, nfs_net_id); | ||
1032 | 1052 | ||
1033 | spin_lock(&nfs_client_lock); | 1053 | spin_lock(&nn->nfs_client_lock); |
1034 | list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); | 1054 | list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); |
1035 | list_add_tail(&server->master_link, &nfs_volume_list); | 1055 | list_add_tail(&server->master_link, &nn->nfs_volume_list); |
1036 | clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); | 1056 | clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); |
1037 | spin_unlock(&nfs_client_lock); | 1057 | spin_unlock(&nn->nfs_client_lock); |
1038 | 1058 | ||
1039 | } | 1059 | } |
1040 | 1060 | ||
1041 | static void nfs_server_remove_lists(struct nfs_server *server) | 1061 | static void nfs_server_remove_lists(struct nfs_server *server) |
1042 | { | 1062 | { |
1043 | struct nfs_client *clp = server->nfs_client; | 1063 | struct nfs_client *clp = server->nfs_client; |
1064 | struct nfs_net *nn; | ||
1044 | 1065 | ||
1045 | spin_lock(&nfs_client_lock); | 1066 | if (clp == NULL) |
1067 | return; | ||
1068 | nn = net_generic(clp->net, nfs_net_id); | ||
1069 | spin_lock(&nn->nfs_client_lock); | ||
1046 | list_del_rcu(&server->client_link); | 1070 | list_del_rcu(&server->client_link); |
1047 | if (clp && list_empty(&clp->cl_superblocks)) | 1071 | if (list_empty(&clp->cl_superblocks)) |
1048 | set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); | 1072 | set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); |
1049 | list_del(&server->master_link); | 1073 | list_del(&server->master_link); |
1050 | spin_unlock(&nfs_client_lock); | 1074 | spin_unlock(&nn->nfs_client_lock); |
1051 | 1075 | ||
1052 | synchronize_rcu(); | 1076 | synchronize_rcu(); |
1053 | } | 1077 | } |
@@ -1086,6 +1110,8 @@ static struct nfs_server *nfs_alloc_server(void) | |||
1086 | return NULL; | 1110 | return NULL; |
1087 | } | 1111 | } |
1088 | 1112 | ||
1113 | ida_init(&server->openowner_id); | ||
1114 | ida_init(&server->lockowner_id); | ||
1089 | pnfs_init_server(server); | 1115 | pnfs_init_server(server); |
1090 | 1116 | ||
1091 | return server; | 1117 | return server; |
@@ -1111,6 +1137,8 @@ void nfs_free_server(struct nfs_server *server) | |||
1111 | 1137 | ||
1112 | nfs_put_client(server->nfs_client); | 1138 | nfs_put_client(server->nfs_client); |
1113 | 1139 | ||
1140 | ida_destroy(&server->lockowner_id); | ||
1141 | ida_destroy(&server->openowner_id); | ||
1114 | nfs_free_iostats(server->io_stats); | 1142 | nfs_free_iostats(server->io_stats); |
1115 | bdi_destroy(&server->backing_dev_info); | 1143 | bdi_destroy(&server->backing_dev_info); |
1116 | kfree(server); | 1144 | kfree(server); |
@@ -1189,45 +1217,19 @@ error: | |||
1189 | /* | 1217 | /* |
1190 | * NFSv4.0 callback thread helper | 1218 | * NFSv4.0 callback thread helper |
1191 | * | 1219 | * |
1192 | * Find a client by IP address, protocol version, and minorversion | ||
1193 | * | ||
1194 | * Called from the pg_authenticate method. The callback identifier | ||
1195 | * is not used as it has not been decoded. | ||
1196 | * | ||
1197 | * Returns NULL if no such client | ||
1198 | */ | ||
1199 | struct nfs_client * | ||
1200 | nfs4_find_client_no_ident(const struct sockaddr *addr) | ||
1201 | { | ||
1202 | struct nfs_client *clp; | ||
1203 | |||
1204 | spin_lock(&nfs_client_lock); | ||
1205 | list_for_each_entry(clp, &nfs_client_list, cl_share_link) { | ||
1206 | if (nfs4_cb_match_client(addr, clp, 0) == false) | ||
1207 | continue; | ||
1208 | atomic_inc(&clp->cl_count); | ||
1209 | spin_unlock(&nfs_client_lock); | ||
1210 | return clp; | ||
1211 | } | ||
1212 | spin_unlock(&nfs_client_lock); | ||
1213 | return NULL; | ||
1214 | } | ||
1215 | |||
1216 | /* | ||
1217 | * NFSv4.0 callback thread helper | ||
1218 | * | ||
1219 | * Find a client by callback identifier | 1220 | * Find a client by callback identifier |
1220 | */ | 1221 | */ |
1221 | struct nfs_client * | 1222 | struct nfs_client * |
1222 | nfs4_find_client_ident(int cb_ident) | 1223 | nfs4_find_client_ident(struct net *net, int cb_ident) |
1223 | { | 1224 | { |
1224 | struct nfs_client *clp; | 1225 | struct nfs_client *clp; |
1226 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1225 | 1227 | ||
1226 | spin_lock(&nfs_client_lock); | 1228 | spin_lock(&nn->nfs_client_lock); |
1227 | clp = idr_find(&cb_ident_idr, cb_ident); | 1229 | clp = idr_find(&nn->cb_ident_idr, cb_ident); |
1228 | if (clp) | 1230 | if (clp) |
1229 | atomic_inc(&clp->cl_count); | 1231 | atomic_inc(&clp->cl_count); |
1230 | spin_unlock(&nfs_client_lock); | 1232 | spin_unlock(&nn->nfs_client_lock); |
1231 | return clp; | 1233 | return clp; |
1232 | } | 1234 | } |
1233 | 1235 | ||
@@ -1240,13 +1242,14 @@ nfs4_find_client_ident(int cb_ident) | |||
1240 | * Returns NULL if no such client | 1242 | * Returns NULL if no such client |
1241 | */ | 1243 | */ |
1242 | struct nfs_client * | 1244 | struct nfs_client * |
1243 | nfs4_find_client_sessionid(const struct sockaddr *addr, | 1245 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, |
1244 | struct nfs4_sessionid *sid) | 1246 | struct nfs4_sessionid *sid) |
1245 | { | 1247 | { |
1246 | struct nfs_client *clp; | 1248 | struct nfs_client *clp; |
1249 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1247 | 1250 | ||
1248 | spin_lock(&nfs_client_lock); | 1251 | spin_lock(&nn->nfs_client_lock); |
1249 | list_for_each_entry(clp, &nfs_client_list, cl_share_link) { | 1252 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { |
1250 | if (nfs4_cb_match_client(addr, clp, 1) == false) | 1253 | if (nfs4_cb_match_client(addr, clp, 1) == false) |
1251 | continue; | 1254 | continue; |
1252 | 1255 | ||
@@ -1259,17 +1262,17 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, | |||
1259 | continue; | 1262 | continue; |
1260 | 1263 | ||
1261 | atomic_inc(&clp->cl_count); | 1264 | atomic_inc(&clp->cl_count); |
1262 | spin_unlock(&nfs_client_lock); | 1265 | spin_unlock(&nn->nfs_client_lock); |
1263 | return clp; | 1266 | return clp; |
1264 | } | 1267 | } |
1265 | spin_unlock(&nfs_client_lock); | 1268 | spin_unlock(&nn->nfs_client_lock); |
1266 | return NULL; | 1269 | return NULL; |
1267 | } | 1270 | } |
1268 | 1271 | ||
1269 | #else /* CONFIG_NFS_V4_1 */ | 1272 | #else /* CONFIG_NFS_V4_1 */ |
1270 | 1273 | ||
1271 | struct nfs_client * | 1274 | struct nfs_client * |
1272 | nfs4_find_client_sessionid(const struct sockaddr *addr, | 1275 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, |
1273 | struct nfs4_sessionid *sid) | 1276 | struct nfs4_sessionid *sid) |
1274 | { | 1277 | { |
1275 | return NULL; | 1278 | return NULL; |
@@ -1284,16 +1287,18 @@ static int nfs4_init_callback(struct nfs_client *clp) | |||
1284 | int error; | 1287 | int error; |
1285 | 1288 | ||
1286 | if (clp->rpc_ops->version == 4) { | 1289 | if (clp->rpc_ops->version == 4) { |
1290 | struct rpc_xprt *xprt; | ||
1291 | |||
1292 | xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); | ||
1293 | |||
1287 | if (nfs4_has_session(clp)) { | 1294 | if (nfs4_has_session(clp)) { |
1288 | error = xprt_setup_backchannel( | 1295 | error = xprt_setup_backchannel(xprt, |
1289 | clp->cl_rpcclient->cl_xprt, | ||
1290 | NFS41_BC_MIN_CALLBACKS); | 1296 | NFS41_BC_MIN_CALLBACKS); |
1291 | if (error < 0) | 1297 | if (error < 0) |
1292 | return error; | 1298 | return error; |
1293 | } | 1299 | } |
1294 | 1300 | ||
1295 | error = nfs_callback_up(clp->cl_mvops->minor_version, | 1301 | error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); |
1296 | clp->cl_rpcclient->cl_xprt); | ||
1297 | if (error < 0) { | 1302 | if (error < 0) { |
1298 | dprintk("%s: failed to start callback. Error = %d\n", | 1303 | dprintk("%s: failed to start callback. Error = %d\n", |
1299 | __func__, error); | 1304 | __func__, error); |
@@ -1344,6 +1349,7 @@ int nfs4_init_client(struct nfs_client *clp, | |||
1344 | rpc_authflavor_t authflavour, | 1349 | rpc_authflavor_t authflavour, |
1345 | int noresvport) | 1350 | int noresvport) |
1346 | { | 1351 | { |
1352 | char buf[INET6_ADDRSTRLEN + 1]; | ||
1347 | int error; | 1353 | int error; |
1348 | 1354 | ||
1349 | if (clp->cl_cons_state == NFS_CS_READY) { | 1355 | if (clp->cl_cons_state == NFS_CS_READY) { |
@@ -1359,6 +1365,20 @@ int nfs4_init_client(struct nfs_client *clp, | |||
1359 | 1, noresvport); | 1365 | 1, noresvport); |
1360 | if (error < 0) | 1366 | if (error < 0) |
1361 | goto error; | 1367 | goto error; |
1368 | |||
1369 | /* If no clientaddr= option was specified, find a usable cb address */ | ||
1370 | if (ip_addr == NULL) { | ||
1371 | struct sockaddr_storage cb_addr; | ||
1372 | struct sockaddr *sap = (struct sockaddr *)&cb_addr; | ||
1373 | |||
1374 | error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); | ||
1375 | if (error < 0) | ||
1376 | goto error; | ||
1377 | error = rpc_ntop(sap, buf, sizeof(buf)); | ||
1378 | if (error < 0) | ||
1379 | goto error; | ||
1380 | ip_addr = (const char *)buf; | ||
1381 | } | ||
1362 | strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); | 1382 | strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); |
1363 | 1383 | ||
1364 | error = nfs_idmap_new(clp); | 1384 | error = nfs_idmap_new(clp); |
@@ -1393,7 +1413,7 @@ static int nfs4_set_client(struct nfs_server *server, | |||
1393 | const char *ip_addr, | 1413 | const char *ip_addr, |
1394 | rpc_authflavor_t authflavour, | 1414 | rpc_authflavor_t authflavour, |
1395 | int proto, const struct rpc_timeout *timeparms, | 1415 | int proto, const struct rpc_timeout *timeparms, |
1396 | u32 minorversion) | 1416 | u32 minorversion, struct net *net) |
1397 | { | 1417 | { |
1398 | struct nfs_client_initdata cl_init = { | 1418 | struct nfs_client_initdata cl_init = { |
1399 | .hostname = hostname, | 1419 | .hostname = hostname, |
@@ -1402,6 +1422,7 @@ static int nfs4_set_client(struct nfs_server *server, | |||
1402 | .rpc_ops = &nfs_v4_clientops, | 1422 | .rpc_ops = &nfs_v4_clientops, |
1403 | .proto = proto, | 1423 | .proto = proto, |
1404 | .minorversion = minorversion, | 1424 | .minorversion = minorversion, |
1425 | .net = net, | ||
1405 | }; | 1426 | }; |
1406 | struct nfs_client *clp; | 1427 | struct nfs_client *clp; |
1407 | int error; | 1428 | int error; |
@@ -1453,6 +1474,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||
1453 | .rpc_ops = &nfs_v4_clientops, | 1474 | .rpc_ops = &nfs_v4_clientops, |
1454 | .proto = ds_proto, | 1475 | .proto = ds_proto, |
1455 | .minorversion = mds_clp->cl_minorversion, | 1476 | .minorversion = mds_clp->cl_minorversion, |
1477 | .net = mds_clp->net, | ||
1456 | }; | 1478 | }; |
1457 | struct rpc_timeout ds_timeout = { | 1479 | struct rpc_timeout ds_timeout = { |
1458 | .to_initval = 15 * HZ, | 1480 | .to_initval = 15 * HZ, |
@@ -1580,7 +1602,8 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1580 | data->auth_flavors[0], | 1602 | data->auth_flavors[0], |
1581 | data->nfs_server.protocol, | 1603 | data->nfs_server.protocol, |
1582 | &timeparms, | 1604 | &timeparms, |
1583 | data->minorversion); | 1605 | data->minorversion, |
1606 | data->net); | ||
1584 | if (error < 0) | 1607 | if (error < 0) |
1585 | goto error; | 1608 | goto error; |
1586 | 1609 | ||
@@ -1675,9 +1698,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, | |||
1675 | data->addrlen, | 1698 | data->addrlen, |
1676 | parent_client->cl_ipaddr, | 1699 | parent_client->cl_ipaddr, |
1677 | data->authflavor, | 1700 | data->authflavor, |
1678 | parent_server->client->cl_xprt->prot, | 1701 | rpc_protocol(parent_server->client), |
1679 | parent_server->client->cl_timeout, | 1702 | parent_server->client->cl_timeout, |
1680 | parent_client->cl_mvops->minor_version); | 1703 | parent_client->cl_mvops->minor_version, |
1704 | parent_client->net); | ||
1681 | if (error < 0) | 1705 | if (error < 0) |
1682 | goto error; | 1706 | goto error; |
1683 | 1707 | ||
@@ -1770,6 +1794,18 @@ out_free_server: | |||
1770 | return ERR_PTR(error); | 1794 | return ERR_PTR(error); |
1771 | } | 1795 | } |
1772 | 1796 | ||
1797 | void nfs_clients_init(struct net *net) | ||
1798 | { | ||
1799 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1800 | |||
1801 | INIT_LIST_HEAD(&nn->nfs_client_list); | ||
1802 | INIT_LIST_HEAD(&nn->nfs_volume_list); | ||
1803 | #ifdef CONFIG_NFS_V4 | ||
1804 | idr_init(&nn->cb_ident_idr); | ||
1805 | #endif | ||
1806 | spin_lock_init(&nn->nfs_client_lock); | ||
1807 | } | ||
1808 | |||
1773 | #ifdef CONFIG_PROC_FS | 1809 | #ifdef CONFIG_PROC_FS |
1774 | static struct proc_dir_entry *proc_fs_nfs; | 1810 | static struct proc_dir_entry *proc_fs_nfs; |
1775 | 1811 | ||
@@ -1823,13 +1859,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
1823 | { | 1859 | { |
1824 | struct seq_file *m; | 1860 | struct seq_file *m; |
1825 | int ret; | 1861 | int ret; |
1862 | struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; | ||
1863 | struct net *net = pid_ns->child_reaper->nsproxy->net_ns; | ||
1826 | 1864 | ||
1827 | ret = seq_open(file, &nfs_server_list_ops); | 1865 | ret = seq_open(file, &nfs_server_list_ops); |
1828 | if (ret < 0) | 1866 | if (ret < 0) |
1829 | return ret; | 1867 | return ret; |
1830 | 1868 | ||
1831 | m = file->private_data; | 1869 | m = file->private_data; |
1832 | m->private = PDE(inode)->data; | 1870 | m->private = net; |
1833 | 1871 | ||
1834 | return 0; | 1872 | return 0; |
1835 | } | 1873 | } |
@@ -1839,9 +1877,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
1839 | */ | 1877 | */ |
1840 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | 1878 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) |
1841 | { | 1879 | { |
1880 | struct nfs_net *nn = net_generic(m->private, nfs_net_id); | ||
1881 | |||
1842 | /* lock the list against modification */ | 1882 | /* lock the list against modification */ |
1843 | spin_lock(&nfs_client_lock); | 1883 | spin_lock(&nn->nfs_client_lock); |
1844 | return seq_list_start_head(&nfs_client_list, *_pos); | 1884 | return seq_list_start_head(&nn->nfs_client_list, *_pos); |
1845 | } | 1885 | } |
1846 | 1886 | ||
1847 | /* | 1887 | /* |
@@ -1849,7 +1889,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | |||
1849 | */ | 1889 | */ |
1850 | static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | 1890 | static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) |
1851 | { | 1891 | { |
1852 | return seq_list_next(v, &nfs_client_list, pos); | 1892 | struct nfs_net *nn = net_generic(p->private, nfs_net_id); |
1893 | |||
1894 | return seq_list_next(v, &nn->nfs_client_list, pos); | ||
1853 | } | 1895 | } |
1854 | 1896 | ||
1855 | /* | 1897 | /* |
@@ -1857,7 +1899,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1857 | */ | 1899 | */ |
1858 | static void nfs_server_list_stop(struct seq_file *p, void *v) | 1900 | static void nfs_server_list_stop(struct seq_file *p, void *v) |
1859 | { | 1901 | { |
1860 | spin_unlock(&nfs_client_lock); | 1902 | struct nfs_net *nn = net_generic(p->private, nfs_net_id); |
1903 | |||
1904 | spin_unlock(&nn->nfs_client_lock); | ||
1861 | } | 1905 | } |
1862 | 1906 | ||
1863 | /* | 1907 | /* |
@@ -1866,9 +1910,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) | |||
1866 | static int nfs_server_list_show(struct seq_file *m, void *v) | 1910 | static int nfs_server_list_show(struct seq_file *m, void *v) |
1867 | { | 1911 | { |
1868 | struct nfs_client *clp; | 1912 | struct nfs_client *clp; |
1913 | struct nfs_net *nn = net_generic(m->private, nfs_net_id); | ||
1869 | 1914 | ||
1870 | /* display header on line 1 */ | 1915 | /* display header on line 1 */ |
1871 | if (v == &nfs_client_list) { | 1916 | if (v == &nn->nfs_client_list) { |
1872 | seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); | 1917 | seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); |
1873 | return 0; | 1918 | return 0; |
1874 | } | 1919 | } |
@@ -1880,12 +1925,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
1880 | if (clp->cl_cons_state != NFS_CS_READY) | 1925 | if (clp->cl_cons_state != NFS_CS_READY) |
1881 | return 0; | 1926 | return 0; |
1882 | 1927 | ||
1928 | rcu_read_lock(); | ||
1883 | seq_printf(m, "v%u %s %s %3d %s\n", | 1929 | seq_printf(m, "v%u %s %s %3d %s\n", |
1884 | clp->rpc_ops->version, | 1930 | clp->rpc_ops->version, |
1885 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), | 1931 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
1886 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), | 1932 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), |
1887 | atomic_read(&clp->cl_count), | 1933 | atomic_read(&clp->cl_count), |
1888 | clp->cl_hostname); | 1934 | clp->cl_hostname); |
1935 | rcu_read_unlock(); | ||
1889 | 1936 | ||
1890 | return 0; | 1937 | return 0; |
1891 | } | 1938 | } |
@@ -1897,13 +1944,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
1897 | { | 1944 | { |
1898 | struct seq_file *m; | 1945 | struct seq_file *m; |
1899 | int ret; | 1946 | int ret; |
1947 | struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; | ||
1948 | struct net *net = pid_ns->child_reaper->nsproxy->net_ns; | ||
1900 | 1949 | ||
1901 | ret = seq_open(file, &nfs_volume_list_ops); | 1950 | ret = seq_open(file, &nfs_volume_list_ops); |
1902 | if (ret < 0) | 1951 | if (ret < 0) |
1903 | return ret; | 1952 | return ret; |
1904 | 1953 | ||
1905 | m = file->private_data; | 1954 | m = file->private_data; |
1906 | m->private = PDE(inode)->data; | 1955 | m->private = net; |
1907 | 1956 | ||
1908 | return 0; | 1957 | return 0; |
1909 | } | 1958 | } |
@@ -1913,9 +1962,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
1913 | */ | 1962 | */ |
1914 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | 1963 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) |
1915 | { | 1964 | { |
1965 | struct nfs_net *nn = net_generic(m->private, nfs_net_id); | ||
1966 | |||
1916 | /* lock the list against modification */ | 1967 | /* lock the list against modification */ |
1917 | spin_lock(&nfs_client_lock); | 1968 | spin_lock(&nn->nfs_client_lock); |
1918 | return seq_list_start_head(&nfs_volume_list, *_pos); | 1969 | return seq_list_start_head(&nn->nfs_volume_list, *_pos); |
1919 | } | 1970 | } |
1920 | 1971 | ||
1921 | /* | 1972 | /* |
@@ -1923,7 +1974,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | |||
1923 | */ | 1974 | */ |
1924 | static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | 1975 | static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) |
1925 | { | 1976 | { |
1926 | return seq_list_next(v, &nfs_volume_list, pos); | 1977 | struct nfs_net *nn = net_generic(p->private, nfs_net_id); |
1978 | |||
1979 | return seq_list_next(v, &nn->nfs_volume_list, pos); | ||
1927 | } | 1980 | } |
1928 | 1981 | ||
1929 | /* | 1982 | /* |
@@ -1931,7 +1984,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1931 | */ | 1984 | */ |
1932 | static void nfs_volume_list_stop(struct seq_file *p, void *v) | 1985 | static void nfs_volume_list_stop(struct seq_file *p, void *v) |
1933 | { | 1986 | { |
1934 | spin_unlock(&nfs_client_lock); | 1987 | struct nfs_net *nn = net_generic(p->private, nfs_net_id); |
1988 | |||
1989 | spin_unlock(&nn->nfs_client_lock); | ||
1935 | } | 1990 | } |
1936 | 1991 | ||
1937 | /* | 1992 | /* |
@@ -1942,9 +1997,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1942 | struct nfs_server *server; | 1997 | struct nfs_server *server; |
1943 | struct nfs_client *clp; | 1998 | struct nfs_client *clp; |
1944 | char dev[8], fsid[17]; | 1999 | char dev[8], fsid[17]; |
2000 | struct nfs_net *nn = net_generic(m->private, nfs_net_id); | ||
1945 | 2001 | ||
1946 | /* display header on line 1 */ | 2002 | /* display header on line 1 */ |
1947 | if (v == &nfs_volume_list) { | 2003 | if (v == &nn->nfs_volume_list) { |
1948 | seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); | 2004 | seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); |
1949 | return 0; | 2005 | return 0; |
1950 | } | 2006 | } |
@@ -1959,6 +2015,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1959 | (unsigned long long) server->fsid.major, | 2015 | (unsigned long long) server->fsid.major, |
1960 | (unsigned long long) server->fsid.minor); | 2016 | (unsigned long long) server->fsid.minor); |
1961 | 2017 | ||
2018 | rcu_read_lock(); | ||
1962 | seq_printf(m, "v%u %s %s %-7s %-17s %s\n", | 2019 | seq_printf(m, "v%u %s %s %-7s %-17s %s\n", |
1963 | clp->rpc_ops->version, | 2020 | clp->rpc_ops->version, |
1964 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), | 2021 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
@@ -1966,6 +2023,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1966 | dev, | 2023 | dev, |
1967 | fsid, | 2024 | fsid, |
1968 | nfs_server_fscache_state(server)); | 2025 | nfs_server_fscache_state(server)); |
2026 | rcu_read_unlock(); | ||
1969 | 2027 | ||
1970 | return 0; | 2028 | return 0; |
1971 | } | 2029 | } |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f2654069806..89af1d269274 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -105,7 +105,7 @@ again: | |||
105 | continue; | 105 | continue; |
106 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) | 106 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) |
107 | continue; | 107 | continue; |
108 | if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) | 108 | if (!nfs4_stateid_match(&state->stateid, stateid)) |
109 | continue; | 109 | continue; |
110 | get_nfs_open_context(ctx); | 110 | get_nfs_open_context(ctx); |
111 | spin_unlock(&inode->i_lock); | 111 | spin_unlock(&inode->i_lock); |
@@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, | |||
139 | if (delegation != NULL) { | 139 | if (delegation != NULL) { |
140 | spin_lock(&delegation->lock); | 140 | spin_lock(&delegation->lock); |
141 | if (delegation->inode != NULL) { | 141 | if (delegation->inode != NULL) { |
142 | memcpy(delegation->stateid.data, res->delegation.data, | 142 | nfs4_stateid_copy(&delegation->stateid, &res->delegation); |
143 | sizeof(delegation->stateid.data)); | ||
144 | delegation->type = res->delegation_type; | 143 | delegation->type = res->delegation_type; |
145 | delegation->maxsize = res->maxsize; | 144 | delegation->maxsize = res->maxsize; |
146 | oldcred = delegation->cred; | 145 | oldcred = delegation->cred; |
@@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
236 | delegation = kmalloc(sizeof(*delegation), GFP_NOFS); | 235 | delegation = kmalloc(sizeof(*delegation), GFP_NOFS); |
237 | if (delegation == NULL) | 236 | if (delegation == NULL) |
238 | return -ENOMEM; | 237 | return -ENOMEM; |
239 | memcpy(delegation->stateid.data, res->delegation.data, | 238 | nfs4_stateid_copy(&delegation->stateid, &res->delegation); |
240 | sizeof(delegation->stateid.data)); | ||
241 | delegation->type = res->delegation_type; | 239 | delegation->type = res->delegation_type; |
242 | delegation->maxsize = res->maxsize; | 240 | delegation->maxsize = res->maxsize; |
243 | delegation->change_attr = inode->i_version; | 241 | delegation->change_attr = inode->i_version; |
@@ -250,19 +248,22 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
250 | old_delegation = rcu_dereference_protected(nfsi->delegation, | 248 | old_delegation = rcu_dereference_protected(nfsi->delegation, |
251 | lockdep_is_held(&clp->cl_lock)); | 249 | lockdep_is_held(&clp->cl_lock)); |
252 | if (old_delegation != NULL) { | 250 | if (old_delegation != NULL) { |
253 | if (memcmp(&delegation->stateid, &old_delegation->stateid, | 251 | if (nfs4_stateid_match(&delegation->stateid, |
254 | sizeof(old_delegation->stateid)) == 0 && | 252 | &old_delegation->stateid) && |
255 | delegation->type == old_delegation->type) { | 253 | delegation->type == old_delegation->type) { |
256 | goto out; | 254 | goto out; |
257 | } | 255 | } |
258 | /* | 256 | /* |
259 | * Deal with broken servers that hand out two | 257 | * Deal with broken servers that hand out two |
260 | * delegations for the same file. | 258 | * delegations for the same file. |
259 | * Allow for upgrades to a WRITE delegation, but | ||
260 | * nothing else. | ||
261 | */ | 261 | */ |
262 | dfprintk(FILE, "%s: server %s handed out " | 262 | dfprintk(FILE, "%s: server %s handed out " |
263 | "a duplicate delegation!\n", | 263 | "a duplicate delegation!\n", |
264 | __func__, clp->cl_hostname); | 264 | __func__, clp->cl_hostname); |
265 | if (delegation->type <= old_delegation->type) { | 265 | if (delegation->type == old_delegation->type || |
266 | !(delegation->type & FMODE_WRITE)) { | ||
266 | freeme = delegation; | 267 | freeme = delegation; |
267 | delegation = NULL; | 268 | delegation = NULL; |
268 | goto out; | 269 | goto out; |
@@ -455,17 +456,24 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, | |||
455 | rcu_read_unlock(); | 456 | rcu_read_unlock(); |
456 | } | 457 | } |
457 | 458 | ||
458 | static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) | ||
459 | { | ||
460 | nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); | ||
461 | } | ||
462 | |||
463 | static void nfs_delegation_run_state_manager(struct nfs_client *clp) | 459 | static void nfs_delegation_run_state_manager(struct nfs_client *clp) |
464 | { | 460 | { |
465 | if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) | 461 | if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) |
466 | nfs4_schedule_state_manager(clp); | 462 | nfs4_schedule_state_manager(clp); |
467 | } | 463 | } |
468 | 464 | ||
465 | void nfs_remove_bad_delegation(struct inode *inode) | ||
466 | { | ||
467 | struct nfs_delegation *delegation; | ||
468 | |||
469 | delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); | ||
470 | if (delegation) { | ||
471 | nfs_inode_find_state_and_recover(inode, &delegation->stateid); | ||
472 | nfs_free_delegation(delegation); | ||
473 | } | ||
474 | } | ||
475 | EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); | ||
476 | |||
469 | /** | 477 | /** |
470 | * nfs_expire_all_delegation_types | 478 | * nfs_expire_all_delegation_types |
471 | * @clp: client to process | 479 | * @clp: client to process |
@@ -488,18 +496,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp) | |||
488 | nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); | 496 | nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); |
489 | } | 497 | } |
490 | 498 | ||
491 | /** | ||
492 | * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN | ||
493 | * @clp: client to process | ||
494 | * | ||
495 | */ | ||
496 | void nfs_handle_cb_pathdown(struct nfs_client *clp) | ||
497 | { | ||
498 | if (clp == NULL) | ||
499 | return; | ||
500 | nfs_client_mark_return_all_delegations(clp); | ||
501 | } | ||
502 | |||
503 | static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) | 499 | static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) |
504 | { | 500 | { |
505 | struct nfs_delegation *delegation; | 501 | struct nfs_delegation *delegation; |
@@ -531,7 +527,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) | |||
531 | /** | 527 | /** |
532 | * nfs_async_inode_return_delegation - asynchronously return a delegation | 528 | * nfs_async_inode_return_delegation - asynchronously return a delegation |
533 | * @inode: inode to process | 529 | * @inode: inode to process |
534 | * @stateid: state ID information from CB_RECALL arguments | 530 | * @stateid: state ID information |
535 | * | 531 | * |
536 | * Returns zero on success, or a negative errno value. | 532 | * Returns zero on success, or a negative errno value. |
537 | */ | 533 | */ |
@@ -545,7 +541,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, | |||
545 | rcu_read_lock(); | 541 | rcu_read_lock(); |
546 | delegation = rcu_dereference(NFS_I(inode)->delegation); | 542 | delegation = rcu_dereference(NFS_I(inode)->delegation); |
547 | 543 | ||
548 | if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { | 544 | if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { |
549 | rcu_read_unlock(); | 545 | rcu_read_unlock(); |
550 | return -ENOENT; | 546 | return -ENOENT; |
551 | } | 547 | } |
@@ -684,21 +680,25 @@ int nfs_delegations_present(struct nfs_client *clp) | |||
684 | * nfs4_copy_delegation_stateid - Copy inode's state ID information | 680 | * nfs4_copy_delegation_stateid - Copy inode's state ID information |
685 | * @dst: stateid data structure to fill in | 681 | * @dst: stateid data structure to fill in |
686 | * @inode: inode to check | 682 | * @inode: inode to check |
683 | * @flags: delegation type requirement | ||
687 | * | 684 | * |
688 | * Returns one and fills in "dst->data" * if inode had a delegation, | 685 | * Returns "true" and fills in "dst->data" * if inode had a delegation, |
689 | * otherwise zero is returned. | 686 | * otherwise "false" is returned. |
690 | */ | 687 | */ |
691 | int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) | 688 | bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, |
689 | fmode_t flags) | ||
692 | { | 690 | { |
693 | struct nfs_inode *nfsi = NFS_I(inode); | 691 | struct nfs_inode *nfsi = NFS_I(inode); |
694 | struct nfs_delegation *delegation; | 692 | struct nfs_delegation *delegation; |
695 | int ret = 0; | 693 | bool ret; |
696 | 694 | ||
695 | flags &= FMODE_READ|FMODE_WRITE; | ||
697 | rcu_read_lock(); | 696 | rcu_read_lock(); |
698 | delegation = rcu_dereference(nfsi->delegation); | 697 | delegation = rcu_dereference(nfsi->delegation); |
699 | if (delegation != NULL) { | 698 | ret = (delegation != NULL && (delegation->type & flags) == flags); |
700 | memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); | 699 | if (ret) { |
701 | ret = 1; | 700 | nfs4_stateid_copy(dst, &delegation->stateid); |
701 | nfs_mark_delegation_referenced(delegation); | ||
702 | } | 702 | } |
703 | rcu_read_unlock(); | 703 | rcu_read_unlock(); |
704 | return ret; | 704 | return ret; |
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d9322e490c56..cd6a7a8dadae 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h | |||
@@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb); | |||
42 | void nfs_expire_all_delegations(struct nfs_client *clp); | 42 | void nfs_expire_all_delegations(struct nfs_client *clp); |
43 | void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); | 43 | void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); |
44 | void nfs_expire_unreferenced_delegations(struct nfs_client *clp); | 44 | void nfs_expire_unreferenced_delegations(struct nfs_client *clp); |
45 | void nfs_handle_cb_pathdown(struct nfs_client *clp); | ||
46 | int nfs_client_return_marked_delegations(struct nfs_client *clp); | 45 | int nfs_client_return_marked_delegations(struct nfs_client *clp); |
47 | int nfs_delegations_present(struct nfs_client *clp); | 46 | int nfs_delegations_present(struct nfs_client *clp); |
47 | void nfs_remove_bad_delegation(struct inode *inode); | ||
48 | 48 | ||
49 | void nfs_delegation_mark_reclaim(struct nfs_client *clp); | 49 | void nfs_delegation_mark_reclaim(struct nfs_client *clp); |
50 | void nfs_delegation_reap_unclaimed(struct nfs_client *clp); | 50 | void nfs_delegation_reap_unclaimed(struct nfs_client *clp); |
@@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); | |||
53 | int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); | 53 | int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); |
54 | int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); | 54 | int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); |
55 | int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); | 55 | int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); |
56 | int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); | 56 | bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); |
57 | 57 | ||
58 | void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); | 58 | void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); |
59 | int nfs_have_delegation(struct inode *inode, fmode_t flags); | 59 | int nfs_have_delegation(struct inode *inode, fmode_t flags); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32aa6917265a..4aaf0316d76a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -207,7 +207,7 @@ struct nfs_cache_array_entry { | |||
207 | }; | 207 | }; |
208 | 208 | ||
209 | struct nfs_cache_array { | 209 | struct nfs_cache_array { |
210 | unsigned int size; | 210 | int size; |
211 | int eof_index; | 211 | int eof_index; |
212 | u64 last_cookie; | 212 | u64 last_cookie; |
213 | struct nfs_cache_array_entry array[0]; | 213 | struct nfs_cache_array_entry array[0]; |
@@ -1429,6 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1429 | } | 1429 | } |
1430 | 1430 | ||
1431 | open_flags = nd->intent.open.flags; | 1431 | open_flags = nd->intent.open.flags; |
1432 | attr.ia_valid = 0; | ||
1432 | 1433 | ||
1433 | ctx = create_nfs_open_context(dentry, open_flags); | 1434 | ctx = create_nfs_open_context(dentry, open_flags); |
1434 | res = ERR_CAST(ctx); | 1435 | res = ERR_CAST(ctx); |
@@ -1437,11 +1438,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1437 | 1438 | ||
1438 | if (nd->flags & LOOKUP_CREATE) { | 1439 | if (nd->flags & LOOKUP_CREATE) { |
1439 | attr.ia_mode = nd->intent.open.create_mode; | 1440 | attr.ia_mode = nd->intent.open.create_mode; |
1440 | attr.ia_valid = ATTR_MODE; | 1441 | attr.ia_valid |= ATTR_MODE; |
1441 | attr.ia_mode &= ~current_umask(); | 1442 | attr.ia_mode &= ~current_umask(); |
1442 | } else { | 1443 | } else |
1443 | open_flags &= ~(O_EXCL | O_CREAT); | 1444 | open_flags &= ~(O_EXCL | O_CREAT); |
1444 | attr.ia_valid = 0; | 1445 | |
1446 | if (open_flags & O_TRUNC) { | ||
1447 | attr.ia_valid |= ATTR_SIZE; | ||
1448 | attr.ia_size = 0; | ||
1445 | } | 1449 | } |
1446 | 1450 | ||
1447 | /* Open the file on the server */ | 1451 | /* Open the file on the server */ |
@@ -1495,6 +1499,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1495 | struct inode *inode; | 1499 | struct inode *inode; |
1496 | struct inode *dir; | 1500 | struct inode *dir; |
1497 | struct nfs_open_context *ctx; | 1501 | struct nfs_open_context *ctx; |
1502 | struct iattr attr; | ||
1498 | int openflags, ret = 0; | 1503 | int openflags, ret = 0; |
1499 | 1504 | ||
1500 | if (nd->flags & LOOKUP_RCU) | 1505 | if (nd->flags & LOOKUP_RCU) |
@@ -1523,19 +1528,27 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1523 | /* We cannot do exclusive creation on a positive dentry */ | 1528 | /* We cannot do exclusive creation on a positive dentry */ |
1524 | if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) | 1529 | if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) |
1525 | goto no_open_dput; | 1530 | goto no_open_dput; |
1526 | /* We can't create new files, or truncate existing ones here */ | 1531 | /* We can't create new files here */ |
1527 | openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); | 1532 | openflags &= ~(O_CREAT|O_EXCL); |
1528 | 1533 | ||
1529 | ctx = create_nfs_open_context(dentry, openflags); | 1534 | ctx = create_nfs_open_context(dentry, openflags); |
1530 | ret = PTR_ERR(ctx); | 1535 | ret = PTR_ERR(ctx); |
1531 | if (IS_ERR(ctx)) | 1536 | if (IS_ERR(ctx)) |
1532 | goto out; | 1537 | goto out; |
1538 | |||
1539 | attr.ia_valid = 0; | ||
1540 | if (openflags & O_TRUNC) { | ||
1541 | attr.ia_valid |= ATTR_SIZE; | ||
1542 | attr.ia_size = 0; | ||
1543 | nfs_wb_all(inode); | ||
1544 | } | ||
1545 | |||
1533 | /* | 1546 | /* |
1534 | * Note: we're not holding inode->i_mutex and so may be racing with | 1547 | * Note: we're not holding inode->i_mutex and so may be racing with |
1535 | * operations that change the directory. We therefore save the | 1548 | * operations that change the directory. We therefore save the |
1536 | * change attribute *before* we do the RPC call. | 1549 | * change attribute *before* we do the RPC call. |
1537 | */ | 1550 | */ |
1538 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); | 1551 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); |
1539 | if (IS_ERR(inode)) { | 1552 | if (IS_ERR(inode)) { |
1540 | ret = PTR_ERR(inode); | 1553 | ret = PTR_ERR(inode); |
1541 | switch (ret) { | 1554 | switch (ret) { |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ea5be1262d41..481be7f7bdd3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -264,9 +264,7 @@ static void nfs_direct_read_release(void *calldata) | |||
264 | } | 264 | } |
265 | 265 | ||
266 | static const struct rpc_call_ops nfs_read_direct_ops = { | 266 | static const struct rpc_call_ops nfs_read_direct_ops = { |
267 | #if defined(CONFIG_NFS_V4_1) | ||
268 | .rpc_call_prepare = nfs_read_prepare, | 267 | .rpc_call_prepare = nfs_read_prepare, |
269 | #endif /* CONFIG_NFS_V4_1 */ | ||
270 | .rpc_call_done = nfs_direct_read_result, | 268 | .rpc_call_done = nfs_direct_read_result, |
271 | .rpc_release = nfs_direct_read_release, | 269 | .rpc_release = nfs_direct_read_release, |
272 | }; | 270 | }; |
@@ -553,9 +551,7 @@ static void nfs_direct_commit_release(void *calldata) | |||
553 | } | 551 | } |
554 | 552 | ||
555 | static const struct rpc_call_ops nfs_commit_direct_ops = { | 553 | static const struct rpc_call_ops nfs_commit_direct_ops = { |
556 | #if defined(CONFIG_NFS_V4_1) | ||
557 | .rpc_call_prepare = nfs_write_prepare, | 554 | .rpc_call_prepare = nfs_write_prepare, |
558 | #endif /* CONFIG_NFS_V4_1 */ | ||
559 | .rpc_call_done = nfs_direct_commit_result, | 555 | .rpc_call_done = nfs_direct_commit_result, |
560 | .rpc_release = nfs_direct_commit_release, | 556 | .rpc_release = nfs_direct_commit_release, |
561 | }; | 557 | }; |
@@ -695,9 +691,7 @@ out_unlock: | |||
695 | } | 691 | } |
696 | 692 | ||
697 | static const struct rpc_call_ops nfs_write_direct_ops = { | 693 | static const struct rpc_call_ops nfs_write_direct_ops = { |
698 | #if defined(CONFIG_NFS_V4_1) | ||
699 | .rpc_call_prepare = nfs_write_prepare, | 694 | .rpc_call_prepare = nfs_write_prepare, |
700 | #endif /* CONFIG_NFS_V4_1 */ | ||
701 | .rpc_call_done = nfs_direct_write_result, | 695 | .rpc_call_done = nfs_direct_write_result, |
702 | .rpc_release = nfs_direct_write_release, | 696 | .rpc_release = nfs_direct_write_release, |
703 | }; | 697 | }; |
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index a6e711ad130f..b3924b8a6000 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
@@ -10,8 +10,9 @@ | |||
10 | 10 | ||
11 | #include <linux/sunrpc/clnt.h> | 11 | #include <linux/sunrpc/clnt.h> |
12 | #include <linux/dns_resolver.h> | 12 | #include <linux/dns_resolver.h> |
13 | #include "dns_resolve.h" | ||
13 | 14 | ||
14 | ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | 15 | ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, |
15 | struct sockaddr *sa, size_t salen) | 16 | struct sockaddr *sa, size_t salen) |
16 | { | 17 | { |
17 | ssize_t ret; | 18 | ssize_t ret; |
@@ -20,7 +21,7 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | |||
20 | 21 | ||
21 | ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); | 22 | ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); |
22 | if (ip_len > 0) | 23 | if (ip_len > 0) |
23 | ret = rpc_pton(ip_addr, ip_len, sa, salen); | 24 | ret = rpc_pton(net, ip_addr, ip_len, sa, salen); |
24 | else | 25 | else |
25 | ret = -ESRCH; | 26 | ret = -ESRCH; |
26 | kfree(ip_addr); | 27 | kfree(ip_addr); |
@@ -40,15 +41,15 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | |||
40 | #include <linux/sunrpc/clnt.h> | 41 | #include <linux/sunrpc/clnt.h> |
41 | #include <linux/sunrpc/cache.h> | 42 | #include <linux/sunrpc/cache.h> |
42 | #include <linux/sunrpc/svcauth.h> | 43 | #include <linux/sunrpc/svcauth.h> |
44 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
43 | 45 | ||
44 | #include "dns_resolve.h" | 46 | #include "dns_resolve.h" |
45 | #include "cache_lib.h" | 47 | #include "cache_lib.h" |
48 | #include "netns.h" | ||
46 | 49 | ||
47 | #define NFS_DNS_HASHBITS 4 | 50 | #define NFS_DNS_HASHBITS 4 |
48 | #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) | 51 | #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) |
49 | 52 | ||
50 | static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE]; | ||
51 | |||
52 | struct nfs_dns_ent { | 53 | struct nfs_dns_ent { |
53 | struct cache_head h; | 54 | struct cache_head h; |
54 | 55 | ||
@@ -224,7 +225,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) | |||
224 | len = qword_get(&buf, buf1, sizeof(buf1)); | 225 | len = qword_get(&buf, buf1, sizeof(buf1)); |
225 | if (len <= 0) | 226 | if (len <= 0) |
226 | goto out; | 227 | goto out; |
227 | key.addrlen = rpc_pton(buf1, len, | 228 | key.addrlen = rpc_pton(cd->net, buf1, len, |
228 | (struct sockaddr *)&key.addr, | 229 | (struct sockaddr *)&key.addr, |
229 | sizeof(key.addr)); | 230 | sizeof(key.addr)); |
230 | 231 | ||
@@ -259,21 +260,6 @@ out: | |||
259 | return ret; | 260 | return ret; |
260 | } | 261 | } |
261 | 262 | ||
262 | static struct cache_detail nfs_dns_resolve = { | ||
263 | .owner = THIS_MODULE, | ||
264 | .hash_size = NFS_DNS_HASHTBL_SIZE, | ||
265 | .hash_table = nfs_dns_table, | ||
266 | .name = "dns_resolve", | ||
267 | .cache_put = nfs_dns_ent_put, | ||
268 | .cache_upcall = nfs_dns_upcall, | ||
269 | .cache_parse = nfs_dns_parse, | ||
270 | .cache_show = nfs_dns_show, | ||
271 | .match = nfs_dns_match, | ||
272 | .init = nfs_dns_ent_init, | ||
273 | .update = nfs_dns_ent_update, | ||
274 | .alloc = nfs_dns_ent_alloc, | ||
275 | }; | ||
276 | |||
277 | static int do_cache_lookup(struct cache_detail *cd, | 263 | static int do_cache_lookup(struct cache_detail *cd, |
278 | struct nfs_dns_ent *key, | 264 | struct nfs_dns_ent *key, |
279 | struct nfs_dns_ent **item, | 265 | struct nfs_dns_ent **item, |
@@ -336,8 +322,8 @@ out: | |||
336 | return ret; | 322 | return ret; |
337 | } | 323 | } |
338 | 324 | ||
339 | ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | 325 | ssize_t nfs_dns_resolve_name(struct net *net, char *name, |
340 | struct sockaddr *sa, size_t salen) | 326 | size_t namelen, struct sockaddr *sa, size_t salen) |
341 | { | 327 | { |
342 | struct nfs_dns_ent key = { | 328 | struct nfs_dns_ent key = { |
343 | .hostname = name, | 329 | .hostname = name, |
@@ -345,28 +331,118 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | |||
345 | }; | 331 | }; |
346 | struct nfs_dns_ent *item = NULL; | 332 | struct nfs_dns_ent *item = NULL; |
347 | ssize_t ret; | 333 | ssize_t ret; |
334 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
348 | 335 | ||
349 | ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); | 336 | ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item); |
350 | if (ret == 0) { | 337 | if (ret == 0) { |
351 | if (salen >= item->addrlen) { | 338 | if (salen >= item->addrlen) { |
352 | memcpy(sa, &item->addr, item->addrlen); | 339 | memcpy(sa, &item->addr, item->addrlen); |
353 | ret = item->addrlen; | 340 | ret = item->addrlen; |
354 | } else | 341 | } else |
355 | ret = -EOVERFLOW; | 342 | ret = -EOVERFLOW; |
356 | cache_put(&item->h, &nfs_dns_resolve); | 343 | cache_put(&item->h, nn->nfs_dns_resolve); |
357 | } else if (ret == -ENOENT) | 344 | } else if (ret == -ENOENT) |
358 | ret = -ESRCH; | 345 | ret = -ESRCH; |
359 | return ret; | 346 | return ret; |
360 | } | 347 | } |
361 | 348 | ||
349 | int nfs_dns_resolver_cache_init(struct net *net) | ||
350 | { | ||
351 | int err = -ENOMEM; | ||
352 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
353 | struct cache_detail *cd; | ||
354 | struct cache_head **tbl; | ||
355 | |||
356 | cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); | ||
357 | if (cd == NULL) | ||
358 | goto err_cd; | ||
359 | |||
360 | tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), | ||
361 | GFP_KERNEL); | ||
362 | if (tbl == NULL) | ||
363 | goto err_tbl; | ||
364 | |||
365 | cd->owner = THIS_MODULE, | ||
366 | cd->hash_size = NFS_DNS_HASHTBL_SIZE, | ||
367 | cd->hash_table = tbl, | ||
368 | cd->name = "dns_resolve", | ||
369 | cd->cache_put = nfs_dns_ent_put, | ||
370 | cd->cache_upcall = nfs_dns_upcall, | ||
371 | cd->cache_parse = nfs_dns_parse, | ||
372 | cd->cache_show = nfs_dns_show, | ||
373 | cd->match = nfs_dns_match, | ||
374 | cd->init = nfs_dns_ent_init, | ||
375 | cd->update = nfs_dns_ent_update, | ||
376 | cd->alloc = nfs_dns_ent_alloc, | ||
377 | |||
378 | nfs_cache_init(cd); | ||
379 | err = nfs_cache_register_net(net, cd); | ||
380 | if (err) | ||
381 | goto err_reg; | ||
382 | nn->nfs_dns_resolve = cd; | ||
383 | return 0; | ||
384 | |||
385 | err_reg: | ||
386 | nfs_cache_destroy(cd); | ||
387 | kfree(cd->hash_table); | ||
388 | err_tbl: | ||
389 | kfree(cd); | ||
390 | err_cd: | ||
391 | return err; | ||
392 | } | ||
393 | |||
394 | void nfs_dns_resolver_cache_destroy(struct net *net) | ||
395 | { | ||
396 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
397 | struct cache_detail *cd = nn->nfs_dns_resolve; | ||
398 | |||
399 | nfs_cache_unregister_net(net, cd); | ||
400 | nfs_cache_destroy(cd); | ||
401 | kfree(cd->hash_table); | ||
402 | kfree(cd); | ||
403 | } | ||
404 | |||
405 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
406 | void *ptr) | ||
407 | { | ||
408 | struct super_block *sb = ptr; | ||
409 | struct net *net = sb->s_fs_info; | ||
410 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
411 | struct cache_detail *cd = nn->nfs_dns_resolve; | ||
412 | int ret = 0; | ||
413 | |||
414 | if (cd == NULL) | ||
415 | return 0; | ||
416 | |||
417 | if (!try_module_get(THIS_MODULE)) | ||
418 | return 0; | ||
419 | |||
420 | switch (event) { | ||
421 | case RPC_PIPEFS_MOUNT: | ||
422 | ret = nfs_cache_register_sb(sb, cd); | ||
423 | break; | ||
424 | case RPC_PIPEFS_UMOUNT: | ||
425 | nfs_cache_unregister_sb(sb, cd); | ||
426 | break; | ||
427 | default: | ||
428 | ret = -ENOTSUPP; | ||
429 | break; | ||
430 | } | ||
431 | module_put(THIS_MODULE); | ||
432 | return ret; | ||
433 | } | ||
434 | |||
435 | static struct notifier_block nfs_dns_resolver_block = { | ||
436 | .notifier_call = rpc_pipefs_event, | ||
437 | }; | ||
438 | |||
362 | int nfs_dns_resolver_init(void) | 439 | int nfs_dns_resolver_init(void) |
363 | { | 440 | { |
364 | return nfs_cache_register(&nfs_dns_resolve); | 441 | return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); |
365 | } | 442 | } |
366 | 443 | ||
367 | void nfs_dns_resolver_destroy(void) | 444 | void nfs_dns_resolver_destroy(void) |
368 | { | 445 | { |
369 | nfs_cache_unregister(&nfs_dns_resolve); | 446 | rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); |
370 | } | 447 | } |
371 | |||
372 | #endif | 448 | #endif |
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index 199bb5543a91..2e4f596d2923 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h | |||
@@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void) | |||
15 | 15 | ||
16 | static inline void nfs_dns_resolver_destroy(void) | 16 | static inline void nfs_dns_resolver_destroy(void) |
17 | {} | 17 | {} |
18 | |||
19 | static inline int nfs_dns_resolver_cache_init(struct net *net) | ||
20 | { | ||
21 | return 0; | ||
22 | } | ||
23 | |||
24 | static inline void nfs_dns_resolver_cache_destroy(struct net *net) | ||
25 | {} | ||
18 | #else | 26 | #else |
19 | extern int nfs_dns_resolver_init(void); | 27 | extern int nfs_dns_resolver_init(void); |
20 | extern void nfs_dns_resolver_destroy(void); | 28 | extern void nfs_dns_resolver_destroy(void); |
29 | extern int nfs_dns_resolver_cache_init(struct net *net); | ||
30 | extern void nfs_dns_resolver_cache_destroy(struct net *net); | ||
21 | #endif | 31 | #endif |
22 | 32 | ||
23 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | 33 | extern ssize_t nfs_dns_resolve_name(struct net *net, char *name, |
24 | struct sockaddr *sa, size_t salen); | 34 | size_t namelen, struct sockaddr *sa, size_t salen); |
25 | 35 | ||
26 | #endif | 36 | #endif |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a77a1f2da5d6..aa9b709fd328 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -529,6 +529,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
529 | if (mapping != dentry->d_inode->i_mapping) | 529 | if (mapping != dentry->d_inode->i_mapping) |
530 | goto out_unlock; | 530 | goto out_unlock; |
531 | 531 | ||
532 | wait_on_page_writeback(page); | ||
533 | |||
532 | pagelen = nfs_page_length(page); | 534 | pagelen = nfs_page_length(page); |
533 | if (pagelen == 0) | 535 | if (pagelen == 0) |
534 | goto out_unlock; | 536 | goto out_unlock; |
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 419119c371bf..ae65c16b3670 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c | |||
@@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) | |||
327 | { | 327 | { |
328 | struct nfs_inode *nfsi = NFS_I(inode); | 328 | struct nfs_inode *nfsi = NFS_I(inode); |
329 | struct nfs_server *nfss = NFS_SERVER(inode); | 329 | struct nfs_server *nfss = NFS_SERVER(inode); |
330 | struct fscache_cookie *old = nfsi->fscache; | 330 | NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); |
331 | 331 | ||
332 | nfs_fscache_inode_lock(inode); | 332 | nfs_fscache_inode_lock(inode); |
333 | if (nfsi->fscache) { | 333 | if (nfsi->fscache) { |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a1bbf7780dfc..b7f348bb618b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -34,11 +34,29 @@ | |||
34 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 34 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
35 | */ | 35 | */ |
36 | #include <linux/types.h> | 36 | #include <linux/types.h> |
37 | #include <linux/string.h> | 37 | #include <linux/parser.h> |
38 | #include <linux/kernel.h> | 38 | #include <linux/fs.h> |
39 | #include <linux/slab.h> | ||
40 | #include <linux/nfs_idmap.h> | 39 | #include <linux/nfs_idmap.h> |
40 | #include <net/net_namespace.h> | ||
41 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
41 | #include <linux/nfs_fs.h> | 42 | #include <linux/nfs_fs.h> |
43 | #include <linux/nfs_fs_sb.h> | ||
44 | #include <linux/key.h> | ||
45 | #include <linux/keyctl.h> | ||
46 | #include <linux/key-type.h> | ||
47 | #include <keys/user-type.h> | ||
48 | #include <linux/module.h> | ||
49 | |||
50 | #include "internal.h" | ||
51 | #include "netns.h" | ||
52 | |||
53 | #define NFS_UINT_MAXLEN 11 | ||
54 | |||
55 | /* Default cache timeout is 10 minutes */ | ||
56 | unsigned int nfs_idmap_cache_timeout = 600; | ||
57 | static const struct cred *id_resolver_cache; | ||
58 | static struct key_type key_type_id_resolver_legacy; | ||
59 | |||
42 | 60 | ||
43 | /** | 61 | /** |
44 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields | 62 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields |
@@ -142,24 +160,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) | |||
142 | return snprintf(buf, buflen, "%u", id); | 160 | return snprintf(buf, buflen, "%u", id); |
143 | } | 161 | } |
144 | 162 | ||
145 | #ifdef CONFIG_NFS_USE_NEW_IDMAPPER | 163 | static struct key_type key_type_id_resolver = { |
146 | |||
147 | #include <linux/cred.h> | ||
148 | #include <linux/sunrpc/sched.h> | ||
149 | #include <linux/nfs4.h> | ||
150 | #include <linux/nfs_fs_sb.h> | ||
151 | #include <linux/keyctl.h> | ||
152 | #include <linux/key-type.h> | ||
153 | #include <linux/rcupdate.h> | ||
154 | #include <linux/err.h> | ||
155 | |||
156 | #include <keys/user-type.h> | ||
157 | |||
158 | #define NFS_UINT_MAXLEN 11 | ||
159 | |||
160 | const struct cred *id_resolver_cache; | ||
161 | |||
162 | struct key_type key_type_id_resolver = { | ||
163 | .name = "id_resolver", | 164 | .name = "id_resolver", |
164 | .instantiate = user_instantiate, | 165 | .instantiate = user_instantiate, |
165 | .match = user_match, | 166 | .match = user_match, |
@@ -169,13 +170,14 @@ struct key_type key_type_id_resolver = { | |||
169 | .read = user_read, | 170 | .read = user_read, |
170 | }; | 171 | }; |
171 | 172 | ||
172 | int nfs_idmap_init(void) | 173 | static int nfs_idmap_init_keyring(void) |
173 | { | 174 | { |
174 | struct cred *cred; | 175 | struct cred *cred; |
175 | struct key *keyring; | 176 | struct key *keyring; |
176 | int ret = 0; | 177 | int ret = 0; |
177 | 178 | ||
178 | printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); | 179 | printk(KERN_NOTICE "NFS: Registering the %s key type\n", |
180 | key_type_id_resolver.name); | ||
179 | 181 | ||
180 | cred = prepare_kernel_cred(NULL); | 182 | cred = prepare_kernel_cred(NULL); |
181 | if (!cred) | 183 | if (!cred) |
@@ -211,7 +213,7 @@ failed_put_cred: | |||
211 | return ret; | 213 | return ret; |
212 | } | 214 | } |
213 | 215 | ||
214 | void nfs_idmap_quit(void) | 216 | static void nfs_idmap_quit_keyring(void) |
215 | { | 217 | { |
216 | key_revoke(id_resolver_cache->thread_keyring); | 218 | key_revoke(id_resolver_cache->thread_keyring); |
217 | unregister_key_type(&key_type_id_resolver); | 219 | unregister_key_type(&key_type_id_resolver); |
@@ -246,8 +248,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, | |||
246 | return desclen; | 248 | return desclen; |
247 | } | 249 | } |
248 | 250 | ||
249 | static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, | 251 | static ssize_t nfs_idmap_request_key(struct key_type *key_type, |
250 | const char *type, void *data, size_t data_size) | 252 | const char *name, size_t namelen, |
253 | const char *type, void *data, | ||
254 | size_t data_size, struct idmap *idmap) | ||
251 | { | 255 | { |
252 | const struct cred *saved_cred; | 256 | const struct cred *saved_cred; |
253 | struct key *rkey; | 257 | struct key *rkey; |
@@ -260,8 +264,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, | |||
260 | goto out; | 264 | goto out; |
261 | 265 | ||
262 | saved_cred = override_creds(id_resolver_cache); | 266 | saved_cred = override_creds(id_resolver_cache); |
263 | rkey = request_key(&key_type_id_resolver, desc, ""); | 267 | if (idmap) |
268 | rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); | ||
269 | else | ||
270 | rkey = request_key(&key_type_id_resolver, desc, ""); | ||
264 | revert_creds(saved_cred); | 271 | revert_creds(saved_cred); |
272 | |||
265 | kfree(desc); | 273 | kfree(desc); |
266 | if (IS_ERR(rkey)) { | 274 | if (IS_ERR(rkey)) { |
267 | ret = PTR_ERR(rkey); | 275 | ret = PTR_ERR(rkey); |
@@ -294,31 +302,46 @@ out: | |||
294 | return ret; | 302 | return ret; |
295 | } | 303 | } |
296 | 304 | ||
305 | static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | ||
306 | const char *type, void *data, | ||
307 | size_t data_size, struct idmap *idmap) | ||
308 | { | ||
309 | ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, | ||
310 | name, namelen, type, data, | ||
311 | data_size, NULL); | ||
312 | if (ret < 0) { | ||
313 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, | ||
314 | name, namelen, type, data, | ||
315 | data_size, idmap); | ||
316 | } | ||
317 | return ret; | ||
318 | } | ||
297 | 319 | ||
298 | /* ID -> Name */ | 320 | /* ID -> Name */ |
299 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) | 321 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, |
322 | size_t buflen, struct idmap *idmap) | ||
300 | { | 323 | { |
301 | char id_str[NFS_UINT_MAXLEN]; | 324 | char id_str[NFS_UINT_MAXLEN]; |
302 | int id_len; | 325 | int id_len; |
303 | ssize_t ret; | 326 | ssize_t ret; |
304 | 327 | ||
305 | id_len = snprintf(id_str, sizeof(id_str), "%u", id); | 328 | id_len = snprintf(id_str, sizeof(id_str), "%u", id); |
306 | ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); | 329 | ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); |
307 | if (ret < 0) | 330 | if (ret < 0) |
308 | return -EINVAL; | 331 | return -EINVAL; |
309 | return ret; | 332 | return ret; |
310 | } | 333 | } |
311 | 334 | ||
312 | /* Name -> ID */ | 335 | /* Name -> ID */ |
313 | static int nfs_idmap_lookup_id(const char *name, size_t namelen, | 336 | static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type, |
314 | const char *type, __u32 *id) | 337 | __u32 *id, struct idmap *idmap) |
315 | { | 338 | { |
316 | char id_str[NFS_UINT_MAXLEN]; | 339 | char id_str[NFS_UINT_MAXLEN]; |
317 | long id_long; | 340 | long id_long; |
318 | ssize_t data_size; | 341 | ssize_t data_size; |
319 | int ret = 0; | 342 | int ret = 0; |
320 | 343 | ||
321 | data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); | 344 | data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap); |
322 | if (data_size <= 0) { | 345 | if (data_size <= 0) { |
323 | ret = -EINVAL; | 346 | ret = -EINVAL; |
324 | } else { | 347 | } else { |
@@ -328,114 +351,103 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, | |||
328 | return ret; | 351 | return ret; |
329 | } | 352 | } |
330 | 353 | ||
331 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) | 354 | /* idmap classic begins here */ |
332 | { | 355 | module_param(nfs_idmap_cache_timeout, int, 0644); |
333 | if (nfs_map_string_to_numeric(name, namelen, uid)) | ||
334 | return 0; | ||
335 | return nfs_idmap_lookup_id(name, namelen, "uid", uid); | ||
336 | } | ||
337 | |||
338 | int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) | ||
339 | { | ||
340 | if (nfs_map_string_to_numeric(name, namelen, gid)) | ||
341 | return 0; | ||
342 | return nfs_idmap_lookup_id(name, namelen, "gid", gid); | ||
343 | } | ||
344 | |||
345 | int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) | ||
346 | { | ||
347 | int ret = -EINVAL; | ||
348 | |||
349 | if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) | ||
350 | ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); | ||
351 | if (ret < 0) | ||
352 | ret = nfs_map_numeric_to_string(uid, buf, buflen); | ||
353 | return ret; | ||
354 | } | ||
355 | int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) | ||
356 | { | ||
357 | int ret = -EINVAL; | ||
358 | 356 | ||
359 | if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) | 357 | struct idmap { |
360 | ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); | 358 | struct rpc_pipe *idmap_pipe; |
361 | if (ret < 0) | 359 | struct key_construction *idmap_key_cons; |
362 | ret = nfs_map_numeric_to_string(gid, buf, buflen); | ||
363 | return ret; | ||
364 | } | ||
365 | |||
366 | #else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ | ||
367 | |||
368 | #include <linux/module.h> | ||
369 | #include <linux/mutex.h> | ||
370 | #include <linux/init.h> | ||
371 | #include <linux/socket.h> | ||
372 | #include <linux/in.h> | ||
373 | #include <linux/sched.h> | ||
374 | #include <linux/sunrpc/clnt.h> | ||
375 | #include <linux/workqueue.h> | ||
376 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
377 | |||
378 | #include <linux/nfs_fs.h> | ||
379 | |||
380 | #include "nfs4_fs.h" | ||
381 | |||
382 | #define IDMAP_HASH_SZ 128 | ||
383 | |||
384 | /* Default cache timeout is 10 minutes */ | ||
385 | unsigned int nfs_idmap_cache_timeout = 600 * HZ; | ||
386 | |||
387 | static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) | ||
388 | { | ||
389 | char *endp; | ||
390 | int num = simple_strtol(val, &endp, 0); | ||
391 | int jif = num * HZ; | ||
392 | if (endp == val || *endp || num < 0 || jif < num) | ||
393 | return -EINVAL; | ||
394 | *((int *)kp->arg) = jif; | ||
395 | return 0; | ||
396 | } | ||
397 | |||
398 | module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, | ||
399 | &nfs_idmap_cache_timeout, 0644); | ||
400 | |||
401 | struct idmap_hashent { | ||
402 | unsigned long ih_expires; | ||
403 | __u32 ih_id; | ||
404 | size_t ih_namelen; | ||
405 | char ih_name[IDMAP_NAMESZ]; | ||
406 | }; | 360 | }; |
407 | 361 | ||
408 | struct idmap_hashtable { | 362 | enum { |
409 | __u8 h_type; | 363 | Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err |
410 | struct idmap_hashent h_entries[IDMAP_HASH_SZ]; | ||
411 | }; | 364 | }; |
412 | 365 | ||
413 | struct idmap { | 366 | static const match_table_t nfs_idmap_tokens = { |
414 | struct dentry *idmap_dentry; | 367 | { Opt_find_uid, "uid:%s" }, |
415 | wait_queue_head_t idmap_wq; | 368 | { Opt_find_gid, "gid:%s" }, |
416 | struct idmap_msg idmap_im; | 369 | { Opt_find_user, "user:%s" }, |
417 | struct mutex idmap_lock; /* Serializes upcalls */ | 370 | { Opt_find_group, "group:%s" }, |
418 | struct mutex idmap_im_lock; /* Protects the hashtable */ | 371 | { Opt_find_err, NULL } |
419 | struct idmap_hashtable idmap_user_hash; | ||
420 | struct idmap_hashtable idmap_group_hash; | ||
421 | }; | 372 | }; |
422 | 373 | ||
374 | static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); | ||
423 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, | 375 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, |
424 | size_t); | 376 | size_t); |
425 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | 377 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); |
426 | 378 | ||
427 | static unsigned int fnvhash32(const void *, size_t); | ||
428 | |||
429 | static const struct rpc_pipe_ops idmap_upcall_ops = { | 379 | static const struct rpc_pipe_ops idmap_upcall_ops = { |
430 | .upcall = rpc_pipe_generic_upcall, | 380 | .upcall = rpc_pipe_generic_upcall, |
431 | .downcall = idmap_pipe_downcall, | 381 | .downcall = idmap_pipe_downcall, |
432 | .destroy_msg = idmap_pipe_destroy_msg, | 382 | .destroy_msg = idmap_pipe_destroy_msg, |
433 | }; | 383 | }; |
434 | 384 | ||
385 | static struct key_type key_type_id_resolver_legacy = { | ||
386 | .name = "id_resolver", | ||
387 | .instantiate = user_instantiate, | ||
388 | .match = user_match, | ||
389 | .revoke = user_revoke, | ||
390 | .destroy = user_destroy, | ||
391 | .describe = user_describe, | ||
392 | .read = user_read, | ||
393 | .request_key = nfs_idmap_legacy_upcall, | ||
394 | }; | ||
395 | |||
396 | static void __nfs_idmap_unregister(struct rpc_pipe *pipe) | ||
397 | { | ||
398 | if (pipe->dentry) | ||
399 | rpc_unlink(pipe->dentry); | ||
400 | } | ||
401 | |||
402 | static int __nfs_idmap_register(struct dentry *dir, | ||
403 | struct idmap *idmap, | ||
404 | struct rpc_pipe *pipe) | ||
405 | { | ||
406 | struct dentry *dentry; | ||
407 | |||
408 | dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); | ||
409 | if (IS_ERR(dentry)) | ||
410 | return PTR_ERR(dentry); | ||
411 | pipe->dentry = dentry; | ||
412 | return 0; | ||
413 | } | ||
414 | |||
415 | static void nfs_idmap_unregister(struct nfs_client *clp, | ||
416 | struct rpc_pipe *pipe) | ||
417 | { | ||
418 | struct net *net = clp->net; | ||
419 | struct super_block *pipefs_sb; | ||
420 | |||
421 | pipefs_sb = rpc_get_sb_net(net); | ||
422 | if (pipefs_sb) { | ||
423 | __nfs_idmap_unregister(pipe); | ||
424 | rpc_put_sb_net(net); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | static int nfs_idmap_register(struct nfs_client *clp, | ||
429 | struct idmap *idmap, | ||
430 | struct rpc_pipe *pipe) | ||
431 | { | ||
432 | struct net *net = clp->net; | ||
433 | struct super_block *pipefs_sb; | ||
434 | int err = 0; | ||
435 | |||
436 | pipefs_sb = rpc_get_sb_net(net); | ||
437 | if (pipefs_sb) { | ||
438 | if (clp->cl_rpcclient->cl_dentry) | ||
439 | err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, | ||
440 | idmap, pipe); | ||
441 | rpc_put_sb_net(net); | ||
442 | } | ||
443 | return err; | ||
444 | } | ||
445 | |||
435 | int | 446 | int |
436 | nfs_idmap_new(struct nfs_client *clp) | 447 | nfs_idmap_new(struct nfs_client *clp) |
437 | { | 448 | { |
438 | struct idmap *idmap; | 449 | struct idmap *idmap; |
450 | struct rpc_pipe *pipe; | ||
439 | int error; | 451 | int error; |
440 | 452 | ||
441 | BUG_ON(clp->cl_idmap != NULL); | 453 | BUG_ON(clp->cl_idmap != NULL); |
@@ -444,19 +456,19 @@ nfs_idmap_new(struct nfs_client *clp) | |||
444 | if (idmap == NULL) | 456 | if (idmap == NULL) |
445 | return -ENOMEM; | 457 | return -ENOMEM; |
446 | 458 | ||
447 | idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, | 459 | pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0); |
448 | "idmap", idmap, &idmap_upcall_ops, 0); | 460 | if (IS_ERR(pipe)) { |
449 | if (IS_ERR(idmap->idmap_dentry)) { | 461 | error = PTR_ERR(pipe); |
450 | error = PTR_ERR(idmap->idmap_dentry); | ||
451 | kfree(idmap); | 462 | kfree(idmap); |
452 | return error; | 463 | return error; |
453 | } | 464 | } |
454 | 465 | error = nfs_idmap_register(clp, idmap, pipe); | |
455 | mutex_init(&idmap->idmap_lock); | 466 | if (error) { |
456 | mutex_init(&idmap->idmap_im_lock); | 467 | rpc_destroy_pipe_data(pipe); |
457 | init_waitqueue_head(&idmap->idmap_wq); | 468 | kfree(idmap); |
458 | idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; | 469 | return error; |
459 | idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; | 470 | } |
471 | idmap->idmap_pipe = pipe; | ||
460 | 472 | ||
461 | clp->cl_idmap = idmap; | 473 | clp->cl_idmap = idmap; |
462 | return 0; | 474 | return 0; |
@@ -469,211 +481,220 @@ nfs_idmap_delete(struct nfs_client *clp) | |||
469 | 481 | ||
470 | if (!idmap) | 482 | if (!idmap) |
471 | return; | 483 | return; |
472 | rpc_unlink(idmap->idmap_dentry); | 484 | nfs_idmap_unregister(clp, idmap->idmap_pipe); |
485 | rpc_destroy_pipe_data(idmap->idmap_pipe); | ||
473 | clp->cl_idmap = NULL; | 486 | clp->cl_idmap = NULL; |
474 | kfree(idmap); | 487 | kfree(idmap); |
475 | } | 488 | } |
476 | 489 | ||
477 | /* | 490 | static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, |
478 | * Helper routines for manipulating the hashtable | 491 | struct super_block *sb) |
479 | */ | ||
480 | static inline struct idmap_hashent * | ||
481 | idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) | ||
482 | { | ||
483 | return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; | ||
484 | } | ||
485 | |||
486 | static struct idmap_hashent * | ||
487 | idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) | ||
488 | { | 492 | { |
489 | struct idmap_hashent *he = idmap_name_hash(h, name, len); | 493 | int err = 0; |
490 | 494 | ||
491 | if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) | 495 | switch (event) { |
492 | return NULL; | 496 | case RPC_PIPEFS_MOUNT: |
493 | if (time_after(jiffies, he->ih_expires)) | 497 | BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); |
494 | return NULL; | 498 | err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, |
495 | return he; | 499 | clp->cl_idmap, |
500 | clp->cl_idmap->idmap_pipe); | ||
501 | break; | ||
502 | case RPC_PIPEFS_UMOUNT: | ||
503 | if (clp->cl_idmap->idmap_pipe) { | ||
504 | struct dentry *parent; | ||
505 | |||
506 | parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; | ||
507 | __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); | ||
508 | /* | ||
509 | * Note: This is a dirty hack. SUNRPC hook has been | ||
510 | * called already but simple_rmdir() call for the | ||
511 | * directory returned with error because of idmap pipe | ||
512 | * inside. Thus now we have to remove this directory | ||
513 | * here. | ||
514 | */ | ||
515 | if (rpc_rmdir(parent)) | ||
516 | printk(KERN_ERR "NFS: %s: failed to remove " | ||
517 | "clnt dir!\n", __func__); | ||
518 | } | ||
519 | break; | ||
520 | default: | ||
521 | printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__, | ||
522 | event); | ||
523 | return -ENOTSUPP; | ||
524 | } | ||
525 | return err; | ||
526 | } | ||
527 | |||
528 | static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) | ||
529 | { | ||
530 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
531 | struct dentry *cl_dentry; | ||
532 | struct nfs_client *clp; | ||
533 | |||
534 | spin_lock(&nn->nfs_client_lock); | ||
535 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { | ||
536 | if (clp->rpc_ops != &nfs_v4_clientops) | ||
537 | continue; | ||
538 | cl_dentry = clp->cl_idmap->idmap_pipe->dentry; | ||
539 | if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) || | ||
540 | ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry)) | ||
541 | continue; | ||
542 | atomic_inc(&clp->cl_count); | ||
543 | spin_unlock(&nn->nfs_client_lock); | ||
544 | return clp; | ||
545 | } | ||
546 | spin_unlock(&nn->nfs_client_lock); | ||
547 | return NULL; | ||
496 | } | 548 | } |
497 | 549 | ||
498 | static inline struct idmap_hashent * | 550 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, |
499 | idmap_id_hash(struct idmap_hashtable* h, __u32 id) | 551 | void *ptr) |
500 | { | 552 | { |
501 | return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; | 553 | struct super_block *sb = ptr; |
502 | } | 554 | struct nfs_client *clp; |
555 | int error = 0; | ||
503 | 556 | ||
504 | static struct idmap_hashent * | 557 | while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { |
505 | idmap_lookup_id(struct idmap_hashtable *h, __u32 id) | 558 | error = __rpc_pipefs_event(clp, event, sb); |
506 | { | 559 | nfs_put_client(clp); |
507 | struct idmap_hashent *he = idmap_id_hash(h, id); | 560 | if (error) |
508 | if (he->ih_id != id || he->ih_namelen == 0) | 561 | break; |
509 | return NULL; | 562 | } |
510 | if (time_after(jiffies, he->ih_expires)) | 563 | return error; |
511 | return NULL; | ||
512 | return he; | ||
513 | } | 564 | } |
514 | 565 | ||
515 | /* | 566 | #define PIPEFS_NFS_PRIO 1 |
516 | * Routines for allocating new entries in the hashtable. | 567 | |
517 | * For now, we just have 1 entry per bucket, so it's all | 568 | static struct notifier_block nfs_idmap_block = { |
518 | * pretty trivial. | 569 | .notifier_call = rpc_pipefs_event, |
519 | */ | 570 | .priority = SUNRPC_PIPEFS_NFS_PRIO, |
520 | static inline struct idmap_hashent * | 571 | }; |
521 | idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) | ||
522 | { | ||
523 | return idmap_name_hash(h, name, len); | ||
524 | } | ||
525 | 572 | ||
526 | static inline struct idmap_hashent * | 573 | int nfs_idmap_init(void) |
527 | idmap_alloc_id(struct idmap_hashtable *h, __u32 id) | ||
528 | { | 574 | { |
529 | return idmap_id_hash(h, id); | 575 | int ret; |
576 | ret = nfs_idmap_init_keyring(); | ||
577 | if (ret != 0) | ||
578 | goto out; | ||
579 | ret = rpc_pipefs_notifier_register(&nfs_idmap_block); | ||
580 | if (ret != 0) | ||
581 | nfs_idmap_quit_keyring(); | ||
582 | out: | ||
583 | return ret; | ||
530 | } | 584 | } |
531 | 585 | ||
532 | static void | 586 | void nfs_idmap_quit(void) |
533 | idmap_update_entry(struct idmap_hashent *he, const char *name, | ||
534 | size_t namelen, __u32 id) | ||
535 | { | 587 | { |
536 | he->ih_id = id; | 588 | rpc_pipefs_notifier_unregister(&nfs_idmap_block); |
537 | memcpy(he->ih_name, name, namelen); | 589 | nfs_idmap_quit_keyring(); |
538 | he->ih_name[namelen] = '\0'; | ||
539 | he->ih_namelen = namelen; | ||
540 | he->ih_expires = jiffies + nfs_idmap_cache_timeout; | ||
541 | } | 590 | } |
542 | 591 | ||
543 | /* | 592 | static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, |
544 | * Name -> ID | 593 | struct rpc_pipe_msg *msg) |
545 | */ | ||
546 | static int | ||
547 | nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, | ||
548 | const char *name, size_t namelen, __u32 *id) | ||
549 | { | 594 | { |
550 | struct rpc_pipe_msg msg; | 595 | substring_t substr; |
551 | struct idmap_msg *im; | 596 | int token, ret; |
552 | struct idmap_hashent *he; | ||
553 | DECLARE_WAITQUEUE(wq, current); | ||
554 | int ret = -EIO; | ||
555 | |||
556 | im = &idmap->idmap_im; | ||
557 | |||
558 | /* | ||
559 | * String sanity checks | ||
560 | * Note that the userland daemon expects NUL terminated strings | ||
561 | */ | ||
562 | for (;;) { | ||
563 | if (namelen == 0) | ||
564 | return -EINVAL; | ||
565 | if (name[namelen-1] != '\0') | ||
566 | break; | ||
567 | namelen--; | ||
568 | } | ||
569 | if (namelen >= IDMAP_NAMESZ) | ||
570 | return -EINVAL; | ||
571 | 597 | ||
572 | mutex_lock(&idmap->idmap_lock); | 598 | memset(im, 0, sizeof(*im)); |
573 | mutex_lock(&idmap->idmap_im_lock); | 599 | memset(msg, 0, sizeof(*msg)); |
574 | |||
575 | he = idmap_lookup_name(h, name, namelen); | ||
576 | if (he != NULL) { | ||
577 | *id = he->ih_id; | ||
578 | ret = 0; | ||
579 | goto out; | ||
580 | } | ||
581 | 600 | ||
582 | memset(im, 0, sizeof(*im)); | 601 | im->im_type = IDMAP_TYPE_GROUP; |
583 | memcpy(im->im_name, name, namelen); | 602 | token = match_token(desc, nfs_idmap_tokens, &substr); |
584 | 603 | ||
585 | im->im_type = h->h_type; | 604 | switch (token) { |
586 | im->im_conv = IDMAP_CONV_NAMETOID; | 605 | case Opt_find_uid: |
606 | im->im_type = IDMAP_TYPE_USER; | ||
607 | case Opt_find_gid: | ||
608 | im->im_conv = IDMAP_CONV_NAMETOID; | ||
609 | ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ); | ||
610 | break; | ||
587 | 611 | ||
588 | memset(&msg, 0, sizeof(msg)); | 612 | case Opt_find_user: |
589 | msg.data = im; | 613 | im->im_type = IDMAP_TYPE_USER; |
590 | msg.len = sizeof(*im); | 614 | case Opt_find_group: |
615 | im->im_conv = IDMAP_CONV_IDTONAME; | ||
616 | ret = match_int(&substr, &im->im_id); | ||
617 | break; | ||
591 | 618 | ||
592 | add_wait_queue(&idmap->idmap_wq, &wq); | 619 | default: |
593 | if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { | 620 | ret = -EINVAL; |
594 | remove_wait_queue(&idmap->idmap_wq, &wq); | ||
595 | goto out; | 621 | goto out; |
596 | } | 622 | } |
597 | 623 | ||
598 | set_current_state(TASK_UNINTERRUPTIBLE); | 624 | msg->data = im; |
599 | mutex_unlock(&idmap->idmap_im_lock); | 625 | msg->len = sizeof(struct idmap_msg); |
600 | schedule(); | ||
601 | __set_current_state(TASK_RUNNING); | ||
602 | remove_wait_queue(&idmap->idmap_wq, &wq); | ||
603 | mutex_lock(&idmap->idmap_im_lock); | ||
604 | 626 | ||
605 | if (im->im_status & IDMAP_STATUS_SUCCESS) { | 627 | out: |
606 | *id = im->im_id; | ||
607 | ret = 0; | ||
608 | } | ||
609 | |||
610 | out: | ||
611 | memset(im, 0, sizeof(*im)); | ||
612 | mutex_unlock(&idmap->idmap_im_lock); | ||
613 | mutex_unlock(&idmap->idmap_lock); | ||
614 | return ret; | 628 | return ret; |
615 | } | 629 | } |
616 | 630 | ||
617 | /* | 631 | static int nfs_idmap_legacy_upcall(struct key_construction *cons, |
618 | * ID -> Name | 632 | const char *op, |
619 | */ | 633 | void *aux) |
620 | static int | ||
621 | nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, | ||
622 | __u32 id, char *name) | ||
623 | { | 634 | { |
624 | struct rpc_pipe_msg msg; | 635 | struct rpc_pipe_msg *msg; |
625 | struct idmap_msg *im; | 636 | struct idmap_msg *im; |
626 | struct idmap_hashent *he; | 637 | struct idmap *idmap = (struct idmap *)aux; |
627 | DECLARE_WAITQUEUE(wq, current); | 638 | struct key *key = cons->key; |
628 | int ret = -EIO; | 639 | int ret; |
629 | unsigned int len; | ||
630 | |||
631 | im = &idmap->idmap_im; | ||
632 | 640 | ||
633 | mutex_lock(&idmap->idmap_lock); | 641 | /* msg and im are freed in idmap_pipe_destroy_msg */ |
634 | mutex_lock(&idmap->idmap_im_lock); | 642 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); |
643 | if (IS_ERR(msg)) { | ||
644 | ret = PTR_ERR(msg); | ||
645 | goto out0; | ||
646 | } | ||
635 | 647 | ||
636 | he = idmap_lookup_id(h, id); | 648 | im = kmalloc(sizeof(*im), GFP_KERNEL); |
637 | if (he) { | 649 | if (IS_ERR(im)) { |
638 | memcpy(name, he->ih_name, he->ih_namelen); | 650 | ret = PTR_ERR(im); |
639 | ret = he->ih_namelen; | 651 | goto out1; |
640 | goto out; | ||
641 | } | 652 | } |
642 | 653 | ||
643 | memset(im, 0, sizeof(*im)); | 654 | ret = nfs_idmap_prepare_message(key->description, im, msg); |
644 | im->im_type = h->h_type; | 655 | if (ret < 0) |
645 | im->im_conv = IDMAP_CONV_IDTONAME; | 656 | goto out2; |
646 | im->im_id = id; | ||
647 | 657 | ||
648 | memset(&msg, 0, sizeof(msg)); | 658 | idmap->idmap_key_cons = cons; |
649 | msg.data = im; | ||
650 | msg.len = sizeof(*im); | ||
651 | 659 | ||
652 | add_wait_queue(&idmap->idmap_wq, &wq); | 660 | ret = rpc_queue_upcall(idmap->idmap_pipe, msg); |
661 | if (ret < 0) | ||
662 | goto out2; | ||
653 | 663 | ||
654 | if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { | 664 | return ret; |
655 | remove_wait_queue(&idmap->idmap_wq, &wq); | 665 | |
656 | goto out; | 666 | out2: |
657 | } | 667 | kfree(im); |
668 | out1: | ||
669 | kfree(msg); | ||
670 | out0: | ||
671 | key_revoke(cons->key); | ||
672 | key_revoke(cons->authkey); | ||
673 | return ret; | ||
674 | } | ||
675 | |||
676 | static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) | ||
677 | { | ||
678 | return key_instantiate_and_link(key, data, strlen(data) + 1, | ||
679 | id_resolver_cache->thread_keyring, | ||
680 | authkey); | ||
681 | } | ||
658 | 682 | ||
659 | set_current_state(TASK_UNINTERRUPTIBLE); | 683 | static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) |
660 | mutex_unlock(&idmap->idmap_im_lock); | 684 | { |
661 | schedule(); | 685 | char id_str[NFS_UINT_MAXLEN]; |
662 | __set_current_state(TASK_RUNNING); | 686 | int ret = -EINVAL; |
663 | remove_wait_queue(&idmap->idmap_wq, &wq); | 687 | |
664 | mutex_lock(&idmap->idmap_im_lock); | 688 | switch (im->im_conv) { |
665 | 689 | case IDMAP_CONV_NAMETOID: | |
666 | if (im->im_status & IDMAP_STATUS_SUCCESS) { | 690 | sprintf(id_str, "%d", im->im_id); |
667 | if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) | 691 | ret = nfs_idmap_instantiate(key, authkey, id_str); |
668 | goto out; | 692 | break; |
669 | memcpy(name, im->im_name, len); | 693 | case IDMAP_CONV_IDTONAME: |
670 | ret = len; | 694 | ret = nfs_idmap_instantiate(key, authkey, im->im_name); |
695 | break; | ||
671 | } | 696 | } |
672 | 697 | ||
673 | out: | ||
674 | memset(im, 0, sizeof(*im)); | ||
675 | mutex_unlock(&idmap->idmap_im_lock); | ||
676 | mutex_unlock(&idmap->idmap_lock); | ||
677 | return ret; | 698 | return ret; |
678 | } | 699 | } |
679 | 700 | ||
@@ -682,115 +703,51 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | |||
682 | { | 703 | { |
683 | struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); | 704 | struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); |
684 | struct idmap *idmap = (struct idmap *)rpci->private; | 705 | struct idmap *idmap = (struct idmap *)rpci->private; |
685 | struct idmap_msg im_in, *im = &idmap->idmap_im; | 706 | struct key_construction *cons = idmap->idmap_key_cons; |
686 | struct idmap_hashtable *h; | 707 | struct idmap_msg im; |
687 | struct idmap_hashent *he = NULL; | ||
688 | size_t namelen_in; | 708 | size_t namelen_in; |
689 | int ret; | 709 | int ret; |
690 | 710 | ||
691 | if (mlen != sizeof(im_in)) | 711 | if (mlen != sizeof(im)) { |
692 | return -ENOSPC; | 712 | ret = -ENOSPC; |
693 | |||
694 | if (copy_from_user(&im_in, src, mlen) != 0) | ||
695 | return -EFAULT; | ||
696 | |||
697 | mutex_lock(&idmap->idmap_im_lock); | ||
698 | |||
699 | ret = mlen; | ||
700 | im->im_status = im_in.im_status; | ||
701 | /* If we got an error, terminate now, and wake up pending upcalls */ | ||
702 | if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { | ||
703 | wake_up(&idmap->idmap_wq); | ||
704 | goto out; | 713 | goto out; |
705 | } | 714 | } |
706 | 715 | ||
707 | /* Sanity checking of strings */ | 716 | if (copy_from_user(&im, src, mlen) != 0) { |
708 | ret = -EINVAL; | 717 | ret = -EFAULT; |
709 | namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); | ||
710 | if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) | ||
711 | goto out; | 718 | goto out; |
719 | } | ||
712 | 720 | ||
713 | switch (im_in.im_type) { | 721 | if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { |
714 | case IDMAP_TYPE_USER: | 722 | ret = mlen; |
715 | h = &idmap->idmap_user_hash; | 723 | complete_request_key(idmap->idmap_key_cons, -ENOKEY); |
716 | break; | 724 | goto out_incomplete; |
717 | case IDMAP_TYPE_GROUP: | ||
718 | h = &idmap->idmap_group_hash; | ||
719 | break; | ||
720 | default: | ||
721 | goto out; | ||
722 | } | 725 | } |
723 | 726 | ||
724 | switch (im_in.im_conv) { | 727 | namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); |
725 | case IDMAP_CONV_IDTONAME: | 728 | if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { |
726 | /* Did we match the current upcall? */ | 729 | ret = -EINVAL; |
727 | if (im->im_conv == IDMAP_CONV_IDTONAME | ||
728 | && im->im_type == im_in.im_type | ||
729 | && im->im_id == im_in.im_id) { | ||
730 | /* Yes: copy string, including the terminating '\0' */ | ||
731 | memcpy(im->im_name, im_in.im_name, namelen_in); | ||
732 | im->im_name[namelen_in] = '\0'; | ||
733 | wake_up(&idmap->idmap_wq); | ||
734 | } | ||
735 | he = idmap_alloc_id(h, im_in.im_id); | ||
736 | break; | ||
737 | case IDMAP_CONV_NAMETOID: | ||
738 | /* Did we match the current upcall? */ | ||
739 | if (im->im_conv == IDMAP_CONV_NAMETOID | ||
740 | && im->im_type == im_in.im_type | ||
741 | && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in | ||
742 | && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { | ||
743 | im->im_id = im_in.im_id; | ||
744 | wake_up(&idmap->idmap_wq); | ||
745 | } | ||
746 | he = idmap_alloc_name(h, im_in.im_name, namelen_in); | ||
747 | break; | ||
748 | default: | ||
749 | goto out; | 730 | goto out; |
750 | } | 731 | } |
751 | 732 | ||
752 | /* If the entry is valid, also copy it to the cache */ | 733 | ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); |
753 | if (he != NULL) | 734 | if (ret >= 0) { |
754 | idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); | 735 | key_set_timeout(cons->key, nfs_idmap_cache_timeout); |
755 | ret = mlen; | 736 | ret = mlen; |
737 | } | ||
738 | |||
756 | out: | 739 | out: |
757 | mutex_unlock(&idmap->idmap_im_lock); | 740 | complete_request_key(idmap->idmap_key_cons, ret); |
741 | out_incomplete: | ||
758 | return ret; | 742 | return ret; |
759 | } | 743 | } |
760 | 744 | ||
761 | static void | 745 | static void |
762 | idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) | 746 | idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) |
763 | { | 747 | { |
764 | struct idmap_msg *im = msg->data; | 748 | /* Free memory allocated in nfs_idmap_legacy_upcall() */ |
765 | struct idmap *idmap = container_of(im, struct idmap, idmap_im); | 749 | kfree(msg->data); |
766 | 750 | kfree(msg); | |
767 | if (msg->errno >= 0) | ||
768 | return; | ||
769 | mutex_lock(&idmap->idmap_im_lock); | ||
770 | im->im_status = IDMAP_STATUS_LOOKUPFAIL; | ||
771 | wake_up(&idmap->idmap_wq); | ||
772 | mutex_unlock(&idmap->idmap_im_lock); | ||
773 | } | ||
774 | |||
775 | /* | ||
776 | * Fowler/Noll/Vo hash | ||
777 | * http://www.isthe.com/chongo/tech/comp/fnv/ | ||
778 | */ | ||
779 | |||
780 | #define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ | ||
781 | #define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ | ||
782 | |||
783 | static unsigned int fnvhash32(const void *buf, size_t buflen) | ||
784 | { | ||
785 | const unsigned char *p, *end = (const unsigned char *)buf + buflen; | ||
786 | unsigned int hash = FNV_1_32; | ||
787 | |||
788 | for (p = buf; p < end; p++) { | ||
789 | hash *= FNV_P_32; | ||
790 | hash ^= (unsigned int)*p; | ||
791 | } | ||
792 | |||
793 | return hash; | ||
794 | } | 751 | } |
795 | 752 | ||
796 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) | 753 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) |
@@ -799,16 +756,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_ | |||
799 | 756 | ||
800 | if (nfs_map_string_to_numeric(name, namelen, uid)) | 757 | if (nfs_map_string_to_numeric(name, namelen, uid)) |
801 | return 0; | 758 | return 0; |
802 | return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); | 759 | return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); |
803 | } | 760 | } |
804 | 761 | ||
805 | int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) | 762 | int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) |
806 | { | 763 | { |
807 | struct idmap *idmap = server->nfs_client->cl_idmap; | 764 | struct idmap *idmap = server->nfs_client->cl_idmap; |
808 | 765 | ||
809 | if (nfs_map_string_to_numeric(name, namelen, uid)) | 766 | if (nfs_map_string_to_numeric(name, namelen, gid)) |
810 | return 0; | 767 | return 0; |
811 | return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); | 768 | return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); |
812 | } | 769 | } |
813 | 770 | ||
814 | int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) | 771 | int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) |
@@ -817,21 +774,19 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s | |||
817 | int ret = -EINVAL; | 774 | int ret = -EINVAL; |
818 | 775 | ||
819 | if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) | 776 | if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) |
820 | ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); | 777 | ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); |
821 | if (ret < 0) | 778 | if (ret < 0) |
822 | ret = nfs_map_numeric_to_string(uid, buf, buflen); | 779 | ret = nfs_map_numeric_to_string(uid, buf, buflen); |
823 | return ret; | 780 | return ret; |
824 | } | 781 | } |
825 | int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) | 782 | int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) |
826 | { | 783 | { |
827 | struct idmap *idmap = server->nfs_client->cl_idmap; | 784 | struct idmap *idmap = server->nfs_client->cl_idmap; |
828 | int ret = -EINVAL; | 785 | int ret = -EINVAL; |
829 | 786 | ||
830 | if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) | 787 | if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) |
831 | ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); | 788 | ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); |
832 | if (ret < 0) | 789 | if (ret < 0) |
833 | ret = nfs_map_numeric_to_string(uid, buf, buflen); | 790 | ret = nfs_map_numeric_to_string(gid, buf, buflen); |
834 | return ret; | 791 | return ret; |
835 | } | 792 | } |
836 | |||
837 | #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c2ce8196912c..e8bbfa5b3500 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/compat.h> | 40 | #include <linux/compat.h> |
41 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
42 | #include <linux/crc32.h> | ||
42 | 43 | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | 45 | ||
@@ -50,6 +51,7 @@ | |||
50 | #include "fscache.h" | 51 | #include "fscache.h" |
51 | #include "dns_resolve.h" | 52 | #include "dns_resolve.h" |
52 | #include "pnfs.h" | 53 | #include "pnfs.h" |
54 | #include "netns.h" | ||
53 | 55 | ||
54 | #define NFSDBG_FACILITY NFSDBG_VFS | 56 | #define NFSDBG_FACILITY NFSDBG_VFS |
55 | 57 | ||
@@ -387,9 +389,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
387 | unlock_new_inode(inode); | 389 | unlock_new_inode(inode); |
388 | } else | 390 | } else |
389 | nfs_refresh_inode(inode, fattr); | 391 | nfs_refresh_inode(inode, fattr); |
390 | dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", | 392 | dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", |
391 | inode->i_sb->s_id, | 393 | inode->i_sb->s_id, |
392 | (long long)NFS_FILEID(inode), | 394 | (long long)NFS_FILEID(inode), |
395 | nfs_display_fhandle_hash(fh), | ||
393 | atomic_read(&inode->i_count)); | 396 | atomic_read(&inode->i_count)); |
394 | 397 | ||
395 | out: | 398 | out: |
@@ -400,7 +403,7 @@ out_no_inode: | |||
400 | goto out; | 403 | goto out; |
401 | } | 404 | } |
402 | 405 | ||
403 | #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) | 406 | #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) |
404 | 407 | ||
405 | int | 408 | int |
406 | nfs_setattr(struct dentry *dentry, struct iattr *attr) | 409 | nfs_setattr(struct dentry *dentry, struct iattr *attr) |
@@ -422,7 +425,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
422 | 425 | ||
423 | /* Optimization: if the end result is no change, don't RPC */ | 426 | /* Optimization: if the end result is no change, don't RPC */ |
424 | attr->ia_valid &= NFS_VALID_ATTRS; | 427 | attr->ia_valid &= NFS_VALID_ATTRS; |
425 | if ((attr->ia_valid & ~ATTR_FILE) == 0) | 428 | if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) |
426 | return 0; | 429 | return 0; |
427 | 430 | ||
428 | /* Write all dirty data */ | 431 | /* Write all dirty data */ |
@@ -1043,6 +1046,67 @@ struct nfs_fh *nfs_alloc_fhandle(void) | |||
1043 | return fh; | 1046 | return fh; |
1044 | } | 1047 | } |
1045 | 1048 | ||
1049 | #ifdef NFS_DEBUG | ||
1050 | /* | ||
1051 | * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle | ||
1052 | * in the same way that wireshark does | ||
1053 | * | ||
1054 | * @fh: file handle | ||
1055 | * | ||
1056 | * For debugging only. | ||
1057 | */ | ||
1058 | u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) | ||
1059 | { | ||
1060 | /* wireshark uses 32-bit AUTODIN crc and does a bitwise | ||
1061 | * not on the result */ | ||
1062 | return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size); | ||
1063 | } | ||
1064 | |||
1065 | /* | ||
1066 | * _nfs_display_fhandle - display an NFS file handle on the console | ||
1067 | * | ||
1068 | * @fh: file handle to display | ||
1069 | * @caption: display caption | ||
1070 | * | ||
1071 | * For debugging only. | ||
1072 | */ | ||
1073 | void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) | ||
1074 | { | ||
1075 | unsigned short i; | ||
1076 | |||
1077 | if (fh == NULL || fh->size == 0) { | ||
1078 | printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); | ||
1079 | return; | ||
1080 | } | ||
1081 | |||
1082 | printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n", | ||
1083 | caption, fh, fh->size, _nfs_display_fhandle_hash(fh)); | ||
1084 | for (i = 0; i < fh->size; i += 16) { | ||
1085 | __be32 *pos = (__be32 *)&fh->data[i]; | ||
1086 | |||
1087 | switch ((fh->size - i - 1) >> 2) { | ||
1088 | case 0: | ||
1089 | printk(KERN_DEFAULT " %08x\n", | ||
1090 | be32_to_cpup(pos)); | ||
1091 | break; | ||
1092 | case 1: | ||
1093 | printk(KERN_DEFAULT " %08x %08x\n", | ||
1094 | be32_to_cpup(pos), be32_to_cpup(pos + 1)); | ||
1095 | break; | ||
1096 | case 2: | ||
1097 | printk(KERN_DEFAULT " %08x %08x %08x\n", | ||
1098 | be32_to_cpup(pos), be32_to_cpup(pos + 1), | ||
1099 | be32_to_cpup(pos + 2)); | ||
1100 | break; | ||
1101 | default: | ||
1102 | printk(KERN_DEFAULT " %08x %08x %08x %08x\n", | ||
1103 | be32_to_cpup(pos), be32_to_cpup(pos + 1), | ||
1104 | be32_to_cpup(pos + 2), be32_to_cpup(pos + 3)); | ||
1105 | } | ||
1106 | } | ||
1107 | } | ||
1108 | #endif | ||
1109 | |||
1046 | /** | 1110 | /** |
1047 | * nfs_inode_attrs_need_update - check if the inode attributes need updating | 1111 | * nfs_inode_attrs_need_update - check if the inode attributes need updating |
1048 | * @inode - pointer to inode | 1112 | * @inode - pointer to inode |
@@ -1210,8 +1274,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1210 | unsigned long now = jiffies; | 1274 | unsigned long now = jiffies; |
1211 | unsigned long save_cache_validity; | 1275 | unsigned long save_cache_validity; |
1212 | 1276 | ||
1213 | dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", | 1277 | dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n", |
1214 | __func__, inode->i_sb->s_id, inode->i_ino, | 1278 | __func__, inode->i_sb->s_id, inode->i_ino, |
1279 | nfs_display_fhandle_hash(NFS_FH(inode)), | ||
1215 | atomic_read(&inode->i_count), fattr->valid); | 1280 | atomic_read(&inode->i_count), fattr->valid); |
1216 | 1281 | ||
1217 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) | 1282 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) |
@@ -1405,7 +1470,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1405 | /* | 1470 | /* |
1406 | * Big trouble! The inode has become a different object. | 1471 | * Big trouble! The inode has become a different object. |
1407 | */ | 1472 | */ |
1408 | printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", | 1473 | printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", |
1409 | __func__, inode->i_ino, inode->i_mode, fattr->mode); | 1474 | __func__, inode->i_ino, inode->i_mode, fattr->mode); |
1410 | out_err: | 1475 | out_err: |
1411 | /* | 1476 | /* |
@@ -1494,7 +1559,7 @@ static void init_once(void *foo) | |||
1494 | INIT_LIST_HEAD(&nfsi->open_files); | 1559 | INIT_LIST_HEAD(&nfsi->open_files); |
1495 | INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); | 1560 | INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); |
1496 | INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); | 1561 | INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); |
1497 | INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); | 1562 | INIT_LIST_HEAD(&nfsi->commit_list); |
1498 | nfsi->npages = 0; | 1563 | nfsi->npages = 0; |
1499 | nfsi->ncommit = 0; | 1564 | nfsi->ncommit = 0; |
1500 | atomic_set(&nfsi->silly_count, 1); | 1565 | atomic_set(&nfsi->silly_count, 1); |
@@ -1551,6 +1616,28 @@ static void nfsiod_stop(void) | |||
1551 | destroy_workqueue(wq); | 1616 | destroy_workqueue(wq); |
1552 | } | 1617 | } |
1553 | 1618 | ||
1619 | int nfs_net_id; | ||
1620 | EXPORT_SYMBOL_GPL(nfs_net_id); | ||
1621 | |||
1622 | static int nfs_net_init(struct net *net) | ||
1623 | { | ||
1624 | nfs_clients_init(net); | ||
1625 | return nfs_dns_resolver_cache_init(net); | ||
1626 | } | ||
1627 | |||
1628 | static void nfs_net_exit(struct net *net) | ||
1629 | { | ||
1630 | nfs_dns_resolver_cache_destroy(net); | ||
1631 | nfs_cleanup_cb_ident_idr(net); | ||
1632 | } | ||
1633 | |||
1634 | static struct pernet_operations nfs_net_ops = { | ||
1635 | .init = nfs_net_init, | ||
1636 | .exit = nfs_net_exit, | ||
1637 | .id = &nfs_net_id, | ||
1638 | .size = sizeof(struct nfs_net), | ||
1639 | }; | ||
1640 | |||
1554 | /* | 1641 | /* |
1555 | * Initialize NFS | 1642 | * Initialize NFS |
1556 | */ | 1643 | */ |
@@ -1560,10 +1647,14 @@ static int __init init_nfs_fs(void) | |||
1560 | 1647 | ||
1561 | err = nfs_idmap_init(); | 1648 | err = nfs_idmap_init(); |
1562 | if (err < 0) | 1649 | if (err < 0) |
1563 | goto out9; | 1650 | goto out10; |
1564 | 1651 | ||
1565 | err = nfs_dns_resolver_init(); | 1652 | err = nfs_dns_resolver_init(); |
1566 | if (err < 0) | 1653 | if (err < 0) |
1654 | goto out9; | ||
1655 | |||
1656 | err = register_pernet_subsys(&nfs_net_ops); | ||
1657 | if (err < 0) | ||
1567 | goto out8; | 1658 | goto out8; |
1568 | 1659 | ||
1569 | err = nfs_fscache_register(); | 1660 | err = nfs_fscache_register(); |
@@ -1599,14 +1690,14 @@ static int __init init_nfs_fs(void) | |||
1599 | goto out0; | 1690 | goto out0; |
1600 | 1691 | ||
1601 | #ifdef CONFIG_PROC_FS | 1692 | #ifdef CONFIG_PROC_FS |
1602 | rpc_proc_register(&nfs_rpcstat); | 1693 | rpc_proc_register(&init_net, &nfs_rpcstat); |
1603 | #endif | 1694 | #endif |
1604 | if ((err = register_nfs_fs()) != 0) | 1695 | if ((err = register_nfs_fs()) != 0) |
1605 | goto out; | 1696 | goto out; |
1606 | return 0; | 1697 | return 0; |
1607 | out: | 1698 | out: |
1608 | #ifdef CONFIG_PROC_FS | 1699 | #ifdef CONFIG_PROC_FS |
1609 | rpc_proc_unregister("nfs"); | 1700 | rpc_proc_unregister(&init_net, "nfs"); |
1610 | #endif | 1701 | #endif |
1611 | nfs_destroy_directcache(); | 1702 | nfs_destroy_directcache(); |
1612 | out0: | 1703 | out0: |
@@ -1624,10 +1715,12 @@ out5: | |||
1624 | out6: | 1715 | out6: |
1625 | nfs_fscache_unregister(); | 1716 | nfs_fscache_unregister(); |
1626 | out7: | 1717 | out7: |
1627 | nfs_dns_resolver_destroy(); | 1718 | unregister_pernet_subsys(&nfs_net_ops); |
1628 | out8: | 1719 | out8: |
1629 | nfs_idmap_quit(); | 1720 | nfs_dns_resolver_destroy(); |
1630 | out9: | 1721 | out9: |
1722 | nfs_idmap_quit(); | ||
1723 | out10: | ||
1631 | return err; | 1724 | return err; |
1632 | } | 1725 | } |
1633 | 1726 | ||
@@ -1639,12 +1732,12 @@ static void __exit exit_nfs_fs(void) | |||
1639 | nfs_destroy_inodecache(); | 1732 | nfs_destroy_inodecache(); |
1640 | nfs_destroy_nfspagecache(); | 1733 | nfs_destroy_nfspagecache(); |
1641 | nfs_fscache_unregister(); | 1734 | nfs_fscache_unregister(); |
1735 | unregister_pernet_subsys(&nfs_net_ops); | ||
1642 | nfs_dns_resolver_destroy(); | 1736 | nfs_dns_resolver_destroy(); |
1643 | nfs_idmap_quit(); | 1737 | nfs_idmap_quit(); |
1644 | #ifdef CONFIG_PROC_FS | 1738 | #ifdef CONFIG_PROC_FS |
1645 | rpc_proc_unregister("nfs"); | 1739 | rpc_proc_unregister(&init_net, "nfs"); |
1646 | #endif | 1740 | #endif |
1647 | nfs_cleanup_cb_ident_idr(); | ||
1648 | unregister_nfs_fs(); | 1741 | unregister_nfs_fs(); |
1649 | nfs_fs_proc_exit(); | 1742 | nfs_fs_proc_exit(); |
1650 | nfsiod_stop(); | 1743 | nfsiod_stop(); |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8102db9b926c..2476dc69365f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -123,6 +123,7 @@ struct nfs_parsed_mount_data { | |||
123 | } nfs_server; | 123 | } nfs_server; |
124 | 124 | ||
125 | struct security_mnt_opts lsm_opts; | 125 | struct security_mnt_opts lsm_opts; |
126 | struct net *net; | ||
126 | }; | 127 | }; |
127 | 128 | ||
128 | /* mount_clnt.c */ | 129 | /* mount_clnt.c */ |
@@ -137,20 +138,22 @@ struct nfs_mount_request { | |||
137 | int noresvport; | 138 | int noresvport; |
138 | unsigned int *auth_flav_len; | 139 | unsigned int *auth_flav_len; |
139 | rpc_authflavor_t *auth_flavs; | 140 | rpc_authflavor_t *auth_flavs; |
141 | struct net *net; | ||
140 | }; | 142 | }; |
141 | 143 | ||
142 | extern int nfs_mount(struct nfs_mount_request *info); | 144 | extern int nfs_mount(struct nfs_mount_request *info); |
143 | extern void nfs_umount(const struct nfs_mount_request *info); | 145 | extern void nfs_umount(const struct nfs_mount_request *info); |
144 | 146 | ||
145 | /* client.c */ | 147 | /* client.c */ |
146 | extern struct rpc_program nfs_program; | 148 | extern const struct rpc_program nfs_program; |
149 | extern void nfs_clients_init(struct net *net); | ||
147 | 150 | ||
148 | extern void nfs_cleanup_cb_ident_idr(void); | 151 | extern void nfs_cleanup_cb_ident_idr(struct net *); |
149 | extern void nfs_put_client(struct nfs_client *); | 152 | extern void nfs_put_client(struct nfs_client *); |
150 | extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); | 153 | extern struct nfs_client *nfs4_find_client_ident(struct net *, int); |
151 | extern struct nfs_client *nfs4_find_client_ident(int); | ||
152 | extern struct nfs_client * | 154 | extern struct nfs_client * |
153 | nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); | 155 | nfs4_find_client_sessionid(struct net *, const struct sockaddr *, |
156 | struct nfs4_sessionid *); | ||
154 | extern struct nfs_server *nfs_create_server( | 157 | extern struct nfs_server *nfs_create_server( |
155 | const struct nfs_parsed_mount_data *, | 158 | const struct nfs_parsed_mount_data *, |
156 | struct nfs_fh *); | 159 | struct nfs_fh *); |
@@ -329,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list, | |||
329 | void nfs_commit_clear_lock(struct nfs_inode *nfsi); | 332 | void nfs_commit_clear_lock(struct nfs_inode *nfsi); |
330 | void nfs_commitdata_release(void *data); | 333 | void nfs_commitdata_release(void *data); |
331 | void nfs_commit_release_pages(struct nfs_write_data *data); | 334 | void nfs_commit_release_pages(struct nfs_write_data *data); |
335 | void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); | ||
336 | void nfs_request_remove_commit_list(struct nfs_page *req); | ||
332 | 337 | ||
333 | #ifdef CONFIG_MIGRATION | 338 | #ifdef CONFIG_MIGRATION |
334 | extern int nfs_migrate_page(struct address_space *, | 339 | extern int nfs_migrate_page(struct address_space *, |
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d4c2d6b7507e..8e65c7f1f87c 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/nfs_fs.h> | 16 | #include <linux/nfs_fs.h> |
17 | #include "internal.h" | 17 | #include "internal.h" |
18 | 18 | ||
19 | #ifdef RPC_DEBUG | 19 | #ifdef NFS_DEBUG |
20 | # define NFSDBG_FACILITY NFSDBG_MOUNT | 20 | # define NFSDBG_FACILITY NFSDBG_MOUNT |
21 | #endif | 21 | #endif |
22 | 22 | ||
@@ -67,7 +67,7 @@ enum { | |||
67 | MOUNTPROC3_EXPORT = 5, | 67 | MOUNTPROC3_EXPORT = 5, |
68 | }; | 68 | }; |
69 | 69 | ||
70 | static struct rpc_program mnt_program; | 70 | static const struct rpc_program mnt_program; |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * Defined by OpenGroup XNFS Version 3W, chapter 8 | 73 | * Defined by OpenGroup XNFS Version 3W, chapter 8 |
@@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) | |||
153 | .rpc_resp = &result, | 153 | .rpc_resp = &result, |
154 | }; | 154 | }; |
155 | struct rpc_create_args args = { | 155 | struct rpc_create_args args = { |
156 | .net = &init_net, | 156 | .net = info->net, |
157 | .protocol = info->protocol, | 157 | .protocol = info->protocol, |
158 | .address = info->sap, | 158 | .address = info->sap, |
159 | .addrsize = info->salen, | 159 | .addrsize = info->salen, |
@@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) | |||
225 | .to_retries = 2, | 225 | .to_retries = 2, |
226 | }; | 226 | }; |
227 | struct rpc_create_args args = { | 227 | struct rpc_create_args args = { |
228 | .net = &init_net, | 228 | .net = info->net, |
229 | .protocol = IPPROTO_UDP, | 229 | .protocol = IPPROTO_UDP, |
230 | .address = info->sap, | 230 | .address = info->sap, |
231 | .addrsize = info->salen, | 231 | .addrsize = info->salen, |
@@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = { | |||
488 | }; | 488 | }; |
489 | 489 | ||
490 | 490 | ||
491 | static struct rpc_version mnt_version1 = { | 491 | static const struct rpc_version mnt_version1 = { |
492 | .number = 1, | 492 | .number = 1, |
493 | .nrprocs = ARRAY_SIZE(mnt_procedures), | 493 | .nrprocs = ARRAY_SIZE(mnt_procedures), |
494 | .procs = mnt_procedures, | 494 | .procs = mnt_procedures, |
495 | }; | 495 | }; |
496 | 496 | ||
497 | static struct rpc_version mnt_version3 = { | 497 | static const struct rpc_version mnt_version3 = { |
498 | .number = 3, | 498 | .number = 3, |
499 | .nrprocs = ARRAY_SIZE(mnt3_procedures), | 499 | .nrprocs = ARRAY_SIZE(mnt3_procedures), |
500 | .procs = mnt3_procedures, | 500 | .procs = mnt3_procedures, |
501 | }; | 501 | }; |
502 | 502 | ||
503 | static struct rpc_version *mnt_version[] = { | 503 | static const struct rpc_version *mnt_version[] = { |
504 | NULL, | 504 | NULL, |
505 | &mnt_version1, | 505 | &mnt_version1, |
506 | NULL, | 506 | NULL, |
@@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = { | |||
509 | 509 | ||
510 | static struct rpc_stat mnt_stats; | 510 | static struct rpc_stat mnt_stats; |
511 | 511 | ||
512 | static struct rpc_program mnt_program = { | 512 | static const struct rpc_program mnt_program = { |
513 | .name = "mount", | 513 | .name = "mount", |
514 | .number = NFS_MNT_PROGRAM, | 514 | .number = NFS_MNT_PROGRAM, |
515 | .nrvers = ARRAY_SIZE(mnt_version), | 515 | .nrvers = ARRAY_SIZE(mnt_version), |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 8102391bb374..1807866bb3ab 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -276,7 +276,10 @@ out: | |||
276 | nfs_free_fattr(fattr); | 276 | nfs_free_fattr(fattr); |
277 | nfs_free_fhandle(fh); | 277 | nfs_free_fhandle(fh); |
278 | out_nofree: | 278 | out_nofree: |
279 | dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); | 279 | if (IS_ERR(mnt)) |
280 | dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt)); | ||
281 | else | ||
282 | dprintk("<-- %s() = %p\n", __func__, mnt); | ||
280 | return mnt; | 283 | return mnt; |
281 | } | 284 | } |
282 | 285 | ||
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h new file mode 100644 index 000000000000..aa14ec303e94 --- /dev/null +++ b/fs/nfs/netns.h | |||
@@ -0,0 +1,27 @@ | |||
1 | #ifndef __NFS_NETNS_H__ | ||
2 | #define __NFS_NETNS_H__ | ||
3 | |||
4 | #include <net/net_namespace.h> | ||
5 | #include <net/netns/generic.h> | ||
6 | |||
7 | struct bl_dev_msg { | ||
8 | int32_t status; | ||
9 | uint32_t major, minor; | ||
10 | }; | ||
11 | |||
12 | struct nfs_net { | ||
13 | struct cache_detail *nfs_dns_resolve; | ||
14 | struct rpc_pipe *bl_device_pipe; | ||
15 | struct bl_dev_msg bl_mount_reply; | ||
16 | wait_queue_head_t bl_wq; | ||
17 | struct list_head nfs_client_list; | ||
18 | struct list_head nfs_volume_list; | ||
19 | #ifdef CONFIG_NFS_V4 | ||
20 | struct idr cb_ident_idr; /* Protected by nfs_client_lock */ | ||
21 | #endif | ||
22 | spinlock_t nfs_client_lock; | ||
23 | }; | ||
24 | |||
25 | extern int nfs_net_id; | ||
26 | |||
27 | #endif | ||
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 792cb13a4304..1f56000fabbd 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = { | |||
1150 | PROC(STATFS, fhandle, statfsres, 0), | 1150 | PROC(STATFS, fhandle, statfsres, 0), |
1151 | }; | 1151 | }; |
1152 | 1152 | ||
1153 | struct rpc_version nfs_version2 = { | 1153 | const struct rpc_version nfs_version2 = { |
1154 | .number = 2, | 1154 | .number = 2, |
1155 | .nrprocs = ARRAY_SIZE(nfs_procedures), | 1155 | .nrprocs = ARRAY_SIZE(nfs_procedures), |
1156 | .procs = nfs_procedures | 1156 | .procs = nfs_procedures |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7ef23979896d..e4498dc351a8 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | |||
192 | .pages = pages, | 192 | .pages = pages, |
193 | }; | 193 | }; |
194 | struct nfs3_getaclres res = { | 194 | struct nfs3_getaclres res = { |
195 | 0 | 195 | NULL, |
196 | }; | 196 | }; |
197 | struct rpc_message msg = { | 197 | struct rpc_message msg = { |
198 | .rpc_argp = &args, | 198 | .rpc_argp = &args, |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 91943953a370..5242eae6711a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -428,6 +428,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) | |||
428 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; | 428 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; |
429 | } | 429 | } |
430 | 430 | ||
431 | static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) | ||
432 | { | ||
433 | rpc_call_start(task); | ||
434 | } | ||
435 | |||
431 | static int | 436 | static int |
432 | nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) | 437 | nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) |
433 | { | 438 | { |
@@ -445,6 +450,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) | |||
445 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; | 450 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; |
446 | } | 451 | } |
447 | 452 | ||
453 | static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) | ||
454 | { | ||
455 | rpc_call_start(task); | ||
456 | } | ||
457 | |||
448 | static int | 458 | static int |
449 | nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | 459 | nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, |
450 | struct inode *new_dir) | 460 | struct inode *new_dir) |
@@ -814,6 +824,11 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message | |||
814 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; | 824 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; |
815 | } | 825 | } |
816 | 826 | ||
827 | static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | ||
828 | { | ||
829 | rpc_call_start(task); | ||
830 | } | ||
831 | |||
817 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | 832 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) |
818 | { | 833 | { |
819 | if (nfs3_async_handle_jukebox(task, data->inode)) | 834 | if (nfs3_async_handle_jukebox(task, data->inode)) |
@@ -828,6 +843,11 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
828 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; | 843 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; |
829 | } | 844 | } |
830 | 845 | ||
846 | static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
847 | { | ||
848 | rpc_call_start(task); | ||
849 | } | ||
850 | |||
831 | static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) | 851 | static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) |
832 | { | 852 | { |
833 | if (nfs3_async_handle_jukebox(task, data->inode)) | 853 | if (nfs3_async_handle_jukebox(task, data->inode)) |
@@ -864,9 +884,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
864 | .create = nfs3_proc_create, | 884 | .create = nfs3_proc_create, |
865 | .remove = nfs3_proc_remove, | 885 | .remove = nfs3_proc_remove, |
866 | .unlink_setup = nfs3_proc_unlink_setup, | 886 | .unlink_setup = nfs3_proc_unlink_setup, |
887 | .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, | ||
867 | .unlink_done = nfs3_proc_unlink_done, | 888 | .unlink_done = nfs3_proc_unlink_done, |
868 | .rename = nfs3_proc_rename, | 889 | .rename = nfs3_proc_rename, |
869 | .rename_setup = nfs3_proc_rename_setup, | 890 | .rename_setup = nfs3_proc_rename_setup, |
891 | .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, | ||
870 | .rename_done = nfs3_proc_rename_done, | 892 | .rename_done = nfs3_proc_rename_done, |
871 | .link = nfs3_proc_link, | 893 | .link = nfs3_proc_link, |
872 | .symlink = nfs3_proc_symlink, | 894 | .symlink = nfs3_proc_symlink, |
@@ -879,8 +901,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
879 | .pathconf = nfs3_proc_pathconf, | 901 | .pathconf = nfs3_proc_pathconf, |
880 | .decode_dirent = nfs3_decode_dirent, | 902 | .decode_dirent = nfs3_decode_dirent, |
881 | .read_setup = nfs3_proc_read_setup, | 903 | .read_setup = nfs3_proc_read_setup, |
904 | .read_rpc_prepare = nfs3_proc_read_rpc_prepare, | ||
882 | .read_done = nfs3_read_done, | 905 | .read_done = nfs3_read_done, |
883 | .write_setup = nfs3_proc_write_setup, | 906 | .write_setup = nfs3_proc_write_setup, |
907 | .write_rpc_prepare = nfs3_proc_write_rpc_prepare, | ||
884 | .write_done = nfs3_write_done, | 908 | .write_done = nfs3_write_done, |
885 | .commit_setup = nfs3_proc_commit_setup, | 909 | .commit_setup = nfs3_proc_commit_setup, |
886 | .commit_done = nfs3_commit_done, | 910 | .commit_done = nfs3_commit_done, |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 183c6b123d0f..a77cc9a3ce55 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = { | |||
2461 | PROC(COMMIT, commit, commit, 5), | 2461 | PROC(COMMIT, commit, commit, 5), |
2462 | }; | 2462 | }; |
2463 | 2463 | ||
2464 | struct rpc_version nfs_version3 = { | 2464 | const struct rpc_version nfs_version3 = { |
2465 | .number = 3, | 2465 | .number = 3, |
2466 | .nrprocs = ARRAY_SIZE(nfs3_procedures), | 2466 | .nrprocs = ARRAY_SIZE(nfs3_procedures), |
2467 | .procs = nfs3_procedures | 2467 | .procs = nfs3_procedures |
@@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { | |||
2489 | }, | 2489 | }, |
2490 | }; | 2490 | }; |
2491 | 2491 | ||
2492 | struct rpc_version nfsacl_version3 = { | 2492 | const struct rpc_version nfsacl_version3 = { |
2493 | .number = 3, | 2493 | .number = 3, |
2494 | .nrprocs = sizeof(nfs3_acl_procedures)/ | 2494 | .nrprocs = sizeof(nfs3_acl_procedures)/ |
2495 | sizeof(nfs3_acl_procedures[0]), | 2495 | sizeof(nfs3_acl_procedures[0]), |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4d7d0aedc101..97ecc863dd76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -20,7 +20,6 @@ enum nfs4_client_state { | |||
20 | NFS4CLNT_RECLAIM_REBOOT, | 20 | NFS4CLNT_RECLAIM_REBOOT, |
21 | NFS4CLNT_RECLAIM_NOGRACE, | 21 | NFS4CLNT_RECLAIM_NOGRACE, |
22 | NFS4CLNT_DELEGRETURN, | 22 | NFS4CLNT_DELEGRETURN, |
23 | NFS4CLNT_LAYOUTRECALL, | ||
24 | NFS4CLNT_SESSION_RESET, | 23 | NFS4CLNT_SESSION_RESET, |
25 | NFS4CLNT_RECALL_SLOT, | 24 | NFS4CLNT_RECALL_SLOT, |
26 | NFS4CLNT_LEASE_CONFIRM, | 25 | NFS4CLNT_LEASE_CONFIRM, |
@@ -44,7 +43,7 @@ struct nfs4_minor_version_ops { | |||
44 | struct nfs4_sequence_args *args, | 43 | struct nfs4_sequence_args *args, |
45 | struct nfs4_sequence_res *res, | 44 | struct nfs4_sequence_res *res, |
46 | int cache_reply); | 45 | int cache_reply); |
47 | int (*validate_stateid)(struct nfs_delegation *, | 46 | bool (*match_stateid)(const nfs4_stateid *, |
48 | const nfs4_stateid *); | 47 | const nfs4_stateid *); |
49 | int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, | 48 | int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, |
50 | struct nfs_fsinfo *); | 49 | struct nfs_fsinfo *); |
@@ -53,26 +52,25 @@ struct nfs4_minor_version_ops { | |||
53 | const struct nfs4_state_maintenance_ops *state_renewal_ops; | 52 | const struct nfs4_state_maintenance_ops *state_renewal_ops; |
54 | }; | 53 | }; |
55 | 54 | ||
56 | /* | 55 | struct nfs_unique_id { |
57 | * struct rpc_sequence ensures that RPC calls are sent in the exact | 56 | struct rb_node rb_node; |
58 | * order that they appear on the list. | 57 | __u64 id; |
59 | */ | ||
60 | struct rpc_sequence { | ||
61 | struct rpc_wait_queue wait; /* RPC call delay queue */ | ||
62 | spinlock_t lock; /* Protects the list */ | ||
63 | struct list_head list; /* Defines sequence of RPC calls */ | ||
64 | }; | 58 | }; |
65 | 59 | ||
66 | #define NFS_SEQID_CONFIRMED 1 | 60 | #define NFS_SEQID_CONFIRMED 1 |
67 | struct nfs_seqid_counter { | 61 | struct nfs_seqid_counter { |
68 | struct rpc_sequence *sequence; | 62 | int owner_id; |
69 | int flags; | 63 | int flags; |
70 | u32 counter; | 64 | u32 counter; |
65 | spinlock_t lock; /* Protects the list */ | ||
66 | struct list_head list; /* Defines sequence of RPC calls */ | ||
67 | struct rpc_wait_queue wait; /* RPC call delay queue */ | ||
71 | }; | 68 | }; |
72 | 69 | ||
73 | struct nfs_seqid { | 70 | struct nfs_seqid { |
74 | struct nfs_seqid_counter *sequence; | 71 | struct nfs_seqid_counter *sequence; |
75 | struct list_head list; | 72 | struct list_head list; |
73 | struct rpc_task *task; | ||
76 | }; | 74 | }; |
77 | 75 | ||
78 | static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) | 76 | static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) |
@@ -81,18 +79,12 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status | |||
81 | seqid->flags |= NFS_SEQID_CONFIRMED; | 79 | seqid->flags |= NFS_SEQID_CONFIRMED; |
82 | } | 80 | } |
83 | 81 | ||
84 | struct nfs_unique_id { | ||
85 | struct rb_node rb_node; | ||
86 | __u64 id; | ||
87 | }; | ||
88 | |||
89 | /* | 82 | /* |
90 | * NFS4 state_owners and lock_owners are simply labels for ordered | 83 | * NFS4 state_owners and lock_owners are simply labels for ordered |
91 | * sequences of RPC calls. Their sole purpose is to provide once-only | 84 | * sequences of RPC calls. Their sole purpose is to provide once-only |
92 | * semantics by allowing the server to identify replayed requests. | 85 | * semantics by allowing the server to identify replayed requests. |
93 | */ | 86 | */ |
94 | struct nfs4_state_owner { | 87 | struct nfs4_state_owner { |
95 | struct nfs_unique_id so_owner_id; | ||
96 | struct nfs_server *so_server; | 88 | struct nfs_server *so_server; |
97 | struct list_head so_lru; | 89 | struct list_head so_lru; |
98 | unsigned long so_expires; | 90 | unsigned long so_expires; |
@@ -105,7 +97,6 @@ struct nfs4_state_owner { | |||
105 | unsigned long so_flags; | 97 | unsigned long so_flags; |
106 | struct list_head so_states; | 98 | struct list_head so_states; |
107 | struct nfs_seqid_counter so_seqid; | 99 | struct nfs_seqid_counter so_seqid; |
108 | struct rpc_sequence so_sequence; | ||
109 | }; | 100 | }; |
110 | 101 | ||
111 | enum { | 102 | enum { |
@@ -146,8 +137,6 @@ struct nfs4_lock_state { | |||
146 | #define NFS_LOCK_INITIALIZED 1 | 137 | #define NFS_LOCK_INITIALIZED 1 |
147 | int ls_flags; | 138 | int ls_flags; |
148 | struct nfs_seqid_counter ls_seqid; | 139 | struct nfs_seqid_counter ls_seqid; |
149 | struct rpc_sequence ls_sequence; | ||
150 | struct nfs_unique_id ls_id; | ||
151 | nfs4_stateid ls_stateid; | 140 | nfs4_stateid ls_stateid; |
152 | atomic_t ls_count; | 141 | atomic_t ls_count; |
153 | struct nfs4_lock_owner ls_owner; | 142 | struct nfs4_lock_owner ls_owner; |
@@ -193,6 +182,7 @@ struct nfs4_exception { | |||
193 | long timeout; | 182 | long timeout; |
194 | int retry; | 183 | int retry; |
195 | struct nfs4_state *state; | 184 | struct nfs4_state *state; |
185 | struct inode *inode; | ||
196 | }; | 186 | }; |
197 | 187 | ||
198 | struct nfs4_state_recovery_ops { | 188 | struct nfs4_state_recovery_ops { |
@@ -224,7 +214,7 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, boo | |||
224 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); | 214 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); |
225 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | 215 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, |
226 | struct nfs4_fs_locations *fs_locations, struct page *page); | 216 | struct nfs4_fs_locations *fs_locations, struct page *page); |
227 | extern void nfs4_release_lockowner(const struct nfs4_lock_state *); | 217 | extern int nfs4_release_lockowner(struct nfs4_lock_state *); |
228 | extern const struct xattr_handler *nfs4_xattr_handlers[]; | 218 | extern const struct xattr_handler *nfs4_xattr_handlers[]; |
229 | 219 | ||
230 | #if defined(CONFIG_NFS_V4_1) | 220 | #if defined(CONFIG_NFS_V4_1) |
@@ -233,12 +223,13 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser | |||
233 | return server->nfs_client->cl_session; | 223 | return server->nfs_client->cl_session; |
234 | } | 224 | } |
235 | 225 | ||
226 | extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); | ||
236 | extern int nfs4_setup_sequence(const struct nfs_server *server, | 227 | extern int nfs4_setup_sequence(const struct nfs_server *server, |
237 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, | 228 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, |
238 | int cache_reply, struct rpc_task *task); | 229 | struct rpc_task *task); |
239 | extern int nfs41_setup_sequence(struct nfs4_session *session, | 230 | extern int nfs41_setup_sequence(struct nfs4_session *session, |
240 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, | 231 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, |
241 | int cache_reply, struct rpc_task *task); | 232 | struct rpc_task *task); |
242 | extern void nfs4_destroy_session(struct nfs4_session *session); | 233 | extern void nfs4_destroy_session(struct nfs4_session *session); |
243 | extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); | 234 | extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); |
244 | extern int nfs4_proc_create_session(struct nfs_client *); | 235 | extern int nfs4_proc_create_session(struct nfs_client *); |
@@ -269,7 +260,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser | |||
269 | 260 | ||
270 | static inline int nfs4_setup_sequence(const struct nfs_server *server, | 261 | static inline int nfs4_setup_sequence(const struct nfs_server *server, |
271 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, | 262 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, |
272 | int cache_reply, struct rpc_task *task) | 263 | struct rpc_task *task) |
273 | { | 264 | { |
274 | return 0; | 265 | return 0; |
275 | } | 266 | } |
@@ -319,7 +310,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) | |||
319 | } | 310 | } |
320 | #endif /* CONFIG_NFS_V4_1 */ | 311 | #endif /* CONFIG_NFS_V4_1 */ |
321 | 312 | ||
322 | extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); | 313 | extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t); |
323 | extern void nfs4_put_state_owner(struct nfs4_state_owner *); | 314 | extern void nfs4_put_state_owner(struct nfs4_state_owner *); |
324 | extern void nfs4_purge_state_owners(struct nfs_server *); | 315 | extern void nfs4_purge_state_owners(struct nfs_server *); |
325 | extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); | 316 | extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); |
@@ -327,6 +318,8 @@ extern void nfs4_put_open_state(struct nfs4_state *); | |||
327 | extern void nfs4_close_state(struct nfs4_state *, fmode_t); | 318 | extern void nfs4_close_state(struct nfs4_state *, fmode_t); |
328 | extern void nfs4_close_sync(struct nfs4_state *, fmode_t); | 319 | extern void nfs4_close_sync(struct nfs4_state *, fmode_t); |
329 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); | 320 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); |
321 | extern void nfs_inode_find_state_and_recover(struct inode *inode, | ||
322 | const nfs4_stateid *stateid); | ||
330 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); | 323 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); |
331 | extern void nfs4_schedule_state_manager(struct nfs_client *); | 324 | extern void nfs4_schedule_state_manager(struct nfs_client *); |
332 | extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); | 325 | extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); |
@@ -337,7 +330,8 @@ extern void nfs41_handle_server_scope(struct nfs_client *, | |||
337 | struct server_scope **); | 330 | struct server_scope **); |
338 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | 331 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); |
339 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 332 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
340 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); | 333 | extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, |
334 | fmode_t, fl_owner_t, pid_t); | ||
341 | 335 | ||
342 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); | 336 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); |
343 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); | 337 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); |
@@ -346,6 +340,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); | |||
346 | extern void nfs_release_seqid(struct nfs_seqid *seqid); | 340 | extern void nfs_release_seqid(struct nfs_seqid *seqid); |
347 | extern void nfs_free_seqid(struct nfs_seqid *seqid); | 341 | extern void nfs_free_seqid(struct nfs_seqid *seqid); |
348 | 342 | ||
343 | extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); | ||
344 | |||
349 | extern const nfs4_stateid zero_stateid; | 345 | extern const nfs4_stateid zero_stateid; |
350 | 346 | ||
351 | /* nfs4xdr.c */ | 347 | /* nfs4xdr.c */ |
@@ -357,6 +353,16 @@ struct nfs4_mount_data; | |||
357 | extern struct svc_version nfs4_callback_version1; | 353 | extern struct svc_version nfs4_callback_version1; |
358 | extern struct svc_version nfs4_callback_version4; | 354 | extern struct svc_version nfs4_callback_version4; |
359 | 355 | ||
356 | static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) | ||
357 | { | ||
358 | memcpy(dst, src, sizeof(*dst)); | ||
359 | } | ||
360 | |||
361 | static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) | ||
362 | { | ||
363 | return memcmp(dst, src, sizeof(*dst)) == 0; | ||
364 | } | ||
365 | |||
360 | #else | 366 | #else |
361 | 367 | ||
362 | #define nfs4_close_state(a, b) do { } while (0) | 368 | #define nfs4_close_state(a, b) do { } while (0) |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 71ec08617e23..634c0bcb4fd6 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -33,7 +33,10 @@ | |||
33 | #include <linux/nfs_page.h> | 33 | #include <linux/nfs_page.h> |
34 | #include <linux/module.h> | 34 | #include <linux/module.h> |
35 | 35 | ||
36 | #include <linux/sunrpc/metrics.h> | ||
37 | |||
36 | #include "internal.h" | 38 | #include "internal.h" |
39 | #include "delegation.h" | ||
37 | #include "nfs4filelayout.h" | 40 | #include "nfs4filelayout.h" |
38 | 41 | ||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 42 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
@@ -84,12 +87,27 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
84 | struct nfs_client *clp, | 87 | struct nfs_client *clp, |
85 | int *reset) | 88 | int *reset) |
86 | { | 89 | { |
90 | struct nfs_server *mds_server = NFS_SERVER(state->inode); | ||
91 | struct nfs_client *mds_client = mds_server->nfs_client; | ||
92 | |||
87 | if (task->tk_status >= 0) | 93 | if (task->tk_status >= 0) |
88 | return 0; | 94 | return 0; |
89 | |||
90 | *reset = 0; | 95 | *reset = 0; |
91 | 96 | ||
92 | switch (task->tk_status) { | 97 | switch (task->tk_status) { |
98 | /* MDS state errors */ | ||
99 | case -NFS4ERR_DELEG_REVOKED: | ||
100 | case -NFS4ERR_ADMIN_REVOKED: | ||
101 | case -NFS4ERR_BAD_STATEID: | ||
102 | nfs_remove_bad_delegation(state->inode); | ||
103 | case -NFS4ERR_OPENMODE: | ||
104 | nfs4_schedule_stateid_recovery(mds_server, state); | ||
105 | goto wait_on_recovery; | ||
106 | case -NFS4ERR_EXPIRED: | ||
107 | nfs4_schedule_stateid_recovery(mds_server, state); | ||
108 | nfs4_schedule_lease_recovery(mds_client); | ||
109 | goto wait_on_recovery; | ||
110 | /* DS session errors */ | ||
93 | case -NFS4ERR_BADSESSION: | 111 | case -NFS4ERR_BADSESSION: |
94 | case -NFS4ERR_BADSLOT: | 112 | case -NFS4ERR_BADSLOT: |
95 | case -NFS4ERR_BAD_HIGH_SLOT: | 113 | case -NFS4ERR_BAD_HIGH_SLOT: |
@@ -115,8 +133,14 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
115 | *reset = 1; | 133 | *reset = 1; |
116 | break; | 134 | break; |
117 | } | 135 | } |
136 | out: | ||
118 | task->tk_status = 0; | 137 | task->tk_status = 0; |
119 | return -EAGAIN; | 138 | return -EAGAIN; |
139 | wait_on_recovery: | ||
140 | rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); | ||
141 | if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) | ||
142 | rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); | ||
143 | goto out; | ||
120 | } | 144 | } |
121 | 145 | ||
122 | /* NFS_PROTO call done callback routines */ | 146 | /* NFS_PROTO call done callback routines */ |
@@ -173,7 +197,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) | |||
173 | 197 | ||
174 | if (nfs41_setup_sequence(rdata->ds_clp->cl_session, | 198 | if (nfs41_setup_sequence(rdata->ds_clp->cl_session, |
175 | &rdata->args.seq_args, &rdata->res.seq_res, | 199 | &rdata->args.seq_args, &rdata->res.seq_res, |
176 | 0, task)) | 200 | task)) |
177 | return; | 201 | return; |
178 | 202 | ||
179 | rpc_call_start(task); | 203 | rpc_call_start(task); |
@@ -189,10 +213,18 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) | |||
189 | rdata->mds_ops->rpc_call_done(task, data); | 213 | rdata->mds_ops->rpc_call_done(task, data); |
190 | } | 214 | } |
191 | 215 | ||
216 | static void filelayout_read_count_stats(struct rpc_task *task, void *data) | ||
217 | { | ||
218 | struct nfs_read_data *rdata = (struct nfs_read_data *)data; | ||
219 | |||
220 | rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); | ||
221 | } | ||
222 | |||
192 | static void filelayout_read_release(void *data) | 223 | static void filelayout_read_release(void *data) |
193 | { | 224 | { |
194 | struct nfs_read_data *rdata = (struct nfs_read_data *)data; | 225 | struct nfs_read_data *rdata = (struct nfs_read_data *)data; |
195 | 226 | ||
227 | put_lseg(rdata->lseg); | ||
196 | rdata->mds_ops->rpc_release(data); | 228 | rdata->mds_ops->rpc_release(data); |
197 | } | 229 | } |
198 | 230 | ||
@@ -254,7 +286,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) | |||
254 | 286 | ||
255 | if (nfs41_setup_sequence(wdata->ds_clp->cl_session, | 287 | if (nfs41_setup_sequence(wdata->ds_clp->cl_session, |
256 | &wdata->args.seq_args, &wdata->res.seq_res, | 288 | &wdata->args.seq_args, &wdata->res.seq_res, |
257 | 0, task)) | 289 | task)) |
258 | return; | 290 | return; |
259 | 291 | ||
260 | rpc_call_start(task); | 292 | rpc_call_start(task); |
@@ -268,10 +300,18 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) | |||
268 | wdata->mds_ops->rpc_call_done(task, data); | 300 | wdata->mds_ops->rpc_call_done(task, data); |
269 | } | 301 | } |
270 | 302 | ||
303 | static void filelayout_write_count_stats(struct rpc_task *task, void *data) | ||
304 | { | ||
305 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | ||
306 | |||
307 | rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); | ||
308 | } | ||
309 | |||
271 | static void filelayout_write_release(void *data) | 310 | static void filelayout_write_release(void *data) |
272 | { | 311 | { |
273 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | 312 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; |
274 | 313 | ||
314 | put_lseg(wdata->lseg); | ||
275 | wdata->mds_ops->rpc_release(data); | 315 | wdata->mds_ops->rpc_release(data); |
276 | } | 316 | } |
277 | 317 | ||
@@ -282,24 +322,28 @@ static void filelayout_commit_release(void *data) | |||
282 | nfs_commit_release_pages(wdata); | 322 | nfs_commit_release_pages(wdata); |
283 | if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) | 323 | if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) |
284 | nfs_commit_clear_lock(NFS_I(wdata->inode)); | 324 | nfs_commit_clear_lock(NFS_I(wdata->inode)); |
325 | put_lseg(wdata->lseg); | ||
285 | nfs_commitdata_release(wdata); | 326 | nfs_commitdata_release(wdata); |
286 | } | 327 | } |
287 | 328 | ||
288 | struct rpc_call_ops filelayout_read_call_ops = { | 329 | static const struct rpc_call_ops filelayout_read_call_ops = { |
289 | .rpc_call_prepare = filelayout_read_prepare, | 330 | .rpc_call_prepare = filelayout_read_prepare, |
290 | .rpc_call_done = filelayout_read_call_done, | 331 | .rpc_call_done = filelayout_read_call_done, |
332 | .rpc_count_stats = filelayout_read_count_stats, | ||
291 | .rpc_release = filelayout_read_release, | 333 | .rpc_release = filelayout_read_release, |
292 | }; | 334 | }; |
293 | 335 | ||
294 | struct rpc_call_ops filelayout_write_call_ops = { | 336 | static const struct rpc_call_ops filelayout_write_call_ops = { |
295 | .rpc_call_prepare = filelayout_write_prepare, | 337 | .rpc_call_prepare = filelayout_write_prepare, |
296 | .rpc_call_done = filelayout_write_call_done, | 338 | .rpc_call_done = filelayout_write_call_done, |
339 | .rpc_count_stats = filelayout_write_count_stats, | ||
297 | .rpc_release = filelayout_write_release, | 340 | .rpc_release = filelayout_write_release, |
298 | }; | 341 | }; |
299 | 342 | ||
300 | struct rpc_call_ops filelayout_commit_call_ops = { | 343 | static const struct rpc_call_ops filelayout_commit_call_ops = { |
301 | .rpc_call_prepare = filelayout_write_prepare, | 344 | .rpc_call_prepare = filelayout_write_prepare, |
302 | .rpc_call_done = filelayout_write_call_done, | 345 | .rpc_call_done = filelayout_write_call_done, |
346 | .rpc_count_stats = filelayout_write_count_stats, | ||
303 | .rpc_release = filelayout_commit_release, | 347 | .rpc_release = filelayout_commit_release, |
304 | }; | 348 | }; |
305 | 349 | ||
@@ -367,7 +411,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
367 | idx = nfs4_fl_calc_ds_index(lseg, j); | 411 | idx = nfs4_fl_calc_ds_index(lseg, j); |
368 | ds = nfs4_fl_prepare_ds(lseg, idx); | 412 | ds = nfs4_fl_prepare_ds(lseg, idx); |
369 | if (!ds) { | 413 | if (!ds) { |
370 | printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); | 414 | printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", |
415 | __func__); | ||
371 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | 416 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); |
372 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 417 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); |
373 | return PNFS_NOT_ATTEMPTED; | 418 | return PNFS_NOT_ATTEMPTED; |
@@ -575,7 +620,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
575 | goto out_err_free; | 620 | goto out_err_free; |
576 | fl->fh_array[i]->size = be32_to_cpup(p++); | 621 | fl->fh_array[i]->size = be32_to_cpup(p++); |
577 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { | 622 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { |
578 | printk(KERN_ERR "Too big fh %d received %d\n", | 623 | printk(KERN_ERR "NFS: Too big fh %d received %d\n", |
579 | i, fl->fh_array[i]->size); | 624 | i, fl->fh_array[i]->size); |
580 | goto out_err_free; | 625 | goto out_err_free; |
581 | } | 626 | } |
@@ -640,14 +685,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
640 | int size = (fl->stripe_type == STRIPE_SPARSE) ? | 685 | int size = (fl->stripe_type == STRIPE_SPARSE) ? |
641 | fl->dsaddr->ds_num : fl->dsaddr->stripe_count; | 686 | fl->dsaddr->ds_num : fl->dsaddr->stripe_count; |
642 | 687 | ||
643 | fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); | 688 | fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); |
644 | if (!fl->commit_buckets) { | 689 | if (!fl->commit_buckets) { |
645 | filelayout_free_lseg(&fl->generic_hdr); | 690 | filelayout_free_lseg(&fl->generic_hdr); |
646 | return NULL; | 691 | return NULL; |
647 | } | 692 | } |
648 | fl->number_of_buckets = size; | 693 | fl->number_of_buckets = size; |
649 | for (i = 0; i < size; i++) | 694 | for (i = 0; i < size; i++) { |
650 | INIT_LIST_HEAD(&fl->commit_buckets[i]); | 695 | INIT_LIST_HEAD(&fl->commit_buckets[i].written); |
696 | INIT_LIST_HEAD(&fl->commit_buckets[i].committing); | ||
697 | } | ||
651 | } | 698 | } |
652 | return &fl->generic_hdr; | 699 | return &fl->generic_hdr; |
653 | } | 700 | } |
@@ -679,7 +726,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
679 | return (p_stripe == r_stripe); | 726 | return (p_stripe == r_stripe); |
680 | } | 727 | } |
681 | 728 | ||
682 | void | 729 | static void |
683 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, | 730 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, |
684 | struct nfs_page *req) | 731 | struct nfs_page *req) |
685 | { | 732 | { |
@@ -696,7 +743,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, | |||
696 | nfs_pageio_reset_read_mds(pgio); | 743 | nfs_pageio_reset_read_mds(pgio); |
697 | } | 744 | } |
698 | 745 | ||
699 | void | 746 | static void |
700 | filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, | 747 | filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, |
701 | struct nfs_page *req) | 748 | struct nfs_page *req) |
702 | { | 749 | { |
@@ -725,11 +772,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = { | |||
725 | .pg_doio = pnfs_generic_pg_writepages, | 772 | .pg_doio = pnfs_generic_pg_writepages, |
726 | }; | 773 | }; |
727 | 774 | ||
728 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) | ||
729 | { | ||
730 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; | ||
731 | } | ||
732 | |||
733 | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | 775 | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) |
734 | { | 776 | { |
735 | if (fl->stripe_type == STRIPE_SPARSE) | 777 | if (fl->stripe_type == STRIPE_SPARSE) |
@@ -738,13 +780,49 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | |||
738 | return j; | 780 | return j; |
739 | } | 781 | } |
740 | 782 | ||
741 | struct list_head *filelayout_choose_commit_list(struct nfs_page *req) | 783 | /* The generic layer is about to remove the req from the commit list. |
784 | * If this will make the bucket empty, it will need to put the lseg reference. | ||
785 | */ | ||
786 | static void | ||
787 | filelayout_clear_request_commit(struct nfs_page *req) | ||
788 | { | ||
789 | struct pnfs_layout_segment *freeme = NULL; | ||
790 | struct inode *inode = req->wb_context->dentry->d_inode; | ||
791 | |||
792 | spin_lock(&inode->i_lock); | ||
793 | if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) | ||
794 | goto out; | ||
795 | if (list_is_singular(&req->wb_list)) { | ||
796 | struct inode *inode = req->wb_context->dentry->d_inode; | ||
797 | struct pnfs_layout_segment *lseg; | ||
798 | |||
799 | /* From here we can find the bucket, but for the moment, | ||
800 | * since there is only one relevant lseg... | ||
801 | */ | ||
802 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { | ||
803 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
804 | freeme = lseg; | ||
805 | break; | ||
806 | } | ||
807 | } | ||
808 | } | ||
809 | out: | ||
810 | nfs_request_remove_commit_list(req); | ||
811 | spin_unlock(&inode->i_lock); | ||
812 | put_lseg(freeme); | ||
813 | } | ||
814 | |||
815 | static struct list_head * | ||
816 | filelayout_choose_commit_list(struct nfs_page *req, | ||
817 | struct pnfs_layout_segment *lseg) | ||
742 | { | 818 | { |
743 | struct pnfs_layout_segment *lseg = req->wb_commit_lseg; | ||
744 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | 819 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); |
745 | u32 i, j; | 820 | u32 i, j; |
746 | struct list_head *list; | 821 | struct list_head *list; |
747 | 822 | ||
823 | if (fl->commit_through_mds) | ||
824 | return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; | ||
825 | |||
748 | /* Note that we are calling nfs4_fl_calc_j_index on each page | 826 | /* Note that we are calling nfs4_fl_calc_j_index on each page |
749 | * that ends up being committed to a data server. An attractive | 827 | * that ends up being committed to a data server. An attractive |
750 | * alternative is to add a field to nfs_write_data and nfs_page | 828 | * alternative is to add a field to nfs_write_data and nfs_page |
@@ -754,14 +832,30 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req) | |||
754 | j = nfs4_fl_calc_j_index(lseg, | 832 | j = nfs4_fl_calc_j_index(lseg, |
755 | (loff_t)req->wb_index << PAGE_CACHE_SHIFT); | 833 | (loff_t)req->wb_index << PAGE_CACHE_SHIFT); |
756 | i = select_bucket_index(fl, j); | 834 | i = select_bucket_index(fl, j); |
757 | list = &fl->commit_buckets[i]; | 835 | list = &fl->commit_buckets[i].written; |
758 | if (list_empty(list)) { | 836 | if (list_empty(list)) { |
759 | /* Non-empty buckets hold a reference on the lseg */ | 837 | /* Non-empty buckets hold a reference on the lseg. That ref |
838 | * is normally transferred to the COMMIT call and released | ||
839 | * there. It could also be released if the last req is pulled | ||
840 | * off due to a rewrite, in which case it will be done in | ||
841 | * filelayout_remove_commit_req | ||
842 | */ | ||
760 | get_lseg(lseg); | 843 | get_lseg(lseg); |
761 | } | 844 | } |
845 | set_bit(PG_COMMIT_TO_DS, &req->wb_flags); | ||
762 | return list; | 846 | return list; |
763 | } | 847 | } |
764 | 848 | ||
849 | static void | ||
850 | filelayout_mark_request_commit(struct nfs_page *req, | ||
851 | struct pnfs_layout_segment *lseg) | ||
852 | { | ||
853 | struct list_head *list; | ||
854 | |||
855 | list = filelayout_choose_commit_list(req, lseg); | ||
856 | nfs_request_add_commit_list(req, list); | ||
857 | } | ||
858 | |||
765 | static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) | 859 | static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) |
766 | { | 860 | { |
767 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | 861 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); |
@@ -797,11 +891,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) | |||
797 | idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); | 891 | idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); |
798 | ds = nfs4_fl_prepare_ds(lseg, idx); | 892 | ds = nfs4_fl_prepare_ds(lseg, idx); |
799 | if (!ds) { | 893 | if (!ds) { |
800 | printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); | 894 | printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", |
895 | __func__); | ||
801 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | 896 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); |
802 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 897 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); |
803 | prepare_to_resend_writes(data); | 898 | prepare_to_resend_writes(data); |
804 | data->mds_ops->rpc_release(data); | 899 | filelayout_commit_release(data); |
805 | return -EAGAIN; | 900 | return -EAGAIN; |
806 | } | 901 | } |
807 | dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); | 902 | dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); |
@@ -817,24 +912,87 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) | |||
817 | /* | 912 | /* |
818 | * This is only useful while we are using whole file layouts. | 913 | * This is only useful while we are using whole file layouts. |
819 | */ | 914 | */ |
820 | static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) | 915 | static struct pnfs_layout_segment * |
916 | find_only_write_lseg_locked(struct inode *inode) | ||
821 | { | 917 | { |
822 | struct pnfs_layout_segment *lseg, *rv = NULL; | 918 | struct pnfs_layout_segment *lseg; |
823 | 919 | ||
824 | spin_lock(&inode->i_lock); | ||
825 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | 920 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) |
826 | if (lseg->pls_range.iomode == IOMODE_RW) | 921 | if (lseg->pls_range.iomode == IOMODE_RW) |
827 | rv = get_lseg(lseg); | 922 | return lseg; |
923 | return NULL; | ||
924 | } | ||
925 | |||
926 | static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) | ||
927 | { | ||
928 | struct pnfs_layout_segment *rv; | ||
929 | |||
930 | spin_lock(&inode->i_lock); | ||
931 | rv = find_only_write_lseg_locked(inode); | ||
932 | if (rv) | ||
933 | get_lseg(rv); | ||
828 | spin_unlock(&inode->i_lock); | 934 | spin_unlock(&inode->i_lock); |
829 | return rv; | 935 | return rv; |
830 | } | 936 | } |
831 | 937 | ||
832 | static int alloc_ds_commits(struct inode *inode, struct list_head *list) | 938 | static int |
939 | filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, | ||
940 | spinlock_t *lock) | ||
941 | { | ||
942 | struct list_head *src = &bucket->written; | ||
943 | struct list_head *dst = &bucket->committing; | ||
944 | struct nfs_page *req, *tmp; | ||
945 | int ret = 0; | ||
946 | |||
947 | list_for_each_entry_safe(req, tmp, src, wb_list) { | ||
948 | if (!nfs_lock_request(req)) | ||
949 | continue; | ||
950 | if (cond_resched_lock(lock)) | ||
951 | list_safe_reset_next(req, tmp, wb_list); | ||
952 | nfs_request_remove_commit_list(req); | ||
953 | clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); | ||
954 | nfs_list_add_request(req, dst); | ||
955 | ret++; | ||
956 | if (ret == max) | ||
957 | break; | ||
958 | } | ||
959 | return ret; | ||
960 | } | ||
961 | |||
962 | /* Move reqs from written to committing lists, returning count of number moved. | ||
963 | * Note called with i_lock held. | ||
964 | */ | ||
965 | static int filelayout_scan_commit_lists(struct inode *inode, int max, | ||
966 | spinlock_t *lock) | ||
967 | { | ||
968 | struct pnfs_layout_segment *lseg; | ||
969 | struct nfs4_filelayout_segment *fl; | ||
970 | int i, rv = 0, cnt; | ||
971 | |||
972 | lseg = find_only_write_lseg_locked(inode); | ||
973 | if (!lseg) | ||
974 | goto out_done; | ||
975 | fl = FILELAYOUT_LSEG(lseg); | ||
976 | if (fl->commit_through_mds) | ||
977 | goto out_done; | ||
978 | for (i = 0; i < fl->number_of_buckets && max != 0; i++) { | ||
979 | cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], | ||
980 | max, lock); | ||
981 | max -= cnt; | ||
982 | rv += cnt; | ||
983 | } | ||
984 | out_done: | ||
985 | return rv; | ||
986 | } | ||
987 | |||
988 | static unsigned int | ||
989 | alloc_ds_commits(struct inode *inode, struct list_head *list) | ||
833 | { | 990 | { |
834 | struct pnfs_layout_segment *lseg; | 991 | struct pnfs_layout_segment *lseg; |
835 | struct nfs4_filelayout_segment *fl; | 992 | struct nfs4_filelayout_segment *fl; |
836 | struct nfs_write_data *data; | 993 | struct nfs_write_data *data; |
837 | int i, j; | 994 | int i, j; |
995 | unsigned int nreq = 0; | ||
838 | 996 | ||
839 | /* Won't need this when non-whole file layout segments are supported | 997 | /* Won't need this when non-whole file layout segments are supported |
840 | * instead we will use a pnfs_layout_hdr structure */ | 998 | * instead we will use a pnfs_layout_hdr structure */ |
@@ -843,28 +1001,27 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) | |||
843 | return 0; | 1001 | return 0; |
844 | fl = FILELAYOUT_LSEG(lseg); | 1002 | fl = FILELAYOUT_LSEG(lseg); |
845 | for (i = 0; i < fl->number_of_buckets; i++) { | 1003 | for (i = 0; i < fl->number_of_buckets; i++) { |
846 | if (list_empty(&fl->commit_buckets[i])) | 1004 | if (list_empty(&fl->commit_buckets[i].committing)) |
847 | continue; | 1005 | continue; |
848 | data = nfs_commitdata_alloc(); | 1006 | data = nfs_commitdata_alloc(); |
849 | if (!data) | 1007 | if (!data) |
850 | goto out_bad; | 1008 | break; |
851 | data->ds_commit_index = i; | 1009 | data->ds_commit_index = i; |
852 | data->lseg = lseg; | 1010 | data->lseg = lseg; |
853 | list_add(&data->pages, list); | 1011 | list_add(&data->pages, list); |
1012 | nreq++; | ||
854 | } | 1013 | } |
855 | put_lseg(lseg); | ||
856 | return 0; | ||
857 | 1014 | ||
858 | out_bad: | 1015 | /* Clean up on error */ |
859 | for (j = i; j < fl->number_of_buckets; j++) { | 1016 | for (j = i; j < fl->number_of_buckets; j++) { |
860 | if (list_empty(&fl->commit_buckets[i])) | 1017 | if (list_empty(&fl->commit_buckets[i].committing)) |
861 | continue; | 1018 | continue; |
862 | nfs_retry_commit(&fl->commit_buckets[i], lseg); | 1019 | nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); |
863 | put_lseg(lseg); /* associated with emptying bucket */ | 1020 | put_lseg(lseg); /* associated with emptying bucket */ |
864 | } | 1021 | } |
865 | put_lseg(lseg); | 1022 | put_lseg(lseg); |
866 | /* Caller will clean up entries put on list */ | 1023 | /* Caller will clean up entries put on list */ |
867 | return -ENOMEM; | 1024 | return nreq; |
868 | } | 1025 | } |
869 | 1026 | ||
870 | /* This follows nfs_commit_list pretty closely */ | 1027 | /* This follows nfs_commit_list pretty closely */ |
@@ -874,40 +1031,40 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | |||
874 | { | 1031 | { |
875 | struct nfs_write_data *data, *tmp; | 1032 | struct nfs_write_data *data, *tmp; |
876 | LIST_HEAD(list); | 1033 | LIST_HEAD(list); |
1034 | unsigned int nreq = 0; | ||
877 | 1035 | ||
878 | if (!list_empty(mds_pages)) { | 1036 | if (!list_empty(mds_pages)) { |
879 | data = nfs_commitdata_alloc(); | 1037 | data = nfs_commitdata_alloc(); |
880 | if (!data) | 1038 | if (data != NULL) { |
881 | goto out_bad; | 1039 | data->lseg = NULL; |
882 | data->lseg = NULL; | 1040 | list_add(&data->pages, &list); |
883 | list_add(&data->pages, &list); | 1041 | nreq++; |
1042 | } else | ||
1043 | nfs_retry_commit(mds_pages, NULL); | ||
884 | } | 1044 | } |
885 | 1045 | ||
886 | if (alloc_ds_commits(inode, &list)) | 1046 | nreq += alloc_ds_commits(inode, &list); |
887 | goto out_bad; | 1047 | |
1048 | if (nreq == 0) { | ||
1049 | nfs_commit_clear_lock(NFS_I(inode)); | ||
1050 | goto out; | ||
1051 | } | ||
1052 | |||
1053 | atomic_add(nreq, &NFS_I(inode)->commits_outstanding); | ||
888 | 1054 | ||
889 | list_for_each_entry_safe(data, tmp, &list, pages) { | 1055 | list_for_each_entry_safe(data, tmp, &list, pages) { |
890 | list_del_init(&data->pages); | 1056 | list_del_init(&data->pages); |
891 | atomic_inc(&NFS_I(inode)->commits_outstanding); | ||
892 | if (!data->lseg) { | 1057 | if (!data->lseg) { |
893 | nfs_init_commit(data, mds_pages, NULL); | 1058 | nfs_init_commit(data, mds_pages, NULL); |
894 | nfs_initiate_commit(data, NFS_CLIENT(inode), | 1059 | nfs_initiate_commit(data, NFS_CLIENT(inode), |
895 | data->mds_ops, how); | 1060 | data->mds_ops, how); |
896 | } else { | 1061 | } else { |
897 | nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); | 1062 | nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); |
898 | filelayout_initiate_commit(data, how); | 1063 | filelayout_initiate_commit(data, how); |
899 | } | 1064 | } |
900 | } | 1065 | } |
901 | return 0; | 1066 | out: |
902 | out_bad: | 1067 | return PNFS_ATTEMPTED; |
903 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
904 | nfs_retry_commit(&data->pages, data->lseg); | ||
905 | list_del_init(&data->pages); | ||
906 | nfs_commit_free(data); | ||
907 | } | ||
908 | nfs_retry_commit(mds_pages, NULL); | ||
909 | nfs_commit_clear_lock(NFS_I(inode)); | ||
910 | return -ENOMEM; | ||
911 | } | 1068 | } |
912 | 1069 | ||
913 | static void | 1070 | static void |
@@ -924,8 +1081,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
924 | .free_lseg = filelayout_free_lseg, | 1081 | .free_lseg = filelayout_free_lseg, |
925 | .pg_read_ops = &filelayout_pg_read_ops, | 1082 | .pg_read_ops = &filelayout_pg_read_ops, |
926 | .pg_write_ops = &filelayout_pg_write_ops, | 1083 | .pg_write_ops = &filelayout_pg_write_ops, |
927 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, | 1084 | .mark_request_commit = filelayout_mark_request_commit, |
928 | .choose_commit_list = filelayout_choose_commit_list, | 1085 | .clear_request_commit = filelayout_clear_request_commit, |
1086 | .scan_commit_lists = filelayout_scan_commit_lists, | ||
929 | .commit_pagelist = filelayout_commit_pagelist, | 1087 | .commit_pagelist = filelayout_commit_pagelist, |
930 | .read_pagelist = filelayout_read_pagelist, | 1088 | .read_pagelist = filelayout_read_pagelist, |
931 | .write_pagelist = filelayout_write_pagelist, | 1089 | .write_pagelist = filelayout_write_pagelist, |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2e42284253fa..21190bb1f5e3 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr { | |||
74 | struct nfs4_pnfs_ds *ds_list[1]; | 74 | struct nfs4_pnfs_ds *ds_list[1]; |
75 | }; | 75 | }; |
76 | 76 | ||
77 | struct nfs4_fl_commit_bucket { | ||
78 | struct list_head written; | ||
79 | struct list_head committing; | ||
80 | }; | ||
81 | |||
77 | struct nfs4_filelayout_segment { | 82 | struct nfs4_filelayout_segment { |
78 | struct pnfs_layout_segment generic_hdr; | 83 | struct pnfs_layout_segment generic_hdr; |
79 | u32 stripe_type; | 84 | u32 stripe_type; |
@@ -84,7 +89,7 @@ struct nfs4_filelayout_segment { | |||
84 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ | 89 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ |
85 | unsigned int num_fh; | 90 | unsigned int num_fh; |
86 | struct nfs_fh **fh_array; | 91 | struct nfs_fh **fh_array; |
87 | struct list_head *commit_buckets; /* Sort commits to ds */ | 92 | struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ |
88 | int number_of_buckets; | 93 | int number_of_buckets; |
89 | }; | 94 | }; |
90 | 95 | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 8ae91908f5aa..a866bbd2890a 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -45,7 +45,7 @@ | |||
45 | * - incremented when a device id maps a data server already in the cache. | 45 | * - incremented when a device id maps a data server already in the cache. |
46 | * - decremented when deviceid is removed from the cache. | 46 | * - decremented when deviceid is removed from the cache. |
47 | */ | 47 | */ |
48 | DEFINE_SPINLOCK(nfs4_ds_cache_lock); | 48 | static DEFINE_SPINLOCK(nfs4_ds_cache_lock); |
49 | static LIST_HEAD(nfs4_data_server_cache); | 49 | static LIST_HEAD(nfs4_data_server_cache); |
50 | 50 | ||
51 | /* Debug routines */ | 51 | /* Debug routines */ |
@@ -108,58 +108,40 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) | |||
108 | return false; | 108 | return false; |
109 | } | 109 | } |
110 | 110 | ||
111 | /* | 111 | static bool |
112 | * Lookup DS by addresses. The first matching address returns true. | 112 | _same_data_server_addrs_locked(const struct list_head *dsaddrs1, |
113 | * nfs4_ds_cache_lock is held | 113 | const struct list_head *dsaddrs2) |
114 | */ | ||
115 | static struct nfs4_pnfs_ds * | ||
116 | _data_server_lookup_locked(struct list_head *dsaddrs) | ||
117 | { | 114 | { |
118 | struct nfs4_pnfs_ds *ds; | ||
119 | struct nfs4_pnfs_ds_addr *da1, *da2; | 115 | struct nfs4_pnfs_ds_addr *da1, *da2; |
120 | 116 | ||
121 | list_for_each_entry(da1, dsaddrs, da_node) { | 117 | /* step through both lists, comparing as we go */ |
122 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | 118 | for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), |
123 | list_for_each_entry(da2, &ds->ds_addrs, da_node) { | 119 | da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); |
124 | if (same_sockaddr( | 120 | da1 != NULL && da2 != NULL; |
125 | (struct sockaddr *)&da1->da_addr, | 121 | da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), |
126 | (struct sockaddr *)&da2->da_addr)) | 122 | da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { |
127 | return ds; | 123 | if (!same_sockaddr((struct sockaddr *)&da1->da_addr, |
128 | } | 124 | (struct sockaddr *)&da2->da_addr)) |
129 | } | 125 | return false; |
130 | } | 126 | } |
131 | return NULL; | 127 | if (da1 == NULL && da2 == NULL) |
128 | return true; | ||
129 | |||
130 | return false; | ||
132 | } | 131 | } |
133 | 132 | ||
134 | /* | 133 | /* |
135 | * Compare two lists of addresses. | 134 | * Lookup DS by addresses. nfs4_ds_cache_lock is held |
136 | */ | 135 | */ |
137 | static bool | 136 | static struct nfs4_pnfs_ds * |
138 | _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, | 137 | _data_server_lookup_locked(const struct list_head *dsaddrs) |
139 | struct list_head *dsaddrs2) | ||
140 | { | 138 | { |
141 | struct nfs4_pnfs_ds_addr *da1, *da2; | 139 | struct nfs4_pnfs_ds *ds; |
142 | size_t count1 = 0, | ||
143 | count2 = 0; | ||
144 | |||
145 | list_for_each_entry(da1, dsaddrs1, da_node) | ||
146 | count1++; | ||
147 | |||
148 | list_for_each_entry(da2, dsaddrs2, da_node) { | ||
149 | bool found = false; | ||
150 | count2++; | ||
151 | list_for_each_entry(da1, dsaddrs1, da_node) { | ||
152 | if (same_sockaddr((struct sockaddr *)&da1->da_addr, | ||
153 | (struct sockaddr *)&da2->da_addr)) { | ||
154 | found = true; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | if (!found) | ||
159 | return false; | ||
160 | } | ||
161 | 140 | ||
162 | return (count1 == count2); | 141 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) |
142 | if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) | ||
143 | return ds; | ||
144 | return NULL; | ||
163 | } | 145 | } |
164 | 146 | ||
165 | /* | 147 | /* |
@@ -356,11 +338,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) | |||
356 | dprintk("%s add new data server %s\n", __func__, | 338 | dprintk("%s add new data server %s\n", __func__, |
357 | ds->ds_remotestr); | 339 | ds->ds_remotestr); |
358 | } else { | 340 | } else { |
359 | if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, | ||
360 | dsaddrs)) { | ||
361 | dprintk("%s: multipath address mismatch: %s != %s", | ||
362 | __func__, tmp_ds->ds_remotestr, remotestr); | ||
363 | } | ||
364 | kfree(remotestr); | 341 | kfree(remotestr); |
365 | kfree(ds); | 342 | kfree(ds); |
366 | atomic_inc(&tmp_ds->ds_count); | 343 | atomic_inc(&tmp_ds->ds_count); |
@@ -378,7 +355,7 @@ out: | |||
378 | * Currently only supports ipv4, ipv6 and one multi-path address. | 355 | * Currently only supports ipv4, ipv6 and one multi-path address. |
379 | */ | 356 | */ |
380 | static struct nfs4_pnfs_ds_addr * | 357 | static struct nfs4_pnfs_ds_addr * |
381 | decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) | 358 | decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) |
382 | { | 359 | { |
383 | struct nfs4_pnfs_ds_addr *da = NULL; | 360 | struct nfs4_pnfs_ds_addr *da = NULL; |
384 | char *buf, *portstr; | 361 | char *buf, *portstr; |
@@ -457,7 +434,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) | |||
457 | 434 | ||
458 | INIT_LIST_HEAD(&da->da_node); | 435 | INIT_LIST_HEAD(&da->da_node); |
459 | 436 | ||
460 | if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, | 437 | if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, |
461 | sizeof(da->da_addr))) { | 438 | sizeof(da->da_addr))) { |
462 | dprintk("%s: error parsing address %s\n", __func__, buf); | 439 | dprintk("%s: error parsing address %s\n", __func__, buf); |
463 | goto out_free_da; | 440 | goto out_free_da; |
@@ -554,7 +531,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
554 | cnt = be32_to_cpup(p); | 531 | cnt = be32_to_cpup(p); |
555 | dprintk("%s stripe count %d\n", __func__, cnt); | 532 | dprintk("%s stripe count %d\n", __func__, cnt); |
556 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | 533 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { |
557 | printk(KERN_WARNING "%s: stripe count %d greater than " | 534 | printk(KERN_WARNING "NFS: %s: stripe count %d greater than " |
558 | "supported maximum %d\n", __func__, | 535 | "supported maximum %d\n", __func__, |
559 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | 536 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); |
560 | goto out_err_free_scratch; | 537 | goto out_err_free_scratch; |
@@ -585,7 +562,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
585 | num = be32_to_cpup(p); | 562 | num = be32_to_cpup(p); |
586 | dprintk("%s ds_num %u\n", __func__, num); | 563 | dprintk("%s ds_num %u\n", __func__, num); |
587 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | 564 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { |
588 | printk(KERN_WARNING "%s: multipath count %d greater than " | 565 | printk(KERN_WARNING "NFS: %s: multipath count %d greater than " |
589 | "supported maximum %d\n", __func__, | 566 | "supported maximum %d\n", __func__, |
590 | num, NFS4_PNFS_MAX_MULTI_CNT); | 567 | num, NFS4_PNFS_MAX_MULTI_CNT); |
591 | goto out_err_free_stripe_indices; | 568 | goto out_err_free_stripe_indices; |
@@ -593,7 +570,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
593 | 570 | ||
594 | /* validate stripe indices are all < num */ | 571 | /* validate stripe indices are all < num */ |
595 | if (max_stripe_index >= num) { | 572 | if (max_stripe_index >= num) { |
596 | printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", | 573 | printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", |
597 | __func__, max_stripe_index, num); | 574 | __func__, max_stripe_index, num); |
598 | goto out_err_free_stripe_indices; | 575 | goto out_err_free_stripe_indices; |
599 | } | 576 | } |
@@ -625,7 +602,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
625 | 602 | ||
626 | mp_count = be32_to_cpup(p); /* multipath count */ | 603 | mp_count = be32_to_cpup(p); /* multipath count */ |
627 | for (j = 0; j < mp_count; j++) { | 604 | for (j = 0; j < mp_count; j++) { |
628 | da = decode_ds_addr(&stream, gfp_flags); | 605 | da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, |
606 | &stream, gfp_flags); | ||
629 | if (da) | 607 | if (da) |
630 | list_add_tail(&da->da_node, &dsaddrs); | 608 | list_add_tail(&da->da_node, &dsaddrs); |
631 | } | 609 | } |
@@ -686,7 +664,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl | |||
686 | 664 | ||
687 | new = decode_device(inode, dev, gfp_flags); | 665 | new = decode_device(inode, dev, gfp_flags); |
688 | if (!new) { | 666 | if (!new) { |
689 | printk(KERN_WARNING "%s: Could not decode or add device\n", | 667 | printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", |
690 | __func__); | 668 | __func__); |
691 | return NULL; | 669 | return NULL; |
692 | } | 670 | } |
@@ -835,7 +813,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||
835 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | 813 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; |
836 | 814 | ||
837 | if (ds == NULL) { | 815 | if (ds == NULL) { |
838 | printk(KERN_ERR "%s: No data server for offset index %d\n", | 816 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", |
839 | __func__, ds_idx); | 817 | __func__, ds_idx); |
840 | return NULL; | 818 | return NULL; |
841 | } | 819 | } |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index bb80c49b6533..9c8eca315f43 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
@@ -94,13 +94,14 @@ static int nfs4_validate_fspath(struct dentry *dentry, | |||
94 | } | 94 | } |
95 | 95 | ||
96 | static size_t nfs_parse_server_name(char *string, size_t len, | 96 | static size_t nfs_parse_server_name(char *string, size_t len, |
97 | struct sockaddr *sa, size_t salen) | 97 | struct sockaddr *sa, size_t salen, struct nfs_server *server) |
98 | { | 98 | { |
99 | struct net *net = rpc_net_ns(server->client); | ||
99 | ssize_t ret; | 100 | ssize_t ret; |
100 | 101 | ||
101 | ret = rpc_pton(string, len, sa, salen); | 102 | ret = rpc_pton(net, string, len, sa, salen); |
102 | if (ret == 0) { | 103 | if (ret == 0) { |
103 | ret = nfs_dns_resolve_name(string, len, sa, salen); | 104 | ret = nfs_dns_resolve_name(net, string, len, sa, salen); |
104 | if (ret < 0) | 105 | if (ret < 0) |
105 | ret = 0; | 106 | ret = 0; |
106 | } | 107 | } |
@@ -137,7 +138,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, | |||
137 | continue; | 138 | continue; |
138 | 139 | ||
139 | mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, | 140 | mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, |
140 | mountdata->addr, addr_bufsize); | 141 | mountdata->addr, addr_bufsize, |
142 | NFS_SB(mountdata->sb)); | ||
141 | if (mountdata->addrlen == 0) | 143 | if (mountdata->addrlen == 0) |
142 | continue; | 144 | continue; |
143 | 145 | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index caf92d05c3a9..e809d2305ebf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -72,18 +72,21 @@ | |||
72 | 72 | ||
73 | #define NFS4_MAX_LOOP_ON_RECOVER (10) | 73 | #define NFS4_MAX_LOOP_ON_RECOVER (10) |
74 | 74 | ||
75 | static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; | ||
76 | |||
75 | struct nfs4_opendata; | 77 | struct nfs4_opendata; |
76 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 78 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
77 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 79 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
78 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 80 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
79 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 81 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); |
82 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | ||
80 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); |
81 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 84 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
82 | struct nfs_fattr *fattr, struct iattr *sattr, | 85 | struct nfs_fattr *fattr, struct iattr *sattr, |
83 | struct nfs4_state *state); | 86 | struct nfs4_state *state); |
84 | #ifdef CONFIG_NFS_V4_1 | 87 | #ifdef CONFIG_NFS_V4_1 |
85 | static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); | 88 | static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); |
86 | static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); | 89 | static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); |
87 | #endif | 90 | #endif |
88 | /* Prevent leaks of NFSv4 errors into userland */ | 91 | /* Prevent leaks of NFSv4 errors into userland */ |
89 | static int nfs4_map_errors(int err) | 92 | static int nfs4_map_errors(int err) |
@@ -259,15 +262,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc | |||
259 | { | 262 | { |
260 | struct nfs_client *clp = server->nfs_client; | 263 | struct nfs_client *clp = server->nfs_client; |
261 | struct nfs4_state *state = exception->state; | 264 | struct nfs4_state *state = exception->state; |
265 | struct inode *inode = exception->inode; | ||
262 | int ret = errorcode; | 266 | int ret = errorcode; |
263 | 267 | ||
264 | exception->retry = 0; | 268 | exception->retry = 0; |
265 | switch(errorcode) { | 269 | switch(errorcode) { |
266 | case 0: | 270 | case 0: |
267 | return 0; | 271 | return 0; |
272 | case -NFS4ERR_OPENMODE: | ||
273 | if (nfs_have_delegation(inode, FMODE_READ)) { | ||
274 | nfs_inode_return_delegation(inode); | ||
275 | exception->retry = 1; | ||
276 | return 0; | ||
277 | } | ||
278 | if (state == NULL) | ||
279 | break; | ||
280 | nfs4_schedule_stateid_recovery(server, state); | ||
281 | goto wait_on_recovery; | ||
282 | case -NFS4ERR_DELEG_REVOKED: | ||
268 | case -NFS4ERR_ADMIN_REVOKED: | 283 | case -NFS4ERR_ADMIN_REVOKED: |
269 | case -NFS4ERR_BAD_STATEID: | 284 | case -NFS4ERR_BAD_STATEID: |
270 | case -NFS4ERR_OPENMODE: | 285 | if (state != NULL) |
286 | nfs_remove_bad_delegation(state->inode); | ||
271 | if (state == NULL) | 287 | if (state == NULL) |
272 | break; | 288 | break; |
273 | nfs4_schedule_stateid_recovery(server, state); | 289 | nfs4_schedule_stateid_recovery(server, state); |
@@ -360,16 +376,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp | |||
360 | * When updating highest_used_slotid there may be "holes" in the bitmap | 376 | * When updating highest_used_slotid there may be "holes" in the bitmap |
361 | * so we need to scan down from highest_used_slotid to 0 looking for the now | 377 | * so we need to scan down from highest_used_slotid to 0 looking for the now |
362 | * highest slotid in use. | 378 | * highest slotid in use. |
363 | * If none found, highest_used_slotid is set to -1. | 379 | * If none found, highest_used_slotid is set to NFS4_NO_SLOT. |
364 | * | 380 | * |
365 | * Must be called while holding tbl->slot_tbl_lock | 381 | * Must be called while holding tbl->slot_tbl_lock |
366 | */ | 382 | */ |
367 | static void | 383 | static void |
368 | nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) | 384 | nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) |
369 | { | 385 | { |
370 | int slotid = free_slotid; | 386 | BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE); |
371 | |||
372 | BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); | ||
373 | /* clear used bit in bitmap */ | 387 | /* clear used bit in bitmap */ |
374 | __clear_bit(slotid, tbl->used_slots); | 388 | __clear_bit(slotid, tbl->used_slots); |
375 | 389 | ||
@@ -379,10 +393,16 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) | |||
379 | if (slotid < tbl->max_slots) | 393 | if (slotid < tbl->max_slots) |
380 | tbl->highest_used_slotid = slotid; | 394 | tbl->highest_used_slotid = slotid; |
381 | else | 395 | else |
382 | tbl->highest_used_slotid = -1; | 396 | tbl->highest_used_slotid = NFS4_NO_SLOT; |
383 | } | 397 | } |
384 | dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, | 398 | dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, |
385 | free_slotid, tbl->highest_used_slotid); | 399 | slotid, tbl->highest_used_slotid); |
400 | } | ||
401 | |||
402 | bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) | ||
403 | { | ||
404 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); | ||
405 | return true; | ||
386 | } | 406 | } |
387 | 407 | ||
388 | /* | 408 | /* |
@@ -390,16 +410,13 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) | |||
390 | */ | 410 | */ |
391 | static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) | 411 | static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) |
392 | { | 412 | { |
393 | struct rpc_task *task; | ||
394 | |||
395 | if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { | 413 | if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { |
396 | task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); | 414 | rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, |
397 | if (task) | 415 | nfs4_set_task_privileged, NULL); |
398 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); | ||
399 | return; | 416 | return; |
400 | } | 417 | } |
401 | 418 | ||
402 | if (ses->fc_slot_table.highest_used_slotid != -1) | 419 | if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT) |
403 | return; | 420 | return; |
404 | 421 | ||
405 | dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); | 422 | dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); |
@@ -412,7 +429,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) | |||
412 | void nfs4_check_drain_bc_complete(struct nfs4_session *ses) | 429 | void nfs4_check_drain_bc_complete(struct nfs4_session *ses) |
413 | { | 430 | { |
414 | if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || | 431 | if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || |
415 | ses->bc_slot_table.highest_used_slotid != -1) | 432 | ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT) |
416 | return; | 433 | return; |
417 | dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); | 434 | dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); |
418 | complete(&ses->bc_slot_table.complete); | 435 | complete(&ses->bc_slot_table.complete); |
@@ -507,25 +524,25 @@ static int nfs4_sequence_done(struct rpc_task *task, | |||
507 | * nfs4_find_slot looks for an unset bit in the used_slots bitmap. | 524 | * nfs4_find_slot looks for an unset bit in the used_slots bitmap. |
508 | * If found, we mark the slot as used, update the highest_used_slotid, | 525 | * If found, we mark the slot as used, update the highest_used_slotid, |
509 | * and respectively set up the sequence operation args. | 526 | * and respectively set up the sequence operation args. |
510 | * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. | 527 | * The slot number is returned if found, or NFS4_NO_SLOT otherwise. |
511 | * | 528 | * |
512 | * Note: must be called with under the slot_tbl_lock. | 529 | * Note: must be called with under the slot_tbl_lock. |
513 | */ | 530 | */ |
514 | static u8 | 531 | static u32 |
515 | nfs4_find_slot(struct nfs4_slot_table *tbl) | 532 | nfs4_find_slot(struct nfs4_slot_table *tbl) |
516 | { | 533 | { |
517 | int slotid; | 534 | u32 slotid; |
518 | u8 ret_id = NFS4_MAX_SLOT_TABLE; | 535 | u32 ret_id = NFS4_NO_SLOT; |
519 | BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); | ||
520 | 536 | ||
521 | dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", | 537 | dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", |
522 | __func__, tbl->used_slots[0], tbl->highest_used_slotid, | 538 | __func__, tbl->used_slots[0], tbl->highest_used_slotid, |
523 | tbl->max_slots); | 539 | tbl->max_slots); |
524 | slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); | 540 | slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); |
525 | if (slotid >= tbl->max_slots) | 541 | if (slotid >= tbl->max_slots) |
526 | goto out; | 542 | goto out; |
527 | __set_bit(slotid, tbl->used_slots); | 543 | __set_bit(slotid, tbl->used_slots); |
528 | if (slotid > tbl->highest_used_slotid) | 544 | if (slotid > tbl->highest_used_slotid || |
545 | tbl->highest_used_slotid == NFS4_NO_SLOT) | ||
529 | tbl->highest_used_slotid = slotid; | 546 | tbl->highest_used_slotid = slotid; |
530 | ret_id = slotid; | 547 | ret_id = slotid; |
531 | out: | 548 | out: |
@@ -534,15 +551,25 @@ out: | |||
534 | return ret_id; | 551 | return ret_id; |
535 | } | 552 | } |
536 | 553 | ||
554 | static void nfs41_init_sequence(struct nfs4_sequence_args *args, | ||
555 | struct nfs4_sequence_res *res, int cache_reply) | ||
556 | { | ||
557 | args->sa_session = NULL; | ||
558 | args->sa_cache_this = 0; | ||
559 | if (cache_reply) | ||
560 | args->sa_cache_this = 1; | ||
561 | res->sr_session = NULL; | ||
562 | res->sr_slot = NULL; | ||
563 | } | ||
564 | |||
537 | int nfs41_setup_sequence(struct nfs4_session *session, | 565 | int nfs41_setup_sequence(struct nfs4_session *session, |
538 | struct nfs4_sequence_args *args, | 566 | struct nfs4_sequence_args *args, |
539 | struct nfs4_sequence_res *res, | 567 | struct nfs4_sequence_res *res, |
540 | int cache_reply, | ||
541 | struct rpc_task *task) | 568 | struct rpc_task *task) |
542 | { | 569 | { |
543 | struct nfs4_slot *slot; | 570 | struct nfs4_slot *slot; |
544 | struct nfs4_slot_table *tbl; | 571 | struct nfs4_slot_table *tbl; |
545 | u8 slotid; | 572 | u32 slotid; |
546 | 573 | ||
547 | dprintk("--> %s\n", __func__); | 574 | dprintk("--> %s\n", __func__); |
548 | /* slot already allocated? */ | 575 | /* slot already allocated? */ |
@@ -570,7 +597,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, | |||
570 | } | 597 | } |
571 | 598 | ||
572 | slotid = nfs4_find_slot(tbl); | 599 | slotid = nfs4_find_slot(tbl); |
573 | if (slotid == NFS4_MAX_SLOT_TABLE) { | 600 | if (slotid == NFS4_NO_SLOT) { |
574 | rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); | 601 | rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); |
575 | spin_unlock(&tbl->slot_tbl_lock); | 602 | spin_unlock(&tbl->slot_tbl_lock); |
576 | dprintk("<-- %s: no free slots\n", __func__); | 603 | dprintk("<-- %s: no free slots\n", __func__); |
@@ -582,7 +609,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, | |||
582 | slot = tbl->slots + slotid; | 609 | slot = tbl->slots + slotid; |
583 | args->sa_session = session; | 610 | args->sa_session = session; |
584 | args->sa_slotid = slotid; | 611 | args->sa_slotid = slotid; |
585 | args->sa_cache_this = cache_reply; | ||
586 | 612 | ||
587 | dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); | 613 | dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); |
588 | 614 | ||
@@ -602,24 +628,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence); | |||
602 | int nfs4_setup_sequence(const struct nfs_server *server, | 628 | int nfs4_setup_sequence(const struct nfs_server *server, |
603 | struct nfs4_sequence_args *args, | 629 | struct nfs4_sequence_args *args, |
604 | struct nfs4_sequence_res *res, | 630 | struct nfs4_sequence_res *res, |
605 | int cache_reply, | ||
606 | struct rpc_task *task) | 631 | struct rpc_task *task) |
607 | { | 632 | { |
608 | struct nfs4_session *session = nfs4_get_session(server); | 633 | struct nfs4_session *session = nfs4_get_session(server); |
609 | int ret = 0; | 634 | int ret = 0; |
610 | 635 | ||
611 | if (session == NULL) { | 636 | if (session == NULL) |
612 | args->sa_session = NULL; | ||
613 | res->sr_session = NULL; | ||
614 | goto out; | 637 | goto out; |
615 | } | ||
616 | 638 | ||
617 | dprintk("--> %s clp %p session %p sr_slot %td\n", | 639 | dprintk("--> %s clp %p session %p sr_slot %td\n", |
618 | __func__, session->clp, session, res->sr_slot ? | 640 | __func__, session->clp, session, res->sr_slot ? |
619 | res->sr_slot - session->fc_slot_table.slots : -1); | 641 | res->sr_slot - session->fc_slot_table.slots : -1); |
620 | 642 | ||
621 | ret = nfs41_setup_sequence(session, args, res, cache_reply, | 643 | ret = nfs41_setup_sequence(session, args, res, task); |
622 | task); | ||
623 | out: | 644 | out: |
624 | dprintk("<-- %s status=%d\n", __func__, ret); | 645 | dprintk("<-- %s status=%d\n", __func__, ret); |
625 | return ret; | 646 | return ret; |
@@ -629,7 +650,6 @@ struct nfs41_call_sync_data { | |||
629 | const struct nfs_server *seq_server; | 650 | const struct nfs_server *seq_server; |
630 | struct nfs4_sequence_args *seq_args; | 651 | struct nfs4_sequence_args *seq_args; |
631 | struct nfs4_sequence_res *seq_res; | 652 | struct nfs4_sequence_res *seq_res; |
632 | int cache_reply; | ||
633 | }; | 653 | }; |
634 | 654 | ||
635 | static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) | 655 | static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) |
@@ -639,7 +659,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) | |||
639 | dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); | 659 | dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); |
640 | 660 | ||
641 | if (nfs4_setup_sequence(data->seq_server, data->seq_args, | 661 | if (nfs4_setup_sequence(data->seq_server, data->seq_args, |
642 | data->seq_res, data->cache_reply, task)) | 662 | data->seq_res, task)) |
643 | return; | 663 | return; |
644 | rpc_call_start(task); | 664 | rpc_call_start(task); |
645 | } | 665 | } |
@@ -657,12 +677,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) | |||
657 | nfs41_sequence_done(task, data->seq_res); | 677 | nfs41_sequence_done(task, data->seq_res); |
658 | } | 678 | } |
659 | 679 | ||
660 | struct rpc_call_ops nfs41_call_sync_ops = { | 680 | static const struct rpc_call_ops nfs41_call_sync_ops = { |
661 | .rpc_call_prepare = nfs41_call_sync_prepare, | 681 | .rpc_call_prepare = nfs41_call_sync_prepare, |
662 | .rpc_call_done = nfs41_call_sync_done, | 682 | .rpc_call_done = nfs41_call_sync_done, |
663 | }; | 683 | }; |
664 | 684 | ||
665 | struct rpc_call_ops nfs41_call_priv_sync_ops = { | 685 | static const struct rpc_call_ops nfs41_call_priv_sync_ops = { |
666 | .rpc_call_prepare = nfs41_call_priv_sync_prepare, | 686 | .rpc_call_prepare = nfs41_call_priv_sync_prepare, |
667 | .rpc_call_done = nfs41_call_sync_done, | 687 | .rpc_call_done = nfs41_call_sync_done, |
668 | }; | 688 | }; |
@@ -672,7 +692,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, | |||
672 | struct rpc_message *msg, | 692 | struct rpc_message *msg, |
673 | struct nfs4_sequence_args *args, | 693 | struct nfs4_sequence_args *args, |
674 | struct nfs4_sequence_res *res, | 694 | struct nfs4_sequence_res *res, |
675 | int cache_reply, | ||
676 | int privileged) | 695 | int privileged) |
677 | { | 696 | { |
678 | int ret; | 697 | int ret; |
@@ -681,7 +700,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, | |||
681 | .seq_server = server, | 700 | .seq_server = server, |
682 | .seq_args = args, | 701 | .seq_args = args, |
683 | .seq_res = res, | 702 | .seq_res = res, |
684 | .cache_reply = cache_reply, | ||
685 | }; | 703 | }; |
686 | struct rpc_task_setup task_setup = { | 704 | struct rpc_task_setup task_setup = { |
687 | .rpc_client = clnt, | 705 | .rpc_client = clnt, |
@@ -690,7 +708,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, | |||
690 | .callback_data = &data | 708 | .callback_data = &data |
691 | }; | 709 | }; |
692 | 710 | ||
693 | res->sr_slot = NULL; | ||
694 | if (privileged) | 711 | if (privileged) |
695 | task_setup.callback_ops = &nfs41_call_priv_sync_ops; | 712 | task_setup.callback_ops = &nfs41_call_priv_sync_ops; |
696 | task = rpc_run_task(&task_setup); | 713 | task = rpc_run_task(&task_setup); |
@@ -710,10 +727,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt, | |||
710 | struct nfs4_sequence_res *res, | 727 | struct nfs4_sequence_res *res, |
711 | int cache_reply) | 728 | int cache_reply) |
712 | { | 729 | { |
713 | return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); | 730 | nfs41_init_sequence(args, res, cache_reply); |
731 | return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); | ||
714 | } | 732 | } |
715 | 733 | ||
716 | #else | 734 | #else |
735 | static inline | ||
736 | void nfs41_init_sequence(struct nfs4_sequence_args *args, | ||
737 | struct nfs4_sequence_res *res, int cache_reply) | ||
738 | { | ||
739 | } | ||
740 | |||
717 | static int nfs4_sequence_done(struct rpc_task *task, | 741 | static int nfs4_sequence_done(struct rpc_task *task, |
718 | struct nfs4_sequence_res *res) | 742 | struct nfs4_sequence_res *res) |
719 | { | 743 | { |
@@ -728,7 +752,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt, | |||
728 | struct nfs4_sequence_res *res, | 752 | struct nfs4_sequence_res *res, |
729 | int cache_reply) | 753 | int cache_reply) |
730 | { | 754 | { |
731 | args->sa_session = res->sr_session = NULL; | 755 | nfs41_init_sequence(args, res, cache_reply); |
732 | return rpc_call_sync(clnt, msg, 0); | 756 | return rpc_call_sync(clnt, msg, 0); |
733 | } | 757 | } |
734 | 758 | ||
@@ -815,20 +839,22 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
815 | p->o_arg.open_flags = flags; | 839 | p->o_arg.open_flags = flags; |
816 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); | 840 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); |
817 | p->o_arg.clientid = server->nfs_client->cl_clientid; | 841 | p->o_arg.clientid = server->nfs_client->cl_clientid; |
818 | p->o_arg.id = sp->so_owner_id.id; | 842 | p->o_arg.id = sp->so_seqid.owner_id; |
819 | p->o_arg.name = &dentry->d_name; | 843 | p->o_arg.name = &dentry->d_name; |
820 | p->o_arg.server = server; | 844 | p->o_arg.server = server; |
821 | p->o_arg.bitmask = server->attr_bitmask; | 845 | p->o_arg.bitmask = server->attr_bitmask; |
822 | p->o_arg.dir_bitmask = server->cache_consistency_bitmask; | 846 | p->o_arg.dir_bitmask = server->cache_consistency_bitmask; |
823 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; | 847 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; |
824 | if (flags & O_CREAT) { | 848 | if (attrs != NULL && attrs->ia_valid != 0) { |
825 | u32 *s; | 849 | __be32 verf[2]; |
826 | 850 | ||
827 | p->o_arg.u.attrs = &p->attrs; | 851 | p->o_arg.u.attrs = &p->attrs; |
828 | memcpy(&p->attrs, attrs, sizeof(p->attrs)); | 852 | memcpy(&p->attrs, attrs, sizeof(p->attrs)); |
829 | s = (u32 *) p->o_arg.u.verifier.data; | 853 | |
830 | s[0] = jiffies; | 854 | verf[0] = jiffies; |
831 | s[1] = current->pid; | 855 | verf[1] = current->pid; |
856 | memcpy(p->o_arg.u.verifier.data, verf, | ||
857 | sizeof(p->o_arg.u.verifier.data)); | ||
832 | } | 858 | } |
833 | p->c_arg.fh = &p->o_res.fh; | 859 | p->c_arg.fh = &p->o_res.fh; |
834 | p->c_arg.stateid = &p->o_res.stateid; | 860 | p->c_arg.stateid = &p->o_res.stateid; |
@@ -878,7 +904,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode | |||
878 | { | 904 | { |
879 | int ret = 0; | 905 | int ret = 0; |
880 | 906 | ||
881 | if (open_mode & O_EXCL) | 907 | if (open_mode & (O_EXCL|O_TRUNC)) |
882 | goto out; | 908 | goto out; |
883 | switch (mode & (FMODE_READ|FMODE_WRITE)) { | 909 | switch (mode & (FMODE_READ|FMODE_WRITE)) { |
884 | case FMODE_READ: | 910 | case FMODE_READ: |
@@ -927,8 +953,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) | |||
927 | static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | 953 | static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) |
928 | { | 954 | { |
929 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 955 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
930 | memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); | 956 | nfs4_stateid_copy(&state->stateid, stateid); |
931 | memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); | 957 | nfs4_stateid_copy(&state->open_stateid, stateid); |
932 | switch (fmode) { | 958 | switch (fmode) { |
933 | case FMODE_READ: | 959 | case FMODE_READ: |
934 | set_bit(NFS_O_RDONLY_STATE, &state->flags); | 960 | set_bit(NFS_O_RDONLY_STATE, &state->flags); |
@@ -956,7 +982,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s | |||
956 | */ | 982 | */ |
957 | write_seqlock(&state->seqlock); | 983 | write_seqlock(&state->seqlock); |
958 | if (deleg_stateid != NULL) { | 984 | if (deleg_stateid != NULL) { |
959 | memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); | 985 | nfs4_stateid_copy(&state->stateid, deleg_stateid); |
960 | set_bit(NFS_DELEGATED_STATE, &state->flags); | 986 | set_bit(NFS_DELEGATED_STATE, &state->flags); |
961 | } | 987 | } |
962 | if (open_stateid != NULL) | 988 | if (open_stateid != NULL) |
@@ -987,7 +1013,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat | |||
987 | 1013 | ||
988 | if (delegation == NULL) | 1014 | if (delegation == NULL) |
989 | delegation = &deleg_cur->stateid; | 1015 | delegation = &deleg_cur->stateid; |
990 | else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) | 1016 | else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) |
991 | goto no_delegation_unlock; | 1017 | goto no_delegation_unlock; |
992 | 1018 | ||
993 | nfs_mark_delegation_referenced(deleg_cur); | 1019 | nfs_mark_delegation_referenced(deleg_cur); |
@@ -1026,7 +1052,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
1026 | struct nfs4_state *state = opendata->state; | 1052 | struct nfs4_state *state = opendata->state; |
1027 | struct nfs_inode *nfsi = NFS_I(state->inode); | 1053 | struct nfs_inode *nfsi = NFS_I(state->inode); |
1028 | struct nfs_delegation *delegation; | 1054 | struct nfs_delegation *delegation; |
1029 | int open_mode = opendata->o_arg.open_flags & O_EXCL; | 1055 | int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); |
1030 | fmode_t fmode = opendata->o_arg.fmode; | 1056 | fmode_t fmode = opendata->o_arg.fmode; |
1031 | nfs4_stateid stateid; | 1057 | nfs4_stateid stateid; |
1032 | int ret = -EAGAIN; | 1058 | int ret = -EAGAIN; |
@@ -1048,7 +1074,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
1048 | break; | 1074 | break; |
1049 | } | 1075 | } |
1050 | /* Save the delegation */ | 1076 | /* Save the delegation */ |
1051 | memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); | 1077 | nfs4_stateid_copy(&stateid, &delegation->stateid); |
1052 | rcu_read_unlock(); | 1078 | rcu_read_unlock(); |
1053 | ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); | 1079 | ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); |
1054 | if (ret != 0) | 1080 | if (ret != 0) |
@@ -1090,6 +1116,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data | |||
1090 | if (state == NULL) | 1116 | if (state == NULL) |
1091 | goto err_put_inode; | 1117 | goto err_put_inode; |
1092 | if (data->o_res.delegation_type != 0) { | 1118 | if (data->o_res.delegation_type != 0) { |
1119 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | ||
1093 | int delegation_flags = 0; | 1120 | int delegation_flags = 0; |
1094 | 1121 | ||
1095 | rcu_read_lock(); | 1122 | rcu_read_lock(); |
@@ -1101,7 +1128,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data | |||
1101 | pr_err_ratelimited("NFS: Broken NFSv4 server %s is " | 1128 | pr_err_ratelimited("NFS: Broken NFSv4 server %s is " |
1102 | "returning a delegation for " | 1129 | "returning a delegation for " |
1103 | "OPEN(CLAIM_DELEGATE_CUR)\n", | 1130 | "OPEN(CLAIM_DELEGATE_CUR)\n", |
1104 | NFS_CLIENT(inode)->cl_server); | 1131 | clp->cl_hostname); |
1105 | } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) | 1132 | } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) |
1106 | nfs_inode_set_delegation(state->inode, | 1133 | nfs_inode_set_delegation(state->inode, |
1107 | data->owner->so_cred, | 1134 | data->owner->so_cred, |
@@ -1210,10 +1237,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1210 | * Check if we need to update the current stateid. | 1237 | * Check if we need to update the current stateid. |
1211 | */ | 1238 | */ |
1212 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && | 1239 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && |
1213 | memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { | 1240 | !nfs4_stateid_match(&state->stateid, &state->open_stateid)) { |
1214 | write_seqlock(&state->seqlock); | 1241 | write_seqlock(&state->seqlock); |
1215 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 1242 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
1216 | memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); | 1243 | nfs4_stateid_copy(&state->stateid, &state->open_stateid); |
1217 | write_sequnlock(&state->seqlock); | 1244 | write_sequnlock(&state->seqlock); |
1218 | } | 1245 | } |
1219 | return 0; | 1246 | return 0; |
@@ -1282,8 +1309,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs | |||
1282 | if (IS_ERR(opendata)) | 1309 | if (IS_ERR(opendata)) |
1283 | return PTR_ERR(opendata); | 1310 | return PTR_ERR(opendata); |
1284 | opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; | 1311 | opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; |
1285 | memcpy(opendata->o_arg.u.delegation.data, stateid->data, | 1312 | nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); |
1286 | sizeof(opendata->o_arg.u.delegation.data)); | ||
1287 | ret = nfs4_open_recover(opendata, state); | 1313 | ret = nfs4_open_recover(opendata, state); |
1288 | nfs4_opendata_put(opendata); | 1314 | nfs4_opendata_put(opendata); |
1289 | return ret; | 1315 | return ret; |
@@ -1319,8 +1345,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state | |||
1319 | * The show must go on: exit, but mark the | 1345 | * The show must go on: exit, but mark the |
1320 | * stateid as needing recovery. | 1346 | * stateid as needing recovery. |
1321 | */ | 1347 | */ |
1348 | case -NFS4ERR_DELEG_REVOKED: | ||
1322 | case -NFS4ERR_ADMIN_REVOKED: | 1349 | case -NFS4ERR_ADMIN_REVOKED: |
1323 | case -NFS4ERR_BAD_STATEID: | 1350 | case -NFS4ERR_BAD_STATEID: |
1351 | nfs_inode_find_state_and_recover(state->inode, | ||
1352 | stateid); | ||
1324 | nfs4_schedule_stateid_recovery(server, state); | 1353 | nfs4_schedule_stateid_recovery(server, state); |
1325 | case -EKEYEXPIRED: | 1354 | case -EKEYEXPIRED: |
1326 | /* | 1355 | /* |
@@ -1345,8 +1374,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | |||
1345 | 1374 | ||
1346 | data->rpc_status = task->tk_status; | 1375 | data->rpc_status = task->tk_status; |
1347 | if (data->rpc_status == 0) { | 1376 | if (data->rpc_status == 0) { |
1348 | memcpy(data->o_res.stateid.data, data->c_res.stateid.data, | 1377 | nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); |
1349 | sizeof(data->o_res.stateid.data)); | ||
1350 | nfs_confirm_seqid(&data->owner->so_seqid, 0); | 1378 | nfs_confirm_seqid(&data->owner->so_seqid, 0); |
1351 | renew_lease(data->o_res.server, data->timestamp); | 1379 | renew_lease(data->o_res.server, data->timestamp); |
1352 | data->rpc_done = 1; | 1380 | data->rpc_done = 1; |
@@ -1440,7 +1468,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
1440 | rcu_read_unlock(); | 1468 | rcu_read_unlock(); |
1441 | } | 1469 | } |
1442 | /* Update sequence id. */ | 1470 | /* Update sequence id. */ |
1443 | data->o_arg.id = sp->so_owner_id.id; | 1471 | data->o_arg.id = sp->so_seqid.owner_id; |
1444 | data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; | 1472 | data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; |
1445 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { | 1473 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { |
1446 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; | 1474 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; |
@@ -1449,7 +1477,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
1449 | data->timestamp = jiffies; | 1477 | data->timestamp = jiffies; |
1450 | if (nfs4_setup_sequence(data->o_arg.server, | 1478 | if (nfs4_setup_sequence(data->o_arg.server, |
1451 | &data->o_arg.seq_args, | 1479 | &data->o_arg.seq_args, |
1452 | &data->o_res.seq_res, 1, task)) | 1480 | &data->o_res.seq_res, task)) |
1453 | return; | 1481 | return; |
1454 | rpc_call_start(task); | 1482 | rpc_call_start(task); |
1455 | return; | 1483 | return; |
@@ -1551,6 +1579,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) | |||
1551 | }; | 1579 | }; |
1552 | int status; | 1580 | int status; |
1553 | 1581 | ||
1582 | nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); | ||
1554 | kref_get(&data->kref); | 1583 | kref_get(&data->kref); |
1555 | data->rpc_done = 0; | 1584 | data->rpc_done = 0; |
1556 | data->rpc_status = 0; | 1585 | data->rpc_status = 0; |
@@ -1712,15 +1741,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta | |||
1712 | } | 1741 | } |
1713 | 1742 | ||
1714 | #if defined(CONFIG_NFS_V4_1) | 1743 | #if defined(CONFIG_NFS_V4_1) |
1715 | static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) | 1744 | static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) |
1716 | { | 1745 | { |
1717 | int status; | 1746 | int status = NFS_OK; |
1718 | struct nfs_server *server = NFS_SERVER(state->inode); | 1747 | struct nfs_server *server = NFS_SERVER(state->inode); |
1719 | 1748 | ||
1720 | status = nfs41_test_stateid(server, state); | 1749 | if (state->flags & flags) { |
1721 | if (status == NFS_OK) | 1750 | status = nfs41_test_stateid(server, stateid); |
1722 | return 0; | 1751 | if (status != NFS_OK) { |
1723 | nfs41_free_stateid(server, state); | 1752 | nfs41_free_stateid(server, stateid); |
1753 | state->flags &= ~flags; | ||
1754 | } | ||
1755 | } | ||
1756 | return status; | ||
1757 | } | ||
1758 | |||
1759 | static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) | ||
1760 | { | ||
1761 | int deleg_status, open_status; | ||
1762 | int deleg_flags = 1 << NFS_DELEGATED_STATE; | ||
1763 | int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); | ||
1764 | |||
1765 | deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); | ||
1766 | open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); | ||
1767 | |||
1768 | if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) | ||
1769 | return NFS_OK; | ||
1724 | return nfs4_open_expired(sp, state); | 1770 | return nfs4_open_expired(sp, state); |
1725 | } | 1771 | } |
1726 | #endif | 1772 | #endif |
@@ -1754,7 +1800,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode | |||
1754 | 1800 | ||
1755 | /* Protect against reboot recovery conflicts */ | 1801 | /* Protect against reboot recovery conflicts */ |
1756 | status = -ENOMEM; | 1802 | status = -ENOMEM; |
1757 | if (!(sp = nfs4_get_state_owner(server, cred))) { | 1803 | sp = nfs4_get_state_owner(server, cred, GFP_KERNEL); |
1804 | if (sp == NULL) { | ||
1758 | dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); | 1805 | dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); |
1759 | goto out_err; | 1806 | goto out_err; |
1760 | } | 1807 | } |
@@ -1829,7 +1876,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, | |||
1829 | * the user though... | 1876 | * the user though... |
1830 | */ | 1877 | */ |
1831 | if (status == -NFS4ERR_BAD_SEQID) { | 1878 | if (status == -NFS4ERR_BAD_SEQID) { |
1832 | printk(KERN_WARNING "NFS: v4 server %s " | 1879 | pr_warn_ratelimited("NFS: v4 server %s " |
1833 | " returned a bad sequence-id error!\n", | 1880 | " returned a bad sequence-id error!\n", |
1834 | NFS_SERVER(dir)->nfs_client->cl_hostname); | 1881 | NFS_SERVER(dir)->nfs_client->cl_hostname); |
1835 | exception.retry = 1; | 1882 | exception.retry = 1; |
@@ -1882,12 +1929,14 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
1882 | 1929 | ||
1883 | nfs_fattr_init(fattr); | 1930 | nfs_fattr_init(fattr); |
1884 | 1931 | ||
1885 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { | 1932 | if (state != NULL) { |
1933 | nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, | ||
1934 | current->files, current->tgid); | ||
1935 | } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, | ||
1936 | FMODE_WRITE)) { | ||
1886 | /* Use that stateid */ | 1937 | /* Use that stateid */ |
1887 | } else if (state != NULL) { | ||
1888 | nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid); | ||
1889 | } else | 1938 | } else |
1890 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); | 1939 | nfs4_stateid_copy(&arg.stateid, &zero_stateid); |
1891 | 1940 | ||
1892 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 1941 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
1893 | if (status == 0 && state != NULL) | 1942 | if (status == 0 && state != NULL) |
@@ -1900,7 +1949,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
1900 | struct nfs4_state *state) | 1949 | struct nfs4_state *state) |
1901 | { | 1950 | { |
1902 | struct nfs_server *server = NFS_SERVER(inode); | 1951 | struct nfs_server *server = NFS_SERVER(inode); |
1903 | struct nfs4_exception exception = { }; | 1952 | struct nfs4_exception exception = { |
1953 | .state = state, | ||
1954 | .inode = inode, | ||
1955 | }; | ||
1904 | int err; | 1956 | int err; |
1905 | do { | 1957 | do { |
1906 | err = nfs4_handle_exception(server, | 1958 | err = nfs4_handle_exception(server, |
@@ -1954,6 +2006,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
1954 | struct nfs4_state *state = calldata->state; | 2006 | struct nfs4_state *state = calldata->state; |
1955 | struct nfs_server *server = NFS_SERVER(calldata->inode); | 2007 | struct nfs_server *server = NFS_SERVER(calldata->inode); |
1956 | 2008 | ||
2009 | dprintk("%s: begin!\n", __func__); | ||
1957 | if (!nfs4_sequence_done(task, &calldata->res.seq_res)) | 2010 | if (!nfs4_sequence_done(task, &calldata->res.seq_res)) |
1958 | return; | 2011 | return; |
1959 | /* hmm. we are done with the inode, and in the process of freeing | 2012 | /* hmm. we are done with the inode, and in the process of freeing |
@@ -1981,6 +2034,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
1981 | } | 2034 | } |
1982 | nfs_release_seqid(calldata->arg.seqid); | 2035 | nfs_release_seqid(calldata->arg.seqid); |
1983 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); | 2036 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); |
2037 | dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); | ||
1984 | } | 2038 | } |
1985 | 2039 | ||
1986 | static void nfs4_close_prepare(struct rpc_task *task, void *data) | 2040 | static void nfs4_close_prepare(struct rpc_task *task, void *data) |
@@ -1989,6 +2043,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
1989 | struct nfs4_state *state = calldata->state; | 2043 | struct nfs4_state *state = calldata->state; |
1990 | int call_close = 0; | 2044 | int call_close = 0; |
1991 | 2045 | ||
2046 | dprintk("%s: begin!\n", __func__); | ||
1992 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) | 2047 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) |
1993 | return; | 2048 | return; |
1994 | 2049 | ||
@@ -2013,7 +2068,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2013 | if (!call_close) { | 2068 | if (!call_close) { |
2014 | /* Note: exit _without_ calling nfs4_close_done */ | 2069 | /* Note: exit _without_ calling nfs4_close_done */ |
2015 | task->tk_action = NULL; | 2070 | task->tk_action = NULL; |
2016 | return; | 2071 | goto out; |
2017 | } | 2072 | } |
2018 | 2073 | ||
2019 | if (calldata->arg.fmode == 0) { | 2074 | if (calldata->arg.fmode == 0) { |
@@ -2022,17 +2077,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2022 | pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { | 2077 | pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { |
2023 | rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, | 2078 | rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, |
2024 | task, NULL); | 2079 | task, NULL); |
2025 | return; | 2080 | goto out; |
2026 | } | 2081 | } |
2027 | } | 2082 | } |
2028 | 2083 | ||
2029 | nfs_fattr_init(calldata->res.fattr); | 2084 | nfs_fattr_init(calldata->res.fattr); |
2030 | calldata->timestamp = jiffies; | 2085 | calldata->timestamp = jiffies; |
2031 | if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), | 2086 | if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), |
2032 | &calldata->arg.seq_args, &calldata->res.seq_res, | 2087 | &calldata->arg.seq_args, |
2033 | 1, task)) | 2088 | &calldata->res.seq_res, |
2034 | return; | 2089 | task)) |
2090 | goto out; | ||
2035 | rpc_call_start(task); | 2091 | rpc_call_start(task); |
2092 | out: | ||
2093 | dprintk("%s: done!\n", __func__); | ||
2036 | } | 2094 | } |
2037 | 2095 | ||
2038 | static const struct rpc_call_ops nfs4_close_ops = { | 2096 | static const struct rpc_call_ops nfs4_close_ops = { |
@@ -2074,6 +2132,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) | |||
2074 | calldata = kzalloc(sizeof(*calldata), gfp_mask); | 2132 | calldata = kzalloc(sizeof(*calldata), gfp_mask); |
2075 | if (calldata == NULL) | 2133 | if (calldata == NULL) |
2076 | goto out; | 2134 | goto out; |
2135 | nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); | ||
2077 | calldata->inode = state->inode; | 2136 | calldata->inode = state->inode; |
2078 | calldata->state = state; | 2137 | calldata->state = state; |
2079 | calldata->arg.fh = NFS_FH(state->inode); | 2138 | calldata->arg.fh = NFS_FH(state->inode); |
@@ -2182,6 +2241,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f | |||
2182 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; | 2241 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; |
2183 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; | 2242 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; |
2184 | server->acl_bitmask = res.acl_bitmask; | 2243 | server->acl_bitmask = res.acl_bitmask; |
2244 | server->fh_expire_type = res.fh_expire_type; | ||
2185 | } | 2245 | } |
2186 | 2246 | ||
2187 | return status; | 2247 | return status; |
@@ -2303,7 +2363,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2303 | return nfs4_map_errors(status); | 2363 | return nfs4_map_errors(status); |
2304 | } | 2364 | } |
2305 | 2365 | ||
2306 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | ||
2307 | /* | 2366 | /* |
2308 | * Get locations and (maybe) other attributes of a referral. | 2367 | * Get locations and (maybe) other attributes of a referral. |
2309 | * Note that we'll actually follow the referral later when | 2368 | * Note that we'll actually follow the referral later when |
@@ -2420,6 +2479,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2420 | } | 2479 | } |
2421 | } | 2480 | } |
2422 | 2481 | ||
2482 | /* Deal with open(O_TRUNC) */ | ||
2483 | if (sattr->ia_valid & ATTR_OPEN) | ||
2484 | sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); | ||
2485 | |||
2423 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); | 2486 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); |
2424 | if (status == 0) | 2487 | if (status == 0) |
2425 | nfs_setattr_update_inode(inode, sattr); | 2488 | nfs_setattr_update_inode(inode, sattr); |
@@ -2494,7 +2557,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
2494 | struct nfs_server *server = NFS_SERVER(inode); | 2557 | struct nfs_server *server = NFS_SERVER(inode); |
2495 | struct nfs4_accessargs args = { | 2558 | struct nfs4_accessargs args = { |
2496 | .fh = NFS_FH(inode), | 2559 | .fh = NFS_FH(inode), |
2497 | .bitmask = server->attr_bitmask, | 2560 | .bitmask = server->cache_consistency_bitmask, |
2498 | }; | 2561 | }; |
2499 | struct nfs4_accessres res = { | 2562 | struct nfs4_accessres res = { |
2500 | .server = server, | 2563 | .server = server, |
@@ -2712,8 +2775,18 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) | |||
2712 | 2775 | ||
2713 | args->bitmask = server->cache_consistency_bitmask; | 2776 | args->bitmask = server->cache_consistency_bitmask; |
2714 | res->server = server; | 2777 | res->server = server; |
2715 | res->seq_res.sr_slot = NULL; | ||
2716 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; | 2778 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; |
2779 | nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); | ||
2780 | } | ||
2781 | |||
2782 | static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) | ||
2783 | { | ||
2784 | if (nfs4_setup_sequence(NFS_SERVER(data->dir), | ||
2785 | &data->args.seq_args, | ||
2786 | &data->res.seq_res, | ||
2787 | task)) | ||
2788 | return; | ||
2789 | rpc_call_start(task); | ||
2717 | } | 2790 | } |
2718 | 2791 | ||
2719 | static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) | 2792 | static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) |
@@ -2738,6 +2811,17 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) | |||
2738 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; | 2811 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; |
2739 | arg->bitmask = server->attr_bitmask; | 2812 | arg->bitmask = server->attr_bitmask; |
2740 | res->server = server; | 2813 | res->server = server; |
2814 | nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); | ||
2815 | } | ||
2816 | |||
2817 | static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) | ||
2818 | { | ||
2819 | if (nfs4_setup_sequence(NFS_SERVER(data->old_dir), | ||
2820 | &data->args.seq_args, | ||
2821 | &data->res.seq_res, | ||
2822 | task)) | ||
2823 | return; | ||
2824 | rpc_call_start(task); | ||
2741 | } | 2825 | } |
2742 | 2826 | ||
2743 | static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | 2827 | static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, |
@@ -3232,6 +3316,17 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message | |||
3232 | data->timestamp = jiffies; | 3316 | data->timestamp = jiffies; |
3233 | data->read_done_cb = nfs4_read_done_cb; | 3317 | data->read_done_cb = nfs4_read_done_cb; |
3234 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; | 3318 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; |
3319 | nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); | ||
3320 | } | ||
3321 | |||
3322 | static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | ||
3323 | { | ||
3324 | if (nfs4_setup_sequence(NFS_SERVER(data->inode), | ||
3325 | &data->args.seq_args, | ||
3326 | &data->res.seq_res, | ||
3327 | task)) | ||
3328 | return; | ||
3329 | rpc_call_start(task); | ||
3235 | } | 3330 | } |
3236 | 3331 | ||
3237 | /* Reset the the nfs_read_data to send the read to the MDS. */ | 3332 | /* Reset the the nfs_read_data to send the read to the MDS. */ |
@@ -3305,6 +3400,17 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
3305 | data->timestamp = jiffies; | 3400 | data->timestamp = jiffies; |
3306 | 3401 | ||
3307 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; | 3402 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; |
3403 | nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | ||
3404 | } | ||
3405 | |||
3406 | static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
3407 | { | ||
3408 | if (nfs4_setup_sequence(NFS_SERVER(data->inode), | ||
3409 | &data->args.seq_args, | ||
3410 | &data->res.seq_res, | ||
3411 | task)) | ||
3412 | return; | ||
3413 | rpc_call_start(task); | ||
3308 | } | 3414 | } |
3309 | 3415 | ||
3310 | static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) | 3416 | static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) |
@@ -3339,6 +3445,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa | |||
3339 | data->write_done_cb = nfs4_commit_done_cb; | 3445 | data->write_done_cb = nfs4_commit_done_cb; |
3340 | data->res.server = server; | 3446 | data->res.server = server; |
3341 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; | 3447 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; |
3448 | nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | ||
3342 | } | 3449 | } |
3343 | 3450 | ||
3344 | struct nfs4_renewdata { | 3451 | struct nfs4_renewdata { |
@@ -3714,8 +3821,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
3714 | if (task->tk_status >= 0) | 3821 | if (task->tk_status >= 0) |
3715 | return 0; | 3822 | return 0; |
3716 | switch(task->tk_status) { | 3823 | switch(task->tk_status) { |
3824 | case -NFS4ERR_DELEG_REVOKED: | ||
3717 | case -NFS4ERR_ADMIN_REVOKED: | 3825 | case -NFS4ERR_ADMIN_REVOKED: |
3718 | case -NFS4ERR_BAD_STATEID: | 3826 | case -NFS4ERR_BAD_STATEID: |
3827 | if (state != NULL) | ||
3828 | nfs_remove_bad_delegation(state->inode); | ||
3719 | case -NFS4ERR_OPENMODE: | 3829 | case -NFS4ERR_OPENMODE: |
3720 | if (state == NULL) | 3830 | if (state == NULL) |
3721 | break; | 3831 | break; |
@@ -3764,6 +3874,16 @@ wait_on_recovery: | |||
3764 | return -EAGAIN; | 3874 | return -EAGAIN; |
3765 | } | 3875 | } |
3766 | 3876 | ||
3877 | static void nfs4_construct_boot_verifier(struct nfs_client *clp, | ||
3878 | nfs4_verifier *bootverf) | ||
3879 | { | ||
3880 | __be32 verf[2]; | ||
3881 | |||
3882 | verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); | ||
3883 | verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); | ||
3884 | memcpy(bootverf->data, verf, sizeof(bootverf->data)); | ||
3885 | } | ||
3886 | |||
3767 | int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | 3887 | int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, |
3768 | unsigned short port, struct rpc_cred *cred, | 3888 | unsigned short port, struct rpc_cred *cred, |
3769 | struct nfs4_setclientid_res *res) | 3889 | struct nfs4_setclientid_res *res) |
@@ -3780,15 +3900,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
3780 | .rpc_resp = res, | 3900 | .rpc_resp = res, |
3781 | .rpc_cred = cred, | 3901 | .rpc_cred = cred, |
3782 | }; | 3902 | }; |
3783 | __be32 *p; | ||
3784 | int loop = 0; | 3903 | int loop = 0; |
3785 | int status; | 3904 | int status; |
3786 | 3905 | ||
3787 | p = (__be32*)sc_verifier.data; | 3906 | nfs4_construct_boot_verifier(clp, &sc_verifier); |
3788 | *p++ = htonl((u32)clp->cl_boot_time.tv_sec); | ||
3789 | *p = htonl((u32)clp->cl_boot_time.tv_nsec); | ||
3790 | 3907 | ||
3791 | for(;;) { | 3908 | for(;;) { |
3909 | rcu_read_lock(); | ||
3792 | setclientid.sc_name_len = scnprintf(setclientid.sc_name, | 3910 | setclientid.sc_name_len = scnprintf(setclientid.sc_name, |
3793 | sizeof(setclientid.sc_name), "%s/%s %s %s %u", | 3911 | sizeof(setclientid.sc_name), "%s/%s %s %s %u", |
3794 | clp->cl_ipaddr, | 3912 | clp->cl_ipaddr, |
@@ -3805,6 +3923,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
3805 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, | 3923 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, |
3806 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", | 3924 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", |
3807 | clp->cl_ipaddr, port >> 8, port & 255); | 3925 | clp->cl_ipaddr, port >> 8, port & 255); |
3926 | rcu_read_unlock(); | ||
3808 | 3927 | ||
3809 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 3928 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
3810 | if (status != -NFS4ERR_CLID_INUSE) | 3929 | if (status != -NFS4ERR_CLID_INUSE) |
@@ -3891,7 +4010,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) | |||
3891 | 4010 | ||
3892 | if (nfs4_setup_sequence(d_data->res.server, | 4011 | if (nfs4_setup_sequence(d_data->res.server, |
3893 | &d_data->args.seq_args, | 4012 | &d_data->args.seq_args, |
3894 | &d_data->res.seq_res, 1, task)) | 4013 | &d_data->res.seq_res, task)) |
3895 | return; | 4014 | return; |
3896 | rpc_call_start(task); | 4015 | rpc_call_start(task); |
3897 | } | 4016 | } |
@@ -3925,11 +4044,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co | |||
3925 | data = kzalloc(sizeof(*data), GFP_NOFS); | 4044 | data = kzalloc(sizeof(*data), GFP_NOFS); |
3926 | if (data == NULL) | 4045 | if (data == NULL) |
3927 | return -ENOMEM; | 4046 | return -ENOMEM; |
4047 | nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | ||
3928 | data->args.fhandle = &data->fh; | 4048 | data->args.fhandle = &data->fh; |
3929 | data->args.stateid = &data->stateid; | 4049 | data->args.stateid = &data->stateid; |
3930 | data->args.bitmask = server->attr_bitmask; | 4050 | data->args.bitmask = server->attr_bitmask; |
3931 | nfs_copy_fh(&data->fh, NFS_FH(inode)); | 4051 | nfs_copy_fh(&data->fh, NFS_FH(inode)); |
3932 | memcpy(&data->stateid, stateid, sizeof(data->stateid)); | 4052 | nfs4_stateid_copy(&data->stateid, stateid); |
3933 | data->res.fattr = &data->fattr; | 4053 | data->res.fattr = &data->fattr; |
3934 | data->res.server = server; | 4054 | data->res.server = server; |
3935 | nfs_fattr_init(data->res.fattr); | 4055 | nfs_fattr_init(data->res.fattr); |
@@ -4016,7 +4136,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock | |||
4016 | if (status != 0) | 4136 | if (status != 0) |
4017 | goto out; | 4137 | goto out; |
4018 | lsp = request->fl_u.nfs4_fl.owner; | 4138 | lsp = request->fl_u.nfs4_fl.owner; |
4019 | arg.lock_owner.id = lsp->ls_id.id; | 4139 | arg.lock_owner.id = lsp->ls_seqid.owner_id; |
4020 | arg.lock_owner.s_dev = server->s_dev; | 4140 | arg.lock_owner.s_dev = server->s_dev; |
4021 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 4141 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
4022 | switch (status) { | 4142 | switch (status) { |
@@ -4112,9 +4232,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
4112 | return; | 4232 | return; |
4113 | switch (task->tk_status) { | 4233 | switch (task->tk_status) { |
4114 | case 0: | 4234 | case 0: |
4115 | memcpy(calldata->lsp->ls_stateid.data, | 4235 | nfs4_stateid_copy(&calldata->lsp->ls_stateid, |
4116 | calldata->res.stateid.data, | 4236 | &calldata->res.stateid); |
4117 | sizeof(calldata->lsp->ls_stateid.data)); | ||
4118 | renew_lease(calldata->server, calldata->timestamp); | 4237 | renew_lease(calldata->server, calldata->timestamp); |
4119 | break; | 4238 | break; |
4120 | case -NFS4ERR_BAD_STATEID: | 4239 | case -NFS4ERR_BAD_STATEID: |
@@ -4142,7 +4261,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) | |||
4142 | calldata->timestamp = jiffies; | 4261 | calldata->timestamp = jiffies; |
4143 | if (nfs4_setup_sequence(calldata->server, | 4262 | if (nfs4_setup_sequence(calldata->server, |
4144 | &calldata->arg.seq_args, | 4263 | &calldata->arg.seq_args, |
4145 | &calldata->res.seq_res, 1, task)) | 4264 | &calldata->res.seq_res, task)) |
4146 | return; | 4265 | return; |
4147 | rpc_call_start(task); | 4266 | rpc_call_start(task); |
4148 | } | 4267 | } |
@@ -4182,6 +4301,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, | |||
4182 | return ERR_PTR(-ENOMEM); | 4301 | return ERR_PTR(-ENOMEM); |
4183 | } | 4302 | } |
4184 | 4303 | ||
4304 | nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); | ||
4185 | msg.rpc_argp = &data->arg; | 4305 | msg.rpc_argp = &data->arg; |
4186 | msg.rpc_resp = &data->res; | 4306 | msg.rpc_resp = &data->res; |
4187 | task_setup_data.callback_data = data; | 4307 | task_setup_data.callback_data = data; |
@@ -4261,7 +4381,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, | |||
4261 | goto out_free_seqid; | 4381 | goto out_free_seqid; |
4262 | p->arg.lock_stateid = &lsp->ls_stateid; | 4382 | p->arg.lock_stateid = &lsp->ls_stateid; |
4263 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; | 4383 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; |
4264 | p->arg.lock_owner.id = lsp->ls_id.id; | 4384 | p->arg.lock_owner.id = lsp->ls_seqid.owner_id; |
4265 | p->arg.lock_owner.s_dev = server->s_dev; | 4385 | p->arg.lock_owner.s_dev = server->s_dev; |
4266 | p->res.lock_seqid = p->arg.lock_seqid; | 4386 | p->res.lock_seqid = p->arg.lock_seqid; |
4267 | p->lsp = lsp; | 4387 | p->lsp = lsp; |
@@ -4297,7 +4417,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) | |||
4297 | data->timestamp = jiffies; | 4417 | data->timestamp = jiffies; |
4298 | if (nfs4_setup_sequence(data->server, | 4418 | if (nfs4_setup_sequence(data->server, |
4299 | &data->arg.seq_args, | 4419 | &data->arg.seq_args, |
4300 | &data->res.seq_res, 1, task)) | 4420 | &data->res.seq_res, task)) |
4301 | return; | 4421 | return; |
4302 | rpc_call_start(task); | 4422 | rpc_call_start(task); |
4303 | dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); | 4423 | dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); |
@@ -4326,8 +4446,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) | |||
4326 | goto out; | 4446 | goto out; |
4327 | } | 4447 | } |
4328 | if (data->rpc_status == 0) { | 4448 | if (data->rpc_status == 0) { |
4329 | memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, | 4449 | nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); |
4330 | sizeof(data->lsp->ls_stateid.data)); | ||
4331 | data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; | 4450 | data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; |
4332 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); | 4451 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); |
4333 | } | 4452 | } |
@@ -4415,6 +4534,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f | |||
4415 | data->arg.reclaim = NFS_LOCK_RECLAIM; | 4534 | data->arg.reclaim = NFS_LOCK_RECLAIM; |
4416 | task_setup_data.callback_ops = &nfs4_recover_lock_ops; | 4535 | task_setup_data.callback_ops = &nfs4_recover_lock_ops; |
4417 | } | 4536 | } |
4537 | nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); | ||
4418 | msg.rpc_argp = &data->arg; | 4538 | msg.rpc_argp = &data->arg; |
4419 | msg.rpc_resp = &data->res; | 4539 | msg.rpc_resp = &data->res; |
4420 | task_setup_data.callback_data = data; | 4540 | task_setup_data.callback_data = data; |
@@ -4479,15 +4599,34 @@ out: | |||
4479 | } | 4599 | } |
4480 | 4600 | ||
4481 | #if defined(CONFIG_NFS_V4_1) | 4601 | #if defined(CONFIG_NFS_V4_1) |
4482 | static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) | 4602 | static int nfs41_check_expired_locks(struct nfs4_state *state) |
4483 | { | 4603 | { |
4484 | int status; | 4604 | int status, ret = NFS_OK; |
4605 | struct nfs4_lock_state *lsp; | ||
4485 | struct nfs_server *server = NFS_SERVER(state->inode); | 4606 | struct nfs_server *server = NFS_SERVER(state->inode); |
4486 | 4607 | ||
4487 | status = nfs41_test_stateid(server, state); | 4608 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { |
4609 | if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { | ||
4610 | status = nfs41_test_stateid(server, &lsp->ls_stateid); | ||
4611 | if (status != NFS_OK) { | ||
4612 | nfs41_free_stateid(server, &lsp->ls_stateid); | ||
4613 | lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; | ||
4614 | ret = status; | ||
4615 | } | ||
4616 | } | ||
4617 | }; | ||
4618 | |||
4619 | return ret; | ||
4620 | } | ||
4621 | |||
4622 | static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) | ||
4623 | { | ||
4624 | int status = NFS_OK; | ||
4625 | |||
4626 | if (test_bit(LK_STATE_IN_USE, &state->flags)) | ||
4627 | status = nfs41_check_expired_locks(state); | ||
4488 | if (status == NFS_OK) | 4628 | if (status == NFS_OK) |
4489 | return 0; | 4629 | return status; |
4490 | nfs41_free_stateid(server, state); | ||
4491 | return nfs4_lock_expired(state, request); | 4630 | return nfs4_lock_expired(state, request); |
4492 | } | 4631 | } |
4493 | #endif | 4632 | #endif |
@@ -4523,7 +4662,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock | |||
4523 | /* Note: we always want to sleep here! */ | 4662 | /* Note: we always want to sleep here! */ |
4524 | request->fl_flags = fl_flags | FL_SLEEP; | 4663 | request->fl_flags = fl_flags | FL_SLEEP; |
4525 | if (do_vfs_lock(request->fl_file, request) < 0) | 4664 | if (do_vfs_lock(request->fl_file, request) < 0) |
4526 | printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); | 4665 | printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " |
4666 | "manager!\n", __func__); | ||
4527 | out_unlock: | 4667 | out_unlock: |
4528 | up_read(&nfsi->rwsem); | 4668 | up_read(&nfsi->rwsem); |
4529 | out: | 4669 | out: |
@@ -4533,7 +4673,9 @@ out: | |||
4533 | 4673 | ||
4534 | static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) | 4674 | static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) |
4535 | { | 4675 | { |
4536 | struct nfs4_exception exception = { }; | 4676 | struct nfs4_exception exception = { |
4677 | .state = state, | ||
4678 | }; | ||
4537 | int err; | 4679 | int err; |
4538 | 4680 | ||
4539 | do { | 4681 | do { |
@@ -4603,8 +4745,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) | |||
4603 | err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); | 4745 | err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); |
4604 | switch (err) { | 4746 | switch (err) { |
4605 | default: | 4747 | default: |
4606 | printk(KERN_ERR "%s: unhandled error %d.\n", | 4748 | printk(KERN_ERR "NFS: %s: unhandled error " |
4607 | __func__, err); | 4749 | "%d.\n", __func__, err); |
4608 | case 0: | 4750 | case 0: |
4609 | case -ESTALE: | 4751 | case -ESTALE: |
4610 | goto out; | 4752 | goto out; |
@@ -4626,6 +4768,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) | |||
4626 | * The show must go on: exit, but mark the | 4768 | * The show must go on: exit, but mark the |
4627 | * stateid as needing recovery. | 4769 | * stateid as needing recovery. |
4628 | */ | 4770 | */ |
4771 | case -NFS4ERR_DELEG_REVOKED: | ||
4629 | case -NFS4ERR_ADMIN_REVOKED: | 4772 | case -NFS4ERR_ADMIN_REVOKED: |
4630 | case -NFS4ERR_BAD_STATEID: | 4773 | case -NFS4ERR_BAD_STATEID: |
4631 | case -NFS4ERR_OPENMODE: | 4774 | case -NFS4ERR_OPENMODE: |
@@ -4655,33 +4798,44 @@ out: | |||
4655 | return err; | 4798 | return err; |
4656 | } | 4799 | } |
4657 | 4800 | ||
4801 | struct nfs_release_lockowner_data { | ||
4802 | struct nfs4_lock_state *lsp; | ||
4803 | struct nfs_server *server; | ||
4804 | struct nfs_release_lockowner_args args; | ||
4805 | }; | ||
4806 | |||
4658 | static void nfs4_release_lockowner_release(void *calldata) | 4807 | static void nfs4_release_lockowner_release(void *calldata) |
4659 | { | 4808 | { |
4809 | struct nfs_release_lockowner_data *data = calldata; | ||
4810 | nfs4_free_lock_state(data->server, data->lsp); | ||
4660 | kfree(calldata); | 4811 | kfree(calldata); |
4661 | } | 4812 | } |
4662 | 4813 | ||
4663 | const struct rpc_call_ops nfs4_release_lockowner_ops = { | 4814 | static const struct rpc_call_ops nfs4_release_lockowner_ops = { |
4664 | .rpc_release = nfs4_release_lockowner_release, | 4815 | .rpc_release = nfs4_release_lockowner_release, |
4665 | }; | 4816 | }; |
4666 | 4817 | ||
4667 | void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) | 4818 | int nfs4_release_lockowner(struct nfs4_lock_state *lsp) |
4668 | { | 4819 | { |
4669 | struct nfs_server *server = lsp->ls_state->owner->so_server; | 4820 | struct nfs_server *server = lsp->ls_state->owner->so_server; |
4670 | struct nfs_release_lockowner_args *args; | 4821 | struct nfs_release_lockowner_data *data; |
4671 | struct rpc_message msg = { | 4822 | struct rpc_message msg = { |
4672 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], | 4823 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], |
4673 | }; | 4824 | }; |
4674 | 4825 | ||
4675 | if (server->nfs_client->cl_mvops->minor_version != 0) | 4826 | if (server->nfs_client->cl_mvops->minor_version != 0) |
4676 | return; | 4827 | return -EINVAL; |
4677 | args = kmalloc(sizeof(*args), GFP_NOFS); | 4828 | data = kmalloc(sizeof(*data), GFP_NOFS); |
4678 | if (!args) | 4829 | if (!data) |
4679 | return; | 4830 | return -ENOMEM; |
4680 | args->lock_owner.clientid = server->nfs_client->cl_clientid; | 4831 | data->lsp = lsp; |
4681 | args->lock_owner.id = lsp->ls_id.id; | 4832 | data->server = server; |
4682 | args->lock_owner.s_dev = server->s_dev; | 4833 | data->args.lock_owner.clientid = server->nfs_client->cl_clientid; |
4683 | msg.rpc_argp = args; | 4834 | data->args.lock_owner.id = lsp->ls_seqid.owner_id; |
4684 | rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); | 4835 | data->args.lock_owner.s_dev = server->s_dev; |
4836 | msg.rpc_argp = &data->args; | ||
4837 | rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); | ||
4838 | return 0; | ||
4685 | } | 4839 | } |
4686 | 4840 | ||
4687 | #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" | 4841 | #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" |
@@ -4727,11 +4881,11 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) | |||
4727 | if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || | 4881 | if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || |
4728 | (fattr->valid & NFS_ATTR_FATTR_FILEID)) && | 4882 | (fattr->valid & NFS_ATTR_FATTR_FILEID)) && |
4729 | (fattr->valid & NFS_ATTR_FATTR_FSID) && | 4883 | (fattr->valid & NFS_ATTR_FATTR_FSID) && |
4730 | (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) | 4884 | (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS))) |
4731 | return; | 4885 | return; |
4732 | 4886 | ||
4733 | fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | | 4887 | fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | |
4734 | NFS_ATTR_FATTR_NLINK; | 4888 | NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL; |
4735 | fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; | 4889 | fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; |
4736 | fattr->nlink = 2; | 4890 | fattr->nlink = 2; |
4737 | } | 4891 | } |
@@ -4798,7 +4952,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct | |||
4798 | return status; | 4952 | return status; |
4799 | } | 4953 | } |
4800 | 4954 | ||
4801 | int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) | 4955 | static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, |
4956 | struct nfs4_secinfo_flavors *flavors) | ||
4802 | { | 4957 | { |
4803 | struct nfs4_exception exception = { }; | 4958 | struct nfs4_exception exception = { }; |
4804 | int err; | 4959 | int err; |
@@ -4852,6 +5007,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4852 | { | 5007 | { |
4853 | nfs4_verifier verifier; | 5008 | nfs4_verifier verifier; |
4854 | struct nfs41_exchange_id_args args = { | 5009 | struct nfs41_exchange_id_args args = { |
5010 | .verifier = &verifier, | ||
4855 | .client = clp, | 5011 | .client = clp, |
4856 | .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, | 5012 | .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, |
4857 | }; | 5013 | }; |
@@ -4865,15 +5021,11 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4865 | .rpc_resp = &res, | 5021 | .rpc_resp = &res, |
4866 | .rpc_cred = cred, | 5022 | .rpc_cred = cred, |
4867 | }; | 5023 | }; |
4868 | __be32 *p; | ||
4869 | 5024 | ||
4870 | dprintk("--> %s\n", __func__); | 5025 | dprintk("--> %s\n", __func__); |
4871 | BUG_ON(clp == NULL); | 5026 | BUG_ON(clp == NULL); |
4872 | 5027 | ||
4873 | p = (u32 *)verifier.data; | 5028 | nfs4_construct_boot_verifier(clp, &verifier); |
4874 | *p++ = htonl((u32)clp->cl_boot_time.tv_sec); | ||
4875 | *p = htonl((u32)clp->cl_boot_time.tv_nsec); | ||
4876 | args.verifier = &verifier; | ||
4877 | 5029 | ||
4878 | args.id_len = scnprintf(args.id, sizeof(args.id), | 5030 | args.id_len = scnprintf(args.id, sizeof(args.id), |
4879 | "%s/%s.%s/%u", | 5031 | "%s/%s.%s/%u", |
@@ -4888,11 +5040,24 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4888 | goto out; | 5040 | goto out; |
4889 | } | 5041 | } |
4890 | 5042 | ||
5043 | res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); | ||
5044 | if (unlikely(!res.impl_id)) { | ||
5045 | status = -ENOMEM; | ||
5046 | goto out_server_scope; | ||
5047 | } | ||
5048 | |||
4891 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 5049 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
4892 | if (!status) | 5050 | if (!status) |
4893 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); | 5051 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); |
4894 | 5052 | ||
4895 | if (!status) { | 5053 | if (!status) { |
5054 | /* use the most recent implementation id */ | ||
5055 | kfree(clp->impl_id); | ||
5056 | clp->impl_id = res.impl_id; | ||
5057 | } else | ||
5058 | kfree(res.impl_id); | ||
5059 | |||
5060 | if (!status) { | ||
4896 | if (clp->server_scope && | 5061 | if (clp->server_scope && |
4897 | !nfs41_same_server_scope(clp->server_scope, | 5062 | !nfs41_same_server_scope(clp->server_scope, |
4898 | res.server_scope)) { | 5063 | res.server_scope)) { |
@@ -4908,8 +5073,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4908 | goto out; | 5073 | goto out; |
4909 | } | 5074 | } |
4910 | } | 5075 | } |
5076 | |||
5077 | out_server_scope: | ||
4911 | kfree(res.server_scope); | 5078 | kfree(res.server_scope); |
4912 | out: | 5079 | out: |
5080 | if (clp->impl_id) | ||
5081 | dprintk("%s: Server Implementation ID: " | ||
5082 | "domain: %s, name: %s, date: %llu,%u\n", | ||
5083 | __func__, clp->impl_id->domain, clp->impl_id->name, | ||
5084 | clp->impl_id->date.seconds, | ||
5085 | clp->impl_id->date.nseconds); | ||
4913 | dprintk("<-- %s status= %d\n", __func__, status); | 5086 | dprintk("<-- %s status= %d\n", __func__, status); |
4914 | return status; | 5087 | return status; |
4915 | } | 5088 | } |
@@ -4933,7 +5106,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, | |||
4933 | since we're invoked within one */ | 5106 | since we're invoked within one */ |
4934 | ret = nfs41_setup_sequence(data->clp->cl_session, | 5107 | ret = nfs41_setup_sequence(data->clp->cl_session, |
4935 | &data->args->la_seq_args, | 5108 | &data->args->la_seq_args, |
4936 | &data->res->lr_seq_res, 0, task); | 5109 | &data->res->lr_seq_res, task); |
4937 | 5110 | ||
4938 | BUG_ON(ret == -EAGAIN); | 5111 | BUG_ON(ret == -EAGAIN); |
4939 | rpc_call_start(task); | 5112 | rpc_call_start(task); |
@@ -4966,7 +5139,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) | |||
4966 | dprintk("<-- %s\n", __func__); | 5139 | dprintk("<-- %s\n", __func__); |
4967 | } | 5140 | } |
4968 | 5141 | ||
4969 | struct rpc_call_ops nfs4_get_lease_time_ops = { | 5142 | static const struct rpc_call_ops nfs4_get_lease_time_ops = { |
4970 | .rpc_call_prepare = nfs4_get_lease_time_prepare, | 5143 | .rpc_call_prepare = nfs4_get_lease_time_prepare, |
4971 | .rpc_call_done = nfs4_get_lease_time_done, | 5144 | .rpc_call_done = nfs4_get_lease_time_done, |
4972 | }; | 5145 | }; |
@@ -4997,6 +5170,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) | |||
4997 | }; | 5170 | }; |
4998 | int status; | 5171 | int status; |
4999 | 5172 | ||
5173 | nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); | ||
5000 | dprintk("--> %s\n", __func__); | 5174 | dprintk("--> %s\n", __func__); |
5001 | task = rpc_run_task(&task_setup); | 5175 | task = rpc_run_task(&task_setup); |
5002 | 5176 | ||
@@ -5113,13 +5287,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) | |||
5113 | return NULL; | 5287 | return NULL; |
5114 | 5288 | ||
5115 | tbl = &session->fc_slot_table; | 5289 | tbl = &session->fc_slot_table; |
5116 | tbl->highest_used_slotid = -1; | 5290 | tbl->highest_used_slotid = NFS4_NO_SLOT; |
5117 | spin_lock_init(&tbl->slot_tbl_lock); | 5291 | spin_lock_init(&tbl->slot_tbl_lock); |
5118 | rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); | 5292 | rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); |
5119 | init_completion(&tbl->complete); | 5293 | init_completion(&tbl->complete); |
5120 | 5294 | ||
5121 | tbl = &session->bc_slot_table; | 5295 | tbl = &session->bc_slot_table; |
5122 | tbl->highest_used_slotid = -1; | 5296 | tbl->highest_used_slotid = NFS4_NO_SLOT; |
5123 | spin_lock_init(&tbl->slot_tbl_lock); | 5297 | spin_lock_init(&tbl->slot_tbl_lock); |
5124 | rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); | 5298 | rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); |
5125 | init_completion(&tbl->complete); | 5299 | init_completion(&tbl->complete); |
@@ -5132,11 +5306,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) | |||
5132 | 5306 | ||
5133 | void nfs4_destroy_session(struct nfs4_session *session) | 5307 | void nfs4_destroy_session(struct nfs4_session *session) |
5134 | { | 5308 | { |
5309 | struct rpc_xprt *xprt; | ||
5310 | |||
5135 | nfs4_proc_destroy_session(session); | 5311 | nfs4_proc_destroy_session(session); |
5312 | |||
5313 | rcu_read_lock(); | ||
5314 | xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); | ||
5315 | rcu_read_unlock(); | ||
5136 | dprintk("%s Destroy backchannel for xprt %p\n", | 5316 | dprintk("%s Destroy backchannel for xprt %p\n", |
5137 | __func__, session->clp->cl_rpcclient->cl_xprt); | 5317 | __func__, xprt); |
5138 | xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, | 5318 | xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); |
5139 | NFS41_BC_MIN_CALLBACKS); | ||
5140 | nfs4_destroy_slot_tables(session); | 5319 | nfs4_destroy_slot_tables(session); |
5141 | kfree(session); | 5320 | kfree(session); |
5142 | } | 5321 | } |
@@ -5164,7 +5343,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | |||
5164 | args->fc_attrs.max_rqst_sz = mxrqst_sz; | 5343 | args->fc_attrs.max_rqst_sz = mxrqst_sz; |
5165 | args->fc_attrs.max_resp_sz = mxresp_sz; | 5344 | args->fc_attrs.max_resp_sz = mxresp_sz; |
5166 | args->fc_attrs.max_ops = NFS4_MAX_OPS; | 5345 | args->fc_attrs.max_ops = NFS4_MAX_OPS; |
5167 | args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; | 5346 | args->fc_attrs.max_reqs = max_session_slots; |
5168 | 5347 | ||
5169 | dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " | 5348 | dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " |
5170 | "max_ops=%u max_reqs=%u\n", | 5349 | "max_ops=%u max_reqs=%u\n", |
@@ -5204,6 +5383,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args | |||
5204 | return -EINVAL; | 5383 | return -EINVAL; |
5205 | if (rcvd->max_reqs == 0) | 5384 | if (rcvd->max_reqs == 0) |
5206 | return -EINVAL; | 5385 | return -EINVAL; |
5386 | if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE) | ||
5387 | rcvd->max_reqs = NFS4_MAX_SLOT_TABLE; | ||
5207 | return 0; | 5388 | return 0; |
5208 | } | 5389 | } |
5209 | 5390 | ||
@@ -5219,9 +5400,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args | |||
5219 | if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) | 5400 | if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) |
5220 | return -EINVAL; | 5401 | return -EINVAL; |
5221 | /* These would render the backchannel useless: */ | 5402 | /* These would render the backchannel useless: */ |
5222 | if (rcvd->max_ops == 0) | 5403 | if (rcvd->max_ops != sent->max_ops) |
5223 | return -EINVAL; | 5404 | return -EINVAL; |
5224 | if (rcvd->max_reqs == 0) | 5405 | if (rcvd->max_reqs != sent->max_reqs) |
5225 | return -EINVAL; | 5406 | return -EINVAL; |
5226 | return 0; | 5407 | return 0; |
5227 | } | 5408 | } |
@@ -5324,7 +5505,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session) | |||
5324 | 5505 | ||
5325 | if (status) | 5506 | if (status) |
5326 | printk(KERN_WARNING | 5507 | printk(KERN_WARNING |
5327 | "Got error %d from the server on DESTROY_SESSION. " | 5508 | "NFS: Got error %d from the server on DESTROY_SESSION. " |
5328 | "Session has been destroyed regardless...\n", status); | 5509 | "Session has been destroyed regardless...\n", status); |
5329 | 5510 | ||
5330 | dprintk("<-- nfs4_proc_destroy_session\n"); | 5511 | dprintk("<-- nfs4_proc_destroy_session\n"); |
@@ -5447,7 +5628,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) | |||
5447 | args = task->tk_msg.rpc_argp; | 5628 | args = task->tk_msg.rpc_argp; |
5448 | res = task->tk_msg.rpc_resp; | 5629 | res = task->tk_msg.rpc_resp; |
5449 | 5630 | ||
5450 | if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) | 5631 | if (nfs41_setup_sequence(clp->cl_session, args, res, task)) |
5451 | return; | 5632 | return; |
5452 | rpc_call_start(task); | 5633 | rpc_call_start(task); |
5453 | } | 5634 | } |
@@ -5479,6 +5660,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ | |||
5479 | nfs_put_client(clp); | 5660 | nfs_put_client(clp); |
5480 | return ERR_PTR(-ENOMEM); | 5661 | return ERR_PTR(-ENOMEM); |
5481 | } | 5662 | } |
5663 | nfs41_init_sequence(&calldata->args, &calldata->res, 0); | ||
5482 | msg.rpc_argp = &calldata->args; | 5664 | msg.rpc_argp = &calldata->args; |
5483 | msg.rpc_resp = &calldata->res; | 5665 | msg.rpc_resp = &calldata->res; |
5484 | calldata->clp = clp; | 5666 | calldata->clp = clp; |
@@ -5540,7 +5722,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) | |||
5540 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); | 5722 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); |
5541 | if (nfs41_setup_sequence(calldata->clp->cl_session, | 5723 | if (nfs41_setup_sequence(calldata->clp->cl_session, |
5542 | &calldata->arg.seq_args, | 5724 | &calldata->arg.seq_args, |
5543 | &calldata->res.seq_res, 0, task)) | 5725 | &calldata->res.seq_res, task)) |
5544 | return; | 5726 | return; |
5545 | 5727 | ||
5546 | rpc_call_start(task); | 5728 | rpc_call_start(task); |
@@ -5619,6 +5801,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) | |||
5619 | calldata->clp = clp; | 5801 | calldata->clp = clp; |
5620 | calldata->arg.one_fs = 0; | 5802 | calldata->arg.one_fs = 0; |
5621 | 5803 | ||
5804 | nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); | ||
5622 | msg.rpc_argp = &calldata->arg; | 5805 | msg.rpc_argp = &calldata->arg; |
5623 | msg.rpc_resp = &calldata->res; | 5806 | msg.rpc_resp = &calldata->res; |
5624 | task_setup_data.callback_data = calldata; | 5807 | task_setup_data.callback_data = calldata; |
@@ -5650,7 +5833,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||
5650 | * to be no way to prevent it completely. | 5833 | * to be no way to prevent it completely. |
5651 | */ | 5834 | */ |
5652 | if (nfs4_setup_sequence(server, &lgp->args.seq_args, | 5835 | if (nfs4_setup_sequence(server, &lgp->args.seq_args, |
5653 | &lgp->res.seq_res, 0, task)) | 5836 | &lgp->res.seq_res, task)) |
5654 | return; | 5837 | return; |
5655 | if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, | 5838 | if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, |
5656 | NFS_I(lgp->args.inode)->layout, | 5839 | NFS_I(lgp->args.inode)->layout, |
@@ -5725,6 +5908,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
5725 | 5908 | ||
5726 | lgp->res.layoutp = &lgp->args.layout; | 5909 | lgp->res.layoutp = &lgp->args.layout; |
5727 | lgp->res.seq_res.sr_slot = NULL; | 5910 | lgp->res.seq_res.sr_slot = NULL; |
5911 | nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); | ||
5728 | task = rpc_run_task(&task_setup_data); | 5912 | task = rpc_run_task(&task_setup_data); |
5729 | if (IS_ERR(task)) | 5913 | if (IS_ERR(task)) |
5730 | return PTR_ERR(task); | 5914 | return PTR_ERR(task); |
@@ -5745,7 +5929,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) | |||
5745 | 5929 | ||
5746 | dprintk("--> %s\n", __func__); | 5930 | dprintk("--> %s\n", __func__); |
5747 | if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, | 5931 | if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, |
5748 | &lrp->res.seq_res, 0, task)) | 5932 | &lrp->res.seq_res, task)) |
5749 | return; | 5933 | return; |
5750 | rpc_call_start(task); | 5934 | rpc_call_start(task); |
5751 | } | 5935 | } |
@@ -5811,6 +5995,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
5811 | int status; | 5995 | int status; |
5812 | 5996 | ||
5813 | dprintk("--> %s\n", __func__); | 5997 | dprintk("--> %s\n", __func__); |
5998 | nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); | ||
5814 | task = rpc_run_task(&task_setup_data); | 5999 | task = rpc_run_task(&task_setup_data); |
5815 | if (IS_ERR(task)) | 6000 | if (IS_ERR(task)) |
5816 | return PTR_ERR(task); | 6001 | return PTR_ERR(task); |
@@ -5911,7 +6096,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) | |||
5911 | struct nfs_server *server = NFS_SERVER(data->args.inode); | 6096 | struct nfs_server *server = NFS_SERVER(data->args.inode); |
5912 | 6097 | ||
5913 | if (nfs4_setup_sequence(server, &data->args.seq_args, | 6098 | if (nfs4_setup_sequence(server, &data->args.seq_args, |
5914 | &data->res.seq_res, 1, task)) | 6099 | &data->res.seq_res, task)) |
5915 | return; | 6100 | return; |
5916 | rpc_call_start(task); | 6101 | rpc_call_start(task); |
5917 | } | 6102 | } |
@@ -5998,6 +6183,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | |||
5998 | data->args.lastbytewritten, | 6183 | data->args.lastbytewritten, |
5999 | data->args.inode->i_ino); | 6184 | data->args.inode->i_ino); |
6000 | 6185 | ||
6186 | nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | ||
6001 | task = rpc_run_task(&task_setup_data); | 6187 | task = rpc_run_task(&task_setup_data); |
6002 | if (IS_ERR(task)) | 6188 | if (IS_ERR(task)) |
6003 | return PTR_ERR(task); | 6189 | return PTR_ERR(task); |
@@ -6091,11 +6277,12 @@ out_freepage: | |||
6091 | out: | 6277 | out: |
6092 | return err; | 6278 | return err; |
6093 | } | 6279 | } |
6094 | static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) | 6280 | |
6281 | static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | ||
6095 | { | 6282 | { |
6096 | int status; | 6283 | int status; |
6097 | struct nfs41_test_stateid_args args = { | 6284 | struct nfs41_test_stateid_args args = { |
6098 | .stateid = &state->stateid, | 6285 | .stateid = stateid, |
6099 | }; | 6286 | }; |
6100 | struct nfs41_test_stateid_res res; | 6287 | struct nfs41_test_stateid_res res; |
6101 | struct rpc_message msg = { | 6288 | struct rpc_message msg = { |
@@ -6103,28 +6290,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta | |||
6103 | .rpc_argp = &args, | 6290 | .rpc_argp = &args, |
6104 | .rpc_resp = &res, | 6291 | .rpc_resp = &res, |
6105 | }; | 6292 | }; |
6106 | args.seq_args.sa_session = res.seq_res.sr_session = NULL; | 6293 | |
6107 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); | 6294 | nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); |
6295 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); | ||
6296 | |||
6297 | if (status == NFS_OK) | ||
6298 | return res.status; | ||
6108 | return status; | 6299 | return status; |
6109 | } | 6300 | } |
6110 | 6301 | ||
6111 | static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) | 6302 | static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) |
6112 | { | 6303 | { |
6113 | struct nfs4_exception exception = { }; | 6304 | struct nfs4_exception exception = { }; |
6114 | int err; | 6305 | int err; |
6115 | do { | 6306 | do { |
6116 | err = nfs4_handle_exception(server, | 6307 | err = nfs4_handle_exception(server, |
6117 | _nfs41_test_stateid(server, state), | 6308 | _nfs41_test_stateid(server, stateid), |
6118 | &exception); | 6309 | &exception); |
6119 | } while (exception.retry); | 6310 | } while (exception.retry); |
6120 | return err; | 6311 | return err; |
6121 | } | 6312 | } |
6122 | 6313 | ||
6123 | static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) | 6314 | static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) |
6124 | { | 6315 | { |
6125 | int status; | ||
6126 | struct nfs41_free_stateid_args args = { | 6316 | struct nfs41_free_stateid_args args = { |
6127 | .stateid = &state->stateid, | 6317 | .stateid = stateid, |
6128 | }; | 6318 | }; |
6129 | struct nfs41_free_stateid_res res; | 6319 | struct nfs41_free_stateid_res res; |
6130 | struct rpc_message msg = { | 6320 | struct rpc_message msg = { |
@@ -6133,25 +6323,46 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat | |||
6133 | .rpc_resp = &res, | 6323 | .rpc_resp = &res, |
6134 | }; | 6324 | }; |
6135 | 6325 | ||
6136 | args.seq_args.sa_session = res.seq_res.sr_session = NULL; | 6326 | nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); |
6137 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); | 6327 | return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); |
6138 | return status; | ||
6139 | } | 6328 | } |
6140 | 6329 | ||
6141 | static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) | 6330 | static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) |
6142 | { | 6331 | { |
6143 | struct nfs4_exception exception = { }; | 6332 | struct nfs4_exception exception = { }; |
6144 | int err; | 6333 | int err; |
6145 | do { | 6334 | do { |
6146 | err = nfs4_handle_exception(server, | 6335 | err = nfs4_handle_exception(server, |
6147 | _nfs4_free_stateid(server, state), | 6336 | _nfs4_free_stateid(server, stateid), |
6148 | &exception); | 6337 | &exception); |
6149 | } while (exception.retry); | 6338 | } while (exception.retry); |
6150 | return err; | 6339 | return err; |
6151 | } | 6340 | } |
6341 | |||
6342 | static bool nfs41_match_stateid(const nfs4_stateid *s1, | ||
6343 | const nfs4_stateid *s2) | ||
6344 | { | ||
6345 | if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) | ||
6346 | return false; | ||
6347 | |||
6348 | if (s1->seqid == s2->seqid) | ||
6349 | return true; | ||
6350 | if (s1->seqid == 0 || s2->seqid == 0) | ||
6351 | return true; | ||
6352 | |||
6353 | return false; | ||
6354 | } | ||
6355 | |||
6152 | #endif /* CONFIG_NFS_V4_1 */ | 6356 | #endif /* CONFIG_NFS_V4_1 */ |
6153 | 6357 | ||
6154 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 6358 | static bool nfs4_match_stateid(const nfs4_stateid *s1, |
6359 | const nfs4_stateid *s2) | ||
6360 | { | ||
6361 | return nfs4_stateid_match(s1, s2); | ||
6362 | } | ||
6363 | |||
6364 | |||
6365 | static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | ||
6155 | .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, | 6366 | .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, |
6156 | .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, | 6367 | .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, |
6157 | .recover_open = nfs4_open_reclaim, | 6368 | .recover_open = nfs4_open_reclaim, |
@@ -6161,7 +6372,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | |||
6161 | }; | 6372 | }; |
6162 | 6373 | ||
6163 | #if defined(CONFIG_NFS_V4_1) | 6374 | #if defined(CONFIG_NFS_V4_1) |
6164 | struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { | 6375 | static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { |
6165 | .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, | 6376 | .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, |
6166 | .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, | 6377 | .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, |
6167 | .recover_open = nfs4_open_reclaim, | 6378 | .recover_open = nfs4_open_reclaim, |
@@ -6172,7 +6383,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { | |||
6172 | }; | 6383 | }; |
6173 | #endif /* CONFIG_NFS_V4_1 */ | 6384 | #endif /* CONFIG_NFS_V4_1 */ |
6174 | 6385 | ||
6175 | struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { | 6386 | static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { |
6176 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, | 6387 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, |
6177 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, | 6388 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, |
6178 | .recover_open = nfs4_open_expired, | 6389 | .recover_open = nfs4_open_expired, |
@@ -6182,7 +6393,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { | |||
6182 | }; | 6393 | }; |
6183 | 6394 | ||
6184 | #if defined(CONFIG_NFS_V4_1) | 6395 | #if defined(CONFIG_NFS_V4_1) |
6185 | struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { | 6396 | static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { |
6186 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, | 6397 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, |
6187 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, | 6398 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, |
6188 | .recover_open = nfs41_open_expired, | 6399 | .recover_open = nfs41_open_expired, |
@@ -6192,14 +6403,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { | |||
6192 | }; | 6403 | }; |
6193 | #endif /* CONFIG_NFS_V4_1 */ | 6404 | #endif /* CONFIG_NFS_V4_1 */ |
6194 | 6405 | ||
6195 | struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { | 6406 | static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { |
6196 | .sched_state_renewal = nfs4_proc_async_renew, | 6407 | .sched_state_renewal = nfs4_proc_async_renew, |
6197 | .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, | 6408 | .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, |
6198 | .renew_lease = nfs4_proc_renew, | 6409 | .renew_lease = nfs4_proc_renew, |
6199 | }; | 6410 | }; |
6200 | 6411 | ||
6201 | #if defined(CONFIG_NFS_V4_1) | 6412 | #if defined(CONFIG_NFS_V4_1) |
6202 | struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { | 6413 | static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { |
6203 | .sched_state_renewal = nfs41_proc_async_sequence, | 6414 | .sched_state_renewal = nfs41_proc_async_sequence, |
6204 | .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, | 6415 | .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, |
6205 | .renew_lease = nfs4_proc_sequence, | 6416 | .renew_lease = nfs4_proc_sequence, |
@@ -6209,7 +6420,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { | |||
6209 | static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | 6420 | static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { |
6210 | .minor_version = 0, | 6421 | .minor_version = 0, |
6211 | .call_sync = _nfs4_call_sync, | 6422 | .call_sync = _nfs4_call_sync, |
6212 | .validate_stateid = nfs4_validate_delegation_stateid, | 6423 | .match_stateid = nfs4_match_stateid, |
6213 | .find_root_sec = nfs4_find_root_sec, | 6424 | .find_root_sec = nfs4_find_root_sec, |
6214 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, | 6425 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, |
6215 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, | 6426 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, |
@@ -6220,7 +6431,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | |||
6220 | static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | 6431 | static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { |
6221 | .minor_version = 1, | 6432 | .minor_version = 1, |
6222 | .call_sync = _nfs4_call_sync_session, | 6433 | .call_sync = _nfs4_call_sync_session, |
6223 | .validate_stateid = nfs41_validate_delegation_stateid, | 6434 | .match_stateid = nfs41_match_stateid, |
6224 | .find_root_sec = nfs41_find_root_sec, | 6435 | .find_root_sec = nfs41_find_root_sec, |
6225 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | 6436 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, |
6226 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | 6437 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, |
@@ -6260,9 +6471,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
6260 | .create = nfs4_proc_create, | 6471 | .create = nfs4_proc_create, |
6261 | .remove = nfs4_proc_remove, | 6472 | .remove = nfs4_proc_remove, |
6262 | .unlink_setup = nfs4_proc_unlink_setup, | 6473 | .unlink_setup = nfs4_proc_unlink_setup, |
6474 | .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, | ||
6263 | .unlink_done = nfs4_proc_unlink_done, | 6475 | .unlink_done = nfs4_proc_unlink_done, |
6264 | .rename = nfs4_proc_rename, | 6476 | .rename = nfs4_proc_rename, |
6265 | .rename_setup = nfs4_proc_rename_setup, | 6477 | .rename_setup = nfs4_proc_rename_setup, |
6478 | .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, | ||
6266 | .rename_done = nfs4_proc_rename_done, | 6479 | .rename_done = nfs4_proc_rename_done, |
6267 | .link = nfs4_proc_link, | 6480 | .link = nfs4_proc_link, |
6268 | .symlink = nfs4_proc_symlink, | 6481 | .symlink = nfs4_proc_symlink, |
@@ -6276,8 +6489,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
6276 | .set_capabilities = nfs4_server_capabilities, | 6489 | .set_capabilities = nfs4_server_capabilities, |
6277 | .decode_dirent = nfs4_decode_dirent, | 6490 | .decode_dirent = nfs4_decode_dirent, |
6278 | .read_setup = nfs4_proc_read_setup, | 6491 | .read_setup = nfs4_proc_read_setup, |
6492 | .read_rpc_prepare = nfs4_proc_read_rpc_prepare, | ||
6279 | .read_done = nfs4_read_done, | 6493 | .read_done = nfs4_read_done, |
6280 | .write_setup = nfs4_proc_write_setup, | 6494 | .write_setup = nfs4_proc_write_setup, |
6495 | .write_rpc_prepare = nfs4_proc_write_rpc_prepare, | ||
6281 | .write_done = nfs4_write_done, | 6496 | .write_done = nfs4_write_done, |
6282 | .commit_setup = nfs4_proc_commit_setup, | 6497 | .commit_setup = nfs4_proc_commit_setup, |
6283 | .commit_done = nfs4_commit_done, | 6498 | .commit_done = nfs4_commit_done, |
@@ -6301,6 +6516,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { | |||
6301 | NULL | 6516 | NULL |
6302 | }; | 6517 | }; |
6303 | 6518 | ||
6519 | module_param(max_session_slots, ushort, 0644); | ||
6520 | MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " | ||
6521 | "requests the client will negotiate"); | ||
6522 | |||
6304 | /* | 6523 | /* |
6305 | * Local variables: | 6524 | * Local variables: |
6306 | * c-basic-offset: 8 | 6525 | * c-basic-offset: 8 |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 45392032e7bd..0f43414eb25a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -146,6 +146,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) | |||
146 | struct rpc_cred *cred = NULL; | 146 | struct rpc_cred *cred = NULL; |
147 | struct nfs_server *server; | 147 | struct nfs_server *server; |
148 | 148 | ||
149 | /* Use machine credentials if available */ | ||
150 | cred = nfs4_get_machine_cred_locked(clp); | ||
151 | if (cred != NULL) | ||
152 | goto out; | ||
153 | |||
149 | rcu_read_lock(); | 154 | rcu_read_lock(); |
150 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 155 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
151 | cred = nfs4_get_renew_cred_server_locked(server); | 156 | cred = nfs4_get_renew_cred_server_locked(server); |
@@ -153,6 +158,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) | |||
153 | break; | 158 | break; |
154 | } | 159 | } |
155 | rcu_read_unlock(); | 160 | rcu_read_unlock(); |
161 | |||
162 | out: | ||
156 | return cred; | 163 | return cred; |
157 | } | 164 | } |
158 | 165 | ||
@@ -190,30 +197,29 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) | |||
190 | static void nfs4_end_drain_session(struct nfs_client *clp) | 197 | static void nfs4_end_drain_session(struct nfs_client *clp) |
191 | { | 198 | { |
192 | struct nfs4_session *ses = clp->cl_session; | 199 | struct nfs4_session *ses = clp->cl_session; |
200 | struct nfs4_slot_table *tbl; | ||
193 | int max_slots; | 201 | int max_slots; |
194 | 202 | ||
195 | if (ses == NULL) | 203 | if (ses == NULL) |
196 | return; | 204 | return; |
205 | tbl = &ses->fc_slot_table; | ||
197 | if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { | 206 | if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { |
198 | spin_lock(&ses->fc_slot_table.slot_tbl_lock); | 207 | spin_lock(&tbl->slot_tbl_lock); |
199 | max_slots = ses->fc_slot_table.max_slots; | 208 | max_slots = tbl->max_slots; |
200 | while (max_slots--) { | 209 | while (max_slots--) { |
201 | struct rpc_task *task; | 210 | if (rpc_wake_up_first(&tbl->slot_tbl_waitq, |
202 | 211 | nfs4_set_task_privileged, | |
203 | task = rpc_wake_up_next(&ses->fc_slot_table. | 212 | NULL) == NULL) |
204 | slot_tbl_waitq); | ||
205 | if (!task) | ||
206 | break; | 213 | break; |
207 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); | ||
208 | } | 214 | } |
209 | spin_unlock(&ses->fc_slot_table.slot_tbl_lock); | 215 | spin_unlock(&tbl->slot_tbl_lock); |
210 | } | 216 | } |
211 | } | 217 | } |
212 | 218 | ||
213 | static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) | 219 | static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) |
214 | { | 220 | { |
215 | spin_lock(&tbl->slot_tbl_lock); | 221 | spin_lock(&tbl->slot_tbl_lock); |
216 | if (tbl->highest_used_slotid != -1) { | 222 | if (tbl->highest_used_slotid != NFS4_NO_SLOT) { |
217 | INIT_COMPLETION(tbl->complete); | 223 | INIT_COMPLETION(tbl->complete); |
218 | spin_unlock(&tbl->slot_tbl_lock); | 224 | spin_unlock(&tbl->slot_tbl_lock); |
219 | return wait_for_completion_interruptible(&tbl->complete); | 225 | return wait_for_completion_interruptible(&tbl->complete); |
@@ -317,62 +323,6 @@ out: | |||
317 | return cred; | 323 | return cred; |
318 | } | 324 | } |
319 | 325 | ||
320 | static void nfs_alloc_unique_id_locked(struct rb_root *root, | ||
321 | struct nfs_unique_id *new, | ||
322 | __u64 minval, int maxbits) | ||
323 | { | ||
324 | struct rb_node **p, *parent; | ||
325 | struct nfs_unique_id *pos; | ||
326 | __u64 mask = ~0ULL; | ||
327 | |||
328 | if (maxbits < 64) | ||
329 | mask = (1ULL << maxbits) - 1ULL; | ||
330 | |||
331 | /* Ensure distribution is more or less flat */ | ||
332 | get_random_bytes(&new->id, sizeof(new->id)); | ||
333 | new->id &= mask; | ||
334 | if (new->id < minval) | ||
335 | new->id += minval; | ||
336 | retry: | ||
337 | p = &root->rb_node; | ||
338 | parent = NULL; | ||
339 | |||
340 | while (*p != NULL) { | ||
341 | parent = *p; | ||
342 | pos = rb_entry(parent, struct nfs_unique_id, rb_node); | ||
343 | |||
344 | if (new->id < pos->id) | ||
345 | p = &(*p)->rb_left; | ||
346 | else if (new->id > pos->id) | ||
347 | p = &(*p)->rb_right; | ||
348 | else | ||
349 | goto id_exists; | ||
350 | } | ||
351 | rb_link_node(&new->rb_node, parent, p); | ||
352 | rb_insert_color(&new->rb_node, root); | ||
353 | return; | ||
354 | id_exists: | ||
355 | for (;;) { | ||
356 | new->id++; | ||
357 | if (new->id < minval || (new->id & mask) != new->id) { | ||
358 | new->id = minval; | ||
359 | break; | ||
360 | } | ||
361 | parent = rb_next(parent); | ||
362 | if (parent == NULL) | ||
363 | break; | ||
364 | pos = rb_entry(parent, struct nfs_unique_id, rb_node); | ||
365 | if (new->id < pos->id) | ||
366 | break; | ||
367 | } | ||
368 | goto retry; | ||
369 | } | ||
370 | |||
371 | static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) | ||
372 | { | ||
373 | rb_erase(&id->rb_node, root); | ||
374 | } | ||
375 | |||
376 | static struct nfs4_state_owner * | 326 | static struct nfs4_state_owner * |
377 | nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) | 327 | nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) |
378 | { | 328 | { |
@@ -405,6 +355,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) | |||
405 | struct rb_node **p = &server->state_owners.rb_node, | 355 | struct rb_node **p = &server->state_owners.rb_node, |
406 | *parent = NULL; | 356 | *parent = NULL; |
407 | struct nfs4_state_owner *sp; | 357 | struct nfs4_state_owner *sp; |
358 | int err; | ||
408 | 359 | ||
409 | while (*p != NULL) { | 360 | while (*p != NULL) { |
410 | parent = *p; | 361 | parent = *p; |
@@ -421,8 +372,9 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) | |||
421 | return sp; | 372 | return sp; |
422 | } | 373 | } |
423 | } | 374 | } |
424 | nfs_alloc_unique_id_locked(&server->openowner_id, | 375 | err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); |
425 | &new->so_owner_id, 1, 64); | 376 | if (err) |
377 | return ERR_PTR(err); | ||
426 | rb_link_node(&new->so_server_node, parent, p); | 378 | rb_link_node(&new->so_server_node, parent, p); |
427 | rb_insert_color(&new->so_server_node, &server->state_owners); | 379 | rb_insert_color(&new->so_server_node, &server->state_owners); |
428 | return new; | 380 | return new; |
@@ -435,7 +387,23 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) | |||
435 | 387 | ||
436 | if (!RB_EMPTY_NODE(&sp->so_server_node)) | 388 | if (!RB_EMPTY_NODE(&sp->so_server_node)) |
437 | rb_erase(&sp->so_server_node, &server->state_owners); | 389 | rb_erase(&sp->so_server_node, &server->state_owners); |
438 | nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); | 390 | ida_remove(&server->openowner_id, sp->so_seqid.owner_id); |
391 | } | ||
392 | |||
393 | static void | ||
394 | nfs4_init_seqid_counter(struct nfs_seqid_counter *sc) | ||
395 | { | ||
396 | sc->flags = 0; | ||
397 | sc->counter = 0; | ||
398 | spin_lock_init(&sc->lock); | ||
399 | INIT_LIST_HEAD(&sc->list); | ||
400 | rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue"); | ||
401 | } | ||
402 | |||
403 | static void | ||
404 | nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc) | ||
405 | { | ||
406 | rpc_destroy_wait_queue(&sc->wait); | ||
439 | } | 407 | } |
440 | 408 | ||
441 | /* | 409 | /* |
@@ -444,19 +412,20 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) | |||
444 | * | 412 | * |
445 | */ | 413 | */ |
446 | static struct nfs4_state_owner * | 414 | static struct nfs4_state_owner * |
447 | nfs4_alloc_state_owner(void) | 415 | nfs4_alloc_state_owner(struct nfs_server *server, |
416 | struct rpc_cred *cred, | ||
417 | gfp_t gfp_flags) | ||
448 | { | 418 | { |
449 | struct nfs4_state_owner *sp; | 419 | struct nfs4_state_owner *sp; |
450 | 420 | ||
451 | sp = kzalloc(sizeof(*sp),GFP_NOFS); | 421 | sp = kzalloc(sizeof(*sp), gfp_flags); |
452 | if (!sp) | 422 | if (!sp) |
453 | return NULL; | 423 | return NULL; |
424 | sp->so_server = server; | ||
425 | sp->so_cred = get_rpccred(cred); | ||
454 | spin_lock_init(&sp->so_lock); | 426 | spin_lock_init(&sp->so_lock); |
455 | INIT_LIST_HEAD(&sp->so_states); | 427 | INIT_LIST_HEAD(&sp->so_states); |
456 | rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); | 428 | nfs4_init_seqid_counter(&sp->so_seqid); |
457 | sp->so_seqid.sequence = &sp->so_sequence; | ||
458 | spin_lock_init(&sp->so_sequence.lock); | ||
459 | INIT_LIST_HEAD(&sp->so_sequence.list); | ||
460 | atomic_set(&sp->so_count, 1); | 429 | atomic_set(&sp->so_count, 1); |
461 | INIT_LIST_HEAD(&sp->so_lru); | 430 | INIT_LIST_HEAD(&sp->so_lru); |
462 | return sp; | 431 | return sp; |
@@ -478,7 +447,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) | |||
478 | 447 | ||
479 | static void nfs4_free_state_owner(struct nfs4_state_owner *sp) | 448 | static void nfs4_free_state_owner(struct nfs4_state_owner *sp) |
480 | { | 449 | { |
481 | rpc_destroy_wait_queue(&sp->so_sequence.wait); | 450 | nfs4_destroy_seqid_counter(&sp->so_seqid); |
482 | put_rpccred(sp->so_cred); | 451 | put_rpccred(sp->so_cred); |
483 | kfree(sp); | 452 | kfree(sp); |
484 | } | 453 | } |
@@ -516,7 +485,8 @@ static void nfs4_gc_state_owners(struct nfs_server *server) | |||
516 | * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. | 485 | * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. |
517 | */ | 486 | */ |
518 | struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, | 487 | struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, |
519 | struct rpc_cred *cred) | 488 | struct rpc_cred *cred, |
489 | gfp_t gfp_flags) | ||
520 | { | 490 | { |
521 | struct nfs_client *clp = server->nfs_client; | 491 | struct nfs_client *clp = server->nfs_client; |
522 | struct nfs4_state_owner *sp, *new; | 492 | struct nfs4_state_owner *sp, *new; |
@@ -526,20 +496,18 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, | |||
526 | spin_unlock(&clp->cl_lock); | 496 | spin_unlock(&clp->cl_lock); |
527 | if (sp != NULL) | 497 | if (sp != NULL) |
528 | goto out; | 498 | goto out; |
529 | new = nfs4_alloc_state_owner(); | 499 | new = nfs4_alloc_state_owner(server, cred, gfp_flags); |
530 | if (new == NULL) | 500 | if (new == NULL) |
531 | goto out; | 501 | goto out; |
532 | new->so_server = server; | 502 | do { |
533 | new->so_cred = cred; | 503 | if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) |
534 | spin_lock(&clp->cl_lock); | 504 | break; |
535 | sp = nfs4_insert_state_owner_locked(new); | 505 | spin_lock(&clp->cl_lock); |
536 | spin_unlock(&clp->cl_lock); | 506 | sp = nfs4_insert_state_owner_locked(new); |
537 | if (sp == new) | 507 | spin_unlock(&clp->cl_lock); |
538 | get_rpccred(cred); | 508 | } while (sp == ERR_PTR(-EAGAIN)); |
539 | else { | 509 | if (sp != new) |
540 | rpc_destroy_wait_queue(&new->so_sequence.wait); | 510 | nfs4_free_state_owner(new); |
541 | kfree(new); | ||
542 | } | ||
543 | out: | 511 | out: |
544 | nfs4_gc_state_owners(server); | 512 | nfs4_gc_state_owners(server); |
545 | return sp; | 513 | return sp; |
@@ -795,15 +763,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
795 | { | 763 | { |
796 | struct nfs4_lock_state *lsp; | 764 | struct nfs4_lock_state *lsp; |
797 | struct nfs_server *server = state->owner->so_server; | 765 | struct nfs_server *server = state->owner->so_server; |
798 | struct nfs_client *clp = server->nfs_client; | ||
799 | 766 | ||
800 | lsp = kzalloc(sizeof(*lsp), GFP_NOFS); | 767 | lsp = kzalloc(sizeof(*lsp), GFP_NOFS); |
801 | if (lsp == NULL) | 768 | if (lsp == NULL) |
802 | return NULL; | 769 | return NULL; |
803 | rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); | 770 | nfs4_init_seqid_counter(&lsp->ls_seqid); |
804 | spin_lock_init(&lsp->ls_sequence.lock); | ||
805 | INIT_LIST_HEAD(&lsp->ls_sequence.list); | ||
806 | lsp->ls_seqid.sequence = &lsp->ls_sequence; | ||
807 | atomic_set(&lsp->ls_count, 1); | 771 | atomic_set(&lsp->ls_count, 1); |
808 | lsp->ls_state = state; | 772 | lsp->ls_state = state; |
809 | lsp->ls_owner.lo_type = type; | 773 | lsp->ls_owner.lo_type = type; |
@@ -815,25 +779,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
815 | lsp->ls_owner.lo_u.posix_owner = fl_owner; | 779 | lsp->ls_owner.lo_u.posix_owner = fl_owner; |
816 | break; | 780 | break; |
817 | default: | 781 | default: |
818 | kfree(lsp); | 782 | goto out_free; |
819 | return NULL; | ||
820 | } | 783 | } |
821 | spin_lock(&clp->cl_lock); | 784 | lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); |
822 | nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); | 785 | if (lsp->ls_seqid.owner_id < 0) |
823 | spin_unlock(&clp->cl_lock); | 786 | goto out_free; |
824 | INIT_LIST_HEAD(&lsp->ls_locks); | 787 | INIT_LIST_HEAD(&lsp->ls_locks); |
825 | return lsp; | 788 | return lsp; |
789 | out_free: | ||
790 | kfree(lsp); | ||
791 | return NULL; | ||
826 | } | 792 | } |
827 | 793 | ||
828 | static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) | 794 | void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) |
829 | { | 795 | { |
830 | struct nfs_server *server = lsp->ls_state->owner->so_server; | 796 | ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id); |
831 | struct nfs_client *clp = server->nfs_client; | 797 | nfs4_destroy_seqid_counter(&lsp->ls_seqid); |
832 | |||
833 | spin_lock(&clp->cl_lock); | ||
834 | nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id); | ||
835 | spin_unlock(&clp->cl_lock); | ||
836 | rpc_destroy_wait_queue(&lsp->ls_sequence.wait); | ||
837 | kfree(lsp); | 798 | kfree(lsp); |
838 | } | 799 | } |
839 | 800 | ||
@@ -865,7 +826,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ | |||
865 | } | 826 | } |
866 | spin_unlock(&state->state_lock); | 827 | spin_unlock(&state->state_lock); |
867 | if (new != NULL) | 828 | if (new != NULL) |
868 | nfs4_free_lock_state(new); | 829 | nfs4_free_lock_state(state->owner->so_server, new); |
869 | return lsp; | 830 | return lsp; |
870 | } | 831 | } |
871 | 832 | ||
@@ -886,9 +847,11 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) | |||
886 | if (list_empty(&state->lock_states)) | 847 | if (list_empty(&state->lock_states)) |
887 | clear_bit(LK_STATE_IN_USE, &state->flags); | 848 | clear_bit(LK_STATE_IN_USE, &state->flags); |
888 | spin_unlock(&state->state_lock); | 849 | spin_unlock(&state->state_lock); |
889 | if (lsp->ls_flags & NFS_LOCK_INITIALIZED) | 850 | if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { |
890 | nfs4_release_lockowner(lsp); | 851 | if (nfs4_release_lockowner(lsp) == 0) |
891 | nfs4_free_lock_state(lsp); | 852 | return; |
853 | } | ||
854 | nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp); | ||
892 | } | 855 | } |
893 | 856 | ||
894 | static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) | 857 | static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) |
@@ -918,7 +881,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) | |||
918 | if (fl->fl_flags & FL_POSIX) | 881 | if (fl->fl_flags & FL_POSIX) |
919 | lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); | 882 | lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); |
920 | else if (fl->fl_flags & FL_FLOCK) | 883 | else if (fl->fl_flags & FL_FLOCK) |
921 | lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); | 884 | lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, |
885 | NFS4_FLOCK_LOCK_TYPE); | ||
922 | else | 886 | else |
923 | return -EINVAL; | 887 | return -EINVAL; |
924 | if (lsp == NULL) | 888 | if (lsp == NULL) |
@@ -928,28 +892,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) | |||
928 | return 0; | 892 | return 0; |
929 | } | 893 | } |
930 | 894 | ||
931 | /* | 895 | static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, |
932 | * Byte-range lock aware utility to initialize the stateid of read/write | 896 | fl_owner_t fl_owner, pid_t fl_pid) |
933 | * requests. | ||
934 | */ | ||
935 | void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) | ||
936 | { | 897 | { |
937 | struct nfs4_lock_state *lsp; | 898 | struct nfs4_lock_state *lsp; |
938 | int seq; | 899 | bool ret = false; |
939 | 900 | ||
940 | do { | ||
941 | seq = read_seqbegin(&state->seqlock); | ||
942 | memcpy(dst, &state->stateid, sizeof(*dst)); | ||
943 | } while (read_seqretry(&state->seqlock, seq)); | ||
944 | if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) | 901 | if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) |
945 | return; | 902 | goto out; |
946 | 903 | ||
947 | spin_lock(&state->state_lock); | 904 | spin_lock(&state->state_lock); |
948 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); | 905 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); |
949 | if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) | 906 | if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { |
950 | memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); | 907 | nfs4_stateid_copy(dst, &lsp->ls_stateid); |
908 | ret = true; | ||
909 | } | ||
951 | spin_unlock(&state->state_lock); | 910 | spin_unlock(&state->state_lock); |
952 | nfs4_put_lock_state(lsp); | 911 | nfs4_put_lock_state(lsp); |
912 | out: | ||
913 | return ret; | ||
914 | } | ||
915 | |||
916 | static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) | ||
917 | { | ||
918 | int seq; | ||
919 | |||
920 | do { | ||
921 | seq = read_seqbegin(&state->seqlock); | ||
922 | nfs4_stateid_copy(dst, &state->stateid); | ||
923 | } while (read_seqretry(&state->seqlock, seq)); | ||
924 | } | ||
925 | |||
926 | /* | ||
927 | * Byte-range lock aware utility to initialize the stateid of read/write | ||
928 | * requests. | ||
929 | */ | ||
930 | void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, | ||
931 | fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) | ||
932 | { | ||
933 | if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) | ||
934 | return; | ||
935 | if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) | ||
936 | return; | ||
937 | nfs4_copy_open_stateid(dst, state); | ||
953 | } | 938 | } |
954 | 939 | ||
955 | struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) | 940 | struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) |
@@ -960,20 +945,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m | |||
960 | if (new != NULL) { | 945 | if (new != NULL) { |
961 | new->sequence = counter; | 946 | new->sequence = counter; |
962 | INIT_LIST_HEAD(&new->list); | 947 | INIT_LIST_HEAD(&new->list); |
948 | new->task = NULL; | ||
963 | } | 949 | } |
964 | return new; | 950 | return new; |
965 | } | 951 | } |
966 | 952 | ||
967 | void nfs_release_seqid(struct nfs_seqid *seqid) | 953 | void nfs_release_seqid(struct nfs_seqid *seqid) |
968 | { | 954 | { |
969 | if (!list_empty(&seqid->list)) { | 955 | struct nfs_seqid_counter *sequence; |
970 | struct rpc_sequence *sequence = seqid->sequence->sequence; | ||
971 | 956 | ||
972 | spin_lock(&sequence->lock); | 957 | if (list_empty(&seqid->list)) |
973 | list_del_init(&seqid->list); | 958 | return; |
974 | spin_unlock(&sequence->lock); | 959 | sequence = seqid->sequence; |
975 | rpc_wake_up(&sequence->wait); | 960 | spin_lock(&sequence->lock); |
961 | list_del_init(&seqid->list); | ||
962 | if (!list_empty(&sequence->list)) { | ||
963 | struct nfs_seqid *next; | ||
964 | |||
965 | next = list_first_entry(&sequence->list, | ||
966 | struct nfs_seqid, list); | ||
967 | rpc_wake_up_queued_task(&sequence->wait, next->task); | ||
976 | } | 968 | } |
969 | spin_unlock(&sequence->lock); | ||
977 | } | 970 | } |
978 | 971 | ||
979 | void nfs_free_seqid(struct nfs_seqid *seqid) | 972 | void nfs_free_seqid(struct nfs_seqid *seqid) |
@@ -989,14 +982,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid) | |||
989 | */ | 982 | */ |
990 | static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) | 983 | static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) |
991 | { | 984 | { |
992 | BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); | 985 | BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid); |
993 | switch (status) { | 986 | switch (status) { |
994 | case 0: | 987 | case 0: |
995 | break; | 988 | break; |
996 | case -NFS4ERR_BAD_SEQID: | 989 | case -NFS4ERR_BAD_SEQID: |
997 | if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) | 990 | if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) |
998 | return; | 991 | return; |
999 | printk(KERN_WARNING "NFS: v4 server returned a bad" | 992 | pr_warn_ratelimited("NFS: v4 server returned a bad" |
1000 | " sequence-id error on an" | 993 | " sequence-id error on an" |
1001 | " unconfirmed sequence %p!\n", | 994 | " unconfirmed sequence %p!\n", |
1002 | seqid->sequence); | 995 | seqid->sequence); |
@@ -1040,10 +1033,11 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) | |||
1040 | 1033 | ||
1041 | int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) | 1034 | int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) |
1042 | { | 1035 | { |
1043 | struct rpc_sequence *sequence = seqid->sequence->sequence; | 1036 | struct nfs_seqid_counter *sequence = seqid->sequence; |
1044 | int status = 0; | 1037 | int status = 0; |
1045 | 1038 | ||
1046 | spin_lock(&sequence->lock); | 1039 | spin_lock(&sequence->lock); |
1040 | seqid->task = task; | ||
1047 | if (list_empty(&seqid->list)) | 1041 | if (list_empty(&seqid->list)) |
1048 | list_add_tail(&seqid->list, &sequence->list); | 1042 | list_add_tail(&seqid->list, &sequence->list); |
1049 | if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) | 1043 | if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) |
@@ -1072,19 +1066,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp) | |||
1072 | void nfs4_schedule_state_manager(struct nfs_client *clp) | 1066 | void nfs4_schedule_state_manager(struct nfs_client *clp) |
1073 | { | 1067 | { |
1074 | struct task_struct *task; | 1068 | struct task_struct *task; |
1069 | char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; | ||
1075 | 1070 | ||
1076 | if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) | 1071 | if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) |
1077 | return; | 1072 | return; |
1078 | __module_get(THIS_MODULE); | 1073 | __module_get(THIS_MODULE); |
1079 | atomic_inc(&clp->cl_count); | 1074 | atomic_inc(&clp->cl_count); |
1080 | task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", | 1075 | |
1081 | rpc_peeraddr2str(clp->cl_rpcclient, | 1076 | /* The rcu_read_lock() is not strictly necessary, as the state |
1082 | RPC_DISPLAY_ADDR)); | 1077 | * manager is the only thread that ever changes the rpc_xprt |
1083 | if (!IS_ERR(task)) | 1078 | * after it's initialized. At this point, we're single threaded. */ |
1084 | return; | 1079 | rcu_read_lock(); |
1085 | nfs4_clear_state_manager_bit(clp); | 1080 | snprintf(buf, sizeof(buf), "%s-manager", |
1086 | nfs_put_client(clp); | 1081 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
1087 | module_put(THIS_MODULE); | 1082 | rcu_read_unlock(); |
1083 | task = kthread_run(nfs4_run_state_manager, clp, buf); | ||
1084 | if (IS_ERR(task)) { | ||
1085 | printk(KERN_ERR "%s: kthread_run: %ld\n", | ||
1086 | __func__, PTR_ERR(task)); | ||
1087 | nfs4_clear_state_manager_bit(clp); | ||
1088 | nfs_put_client(clp); | ||
1089 | module_put(THIS_MODULE); | ||
1090 | } | ||
1088 | } | 1091 | } |
1089 | 1092 | ||
1090 | /* | 1093 | /* |
@@ -1098,10 +1101,25 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) | |||
1098 | set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); | 1101 | set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); |
1099 | nfs4_schedule_state_manager(clp); | 1102 | nfs4_schedule_state_manager(clp); |
1100 | } | 1103 | } |
1104 | EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); | ||
1105 | |||
1106 | /* | ||
1107 | * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN | ||
1108 | * @clp: client to process | ||
1109 | * | ||
1110 | * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a | ||
1111 | * resend of the SETCLIENTID and hence re-establish the | ||
1112 | * callback channel. Then return all existing delegations. | ||
1113 | */ | ||
1114 | static void nfs40_handle_cb_pathdown(struct nfs_client *clp) | ||
1115 | { | ||
1116 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); | ||
1117 | nfs_expire_all_delegations(clp); | ||
1118 | } | ||
1101 | 1119 | ||
1102 | void nfs4_schedule_path_down_recovery(struct nfs_client *clp) | 1120 | void nfs4_schedule_path_down_recovery(struct nfs_client *clp) |
1103 | { | 1121 | { |
1104 | nfs_handle_cb_pathdown(clp); | 1122 | nfs40_handle_cb_pathdown(clp); |
1105 | nfs4_schedule_state_manager(clp); | 1123 | nfs4_schedule_state_manager(clp); |
1106 | } | 1124 | } |
1107 | 1125 | ||
@@ -1132,11 +1150,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 | |||
1132 | { | 1150 | { |
1133 | struct nfs_client *clp = server->nfs_client; | 1151 | struct nfs_client *clp = server->nfs_client; |
1134 | 1152 | ||
1135 | if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
1136 | nfs_async_inode_return_delegation(state->inode, &state->stateid); | ||
1137 | nfs4_state_mark_reclaim_nograce(clp, state); | 1153 | nfs4_state_mark_reclaim_nograce(clp, state); |
1138 | nfs4_schedule_state_manager(clp); | 1154 | nfs4_schedule_state_manager(clp); |
1139 | } | 1155 | } |
1156 | EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); | ||
1157 | |||
1158 | void nfs_inode_find_state_and_recover(struct inode *inode, | ||
1159 | const nfs4_stateid *stateid) | ||
1160 | { | ||
1161 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | ||
1162 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1163 | struct nfs_open_context *ctx; | ||
1164 | struct nfs4_state *state; | ||
1165 | bool found = false; | ||
1166 | |||
1167 | spin_lock(&inode->i_lock); | ||
1168 | list_for_each_entry(ctx, &nfsi->open_files, list) { | ||
1169 | state = ctx->state; | ||
1170 | if (state == NULL) | ||
1171 | continue; | ||
1172 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
1173 | continue; | ||
1174 | if (!nfs4_stateid_match(&state->stateid, stateid)) | ||
1175 | continue; | ||
1176 | nfs4_state_mark_reclaim_nograce(clp, state); | ||
1177 | found = true; | ||
1178 | } | ||
1179 | spin_unlock(&inode->i_lock); | ||
1180 | if (found) | ||
1181 | nfs4_schedule_state_manager(clp); | ||
1182 | } | ||
1183 | |||
1140 | 1184 | ||
1141 | static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) | 1185 | static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) |
1142 | { | 1186 | { |
@@ -1175,8 +1219,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
1175 | case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: | 1219 | case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: |
1176 | goto out; | 1220 | goto out; |
1177 | default: | 1221 | default: |
1178 | printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", | 1222 | printk(KERN_ERR "NFS: %s: unhandled error %d. " |
1179 | __func__, status); | 1223 | "Zeroing state\n", __func__, status); |
1180 | case -ENOMEM: | 1224 | case -ENOMEM: |
1181 | case -NFS4ERR_DENIED: | 1225 | case -NFS4ERR_DENIED: |
1182 | case -NFS4ERR_RECLAIM_BAD: | 1226 | case -NFS4ERR_RECLAIM_BAD: |
@@ -1222,8 +1266,9 @@ restart: | |||
1222 | spin_lock(&state->state_lock); | 1266 | spin_lock(&state->state_lock); |
1223 | list_for_each_entry(lock, &state->lock_states, ls_locks) { | 1267 | list_for_each_entry(lock, &state->lock_states, ls_locks) { |
1224 | if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) | 1268 | if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) |
1225 | printk("%s: Lock reclaim failed!\n", | 1269 | pr_warn_ratelimited("NFS: " |
1226 | __func__); | 1270 | "%s: Lock reclaim " |
1271 | "failed!\n", __func__); | ||
1227 | } | 1272 | } |
1228 | spin_unlock(&state->state_lock); | 1273 | spin_unlock(&state->state_lock); |
1229 | nfs4_put_open_state(state); | 1274 | nfs4_put_open_state(state); |
@@ -1232,8 +1277,8 @@ restart: | |||
1232 | } | 1277 | } |
1233 | switch (status) { | 1278 | switch (status) { |
1234 | default: | 1279 | default: |
1235 | printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", | 1280 | printk(KERN_ERR "NFS: %s: unhandled error %d. " |
1236 | __func__, status); | 1281 | "Zeroing state\n", __func__, status); |
1237 | case -ENOENT: | 1282 | case -ENOENT: |
1238 | case -ENOMEM: | 1283 | case -ENOMEM: |
1239 | case -ESTALE: | 1284 | case -ESTALE: |
@@ -1241,8 +1286,8 @@ restart: | |||
1241 | * Open state on this file cannot be recovered | 1286 | * Open state on this file cannot be recovered |
1242 | * All we can do is revert to using the zero stateid. | 1287 | * All we can do is revert to using the zero stateid. |
1243 | */ | 1288 | */ |
1244 | memset(state->stateid.data, 0, | 1289 | memset(&state->stateid, 0, |
1245 | sizeof(state->stateid.data)); | 1290 | sizeof(state->stateid)); |
1246 | /* Mark the file as being 'closed' */ | 1291 | /* Mark the file as being 'closed' */ |
1247 | state->state = 0; | 1292 | state->state = 0; |
1248 | break; | 1293 | break; |
@@ -1420,7 +1465,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) | |||
1420 | case 0: | 1465 | case 0: |
1421 | break; | 1466 | break; |
1422 | case -NFS4ERR_CB_PATH_DOWN: | 1467 | case -NFS4ERR_CB_PATH_DOWN: |
1423 | nfs_handle_cb_pathdown(clp); | 1468 | nfs40_handle_cb_pathdown(clp); |
1424 | break; | 1469 | break; |
1425 | case -NFS4ERR_NO_GRACE: | 1470 | case -NFS4ERR_NO_GRACE: |
1426 | nfs4_state_end_reclaim_reboot(clp); | 1471 | nfs4_state_end_reclaim_reboot(clp); |
@@ -1801,7 +1846,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1801 | } while (atomic_read(&clp->cl_count) > 1); | 1846 | } while (atomic_read(&clp->cl_count) > 1); |
1802 | return; | 1847 | return; |
1803 | out_error: | 1848 | out_error: |
1804 | printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" | 1849 | pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" |
1805 | " with error %d\n", clp->cl_hostname, -status); | 1850 | " with error %d\n", clp->cl_hostname, -status); |
1806 | nfs4_end_drain_session(clp); | 1851 | nfs4_end_drain_session(clp); |
1807 | nfs4_clear_state_manager_bit(clp); | 1852 | nfs4_clear_state_manager_bit(clp); |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 33bd8d0f745d..c74fdb114b48 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -44,6 +44,8 @@ | |||
44 | #include <linux/pagemap.h> | 44 | #include <linux/pagemap.h> |
45 | #include <linux/proc_fs.h> | 45 | #include <linux/proc_fs.h> |
46 | #include <linux/kdev_t.h> | 46 | #include <linux/kdev_t.h> |
47 | #include <linux/module.h> | ||
48 | #include <linux/utsname.h> | ||
47 | #include <linux/sunrpc/clnt.h> | 49 | #include <linux/sunrpc/clnt.h> |
48 | #include <linux/sunrpc/msg_prot.h> | 50 | #include <linux/sunrpc/msg_prot.h> |
49 | #include <linux/sunrpc/gss_api.h> | 51 | #include <linux/sunrpc/gss_api.h> |
@@ -271,7 +273,12 @@ static int nfs4_stat_to_errno(int); | |||
271 | 1 /* flags */ + \ | 273 | 1 /* flags */ + \ |
272 | 1 /* spa_how */ + \ | 274 | 1 /* spa_how */ + \ |
273 | 0 /* SP4_NONE (for now) */ + \ | 275 | 0 /* SP4_NONE (for now) */ + \ |
274 | 1 /* zero implemetation id array */) | 276 | 1 /* implementation id array of size 1 */ + \ |
277 | 1 /* nii_domain */ + \ | ||
278 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
279 | 1 /* nii_name */ + \ | ||
280 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
281 | 3 /* nii_date */) | ||
275 | #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ | 282 | #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ |
276 | 2 /* eir_clientid */ + \ | 283 | 2 /* eir_clientid */ + \ |
277 | 1 /* eir_sequenceid */ + \ | 284 | 1 /* eir_sequenceid */ + \ |
@@ -284,7 +291,11 @@ static int nfs4_stat_to_errno(int); | |||
284 | /* eir_server_scope<> */ \ | 291 | /* eir_server_scope<> */ \ |
285 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ | 292 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ |
286 | 1 /* eir_server_impl_id array length */ + \ | 293 | 1 /* eir_server_impl_id array length */ + \ |
287 | 0 /* ignored eir_server_impl_id contents */) | 294 | 1 /* nii_domain */ + \ |
295 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
296 | 1 /* nii_name */ + \ | ||
297 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
298 | 3 /* nii_date */) | ||
288 | #define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) | 299 | #define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) |
289 | #define decode_channel_attrs_maxsz (6 + \ | 300 | #define decode_channel_attrs_maxsz (6 + \ |
290 | 1 /* ca_rdma_ird.len */ + \ | 301 | 1 /* ca_rdma_ird.len */ + \ |
@@ -838,6 +849,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | |||
838 | XDR_UNIT); | 849 | XDR_UNIT); |
839 | #endif /* CONFIG_NFS_V4_1 */ | 850 | #endif /* CONFIG_NFS_V4_1 */ |
840 | 851 | ||
852 | static unsigned short send_implementation_id = 1; | ||
853 | |||
854 | module_param(send_implementation_id, ushort, 0644); | ||
855 | MODULE_PARM_DESC(send_implementation_id, | ||
856 | "Send implementation ID with NFSv4.1 exchange_id"); | ||
857 | |||
841 | static const umode_t nfs_type2fmt[] = { | 858 | static const umode_t nfs_type2fmt[] = { |
842 | [NF4BAD] = 0, | 859 | [NF4BAD] = 0, |
843 | [NF4REG] = S_IFREG, | 860 | [NF4REG] = S_IFREG, |
@@ -868,15 +885,44 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) | |||
868 | return p; | 885 | return p; |
869 | } | 886 | } |
870 | 887 | ||
888 | static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) | ||
889 | { | ||
890 | __be32 *p; | ||
891 | |||
892 | p = xdr_reserve_space(xdr, len); | ||
893 | xdr_encode_opaque_fixed(p, buf, len); | ||
894 | } | ||
895 | |||
871 | static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) | 896 | static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) |
872 | { | 897 | { |
873 | __be32 *p; | 898 | __be32 *p; |
874 | 899 | ||
875 | p = xdr_reserve_space(xdr, 4 + len); | 900 | p = reserve_space(xdr, 4 + len); |
876 | BUG_ON(p == NULL); | ||
877 | xdr_encode_opaque(p, str, len); | 901 | xdr_encode_opaque(p, str, len); |
878 | } | 902 | } |
879 | 903 | ||
904 | static void encode_uint32(struct xdr_stream *xdr, u32 n) | ||
905 | { | ||
906 | __be32 *p; | ||
907 | |||
908 | p = reserve_space(xdr, 4); | ||
909 | *p = cpu_to_be32(n); | ||
910 | } | ||
911 | |||
912 | static void encode_uint64(struct xdr_stream *xdr, u64 n) | ||
913 | { | ||
914 | __be32 *p; | ||
915 | |||
916 | p = reserve_space(xdr, 8); | ||
917 | xdr_encode_hyper(p, n); | ||
918 | } | ||
919 | |||
920 | static void encode_nfs4_seqid(struct xdr_stream *xdr, | ||
921 | const struct nfs_seqid *seqid) | ||
922 | { | ||
923 | encode_uint32(xdr, seqid->sequence->counter); | ||
924 | } | ||
925 | |||
880 | static void encode_compound_hdr(struct xdr_stream *xdr, | 926 | static void encode_compound_hdr(struct xdr_stream *xdr, |
881 | struct rpc_rqst *req, | 927 | struct rpc_rqst *req, |
882 | struct compound_hdr *hdr) | 928 | struct compound_hdr *hdr) |
@@ -889,28 +935,37 @@ static void encode_compound_hdr(struct xdr_stream *xdr, | |||
889 | * but this is not required as a MUST for the server to do so. */ | 935 | * but this is not required as a MUST for the server to do so. */ |
890 | hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; | 936 | hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; |
891 | 937 | ||
892 | dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); | ||
893 | BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); | 938 | BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); |
894 | p = reserve_space(xdr, 4 + hdr->taglen + 8); | 939 | encode_string(xdr, hdr->taglen, hdr->tag); |
895 | p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); | 940 | p = reserve_space(xdr, 8); |
896 | *p++ = cpu_to_be32(hdr->minorversion); | 941 | *p++ = cpu_to_be32(hdr->minorversion); |
897 | hdr->nops_p = p; | 942 | hdr->nops_p = p; |
898 | *p = cpu_to_be32(hdr->nops); | 943 | *p = cpu_to_be32(hdr->nops); |
899 | } | 944 | } |
900 | 945 | ||
946 | static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op, | ||
947 | uint32_t replen, | ||
948 | struct compound_hdr *hdr) | ||
949 | { | ||
950 | encode_uint32(xdr, op); | ||
951 | hdr->nops++; | ||
952 | hdr->replen += replen; | ||
953 | } | ||
954 | |||
901 | static void encode_nops(struct compound_hdr *hdr) | 955 | static void encode_nops(struct compound_hdr *hdr) |
902 | { | 956 | { |
903 | BUG_ON(hdr->nops > NFS4_MAX_OPS); | 957 | BUG_ON(hdr->nops > NFS4_MAX_OPS); |
904 | *hdr->nops_p = htonl(hdr->nops); | 958 | *hdr->nops_p = htonl(hdr->nops); |
905 | } | 959 | } |
906 | 960 | ||
907 | static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) | 961 | static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) |
908 | { | 962 | { |
909 | __be32 *p; | 963 | encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); |
964 | } | ||
910 | 965 | ||
911 | p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); | 966 | static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) |
912 | BUG_ON(p == NULL); | 967 | { |
913 | xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); | 968 | encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); |
914 | } | 969 | } |
915 | 970 | ||
916 | static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) | 971 | static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) |
@@ -1023,7 +1078,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
1023 | * Now we backfill the bitmap and the attribute buffer length. | 1078 | * Now we backfill the bitmap and the attribute buffer length. |
1024 | */ | 1079 | */ |
1025 | if (len != ((char *)p - (char *)q) + 4) { | 1080 | if (len != ((char *)p - (char *)q) + 4) { |
1026 | printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", | 1081 | printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n", |
1027 | len, ((char *)p - (char *)q) + 4); | 1082 | len, ((char *)p - (char *)q) + 4); |
1028 | BUG(); | 1083 | BUG(); |
1029 | } | 1084 | } |
@@ -1037,46 +1092,33 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
1037 | 1092 | ||
1038 | static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) | 1093 | static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) |
1039 | { | 1094 | { |
1040 | __be32 *p; | 1095 | encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr); |
1041 | 1096 | encode_uint32(xdr, access); | |
1042 | p = reserve_space(xdr, 8); | ||
1043 | *p++ = cpu_to_be32(OP_ACCESS); | ||
1044 | *p = cpu_to_be32(access); | ||
1045 | hdr->nops++; | ||
1046 | hdr->replen += decode_access_maxsz; | ||
1047 | } | 1097 | } |
1048 | 1098 | ||
1049 | static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) | 1099 | static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) |
1050 | { | 1100 | { |
1051 | __be32 *p; | 1101 | encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); |
1052 | 1102 | encode_nfs4_seqid(xdr, arg->seqid); | |
1053 | p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); | 1103 | encode_nfs4_stateid(xdr, arg->stateid); |
1054 | *p++ = cpu_to_be32(OP_CLOSE); | ||
1055 | *p++ = cpu_to_be32(arg->seqid->sequence->counter); | ||
1056 | xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); | ||
1057 | hdr->nops++; | ||
1058 | hdr->replen += decode_close_maxsz; | ||
1059 | } | 1104 | } |
1060 | 1105 | ||
1061 | static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) | 1106 | static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) |
1062 | { | 1107 | { |
1063 | __be32 *p; | 1108 | __be32 *p; |
1064 | 1109 | ||
1065 | p = reserve_space(xdr, 16); | 1110 | encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); |
1066 | *p++ = cpu_to_be32(OP_COMMIT); | 1111 | p = reserve_space(xdr, 12); |
1067 | p = xdr_encode_hyper(p, args->offset); | 1112 | p = xdr_encode_hyper(p, args->offset); |
1068 | *p = cpu_to_be32(args->count); | 1113 | *p = cpu_to_be32(args->count); |
1069 | hdr->nops++; | ||
1070 | hdr->replen += decode_commit_maxsz; | ||
1071 | } | 1114 | } |
1072 | 1115 | ||
1073 | static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) | 1116 | static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) |
1074 | { | 1117 | { |
1075 | __be32 *p; | 1118 | __be32 *p; |
1076 | 1119 | ||
1077 | p = reserve_space(xdr, 8); | 1120 | encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr); |
1078 | *p++ = cpu_to_be32(OP_CREATE); | 1121 | encode_uint32(xdr, create->ftype); |
1079 | *p = cpu_to_be32(create->ftype); | ||
1080 | 1122 | ||
1081 | switch (create->ftype) { | 1123 | switch (create->ftype) { |
1082 | case NF4LNK: | 1124 | case NF4LNK: |
@@ -1096,9 +1138,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * | |||
1096 | } | 1138 | } |
1097 | 1139 | ||
1098 | encode_string(xdr, create->name->len, create->name->name); | 1140 | encode_string(xdr, create->name->len, create->name->name); |
1099 | hdr->nops++; | ||
1100 | hdr->replen += decode_create_maxsz; | ||
1101 | |||
1102 | encode_attrs(xdr, create->attrs, create->server); | 1141 | encode_attrs(xdr, create->attrs, create->server); |
1103 | } | 1142 | } |
1104 | 1143 | ||
@@ -1106,25 +1145,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c | |||
1106 | { | 1145 | { |
1107 | __be32 *p; | 1146 | __be32 *p; |
1108 | 1147 | ||
1109 | p = reserve_space(xdr, 12); | 1148 | encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); |
1110 | *p++ = cpu_to_be32(OP_GETATTR); | 1149 | p = reserve_space(xdr, 8); |
1111 | *p++ = cpu_to_be32(1); | 1150 | *p++ = cpu_to_be32(1); |
1112 | *p = cpu_to_be32(bitmap); | 1151 | *p = cpu_to_be32(bitmap); |
1113 | hdr->nops++; | ||
1114 | hdr->replen += decode_getattr_maxsz; | ||
1115 | } | 1152 | } |
1116 | 1153 | ||
1117 | static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) | 1154 | static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) |
1118 | { | 1155 | { |
1119 | __be32 *p; | 1156 | __be32 *p; |
1120 | 1157 | ||
1121 | p = reserve_space(xdr, 16); | 1158 | encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); |
1122 | *p++ = cpu_to_be32(OP_GETATTR); | 1159 | p = reserve_space(xdr, 12); |
1123 | *p++ = cpu_to_be32(2); | 1160 | *p++ = cpu_to_be32(2); |
1124 | *p++ = cpu_to_be32(bm0); | 1161 | *p++ = cpu_to_be32(bm0); |
1125 | *p = cpu_to_be32(bm1); | 1162 | *p = cpu_to_be32(bm1); |
1126 | hdr->nops++; | ||
1127 | hdr->replen += decode_getattr_maxsz; | ||
1128 | } | 1163 | } |
1129 | 1164 | ||
1130 | static void | 1165 | static void |
@@ -1134,8 +1169,7 @@ encode_getattr_three(struct xdr_stream *xdr, | |||
1134 | { | 1169 | { |
1135 | __be32 *p; | 1170 | __be32 *p; |
1136 | 1171 | ||
1137 | p = reserve_space(xdr, 4); | 1172 | encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); |
1138 | *p = cpu_to_be32(OP_GETATTR); | ||
1139 | if (bm2) { | 1173 | if (bm2) { |
1140 | p = reserve_space(xdr, 16); | 1174 | p = reserve_space(xdr, 16); |
1141 | *p++ = cpu_to_be32(3); | 1175 | *p++ = cpu_to_be32(3); |
@@ -1152,8 +1186,6 @@ encode_getattr_three(struct xdr_stream *xdr, | |||
1152 | *p++ = cpu_to_be32(1); | 1186 | *p++ = cpu_to_be32(1); |
1153 | *p = cpu_to_be32(bm0); | 1187 | *p = cpu_to_be32(bm0); |
1154 | } | 1188 | } |
1155 | hdr->nops++; | ||
1156 | hdr->replen += decode_getattr_maxsz; | ||
1157 | } | 1189 | } |
1158 | 1190 | ||
1159 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1191 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
@@ -1179,23 +1211,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru | |||
1179 | 1211 | ||
1180 | static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | 1212 | static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) |
1181 | { | 1213 | { |
1182 | __be32 *p; | 1214 | encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr); |
1183 | |||
1184 | p = reserve_space(xdr, 4); | ||
1185 | *p = cpu_to_be32(OP_GETFH); | ||
1186 | hdr->nops++; | ||
1187 | hdr->replen += decode_getfh_maxsz; | ||
1188 | } | 1215 | } |
1189 | 1216 | ||
1190 | static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | 1217 | static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) |
1191 | { | 1218 | { |
1192 | __be32 *p; | 1219 | encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr); |
1193 | 1220 | encode_string(xdr, name->len, name->name); | |
1194 | p = reserve_space(xdr, 8 + name->len); | ||
1195 | *p++ = cpu_to_be32(OP_LINK); | ||
1196 | xdr_encode_opaque(p, name->name, name->len); | ||
1197 | hdr->nops++; | ||
1198 | hdr->replen += decode_link_maxsz; | ||
1199 | } | 1221 | } |
1200 | 1222 | ||
1201 | static inline int nfs4_lock_type(struct file_lock *fl, int block) | 1223 | static inline int nfs4_lock_type(struct file_lock *fl, int block) |
@@ -1232,79 +1254,60 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args | |||
1232 | { | 1254 | { |
1233 | __be32 *p; | 1255 | __be32 *p; |
1234 | 1256 | ||
1235 | p = reserve_space(xdr, 32); | 1257 | encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr); |
1236 | *p++ = cpu_to_be32(OP_LOCK); | 1258 | p = reserve_space(xdr, 28); |
1237 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); | 1259 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); |
1238 | *p++ = cpu_to_be32(args->reclaim); | 1260 | *p++ = cpu_to_be32(args->reclaim); |
1239 | p = xdr_encode_hyper(p, args->fl->fl_start); | 1261 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1240 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); | 1262 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1241 | *p = cpu_to_be32(args->new_lock_owner); | 1263 | *p = cpu_to_be32(args->new_lock_owner); |
1242 | if (args->new_lock_owner){ | 1264 | if (args->new_lock_owner){ |
1243 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); | 1265 | encode_nfs4_seqid(xdr, args->open_seqid); |
1244 | *p++ = cpu_to_be32(args->open_seqid->sequence->counter); | 1266 | encode_nfs4_stateid(xdr, args->open_stateid); |
1245 | p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); | 1267 | encode_nfs4_seqid(xdr, args->lock_seqid); |
1246 | *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); | ||
1247 | encode_lockowner(xdr, &args->lock_owner); | 1268 | encode_lockowner(xdr, &args->lock_owner); |
1248 | } | 1269 | } |
1249 | else { | 1270 | else { |
1250 | p = reserve_space(xdr, NFS4_STATEID_SIZE+4); | 1271 | encode_nfs4_stateid(xdr, args->lock_stateid); |
1251 | p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); | 1272 | encode_nfs4_seqid(xdr, args->lock_seqid); |
1252 | *p = cpu_to_be32(args->lock_seqid->sequence->counter); | ||
1253 | } | 1273 | } |
1254 | hdr->nops++; | ||
1255 | hdr->replen += decode_lock_maxsz; | ||
1256 | } | 1274 | } |
1257 | 1275 | ||
1258 | static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) | 1276 | static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) |
1259 | { | 1277 | { |
1260 | __be32 *p; | 1278 | __be32 *p; |
1261 | 1279 | ||
1262 | p = reserve_space(xdr, 24); | 1280 | encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr); |
1263 | *p++ = cpu_to_be32(OP_LOCKT); | 1281 | p = reserve_space(xdr, 20); |
1264 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); | 1282 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); |
1265 | p = xdr_encode_hyper(p, args->fl->fl_start); | 1283 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1266 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); | 1284 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1267 | encode_lockowner(xdr, &args->lock_owner); | 1285 | encode_lockowner(xdr, &args->lock_owner); |
1268 | hdr->nops++; | ||
1269 | hdr->replen += decode_lockt_maxsz; | ||
1270 | } | 1286 | } |
1271 | 1287 | ||
1272 | static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) | 1288 | static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) |
1273 | { | 1289 | { |
1274 | __be32 *p; | 1290 | __be32 *p; |
1275 | 1291 | ||
1276 | p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); | 1292 | encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); |
1277 | *p++ = cpu_to_be32(OP_LOCKU); | 1293 | encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); |
1278 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); | 1294 | encode_nfs4_seqid(xdr, args->seqid); |
1279 | *p++ = cpu_to_be32(args->seqid->sequence->counter); | 1295 | encode_nfs4_stateid(xdr, args->stateid); |
1280 | p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | 1296 | p = reserve_space(xdr, 16); |
1281 | p = xdr_encode_hyper(p, args->fl->fl_start); | 1297 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1282 | xdr_encode_hyper(p, nfs4_lock_length(args->fl)); | 1298 | xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1283 | hdr->nops++; | ||
1284 | hdr->replen += decode_locku_maxsz; | ||
1285 | } | 1299 | } |
1286 | 1300 | ||
1287 | static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) | 1301 | static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) |
1288 | { | 1302 | { |
1289 | __be32 *p; | 1303 | encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr); |
1290 | |||
1291 | p = reserve_space(xdr, 4); | ||
1292 | *p = cpu_to_be32(OP_RELEASE_LOCKOWNER); | ||
1293 | encode_lockowner(xdr, lowner); | 1304 | encode_lockowner(xdr, lowner); |
1294 | hdr->nops++; | ||
1295 | hdr->replen += decode_release_lockowner_maxsz; | ||
1296 | } | 1305 | } |
1297 | 1306 | ||
1298 | static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | 1307 | static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) |
1299 | { | 1308 | { |
1300 | int len = name->len; | 1309 | encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr); |
1301 | __be32 *p; | 1310 | encode_string(xdr, name->len, name->name); |
1302 | |||
1303 | p = reserve_space(xdr, 8 + len); | ||
1304 | *p++ = cpu_to_be32(OP_LOOKUP); | ||
1305 | xdr_encode_opaque(p, name->name, len); | ||
1306 | hdr->nops++; | ||
1307 | hdr->replen += decode_lookup_maxsz; | ||
1308 | } | 1311 | } |
1309 | 1312 | ||
1310 | static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) | 1313 | static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) |
@@ -1335,9 +1338,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena | |||
1335 | * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, | 1338 | * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, |
1336 | * owner 4 = 32 | 1339 | * owner 4 = 32 |
1337 | */ | 1340 | */ |
1338 | p = reserve_space(xdr, 8); | 1341 | encode_nfs4_seqid(xdr, arg->seqid); |
1339 | *p++ = cpu_to_be32(OP_OPEN); | ||
1340 | *p = cpu_to_be32(arg->seqid->sequence->counter); | ||
1341 | encode_share_access(xdr, arg->fmode); | 1342 | encode_share_access(xdr, arg->fmode); |
1342 | p = reserve_space(xdr, 32); | 1343 | p = reserve_space(xdr, 32); |
1343 | p = xdr_encode_hyper(p, arg->clientid); | 1344 | p = xdr_encode_hyper(p, arg->clientid); |
@@ -1437,14 +1438,15 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc | |||
1437 | { | 1438 | { |
1438 | __be32 *p; | 1439 | __be32 *p; |
1439 | 1440 | ||
1440 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); | 1441 | p = reserve_space(xdr, 4); |
1441 | *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); | 1442 | *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); |
1442 | xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); | 1443 | encode_nfs4_stateid(xdr, stateid); |
1443 | encode_string(xdr, name->len, name->name); | 1444 | encode_string(xdr, name->len, name->name); |
1444 | } | 1445 | } |
1445 | 1446 | ||
1446 | static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) | 1447 | static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) |
1447 | { | 1448 | { |
1449 | encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); | ||
1448 | encode_openhdr(xdr, arg); | 1450 | encode_openhdr(xdr, arg); |
1449 | encode_opentype(xdr, arg); | 1451 | encode_opentype(xdr, arg); |
1450 | switch (arg->claim) { | 1452 | switch (arg->claim) { |
@@ -1460,88 +1462,64 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, | |||
1460 | default: | 1462 | default: |
1461 | BUG(); | 1463 | BUG(); |
1462 | } | 1464 | } |
1463 | hdr->nops++; | ||
1464 | hdr->replen += decode_open_maxsz; | ||
1465 | } | 1465 | } |
1466 | 1466 | ||
1467 | static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) | 1467 | static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) |
1468 | { | 1468 | { |
1469 | __be32 *p; | 1469 | encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr); |
1470 | 1470 | encode_nfs4_stateid(xdr, arg->stateid); | |
1471 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); | 1471 | encode_nfs4_seqid(xdr, arg->seqid); |
1472 | *p++ = cpu_to_be32(OP_OPEN_CONFIRM); | ||
1473 | p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); | ||
1474 | *p = cpu_to_be32(arg->seqid->sequence->counter); | ||
1475 | hdr->nops++; | ||
1476 | hdr->replen += decode_open_confirm_maxsz; | ||
1477 | } | 1472 | } |
1478 | 1473 | ||
1479 | static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) | 1474 | static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) |
1480 | { | 1475 | { |
1481 | __be32 *p; | 1476 | encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); |
1482 | 1477 | encode_nfs4_stateid(xdr, arg->stateid); | |
1483 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); | 1478 | encode_nfs4_seqid(xdr, arg->seqid); |
1484 | *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE); | ||
1485 | p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); | ||
1486 | *p = cpu_to_be32(arg->seqid->sequence->counter); | ||
1487 | encode_share_access(xdr, arg->fmode); | 1479 | encode_share_access(xdr, arg->fmode); |
1488 | hdr->nops++; | ||
1489 | hdr->replen += decode_open_downgrade_maxsz; | ||
1490 | } | 1480 | } |
1491 | 1481 | ||
1492 | static void | 1482 | static void |
1493 | encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) | 1483 | encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) |
1494 | { | 1484 | { |
1495 | int len = fh->size; | 1485 | encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr); |
1496 | __be32 *p; | 1486 | encode_string(xdr, fh->size, fh->data); |
1497 | |||
1498 | p = reserve_space(xdr, 8 + len); | ||
1499 | *p++ = cpu_to_be32(OP_PUTFH); | ||
1500 | xdr_encode_opaque(p, fh->data, len); | ||
1501 | hdr->nops++; | ||
1502 | hdr->replen += decode_putfh_maxsz; | ||
1503 | } | 1487 | } |
1504 | 1488 | ||
1505 | static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | 1489 | static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) |
1506 | { | 1490 | { |
1507 | __be32 *p; | 1491 | encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); |
1508 | |||
1509 | p = reserve_space(xdr, 4); | ||
1510 | *p = cpu_to_be32(OP_PUTROOTFH); | ||
1511 | hdr->nops++; | ||
1512 | hdr->replen += decode_putrootfh_maxsz; | ||
1513 | } | 1492 | } |
1514 | 1493 | ||
1515 | static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) | 1494 | static void encode_open_stateid(struct xdr_stream *xdr, |
1495 | const struct nfs_open_context *ctx, | ||
1496 | const struct nfs_lock_context *l_ctx, | ||
1497 | fmode_t fmode, | ||
1498 | int zero_seqid) | ||
1516 | { | 1499 | { |
1517 | nfs4_stateid stateid; | 1500 | nfs4_stateid stateid; |
1518 | __be32 *p; | ||
1519 | 1501 | ||
1520 | p = reserve_space(xdr, NFS4_STATEID_SIZE); | ||
1521 | if (ctx->state != NULL) { | 1502 | if (ctx->state != NULL) { |
1522 | nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); | 1503 | nfs4_select_rw_stateid(&stateid, ctx->state, |
1504 | fmode, l_ctx->lockowner, l_ctx->pid); | ||
1523 | if (zero_seqid) | 1505 | if (zero_seqid) |
1524 | stateid.stateid.seqid = 0; | 1506 | stateid.seqid = 0; |
1525 | xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); | 1507 | encode_nfs4_stateid(xdr, &stateid); |
1526 | } else | 1508 | } else |
1527 | xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); | 1509 | encode_nfs4_stateid(xdr, &zero_stateid); |
1528 | } | 1510 | } |
1529 | 1511 | ||
1530 | static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) | 1512 | static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) |
1531 | { | 1513 | { |
1532 | __be32 *p; | 1514 | __be32 *p; |
1533 | 1515 | ||
1534 | p = reserve_space(xdr, 4); | 1516 | encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); |
1535 | *p = cpu_to_be32(OP_READ); | 1517 | encode_open_stateid(xdr, args->context, args->lock_context, |
1536 | 1518 | FMODE_READ, hdr->minorversion); | |
1537 | encode_stateid(xdr, args->context, args->lock_context, | ||
1538 | hdr->minorversion); | ||
1539 | 1519 | ||
1540 | p = reserve_space(xdr, 12); | 1520 | p = reserve_space(xdr, 12); |
1541 | p = xdr_encode_hyper(p, args->offset); | 1521 | p = xdr_encode_hyper(p, args->offset); |
1542 | *p = cpu_to_be32(args->count); | 1522 | *p = cpu_to_be32(args->count); |
1543 | hdr->nops++; | ||
1544 | hdr->replen += decode_read_maxsz; | ||
1545 | } | 1523 | } |
1546 | 1524 | ||
1547 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) | 1525 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) |
@@ -1551,7 +1529,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | |||
1551 | FATTR4_WORD1_MOUNTED_ON_FILEID, | 1529 | FATTR4_WORD1_MOUNTED_ON_FILEID, |
1552 | }; | 1530 | }; |
1553 | uint32_t dircount = readdir->count >> 1; | 1531 | uint32_t dircount = readdir->count >> 1; |
1554 | __be32 *p; | 1532 | __be32 *p, verf[2]; |
1555 | 1533 | ||
1556 | if (readdir->plus) { | 1534 | if (readdir->plus) { |
1557 | attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| | 1535 | attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| |
@@ -1566,80 +1544,54 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | |||
1566 | if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) | 1544 | if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) |
1567 | attrs[0] |= FATTR4_WORD0_FILEID; | 1545 | attrs[0] |= FATTR4_WORD0_FILEID; |
1568 | 1546 | ||
1569 | p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); | 1547 | encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); |
1570 | *p++ = cpu_to_be32(OP_READDIR); | 1548 | encode_uint64(xdr, readdir->cookie); |
1571 | p = xdr_encode_hyper(p, readdir->cookie); | 1549 | encode_nfs4_verifier(xdr, &readdir->verifier); |
1572 | p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); | 1550 | p = reserve_space(xdr, 20); |
1573 | *p++ = cpu_to_be32(dircount); | 1551 | *p++ = cpu_to_be32(dircount); |
1574 | *p++ = cpu_to_be32(readdir->count); | 1552 | *p++ = cpu_to_be32(readdir->count); |
1575 | *p++ = cpu_to_be32(2); | 1553 | *p++ = cpu_to_be32(2); |
1576 | 1554 | ||
1577 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); | 1555 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); |
1578 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); | 1556 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); |
1579 | hdr->nops++; | 1557 | memcpy(verf, readdir->verifier.data, sizeof(verf)); |
1580 | hdr->replen += decode_readdir_maxsz; | ||
1581 | dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", | 1558 | dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", |
1582 | __func__, | 1559 | __func__, |
1583 | (unsigned long long)readdir->cookie, | 1560 | (unsigned long long)readdir->cookie, |
1584 | ((u32 *)readdir->verifier.data)[0], | 1561 | verf[0], verf[1], |
1585 | ((u32 *)readdir->verifier.data)[1], | ||
1586 | attrs[0] & readdir->bitmask[0], | 1562 | attrs[0] & readdir->bitmask[0], |
1587 | attrs[1] & readdir->bitmask[1]); | 1563 | attrs[1] & readdir->bitmask[1]); |
1588 | } | 1564 | } |
1589 | 1565 | ||
1590 | static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) | 1566 | static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) |
1591 | { | 1567 | { |
1592 | __be32 *p; | 1568 | encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr); |
1593 | |||
1594 | p = reserve_space(xdr, 4); | ||
1595 | *p = cpu_to_be32(OP_READLINK); | ||
1596 | hdr->nops++; | ||
1597 | hdr->replen += decode_readlink_maxsz; | ||
1598 | } | 1569 | } |
1599 | 1570 | ||
1600 | static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | 1571 | static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) |
1601 | { | 1572 | { |
1602 | __be32 *p; | 1573 | encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr); |
1603 | 1574 | encode_string(xdr, name->len, name->name); | |
1604 | p = reserve_space(xdr, 8 + name->len); | ||
1605 | *p++ = cpu_to_be32(OP_REMOVE); | ||
1606 | xdr_encode_opaque(p, name->name, name->len); | ||
1607 | hdr->nops++; | ||
1608 | hdr->replen += decode_remove_maxsz; | ||
1609 | } | 1575 | } |
1610 | 1576 | ||
1611 | static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) | 1577 | static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) |
1612 | { | 1578 | { |
1613 | __be32 *p; | 1579 | encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr); |
1614 | |||
1615 | p = reserve_space(xdr, 4); | ||
1616 | *p = cpu_to_be32(OP_RENAME); | ||
1617 | encode_string(xdr, oldname->len, oldname->name); | 1580 | encode_string(xdr, oldname->len, oldname->name); |
1618 | encode_string(xdr, newname->len, newname->name); | 1581 | encode_string(xdr, newname->len, newname->name); |
1619 | hdr->nops++; | ||
1620 | hdr->replen += decode_rename_maxsz; | ||
1621 | } | 1582 | } |
1622 | 1583 | ||
1623 | static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) | 1584 | static void encode_renew(struct xdr_stream *xdr, clientid4 clid, |
1585 | struct compound_hdr *hdr) | ||
1624 | { | 1586 | { |
1625 | __be32 *p; | 1587 | encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr); |
1626 | 1588 | encode_uint64(xdr, clid); | |
1627 | p = reserve_space(xdr, 12); | ||
1628 | *p++ = cpu_to_be32(OP_RENEW); | ||
1629 | xdr_encode_hyper(p, client_stateid->cl_clientid); | ||
1630 | hdr->nops++; | ||
1631 | hdr->replen += decode_renew_maxsz; | ||
1632 | } | 1589 | } |
1633 | 1590 | ||
1634 | static void | 1591 | static void |
1635 | encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) | 1592 | encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) |
1636 | { | 1593 | { |
1637 | __be32 *p; | 1594 | encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr); |
1638 | |||
1639 | p = reserve_space(xdr, 4); | ||
1640 | *p = cpu_to_be32(OP_RESTOREFH); | ||
1641 | hdr->nops++; | ||
1642 | hdr->replen += decode_restorefh_maxsz; | ||
1643 | } | 1595 | } |
1644 | 1596 | ||
1645 | static void | 1597 | static void |
@@ -1647,9 +1599,8 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun | |||
1647 | { | 1599 | { |
1648 | __be32 *p; | 1600 | __be32 *p; |
1649 | 1601 | ||
1650 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); | 1602 | encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr); |
1651 | *p++ = cpu_to_be32(OP_SETATTR); | 1603 | encode_nfs4_stateid(xdr, &zero_stateid); |
1652 | xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); | ||
1653 | p = reserve_space(xdr, 2*4); | 1604 | p = reserve_space(xdr, 2*4); |
1654 | *p++ = cpu_to_be32(1); | 1605 | *p++ = cpu_to_be32(1); |
1655 | *p = cpu_to_be32(FATTR4_WORD0_ACL); | 1606 | *p = cpu_to_be32(FATTR4_WORD0_ACL); |
@@ -1657,30 +1608,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun | |||
1657 | p = reserve_space(xdr, 4); | 1608 | p = reserve_space(xdr, 4); |
1658 | *p = cpu_to_be32(arg->acl_len); | 1609 | *p = cpu_to_be32(arg->acl_len); |
1659 | xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); | 1610 | xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); |
1660 | hdr->nops++; | ||
1661 | hdr->replen += decode_setacl_maxsz; | ||
1662 | } | 1611 | } |
1663 | 1612 | ||
1664 | static void | 1613 | static void |
1665 | encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) | 1614 | encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) |
1666 | { | 1615 | { |
1667 | __be32 *p; | 1616 | encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr); |
1668 | |||
1669 | p = reserve_space(xdr, 4); | ||
1670 | *p = cpu_to_be32(OP_SAVEFH); | ||
1671 | hdr->nops++; | ||
1672 | hdr->replen += decode_savefh_maxsz; | ||
1673 | } | 1617 | } |
1674 | 1618 | ||
1675 | static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) | 1619 | static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) |
1676 | { | 1620 | { |
1677 | __be32 *p; | 1621 | encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); |
1678 | 1622 | encode_nfs4_stateid(xdr, &arg->stateid); | |
1679 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); | ||
1680 | *p++ = cpu_to_be32(OP_SETATTR); | ||
1681 | xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE); | ||
1682 | hdr->nops++; | ||
1683 | hdr->replen += decode_setattr_maxsz; | ||
1684 | encode_attrs(xdr, arg->iap, server); | 1623 | encode_attrs(xdr, arg->iap, server); |
1685 | } | 1624 | } |
1686 | 1625 | ||
@@ -1688,9 +1627,8 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie | |||
1688 | { | 1627 | { |
1689 | __be32 *p; | 1628 | __be32 *p; |
1690 | 1629 | ||
1691 | p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); | 1630 | encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); |
1692 | *p++ = cpu_to_be32(OP_SETCLIENTID); | 1631 | encode_nfs4_verifier(xdr, setclientid->sc_verifier); |
1693 | xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); | ||
1694 | 1632 | ||
1695 | encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); | 1633 | encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); |
1696 | p = reserve_space(xdr, 4); | 1634 | p = reserve_space(xdr, 4); |
@@ -1699,31 +1637,23 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie | |||
1699 | encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); | 1637 | encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); |
1700 | p = reserve_space(xdr, 4); | 1638 | p = reserve_space(xdr, 4); |
1701 | *p = cpu_to_be32(setclientid->sc_cb_ident); | 1639 | *p = cpu_to_be32(setclientid->sc_cb_ident); |
1702 | hdr->nops++; | ||
1703 | hdr->replen += decode_setclientid_maxsz; | ||
1704 | } | 1640 | } |
1705 | 1641 | ||
1706 | static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) | 1642 | static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) |
1707 | { | 1643 | { |
1708 | __be32 *p; | 1644 | encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM, |
1709 | 1645 | decode_setclientid_confirm_maxsz, hdr); | |
1710 | p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); | 1646 | encode_uint64(xdr, arg->clientid); |
1711 | *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); | 1647 | encode_nfs4_verifier(xdr, &arg->confirm); |
1712 | p = xdr_encode_hyper(p, arg->clientid); | ||
1713 | xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE); | ||
1714 | hdr->nops++; | ||
1715 | hdr->replen += decode_setclientid_confirm_maxsz; | ||
1716 | } | 1648 | } |
1717 | 1649 | ||
1718 | static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) | 1650 | static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) |
1719 | { | 1651 | { |
1720 | __be32 *p; | 1652 | __be32 *p; |
1721 | 1653 | ||
1722 | p = reserve_space(xdr, 4); | 1654 | encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); |
1723 | *p = cpu_to_be32(OP_WRITE); | 1655 | encode_open_stateid(xdr, args->context, args->lock_context, |
1724 | 1656 | FMODE_WRITE, hdr->minorversion); | |
1725 | encode_stateid(xdr, args->context, args->lock_context, | ||
1726 | hdr->minorversion); | ||
1727 | 1657 | ||
1728 | p = reserve_space(xdr, 16); | 1658 | p = reserve_space(xdr, 16); |
1729 | p = xdr_encode_hyper(p, args->offset); | 1659 | p = xdr_encode_hyper(p, args->offset); |
@@ -1731,32 +1661,18 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg | |||
1731 | *p = cpu_to_be32(args->count); | 1661 | *p = cpu_to_be32(args->count); |
1732 | 1662 | ||
1733 | xdr_write_pages(xdr, args->pages, args->pgbase, args->count); | 1663 | xdr_write_pages(xdr, args->pages, args->pgbase, args->count); |
1734 | hdr->nops++; | ||
1735 | hdr->replen += decode_write_maxsz; | ||
1736 | } | 1664 | } |
1737 | 1665 | ||
1738 | static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) | 1666 | static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) |
1739 | { | 1667 | { |
1740 | __be32 *p; | 1668 | encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr); |
1741 | 1669 | encode_nfs4_stateid(xdr, stateid); | |
1742 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); | ||
1743 | |||
1744 | *p++ = cpu_to_be32(OP_DELEGRETURN); | ||
1745 | xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); | ||
1746 | hdr->nops++; | ||
1747 | hdr->replen += decode_delegreturn_maxsz; | ||
1748 | } | 1670 | } |
1749 | 1671 | ||
1750 | static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | 1672 | static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) |
1751 | { | 1673 | { |
1752 | int len = name->len; | 1674 | encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr); |
1753 | __be32 *p; | 1675 | encode_string(xdr, name->len, name->name); |
1754 | |||
1755 | p = reserve_space(xdr, 8 + len); | ||
1756 | *p++ = cpu_to_be32(OP_SECINFO); | ||
1757 | xdr_encode_opaque(p, name->name, len); | ||
1758 | hdr->nops++; | ||
1759 | hdr->replen += decode_secinfo_maxsz; | ||
1760 | } | 1676 | } |
1761 | 1677 | ||
1762 | #if defined(CONFIG_NFS_V4_1) | 1678 | #if defined(CONFIG_NFS_V4_1) |
@@ -1766,19 +1682,39 @@ static void encode_exchange_id(struct xdr_stream *xdr, | |||
1766 | struct compound_hdr *hdr) | 1682 | struct compound_hdr *hdr) |
1767 | { | 1683 | { |
1768 | __be32 *p; | 1684 | __be32 *p; |
1685 | char impl_name[NFS4_OPAQUE_LIMIT]; | ||
1686 | int len = 0; | ||
1769 | 1687 | ||
1770 | p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); | 1688 | encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); |
1771 | *p++ = cpu_to_be32(OP_EXCHANGE_ID); | 1689 | encode_nfs4_verifier(xdr, args->verifier); |
1772 | xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data)); | ||
1773 | 1690 | ||
1774 | encode_string(xdr, args->id_len, args->id); | 1691 | encode_string(xdr, args->id_len, args->id); |
1775 | 1692 | ||
1776 | p = reserve_space(xdr, 12); | 1693 | p = reserve_space(xdr, 12); |
1777 | *p++ = cpu_to_be32(args->flags); | 1694 | *p++ = cpu_to_be32(args->flags); |
1778 | *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ | 1695 | *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ |
1779 | *p = cpu_to_be32(0); /* zero length implementation id array */ | 1696 | |
1780 | hdr->nops++; | 1697 | if (send_implementation_id && |
1781 | hdr->replen += decode_exchange_id_maxsz; | 1698 | sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && |
1699 | sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) | ||
1700 | <= NFS4_OPAQUE_LIMIT + 1) | ||
1701 | len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s", | ||
1702 | utsname()->sysname, utsname()->release, | ||
1703 | utsname()->version, utsname()->machine); | ||
1704 | |||
1705 | if (len > 0) { | ||
1706 | *p = cpu_to_be32(1); /* implementation id array length=1 */ | ||
1707 | |||
1708 | encode_string(xdr, | ||
1709 | sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1, | ||
1710 | CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN); | ||
1711 | encode_string(xdr, len, impl_name); | ||
1712 | /* just send zeros for nii_date - the date is in nii_name */ | ||
1713 | p = reserve_space(xdr, 12); | ||
1714 | p = xdr_encode_hyper(p, 0); | ||
1715 | *p = cpu_to_be32(0); | ||
1716 | } else | ||
1717 | *p = cpu_to_be32(0); /* implementation id array length=0 */ | ||
1782 | } | 1718 | } |
1783 | 1719 | ||
1784 | static void encode_create_session(struct xdr_stream *xdr, | 1720 | static void encode_create_session(struct xdr_stream *xdr, |
@@ -1801,8 +1737,8 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1801 | len = scnprintf(machine_name, sizeof(machine_name), "%s", | 1737 | len = scnprintf(machine_name, sizeof(machine_name), "%s", |
1802 | clp->cl_ipaddr); | 1738 | clp->cl_ipaddr); |
1803 | 1739 | ||
1804 | p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); | 1740 | encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); |
1805 | *p++ = cpu_to_be32(OP_CREATE_SESSION); | 1741 | p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12); |
1806 | p = xdr_encode_hyper(p, clp->cl_clientid); | 1742 | p = xdr_encode_hyper(p, clp->cl_clientid); |
1807 | *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ | 1743 | *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ |
1808 | *p++ = cpu_to_be32(args->flags); /*flags */ | 1744 | *p++ = cpu_to_be32(args->flags); /*flags */ |
@@ -1835,33 +1771,22 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1835 | *p++ = cpu_to_be32(0); /* UID */ | 1771 | *p++ = cpu_to_be32(0); /* UID */ |
1836 | *p++ = cpu_to_be32(0); /* GID */ | 1772 | *p++ = cpu_to_be32(0); /* GID */ |
1837 | *p = cpu_to_be32(0); /* No more gids */ | 1773 | *p = cpu_to_be32(0); /* No more gids */ |
1838 | hdr->nops++; | ||
1839 | hdr->replen += decode_create_session_maxsz; | ||
1840 | } | 1774 | } |
1841 | 1775 | ||
1842 | static void encode_destroy_session(struct xdr_stream *xdr, | 1776 | static void encode_destroy_session(struct xdr_stream *xdr, |
1843 | struct nfs4_session *session, | 1777 | struct nfs4_session *session, |
1844 | struct compound_hdr *hdr) | 1778 | struct compound_hdr *hdr) |
1845 | { | 1779 | { |
1846 | __be32 *p; | 1780 | encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr); |
1847 | p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); | 1781 | encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); |
1848 | *p++ = cpu_to_be32(OP_DESTROY_SESSION); | ||
1849 | xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); | ||
1850 | hdr->nops++; | ||
1851 | hdr->replen += decode_destroy_session_maxsz; | ||
1852 | } | 1782 | } |
1853 | 1783 | ||
1854 | static void encode_reclaim_complete(struct xdr_stream *xdr, | 1784 | static void encode_reclaim_complete(struct xdr_stream *xdr, |
1855 | struct nfs41_reclaim_complete_args *args, | 1785 | struct nfs41_reclaim_complete_args *args, |
1856 | struct compound_hdr *hdr) | 1786 | struct compound_hdr *hdr) |
1857 | { | 1787 | { |
1858 | __be32 *p; | 1788 | encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr); |
1859 | 1789 | encode_uint32(xdr, args->one_fs); | |
1860 | p = reserve_space(xdr, 8); | ||
1861 | *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE); | ||
1862 | *p++ = cpu_to_be32(args->one_fs); | ||
1863 | hdr->nops++; | ||
1864 | hdr->replen += decode_reclaim_complete_maxsz; | ||
1865 | } | 1790 | } |
1866 | #endif /* CONFIG_NFS_V4_1 */ | 1791 | #endif /* CONFIG_NFS_V4_1 */ |
1867 | 1792 | ||
@@ -1883,8 +1808,7 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1883 | WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); | 1808 | WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); |
1884 | slot = tp->slots + args->sa_slotid; | 1809 | slot = tp->slots + args->sa_slotid; |
1885 | 1810 | ||
1886 | p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); | 1811 | encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); |
1887 | *p++ = cpu_to_be32(OP_SEQUENCE); | ||
1888 | 1812 | ||
1889 | /* | 1813 | /* |
1890 | * Sessionid + seqid + slotid + max slotid + cache_this | 1814 | * Sessionid + seqid + slotid + max slotid + cache_this |
@@ -1898,13 +1822,12 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1898 | ((u32 *)session->sess_id.data)[3], | 1822 | ((u32 *)session->sess_id.data)[3], |
1899 | slot->seq_nr, args->sa_slotid, | 1823 | slot->seq_nr, args->sa_slotid, |
1900 | tp->highest_used_slotid, args->sa_cache_this); | 1824 | tp->highest_used_slotid, args->sa_cache_this); |
1825 | p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); | ||
1901 | p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); | 1826 | p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); |
1902 | *p++ = cpu_to_be32(slot->seq_nr); | 1827 | *p++ = cpu_to_be32(slot->seq_nr); |
1903 | *p++ = cpu_to_be32(args->sa_slotid); | 1828 | *p++ = cpu_to_be32(args->sa_slotid); |
1904 | *p++ = cpu_to_be32(tp->highest_used_slotid); | 1829 | *p++ = cpu_to_be32(tp->highest_used_slotid); |
1905 | *p = cpu_to_be32(args->sa_cache_this); | 1830 | *p = cpu_to_be32(args->sa_cache_this); |
1906 | hdr->nops++; | ||
1907 | hdr->replen += decode_sequence_maxsz; | ||
1908 | #endif /* CONFIG_NFS_V4_1 */ | 1831 | #endif /* CONFIG_NFS_V4_1 */ |
1909 | } | 1832 | } |
1910 | 1833 | ||
@@ -1919,14 +1842,12 @@ encode_getdevicelist(struct xdr_stream *xdr, | |||
1919 | .data = "dummmmmy", | 1842 | .data = "dummmmmy", |
1920 | }; | 1843 | }; |
1921 | 1844 | ||
1922 | p = reserve_space(xdr, 20); | 1845 | encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); |
1923 | *p++ = cpu_to_be32(OP_GETDEVICELIST); | 1846 | p = reserve_space(xdr, 16); |
1924 | *p++ = cpu_to_be32(args->layoutclass); | 1847 | *p++ = cpu_to_be32(args->layoutclass); |
1925 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); | 1848 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); |
1926 | xdr_encode_hyper(p, 0ULL); /* cookie */ | 1849 | xdr_encode_hyper(p, 0ULL); /* cookie */ |
1927 | encode_nfs4_verifier(xdr, &dummy); | 1850 | encode_nfs4_verifier(xdr, &dummy); |
1928 | hdr->nops++; | ||
1929 | hdr->replen += decode_getdevicelist_maxsz; | ||
1930 | } | 1851 | } |
1931 | 1852 | ||
1932 | static void | 1853 | static void |
@@ -1936,15 +1857,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
1936 | { | 1857 | { |
1937 | __be32 *p; | 1858 | __be32 *p; |
1938 | 1859 | ||
1939 | p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); | 1860 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); |
1940 | *p++ = cpu_to_be32(OP_GETDEVICEINFO); | 1861 | p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); |
1941 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | 1862 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, |
1942 | NFS4_DEVICEID4_SIZE); | 1863 | NFS4_DEVICEID4_SIZE); |
1943 | *p++ = cpu_to_be32(args->pdev->layout_type); | 1864 | *p++ = cpu_to_be32(args->pdev->layout_type); |
1944 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ | 1865 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ |
1945 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | 1866 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ |
1946 | hdr->nops++; | ||
1947 | hdr->replen += decode_getdeviceinfo_maxsz; | ||
1948 | } | 1867 | } |
1949 | 1868 | ||
1950 | static void | 1869 | static void |
@@ -1954,16 +1873,16 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1954 | { | 1873 | { |
1955 | __be32 *p; | 1874 | __be32 *p; |
1956 | 1875 | ||
1957 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); | 1876 | encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr); |
1958 | *p++ = cpu_to_be32(OP_LAYOUTGET); | 1877 | p = reserve_space(xdr, 36); |
1959 | *p++ = cpu_to_be32(0); /* Signal layout available */ | 1878 | *p++ = cpu_to_be32(0); /* Signal layout available */ |
1960 | *p++ = cpu_to_be32(args->type); | 1879 | *p++ = cpu_to_be32(args->type); |
1961 | *p++ = cpu_to_be32(args->range.iomode); | 1880 | *p++ = cpu_to_be32(args->range.iomode); |
1962 | p = xdr_encode_hyper(p, args->range.offset); | 1881 | p = xdr_encode_hyper(p, args->range.offset); |
1963 | p = xdr_encode_hyper(p, args->range.length); | 1882 | p = xdr_encode_hyper(p, args->range.length); |
1964 | p = xdr_encode_hyper(p, args->minlength); | 1883 | p = xdr_encode_hyper(p, args->minlength); |
1965 | p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); | 1884 | encode_nfs4_stateid(xdr, &args->stateid); |
1966 | *p = cpu_to_be32(args->maxcount); | 1885 | encode_uint32(xdr, args->maxcount); |
1967 | 1886 | ||
1968 | dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", | 1887 | dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", |
1969 | __func__, | 1888 | __func__, |
@@ -1972,8 +1891,6 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1972 | (unsigned long)args->range.offset, | 1891 | (unsigned long)args->range.offset, |
1973 | (unsigned long)args->range.length, | 1892 | (unsigned long)args->range.length, |
1974 | args->maxcount); | 1893 | args->maxcount); |
1975 | hdr->nops++; | ||
1976 | hdr->replen += decode_layoutget_maxsz; | ||
1977 | } | 1894 | } |
1978 | 1895 | ||
1979 | static int | 1896 | static int |
@@ -1987,13 +1904,14 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
1987 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, | 1904 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, |
1988 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); | 1905 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); |
1989 | 1906 | ||
1990 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); | 1907 | encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr); |
1991 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | 1908 | p = reserve_space(xdr, 20); |
1992 | /* Only whole file layouts */ | 1909 | /* Only whole file layouts */ |
1993 | p = xdr_encode_hyper(p, 0); /* offset */ | 1910 | p = xdr_encode_hyper(p, 0); /* offset */ |
1994 | p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ | 1911 | p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ |
1995 | *p++ = cpu_to_be32(0); /* reclaim */ | 1912 | *p = cpu_to_be32(0); /* reclaim */ |
1996 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); | 1913 | encode_nfs4_stateid(xdr, &args->stateid); |
1914 | p = reserve_space(xdr, 20); | ||
1997 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ | 1915 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ |
1998 | p = xdr_encode_hyper(p, args->lastbytewritten); | 1916 | p = xdr_encode_hyper(p, args->lastbytewritten); |
1999 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | 1917 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ |
@@ -2002,13 +1920,9 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
2002 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) | 1920 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) |
2003 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( | 1921 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( |
2004 | NFS_I(inode)->layout, xdr, args); | 1922 | NFS_I(inode)->layout, xdr, args); |
2005 | else { | 1923 | else |
2006 | p = reserve_space(xdr, 4); | 1924 | encode_uint32(xdr, 0); /* no layout-type payload */ |
2007 | *p = cpu_to_be32(0); /* no layout-type payload */ | ||
2008 | } | ||
2009 | 1925 | ||
2010 | hdr->nops++; | ||
2011 | hdr->replen += decode_layoutcommit_maxsz; | ||
2012 | return 0; | 1926 | return 0; |
2013 | } | 1927 | } |
2014 | 1928 | ||
@@ -2019,27 +1933,23 @@ encode_layoutreturn(struct xdr_stream *xdr, | |||
2019 | { | 1933 | { |
2020 | __be32 *p; | 1934 | __be32 *p; |
2021 | 1935 | ||
2022 | p = reserve_space(xdr, 20); | 1936 | encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); |
2023 | *p++ = cpu_to_be32(OP_LAYOUTRETURN); | 1937 | p = reserve_space(xdr, 16); |
2024 | *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ | 1938 | *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ |
2025 | *p++ = cpu_to_be32(args->layout_type); | 1939 | *p++ = cpu_to_be32(args->layout_type); |
2026 | *p++ = cpu_to_be32(IOMODE_ANY); | 1940 | *p++ = cpu_to_be32(IOMODE_ANY); |
2027 | *p = cpu_to_be32(RETURN_FILE); | 1941 | *p = cpu_to_be32(RETURN_FILE); |
2028 | p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); | 1942 | p = reserve_space(xdr, 16); |
2029 | p = xdr_encode_hyper(p, 0); | 1943 | p = xdr_encode_hyper(p, 0); |
2030 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); | 1944 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); |
2031 | spin_lock(&args->inode->i_lock); | 1945 | spin_lock(&args->inode->i_lock); |
2032 | xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); | 1946 | encode_nfs4_stateid(xdr, &args->stateid); |
2033 | spin_unlock(&args->inode->i_lock); | 1947 | spin_unlock(&args->inode->i_lock); |
2034 | if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { | 1948 | if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { |
2035 | NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( | 1949 | NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( |
2036 | NFS_I(args->inode)->layout, xdr, args); | 1950 | NFS_I(args->inode)->layout, xdr, args); |
2037 | } else { | 1951 | } else |
2038 | p = reserve_space(xdr, 4); | 1952 | encode_uint32(xdr, 0); |
2039 | *p = cpu_to_be32(0); | ||
2040 | } | ||
2041 | hdr->nops++; | ||
2042 | hdr->replen += decode_layoutreturn_maxsz; | ||
2043 | } | 1953 | } |
2044 | 1954 | ||
2045 | static int | 1955 | static int |
@@ -2047,12 +1957,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr, | |||
2047 | const struct nfs41_secinfo_no_name_args *args, | 1957 | const struct nfs41_secinfo_no_name_args *args, |
2048 | struct compound_hdr *hdr) | 1958 | struct compound_hdr *hdr) |
2049 | { | 1959 | { |
2050 | __be32 *p; | 1960 | encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr); |
2051 | p = reserve_space(xdr, 8); | 1961 | encode_uint32(xdr, args->style); |
2052 | *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); | ||
2053 | *p++ = cpu_to_be32(args->style); | ||
2054 | hdr->nops++; | ||
2055 | hdr->replen += decode_secinfo_no_name_maxsz; | ||
2056 | return 0; | 1962 | return 0; |
2057 | } | 1963 | } |
2058 | 1964 | ||
@@ -2060,26 +1966,17 @@ static void encode_test_stateid(struct xdr_stream *xdr, | |||
2060 | struct nfs41_test_stateid_args *args, | 1966 | struct nfs41_test_stateid_args *args, |
2061 | struct compound_hdr *hdr) | 1967 | struct compound_hdr *hdr) |
2062 | { | 1968 | { |
2063 | __be32 *p; | 1969 | encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); |
2064 | 1970 | encode_uint32(xdr, 1); | |
2065 | p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); | 1971 | encode_nfs4_stateid(xdr, args->stateid); |
2066 | *p++ = cpu_to_be32(OP_TEST_STATEID); | ||
2067 | *p++ = cpu_to_be32(1); | ||
2068 | xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | ||
2069 | hdr->nops++; | ||
2070 | hdr->replen += decode_test_stateid_maxsz; | ||
2071 | } | 1972 | } |
2072 | 1973 | ||
2073 | static void encode_free_stateid(struct xdr_stream *xdr, | 1974 | static void encode_free_stateid(struct xdr_stream *xdr, |
2074 | struct nfs41_free_stateid_args *args, | 1975 | struct nfs41_free_stateid_args *args, |
2075 | struct compound_hdr *hdr) | 1976 | struct compound_hdr *hdr) |
2076 | { | 1977 | { |
2077 | __be32 *p; | 1978 | encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); |
2078 | p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); | 1979 | encode_nfs4_stateid(xdr, args->stateid); |
2079 | *p++ = cpu_to_be32(OP_FREE_STATEID); | ||
2080 | xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | ||
2081 | hdr->nops++; | ||
2082 | hdr->replen += decode_free_stateid_maxsz; | ||
2083 | } | 1980 | } |
2084 | #endif /* CONFIG_NFS_V4_1 */ | 1981 | #endif /* CONFIG_NFS_V4_1 */ |
2085 | 1982 | ||
@@ -2633,6 +2530,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, | |||
2633 | encode_sequence(xdr, &args->seq_args, &hdr); | 2530 | encode_sequence(xdr, &args->seq_args, &hdr); |
2634 | encode_putfh(xdr, args->fhandle, &hdr); | 2531 | encode_putfh(xdr, args->fhandle, &hdr); |
2635 | encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| | 2532 | encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| |
2533 | FATTR4_WORD0_FH_EXPIRE_TYPE| | ||
2636 | FATTR4_WORD0_LINK_SUPPORT| | 2534 | FATTR4_WORD0_LINK_SUPPORT| |
2637 | FATTR4_WORD0_SYMLINK_SUPPORT| | 2535 | FATTR4_WORD0_SYMLINK_SUPPORT| |
2638 | FATTR4_WORD0_ACLSUPPORT, &hdr); | 2536 | FATTR4_WORD0_ACLSUPPORT, &hdr); |
@@ -2650,7 +2548,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2650 | }; | 2548 | }; |
2651 | 2549 | ||
2652 | encode_compound_hdr(xdr, req, &hdr); | 2550 | encode_compound_hdr(xdr, req, &hdr); |
2653 | encode_renew(xdr, clp, &hdr); | 2551 | encode_renew(xdr, clp->cl_clientid, &hdr); |
2654 | encode_nops(&hdr); | 2552 | encode_nops(&hdr); |
2655 | } | 2553 | } |
2656 | 2554 | ||
@@ -3180,6 +3078,28 @@ out_overflow: | |||
3180 | return -EIO; | 3078 | return -EIO; |
3181 | } | 3079 | } |
3182 | 3080 | ||
3081 | static int decode_attr_fh_expire_type(struct xdr_stream *xdr, | ||
3082 | uint32_t *bitmap, uint32_t *type) | ||
3083 | { | ||
3084 | __be32 *p; | ||
3085 | |||
3086 | *type = 0; | ||
3087 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U))) | ||
3088 | return -EIO; | ||
3089 | if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) { | ||
3090 | p = xdr_inline_decode(xdr, 4); | ||
3091 | if (unlikely(!p)) | ||
3092 | goto out_overflow; | ||
3093 | *type = be32_to_cpup(p); | ||
3094 | bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE; | ||
3095 | } | ||
3096 | dprintk("%s: expire type=0x%x\n", __func__, *type); | ||
3097 | return 0; | ||
3098 | out_overflow: | ||
3099 | print_overflow_msg(__func__, xdr); | ||
3100 | return -EIO; | ||
3101 | } | ||
3102 | |||
3183 | static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) | 3103 | static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) |
3184 | { | 3104 | { |
3185 | __be32 *p; | 3105 | __be32 *p; |
@@ -3513,16 +3433,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) | |||
3513 | n = be32_to_cpup(p); | 3433 | n = be32_to_cpup(p); |
3514 | if (n == 0) | 3434 | if (n == 0) |
3515 | goto root_path; | 3435 | goto root_path; |
3516 | dprintk("path "); | 3436 | dprintk("pathname4: "); |
3517 | path->ncomponents = 0; | 3437 | path->ncomponents = 0; |
3518 | while (path->ncomponents < n) { | 3438 | while (path->ncomponents < n) { |
3519 | struct nfs4_string *component = &path->components[path->ncomponents]; | 3439 | struct nfs4_string *component = &path->components[path->ncomponents]; |
3520 | status = decode_opaque_inline(xdr, &component->len, &component->data); | 3440 | status = decode_opaque_inline(xdr, &component->len, &component->data); |
3521 | if (unlikely(status != 0)) | 3441 | if (unlikely(status != 0)) |
3522 | goto out_eio; | 3442 | goto out_eio; |
3523 | if (path->ncomponents != n) | 3443 | ifdebug (XDR) |
3524 | dprintk("/"); | 3444 | pr_cont("%s%.*s ", |
3525 | dprintk("%s", component->data); | 3445 | (path->ncomponents != n ? "/ " : ""), |
3446 | component->len, component->data); | ||
3526 | if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) | 3447 | if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) |
3527 | path->ncomponents++; | 3448 | path->ncomponents++; |
3528 | else { | 3449 | else { |
@@ -3531,14 +3452,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) | |||
3531 | } | 3452 | } |
3532 | } | 3453 | } |
3533 | out: | 3454 | out: |
3534 | dprintk("\n"); | ||
3535 | return status; | 3455 | return status; |
3536 | root_path: | 3456 | root_path: |
3537 | /* a root pathname is sent as a zero component4 */ | 3457 | /* a root pathname is sent as a zero component4 */ |
3538 | path->ncomponents = 1; | 3458 | path->ncomponents = 1; |
3539 | path->components[0].len=0; | 3459 | path->components[0].len=0; |
3540 | path->components[0].data=NULL; | 3460 | path->components[0].data=NULL; |
3541 | dprintk("path /\n"); | 3461 | dprintk("pathname4: /\n"); |
3542 | goto out; | 3462 | goto out; |
3543 | out_eio: | 3463 | out_eio: |
3544 | dprintk(" status %d", status); | 3464 | dprintk(" status %d", status); |
@@ -3560,7 +3480,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
3560 | status = 0; | 3480 | status = 0; |
3561 | if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) | 3481 | if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) |
3562 | goto out; | 3482 | goto out; |
3563 | dprintk("%s: fsroot ", __func__); | 3483 | status = -EIO; |
3484 | /* Ignore borken servers that return unrequested attrs */ | ||
3485 | if (unlikely(res == NULL)) | ||
3486 | goto out; | ||
3487 | dprintk("%s: fsroot:\n", __func__); | ||
3564 | status = decode_pathname(xdr, &res->fs_path); | 3488 | status = decode_pathname(xdr, &res->fs_path); |
3565 | if (unlikely(status != 0)) | 3489 | if (unlikely(status != 0)) |
3566 | goto out; | 3490 | goto out; |
@@ -3581,7 +3505,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
3581 | m = be32_to_cpup(p); | 3505 | m = be32_to_cpup(p); |
3582 | 3506 | ||
3583 | loc->nservers = 0; | 3507 | loc->nservers = 0; |
3584 | dprintk("%s: servers ", __func__); | 3508 | dprintk("%s: servers:\n", __func__); |
3585 | while (loc->nservers < m) { | 3509 | while (loc->nservers < m) { |
3586 | struct nfs4_string *server = &loc->servers[loc->nservers]; | 3510 | struct nfs4_string *server = &loc->servers[loc->nservers]; |
3587 | status = decode_opaque_inline(xdr, &server->len, &server->data); | 3511 | status = decode_opaque_inline(xdr, &server->len, &server->data); |
@@ -3613,7 +3537,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
3613 | res->nlocations++; | 3537 | res->nlocations++; |
3614 | } | 3538 | } |
3615 | if (res->nlocations != 0) | 3539 | if (res->nlocations != 0) |
3616 | status = NFS_ATTR_FATTR_V4_REFERRAL; | 3540 | status = NFS_ATTR_FATTR_V4_LOCATIONS; |
3617 | out: | 3541 | out: |
3618 | dprintk("%s: fs_locations done, error = %d\n", __func__, status); | 3542 | dprintk("%s: fs_locations done, error = %d\n", __func__, status); |
3619 | return status; | 3543 | return status; |
@@ -4157,7 +4081,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) | |||
4157 | 4081 | ||
4158 | static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | 4082 | static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) |
4159 | { | 4083 | { |
4160 | return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); | 4084 | return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); |
4161 | } | 4085 | } |
4162 | 4086 | ||
4163 | static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) | 4087 | static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) |
@@ -4174,7 +4098,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) | |||
4174 | 4098 | ||
4175 | static int decode_verifier(struct xdr_stream *xdr, void *verifier) | 4099 | static int decode_verifier(struct xdr_stream *xdr, void *verifier) |
4176 | { | 4100 | { |
4177 | return decode_opaque_fixed(xdr, verifier, 8); | 4101 | return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); |
4178 | } | 4102 | } |
4179 | 4103 | ||
4180 | static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) | 4104 | static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) |
@@ -4224,6 +4148,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re | |||
4224 | goto xdr_error; | 4148 | goto xdr_error; |
4225 | if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) | 4149 | if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) |
4226 | goto xdr_error; | 4150 | goto xdr_error; |
4151 | if ((status = decode_attr_fh_expire_type(xdr, bitmap, | ||
4152 | &res->fh_expire_type)) != 0) | ||
4153 | goto xdr_error; | ||
4227 | if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) | 4154 | if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) |
4228 | goto xdr_error; | 4155 | goto xdr_error; |
4229 | if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) | 4156 | if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) |
@@ -4294,6 +4221,7 @@ xdr_error: | |||
4294 | 4221 | ||
4295 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | 4222 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, |
4296 | struct nfs_fattr *fattr, struct nfs_fh *fh, | 4223 | struct nfs_fattr *fattr, struct nfs_fh *fh, |
4224 | struct nfs4_fs_locations *fs_loc, | ||
4297 | const struct nfs_server *server) | 4225 | const struct nfs_server *server) |
4298 | { | 4226 | { |
4299 | int status; | 4227 | int status; |
@@ -4341,9 +4269,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4341 | goto xdr_error; | 4269 | goto xdr_error; |
4342 | fattr->valid |= status; | 4270 | fattr->valid |= status; |
4343 | 4271 | ||
4344 | status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, | 4272 | status = decode_attr_fs_locations(xdr, bitmap, fs_loc); |
4345 | struct nfs4_fs_locations, | ||
4346 | fattr)); | ||
4347 | if (status < 0) | 4273 | if (status < 0) |
4348 | goto xdr_error; | 4274 | goto xdr_error; |
4349 | fattr->valid |= status; | 4275 | fattr->valid |= status; |
@@ -4407,7 +4333,8 @@ xdr_error: | |||
4407 | } | 4333 | } |
4408 | 4334 | ||
4409 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4335 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
4410 | struct nfs_fh *fh, const struct nfs_server *server) | 4336 | struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, |
4337 | const struct nfs_server *server) | ||
4411 | { | 4338 | { |
4412 | __be32 *savep; | 4339 | __be32 *savep; |
4413 | uint32_t attrlen, | 4340 | uint32_t attrlen, |
@@ -4426,7 +4353,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat | |||
4426 | if (status < 0) | 4353 | if (status < 0) |
4427 | goto xdr_error; | 4354 | goto xdr_error; |
4428 | 4355 | ||
4429 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); | 4356 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); |
4430 | if (status < 0) | 4357 | if (status < 0) |
4431 | goto xdr_error; | 4358 | goto xdr_error; |
4432 | 4359 | ||
@@ -4439,7 +4366,7 @@ xdr_error: | |||
4439 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4366 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
4440 | const struct nfs_server *server) | 4367 | const struct nfs_server *server) |
4441 | { | 4368 | { |
4442 | return decode_getfattr_generic(xdr, fattr, NULL, server); | 4369 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); |
4443 | } | 4370 | } |
4444 | 4371 | ||
4445 | /* | 4372 | /* |
@@ -4463,8 +4390,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, | |||
4463 | return 0; | 4390 | return 0; |
4464 | } | 4391 | } |
4465 | if (num > 1) | 4392 | if (num > 1) |
4466 | printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " | 4393 | printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout " |
4467 | "per filesystem not supported\n", __func__); | 4394 | "drivers per filesystem not supported\n", __func__); |
4468 | 4395 | ||
4469 | /* Decode and set first layout type, move xdr->p past unused types */ | 4396 | /* Decode and set first layout type, move xdr->p past unused types */ |
4470 | p = xdr_inline_decode(xdr, num * 4); | 4397 | p = xdr_inline_decode(xdr, num * 4); |
@@ -4863,17 +4790,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
4863 | size_t hdrlen; | 4790 | size_t hdrlen; |
4864 | u32 recvd, pglen = rcvbuf->page_len; | 4791 | u32 recvd, pglen = rcvbuf->page_len; |
4865 | int status; | 4792 | int status; |
4793 | __be32 verf[2]; | ||
4866 | 4794 | ||
4867 | status = decode_op_hdr(xdr, OP_READDIR); | 4795 | status = decode_op_hdr(xdr, OP_READDIR); |
4868 | if (!status) | 4796 | if (!status) |
4869 | status = decode_verifier(xdr, readdir->verifier.data); | 4797 | status = decode_verifier(xdr, readdir->verifier.data); |
4870 | if (unlikely(status)) | 4798 | if (unlikely(status)) |
4871 | return status; | 4799 | return status; |
4800 | memcpy(verf, readdir->verifier.data, sizeof(verf)); | ||
4872 | dprintk("%s: verifier = %08x:%08x\n", | 4801 | dprintk("%s: verifier = %08x:%08x\n", |
4873 | __func__, | 4802 | __func__, verf[0], verf[1]); |
4874 | ((u32 *)readdir->verifier.data)[0], | ||
4875 | ((u32 *)readdir->verifier.data)[1]); | ||
4876 | |||
4877 | 4803 | ||
4878 | hdrlen = (char *) xdr->p - (char *) iov->iov_base; | 4804 | hdrlen = (char *) xdr->p - (char *) iov->iov_base; |
4879 | recvd = rcvbuf->len - hdrlen; | 4805 | recvd = rcvbuf->len - hdrlen; |
@@ -5120,7 +5046,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) | |||
5120 | goto out_overflow; | 5046 | goto out_overflow; |
5121 | res->count = be32_to_cpup(p++); | 5047 | res->count = be32_to_cpup(p++); |
5122 | res->verf->committed = be32_to_cpup(p++); | 5048 | res->verf->committed = be32_to_cpup(p++); |
5123 | memcpy(res->verf->verifier, p, 8); | 5049 | memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); |
5124 | return 0; | 5050 | return 0; |
5125 | out_overflow: | 5051 | out_overflow: |
5126 | print_overflow_msg(__func__, xdr); | 5052 | print_overflow_msg(__func__, xdr); |
@@ -5214,6 +5140,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, | |||
5214 | char *dummy_str; | 5140 | char *dummy_str; |
5215 | int status; | 5141 | int status; |
5216 | struct nfs_client *clp = res->client; | 5142 | struct nfs_client *clp = res->client; |
5143 | uint32_t impl_id_count; | ||
5217 | 5144 | ||
5218 | status = decode_op_hdr(xdr, OP_EXCHANGE_ID); | 5145 | status = decode_op_hdr(xdr, OP_EXCHANGE_ID); |
5219 | if (status) | 5146 | if (status) |
@@ -5255,11 +5182,38 @@ static int decode_exchange_id(struct xdr_stream *xdr, | |||
5255 | memcpy(res->server_scope->server_scope, dummy_str, dummy); | 5182 | memcpy(res->server_scope->server_scope, dummy_str, dummy); |
5256 | res->server_scope->server_scope_sz = dummy; | 5183 | res->server_scope->server_scope_sz = dummy; |
5257 | 5184 | ||
5258 | /* Throw away Implementation id array */ | 5185 | /* Implementation Id */ |
5259 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | 5186 | p = xdr_inline_decode(xdr, 4); |
5260 | if (unlikely(status)) | 5187 | if (unlikely(!p)) |
5261 | return status; | 5188 | goto out_overflow; |
5189 | impl_id_count = be32_to_cpup(p++); | ||
5190 | |||
5191 | if (impl_id_count) { | ||
5192 | /* nii_domain */ | ||
5193 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | ||
5194 | if (unlikely(status)) | ||
5195 | return status; | ||
5196 | if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) | ||
5197 | return -EIO; | ||
5198 | memcpy(res->impl_id->domain, dummy_str, dummy); | ||
5262 | 5199 | ||
5200 | /* nii_name */ | ||
5201 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | ||
5202 | if (unlikely(status)) | ||
5203 | return status; | ||
5204 | if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) | ||
5205 | return -EIO; | ||
5206 | memcpy(res->impl_id->name, dummy_str, dummy); | ||
5207 | |||
5208 | /* nii_date */ | ||
5209 | p = xdr_inline_decode(xdr, 12); | ||
5210 | if (unlikely(!p)) | ||
5211 | goto out_overflow; | ||
5212 | p = xdr_decode_hyper(p, &res->impl_id->date.seconds); | ||
5213 | res->impl_id->date.nseconds = be32_to_cpup(p); | ||
5214 | |||
5215 | /* if there's more than one entry, ignore the rest */ | ||
5216 | } | ||
5263 | return 0; | 5217 | return 0; |
5264 | out_overflow: | 5218 | out_overflow: |
5265 | print_overflow_msg(__func__, xdr); | 5219 | print_overflow_msg(__func__, xdr); |
@@ -5285,8 +5239,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr, | |||
5285 | attrs->max_reqs = be32_to_cpup(p++); | 5239 | attrs->max_reqs = be32_to_cpup(p++); |
5286 | nr_attrs = be32_to_cpup(p); | 5240 | nr_attrs = be32_to_cpup(p); |
5287 | if (unlikely(nr_attrs > 1)) { | 5241 | if (unlikely(nr_attrs > 1)) { |
5288 | printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", | 5242 | printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs " |
5289 | __func__, nr_attrs); | 5243 | "count %u\n", __func__, nr_attrs); |
5290 | return -EINVAL; | 5244 | return -EINVAL; |
5291 | } | 5245 | } |
5292 | if (nr_attrs == 1) { | 5246 | if (nr_attrs == 1) { |
@@ -5436,14 +5390,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr, | |||
5436 | p += 2; | 5390 | p += 2; |
5437 | 5391 | ||
5438 | /* Read verifier */ | 5392 | /* Read verifier */ |
5439 | p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); | 5393 | p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); |
5440 | 5394 | ||
5441 | res->num_devs = be32_to_cpup(p); | 5395 | res->num_devs = be32_to_cpup(p); |
5442 | 5396 | ||
5443 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); | 5397 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); |
5444 | 5398 | ||
5445 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { | 5399 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { |
5446 | printk(KERN_ERR "%s too many result dev_num %u\n", | 5400 | printk(KERN_ERR "NFS: %s too many result dev_num %u\n", |
5447 | __func__, res->num_devs); | 5401 | __func__, res->num_devs); |
5448 | return -EIO; | 5402 | return -EIO; |
5449 | } | 5403 | } |
@@ -5537,11 +5491,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
5537 | status = decode_op_hdr(xdr, OP_LAYOUTGET); | 5491 | status = decode_op_hdr(xdr, OP_LAYOUTGET); |
5538 | if (status) | 5492 | if (status) |
5539 | return status; | 5493 | return status; |
5540 | p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); | 5494 | p = xdr_inline_decode(xdr, 4); |
5495 | if (unlikely(!p)) | ||
5496 | goto out_overflow; | ||
5497 | res->return_on_close = be32_to_cpup(p); | ||
5498 | decode_stateid(xdr, &res->stateid); | ||
5499 | p = xdr_inline_decode(xdr, 4); | ||
5541 | if (unlikely(!p)) | 5500 | if (unlikely(!p)) |
5542 | goto out_overflow; | 5501 | goto out_overflow; |
5543 | res->return_on_close = be32_to_cpup(p++); | ||
5544 | p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); | ||
5545 | layout_count = be32_to_cpup(p); | 5502 | layout_count = be32_to_cpup(p); |
5546 | if (!layout_count) { | 5503 | if (!layout_count) { |
5547 | dprintk("%s: server responded with empty layout array\n", | 5504 | dprintk("%s: server responded with empty layout array\n", |
@@ -5666,7 +5623,8 @@ static int decode_test_stateid(struct xdr_stream *xdr, | |||
5666 | if (unlikely(!p)) | 5623 | if (unlikely(!p)) |
5667 | goto out_overflow; | 5624 | goto out_overflow; |
5668 | res->status = be32_to_cpup(p++); | 5625 | res->status = be32_to_cpup(p++); |
5669 | return res->status; | 5626 | |
5627 | return status; | ||
5670 | out_overflow: | 5628 | out_overflow: |
5671 | print_overflow_msg(__func__, xdr); | 5629 | print_overflow_msg(__func__, xdr); |
5672 | out: | 5630 | out: |
@@ -6583,8 +6541,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, | |||
6583 | if (status) | 6541 | if (status) |
6584 | goto out; | 6542 | goto out; |
6585 | xdr_enter_page(xdr, PAGE_SIZE); | 6543 | xdr_enter_page(xdr, PAGE_SIZE); |
6586 | status = decode_getfattr(xdr, &res->fs_locations->fattr, | 6544 | status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, |
6587 | res->fs_locations->server); | 6545 | NULL, res->fs_locations, |
6546 | res->fs_locations->server); | ||
6588 | out: | 6547 | out: |
6589 | return status; | 6548 | return status; |
6590 | } | 6549 | } |
@@ -6964,7 +6923,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6964 | goto out_overflow; | 6923 | goto out_overflow; |
6965 | 6924 | ||
6966 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, | 6925 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, |
6967 | entry->server) < 0) | 6926 | NULL, entry->server) < 0) |
6968 | goto out_overflow; | 6927 | goto out_overflow; |
6969 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) | 6928 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) |
6970 | entry->ino = entry->fattr->mounted_on_fileid; | 6929 | entry->ino = entry->fattr->mounted_on_fileid; |
@@ -7112,7 +7071,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
7112 | #endif /* CONFIG_NFS_V4_1 */ | 7071 | #endif /* CONFIG_NFS_V4_1 */ |
7113 | }; | 7072 | }; |
7114 | 7073 | ||
7115 | struct rpc_version nfs_version4 = { | 7074 | const struct rpc_version nfs_version4 = { |
7116 | .number = 4, | 7075 | .number = 4, |
7117 | .nrprocs = ARRAY_SIZE(nfs4_procedures), | 7076 | .nrprocs = ARRAY_SIZE(nfs4_procedures), |
7118 | .procs = nfs4_procedures | 7077 | .procs = nfs4_procedures |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c4744e1d513c..cd3c910d2d12 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = ""; | |||
104 | /* server:export path string passed to super.c */ | 104 | /* server:export path string passed to super.c */ |
105 | static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; | 105 | static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; |
106 | 106 | ||
107 | #ifdef RPC_DEBUG | 107 | #ifdef NFS_DEBUG |
108 | /* | 108 | /* |
109 | * When the "nfsrootdebug" kernel command line option is specified, | 109 | * When the "nfsrootdebug" kernel command line option is specified, |
110 | * enable debugging messages for NFSROOT. | 110 | * enable debugging messages for NFSROOT. |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 55d01280a609..4bff4a3dab46 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | |||
137 | struct objio_dev_ent *ode; | 137 | struct objio_dev_ent *ode; |
138 | struct osd_dev *od; | 138 | struct osd_dev *od; |
139 | struct osd_dev_info odi; | 139 | struct osd_dev_info odi; |
140 | bool retry_flag = true; | ||
140 | int err; | 141 | int err; |
141 | 142 | ||
142 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | 143 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); |
@@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | |||
171 | goto out; | 172 | goto out; |
172 | } | 173 | } |
173 | 174 | ||
175 | retry_lookup: | ||
174 | od = osduld_info_lookup(&odi); | 176 | od = osduld_info_lookup(&odi); |
175 | if (unlikely(IS_ERR(od))) { | 177 | if (unlikely(IS_ERR(od))) { |
176 | err = PTR_ERR(od); | 178 | err = PTR_ERR(od); |
177 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); | 179 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); |
180 | if (err == -ENODEV && retry_flag) { | ||
181 | err = objlayout_autologin(deviceaddr); | ||
182 | if (likely(!err)) { | ||
183 | retry_flag = false; | ||
184 | goto retry_lookup; | ||
185 | } | ||
186 | } | ||
178 | goto out; | 187 | goto out; |
179 | } | 188 | } |
180 | 189 | ||
@@ -205,25 +214,36 @@ static void copy_single_comp(struct ore_components *oc, unsigned c, | |||
205 | int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, | 214 | int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, |
206 | struct objio_segment **pseg) | 215 | struct objio_segment **pseg) |
207 | { | 216 | { |
208 | struct __alloc_objio_segment { | 217 | /* This is the in memory structure of the objio_segment |
209 | struct objio_segment olseg; | 218 | * |
210 | struct ore_dev *ods[numdevs]; | 219 | * struct __alloc_objio_segment { |
211 | struct ore_comp comps[numdevs]; | 220 | * struct objio_segment olseg; |
212 | } *aolseg; | 221 | * struct ore_dev *ods[numdevs]; |
213 | 222 | * struct ore_comp comps[numdevs]; | |
214 | aolseg = kzalloc(sizeof(*aolseg), gfp_flags); | 223 | * } *aolseg; |
215 | if (unlikely(!aolseg)) { | 224 | * NOTE: The code as above compiles and runs perfectly. It is elegant, |
225 | * type safe and compact. At some Past time Linus has decided he does not | ||
226 | * like variable length arrays, For the sake of this principal we uglify | ||
227 | * the code as below. | ||
228 | */ | ||
229 | struct objio_segment *lseg; | ||
230 | size_t lseg_size = sizeof(*lseg) + | ||
231 | numdevs * sizeof(lseg->oc.ods[0]) + | ||
232 | numdevs * sizeof(*lseg->oc.comps); | ||
233 | |||
234 | lseg = kzalloc(lseg_size, gfp_flags); | ||
235 | if (unlikely(!lseg)) { | ||
216 | dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, | 236 | dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, |
217 | numdevs, sizeof(*aolseg)); | 237 | numdevs, lseg_size); |
218 | return -ENOMEM; | 238 | return -ENOMEM; |
219 | } | 239 | } |
220 | 240 | ||
221 | aolseg->olseg.oc.numdevs = numdevs; | 241 | lseg->oc.numdevs = numdevs; |
222 | aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; | 242 | lseg->oc.single_comp = EC_MULTPLE_COMPS; |
223 | aolseg->olseg.oc.comps = aolseg->comps; | 243 | lseg->oc.ods = (void *)(lseg + 1); |
224 | aolseg->olseg.oc.ods = aolseg->ods; | 244 | lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); |
225 | 245 | ||
226 | *pseg = &aolseg->olseg; | 246 | *pseg = lseg; |
227 | return 0; | 247 | return 0; |
228 | } | 248 | } |
229 | 249 | ||
@@ -582,10 +602,10 @@ objlayout_init(void) | |||
582 | 602 | ||
583 | if (ret) | 603 | if (ret) |
584 | printk(KERN_INFO | 604 | printk(KERN_INFO |
585 | "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", | 605 | "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", |
586 | __func__, ret); | 606 | __func__, ret); |
587 | else | 607 | else |
588 | printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", | 608 | printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", |
589 | __func__); | 609 | __func__); |
590 | return ret; | 610 | return ret; |
591 | } | 611 | } |
@@ -594,7 +614,7 @@ static void __exit | |||
594 | objlayout_exit(void) | 614 | objlayout_exit(void) |
595 | { | 615 | { |
596 | pnfs_unregister_layoutdriver(&objlayout_type); | 616 | pnfs_unregister_layoutdriver(&objlayout_type); |
597 | printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", | 617 | printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", |
598 | __func__); | 618 | __func__); |
599 | } | 619 | } |
600 | 620 | ||
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index b3c29039f5b8..8d45f1c318ce 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -37,6 +37,9 @@ | |||
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
38 | */ | 38 | */ |
39 | 39 | ||
40 | #include <linux/kmod.h> | ||
41 | #include <linux/moduleparam.h> | ||
42 | #include <linux/ratelimit.h> | ||
40 | #include <scsi/osd_initiator.h> | 43 | #include <scsi/osd_initiator.h> |
41 | #include "objlayout.h" | 44 | #include "objlayout.h" |
42 | 45 | ||
@@ -156,7 +159,7 @@ last_byte_offset(u64 start, u64 len) | |||
156 | return end > start ? end - 1 : NFS4_MAX_UINT64; | 159 | return end > start ? end - 1 : NFS4_MAX_UINT64; |
157 | } | 160 | } |
158 | 161 | ||
159 | void _fix_verify_io_params(struct pnfs_layout_segment *lseg, | 162 | static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, |
160 | struct page ***p_pages, unsigned *p_pgbase, | 163 | struct page ***p_pages, unsigned *p_pgbase, |
161 | u64 offset, unsigned long count) | 164 | u64 offset, unsigned long count) |
162 | { | 165 | { |
@@ -490,9 +493,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) | |||
490 | if (!ioerr->oer_errno) | 493 | if (!ioerr->oer_errno) |
491 | continue; | 494 | continue; |
492 | 495 | ||
493 | printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " | 496 | printk(KERN_ERR "NFS: %s: err[%d]: errno=%d " |
494 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | 497 | "is_write=%d dev(%llx:%llx) par=0x%llx " |
495 | "offset=0x%llx length=0x%llx\n", | 498 | "obj=0x%llx offset=0x%llx length=0x%llx\n", |
496 | __func__, i, ioerr->oer_errno, | 499 | __func__, i, ioerr->oer_errno, |
497 | ioerr->oer_iswrite, | 500 | ioerr->oer_iswrite, |
498 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | 501 | _DEVID_LO(&ioerr->oer_component.oid_device_id), |
@@ -651,3 +654,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | |||
651 | __free_page(odi->page); | 654 | __free_page(odi->page); |
652 | kfree(odi); | 655 | kfree(odi); |
653 | } | 656 | } |
657 | |||
658 | enum { | ||
659 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, | ||
660 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, | ||
661 | OSD_LOGIN_UPCALL_PATHLEN = 256 | ||
662 | }; | ||
663 | |||
664 | static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login"; | ||
665 | |||
666 | module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog), | ||
667 | 0600); | ||
668 | MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program"); | ||
669 | |||
670 | struct __auto_login { | ||
671 | char uri[OBJLAYOUT_MAX_URI_LEN]; | ||
672 | char osdname[OBJLAYOUT_MAX_OSDNAME_LEN]; | ||
673 | char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN]; | ||
674 | }; | ||
675 | |||
676 | static int __objlayout_upcall(struct __auto_login *login) | ||
677 | { | ||
678 | static char *envp[] = { "HOME=/", | ||
679 | "TERM=linux", | ||
680 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
681 | NULL | ||
682 | }; | ||
683 | char *argv[8]; | ||
684 | int ret; | ||
685 | |||
686 | if (unlikely(!osd_login_prog[0])) { | ||
687 | dprintk("%s: osd_login_prog is disabled\n", __func__); | ||
688 | return -EACCES; | ||
689 | } | ||
690 | |||
691 | dprintk("%s uri: %s\n", __func__, login->uri); | ||
692 | dprintk("%s osdname %s\n", __func__, login->osdname); | ||
693 | dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex); | ||
694 | |||
695 | argv[0] = (char *)osd_login_prog; | ||
696 | argv[1] = "-u"; | ||
697 | argv[2] = login->uri; | ||
698 | argv[3] = "-o"; | ||
699 | argv[4] = login->osdname; | ||
700 | argv[5] = "-s"; | ||
701 | argv[6] = login->systemid_hex; | ||
702 | argv[7] = NULL; | ||
703 | |||
704 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
705 | /* | ||
706 | * Disable the upcall mechanism if we're getting an ENOENT or | ||
707 | * EACCES error. The admin can re-enable it on the fly by using | ||
708 | * sysfs to set the objlayoutdriver.osd_login_prog module parameter once | ||
709 | * the problem has been fixed. | ||
710 | */ | ||
711 | if (ret == -ENOENT || ret == -EACCES) { | ||
712 | printk(KERN_ERR "PNFS-OBJ: %s was not found please set " | ||
713 | "objlayoutdriver.osd_login_prog kernel parameter!\n", | ||
714 | osd_login_prog); | ||
715 | osd_login_prog[0] = '\0'; | ||
716 | } | ||
717 | dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret); | ||
718 | |||
719 | return ret; | ||
720 | } | ||
721 | |||
722 | /* Assume dest is all zeros */ | ||
723 | static void __copy_nfsS_and_zero_terminate(struct nfs4_string s, | ||
724 | char *dest, int max_len, | ||
725 | const char *var_name) | ||
726 | { | ||
727 | if (!s.len) | ||
728 | return; | ||
729 | |||
730 | if (s.len >= max_len) { | ||
731 | pr_warn_ratelimited( | ||
732 | "objlayout_autologin: %s: s.len(%d) >= max_len(%d)", | ||
733 | var_name, s.len, max_len); | ||
734 | s.len = max_len - 1; /* space for null terminator */ | ||
735 | } | ||
736 | |||
737 | memcpy(dest, s.data, s.len); | ||
738 | } | ||
739 | |||
740 | /* Assume sysid is all zeros */ | ||
741 | static void _sysid_2_hex(struct nfs4_string s, | ||
742 | char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN]) | ||
743 | { | ||
744 | int i; | ||
745 | char *cur; | ||
746 | |||
747 | if (!s.len) | ||
748 | return; | ||
749 | |||
750 | if (s.len != OSD_SYSTEMID_LEN) { | ||
751 | pr_warn_ratelimited( | ||
752 | "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN", | ||
753 | s.len); | ||
754 | if (s.len > OSD_SYSTEMID_LEN) | ||
755 | s.len = OSD_SYSTEMID_LEN; | ||
756 | } | ||
757 | |||
758 | cur = sysid; | ||
759 | for (i = 0; i < s.len; i++) | ||
760 | cur = hex_byte_pack(cur, s.data[i]); | ||
761 | } | ||
762 | |||
763 | int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr) | ||
764 | { | ||
765 | int rc; | ||
766 | struct __auto_login login; | ||
767 | |||
768 | if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len) | ||
769 | return -ENODEV; | ||
770 | |||
771 | memset(&login, 0, sizeof(login)); | ||
772 | __copy_nfsS_and_zero_terminate( | ||
773 | deviceaddr->oda_targetaddr.ota_netaddr.r_addr, | ||
774 | login.uri, sizeof(login.uri), "URI"); | ||
775 | |||
776 | __copy_nfsS_and_zero_terminate( | ||
777 | deviceaddr->oda_osdname, | ||
778 | login.osdname, sizeof(login.osdname), "OSDNAME"); | ||
779 | |||
780 | _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex); | ||
781 | |||
782 | rc = __objlayout_upcall(&login); | ||
783 | if (rc > 0) /* script returns positive values */ | ||
784 | rc = -ENODEV; | ||
785 | |||
786 | return rc; | ||
787 | } | ||
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 8ec34727ed21..880ba086be94 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn( | |||
184 | struct xdr_stream *, | 184 | struct xdr_stream *, |
185 | const struct nfs4_layoutreturn_args *); | 185 | const struct nfs4_layoutreturn_args *); |
186 | 186 | ||
187 | extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr); | ||
188 | |||
187 | #endif /* _OBJLAYOUT_H */ | 189 | #endif /* _OBJLAYOUT_H */ |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5668f7c54c41..d21fceaa9f62 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/file.h> | 13 | #include <linux/file.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/sunrpc/clnt.h> | 15 | #include <linux/sunrpc/clnt.h> |
16 | #include <linux/nfs.h> | ||
16 | #include <linux/nfs3.h> | 17 | #include <linux/nfs3.h> |
17 | #include <linux/nfs4.h> | 18 | #include <linux/nfs4.h> |
18 | #include <linux/nfs_page.h> | 19 | #include <linux/nfs_page.h> |
@@ -106,36 +107,6 @@ void nfs_unlock_request(struct nfs_page *req) | |||
106 | nfs_release_request(req); | 107 | nfs_release_request(req); |
107 | } | 108 | } |
108 | 109 | ||
109 | /** | ||
110 | * nfs_set_page_tag_locked - Tag a request as locked | ||
111 | * @req: | ||
112 | */ | ||
113 | int nfs_set_page_tag_locked(struct nfs_page *req) | ||
114 | { | ||
115 | if (!nfs_lock_request_dontget(req)) | ||
116 | return 0; | ||
117 | if (test_bit(PG_MAPPED, &req->wb_flags)) | ||
118 | radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | ||
119 | return 1; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers | ||
124 | */ | ||
125 | void nfs_clear_page_tag_locked(struct nfs_page *req) | ||
126 | { | ||
127 | if (test_bit(PG_MAPPED, &req->wb_flags)) { | ||
128 | struct inode *inode = req->wb_context->dentry->d_inode; | ||
129 | struct nfs_inode *nfsi = NFS_I(inode); | ||
130 | |||
131 | spin_lock(&inode->i_lock); | ||
132 | radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | ||
133 | nfs_unlock_request(req); | ||
134 | spin_unlock(&inode->i_lock); | ||
135 | } else | ||
136 | nfs_unlock_request(req); | ||
137 | } | ||
138 | |||
139 | /* | 110 | /* |
140 | * nfs_clear_request - Free up all resources allocated to the request | 111 | * nfs_clear_request - Free up all resources allocated to the request |
141 | * @req: | 112 | * @req: |
@@ -425,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) | |||
425 | } | 396 | } |
426 | } | 397 | } |
427 | 398 | ||
428 | #define NFS_SCAN_MAXENTRIES 16 | ||
429 | /** | ||
430 | * nfs_scan_list - Scan a list for matching requests | ||
431 | * @nfsi: NFS inode | ||
432 | * @dst: Destination list | ||
433 | * @idx_start: lower bound of page->index to scan | ||
434 | * @npages: idx_start + npages sets the upper bound to scan. | ||
435 | * @tag: tag to scan for | ||
436 | * | ||
437 | * Moves elements from one of the inode request lists. | ||
438 | * If the number of requests is set to 0, the entire address_space | ||
439 | * starting at index idx_start, is scanned. | ||
440 | * The requests are *not* checked to ensure that they form a contiguous set. | ||
441 | * You must be holding the inode's i_lock when calling this function | ||
442 | */ | ||
443 | int nfs_scan_list(struct nfs_inode *nfsi, | ||
444 | struct list_head *dst, pgoff_t idx_start, | ||
445 | unsigned int npages, int tag) | ||
446 | { | ||
447 | struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; | ||
448 | struct nfs_page *req; | ||
449 | pgoff_t idx_end; | ||
450 | int found, i; | ||
451 | int res; | ||
452 | struct list_head *list; | ||
453 | |||
454 | res = 0; | ||
455 | if (npages == 0) | ||
456 | idx_end = ~0; | ||
457 | else | ||
458 | idx_end = idx_start + npages - 1; | ||
459 | |||
460 | for (;;) { | ||
461 | found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, | ||
462 | (void **)&pgvec[0], idx_start, | ||
463 | NFS_SCAN_MAXENTRIES, tag); | ||
464 | if (found <= 0) | ||
465 | break; | ||
466 | for (i = 0; i < found; i++) { | ||
467 | req = pgvec[i]; | ||
468 | if (req->wb_index > idx_end) | ||
469 | goto out; | ||
470 | idx_start = req->wb_index + 1; | ||
471 | if (nfs_set_page_tag_locked(req)) { | ||
472 | kref_get(&req->wb_kref); | ||
473 | radix_tree_tag_clear(&nfsi->nfs_page_tree, | ||
474 | req->wb_index, tag); | ||
475 | list = pnfs_choose_commit_list(req, dst); | ||
476 | nfs_list_add_request(req, list); | ||
477 | res++; | ||
478 | if (res == INT_MAX) | ||
479 | goto out; | ||
480 | } | ||
481 | } | ||
482 | /* for latency reduction */ | ||
483 | cond_resched_lock(&nfsi->vfs_inode.i_lock); | ||
484 | } | ||
485 | out: | ||
486 | return res; | ||
487 | } | ||
488 | |||
489 | int __init nfs_init_nfspagecache(void) | 399 | int __init nfs_init_nfspagecache(void) |
490 | { | 400 | { |
491 | nfs_page_cachep = kmem_cache_create("nfs_page", | 401 | nfs_page_cachep = kmem_cache_create("nfs_page", |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 17149a490065..b5d451586943 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, | |||
101 | goto out_no_driver; | 101 | goto out_no_driver; |
102 | if (!(server->nfs_client->cl_exchange_flags & | 102 | if (!(server->nfs_client->cl_exchange_flags & |
103 | (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { | 103 | (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { |
104 | printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, | 104 | printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", |
105 | id, server->nfs_client->cl_exchange_flags); | 105 | __func__, id, server->nfs_client->cl_exchange_flags); |
106 | goto out_no_driver; | 106 | goto out_no_driver; |
107 | } | 107 | } |
108 | ld_type = find_pnfs_driver(id); | 108 | ld_type = find_pnfs_driver(id); |
@@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, | |||
122 | server->pnfs_curr_ld = ld_type; | 122 | server->pnfs_curr_ld = ld_type; |
123 | if (ld_type->set_layoutdriver | 123 | if (ld_type->set_layoutdriver |
124 | && ld_type->set_layoutdriver(server, mntfh)) { | 124 | && ld_type->set_layoutdriver(server, mntfh)) { |
125 | printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", | 125 | printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " |
126 | __func__, id); | 126 | "driver %u.\n", __func__, id); |
127 | module_put(ld_type->owner); | 127 | module_put(ld_type->owner); |
128 | goto out_no_driver; | 128 | goto out_no_driver; |
129 | } | 129 | } |
@@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | |||
143 | struct pnfs_layoutdriver_type *tmp; | 143 | struct pnfs_layoutdriver_type *tmp; |
144 | 144 | ||
145 | if (ld_type->id == 0) { | 145 | if (ld_type->id == 0) { |
146 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); | 146 | printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); |
147 | return status; | 147 | return status; |
148 | } | 148 | } |
149 | if (!ld_type->alloc_lseg || !ld_type->free_lseg) { | 149 | if (!ld_type->alloc_lseg || !ld_type->free_lseg) { |
150 | printk(KERN_ERR "%s Layout driver must provide " | 150 | printk(KERN_ERR "NFS: %s Layout driver must provide " |
151 | "alloc_lseg and free_lseg.\n", __func__); | 151 | "alloc_lseg and free_lseg.\n", __func__); |
152 | return status; | 152 | return status; |
153 | } | 153 | } |
@@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | |||
160 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, | 160 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, |
161 | ld_type->name); | 161 | ld_type->name); |
162 | } else { | 162 | } else { |
163 | printk(KERN_ERR "%s Module with id %d already loaded!\n", | 163 | printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", |
164 | __func__, ld_type->id); | 164 | __func__, ld_type->id); |
165 | } | 165 | } |
166 | spin_unlock(&pnfs_spinlock); | 166 | spin_unlock(&pnfs_spinlock); |
@@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | |||
496 | { | 496 | { |
497 | u32 oldseq, newseq; | 497 | u32 oldseq, newseq; |
498 | 498 | ||
499 | oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); | 499 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); |
500 | newseq = be32_to_cpu(new->stateid.seqid); | 500 | newseq = be32_to_cpu(new->seqid); |
501 | if ((int)(newseq - oldseq) > 0) { | 501 | if ((int)(newseq - oldseq) > 0) { |
502 | memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); | 502 | nfs4_stateid_copy(&lo->plh_stateid, new); |
503 | if (update_barrier) { | 503 | if (update_barrier) { |
504 | u32 new_barrier = be32_to_cpu(new->stateid.seqid); | 504 | u32 new_barrier = be32_to_cpu(new->seqid); |
505 | 505 | ||
506 | if ((int)(new_barrier - lo->plh_barrier)) | 506 | if ((int)(new_barrier - lo->plh_barrier)) |
507 | lo->plh_barrier = new_barrier; | 507 | lo->plh_barrier = new_barrier; |
@@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, | |||
525 | int lget) | 525 | int lget) |
526 | { | 526 | { |
527 | if ((stateid) && | 527 | if ((stateid) && |
528 | (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) | 528 | (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) |
529 | return true; | 529 | return true; |
530 | return lo->plh_block_lgets || | 530 | return lo->plh_block_lgets || |
531 | test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || | 531 | test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || |
@@ -549,11 +549,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
549 | 549 | ||
550 | do { | 550 | do { |
551 | seq = read_seqbegin(&open_state->seqlock); | 551 | seq = read_seqbegin(&open_state->seqlock); |
552 | memcpy(dst->data, open_state->stateid.data, | 552 | nfs4_stateid_copy(dst, &open_state->stateid); |
553 | sizeof(open_state->stateid.data)); | ||
554 | } while (read_seqretry(&open_state->seqlock, seq)); | 553 | } while (read_seqretry(&open_state->seqlock, seq)); |
555 | } else | 554 | } else |
556 | memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); | 555 | nfs4_stateid_copy(dst, &lo->plh_stateid); |
557 | spin_unlock(&lo->plh_inode->i_lock); | 556 | spin_unlock(&lo->plh_inode->i_lock); |
558 | dprintk("<-- %s\n", __func__); | 557 | dprintk("<-- %s\n", __func__); |
559 | return status; | 558 | return status; |
@@ -590,7 +589,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
590 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | 589 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; |
591 | max_pages = max_resp_sz >> PAGE_SHIFT; | 590 | max_pages = max_resp_sz >> PAGE_SHIFT; |
592 | 591 | ||
593 | pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); | 592 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); |
594 | if (!pages) | 593 | if (!pages) |
595 | goto out_err_free; | 594 | goto out_err_free; |
596 | 595 | ||
@@ -760,7 +759,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) | |||
760 | } | 759 | } |
761 | if (!found) { | 760 | if (!found) { |
762 | struct pnfs_layout_hdr *lo = nfsi->layout; | 761 | struct pnfs_layout_hdr *lo = nfsi->layout; |
763 | u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); | 762 | u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); |
764 | 763 | ||
765 | /* Since close does not return a layout stateid for use as | 764 | /* Since close does not return a layout stateid for use as |
766 | * a barrier, we choose the worst-case barrier. | 765 | * a barrier, we choose the worst-case barrier. |
@@ -966,8 +965,7 @@ pnfs_update_layout(struct inode *ino, | |||
966 | } | 965 | } |
967 | 966 | ||
968 | /* Do we even need to bother with this? */ | 967 | /* Do we even need to bother with this? */ |
969 | if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || | 968 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
970 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | ||
971 | dprintk("%s matches recall, use MDS\n", __func__); | 969 | dprintk("%s matches recall, use MDS\n", __func__); |
972 | goto out_unlock; | 970 | goto out_unlock; |
973 | } | 971 | } |
@@ -1032,7 +1030,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1032 | struct nfs4_layoutget_res *res = &lgp->res; | 1030 | struct nfs4_layoutget_res *res = &lgp->res; |
1033 | struct pnfs_layout_segment *lseg; | 1031 | struct pnfs_layout_segment *lseg; |
1034 | struct inode *ino = lo->plh_inode; | 1032 | struct inode *ino = lo->plh_inode; |
1035 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | ||
1036 | int status = 0; | 1033 | int status = 0; |
1037 | 1034 | ||
1038 | /* Inject layout blob into I/O device driver */ | 1035 | /* Inject layout blob into I/O device driver */ |
@@ -1048,8 +1045,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1048 | } | 1045 | } |
1049 | 1046 | ||
1050 | spin_lock(&ino->i_lock); | 1047 | spin_lock(&ino->i_lock); |
1051 | if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || | 1048 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
1052 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | ||
1053 | dprintk("%s forget reply due to recall\n", __func__); | 1049 | dprintk("%s forget reply due to recall\n", __func__); |
1054 | goto out_forget_reply; | 1050 | goto out_forget_reply; |
1055 | } | 1051 | } |
@@ -1214,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data) | |||
1214 | } | 1210 | } |
1215 | data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); | 1211 | data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); |
1216 | } | 1212 | } |
1213 | put_lseg(data->lseg); | ||
1217 | data->mds_ops->rpc_release(data); | 1214 | data->mds_ops->rpc_release(data); |
1218 | } | 1215 | } |
1219 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 1216 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); |
@@ -1227,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | |||
1227 | nfs_list_add_request(data->req, &desc->pg_list); | 1224 | nfs_list_add_request(data->req, &desc->pg_list); |
1228 | nfs_pageio_reset_write_mds(desc); | 1225 | nfs_pageio_reset_write_mds(desc); |
1229 | desc->pg_recoalesce = 1; | 1226 | desc->pg_recoalesce = 1; |
1227 | put_lseg(data->lseg); | ||
1230 | nfs_writedata_release(data); | 1228 | nfs_writedata_release(data); |
1231 | } | 1229 | } |
1232 | 1230 | ||
@@ -1327,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data) | |||
1327 | data->mds_ops->rpc_call_done(&data->task, data); | 1325 | data->mds_ops->rpc_call_done(&data->task, data); |
1328 | } else | 1326 | } else |
1329 | pnfs_ld_handle_read_error(data); | 1327 | pnfs_ld_handle_read_error(data); |
1328 | put_lseg(data->lseg); | ||
1330 | data->mds_ops->rpc_release(data); | 1329 | data->mds_ops->rpc_release(data); |
1331 | } | 1330 | } |
1332 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 1331 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); |
@@ -1530,8 +1529,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1530 | end_pos = nfsi->layout->plh_lwb; | 1529 | end_pos = nfsi->layout->plh_lwb; |
1531 | nfsi->layout->plh_lwb = 0; | 1530 | nfsi->layout->plh_lwb = 0; |
1532 | 1531 | ||
1533 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, | 1532 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); |
1534 | sizeof(nfsi->layout->plh_stateid.data)); | ||
1535 | spin_unlock(&inode->i_lock); | 1533 | spin_unlock(&inode->i_lock); |
1536 | 1534 | ||
1537 | data->args.inode = inode; | 1535 | data->args.inode = inode; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 53d593a0a4f2..442ebf68eeec 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type { | |||
94 | const struct nfs_pageio_ops *pg_read_ops; | 94 | const struct nfs_pageio_ops *pg_read_ops; |
95 | const struct nfs_pageio_ops *pg_write_ops; | 95 | const struct nfs_pageio_ops *pg_write_ops; |
96 | 96 | ||
97 | /* Returns true if layoutdriver wants to divert this request to | 97 | void (*mark_request_commit) (struct nfs_page *req, |
98 | * driver's commit routine. | 98 | struct pnfs_layout_segment *lseg); |
99 | */ | 99 | void (*clear_request_commit) (struct nfs_page *req); |
100 | bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); | 100 | int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); |
101 | struct list_head * (*choose_commit_list) (struct nfs_page *req); | ||
102 | int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); | 101 | int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); |
103 | 102 | ||
104 | /* | 103 | /* |
@@ -229,7 +228,6 @@ struct nfs4_deviceid_node { | |||
229 | atomic_t ref; | 228 | atomic_t ref; |
230 | }; | 229 | }; |
231 | 230 | ||
232 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); | ||
233 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 231 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
234 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 232 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
235 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | 233 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, |
@@ -262,20 +260,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) | |||
262 | return nfss->pnfs_curr_ld != NULL; | 260 | return nfss->pnfs_curr_ld != NULL; |
263 | } | 261 | } |
264 | 262 | ||
265 | static inline void | ||
266 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
267 | { | ||
268 | if (lseg) { | ||
269 | struct pnfs_layoutdriver_type *ld; | ||
270 | |||
271 | ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; | ||
272 | if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { | ||
273 | set_bit(PG_PNFS_COMMIT, &req->wb_flags); | ||
274 | req->wb_commit_lseg = get_lseg(lseg); | ||
275 | } | ||
276 | } | ||
277 | } | ||
278 | |||
279 | static inline int | 263 | static inline int |
280 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | 264 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) |
281 | { | 265 | { |
@@ -284,27 +268,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | |||
284 | return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); | 268 | return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); |
285 | } | 269 | } |
286 | 270 | ||
287 | static inline struct list_head * | 271 | static inline bool |
288 | pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) | 272 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
289 | { | 273 | { |
290 | struct list_head *rv; | 274 | struct inode *inode = req->wb_context->dentry->d_inode; |
275 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
291 | 276 | ||
292 | if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { | 277 | if (lseg == NULL || ld->mark_request_commit == NULL) |
293 | struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; | 278 | return false; |
279 | ld->mark_request_commit(req, lseg); | ||
280 | return true; | ||
281 | } | ||
294 | 282 | ||
295 | set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); | 283 | static inline bool |
296 | rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); | 284 | pnfs_clear_request_commit(struct nfs_page *req) |
297 | /* matched by ref taken when PG_PNFS_COMMIT is set */ | 285 | { |
298 | put_lseg(req->wb_commit_lseg); | 286 | struct inode *inode = req->wb_context->dentry->d_inode; |
299 | } else | 287 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; |
300 | rv = mds; | 288 | |
301 | return rv; | 289 | if (ld == NULL || ld->clear_request_commit == NULL) |
290 | return false; | ||
291 | ld->clear_request_commit(req); | ||
292 | return true; | ||
302 | } | 293 | } |
303 | 294 | ||
304 | static inline void pnfs_clear_request_commit(struct nfs_page *req) | 295 | static inline int |
296 | pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) | ||
305 | { | 297 | { |
306 | if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) | 298 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; |
307 | put_lseg(req->wb_commit_lseg); | 299 | int ret; |
300 | |||
301 | if (ld == NULL || ld->scan_commit_lists == NULL) | ||
302 | return 0; | ||
303 | ret = ld->scan_commit_lists(inode, max, lock); | ||
304 | if (ret != 0) | ||
305 | set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); | ||
306 | return ret; | ||
308 | } | 307 | } |
309 | 308 | ||
310 | /* Should the pNFS client commit and return the layout upon a setattr */ | 309 | /* Should the pNFS client commit and return the layout upon a setattr */ |
@@ -328,6 +327,13 @@ static inline int pnfs_return_layout(struct inode *ino) | |||
328 | return 0; | 327 | return 0; |
329 | } | 328 | } |
330 | 329 | ||
330 | #ifdef NFS_DEBUG | ||
331 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); | ||
332 | #else | ||
333 | static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id) | ||
334 | { | ||
335 | } | ||
336 | #endif /* NFS_DEBUG */ | ||
331 | #else /* CONFIG_NFS_V4_1 */ | 337 | #else /* CONFIG_NFS_V4_1 */ |
332 | 338 | ||
333 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 339 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
@@ -400,35 +406,35 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st | |||
400 | return false; | 406 | return false; |
401 | } | 407 | } |
402 | 408 | ||
403 | static inline void | ||
404 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
405 | { | ||
406 | } | ||
407 | |||
408 | static inline int | 409 | static inline int |
409 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | 410 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) |
410 | { | 411 | { |
411 | return PNFS_NOT_ATTEMPTED; | 412 | return PNFS_NOT_ATTEMPTED; |
412 | } | 413 | } |
413 | 414 | ||
414 | static inline struct list_head * | 415 | static inline bool |
415 | pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) | 416 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
416 | { | 417 | { |
417 | return mds; | 418 | return false; |
418 | } | 419 | } |
419 | 420 | ||
420 | static inline void pnfs_clear_request_commit(struct nfs_page *req) | 421 | static inline bool |
422 | pnfs_clear_request_commit(struct nfs_page *req) | ||
421 | { | 423 | { |
424 | return false; | ||
422 | } | 425 | } |
423 | 426 | ||
424 | static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) | 427 | static inline int |
428 | pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) | ||
425 | { | 429 | { |
426 | return 0; | 430 | return 0; |
427 | } | 431 | } |
428 | 432 | ||
429 | static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) | 433 | static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) |
430 | { | 434 | { |
435 | return 0; | ||
431 | } | 436 | } |
437 | |||
432 | #endif /* CONFIG_NFS_V4_1 */ | 438 | #endif /* CONFIG_NFS_V4_1 */ |
433 | 439 | ||
434 | #endif /* FS_NFS_PNFS_H */ | 440 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 4f359d2a26eb..73f701f1f4d3 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -43,6 +43,7 @@ | |||
43 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; | 43 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; |
44 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); | 44 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); |
45 | 45 | ||
46 | #ifdef NFS_DEBUG | ||
46 | void | 47 | void |
47 | nfs4_print_deviceid(const struct nfs4_deviceid *id) | 48 | nfs4_print_deviceid(const struct nfs4_deviceid *id) |
48 | { | 49 | { |
@@ -52,6 +53,7 @@ nfs4_print_deviceid(const struct nfs4_deviceid *id) | |||
52 | p[0], p[1], p[2], p[3]); | 53 | p[0], p[1], p[2], p[3]); |
53 | } | 54 | } |
54 | EXPORT_SYMBOL_GPL(nfs4_print_deviceid); | 55 | EXPORT_SYMBOL_GPL(nfs4_print_deviceid); |
56 | #endif | ||
55 | 57 | ||
56 | static inline u32 | 58 | static inline u32 |
57 | nfs4_deviceid_hash(const struct nfs4_deviceid *id) | 59 | nfs4_deviceid_hash(const struct nfs4_deviceid *id) |
@@ -92,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
92 | * @clp nfs_client associated with deviceid | 94 | * @clp nfs_client associated with deviceid |
93 | * @id deviceid to look up | 95 | * @id deviceid to look up |
94 | */ | 96 | */ |
95 | struct nfs4_deviceid_node * | 97 | static struct nfs4_deviceid_node * |
96 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 98 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, |
97 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | 99 | const struct nfs_client *clp, const struct nfs4_deviceid *id, |
98 | long hash) | 100 | long hash) |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0c672588fe5a..b63b6f4d14fb 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -358,6 +358,11 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) | |||
358 | msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; | 358 | msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; |
359 | } | 359 | } |
360 | 360 | ||
361 | static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) | ||
362 | { | ||
363 | rpc_call_start(task); | ||
364 | } | ||
365 | |||
361 | static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) | 366 | static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) |
362 | { | 367 | { |
363 | if (nfs_async_handle_expired_key(task)) | 368 | if (nfs_async_handle_expired_key(task)) |
@@ -372,6 +377,11 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) | |||
372 | msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; | 377 | msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; |
373 | } | 378 | } |
374 | 379 | ||
380 | static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) | ||
381 | { | ||
382 | rpc_call_start(task); | ||
383 | } | ||
384 | |||
375 | static int | 385 | static int |
376 | nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | 386 | nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, |
377 | struct inode *new_dir) | 387 | struct inode *new_dir) |
@@ -651,6 +661,11 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * | |||
651 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; | 661 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; |
652 | } | 662 | } |
653 | 663 | ||
664 | static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | ||
665 | { | ||
666 | rpc_call_start(task); | ||
667 | } | ||
668 | |||
654 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | 669 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) |
655 | { | 670 | { |
656 | if (nfs_async_handle_expired_key(task)) | 671 | if (nfs_async_handle_expired_key(task)) |
@@ -668,6 +683,11 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message | |||
668 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; | 683 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; |
669 | } | 684 | } |
670 | 685 | ||
686 | static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
687 | { | ||
688 | rpc_call_start(task); | ||
689 | } | ||
690 | |||
671 | static void | 691 | static void |
672 | nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) | 692 | nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) |
673 | { | 693 | { |
@@ -721,9 +741,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
721 | .create = nfs_proc_create, | 741 | .create = nfs_proc_create, |
722 | .remove = nfs_proc_remove, | 742 | .remove = nfs_proc_remove, |
723 | .unlink_setup = nfs_proc_unlink_setup, | 743 | .unlink_setup = nfs_proc_unlink_setup, |
744 | .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, | ||
724 | .unlink_done = nfs_proc_unlink_done, | 745 | .unlink_done = nfs_proc_unlink_done, |
725 | .rename = nfs_proc_rename, | 746 | .rename = nfs_proc_rename, |
726 | .rename_setup = nfs_proc_rename_setup, | 747 | .rename_setup = nfs_proc_rename_setup, |
748 | .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, | ||
727 | .rename_done = nfs_proc_rename_done, | 749 | .rename_done = nfs_proc_rename_done, |
728 | .link = nfs_proc_link, | 750 | .link = nfs_proc_link, |
729 | .symlink = nfs_proc_symlink, | 751 | .symlink = nfs_proc_symlink, |
@@ -736,8 +758,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
736 | .pathconf = nfs_proc_pathconf, | 758 | .pathconf = nfs_proc_pathconf, |
737 | .decode_dirent = nfs2_decode_dirent, | 759 | .decode_dirent = nfs2_decode_dirent, |
738 | .read_setup = nfs_proc_read_setup, | 760 | .read_setup = nfs_proc_read_setup, |
761 | .read_rpc_prepare = nfs_proc_read_rpc_prepare, | ||
739 | .read_done = nfs_read_done, | 762 | .read_done = nfs_read_done, |
740 | .write_setup = nfs_proc_write_setup, | 763 | .write_setup = nfs_proc_write_setup, |
764 | .write_rpc_prepare = nfs_proc_write_rpc_prepare, | ||
741 | .write_done = nfs_write_done, | 765 | .write_done = nfs_write_done, |
742 | .commit_setup = nfs_proc_commit_setup, | 766 | .commit_setup = nfs_proc_commit_setup, |
743 | .lock = nfs_proc_lock, | 767 | .lock = nfs_proc_lock, |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b83e89bf4a74..9a0e8ef4a409 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -65,7 +65,6 @@ void nfs_readdata_free(struct nfs_read_data *p) | |||
65 | 65 | ||
66 | void nfs_readdata_release(struct nfs_read_data *rdata) | 66 | void nfs_readdata_release(struct nfs_read_data *rdata) |
67 | { | 67 | { |
68 | put_lseg(rdata->lseg); | ||
69 | put_nfs_open_context(rdata->args.context); | 68 | put_nfs_open_context(rdata->args.context); |
70 | nfs_readdata_free(rdata); | 69 | nfs_readdata_free(rdata); |
71 | } | 70 | } |
@@ -464,23 +463,14 @@ static void nfs_readpage_release_partial(void *calldata) | |||
464 | nfs_readdata_release(calldata); | 463 | nfs_readdata_release(calldata); |
465 | } | 464 | } |
466 | 465 | ||
467 | #if defined(CONFIG_NFS_V4_1) | ||
468 | void nfs_read_prepare(struct rpc_task *task, void *calldata) | 466 | void nfs_read_prepare(struct rpc_task *task, void *calldata) |
469 | { | 467 | { |
470 | struct nfs_read_data *data = calldata; | 468 | struct nfs_read_data *data = calldata; |
471 | 469 | NFS_PROTO(data->inode)->read_rpc_prepare(task, data); | |
472 | if (nfs4_setup_sequence(NFS_SERVER(data->inode), | ||
473 | &data->args.seq_args, &data->res.seq_res, | ||
474 | 0, task)) | ||
475 | return; | ||
476 | rpc_call_start(task); | ||
477 | } | 470 | } |
478 | #endif /* CONFIG_NFS_V4_1 */ | ||
479 | 471 | ||
480 | static const struct rpc_call_ops nfs_read_partial_ops = { | 472 | static const struct rpc_call_ops nfs_read_partial_ops = { |
481 | #if defined(CONFIG_NFS_V4_1) | ||
482 | .rpc_call_prepare = nfs_read_prepare, | 473 | .rpc_call_prepare = nfs_read_prepare, |
483 | #endif /* CONFIG_NFS_V4_1 */ | ||
484 | .rpc_call_done = nfs_readpage_result_partial, | 474 | .rpc_call_done = nfs_readpage_result_partial, |
485 | .rpc_release = nfs_readpage_release_partial, | 475 | .rpc_release = nfs_readpage_release_partial, |
486 | }; | 476 | }; |
@@ -544,9 +534,7 @@ static void nfs_readpage_release_full(void *calldata) | |||
544 | } | 534 | } |
545 | 535 | ||
546 | static const struct rpc_call_ops nfs_read_full_ops = { | 536 | static const struct rpc_call_ops nfs_read_full_ops = { |
547 | #if defined(CONFIG_NFS_V4_1) | ||
548 | .rpc_call_prepare = nfs_read_prepare, | 537 | .rpc_call_prepare = nfs_read_prepare, |
549 | #endif /* CONFIG_NFS_V4_1 */ | ||
550 | .rpc_call_done = nfs_readpage_result_full, | 538 | .rpc_call_done = nfs_readpage_result_full, |
551 | .rpc_release = nfs_readpage_release_full, | 539 | .rpc_release = nfs_readpage_release_full, |
552 | }; | 540 | }; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e3f6b2349411..37412f706b32 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -52,6 +52,8 @@ | |||
52 | #include <linux/nfs_xdr.h> | 52 | #include <linux/nfs_xdr.h> |
53 | #include <linux/magic.h> | 53 | #include <linux/magic.h> |
54 | #include <linux/parser.h> | 54 | #include <linux/parser.h> |
55 | #include <linux/nsproxy.h> | ||
56 | #include <linux/rcupdate.h> | ||
55 | 57 | ||
56 | #include <asm/uaccess.h> | 58 | #include <asm/uaccess.h> |
57 | 59 | ||
@@ -78,7 +80,6 @@ enum { | |||
78 | Opt_cto, Opt_nocto, | 80 | Opt_cto, Opt_nocto, |
79 | Opt_ac, Opt_noac, | 81 | Opt_ac, Opt_noac, |
80 | Opt_lock, Opt_nolock, | 82 | Opt_lock, Opt_nolock, |
81 | Opt_v2, Opt_v3, Opt_v4, | ||
82 | Opt_udp, Opt_tcp, Opt_rdma, | 83 | Opt_udp, Opt_tcp, Opt_rdma, |
83 | Opt_acl, Opt_noacl, | 84 | Opt_acl, Opt_noacl, |
84 | Opt_rdirplus, Opt_nordirplus, | 85 | Opt_rdirplus, Opt_nordirplus, |
@@ -96,10 +97,10 @@ enum { | |||
96 | Opt_namelen, | 97 | Opt_namelen, |
97 | Opt_mountport, | 98 | Opt_mountport, |
98 | Opt_mountvers, | 99 | Opt_mountvers, |
99 | Opt_nfsvers, | ||
100 | Opt_minorversion, | 100 | Opt_minorversion, |
101 | 101 | ||
102 | /* Mount options that take string arguments */ | 102 | /* Mount options that take string arguments */ |
103 | Opt_nfsvers, | ||
103 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 104 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
104 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 105 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
105 | Opt_lookupcache, | 106 | Opt_lookupcache, |
@@ -131,9 +132,6 @@ static const match_table_t nfs_mount_option_tokens = { | |||
131 | { Opt_noac, "noac" }, | 132 | { Opt_noac, "noac" }, |
132 | { Opt_lock, "lock" }, | 133 | { Opt_lock, "lock" }, |
133 | { Opt_nolock, "nolock" }, | 134 | { Opt_nolock, "nolock" }, |
134 | { Opt_v2, "v2" }, | ||
135 | { Opt_v3, "v3" }, | ||
136 | { Opt_v4, "v4" }, | ||
137 | { Opt_udp, "udp" }, | 135 | { Opt_udp, "udp" }, |
138 | { Opt_tcp, "tcp" }, | 136 | { Opt_tcp, "tcp" }, |
139 | { Opt_rdma, "rdma" }, | 137 | { Opt_rdma, "rdma" }, |
@@ -162,9 +160,10 @@ static const match_table_t nfs_mount_option_tokens = { | |||
162 | { Opt_namelen, "namlen=%s" }, | 160 | { Opt_namelen, "namlen=%s" }, |
163 | { Opt_mountport, "mountport=%s" }, | 161 | { Opt_mountport, "mountport=%s" }, |
164 | { Opt_mountvers, "mountvers=%s" }, | 162 | { Opt_mountvers, "mountvers=%s" }, |
163 | { Opt_minorversion, "minorversion=%s" }, | ||
164 | |||
165 | { Opt_nfsvers, "nfsvers=%s" }, | 165 | { Opt_nfsvers, "nfsvers=%s" }, |
166 | { Opt_nfsvers, "vers=%s" }, | 166 | { Opt_nfsvers, "vers=%s" }, |
167 | { Opt_minorversion, "minorversion=%s" }, | ||
168 | 167 | ||
169 | { Opt_sec, "sec=%s" }, | 168 | { Opt_sec, "sec=%s" }, |
170 | { Opt_proto, "proto=%s" }, | 169 | { Opt_proto, "proto=%s" }, |
@@ -178,6 +177,9 @@ static const match_table_t nfs_mount_option_tokens = { | |||
178 | { Opt_fscache_uniq, "fsc=%s" }, | 177 | { Opt_fscache_uniq, "fsc=%s" }, |
179 | { Opt_local_lock, "local_lock=%s" }, | 178 | { Opt_local_lock, "local_lock=%s" }, |
180 | 179 | ||
180 | /* The following needs to be listed after all other options */ | ||
181 | { Opt_nfsvers, "v%s" }, | ||
182 | |||
181 | { Opt_err, NULL } | 183 | { Opt_err, NULL } |
182 | }; | 184 | }; |
183 | 185 | ||
@@ -258,6 +260,22 @@ static match_table_t nfs_local_lock_tokens = { | |||
258 | { Opt_local_lock_err, NULL } | 260 | { Opt_local_lock_err, NULL } |
259 | }; | 261 | }; |
260 | 262 | ||
263 | enum { | ||
264 | Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, | ||
265 | Opt_vers_4_1, | ||
266 | |||
267 | Opt_vers_err | ||
268 | }; | ||
269 | |||
270 | static match_table_t nfs_vers_tokens = { | ||
271 | { Opt_vers_2, "2" }, | ||
272 | { Opt_vers_3, "3" }, | ||
273 | { Opt_vers_4, "4" }, | ||
274 | { Opt_vers_4_0, "4.0" }, | ||
275 | { Opt_vers_4_1, "4.1" }, | ||
276 | |||
277 | { Opt_vers_err, NULL } | ||
278 | }; | ||
261 | 279 | ||
262 | static void nfs_umount_begin(struct super_block *); | 280 | static void nfs_umount_begin(struct super_block *); |
263 | static int nfs_statfs(struct dentry *, struct kstatfs *); | 281 | static int nfs_statfs(struct dentry *, struct kstatfs *); |
@@ -619,7 +637,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, | |||
619 | struct nfs_client *clp = nfss->nfs_client; | 637 | struct nfs_client *clp = nfss->nfs_client; |
620 | 638 | ||
621 | seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); | 639 | seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); |
622 | seq_printf(m, ",minorversion=%u", clp->cl_minorversion); | ||
623 | } | 640 | } |
624 | #else | 641 | #else |
625 | static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, | 642 | static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, |
@@ -628,6 +645,15 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, | |||
628 | } | 645 | } |
629 | #endif | 646 | #endif |
630 | 647 | ||
648 | static void nfs_show_nfs_version(struct seq_file *m, | ||
649 | unsigned int version, | ||
650 | unsigned int minorversion) | ||
651 | { | ||
652 | seq_printf(m, ",vers=%u", version); | ||
653 | if (version == 4) | ||
654 | seq_printf(m, ".%u", minorversion); | ||
655 | } | ||
656 | |||
631 | /* | 657 | /* |
632 | * Describe the mount options in force on this server representation | 658 | * Describe the mount options in force on this server representation |
633 | */ | 659 | */ |
@@ -655,7 +681,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
655 | u32 version = clp->rpc_ops->version; | 681 | u32 version = clp->rpc_ops->version; |
656 | int local_flock, local_fcntl; | 682 | int local_flock, local_fcntl; |
657 | 683 | ||
658 | seq_printf(m, ",vers=%u", version); | 684 | nfs_show_nfs_version(m, version, clp->cl_minorversion); |
659 | seq_printf(m, ",rsize=%u", nfss->rsize); | 685 | seq_printf(m, ",rsize=%u", nfss->rsize); |
660 | seq_printf(m, ",wsize=%u", nfss->wsize); | 686 | seq_printf(m, ",wsize=%u", nfss->wsize); |
661 | if (nfss->bsize != 0) | 687 | if (nfss->bsize != 0) |
@@ -675,8 +701,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
675 | else | 701 | else |
676 | seq_puts(m, nfs_infop->nostr); | 702 | seq_puts(m, nfs_infop->nostr); |
677 | } | 703 | } |
704 | rcu_read_lock(); | ||
678 | seq_printf(m, ",proto=%s", | 705 | seq_printf(m, ",proto=%s", |
679 | rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); | 706 | rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); |
707 | rcu_read_unlock(); | ||
680 | if (version == 4) { | 708 | if (version == 4) { |
681 | if (nfss->port != NFS_PORT) | 709 | if (nfss->port != NFS_PORT) |
682 | seq_printf(m, ",port=%u", nfss->port); | 710 | seq_printf(m, ",port=%u", nfss->port); |
@@ -725,9 +753,11 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root) | |||
725 | 753 | ||
726 | nfs_show_mount_options(m, nfss, 0); | 754 | nfs_show_mount_options(m, nfss, 0); |
727 | 755 | ||
756 | rcu_read_lock(); | ||
728 | seq_printf(m, ",addr=%s", | 757 | seq_printf(m, ",addr=%s", |
729 | rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, | 758 | rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, |
730 | RPC_DISPLAY_ADDR)); | 759 | RPC_DISPLAY_ADDR)); |
760 | rcu_read_unlock(); | ||
731 | 761 | ||
732 | return 0; | 762 | return 0; |
733 | } | 763 | } |
@@ -744,7 +774,6 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {} | |||
744 | #endif | 774 | #endif |
745 | #endif | 775 | #endif |
746 | 776 | ||
747 | #ifdef CONFIG_NFS_V4 | ||
748 | #ifdef CONFIG_NFS_V4_1 | 777 | #ifdef CONFIG_NFS_V4_1 |
749 | static void show_pnfs(struct seq_file *m, struct nfs_server *server) | 778 | static void show_pnfs(struct seq_file *m, struct nfs_server *server) |
750 | { | 779 | { |
@@ -754,9 +783,26 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) | |||
754 | else | 783 | else |
755 | seq_printf(m, "not configured"); | 784 | seq_printf(m, "not configured"); |
756 | } | 785 | } |
786 | |||
787 | static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) | ||
788 | { | ||
789 | if (nfss->nfs_client && nfss->nfs_client->impl_id) { | ||
790 | struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; | ||
791 | seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," | ||
792 | "date='%llu,%u'", | ||
793 | impl_id->name, impl_id->domain, | ||
794 | impl_id->date.seconds, impl_id->date.nseconds); | ||
795 | } | ||
796 | } | ||
757 | #else | 797 | #else |
758 | static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} | 798 | #ifdef CONFIG_NFS_V4 |
799 | static void show_pnfs(struct seq_file *m, struct nfs_server *server) | ||
800 | { | ||
801 | } | ||
759 | #endif | 802 | #endif |
803 | static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) | ||
804 | { | ||
805 | } | ||
760 | #endif | 806 | #endif |
761 | 807 | ||
762 | static int nfs_show_devname(struct seq_file *m, struct dentry *root) | 808 | static int nfs_show_devname(struct seq_file *m, struct dentry *root) |
@@ -805,6 +851,8 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) | |||
805 | 851 | ||
806 | seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); | 852 | seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); |
807 | 853 | ||
854 | show_implementation_id(m, nfss); | ||
855 | |||
808 | seq_printf(m, "\n\tcaps:\t"); | 856 | seq_printf(m, "\n\tcaps:\t"); |
809 | seq_printf(m, "caps=0x%x", nfss->caps); | 857 | seq_printf(m, "caps=0x%x", nfss->caps); |
810 | seq_printf(m, ",wtmult=%u", nfss->wtmult); | 858 | seq_printf(m, ",wtmult=%u", nfss->wtmult); |
@@ -907,6 +955,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve | |||
907 | data->auth_flavor_len = 1; | 955 | data->auth_flavor_len = 1; |
908 | data->version = version; | 956 | data->version = version; |
909 | data->minorversion = 0; | 957 | data->minorversion = 0; |
958 | data->net = current->nsproxy->net_ns; | ||
910 | security_init_mnt_opts(&data->lsm_opts); | 959 | security_init_mnt_opts(&data->lsm_opts); |
911 | } | 960 | } |
912 | return data; | 961 | return data; |
@@ -1051,6 +1100,40 @@ static int nfs_parse_security_flavors(char *value, | |||
1051 | return 1; | 1100 | return 1; |
1052 | } | 1101 | } |
1053 | 1102 | ||
1103 | static int nfs_parse_version_string(char *string, | ||
1104 | struct nfs_parsed_mount_data *mnt, | ||
1105 | substring_t *args) | ||
1106 | { | ||
1107 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
1108 | switch (match_token(string, nfs_vers_tokens, args)) { | ||
1109 | case Opt_vers_2: | ||
1110 | mnt->version = 2; | ||
1111 | break; | ||
1112 | case Opt_vers_3: | ||
1113 | mnt->flags |= NFS_MOUNT_VER3; | ||
1114 | mnt->version = 3; | ||
1115 | break; | ||
1116 | case Opt_vers_4: | ||
1117 | /* Backward compatibility option. In future, | ||
1118 | * the mount program should always supply | ||
1119 | * a NFSv4 minor version number. | ||
1120 | */ | ||
1121 | mnt->version = 4; | ||
1122 | break; | ||
1123 | case Opt_vers_4_0: | ||
1124 | mnt->version = 4; | ||
1125 | mnt->minorversion = 0; | ||
1126 | break; | ||
1127 | case Opt_vers_4_1: | ||
1128 | mnt->version = 4; | ||
1129 | mnt->minorversion = 1; | ||
1130 | break; | ||
1131 | default: | ||
1132 | return 0; | ||
1133 | } | ||
1134 | return 1; | ||
1135 | } | ||
1136 | |||
1054 | static int nfs_get_option_str(substring_t args[], char **option) | 1137 | static int nfs_get_option_str(substring_t args[], char **option) |
1055 | { | 1138 | { |
1056 | kfree(*option); | 1139 | kfree(*option); |
@@ -1156,18 +1239,6 @@ static int nfs_parse_mount_options(char *raw, | |||
1156 | mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | | 1239 | mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | |
1157 | NFS_MOUNT_LOCAL_FCNTL); | 1240 | NFS_MOUNT_LOCAL_FCNTL); |
1158 | break; | 1241 | break; |
1159 | case Opt_v2: | ||
1160 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
1161 | mnt->version = 2; | ||
1162 | break; | ||
1163 | case Opt_v3: | ||
1164 | mnt->flags |= NFS_MOUNT_VER3; | ||
1165 | mnt->version = 3; | ||
1166 | break; | ||
1167 | case Opt_v4: | ||
1168 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
1169 | mnt->version = 4; | ||
1170 | break; | ||
1171 | case Opt_udp: | 1242 | case Opt_udp: |
1172 | mnt->flags &= ~NFS_MOUNT_TCP; | 1243 | mnt->flags &= ~NFS_MOUNT_TCP; |
1173 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; | 1244 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
@@ -1294,26 +1365,6 @@ static int nfs_parse_mount_options(char *raw, | |||
1294 | goto out_invalid_value; | 1365 | goto out_invalid_value; |
1295 | mnt->mount_server.version = option; | 1366 | mnt->mount_server.version = option; |
1296 | break; | 1367 | break; |
1297 | case Opt_nfsvers: | ||
1298 | if (nfs_get_option_ul(args, &option)) | ||
1299 | goto out_invalid_value; | ||
1300 | switch (option) { | ||
1301 | case NFS2_VERSION: | ||
1302 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
1303 | mnt->version = 2; | ||
1304 | break; | ||
1305 | case NFS3_VERSION: | ||
1306 | mnt->flags |= NFS_MOUNT_VER3; | ||
1307 | mnt->version = 3; | ||
1308 | break; | ||
1309 | case NFS4_VERSION: | ||
1310 | mnt->flags &= ~NFS_MOUNT_VER3; | ||
1311 | mnt->version = 4; | ||
1312 | break; | ||
1313 | default: | ||
1314 | goto out_invalid_value; | ||
1315 | } | ||
1316 | break; | ||
1317 | case Opt_minorversion: | 1368 | case Opt_minorversion: |
1318 | if (nfs_get_option_ul(args, &option)) | 1369 | if (nfs_get_option_ul(args, &option)) |
1319 | goto out_invalid_value; | 1370 | goto out_invalid_value; |
@@ -1325,6 +1376,15 @@ static int nfs_parse_mount_options(char *raw, | |||
1325 | /* | 1376 | /* |
1326 | * options that take text values | 1377 | * options that take text values |
1327 | */ | 1378 | */ |
1379 | case Opt_nfsvers: | ||
1380 | string = match_strdup(args); | ||
1381 | if (string == NULL) | ||
1382 | goto out_nomem; | ||
1383 | rc = nfs_parse_version_string(string, mnt, args); | ||
1384 | kfree(string); | ||
1385 | if (!rc) | ||
1386 | goto out_invalid_value; | ||
1387 | break; | ||
1328 | case Opt_sec: | 1388 | case Opt_sec: |
1329 | string = match_strdup(args); | 1389 | string = match_strdup(args); |
1330 | if (string == NULL) | 1390 | if (string == NULL) |
@@ -1404,7 +1464,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1404 | if (string == NULL) | 1464 | if (string == NULL) |
1405 | goto out_nomem; | 1465 | goto out_nomem; |
1406 | mnt->nfs_server.addrlen = | 1466 | mnt->nfs_server.addrlen = |
1407 | rpc_pton(string, strlen(string), | 1467 | rpc_pton(mnt->net, string, strlen(string), |
1408 | (struct sockaddr *) | 1468 | (struct sockaddr *) |
1409 | &mnt->nfs_server.address, | 1469 | &mnt->nfs_server.address, |
1410 | sizeof(mnt->nfs_server.address)); | 1470 | sizeof(mnt->nfs_server.address)); |
@@ -1426,7 +1486,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1426 | if (string == NULL) | 1486 | if (string == NULL) |
1427 | goto out_nomem; | 1487 | goto out_nomem; |
1428 | mnt->mount_server.addrlen = | 1488 | mnt->mount_server.addrlen = |
1429 | rpc_pton(string, strlen(string), | 1489 | rpc_pton(mnt->net, string, strlen(string), |
1430 | (struct sockaddr *) | 1490 | (struct sockaddr *) |
1431 | &mnt->mount_server.address, | 1491 | &mnt->mount_server.address, |
1432 | sizeof(mnt->mount_server.address)); | 1492 | sizeof(mnt->mount_server.address)); |
@@ -1515,6 +1575,9 @@ static int nfs_parse_mount_options(char *raw, | |||
1515 | if (!sloppy && invalid_option) | 1575 | if (!sloppy && invalid_option) |
1516 | return 0; | 1576 | return 0; |
1517 | 1577 | ||
1578 | if (mnt->minorversion && mnt->version != 4) | ||
1579 | goto out_minorversion_mismatch; | ||
1580 | |||
1518 | /* | 1581 | /* |
1519 | * verify that any proto=/mountproto= options match the address | 1582 | * verify that any proto=/mountproto= options match the address |
1520 | * familiies in the addr=/mountaddr= options. | 1583 | * familiies in the addr=/mountaddr= options. |
@@ -1548,6 +1611,10 @@ out_invalid_address: | |||
1548 | out_invalid_value: | 1611 | out_invalid_value: |
1549 | printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); | 1612 | printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); |
1550 | return 0; | 1613 | return 0; |
1614 | out_minorversion_mismatch: | ||
1615 | printk(KERN_INFO "NFS: mount option vers=%u does not support " | ||
1616 | "minorversion=%u\n", mnt->version, mnt->minorversion); | ||
1617 | return 0; | ||
1551 | out_nomem: | 1618 | out_nomem: |
1552 | printk(KERN_INFO "NFS: not enough memory to parse option\n"); | 1619 | printk(KERN_INFO "NFS: not enough memory to parse option\n"); |
1553 | return 0; | 1620 | return 0; |
@@ -1621,6 +1688,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1621 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, | 1688 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, |
1622 | .auth_flav_len = &server_authlist_len, | 1689 | .auth_flav_len = &server_authlist_len, |
1623 | .auth_flavs = server_authlist, | 1690 | .auth_flavs = server_authlist, |
1691 | .net = args->net, | ||
1624 | }; | 1692 | }; |
1625 | int status; | 1693 | int status; |
1626 | 1694 | ||
@@ -2046,7 +2114,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
2046 | 2114 | ||
2047 | /* We probably want something more informative here */ | 2115 | /* We probably want something more informative here */ |
2048 | snprintf(sb->s_id, sizeof(sb->s_id), | 2116 | snprintf(sb->s_id, sizeof(sb->s_id), |
2049 | "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); | 2117 | "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); |
2050 | 2118 | ||
2051 | if (sb->s_blocksize == 0) | 2119 | if (sb->s_blocksize == 0) |
2052 | sb->s_blocksize = nfs_block_bits(server->wsize, | 2120 | sb->s_blocksize = nfs_block_bits(server->wsize, |
@@ -2498,12 +2566,6 @@ static int nfs4_validate_text_mount_data(void *options, | |||
2498 | return -EINVAL; | 2566 | return -EINVAL; |
2499 | } | 2567 | } |
2500 | 2568 | ||
2501 | if (args->client_address == NULL) { | ||
2502 | dfprintk(MOUNT, | ||
2503 | "NFS4: mount program didn't pass callback address\n"); | ||
2504 | return -EINVAL; | ||
2505 | } | ||
2506 | |||
2507 | return nfs_parse_devname(dev_name, | 2569 | return nfs_parse_devname(dev_name, |
2508 | &args->nfs_server.hostname, | 2570 | &args->nfs_server.hostname, |
2509 | NFS4_MAXNAMLEN, | 2571 | NFS4_MAXNAMLEN, |
@@ -2662,8 +2724,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2662 | if (!s->s_root) { | 2724 | if (!s->s_root) { |
2663 | /* initial superblock/root creation */ | 2725 | /* initial superblock/root creation */ |
2664 | nfs4_fill_super(s); | 2726 | nfs4_fill_super(s); |
2665 | nfs_fscache_get_super_cookie( | 2727 | nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); |
2666 | s, data ? data->fscache_uniq : NULL, NULL); | ||
2667 | } | 2728 | } |
2668 | 2729 | ||
2669 | mntroot = nfs4_get_root(s, mntfh, dev_name); | 2730 | mntroot = nfs4_get_root(s, mntfh, dev_name); |
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 978aaeb8a093..ad4d2e787b20 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c | |||
@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = { | |||
32 | .extra1 = (int *)&nfs_set_port_min, | 32 | .extra1 = (int *)&nfs_set_port_min, |
33 | .extra2 = (int *)&nfs_set_port_max, | 33 | .extra2 = (int *)&nfs_set_port_max, |
34 | }, | 34 | }, |
35 | #ifndef CONFIG_NFS_USE_NEW_IDMAPPER | ||
36 | { | 35 | { |
37 | .procname = "idmap_cache_timeout", | 36 | .procname = "idmap_cache_timeout", |
38 | .data = &nfs_idmap_cache_timeout, | 37 | .data = &nfs_idmap_cache_timeout, |
@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = { | |||
40 | .mode = 0644, | 39 | .mode = 0644, |
41 | .proc_handler = proc_dointvec_jiffies, | 40 | .proc_handler = proc_dointvec_jiffies, |
42 | }, | 41 | }, |
43 | #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ | ||
44 | #endif | 42 | #endif |
45 | { | 43 | { |
46 | .procname = "nfs_mountpoint_timeout", | 44 | .procname = "nfs_mountpoint_timeout", |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 4f9319a2e567..3210a03342f9 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -20,15 +20,6 @@ | |||
20 | #include "iostat.h" | 20 | #include "iostat.h" |
21 | #include "delegation.h" | 21 | #include "delegation.h" |
22 | 22 | ||
23 | struct nfs_unlinkdata { | ||
24 | struct hlist_node list; | ||
25 | struct nfs_removeargs args; | ||
26 | struct nfs_removeres res; | ||
27 | struct inode *dir; | ||
28 | struct rpc_cred *cred; | ||
29 | struct nfs_fattr dir_attr; | ||
30 | }; | ||
31 | |||
32 | /** | 23 | /** |
33 | * nfs_free_unlinkdata - release data from a sillydelete operation. | 24 | * nfs_free_unlinkdata - release data from a sillydelete operation. |
34 | * @data: pointer to unlink structure. | 25 | * @data: pointer to unlink structure. |
@@ -107,25 +98,16 @@ static void nfs_async_unlink_release(void *calldata) | |||
107 | nfs_sb_deactive(sb); | 98 | nfs_sb_deactive(sb); |
108 | } | 99 | } |
109 | 100 | ||
110 | #if defined(CONFIG_NFS_V4_1) | 101 | static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) |
111 | void nfs_unlink_prepare(struct rpc_task *task, void *calldata) | ||
112 | { | 102 | { |
113 | struct nfs_unlinkdata *data = calldata; | 103 | struct nfs_unlinkdata *data = calldata; |
114 | struct nfs_server *server = NFS_SERVER(data->dir); | 104 | NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); |
115 | |||
116 | if (nfs4_setup_sequence(server, &data->args.seq_args, | ||
117 | &data->res.seq_res, 1, task)) | ||
118 | return; | ||
119 | rpc_call_start(task); | ||
120 | } | 105 | } |
121 | #endif /* CONFIG_NFS_V4_1 */ | ||
122 | 106 | ||
123 | static const struct rpc_call_ops nfs_unlink_ops = { | 107 | static const struct rpc_call_ops nfs_unlink_ops = { |
124 | .rpc_call_done = nfs_async_unlink_done, | 108 | .rpc_call_done = nfs_async_unlink_done, |
125 | .rpc_release = nfs_async_unlink_release, | 109 | .rpc_release = nfs_async_unlink_release, |
126 | #if defined(CONFIG_NFS_V4_1) | ||
127 | .rpc_call_prepare = nfs_unlink_prepare, | 110 | .rpc_call_prepare = nfs_unlink_prepare, |
128 | #endif /* CONFIG_NFS_V4_1 */ | ||
129 | }; | 111 | }; |
130 | 112 | ||
131 | static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) | 113 | static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) |
@@ -341,18 +323,6 @@ nfs_cancel_async_unlink(struct dentry *dentry) | |||
341 | spin_unlock(&dentry->d_lock); | 323 | spin_unlock(&dentry->d_lock); |
342 | } | 324 | } |
343 | 325 | ||
344 | struct nfs_renamedata { | ||
345 | struct nfs_renameargs args; | ||
346 | struct nfs_renameres res; | ||
347 | struct rpc_cred *cred; | ||
348 | struct inode *old_dir; | ||
349 | struct dentry *old_dentry; | ||
350 | struct nfs_fattr old_fattr; | ||
351 | struct inode *new_dir; | ||
352 | struct dentry *new_dentry; | ||
353 | struct nfs_fattr new_fattr; | ||
354 | }; | ||
355 | |||
356 | /** | 326 | /** |
357 | * nfs_async_rename_done - Sillyrename post-processing | 327 | * nfs_async_rename_done - Sillyrename post-processing |
358 | * @task: rpc_task of the sillyrename | 328 | * @task: rpc_task of the sillyrename |
@@ -403,25 +373,16 @@ static void nfs_async_rename_release(void *calldata) | |||
403 | kfree(data); | 373 | kfree(data); |
404 | } | 374 | } |
405 | 375 | ||
406 | #if defined(CONFIG_NFS_V4_1) | ||
407 | static void nfs_rename_prepare(struct rpc_task *task, void *calldata) | 376 | static void nfs_rename_prepare(struct rpc_task *task, void *calldata) |
408 | { | 377 | { |
409 | struct nfs_renamedata *data = calldata; | 378 | struct nfs_renamedata *data = calldata; |
410 | struct nfs_server *server = NFS_SERVER(data->old_dir); | 379 | NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data); |
411 | |||
412 | if (nfs4_setup_sequence(server, &data->args.seq_args, | ||
413 | &data->res.seq_res, 1, task)) | ||
414 | return; | ||
415 | rpc_call_start(task); | ||
416 | } | 380 | } |
417 | #endif /* CONFIG_NFS_V4_1 */ | ||
418 | 381 | ||
419 | static const struct rpc_call_ops nfs_rename_ops = { | 382 | static const struct rpc_call_ops nfs_rename_ops = { |
420 | .rpc_call_done = nfs_async_rename_done, | 383 | .rpc_call_done = nfs_async_rename_done, |
421 | .rpc_release = nfs_async_rename_release, | 384 | .rpc_release = nfs_async_rename_release, |
422 | #if defined(CONFIG_NFS_V4_1) | ||
423 | .rpc_call_prepare = nfs_rename_prepare, | 385 | .rpc_call_prepare = nfs_rename_prepare, |
424 | #endif /* CONFIG_NFS_V4_1 */ | ||
425 | }; | 386 | }; |
426 | 387 | ||
427 | /** | 388 | /** |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 834f0fe96f89..2c68818f68ac 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p) | |||
100 | 100 | ||
101 | void nfs_writedata_release(struct nfs_write_data *wdata) | 101 | void nfs_writedata_release(struct nfs_write_data *wdata) |
102 | { | 102 | { |
103 | put_lseg(wdata->lseg); | ||
104 | put_nfs_open_context(wdata->args.context); | 103 | put_nfs_open_context(wdata->args.context); |
105 | nfs_writedata_free(wdata); | 104 | nfs_writedata_free(wdata); |
106 | } | 105 | } |
@@ -236,10 +235,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo | |||
236 | req = nfs_page_find_request_locked(page); | 235 | req = nfs_page_find_request_locked(page); |
237 | if (req == NULL) | 236 | if (req == NULL) |
238 | break; | 237 | break; |
239 | if (nfs_set_page_tag_locked(req)) | 238 | if (nfs_lock_request_dontget(req)) |
240 | break; | 239 | break; |
241 | /* Note: If we hold the page lock, as is the case in nfs_writepage, | 240 | /* Note: If we hold the page lock, as is the case in nfs_writepage, |
242 | * then the call to nfs_set_page_tag_locked() will always | 241 | * then the call to nfs_lock_request_dontget() will always |
243 | * succeed provided that someone hasn't already marked the | 242 | * succeed provided that someone hasn't already marked the |
244 | * request as dirty (in which case we don't care). | 243 | * request as dirty (in which case we don't care). |
245 | */ | 244 | */ |
@@ -375,21 +374,14 @@ out_err: | |||
375 | /* | 374 | /* |
376 | * Insert a write request into an inode | 375 | * Insert a write request into an inode |
377 | */ | 376 | */ |
378 | static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | 377 | static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) |
379 | { | 378 | { |
380 | struct nfs_inode *nfsi = NFS_I(inode); | 379 | struct nfs_inode *nfsi = NFS_I(inode); |
381 | int error; | ||
382 | |||
383 | error = radix_tree_preload(GFP_NOFS); | ||
384 | if (error != 0) | ||
385 | goto out; | ||
386 | 380 | ||
387 | /* Lock the request! */ | 381 | /* Lock the request! */ |
388 | nfs_lock_request_dontget(req); | 382 | nfs_lock_request_dontget(req); |
389 | 383 | ||
390 | spin_lock(&inode->i_lock); | 384 | spin_lock(&inode->i_lock); |
391 | error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); | ||
392 | BUG_ON(error); | ||
393 | if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) | 385 | if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) |
394 | inode->i_version++; | 386 | inode->i_version++; |
395 | set_bit(PG_MAPPED, &req->wb_flags); | 387 | set_bit(PG_MAPPED, &req->wb_flags); |
@@ -397,12 +389,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
397 | set_page_private(req->wb_page, (unsigned long)req); | 389 | set_page_private(req->wb_page, (unsigned long)req); |
398 | nfsi->npages++; | 390 | nfsi->npages++; |
399 | kref_get(&req->wb_kref); | 391 | kref_get(&req->wb_kref); |
400 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, | ||
401 | NFS_PAGE_TAG_LOCKED); | ||
402 | spin_unlock(&inode->i_lock); | 392 | spin_unlock(&inode->i_lock); |
403 | radix_tree_preload_end(); | ||
404 | out: | ||
405 | return error; | ||
406 | } | 393 | } |
407 | 394 | ||
408 | /* | 395 | /* |
@@ -419,7 +406,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
419 | set_page_private(req->wb_page, 0); | 406 | set_page_private(req->wb_page, 0); |
420 | ClearPagePrivate(req->wb_page); | 407 | ClearPagePrivate(req->wb_page); |
421 | clear_bit(PG_MAPPED, &req->wb_flags); | 408 | clear_bit(PG_MAPPED, &req->wb_flags); |
422 | radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); | ||
423 | nfsi->npages--; | 409 | nfsi->npages--; |
424 | spin_unlock(&inode->i_lock); | 410 | spin_unlock(&inode->i_lock); |
425 | nfs_release_request(req); | 411 | nfs_release_request(req); |
@@ -432,39 +418,90 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
432 | } | 418 | } |
433 | 419 | ||
434 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 420 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
435 | /* | 421 | /** |
436 | * Add a request to the inode's commit list. | 422 | * nfs_request_add_commit_list - add request to a commit list |
423 | * @req: pointer to a struct nfs_page | ||
424 | * @head: commit list head | ||
425 | * | ||
426 | * This sets the PG_CLEAN bit, updates the inode global count of | ||
427 | * number of outstanding requests requiring a commit as well as | ||
428 | * the MM page stats. | ||
429 | * | ||
430 | * The caller must _not_ hold the inode->i_lock, but must be | ||
431 | * holding the nfs_page lock. | ||
437 | */ | 432 | */ |
438 | static void | 433 | void |
439 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | 434 | nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) |
440 | { | 435 | { |
441 | struct inode *inode = req->wb_context->dentry->d_inode; | 436 | struct inode *inode = req->wb_context->dentry->d_inode; |
442 | struct nfs_inode *nfsi = NFS_I(inode); | ||
443 | 437 | ||
444 | spin_lock(&inode->i_lock); | ||
445 | set_bit(PG_CLEAN, &(req)->wb_flags); | 438 | set_bit(PG_CLEAN, &(req)->wb_flags); |
446 | radix_tree_tag_set(&nfsi->nfs_page_tree, | 439 | spin_lock(&inode->i_lock); |
447 | req->wb_index, | 440 | nfs_list_add_request(req, head); |
448 | NFS_PAGE_TAG_COMMIT); | 441 | NFS_I(inode)->ncommit++; |
449 | nfsi->ncommit++; | ||
450 | spin_unlock(&inode->i_lock); | 442 | spin_unlock(&inode->i_lock); |
451 | pnfs_mark_request_commit(req, lseg); | ||
452 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 443 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
453 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); | 444 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); |
454 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 445 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
455 | } | 446 | } |
447 | EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); | ||
456 | 448 | ||
457 | static int | 449 | /** |
450 | * nfs_request_remove_commit_list - Remove request from a commit list | ||
451 | * @req: pointer to a nfs_page | ||
452 | * | ||
453 | * This clears the PG_CLEAN bit, and updates the inode global count of | ||
454 | * number of outstanding requests requiring a commit | ||
455 | * It does not update the MM page stats. | ||
456 | * | ||
457 | * The caller _must_ hold the inode->i_lock and the nfs_page lock. | ||
458 | */ | ||
459 | void | ||
460 | nfs_request_remove_commit_list(struct nfs_page *req) | ||
461 | { | ||
462 | struct inode *inode = req->wb_context->dentry->d_inode; | ||
463 | |||
464 | if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) | ||
465 | return; | ||
466 | nfs_list_remove_request(req); | ||
467 | NFS_I(inode)->ncommit--; | ||
468 | } | ||
469 | EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); | ||
470 | |||
471 | |||
472 | /* | ||
473 | * Add a request to the inode's commit list. | ||
474 | */ | ||
475 | static void | ||
476 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
477 | { | ||
478 | struct inode *inode = req->wb_context->dentry->d_inode; | ||
479 | |||
480 | if (pnfs_mark_request_commit(req, lseg)) | ||
481 | return; | ||
482 | nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); | ||
483 | } | ||
484 | |||
485 | static void | ||
486 | nfs_clear_page_commit(struct page *page) | ||
487 | { | ||
488 | dec_zone_page_state(page, NR_UNSTABLE_NFS); | ||
489 | dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); | ||
490 | } | ||
491 | |||
492 | static void | ||
458 | nfs_clear_request_commit(struct nfs_page *req) | 493 | nfs_clear_request_commit(struct nfs_page *req) |
459 | { | 494 | { |
460 | struct page *page = req->wb_page; | 495 | if (test_bit(PG_CLEAN, &req->wb_flags)) { |
496 | struct inode *inode = req->wb_context->dentry->d_inode; | ||
461 | 497 | ||
462 | if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { | 498 | if (!pnfs_clear_request_commit(req)) { |
463 | dec_zone_page_state(page, NR_UNSTABLE_NFS); | 499 | spin_lock(&inode->i_lock); |
464 | dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); | 500 | nfs_request_remove_commit_list(req); |
465 | return 1; | 501 | spin_unlock(&inode->i_lock); |
502 | } | ||
503 | nfs_clear_page_commit(req->wb_page); | ||
466 | } | 504 | } |
467 | return 0; | ||
468 | } | 505 | } |
469 | 506 | ||
470 | static inline | 507 | static inline |
@@ -491,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, | |||
491 | return 0; | 528 | return 0; |
492 | } | 529 | } |
493 | #else | 530 | #else |
494 | static inline void | 531 | static void |
495 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | 532 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
496 | { | 533 | { |
497 | } | 534 | } |
498 | 535 | ||
499 | static inline int | 536 | static void |
500 | nfs_clear_request_commit(struct nfs_page *req) | 537 | nfs_clear_request_commit(struct nfs_page *req) |
501 | { | 538 | { |
502 | return 0; | ||
503 | } | 539 | } |
504 | 540 | ||
505 | static inline | 541 | static inline |
@@ -520,46 +556,65 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, | |||
520 | static int | 556 | static int |
521 | nfs_need_commit(struct nfs_inode *nfsi) | 557 | nfs_need_commit(struct nfs_inode *nfsi) |
522 | { | 558 | { |
523 | return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); | 559 | return nfsi->ncommit > 0; |
560 | } | ||
561 | |||
562 | /* i_lock held by caller */ | ||
563 | static int | ||
564 | nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, | ||
565 | spinlock_t *lock) | ||
566 | { | ||
567 | struct nfs_page *req, *tmp; | ||
568 | int ret = 0; | ||
569 | |||
570 | list_for_each_entry_safe(req, tmp, src, wb_list) { | ||
571 | if (!nfs_lock_request(req)) | ||
572 | continue; | ||
573 | if (cond_resched_lock(lock)) | ||
574 | list_safe_reset_next(req, tmp, wb_list); | ||
575 | nfs_request_remove_commit_list(req); | ||
576 | nfs_list_add_request(req, dst); | ||
577 | ret++; | ||
578 | if (ret == max) | ||
579 | break; | ||
580 | } | ||
581 | return ret; | ||
524 | } | 582 | } |
525 | 583 | ||
526 | /* | 584 | /* |
527 | * nfs_scan_commit - Scan an inode for commit requests | 585 | * nfs_scan_commit - Scan an inode for commit requests |
528 | * @inode: NFS inode to scan | 586 | * @inode: NFS inode to scan |
529 | * @dst: destination list | 587 | * @dst: destination list |
530 | * @idx_start: lower bound of page->index to scan. | ||
531 | * @npages: idx_start + npages sets the upper bound to scan. | ||
532 | * | 588 | * |
533 | * Moves requests from the inode's 'commit' request list. | 589 | * Moves requests from the inode's 'commit' request list. |
534 | * The requests are *not* checked to ensure that they form a contiguous set. | 590 | * The requests are *not* checked to ensure that they form a contiguous set. |
535 | */ | 591 | */ |
536 | static int | 592 | static int |
537 | nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) | 593 | nfs_scan_commit(struct inode *inode, struct list_head *dst) |
538 | { | 594 | { |
539 | struct nfs_inode *nfsi = NFS_I(inode); | 595 | struct nfs_inode *nfsi = NFS_I(inode); |
540 | int ret; | 596 | int ret = 0; |
541 | |||
542 | if (!nfs_need_commit(nfsi)) | ||
543 | return 0; | ||
544 | 597 | ||
545 | spin_lock(&inode->i_lock); | 598 | spin_lock(&inode->i_lock); |
546 | ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); | 599 | if (nfsi->ncommit > 0) { |
547 | if (ret > 0) | 600 | const int max = INT_MAX; |
548 | nfsi->ncommit -= ret; | ||
549 | spin_unlock(&inode->i_lock); | ||
550 | |||
551 | if (nfs_need_commit(NFS_I(inode))) | ||
552 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | ||
553 | 601 | ||
602 | ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, | ||
603 | &inode->i_lock); | ||
604 | ret += pnfs_scan_commit_lists(inode, max - ret, | ||
605 | &inode->i_lock); | ||
606 | } | ||
607 | spin_unlock(&inode->i_lock); | ||
554 | return ret; | 608 | return ret; |
555 | } | 609 | } |
610 | |||
556 | #else | 611 | #else |
557 | static inline int nfs_need_commit(struct nfs_inode *nfsi) | 612 | static inline int nfs_need_commit(struct nfs_inode *nfsi) |
558 | { | 613 | { |
559 | return 0; | 614 | return 0; |
560 | } | 615 | } |
561 | 616 | ||
562 | static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) | 617 | static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) |
563 | { | 618 | { |
564 | return 0; | 619 | return 0; |
565 | } | 620 | } |
@@ -604,7 +659,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
604 | || end < req->wb_offset) | 659 | || end < req->wb_offset) |
605 | goto out_flushme; | 660 | goto out_flushme; |
606 | 661 | ||
607 | if (nfs_set_page_tag_locked(req)) | 662 | if (nfs_lock_request_dontget(req)) |
608 | break; | 663 | break; |
609 | 664 | ||
610 | /* The request is locked, so wait and then retry */ | 665 | /* The request is locked, so wait and then retry */ |
@@ -616,13 +671,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
616 | spin_lock(&inode->i_lock); | 671 | spin_lock(&inode->i_lock); |
617 | } | 672 | } |
618 | 673 | ||
619 | if (nfs_clear_request_commit(req) && | ||
620 | radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, | ||
621 | req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { | ||
622 | NFS_I(inode)->ncommit--; | ||
623 | pnfs_clear_request_commit(req); | ||
624 | } | ||
625 | |||
626 | /* Okay, the request matches. Update the region */ | 674 | /* Okay, the request matches. Update the region */ |
627 | if (offset < req->wb_offset) { | 675 | if (offset < req->wb_offset) { |
628 | req->wb_offset = offset; | 676 | req->wb_offset = offset; |
@@ -634,6 +682,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
634 | req->wb_bytes = rqend - req->wb_offset; | 682 | req->wb_bytes = rqend - req->wb_offset; |
635 | out_unlock: | 683 | out_unlock: |
636 | spin_unlock(&inode->i_lock); | 684 | spin_unlock(&inode->i_lock); |
685 | nfs_clear_request_commit(req); | ||
637 | return req; | 686 | return req; |
638 | out_flushme: | 687 | out_flushme: |
639 | spin_unlock(&inode->i_lock); | 688 | spin_unlock(&inode->i_lock); |
@@ -655,7 +704,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, | |||
655 | { | 704 | { |
656 | struct inode *inode = page->mapping->host; | 705 | struct inode *inode = page->mapping->host; |
657 | struct nfs_page *req; | 706 | struct nfs_page *req; |
658 | int error; | ||
659 | 707 | ||
660 | req = nfs_try_to_update_request(inode, page, offset, bytes); | 708 | req = nfs_try_to_update_request(inode, page, offset, bytes); |
661 | if (req != NULL) | 709 | if (req != NULL) |
@@ -663,11 +711,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, | |||
663 | req = nfs_create_request(ctx, inode, page, offset, bytes); | 711 | req = nfs_create_request(ctx, inode, page, offset, bytes); |
664 | if (IS_ERR(req)) | 712 | if (IS_ERR(req)) |
665 | goto out; | 713 | goto out; |
666 | error = nfs_inode_add_request(inode, req); | 714 | nfs_inode_add_request(inode, req); |
667 | if (error != 0) { | ||
668 | nfs_release_request(req); | ||
669 | req = ERR_PTR(error); | ||
670 | } | ||
671 | out: | 715 | out: |
672 | return req; | 716 | return req; |
673 | } | 717 | } |
@@ -684,7 +728,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | |||
684 | nfs_grow_file(page, offset, count); | 728 | nfs_grow_file(page, offset, count); |
685 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | 729 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); |
686 | nfs_mark_request_dirty(req); | 730 | nfs_mark_request_dirty(req); |
687 | nfs_clear_page_tag_locked(req); | 731 | nfs_unlock_request(req); |
688 | return 0; | 732 | return 0; |
689 | } | 733 | } |
690 | 734 | ||
@@ -777,7 +821,7 @@ static void nfs_writepage_release(struct nfs_page *req, | |||
777 | 821 | ||
778 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) | 822 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) |
779 | nfs_inode_remove_request(req); | 823 | nfs_inode_remove_request(req); |
780 | nfs_clear_page_tag_locked(req); | 824 | nfs_unlock_request(req); |
781 | nfs_end_page_writeback(page); | 825 | nfs_end_page_writeback(page); |
782 | } | 826 | } |
783 | 827 | ||
@@ -925,7 +969,7 @@ static void nfs_redirty_request(struct nfs_page *req) | |||
925 | struct page *page = req->wb_page; | 969 | struct page *page = req->wb_page; |
926 | 970 | ||
927 | nfs_mark_request_dirty(req); | 971 | nfs_mark_request_dirty(req); |
928 | nfs_clear_page_tag_locked(req); | 972 | nfs_unlock_request(req); |
929 | nfs_end_page_writeback(page); | 973 | nfs_end_page_writeback(page); |
930 | } | 974 | } |
931 | 975 | ||
@@ -1128,23 +1172,14 @@ out: | |||
1128 | nfs_writedata_release(calldata); | 1172 | nfs_writedata_release(calldata); |
1129 | } | 1173 | } |
1130 | 1174 | ||
1131 | #if defined(CONFIG_NFS_V4_1) | ||
1132 | void nfs_write_prepare(struct rpc_task *task, void *calldata) | 1175 | void nfs_write_prepare(struct rpc_task *task, void *calldata) |
1133 | { | 1176 | { |
1134 | struct nfs_write_data *data = calldata; | 1177 | struct nfs_write_data *data = calldata; |
1135 | 1178 | NFS_PROTO(data->inode)->write_rpc_prepare(task, data); | |
1136 | if (nfs4_setup_sequence(NFS_SERVER(data->inode), | ||
1137 | &data->args.seq_args, | ||
1138 | &data->res.seq_res, 1, task)) | ||
1139 | return; | ||
1140 | rpc_call_start(task); | ||
1141 | } | 1179 | } |
1142 | #endif /* CONFIG_NFS_V4_1 */ | ||
1143 | 1180 | ||
1144 | static const struct rpc_call_ops nfs_write_partial_ops = { | 1181 | static const struct rpc_call_ops nfs_write_partial_ops = { |
1145 | #if defined(CONFIG_NFS_V4_1) | ||
1146 | .rpc_call_prepare = nfs_write_prepare, | 1182 | .rpc_call_prepare = nfs_write_prepare, |
1147 | #endif /* CONFIG_NFS_V4_1 */ | ||
1148 | .rpc_call_done = nfs_writeback_done_partial, | 1183 | .rpc_call_done = nfs_writeback_done_partial, |
1149 | .rpc_release = nfs_writeback_release_partial, | 1184 | .rpc_release = nfs_writeback_release_partial, |
1150 | }; | 1185 | }; |
@@ -1199,16 +1234,14 @@ static void nfs_writeback_release_full(void *calldata) | |||
1199 | remove_request: | 1234 | remove_request: |
1200 | nfs_inode_remove_request(req); | 1235 | nfs_inode_remove_request(req); |
1201 | next: | 1236 | next: |
1202 | nfs_clear_page_tag_locked(req); | 1237 | nfs_unlock_request(req); |
1203 | nfs_end_page_writeback(page); | 1238 | nfs_end_page_writeback(page); |
1204 | } | 1239 | } |
1205 | nfs_writedata_release(calldata); | 1240 | nfs_writedata_release(calldata); |
1206 | } | 1241 | } |
1207 | 1242 | ||
1208 | static const struct rpc_call_ops nfs_write_full_ops = { | 1243 | static const struct rpc_call_ops nfs_write_full_ops = { |
1209 | #if defined(CONFIG_NFS_V4_1) | ||
1210 | .rpc_call_prepare = nfs_write_prepare, | 1244 | .rpc_call_prepare = nfs_write_prepare, |
1211 | #endif /* CONFIG_NFS_V4_1 */ | ||
1212 | .rpc_call_done = nfs_writeback_done_full, | 1245 | .rpc_call_done = nfs_writeback_done_full, |
1213 | .rpc_release = nfs_writeback_release_full, | 1246 | .rpc_release = nfs_writeback_release_full, |
1214 | }; | 1247 | }; |
@@ -1325,7 +1358,6 @@ void nfs_commitdata_release(void *data) | |||
1325 | { | 1358 | { |
1326 | struct nfs_write_data *wdata = data; | 1359 | struct nfs_write_data *wdata = data; |
1327 | 1360 | ||
1328 | put_lseg(wdata->lseg); | ||
1329 | put_nfs_open_context(wdata->args.context); | 1361 | put_nfs_open_context(wdata->args.context); |
1330 | nfs_commit_free(wdata); | 1362 | nfs_commit_free(wdata); |
1331 | } | 1363 | } |
@@ -1411,7 +1443,7 @@ void nfs_retry_commit(struct list_head *page_list, | |||
1411 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 1443 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
1412 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, | 1444 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, |
1413 | BDI_RECLAIMABLE); | 1445 | BDI_RECLAIMABLE); |
1414 | nfs_clear_page_tag_locked(req); | 1446 | nfs_unlock_request(req); |
1415 | } | 1447 | } |
1416 | } | 1448 | } |
1417 | EXPORT_SYMBOL_GPL(nfs_retry_commit); | 1449 | EXPORT_SYMBOL_GPL(nfs_retry_commit); |
@@ -1460,7 +1492,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) | |||
1460 | while (!list_empty(&data->pages)) { | 1492 | while (!list_empty(&data->pages)) { |
1461 | req = nfs_list_entry(data->pages.next); | 1493 | req = nfs_list_entry(data->pages.next); |
1462 | nfs_list_remove_request(req); | 1494 | nfs_list_remove_request(req); |
1463 | nfs_clear_request_commit(req); | 1495 | nfs_clear_page_commit(req->wb_page); |
1464 | 1496 | ||
1465 | dprintk("NFS: commit (%s/%lld %d@%lld)", | 1497 | dprintk("NFS: commit (%s/%lld %d@%lld)", |
1466 | req->wb_context->dentry->d_sb->s_id, | 1498 | req->wb_context->dentry->d_sb->s_id, |
@@ -1486,7 +1518,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) | |||
1486 | dprintk(" mismatch\n"); | 1518 | dprintk(" mismatch\n"); |
1487 | nfs_mark_request_dirty(req); | 1519 | nfs_mark_request_dirty(req); |
1488 | next: | 1520 | next: |
1489 | nfs_clear_page_tag_locked(req); | 1521 | nfs_unlock_request(req); |
1490 | } | 1522 | } |
1491 | } | 1523 | } |
1492 | EXPORT_SYMBOL_GPL(nfs_commit_release_pages); | 1524 | EXPORT_SYMBOL_GPL(nfs_commit_release_pages); |
@@ -1501,9 +1533,7 @@ static void nfs_commit_release(void *calldata) | |||
1501 | } | 1533 | } |
1502 | 1534 | ||
1503 | static const struct rpc_call_ops nfs_commit_ops = { | 1535 | static const struct rpc_call_ops nfs_commit_ops = { |
1504 | #if defined(CONFIG_NFS_V4_1) | ||
1505 | .rpc_call_prepare = nfs_write_prepare, | 1536 | .rpc_call_prepare = nfs_write_prepare, |
1506 | #endif /* CONFIG_NFS_V4_1 */ | ||
1507 | .rpc_call_done = nfs_commit_done, | 1537 | .rpc_call_done = nfs_commit_done, |
1508 | .rpc_release = nfs_commit_release, | 1538 | .rpc_release = nfs_commit_release, |
1509 | }; | 1539 | }; |
@@ -1517,7 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how) | |||
1517 | res = nfs_commit_set_lock(NFS_I(inode), may_wait); | 1547 | res = nfs_commit_set_lock(NFS_I(inode), may_wait); |
1518 | if (res <= 0) | 1548 | if (res <= 0) |
1519 | goto out_mark_dirty; | 1549 | goto out_mark_dirty; |
1520 | res = nfs_scan_commit(inode, &head, 0, 0); | 1550 | res = nfs_scan_commit(inode, &head); |
1521 | if (res) { | 1551 | if (res) { |
1522 | int error; | 1552 | int error; |
1523 | 1553 | ||
@@ -1635,6 +1665,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
1635 | if (req == NULL) | 1665 | if (req == NULL) |
1636 | break; | 1666 | break; |
1637 | if (nfs_lock_request_dontget(req)) { | 1667 | if (nfs_lock_request_dontget(req)) { |
1668 | nfs_clear_request_commit(req); | ||
1638 | nfs_inode_remove_request(req); | 1669 | nfs_inode_remove_request(req); |
1639 | /* | 1670 | /* |
1640 | * In case nfs_inode_remove_request has marked the | 1671 | * In case nfs_inode_remove_request has marked the |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 6f3ebb48b12f..0e262f32ac41 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -605,24 +605,24 @@ static struct rpc_version nfs_cb_version4 = { | |||
605 | .procs = nfs4_cb_procedures | 605 | .procs = nfs4_cb_procedures |
606 | }; | 606 | }; |
607 | 607 | ||
608 | static struct rpc_version *nfs_cb_version[] = { | 608 | static const struct rpc_version *nfs_cb_version[] = { |
609 | &nfs_cb_version4, | 609 | &nfs_cb_version4, |
610 | }; | 610 | }; |
611 | 611 | ||
612 | static struct rpc_program cb_program; | 612 | static const struct rpc_program cb_program; |
613 | 613 | ||
614 | static struct rpc_stat cb_stats = { | 614 | static struct rpc_stat cb_stats = { |
615 | .program = &cb_program | 615 | .program = &cb_program |
616 | }; | 616 | }; |
617 | 617 | ||
618 | #define NFS4_CALLBACK 0x40000000 | 618 | #define NFS4_CALLBACK 0x40000000 |
619 | static struct rpc_program cb_program = { | 619 | static const struct rpc_program cb_program = { |
620 | .name = "nfs4_cb", | 620 | .name = "nfs4_cb", |
621 | .number = NFS4_CALLBACK, | 621 | .number = NFS4_CALLBACK, |
622 | .nrvers = ARRAY_SIZE(nfs_cb_version), | 622 | .nrvers = ARRAY_SIZE(nfs_cb_version), |
623 | .version = nfs_cb_version, | 623 | .version = nfs_cb_version, |
624 | .stats = &cb_stats, | 624 | .stats = &cb_stats, |
625 | .pipe_dir_name = "/nfsd4_cb", | 625 | .pipe_dir_name = "nfsd4_cb", |
626 | }; | 626 | }; |
627 | 627 | ||
628 | static int max_cb_time(void) | 628 | static int max_cb_time(void) |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e8c98f009670..c5cddd659429 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -1308,7 +1308,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r | |||
1308 | else | 1308 | else |
1309 | goto out_err; | 1309 | goto out_err; |
1310 | 1310 | ||
1311 | conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, | 1311 | conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val, |
1312 | se->se_callback_addr_len, | 1312 | se->se_callback_addr_len, |
1313 | (struct sockaddr *)&conn->cb_addr, | 1313 | (struct sockaddr *)&conn->cb_addr, |
1314 | sizeof(conn->cb_addr)); | 1314 | sizeof(conn->cb_addr)); |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 748eda93ce59..64c24af8d7ea 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -223,7 +223,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) | |||
223 | if (qword_get(&buf, fo_path, size) < 0) | 223 | if (qword_get(&buf, fo_path, size) < 0) |
224 | return -EINVAL; | 224 | return -EINVAL; |
225 | 225 | ||
226 | if (rpc_pton(fo_path, size, sap, salen) == 0) | 226 | if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) |
227 | return -EINVAL; | 227 | return -EINVAL; |
228 | 228 | ||
229 | return nlmsvc_unlock_all_by_ip(sap); | 229 | return nlmsvc_unlock_all_by_ip(sap); |
@@ -722,7 +722,7 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
722 | nfsd_serv->sv_nrthreads--; | 722 | nfsd_serv->sv_nrthreads--; |
723 | return 0; | 723 | return 0; |
724 | out_close: | 724 | out_close: |
725 | xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); | 725 | xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port); |
726 | if (xprt != NULL) { | 726 | if (xprt != NULL) { |
727 | svc_close_xprt(xprt); | 727 | svc_close_xprt(xprt); |
728 | svc_xprt_put(xprt); | 728 | svc_xprt_put(xprt); |
@@ -748,7 +748,7 @@ static ssize_t __write_ports_delxprt(char *buf) | |||
748 | if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) | 748 | if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) |
749 | return -EINVAL; | 749 | return -EINVAL; |
750 | 750 | ||
751 | xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); | 751 | xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port); |
752 | if (xprt == NULL) | 752 | if (xprt == NULL) |
753 | return -ENOTCONN; | 753 | return -ENOTCONN; |
754 | 754 | ||
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index eda7d7e55e05..fce472f5f39e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -251,13 +251,13 @@ static void nfsd_shutdown(void) | |||
251 | nfsd_up = false; | 251 | nfsd_up = false; |
252 | } | 252 | } |
253 | 253 | ||
254 | static void nfsd_last_thread(struct svc_serv *serv) | 254 | static void nfsd_last_thread(struct svc_serv *serv, struct net *net) |
255 | { | 255 | { |
256 | /* When last nfsd thread exits we need to do some clean-up */ | 256 | /* When last nfsd thread exits we need to do some clean-up */ |
257 | nfsd_serv = NULL; | 257 | nfsd_serv = NULL; |
258 | nfsd_shutdown(); | 258 | nfsd_shutdown(); |
259 | 259 | ||
260 | svc_rpcb_cleanup(serv); | 260 | svc_rpcb_cleanup(serv, net); |
261 | 261 | ||
262 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " | 262 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " |
263 | "cache\n"); | 263 | "cache\n"); |
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index a2e2402b2afb..6d4521feb6e3 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/sunrpc/stats.h> | 26 | #include <linux/sunrpc/stats.h> |
27 | #include <linux/nfsd/stats.h> | 27 | #include <linux/nfsd/stats.h> |
28 | #include <net/net_namespace.h> | ||
28 | 29 | ||
29 | #include "nfsd.h" | 30 | #include "nfsd.h" |
30 | 31 | ||
@@ -94,11 +95,11 @@ static const struct file_operations nfsd_proc_fops = { | |||
94 | void | 95 | void |
95 | nfsd_stat_init(void) | 96 | nfsd_stat_init(void) |
96 | { | 97 | { |
97 | svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); | 98 | svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops); |
98 | } | 99 | } |
99 | 100 | ||
100 | void | 101 | void |
101 | nfsd_stat_shutdown(void) | 102 | nfsd_stat_shutdown(void) |
102 | { | 103 | { |
103 | svc_proc_unregister("nfsd"); | 104 | svc_proc_unregister(&init_net, "nfsd"); |
104 | } | 105 | } |
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index ee188158a224..c887b1378f7e 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -447,7 +447,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
447 | return event; | 447 | return event; |
448 | } | 448 | } |
449 | 449 | ||
450 | __init int fsnotify_notification_init(void) | 450 | static __init int fsnotify_notification_init(void) |
451 | { | 451 | { |
452 | fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); | 452 | fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); |
453 | fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); | 453 | fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); |
@@ -461,4 +461,3 @@ __init int fsnotify_notification_init(void) | |||
461 | return 0; | 461 | return 0; |
462 | } | 462 | } |
463 | subsys_initcall(fsnotify_notification_init); | 463 | subsys_initcall(fsnotify_notification_init); |
464 | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/log2.h> | 14 | #include <linux/log2.h> |
15 | #include <linux/mount.h> | 15 | #include <linux/mount.h> |
16 | #include <linux/magic.h> | ||
16 | #include <linux/pipe_fs_i.h> | 17 | #include <linux/pipe_fs_i.h> |
17 | #include <linux/uio.h> | 18 | #include <linux/uio.h> |
18 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index cea4623f1ed6..5e325a42e33d 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/posix_acl.h> | 20 | #include <linux/posix_acl.h> |
21 | #include <linux/module.h> | 21 | #include <linux/export.h> |
22 | 22 | ||
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | 24 | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index c602b8d20f06..fbb53c249086 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -462,59 +462,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
462 | /* convert nsec -> ticks */ | 462 | /* convert nsec -> ticks */ |
463 | start_time = nsec_to_clock_t(start_time); | 463 | start_time = nsec_to_clock_t(start_time); |
464 | 464 | ||
465 | seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ | 465 | seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); |
466 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ | 466 | seq_put_decimal_ll(m, ' ', ppid); |
467 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", | 467 | seq_put_decimal_ll(m, ' ', pgid); |
468 | pid_nr_ns(pid, ns), | 468 | seq_put_decimal_ll(m, ' ', sid); |
469 | tcomm, | 469 | seq_put_decimal_ll(m, ' ', tty_nr); |
470 | state, | 470 | seq_put_decimal_ll(m, ' ', tty_pgrp); |
471 | ppid, | 471 | seq_put_decimal_ull(m, ' ', task->flags); |
472 | pgid, | 472 | seq_put_decimal_ull(m, ' ', min_flt); |
473 | sid, | 473 | seq_put_decimal_ull(m, ' ', cmin_flt); |
474 | tty_nr, | 474 | seq_put_decimal_ull(m, ' ', maj_flt); |
475 | tty_pgrp, | 475 | seq_put_decimal_ull(m, ' ', cmaj_flt); |
476 | task->flags, | 476 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); |
477 | min_flt, | 477 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); |
478 | cmin_flt, | 478 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); |
479 | maj_flt, | 479 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); |
480 | cmaj_flt, | 480 | seq_put_decimal_ll(m, ' ', priority); |
481 | cputime_to_clock_t(utime), | 481 | seq_put_decimal_ll(m, ' ', nice); |
482 | cputime_to_clock_t(stime), | 482 | seq_put_decimal_ll(m, ' ', num_threads); |
483 | cputime_to_clock_t(cutime), | 483 | seq_put_decimal_ull(m, ' ', 0); |
484 | cputime_to_clock_t(cstime), | 484 | seq_put_decimal_ull(m, ' ', start_time); |
485 | priority, | 485 | seq_put_decimal_ull(m, ' ', vsize); |
486 | nice, | 486 | seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0); |
487 | num_threads, | 487 | seq_put_decimal_ull(m, ' ', rsslim); |
488 | start_time, | 488 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); |
489 | vsize, | 489 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); |
490 | mm ? get_mm_rss(mm) : 0, | 490 | seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); |
491 | rsslim, | 491 | seq_put_decimal_ull(m, ' ', esp); |
492 | mm ? (permitted ? mm->start_code : 1) : 0, | 492 | seq_put_decimal_ull(m, ' ', eip); |
493 | mm ? (permitted ? mm->end_code : 1) : 0, | 493 | /* The signal information here is obsolete. |
494 | (permitted && mm) ? mm->start_stack : 0, | 494 | * It must be decimal for Linux 2.0 compatibility. |
495 | esp, | 495 | * Use /proc/#/status for real-time signals. |
496 | eip, | 496 | */ |
497 | /* The signal information here is obsolete. | 497 | seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); |
498 | * It must be decimal for Linux 2.0 compatibility. | 498 | seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); |
499 | * Use /proc/#/status for real-time signals. | 499 | seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); |
500 | */ | 500 | seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); |
501 | task->pending.signal.sig[0] & 0x7fffffffUL, | 501 | seq_put_decimal_ull(m, ' ', wchan); |
502 | task->blocked.sig[0] & 0x7fffffffUL, | 502 | seq_put_decimal_ull(m, ' ', 0); |
503 | sigign .sig[0] & 0x7fffffffUL, | 503 | seq_put_decimal_ull(m, ' ', 0); |
504 | sigcatch .sig[0] & 0x7fffffffUL, | 504 | seq_put_decimal_ll(m, ' ', task->exit_signal); |
505 | wchan, | 505 | seq_put_decimal_ll(m, ' ', task_cpu(task)); |
506 | 0UL, | 506 | seq_put_decimal_ull(m, ' ', task->rt_priority); |
507 | 0UL, | 507 | seq_put_decimal_ull(m, ' ', task->policy); |
508 | task->exit_signal, | 508 | seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); |
509 | task_cpu(task), | 509 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); |
510 | task->rt_priority, | 510 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); |
511 | task->policy, | 511 | seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0); |
512 | (unsigned long long)delayacct_blkio_ticks(task), | 512 | seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0); |
513 | cputime_to_clock_t(gtime), | 513 | seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0); |
514 | cputime_to_clock_t(cgtime), | 514 | seq_putc(m, '\n'); |
515 | (mm && permitted) ? mm->start_data : 0, | ||
516 | (mm && permitted) ? mm->end_data : 0, | ||
517 | (mm && permitted) ? mm->start_brk : 0); | ||
518 | if (mm) | 515 | if (mm) |
519 | mmput(mm); | 516 | mmput(mm); |
520 | return 0; | 517 | return 0; |
@@ -542,8 +539,20 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
542 | size = task_statm(mm, &shared, &text, &data, &resident); | 539 | size = task_statm(mm, &shared, &text, &data, &resident); |
543 | mmput(mm); | 540 | mmput(mm); |
544 | } | 541 | } |
545 | seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", | 542 | /* |
546 | size, resident, shared, text, data); | 543 | * For quick read, open code by putting numbers directly |
544 | * expected format is | ||
545 | * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", | ||
546 | * size, resident, shared, text, data); | ||
547 | */ | ||
548 | seq_put_decimal_ull(m, 0, size); | ||
549 | seq_put_decimal_ull(m, ' ', resident); | ||
550 | seq_put_decimal_ull(m, ' ', shared); | ||
551 | seq_put_decimal_ull(m, ' ', text); | ||
552 | seq_put_decimal_ull(m, ' ', 0); | ||
553 | seq_put_decimal_ull(m, ' ', text); | ||
554 | seq_put_decimal_ull(m, ' ', 0); | ||
555 | seq_putc(m, '\n'); | ||
547 | 556 | ||
548 | return 0; | 557 | return 0; |
549 | } | 558 | } |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c44efe19798f..5f79bb8b4c60 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -10,12 +10,15 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | struct ctl_table_header; | ||
13 | 14 | ||
14 | extern struct proc_dir_entry proc_root; | 15 | extern struct proc_dir_entry proc_root; |
15 | #ifdef CONFIG_PROC_SYSCTL | 16 | #ifdef CONFIG_PROC_SYSCTL |
16 | extern int proc_sys_init(void); | 17 | extern int proc_sys_init(void); |
18 | extern void sysctl_head_put(struct ctl_table_header *head); | ||
17 | #else | 19 | #else |
18 | static inline void proc_sys_init(void) { } | 20 | static inline void proc_sys_init(void) { } |
21 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
19 | #endif | 22 | #endif |
20 | #ifdef CONFIG_NET | 23 | #ifdef CONFIG_NET |
21 | extern int proc_net_init(void); | 24 | extern int proc_net_init(void); |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e5e69aff6c69..86c67eee439f 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -157,7 +157,8 @@ static int kcore_update_ram(void) | |||
157 | 157 | ||
158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
159 | /* calculate vmemmap's address from given system ram pfn and register it */ | 159 | /* calculate vmemmap's address from given system ram pfn and register it */ |
160 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | 160 | static int |
161 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
161 | { | 162 | { |
162 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; | 163 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; |
163 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; | 164 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; |
@@ -189,7 +190,8 @@ int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | |||
189 | 190 | ||
190 | } | 191 | } |
191 | #else | 192 | #else |
192 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | 193 | static int |
194 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
193 | { | 195 | { |
194 | return 1; | 196 | return 1; |
195 | } | 197 | } |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 27da860115c6..3551f1f839eb 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -53,7 +53,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
53 | ei->ns_ops = ns_ops; | 53 | ei->ns_ops = ns_ops; |
54 | ei->ns = ns; | 54 | ei->ns = ns; |
55 | 55 | ||
56 | dentry->d_op = &pid_dentry_operations; | 56 | d_set_d_op(dentry, &pid_dentry_operations); |
57 | d_add(dentry, inode); | 57 | d_add(dentry, inode); |
58 | /* Close the race of the process dying before we return the dentry */ | 58 | /* Close the race of the process dying before we return the dentry */ |
59 | if (pid_revalidate(dentry, NULL)) | 59 | if (pid_revalidate(dentry, NULL)) |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 67bbf6e4e197..21d836f40292 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/namei.h> | 10 | #include <linux/namei.h> |
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/module.h> | ||
12 | #include "internal.h" | 13 | #include "internal.h" |
13 | 14 | ||
14 | static const struct dentry_operations proc_sys_dentry_operations; | 15 | static const struct dentry_operations proc_sys_dentry_operations; |
@@ -26,6 +27,371 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) | |||
26 | wake_up_interruptible(&poll->wait); | 27 | wake_up_interruptible(&poll->wait); |
27 | } | 28 | } |
28 | 29 | ||
30 | static struct ctl_table root_table[] = { | ||
31 | { | ||
32 | .procname = "", | ||
33 | .mode = S_IFDIR|S_IRUGO|S_IXUGO, | ||
34 | }, | ||
35 | { } | ||
36 | }; | ||
37 | static struct ctl_table_root sysctl_table_root = { | ||
38 | .default_set.dir.header = { | ||
39 | {{.count = 1, | ||
40 | .nreg = 1, | ||
41 | .ctl_table = root_table }}, | ||
42 | .ctl_table_arg = root_table, | ||
43 | .root = &sysctl_table_root, | ||
44 | .set = &sysctl_table_root.default_set, | ||
45 | }, | ||
46 | }; | ||
47 | |||
48 | static DEFINE_SPINLOCK(sysctl_lock); | ||
49 | |||
50 | static void drop_sysctl_table(struct ctl_table_header *header); | ||
51 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
52 | struct ctl_table **pentry, struct nsproxy *namespaces); | ||
53 | static int insert_links(struct ctl_table_header *head); | ||
54 | static void put_links(struct ctl_table_header *header); | ||
55 | |||
56 | static void sysctl_print_dir(struct ctl_dir *dir) | ||
57 | { | ||
58 | if (dir->header.parent) | ||
59 | sysctl_print_dir(dir->header.parent); | ||
60 | printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); | ||
61 | } | ||
62 | |||
63 | static int namecmp(const char *name1, int len1, const char *name2, int len2) | ||
64 | { | ||
65 | int minlen; | ||
66 | int cmp; | ||
67 | |||
68 | minlen = len1; | ||
69 | if (minlen > len2) | ||
70 | minlen = len2; | ||
71 | |||
72 | cmp = memcmp(name1, name2, minlen); | ||
73 | if (cmp == 0) | ||
74 | cmp = len1 - len2; | ||
75 | return cmp; | ||
76 | } | ||
77 | |||
78 | /* Called under sysctl_lock */ | ||
79 | static struct ctl_table *find_entry(struct ctl_table_header **phead, | ||
80 | struct ctl_dir *dir, const char *name, int namelen) | ||
81 | { | ||
82 | struct ctl_table_header *head; | ||
83 | struct ctl_table *entry; | ||
84 | struct rb_node *node = dir->root.rb_node; | ||
85 | |||
86 | while (node) | ||
87 | { | ||
88 | struct ctl_node *ctl_node; | ||
89 | const char *procname; | ||
90 | int cmp; | ||
91 | |||
92 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
93 | head = ctl_node->header; | ||
94 | entry = &head->ctl_table[ctl_node - head->node]; | ||
95 | procname = entry->procname; | ||
96 | |||
97 | cmp = namecmp(name, namelen, procname, strlen(procname)); | ||
98 | if (cmp < 0) | ||
99 | node = node->rb_left; | ||
100 | else if (cmp > 0) | ||
101 | node = node->rb_right; | ||
102 | else { | ||
103 | *phead = head; | ||
104 | return entry; | ||
105 | } | ||
106 | } | ||
107 | return NULL; | ||
108 | } | ||
109 | |||
110 | static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
111 | { | ||
112 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
113 | struct rb_node **p = &head->parent->root.rb_node; | ||
114 | struct rb_node *parent = NULL; | ||
115 | const char *name = entry->procname; | ||
116 | int namelen = strlen(name); | ||
117 | |||
118 | while (*p) { | ||
119 | struct ctl_table_header *parent_head; | ||
120 | struct ctl_table *parent_entry; | ||
121 | struct ctl_node *parent_node; | ||
122 | const char *parent_name; | ||
123 | int cmp; | ||
124 | |||
125 | parent = *p; | ||
126 | parent_node = rb_entry(parent, struct ctl_node, node); | ||
127 | parent_head = parent_node->header; | ||
128 | parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; | ||
129 | parent_name = parent_entry->procname; | ||
130 | |||
131 | cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); | ||
132 | if (cmp < 0) | ||
133 | p = &(*p)->rb_left; | ||
134 | else if (cmp > 0) | ||
135 | p = &(*p)->rb_right; | ||
136 | else { | ||
137 | printk(KERN_ERR "sysctl duplicate entry: "); | ||
138 | sysctl_print_dir(head->parent); | ||
139 | printk(KERN_CONT "/%s\n", entry->procname); | ||
140 | return -EEXIST; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | rb_link_node(node, parent, p); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
149 | { | ||
150 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
151 | |||
152 | rb_erase(node, &head->parent->root); | ||
153 | } | ||
154 | |||
155 | static void init_header(struct ctl_table_header *head, | ||
156 | struct ctl_table_root *root, struct ctl_table_set *set, | ||
157 | struct ctl_node *node, struct ctl_table *table) | ||
158 | { | ||
159 | head->ctl_table = table; | ||
160 | head->ctl_table_arg = table; | ||
161 | head->used = 0; | ||
162 | head->count = 1; | ||
163 | head->nreg = 1; | ||
164 | head->unregistering = NULL; | ||
165 | head->root = root; | ||
166 | head->set = set; | ||
167 | head->parent = NULL; | ||
168 | head->node = node; | ||
169 | if (node) { | ||
170 | struct ctl_table *entry; | ||
171 | for (entry = table; entry->procname; entry++, node++) { | ||
172 | rb_init_node(&node->node); | ||
173 | node->header = head; | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | |||
178 | static void erase_header(struct ctl_table_header *head) | ||
179 | { | ||
180 | struct ctl_table *entry; | ||
181 | for (entry = head->ctl_table; entry->procname; entry++) | ||
182 | erase_entry(head, entry); | ||
183 | } | ||
184 | |||
185 | static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) | ||
186 | { | ||
187 | struct ctl_table *entry; | ||
188 | int err; | ||
189 | |||
190 | dir->header.nreg++; | ||
191 | header->parent = dir; | ||
192 | err = insert_links(header); | ||
193 | if (err) | ||
194 | goto fail_links; | ||
195 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
196 | err = insert_entry(header, entry); | ||
197 | if (err) | ||
198 | goto fail; | ||
199 | } | ||
200 | return 0; | ||
201 | fail: | ||
202 | erase_header(header); | ||
203 | put_links(header); | ||
204 | fail_links: | ||
205 | header->parent = NULL; | ||
206 | drop_sysctl_table(&dir->header); | ||
207 | return err; | ||
208 | } | ||
209 | |||
210 | /* called under sysctl_lock */ | ||
211 | static int use_table(struct ctl_table_header *p) | ||
212 | { | ||
213 | if (unlikely(p->unregistering)) | ||
214 | return 0; | ||
215 | p->used++; | ||
216 | return 1; | ||
217 | } | ||
218 | |||
219 | /* called under sysctl_lock */ | ||
220 | static void unuse_table(struct ctl_table_header *p) | ||
221 | { | ||
222 | if (!--p->used) | ||
223 | if (unlikely(p->unregistering)) | ||
224 | complete(p->unregistering); | ||
225 | } | ||
226 | |||
227 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
228 | static void start_unregistering(struct ctl_table_header *p) | ||
229 | { | ||
230 | /* | ||
231 | * if p->used is 0, nobody will ever touch that entry again; | ||
232 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
233 | */ | ||
234 | if (unlikely(p->used)) { | ||
235 | struct completion wait; | ||
236 | init_completion(&wait); | ||
237 | p->unregistering = &wait; | ||
238 | spin_unlock(&sysctl_lock); | ||
239 | wait_for_completion(&wait); | ||
240 | spin_lock(&sysctl_lock); | ||
241 | } else { | ||
242 | /* anything non-NULL; we'll never dereference it */ | ||
243 | p->unregistering = ERR_PTR(-EINVAL); | ||
244 | } | ||
245 | /* | ||
246 | * do not remove from the list until nobody holds it; walking the | ||
247 | * list in do_sysctl() relies on that. | ||
248 | */ | ||
249 | erase_header(p); | ||
250 | } | ||
251 | |||
252 | static void sysctl_head_get(struct ctl_table_header *head) | ||
253 | { | ||
254 | spin_lock(&sysctl_lock); | ||
255 | head->count++; | ||
256 | spin_unlock(&sysctl_lock); | ||
257 | } | ||
258 | |||
259 | void sysctl_head_put(struct ctl_table_header *head) | ||
260 | { | ||
261 | spin_lock(&sysctl_lock); | ||
262 | if (!--head->count) | ||
263 | kfree_rcu(head, rcu); | ||
264 | spin_unlock(&sysctl_lock); | ||
265 | } | ||
266 | |||
267 | static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | ||
268 | { | ||
269 | if (!head) | ||
270 | BUG(); | ||
271 | spin_lock(&sysctl_lock); | ||
272 | if (!use_table(head)) | ||
273 | head = ERR_PTR(-ENOENT); | ||
274 | spin_unlock(&sysctl_lock); | ||
275 | return head; | ||
276 | } | ||
277 | |||
278 | static void sysctl_head_finish(struct ctl_table_header *head) | ||
279 | { | ||
280 | if (!head) | ||
281 | return; | ||
282 | spin_lock(&sysctl_lock); | ||
283 | unuse_table(head); | ||
284 | spin_unlock(&sysctl_lock); | ||
285 | } | ||
286 | |||
287 | static struct ctl_table_set * | ||
288 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
289 | { | ||
290 | struct ctl_table_set *set = &root->default_set; | ||
291 | if (root->lookup) | ||
292 | set = root->lookup(root, namespaces); | ||
293 | return set; | ||
294 | } | ||
295 | |||
296 | static struct ctl_table *lookup_entry(struct ctl_table_header **phead, | ||
297 | struct ctl_dir *dir, | ||
298 | const char *name, int namelen) | ||
299 | { | ||
300 | struct ctl_table_header *head; | ||
301 | struct ctl_table *entry; | ||
302 | |||
303 | spin_lock(&sysctl_lock); | ||
304 | entry = find_entry(&head, dir, name, namelen); | ||
305 | if (entry && use_table(head)) | ||
306 | *phead = head; | ||
307 | else | ||
308 | entry = NULL; | ||
309 | spin_unlock(&sysctl_lock); | ||
310 | return entry; | ||
311 | } | ||
312 | |||
313 | static struct ctl_node *first_usable_entry(struct rb_node *node) | ||
314 | { | ||
315 | struct ctl_node *ctl_node; | ||
316 | |||
317 | for (;node; node = rb_next(node)) { | ||
318 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
319 | if (use_table(ctl_node->header)) | ||
320 | return ctl_node; | ||
321 | } | ||
322 | return NULL; | ||
323 | } | ||
324 | |||
325 | static void first_entry(struct ctl_dir *dir, | ||
326 | struct ctl_table_header **phead, struct ctl_table **pentry) | ||
327 | { | ||
328 | struct ctl_table_header *head = NULL; | ||
329 | struct ctl_table *entry = NULL; | ||
330 | struct ctl_node *ctl_node; | ||
331 | |||
332 | spin_lock(&sysctl_lock); | ||
333 | ctl_node = first_usable_entry(rb_first(&dir->root)); | ||
334 | spin_unlock(&sysctl_lock); | ||
335 | if (ctl_node) { | ||
336 | head = ctl_node->header; | ||
337 | entry = &head->ctl_table[ctl_node - head->node]; | ||
338 | } | ||
339 | *phead = head; | ||
340 | *pentry = entry; | ||
341 | } | ||
342 | |||
343 | static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) | ||
344 | { | ||
345 | struct ctl_table_header *head = *phead; | ||
346 | struct ctl_table *entry = *pentry; | ||
347 | struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; | ||
348 | |||
349 | spin_lock(&sysctl_lock); | ||
350 | unuse_table(head); | ||
351 | |||
352 | ctl_node = first_usable_entry(rb_next(&ctl_node->node)); | ||
353 | spin_unlock(&sysctl_lock); | ||
354 | head = NULL; | ||
355 | if (ctl_node) { | ||
356 | head = ctl_node->header; | ||
357 | entry = &head->ctl_table[ctl_node - head->node]; | ||
358 | } | ||
359 | *phead = head; | ||
360 | *pentry = entry; | ||
361 | } | ||
362 | |||
363 | void register_sysctl_root(struct ctl_table_root *root) | ||
364 | { | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * sysctl_perm does NOT grant the superuser all rights automatically, because | ||
369 | * some sysctl variables are readonly even to root. | ||
370 | */ | ||
371 | |||
372 | static int test_perm(int mode, int op) | ||
373 | { | ||
374 | if (!current_euid()) | ||
375 | mode >>= 6; | ||
376 | else if (in_egroup_p(0)) | ||
377 | mode >>= 3; | ||
378 | if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) | ||
379 | return 0; | ||
380 | return -EACCES; | ||
381 | } | ||
382 | |||
383 | static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | ||
384 | { | ||
385 | int mode; | ||
386 | |||
387 | if (root->permissions) | ||
388 | mode = root->permissions(root, current->nsproxy, table); | ||
389 | else | ||
390 | mode = table->mode; | ||
391 | |||
392 | return test_perm(mode, op); | ||
393 | } | ||
394 | |||
29 | static struct inode *proc_sys_make_inode(struct super_block *sb, | 395 | static struct inode *proc_sys_make_inode(struct super_block *sb, |
30 | struct ctl_table_header *head, struct ctl_table *table) | 396 | struct ctl_table_header *head, struct ctl_table *table) |
31 | { | 397 | { |
@@ -45,13 +411,12 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
45 | 411 | ||
46 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 412 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
47 | inode->i_mode = table->mode; | 413 | inode->i_mode = table->mode; |
48 | if (!table->child) { | 414 | if (!S_ISDIR(table->mode)) { |
49 | inode->i_mode |= S_IFREG; | 415 | inode->i_mode |= S_IFREG; |
50 | inode->i_op = &proc_sys_inode_operations; | 416 | inode->i_op = &proc_sys_inode_operations; |
51 | inode->i_fop = &proc_sys_file_operations; | 417 | inode->i_fop = &proc_sys_file_operations; |
52 | } else { | 418 | } else { |
53 | inode->i_mode |= S_IFDIR; | 419 | inode->i_mode |= S_IFDIR; |
54 | clear_nlink(inode); | ||
55 | inode->i_op = &proc_sys_dir_operations; | 420 | inode->i_op = &proc_sys_dir_operations; |
56 | inode->i_fop = &proc_sys_dir_file_operations; | 421 | inode->i_fop = &proc_sys_dir_file_operations; |
57 | } | 422 | } |
@@ -59,70 +424,42 @@ out: | |||
59 | return inode; | 424 | return inode; |
60 | } | 425 | } |
61 | 426 | ||
62 | static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | ||
63 | { | ||
64 | int len; | ||
65 | for ( ; p->procname; p++) { | ||
66 | |||
67 | if (!p->procname) | ||
68 | continue; | ||
69 | |||
70 | len = strlen(p->procname); | ||
71 | if (len != name->len) | ||
72 | continue; | ||
73 | |||
74 | if (memcmp(p->procname, name->name, len) != 0) | ||
75 | continue; | ||
76 | |||
77 | /* I have a match */ | ||
78 | return p; | ||
79 | } | ||
80 | return NULL; | ||
81 | } | ||
82 | |||
83 | static struct ctl_table_header *grab_header(struct inode *inode) | 427 | static struct ctl_table_header *grab_header(struct inode *inode) |
84 | { | 428 | { |
85 | if (PROC_I(inode)->sysctl) | 429 | struct ctl_table_header *head = PROC_I(inode)->sysctl; |
86 | return sysctl_head_grab(PROC_I(inode)->sysctl); | 430 | if (!head) |
87 | else | 431 | head = &sysctl_table_root.default_set.dir.header; |
88 | return sysctl_head_next(NULL); | 432 | return sysctl_head_grab(head); |
89 | } | 433 | } |
90 | 434 | ||
91 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | 435 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, |
92 | struct nameidata *nd) | 436 | struct nameidata *nd) |
93 | { | 437 | { |
94 | struct ctl_table_header *head = grab_header(dir); | 438 | struct ctl_table_header *head = grab_header(dir); |
95 | struct ctl_table *table = PROC_I(dir)->sysctl_entry; | ||
96 | struct ctl_table_header *h = NULL; | 439 | struct ctl_table_header *h = NULL; |
97 | struct qstr *name = &dentry->d_name; | 440 | struct qstr *name = &dentry->d_name; |
98 | struct ctl_table *p; | 441 | struct ctl_table *p; |
99 | struct inode *inode; | 442 | struct inode *inode; |
100 | struct dentry *err = ERR_PTR(-ENOENT); | 443 | struct dentry *err = ERR_PTR(-ENOENT); |
444 | struct ctl_dir *ctl_dir; | ||
445 | int ret; | ||
101 | 446 | ||
102 | if (IS_ERR(head)) | 447 | if (IS_ERR(head)) |
103 | return ERR_CAST(head); | 448 | return ERR_CAST(head); |
104 | 449 | ||
105 | if (table && !table->child) { | 450 | ctl_dir = container_of(head, struct ctl_dir, header); |
106 | WARN_ON(1); | ||
107 | goto out; | ||
108 | } | ||
109 | |||
110 | table = table ? table->child : head->ctl_table; | ||
111 | |||
112 | p = find_in_table(table, name); | ||
113 | if (!p) { | ||
114 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
115 | if (h->attached_to != table) | ||
116 | continue; | ||
117 | p = find_in_table(h->attached_by, name); | ||
118 | if (p) | ||
119 | break; | ||
120 | } | ||
121 | } | ||
122 | 451 | ||
452 | p = lookup_entry(&h, ctl_dir, name->name, name->len); | ||
123 | if (!p) | 453 | if (!p) |
124 | goto out; | 454 | goto out; |
125 | 455 | ||
456 | if (S_ISLNK(p->mode)) { | ||
457 | ret = sysctl_follow_link(&h, &p, current->nsproxy); | ||
458 | err = ERR_PTR(ret); | ||
459 | if (ret) | ||
460 | goto out; | ||
461 | } | ||
462 | |||
126 | err = ERR_PTR(-ENOMEM); | 463 | err = ERR_PTR(-ENOMEM); |
127 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); | 464 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); |
128 | if (h) | 465 | if (h) |
@@ -190,20 +527,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, | |||
190 | 527 | ||
191 | static int proc_sys_open(struct inode *inode, struct file *filp) | 528 | static int proc_sys_open(struct inode *inode, struct file *filp) |
192 | { | 529 | { |
530 | struct ctl_table_header *head = grab_header(inode); | ||
193 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 531 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; |
194 | 532 | ||
533 | /* sysctl was unregistered */ | ||
534 | if (IS_ERR(head)) | ||
535 | return PTR_ERR(head); | ||
536 | |||
195 | if (table->poll) | 537 | if (table->poll) |
196 | filp->private_data = proc_sys_poll_event(table->poll); | 538 | filp->private_data = proc_sys_poll_event(table->poll); |
197 | 539 | ||
540 | sysctl_head_finish(head); | ||
541 | |||
198 | return 0; | 542 | return 0; |
199 | } | 543 | } |
200 | 544 | ||
201 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | 545 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) |
202 | { | 546 | { |
203 | struct inode *inode = filp->f_path.dentry->d_inode; | 547 | struct inode *inode = filp->f_path.dentry->d_inode; |
548 | struct ctl_table_header *head = grab_header(inode); | ||
204 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 549 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; |
205 | unsigned long event = (unsigned long)filp->private_data; | ||
206 | unsigned int ret = DEFAULT_POLLMASK; | 550 | unsigned int ret = DEFAULT_POLLMASK; |
551 | unsigned long event; | ||
552 | |||
553 | /* sysctl was unregistered */ | ||
554 | if (IS_ERR(head)) | ||
555 | return POLLERR | POLLHUP; | ||
207 | 556 | ||
208 | if (!table->proc_handler) | 557 | if (!table->proc_handler) |
209 | goto out; | 558 | goto out; |
@@ -211,6 +560,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | |||
211 | if (!table->poll) | 560 | if (!table->poll) |
212 | goto out; | 561 | goto out; |
213 | 562 | ||
563 | event = (unsigned long)filp->private_data; | ||
214 | poll_wait(filp, &table->poll->wait, wait); | 564 | poll_wait(filp, &table->poll->wait, wait); |
215 | 565 | ||
216 | if (event != atomic_read(&table->poll->event)) { | 566 | if (event != atomic_read(&table->poll->event)) { |
@@ -219,6 +569,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | |||
219 | } | 569 | } |
220 | 570 | ||
221 | out: | 571 | out: |
572 | sysctl_head_finish(head); | ||
573 | |||
222 | return ret; | 574 | return ret; |
223 | } | 575 | } |
224 | 576 | ||
@@ -260,28 +612,45 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
260 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 612 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); |
261 | } | 613 | } |
262 | 614 | ||
615 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | ||
616 | filldir_t filldir, | ||
617 | struct ctl_table_header *head, | ||
618 | struct ctl_table *table) | ||
619 | { | ||
620 | int err, ret = 0; | ||
621 | head = sysctl_head_grab(head); | ||
622 | |||
623 | if (S_ISLNK(table->mode)) { | ||
624 | /* It is not an error if we can not follow the link ignore it */ | ||
625 | err = sysctl_follow_link(&head, &table, current->nsproxy); | ||
626 | if (err) | ||
627 | goto out; | ||
628 | } | ||
629 | |||
630 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | ||
631 | out: | ||
632 | sysctl_head_finish(head); | ||
633 | return ret; | ||
634 | } | ||
635 | |||
263 | static int scan(struct ctl_table_header *head, ctl_table *table, | 636 | static int scan(struct ctl_table_header *head, ctl_table *table, |
264 | unsigned long *pos, struct file *file, | 637 | unsigned long *pos, struct file *file, |
265 | void *dirent, filldir_t filldir) | 638 | void *dirent, filldir_t filldir) |
266 | { | 639 | { |
640 | int res; | ||
267 | 641 | ||
268 | for (; table->procname; table++, (*pos)++) { | 642 | if ((*pos)++ < file->f_pos) |
269 | int res; | 643 | return 0; |
270 | |||
271 | /* Can't do anything without a proc name */ | ||
272 | if (!table->procname) | ||
273 | continue; | ||
274 | |||
275 | if (*pos < file->f_pos) | ||
276 | continue; | ||
277 | 644 | ||
645 | if (unlikely(S_ISLNK(table->mode))) | ||
646 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | ||
647 | else | ||
278 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | 648 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); |
279 | if (res) | ||
280 | return res; | ||
281 | 649 | ||
282 | file->f_pos = *pos + 1; | 650 | if (res == 0) |
283 | } | 651 | file->f_pos = *pos; |
284 | return 0; | 652 | |
653 | return res; | ||
285 | } | 654 | } |
286 | 655 | ||
287 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 656 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) |
@@ -289,20 +658,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
289 | struct dentry *dentry = filp->f_path.dentry; | 658 | struct dentry *dentry = filp->f_path.dentry; |
290 | struct inode *inode = dentry->d_inode; | 659 | struct inode *inode = dentry->d_inode; |
291 | struct ctl_table_header *head = grab_header(inode); | 660 | struct ctl_table_header *head = grab_header(inode); |
292 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | ||
293 | struct ctl_table_header *h = NULL; | 661 | struct ctl_table_header *h = NULL; |
662 | struct ctl_table *entry; | ||
663 | struct ctl_dir *ctl_dir; | ||
294 | unsigned long pos; | 664 | unsigned long pos; |
295 | int ret = -EINVAL; | 665 | int ret = -EINVAL; |
296 | 666 | ||
297 | if (IS_ERR(head)) | 667 | if (IS_ERR(head)) |
298 | return PTR_ERR(head); | 668 | return PTR_ERR(head); |
299 | 669 | ||
300 | if (table && !table->child) { | 670 | ctl_dir = container_of(head, struct ctl_dir, header); |
301 | WARN_ON(1); | ||
302 | goto out; | ||
303 | } | ||
304 | |||
305 | table = table ? table->child : head->ctl_table; | ||
306 | 671 | ||
307 | ret = 0; | 672 | ret = 0; |
308 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 673 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ |
@@ -320,14 +685,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
320 | } | 685 | } |
321 | pos = 2; | 686 | pos = 2; |
322 | 687 | ||
323 | ret = scan(head, table, &pos, filp, dirent, filldir); | 688 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { |
324 | if (ret) | 689 | ret = scan(h, entry, &pos, filp, dirent, filldir); |
325 | goto out; | ||
326 | |||
327 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
328 | if (h->attached_to != table) | ||
329 | continue; | ||
330 | ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); | ||
331 | if (ret) { | 690 | if (ret) { |
332 | sysctl_head_finish(h); | 691 | sysctl_head_finish(h); |
333 | break; | 692 | break; |
@@ -447,6 +806,21 @@ static int proc_sys_delete(const struct dentry *dentry) | |||
447 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; | 806 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; |
448 | } | 807 | } |
449 | 808 | ||
809 | static int sysctl_is_seen(struct ctl_table_header *p) | ||
810 | { | ||
811 | struct ctl_table_set *set = p->set; | ||
812 | int res; | ||
813 | spin_lock(&sysctl_lock); | ||
814 | if (p->unregistering) | ||
815 | res = 0; | ||
816 | else if (!set->is_seen) | ||
817 | res = 1; | ||
818 | else | ||
819 | res = set->is_seen(set); | ||
820 | spin_unlock(&sysctl_lock); | ||
821 | return res; | ||
822 | } | ||
823 | |||
450 | static int proc_sys_compare(const struct dentry *parent, | 824 | static int proc_sys_compare(const struct dentry *parent, |
451 | const struct inode *pinode, | 825 | const struct inode *pinode, |
452 | const struct dentry *dentry, const struct inode *inode, | 826 | const struct dentry *dentry, const struct inode *inode, |
@@ -472,6 +846,753 @@ static const struct dentry_operations proc_sys_dentry_operations = { | |||
472 | .d_compare = proc_sys_compare, | 846 | .d_compare = proc_sys_compare, |
473 | }; | 847 | }; |
474 | 848 | ||
849 | static struct ctl_dir *find_subdir(struct ctl_dir *dir, | ||
850 | const char *name, int namelen) | ||
851 | { | ||
852 | struct ctl_table_header *head; | ||
853 | struct ctl_table *entry; | ||
854 | |||
855 | entry = find_entry(&head, dir, name, namelen); | ||
856 | if (!entry) | ||
857 | return ERR_PTR(-ENOENT); | ||
858 | if (!S_ISDIR(entry->mode)) | ||
859 | return ERR_PTR(-ENOTDIR); | ||
860 | return container_of(head, struct ctl_dir, header); | ||
861 | } | ||
862 | |||
863 | static struct ctl_dir *new_dir(struct ctl_table_set *set, | ||
864 | const char *name, int namelen) | ||
865 | { | ||
866 | struct ctl_table *table; | ||
867 | struct ctl_dir *new; | ||
868 | struct ctl_node *node; | ||
869 | char *new_name; | ||
870 | |||
871 | new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + | ||
872 | sizeof(struct ctl_table)*2 + namelen + 1, | ||
873 | GFP_KERNEL); | ||
874 | if (!new) | ||
875 | return NULL; | ||
876 | |||
877 | node = (struct ctl_node *)(new + 1); | ||
878 | table = (struct ctl_table *)(node + 1); | ||
879 | new_name = (char *)(table + 2); | ||
880 | memcpy(new_name, name, namelen); | ||
881 | new_name[namelen] = '\0'; | ||
882 | table[0].procname = new_name; | ||
883 | table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
884 | init_header(&new->header, set->dir.header.root, set, node, table); | ||
885 | |||
886 | return new; | ||
887 | } | ||
888 | |||
889 | /** | ||
890 | * get_subdir - find or create a subdir with the specified name. | ||
891 | * @dir: Directory to create the subdirectory in | ||
892 | * @name: The name of the subdirectory to find or create | ||
893 | * @namelen: The length of name | ||
894 | * | ||
895 | * Takes a directory with an elevated reference count so we know that | ||
896 | * if we drop the lock the directory will not go away. Upon success | ||
897 | * the reference is moved from @dir to the returned subdirectory. | ||
898 | * Upon error an error code is returned and the reference on @dir is | ||
899 | * simply dropped. | ||
900 | */ | ||
901 | static struct ctl_dir *get_subdir(struct ctl_dir *dir, | ||
902 | const char *name, int namelen) | ||
903 | { | ||
904 | struct ctl_table_set *set = dir->header.set; | ||
905 | struct ctl_dir *subdir, *new = NULL; | ||
906 | int err; | ||
907 | |||
908 | spin_lock(&sysctl_lock); | ||
909 | subdir = find_subdir(dir, name, namelen); | ||
910 | if (!IS_ERR(subdir)) | ||
911 | goto found; | ||
912 | if (PTR_ERR(subdir) != -ENOENT) | ||
913 | goto failed; | ||
914 | |||
915 | spin_unlock(&sysctl_lock); | ||
916 | new = new_dir(set, name, namelen); | ||
917 | spin_lock(&sysctl_lock); | ||
918 | subdir = ERR_PTR(-ENOMEM); | ||
919 | if (!new) | ||
920 | goto failed; | ||
921 | |||
922 | /* Was the subdir added while we dropped the lock? */ | ||
923 | subdir = find_subdir(dir, name, namelen); | ||
924 | if (!IS_ERR(subdir)) | ||
925 | goto found; | ||
926 | if (PTR_ERR(subdir) != -ENOENT) | ||
927 | goto failed; | ||
928 | |||
929 | /* Nope. Use the our freshly made directory entry. */ | ||
930 | err = insert_header(dir, &new->header); | ||
931 | subdir = ERR_PTR(err); | ||
932 | if (err) | ||
933 | goto failed; | ||
934 | subdir = new; | ||
935 | found: | ||
936 | subdir->header.nreg++; | ||
937 | failed: | ||
938 | if (unlikely(IS_ERR(subdir))) { | ||
939 | printk(KERN_ERR "sysctl could not get directory: "); | ||
940 | sysctl_print_dir(dir); | ||
941 | printk(KERN_CONT "/%*.*s %ld\n", | ||
942 | namelen, namelen, name, PTR_ERR(subdir)); | ||
943 | } | ||
944 | drop_sysctl_table(&dir->header); | ||
945 | if (new) | ||
946 | drop_sysctl_table(&new->header); | ||
947 | spin_unlock(&sysctl_lock); | ||
948 | return subdir; | ||
949 | } | ||
950 | |||
951 | static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) | ||
952 | { | ||
953 | struct ctl_dir *parent; | ||
954 | const char *procname; | ||
955 | if (!dir->header.parent) | ||
956 | return &set->dir; | ||
957 | parent = xlate_dir(set, dir->header.parent); | ||
958 | if (IS_ERR(parent)) | ||
959 | return parent; | ||
960 | procname = dir->header.ctl_table[0].procname; | ||
961 | return find_subdir(parent, procname, strlen(procname)); | ||
962 | } | ||
963 | |||
964 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
965 | struct ctl_table **pentry, struct nsproxy *namespaces) | ||
966 | { | ||
967 | struct ctl_table_header *head; | ||
968 | struct ctl_table_root *root; | ||
969 | struct ctl_table_set *set; | ||
970 | struct ctl_table *entry; | ||
971 | struct ctl_dir *dir; | ||
972 | int ret; | ||
973 | |||
974 | ret = 0; | ||
975 | spin_lock(&sysctl_lock); | ||
976 | root = (*pentry)->data; | ||
977 | set = lookup_header_set(root, namespaces); | ||
978 | dir = xlate_dir(set, (*phead)->parent); | ||
979 | if (IS_ERR(dir)) | ||
980 | ret = PTR_ERR(dir); | ||
981 | else { | ||
982 | const char *procname = (*pentry)->procname; | ||
983 | head = NULL; | ||
984 | entry = find_entry(&head, dir, procname, strlen(procname)); | ||
985 | ret = -ENOENT; | ||
986 | if (entry && use_table(head)) { | ||
987 | unuse_table(*phead); | ||
988 | *phead = head; | ||
989 | *pentry = entry; | ||
990 | ret = 0; | ||
991 | } | ||
992 | } | ||
993 | |||
994 | spin_unlock(&sysctl_lock); | ||
995 | return ret; | ||
996 | } | ||
997 | |||
998 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) | ||
999 | { | ||
1000 | struct va_format vaf; | ||
1001 | va_list args; | ||
1002 | |||
1003 | va_start(args, fmt); | ||
1004 | vaf.fmt = fmt; | ||
1005 | vaf.va = &args; | ||
1006 | |||
1007 | printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", | ||
1008 | path, table->procname, &vaf); | ||
1009 | |||
1010 | va_end(args); | ||
1011 | return -EINVAL; | ||
1012 | } | ||
1013 | |||
1014 | static int sysctl_check_table(const char *path, struct ctl_table *table) | ||
1015 | { | ||
1016 | int err = 0; | ||
1017 | for (; table->procname; table++) { | ||
1018 | if (table->child) | ||
1019 | err = sysctl_err(path, table, "Not a file"); | ||
1020 | |||
1021 | if ((table->proc_handler == proc_dostring) || | ||
1022 | (table->proc_handler == proc_dointvec) || | ||
1023 | (table->proc_handler == proc_dointvec_minmax) || | ||
1024 | (table->proc_handler == proc_dointvec_jiffies) || | ||
1025 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
1026 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
1027 | (table->proc_handler == proc_doulongvec_minmax) || | ||
1028 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
1029 | if (!table->data) | ||
1030 | err = sysctl_err(path, table, "No data"); | ||
1031 | if (!table->maxlen) | ||
1032 | err = sysctl_err(path, table, "No maxlen"); | ||
1033 | } | ||
1034 | if (!table->proc_handler) | ||
1035 | err = sysctl_err(path, table, "No proc_handler"); | ||
1036 | |||
1037 | if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) | ||
1038 | err = sysctl_err(path, table, "bogus .mode 0%o", | ||
1039 | table->mode); | ||
1040 | } | ||
1041 | return err; | ||
1042 | } | ||
1043 | |||
1044 | static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, | ||
1045 | struct ctl_table_root *link_root) | ||
1046 | { | ||
1047 | struct ctl_table *link_table, *entry, *link; | ||
1048 | struct ctl_table_header *links; | ||
1049 | struct ctl_node *node; | ||
1050 | char *link_name; | ||
1051 | int nr_entries, name_bytes; | ||
1052 | |||
1053 | name_bytes = 0; | ||
1054 | nr_entries = 0; | ||
1055 | for (entry = table; entry->procname; entry++) { | ||
1056 | nr_entries++; | ||
1057 | name_bytes += strlen(entry->procname) + 1; | ||
1058 | } | ||
1059 | |||
1060 | links = kzalloc(sizeof(struct ctl_table_header) + | ||
1061 | sizeof(struct ctl_node)*nr_entries + | ||
1062 | sizeof(struct ctl_table)*(nr_entries + 1) + | ||
1063 | name_bytes, | ||
1064 | GFP_KERNEL); | ||
1065 | |||
1066 | if (!links) | ||
1067 | return NULL; | ||
1068 | |||
1069 | node = (struct ctl_node *)(links + 1); | ||
1070 | link_table = (struct ctl_table *)(node + nr_entries); | ||
1071 | link_name = (char *)&link_table[nr_entries + 1]; | ||
1072 | |||
1073 | for (link = link_table, entry = table; entry->procname; link++, entry++) { | ||
1074 | int len = strlen(entry->procname) + 1; | ||
1075 | memcpy(link_name, entry->procname, len); | ||
1076 | link->procname = link_name; | ||
1077 | link->mode = S_IFLNK|S_IRWXUGO; | ||
1078 | link->data = link_root; | ||
1079 | link_name += len; | ||
1080 | } | ||
1081 | init_header(links, dir->header.root, dir->header.set, node, link_table); | ||
1082 | links->nreg = nr_entries; | ||
1083 | |||
1084 | return links; | ||
1085 | } | ||
1086 | |||
1087 | static bool get_links(struct ctl_dir *dir, | ||
1088 | struct ctl_table *table, struct ctl_table_root *link_root) | ||
1089 | { | ||
1090 | struct ctl_table_header *head; | ||
1091 | struct ctl_table *entry, *link; | ||
1092 | |||
1093 | /* Are there links available for every entry in table? */ | ||
1094 | for (entry = table; entry->procname; entry++) { | ||
1095 | const char *procname = entry->procname; | ||
1096 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
1097 | if (!link) | ||
1098 | return false; | ||
1099 | if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) | ||
1100 | continue; | ||
1101 | if (S_ISLNK(link->mode) && (link->data == link_root)) | ||
1102 | continue; | ||
1103 | return false; | ||
1104 | } | ||
1105 | |||
1106 | /* The checks passed. Increase the registration count on the links */ | ||
1107 | for (entry = table; entry->procname; entry++) { | ||
1108 | const char *procname = entry->procname; | ||
1109 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
1110 | head->nreg++; | ||
1111 | } | ||
1112 | return true; | ||
1113 | } | ||
1114 | |||
1115 | static int insert_links(struct ctl_table_header *head) | ||
1116 | { | ||
1117 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
1118 | struct ctl_dir *core_parent = NULL; | ||
1119 | struct ctl_table_header *links; | ||
1120 | int err; | ||
1121 | |||
1122 | if (head->set == root_set) | ||
1123 | return 0; | ||
1124 | |||
1125 | core_parent = xlate_dir(root_set, head->parent); | ||
1126 | if (IS_ERR(core_parent)) | ||
1127 | return 0; | ||
1128 | |||
1129 | if (get_links(core_parent, head->ctl_table, head->root)) | ||
1130 | return 0; | ||
1131 | |||
1132 | core_parent->header.nreg++; | ||
1133 | spin_unlock(&sysctl_lock); | ||
1134 | |||
1135 | links = new_links(core_parent, head->ctl_table, head->root); | ||
1136 | |||
1137 | spin_lock(&sysctl_lock); | ||
1138 | err = -ENOMEM; | ||
1139 | if (!links) | ||
1140 | goto out; | ||
1141 | |||
1142 | err = 0; | ||
1143 | if (get_links(core_parent, head->ctl_table, head->root)) { | ||
1144 | kfree(links); | ||
1145 | goto out; | ||
1146 | } | ||
1147 | |||
1148 | err = insert_header(core_parent, links); | ||
1149 | if (err) | ||
1150 | kfree(links); | ||
1151 | out: | ||
1152 | drop_sysctl_table(&core_parent->header); | ||
1153 | return err; | ||
1154 | } | ||
1155 | |||
1156 | /** | ||
1157 | * __register_sysctl_table - register a leaf sysctl table | ||
1158 | * @set: Sysctl tree to register on | ||
1159 | * @path: The path to the directory the sysctl table is in. | ||
1160 | * @table: the top-level table structure | ||
1161 | * | ||
1162 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1163 | * array. A completely 0 filled entry terminates the table. | ||
1164 | * | ||
1165 | * The members of the &struct ctl_table structure are used as follows: | ||
1166 | * | ||
1167 | * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not | ||
1168 | * enter a sysctl file | ||
1169 | * | ||
1170 | * data - a pointer to data for use by proc_handler | ||
1171 | * | ||
1172 | * maxlen - the maximum size in bytes of the data | ||
1173 | * | ||
1174 | * mode - the file permissions for the /proc/sys file | ||
1175 | * | ||
1176 | * child - must be %NULL. | ||
1177 | * | ||
1178 | * proc_handler - the text handler routine (described below) | ||
1179 | * | ||
1180 | * extra1, extra2 - extra pointers usable by the proc handler routines | ||
1181 | * | ||
1182 | * Leaf nodes in the sysctl tree will be represented by a single file | ||
1183 | * under /proc; non-leaf nodes will be represented by directories. | ||
1184 | * | ||
1185 | * There must be a proc_handler routine for any terminal nodes. | ||
1186 | * Several default handlers are available to cover common cases - | ||
1187 | * | ||
1188 | * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), | ||
1189 | * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), | ||
1190 | * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() | ||
1191 | * | ||
1192 | * It is the handler's job to read the input buffer from user memory | ||
1193 | * and process it. The handler should return 0 on success. | ||
1194 | * | ||
1195 | * This routine returns %NULL on a failure to register, and a pointer | ||
1196 | * to the table header on success. | ||
1197 | */ | ||
1198 | struct ctl_table_header *__register_sysctl_table( | ||
1199 | struct ctl_table_set *set, | ||
1200 | const char *path, struct ctl_table *table) | ||
1201 | { | ||
1202 | struct ctl_table_root *root = set->dir.header.root; | ||
1203 | struct ctl_table_header *header; | ||
1204 | const char *name, *nextname; | ||
1205 | struct ctl_dir *dir; | ||
1206 | struct ctl_table *entry; | ||
1207 | struct ctl_node *node; | ||
1208 | int nr_entries = 0; | ||
1209 | |||
1210 | for (entry = table; entry->procname; entry++) | ||
1211 | nr_entries++; | ||
1212 | |||
1213 | header = kzalloc(sizeof(struct ctl_table_header) + | ||
1214 | sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); | ||
1215 | if (!header) | ||
1216 | return NULL; | ||
1217 | |||
1218 | node = (struct ctl_node *)(header + 1); | ||
1219 | init_header(header, root, set, node, table); | ||
1220 | if (sysctl_check_table(path, table)) | ||
1221 | goto fail; | ||
1222 | |||
1223 | spin_lock(&sysctl_lock); | ||
1224 | dir = &set->dir; | ||
1225 | /* Reference moved down the diretory tree get_subdir */ | ||
1226 | dir->header.nreg++; | ||
1227 | spin_unlock(&sysctl_lock); | ||
1228 | |||
1229 | /* Find the directory for the ctl_table */ | ||
1230 | for (name = path; name; name = nextname) { | ||
1231 | int namelen; | ||
1232 | nextname = strchr(name, '/'); | ||
1233 | if (nextname) { | ||
1234 | namelen = nextname - name; | ||
1235 | nextname++; | ||
1236 | } else { | ||
1237 | namelen = strlen(name); | ||
1238 | } | ||
1239 | if (namelen == 0) | ||
1240 | continue; | ||
1241 | |||
1242 | dir = get_subdir(dir, name, namelen); | ||
1243 | if (IS_ERR(dir)) | ||
1244 | goto fail; | ||
1245 | } | ||
1246 | |||
1247 | spin_lock(&sysctl_lock); | ||
1248 | if (insert_header(dir, header)) | ||
1249 | goto fail_put_dir_locked; | ||
1250 | |||
1251 | drop_sysctl_table(&dir->header); | ||
1252 | spin_unlock(&sysctl_lock); | ||
1253 | |||
1254 | return header; | ||
1255 | |||
1256 | fail_put_dir_locked: | ||
1257 | drop_sysctl_table(&dir->header); | ||
1258 | spin_unlock(&sysctl_lock); | ||
1259 | fail: | ||
1260 | kfree(header); | ||
1261 | dump_stack(); | ||
1262 | return NULL; | ||
1263 | } | ||
1264 | |||
1265 | /** | ||
1266 | * register_sysctl - register a sysctl table | ||
1267 | * @path: The path to the directory the sysctl table is in. | ||
1268 | * @table: the table structure | ||
1269 | * | ||
1270 | * Register a sysctl table. @table should be a filled in ctl_table | ||
1271 | * array. A completely 0 filled entry terminates the table. | ||
1272 | * | ||
1273 | * See __register_sysctl_table for more details. | ||
1274 | */ | ||
1275 | struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) | ||
1276 | { | ||
1277 | return __register_sysctl_table(&sysctl_table_root.default_set, | ||
1278 | path, table); | ||
1279 | } | ||
1280 | EXPORT_SYMBOL(register_sysctl); | ||
1281 | |||
1282 | static char *append_path(const char *path, char *pos, const char *name) | ||
1283 | { | ||
1284 | int namelen; | ||
1285 | namelen = strlen(name); | ||
1286 | if (((pos - path) + namelen + 2) >= PATH_MAX) | ||
1287 | return NULL; | ||
1288 | memcpy(pos, name, namelen); | ||
1289 | pos[namelen] = '/'; | ||
1290 | pos[namelen + 1] = '\0'; | ||
1291 | pos += namelen + 1; | ||
1292 | return pos; | ||
1293 | } | ||
1294 | |||
1295 | static int count_subheaders(struct ctl_table *table) | ||
1296 | { | ||
1297 | int has_files = 0; | ||
1298 | int nr_subheaders = 0; | ||
1299 | struct ctl_table *entry; | ||
1300 | |||
1301 | /* special case: no directory and empty directory */ | ||
1302 | if (!table || !table->procname) | ||
1303 | return 1; | ||
1304 | |||
1305 | for (entry = table; entry->procname; entry++) { | ||
1306 | if (entry->child) | ||
1307 | nr_subheaders += count_subheaders(entry->child); | ||
1308 | else | ||
1309 | has_files = 1; | ||
1310 | } | ||
1311 | return nr_subheaders + has_files; | ||
1312 | } | ||
1313 | |||
1314 | static int register_leaf_sysctl_tables(const char *path, char *pos, | ||
1315 | struct ctl_table_header ***subheader, struct ctl_table_set *set, | ||
1316 | struct ctl_table *table) | ||
1317 | { | ||
1318 | struct ctl_table *ctl_table_arg = NULL; | ||
1319 | struct ctl_table *entry, *files; | ||
1320 | int nr_files = 0; | ||
1321 | int nr_dirs = 0; | ||
1322 | int err = -ENOMEM; | ||
1323 | |||
1324 | for (entry = table; entry->procname; entry++) { | ||
1325 | if (entry->child) | ||
1326 | nr_dirs++; | ||
1327 | else | ||
1328 | nr_files++; | ||
1329 | } | ||
1330 | |||
1331 | files = table; | ||
1332 | /* If there are mixed files and directories we need a new table */ | ||
1333 | if (nr_dirs && nr_files) { | ||
1334 | struct ctl_table *new; | ||
1335 | files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), | ||
1336 | GFP_KERNEL); | ||
1337 | if (!files) | ||
1338 | goto out; | ||
1339 | |||
1340 | ctl_table_arg = files; | ||
1341 | for (new = files, entry = table; entry->procname; entry++) { | ||
1342 | if (entry->child) | ||
1343 | continue; | ||
1344 | *new = *entry; | ||
1345 | new++; | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | /* Register everything except a directory full of subdirectories */ | ||
1350 | if (nr_files || !nr_dirs) { | ||
1351 | struct ctl_table_header *header; | ||
1352 | header = __register_sysctl_table(set, path, files); | ||
1353 | if (!header) { | ||
1354 | kfree(ctl_table_arg); | ||
1355 | goto out; | ||
1356 | } | ||
1357 | |||
1358 | /* Remember if we need to free the file table */ | ||
1359 | header->ctl_table_arg = ctl_table_arg; | ||
1360 | **subheader = header; | ||
1361 | (*subheader)++; | ||
1362 | } | ||
1363 | |||
1364 | /* Recurse into the subdirectories. */ | ||
1365 | for (entry = table; entry->procname; entry++) { | ||
1366 | char *child_pos; | ||
1367 | |||
1368 | if (!entry->child) | ||
1369 | continue; | ||
1370 | |||
1371 | err = -ENAMETOOLONG; | ||
1372 | child_pos = append_path(path, pos, entry->procname); | ||
1373 | if (!child_pos) | ||
1374 | goto out; | ||
1375 | |||
1376 | err = register_leaf_sysctl_tables(path, child_pos, subheader, | ||
1377 | set, entry->child); | ||
1378 | pos[0] = '\0'; | ||
1379 | if (err) | ||
1380 | goto out; | ||
1381 | } | ||
1382 | err = 0; | ||
1383 | out: | ||
1384 | /* On failure our caller will unregister all registered subheaders */ | ||
1385 | return err; | ||
1386 | } | ||
1387 | |||
1388 | /** | ||
1389 | * __register_sysctl_paths - register a sysctl table hierarchy | ||
1390 | * @set: Sysctl tree to register on | ||
1391 | * @path: The path to the directory the sysctl table is in. | ||
1392 | * @table: the top-level table structure | ||
1393 | * | ||
1394 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1395 | * array. A completely 0 filled entry terminates the table. | ||
1396 | * | ||
1397 | * See __register_sysctl_table for more details. | ||
1398 | */ | ||
1399 | struct ctl_table_header *__register_sysctl_paths( | ||
1400 | struct ctl_table_set *set, | ||
1401 | const struct ctl_path *path, struct ctl_table *table) | ||
1402 | { | ||
1403 | struct ctl_table *ctl_table_arg = table; | ||
1404 | int nr_subheaders = count_subheaders(table); | ||
1405 | struct ctl_table_header *header = NULL, **subheaders, **subheader; | ||
1406 | const struct ctl_path *component; | ||
1407 | char *new_path, *pos; | ||
1408 | |||
1409 | pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); | ||
1410 | if (!new_path) | ||
1411 | return NULL; | ||
1412 | |||
1413 | pos[0] = '\0'; | ||
1414 | for (component = path; component->procname; component++) { | ||
1415 | pos = append_path(new_path, pos, component->procname); | ||
1416 | if (!pos) | ||
1417 | goto out; | ||
1418 | } | ||
1419 | while (table->procname && table->child && !table[1].procname) { | ||
1420 | pos = append_path(new_path, pos, table->procname); | ||
1421 | if (!pos) | ||
1422 | goto out; | ||
1423 | table = table->child; | ||
1424 | } | ||
1425 | if (nr_subheaders == 1) { | ||
1426 | header = __register_sysctl_table(set, new_path, table); | ||
1427 | if (header) | ||
1428 | header->ctl_table_arg = ctl_table_arg; | ||
1429 | } else { | ||
1430 | header = kzalloc(sizeof(*header) + | ||
1431 | sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); | ||
1432 | if (!header) | ||
1433 | goto out; | ||
1434 | |||
1435 | subheaders = (struct ctl_table_header **) (header + 1); | ||
1436 | subheader = subheaders; | ||
1437 | header->ctl_table_arg = ctl_table_arg; | ||
1438 | |||
1439 | if (register_leaf_sysctl_tables(new_path, pos, &subheader, | ||
1440 | set, table)) | ||
1441 | goto err_register_leaves; | ||
1442 | } | ||
1443 | |||
1444 | out: | ||
1445 | kfree(new_path); | ||
1446 | return header; | ||
1447 | |||
1448 | err_register_leaves: | ||
1449 | while (subheader > subheaders) { | ||
1450 | struct ctl_table_header *subh = *(--subheader); | ||
1451 | struct ctl_table *table = subh->ctl_table_arg; | ||
1452 | unregister_sysctl_table(subh); | ||
1453 | kfree(table); | ||
1454 | } | ||
1455 | kfree(header); | ||
1456 | header = NULL; | ||
1457 | goto out; | ||
1458 | } | ||
1459 | |||
1460 | /** | ||
1461 | * register_sysctl_table_path - register a sysctl table hierarchy | ||
1462 | * @path: The path to the directory the sysctl table is in. | ||
1463 | * @table: the top-level table structure | ||
1464 | * | ||
1465 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1466 | * array. A completely 0 filled entry terminates the table. | ||
1467 | * | ||
1468 | * See __register_sysctl_paths for more details. | ||
1469 | */ | ||
1470 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
1471 | struct ctl_table *table) | ||
1472 | { | ||
1473 | return __register_sysctl_paths(&sysctl_table_root.default_set, | ||
1474 | path, table); | ||
1475 | } | ||
1476 | EXPORT_SYMBOL(register_sysctl_paths); | ||
1477 | |||
1478 | /** | ||
1479 | * register_sysctl_table - register a sysctl table hierarchy | ||
1480 | * @table: the top-level table structure | ||
1481 | * | ||
1482 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1483 | * array. A completely 0 filled entry terminates the table. | ||
1484 | * | ||
1485 | * See register_sysctl_paths for more details. | ||
1486 | */ | ||
1487 | struct ctl_table_header *register_sysctl_table(struct ctl_table *table) | ||
1488 | { | ||
1489 | static const struct ctl_path null_path[] = { {} }; | ||
1490 | |||
1491 | return register_sysctl_paths(null_path, table); | ||
1492 | } | ||
1493 | EXPORT_SYMBOL(register_sysctl_table); | ||
1494 | |||
1495 | static void put_links(struct ctl_table_header *header) | ||
1496 | { | ||
1497 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
1498 | struct ctl_table_root *root = header->root; | ||
1499 | struct ctl_dir *parent = header->parent; | ||
1500 | struct ctl_dir *core_parent; | ||
1501 | struct ctl_table *entry; | ||
1502 | |||
1503 | if (header->set == root_set) | ||
1504 | return; | ||
1505 | |||
1506 | core_parent = xlate_dir(root_set, parent); | ||
1507 | if (IS_ERR(core_parent)) | ||
1508 | return; | ||
1509 | |||
1510 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
1511 | struct ctl_table_header *link_head; | ||
1512 | struct ctl_table *link; | ||
1513 | const char *name = entry->procname; | ||
1514 | |||
1515 | link = find_entry(&link_head, core_parent, name, strlen(name)); | ||
1516 | if (link && | ||
1517 | ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || | ||
1518 | (S_ISLNK(link->mode) && (link->data == root)))) { | ||
1519 | drop_sysctl_table(link_head); | ||
1520 | } | ||
1521 | else { | ||
1522 | printk(KERN_ERR "sysctl link missing during unregister: "); | ||
1523 | sysctl_print_dir(parent); | ||
1524 | printk(KERN_CONT "/%s\n", name); | ||
1525 | } | ||
1526 | } | ||
1527 | } | ||
1528 | |||
1529 | static void drop_sysctl_table(struct ctl_table_header *header) | ||
1530 | { | ||
1531 | struct ctl_dir *parent = header->parent; | ||
1532 | |||
1533 | if (--header->nreg) | ||
1534 | return; | ||
1535 | |||
1536 | put_links(header); | ||
1537 | start_unregistering(header); | ||
1538 | if (!--header->count) | ||
1539 | kfree_rcu(header, rcu); | ||
1540 | |||
1541 | if (parent) | ||
1542 | drop_sysctl_table(&parent->header); | ||
1543 | } | ||
1544 | |||
1545 | /** | ||
1546 | * unregister_sysctl_table - unregister a sysctl table hierarchy | ||
1547 | * @header: the header returned from register_sysctl_table | ||
1548 | * | ||
1549 | * Unregisters the sysctl table and all children. proc entries may not | ||
1550 | * actually be removed until they are no longer used by anyone. | ||
1551 | */ | ||
1552 | void unregister_sysctl_table(struct ctl_table_header * header) | ||
1553 | { | ||
1554 | int nr_subheaders; | ||
1555 | might_sleep(); | ||
1556 | |||
1557 | if (header == NULL) | ||
1558 | return; | ||
1559 | |||
1560 | nr_subheaders = count_subheaders(header->ctl_table_arg); | ||
1561 | if (unlikely(nr_subheaders > 1)) { | ||
1562 | struct ctl_table_header **subheaders; | ||
1563 | int i; | ||
1564 | |||
1565 | subheaders = (struct ctl_table_header **)(header + 1); | ||
1566 | for (i = nr_subheaders -1; i >= 0; i--) { | ||
1567 | struct ctl_table_header *subh = subheaders[i]; | ||
1568 | struct ctl_table *table = subh->ctl_table_arg; | ||
1569 | unregister_sysctl_table(subh); | ||
1570 | kfree(table); | ||
1571 | } | ||
1572 | kfree(header); | ||
1573 | return; | ||
1574 | } | ||
1575 | |||
1576 | spin_lock(&sysctl_lock); | ||
1577 | drop_sysctl_table(header); | ||
1578 | spin_unlock(&sysctl_lock); | ||
1579 | } | ||
1580 | EXPORT_SYMBOL(unregister_sysctl_table); | ||
1581 | |||
1582 | void setup_sysctl_set(struct ctl_table_set *set, | ||
1583 | struct ctl_table_root *root, | ||
1584 | int (*is_seen)(struct ctl_table_set *)) | ||
1585 | { | ||
1586 | memset(set, 0, sizeof(*set)); | ||
1587 | set->is_seen = is_seen; | ||
1588 | init_header(&set->dir.header, root, set, NULL, root_table); | ||
1589 | } | ||
1590 | |||
1591 | void retire_sysctl_set(struct ctl_table_set *set) | ||
1592 | { | ||
1593 | WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); | ||
1594 | } | ||
1595 | |||
475 | int __init proc_sys_init(void) | 1596 | int __init proc_sys_init(void) |
476 | { | 1597 | { |
477 | struct proc_dir_entry *proc_sys_root; | 1598 | struct proc_dir_entry *proc_sys_root; |
@@ -480,5 +1601,6 @@ int __init proc_sys_init(void) | |||
480 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 1601 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
481 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 1602 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
482 | proc_sys_root->nlink = 0; | 1603 | proc_sys_root->nlink = 0; |
483 | return 0; | 1604 | |
1605 | return sysctl_init(); | ||
484 | } | 1606 | } |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 121f77cfef76..6a0c62d6e442 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -89,18 +89,19 @@ static int show_stat(struct seq_file *p, void *v) | |||
89 | } | 89 | } |
90 | sum += arch_irq_stat(); | 90 | sum += arch_irq_stat(); |
91 | 91 | ||
92 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " | 92 | seq_puts(p, "cpu "); |
93 | "%llu\n", | 93 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); |
94 | (unsigned long long)cputime64_to_clock_t(user), | 94 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); |
95 | (unsigned long long)cputime64_to_clock_t(nice), | 95 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); |
96 | (unsigned long long)cputime64_to_clock_t(system), | 96 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); |
97 | (unsigned long long)cputime64_to_clock_t(idle), | 97 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); |
98 | (unsigned long long)cputime64_to_clock_t(iowait), | 98 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); |
99 | (unsigned long long)cputime64_to_clock_t(irq), | 99 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); |
100 | (unsigned long long)cputime64_to_clock_t(softirq), | 100 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); |
101 | (unsigned long long)cputime64_to_clock_t(steal), | 101 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); |
102 | (unsigned long long)cputime64_to_clock_t(guest), | 102 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); |
103 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | 103 | seq_putc(p, '\n'); |
104 | |||
104 | for_each_online_cpu(i) { | 105 | for_each_online_cpu(i) { |
105 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | 106 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ |
106 | user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; | 107 | user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; |
@@ -113,26 +114,24 @@ static int show_stat(struct seq_file *p, void *v) | |||
113 | steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; | 114 | steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; |
114 | guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; | 115 | guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; |
115 | guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; | 116 | guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; |
116 | seq_printf(p, | 117 | seq_printf(p, "cpu%d", i); |
117 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " | 118 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); |
118 | "%llu\n", | 119 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); |
119 | i, | 120 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); |
120 | (unsigned long long)cputime64_to_clock_t(user), | 121 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); |
121 | (unsigned long long)cputime64_to_clock_t(nice), | 122 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); |
122 | (unsigned long long)cputime64_to_clock_t(system), | 123 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); |
123 | (unsigned long long)cputime64_to_clock_t(idle), | 124 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); |
124 | (unsigned long long)cputime64_to_clock_t(iowait), | 125 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); |
125 | (unsigned long long)cputime64_to_clock_t(irq), | 126 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); |
126 | (unsigned long long)cputime64_to_clock_t(softirq), | 127 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); |
127 | (unsigned long long)cputime64_to_clock_t(steal), | 128 | seq_putc(p, '\n'); |
128 | (unsigned long long)cputime64_to_clock_t(guest), | ||
129 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | ||
130 | } | 129 | } |
131 | seq_printf(p, "intr %llu", (unsigned long long)sum); | 130 | seq_printf(p, "intr %llu", (unsigned long long)sum); |
132 | 131 | ||
133 | /* sum again ? it could be updated? */ | 132 | /* sum again ? it could be updated? */ |
134 | for_each_irq_nr(j) | 133 | for_each_irq_nr(j) |
135 | seq_printf(p, " %u", kstat_irqs(j)); | 134 | seq_put_decimal_ull(p, ' ', kstat_irqs(j)); |
136 | 135 | ||
137 | seq_printf(p, | 136 | seq_printf(p, |
138 | "\nctxt %llu\n" | 137 | "\nctxt %llu\n" |
@@ -149,7 +148,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
149 | seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); | 148 | seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); |
150 | 149 | ||
151 | for (i = 0; i < NR_SOFTIRQS; i++) | 150 | for (i = 0; i < NR_SOFTIRQS; i++) |
152 | seq_printf(p, " %u", per_softirq_sums[i]); | 151 | seq_put_decimal_ull(p, ' ', per_softirq_sums[i]); |
153 | seq_putc(p, '\n'); | 152 | seq_putc(p, '\n'); |
154 | 153 | ||
155 | return 0; | 154 | return 0; |
@@ -157,11 +156,14 @@ static int show_stat(struct seq_file *p, void *v) | |||
157 | 156 | ||
158 | static int stat_open(struct inode *inode, struct file *file) | 157 | static int stat_open(struct inode *inode, struct file *file) |
159 | { | 158 | { |
160 | unsigned size = 4096 * (1 + num_possible_cpus() / 32); | 159 | unsigned size = 1024 + 128 * num_possible_cpus(); |
161 | char *buf; | 160 | char *buf; |
162 | struct seq_file *m; | 161 | struct seq_file *m; |
163 | int res; | 162 | int res; |
164 | 163 | ||
164 | /* minimum size to display an interrupt count : 2 bytes */ | ||
165 | size += 2 * nr_irqs; | ||
166 | |||
165 | /* don't ask for more than the kmalloc() max size */ | 167 | /* don't ask for more than the kmalloc() max size */ |
166 | if (size > KMALLOC_MAX_SIZE) | 168 | if (size > KMALLOC_MAX_SIZE) |
167 | size = KMALLOC_MAX_SIZE; | 169 | size = KMALLOC_MAX_SIZE; |
@@ -173,7 +175,7 @@ static int stat_open(struct inode *inode, struct file *file) | |||
173 | if (!res) { | 175 | if (!res) { |
174 | m = file->private_data; | 176 | m = file->private_data; |
175 | m->buf = buf; | 177 | m->buf = buf; |
176 | m->size = size; | 178 | m->size = ksize(buf); |
177 | } else | 179 | } else |
178 | kfree(buf); | 180 | kfree(buf); |
179 | return res; | 181 | return res; |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 9ec22d3b4293..82c585f715e3 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -68,9 +68,25 @@ void pstore_set_kmsg_bytes(int bytes) | |||
68 | /* Tag each group of saved records with a sequence number */ | 68 | /* Tag each group of saved records with a sequence number */ |
69 | static int oopscount; | 69 | static int oopscount; |
70 | 70 | ||
71 | static char *reason_str[] = { | 71 | static const char *get_reason_str(enum kmsg_dump_reason reason) |
72 | "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency" | 72 | { |
73 | }; | 73 | switch (reason) { |
74 | case KMSG_DUMP_PANIC: | ||
75 | return "Panic"; | ||
76 | case KMSG_DUMP_OOPS: | ||
77 | return "Oops"; | ||
78 | case KMSG_DUMP_EMERG: | ||
79 | return "Emergency"; | ||
80 | case KMSG_DUMP_RESTART: | ||
81 | return "Restart"; | ||
82 | case KMSG_DUMP_HALT: | ||
83 | return "Halt"; | ||
84 | case KMSG_DUMP_POWEROFF: | ||
85 | return "Poweroff"; | ||
86 | default: | ||
87 | return "Unknown"; | ||
88 | } | ||
89 | } | ||
74 | 90 | ||
75 | /* | 91 | /* |
76 | * callback from kmsg_dump. (s2,l2) has the most recently | 92 | * callback from kmsg_dump. (s2,l2) has the most recently |
@@ -85,17 +101,15 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
85 | unsigned long s1_start, s2_start; | 101 | unsigned long s1_start, s2_start; |
86 | unsigned long l1_cpy, l2_cpy; | 102 | unsigned long l1_cpy, l2_cpy; |
87 | unsigned long size, total = 0; | 103 | unsigned long size, total = 0; |
88 | char *dst, *why; | 104 | char *dst; |
105 | const char *why; | ||
89 | u64 id; | 106 | u64 id; |
90 | int hsize, ret; | 107 | int hsize, ret; |
91 | unsigned int part = 1; | 108 | unsigned int part = 1; |
92 | unsigned long flags = 0; | 109 | unsigned long flags = 0; |
93 | int is_locked = 0; | 110 | int is_locked = 0; |
94 | 111 | ||
95 | if (reason < ARRAY_SIZE(reason_str)) | 112 | why = get_reason_str(reason); |
96 | why = reason_str[reason]; | ||
97 | else | ||
98 | why = "Unknown"; | ||
99 | 113 | ||
100 | if (in_nmi()) { | 114 | if (in_nmi()) { |
101 | is_locked = spin_trylock(&psinfo->buf_lock); | 115 | is_locked = spin_trylock(&psinfo->buf_lock); |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8b4f12b33f57..d69a1d1d7e15 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -1110,6 +1110,13 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) | |||
1110 | clear_bit(DQ_BLKS_B, &dquot->dq_flags); | 1110 | clear_bit(DQ_BLKS_B, &dquot->dq_flags); |
1111 | } | 1111 | } |
1112 | 1112 | ||
1113 | struct dquot_warn { | ||
1114 | struct super_block *w_sb; | ||
1115 | qid_t w_dq_id; | ||
1116 | short w_dq_type; | ||
1117 | short w_type; | ||
1118 | }; | ||
1119 | |||
1113 | static int warning_issued(struct dquot *dquot, const int warntype) | 1120 | static int warning_issued(struct dquot *dquot, const int warntype) |
1114 | { | 1121 | { |
1115 | int flag = (warntype == QUOTA_NL_BHARDWARN || | 1122 | int flag = (warntype == QUOTA_NL_BHARDWARN || |
@@ -1125,41 +1132,42 @@ static int warning_issued(struct dquot *dquot, const int warntype) | |||
1125 | #ifdef CONFIG_PRINT_QUOTA_WARNING | 1132 | #ifdef CONFIG_PRINT_QUOTA_WARNING |
1126 | static int flag_print_warnings = 1; | 1133 | static int flag_print_warnings = 1; |
1127 | 1134 | ||
1128 | static int need_print_warning(struct dquot *dquot) | 1135 | static int need_print_warning(struct dquot_warn *warn) |
1129 | { | 1136 | { |
1130 | if (!flag_print_warnings) | 1137 | if (!flag_print_warnings) |
1131 | return 0; | 1138 | return 0; |
1132 | 1139 | ||
1133 | switch (dquot->dq_type) { | 1140 | switch (warn->w_dq_type) { |
1134 | case USRQUOTA: | 1141 | case USRQUOTA: |
1135 | return current_fsuid() == dquot->dq_id; | 1142 | return current_fsuid() == warn->w_dq_id; |
1136 | case GRPQUOTA: | 1143 | case GRPQUOTA: |
1137 | return in_group_p(dquot->dq_id); | 1144 | return in_group_p(warn->w_dq_id); |
1138 | } | 1145 | } |
1139 | return 0; | 1146 | return 0; |
1140 | } | 1147 | } |
1141 | 1148 | ||
1142 | /* Print warning to user which exceeded quota */ | 1149 | /* Print warning to user which exceeded quota */ |
1143 | static void print_warning(struct dquot *dquot, const int warntype) | 1150 | static void print_warning(struct dquot_warn *warn) |
1144 | { | 1151 | { |
1145 | char *msg = NULL; | 1152 | char *msg = NULL; |
1146 | struct tty_struct *tty; | 1153 | struct tty_struct *tty; |
1154 | int warntype = warn->w_type; | ||
1147 | 1155 | ||
1148 | if (warntype == QUOTA_NL_IHARDBELOW || | 1156 | if (warntype == QUOTA_NL_IHARDBELOW || |
1149 | warntype == QUOTA_NL_ISOFTBELOW || | 1157 | warntype == QUOTA_NL_ISOFTBELOW || |
1150 | warntype == QUOTA_NL_BHARDBELOW || | 1158 | warntype == QUOTA_NL_BHARDBELOW || |
1151 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) | 1159 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn)) |
1152 | return; | 1160 | return; |
1153 | 1161 | ||
1154 | tty = get_current_tty(); | 1162 | tty = get_current_tty(); |
1155 | if (!tty) | 1163 | if (!tty) |
1156 | return; | 1164 | return; |
1157 | tty_write_message(tty, dquot->dq_sb->s_id); | 1165 | tty_write_message(tty, warn->w_sb->s_id); |
1158 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) | 1166 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) |
1159 | tty_write_message(tty, ": warning, "); | 1167 | tty_write_message(tty, ": warning, "); |
1160 | else | 1168 | else |
1161 | tty_write_message(tty, ": write failed, "); | 1169 | tty_write_message(tty, ": write failed, "); |
1162 | tty_write_message(tty, quotatypes[dquot->dq_type]); | 1170 | tty_write_message(tty, quotatypes[warn->w_dq_type]); |
1163 | switch (warntype) { | 1171 | switch (warntype) { |
1164 | case QUOTA_NL_IHARDWARN: | 1172 | case QUOTA_NL_IHARDWARN: |
1165 | msg = " file limit reached.\r\n"; | 1173 | msg = " file limit reached.\r\n"; |
@@ -1185,26 +1193,34 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
1185 | } | 1193 | } |
1186 | #endif | 1194 | #endif |
1187 | 1195 | ||
1196 | static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, | ||
1197 | int warntype) | ||
1198 | { | ||
1199 | if (warning_issued(dquot, warntype)) | ||
1200 | return; | ||
1201 | warn->w_type = warntype; | ||
1202 | warn->w_sb = dquot->dq_sb; | ||
1203 | warn->w_dq_id = dquot->dq_id; | ||
1204 | warn->w_dq_type = dquot->dq_type; | ||
1205 | } | ||
1206 | |||
1188 | /* | 1207 | /* |
1189 | * Write warnings to the console and send warning messages over netlink. | 1208 | * Write warnings to the console and send warning messages over netlink. |
1190 | * | 1209 | * |
1191 | * Note that this function can sleep. | 1210 | * Note that this function can call into tty and networking code. |
1192 | */ | 1211 | */ |
1193 | static void flush_warnings(struct dquot *const *dquots, char *warntype) | 1212 | static void flush_warnings(struct dquot_warn *warn) |
1194 | { | 1213 | { |
1195 | struct dquot *dq; | ||
1196 | int i; | 1214 | int i; |
1197 | 1215 | ||
1198 | for (i = 0; i < MAXQUOTAS; i++) { | 1216 | for (i = 0; i < MAXQUOTAS; i++) { |
1199 | dq = dquots[i]; | 1217 | if (warn[i].w_type == QUOTA_NL_NOWARN) |
1200 | if (dq && warntype[i] != QUOTA_NL_NOWARN && | 1218 | continue; |
1201 | !warning_issued(dq, warntype[i])) { | ||
1202 | #ifdef CONFIG_PRINT_QUOTA_WARNING | 1219 | #ifdef CONFIG_PRINT_QUOTA_WARNING |
1203 | print_warning(dq, warntype[i]); | 1220 | print_warning(&warn[i]); |
1204 | #endif | 1221 | #endif |
1205 | quota_send_warning(dq->dq_type, dq->dq_id, | 1222 | quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id, |
1206 | dq->dq_sb->s_dev, warntype[i]); | 1223 | warn[i].w_sb->s_dev, warn[i].w_type); |
1207 | } | ||
1208 | } | 1224 | } |
1209 | } | 1225 | } |
1210 | 1226 | ||
@@ -1218,11 +1234,11 @@ static int ignore_hardlimit(struct dquot *dquot) | |||
1218 | } | 1234 | } |
1219 | 1235 | ||
1220 | /* needs dq_data_lock */ | 1236 | /* needs dq_data_lock */ |
1221 | static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) | 1237 | static int check_idq(struct dquot *dquot, qsize_t inodes, |
1238 | struct dquot_warn *warn) | ||
1222 | { | 1239 | { |
1223 | qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; | 1240 | qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; |
1224 | 1241 | ||
1225 | *warntype = QUOTA_NL_NOWARN; | ||
1226 | if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || | 1242 | if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || |
1227 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) | 1243 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) |
1228 | return 0; | 1244 | return 0; |
@@ -1230,7 +1246,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) | |||
1230 | if (dquot->dq_dqb.dqb_ihardlimit && | 1246 | if (dquot->dq_dqb.dqb_ihardlimit && |
1231 | newinodes > dquot->dq_dqb.dqb_ihardlimit && | 1247 | newinodes > dquot->dq_dqb.dqb_ihardlimit && |
1232 | !ignore_hardlimit(dquot)) { | 1248 | !ignore_hardlimit(dquot)) { |
1233 | *warntype = QUOTA_NL_IHARDWARN; | 1249 | prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN); |
1234 | return -EDQUOT; | 1250 | return -EDQUOT; |
1235 | } | 1251 | } |
1236 | 1252 | ||
@@ -1239,14 +1255,14 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) | |||
1239 | dquot->dq_dqb.dqb_itime && | 1255 | dquot->dq_dqb.dqb_itime && |
1240 | get_seconds() >= dquot->dq_dqb.dqb_itime && | 1256 | get_seconds() >= dquot->dq_dqb.dqb_itime && |
1241 | !ignore_hardlimit(dquot)) { | 1257 | !ignore_hardlimit(dquot)) { |
1242 | *warntype = QUOTA_NL_ISOFTLONGWARN; | 1258 | prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN); |
1243 | return -EDQUOT; | 1259 | return -EDQUOT; |
1244 | } | 1260 | } |
1245 | 1261 | ||
1246 | if (dquot->dq_dqb.dqb_isoftlimit && | 1262 | if (dquot->dq_dqb.dqb_isoftlimit && |
1247 | newinodes > dquot->dq_dqb.dqb_isoftlimit && | 1263 | newinodes > dquot->dq_dqb.dqb_isoftlimit && |
1248 | dquot->dq_dqb.dqb_itime == 0) { | 1264 | dquot->dq_dqb.dqb_itime == 0) { |
1249 | *warntype = QUOTA_NL_ISOFTWARN; | 1265 | prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); |
1250 | dquot->dq_dqb.dqb_itime = get_seconds() + | 1266 | dquot->dq_dqb.dqb_itime = get_seconds() + |
1251 | sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; | 1267 | sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; |
1252 | } | 1268 | } |
@@ -1255,12 +1271,12 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) | |||
1255 | } | 1271 | } |
1256 | 1272 | ||
1257 | /* needs dq_data_lock */ | 1273 | /* needs dq_data_lock */ |
1258 | static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype) | 1274 | static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, |
1275 | struct dquot_warn *warn) | ||
1259 | { | 1276 | { |
1260 | qsize_t tspace; | 1277 | qsize_t tspace; |
1261 | struct super_block *sb = dquot->dq_sb; | 1278 | struct super_block *sb = dquot->dq_sb; |
1262 | 1279 | ||
1263 | *warntype = QUOTA_NL_NOWARN; | ||
1264 | if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || | 1280 | if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || |
1265 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) | 1281 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) |
1266 | return 0; | 1282 | return 0; |
@@ -1272,7 +1288,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war | |||
1272 | tspace > dquot->dq_dqb.dqb_bhardlimit && | 1288 | tspace > dquot->dq_dqb.dqb_bhardlimit && |
1273 | !ignore_hardlimit(dquot)) { | 1289 | !ignore_hardlimit(dquot)) { |
1274 | if (!prealloc) | 1290 | if (!prealloc) |
1275 | *warntype = QUOTA_NL_BHARDWARN; | 1291 | prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); |
1276 | return -EDQUOT; | 1292 | return -EDQUOT; |
1277 | } | 1293 | } |
1278 | 1294 | ||
@@ -1282,7 +1298,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war | |||
1282 | get_seconds() >= dquot->dq_dqb.dqb_btime && | 1298 | get_seconds() >= dquot->dq_dqb.dqb_btime && |
1283 | !ignore_hardlimit(dquot)) { | 1299 | !ignore_hardlimit(dquot)) { |
1284 | if (!prealloc) | 1300 | if (!prealloc) |
1285 | *warntype = QUOTA_NL_BSOFTLONGWARN; | 1301 | prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); |
1286 | return -EDQUOT; | 1302 | return -EDQUOT; |
1287 | } | 1303 | } |
1288 | 1304 | ||
@@ -1290,7 +1306,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war | |||
1290 | tspace > dquot->dq_dqb.dqb_bsoftlimit && | 1306 | tspace > dquot->dq_dqb.dqb_bsoftlimit && |
1291 | dquot->dq_dqb.dqb_btime == 0) { | 1307 | dquot->dq_dqb.dqb_btime == 0) { |
1292 | if (!prealloc) { | 1308 | if (!prealloc) { |
1293 | *warntype = QUOTA_NL_BSOFTWARN; | 1309 | prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); |
1294 | dquot->dq_dqb.dqb_btime = get_seconds() + | 1310 | dquot->dq_dqb.dqb_btime = get_seconds() + |
1295 | sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace; | 1311 | sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace; |
1296 | } | 1312 | } |
@@ -1543,10 +1559,9 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) | |||
1543 | int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | 1559 | int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) |
1544 | { | 1560 | { |
1545 | int cnt, ret = 0; | 1561 | int cnt, ret = 0; |
1546 | char warntype[MAXQUOTAS]; | 1562 | struct dquot_warn warn[MAXQUOTAS]; |
1547 | int warn = flags & DQUOT_SPACE_WARN; | 1563 | struct dquot **dquots = inode->i_dquot; |
1548 | int reserve = flags & DQUOT_SPACE_RESERVE; | 1564 | int reserve = flags & DQUOT_SPACE_RESERVE; |
1549 | int nofail = flags & DQUOT_SPACE_NOFAIL; | ||
1550 | 1565 | ||
1551 | /* | 1566 | /* |
1552 | * First test before acquiring mutex - solves deadlocks when we | 1567 | * First test before acquiring mutex - solves deadlocks when we |
@@ -1559,36 +1574,36 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | |||
1559 | 1574 | ||
1560 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1575 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1561 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1576 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1562 | warntype[cnt] = QUOTA_NL_NOWARN; | 1577 | warn[cnt].w_type = QUOTA_NL_NOWARN; |
1563 | 1578 | ||
1564 | spin_lock(&dq_data_lock); | 1579 | spin_lock(&dq_data_lock); |
1565 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1580 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1566 | if (!inode->i_dquot[cnt]) | 1581 | if (!dquots[cnt]) |
1567 | continue; | 1582 | continue; |
1568 | ret = check_bdq(inode->i_dquot[cnt], number, !warn, | 1583 | ret = check_bdq(dquots[cnt], number, |
1569 | warntype+cnt); | 1584 | !(flags & DQUOT_SPACE_WARN), &warn[cnt]); |
1570 | if (ret && !nofail) { | 1585 | if (ret && !(flags & DQUOT_SPACE_NOFAIL)) { |
1571 | spin_unlock(&dq_data_lock); | 1586 | spin_unlock(&dq_data_lock); |
1572 | goto out_flush_warn; | 1587 | goto out_flush_warn; |
1573 | } | 1588 | } |
1574 | } | 1589 | } |
1575 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1590 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1576 | if (!inode->i_dquot[cnt]) | 1591 | if (!dquots[cnt]) |
1577 | continue; | 1592 | continue; |
1578 | if (reserve) | 1593 | if (reserve) |
1579 | dquot_resv_space(inode->i_dquot[cnt], number); | 1594 | dquot_resv_space(dquots[cnt], number); |
1580 | else | 1595 | else |
1581 | dquot_incr_space(inode->i_dquot[cnt], number); | 1596 | dquot_incr_space(dquots[cnt], number); |
1582 | } | 1597 | } |
1583 | inode_incr_space(inode, number, reserve); | 1598 | inode_incr_space(inode, number, reserve); |
1584 | spin_unlock(&dq_data_lock); | 1599 | spin_unlock(&dq_data_lock); |
1585 | 1600 | ||
1586 | if (reserve) | 1601 | if (reserve) |
1587 | goto out_flush_warn; | 1602 | goto out_flush_warn; |
1588 | mark_all_dquot_dirty(inode->i_dquot); | 1603 | mark_all_dquot_dirty(dquots); |
1589 | out_flush_warn: | 1604 | out_flush_warn: |
1590 | flush_warnings(inode->i_dquot, warntype); | ||
1591 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1605 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1606 | flush_warnings(warn); | ||
1592 | out: | 1607 | out: |
1593 | return ret; | 1608 | return ret; |
1594 | } | 1609 | } |
@@ -1600,36 +1615,37 @@ EXPORT_SYMBOL(__dquot_alloc_space); | |||
1600 | int dquot_alloc_inode(const struct inode *inode) | 1615 | int dquot_alloc_inode(const struct inode *inode) |
1601 | { | 1616 | { |
1602 | int cnt, ret = 0; | 1617 | int cnt, ret = 0; |
1603 | char warntype[MAXQUOTAS]; | 1618 | struct dquot_warn warn[MAXQUOTAS]; |
1619 | struct dquot * const *dquots = inode->i_dquot; | ||
1604 | 1620 | ||
1605 | /* First test before acquiring mutex - solves deadlocks when we | 1621 | /* First test before acquiring mutex - solves deadlocks when we |
1606 | * re-enter the quota code and are already holding the mutex */ | 1622 | * re-enter the quota code and are already holding the mutex */ |
1607 | if (!dquot_active(inode)) | 1623 | if (!dquot_active(inode)) |
1608 | return 0; | 1624 | return 0; |
1609 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1625 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1610 | warntype[cnt] = QUOTA_NL_NOWARN; | 1626 | warn[cnt].w_type = QUOTA_NL_NOWARN; |
1611 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1627 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1612 | spin_lock(&dq_data_lock); | 1628 | spin_lock(&dq_data_lock); |
1613 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1629 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1614 | if (!inode->i_dquot[cnt]) | 1630 | if (!dquots[cnt]) |
1615 | continue; | 1631 | continue; |
1616 | ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt); | 1632 | ret = check_idq(dquots[cnt], 1, &warn[cnt]); |
1617 | if (ret) | 1633 | if (ret) |
1618 | goto warn_put_all; | 1634 | goto warn_put_all; |
1619 | } | 1635 | } |
1620 | 1636 | ||
1621 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1637 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1622 | if (!inode->i_dquot[cnt]) | 1638 | if (!dquots[cnt]) |
1623 | continue; | 1639 | continue; |
1624 | dquot_incr_inodes(inode->i_dquot[cnt], 1); | 1640 | dquot_incr_inodes(dquots[cnt], 1); |
1625 | } | 1641 | } |
1626 | 1642 | ||
1627 | warn_put_all: | 1643 | warn_put_all: |
1628 | spin_unlock(&dq_data_lock); | 1644 | spin_unlock(&dq_data_lock); |
1629 | if (ret == 0) | 1645 | if (ret == 0) |
1630 | mark_all_dquot_dirty(inode->i_dquot); | 1646 | mark_all_dquot_dirty(dquots); |
1631 | flush_warnings(inode->i_dquot, warntype); | ||
1632 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1647 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1648 | flush_warnings(warn); | ||
1633 | return ret; | 1649 | return ret; |
1634 | } | 1650 | } |
1635 | EXPORT_SYMBOL(dquot_alloc_inode); | 1651 | EXPORT_SYMBOL(dquot_alloc_inode); |
@@ -1669,7 +1685,8 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); | |||
1669 | void __dquot_free_space(struct inode *inode, qsize_t number, int flags) | 1685 | void __dquot_free_space(struct inode *inode, qsize_t number, int flags) |
1670 | { | 1686 | { |
1671 | unsigned int cnt; | 1687 | unsigned int cnt; |
1672 | char warntype[MAXQUOTAS]; | 1688 | struct dquot_warn warn[MAXQUOTAS]; |
1689 | struct dquot **dquots = inode->i_dquot; | ||
1673 | int reserve = flags & DQUOT_SPACE_RESERVE; | 1690 | int reserve = flags & DQUOT_SPACE_RESERVE; |
1674 | 1691 | ||
1675 | /* First test before acquiring mutex - solves deadlocks when we | 1692 | /* First test before acquiring mutex - solves deadlocks when we |
@@ -1682,23 +1699,28 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) | |||
1682 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1699 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1683 | spin_lock(&dq_data_lock); | 1700 | spin_lock(&dq_data_lock); |
1684 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1701 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1685 | if (!inode->i_dquot[cnt]) | 1702 | int wtype; |
1703 | |||
1704 | warn[cnt].w_type = QUOTA_NL_NOWARN; | ||
1705 | if (!dquots[cnt]) | ||
1686 | continue; | 1706 | continue; |
1687 | warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); | 1707 | wtype = info_bdq_free(dquots[cnt], number); |
1708 | if (wtype != QUOTA_NL_NOWARN) | ||
1709 | prepare_warning(&warn[cnt], dquots[cnt], wtype); | ||
1688 | if (reserve) | 1710 | if (reserve) |
1689 | dquot_free_reserved_space(inode->i_dquot[cnt], number); | 1711 | dquot_free_reserved_space(dquots[cnt], number); |
1690 | else | 1712 | else |
1691 | dquot_decr_space(inode->i_dquot[cnt], number); | 1713 | dquot_decr_space(dquots[cnt], number); |
1692 | } | 1714 | } |
1693 | inode_decr_space(inode, number, reserve); | 1715 | inode_decr_space(inode, number, reserve); |
1694 | spin_unlock(&dq_data_lock); | 1716 | spin_unlock(&dq_data_lock); |
1695 | 1717 | ||
1696 | if (reserve) | 1718 | if (reserve) |
1697 | goto out_unlock; | 1719 | goto out_unlock; |
1698 | mark_all_dquot_dirty(inode->i_dquot); | 1720 | mark_all_dquot_dirty(dquots); |
1699 | out_unlock: | 1721 | out_unlock: |
1700 | flush_warnings(inode->i_dquot, warntype); | ||
1701 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1722 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1723 | flush_warnings(warn); | ||
1702 | } | 1724 | } |
1703 | EXPORT_SYMBOL(__dquot_free_space); | 1725 | EXPORT_SYMBOL(__dquot_free_space); |
1704 | 1726 | ||
@@ -1708,7 +1730,8 @@ EXPORT_SYMBOL(__dquot_free_space); | |||
1708 | void dquot_free_inode(const struct inode *inode) | 1730 | void dquot_free_inode(const struct inode *inode) |
1709 | { | 1731 | { |
1710 | unsigned int cnt; | 1732 | unsigned int cnt; |
1711 | char warntype[MAXQUOTAS]; | 1733 | struct dquot_warn warn[MAXQUOTAS]; |
1734 | struct dquot * const *dquots = inode->i_dquot; | ||
1712 | 1735 | ||
1713 | /* First test before acquiring mutex - solves deadlocks when we | 1736 | /* First test before acquiring mutex - solves deadlocks when we |
1714 | * re-enter the quota code and are already holding the mutex */ | 1737 | * re-enter the quota code and are already holding the mutex */ |
@@ -1718,15 +1741,20 @@ void dquot_free_inode(const struct inode *inode) | |||
1718 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1741 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1719 | spin_lock(&dq_data_lock); | 1742 | spin_lock(&dq_data_lock); |
1720 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1743 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1721 | if (!inode->i_dquot[cnt]) | 1744 | int wtype; |
1745 | |||
1746 | warn[cnt].w_type = QUOTA_NL_NOWARN; | ||
1747 | if (!dquots[cnt]) | ||
1722 | continue; | 1748 | continue; |
1723 | warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1); | 1749 | wtype = info_idq_free(dquots[cnt], 1); |
1724 | dquot_decr_inodes(inode->i_dquot[cnt], 1); | 1750 | if (wtype != QUOTA_NL_NOWARN) |
1751 | prepare_warning(&warn[cnt], dquots[cnt], wtype); | ||
1752 | dquot_decr_inodes(dquots[cnt], 1); | ||
1725 | } | 1753 | } |
1726 | spin_unlock(&dq_data_lock); | 1754 | spin_unlock(&dq_data_lock); |
1727 | mark_all_dquot_dirty(inode->i_dquot); | 1755 | mark_all_dquot_dirty(dquots); |
1728 | flush_warnings(inode->i_dquot, warntype); | ||
1729 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1756 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1757 | flush_warnings(warn); | ||
1730 | } | 1758 | } |
1731 | EXPORT_SYMBOL(dquot_free_inode); | 1759 | EXPORT_SYMBOL(dquot_free_inode); |
1732 | 1760 | ||
@@ -1747,16 +1775,20 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
1747 | struct dquot *transfer_from[MAXQUOTAS] = {}; | 1775 | struct dquot *transfer_from[MAXQUOTAS] = {}; |
1748 | int cnt, ret = 0; | 1776 | int cnt, ret = 0; |
1749 | char is_valid[MAXQUOTAS] = {}; | 1777 | char is_valid[MAXQUOTAS] = {}; |
1750 | char warntype_to[MAXQUOTAS]; | 1778 | struct dquot_warn warn_to[MAXQUOTAS]; |
1751 | char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; | 1779 | struct dquot_warn warn_from_inodes[MAXQUOTAS]; |
1780 | struct dquot_warn warn_from_space[MAXQUOTAS]; | ||
1752 | 1781 | ||
1753 | /* First test before acquiring mutex - solves deadlocks when we | 1782 | /* First test before acquiring mutex - solves deadlocks when we |
1754 | * re-enter the quota code and are already holding the mutex */ | 1783 | * re-enter the quota code and are already holding the mutex */ |
1755 | if (IS_NOQUOTA(inode)) | 1784 | if (IS_NOQUOTA(inode)) |
1756 | return 0; | 1785 | return 0; |
1757 | /* Initialize the arrays */ | 1786 | /* Initialize the arrays */ |
1758 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1787 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1759 | warntype_to[cnt] = QUOTA_NL_NOWARN; | 1788 | warn_to[cnt].w_type = QUOTA_NL_NOWARN; |
1789 | warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; | ||
1790 | warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; | ||
1791 | } | ||
1760 | down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1792 | down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1761 | if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ | 1793 | if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ |
1762 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1794 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); |
@@ -1778,10 +1810,10 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
1778 | continue; | 1810 | continue; |
1779 | is_valid[cnt] = 1; | 1811 | is_valid[cnt] = 1; |
1780 | transfer_from[cnt] = inode->i_dquot[cnt]; | 1812 | transfer_from[cnt] = inode->i_dquot[cnt]; |
1781 | ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); | 1813 | ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]); |
1782 | if (ret) | 1814 | if (ret) |
1783 | goto over_quota; | 1815 | goto over_quota; |
1784 | ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt); | 1816 | ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]); |
1785 | if (ret) | 1817 | if (ret) |
1786 | goto over_quota; | 1818 | goto over_quota; |
1787 | } | 1819 | } |
@@ -1794,10 +1826,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
1794 | continue; | 1826 | continue; |
1795 | /* Due to IO error we might not have transfer_from[] structure */ | 1827 | /* Due to IO error we might not have transfer_from[] structure */ |
1796 | if (transfer_from[cnt]) { | 1828 | if (transfer_from[cnt]) { |
1797 | warntype_from_inodes[cnt] = | 1829 | int wtype; |
1798 | info_idq_free(transfer_from[cnt], 1); | 1830 | wtype = info_idq_free(transfer_from[cnt], 1); |
1799 | warntype_from_space[cnt] = | 1831 | if (wtype != QUOTA_NL_NOWARN) |
1800 | info_bdq_free(transfer_from[cnt], space); | 1832 | prepare_warning(&warn_from_inodes[cnt], |
1833 | transfer_from[cnt], wtype); | ||
1834 | wtype = info_bdq_free(transfer_from[cnt], space); | ||
1835 | if (wtype != QUOTA_NL_NOWARN) | ||
1836 | prepare_warning(&warn_from_space[cnt], | ||
1837 | transfer_from[cnt], wtype); | ||
1801 | dquot_decr_inodes(transfer_from[cnt], 1); | 1838 | dquot_decr_inodes(transfer_from[cnt], 1); |
1802 | dquot_decr_space(transfer_from[cnt], cur_space); | 1839 | dquot_decr_space(transfer_from[cnt], cur_space); |
1803 | dquot_free_reserved_space(transfer_from[cnt], | 1840 | dquot_free_reserved_space(transfer_from[cnt], |
@@ -1815,9 +1852,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
1815 | 1852 | ||
1816 | mark_all_dquot_dirty(transfer_from); | 1853 | mark_all_dquot_dirty(transfer_from); |
1817 | mark_all_dquot_dirty(transfer_to); | 1854 | mark_all_dquot_dirty(transfer_to); |
1818 | flush_warnings(transfer_to, warntype_to); | 1855 | flush_warnings(warn_to); |
1819 | flush_warnings(transfer_from, warntype_from_inodes); | 1856 | flush_warnings(warn_from_inodes); |
1820 | flush_warnings(transfer_from, warntype_from_space); | 1857 | flush_warnings(warn_from_space); |
1821 | /* Pass back references to put */ | 1858 | /* Pass back references to put */ |
1822 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1859 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1823 | if (is_valid[cnt]) | 1860 | if (is_valid[cnt]) |
@@ -1826,7 +1863,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
1826 | over_quota: | 1863 | over_quota: |
1827 | spin_unlock(&dq_data_lock); | 1864 | spin_unlock(&dq_data_lock); |
1828 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1865 | up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1829 | flush_warnings(transfer_to, warntype_to); | 1866 | flush_warnings(warn_to); |
1830 | return ret; | 1867 | return ret; |
1831 | } | 1868 | } |
1832 | EXPORT_SYMBOL(__dquot_transfer); | 1869 | EXPORT_SYMBOL(__dquot_transfer); |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index fc2c4388d126..9a391204ca27 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | |||
282 | case Q_XGETQUOTA: | 282 | case Q_XGETQUOTA: |
283 | return quota_getxquota(sb, type, id, addr); | 283 | return quota_getxquota(sb, type, id, addr); |
284 | case Q_XQUOTASYNC: | 284 | case Q_XQUOTASYNC: |
285 | /* caller already holds s_umount */ | ||
286 | if (sb->s_flags & MS_RDONLY) | 285 | if (sb->s_flags & MS_RDONLY) |
287 | return -EROFS; | 286 | return -EROFS; |
288 | writeback_inodes_sb(sb, WB_REASON_SYNC); | 287 | /* XFS quotas are fully coherent now, making this call a noop */ |
289 | return 0; | 288 | return 0; |
290 | default: | 289 | default: |
291 | return -EINVAL; | 290 | return -EINVAL; |
diff --git a/fs/read_write.c b/fs/read_write.c index 5ad4248b0cd8..ffc99d22e0a3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/uio.h> | 11 | #include <linux/uio.h> |
12 | #include <linux/fsnotify.h> | 12 | #include <linux/fsnotify.h> |
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/module.h> | 14 | #include <linux/export.h> |
15 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
16 | #include <linux/pagemap.h> | 16 | #include <linux/pagemap.h> |
17 | #include <linux/splice.h> | 17 | #include <linux/splice.h> |
diff --git a/fs/readdir.c b/fs/readdir.c index 356f71528ad6..cc0a8227cddf 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/stddef.h> | 7 | #include <linux/stddef.h> |
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 445d768eea44..a59d27126338 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
8 | #include <linux/interrupt.h> | 8 | #include <linux/interrupt.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/bug.h> | ||
10 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
11 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
12 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
diff --git a/fs/select.c b/fs/select.c index e782258d0de3..6fb8943d580b 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/syscalls.h> | 19 | #include <linux/syscalls.h> |
20 | #include <linux/module.h> | 20 | #include <linux/export.h> |
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | #include <linux/poll.h> | 22 | #include <linux/poll.h> |
23 | #include <linux/personality.h> /* for STICKY_TIMEOUTS */ | 23 | #include <linux/personality.h> /* for STICKY_TIMEOUTS */ |
@@ -223,7 +223,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | |||
223 | get_file(filp); | 223 | get_file(filp); |
224 | entry->filp = filp; | 224 | entry->filp = filp; |
225 | entry->wait_address = wait_address; | 225 | entry->wait_address = wait_address; |
226 | entry->key = p->key; | 226 | entry->key = p->_key; |
227 | init_waitqueue_func_entry(&entry->wait, pollwake); | 227 | init_waitqueue_func_entry(&entry->wait, pollwake); |
228 | entry->wait.private = pwq; | 228 | entry->wait.private = pwq; |
229 | add_wait_queue(wait_address, &entry->wait); | 229 | add_wait_queue(wait_address, &entry->wait); |
@@ -386,13 +386,11 @@ get_max: | |||
386 | static inline void wait_key_set(poll_table *wait, unsigned long in, | 386 | static inline void wait_key_set(poll_table *wait, unsigned long in, |
387 | unsigned long out, unsigned long bit) | 387 | unsigned long out, unsigned long bit) |
388 | { | 388 | { |
389 | if (wait) { | 389 | wait->_key = POLLEX_SET; |
390 | wait->key = POLLEX_SET; | 390 | if (in & bit) |
391 | if (in & bit) | 391 | wait->_key |= POLLIN_SET; |
392 | wait->key |= POLLIN_SET; | 392 | if (out & bit) |
393 | if (out & bit) | 393 | wait->_key |= POLLOUT_SET; |
394 | wait->key |= POLLOUT_SET; | ||
395 | } | ||
396 | } | 394 | } |
397 | 395 | ||
398 | int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | 396 | int do_select(int n, fd_set_bits *fds, struct timespec *end_time) |
@@ -414,7 +412,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
414 | poll_initwait(&table); | 412 | poll_initwait(&table); |
415 | wait = &table.pt; | 413 | wait = &table.pt; |
416 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { | 414 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
417 | wait = NULL; | 415 | wait->_qproc = NULL; |
418 | timed_out = 1; | 416 | timed_out = 1; |
419 | } | 417 | } |
420 | 418 | ||
@@ -459,17 +457,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
459 | if ((mask & POLLIN_SET) && (in & bit)) { | 457 | if ((mask & POLLIN_SET) && (in & bit)) { |
460 | res_in |= bit; | 458 | res_in |= bit; |
461 | retval++; | 459 | retval++; |
462 | wait = NULL; | 460 | wait->_qproc = NULL; |
463 | } | 461 | } |
464 | if ((mask & POLLOUT_SET) && (out & bit)) { | 462 | if ((mask & POLLOUT_SET) && (out & bit)) { |
465 | res_out |= bit; | 463 | res_out |= bit; |
466 | retval++; | 464 | retval++; |
467 | wait = NULL; | 465 | wait->_qproc = NULL; |
468 | } | 466 | } |
469 | if ((mask & POLLEX_SET) && (ex & bit)) { | 467 | if ((mask & POLLEX_SET) && (ex & bit)) { |
470 | res_ex |= bit; | 468 | res_ex |= bit; |
471 | retval++; | 469 | retval++; |
472 | wait = NULL; | 470 | wait->_qproc = NULL; |
473 | } | 471 | } |
474 | } | 472 | } |
475 | } | 473 | } |
@@ -481,7 +479,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
481 | *rexp = res_ex; | 479 | *rexp = res_ex; |
482 | cond_resched(); | 480 | cond_resched(); |
483 | } | 481 | } |
484 | wait = NULL; | 482 | wait->_qproc = NULL; |
485 | if (retval || timed_out || signal_pending(current)) | 483 | if (retval || timed_out || signal_pending(current)) |
486 | break; | 484 | break; |
487 | if (table.error) { | 485 | if (table.error) { |
@@ -720,7 +718,7 @@ struct poll_list { | |||
720 | * interested in events matching the pollfd->events mask, and the result | 718 | * interested in events matching the pollfd->events mask, and the result |
721 | * matching that mask is both recorded in pollfd->revents and returned. The | 719 | * matching that mask is both recorded in pollfd->revents and returned. The |
722 | * pwait poll_table will be used by the fd-provided poll handler for waiting, | 720 | * pwait poll_table will be used by the fd-provided poll handler for waiting, |
723 | * if non-NULL. | 721 | * if pwait->_qproc is non-NULL. |
724 | */ | 722 | */ |
725 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | 723 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) |
726 | { | 724 | { |
@@ -738,9 +736,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
738 | if (file != NULL) { | 736 | if (file != NULL) { |
739 | mask = DEFAULT_POLLMASK; | 737 | mask = DEFAULT_POLLMASK; |
740 | if (file->f_op && file->f_op->poll) { | 738 | if (file->f_op && file->f_op->poll) { |
741 | if (pwait) | 739 | pwait->_key = pollfd->events|POLLERR|POLLHUP; |
742 | pwait->key = pollfd->events | | ||
743 | POLLERR | POLLHUP; | ||
744 | mask = file->f_op->poll(file, pwait); | 740 | mask = file->f_op->poll(file, pwait); |
745 | } | 741 | } |
746 | /* Mask out unneeded events. */ | 742 | /* Mask out unneeded events. */ |
@@ -763,7 +759,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
763 | 759 | ||
764 | /* Optimise the no-wait case */ | 760 | /* Optimise the no-wait case */ |
765 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { | 761 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
766 | pt = NULL; | 762 | pt->_qproc = NULL; |
767 | timed_out = 1; | 763 | timed_out = 1; |
768 | } | 764 | } |
769 | 765 | ||
@@ -781,22 +777,22 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
781 | for (; pfd != pfd_end; pfd++) { | 777 | for (; pfd != pfd_end; pfd++) { |
782 | /* | 778 | /* |
783 | * Fish for events. If we found one, record it | 779 | * Fish for events. If we found one, record it |
784 | * and kill the poll_table, so we don't | 780 | * and kill poll_table->_qproc, so we don't |
785 | * needlessly register any other waiters after | 781 | * needlessly register any other waiters after |
786 | * this. They'll get immediately deregistered | 782 | * this. They'll get immediately deregistered |
787 | * when we break out and return. | 783 | * when we break out and return. |
788 | */ | 784 | */ |
789 | if (do_pollfd(pfd, pt)) { | 785 | if (do_pollfd(pfd, pt)) { |
790 | count++; | 786 | count++; |
791 | pt = NULL; | 787 | pt->_qproc = NULL; |
792 | } | 788 | } |
793 | } | 789 | } |
794 | } | 790 | } |
795 | /* | 791 | /* |
796 | * All waiters have already been registered, so don't provide | 792 | * All waiters have already been registered, so don't provide |
797 | * a poll_table to them on the next loop iteration. | 793 | * a poll_table->_qproc to them on the next loop iteration. |
798 | */ | 794 | */ |
799 | pt = NULL; | 795 | pt->_qproc = NULL; |
800 | if (!count) { | 796 | if (!count) { |
801 | count = wait->error; | 797 | count = wait->error; |
802 | if (signal_pending(current)) | 798 | if (signal_pending(current)) |
diff --git a/fs/seq_file.c b/fs/seq_file.c index aa242dc99373..0cbd0494b79e 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -6,13 +6,29 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | 12 | ||
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include <asm/page.h> | 14 | #include <asm/page.h> |
15 | 15 | ||
16 | |||
17 | /* | ||
18 | * seq_files have a buffer which can may overflow. When this happens a larger | ||
19 | * buffer is reallocated and all the data will be printed again. | ||
20 | * The overflow state is true when m->count == m->size. | ||
21 | */ | ||
22 | static bool seq_overflow(struct seq_file *m) | ||
23 | { | ||
24 | return m->count == m->size; | ||
25 | } | ||
26 | |||
27 | static void seq_set_overflow(struct seq_file *m) | ||
28 | { | ||
29 | m->count = m->size; | ||
30 | } | ||
31 | |||
16 | /** | 32 | /** |
17 | * seq_open - initialize sequential file | 33 | * seq_open - initialize sequential file |
18 | * @file: file we initialize | 34 | * @file: file we initialize |
@@ -92,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
92 | error = 0; | 108 | error = 0; |
93 | m->count = 0; | 109 | m->count = 0; |
94 | } | 110 | } |
95 | if (m->count == m->size) | 111 | if (seq_overflow(m)) |
96 | goto Eoverflow; | 112 | goto Eoverflow; |
97 | if (pos + m->count > offset) { | 113 | if (pos + m->count > offset) { |
98 | m->from = offset - pos; | 114 | m->from = offset - pos; |
@@ -234,7 +250,7 @@ Fill: | |||
234 | break; | 250 | break; |
235 | } | 251 | } |
236 | err = m->op->show(m, p); | 252 | err = m->op->show(m, p); |
237 | if (m->count == m->size || err) { | 253 | if (seq_overflow(m) || err) { |
238 | m->count = offs; | 254 | m->count = offs; |
239 | if (likely(err <= 0)) | 255 | if (likely(err <= 0)) |
240 | break; | 256 | break; |
@@ -361,7 +377,7 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc) | |||
361 | *p++ = '0' + (c & 07); | 377 | *p++ = '0' + (c & 07); |
362 | continue; | 378 | continue; |
363 | } | 379 | } |
364 | m->count = m->size; | 380 | seq_set_overflow(m); |
365 | return -1; | 381 | return -1; |
366 | } | 382 | } |
367 | m->count = p - m->buf; | 383 | m->count = p - m->buf; |
@@ -383,7 +399,7 @@ int seq_printf(struct seq_file *m, const char *f, ...) | |||
383 | return 0; | 399 | return 0; |
384 | } | 400 | } |
385 | } | 401 | } |
386 | m->count = m->size; | 402 | seq_set_overflow(m); |
387 | return -1; | 403 | return -1; |
388 | } | 404 | } |
389 | EXPORT_SYMBOL(seq_printf); | 405 | EXPORT_SYMBOL(seq_printf); |
@@ -512,7 +528,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits, | |||
512 | return 0; | 528 | return 0; |
513 | } | 529 | } |
514 | } | 530 | } |
515 | m->count = m->size; | 531 | seq_set_overflow(m); |
516 | return -1; | 532 | return -1; |
517 | } | 533 | } |
518 | EXPORT_SYMBOL(seq_bitmap); | 534 | EXPORT_SYMBOL(seq_bitmap); |
@@ -528,7 +544,7 @@ int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, | |||
528 | return 0; | 544 | return 0; |
529 | } | 545 | } |
530 | } | 546 | } |
531 | m->count = m->size; | 547 | seq_set_overflow(m); |
532 | return -1; | 548 | return -1; |
533 | } | 549 | } |
534 | EXPORT_SYMBOL(seq_bitmap_list); | 550 | EXPORT_SYMBOL(seq_bitmap_list); |
@@ -639,11 +655,63 @@ int seq_puts(struct seq_file *m, const char *s) | |||
639 | m->count += len; | 655 | m->count += len; |
640 | return 0; | 656 | return 0; |
641 | } | 657 | } |
642 | m->count = m->size; | 658 | seq_set_overflow(m); |
643 | return -1; | 659 | return -1; |
644 | } | 660 | } |
645 | EXPORT_SYMBOL(seq_puts); | 661 | EXPORT_SYMBOL(seq_puts); |
646 | 662 | ||
663 | /* | ||
664 | * A helper routine for putting decimal numbers without rich format of printf(). | ||
665 | * only 'unsigned long long' is supported. | ||
666 | * This routine will put one byte delimiter + number into seq_file. | ||
667 | * This routine is very quick when you show lots of numbers. | ||
668 | * In usual cases, it will be better to use seq_printf(). It's easier to read. | ||
669 | */ | ||
670 | int seq_put_decimal_ull(struct seq_file *m, char delimiter, | ||
671 | unsigned long long num) | ||
672 | { | ||
673 | int len; | ||
674 | |||
675 | if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ | ||
676 | goto overflow; | ||
677 | |||
678 | if (delimiter) | ||
679 | m->buf[m->count++] = delimiter; | ||
680 | |||
681 | if (num < 10) { | ||
682 | m->buf[m->count++] = num + '0'; | ||
683 | return 0; | ||
684 | } | ||
685 | |||
686 | len = num_to_str(m->buf + m->count, m->size - m->count, num); | ||
687 | if (!len) | ||
688 | goto overflow; | ||
689 | m->count += len; | ||
690 | return 0; | ||
691 | overflow: | ||
692 | seq_set_overflow(m); | ||
693 | return -1; | ||
694 | } | ||
695 | EXPORT_SYMBOL(seq_put_decimal_ull); | ||
696 | |||
697 | int seq_put_decimal_ll(struct seq_file *m, char delimiter, | ||
698 | long long num) | ||
699 | { | ||
700 | if (num < 0) { | ||
701 | if (m->count + 3 >= m->size) { | ||
702 | seq_set_overflow(m); | ||
703 | return -1; | ||
704 | } | ||
705 | if (delimiter) | ||
706 | m->buf[m->count++] = delimiter; | ||
707 | num = -num; | ||
708 | delimiter = '-'; | ||
709 | } | ||
710 | return seq_put_decimal_ull(m, delimiter, num); | ||
711 | |||
712 | } | ||
713 | EXPORT_SYMBOL(seq_put_decimal_ll); | ||
714 | |||
647 | /** | 715 | /** |
648 | * seq_write - write arbitrary data to buffer | 716 | * seq_write - write arbitrary data to buffer |
649 | * @seq: seq_file identifying the buffer to which data should be written | 717 | * @seq: seq_file identifying the buffer to which data should be written |
@@ -659,7 +727,7 @@ int seq_write(struct seq_file *seq, const void *data, size_t len) | |||
659 | seq->count += len; | 727 | seq->count += len; |
660 | return 0; | 728 | return 0; |
661 | } | 729 | } |
662 | seq->count = seq->size; | 730 | seq_set_overflow(seq); |
663 | return -1; | 731 | return -1; |
664 | } | 732 | } |
665 | EXPORT_SYMBOL(seq_write); | 733 | EXPORT_SYMBOL(seq_write); |
diff --git a/fs/splice.c b/fs/splice.c index f16402ed915c..5f883de7ef3a 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/mm_inline.h> | 25 | #include <linux/mm_inline.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/module.h> | 28 | #include <linux/export.h> |
29 | #include <linux/syscalls.h> | 29 | #include <linux/syscalls.h> |
30 | #include <linux/uio.h> | 30 | #include <linux/uio.h> |
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
diff --git a/fs/stack.c b/fs/stack.c index 9c11519245a6..5b5388250e29 100644 --- a/fs/stack.c +++ b/fs/stack.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/export.h> |
2 | #include <linux/fs.h> | 2 | #include <linux/fs.h> |
3 | #include <linux/fs_stack.h> | 3 | #include <linux/fs_stack.h> |
4 | 4 | ||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/module.h> | 7 | #include <linux/export.h> |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
10 | #include <linux/file.h> | 10 | #include <linux/file.h> |
diff --git a/fs/statfs.c b/fs/statfs.c index 2aa6a22e0be2..43e6b6fe4e85 100644 --- a/fs/statfs.c +++ b/fs/statfs.c | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <linux/syscalls.h> | 1 | #include <linux/syscalls.h> |
2 | #include <linux/module.h> | 2 | #include <linux/export.h> |
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/file.h> | 4 | #include <linux/file.h> |
5 | #include <linux/mount.h> | 5 | #include <linux/mount.h> |
diff --git a/fs/super.c b/fs/super.c index 7fcb1354c554..cf001775617f 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -20,7 +20,7 @@ | |||
20 | * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 | 20 | * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/module.h> | 23 | #include <linux/export.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/acct.h> | 25 | #include <linux/acct.h> |
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
@@ -6,7 +6,7 @@ | |||
6 | #include <linux/file.h> | 6 | #include <linux/file.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/namei.h> | 10 | #include <linux/namei.h> |
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/writeback.h> | 12 | #include <linux/writeback.h> |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index f922cbacdb96..1934084e2088 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -36,7 +36,7 @@ | |||
36 | 36 | ||
37 | #ifdef CONFIG_UBIFS_FS_DEBUG | 37 | #ifdef CONFIG_UBIFS_FS_DEBUG |
38 | 38 | ||
39 | DEFINE_SPINLOCK(dbg_lock); | 39 | static DEFINE_SPINLOCK(dbg_lock); |
40 | 40 | ||
41 | static const char *get_key_fmt(int fmt) | 41 | static const char *get_key_fmt(int fmt) |
42 | { | 42 | { |
@@ -221,15 +221,15 @@ const char *dbg_jhead(int jhead) | |||
221 | 221 | ||
222 | static void dump_ch(const struct ubifs_ch *ch) | 222 | static void dump_ch(const struct ubifs_ch *ch) |
223 | { | 223 | { |
224 | printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); | 224 | printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); |
225 | printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); | 225 | printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); |
226 | printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, | 226 | printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, |
227 | dbg_ntype(ch->node_type)); | 227 | dbg_ntype(ch->node_type)); |
228 | printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, | 228 | printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, |
229 | dbg_gtype(ch->group_type)); | 229 | dbg_gtype(ch->group_type)); |
230 | printk(KERN_DEBUG "\tsqnum %llu\n", | 230 | printk(KERN_ERR "\tsqnum %llu\n", |
231 | (unsigned long long)le64_to_cpu(ch->sqnum)); | 231 | (unsigned long long)le64_to_cpu(ch->sqnum)); |
232 | printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); | 232 | printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); |
233 | } | 233 | } |
234 | 234 | ||
235 | void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) | 235 | void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) |
@@ -240,43 +240,43 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) | |||
240 | struct ubifs_dent_node *dent, *pdent = NULL; | 240 | struct ubifs_dent_node *dent, *pdent = NULL; |
241 | int count = 2; | 241 | int count = 2; |
242 | 242 | ||
243 | printk(KERN_DEBUG "Dump in-memory inode:"); | 243 | printk(KERN_ERR "Dump in-memory inode:"); |
244 | printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); | 244 | printk(KERN_ERR "\tinode %lu\n", inode->i_ino); |
245 | printk(KERN_DEBUG "\tsize %llu\n", | 245 | printk(KERN_ERR "\tsize %llu\n", |
246 | (unsigned long long)i_size_read(inode)); | 246 | (unsigned long long)i_size_read(inode)); |
247 | printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); | 247 | printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); |
248 | printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); | 248 | printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); |
249 | printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); | 249 | printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); |
250 | printk(KERN_DEBUG "\tatime %u.%u\n", | 250 | printk(KERN_ERR "\tatime %u.%u\n", |
251 | (unsigned int)inode->i_atime.tv_sec, | 251 | (unsigned int)inode->i_atime.tv_sec, |
252 | (unsigned int)inode->i_atime.tv_nsec); | 252 | (unsigned int)inode->i_atime.tv_nsec); |
253 | printk(KERN_DEBUG "\tmtime %u.%u\n", | 253 | printk(KERN_ERR "\tmtime %u.%u\n", |
254 | (unsigned int)inode->i_mtime.tv_sec, | 254 | (unsigned int)inode->i_mtime.tv_sec, |
255 | (unsigned int)inode->i_mtime.tv_nsec); | 255 | (unsigned int)inode->i_mtime.tv_nsec); |
256 | printk(KERN_DEBUG "\tctime %u.%u\n", | 256 | printk(KERN_ERR "\tctime %u.%u\n", |
257 | (unsigned int)inode->i_ctime.tv_sec, | 257 | (unsigned int)inode->i_ctime.tv_sec, |
258 | (unsigned int)inode->i_ctime.tv_nsec); | 258 | (unsigned int)inode->i_ctime.tv_nsec); |
259 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); | 259 | printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); |
260 | printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); | 260 | printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); |
261 | printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); | 261 | printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); |
262 | printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); | 262 | printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); |
263 | printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); | 263 | printk(KERN_ERR "\tdirty %u\n", ui->dirty); |
264 | printk(KERN_DEBUG "\txattr %u\n", ui->xattr); | 264 | printk(KERN_ERR "\txattr %u\n", ui->xattr); |
265 | printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); | 265 | printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); |
266 | printk(KERN_DEBUG "\tsynced_i_size %llu\n", | 266 | printk(KERN_ERR "\tsynced_i_size %llu\n", |
267 | (unsigned long long)ui->synced_i_size); | 267 | (unsigned long long)ui->synced_i_size); |
268 | printk(KERN_DEBUG "\tui_size %llu\n", | 268 | printk(KERN_ERR "\tui_size %llu\n", |
269 | (unsigned long long)ui->ui_size); | 269 | (unsigned long long)ui->ui_size); |
270 | printk(KERN_DEBUG "\tflags %d\n", ui->flags); | 270 | printk(KERN_ERR "\tflags %d\n", ui->flags); |
271 | printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); | 271 | printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); |
272 | printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); | 272 | printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); |
273 | printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); | 273 | printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); |
274 | printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); | 274 | printk(KERN_ERR "\tdata_len %d\n", ui->data_len); |
275 | 275 | ||
276 | if (!S_ISDIR(inode->i_mode)) | 276 | if (!S_ISDIR(inode->i_mode)) |
277 | return; | 277 | return; |
278 | 278 | ||
279 | printk(KERN_DEBUG "List of directory entries:\n"); | 279 | printk(KERN_ERR "List of directory entries:\n"); |
280 | ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); | 280 | ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); |
281 | 281 | ||
282 | lowest_dent_key(c, &key, inode->i_ino); | 282 | lowest_dent_key(c, &key, inode->i_ino); |
@@ -284,11 +284,11 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) | |||
284 | dent = ubifs_tnc_next_ent(c, &key, &nm); | 284 | dent = ubifs_tnc_next_ent(c, &key, &nm); |
285 | if (IS_ERR(dent)) { | 285 | if (IS_ERR(dent)) { |
286 | if (PTR_ERR(dent) != -ENOENT) | 286 | if (PTR_ERR(dent) != -ENOENT) |
287 | printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); | 287 | printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); |
288 | break; | 288 | break; |
289 | } | 289 | } |
290 | 290 | ||
291 | printk(KERN_DEBUG "\t%d: %s (%s)\n", | 291 | printk(KERN_ERR "\t%d: %s (%s)\n", |
292 | count++, dent->name, get_dent_type(dent->type)); | 292 | count++, dent->name, get_dent_type(dent->type)); |
293 | 293 | ||
294 | nm.name = dent->name; | 294 | nm.name = dent->name; |
@@ -312,8 +312,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
312 | 312 | ||
313 | /* If the magic is incorrect, just hexdump the first bytes */ | 313 | /* If the magic is incorrect, just hexdump the first bytes */ |
314 | if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { | 314 | if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { |
315 | printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); | 315 | printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); |
316 | print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, | 316 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, |
317 | (void *)node, UBIFS_CH_SZ, 1); | 317 | (void *)node, UBIFS_CH_SZ, 1); |
318 | return; | 318 | return; |
319 | } | 319 | } |
@@ -326,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
326 | { | 326 | { |
327 | const struct ubifs_pad_node *pad = node; | 327 | const struct ubifs_pad_node *pad = node; |
328 | 328 | ||
329 | printk(KERN_DEBUG "\tpad_len %u\n", | 329 | printk(KERN_ERR "\tpad_len %u\n", |
330 | le32_to_cpu(pad->pad_len)); | 330 | le32_to_cpu(pad->pad_len)); |
331 | break; | 331 | break; |
332 | } | 332 | } |
@@ -335,50 +335,50 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
335 | const struct ubifs_sb_node *sup = node; | 335 | const struct ubifs_sb_node *sup = node; |
336 | unsigned int sup_flags = le32_to_cpu(sup->flags); | 336 | unsigned int sup_flags = le32_to_cpu(sup->flags); |
337 | 337 | ||
338 | printk(KERN_DEBUG "\tkey_hash %d (%s)\n", | 338 | printk(KERN_ERR "\tkey_hash %d (%s)\n", |
339 | (int)sup->key_hash, get_key_hash(sup->key_hash)); | 339 | (int)sup->key_hash, get_key_hash(sup->key_hash)); |
340 | printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", | 340 | printk(KERN_ERR "\tkey_fmt %d (%s)\n", |
341 | (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); | 341 | (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); |
342 | printk(KERN_DEBUG "\tflags %#x\n", sup_flags); | 342 | printk(KERN_ERR "\tflags %#x\n", sup_flags); |
343 | printk(KERN_DEBUG "\t big_lpt %u\n", | 343 | printk(KERN_ERR "\t big_lpt %u\n", |
344 | !!(sup_flags & UBIFS_FLG_BIGLPT)); | 344 | !!(sup_flags & UBIFS_FLG_BIGLPT)); |
345 | printk(KERN_DEBUG "\t space_fixup %u\n", | 345 | printk(KERN_ERR "\t space_fixup %u\n", |
346 | !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); | 346 | !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); |
347 | printk(KERN_DEBUG "\tmin_io_size %u\n", | 347 | printk(KERN_ERR "\tmin_io_size %u\n", |
348 | le32_to_cpu(sup->min_io_size)); | 348 | le32_to_cpu(sup->min_io_size)); |
349 | printk(KERN_DEBUG "\tleb_size %u\n", | 349 | printk(KERN_ERR "\tleb_size %u\n", |
350 | le32_to_cpu(sup->leb_size)); | 350 | le32_to_cpu(sup->leb_size)); |
351 | printk(KERN_DEBUG "\tleb_cnt %u\n", | 351 | printk(KERN_ERR "\tleb_cnt %u\n", |
352 | le32_to_cpu(sup->leb_cnt)); | 352 | le32_to_cpu(sup->leb_cnt)); |
353 | printk(KERN_DEBUG "\tmax_leb_cnt %u\n", | 353 | printk(KERN_ERR "\tmax_leb_cnt %u\n", |
354 | le32_to_cpu(sup->max_leb_cnt)); | 354 | le32_to_cpu(sup->max_leb_cnt)); |
355 | printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", | 355 | printk(KERN_ERR "\tmax_bud_bytes %llu\n", |
356 | (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); | 356 | (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); |
357 | printk(KERN_DEBUG "\tlog_lebs %u\n", | 357 | printk(KERN_ERR "\tlog_lebs %u\n", |
358 | le32_to_cpu(sup->log_lebs)); | 358 | le32_to_cpu(sup->log_lebs)); |
359 | printk(KERN_DEBUG "\tlpt_lebs %u\n", | 359 | printk(KERN_ERR "\tlpt_lebs %u\n", |
360 | le32_to_cpu(sup->lpt_lebs)); | 360 | le32_to_cpu(sup->lpt_lebs)); |
361 | printk(KERN_DEBUG "\torph_lebs %u\n", | 361 | printk(KERN_ERR "\torph_lebs %u\n", |
362 | le32_to_cpu(sup->orph_lebs)); | 362 | le32_to_cpu(sup->orph_lebs)); |
363 | printk(KERN_DEBUG "\tjhead_cnt %u\n", | 363 | printk(KERN_ERR "\tjhead_cnt %u\n", |
364 | le32_to_cpu(sup->jhead_cnt)); | 364 | le32_to_cpu(sup->jhead_cnt)); |
365 | printk(KERN_DEBUG "\tfanout %u\n", | 365 | printk(KERN_ERR "\tfanout %u\n", |
366 | le32_to_cpu(sup->fanout)); | 366 | le32_to_cpu(sup->fanout)); |
367 | printk(KERN_DEBUG "\tlsave_cnt %u\n", | 367 | printk(KERN_ERR "\tlsave_cnt %u\n", |
368 | le32_to_cpu(sup->lsave_cnt)); | 368 | le32_to_cpu(sup->lsave_cnt)); |
369 | printk(KERN_DEBUG "\tdefault_compr %u\n", | 369 | printk(KERN_ERR "\tdefault_compr %u\n", |
370 | (int)le16_to_cpu(sup->default_compr)); | 370 | (int)le16_to_cpu(sup->default_compr)); |
371 | printk(KERN_DEBUG "\trp_size %llu\n", | 371 | printk(KERN_ERR "\trp_size %llu\n", |
372 | (unsigned long long)le64_to_cpu(sup->rp_size)); | 372 | (unsigned long long)le64_to_cpu(sup->rp_size)); |
373 | printk(KERN_DEBUG "\trp_uid %u\n", | 373 | printk(KERN_ERR "\trp_uid %u\n", |
374 | le32_to_cpu(sup->rp_uid)); | 374 | le32_to_cpu(sup->rp_uid)); |
375 | printk(KERN_DEBUG "\trp_gid %u\n", | 375 | printk(KERN_ERR "\trp_gid %u\n", |
376 | le32_to_cpu(sup->rp_gid)); | 376 | le32_to_cpu(sup->rp_gid)); |
377 | printk(KERN_DEBUG "\tfmt_version %u\n", | 377 | printk(KERN_ERR "\tfmt_version %u\n", |
378 | le32_to_cpu(sup->fmt_version)); | 378 | le32_to_cpu(sup->fmt_version)); |
379 | printk(KERN_DEBUG "\ttime_gran %u\n", | 379 | printk(KERN_ERR "\ttime_gran %u\n", |
380 | le32_to_cpu(sup->time_gran)); | 380 | le32_to_cpu(sup->time_gran)); |
381 | printk(KERN_DEBUG "\tUUID %pUB\n", | 381 | printk(KERN_ERR "\tUUID %pUB\n", |
382 | sup->uuid); | 382 | sup->uuid); |
383 | break; | 383 | break; |
384 | } | 384 | } |
@@ -386,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
386 | { | 386 | { |
387 | const struct ubifs_mst_node *mst = node; | 387 | const struct ubifs_mst_node *mst = node; |
388 | 388 | ||
389 | printk(KERN_DEBUG "\thighest_inum %llu\n", | 389 | printk(KERN_ERR "\thighest_inum %llu\n", |
390 | (unsigned long long)le64_to_cpu(mst->highest_inum)); | 390 | (unsigned long long)le64_to_cpu(mst->highest_inum)); |
391 | printk(KERN_DEBUG "\tcommit number %llu\n", | 391 | printk(KERN_ERR "\tcommit number %llu\n", |
392 | (unsigned long long)le64_to_cpu(mst->cmt_no)); | 392 | (unsigned long long)le64_to_cpu(mst->cmt_no)); |
393 | printk(KERN_DEBUG "\tflags %#x\n", | 393 | printk(KERN_ERR "\tflags %#x\n", |
394 | le32_to_cpu(mst->flags)); | 394 | le32_to_cpu(mst->flags)); |
395 | printk(KERN_DEBUG "\tlog_lnum %u\n", | 395 | printk(KERN_ERR "\tlog_lnum %u\n", |
396 | le32_to_cpu(mst->log_lnum)); | 396 | le32_to_cpu(mst->log_lnum)); |
397 | printk(KERN_DEBUG "\troot_lnum %u\n", | 397 | printk(KERN_ERR "\troot_lnum %u\n", |
398 | le32_to_cpu(mst->root_lnum)); | 398 | le32_to_cpu(mst->root_lnum)); |
399 | printk(KERN_DEBUG "\troot_offs %u\n", | 399 | printk(KERN_ERR "\troot_offs %u\n", |
400 | le32_to_cpu(mst->root_offs)); | 400 | le32_to_cpu(mst->root_offs)); |
401 | printk(KERN_DEBUG "\troot_len %u\n", | 401 | printk(KERN_ERR "\troot_len %u\n", |
402 | le32_to_cpu(mst->root_len)); | 402 | le32_to_cpu(mst->root_len)); |
403 | printk(KERN_DEBUG "\tgc_lnum %u\n", | 403 | printk(KERN_ERR "\tgc_lnum %u\n", |
404 | le32_to_cpu(mst->gc_lnum)); | 404 | le32_to_cpu(mst->gc_lnum)); |
405 | printk(KERN_DEBUG "\tihead_lnum %u\n", | 405 | printk(KERN_ERR "\tihead_lnum %u\n", |
406 | le32_to_cpu(mst->ihead_lnum)); | 406 | le32_to_cpu(mst->ihead_lnum)); |
407 | printk(KERN_DEBUG "\tihead_offs %u\n", | 407 | printk(KERN_ERR "\tihead_offs %u\n", |
408 | le32_to_cpu(mst->ihead_offs)); | 408 | le32_to_cpu(mst->ihead_offs)); |
409 | printk(KERN_DEBUG "\tindex_size %llu\n", | 409 | printk(KERN_ERR "\tindex_size %llu\n", |
410 | (unsigned long long)le64_to_cpu(mst->index_size)); | 410 | (unsigned long long)le64_to_cpu(mst->index_size)); |
411 | printk(KERN_DEBUG "\tlpt_lnum %u\n", | 411 | printk(KERN_ERR "\tlpt_lnum %u\n", |
412 | le32_to_cpu(mst->lpt_lnum)); | 412 | le32_to_cpu(mst->lpt_lnum)); |
413 | printk(KERN_DEBUG "\tlpt_offs %u\n", | 413 | printk(KERN_ERR "\tlpt_offs %u\n", |
414 | le32_to_cpu(mst->lpt_offs)); | 414 | le32_to_cpu(mst->lpt_offs)); |
415 | printk(KERN_DEBUG "\tnhead_lnum %u\n", | 415 | printk(KERN_ERR "\tnhead_lnum %u\n", |
416 | le32_to_cpu(mst->nhead_lnum)); | 416 | le32_to_cpu(mst->nhead_lnum)); |
417 | printk(KERN_DEBUG "\tnhead_offs %u\n", | 417 | printk(KERN_ERR "\tnhead_offs %u\n", |
418 | le32_to_cpu(mst->nhead_offs)); | 418 | le32_to_cpu(mst->nhead_offs)); |
419 | printk(KERN_DEBUG "\tltab_lnum %u\n", | 419 | printk(KERN_ERR "\tltab_lnum %u\n", |
420 | le32_to_cpu(mst->ltab_lnum)); | 420 | le32_to_cpu(mst->ltab_lnum)); |
421 | printk(KERN_DEBUG "\tltab_offs %u\n", | 421 | printk(KERN_ERR "\tltab_offs %u\n", |
422 | le32_to_cpu(mst->ltab_offs)); | 422 | le32_to_cpu(mst->ltab_offs)); |
423 | printk(KERN_DEBUG "\tlsave_lnum %u\n", | 423 | printk(KERN_ERR "\tlsave_lnum %u\n", |
424 | le32_to_cpu(mst->lsave_lnum)); | 424 | le32_to_cpu(mst->lsave_lnum)); |
425 | printk(KERN_DEBUG "\tlsave_offs %u\n", | 425 | printk(KERN_ERR "\tlsave_offs %u\n", |
426 | le32_to_cpu(mst->lsave_offs)); | 426 | le32_to_cpu(mst->lsave_offs)); |
427 | printk(KERN_DEBUG "\tlscan_lnum %u\n", | 427 | printk(KERN_ERR "\tlscan_lnum %u\n", |
428 | le32_to_cpu(mst->lscan_lnum)); | 428 | le32_to_cpu(mst->lscan_lnum)); |
429 | printk(KERN_DEBUG "\tleb_cnt %u\n", | 429 | printk(KERN_ERR "\tleb_cnt %u\n", |
430 | le32_to_cpu(mst->leb_cnt)); | 430 | le32_to_cpu(mst->leb_cnt)); |
431 | printk(KERN_DEBUG "\tempty_lebs %u\n", | 431 | printk(KERN_ERR "\tempty_lebs %u\n", |
432 | le32_to_cpu(mst->empty_lebs)); | 432 | le32_to_cpu(mst->empty_lebs)); |
433 | printk(KERN_DEBUG "\tidx_lebs %u\n", | 433 | printk(KERN_ERR "\tidx_lebs %u\n", |
434 | le32_to_cpu(mst->idx_lebs)); | 434 | le32_to_cpu(mst->idx_lebs)); |
435 | printk(KERN_DEBUG "\ttotal_free %llu\n", | 435 | printk(KERN_ERR "\ttotal_free %llu\n", |
436 | (unsigned long long)le64_to_cpu(mst->total_free)); | 436 | (unsigned long long)le64_to_cpu(mst->total_free)); |
437 | printk(KERN_DEBUG "\ttotal_dirty %llu\n", | 437 | printk(KERN_ERR "\ttotal_dirty %llu\n", |
438 | (unsigned long long)le64_to_cpu(mst->total_dirty)); | 438 | (unsigned long long)le64_to_cpu(mst->total_dirty)); |
439 | printk(KERN_DEBUG "\ttotal_used %llu\n", | 439 | printk(KERN_ERR "\ttotal_used %llu\n", |
440 | (unsigned long long)le64_to_cpu(mst->total_used)); | 440 | (unsigned long long)le64_to_cpu(mst->total_used)); |
441 | printk(KERN_DEBUG "\ttotal_dead %llu\n", | 441 | printk(KERN_ERR "\ttotal_dead %llu\n", |
442 | (unsigned long long)le64_to_cpu(mst->total_dead)); | 442 | (unsigned long long)le64_to_cpu(mst->total_dead)); |
443 | printk(KERN_DEBUG "\ttotal_dark %llu\n", | 443 | printk(KERN_ERR "\ttotal_dark %llu\n", |
444 | (unsigned long long)le64_to_cpu(mst->total_dark)); | 444 | (unsigned long long)le64_to_cpu(mst->total_dark)); |
445 | break; | 445 | break; |
446 | } | 446 | } |
@@ -448,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
448 | { | 448 | { |
449 | const struct ubifs_ref_node *ref = node; | 449 | const struct ubifs_ref_node *ref = node; |
450 | 450 | ||
451 | printk(KERN_DEBUG "\tlnum %u\n", | 451 | printk(KERN_ERR "\tlnum %u\n", |
452 | le32_to_cpu(ref->lnum)); | 452 | le32_to_cpu(ref->lnum)); |
453 | printk(KERN_DEBUG "\toffs %u\n", | 453 | printk(KERN_ERR "\toffs %u\n", |
454 | le32_to_cpu(ref->offs)); | 454 | le32_to_cpu(ref->offs)); |
455 | printk(KERN_DEBUG "\tjhead %u\n", | 455 | printk(KERN_ERR "\tjhead %u\n", |
456 | le32_to_cpu(ref->jhead)); | 456 | le32_to_cpu(ref->jhead)); |
457 | break; | 457 | break; |
458 | } | 458 | } |
@@ -461,40 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
461 | const struct ubifs_ino_node *ino = node; | 461 | const struct ubifs_ino_node *ino = node; |
462 | 462 | ||
463 | key_read(c, &ino->key, &key); | 463 | key_read(c, &ino->key, &key); |
464 | printk(KERN_DEBUG "\tkey %s\n", | 464 | printk(KERN_ERR "\tkey %s\n", |
465 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | 465 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
466 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", | 466 | printk(KERN_ERR "\tcreat_sqnum %llu\n", |
467 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); | 467 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); |
468 | printk(KERN_DEBUG "\tsize %llu\n", | 468 | printk(KERN_ERR "\tsize %llu\n", |
469 | (unsigned long long)le64_to_cpu(ino->size)); | 469 | (unsigned long long)le64_to_cpu(ino->size)); |
470 | printk(KERN_DEBUG "\tnlink %u\n", | 470 | printk(KERN_ERR "\tnlink %u\n", |
471 | le32_to_cpu(ino->nlink)); | 471 | le32_to_cpu(ino->nlink)); |
472 | printk(KERN_DEBUG "\tatime %lld.%u\n", | 472 | printk(KERN_ERR "\tatime %lld.%u\n", |
473 | (long long)le64_to_cpu(ino->atime_sec), | 473 | (long long)le64_to_cpu(ino->atime_sec), |
474 | le32_to_cpu(ino->atime_nsec)); | 474 | le32_to_cpu(ino->atime_nsec)); |
475 | printk(KERN_DEBUG "\tmtime %lld.%u\n", | 475 | printk(KERN_ERR "\tmtime %lld.%u\n", |
476 | (long long)le64_to_cpu(ino->mtime_sec), | 476 | (long long)le64_to_cpu(ino->mtime_sec), |
477 | le32_to_cpu(ino->mtime_nsec)); | 477 | le32_to_cpu(ino->mtime_nsec)); |
478 | printk(KERN_DEBUG "\tctime %lld.%u\n", | 478 | printk(KERN_ERR "\tctime %lld.%u\n", |
479 | (long long)le64_to_cpu(ino->ctime_sec), | 479 | (long long)le64_to_cpu(ino->ctime_sec), |
480 | le32_to_cpu(ino->ctime_nsec)); | 480 | le32_to_cpu(ino->ctime_nsec)); |
481 | printk(KERN_DEBUG "\tuid %u\n", | 481 | printk(KERN_ERR "\tuid %u\n", |
482 | le32_to_cpu(ino->uid)); | 482 | le32_to_cpu(ino->uid)); |
483 | printk(KERN_DEBUG "\tgid %u\n", | 483 | printk(KERN_ERR "\tgid %u\n", |
484 | le32_to_cpu(ino->gid)); | 484 | le32_to_cpu(ino->gid)); |
485 | printk(KERN_DEBUG "\tmode %u\n", | 485 | printk(KERN_ERR "\tmode %u\n", |
486 | le32_to_cpu(ino->mode)); | 486 | le32_to_cpu(ino->mode)); |
487 | printk(KERN_DEBUG "\tflags %#x\n", | 487 | printk(KERN_ERR "\tflags %#x\n", |
488 | le32_to_cpu(ino->flags)); | 488 | le32_to_cpu(ino->flags)); |
489 | printk(KERN_DEBUG "\txattr_cnt %u\n", | 489 | printk(KERN_ERR "\txattr_cnt %u\n", |
490 | le32_to_cpu(ino->xattr_cnt)); | 490 | le32_to_cpu(ino->xattr_cnt)); |
491 | printk(KERN_DEBUG "\txattr_size %u\n", | 491 | printk(KERN_ERR "\txattr_size %u\n", |
492 | le32_to_cpu(ino->xattr_size)); | 492 | le32_to_cpu(ino->xattr_size)); |
493 | printk(KERN_DEBUG "\txattr_names %u\n", | 493 | printk(KERN_ERR "\txattr_names %u\n", |
494 | le32_to_cpu(ino->xattr_names)); | 494 | le32_to_cpu(ino->xattr_names)); |
495 | printk(KERN_DEBUG "\tcompr_type %#x\n", | 495 | printk(KERN_ERR "\tcompr_type %#x\n", |
496 | (int)le16_to_cpu(ino->compr_type)); | 496 | (int)le16_to_cpu(ino->compr_type)); |
497 | printk(KERN_DEBUG "\tdata len %u\n", | 497 | printk(KERN_ERR "\tdata len %u\n", |
498 | le32_to_cpu(ino->data_len)); | 498 | le32_to_cpu(ino->data_len)); |
499 | break; | 499 | break; |
500 | } | 500 | } |
@@ -505,16 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
505 | int nlen = le16_to_cpu(dent->nlen); | 505 | int nlen = le16_to_cpu(dent->nlen); |
506 | 506 | ||
507 | key_read(c, &dent->key, &key); | 507 | key_read(c, &dent->key, &key); |
508 | printk(KERN_DEBUG "\tkey %s\n", | 508 | printk(KERN_ERR "\tkey %s\n", |
509 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | 509 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
510 | printk(KERN_DEBUG "\tinum %llu\n", | 510 | printk(KERN_ERR "\tinum %llu\n", |
511 | (unsigned long long)le64_to_cpu(dent->inum)); | 511 | (unsigned long long)le64_to_cpu(dent->inum)); |
512 | printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); | 512 | printk(KERN_ERR "\ttype %d\n", (int)dent->type); |
513 | printk(KERN_DEBUG "\tnlen %d\n", nlen); | 513 | printk(KERN_ERR "\tnlen %d\n", nlen); |
514 | printk(KERN_DEBUG "\tname "); | 514 | printk(KERN_ERR "\tname "); |
515 | 515 | ||
516 | if (nlen > UBIFS_MAX_NLEN) | 516 | if (nlen > UBIFS_MAX_NLEN) |
517 | printk(KERN_DEBUG "(bad name length, not printing, " | 517 | printk(KERN_ERR "(bad name length, not printing, " |
518 | "bad or corrupted node)"); | 518 | "bad or corrupted node)"); |
519 | else { | 519 | else { |
520 | for (i = 0; i < nlen && dent->name[i]; i++) | 520 | for (i = 0; i < nlen && dent->name[i]; i++) |
@@ -530,16 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
530 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; | 530 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; |
531 | 531 | ||
532 | key_read(c, &dn->key, &key); | 532 | key_read(c, &dn->key, &key); |
533 | printk(KERN_DEBUG "\tkey %s\n", | 533 | printk(KERN_ERR "\tkey %s\n", |
534 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | 534 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
535 | printk(KERN_DEBUG "\tsize %u\n", | 535 | printk(KERN_ERR "\tsize %u\n", |
536 | le32_to_cpu(dn->size)); | 536 | le32_to_cpu(dn->size)); |
537 | printk(KERN_DEBUG "\tcompr_typ %d\n", | 537 | printk(KERN_ERR "\tcompr_typ %d\n", |
538 | (int)le16_to_cpu(dn->compr_type)); | 538 | (int)le16_to_cpu(dn->compr_type)); |
539 | printk(KERN_DEBUG "\tdata size %d\n", | 539 | printk(KERN_ERR "\tdata size %d\n", |
540 | dlen); | 540 | dlen); |
541 | printk(KERN_DEBUG "\tdata:\n"); | 541 | printk(KERN_ERR "\tdata:\n"); |
542 | print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, | 542 | print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, |
543 | (void *)&dn->data, dlen, 0); | 543 | (void *)&dn->data, dlen, 0); |
544 | break; | 544 | break; |
545 | } | 545 | } |
@@ -547,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
547 | { | 547 | { |
548 | const struct ubifs_trun_node *trun = node; | 548 | const struct ubifs_trun_node *trun = node; |
549 | 549 | ||
550 | printk(KERN_DEBUG "\tinum %u\n", | 550 | printk(KERN_ERR "\tinum %u\n", |
551 | le32_to_cpu(trun->inum)); | 551 | le32_to_cpu(trun->inum)); |
552 | printk(KERN_DEBUG "\told_size %llu\n", | 552 | printk(KERN_ERR "\told_size %llu\n", |
553 | (unsigned long long)le64_to_cpu(trun->old_size)); | 553 | (unsigned long long)le64_to_cpu(trun->old_size)); |
554 | printk(KERN_DEBUG "\tnew_size %llu\n", | 554 | printk(KERN_ERR "\tnew_size %llu\n", |
555 | (unsigned long long)le64_to_cpu(trun->new_size)); | 555 | (unsigned long long)le64_to_cpu(trun->new_size)); |
556 | break; | 556 | break; |
557 | } | 557 | } |
@@ -560,17 +560,17 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
560 | const struct ubifs_idx_node *idx = node; | 560 | const struct ubifs_idx_node *idx = node; |
561 | 561 | ||
562 | n = le16_to_cpu(idx->child_cnt); | 562 | n = le16_to_cpu(idx->child_cnt); |
563 | printk(KERN_DEBUG "\tchild_cnt %d\n", n); | 563 | printk(KERN_ERR "\tchild_cnt %d\n", n); |
564 | printk(KERN_DEBUG "\tlevel %d\n", | 564 | printk(KERN_ERR "\tlevel %d\n", |
565 | (int)le16_to_cpu(idx->level)); | 565 | (int)le16_to_cpu(idx->level)); |
566 | printk(KERN_DEBUG "\tBranches:\n"); | 566 | printk(KERN_ERR "\tBranches:\n"); |
567 | 567 | ||
568 | for (i = 0; i < n && i < c->fanout - 1; i++) { | 568 | for (i = 0; i < n && i < c->fanout - 1; i++) { |
569 | const struct ubifs_branch *br; | 569 | const struct ubifs_branch *br; |
570 | 570 | ||
571 | br = ubifs_idx_branch(c, idx, i); | 571 | br = ubifs_idx_branch(c, idx, i); |
572 | key_read(c, &br->key, &key); | 572 | key_read(c, &br->key, &key); |
573 | printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", | 573 | printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", |
574 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), | 574 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), |
575 | le32_to_cpu(br->len), | 575 | le32_to_cpu(br->len), |
576 | dbg_snprintf_key(c, &key, key_buf, | 576 | dbg_snprintf_key(c, &key, key_buf, |
@@ -584,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
584 | { | 584 | { |
585 | const struct ubifs_orph_node *orph = node; | 585 | const struct ubifs_orph_node *orph = node; |
586 | 586 | ||
587 | printk(KERN_DEBUG "\tcommit number %llu\n", | 587 | printk(KERN_ERR "\tcommit number %llu\n", |
588 | (unsigned long long) | 588 | (unsigned long long) |
589 | le64_to_cpu(orph->cmt_no) & LLONG_MAX); | 589 | le64_to_cpu(orph->cmt_no) & LLONG_MAX); |
590 | printk(KERN_DEBUG "\tlast node flag %llu\n", | 590 | printk(KERN_ERR "\tlast node flag %llu\n", |
591 | (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); | 591 | (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); |
592 | n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; | 592 | n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; |
593 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | 593 | printk(KERN_ERR "\t%d orphan inode numbers:\n", n); |
594 | for (i = 0; i < n; i++) | 594 | for (i = 0; i < n; i++) |
595 | printk(KERN_DEBUG "\t ino %llu\n", | 595 | printk(KERN_ERR "\t ino %llu\n", |
596 | (unsigned long long)le64_to_cpu(orph->inos[i])); | 596 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
597 | break; | 597 | break; |
598 | } | 598 | } |
599 | default: | 599 | default: |
600 | printk(KERN_DEBUG "node type %d was not recognized\n", | 600 | printk(KERN_ERR "node type %d was not recognized\n", |
601 | (int)ch->node_type); | 601 | (int)ch->node_type); |
602 | } | 602 | } |
603 | spin_unlock(&dbg_lock); | 603 | spin_unlock(&dbg_lock); |
@@ -606,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
606 | void dbg_dump_budget_req(const struct ubifs_budget_req *req) | 606 | void dbg_dump_budget_req(const struct ubifs_budget_req *req) |
607 | { | 607 | { |
608 | spin_lock(&dbg_lock); | 608 | spin_lock(&dbg_lock); |
609 | printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", | 609 | printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", |
610 | req->new_ino, req->dirtied_ino); | 610 | req->new_ino, req->dirtied_ino); |
611 | printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", | 611 | printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", |
612 | req->new_ino_d, req->dirtied_ino_d); | 612 | req->new_ino_d, req->dirtied_ino_d); |
613 | printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", | 613 | printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", |
614 | req->new_page, req->dirtied_page); | 614 | req->new_page, req->dirtied_page); |
615 | printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", | 615 | printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", |
616 | req->new_dent, req->mod_dent); | 616 | req->new_dent, req->mod_dent); |
617 | printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); | 617 | printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); |
618 | printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", | 618 | printk(KERN_ERR "\tdata_growth %d dd_growth %d\n", |
619 | req->data_growth, req->dd_growth); | 619 | req->data_growth, req->dd_growth); |
620 | spin_unlock(&dbg_lock); | 620 | spin_unlock(&dbg_lock); |
621 | } | 621 | } |
@@ -623,12 +623,12 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) | |||
623 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst) | 623 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst) |
624 | { | 624 | { |
625 | spin_lock(&dbg_lock); | 625 | spin_lock(&dbg_lock); |
626 | printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " | 626 | printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " |
627 | "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); | 627 | "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); |
628 | printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " | 628 | printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " |
629 | "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, | 629 | "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, |
630 | lst->total_dirty); | 630 | lst->total_dirty); |
631 | printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " | 631 | printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " |
632 | "total_dead %lld\n", lst->total_used, lst->total_dark, | 632 | "total_dead %lld\n", lst->total_used, lst->total_dark, |
633 | lst->total_dead); | 633 | lst->total_dead); |
634 | spin_unlock(&dbg_lock); | 634 | spin_unlock(&dbg_lock); |
@@ -644,21 +644,21 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) | |||
644 | 644 | ||
645 | spin_lock(&c->space_lock); | 645 | spin_lock(&c->space_lock); |
646 | spin_lock(&dbg_lock); | 646 | spin_lock(&dbg_lock); |
647 | printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " | 647 | printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, " |
648 | "total budget sum %lld\n", current->pid, | 648 | "total budget sum %lld\n", current->pid, |
649 | bi->data_growth + bi->dd_growth, | 649 | bi->data_growth + bi->dd_growth, |
650 | bi->data_growth + bi->dd_growth + bi->idx_growth); | 650 | bi->data_growth + bi->dd_growth + bi->idx_growth); |
651 | printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " | 651 | printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " |
652 | "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, | 652 | "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, |
653 | bi->idx_growth); | 653 | bi->idx_growth); |
654 | printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " | 654 | printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " |
655 | "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, | 655 | "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, |
656 | bi->uncommitted_idx); | 656 | bi->uncommitted_idx); |
657 | printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", | 657 | printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", |
658 | bi->page_budget, bi->inode_budget, bi->dent_budget); | 658 | bi->page_budget, bi->inode_budget, bi->dent_budget); |
659 | printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", | 659 | printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", |
660 | bi->nospace, bi->nospace_rp); | 660 | bi->nospace, bi->nospace_rp); |
661 | printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", | 661 | printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", |
662 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); | 662 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); |
663 | 663 | ||
664 | if (bi != &c->bi) | 664 | if (bi != &c->bi) |
@@ -669,38 +669,38 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) | |||
669 | */ | 669 | */ |
670 | goto out_unlock; | 670 | goto out_unlock; |
671 | 671 | ||
672 | printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", | 672 | printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", |
673 | c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); | 673 | c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); |
674 | printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " | 674 | printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " |
675 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), | 675 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), |
676 | atomic_long_read(&c->dirty_zn_cnt), | 676 | atomic_long_read(&c->dirty_zn_cnt), |
677 | atomic_long_read(&c->clean_zn_cnt)); | 677 | atomic_long_read(&c->clean_zn_cnt)); |
678 | printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", | 678 | printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", |
679 | c->gc_lnum, c->ihead_lnum); | 679 | c->gc_lnum, c->ihead_lnum); |
680 | 680 | ||
681 | /* If we are in R/O mode, journal heads do not exist */ | 681 | /* If we are in R/O mode, journal heads do not exist */ |
682 | if (c->jheads) | 682 | if (c->jheads) |
683 | for (i = 0; i < c->jhead_cnt; i++) | 683 | for (i = 0; i < c->jhead_cnt; i++) |
684 | printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", | 684 | printk(KERN_ERR "\tjhead %s\t LEB %d\n", |
685 | dbg_jhead(c->jheads[i].wbuf.jhead), | 685 | dbg_jhead(c->jheads[i].wbuf.jhead), |
686 | c->jheads[i].wbuf.lnum); | 686 | c->jheads[i].wbuf.lnum); |
687 | for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { | 687 | for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { |
688 | bud = rb_entry(rb, struct ubifs_bud, rb); | 688 | bud = rb_entry(rb, struct ubifs_bud, rb); |
689 | printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); | 689 | printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); |
690 | } | 690 | } |
691 | list_for_each_entry(bud, &c->old_buds, list) | 691 | list_for_each_entry(bud, &c->old_buds, list) |
692 | printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); | 692 | printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); |
693 | list_for_each_entry(idx_gc, &c->idx_gc, list) | 693 | list_for_each_entry(idx_gc, &c->idx_gc, list) |
694 | printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", | 694 | printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", |
695 | idx_gc->lnum, idx_gc->unmap); | 695 | idx_gc->lnum, idx_gc->unmap); |
696 | printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); | 696 | printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); |
697 | 697 | ||
698 | /* Print budgeting predictions */ | 698 | /* Print budgeting predictions */ |
699 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); | 699 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); |
700 | outstanding = c->bi.data_growth + c->bi.dd_growth; | 700 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
701 | free = ubifs_get_free_space_nolock(c); | 701 | free = ubifs_get_free_space_nolock(c); |
702 | printk(KERN_DEBUG "Budgeting predictions:\n"); | 702 | printk(KERN_ERR "Budgeting predictions:\n"); |
703 | printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", | 703 | printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", |
704 | available, outstanding, free); | 704 | available, outstanding, free); |
705 | out_unlock: | 705 | out_unlock: |
706 | spin_unlock(&dbg_lock); | 706 | spin_unlock(&dbg_lock); |
@@ -720,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | |||
720 | dark = ubifs_calc_dark(c, spc); | 720 | dark = ubifs_calc_dark(c, spc); |
721 | 721 | ||
722 | if (lp->flags & LPROPS_INDEX) | 722 | if (lp->flags & LPROPS_INDEX) |
723 | printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " | 723 | printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " |
724 | "free + dirty %-8d flags %#x (", lp->lnum, lp->free, | 724 | "free + dirty %-8d flags %#x (", lp->lnum, lp->free, |
725 | lp->dirty, c->leb_size - spc, spc, lp->flags); | 725 | lp->dirty, c->leb_size - spc, spc, lp->flags); |
726 | else | 726 | else |
727 | printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " | 727 | printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " |
728 | "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " | 728 | "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " |
729 | "flags %#-4x (", lp->lnum, lp->free, lp->dirty, | 729 | "flags %#-4x (", lp->lnum, lp->free, lp->dirty, |
730 | c->leb_size - spc, spc, dark, dead, | 730 | c->leb_size - spc, spc, dark, dead, |
@@ -807,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c) | |||
807 | struct ubifs_lprops lp; | 807 | struct ubifs_lprops lp; |
808 | struct ubifs_lp_stats lst; | 808 | struct ubifs_lp_stats lst; |
809 | 809 | ||
810 | printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", | 810 | printk(KERN_ERR "(pid %d) start dumping LEB properties\n", |
811 | current->pid); | 811 | current->pid); |
812 | ubifs_get_lp_stats(c, &lst); | 812 | ubifs_get_lp_stats(c, &lst); |
813 | dbg_dump_lstats(&lst); | 813 | dbg_dump_lstats(&lst); |
@@ -819,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c) | |||
819 | 819 | ||
820 | dbg_dump_lprop(c, &lp); | 820 | dbg_dump_lprop(c, &lp); |
821 | } | 821 | } |
822 | printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", | 822 | printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", |
823 | current->pid); | 823 | current->pid); |
824 | } | 824 | } |
825 | 825 | ||
@@ -828,35 +828,35 @@ void dbg_dump_lpt_info(struct ubifs_info *c) | |||
828 | int i; | 828 | int i; |
829 | 829 | ||
830 | spin_lock(&dbg_lock); | 830 | spin_lock(&dbg_lock); |
831 | printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); | 831 | printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); |
832 | printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); | 832 | printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); |
833 | printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); | 833 | printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); |
834 | printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); | 834 | printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); |
835 | printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); | 835 | printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); |
836 | printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); | 836 | printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); |
837 | printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); | 837 | printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); |
838 | printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); | 838 | printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght); |
839 | printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); | 839 | printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); |
840 | printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); | 840 | printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); |
841 | printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); | 841 | printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); |
842 | printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); | 842 | printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); |
843 | printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); | 843 | printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); |
844 | printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); | 844 | printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); |
845 | printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); | 845 | printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); |
846 | printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); | 846 | printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); |
847 | printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); | 847 | printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); |
848 | printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); | 848 | printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); |
849 | printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); | 849 | printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); |
850 | printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); | 850 | printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); |
851 | printk(KERN_DEBUG "\tLPT head is at %d:%d\n", | 851 | printk(KERN_ERR "\tLPT head is at %d:%d\n", |
852 | c->nhead_lnum, c->nhead_offs); | 852 | c->nhead_lnum, c->nhead_offs); |
853 | printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", | 853 | printk(KERN_ERR "\tLPT ltab is at %d:%d\n", |
854 | c->ltab_lnum, c->ltab_offs); | 854 | c->ltab_lnum, c->ltab_offs); |
855 | if (c->big_lpt) | 855 | if (c->big_lpt) |
856 | printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", | 856 | printk(KERN_ERR "\tLPT lsave is at %d:%d\n", |
857 | c->lsave_lnum, c->lsave_offs); | 857 | c->lsave_lnum, c->lsave_offs); |
858 | for (i = 0; i < c->lpt_lebs; i++) | 858 | for (i = 0; i < c->lpt_lebs; i++) |
859 | printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " | 859 | printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " |
860 | "cmt %d\n", i + c->lpt_first, c->ltab[i].free, | 860 | "cmt %d\n", i + c->lpt_first, c->ltab[i].free, |
861 | c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); | 861 | c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); |
862 | spin_unlock(&dbg_lock); | 862 | spin_unlock(&dbg_lock); |
@@ -867,12 +867,12 @@ void dbg_dump_sleb(const struct ubifs_info *c, | |||
867 | { | 867 | { |
868 | struct ubifs_scan_node *snod; | 868 | struct ubifs_scan_node *snod; |
869 | 869 | ||
870 | printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n", | 870 | printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", |
871 | current->pid, sleb->lnum, offs); | 871 | current->pid, sleb->lnum, offs); |
872 | 872 | ||
873 | list_for_each_entry(snod, &sleb->nodes, list) { | 873 | list_for_each_entry(snod, &sleb->nodes, list) { |
874 | cond_resched(); | 874 | cond_resched(); |
875 | printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum, | 875 | printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, |
876 | snod->offs, snod->len); | 876 | snod->offs, snod->len); |
877 | dbg_dump_node(c, snod->node); | 877 | dbg_dump_node(c, snod->node); |
878 | } | 878 | } |
@@ -887,7 +887,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) | |||
887 | if (dbg_is_tst_rcvry(c)) | 887 | if (dbg_is_tst_rcvry(c)) |
888 | return; | 888 | return; |
889 | 889 | ||
890 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", | 890 | printk(KERN_ERR "(pid %d) start dumping LEB %d\n", |
891 | current->pid, lnum); | 891 | current->pid, lnum); |
892 | 892 | ||
893 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); | 893 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
@@ -902,17 +902,17 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) | |||
902 | goto out; | 902 | goto out; |
903 | } | 903 | } |
904 | 904 | ||
905 | printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, | 905 | printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, |
906 | sleb->nodes_cnt, sleb->endpt); | 906 | sleb->nodes_cnt, sleb->endpt); |
907 | 907 | ||
908 | list_for_each_entry(snod, &sleb->nodes, list) { | 908 | list_for_each_entry(snod, &sleb->nodes, list) { |
909 | cond_resched(); | 909 | cond_resched(); |
910 | printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, | 910 | printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, |
911 | snod->offs, snod->len); | 911 | snod->offs, snod->len); |
912 | dbg_dump_node(c, snod->node); | 912 | dbg_dump_node(c, snod->node); |
913 | } | 913 | } |
914 | 914 | ||
915 | printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", | 915 | printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", |
916 | current->pid, lnum); | 916 | current->pid, lnum); |
917 | ubifs_scan_destroy(sleb); | 917 | ubifs_scan_destroy(sleb); |
918 | 918 | ||
@@ -934,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c, | |||
934 | else | 934 | else |
935 | zbr = &c->zroot; | 935 | zbr = &c->zroot; |
936 | 936 | ||
937 | printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" | 937 | printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" |
938 | " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, | 938 | " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, |
939 | zbr->len, znode->parent, znode->iip, znode->level, | 939 | zbr->len, znode->parent, znode->iip, znode->level, |
940 | znode->child_cnt, znode->flags); | 940 | znode->child_cnt, znode->flags); |
@@ -944,18 +944,18 @@ void dbg_dump_znode(const struct ubifs_info *c, | |||
944 | return; | 944 | return; |
945 | } | 945 | } |
946 | 946 | ||
947 | printk(KERN_DEBUG "zbranches:\n"); | 947 | printk(KERN_ERR "zbranches:\n"); |
948 | for (n = 0; n < znode->child_cnt; n++) { | 948 | for (n = 0; n < znode->child_cnt; n++) { |
949 | zbr = &znode->zbranch[n]; | 949 | zbr = &znode->zbranch[n]; |
950 | if (znode->level > 0) | 950 | if (znode->level > 0) |
951 | printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " | 951 | printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " |
952 | "%s\n", n, zbr->znode, zbr->lnum, | 952 | "%s\n", n, zbr->znode, zbr->lnum, |
953 | zbr->offs, zbr->len, | 953 | zbr->offs, zbr->len, |
954 | dbg_snprintf_key(c, &zbr->key, | 954 | dbg_snprintf_key(c, &zbr->key, |
955 | key_buf, | 955 | key_buf, |
956 | DBG_KEY_BUF_LEN)); | 956 | DBG_KEY_BUF_LEN)); |
957 | else | 957 | else |
958 | printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " | 958 | printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " |
959 | "%s\n", n, zbr->znode, zbr->lnum, | 959 | "%s\n", n, zbr->znode, zbr->lnum, |
960 | zbr->offs, zbr->len, | 960 | zbr->offs, zbr->len, |
961 | dbg_snprintf_key(c, &zbr->key, | 961 | dbg_snprintf_key(c, &zbr->key, |
@@ -969,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) | |||
969 | { | 969 | { |
970 | int i; | 970 | int i; |
971 | 971 | ||
972 | printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", | 972 | printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", |
973 | current->pid, cat, heap->cnt); | 973 | current->pid, cat, heap->cnt); |
974 | for (i = 0; i < heap->cnt; i++) { | 974 | for (i = 0; i < heap->cnt; i++) { |
975 | struct ubifs_lprops *lprops = heap->arr[i]; | 975 | struct ubifs_lprops *lprops = heap->arr[i]; |
976 | 976 | ||
977 | printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " | 977 | printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d " |
978 | "flags %d\n", i, lprops->lnum, lprops->hpos, | 978 | "flags %d\n", i, lprops->lnum, lprops->hpos, |
979 | lprops->free, lprops->dirty, lprops->flags); | 979 | lprops->free, lprops->dirty, lprops->flags); |
980 | } | 980 | } |
981 | printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); | 981 | printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); |
982 | } | 982 | } |
983 | 983 | ||
984 | void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | 984 | void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, |
@@ -986,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | |||
986 | { | 986 | { |
987 | int i; | 987 | int i; |
988 | 988 | ||
989 | printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); | 989 | printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); |
990 | printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", | 990 | printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", |
991 | (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); | 991 | (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); |
992 | printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", | 992 | printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", |
993 | pnode->flags, iip, pnode->level, pnode->num); | 993 | pnode->flags, iip, pnode->level, pnode->num); |
994 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | 994 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { |
995 | struct ubifs_lprops *lp = &pnode->lprops[i]; | 995 | struct ubifs_lprops *lp = &pnode->lprops[i]; |
996 | 996 | ||
997 | printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", | 997 | printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", |
998 | i, lp->free, lp->dirty, lp->flags, lp->lnum); | 998 | i, lp->free, lp->dirty, lp->flags, lp->lnum); |
999 | } | 999 | } |
1000 | } | 1000 | } |
@@ -1004,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c) | |||
1004 | struct ubifs_znode *znode; | 1004 | struct ubifs_znode *znode; |
1005 | int level; | 1005 | int level; |
1006 | 1006 | ||
1007 | printk(KERN_DEBUG "\n"); | 1007 | printk(KERN_ERR "\n"); |
1008 | printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); | 1008 | printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); |
1009 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | 1009 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); |
1010 | level = znode->level; | 1010 | level = znode->level; |
1011 | printk(KERN_DEBUG "== Level %d ==\n", level); | 1011 | printk(KERN_ERR "== Level %d ==\n", level); |
1012 | while (znode) { | 1012 | while (znode) { |
1013 | if (level != znode->level) { | 1013 | if (level != znode->level) { |
1014 | level = znode->level; | 1014 | level = znode->level; |
1015 | printk(KERN_DEBUG "== Level %d ==\n", level); | 1015 | printk(KERN_ERR "== Level %d ==\n", level); |
1016 | } | 1016 | } |
1017 | dbg_dump_znode(c, znode); | 1017 | dbg_dump_znode(c, znode); |
1018 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); | 1018 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); |
1019 | } | 1019 | } |
1020 | printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); | 1020 | printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); |
1021 | } | 1021 | } |
1022 | 1022 | ||
1023 | static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, | 1023 | static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index ad1a6fee6010..9f717655df18 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -164,9 +164,7 @@ struct ubifs_global_debug_info { | |||
164 | #define dbg_dump_stack() dump_stack() | 164 | #define dbg_dump_stack() dump_stack() |
165 | 165 | ||
166 | #define dbg_err(fmt, ...) do { \ | 166 | #define dbg_err(fmt, ...) do { \ |
167 | spin_lock(&dbg_lock); \ | ||
168 | ubifs_err(fmt, ##__VA_ARGS__); \ | 167 | ubifs_err(fmt, ##__VA_ARGS__); \ |
169 | spin_unlock(&dbg_lock); \ | ||
170 | } while (0) | 168 | } while (0) |
171 | 169 | ||
172 | #define ubifs_dbg_msg(type, fmt, ...) \ | 170 | #define ubifs_dbg_msg(type, fmt, ...) \ |
@@ -217,7 +215,6 @@ struct ubifs_global_debug_info { | |||
217 | /* Additional recovery messages */ | 215 | /* Additional recovery messages */ |
218 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) | 216 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) |
219 | 217 | ||
220 | extern spinlock_t dbg_lock; | ||
221 | extern struct ubifs_global_debug_info ubifs_dbg; | 218 | extern struct ubifs_global_debug_info ubifs_dbg; |
222 | 219 | ||
223 | static inline int dbg_is_chk_gen(const struct ubifs_info *c) | 220 | static inline int dbg_is_chk_gen(const struct ubifs_info *c) |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index d6fe1c79f18b..ec9f1870ab7f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -566,6 +566,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
566 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); | 566 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); |
567 | int err, budgeted = 1; | 567 | int err, budgeted = 1; |
568 | struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; | 568 | struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; |
569 | unsigned int saved_nlink = inode->i_nlink; | ||
569 | 570 | ||
570 | /* | 571 | /* |
571 | * Budget request settings: deletion direntry, deletion inode (+1 for | 572 | * Budget request settings: deletion direntry, deletion inode (+1 for |
@@ -613,7 +614,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
613 | out_cancel: | 614 | out_cancel: |
614 | dir->i_size += sz_change; | 615 | dir->i_size += sz_change; |
615 | dir_ui->ui_size = dir->i_size; | 616 | dir_ui->ui_size = dir->i_size; |
616 | inc_nlink(inode); | 617 | set_nlink(inode, saved_nlink); |
617 | unlock_2_inodes(dir, inode); | 618 | unlock_2_inodes(dir, inode); |
618 | if (budgeted) | 619 | if (budgeted) |
619 | ubifs_release_budget(c, &req); | 620 | ubifs_release_budget(c, &req); |
@@ -704,8 +705,7 @@ out_cancel: | |||
704 | dir->i_size += sz_change; | 705 | dir->i_size += sz_change; |
705 | dir_ui->ui_size = dir->i_size; | 706 | dir_ui->ui_size = dir->i_size; |
706 | inc_nlink(dir); | 707 | inc_nlink(dir); |
707 | inc_nlink(inode); | 708 | set_nlink(inode, 2); |
708 | inc_nlink(inode); | ||
709 | unlock_2_inodes(dir, inode); | 709 | unlock_2_inodes(dir, inode); |
710 | if (budgeted) | 710 | if (budgeted) |
711 | ubifs_release_budget(c, &req); | 711 | ubifs_release_budget(c, &req); |
@@ -977,6 +977,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
977 | struct ubifs_budget_req ino_req = { .dirtied_ino = 1, | 977 | struct ubifs_budget_req ino_req = { .dirtied_ino = 1, |
978 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; | 978 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; |
979 | struct timespec time; | 979 | struct timespec time; |
980 | unsigned int saved_nlink; | ||
980 | 981 | ||
981 | /* | 982 | /* |
982 | * Budget request settings: deletion direntry, new direntry, removing | 983 | * Budget request settings: deletion direntry, new direntry, removing |
@@ -1059,13 +1060,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1059 | if (unlink) { | 1060 | if (unlink) { |
1060 | /* | 1061 | /* |
1061 | * Directories cannot have hard-links, so if this is a | 1062 | * Directories cannot have hard-links, so if this is a |
1062 | * directory, decrement its @i_nlink twice because an empty | 1063 | * directory, just clear @i_nlink. |
1063 | * directory has @i_nlink 2. | ||
1064 | */ | 1064 | */ |
1065 | saved_nlink = new_inode->i_nlink; | ||
1065 | if (is_dir) | 1066 | if (is_dir) |
1067 | clear_nlink(new_inode); | ||
1068 | else | ||
1066 | drop_nlink(new_inode); | 1069 | drop_nlink(new_inode); |
1067 | new_inode->i_ctime = time; | 1070 | new_inode->i_ctime = time; |
1068 | drop_nlink(new_inode); | ||
1069 | } else { | 1071 | } else { |
1070 | new_dir->i_size += new_sz; | 1072 | new_dir->i_size += new_sz; |
1071 | ubifs_inode(new_dir)->ui_size = new_dir->i_size; | 1073 | ubifs_inode(new_dir)->ui_size = new_dir->i_size; |
@@ -1102,9 +1104,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1102 | 1104 | ||
1103 | out_cancel: | 1105 | out_cancel: |
1104 | if (unlink) { | 1106 | if (unlink) { |
1105 | if (is_dir) | 1107 | set_nlink(new_inode, saved_nlink); |
1106 | inc_nlink(new_inode); | ||
1107 | inc_nlink(new_inode); | ||
1108 | } else { | 1108 | } else { |
1109 | new_dir->i_size -= new_sz; | 1109 | new_dir->i_size -= new_sz; |
1110 | ubifs_inode(new_dir)->ui_size = new_dir->i_size; | 1110 | ubifs_inode(new_dir)->ui_size = new_dir->i_size; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index ee4f43f4bb99..2a935b317232 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -679,7 +679,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
679 | ret == SCANNED_GARBAGE || | 679 | ret == SCANNED_GARBAGE || |
680 | ret == SCANNED_A_BAD_PAD_NODE || | 680 | ret == SCANNED_A_BAD_PAD_NODE || |
681 | ret == SCANNED_A_CORRUPT_NODE) { | 681 | ret == SCANNED_A_CORRUPT_NODE) { |
682 | dbg_rcvry("found corruption - %d", ret); | 682 | dbg_rcvry("found corruption (%d) at %d:%d", |
683 | ret, lnum, offs); | ||
683 | break; | 684 | break; |
684 | } else { | 685 | } else { |
685 | dbg_err("unexpected return value %d", ret); | 686 | dbg_err("unexpected return value %d", ret); |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 6094c5a5d7a8..771f7fb6ce92 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
@@ -410,13 +410,23 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) | |||
410 | } | 410 | } |
411 | 411 | ||
412 | if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { | 412 | if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { |
413 | err = 7; | 413 | ubifs_err("too few main LEBs count %d, must be at least %d", |
414 | c->main_lebs, UBIFS_MIN_MAIN_LEBS); | ||
414 | goto failed; | 415 | goto failed; |
415 | } | 416 | } |
416 | 417 | ||
417 | if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || | 418 | max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; |
418 | c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { | 419 | if (c->max_bud_bytes < max_bytes) { |
419 | err = 8; | 420 | ubifs_err("too small journal (%lld bytes), must be at least " |
421 | "%lld bytes", c->max_bud_bytes, max_bytes); | ||
422 | goto failed; | ||
423 | } | ||
424 | |||
425 | max_bytes = (long long)c->leb_size * c->main_lebs; | ||
426 | if (c->max_bud_bytes > max_bytes) { | ||
427 | ubifs_err("too large journal size (%lld bytes), only %lld bytes" | ||
428 | "available in the main area", | ||
429 | c->max_bud_bytes, max_bytes); | ||
420 | goto failed; | 430 | goto failed; |
421 | } | 431 | } |
422 | 432 | ||
@@ -450,7 +460,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) | |||
450 | goto failed; | 460 | goto failed; |
451 | } | 461 | } |
452 | 462 | ||
453 | max_bytes = c->main_lebs * (long long)c->leb_size; | ||
454 | if (c->rp_size < 0 || max_bytes < c->rp_size) { | 463 | if (c->rp_size < 0 || max_bytes < c->rp_size) { |
455 | err = 14; | 464 | err = 14; |
456 | goto failed; | 465 | goto failed; |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 12e94774aa88..93d59aceaaef 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -84,9 +84,6 @@ | |||
84 | #define INUM_WARN_WATERMARK 0xFFF00000 | 84 | #define INUM_WARN_WATERMARK 0xFFF00000 |
85 | #define INUM_WATERMARK 0xFFFFFF00 | 85 | #define INUM_WATERMARK 0xFFFFFF00 |
86 | 86 | ||
87 | /* Largest key size supported in this implementation */ | ||
88 | #define CUR_MAX_KEY_LEN UBIFS_SK_LEN | ||
89 | |||
90 | /* Maximum number of entries in each LPT (LEB category) heap */ | 87 | /* Maximum number of entries in each LPT (LEB category) heap */ |
91 | #define LPT_HEAP_SZ 256 | 88 | #define LPT_HEAP_SZ 256 |
92 | 89 | ||
@@ -277,10 +274,10 @@ struct ubifs_old_idx { | |||
277 | 274 | ||
278 | /* The below union makes it easier to deal with keys */ | 275 | /* The below union makes it easier to deal with keys */ |
279 | union ubifs_key { | 276 | union ubifs_key { |
280 | uint8_t u8[CUR_MAX_KEY_LEN]; | 277 | uint8_t u8[UBIFS_SK_LEN]; |
281 | uint32_t u32[CUR_MAX_KEY_LEN/4]; | 278 | uint32_t u32[UBIFS_SK_LEN/4]; |
282 | uint64_t u64[CUR_MAX_KEY_LEN/8]; | 279 | uint64_t u64[UBIFS_SK_LEN/8]; |
283 | __le32 j32[CUR_MAX_KEY_LEN/4]; | 280 | __le32 j32[UBIFS_SK_LEN/4]; |
284 | }; | 281 | }; |
285 | 282 | ||
286 | /** | 283 | /** |
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 987585bb0a1d..1ba2baaf4367 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c | |||
@@ -105,7 +105,6 @@ static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt) | |||
105 | } | 105 | } |
106 | 106 | ||
107 | static void udf_bitmap_free_blocks(struct super_block *sb, | 107 | static void udf_bitmap_free_blocks(struct super_block *sb, |
108 | struct inode *inode, | ||
109 | struct udf_bitmap *bitmap, | 108 | struct udf_bitmap *bitmap, |
110 | struct kernel_lb_addr *bloc, | 109 | struct kernel_lb_addr *bloc, |
111 | uint32_t offset, | 110 | uint32_t offset, |
@@ -172,7 +171,6 @@ error_return: | |||
172 | } | 171 | } |
173 | 172 | ||
174 | static int udf_bitmap_prealloc_blocks(struct super_block *sb, | 173 | static int udf_bitmap_prealloc_blocks(struct super_block *sb, |
175 | struct inode *inode, | ||
176 | struct udf_bitmap *bitmap, | 174 | struct udf_bitmap *bitmap, |
177 | uint16_t partition, uint32_t first_block, | 175 | uint16_t partition, uint32_t first_block, |
178 | uint32_t block_count) | 176 | uint32_t block_count) |
@@ -223,7 +221,6 @@ out: | |||
223 | } | 221 | } |
224 | 222 | ||
225 | static int udf_bitmap_new_block(struct super_block *sb, | 223 | static int udf_bitmap_new_block(struct super_block *sb, |
226 | struct inode *inode, | ||
227 | struct udf_bitmap *bitmap, uint16_t partition, | 224 | struct udf_bitmap *bitmap, uint16_t partition, |
228 | uint32_t goal, int *err) | 225 | uint32_t goal, int *err) |
229 | { | 226 | { |
@@ -349,7 +346,6 @@ error_return: | |||
349 | } | 346 | } |
350 | 347 | ||
351 | static void udf_table_free_blocks(struct super_block *sb, | 348 | static void udf_table_free_blocks(struct super_block *sb, |
352 | struct inode *inode, | ||
353 | struct inode *table, | 349 | struct inode *table, |
354 | struct kernel_lb_addr *bloc, | 350 | struct kernel_lb_addr *bloc, |
355 | uint32_t offset, | 351 | uint32_t offset, |
@@ -581,7 +577,6 @@ error_return: | |||
581 | } | 577 | } |
582 | 578 | ||
583 | static int udf_table_prealloc_blocks(struct super_block *sb, | 579 | static int udf_table_prealloc_blocks(struct super_block *sb, |
584 | struct inode *inode, | ||
585 | struct inode *table, uint16_t partition, | 580 | struct inode *table, uint16_t partition, |
586 | uint32_t first_block, uint32_t block_count) | 581 | uint32_t first_block, uint32_t block_count) |
587 | { | 582 | { |
@@ -643,7 +638,6 @@ static int udf_table_prealloc_blocks(struct super_block *sb, | |||
643 | } | 638 | } |
644 | 639 | ||
645 | static int udf_table_new_block(struct super_block *sb, | 640 | static int udf_table_new_block(struct super_block *sb, |
646 | struct inode *inode, | ||
647 | struct inode *table, uint16_t partition, | 641 | struct inode *table, uint16_t partition, |
648 | uint32_t goal, int *err) | 642 | uint32_t goal, int *err) |
649 | { | 643 | { |
@@ -743,18 +737,23 @@ void udf_free_blocks(struct super_block *sb, struct inode *inode, | |||
743 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; | 737 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; |
744 | 738 | ||
745 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { | 739 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { |
746 | udf_bitmap_free_blocks(sb, inode, map->s_uspace.s_bitmap, | 740 | udf_bitmap_free_blocks(sb, map->s_uspace.s_bitmap, |
747 | bloc, offset, count); | 741 | bloc, offset, count); |
748 | } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { | 742 | } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { |
749 | udf_table_free_blocks(sb, inode, map->s_uspace.s_table, | 743 | udf_table_free_blocks(sb, map->s_uspace.s_table, |
750 | bloc, offset, count); | 744 | bloc, offset, count); |
751 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { | 745 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { |
752 | udf_bitmap_free_blocks(sb, inode, map->s_fspace.s_bitmap, | 746 | udf_bitmap_free_blocks(sb, map->s_fspace.s_bitmap, |
753 | bloc, offset, count); | 747 | bloc, offset, count); |
754 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { | 748 | } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { |
755 | udf_table_free_blocks(sb, inode, map->s_fspace.s_table, | 749 | udf_table_free_blocks(sb, map->s_fspace.s_table, |
756 | bloc, offset, count); | 750 | bloc, offset, count); |
757 | } | 751 | } |
752 | |||
753 | if (inode) { | ||
754 | inode_sub_bytes(inode, | ||
755 | ((sector_t)count) << sb->s_blocksize_bits); | ||
756 | } | ||
758 | } | 757 | } |
759 | 758 | ||
760 | inline int udf_prealloc_blocks(struct super_block *sb, | 759 | inline int udf_prealloc_blocks(struct super_block *sb, |
@@ -763,29 +762,34 @@ inline int udf_prealloc_blocks(struct super_block *sb, | |||
763 | uint32_t block_count) | 762 | uint32_t block_count) |
764 | { | 763 | { |
765 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; | 764 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; |
765 | sector_t allocated; | ||
766 | 766 | ||
767 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) | 767 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) |
768 | return udf_bitmap_prealloc_blocks(sb, inode, | 768 | allocated = udf_bitmap_prealloc_blocks(sb, |
769 | map->s_uspace.s_bitmap, | 769 | map->s_uspace.s_bitmap, |
770 | partition, first_block, | 770 | partition, first_block, |
771 | block_count); | 771 | block_count); |
772 | else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) | 772 | else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) |
773 | return udf_table_prealloc_blocks(sb, inode, | 773 | allocated = udf_table_prealloc_blocks(sb, |
774 | map->s_uspace.s_table, | 774 | map->s_uspace.s_table, |
775 | partition, first_block, | 775 | partition, first_block, |
776 | block_count); | 776 | block_count); |
777 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) | 777 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) |
778 | return udf_bitmap_prealloc_blocks(sb, inode, | 778 | allocated = udf_bitmap_prealloc_blocks(sb, |
779 | map->s_fspace.s_bitmap, | 779 | map->s_fspace.s_bitmap, |
780 | partition, first_block, | 780 | partition, first_block, |
781 | block_count); | 781 | block_count); |
782 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) | 782 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) |
783 | return udf_table_prealloc_blocks(sb, inode, | 783 | allocated = udf_table_prealloc_blocks(sb, |
784 | map->s_fspace.s_table, | 784 | map->s_fspace.s_table, |
785 | partition, first_block, | 785 | partition, first_block, |
786 | block_count); | 786 | block_count); |
787 | else | 787 | else |
788 | return 0; | 788 | return 0; |
789 | |||
790 | if (inode && allocated > 0) | ||
791 | inode_add_bytes(inode, allocated << sb->s_blocksize_bits); | ||
792 | return allocated; | ||
789 | } | 793 | } |
790 | 794 | ||
791 | inline int udf_new_block(struct super_block *sb, | 795 | inline int udf_new_block(struct super_block *sb, |
@@ -793,25 +797,29 @@ inline int udf_new_block(struct super_block *sb, | |||
793 | uint16_t partition, uint32_t goal, int *err) | 797 | uint16_t partition, uint32_t goal, int *err) |
794 | { | 798 | { |
795 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; | 799 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; |
800 | int block; | ||
796 | 801 | ||
797 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) | 802 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) |
798 | return udf_bitmap_new_block(sb, inode, | 803 | block = udf_bitmap_new_block(sb, |
799 | map->s_uspace.s_bitmap, | 804 | map->s_uspace.s_bitmap, |
800 | partition, goal, err); | 805 | partition, goal, err); |
801 | else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) | 806 | else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) |
802 | return udf_table_new_block(sb, inode, | 807 | block = udf_table_new_block(sb, |
803 | map->s_uspace.s_table, | 808 | map->s_uspace.s_table, |
804 | partition, goal, err); | ||
805 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) | ||
806 | return udf_bitmap_new_block(sb, inode, | ||
807 | map->s_fspace.s_bitmap, | ||
808 | partition, goal, err); | 809 | partition, goal, err); |
810 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) | ||
811 | block = udf_bitmap_new_block(sb, | ||
812 | map->s_fspace.s_bitmap, | ||
813 | partition, goal, err); | ||
809 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) | 814 | else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) |
810 | return udf_table_new_block(sb, inode, | 815 | block = udf_table_new_block(sb, |
811 | map->s_fspace.s_table, | 816 | map->s_fspace.s_table, |
812 | partition, goal, err); | 817 | partition, goal, err); |
813 | else { | 818 | else { |
814 | *err = -EIO; | 819 | *err = -EIO; |
815 | return 0; | 820 | return 0; |
816 | } | 821 | } |
822 | if (inode && block) | ||
823 | inode_add_bytes(inode, sb->s_blocksize); | ||
824 | return block; | ||
817 | } | 825 | } |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 05ab48195be9..7e5aae4bf46f 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -116,6 +116,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) | |||
116 | iinfo->i_lenEAttr = 0; | 116 | iinfo->i_lenEAttr = 0; |
117 | iinfo->i_lenAlloc = 0; | 117 | iinfo->i_lenAlloc = 0; |
118 | iinfo->i_use = 0; | 118 | iinfo->i_use = 0; |
119 | iinfo->i_checkpoint = 1; | ||
119 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 120 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
120 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 121 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
121 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 122 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7699df7b3198..7d7528008359 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -1358,6 +1358,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1358 | iinfo->i_unique = le64_to_cpu(fe->uniqueID); | 1358 | iinfo->i_unique = le64_to_cpu(fe->uniqueID); |
1359 | iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); | 1359 | iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); |
1360 | iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); | 1360 | iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); |
1361 | iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint); | ||
1361 | offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr; | 1362 | offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr; |
1362 | } else { | 1363 | } else { |
1363 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << | 1364 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << |
@@ -1379,6 +1380,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1379 | iinfo->i_unique = le64_to_cpu(efe->uniqueID); | 1380 | iinfo->i_unique = le64_to_cpu(efe->uniqueID); |
1380 | iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); | 1381 | iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); |
1381 | iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); | 1382 | iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); |
1383 | iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); | ||
1382 | offset = sizeof(struct extendedFileEntry) + | 1384 | offset = sizeof(struct extendedFileEntry) + |
1383 | iinfo->i_lenEAttr; | 1385 | iinfo->i_lenEAttr; |
1384 | } | 1386 | } |
@@ -1495,6 +1497,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1495 | struct buffer_head *bh = NULL; | 1497 | struct buffer_head *bh = NULL; |
1496 | struct fileEntry *fe; | 1498 | struct fileEntry *fe; |
1497 | struct extendedFileEntry *efe; | 1499 | struct extendedFileEntry *efe; |
1500 | uint64_t lb_recorded; | ||
1498 | uint32_t udfperms; | 1501 | uint32_t udfperms; |
1499 | uint16_t icbflags; | 1502 | uint16_t icbflags; |
1500 | uint16_t crclen; | 1503 | uint16_t crclen; |
@@ -1589,13 +1592,18 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1589 | dsea->minorDeviceIdent = cpu_to_le32(iminor(inode)); | 1592 | dsea->minorDeviceIdent = cpu_to_le32(iminor(inode)); |
1590 | } | 1593 | } |
1591 | 1594 | ||
1595 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
1596 | lb_recorded = 0; /* No extents => no blocks! */ | ||
1597 | else | ||
1598 | lb_recorded = | ||
1599 | (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> | ||
1600 | (blocksize_bits - 9); | ||
1601 | |||
1592 | if (iinfo->i_efe == 0) { | 1602 | if (iinfo->i_efe == 0) { |
1593 | memcpy(bh->b_data + sizeof(struct fileEntry), | 1603 | memcpy(bh->b_data + sizeof(struct fileEntry), |
1594 | iinfo->i_ext.i_data, | 1604 | iinfo->i_ext.i_data, |
1595 | inode->i_sb->s_blocksize - sizeof(struct fileEntry)); | 1605 | inode->i_sb->s_blocksize - sizeof(struct fileEntry)); |
1596 | fe->logicalBlocksRecorded = cpu_to_le64( | 1606 | fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); |
1597 | (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> | ||
1598 | (blocksize_bits - 9)); | ||
1599 | 1607 | ||
1600 | udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); | 1608 | udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); |
1601 | udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); | 1609 | udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); |
@@ -1607,6 +1615,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1607 | fe->uniqueID = cpu_to_le64(iinfo->i_unique); | 1615 | fe->uniqueID = cpu_to_le64(iinfo->i_unique); |
1608 | fe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); | 1616 | fe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); |
1609 | fe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); | 1617 | fe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); |
1618 | fe->checkpoint = cpu_to_le32(iinfo->i_checkpoint); | ||
1610 | fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE); | 1619 | fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE); |
1611 | crclen = sizeof(struct fileEntry); | 1620 | crclen = sizeof(struct fileEntry); |
1612 | } else { | 1621 | } else { |
@@ -1615,9 +1624,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1615 | inode->i_sb->s_blocksize - | 1624 | inode->i_sb->s_blocksize - |
1616 | sizeof(struct extendedFileEntry)); | 1625 | sizeof(struct extendedFileEntry)); |
1617 | efe->objectSize = cpu_to_le64(inode->i_size); | 1626 | efe->objectSize = cpu_to_le64(inode->i_size); |
1618 | efe->logicalBlocksRecorded = cpu_to_le64( | 1627 | efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); |
1619 | (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> | ||
1620 | (blocksize_bits - 9)); | ||
1621 | 1628 | ||
1622 | if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec || | 1629 | if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec || |
1623 | (iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec && | 1630 | (iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec && |
@@ -1646,6 +1653,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1646 | efe->uniqueID = cpu_to_le64(iinfo->i_unique); | 1653 | efe->uniqueID = cpu_to_le64(iinfo->i_unique); |
1647 | efe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); | 1654 | efe->lengthExtendedAttr = cpu_to_le32(iinfo->i_lenEAttr); |
1648 | efe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); | 1655 | efe->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); |
1656 | efe->checkpoint = cpu_to_le32(iinfo->i_checkpoint); | ||
1649 | efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE); | 1657 | efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE); |
1650 | crclen = sizeof(struct extendedFileEntry); | 1658 | crclen = sizeof(struct extendedFileEntry); |
1651 | } | 1659 | } |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 85067b4c7e14..ac8a348dcb69 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -950,11 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) | |||
950 | else | 950 | else |
951 | bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ | 951 | bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ |
952 | 952 | ||
953 | if (bitmap == NULL) { | 953 | if (bitmap == NULL) |
954 | udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n", | ||
955 | nr_groups); | ||
956 | return NULL; | 954 | return NULL; |
957 | } | ||
958 | 955 | ||
959 | bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1); | 956 | bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1); |
960 | bitmap->s_nr_groups = nr_groups; | 957 | bitmap->s_nr_groups = nr_groups; |
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index d1bd31ea724e..bb8309dcd5c1 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h | |||
@@ -23,6 +23,7 @@ struct udf_inode_info { | |||
23 | __u64 i_lenExtents; | 23 | __u64 i_lenExtents; |
24 | __u32 i_next_alloc_block; | 24 | __u32 i_next_alloc_block; |
25 | __u32 i_next_alloc_goal; | 25 | __u32 i_next_alloc_goal; |
26 | __u32 i_checkpoint; | ||
26 | unsigned i_alloc_type : 3; | 27 | unsigned i_alloc_type : 3; |
27 | unsigned i_efe : 1; /* extendedFileEntry */ | 28 | unsigned i_efe : 1; /* extendedFileEntry */ |
28 | unsigned i_use : 1; /* unallocSpaceEntry */ | 29 | unsigned i_use : 1; /* unallocSpaceEntry */ |
diff --git a/fs/xattr.c b/fs/xattr.c index 82f43376c7cd..d6dfd247bb2f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/security.h> | 16 | #include <linux/security.h> |
17 | #include <linux/evm.h> | 17 | #include <linux/evm.h> |
18 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
19 | #include <linux/module.h> | 19 | #include <linux/export.h> |
20 | #include <linux/fsnotify.h> | 20 | #include <linux/fsnotify.h> |
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 8d5a506c82eb..69d06b07b169 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | 5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/export.h> |
9 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
10 | #include <linux/posix_acl_xattr.h> | 10 | #include <linux/posix_acl_xattr.h> |
11 | #include <linux/gfp.h> | 11 | #include <linux/gfp.h> |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 427a4e82a588..0a9977983f92 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ | |||
96 | xfs_qm_bhv.o \ | 96 | xfs_qm_bhv.o \ |
97 | xfs_qm.o \ | 97 | xfs_qm.o \ |
98 | xfs_quotaops.o | 98 | xfs_quotaops.o |
99 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
100 | xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o | ||
101 | endif | ||
102 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | 99 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o |
103 | xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o | 100 | xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o |
104 | xfs-$(CONFIG_PROC_FS) += xfs_stats.o | 101 | xfs-$(CONFIG_PROC_FS) += xfs_stats.o |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index ce84ffd0264c..0f0df2759b09 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
36 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
37 | 37 | ||
38 | struct workqueue_struct *xfs_alloc_wq; | ||
38 | 39 | ||
39 | #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) | 40 | #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) |
40 | 41 | ||
@@ -68,7 +69,7 @@ xfs_alloc_lookup_eq( | |||
68 | * Lookup the first record greater than or equal to [bno, len] | 69 | * Lookup the first record greater than or equal to [bno, len] |
69 | * in the btree given by cur. | 70 | * in the btree given by cur. |
70 | */ | 71 | */ |
71 | STATIC int /* error */ | 72 | int /* error */ |
72 | xfs_alloc_lookup_ge( | 73 | xfs_alloc_lookup_ge( |
73 | struct xfs_btree_cur *cur, /* btree cursor */ | 74 | struct xfs_btree_cur *cur, /* btree cursor */ |
74 | xfs_agblock_t bno, /* starting block of extent */ | 75 | xfs_agblock_t bno, /* starting block of extent */ |
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf( | |||
2207 | * group or loop over the allocation groups to find the result. | 2208 | * group or loop over the allocation groups to find the result. |
2208 | */ | 2209 | */ |
2209 | int /* error */ | 2210 | int /* error */ |
2210 | xfs_alloc_vextent( | 2211 | __xfs_alloc_vextent( |
2211 | xfs_alloc_arg_t *args) /* allocation argument structure */ | 2212 | xfs_alloc_arg_t *args) /* allocation argument structure */ |
2212 | { | 2213 | { |
2213 | xfs_agblock_t agsize; /* allocation group size */ | 2214 | xfs_agblock_t agsize; /* allocation group size */ |
@@ -2417,6 +2418,37 @@ error0: | |||
2417 | return error; | 2418 | return error; |
2418 | } | 2419 | } |
2419 | 2420 | ||
2421 | static void | ||
2422 | xfs_alloc_vextent_worker( | ||
2423 | struct work_struct *work) | ||
2424 | { | ||
2425 | struct xfs_alloc_arg *args = container_of(work, | ||
2426 | struct xfs_alloc_arg, work); | ||
2427 | unsigned long pflags; | ||
2428 | |||
2429 | /* we are in a transaction context here */ | ||
2430 | current_set_flags_nested(&pflags, PF_FSTRANS); | ||
2431 | |||
2432 | args->result = __xfs_alloc_vextent(args); | ||
2433 | complete(args->done); | ||
2434 | |||
2435 | current_restore_flags_nested(&pflags, PF_FSTRANS); | ||
2436 | } | ||
2437 | |||
2438 | |||
2439 | int /* error */ | ||
2440 | xfs_alloc_vextent( | ||
2441 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
2442 | { | ||
2443 | DECLARE_COMPLETION_ONSTACK(done); | ||
2444 | |||
2445 | args->done = &done; | ||
2446 | INIT_WORK(&args->work, xfs_alloc_vextent_worker); | ||
2447 | queue_work(xfs_alloc_wq, &args->work); | ||
2448 | wait_for_completion(&done); | ||
2449 | return args->result; | ||
2450 | } | ||
2451 | |||
2420 | /* | 2452 | /* |
2421 | * Free an extent. | 2453 | * Free an extent. |
2422 | * Just break up the extent address and hand off to xfs_free_ag_extent | 2454 | * Just break up the extent address and hand off to xfs_free_ag_extent |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 2f52b924be79..3a7e7d8f8ded 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -25,6 +25,8 @@ struct xfs_perag; | |||
25 | struct xfs_trans; | 25 | struct xfs_trans; |
26 | struct xfs_busy_extent; | 26 | struct xfs_busy_extent; |
27 | 27 | ||
28 | extern struct workqueue_struct *xfs_alloc_wq; | ||
29 | |||
28 | /* | 30 | /* |
29 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. | 31 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. |
30 | */ | 32 | */ |
@@ -119,6 +121,9 @@ typedef struct xfs_alloc_arg { | |||
119 | char isfl; /* set if is freelist blocks - !acctg */ | 121 | char isfl; /* set if is freelist blocks - !acctg */ |
120 | char userdata; /* set if this is user data */ | 122 | char userdata; /* set if this is user data */ |
121 | xfs_fsblock_t firstblock; /* io first block allocated */ | 123 | xfs_fsblock_t firstblock; /* io first block allocated */ |
124 | struct completion *done; | ||
125 | struct work_struct work; | ||
126 | int result; | ||
122 | } xfs_alloc_arg_t; | 127 | } xfs_alloc_arg_t; |
123 | 128 | ||
124 | /* | 129 | /* |
@@ -243,6 +248,13 @@ xfs_alloc_lookup_le( | |||
243 | xfs_extlen_t len, /* length of extent */ | 248 | xfs_extlen_t len, /* length of extent */ |
244 | int *stat); /* success/failure */ | 249 | int *stat); /* success/failure */ |
245 | 250 | ||
251 | int /* error */ | ||
252 | xfs_alloc_lookup_ge( | ||
253 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
254 | xfs_agblock_t bno, /* starting block of extent */ | ||
255 | xfs_extlen_t len, /* length of extent */ | ||
256 | int *stat); /* success/failure */ | ||
257 | |||
246 | int /* error */ | 258 | int /* error */ |
247 | xfs_alloc_get_rec( | 259 | xfs_alloc_get_rec( |
248 | struct xfs_btree_cur *cur, /* btree cursor */ | 260 | struct xfs_btree_cur *cur, /* btree cursor */ |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 74b9baf36ac3..0dbb9e70fe21 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
27 | #include "xfs_dinode.h" | 27 | #include "xfs_dinode.h" |
28 | #include "xfs_inode.h" | 28 | #include "xfs_inode.h" |
29 | #include "xfs_inode_item.h" | ||
29 | #include "xfs_alloc.h" | 30 | #include "xfs_alloc.h" |
30 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
31 | #include "xfs_rw.h" | 32 | #include "xfs_rw.h" |
@@ -99,23 +100,6 @@ xfs_destroy_ioend( | |||
99 | } | 100 | } |
100 | 101 | ||
101 | /* | 102 | /* |
102 | * If the end of the current ioend is beyond the current EOF, | ||
103 | * return the new EOF value, otherwise zero. | ||
104 | */ | ||
105 | STATIC xfs_fsize_t | ||
106 | xfs_ioend_new_eof( | ||
107 | xfs_ioend_t *ioend) | ||
108 | { | ||
109 | xfs_inode_t *ip = XFS_I(ioend->io_inode); | ||
110 | xfs_fsize_t isize; | ||
111 | xfs_fsize_t bsize; | ||
112 | |||
113 | bsize = ioend->io_offset + ioend->io_size; | ||
114 | isize = MIN(i_size_read(VFS_I(ip)), bsize); | ||
115 | return isize > ip->i_d.di_size ? isize : 0; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * Fast and loose check if this write could update the on-disk inode size. | 103 | * Fast and loose check if this write could update the on-disk inode size. |
120 | */ | 104 | */ |
121 | static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) | 105 | static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) |
@@ -124,32 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) | |||
124 | XFS_I(ioend->io_inode)->i_d.di_size; | 108 | XFS_I(ioend->io_inode)->i_d.di_size; |
125 | } | 109 | } |
126 | 110 | ||
111 | STATIC int | ||
112 | xfs_setfilesize_trans_alloc( | ||
113 | struct xfs_ioend *ioend) | ||
114 | { | ||
115 | struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; | ||
116 | struct xfs_trans *tp; | ||
117 | int error; | ||
118 | |||
119 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
120 | |||
121 | error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); | ||
122 | if (error) { | ||
123 | xfs_trans_cancel(tp, 0); | ||
124 | return error; | ||
125 | } | ||
126 | |||
127 | ioend->io_append_trans = tp; | ||
128 | |||
129 | /* | ||
130 | * We hand off the transaction to the completion thread now, so | ||
131 | * clear the flag here. | ||
132 | */ | ||
133 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
127 | /* | 137 | /* |
128 | * Update on-disk file size now that data has been written to disk. | 138 | * Update on-disk file size now that data has been written to disk. |
129 | * | ||
130 | * This function does not block as blocking on the inode lock in IO completion | ||
131 | * can lead to IO completion order dependency deadlocks.. If it can't get the | ||
132 | * inode ilock it will return EAGAIN. Callers must handle this. | ||
133 | */ | 139 | */ |
134 | STATIC int | 140 | STATIC int |
135 | xfs_setfilesize( | 141 | xfs_setfilesize( |
136 | xfs_ioend_t *ioend) | 142 | struct xfs_ioend *ioend) |
137 | { | 143 | { |
138 | xfs_inode_t *ip = XFS_I(ioend->io_inode); | 144 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
145 | struct xfs_trans *tp = ioend->io_append_trans; | ||
139 | xfs_fsize_t isize; | 146 | xfs_fsize_t isize; |
140 | 147 | ||
141 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) | 148 | /* |
142 | return EAGAIN; | 149 | * The transaction was allocated in the I/O submission thread, |
150 | * thus we need to mark ourselves as being in a transaction | ||
151 | * manually. | ||
152 | */ | ||
153 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
143 | 154 | ||
144 | isize = xfs_ioend_new_eof(ioend); | 155 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
145 | if (isize) { | 156 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); |
146 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); | 157 | if (!isize) { |
147 | ip->i_d.di_size = isize; | 158 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
148 | xfs_mark_inode_dirty(ip); | 159 | xfs_trans_cancel(tp, 0); |
160 | return 0; | ||
149 | } | 161 | } |
150 | 162 | ||
151 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 163 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); |
152 | return 0; | 164 | |
165 | ip->i_d.di_size = isize; | ||
166 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
167 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
168 | |||
169 | return xfs_trans_commit(tp, 0); | ||
153 | } | 170 | } |
154 | 171 | ||
155 | /* | 172 | /* |
@@ -163,10 +180,12 @@ xfs_finish_ioend( | |||
163 | struct xfs_ioend *ioend) | 180 | struct xfs_ioend *ioend) |
164 | { | 181 | { |
165 | if (atomic_dec_and_test(&ioend->io_remaining)) { | 182 | if (atomic_dec_and_test(&ioend->io_remaining)) { |
183 | struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; | ||
184 | |||
166 | if (ioend->io_type == IO_UNWRITTEN) | 185 | if (ioend->io_type == IO_UNWRITTEN) |
167 | queue_work(xfsconvertd_workqueue, &ioend->io_work); | 186 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); |
168 | else if (xfs_ioend_is_append(ioend)) | 187 | else if (ioend->io_append_trans) |
169 | queue_work(xfsdatad_workqueue, &ioend->io_work); | 188 | queue_work(mp->m_data_workqueue, &ioend->io_work); |
170 | else | 189 | else |
171 | xfs_destroy_ioend(ioend); | 190 | xfs_destroy_ioend(ioend); |
172 | } | 191 | } |
@@ -195,35 +214,36 @@ xfs_end_io( | |||
195 | * range to normal written extents after the data I/O has finished. | 214 |
196 | */ | 215 | */ |
197 | if (ioend->io_type == IO_UNWRITTEN) { | 216 | if (ioend->io_type == IO_UNWRITTEN) { |
217 | /* | ||
218 | * For buffered I/O we never preallocate a transaction when | ||
219 | * doing the unwritten extent conversion, but for direct I/O | ||
220 | * we do not know if we are converting an unwritten extent | ||
221 | * or not at the point where we preallocate the transaction. | ||
222 | */ | ||
223 | if (ioend->io_append_trans) { | ||
224 | ASSERT(ioend->io_isdirect); | ||
225 | |||
226 | current_set_flags_nested( | ||
227 | &ioend->io_append_trans->t_pflags, PF_FSTRANS); | ||
228 | xfs_trans_cancel(ioend->io_append_trans, 0); | ||
229 | } | ||
230 | |||
198 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 231 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
199 | ioend->io_size); | 232 | ioend->io_size); |
200 | if (error) { | 233 | if (error) { |
201 | ioend->io_error = -error; | 234 | ioend->io_error = -error; |
202 | goto done; | 235 | goto done; |
203 | } | 236 | } |
237 | } else if (ioend->io_append_trans) { | ||
238 | error = xfs_setfilesize(ioend); | ||
239 | if (error) | ||
240 | ioend->io_error = -error; | ||
241 | } else { | ||
242 | ASSERT(!xfs_ioend_is_append(ioend)); | ||
204 | } | 243 | } |
205 | 244 | ||
206 | /* | ||
207 | * We might have to update the on-disk file size after extending | ||
208 | * writes. | ||
209 | */ | ||
210 | error = xfs_setfilesize(ioend); | ||
211 | ASSERT(!error || error == EAGAIN); | ||
212 | |||
213 | done: | 245 | done: |
214 | /* | 246 | xfs_destroy_ioend(ioend); |
215 | * If we didn't complete processing of the ioend, requeue it to the | ||
216 | * tail of the workqueue for another attempt later. Otherwise destroy | ||
217 | * it. | ||
218 | */ | ||
219 | if (error == EAGAIN) { | ||
220 | atomic_inc(&ioend->io_remaining); | ||
221 | xfs_finish_ioend(ioend); | ||
222 | /* ensure we don't spin on blocked ioends */ | ||
223 | delay(1); | ||
224 | } else { | ||
225 | xfs_destroy_ioend(ioend); | ||
226 | } | ||
227 | } | 247 | } |
228 | 248 | ||
229 | /* | 249 | /* |
@@ -259,6 +279,7 @@ xfs_alloc_ioend( | |||
259 | */ | 279 | */ |
260 | atomic_set(&ioend->io_remaining, 1); | 280 | atomic_set(&ioend->io_remaining, 1); |
261 | ioend->io_isasync = 0; | 281 | ioend->io_isasync = 0; |
282 | ioend->io_isdirect = 0; | ||
262 | ioend->io_error = 0; | 283 | ioend->io_error = 0; |
263 | ioend->io_list = NULL; | 284 | ioend->io_list = NULL; |
264 | ioend->io_type = type; | 285 | ioend->io_type = type; |
@@ -269,6 +290,7 @@ xfs_alloc_ioend( | |||
269 | ioend->io_size = 0; | 290 | ioend->io_size = 0; |
270 | ioend->io_iocb = NULL; | 291 | ioend->io_iocb = NULL; |
271 | ioend->io_result = 0; | 292 | ioend->io_result = 0; |
293 | ioend->io_append_trans = NULL; | ||
272 | 294 | ||
273 | INIT_WORK(&ioend->io_work, xfs_end_io); | 295 | INIT_WORK(&ioend->io_work, xfs_end_io); |
274 | return ioend; | 296 | return ioend; |
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio( | |||
379 | atomic_inc(&ioend->io_remaining); | 401 | atomic_inc(&ioend->io_remaining); |
380 | bio->bi_private = ioend; | 402 | bio->bi_private = ioend; |
381 | bio->bi_end_io = xfs_end_bio; | 403 | bio->bi_end_io = xfs_end_bio; |
382 | |||
383 | /* | ||
384 | * If the I/O is beyond EOF we mark the inode dirty immediately | ||
385 | * but don't update the inode size until I/O completion. | ||
386 | */ | ||
387 | if (xfs_ioend_new_eof(ioend)) | ||
388 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); | ||
389 | |||
390 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); | 404 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); |
391 | } | 405 | } |
392 | 406 | ||
@@ -1033,8 +1047,20 @@ xfs_vm_writepage( | |||
1033 | wbc, end_index); | 1047 | wbc, end_index); |
1034 | } | 1048 | } |
1035 | 1049 | ||
1036 | if (iohead) | 1050 | if (iohead) { |
1051 | /* | ||
1052 | * Reserve log space if we might write beyond the on-disk | ||
1053 | * inode size. | ||
1054 | */ | ||
1055 | if (ioend->io_type != IO_UNWRITTEN && | ||
1056 | xfs_ioend_is_append(ioend)) { | ||
1057 | err = xfs_setfilesize_trans_alloc(ioend); | ||
1058 | if (err) | ||
1059 | goto error; | ||
1060 | } | ||
1061 | |||
1037 | xfs_submit_ioend(wbc, iohead); | 1062 | xfs_submit_ioend(wbc, iohead); |
1063 | } | ||
1038 | 1064 | ||
1039 | return 0; | 1065 | return 0; |
1040 | 1066 | ||
@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO( | |||
1314 | { | 1340 | { |
1315 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 1341 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
1316 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); | 1342 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
1343 | struct xfs_ioend *ioend = NULL; | ||
1317 | ssize_t ret; | 1344 | ssize_t ret; |
1318 | 1345 | ||
1319 | if (rw & WRITE) { | 1346 | if (rw & WRITE) { |
1320 | iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); | 1347 | size_t size = iov_length(iov, nr_segs); |
1348 | |||
1349 | /* | ||
1350 | * We need to preallocate a transaction for a size update | ||
1351 | * here. In the case that this write both updates the size | ||
1352 | * and converts at least one unwritten extent we will cancel | ||
1353 | * the still clean transaction after the I/O has finished. | ||
1354 | */ | ||
1355 | iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT); | ||
1356 | if (offset + size > XFS_I(inode)->i_d.di_size) { | ||
1357 | ret = xfs_setfilesize_trans_alloc(ioend); | ||
1358 | if (ret) | ||
1359 | goto out_destroy_ioend; | ||
1360 | ioend->io_isdirect = 1; | ||
1361 | } | ||
1321 | 1362 | ||
1322 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1363 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1323 | offset, nr_segs, | 1364 | offset, nr_segs, |
1324 | xfs_get_blocks_direct, | 1365 | xfs_get_blocks_direct, |
1325 | xfs_end_io_direct_write, NULL, 0); | 1366 | xfs_end_io_direct_write, NULL, 0); |
1326 | if (ret != -EIOCBQUEUED && iocb->private) | 1367 | if (ret != -EIOCBQUEUED && iocb->private) |
1327 | xfs_destroy_ioend(iocb->private); | 1368 | goto out_trans_cancel; |
1328 | } else { | 1369 | } else { |
1329 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1370 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1330 | offset, nr_segs, | 1371 | offset, nr_segs, |
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO( | |||
1333 | } | 1374 | } |
1334 | 1375 | ||
1335 | return ret; | 1376 | return ret; |
1377 | |||
1378 | out_trans_cancel: | ||
1379 | if (ioend->io_append_trans) { | ||
1380 | current_set_flags_nested(&ioend->io_append_trans->t_pflags, | ||
1381 | PF_FSTRANS); | ||
1382 | xfs_trans_cancel(ioend->io_append_trans, 0); | ||
1383 | } | ||
1384 | out_destroy_ioend: | ||
1385 | xfs_destroy_ioend(ioend); | ||
1386 | return ret; | ||
1336 | } | 1387 | } |
1337 | 1388 | ||
1338 | STATIC void | 1389 | STATIC void |
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 116dd5c37034..84eafbcb0d9d 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h | |||
@@ -18,8 +18,6 @@ | |||
18 | #ifndef __XFS_AOPS_H__ | 18 | #ifndef __XFS_AOPS_H__ |
19 | #define __XFS_AOPS_H__ | 19 | #define __XFS_AOPS_H__ |
20 | 20 | ||
21 | extern struct workqueue_struct *xfsdatad_workqueue; | ||
22 | extern struct workqueue_struct *xfsconvertd_workqueue; | ||
23 | extern mempool_t *xfs_ioend_pool; | 21 | extern mempool_t *xfs_ioend_pool; |
24 | 22 | ||
25 | /* | 23 | /* |
@@ -48,12 +46,14 @@ typedef struct xfs_ioend { | |||
48 | int io_error; /* I/O error code */ | 46 | int io_error; /* I/O error code */ |
49 | atomic_t io_remaining; /* hold count */ | 47 | atomic_t io_remaining; /* hold count */ |
50 | unsigned int io_isasync : 1; /* needs aio_complete */ | 48 | unsigned int io_isasync : 1; /* needs aio_complete */ |
49 | unsigned int io_isdirect : 1;/* direct I/O */ | ||
51 | struct inode *io_inode; /* file being written to */ | 50 | struct inode *io_inode; /* file being written to */ |
52 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | 51 | struct buffer_head *io_buffer_head;/* buffer linked list head */ |
53 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ | 52 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ |
54 | size_t io_size; /* size of the extent */ | 53 | size_t io_size; /* size of the extent */ |
55 | xfs_off_t io_offset; /* offset in the file */ | 54 | xfs_off_t io_offset; /* offset in the file */ |
56 | struct work_struct io_work; /* xfsdatad work queue */ | 55 | struct work_struct io_work; /* xfsdatad work queue */ |
56 | struct xfs_trans *io_append_trans;/* xact. for size update */ | ||
57 | struct kiocb *io_iocb; | 57 | struct kiocb *io_iocb; |
58 | int io_result; | 58 | int io_result; |
59 | } xfs_ioend_t; | 59 | } xfs_ioend_t; |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 08b9ac644c31..65d61b948ead 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -853,6 +853,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) | |||
853 | { | 853 | { |
854 | int newsize, forkoff, retval; | 854 | int newsize, forkoff, retval; |
855 | 855 | ||
856 | trace_xfs_attr_sf_addname(args); | ||
857 | |||
856 | retval = xfs_attr_shortform_lookup(args); | 858 | retval = xfs_attr_shortform_lookup(args); |
857 | if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { | 859 | if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { |
858 | return(retval); | 860 | return(retval); |
@@ -896,6 +898,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
896 | xfs_dabuf_t *bp; | 898 | xfs_dabuf_t *bp; |
897 | int retval, error, committed, forkoff; | 899 | int retval, error, committed, forkoff; |
898 | 900 | ||
901 | trace_xfs_attr_leaf_addname(args); | ||
902 | |||
899 | /* | 903 | /* |
900 | * Read the (only) block in the attribute list in. | 904 | * Read the (only) block in the attribute list in. |
901 | */ | 905 | */ |
@@ -920,6 +924,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
920 | xfs_da_brelse(args->trans, bp); | 924 | xfs_da_brelse(args->trans, bp); |
921 | return(retval); | 925 | return(retval); |
922 | } | 926 | } |
927 | |||
928 | trace_xfs_attr_leaf_replace(args); | ||
929 | |||
923 | args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ | 930 | args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ |
924 | args->blkno2 = args->blkno; /* set 2nd entry info*/ | 931 | args->blkno2 = args->blkno; /* set 2nd entry info*/ |
925 | args->index2 = args->index; | 932 | args->index2 = args->index; |
@@ -1090,6 +1097,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) | |||
1090 | xfs_dabuf_t *bp; | 1097 | xfs_dabuf_t *bp; |
1091 | int error, committed, forkoff; | 1098 | int error, committed, forkoff; |
1092 | 1099 | ||
1100 | trace_xfs_attr_leaf_removename(args); | ||
1101 | |||
1093 | /* | 1102 | /* |
1094 | * Remove the attribute. | 1103 | * Remove the attribute. |
1095 | */ | 1104 | */ |
@@ -1223,6 +1232,8 @@ xfs_attr_node_addname(xfs_da_args_t *args) | |||
1223 | xfs_mount_t *mp; | 1232 | xfs_mount_t *mp; |
1224 | int committed, retval, error; | 1233 | int committed, retval, error; |
1225 | 1234 | ||
1235 | trace_xfs_attr_node_addname(args); | ||
1236 | |||
1226 | /* | 1237 | /* |
1227 | * Fill in bucket of arguments/results/context to carry around. | 1238 | * Fill in bucket of arguments/results/context to carry around. |
1228 | */ | 1239 | */ |
@@ -1249,6 +1260,9 @@ restart: | |||
1249 | } else if (retval == EEXIST) { | 1260 | } else if (retval == EEXIST) { |
1250 | if (args->flags & ATTR_CREATE) | 1261 | if (args->flags & ATTR_CREATE) |
1251 | goto out; | 1262 | goto out; |
1263 | |||
1264 | trace_xfs_attr_node_replace(args); | ||
1265 | |||
1252 | args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ | 1266 | args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ |
1253 | args->blkno2 = args->blkno; /* set 2nd entry info*/ | 1267 | args->blkno2 = args->blkno; /* set 2nd entry info*/ |
1254 | args->index2 = args->index; | 1268 | args->index2 = args->index; |
@@ -1480,6 +1494,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) | |||
1480 | xfs_dabuf_t *bp; | 1494 | xfs_dabuf_t *bp; |
1481 | int retval, error, committed, forkoff; | 1495 | int retval, error, committed, forkoff; |
1482 | 1496 | ||
1497 | trace_xfs_attr_node_removename(args); | ||
1498 | |||
1483 | /* | 1499 | /* |
1484 | * Tie a string around our finger to remind us where we are. | 1500 | * Tie a string around our finger to remind us where we are. |
1485 | */ | 1501 | */ |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index d25eafd4d28d..76d93dc953e1 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -235,6 +235,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args) | |||
235 | xfs_inode_t *dp; | 235 | xfs_inode_t *dp; |
236 | xfs_ifork_t *ifp; | 236 | xfs_ifork_t *ifp; |
237 | 237 | ||
238 | trace_xfs_attr_sf_create(args); | ||
239 | |||
238 | dp = args->dp; | 240 | dp = args->dp; |
239 | ASSERT(dp != NULL); | 241 | ASSERT(dp != NULL); |
240 | ifp = dp->i_afp; | 242 | ifp = dp->i_afp; |
@@ -268,6 +270,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) | |||
268 | xfs_inode_t *dp; | 270 | xfs_inode_t *dp; |
269 | xfs_ifork_t *ifp; | 271 | xfs_ifork_t *ifp; |
270 | 272 | ||
273 | trace_xfs_attr_sf_add(args); | ||
274 | |||
271 | dp = args->dp; | 275 | dp = args->dp; |
272 | mp = dp->i_mount; | 276 | mp = dp->i_mount; |
273 | dp->i_d.di_forkoff = forkoff; | 277 | dp->i_d.di_forkoff = forkoff; |
@@ -337,6 +341,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) | |||
337 | xfs_mount_t *mp; | 341 | xfs_mount_t *mp; |
338 | xfs_inode_t *dp; | 342 | xfs_inode_t *dp; |
339 | 343 | ||
344 | trace_xfs_attr_sf_remove(args); | ||
345 | |||
340 | dp = args->dp; | 346 | dp = args->dp; |
341 | mp = dp->i_mount; | 347 | mp = dp->i_mount; |
342 | base = sizeof(xfs_attr_sf_hdr_t); | 348 | base = sizeof(xfs_attr_sf_hdr_t); |
@@ -405,6 +411,8 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) | |||
405 | int i; | 411 | int i; |
406 | xfs_ifork_t *ifp; | 412 | xfs_ifork_t *ifp; |
407 | 413 | ||
414 | trace_xfs_attr_sf_lookup(args); | ||
415 | |||
408 | ifp = args->dp->i_afp; | 416 | ifp = args->dp->i_afp; |
409 | ASSERT(ifp->if_flags & XFS_IFINLINE); | 417 | ASSERT(ifp->if_flags & XFS_IFINLINE); |
410 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | 418 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; |
@@ -476,6 +484,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) | |||
476 | xfs_dabuf_t *bp; | 484 | xfs_dabuf_t *bp; |
477 | xfs_ifork_t *ifp; | 485 | xfs_ifork_t *ifp; |
478 | 486 | ||
487 | trace_xfs_attr_sf_to_leaf(args); | ||
488 | |||
479 | dp = args->dp; | 489 | dp = args->dp; |
480 | ifp = dp->i_afp; | 490 | ifp = dp->i_afp; |
481 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | 491 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; |
@@ -775,6 +785,8 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) | |||
775 | char *tmpbuffer; | 785 | char *tmpbuffer; |
776 | int error, i; | 786 | int error, i; |
777 | 787 | ||
788 | trace_xfs_attr_leaf_to_sf(args); | ||
789 | |||
778 | dp = args->dp; | 790 | dp = args->dp; |
779 | tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); | 791 | tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); |
780 | ASSERT(tmpbuffer != NULL); | 792 | ASSERT(tmpbuffer != NULL); |
@@ -848,6 +860,8 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) | |||
848 | xfs_dablk_t blkno; | 860 | xfs_dablk_t blkno; |
849 | int error; | 861 | int error; |
850 | 862 | ||
863 | trace_xfs_attr_leaf_to_node(args); | ||
864 | |||
851 | dp = args->dp; | 865 | dp = args->dp; |
852 | bp1 = bp2 = NULL; | 866 | bp1 = bp2 = NULL; |
853 | error = xfs_da_grow_inode(args, &blkno); | 867 | error = xfs_da_grow_inode(args, &blkno); |
@@ -911,6 +925,8 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) | |||
911 | xfs_dabuf_t *bp; | 925 | xfs_dabuf_t *bp; |
912 | int error; | 926 | int error; |
913 | 927 | ||
928 | trace_xfs_attr_leaf_create(args); | ||
929 | |||
914 | dp = args->dp; | 930 | dp = args->dp; |
915 | ASSERT(dp != NULL); | 931 | ASSERT(dp != NULL); |
916 | error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, | 932 | error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, |
@@ -948,6 +964,8 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, | |||
948 | xfs_dablk_t blkno; | 964 | xfs_dablk_t blkno; |
949 | int error; | 965 | int error; |
950 | 966 | ||
967 | trace_xfs_attr_leaf_split(state->args); | ||
968 | |||
951 | /* | 969 | /* |
952 | * Allocate space for a new leaf node. | 970 | * Allocate space for a new leaf node. |
953 | */ | 971 | */ |
@@ -977,10 +995,13 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, | |||
977 | * | 995 | * |
978 | * Insert the "new" entry in the correct block. | 996 | * Insert the "new" entry in the correct block. |
979 | */ | 997 | */ |
980 | if (state->inleaf) | 998 | if (state->inleaf) { |
999 | trace_xfs_attr_leaf_add_old(state->args); | ||
981 | error = xfs_attr_leaf_add(oldblk->bp, state->args); | 1000 | error = xfs_attr_leaf_add(oldblk->bp, state->args); |
982 | else | 1001 | } else { |
1002 | trace_xfs_attr_leaf_add_new(state->args); | ||
983 | error = xfs_attr_leaf_add(newblk->bp, state->args); | 1003 | error = xfs_attr_leaf_add(newblk->bp, state->args); |
1004 | } | ||
984 | 1005 | ||
985 | /* | 1006 | /* |
986 | * Update last hashval in each block since we added the name. | 1007 | * Update last hashval in each block since we added the name. |
@@ -1001,6 +1022,8 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) | |||
1001 | xfs_attr_leaf_map_t *map; | 1022 | xfs_attr_leaf_map_t *map; |
1002 | int tablesize, entsize, sum, tmp, i; | 1023 | int tablesize, entsize, sum, tmp, i; |
1003 | 1024 | ||
1025 | trace_xfs_attr_leaf_add(args); | ||
1026 | |||
1004 | leaf = bp->data; | 1027 | leaf = bp->data; |
1005 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1028 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1006 | ASSERT((args->index >= 0) | 1029 | ASSERT((args->index >= 0) |
@@ -1128,8 +1151,6 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) | |||
1128 | (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); | 1151 | (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); |
1129 | 1152 | ||
1130 | /* | 1153 | /* |
1131 | * Copy the attribute name and value into the new space. | ||
1132 | * | ||
1133 | * For "remote" attribute values, simply note that we need to | 1154 | * For "remote" attribute values, simply note that we need to |
1134 | * allocate space for the "remote" value. We can't actually | 1155 | * allocate space for the "remote" value. We can't actually |
1135 | * allocate the extents in this transaction, and we can't decide | 1156 | * allocate the extents in this transaction, and we can't decide |
@@ -1265,6 +1286,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
1265 | ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1286 | ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1266 | args = state->args; | 1287 | args = state->args; |
1267 | 1288 | ||
1289 | trace_xfs_attr_leaf_rebalance(args); | ||
1290 | |||
1268 | /* | 1291 | /* |
1269 | * Check ordering of blocks, reverse if it makes things simpler. | 1292 | * Check ordering of blocks, reverse if it makes things simpler. |
1270 | * | 1293 | * |
@@ -1810,6 +1833,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, | |||
1810 | xfs_mount_t *mp; | 1833 | xfs_mount_t *mp; |
1811 | char *tmpbuffer; | 1834 | char *tmpbuffer; |
1812 | 1835 | ||
1836 | trace_xfs_attr_leaf_unbalance(state->args); | ||
1837 | |||
1813 | /* | 1838 | /* |
1814 | * Set up environment. | 1839 | * Set up environment. |
1815 | */ | 1840 | */ |
@@ -1919,6 +1944,8 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) | |||
1919 | int probe, span; | 1944 | int probe, span; |
1920 | xfs_dahash_t hashval; | 1945 | xfs_dahash_t hashval; |
1921 | 1946 | ||
1947 | trace_xfs_attr_leaf_lookup(args); | ||
1948 | |||
1922 | leaf = bp->data; | 1949 | leaf = bp->data; |
1923 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1950 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1924 | ASSERT(be16_to_cpu(leaf->hdr.count) | 1951 | ASSERT(be16_to_cpu(leaf->hdr.count) |
@@ -2445,6 +2472,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) | |||
2445 | char *name; | 2472 | char *name; |
2446 | #endif /* DEBUG */ | 2473 | #endif /* DEBUG */ |
2447 | 2474 | ||
2475 | trace_xfs_attr_leaf_clearflag(args); | ||
2448 | /* | 2476 | /* |
2449 | * Set up the operation. | 2477 | * Set up the operation. |
2450 | */ | 2478 | */ |
@@ -2509,6 +2537,8 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) | |||
2509 | xfs_dabuf_t *bp; | 2537 | xfs_dabuf_t *bp; |
2510 | int error; | 2538 | int error; |
2511 | 2539 | ||
2540 | trace_xfs_attr_leaf_setflag(args); | ||
2541 | |||
2512 | /* | 2542 | /* |
2513 | * Set up the operation. | 2543 | * Set up the operation. |
2514 | */ | 2544 | */ |
@@ -2565,6 +2595,8 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) | |||
2565 | char *name1, *name2; | 2595 | char *name1, *name2; |
2566 | #endif /* DEBUG */ | 2596 | #endif /* DEBUG */ |
2567 | 2597 | ||
2598 | trace_xfs_attr_leaf_flipflags(args); | ||
2599 | |||
2568 | /* | 2600 | /* |
2569 | * Read the block containing the "old" attr | 2601 | * Read the block containing the "old" attr |
2570 | */ | 2602 | */ |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 188ef2fbd628..85e7e327bcd8 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5124,6 +5124,15 @@ xfs_bunmapi( | |||
5124 | cur->bc_private.b.flags = 0; | 5124 | cur->bc_private.b.flags = 0; |
5125 | } else | 5125 | } else |
5126 | cur = NULL; | 5126 | cur = NULL; |
5127 | |||
5128 | if (isrt) { | ||
5129 | /* | ||
5130 | * Synchronize by locking the bitmap inode. | ||
5131 | */ | ||
5132 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | ||
5133 | xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); | ||
5134 | } | ||
5135 | |||
5127 | extno = 0; | 5136 | extno = 0; |
5128 | while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && | 5137 | while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && |
5129 | (nexts == 0 || extno < nexts)) { | 5138 | (nexts == 0 || extno < nexts)) { |
@@ -5536,8 +5545,12 @@ xfs_getbmap( | |||
5536 | if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) | 5545 | if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) |
5537 | return XFS_ERROR(ENOMEM); | 5546 | return XFS_ERROR(ENOMEM); |
5538 | out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); | 5547 | out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); |
5539 | if (!out) | 5548 | if (!out) { |
5540 | return XFS_ERROR(ENOMEM); | 5549 | out = kmem_zalloc_large(bmv->bmv_count * |
5550 | sizeof(struct getbmapx)); | ||
5551 | if (!out) | ||
5552 | return XFS_ERROR(ENOMEM); | ||
5553 | } | ||
5541 | 5554 | ||
5542 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 5555 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
5543 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { | 5556 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { |
@@ -5661,7 +5674,10 @@ xfs_getbmap( | |||
5661 | break; | 5674 | break; |
5662 | } | 5675 | } |
5663 | 5676 | ||
5664 | kmem_free(out); | 5677 | if (is_vmalloc_addr(out)) |
5678 | kmem_free_large(out); | ||
5679 | else | ||
5680 | kmem_free(out); | ||
5665 | return error; | 5681 | return error; |
5666 | } | 5682 | } |
5667 | 5683 | ||
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4dff85c7d7eb..6819b5163e33 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone; | |||
45 | STATIC int xfsbufd(void *); | 45 | STATIC int xfsbufd(void *); |
46 | 46 | ||
47 | static struct workqueue_struct *xfslogd_workqueue; | 47 | static struct workqueue_struct *xfslogd_workqueue; |
48 | struct workqueue_struct *xfsdatad_workqueue; | ||
49 | struct workqueue_struct *xfsconvertd_workqueue; | ||
50 | 48 | ||
51 | #ifdef XFS_BUF_LOCK_TRACKING | 49 | #ifdef XFS_BUF_LOCK_TRACKING |
52 | # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) | 50 | # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) |
@@ -1793,21 +1791,8 @@ xfs_buf_init(void) | |||
1793 | if (!xfslogd_workqueue) | 1791 | if (!xfslogd_workqueue) |
1794 | goto out_free_buf_zone; | 1792 | goto out_free_buf_zone; |
1795 | 1793 | ||
1796 | xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); | ||
1797 | if (!xfsdatad_workqueue) | ||
1798 | goto out_destroy_xfslogd_workqueue; | ||
1799 | |||
1800 | xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", | ||
1801 | WQ_MEM_RECLAIM, 1); | ||
1802 | if (!xfsconvertd_workqueue) | ||
1803 | goto out_destroy_xfsdatad_workqueue; | ||
1804 | |||
1805 | return 0; | 1794 | return 0; |
1806 | 1795 | ||
1807 | out_destroy_xfsdatad_workqueue: | ||
1808 | destroy_workqueue(xfsdatad_workqueue); | ||
1809 | out_destroy_xfslogd_workqueue: | ||
1810 | destroy_workqueue(xfslogd_workqueue); | ||
1811 | out_free_buf_zone: | 1796 | out_free_buf_zone: |
1812 | kmem_zone_destroy(xfs_buf_zone); | 1797 | kmem_zone_destroy(xfs_buf_zone); |
1813 | out: | 1798 | out: |
@@ -1817,8 +1802,6 @@ xfs_buf_init(void) | |||
1817 | void | 1802 | void |
1818 | xfs_buf_terminate(void) | 1803 | xfs_buf_terminate(void) |
1819 | { | 1804 | { |
1820 | destroy_workqueue(xfsconvertd_workqueue); | ||
1821 | destroy_workqueue(xfsdatad_workqueue); | ||
1822 | destroy_workqueue(xfslogd_workqueue); | 1805 | destroy_workqueue(xfslogd_workqueue); |
1823 | kmem_zone_destroy(xfs_buf_zone); | 1806 | kmem_zone_destroy(xfs_buf_zone); |
1824 | } | 1807 | } |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 77c74257c2a3..7f1a6f5b05a6 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -108,6 +108,8 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, | |||
108 | int error; | 108 | int error; |
109 | xfs_trans_t *tp; | 109 | xfs_trans_t *tp; |
110 | 110 | ||
111 | trace_xfs_da_node_create(args); | ||
112 | |||
111 | tp = args->trans; | 113 | tp = args->trans; |
112 | error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); | 114 | error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); |
113 | if (error) | 115 | if (error) |
@@ -140,6 +142,8 @@ xfs_da_split(xfs_da_state_t *state) | |||
140 | xfs_dabuf_t *bp; | 142 | xfs_dabuf_t *bp; |
141 | int max, action, error, i; | 143 | int max, action, error, i; |
142 | 144 | ||
145 | trace_xfs_da_split(state->args); | ||
146 | |||
143 | /* | 147 | /* |
144 | * Walk back up the tree splitting/inserting/adjusting as necessary. | 148 | * Walk back up the tree splitting/inserting/adjusting as necessary. |
145 | * If we need to insert and there isn't room, split the node, then | 149 | * If we need to insert and there isn't room, split the node, then |
@@ -178,10 +182,12 @@ xfs_da_split(xfs_da_state_t *state) | |||
178 | state->extravalid = 1; | 182 | state->extravalid = 1; |
179 | if (state->inleaf) { | 183 | if (state->inleaf) { |
180 | state->extraafter = 0; /* before newblk */ | 184 | state->extraafter = 0; /* before newblk */ |
185 | trace_xfs_attr_leaf_split_before(state->args); | ||
181 | error = xfs_attr_leaf_split(state, oldblk, | 186 | error = xfs_attr_leaf_split(state, oldblk, |
182 | &state->extrablk); | 187 | &state->extrablk); |
183 | } else { | 188 | } else { |
184 | state->extraafter = 1; /* after newblk */ | 189 | state->extraafter = 1; /* after newblk */ |
190 | trace_xfs_attr_leaf_split_after(state->args); | ||
185 | error = xfs_attr_leaf_split(state, newblk, | 191 | error = xfs_attr_leaf_split(state, newblk, |
186 | &state->extrablk); | 192 | &state->extrablk); |
187 | } | 193 | } |
@@ -300,6 +306,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
300 | xfs_mount_t *mp; | 306 | xfs_mount_t *mp; |
301 | xfs_dir2_leaf_t *leaf; | 307 | xfs_dir2_leaf_t *leaf; |
302 | 308 | ||
309 | trace_xfs_da_root_split(state->args); | ||
310 | |||
303 | /* | 311 | /* |
304 | * Copy the existing (incorrect) block from the root node position | 312 | * Copy the existing (incorrect) block from the root node position |
305 | * to a free space somewhere. | 313 | * to a free space somewhere. |
@@ -380,6 +388,8 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, | |||
380 | int newcount, error; | 388 | int newcount, error; |
381 | int useextra; | 389 | int useextra; |
382 | 390 | ||
391 | trace_xfs_da_node_split(state->args); | ||
392 | |||
383 | node = oldblk->bp->data; | 393 | node = oldblk->bp->data; |
384 | ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); | 394 | ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); |
385 | 395 | ||
@@ -466,6 +476,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
466 | int count, tmp; | 476 | int count, tmp; |
467 | xfs_trans_t *tp; | 477 | xfs_trans_t *tp; |
468 | 478 | ||
479 | trace_xfs_da_node_rebalance(state->args); | ||
480 | |||
469 | node1 = blk1->bp->data; | 481 | node1 = blk1->bp->data; |
470 | node2 = blk2->bp->data; | 482 | node2 = blk2->bp->data; |
471 | /* | 483 | /* |
@@ -574,6 +586,8 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, | |||
574 | xfs_da_node_entry_t *btree; | 586 | xfs_da_node_entry_t *btree; |
575 | int tmp; | 587 | int tmp; |
576 | 588 | ||
589 | trace_xfs_da_node_add(state->args); | ||
590 | |||
577 | node = oldblk->bp->data; | 591 | node = oldblk->bp->data; |
578 | ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); | 592 | ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); |
579 | ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); | 593 | ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); |
@@ -619,6 +633,8 @@ xfs_da_join(xfs_da_state_t *state) | |||
619 | xfs_da_state_blk_t *drop_blk, *save_blk; | 633 | xfs_da_state_blk_t *drop_blk, *save_blk; |
620 | int action, error; | 634 | int action, error; |
621 | 635 | ||
636 | trace_xfs_da_join(state->args); | ||
637 | |||
622 | action = 0; | 638 | action = 0; |
623 | drop_blk = &state->path.blk[ state->path.active-1 ]; | 639 | drop_blk = &state->path.blk[ state->path.active-1 ]; |
624 | save_blk = &state->altpath.blk[ state->path.active-1 ]; | 640 | save_blk = &state->altpath.blk[ state->path.active-1 ]; |
@@ -723,6 +739,8 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) | |||
723 | xfs_dabuf_t *bp; | 739 | xfs_dabuf_t *bp; |
724 | int error; | 740 | int error; |
725 | 741 | ||
742 | trace_xfs_da_root_join(state->args); | ||
743 | |||
726 | args = state->args; | 744 | args = state->args; |
727 | ASSERT(args != NULL); | 745 | ASSERT(args != NULL); |
728 | ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); | 746 | ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); |
@@ -941,6 +959,8 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) | |||
941 | xfs_da_node_entry_t *btree; | 959 | xfs_da_node_entry_t *btree; |
942 | int tmp; | 960 | int tmp; |
943 | 961 | ||
962 | trace_xfs_da_node_remove(state->args); | ||
963 | |||
944 | node = drop_blk->bp->data; | 964 | node = drop_blk->bp->data; |
945 | ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); | 965 | ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); |
946 | ASSERT(drop_blk->index >= 0); | 966 | ASSERT(drop_blk->index >= 0); |
@@ -984,6 +1004,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, | |||
984 | int tmp; | 1004 | int tmp; |
985 | xfs_trans_t *tp; | 1005 | xfs_trans_t *tp; |
986 | 1006 | ||
1007 | trace_xfs_da_node_unbalance(state->args); | ||
1008 | |||
987 | drop_node = drop_blk->bp->data; | 1009 | drop_node = drop_blk->bp->data; |
988 | save_node = save_blk->bp->data; | 1010 | save_node = save_blk->bp->data; |
989 | ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); | 1011 | ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); |
@@ -1230,6 +1252,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, | |||
1230 | /* | 1252 | /* |
1231 | * Link new block in before existing block. | 1253 | * Link new block in before existing block. |
1232 | */ | 1254 | */ |
1255 | trace_xfs_da_link_before(args); | ||
1233 | new_info->forw = cpu_to_be32(old_blk->blkno); | 1256 | new_info->forw = cpu_to_be32(old_blk->blkno); |
1234 | new_info->back = old_info->back; | 1257 | new_info->back = old_info->back; |
1235 | if (old_info->back) { | 1258 | if (old_info->back) { |
@@ -1251,6 +1274,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, | |||
1251 | /* | 1274 | /* |
1252 | * Link new block in after existing block. | 1275 | * Link new block in after existing block. |
1253 | */ | 1276 | */ |
1277 | trace_xfs_da_link_after(args); | ||
1254 | new_info->forw = old_info->forw; | 1278 | new_info->forw = old_info->forw; |
1255 | new_info->back = cpu_to_be32(old_blk->blkno); | 1279 | new_info->back = cpu_to_be32(old_blk->blkno); |
1256 | if (old_info->forw) { | 1280 | if (old_info->forw) { |
@@ -1348,6 +1372,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, | |||
1348 | * Unlink the leaf block from the doubly linked chain of leaves. | 1372 | * Unlink the leaf block from the doubly linked chain of leaves. |
1349 | */ | 1373 | */ |
1350 | if (be32_to_cpu(save_info->back) == drop_blk->blkno) { | 1374 | if (be32_to_cpu(save_info->back) == drop_blk->blkno) { |
1375 | trace_xfs_da_unlink_back(args); | ||
1351 | save_info->back = drop_info->back; | 1376 | save_info->back = drop_info->back; |
1352 | if (drop_info->back) { | 1377 | if (drop_info->back) { |
1353 | error = xfs_da_read_buf(args->trans, args->dp, | 1378 | error = xfs_da_read_buf(args->trans, args->dp, |
@@ -1365,6 +1390,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, | |||
1365 | xfs_da_buf_done(bp); | 1390 | xfs_da_buf_done(bp); |
1366 | } | 1391 | } |
1367 | } else { | 1392 | } else { |
1393 | trace_xfs_da_unlink_forward(args); | ||
1368 | save_info->forw = drop_info->forw; | 1394 | save_info->forw = drop_info->forw; |
1369 | if (drop_info->forw) { | 1395 | if (drop_info->forw) { |
1370 | error = xfs_da_read_buf(args->trans, args->dp, | 1396 | error = xfs_da_read_buf(args->trans, args->dp, |
@@ -1652,6 +1678,8 @@ xfs_da_grow_inode( | |||
1652 | int count; | 1678 | int count; |
1653 | int error; | 1679 | int error; |
1654 | 1680 | ||
1681 | trace_xfs_da_grow_inode(args); | ||
1682 | |||
1655 | if (args->whichfork == XFS_DATA_FORK) { | 1683 | if (args->whichfork == XFS_DATA_FORK) { |
1656 | bno = args->dp->i_mount->m_dirleafblk; | 1684 | bno = args->dp->i_mount->m_dirleafblk; |
1657 | count = args->dp->i_mount->m_dirblkfsbs; | 1685 | count = args->dp->i_mount->m_dirblkfsbs; |
@@ -1690,6 +1718,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, | |||
1690 | xfs_dir2_leaf_t *dead_leaf2; | 1718 | xfs_dir2_leaf_t *dead_leaf2; |
1691 | xfs_dahash_t dead_hash; | 1719 | xfs_dahash_t dead_hash; |
1692 | 1720 | ||
1721 | trace_xfs_da_swap_lastblock(args); | ||
1722 | |||
1693 | dead_buf = *dead_bufp; | 1723 | dead_buf = *dead_bufp; |
1694 | dead_blkno = *dead_blknop; | 1724 | dead_blkno = *dead_blknop; |
1695 | tp = args->trans; | 1725 | tp = args->trans; |
@@ -1878,6 +1908,8 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, | |||
1878 | xfs_trans_t *tp; | 1908 | xfs_trans_t *tp; |
1879 | xfs_mount_t *mp; | 1909 | xfs_mount_t *mp; |
1880 | 1910 | ||
1911 | trace_xfs_da_shrink_inode(args); | ||
1912 | |||
1881 | dp = args->dp; | 1913 | dp = args->dp; |
1882 | w = args->whichfork; | 1914 | w = args->whichfork; |
1883 | tp = args->trans; | 1915 | tp = args->trans; |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index dd974a55c77d..1137bbc5eccb 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -215,7 +215,7 @@ xfs_swap_extents( | |||
215 | xfs_trans_t *tp; | 215 | xfs_trans_t *tp; |
216 | xfs_bstat_t *sbp = &sxp->sx_stat; | 216 | xfs_bstat_t *sbp = &sxp->sx_stat; |
217 | xfs_ifork_t *tempifp, *ifp, *tifp; | 217 | xfs_ifork_t *tempifp, *ifp, *tifp; |
218 | int ilf_fields, tilf_fields; | 218 | int src_log_flags, target_log_flags; |
219 | int error = 0; | 219 | int error = 0; |
220 | int aforkblks = 0; | 220 | int aforkblks = 0; |
221 | int taforkblks = 0; | 221 | int taforkblks = 0; |
@@ -385,9 +385,8 @@ xfs_swap_extents( | |||
385 | tip->i_delayed_blks = ip->i_delayed_blks; | 385 | tip->i_delayed_blks = ip->i_delayed_blks; |
386 | ip->i_delayed_blks = 0; | 386 | ip->i_delayed_blks = 0; |
387 | 387 | ||
388 | ilf_fields = XFS_ILOG_CORE; | 388 | src_log_flags = XFS_ILOG_CORE; |
389 | 389 | switch (ip->i_d.di_format) { | |
390 | switch(ip->i_d.di_format) { | ||
391 | case XFS_DINODE_FMT_EXTENTS: | 390 | case XFS_DINODE_FMT_EXTENTS: |
392 | /* If the extents fit in the inode, fix the | 391 | /* If the extents fit in the inode, fix the |
393 | * pointer. Otherwise it's already NULL or | 392 | * pointer. Otherwise it's already NULL or |
@@ -397,16 +396,15 @@ xfs_swap_extents( | |||
397 | ifp->if_u1.if_extents = | 396 | ifp->if_u1.if_extents = |
398 | ifp->if_u2.if_inline_ext; | 397 | ifp->if_u2.if_inline_ext; |
399 | } | 398 | } |
400 | ilf_fields |= XFS_ILOG_DEXT; | 399 | src_log_flags |= XFS_ILOG_DEXT; |
401 | break; | 400 | break; |
402 | case XFS_DINODE_FMT_BTREE: | 401 | case XFS_DINODE_FMT_BTREE: |
403 | ilf_fields |= XFS_ILOG_DBROOT; | 402 | src_log_flags |= XFS_ILOG_DBROOT; |
404 | break; | 403 | break; |
405 | } | 404 | } |
406 | 405 | ||
407 | tilf_fields = XFS_ILOG_CORE; | 406 | target_log_flags = XFS_ILOG_CORE; |
408 | 407 | switch (tip->i_d.di_format) { | |
409 | switch(tip->i_d.di_format) { | ||
410 | case XFS_DINODE_FMT_EXTENTS: | 408 | case XFS_DINODE_FMT_EXTENTS: |
411 | /* If the extents fit in the inode, fix the | 409 | /* If the extents fit in the inode, fix the |
412 | * pointer. Otherwise it's already NULL or | 410 | * pointer. Otherwise it's already NULL or |
@@ -416,10 +414,10 @@ xfs_swap_extents( | |||
416 | tifp->if_u1.if_extents = | 414 | tifp->if_u1.if_extents = |
417 | tifp->if_u2.if_inline_ext; | 415 | tifp->if_u2.if_inline_ext; |
418 | } | 416 | } |
419 | tilf_fields |= XFS_ILOG_DEXT; | 417 | target_log_flags |= XFS_ILOG_DEXT; |
420 | break; | 418 | break; |
421 | case XFS_DINODE_FMT_BTREE: | 419 | case XFS_DINODE_FMT_BTREE: |
422 | tilf_fields |= XFS_ILOG_DBROOT; | 420 | target_log_flags |= XFS_ILOG_DBROOT; |
423 | break; | 421 | break; |
424 | } | 422 | } |
425 | 423 | ||
@@ -427,8 +425,8 @@ xfs_swap_extents( | |||
427 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 425 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
428 | xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 426 | xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
429 | 427 | ||
430 | xfs_trans_log_inode(tp, ip, ilf_fields); | 428 | xfs_trans_log_inode(tp, ip, src_log_flags); |
431 | xfs_trans_log_inode(tp, tip, tilf_fields); | 429 | xfs_trans_log_inode(tp, tip, target_log_flags); |
432 | 430 | ||
433 | /* | 431 | /* |
434 | * If this is a synchronous mount, make sure that the | 432 | * If this is a synchronous mount, make sure that the |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 9245e029b8ea..d3b63aefd01d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "xfs_dinode.h" | 29 | #include "xfs_dinode.h" |
30 | #include "xfs_inode.h" | 30 | #include "xfs_inode.h" |
31 | #include "xfs_inode_item.h" | 31 | #include "xfs_inode_item.h" |
32 | #include "xfs_dir2.h" | ||
32 | #include "xfs_dir2_format.h" | 33 | #include "xfs_dir2_format.h" |
33 | #include "xfs_dir2_priv.h" | 34 | #include "xfs_dir2_priv.h" |
34 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 286a051f12cf..1ad3a4b8ca40 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -37,9 +37,9 @@ STATIC int | |||
37 | xfs_trim_extents( | 37 | xfs_trim_extents( |
38 | struct xfs_mount *mp, | 38 | struct xfs_mount *mp, |
39 | xfs_agnumber_t agno, | 39 | xfs_agnumber_t agno, |
40 | xfs_fsblock_t start, | 40 | xfs_daddr_t start, |
41 | xfs_fsblock_t end, | 41 | xfs_daddr_t end, |
42 | xfs_fsblock_t minlen, | 42 | xfs_daddr_t minlen, |
43 | __uint64_t *blocks_trimmed) | 43 | __uint64_t *blocks_trimmed) |
44 | { | 44 | { |
45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; | 45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; |
@@ -67,7 +67,7 @@ xfs_trim_extents( | |||
67 | /* | 67 | /* |
68 | * Look up the longest btree in the AGF and start with it. | 68 | * Look up the longest btree in the AGF and start with it. |
69 | */ | 69 | */ |
70 | error = xfs_alloc_lookup_le(cur, 0, | 70 | error = xfs_alloc_lookup_ge(cur, 0, |
71 | be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); | 71 | be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); |
72 | if (error) | 72 | if (error) |
73 | goto out_del_cursor; | 73 | goto out_del_cursor; |
@@ -77,8 +77,10 @@ xfs_trim_extents( | |||
77 | * enough to be worth discarding. | 77 | * enough to be worth discarding. |
78 | */ | 78 | */ |
79 | while (i) { | 79 | while (i) { |
80 | xfs_agblock_t fbno; | 80 | xfs_agblock_t fbno; |
81 | xfs_extlen_t flen; | 81 | xfs_extlen_t flen; |
82 | xfs_daddr_t dbno; | ||
83 | xfs_extlen_t dlen; | ||
82 | 84 | ||
83 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | 85 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); |
84 | if (error) | 86 | if (error) |
@@ -87,9 +89,17 @@ xfs_trim_extents( | |||
87 | ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); | 89 | ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); |
88 | 90 | ||
89 | /* | 91 | /* |
92 | * use daddr format for all range/len calculations as that is | ||
93 | * the format the range/len variables are supplied in by | ||
94 | * userspace. | ||
95 | */ | ||
96 | dbno = XFS_AGB_TO_DADDR(mp, agno, fbno); | ||
97 | dlen = XFS_FSB_TO_BB(mp, flen); | ||
98 | |||
99 | /* | ||
90 | * Too small? Give up. | 100 | * Too small? Give up. |
91 | */ | 101 | */ |
92 | if (flen < minlen) { | 102 | if (dlen < minlen) { |
93 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); | 103 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); |
94 | goto out_del_cursor; | 104 | goto out_del_cursor; |
95 | } | 105 | } |
@@ -99,8 +109,7 @@ xfs_trim_extents( | |||
99 | * supposed to discard skip it. Do not bother to trim | 109 | * supposed to discard skip it. Do not bother to trim |
100 | * down partially overlapping ranges for now. | 110 | * down partially overlapping ranges for now. |
101 | */ | 111 | */ |
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | 112 | if (dbno + dlen < start || dbno > end) { |
103 | XFS_AGB_TO_FSB(mp, agno, fbno) > end) { | ||
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | 113 | trace_xfs_discard_exclude(mp, agno, fbno, flen); |
105 | goto next_extent; | 114 | goto next_extent; |
106 | } | 115 | } |
@@ -115,10 +124,7 @@ xfs_trim_extents( | |||
115 | } | 124 | } |
116 | 125 | ||
117 | trace_xfs_discard_extent(mp, agno, fbno, flen); | 126 | trace_xfs_discard_extent(mp, agno, fbno, flen); |
118 | error = -blkdev_issue_discard(bdev, | 127 | error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); |
119 | XFS_AGB_TO_DADDR(mp, agno, fbno), | ||
120 | XFS_FSB_TO_BB(mp, flen), | ||
121 | GFP_NOFS, 0); | ||
122 | if (error) | 128 | if (error) |
123 | goto out_del_cursor; | 129 | goto out_del_cursor; |
124 | *blocks_trimmed += flen; | 130 | *blocks_trimmed += flen; |
@@ -137,6 +143,15 @@ out_put_perag: | |||
137 | return error; | 143 | return error; |
138 | } | 144 | } |
139 | 145 | ||
146 | /* | ||
147 | * trim a range of the filesystem. | ||
148 | * | ||
149 | * Note: the parameters passed from userspace are byte ranges into the | ||
150 | * filesystem which does not match to the format we use for filesystem block | ||
151 | * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format | ||
152 | * is a linear address range. Hence we need to use DADDR based conversions and | ||
153 | * comparisons for determining the correct offset and regions to trim. | ||
154 | */ | ||
140 | int | 155 | int |
141 | xfs_ioc_trim( | 156 | xfs_ioc_trim( |
142 | struct xfs_mount *mp, | 157 | struct xfs_mount *mp, |
@@ -145,7 +160,7 @@ xfs_ioc_trim( | |||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | 160 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; |
146 | unsigned int granularity = q->limits.discard_granularity; | 161 | unsigned int granularity = q->limits.discard_granularity; |
147 | struct fstrim_range range; | 162 | struct fstrim_range range; |
148 | xfs_fsblock_t start, end, minlen; | 163 | xfs_daddr_t start, end, minlen; |
149 | xfs_agnumber_t start_agno, end_agno, agno; | 164 | xfs_agnumber_t start_agno, end_agno, agno; |
150 | __uint64_t blocks_trimmed = 0; | 165 | __uint64_t blocks_trimmed = 0; |
151 | int error, last_error = 0; | 166 | int error, last_error = 0; |
@@ -159,22 +174,22 @@ xfs_ioc_trim( | |||
159 | 174 | ||
160 | /* | 175 | /* |
161 | * Truncating down the len isn't actually quite correct, but using | 176 | * Truncating down the len isn't actually quite correct, but using |
162 | * XFS_B_TO_FSB would mean we trivially get overflows for values | 177 | * BBTOB would mean we trivially get overflows for values |
163 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default | 178 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default |
164 | * used by the fstrim application. In the end it really doesn't | 179 | * used by the fstrim application. In the end it really doesn't |
165 | * matter as trimming blocks is an advisory interface. | 180 | * matter as trimming blocks is an advisory interface. |
166 | */ | 181 | */ |
167 | start = XFS_B_TO_FSBT(mp, range.start); | 182 | start = BTOBB(range.start); |
168 | end = start + XFS_B_TO_FSBT(mp, range.len) - 1; | 183 | end = start + BTOBBT(range.len) - 1; |
169 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | 184 | minlen = BTOBB(max_t(u64, granularity, range.minlen)); |
170 | 185 | ||
171 | if (start >= mp->m_sb.sb_dblocks) | 186 | if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) |
172 | return -XFS_ERROR(EINVAL); | 187 | return -XFS_ERROR(EINVAL); |
173 | if (end > mp->m_sb.sb_dblocks - 1) | 188 | if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) |
174 | end = mp->m_sb.sb_dblocks - 1; | 189 | end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; |
175 | 190 | ||
176 | start_agno = XFS_FSB_TO_AGNO(mp, start); | 191 | start_agno = xfs_daddr_to_agno(mp, start); |
177 | end_agno = XFS_FSB_TO_AGNO(mp, end); | 192 | end_agno = xfs_daddr_to_agno(mp, end); |
178 | 193 | ||
179 | for (agno = start_agno; agno <= end_agno; agno++) { | 194 | for (agno = start_agno; agno <= end_agno; agno++) { |
180 | error = -xfs_trim_extents(mp, agno, start, end, minlen, | 195 | error = -xfs_trim_extents(mp, agno, start, end, minlen, |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 53db20ee3e77..1155208fa830 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -43,11 +43,10 @@ | |||
43 | * Lock order: | 43 | * Lock order: |
44 | * | 44 | * |
45 | * ip->i_lock | 45 | * ip->i_lock |
46 | * qh->qh_lock | 46 | * qi->qi_tree_lock |
47 | * qi->qi_dqlist_lock | 47 | * dquot->q_qlock (xfs_dqlock() and friends) |
48 | * dquot->q_qlock (xfs_dqlock() and friends) | 48 | * dquot->q_flush (xfs_dqflock() and friends) |
49 | * dquot->q_flush (xfs_dqflock() and friends) | 49 | * qi->qi_lru_lock |
50 | * xfs_Gqm->qm_dqfrlist_lock | ||
51 | * | 50 | * |
52 | * If two dquots need to be locked the order is user before group/project, | 51 | * If two dquots need to be locked the order is user before group/project, |
53 | * otherwise by the lowest id first, see xfs_dqlock2. | 52 | * otherwise by the lowest id first, see xfs_dqlock2. |
@@ -60,6 +59,9 @@ int xfs_dqreq_num; | |||
60 | int xfs_dqerror_mod = 33; | 59 | int xfs_dqerror_mod = 33; |
61 | #endif | 60 | #endif |
62 | 61 | ||
62 | struct kmem_zone *xfs_qm_dqtrxzone; | ||
63 | static struct kmem_zone *xfs_qm_dqzone; | ||
64 | |||
63 | static struct lock_class_key xfs_dquot_other_class; | 65 | static struct lock_class_key xfs_dquot_other_class; |
64 | 66 | ||
65 | /* | 67 | /* |
@@ -69,12 +71,12 @@ void | |||
69 | xfs_qm_dqdestroy( | 71 | xfs_qm_dqdestroy( |
70 | xfs_dquot_t *dqp) | 72 | xfs_dquot_t *dqp) |
71 | { | 73 | { |
72 | ASSERT(list_empty(&dqp->q_freelist)); | 74 | ASSERT(list_empty(&dqp->q_lru)); |
73 | 75 | ||
74 | mutex_destroy(&dqp->q_qlock); | 76 | mutex_destroy(&dqp->q_qlock); |
75 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); | 77 | kmem_zone_free(xfs_qm_dqzone, dqp); |
76 | 78 | ||
77 | atomic_dec(&xfs_Gqm->qm_totaldquots); | 79 | XFS_STATS_DEC(xs_qm_dquot); |
78 | } | 80 | } |
79 | 81 | ||
80 | /* | 82 | /* |
@@ -282,7 +284,7 @@ xfs_qm_dqalloc( | |||
282 | * Return if this type of quotas is turned off while we didn't | 284 | * Return if this type of quotas is turned off while we didn't |
283 | * have an inode lock | 285 | * have an inode lock |
284 | */ | 286 | */ |
285 | if (XFS_IS_THIS_QUOTA_OFF(dqp)) { | 287 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { |
286 | xfs_iunlock(quotip, XFS_ILOCK_EXCL); | 288 | xfs_iunlock(quotip, XFS_ILOCK_EXCL); |
287 | return (ESRCH); | 289 | return (ESRCH); |
288 | } | 290 | } |
@@ -384,7 +386,7 @@ xfs_qm_dqtobp( | |||
384 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; | 386 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
385 | 387 | ||
386 | xfs_ilock(quotip, XFS_ILOCK_SHARED); | 388 | xfs_ilock(quotip, XFS_ILOCK_SHARED); |
387 | if (XFS_IS_THIS_QUOTA_OFF(dqp)) { | 389 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { |
388 | /* | 390 | /* |
389 | * Return if this type of quotas is turned off while we | 391 | * Return if this type of quotas is turned off while we |
390 | * didn't have the quota inode lock. | 392 | * didn't have the quota inode lock. |
@@ -492,12 +494,12 @@ xfs_qm_dqread( | |||
492 | int cancelflags = 0; | 494 | int cancelflags = 0; |
493 | 495 | ||
494 | 496 | ||
495 | dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); | 497 | dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); |
496 | 498 | ||
497 | dqp->dq_flags = type; | 499 | dqp->dq_flags = type; |
498 | dqp->q_core.d_id = cpu_to_be32(id); | 500 | dqp->q_core.d_id = cpu_to_be32(id); |
499 | dqp->q_mount = mp; | 501 | dqp->q_mount = mp; |
500 | INIT_LIST_HEAD(&dqp->q_freelist); | 502 | INIT_LIST_HEAD(&dqp->q_lru); |
501 | mutex_init(&dqp->q_qlock); | 503 | mutex_init(&dqp->q_qlock); |
502 | init_waitqueue_head(&dqp->q_pinwait); | 504 | init_waitqueue_head(&dqp->q_pinwait); |
503 | 505 | ||
@@ -516,7 +518,7 @@ xfs_qm_dqread( | |||
516 | if (!(type & XFS_DQ_USER)) | 518 | if (!(type & XFS_DQ_USER)) |
517 | lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); | 519 | lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); |
518 | 520 | ||
519 | atomic_inc(&xfs_Gqm->qm_totaldquots); | 521 | XFS_STATS_INC(xs_qm_dquot); |
520 | 522 | ||
521 | trace_xfs_dqread(dqp); | 523 | trace_xfs_dqread(dqp); |
522 | 524 | ||
@@ -602,60 +604,6 @@ error0: | |||
602 | } | 604 | } |
603 | 605 | ||
604 | /* | 606 | /* |
605 | * Lookup a dquot in the incore dquot hashtable. We keep two separate | ||
606 | * hashtables for user and group dquots; and, these are global tables | ||
607 | * inside the XQM, not per-filesystem tables. | ||
608 | * The hash chain must be locked by caller, and it is left locked | ||
609 | * on return. Returning dquot is locked. | ||
610 | */ | ||
611 | STATIC int | ||
612 | xfs_qm_dqlookup( | ||
613 | xfs_mount_t *mp, | ||
614 | xfs_dqid_t id, | ||
615 | xfs_dqhash_t *qh, | ||
616 | xfs_dquot_t **O_dqpp) | ||
617 | { | ||
618 | xfs_dquot_t *dqp; | ||
619 | |||
620 | ASSERT(mutex_is_locked(&qh->qh_lock)); | ||
621 | |||
622 | /* | ||
623 | * Traverse the hashchain looking for a match | ||
624 | */ | ||
625 | list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { | ||
626 | /* | ||
627 | * We already have the hashlock. We don't need the | ||
628 | * dqlock to look at the id field of the dquot, since the | ||
629 | * id can't be modified without the hashlock anyway. | ||
630 | */ | ||
631 | if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp) | ||
632 | continue; | ||
633 | |||
634 | trace_xfs_dqlookup_found(dqp); | ||
635 | |||
636 | xfs_dqlock(dqp); | ||
637 | if (dqp->dq_flags & XFS_DQ_FREEING) { | ||
638 | *O_dqpp = NULL; | ||
639 | xfs_dqunlock(dqp); | ||
640 | return -1; | ||
641 | } | ||
642 | |||
643 | dqp->q_nrefs++; | ||
644 | |||
645 | /* | ||
646 | * move the dquot to the front of the hashchain | ||
647 | */ | ||
648 | list_move(&dqp->q_hashlist, &qh->qh_list); | ||
649 | trace_xfs_dqlookup_done(dqp); | ||
650 | *O_dqpp = dqp; | ||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | *O_dqpp = NULL; | ||
655 | return 1; | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a | 607 | * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a |
660 | * a locked dquot, doing an allocation (if requested) as needed. | 608 | * a locked dquot, doing an allocation (if requested) as needed. |
661 | * When both an inode and an id are given, the inode's id takes precedence. | 609 | * When both an inode and an id are given, the inode's id takes precedence. |
@@ -672,10 +620,10 @@ xfs_qm_dqget( | |||
672 | uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ | 620 | uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ |
673 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ | 621 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ |
674 | { | 622 | { |
675 | xfs_dquot_t *dqp; | 623 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
676 | xfs_dqhash_t *h; | 624 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
677 | uint version; | 625 | struct xfs_dquot *dqp; |
678 | int error; | 626 | int error; |
679 | 627 | ||
680 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 628 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
681 | if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || | 629 | if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || |
@@ -683,7 +631,6 @@ xfs_qm_dqget( | |||
683 | (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { | 631 | (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { |
684 | return (ESRCH); | 632 | return (ESRCH); |
685 | } | 633 | } |
686 | h = XFS_DQ_HASH(mp, id, type); | ||
687 | 634 | ||
688 | #ifdef DEBUG | 635 | #ifdef DEBUG |
689 | if (xfs_do_dqerror) { | 636 | if (xfs_do_dqerror) { |
@@ -699,42 +646,33 @@ xfs_qm_dqget( | |||
699 | type == XFS_DQ_GROUP); | 646 | type == XFS_DQ_GROUP); |
700 | if (ip) { | 647 | if (ip) { |
701 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 648 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
702 | if (type == XFS_DQ_USER) | 649 | ASSERT(xfs_inode_dquot(ip, type) == NULL); |
703 | ASSERT(ip->i_udquot == NULL); | ||
704 | else | ||
705 | ASSERT(ip->i_gdquot == NULL); | ||
706 | } | 650 | } |
707 | #endif | 651 | #endif |
708 | 652 | ||
709 | restart: | 653 | restart: |
710 | mutex_lock(&h->qh_lock); | 654 | mutex_lock(&qi->qi_tree_lock); |
655 | dqp = radix_tree_lookup(tree, id); | ||
656 | if (dqp) { | ||
657 | xfs_dqlock(dqp); | ||
658 | if (dqp->dq_flags & XFS_DQ_FREEING) { | ||
659 | xfs_dqunlock(dqp); | ||
660 | mutex_unlock(&qi->qi_tree_lock); | ||
661 | trace_xfs_dqget_freeing(dqp); | ||
662 | delay(1); | ||
663 | goto restart; | ||
664 | } | ||
711 | 665 | ||
712 | /* | 666 | dqp->q_nrefs++; |
713 | * Look in the cache (hashtable). | 667 | mutex_unlock(&qi->qi_tree_lock); |
714 | * The chain is kept locked during lookup. | 668 | |
715 | */ | 669 | trace_xfs_dqget_hit(dqp); |
716 | switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { | 670 | XFS_STATS_INC(xs_qm_dqcachehits); |
717 | case -1: | 671 | *O_dqpp = dqp; |
718 | XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); | 672 | return 0; |
719 | mutex_unlock(&h->qh_lock); | ||
720 | delay(1); | ||
721 | goto restart; | ||
722 | case 0: | ||
723 | XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); | ||
724 | /* | ||
725 | * The dquot was found, moved to the front of the chain, | ||
726 | * taken off the freelist if it was on it, and locked | ||
727 | * at this point. Just unlock the hashchain and return. | ||
728 | */ | ||
729 | ASSERT(*O_dqpp); | ||
730 | ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); | ||
731 | mutex_unlock(&h->qh_lock); | ||
732 | trace_xfs_dqget_hit(*O_dqpp); | ||
733 | return 0; /* success */ | ||
734 | default: | ||
735 | XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); | ||
736 | break; | ||
737 | } | 673 | } |
674 | mutex_unlock(&qi->qi_tree_lock); | ||
675 | XFS_STATS_INC(xs_qm_dqcachemisses); | ||
738 | 676 | ||
739 | /* | 677 | /* |
740 | * Dquot cache miss. We don't want to keep the inode lock across | 678 | * Dquot cache miss. We don't want to keep the inode lock across |
@@ -745,12 +683,6 @@ restart: | |||
745 | */ | 683 | */ |
746 | if (ip) | 684 | if (ip) |
747 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 685 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
748 | /* | ||
749 | * Save the hashchain version stamp, and unlock the chain, so that | ||
750 | * we don't keep the lock across a disk read | ||
751 | */ | ||
752 | version = h->qh_version; | ||
753 | mutex_unlock(&h->qh_lock); | ||
754 | 686 | ||
755 | error = xfs_qm_dqread(mp, id, type, flags, &dqp); | 687 | error = xfs_qm_dqread(mp, id, type, flags, &dqp); |
756 | 688 | ||
@@ -760,97 +692,53 @@ restart: | |||
760 | if (error) | 692 | if (error) |
761 | return error; | 693 | return error; |
762 | 694 | ||
763 | /* | ||
764 | * Dquot lock comes after hashlock in the lock ordering | ||
765 | */ | ||
766 | if (ip) { | 695 | if (ip) { |
767 | /* | 696 | /* |
768 | * A dquot could be attached to this inode by now, since | 697 | * A dquot could be attached to this inode by now, since |
769 | * we had dropped the ilock. | 698 | * we had dropped the ilock. |
770 | */ | 699 | */ |
771 | if (type == XFS_DQ_USER) { | 700 | if (xfs_this_quota_on(mp, type)) { |
772 | if (!XFS_IS_UQUOTA_ON(mp)) { | 701 | struct xfs_dquot *dqp1; |
773 | /* inode stays locked on return */ | 702 | |
774 | xfs_qm_dqdestroy(dqp); | 703 | dqp1 = xfs_inode_dquot(ip, type); |
775 | return XFS_ERROR(ESRCH); | 704 | if (dqp1) { |
776 | } | ||
777 | if (ip->i_udquot) { | ||
778 | xfs_qm_dqdestroy(dqp); | 705 | xfs_qm_dqdestroy(dqp); |
779 | dqp = ip->i_udquot; | 706 | dqp = dqp1; |
780 | xfs_dqlock(dqp); | 707 | xfs_dqlock(dqp); |
781 | goto dqret; | 708 | goto dqret; |
782 | } | 709 | } |
783 | } else { | 710 | } else { |
784 | if (!XFS_IS_OQUOTA_ON(mp)) { | 711 | /* inode stays locked on return */ |
785 | /* inode stays locked on return */ | 712 | xfs_qm_dqdestroy(dqp); |
786 | xfs_qm_dqdestroy(dqp); | 713 | return XFS_ERROR(ESRCH); |
787 | return XFS_ERROR(ESRCH); | ||
788 | } | ||
789 | if (ip->i_gdquot) { | ||
790 | xfs_qm_dqdestroy(dqp); | ||
791 | dqp = ip->i_gdquot; | ||
792 | xfs_dqlock(dqp); | ||
793 | goto dqret; | ||
794 | } | ||
795 | } | 714 | } |
796 | } | 715 | } |
797 | 716 | ||
798 | /* | 717 | mutex_lock(&qi->qi_tree_lock); |
799 | * Hashlock comes after ilock in lock order | 718 | error = -radix_tree_insert(tree, id, dqp); |
800 | */ | 719 | if (unlikely(error)) { |
801 | mutex_lock(&h->qh_lock); | 720 | WARN_ON(error != EEXIST); |
802 | if (version != h->qh_version) { | 721 | |
803 | xfs_dquot_t *tmpdqp; | ||
804 | /* | 722 | /* |
805 | * Now, see if somebody else put the dquot in the | 723 | * Duplicate found. Just throw away the new dquot and start |
806 | * hashtable before us. This can happen because we didn't | 724 | * over. |
807 | * keep the hashchain lock. We don't have to worry about | ||
808 | * lock order between the two dquots here since dqp isn't | ||
809 | * on any findable lists yet. | ||
810 | */ | 725 | */ |
811 | switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { | 726 | mutex_unlock(&qi->qi_tree_lock); |
812 | case 0: | 727 | trace_xfs_dqget_dup(dqp); |
813 | case -1: | 728 | xfs_qm_dqdestroy(dqp); |
814 | /* | 729 | XFS_STATS_INC(xs_qm_dquot_dups); |
815 | * Duplicate found, either in cache or on its way out. | 730 | goto restart; |
816 | * Just throw away the new dquot and start over. | ||
817 | */ | ||
818 | if (tmpdqp) | ||
819 | xfs_qm_dqput(tmpdqp); | ||
820 | mutex_unlock(&h->qh_lock); | ||
821 | xfs_qm_dqdestroy(dqp); | ||
822 | XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); | ||
823 | goto restart; | ||
824 | default: | ||
825 | break; | ||
826 | } | ||
827 | } | 731 | } |
828 | 732 | ||
829 | /* | 733 | /* |
830 | * Put the dquot at the beginning of the hash-chain and mp's list | ||
831 | * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. | ||
832 | */ | ||
833 | ASSERT(mutex_is_locked(&h->qh_lock)); | ||
834 | dqp->q_hash = h; | ||
835 | list_add(&dqp->q_hashlist, &h->qh_list); | ||
836 | h->qh_version++; | ||
837 | |||
838 | /* | ||
839 | * Attach this dquot to this filesystem's list of all dquots, | ||
840 | * kept inside the mount structure in m_quotainfo field | ||
841 | */ | ||
842 | mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); | ||
843 | |||
844 | /* | ||
845 | * We return a locked dquot to the caller, with a reference taken | 734 | * We return a locked dquot to the caller, with a reference taken |
846 | */ | 735 | */ |
847 | xfs_dqlock(dqp); | 736 | xfs_dqlock(dqp); |
848 | dqp->q_nrefs = 1; | 737 | dqp->q_nrefs = 1; |
849 | 738 | ||
850 | list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); | 739 | qi->qi_dquots++; |
851 | mp->m_quotainfo->qi_dquots++; | 740 | mutex_unlock(&qi->qi_tree_lock); |
852 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); | 741 | |
853 | mutex_unlock(&h->qh_lock); | ||
854 | dqret: | 742 | dqret: |
855 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 743 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
856 | trace_xfs_dqget_miss(dqp); | 744 | trace_xfs_dqget_miss(dqp); |
@@ -859,37 +747,22 @@ restart: | |||
859 | } | 747 | } |
860 | 748 | ||
861 | 749 | ||
862 | /* | 750 | STATIC void |
863 | * Release a reference to the dquot (decrement ref-count) | 751 | xfs_qm_dqput_final( |
864 | * and unlock it. If there is a group quota attached to this | ||
865 | * dquot, carefully release that too without tripping over | ||
866 | * deadlocks'n'stuff. | ||
867 | */ | ||
868 | void | ||
869 | xfs_qm_dqput( | ||
870 | struct xfs_dquot *dqp) | 752 | struct xfs_dquot *dqp) |
871 | { | 753 | { |
754 | struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; | ||
872 | struct xfs_dquot *gdqp; | 755 | struct xfs_dquot *gdqp; |
873 | 756 | ||
874 | ASSERT(dqp->q_nrefs > 0); | ||
875 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
876 | |||
877 | trace_xfs_dqput(dqp); | ||
878 | |||
879 | recurse: | ||
880 | if (--dqp->q_nrefs > 0) { | ||
881 | xfs_dqunlock(dqp); | ||
882 | return; | ||
883 | } | ||
884 | |||
885 | trace_xfs_dqput_free(dqp); | 757 | trace_xfs_dqput_free(dqp); |
886 | 758 | ||
887 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); | 759 | mutex_lock(&qi->qi_lru_lock); |
888 | if (list_empty(&dqp->q_freelist)) { | 760 | if (list_empty(&dqp->q_lru)) { |
889 | list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); | 761 | list_add_tail(&dqp->q_lru, &qi->qi_lru_list); |
890 | xfs_Gqm->qm_dqfrlist_cnt++; | 762 | qi->qi_lru_count++; |
763 | XFS_STATS_INC(xs_qm_dquot_unused); | ||
891 | } | 764 | } |
892 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | 765 | mutex_unlock(&qi->qi_lru_lock); |
893 | 766 | ||
894 | /* | 767 | /* |
895 | * If we just added a udquot to the freelist, then we want to release | 768 | * If we just added a udquot to the freelist, then we want to release |
@@ -906,10 +779,29 @@ recurse: | |||
906 | /* | 779 | /* |
907 | * If we had a group quota hint, release it now. | 780 | * If we had a group quota hint, release it now. |
908 | */ | 781 | */ |
909 | if (gdqp) { | 782 | if (gdqp) |
910 | dqp = gdqp; | 783 | xfs_qm_dqput(gdqp); |
911 | goto recurse; | 784 | } |
912 | } | 785 | |
786 | /* | ||
787 | * Release a reference to the dquot (decrement ref-count) and unlock it. | ||
788 | * | ||
789 | * If there is a group quota attached to this dquot, carefully release that | ||
790 | * too without tripping over deadlocks'n'stuff. | ||
791 | */ | ||
792 | void | ||
793 | xfs_qm_dqput( | ||
794 | struct xfs_dquot *dqp) | ||
795 | { | ||
796 | ASSERT(dqp->q_nrefs > 0); | ||
797 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
798 | |||
799 | trace_xfs_dqput(dqp); | ||
800 | |||
801 | if (--dqp->q_nrefs > 0) | ||
802 | xfs_dqunlock(dqp); | ||
803 | else | ||
804 | xfs_qm_dqput_final(dqp); | ||
913 | } | 805 | } |
914 | 806 | ||
915 | /* | 807 | /* |
@@ -1091,17 +983,6 @@ xfs_qm_dqflush( | |||
1091 | 983 | ||
1092 | } | 984 | } |
1093 | 985 | ||
1094 | void | ||
1095 | xfs_dqunlock( | ||
1096 | xfs_dquot_t *dqp) | ||
1097 | { | ||
1098 | xfs_dqunlock_nonotify(dqp); | ||
1099 | if (dqp->q_logitem.qli_dquot == dqp) { | ||
1100 | xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, | ||
1101 | &dqp->q_logitem.qli_item); | ||
1102 | } | ||
1103 | } | ||
1104 | |||
1105 | /* | 986 | /* |
1106 | * Lock two xfs_dquot structures. | 987 | * Lock two xfs_dquot structures. |
1107 | * | 988 | * |
@@ -1131,85 +1012,6 @@ xfs_dqlock2( | |||
1131 | } | 1012 | } |
1132 | 1013 | ||
1133 | /* | 1014 | /* |
1134 | * Take a dquot out of the mount's dqlist as well as the hashlist. This is | ||
1135 | * called via unmount as well as quotaoff, and the purge will always succeed. | ||
1136 | */ | ||
1137 | void | ||
1138 | xfs_qm_dqpurge( | ||
1139 | struct xfs_dquot *dqp) | ||
1140 | { | ||
1141 | struct xfs_mount *mp = dqp->q_mount; | ||
1142 | struct xfs_dqhash *qh = dqp->q_hash; | ||
1143 | |||
1144 | xfs_dqlock(dqp); | ||
1145 | |||
1146 | /* | ||
1147 | * If we're turning off quotas, we have to make sure that, for | ||
1148 | * example, we don't delete quota disk blocks while dquots are | ||
1149 | * in the process of getting written to those disk blocks. | ||
1150 | * This dquot might well be on AIL, and we can't leave it there | ||
1151 | * if we're turning off quotas. Basically, we need this flush | ||
1152 | * lock, and are willing to block on it. | ||
1153 | */ | ||
1154 | if (!xfs_dqflock_nowait(dqp)) { | ||
1155 | /* | ||
1156 | * Block on the flush lock after nudging dquot buffer, | ||
1157 | * if it is incore. | ||
1158 | */ | ||
1159 | xfs_dqflock_pushbuf_wait(dqp); | ||
1160 | } | ||
1161 | |||
1162 | /* | ||
1163 | * If we are turning this type of quotas off, we don't care | ||
1164 | * about the dirty metadata sitting in this dquot. OTOH, if | ||
1165 | * we're unmounting, we do care, so we flush it and wait. | ||
1166 | */ | ||
1167 | if (XFS_DQ_IS_DIRTY(dqp)) { | ||
1168 | int error; | ||
1169 | |||
1170 | /* | ||
1171 | * We don't care about getting disk errors here. We need | ||
1172 | * to purge this dquot anyway, so we go ahead regardless. | ||
1173 | */ | ||
1174 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); | ||
1175 | if (error) | ||
1176 | xfs_warn(mp, "%s: dquot %p flush failed", | ||
1177 | __func__, dqp); | ||
1178 | xfs_dqflock(dqp); | ||
1179 | } | ||
1180 | |||
1181 | ASSERT(atomic_read(&dqp->q_pincount) == 0); | ||
1182 | ASSERT(XFS_FORCED_SHUTDOWN(mp) || | ||
1183 | !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); | ||
1184 | |||
1185 | xfs_dqfunlock(dqp); | ||
1186 | xfs_dqunlock(dqp); | ||
1187 | |||
1188 | mutex_lock(&qh->qh_lock); | ||
1189 | list_del_init(&dqp->q_hashlist); | ||
1190 | qh->qh_version++; | ||
1191 | mutex_unlock(&qh->qh_lock); | ||
1192 | |||
1193 | mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); | ||
1194 | list_del_init(&dqp->q_mplist); | ||
1195 | mp->m_quotainfo->qi_dqreclaims++; | ||
1196 | mp->m_quotainfo->qi_dquots--; | ||
1197 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); | ||
1198 | |||
1199 | /* | ||
1200 | * We move dquots to the freelist as soon as their reference count | ||
1201 | * hits zero, so it really should be on the freelist here. | ||
1202 | */ | ||
1203 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1204 | ASSERT(!list_empty(&dqp->q_freelist)); | ||
1205 | list_del_init(&dqp->q_freelist); | ||
1206 | xfs_Gqm->qm_dqfrlist_cnt--; | ||
1207 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1208 | |||
1209 | xfs_qm_dqdestroy(dqp); | ||
1210 | } | ||
1211 | |||
1212 | /* | ||
1213 | * Give the buffer a little push if it is incore and | 1015 | * Give the buffer a little push if it is incore and |
1214 | * wait on the flush lock. | 1016 | * wait on the flush lock. |
1215 | */ | 1017 | */ |
@@ -1241,3 +1043,31 @@ xfs_dqflock_pushbuf_wait( | |||
1241 | out_lock: | 1043 | out_lock: |
1242 | xfs_dqflock(dqp); | 1044 | xfs_dqflock(dqp); |
1243 | } | 1045 | } |
1046 | |||
1047 | int __init | ||
1048 | xfs_qm_init(void) | ||
1049 | { | ||
1050 | xfs_qm_dqzone = | ||
1051 | kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot"); | ||
1052 | if (!xfs_qm_dqzone) | ||
1053 | goto out; | ||
1054 | |||
1055 | xfs_qm_dqtrxzone = | ||
1056 | kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx"); | ||
1057 | if (!xfs_qm_dqtrxzone) | ||
1058 | goto out_free_dqzone; | ||
1059 | |||
1060 | return 0; | ||
1061 | |||
1062 | out_free_dqzone: | ||
1063 | kmem_zone_destroy(xfs_qm_dqzone); | ||
1064 | out: | ||
1065 | return -ENOMEM; | ||
1066 | } | ||
1067 | |||
1068 | void | ||
1069 | xfs_qm_exit(void) | ||
1070 | { | ||
1071 | kmem_zone_destroy(xfs_qm_dqtrxzone); | ||
1072 | kmem_zone_destroy(xfs_qm_dqzone); | ||
1073 | } | ||
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index a1d91d8f1802..ef9190bd8b30 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
@@ -29,16 +29,6 @@ | |||
29 | * when quotas are off. | 29 | * when quotas are off. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | /* | ||
33 | * The hash chain headers (hash buckets) | ||
34 | */ | ||
35 | typedef struct xfs_dqhash { | ||
36 | struct list_head qh_list; | ||
37 | struct mutex qh_lock; | ||
38 | uint qh_version; /* ever increasing version */ | ||
39 | uint qh_nelems; /* number of dquots on the list */ | ||
40 | } xfs_dqhash_t; | ||
41 | |||
42 | struct xfs_mount; | 32 | struct xfs_mount; |
43 | struct xfs_trans; | 33 | struct xfs_trans; |
44 | 34 | ||
@@ -47,10 +37,7 @@ struct xfs_trans; | |||
47 | */ | 37 | */ |
48 | typedef struct xfs_dquot { | 38 | typedef struct xfs_dquot { |
49 | uint dq_flags; /* various flags (XFS_DQ_*) */ | 39 | uint dq_flags; /* various flags (XFS_DQ_*) */ |
50 | struct list_head q_freelist; /* global free list of dquots */ | 40 | struct list_head q_lru; /* global free list of dquots */ |
51 | struct list_head q_mplist; /* mount's list of dquots */ | ||
52 | struct list_head q_hashlist; /* gloabl hash list of dquots */ | ||
53 | xfs_dqhash_t *q_hash; /* the hashchain header */ | ||
54 | struct xfs_mount*q_mount; /* filesystem this relates to */ | 41 | struct xfs_mount*q_mount; /* filesystem this relates to */ |
55 | struct xfs_trans*q_transp; /* trans this belongs to currently */ | 42 | struct xfs_trans*q_transp; /* trans this belongs to currently */ |
56 | uint q_nrefs; /* # active refs from inodes */ | 43 | uint q_nrefs; /* # active refs from inodes */ |
@@ -110,11 +97,37 @@ static inline void xfs_dqlock(struct xfs_dquot *dqp) | |||
110 | mutex_lock(&dqp->q_qlock); | 97 | mutex_lock(&dqp->q_qlock); |
111 | } | 98 | } |
112 | 99 | ||
113 | static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) | 100 | static inline void xfs_dqunlock(struct xfs_dquot *dqp) |
114 | { | 101 | { |
115 | mutex_unlock(&dqp->q_qlock); | 102 | mutex_unlock(&dqp->q_qlock); |
116 | } | 103 | } |
117 | 104 | ||
105 | static inline int xfs_this_quota_on(struct xfs_mount *mp, int type) | ||
106 | { | ||
107 | switch (type & XFS_DQ_ALLTYPES) { | ||
108 | case XFS_DQ_USER: | ||
109 | return XFS_IS_UQUOTA_ON(mp); | ||
110 | case XFS_DQ_GROUP: | ||
111 | case XFS_DQ_PROJ: | ||
112 | return XFS_IS_OQUOTA_ON(mp); | ||
113 | default: | ||
114 | return 0; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | ||
119 | { | ||
120 | switch (type & XFS_DQ_ALLTYPES) { | ||
121 | case XFS_DQ_USER: | ||
122 | return ip->i_udquot; | ||
123 | case XFS_DQ_GROUP: | ||
124 | case XFS_DQ_PROJ: | ||
125 | return ip->i_gdquot; | ||
126 | default: | ||
127 | return NULL; | ||
128 | } | ||
129 | } | ||
130 | |||
118 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) | 131 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) |
119 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) | 132 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) |
120 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 133 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
@@ -125,15 +138,10 @@ static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) | |||
125 | XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ | 138 | XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ |
126 | XFS_DQ_TO_QINF(dqp)->qi_gquotaip) | 139 | XFS_DQ_TO_QINF(dqp)->qi_gquotaip) |
127 | 140 | ||
128 | #define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ | ||
129 | (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ | ||
130 | (XFS_IS_OQUOTA_ON((d)->q_mount)))) | ||
131 | |||
132 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, | 141 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, |
133 | uint, struct xfs_dquot **); | 142 | uint, struct xfs_dquot **); |
134 | extern void xfs_qm_dqdestroy(xfs_dquot_t *); | 143 | extern void xfs_qm_dqdestroy(xfs_dquot_t *); |
135 | extern int xfs_qm_dqflush(xfs_dquot_t *, uint); | 144 | extern int xfs_qm_dqflush(xfs_dquot_t *, uint); |
136 | extern void xfs_qm_dqpurge(xfs_dquot_t *); | ||
137 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); | 145 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); |
138 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, | 146 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, |
139 | xfs_disk_dquot_t *); | 147 | xfs_disk_dquot_t *); |
@@ -144,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, | |||
144 | extern void xfs_qm_dqput(xfs_dquot_t *); | 152 | extern void xfs_qm_dqput(xfs_dquot_t *); |
145 | 153 | ||
146 | extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); | 154 | extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); |
147 | extern void xfs_dqunlock(struct xfs_dquot *); | ||
148 | extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); | 155 | extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); |
149 | 156 | ||
150 | static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) | 157 | static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7e5bc872f2b4..54a67dd9ac0a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -163,7 +163,6 @@ xfs_file_fsync( | |||
163 | struct inode *inode = file->f_mapping->host; | 163 | struct inode *inode = file->f_mapping->host; |
164 | struct xfs_inode *ip = XFS_I(inode); | 164 | struct xfs_inode *ip = XFS_I(inode); |
165 | struct xfs_mount *mp = ip->i_mount; | 165 | struct xfs_mount *mp = ip->i_mount; |
166 | struct xfs_trans *tp; | ||
167 | int error = 0; | 166 | int error = 0; |
168 | int log_flushed = 0; | 167 | int log_flushed = 0; |
169 | xfs_lsn_t lsn = 0; | 168 | xfs_lsn_t lsn = 0; |
@@ -194,75 +193,18 @@ xfs_file_fsync( | |||
194 | } | 193 | } |
195 | 194 | ||
196 | /* | 195 | /* |
197 | * We always need to make sure that the required inode state is safe on | 196 | * All metadata updates are logged, which means that we just have |
198 | * disk. The inode might be clean but we still might need to force the | 197 | * to flush the log up to the latest LSN that touched the inode. |
199 | * log because of committed transactions that haven't hit the disk yet. | ||
200 | * Likewise, there could be unflushed non-transactional changes to the | ||
201 | * inode core that have to go to disk and this requires us to issue | ||
202 | * a synchronous transaction to capture these changes correctly. | ||
203 | * | ||
204 | * This code relies on the assumption that if the i_update_core field | ||
205 | * of the inode is clear and the inode is unpinned then it is clean | ||
206 | * and no action is required. | ||
207 | */ | 198 | */ |
208 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 199 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
209 | 200 | if (xfs_ipincount(ip)) { | |
210 | /* | 201 | if (!datasync || |
211 | * First check if the VFS inode is marked dirty. All the dirtying | 202 | (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) |
212 | * of non-transactional updates do not go through mark_inode_dirty*, | ||
213 | * which allows us to distinguish between pure timestamp updates | ||
214 | * and i_size updates which need to be caught for fdatasync. | ||
215 | * After that also check for the dirty state in the XFS inode, which | ||
216 | * might gets cleared when the inode gets written out via the AIL | ||
217 | * or xfs_iflush_cluster. | ||
218 | */ | ||
219 | if (((inode->i_state & I_DIRTY_DATASYNC) || | ||
220 | ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && | ||
221 | ip->i_update_core) { | ||
222 | /* | ||
223 | * Kick off a transaction to log the inode core to get the | ||
224 | * updates. The sync transaction will also force the log. | ||
225 | */ | ||
226 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
227 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
228 | error = xfs_trans_reserve(tp, 0, | ||
229 | XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); | ||
230 | if (error) { | ||
231 | xfs_trans_cancel(tp, 0); | ||
232 | return -error; | ||
233 | } | ||
234 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
235 | |||
236 | /* | ||
237 | * Note - it's possible that we might have pushed ourselves out | ||
238 | * of the way during trans_reserve which would flush the inode. | ||
239 | * But there's no guarantee that the inode buffer has actually | ||
240 | * gone out yet (it's delwri). Plus the buffer could be pinned | ||
241 | * anyway if it's part of an inode in another recent | ||
242 | * transaction. So we play it safe and fire off the | ||
243 | * transaction anyway. | ||
244 | */ | ||
245 | xfs_trans_ijoin(tp, ip, 0); | ||
246 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
247 | error = xfs_trans_commit(tp, 0); | ||
248 | |||
249 | lsn = ip->i_itemp->ili_last_lsn; | ||
250 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
251 | } else { | ||
252 | /* | ||
253 | * Timestamps/size haven't changed since last inode flush or | ||
254 | * inode transaction commit. That means either nothing got | ||
255 | * written or a transaction committed which caught the updates. | ||
256 | * If the latter happened and the transaction hasn't hit the | ||
257 | * disk yet, the inode will be still be pinned. If it is, | ||
258 | * force the log. | ||
259 | */ | ||
260 | if (xfs_ipincount(ip)) | ||
261 | lsn = ip->i_itemp->ili_last_lsn; | 203 | lsn = ip->i_itemp->ili_last_lsn; |
262 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
263 | } | 204 | } |
205 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
264 | 206 | ||
265 | if (!error && lsn) | 207 | if (lsn) |
266 | error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); | 208 | error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); |
267 | 209 | ||
268 | /* | 210 | /* |
@@ -659,9 +601,6 @@ restart: | |||
659 | return error; | 601 | return error; |
660 | } | 602 | } |
661 | 603 | ||
662 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) | ||
663 | file_update_time(file); | ||
664 | |||
665 | /* | 604 | /* |
666 | * If the offset is beyond the size of the file, we need to zero any | 605 | * If the offset is beyond the size of the file, we need to zero any |
667 | * blocks that fall between the existing EOF and the start of this | 606 | * blocks that fall between the existing EOF and the start of this |
@@ -685,6 +624,15 @@ restart: | |||
685 | return error; | 624 | return error; |
686 | 625 | ||
687 | /* | 626 | /* |
627 | * Updating the timestamps will grab the ilock again from | ||
628 | * xfs_fs_dirty_inode, so we have to call it after dropping the | ||
629 | * lock above. Eventually we should look into a way to avoid | ||
630 | * the pointless lock roundtrip. | ||
631 | */ | ||
632 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) | ||
633 | file_update_time(file); | ||
634 | |||
635 | /* | ||
688 | * If we're writing the file then make sure to clear the setuid and | 636 | * If we're writing the file then make sure to clear the setuid and |
689 | * setgid bits if the process is not being run by root. This keeps | 637 | * setgid bits if the process is not being run by root. This keeps |
690 | * people from modifying setuid and setgid binaries. | 638 | * people from modifying setuid and setgid binaries. |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 8c3e46394d48..bcc6c249b2c7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -91,7 +91,6 @@ xfs_inode_alloc( | |||
91 | ip->i_afp = NULL; | 91 | ip->i_afp = NULL; |
92 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | 92 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); |
93 | ip->i_flags = 0; | 93 | ip->i_flags = 0; |
94 | ip->i_update_core = 0; | ||
95 | ip->i_delayed_blks = 0; | 94 | ip->i_delayed_blks = 0; |
96 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | 95 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); |
97 | 96 | ||
@@ -290,7 +289,7 @@ xfs_iget_cache_hit( | |||
290 | if (lock_flags != 0) | 289 | if (lock_flags != 0) |
291 | xfs_ilock(ip, lock_flags); | 290 | xfs_ilock(ip, lock_flags); |
292 | 291 | ||
293 | xfs_iflags_clear(ip, XFS_ISTALE); | 292 | xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); |
294 | XFS_STATS_INC(xs_ig_found); | 293 | XFS_STATS_INC(xs_ig_found); |
295 | 294 | ||
296 | return 0; | 295 | return 0; |
@@ -315,6 +314,7 @@ xfs_iget_cache_miss( | |||
315 | struct xfs_inode *ip; | 314 | struct xfs_inode *ip; |
316 | int error; | 315 | int error; |
317 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); | 316 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); |
317 | int iflags; | ||
318 | 318 | ||
319 | ip = xfs_inode_alloc(mp, ino); | 319 | ip = xfs_inode_alloc(mp, ino); |
320 | if (!ip) | 320 | if (!ip) |
@@ -350,9 +350,23 @@ xfs_iget_cache_miss( | |||
350 | BUG(); | 350 | BUG(); |
351 | } | 351 | } |
352 | 352 | ||
353 | spin_lock(&pag->pag_ici_lock); | 353 | /* |
354 | * These values must be set before inserting the inode into the radix | ||
355 | * tree as the moment it is inserted a concurrent lookup (allowed by the | ||
356 | * RCU locking mechanism) can find it and that lookup must see that this | ||
357 | * is an inode currently under construction (i.e. that XFS_INEW is set). | ||
358 | * The ip->i_flags_lock that protects the XFS_INEW flag forms the | ||
359 | * memory barrier that ensures this detection works correctly at lookup | ||
360 | * time. | ||
361 | */ | ||
362 | iflags = XFS_INEW; | ||
363 | if (flags & XFS_IGET_DONTCACHE) | ||
364 | iflags |= XFS_IDONTCACHE; | ||
365 | ip->i_udquot = ip->i_gdquot = NULL; | ||
366 | xfs_iflags_set(ip, iflags); | ||
354 | 367 | ||
355 | /* insert the new inode */ | 368 | /* insert the new inode */ |
369 | spin_lock(&pag->pag_ici_lock); | ||
356 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 370 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
357 | if (unlikely(error)) { | 371 | if (unlikely(error)) { |
358 | WARN_ON(error != -EEXIST); | 372 | WARN_ON(error != -EEXIST); |
@@ -360,11 +374,6 @@ xfs_iget_cache_miss( | |||
360 | error = EAGAIN; | 374 | error = EAGAIN; |
361 | goto out_preload_end; | 375 | goto out_preload_end; |
362 | } | 376 | } |
363 | |||
364 | /* These values _must_ be set before releasing the radix tree lock! */ | ||
365 | ip->i_udquot = ip->i_gdquot = NULL; | ||
366 | xfs_iflags_set(ip, XFS_INEW); | ||
367 | |||
368 | spin_unlock(&pag->pag_ici_lock); | 377 | spin_unlock(&pag->pag_ici_lock); |
369 | radix_tree_preload_end(); | 378 | radix_tree_preload_end(); |
370 | 379 | ||
@@ -418,6 +427,15 @@ xfs_iget( | |||
418 | xfs_perag_t *pag; | 427 | xfs_perag_t *pag; |
419 | xfs_agino_t agino; | 428 | xfs_agino_t agino; |
420 | 429 | ||
430 | /* | ||
431 | * xfs_reclaim_inode() uses the ILOCK to ensure an inode | ||
432 | * doesn't get freed while it's being referenced during a | ||
433 | * radix tree traversal here. It assumes this function | ||
434 | * aqcuires only the ILOCK (and therefore it has no need to | ||
435 | * involve the IOLOCK in this synchronization). | ||
436 | */ | ||
437 | ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); | ||
438 | |||
421 | /* reject inode numbers outside existing AGs */ | 439 | /* reject inode numbers outside existing AGs */ |
422 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 440 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
423 | return EINVAL; | 441 | return EINVAL; |
@@ -642,8 +660,7 @@ xfs_iunlock( | |||
642 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | 660 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); |
643 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | 661 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != |
644 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | 662 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); |
645 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | | 663 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); |
646 | XFS_LOCK_DEP_MASK)) == 0); | ||
647 | ASSERT(lock_flags != 0); | 664 | ASSERT(lock_flags != 0); |
648 | 665 | ||
649 | if (lock_flags & XFS_IOLOCK_EXCL) | 666 | if (lock_flags & XFS_IOLOCK_EXCL) |
@@ -656,16 +673,6 @@ xfs_iunlock( | |||
656 | else if (lock_flags & XFS_ILOCK_SHARED) | 673 | else if (lock_flags & XFS_ILOCK_SHARED) |
657 | mrunlock_shared(&ip->i_lock); | 674 | mrunlock_shared(&ip->i_lock); |
658 | 675 | ||
659 | if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) && | ||
660 | !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) { | ||
661 | /* | ||
662 | * Let the AIL know that this item has been unlocked in case | ||
663 | * it is in the AIL and anyone is waiting on it. Don't do | ||
664 | * this if the caller has asked us not to. | ||
665 | */ | ||
666 | xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, | ||
667 | (xfs_log_item_t*)(ip->i_itemp)); | ||
668 | } | ||
669 | trace_xfs_iunlock(ip, lock_flags, _RET_IP_); | 676 | trace_xfs_iunlock(ip, lock_flags, _RET_IP_); |
670 | } | 677 | } |
671 | 678 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b21022499c2e..bc46c0a133d3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1656,14 +1656,13 @@ retry: | |||
1656 | iip = ip->i_itemp; | 1656 | iip = ip->i_itemp; |
1657 | if (!iip || xfs_inode_clean(ip)) { | 1657 | if (!iip || xfs_inode_clean(ip)) { |
1658 | ASSERT(ip != free_ip); | 1658 | ASSERT(ip != free_ip); |
1659 | ip->i_update_core = 0; | ||
1660 | xfs_ifunlock(ip); | 1659 | xfs_ifunlock(ip); |
1661 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1660 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1662 | continue; | 1661 | continue; |
1663 | } | 1662 | } |
1664 | 1663 | ||
1665 | iip->ili_last_fields = iip->ili_format.ilf_fields; | 1664 | iip->ili_last_fields = iip->ili_fields; |
1666 | iip->ili_format.ilf_fields = 0; | 1665 | iip->ili_fields = 0; |
1667 | iip->ili_logged = 1; | 1666 | iip->ili_logged = 1; |
1668 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, | 1667 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, |
1669 | &iip->ili_item.li_lsn); | 1668 | &iip->ili_item.li_lsn); |
@@ -2177,7 +2176,7 @@ xfs_iflush_fork( | |||
2177 | mp = ip->i_mount; | 2176 | mp = ip->i_mount; |
2178 | switch (XFS_IFORK_FORMAT(ip, whichfork)) { | 2177 | switch (XFS_IFORK_FORMAT(ip, whichfork)) { |
2179 | case XFS_DINODE_FMT_LOCAL: | 2178 | case XFS_DINODE_FMT_LOCAL: |
2180 | if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && | 2179 | if ((iip->ili_fields & dataflag[whichfork]) && |
2181 | (ifp->if_bytes > 0)) { | 2180 | (ifp->if_bytes > 0)) { |
2182 | ASSERT(ifp->if_u1.if_data != NULL); | 2181 | ASSERT(ifp->if_u1.if_data != NULL); |
2183 | ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); | 2182 | ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); |
@@ -2187,8 +2186,8 @@ xfs_iflush_fork( | |||
2187 | 2186 | ||
2188 | case XFS_DINODE_FMT_EXTENTS: | 2187 | case XFS_DINODE_FMT_EXTENTS: |
2189 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || | 2188 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || |
2190 | !(iip->ili_format.ilf_fields & extflag[whichfork])); | 2189 | !(iip->ili_fields & extflag[whichfork])); |
2191 | if ((iip->ili_format.ilf_fields & extflag[whichfork]) && | 2190 | if ((iip->ili_fields & extflag[whichfork]) && |
2192 | (ifp->if_bytes > 0)) { | 2191 | (ifp->if_bytes > 0)) { |
2193 | ASSERT(xfs_iext_get_ext(ifp, 0)); | 2192 | ASSERT(xfs_iext_get_ext(ifp, 0)); |
2194 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); | 2193 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); |
@@ -2198,7 +2197,7 @@ xfs_iflush_fork( | |||
2198 | break; | 2197 | break; |
2199 | 2198 | ||
2200 | case XFS_DINODE_FMT_BTREE: | 2199 | case XFS_DINODE_FMT_BTREE: |
2201 | if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && | 2200 | if ((iip->ili_fields & brootflag[whichfork]) && |
2202 | (ifp->if_broot_bytes > 0)) { | 2201 | (ifp->if_broot_bytes > 0)) { |
2203 | ASSERT(ifp->if_broot != NULL); | 2202 | ASSERT(ifp->if_broot != NULL); |
2204 | ASSERT(ifp->if_broot_bytes <= | 2203 | ASSERT(ifp->if_broot_bytes <= |
@@ -2211,14 +2210,14 @@ xfs_iflush_fork( | |||
2211 | break; | 2210 | break; |
2212 | 2211 | ||
2213 | case XFS_DINODE_FMT_DEV: | 2212 | case XFS_DINODE_FMT_DEV: |
2214 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { | 2213 | if (iip->ili_fields & XFS_ILOG_DEV) { |
2215 | ASSERT(whichfork == XFS_DATA_FORK); | 2214 | ASSERT(whichfork == XFS_DATA_FORK); |
2216 | xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); | 2215 | xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); |
2217 | } | 2216 | } |
2218 | break; | 2217 | break; |
2219 | 2218 | ||
2220 | case XFS_DINODE_FMT_UUID: | 2219 | case XFS_DINODE_FMT_UUID: |
2221 | if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { | 2220 | if (iip->ili_fields & XFS_ILOG_UUID) { |
2222 | ASSERT(whichfork == XFS_DATA_FORK); | 2221 | ASSERT(whichfork == XFS_DATA_FORK); |
2223 | memcpy(XFS_DFORK_DPTR(dip), | 2222 | memcpy(XFS_DFORK_DPTR(dip), |
2224 | &ip->i_df.if_u2.if_uuid, | 2223 | &ip->i_df.if_u2.if_uuid, |
@@ -2451,9 +2450,8 @@ xfs_iflush( | |||
2451 | * to disk, because the log record didn't make it to disk! | 2450 | * to disk, because the log record didn't make it to disk! |
2452 | */ | 2451 | */ |
2453 | if (XFS_FORCED_SHUTDOWN(mp)) { | 2452 | if (XFS_FORCED_SHUTDOWN(mp)) { |
2454 | ip->i_update_core = 0; | ||
2455 | if (iip) | 2453 | if (iip) |
2456 | iip->ili_format.ilf_fields = 0; | 2454 | iip->ili_fields = 0; |
2457 | xfs_ifunlock(ip); | 2455 | xfs_ifunlock(ip); |
2458 | return XFS_ERROR(EIO); | 2456 | return XFS_ERROR(EIO); |
2459 | } | 2457 | } |
@@ -2533,26 +2531,6 @@ xfs_iflush_int( | |||
2533 | /* set *dip = inode's place in the buffer */ | 2531 | /* set *dip = inode's place in the buffer */ |
2534 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); | 2532 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); |
2535 | 2533 | ||
2536 | /* | ||
2537 | * Clear i_update_core before copying out the data. | ||
2538 | * This is for coordination with our timestamp updates | ||
2539 | * that don't hold the inode lock. They will always | ||
2540 | * update the timestamps BEFORE setting i_update_core, | ||
2541 | * so if we clear i_update_core after they set it we | ||
2542 | * are guaranteed to see their updates to the timestamps. | ||
2543 | * I believe that this depends on strongly ordered memory | ||
2544 | * semantics, but we have that. We use the SYNCHRONIZE | ||
2545 | * macro to make sure that the compiler does not reorder | ||
2546 | * the i_update_core access below the data copy below. | ||
2547 | */ | ||
2548 | ip->i_update_core = 0; | ||
2549 | SYNCHRONIZE(); | ||
2550 | |||
2551 | /* | ||
2552 | * Make sure to get the latest timestamps from the Linux inode. | ||
2553 | */ | ||
2554 | xfs_synchronize_times(ip); | ||
2555 | |||
2556 | if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), | 2534 | if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), |
2557 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { | 2535 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { |
2558 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, | 2536 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
@@ -2663,36 +2641,33 @@ xfs_iflush_int( | |||
2663 | xfs_inobp_check(mp, bp); | 2641 | xfs_inobp_check(mp, bp); |
2664 | 2642 | ||
2665 | /* | 2643 | /* |
2666 | * We've recorded everything logged in the inode, so we'd | 2644 | * We've recorded everything logged in the inode, so we'd like to clear |
2667 | * like to clear the ilf_fields bits so we don't log and | 2645 | * the ili_fields bits so we don't log and flush things unnecessarily. |
2668 | * flush things unnecessarily. However, we can't stop | 2646 | * However, we can't stop logging all this information until the data |
2669 | * logging all this information until the data we've copied | 2647 | * we've copied into the disk buffer is written to disk. If we did we |
2670 | * into the disk buffer is written to disk. If we did we might | 2648 | * might overwrite the copy of the inode in the log with all the data |
2671 | * overwrite the copy of the inode in the log with all the | 2649 | * after re-logging only part of it, and in the face of a crash we |
2672 | * data after re-logging only part of it, and in the face of | 2650 | * wouldn't have all the data we need to recover. |
2673 | * a crash we wouldn't have all the data we need to recover. | ||
2674 | * | 2651 | * |
2675 | * What we do is move the bits to the ili_last_fields field. | 2652 | * What we do is move the bits to the ili_last_fields field. When |
2676 | * When logging the inode, these bits are moved back to the | 2653 | * logging the inode, these bits are moved back to the ili_fields field. |
2677 | * ilf_fields field. In the xfs_iflush_done() routine we | 2654 | * In the xfs_iflush_done() routine we clear ili_last_fields, since we |
2678 | * clear ili_last_fields, since we know that the information | 2655 | * know that the information those bits represent is permanently on |
2679 | * those bits represent is permanently on disk. As long as | 2656 | * disk. As long as the flush completes before the inode is logged |
2680 | * the flush completes before the inode is logged again, then | 2657 | * again, then both ili_fields and ili_last_fields will be cleared. |
2681 | * both ilf_fields and ili_last_fields will be cleared. | ||
2682 | * | 2658 | * |
2683 | * We can play with the ilf_fields bits here, because the inode | 2659 | * We can play with the ili_fields bits here, because the inode lock |
2684 | * lock must be held exclusively in order to set bits there | 2660 | * must be held exclusively in order to set bits there and the flush |
2685 | * and the flush lock protects the ili_last_fields bits. | 2661 | * lock protects the ili_last_fields bits. Set ili_logged so the flush |
2686 | * Set ili_logged so the flush done | 2662 | * done routine can tell whether or not to look in the AIL. Also, store |
2687 | * routine can tell whether or not to look in the AIL. | 2663 | * the current LSN of the inode so that we can tell whether the item has |
2688 | * Also, store the current LSN of the inode so that we can tell | 2664 | * moved in the AIL from xfs_iflush_done(). In order to read the lsn we |
2689 | * whether the item has moved in the AIL from xfs_iflush_done(). | 2665 | * need the AIL lock, because it is a 64 bit value that cannot be read |
2690 | * In order to read the lsn we need the AIL lock, because | 2666 | * atomically. |
2691 | * it is a 64 bit value that cannot be read atomically. | ||
2692 | */ | 2667 | */ |
2693 | if (iip != NULL && iip->ili_format.ilf_fields != 0) { | 2668 | if (iip != NULL && iip->ili_fields != 0) { |
2694 | iip->ili_last_fields = iip->ili_format.ilf_fields; | 2669 | iip->ili_last_fields = iip->ili_fields; |
2695 | iip->ili_format.ilf_fields = 0; | 2670 | iip->ili_fields = 0; |
2696 | iip->ili_logged = 1; | 2671 | iip->ili_logged = 1; |
2697 | 2672 | ||
2698 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, | 2673 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, |
@@ -2711,8 +2686,7 @@ xfs_iflush_int( | |||
2711 | } else { | 2686 | } else { |
2712 | /* | 2687 | /* |
2713 | * We're flushing an inode which is not in the AIL and has | 2688 | * We're flushing an inode which is not in the AIL and has |
2714 | * not been logged but has i_update_core set. For this | 2689 | * not been logged. For this case we can immediately drop |
2715 | * case we can use a B_DELWRI flush and immediately drop | ||
2716 | * the inode flush lock because we can avoid the whole | 2690 | * the inode flush lock because we can avoid the whole |
2717 | * AIL state thing. It's OK to drop the flush lock now, | 2691 | * AIL state thing. It's OK to drop the flush lock now, |
2718 | * because we've already locked the buffer and to do anything | 2692 | * because we've already locked the buffer and to do anything |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 2f27b7454085..7fee3387e1c8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -241,7 +241,6 @@ typedef struct xfs_inode { | |||
241 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 241 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
242 | /* Miscellaneous state. */ | 242 | /* Miscellaneous state. */ |
243 | unsigned long i_flags; /* see defined flags below */ | 243 | unsigned long i_flags; /* see defined flags below */ |
244 | unsigned char i_update_core; /* timestamps/size is dirty */ | ||
245 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 244 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
246 | 245 | ||
247 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 246 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
@@ -275,6 +274,20 @@ static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip) | |||
275 | } | 274 | } |
276 | 275 | ||
277 | /* | 276 | /* |
277 | * If this I/O goes past the on-disk inode size update it unless it would | ||
278 | * be past the current in-core inode size. | ||
279 | */ | ||
280 | static inline xfs_fsize_t | ||
281 | xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size) | ||
282 | { | ||
283 | xfs_fsize_t i_size = i_size_read(VFS_I(ip)); | ||
284 | |||
285 | if (new_size > i_size) | ||
286 | new_size = i_size; | ||
287 | return new_size > ip->i_d.di_size ? new_size : 0; | ||
288 | } | ||
289 | |||
290 | /* | ||
278 | * i_flags helper functions | 291 | * i_flags helper functions |
279 | */ | 292 | */ |
280 | static inline void | 293 | static inline void |
@@ -374,10 +387,11 @@ xfs_set_projid(struct xfs_inode *ip, | |||
374 | #define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) | 387 | #define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) |
375 | #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ | 388 | #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ |
376 | #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) | 389 | #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) |
390 | #define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */ | ||
377 | 391 | ||
378 | /* | 392 | /* |
379 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during | 393 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during |
380 | * inode lookup. Thi prevents unintended behaviour on the new inode from | 394 | * inode lookup. This prevents unintended behaviour on the new inode from |
381 | * ocurring. | 395 | * ocurring. |
382 | */ | 396 | */ |
383 | #define XFS_IRECLAIM_RESET_FLAGS \ | 397 | #define XFS_IRECLAIM_RESET_FLAGS \ |
@@ -422,7 +436,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) | |||
422 | #define XFS_IOLOCK_SHARED (1<<1) | 436 | #define XFS_IOLOCK_SHARED (1<<1) |
423 | #define XFS_ILOCK_EXCL (1<<2) | 437 | #define XFS_ILOCK_EXCL (1<<2) |
424 | #define XFS_ILOCK_SHARED (1<<3) | 438 | #define XFS_ILOCK_SHARED (1<<3) |
425 | #define XFS_IUNLOCK_NONOTIFY (1<<4) | ||
426 | 439 | ||
427 | #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | 440 | #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ |
428 | | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) | 441 | | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) |
@@ -431,8 +444,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) | |||
431 | { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ | 444 | { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ |
432 | { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ | 445 | { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ |
433 | { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ | 446 | { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ |
434 | { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ | 447 | { XFS_ILOCK_SHARED, "ILOCK_SHARED" } |
435 | { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" } | ||
436 | 448 | ||
437 | 449 | ||
438 | /* | 450 | /* |
@@ -522,10 +534,6 @@ void xfs_promote_inode(struct xfs_inode *); | |||
522 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 534 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
523 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 535 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
524 | 536 | ||
525 | void xfs_synchronize_times(xfs_inode_t *); | ||
526 | void xfs_mark_inode_dirty(xfs_inode_t *); | ||
527 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); | ||
528 | |||
529 | #define IHOLD(ip) \ | 537 | #define IHOLD(ip) \ |
530 | do { \ | 538 | do { \ |
531 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ | 539 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
@@ -546,6 +554,7 @@ do { \ | |||
546 | */ | 554 | */ |
547 | #define XFS_IGET_CREATE 0x1 | 555 | #define XFS_IGET_CREATE 0x1 |
548 | #define XFS_IGET_UNTRUSTED 0x2 | 556 | #define XFS_IGET_UNTRUSTED 0x2 |
557 | #define XFS_IGET_DONTCACHE 0x4 | ||
549 | 558 | ||
550 | int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, | 559 | int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, |
551 | xfs_ino_t, struct xfs_dinode **, | 560 | xfs_ino_t, struct xfs_dinode **, |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 91d71dcd4852..05d924efceaf 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -57,77 +57,28 @@ xfs_inode_item_size( | |||
57 | struct xfs_inode *ip = iip->ili_inode; | 57 | struct xfs_inode *ip = iip->ili_inode; |
58 | uint nvecs = 2; | 58 | uint nvecs = 2; |
59 | 59 | ||
60 | /* | ||
61 | * Only log the data/extents/b-tree root if there is something | ||
62 | * left to log. | ||
63 | */ | ||
64 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; | ||
65 | |||
66 | switch (ip->i_d.di_format) { | 60 | switch (ip->i_d.di_format) { |
67 | case XFS_DINODE_FMT_EXTENTS: | 61 | case XFS_DINODE_FMT_EXTENTS: |
68 | iip->ili_format.ilf_fields &= | 62 | if ((iip->ili_fields & XFS_ILOG_DEXT) && |
69 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 63 | ip->i_d.di_nextents > 0 && |
70 | XFS_ILOG_DEV | XFS_ILOG_UUID); | 64 | ip->i_df.if_bytes > 0) |
71 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && | ||
72 | (ip->i_d.di_nextents > 0) && | ||
73 | (ip->i_df.if_bytes > 0)) { | ||
74 | ASSERT(ip->i_df.if_u1.if_extents != NULL); | ||
75 | nvecs++; | 65 | nvecs++; |
76 | } else { | ||
77 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; | ||
78 | } | ||
79 | break; | 66 | break; |
80 | 67 | ||
81 | case XFS_DINODE_FMT_BTREE: | 68 | case XFS_DINODE_FMT_BTREE: |
82 | iip->ili_format.ilf_fields &= | 69 | if ((iip->ili_fields & XFS_ILOG_DBROOT) && |
83 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | | 70 | ip->i_df.if_broot_bytes > 0) |
84 | XFS_ILOG_DEV | XFS_ILOG_UUID); | ||
85 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && | ||
86 | (ip->i_df.if_broot_bytes > 0)) { | ||
87 | ASSERT(ip->i_df.if_broot != NULL); | ||
88 | nvecs++; | 71 | nvecs++; |
89 | } else { | ||
90 | ASSERT(!(iip->ili_format.ilf_fields & | ||
91 | XFS_ILOG_DBROOT)); | ||
92 | #ifdef XFS_TRANS_DEBUG | ||
93 | if (iip->ili_root_size > 0) { | ||
94 | ASSERT(iip->ili_root_size == | ||
95 | ip->i_df.if_broot_bytes); | ||
96 | ASSERT(memcmp(iip->ili_orig_root, | ||
97 | ip->i_df.if_broot, | ||
98 | iip->ili_root_size) == 0); | ||
99 | } else { | ||
100 | ASSERT(ip->i_df.if_broot_bytes == 0); | ||
101 | } | ||
102 | #endif | ||
103 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; | ||
104 | } | ||
105 | break; | 72 | break; |
106 | 73 | ||
107 | case XFS_DINODE_FMT_LOCAL: | 74 | case XFS_DINODE_FMT_LOCAL: |
108 | iip->ili_format.ilf_fields &= | 75 | if ((iip->ili_fields & XFS_ILOG_DDATA) && |
109 | ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | | 76 | ip->i_df.if_bytes > 0) |
110 | XFS_ILOG_DEV | XFS_ILOG_UUID); | ||
111 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && | ||
112 | (ip->i_df.if_bytes > 0)) { | ||
113 | ASSERT(ip->i_df.if_u1.if_data != NULL); | ||
114 | ASSERT(ip->i_d.di_size > 0); | ||
115 | nvecs++; | 77 | nvecs++; |
116 | } else { | ||
117 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; | ||
118 | } | ||
119 | break; | 78 | break; |
120 | 79 | ||
121 | case XFS_DINODE_FMT_DEV: | 80 | case XFS_DINODE_FMT_DEV: |
122 | iip->ili_format.ilf_fields &= | ||
123 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | ||
124 | XFS_ILOG_DEXT | XFS_ILOG_UUID); | ||
125 | break; | ||
126 | |||
127 | case XFS_DINODE_FMT_UUID: | 81 | case XFS_DINODE_FMT_UUID: |
128 | iip->ili_format.ilf_fields &= | ||
129 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | ||
130 | XFS_ILOG_DEXT | XFS_ILOG_DEV); | ||
131 | break; | 82 | break; |
132 | 83 | ||
133 | default: | 84 | default: |
@@ -135,56 +86,31 @@ xfs_inode_item_size( | |||
135 | break; | 86 | break; |
136 | } | 87 | } |
137 | 88 | ||
138 | /* | 89 | if (!XFS_IFORK_Q(ip)) |
139 | * If there are no attributes associated with this file, | ||
140 | * then there cannot be anything more to log. | ||
141 | * Clear all attribute-related log flags. | ||
142 | */ | ||
143 | if (!XFS_IFORK_Q(ip)) { | ||
144 | iip->ili_format.ilf_fields &= | ||
145 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); | ||
146 | return nvecs; | 90 | return nvecs; |
147 | } | 91 | |
148 | 92 | ||
149 | /* | 93 | /* |
150 | * Log any necessary attribute data. | 94 | * Log any necessary attribute data. |
151 | */ | 95 | */ |
152 | switch (ip->i_d.di_aformat) { | 96 | switch (ip->i_d.di_aformat) { |
153 | case XFS_DINODE_FMT_EXTENTS: | 97 | case XFS_DINODE_FMT_EXTENTS: |
154 | iip->ili_format.ilf_fields &= | 98 | if ((iip->ili_fields & XFS_ILOG_AEXT) && |
155 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); | 99 | ip->i_d.di_anextents > 0 && |
156 | if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && | 100 | ip->i_afp->if_bytes > 0) |
157 | (ip->i_d.di_anextents > 0) && | ||
158 | (ip->i_afp->if_bytes > 0)) { | ||
159 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); | ||
160 | nvecs++; | 101 | nvecs++; |
161 | } else { | ||
162 | iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; | ||
163 | } | ||
164 | break; | 102 | break; |
165 | 103 | ||
166 | case XFS_DINODE_FMT_BTREE: | 104 | case XFS_DINODE_FMT_BTREE: |
167 | iip->ili_format.ilf_fields &= | 105 | if ((iip->ili_fields & XFS_ILOG_ABROOT) && |
168 | ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); | 106 | ip->i_afp->if_broot_bytes > 0) |
169 | if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && | ||
170 | (ip->i_afp->if_broot_bytes > 0)) { | ||
171 | ASSERT(ip->i_afp->if_broot != NULL); | ||
172 | nvecs++; | 107 | nvecs++; |
173 | } else { | ||
174 | iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; | ||
175 | } | ||
176 | break; | 108 | break; |
177 | 109 | ||
178 | case XFS_DINODE_FMT_LOCAL: | 110 | case XFS_DINODE_FMT_LOCAL: |
179 | iip->ili_format.ilf_fields &= | 111 | if ((iip->ili_fields & XFS_ILOG_ADATA) && |
180 | ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); | 112 | ip->i_afp->if_bytes > 0) |
181 | if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && | ||
182 | (ip->i_afp->if_bytes > 0)) { | ||
183 | ASSERT(ip->i_afp->if_u1.if_data != NULL); | ||
184 | nvecs++; | 113 | nvecs++; |
185 | } else { | ||
186 | iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; | ||
187 | } | ||
188 | break; | 114 | break; |
189 | 115 | ||
190 | default: | 116 | default: |
@@ -254,48 +180,11 @@ xfs_inode_item_format( | |||
254 | vecp++; | 180 | vecp++; |
255 | nvecs = 1; | 181 | nvecs = 1; |
256 | 182 | ||
257 | /* | ||
258 | * Clear i_update_core if the timestamps (or any other | ||
259 | * non-transactional modification) need flushing/logging | ||
260 | * and we're about to log them with the rest of the core. | ||
261 | * | ||
262 | * This is the same logic as xfs_iflush() but this code can't | ||
263 | * run at the same time as xfs_iflush because we're in commit | ||
264 | * processing here and so we have the inode lock held in | ||
265 | * exclusive mode. Although it doesn't really matter | ||
266 | * for the timestamps if both routines were to grab the | ||
267 | * timestamps or not. That would be ok. | ||
268 | * | ||
269 | * We clear i_update_core before copying out the data. | ||
270 | * This is for coordination with our timestamp updates | ||
271 | * that don't hold the inode lock. They will always | ||
272 | * update the timestamps BEFORE setting i_update_core, | ||
273 | * so if we clear i_update_core after they set it we | ||
274 | * are guaranteed to see their updates to the timestamps | ||
275 | * either here. Likewise, if they set it after we clear it | ||
276 | * here, we'll see it either on the next commit of this | ||
277 | * inode or the next time the inode gets flushed via | ||
278 | * xfs_iflush(). This depends on strongly ordered memory | ||
279 | * semantics, but we have that. We use the SYNCHRONIZE | ||
280 | * macro to make sure that the compiler does not reorder | ||
281 | * the i_update_core access below the data copy below. | ||
282 | */ | ||
283 | if (ip->i_update_core) { | ||
284 | ip->i_update_core = 0; | ||
285 | SYNCHRONIZE(); | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * Make sure to get the latest timestamps from the Linux inode. | ||
290 | */ | ||
291 | xfs_synchronize_times(ip); | ||
292 | |||
293 | vecp->i_addr = &ip->i_d; | 183 | vecp->i_addr = &ip->i_d; |
294 | vecp->i_len = sizeof(struct xfs_icdinode); | 184 | vecp->i_len = sizeof(struct xfs_icdinode); |
295 | vecp->i_type = XLOG_REG_TYPE_ICORE; | 185 | vecp->i_type = XLOG_REG_TYPE_ICORE; |
296 | vecp++; | 186 | vecp++; |
297 | nvecs++; | 187 | nvecs++; |
298 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; | ||
299 | 188 | ||
300 | /* | 189 | /* |
301 | * If this is really an old format inode, then we need to | 190 | * If this is really an old format inode, then we need to |
@@ -328,16 +217,17 @@ xfs_inode_item_format( | |||
328 | 217 | ||
329 | switch (ip->i_d.di_format) { | 218 | switch (ip->i_d.di_format) { |
330 | case XFS_DINODE_FMT_EXTENTS: | 219 | case XFS_DINODE_FMT_EXTENTS: |
331 | ASSERT(!(iip->ili_format.ilf_fields & | 220 | iip->ili_fields &= |
332 | (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 221 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
333 | XFS_ILOG_DEV | XFS_ILOG_UUID))); | 222 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
334 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { | 223 | |
335 | ASSERT(ip->i_df.if_bytes > 0); | 224 | if ((iip->ili_fields & XFS_ILOG_DEXT) && |
225 | ip->i_d.di_nextents > 0 && | ||
226 | ip->i_df.if_bytes > 0) { | ||
336 | ASSERT(ip->i_df.if_u1.if_extents != NULL); | 227 | ASSERT(ip->i_df.if_u1.if_extents != NULL); |
337 | ASSERT(ip->i_d.di_nextents > 0); | 228 | ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); |
338 | ASSERT(iip->ili_extents_buf == NULL); | 229 | ASSERT(iip->ili_extents_buf == NULL); |
339 | ASSERT((ip->i_df.if_bytes / | 230 | |
340 | (uint)sizeof(xfs_bmbt_rec_t)) > 0); | ||
341 | #ifdef XFS_NATIVE_HOST | 231 | #ifdef XFS_NATIVE_HOST |
342 | if (ip->i_d.di_nextents == ip->i_df.if_bytes / | 232 | if (ip->i_d.di_nextents == ip->i_df.if_bytes / |
343 | (uint)sizeof(xfs_bmbt_rec_t)) { | 233 | (uint)sizeof(xfs_bmbt_rec_t)) { |
@@ -359,15 +249,18 @@ xfs_inode_item_format( | |||
359 | iip->ili_format.ilf_dsize = vecp->i_len; | 249 | iip->ili_format.ilf_dsize = vecp->i_len; |
360 | vecp++; | 250 | vecp++; |
361 | nvecs++; | 251 | nvecs++; |
252 | } else { | ||
253 | iip->ili_fields &= ~XFS_ILOG_DEXT; | ||
362 | } | 254 | } |
363 | break; | 255 | break; |
364 | 256 | ||
365 | case XFS_DINODE_FMT_BTREE: | 257 | case XFS_DINODE_FMT_BTREE: |
366 | ASSERT(!(iip->ili_format.ilf_fields & | 258 | iip->ili_fields &= |
367 | (XFS_ILOG_DDATA | XFS_ILOG_DEXT | | 259 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | |
368 | XFS_ILOG_DEV | XFS_ILOG_UUID))); | 260 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
369 | if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { | 261 | |
370 | ASSERT(ip->i_df.if_broot_bytes > 0); | 262 | if ((iip->ili_fields & XFS_ILOG_DBROOT) && |
263 | ip->i_df.if_broot_bytes > 0) { | ||
371 | ASSERT(ip->i_df.if_broot != NULL); | 264 | ASSERT(ip->i_df.if_broot != NULL); |
372 | vecp->i_addr = ip->i_df.if_broot; | 265 | vecp->i_addr = ip->i_df.if_broot; |
373 | vecp->i_len = ip->i_df.if_broot_bytes; | 266 | vecp->i_len = ip->i_df.if_broot_bytes; |
@@ -375,15 +268,30 @@ xfs_inode_item_format( | |||
375 | vecp++; | 268 | vecp++; |
376 | nvecs++; | 269 | nvecs++; |
377 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; | 270 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; |
271 | } else { | ||
272 | ASSERT(!(iip->ili_fields & | ||
273 | XFS_ILOG_DBROOT)); | ||
274 | #ifdef XFS_TRANS_DEBUG | ||
275 | if (iip->ili_root_size > 0) { | ||
276 | ASSERT(iip->ili_root_size == | ||
277 | ip->i_df.if_broot_bytes); | ||
278 | ASSERT(memcmp(iip->ili_orig_root, | ||
279 | ip->i_df.if_broot, | ||
280 | iip->ili_root_size) == 0); | ||
281 | } else { | ||
282 | ASSERT(ip->i_df.if_broot_bytes == 0); | ||
283 | } | ||
284 | #endif | ||
285 | iip->ili_fields &= ~XFS_ILOG_DBROOT; | ||
378 | } | 286 | } |
379 | break; | 287 | break; |
380 | 288 | ||
381 | case XFS_DINODE_FMT_LOCAL: | 289 | case XFS_DINODE_FMT_LOCAL: |
382 | ASSERT(!(iip->ili_format.ilf_fields & | 290 | iip->ili_fields &= |
383 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | | 291 | ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | |
384 | XFS_ILOG_DEV | XFS_ILOG_UUID))); | 292 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
385 | if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { | 293 | if ((iip->ili_fields & XFS_ILOG_DDATA) && |
386 | ASSERT(ip->i_df.if_bytes > 0); | 294 | ip->i_df.if_bytes > 0) { |
387 | ASSERT(ip->i_df.if_u1.if_data != NULL); | 295 | ASSERT(ip->i_df.if_u1.if_data != NULL); |
388 | ASSERT(ip->i_d.di_size > 0); | 296 | ASSERT(ip->i_d.di_size > 0); |
389 | 297 | ||
@@ -401,24 +309,26 @@ xfs_inode_item_format( | |||
401 | vecp++; | 309 | vecp++; |
402 | nvecs++; | 310 | nvecs++; |
403 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; | 311 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; |
312 | } else { | ||
313 | iip->ili_fields &= ~XFS_ILOG_DDATA; | ||
404 | } | 314 | } |
405 | break; | 315 | break; |
406 | 316 | ||
407 | case XFS_DINODE_FMT_DEV: | 317 | case XFS_DINODE_FMT_DEV: |
408 | ASSERT(!(iip->ili_format.ilf_fields & | 318 | iip->ili_fields &= |
409 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | | 319 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
410 | XFS_ILOG_DDATA | XFS_ILOG_UUID))); | 320 | XFS_ILOG_DEXT | XFS_ILOG_UUID); |
411 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { | 321 | if (iip->ili_fields & XFS_ILOG_DEV) { |
412 | iip->ili_format.ilf_u.ilfu_rdev = | 322 | iip->ili_format.ilf_u.ilfu_rdev = |
413 | ip->i_df.if_u2.if_rdev; | 323 | ip->i_df.if_u2.if_rdev; |
414 | } | 324 | } |
415 | break; | 325 | break; |
416 | 326 | ||
417 | case XFS_DINODE_FMT_UUID: | 327 | case XFS_DINODE_FMT_UUID: |
418 | ASSERT(!(iip->ili_format.ilf_fields & | 328 | iip->ili_fields &= |
419 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | | 329 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
420 | XFS_ILOG_DDATA | XFS_ILOG_DEV))); | 330 | XFS_ILOG_DEXT | XFS_ILOG_DEV); |
421 | if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { | 331 | if (iip->ili_fields & XFS_ILOG_UUID) { |
422 | iip->ili_format.ilf_u.ilfu_uuid = | 332 | iip->ili_format.ilf_u.ilfu_uuid = |
423 | ip->i_df.if_u2.if_uuid; | 333 | ip->i_df.if_u2.if_uuid; |
424 | } | 334 | } |
@@ -430,31 +340,25 @@ xfs_inode_item_format( | |||
430 | } | 340 | } |
431 | 341 | ||
432 | /* | 342 | /* |
433 | * If there are no attributes associated with the file, | 343 | * If there are no attributes associated with the file, then we're done. |
434 | * then we're done. | ||
435 | * Assert that no attribute-related log flags are set. | ||
436 | */ | 344 | */ |
437 | if (!XFS_IFORK_Q(ip)) { | 345 | if (!XFS_IFORK_Q(ip)) { |
438 | iip->ili_format.ilf_size = nvecs; | 346 | iip->ili_fields &= |
439 | ASSERT(!(iip->ili_format.ilf_fields & | 347 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); |
440 | (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); | 348 | goto out; |
441 | return; | ||
442 | } | 349 | } |
443 | 350 | ||
444 | switch (ip->i_d.di_aformat) { | 351 | switch (ip->i_d.di_aformat) { |
445 | case XFS_DINODE_FMT_EXTENTS: | 352 | case XFS_DINODE_FMT_EXTENTS: |
446 | ASSERT(!(iip->ili_format.ilf_fields & | 353 | iip->ili_fields &= |
447 | (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); | 354 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); |
448 | if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { | 355 | |
449 | #ifdef DEBUG | 356 | if ((iip->ili_fields & XFS_ILOG_AEXT) && |
450 | int nrecs = ip->i_afp->if_bytes / | 357 | ip->i_d.di_anextents > 0 && |
451 | (uint)sizeof(xfs_bmbt_rec_t); | 358 | ip->i_afp->if_bytes > 0) { |
452 | ASSERT(nrecs > 0); | 359 | ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == |
453 | ASSERT(nrecs == ip->i_d.di_anextents); | 360 | ip->i_d.di_anextents); |
454 | ASSERT(ip->i_afp->if_bytes > 0); | ||
455 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); | 361 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); |
456 | ASSERT(ip->i_d.di_anextents > 0); | ||
457 | #endif | ||
458 | #ifdef XFS_NATIVE_HOST | 362 | #ifdef XFS_NATIVE_HOST |
459 | /* | 363 | /* |
460 | * There are not delayed allocation extents | 364 | * There are not delayed allocation extents |
@@ -471,29 +375,36 @@ xfs_inode_item_format( | |||
471 | iip->ili_format.ilf_asize = vecp->i_len; | 375 | iip->ili_format.ilf_asize = vecp->i_len; |
472 | vecp++; | 376 | vecp++; |
473 | nvecs++; | 377 | nvecs++; |
378 | } else { | ||
379 | iip->ili_fields &= ~XFS_ILOG_AEXT; | ||
474 | } | 380 | } |
475 | break; | 381 | break; |
476 | 382 | ||
477 | case XFS_DINODE_FMT_BTREE: | 383 | case XFS_DINODE_FMT_BTREE: |
478 | ASSERT(!(iip->ili_format.ilf_fields & | 384 | iip->ili_fields &= |
479 | (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); | 385 | ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); |
480 | if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { | 386 | |
481 | ASSERT(ip->i_afp->if_broot_bytes > 0); | 387 | if ((iip->ili_fields & XFS_ILOG_ABROOT) && |
388 | ip->i_afp->if_broot_bytes > 0) { | ||
482 | ASSERT(ip->i_afp->if_broot != NULL); | 389 | ASSERT(ip->i_afp->if_broot != NULL); |
390 | |||
483 | vecp->i_addr = ip->i_afp->if_broot; | 391 | vecp->i_addr = ip->i_afp->if_broot; |
484 | vecp->i_len = ip->i_afp->if_broot_bytes; | 392 | vecp->i_len = ip->i_afp->if_broot_bytes; |
485 | vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; | 393 | vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; |
486 | vecp++; | 394 | vecp++; |
487 | nvecs++; | 395 | nvecs++; |
488 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; | 396 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; |
397 | } else { | ||
398 | iip->ili_fields &= ~XFS_ILOG_ABROOT; | ||
489 | } | 399 | } |
490 | break; | 400 | break; |
491 | 401 | ||
492 | case XFS_DINODE_FMT_LOCAL: | 402 | case XFS_DINODE_FMT_LOCAL: |
493 | ASSERT(!(iip->ili_format.ilf_fields & | 403 | iip->ili_fields &= |
494 | (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); | 404 | ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); |
495 | if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { | 405 | |
496 | ASSERT(ip->i_afp->if_bytes > 0); | 406 | if ((iip->ili_fields & XFS_ILOG_ADATA) && |
407 | ip->i_afp->if_bytes > 0) { | ||
497 | ASSERT(ip->i_afp->if_u1.if_data != NULL); | 408 | ASSERT(ip->i_afp->if_u1.if_data != NULL); |
498 | 409 | ||
499 | vecp->i_addr = ip->i_afp->if_u1.if_data; | 410 | vecp->i_addr = ip->i_afp->if_u1.if_data; |
@@ -510,6 +421,8 @@ xfs_inode_item_format( | |||
510 | vecp++; | 421 | vecp++; |
511 | nvecs++; | 422 | nvecs++; |
512 | iip->ili_format.ilf_asize = (unsigned)data_bytes; | 423 | iip->ili_format.ilf_asize = (unsigned)data_bytes; |
424 | } else { | ||
425 | iip->ili_fields &= ~XFS_ILOG_ADATA; | ||
513 | } | 426 | } |
514 | break; | 427 | break; |
515 | 428 | ||
@@ -518,6 +431,15 @@ xfs_inode_item_format( | |||
518 | break; | 431 | break; |
519 | } | 432 | } |
520 | 433 | ||
434 | out: | ||
435 | /* | ||
436 | * Now update the log format that goes out to disk from the in-core | ||
437 | * values. We always write the inode core to make the arithmetic | ||
438 | * games in recovery easier, which isn't a big deal as just about any | ||
439 | * transaction would dirty it anyway. | ||
440 | */ | ||
441 | iip->ili_format.ilf_fields = XFS_ILOG_CORE | | ||
442 | (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); | ||
521 | iip->ili_format.ilf_size = nvecs; | 443 | iip->ili_format.ilf_size = nvecs; |
522 | } | 444 | } |
523 | 445 | ||
@@ -596,17 +518,13 @@ xfs_inode_item_trylock( | |||
596 | /* Stale items should force out the iclog */ | 518 | /* Stale items should force out the iclog */ |
597 | if (ip->i_flags & XFS_ISTALE) { | 519 | if (ip->i_flags & XFS_ISTALE) { |
598 | xfs_ifunlock(ip); | 520 | xfs_ifunlock(ip); |
599 | /* | 521 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
600 | * we hold the AIL lock - notify the unlock routine of this | ||
601 | * so it doesn't try to get the lock again. | ||
602 | */ | ||
603 | xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); | ||
604 | return XFS_ITEM_PINNED; | 522 | return XFS_ITEM_PINNED; |
605 | } | 523 | } |
606 | 524 | ||
607 | #ifdef DEBUG | 525 | #ifdef DEBUG |
608 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 526 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
609 | ASSERT(iip->ili_format.ilf_fields != 0); | 527 | ASSERT(iip->ili_fields != 0); |
610 | ASSERT(iip->ili_logged == 0); | 528 | ASSERT(iip->ili_logged == 0); |
611 | ASSERT(lip->li_flags & XFS_LI_IN_AIL); | 529 | ASSERT(lip->li_flags & XFS_LI_IN_AIL); |
612 | } | 530 | } |
@@ -638,7 +556,7 @@ xfs_inode_item_unlock( | |||
638 | if (iip->ili_extents_buf != NULL) { | 556 | if (iip->ili_extents_buf != NULL) { |
639 | ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); | 557 | ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); |
640 | ASSERT(ip->i_d.di_nextents > 0); | 558 | ASSERT(ip->i_d.di_nextents > 0); |
641 | ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); | 559 | ASSERT(iip->ili_fields & XFS_ILOG_DEXT); |
642 | ASSERT(ip->i_df.if_bytes > 0); | 560 | ASSERT(ip->i_df.if_bytes > 0); |
643 | kmem_free(iip->ili_extents_buf); | 561 | kmem_free(iip->ili_extents_buf); |
644 | iip->ili_extents_buf = NULL; | 562 | iip->ili_extents_buf = NULL; |
@@ -646,7 +564,7 @@ xfs_inode_item_unlock( | |||
646 | if (iip->ili_aextents_buf != NULL) { | 564 | if (iip->ili_aextents_buf != NULL) { |
647 | ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); | 565 | ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); |
648 | ASSERT(ip->i_d.di_anextents > 0); | 566 | ASSERT(ip->i_d.di_anextents > 0); |
649 | ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); | 567 | ASSERT(iip->ili_fields & XFS_ILOG_AEXT); |
650 | ASSERT(ip->i_afp->if_bytes > 0); | 568 | ASSERT(ip->i_afp->if_bytes > 0); |
651 | kmem_free(iip->ili_aextents_buf); | 569 | kmem_free(iip->ili_aextents_buf); |
652 | iip->ili_aextents_buf = NULL; | 570 | iip->ili_aextents_buf = NULL; |
@@ -761,8 +679,7 @@ xfs_inode_item_push( | |||
761 | * lock without sleeping, then there must not have been | 679 | * lock without sleeping, then there must not have been |
762 | * anyone in the process of flushing the inode. | 680 | * anyone in the process of flushing the inode. |
763 | */ | 681 | */ |
764 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || | 682 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0); |
765 | iip->ili_format.ilf_fields != 0); | ||
766 | 683 | ||
767 | /* | 684 | /* |
768 | * Push the inode to it's backing buffer. This will not remove the | 685 | * Push the inode to it's backing buffer. This will not remove the |
@@ -985,7 +902,7 @@ xfs_iflush_abort( | |||
985 | * Clear the inode logging fields so no more flushes are | 902 | * Clear the inode logging fields so no more flushes are |
986 | * attempted. | 903 | * attempted. |
987 | */ | 904 | */ |
988 | iip->ili_format.ilf_fields = 0; | 905 | iip->ili_fields = 0; |
989 | } | 906 | } |
990 | /* | 907 | /* |
991 | * Release the inode's flush lock since we're done with it. | 908 | * Release the inode's flush lock since we're done with it. |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index d3dee61e6d91..41d61c3b7a36 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 { | |||
86 | #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ | 86 | #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ |
87 | #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ | 87 | #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ |
88 | 88 | ||
89 | |||
90 | /* | ||
91 | * The timestamps are dirty, but not necessarily anything else in the inode | ||
92 | * core. Unlike the other fields above this one must never make it to disk | ||
93 | * in the ilf_fields of the inode_log_format, but is purely store in-memory in | ||
94 | * ili_fields in the inode_log_item. | ||
95 | */ | ||
96 | #define XFS_ILOG_TIMESTAMP 0x4000 | ||
97 | |||
89 | #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ | 98 | #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ |
90 | XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ | 99 | XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ |
91 | XFS_ILOG_UUID | XFS_ILOG_ADATA | \ | 100 | XFS_ILOG_UUID | XFS_ILOG_ADATA | \ |
@@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 { | |||
101 | XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ | 110 | XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ |
102 | XFS_ILOG_DEV | XFS_ILOG_UUID | \ | 111 | XFS_ILOG_DEV | XFS_ILOG_UUID | \ |
103 | XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ | 112 | XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ |
104 | XFS_ILOG_ABROOT) | 113 | XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP) |
105 | 114 | ||
106 | static inline int xfs_ilog_fbroot(int w) | 115 | static inline int xfs_ilog_fbroot(int w) |
107 | { | 116 | { |
@@ -134,6 +143,7 @@ typedef struct xfs_inode_log_item { | |||
134 | unsigned short ili_lock_flags; /* lock flags */ | 143 | unsigned short ili_lock_flags; /* lock flags */ |
135 | unsigned short ili_logged; /* flushed logged data */ | 144 | unsigned short ili_logged; /* flushed logged data */ |
136 | unsigned int ili_last_fields; /* fields when flushed */ | 145 | unsigned int ili_last_fields; /* fields when flushed */ |
146 | unsigned int ili_fields; /* fields to be logged */ | ||
137 | struct xfs_bmbt_rec *ili_extents_buf; /* array of logged | 147 | struct xfs_bmbt_rec *ili_extents_buf; /* array of logged |
138 | data exts */ | 148 | data exts */ |
139 | struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged | 149 | struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged |
@@ -148,9 +158,7 @@ typedef struct xfs_inode_log_item { | |||
148 | 158 | ||
149 | static inline int xfs_inode_clean(xfs_inode_t *ip) | 159 | static inline int xfs_inode_clean(xfs_inode_t *ip) |
150 | { | 160 | { |
151 | return (!ip->i_itemp || | 161 | return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL); |
152 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
153 | !ip->i_update_core; | ||
154 | } | 162 | } |
155 | 163 | ||
156 | extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); | 164 | extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 76f3ca5cfc36..91f8ff547ab3 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -209,6 +209,7 @@ xfs_open_by_handle( | |||
209 | struct file *filp; | 209 | struct file *filp; |
210 | struct inode *inode; | 210 | struct inode *inode; |
211 | struct dentry *dentry; | 211 | struct dentry *dentry; |
212 | fmode_t fmode; | ||
212 | 213 | ||
213 | if (!capable(CAP_SYS_ADMIN)) | 214 | if (!capable(CAP_SYS_ADMIN)) |
214 | return -XFS_ERROR(EPERM); | 215 | return -XFS_ERROR(EPERM); |
@@ -228,26 +229,21 @@ xfs_open_by_handle( | |||
228 | hreq->oflags |= O_LARGEFILE; | 229 | hreq->oflags |= O_LARGEFILE; |
229 | #endif | 230 | #endif |
230 | 231 | ||
231 | /* Put open permission in namei format. */ | ||
232 | permflag = hreq->oflags; | 232 | permflag = hreq->oflags; |
233 | if ((permflag+1) & O_ACCMODE) | 233 | fmode = OPEN_FMODE(permflag); |
234 | permflag++; | ||
235 | if (permflag & O_TRUNC) | ||
236 | permflag |= 2; | ||
237 | |||
238 | if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && | 234 | if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && |
239 | (permflag & FMODE_WRITE) && IS_APPEND(inode)) { | 235 | (fmode & FMODE_WRITE) && IS_APPEND(inode)) { |
240 | error = -XFS_ERROR(EPERM); | 236 | error = -XFS_ERROR(EPERM); |
241 | goto out_dput; | 237 | goto out_dput; |
242 | } | 238 | } |
243 | 239 | ||
244 | if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { | 240 | if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { |
245 | error = -XFS_ERROR(EACCES); | 241 | error = -XFS_ERROR(EACCES); |
246 | goto out_dput; | 242 | goto out_dput; |
247 | } | 243 | } |
248 | 244 | ||
249 | /* Can't write directories. */ | 245 | /* Can't write directories. */ |
250 | if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { | 246 | if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { |
251 | error = -XFS_ERROR(EISDIR); | 247 | error = -XFS_ERROR(EISDIR); |
252 | goto out_dput; | 248 | goto out_dput; |
253 | } | 249 | } |
@@ -450,9 +446,12 @@ xfs_attrmulti_attr_get( | |||
450 | 446 | ||
451 | if (*len > XATTR_SIZE_MAX) | 447 | if (*len > XATTR_SIZE_MAX) |
452 | return EINVAL; | 448 | return EINVAL; |
453 | kbuf = kmalloc(*len, GFP_KERNEL); | 449 | kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL); |
454 | if (!kbuf) | 450 | if (!kbuf) { |
455 | return ENOMEM; | 451 | kbuf = kmem_zalloc_large(*len); |
452 | if (!kbuf) | ||
453 | return ENOMEM; | ||
454 | } | ||
456 | 455 | ||
457 | error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); | 456 | error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); |
458 | if (error) | 457 | if (error) |
@@ -462,7 +461,10 @@ xfs_attrmulti_attr_get( | |||
462 | error = EFAULT; | 461 | error = EFAULT; |
463 | 462 | ||
464 | out_kfree: | 463 | out_kfree: |
465 | kfree(kbuf); | 464 | if (is_vmalloc_addr(kbuf)) |
465 | kmem_free_large(kbuf); | ||
466 | else | ||
467 | kmem_free(kbuf); | ||
466 | return error; | 468 | return error; |
467 | } | 469 | } |
468 | 470 | ||
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index f9ccb7b7c043..a849a5473aff 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -293,7 +293,7 @@ xfs_compat_ioc_bulkstat( | |||
293 | int res; | 293 | int res; |
294 | 294 | ||
295 | error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, | 295 | error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, |
296 | sizeof(compat_xfs_bstat_t), 0, &res); | 296 | sizeof(compat_xfs_bstat_t), NULL, &res); |
297 | } else if (cmd == XFS_IOC_FSBULKSTAT_32) { | 297 | } else if (cmd == XFS_IOC_FSBULKSTAT_32) { |
298 | error = xfs_bulkstat(mp, &inlast, &count, | 298 | error = xfs_bulkstat(mp, &inlast, &count, |
299 | xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), | 299 | xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 246c7d57c6f9..71a464503c43 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "xfs_ialloc_btree.h" | 31 | #include "xfs_ialloc_btree.h" |
32 | #include "xfs_dinode.h" | 32 | #include "xfs_dinode.h" |
33 | #include "xfs_inode.h" | 33 | #include "xfs_inode.h" |
34 | #include "xfs_inode_item.h" | ||
34 | #include "xfs_btree.h" | 35 | #include "xfs_btree.h" |
35 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
36 | #include "xfs_rtalloc.h" | 37 | #include "xfs_rtalloc.h" |
@@ -645,6 +646,7 @@ xfs_iomap_write_unwritten( | |||
645 | xfs_trans_t *tp; | 646 | xfs_trans_t *tp; |
646 | xfs_bmbt_irec_t imap; | 647 | xfs_bmbt_irec_t imap; |
647 | xfs_bmap_free_t free_list; | 648 | xfs_bmap_free_t free_list; |
649 | xfs_fsize_t i_size; | ||
648 | uint resblks; | 650 | uint resblks; |
649 | int committed; | 651 | int committed; |
650 | int error; | 652 | int error; |
@@ -705,7 +707,22 @@ xfs_iomap_write_unwritten( | |||
705 | if (error) | 707 | if (error) |
706 | goto error_on_bmapi_transaction; | 708 | goto error_on_bmapi_transaction; |
707 | 709 | ||
708 | error = xfs_bmap_finish(&(tp), &(free_list), &committed); | 710 | /* |
711 | * Log the updated inode size as we go. We have to be careful | ||
712 | * to only log it up to the actual write offset if it is | ||
713 | * halfway into a block. | ||
714 | */ | ||
715 | i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); | ||
716 | if (i_size > offset + count) | ||
717 | i_size = offset + count; | ||
718 | |||
719 | i_size = xfs_new_eof(ip, i_size); | ||
720 | if (i_size) { | ||
721 | ip->i_d.di_size = i_size; | ||
722 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
723 | } | ||
724 | |||
725 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
709 | if (error) | 726 | if (error) |
710 | goto error_on_bmapi_transaction; | 727 | goto error_on_bmapi_transaction; |
711 | 728 | ||
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ab302539e5b9..3011b879f850 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -50,65 +50,15 @@ | |||
50 | #include <linux/fiemap.h> | 50 | #include <linux/fiemap.h> |
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | 52 | ||
53 | /* | 53 | static int |
54 | * Bring the timestamps in the XFS inode uptodate. | 54 | xfs_initxattrs( |
55 | * | 55 | struct inode *inode, |
56 | * Used before writing the inode to disk. | 56 | const struct xattr *xattr_array, |
57 | */ | 57 | void *fs_info) |
58 | void | ||
59 | xfs_synchronize_times( | ||
60 | xfs_inode_t *ip) | ||
61 | { | ||
62 | struct inode *inode = VFS_I(ip); | ||
63 | |||
64 | ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; | ||
65 | ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; | ||
66 | ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; | ||
67 | ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; | ||
68 | ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; | ||
69 | ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * If the linux inode is valid, mark it dirty, else mark the dirty state | ||
74 | * in the XFS inode to make sure we pick it up when reclaiming the inode. | ||
75 | */ | ||
76 | void | ||
77 | xfs_mark_inode_dirty_sync( | ||
78 | xfs_inode_t *ip) | ||
79 | { | ||
80 | struct inode *inode = VFS_I(ip); | ||
81 | |||
82 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) | ||
83 | mark_inode_dirty_sync(inode); | ||
84 | else { | ||
85 | barrier(); | ||
86 | ip->i_update_core = 1; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | void | ||
91 | xfs_mark_inode_dirty( | ||
92 | xfs_inode_t *ip) | ||
93 | { | ||
94 | struct inode *inode = VFS_I(ip); | ||
95 | |||
96 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) | ||
97 | mark_inode_dirty(inode); | ||
98 | else { | ||
99 | barrier(); | ||
100 | ip->i_update_core = 1; | ||
101 | } | ||
102 | |||
103 | } | ||
104 | |||
105 | |||
106 | int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | ||
107 | void *fs_info) | ||
108 | { | 58 | { |
109 | const struct xattr *xattr; | 59 | const struct xattr *xattr; |
110 | struct xfs_inode *ip = XFS_I(inode); | 60 | struct xfs_inode *ip = XFS_I(inode); |
111 | int error = 0; | 61 | int error = 0; |
112 | 62 | ||
113 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { | 63 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
114 | error = xfs_attr_set(ip, xattr->name, xattr->value, | 64 | error = xfs_attr_set(ip, xattr->name, xattr->value, |
@@ -678,19 +628,16 @@ xfs_setattr_nonsize( | |||
678 | inode->i_atime = iattr->ia_atime; | 628 | inode->i_atime = iattr->ia_atime; |
679 | ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; | 629 | ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; |
680 | ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; | 630 | ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; |
681 | ip->i_update_core = 1; | ||
682 | } | 631 | } |
683 | if (mask & ATTR_CTIME) { | 632 | if (mask & ATTR_CTIME) { |
684 | inode->i_ctime = iattr->ia_ctime; | 633 | inode->i_ctime = iattr->ia_ctime; |
685 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; | 634 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; |
686 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; | 635 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; |
687 | ip->i_update_core = 1; | ||
688 | } | 636 | } |
689 | if (mask & ATTR_MTIME) { | 637 | if (mask & ATTR_MTIME) { |
690 | inode->i_mtime = iattr->ia_mtime; | 638 | inode->i_mtime = iattr->ia_mtime; |
691 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; | 639 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; |
692 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; | 640 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; |
693 | ip->i_update_core = 1; | ||
694 | } | 641 | } |
695 | 642 | ||
696 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 643 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
@@ -918,13 +865,11 @@ xfs_setattr_size( | |||
918 | inode->i_ctime = iattr->ia_ctime; | 865 | inode->i_ctime = iattr->ia_ctime; |
919 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; | 866 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; |
920 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; | 867 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; |
921 | ip->i_update_core = 1; | ||
922 | } | 868 | } |
923 | if (mask & ATTR_MTIME) { | 869 | if (mask & ATTR_MTIME) { |
924 | inode->i_mtime = iattr->ia_mtime; | 870 | inode->i_mtime = iattr->ia_mtime; |
925 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; | 871 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; |
926 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; | 872 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; |
927 | ip->i_update_core = 1; | ||
928 | } | 873 | } |
929 | 874 | ||
930 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 875 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 751e94fe1f77..acc2bf264dab 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -62,7 +62,6 @@ xfs_bulkstat_one_int( | |||
62 | { | 62 | { |
63 | struct xfs_icdinode *dic; /* dinode core info pointer */ | 63 | struct xfs_icdinode *dic; /* dinode core info pointer */ |
64 | struct xfs_inode *ip; /* incore inode pointer */ | 64 | struct xfs_inode *ip; /* incore inode pointer */ |
65 | struct inode *inode; | ||
66 | struct xfs_bstat *buf; /* return buffer */ | 65 | struct xfs_bstat *buf; /* return buffer */ |
67 | int error = 0; /* error value */ | 66 | int error = 0; /* error value */ |
68 | 67 | ||
@@ -76,7 +75,8 @@ xfs_bulkstat_one_int( | |||
76 | return XFS_ERROR(ENOMEM); | 75 | return XFS_ERROR(ENOMEM); |
77 | 76 | ||
78 | error = xfs_iget(mp, NULL, ino, | 77 | error = xfs_iget(mp, NULL, ino, |
79 | XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip); | 78 | (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), |
79 | XFS_ILOCK_SHARED, &ip); | ||
80 | if (error) { | 80 | if (error) { |
81 | *stat = BULKSTAT_RV_NOTHING; | 81 | *stat = BULKSTAT_RV_NOTHING; |
82 | goto out_free; | 82 | goto out_free; |
@@ -86,7 +86,6 @@ xfs_bulkstat_one_int( | |||
86 | ASSERT(ip->i_imap.im_blkno != 0); | 86 | ASSERT(ip->i_imap.im_blkno != 0); |
87 | 87 | ||
88 | dic = &ip->i_d; | 88 | dic = &ip->i_d; |
89 | inode = VFS_I(ip); | ||
90 | 89 | ||
91 | /* xfs_iget returns the following without needing | 90 | /* xfs_iget returns the following without needing |
92 | * further change. | 91 | * further change. |
@@ -99,19 +98,12 @@ xfs_bulkstat_one_int( | |||
99 | buf->bs_uid = dic->di_uid; | 98 | buf->bs_uid = dic->di_uid; |
100 | buf->bs_gid = dic->di_gid; | 99 | buf->bs_gid = dic->di_gid; |
101 | buf->bs_size = dic->di_size; | 100 | buf->bs_size = dic->di_size; |
102 | 101 | buf->bs_atime.tv_sec = dic->di_atime.t_sec; | |
103 | /* | 102 | buf->bs_atime.tv_nsec = dic->di_atime.t_nsec; |
104 | * We need to read the timestamps from the Linux inode because | 103 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; |
105 | * the VFS keeps writing directly into the inode structure instead | 104 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; |
106 | * of telling us about the updates. | 105 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; |
107 | */ | 106 | buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; |
108 | buf->bs_atime.tv_sec = inode->i_atime.tv_sec; | ||
109 | buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; | ||
110 | buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; | ||
111 | buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; | ||
112 | buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; | ||
113 | buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; | ||
114 | |||
115 | buf->bs_xflags = xfs_ip2xflags(ip); | 107 | buf->bs_xflags = xfs_ip2xflags(ip); |
116 | buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; | 108 | buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; |
117 | buf->bs_extents = dic->di_nextents; | 109 | buf->bs_extents = dic->di_nextents; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e2cc3568c299..6db1fef38bff 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -67,15 +67,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log, | |||
67 | int eventual_size); | 67 | int eventual_size); |
68 | STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | 68 | STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); |
69 | 69 | ||
70 | /* local functions to manipulate grant head */ | ||
71 | STATIC int xlog_grant_log_space(xlog_t *log, | ||
72 | xlog_ticket_t *xtic); | ||
73 | STATIC void xlog_grant_push_ail(struct log *log, | 70 | STATIC void xlog_grant_push_ail(struct log *log, |
74 | int need_bytes); | 71 | int need_bytes); |
75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, | 72 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, |
76 | xlog_ticket_t *ticket); | 73 | xlog_ticket_t *ticket); |
77 | STATIC int xlog_regrant_write_log_space(xlog_t *log, | ||
78 | xlog_ticket_t *ticket); | ||
79 | STATIC void xlog_ungrant_log_space(xlog_t *log, | 74 | STATIC void xlog_ungrant_log_space(xlog_t *log, |
80 | xlog_ticket_t *ticket); | 75 | xlog_ticket_t *ticket); |
81 | 76 | ||
@@ -150,78 +145,93 @@ xlog_grant_add_space( | |||
150 | } while (head_val != old); | 145 | } while (head_val != old); |
151 | } | 146 | } |
152 | 147 | ||
153 | STATIC bool | 148 | STATIC void |
154 | xlog_reserveq_wake( | 149 | xlog_grant_head_init( |
155 | struct log *log, | 150 | struct xlog_grant_head *head) |
156 | int *free_bytes) | 151 | { |
152 | xlog_assign_grant_head(&head->grant, 1, 0); | ||
153 | INIT_LIST_HEAD(&head->waiters); | ||
154 | spin_lock_init(&head->lock); | ||
155 | } | ||
156 | |||
157 | STATIC void | ||
158 | xlog_grant_head_wake_all( | ||
159 | struct xlog_grant_head *head) | ||
157 | { | 160 | { |
158 | struct xlog_ticket *tic; | 161 | struct xlog_ticket *tic; |
159 | int need_bytes; | ||
160 | 162 | ||
161 | list_for_each_entry(tic, &log->l_reserveq, t_queue) { | 163 | spin_lock(&head->lock); |
164 | list_for_each_entry(tic, &head->waiters, t_queue) | ||
165 | wake_up_process(tic->t_task); | ||
166 | spin_unlock(&head->lock); | ||
167 | } | ||
168 | |||
169 | static inline int | ||
170 | xlog_ticket_reservation( | ||
171 | struct log *log, | ||
172 | struct xlog_grant_head *head, | ||
173 | struct xlog_ticket *tic) | ||
174 | { | ||
175 | if (head == &log->l_write_head) { | ||
176 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | ||
177 | return tic->t_unit_res; | ||
178 | } else { | ||
162 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) | 179 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) |
163 | need_bytes = tic->t_unit_res * tic->t_cnt; | 180 | return tic->t_unit_res * tic->t_cnt; |
164 | else | 181 | else |
165 | need_bytes = tic->t_unit_res; | 182 | return tic->t_unit_res; |
166 | |||
167 | if (*free_bytes < need_bytes) | ||
168 | return false; | ||
169 | *free_bytes -= need_bytes; | ||
170 | |||
171 | trace_xfs_log_grant_wake_up(log, tic); | ||
172 | wake_up(&tic->t_wait); | ||
173 | } | 183 | } |
174 | |||
175 | return true; | ||
176 | } | 184 | } |
177 | 185 | ||
178 | STATIC bool | 186 | STATIC bool |
179 | xlog_writeq_wake( | 187 | xlog_grant_head_wake( |
180 | struct log *log, | 188 | struct log *log, |
189 | struct xlog_grant_head *head, | ||
181 | int *free_bytes) | 190 | int *free_bytes) |
182 | { | 191 | { |
183 | struct xlog_ticket *tic; | 192 | struct xlog_ticket *tic; |
184 | int need_bytes; | 193 | int need_bytes; |
185 | 194 | ||
186 | list_for_each_entry(tic, &log->l_writeq, t_queue) { | 195 | list_for_each_entry(tic, &head->waiters, t_queue) { |
187 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | 196 | need_bytes = xlog_ticket_reservation(log, head, tic); |
188 | |||
189 | need_bytes = tic->t_unit_res; | ||
190 | |||
191 | if (*free_bytes < need_bytes) | 197 | if (*free_bytes < need_bytes) |
192 | return false; | 198 | return false; |
193 | *free_bytes -= need_bytes; | ||
194 | 199 | ||
195 | trace_xfs_log_regrant_write_wake_up(log, tic); | 200 | *free_bytes -= need_bytes; |
196 | wake_up(&tic->t_wait); | 201 | trace_xfs_log_grant_wake_up(log, tic); |
202 | wake_up_process(tic->t_task); | ||
197 | } | 203 | } |
198 | 204 | ||
199 | return true; | 205 | return true; |
200 | } | 206 | } |
201 | 207 | ||
202 | STATIC int | 208 | STATIC int |
203 | xlog_reserveq_wait( | 209 | xlog_grant_head_wait( |
204 | struct log *log, | 210 | struct log *log, |
211 | struct xlog_grant_head *head, | ||
205 | struct xlog_ticket *tic, | 212 | struct xlog_ticket *tic, |
206 | int need_bytes) | 213 | int need_bytes) |
207 | { | 214 | { |
208 | list_add_tail(&tic->t_queue, &log->l_reserveq); | 215 | list_add_tail(&tic->t_queue, &head->waiters); |
209 | 216 | ||
210 | do { | 217 | do { |
211 | if (XLOG_FORCED_SHUTDOWN(log)) | 218 | if (XLOG_FORCED_SHUTDOWN(log)) |
212 | goto shutdown; | 219 | goto shutdown; |
213 | xlog_grant_push_ail(log, need_bytes); | 220 | xlog_grant_push_ail(log, need_bytes); |
214 | 221 | ||
222 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
223 | spin_unlock(&head->lock); | ||
224 | |||
215 | XFS_STATS_INC(xs_sleep_logspace); | 225 | XFS_STATS_INC(xs_sleep_logspace); |
216 | trace_xfs_log_grant_sleep(log, tic); | ||
217 | 226 | ||
218 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); | 227 | trace_xfs_log_grant_sleep(log, tic); |
228 | schedule(); | ||
219 | trace_xfs_log_grant_wake(log, tic); | 229 | trace_xfs_log_grant_wake(log, tic); |
220 | 230 | ||
221 | spin_lock(&log->l_grant_reserve_lock); | 231 | spin_lock(&head->lock); |
222 | if (XLOG_FORCED_SHUTDOWN(log)) | 232 | if (XLOG_FORCED_SHUTDOWN(log)) |
223 | goto shutdown; | 233 | goto shutdown; |
224 | } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); | 234 | } while (xlog_space_left(log, &head->grant) < need_bytes); |
225 | 235 | ||
226 | list_del_init(&tic->t_queue); | 236 | list_del_init(&tic->t_queue); |
227 | return 0; | 237 | return 0; |
@@ -230,35 +240,58 @@ shutdown: | |||
230 | return XFS_ERROR(EIO); | 240 | return XFS_ERROR(EIO); |
231 | } | 241 | } |
232 | 242 | ||
243 | /* | ||
244 | * Atomically get the log space required for a log ticket. | ||
245 | * | ||
246 | * Once a ticket gets put onto head->waiters, it will only return after the | ||
247 | * needed reservation is satisfied. | ||
248 | * | ||
249 | * This function is structured so that it has a lock free fast path. This is | ||
250 | * necessary because every new transaction reservation will come through this | ||
251 | * path. Hence any lock will be globally hot if we take it unconditionally on | ||
252 | * every pass. | ||
253 | * | ||
254 | * As tickets are only ever moved on and off head->waiters under head->lock, we | ||
255 | * only need to take that lock if we are going to add the ticket to the queue | ||
256 | * and sleep. We can avoid taking the lock if the ticket was never added to | ||
257 | * head->waiters because the t_queue list head will be empty and we hold the | ||
258 | * only reference to it so it can safely be checked unlocked. | ||
259 | */ | ||
233 | STATIC int | 260 | STATIC int |
234 | xlog_writeq_wait( | 261 | xlog_grant_head_check( |
235 | struct log *log, | 262 | struct log *log, |
263 | struct xlog_grant_head *head, | ||
236 | struct xlog_ticket *tic, | 264 | struct xlog_ticket *tic, |
237 | int need_bytes) | 265 | int *need_bytes) |
238 | { | 266 | { |
239 | list_add_tail(&tic->t_queue, &log->l_writeq); | 267 | int free_bytes; |
240 | 268 | int error = 0; | |
241 | do { | ||
242 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
243 | goto shutdown; | ||
244 | xlog_grant_push_ail(log, need_bytes); | ||
245 | |||
246 | XFS_STATS_INC(xs_sleep_logspace); | ||
247 | trace_xfs_log_regrant_write_sleep(log, tic); | ||
248 | 269 | ||
249 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); | 270 | ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); |
250 | trace_xfs_log_regrant_write_wake(log, tic); | ||
251 | 271 | ||
252 | spin_lock(&log->l_grant_write_lock); | 272 | /* |
253 | if (XLOG_FORCED_SHUTDOWN(log)) | 273 | * If there are other waiters on the queue then give them a chance at |
254 | goto shutdown; | 274 | * logspace before us. Wake up the first waiters, if we do not wake |
255 | } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); | 275 | * up all the waiters then go to sleep waiting for more free space, |
276 | * otherwise try to get some space for this transaction. | ||
277 | */ | ||
278 | *need_bytes = xlog_ticket_reservation(log, head, tic); | ||
279 | free_bytes = xlog_space_left(log, &head->grant); | ||
280 | if (!list_empty_careful(&head->waiters)) { | ||
281 | spin_lock(&head->lock); | ||
282 | if (!xlog_grant_head_wake(log, head, &free_bytes) || | ||
283 | free_bytes < *need_bytes) { | ||
284 | error = xlog_grant_head_wait(log, head, tic, | ||
285 | *need_bytes); | ||
286 | } | ||
287 | spin_unlock(&head->lock); | ||
288 | } else if (free_bytes < *need_bytes) { | ||
289 | spin_lock(&head->lock); | ||
290 | error = xlog_grant_head_wait(log, head, tic, *need_bytes); | ||
291 | spin_unlock(&head->lock); | ||
292 | } | ||
256 | 293 | ||
257 | list_del_init(&tic->t_queue); | 294 | return error; |
258 | return 0; | ||
259 | shutdown: | ||
260 | list_del_init(&tic->t_queue); | ||
261 | return XFS_ERROR(EIO); | ||
262 | } | 295 | } |
263 | 296 | ||
264 | static void | 297 | static void |
@@ -286,6 +319,128 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) | |||
286 | } | 319 | } |
287 | 320 | ||
288 | /* | 321 | /* |
322 | * Replenish the byte reservation required by moving the grant write head. | ||
323 | */ | ||
324 | int | ||
325 | xfs_log_regrant( | ||
326 | struct xfs_mount *mp, | ||
327 | struct xlog_ticket *tic) | ||
328 | { | ||
329 | struct log *log = mp->m_log; | ||
330 | int need_bytes; | ||
331 | int error = 0; | ||
332 | |||
333 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
334 | return XFS_ERROR(EIO); | ||
335 | |||
336 | XFS_STATS_INC(xs_try_logspace); | ||
337 | |||
338 | /* | ||
339 | * This is a new transaction on the ticket, so we need to change the | ||
340 | * transaction ID so that the next transaction has a different TID in | ||
341 | * the log. Just add one to the existing tid so that we can see chains | ||
342 | * of rolling transactions in the log easily. | ||
343 | */ | ||
344 | tic->t_tid++; | ||
345 | |||
346 | xlog_grant_push_ail(log, tic->t_unit_res); | ||
347 | |||
348 | tic->t_curr_res = tic->t_unit_res; | ||
349 | xlog_tic_reset_res(tic); | ||
350 | |||
351 | if (tic->t_cnt > 0) | ||
352 | return 0; | ||
353 | |||
354 | trace_xfs_log_regrant(log, tic); | ||
355 | |||
356 | error = xlog_grant_head_check(log, &log->l_write_head, tic, | ||
357 | &need_bytes); | ||
358 | if (error) | ||
359 | goto out_error; | ||
360 | |||
361 | xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); | ||
362 | trace_xfs_log_regrant_exit(log, tic); | ||
363 | xlog_verify_grant_tail(log); | ||
364 | return 0; | ||
365 | |||
366 | out_error: | ||
367 | /* | ||
368 | * If we are failing, make sure the ticket doesn't have any current | ||
369 | * reservations. We don't want to add this back when the ticket/ | ||
370 | * transaction gets cancelled. | ||
371 | */ | ||
372 | tic->t_curr_res = 0; | ||
373 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | ||
374 | return error; | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * Reserve log space and return a ticket corresponding the reservation. | ||
379 | * | ||
380 | * Each reservation is going to reserve extra space for a log record header. | ||
381 | * When writes happen to the on-disk log, we don't subtract the length of the | ||
382 | * log record header from any reservation. By wasting space in each | ||
383 | * reservation, we prevent over allocation problems. | ||
384 | */ | ||
385 | int | ||
386 | xfs_log_reserve( | ||
387 | struct xfs_mount *mp, | ||
388 | int unit_bytes, | ||
389 | int cnt, | ||
390 | struct xlog_ticket **ticp, | ||
391 | __uint8_t client, | ||
392 | bool permanent, | ||
393 | uint t_type) | ||
394 | { | ||
395 | struct log *log = mp->m_log; | ||
396 | struct xlog_ticket *tic; | ||
397 | int need_bytes; | ||
398 | int error = 0; | ||
399 | |||
400 | ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); | ||
401 | |||
402 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
403 | return XFS_ERROR(EIO); | ||
404 | |||
405 | XFS_STATS_INC(xs_try_logspace); | ||
406 | |||
407 | ASSERT(*ticp == NULL); | ||
408 | tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, | ||
409 | KM_SLEEP | KM_MAYFAIL); | ||
410 | if (!tic) | ||
411 | return XFS_ERROR(ENOMEM); | ||
412 | |||
413 | tic->t_trans_type = t_type; | ||
414 | *ticp = tic; | ||
415 | |||
416 | xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt); | ||
417 | |||
418 | trace_xfs_log_reserve(log, tic); | ||
419 | |||
420 | error = xlog_grant_head_check(log, &log->l_reserve_head, tic, | ||
421 | &need_bytes); | ||
422 | if (error) | ||
423 | goto out_error; | ||
424 | |||
425 | xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes); | ||
426 | xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); | ||
427 | trace_xfs_log_reserve_exit(log, tic); | ||
428 | xlog_verify_grant_tail(log); | ||
429 | return 0; | ||
430 | |||
431 | out_error: | ||
432 | /* | ||
433 | * If we are failing, make sure the ticket doesn't have any current | ||
434 | * reservations. We don't want to add this back when the ticket/ | ||
435 | * transaction gets cancelled. | ||
436 | */ | ||
437 | tic->t_curr_res = 0; | ||
438 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | ||
439 | return error; | ||
440 | } | ||
441 | |||
442 | |||
443 | /* | ||
289 | * NOTES: | 444 | * NOTES: |
290 | * | 445 | * |
291 | * 1. currblock field gets updated at startup and after in-core logs | 446 | * 1. currblock field gets updated at startup and after in-core logs |
@@ -395,88 +550,6 @@ xfs_log_release_iclog( | |||
395 | } | 550 | } |
396 | 551 | ||
397 | /* | 552 | /* |
398 | * 1. Reserve an amount of on-disk log space and return a ticket corresponding | ||
399 | * to the reservation. | ||
400 | * 2. Potentially, push buffers at tail of log to disk. | ||
401 | * | ||
402 | * Each reservation is going to reserve extra space for a log record header. | ||
403 | * When writes happen to the on-disk log, we don't subtract the length of the | ||
404 | * log record header from any reservation. By wasting space in each | ||
405 | * reservation, we prevent over allocation problems. | ||
406 | */ | ||
407 | int | ||
408 | xfs_log_reserve( | ||
409 | struct xfs_mount *mp, | ||
410 | int unit_bytes, | ||
411 | int cnt, | ||
412 | struct xlog_ticket **ticket, | ||
413 | __uint8_t client, | ||
414 | uint flags, | ||
415 | uint t_type) | ||
416 | { | ||
417 | struct log *log = mp->m_log; | ||
418 | struct xlog_ticket *internal_ticket; | ||
419 | int retval = 0; | ||
420 | |||
421 | ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); | ||
422 | |||
423 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
424 | return XFS_ERROR(EIO); | ||
425 | |||
426 | XFS_STATS_INC(xs_try_logspace); | ||
427 | |||
428 | |||
429 | if (*ticket != NULL) { | ||
430 | ASSERT(flags & XFS_LOG_PERM_RESERV); | ||
431 | internal_ticket = *ticket; | ||
432 | |||
433 | /* | ||
434 | * this is a new transaction on the ticket, so we need to | ||
435 | * change the transaction ID so that the next transaction has a | ||
436 | * different TID in the log. Just add one to the existing tid | ||
437 | * so that we can see chains of rolling transactions in the log | ||
438 | * easily. | ||
439 | */ | ||
440 | internal_ticket->t_tid++; | ||
441 | |||
442 | trace_xfs_log_reserve(log, internal_ticket); | ||
443 | |||
444 | xlog_grant_push_ail(log, internal_ticket->t_unit_res); | ||
445 | retval = xlog_regrant_write_log_space(log, internal_ticket); | ||
446 | } else { | ||
447 | /* may sleep if need to allocate more tickets */ | ||
448 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, | ||
449 | client, flags, | ||
450 | KM_SLEEP|KM_MAYFAIL); | ||
451 | if (!internal_ticket) | ||
452 | return XFS_ERROR(ENOMEM); | ||
453 | internal_ticket->t_trans_type = t_type; | ||
454 | *ticket = internal_ticket; | ||
455 | |||
456 | trace_xfs_log_reserve(log, internal_ticket); | ||
457 | |||
458 | xlog_grant_push_ail(log, | ||
459 | (internal_ticket->t_unit_res * | ||
460 | internal_ticket->t_cnt)); | ||
461 | retval = xlog_grant_log_space(log, internal_ticket); | ||
462 | } | ||
463 | |||
464 | if (unlikely(retval)) { | ||
465 | /* | ||
466 | * If we are failing, make sure the ticket doesn't have any | ||
467 | * current reservations. We don't want to add this back | ||
468 | * when the ticket/ transaction gets cancelled. | ||
469 | */ | ||
470 | internal_ticket->t_curr_res = 0; | ||
471 | /* ungrant will give back unit_res * t_cnt. */ | ||
472 | internal_ticket->t_cnt = 0; | ||
473 | } | ||
474 | |||
475 | return retval; | ||
476 | } | ||
477 | |||
478 | |||
479 | /* | ||
480 | * Mount a log filesystem | 553 | * Mount a log filesystem |
481 | * | 554 | * |
482 | * mp - ubiquitous xfs mount point structure | 555 | * mp - ubiquitous xfs mount point structure |
@@ -653,8 +726,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
653 | .lv_iovecp = ®, | 726 | .lv_iovecp = ®, |
654 | }; | 727 | }; |
655 | 728 | ||
656 | /* remove inited flag */ | 729 | /* remove inited flag, and account for space used */ |
657 | tic->t_flags = 0; | 730 | tic->t_flags = 0; |
731 | tic->t_curr_res -= sizeof(magic); | ||
658 | error = xlog_write(log, &vec, tic, &lsn, | 732 | error = xlog_write(log, &vec, tic, &lsn, |
659 | NULL, XLOG_UNMOUNT_TRANS); | 733 | NULL, XLOG_UNMOUNT_TRANS); |
660 | /* | 734 | /* |
@@ -760,64 +834,35 @@ xfs_log_item_init( | |||
760 | INIT_LIST_HEAD(&item->li_cil); | 834 | INIT_LIST_HEAD(&item->li_cil); |
761 | } | 835 | } |
762 | 836 | ||
837 | /* | ||
838 | * Wake up processes waiting for log space after we have moved the log tail. | ||
839 | */ | ||
763 | void | 840 | void |
764 | xfs_log_move_tail(xfs_mount_t *mp, | 841 | xfs_log_space_wake( |
765 | xfs_lsn_t tail_lsn) | 842 | struct xfs_mount *mp) |
766 | { | 843 | { |
767 | xlog_ticket_t *tic; | 844 | struct log *log = mp->m_log; |
768 | xlog_t *log = mp->m_log; | 845 | int free_bytes; |
769 | int need_bytes, free_bytes; | ||
770 | 846 | ||
771 | if (XLOG_FORCED_SHUTDOWN(log)) | 847 | if (XLOG_FORCED_SHUTDOWN(log)) |
772 | return; | 848 | return; |
773 | 849 | ||
774 | if (tail_lsn == 0) | 850 | if (!list_empty_careful(&log->l_write_head.waiters)) { |
775 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); | 851 | ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); |
776 | |||
777 | /* tail_lsn == 1 implies that we weren't passed a valid value. */ | ||
778 | if (tail_lsn != 1) | ||
779 | atomic64_set(&log->l_tail_lsn, tail_lsn); | ||
780 | |||
781 | if (!list_empty_careful(&log->l_writeq)) { | ||
782 | #ifdef DEBUG | ||
783 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | ||
784 | panic("Recovery problem"); | ||
785 | #endif | ||
786 | spin_lock(&log->l_grant_write_lock); | ||
787 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); | ||
788 | list_for_each_entry(tic, &log->l_writeq, t_queue) { | ||
789 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | ||
790 | 852 | ||
791 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) | 853 | spin_lock(&log->l_write_head.lock); |
792 | break; | 854 | free_bytes = xlog_space_left(log, &log->l_write_head.grant); |
793 | tail_lsn = 0; | 855 | xlog_grant_head_wake(log, &log->l_write_head, &free_bytes); |
794 | free_bytes -= tic->t_unit_res; | 856 | spin_unlock(&log->l_write_head.lock); |
795 | trace_xfs_log_regrant_write_wake_up(log, tic); | ||
796 | wake_up(&tic->t_wait); | ||
797 | } | ||
798 | spin_unlock(&log->l_grant_write_lock); | ||
799 | } | 857 | } |
800 | 858 | ||
801 | if (!list_empty_careful(&log->l_reserveq)) { | 859 | if (!list_empty_careful(&log->l_reserve_head.waiters)) { |
802 | #ifdef DEBUG | 860 | ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); |
803 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 861 | |
804 | panic("Recovery problem"); | 862 | spin_lock(&log->l_reserve_head.lock); |
805 | #endif | 863 | free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); |
806 | spin_lock(&log->l_grant_reserve_lock); | 864 | xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes); |
807 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | 865 | spin_unlock(&log->l_reserve_head.lock); |
808 | list_for_each_entry(tic, &log->l_reserveq, t_queue) { | ||
809 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) | ||
810 | need_bytes = tic->t_unit_res*tic->t_cnt; | ||
811 | else | ||
812 | need_bytes = tic->t_unit_res; | ||
813 | if (free_bytes < need_bytes && tail_lsn != 1) | ||
814 | break; | ||
815 | tail_lsn = 0; | ||
816 | free_bytes -= need_bytes; | ||
817 | trace_xfs_log_grant_wake_up(log, tic); | ||
818 | wake_up(&tic->t_wait); | ||
819 | } | ||
820 | spin_unlock(&log->l_grant_reserve_lock); | ||
821 | } | 866 | } |
822 | } | 867 | } |
823 | 868 | ||
@@ -867,21 +912,7 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
867 | return needed; | 912 | return needed; |
868 | } | 913 | } |
869 | 914 | ||
870 | /****************************************************************************** | 915 | /* |
871 | * | ||
872 | * local routines | ||
873 | * | ||
874 | ****************************************************************************** | ||
875 | */ | ||
876 | |||
877 | /* xfs_trans_tail_ail returns 0 when there is nothing in the list. | ||
878 | * The log manager must keep track of the last LR which was committed | ||
879 | * to disk. The lsn of this LR will become the new tail_lsn whenever | ||
880 | * xfs_trans_tail_ail returns 0. If we don't do this, we run into | ||
881 | * the situation where stuff could be written into the log but nothing | ||
882 | * was ever in the AIL when asked. Eventually, we panic since the | ||
883 | * tail hits the head. | ||
884 | * | ||
885 | * We may be holding the log iclog lock upon entering this routine. | 916 | * We may be holding the log iclog lock upon entering this routine. |
886 | */ | 917 | */ |
887 | xfs_lsn_t | 918 | xfs_lsn_t |
@@ -891,10 +922,17 @@ xlog_assign_tail_lsn( | |||
891 | xfs_lsn_t tail_lsn; | 922 | xfs_lsn_t tail_lsn; |
892 | struct log *log = mp->m_log; | 923 | struct log *log = mp->m_log; |
893 | 924 | ||
925 | /* | ||
926 | * To make sure we always have a valid LSN for the log tail we keep | ||
927 | * track of the last LSN which was committed in log->l_last_sync_lsn, | ||
928 | * and use that when the AIL was empty and xfs_ail_min_lsn returns 0. | ||
929 | * | ||
930 | * If the AIL has been emptied we also need to wake any process | ||
931 | * waiting for this condition. | ||
932 | */ | ||
894 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); | 933 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); |
895 | if (!tail_lsn) | 934 | if (!tail_lsn) |
896 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); | 935 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
897 | |||
898 | atomic64_set(&log->l_tail_lsn, tail_lsn); | 936 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
899 | return tail_lsn; | 937 | return tail_lsn; |
900 | } | 938 | } |
@@ -1100,12 +1138,9 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1100 | xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); | 1138 | xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); |
1101 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); | 1139 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); |
1102 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ | 1140 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ |
1103 | xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); | 1141 | |
1104 | xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); | 1142 | xlog_grant_head_init(&log->l_reserve_head); |
1105 | INIT_LIST_HEAD(&log->l_reserveq); | 1143 | xlog_grant_head_init(&log->l_write_head); |
1106 | INIT_LIST_HEAD(&log->l_writeq); | ||
1107 | spin_lock_init(&log->l_grant_reserve_lock); | ||
1108 | spin_lock_init(&log->l_grant_write_lock); | ||
1109 | 1144 | ||
1110 | error = EFSCORRUPTED; | 1145 | error = EFSCORRUPTED; |
1111 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1146 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
@@ -1280,7 +1315,7 @@ xlog_grant_push_ail( | |||
1280 | 1315 | ||
1281 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | 1316 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); |
1282 | 1317 | ||
1283 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | 1318 | free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); |
1284 | free_blocks = BTOBBT(free_bytes); | 1319 | free_blocks = BTOBBT(free_bytes); |
1285 | 1320 | ||
1286 | /* | 1321 | /* |
@@ -1412,8 +1447,8 @@ xlog_sync(xlog_t *log, | |||
1412 | roundoff < BBTOB(1))); | 1447 | roundoff < BBTOB(1))); |
1413 | 1448 | ||
1414 | /* move grant heads by roundoff in sync */ | 1449 | /* move grant heads by roundoff in sync */ |
1415 | xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); | 1450 | xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); |
1416 | xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); | 1451 | xlog_grant_add_space(log, &log->l_write_head.grant, roundoff); |
1417 | 1452 | ||
1418 | /* put cycle number in every block */ | 1453 | /* put cycle number in every block */ |
1419 | xlog_pack_data(log, iclog, roundoff); | 1454 | xlog_pack_data(log, iclog, roundoff); |
@@ -2566,119 +2601,6 @@ restart: | |||
2566 | return 0; | 2601 | return 0; |
2567 | } /* xlog_state_get_iclog_space */ | 2602 | } /* xlog_state_get_iclog_space */ |
2568 | 2603 | ||
2569 | /* | ||
2570 | * Atomically get the log space required for a log ticket. | ||
2571 | * | ||
2572 | * Once a ticket gets put onto the reserveq, it will only return after the | ||
2573 | * needed reservation is satisfied. | ||
2574 | * | ||
2575 | * This function is structured so that it has a lock free fast path. This is | ||
2576 | * necessary because every new transaction reservation will come through this | ||
2577 | * path. Hence any lock will be globally hot if we take it unconditionally on | ||
2578 | * every pass. | ||
2579 | * | ||
2580 | * As tickets are only ever moved on and off the reserveq under the | ||
2581 | * l_grant_reserve_lock, we only need to take that lock if we are going to add | ||
2582 | * the ticket to the queue and sleep. We can avoid taking the lock if the ticket | ||
2583 | * was never added to the reserveq because the t_queue list head will be empty | ||
2584 | * and we hold the only reference to it so it can safely be checked unlocked. | ||
2585 | */ | ||
2586 | STATIC int | ||
2587 | xlog_grant_log_space( | ||
2588 | struct log *log, | ||
2589 | struct xlog_ticket *tic) | ||
2590 | { | ||
2591 | int free_bytes, need_bytes; | ||
2592 | int error = 0; | ||
2593 | |||
2594 | ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); | ||
2595 | |||
2596 | trace_xfs_log_grant_enter(log, tic); | ||
2597 | |||
2598 | /* | ||
2599 | * If there are other waiters on the queue then give them a chance at | ||
2600 | * logspace before us. Wake up the first waiters, if we do not wake | ||
2601 | * up all the waiters then go to sleep waiting for more free space, | ||
2602 | * otherwise try to get some space for this transaction. | ||
2603 | */ | ||
2604 | need_bytes = tic->t_unit_res; | ||
2605 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2606 | need_bytes *= tic->t_ocnt; | ||
2607 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | ||
2608 | if (!list_empty_careful(&log->l_reserveq)) { | ||
2609 | spin_lock(&log->l_grant_reserve_lock); | ||
2610 | if (!xlog_reserveq_wake(log, &free_bytes) || | ||
2611 | free_bytes < need_bytes) | ||
2612 | error = xlog_reserveq_wait(log, tic, need_bytes); | ||
2613 | spin_unlock(&log->l_grant_reserve_lock); | ||
2614 | } else if (free_bytes < need_bytes) { | ||
2615 | spin_lock(&log->l_grant_reserve_lock); | ||
2616 | error = xlog_reserveq_wait(log, tic, need_bytes); | ||
2617 | spin_unlock(&log->l_grant_reserve_lock); | ||
2618 | } | ||
2619 | if (error) | ||
2620 | return error; | ||
2621 | |||
2622 | xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); | ||
2623 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2624 | trace_xfs_log_grant_exit(log, tic); | ||
2625 | xlog_verify_grant_tail(log); | ||
2626 | return 0; | ||
2627 | } | ||
2628 | |||
2629 | /* | ||
2630 | * Replenish the byte reservation required by moving the grant write head. | ||
2631 | * | ||
2632 | * Similar to xlog_grant_log_space, the function is structured to have a lock | ||
2633 | * free fast path. | ||
2634 | */ | ||
2635 | STATIC int | ||
2636 | xlog_regrant_write_log_space( | ||
2637 | struct log *log, | ||
2638 | struct xlog_ticket *tic) | ||
2639 | { | ||
2640 | int free_bytes, need_bytes; | ||
2641 | int error = 0; | ||
2642 | |||
2643 | tic->t_curr_res = tic->t_unit_res; | ||
2644 | xlog_tic_reset_res(tic); | ||
2645 | |||
2646 | if (tic->t_cnt > 0) | ||
2647 | return 0; | ||
2648 | |||
2649 | ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); | ||
2650 | |||
2651 | trace_xfs_log_regrant_write_enter(log, tic); | ||
2652 | |||
2653 | /* | ||
2654 | * If there are other waiters on the queue then give them a chance at | ||
2655 | * logspace before us. Wake up the first waiters, if we do not wake | ||
2656 | * up all the waiters then go to sleep waiting for more free space, | ||
2657 | * otherwise try to get some space for this transaction. | ||
2658 | */ | ||
2659 | need_bytes = tic->t_unit_res; | ||
2660 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); | ||
2661 | if (!list_empty_careful(&log->l_writeq)) { | ||
2662 | spin_lock(&log->l_grant_write_lock); | ||
2663 | if (!xlog_writeq_wake(log, &free_bytes) || | ||
2664 | free_bytes < need_bytes) | ||
2665 | error = xlog_writeq_wait(log, tic, need_bytes); | ||
2666 | spin_unlock(&log->l_grant_write_lock); | ||
2667 | } else if (free_bytes < need_bytes) { | ||
2668 | spin_lock(&log->l_grant_write_lock); | ||
2669 | error = xlog_writeq_wait(log, tic, need_bytes); | ||
2670 | spin_unlock(&log->l_grant_write_lock); | ||
2671 | } | ||
2672 | |||
2673 | if (error) | ||
2674 | return error; | ||
2675 | |||
2676 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2677 | trace_xfs_log_regrant_write_exit(log, tic); | ||
2678 | xlog_verify_grant_tail(log); | ||
2679 | return 0; | ||
2680 | } | ||
2681 | |||
2682 | /* The first cnt-1 times through here we don't need to | 2604 | /* The first cnt-1 times through here we don't need to |
2683 | * move the grant write head because the permanent | 2605 | * move the grant write head because the permanent |
2684 | * reservation has reserved cnt times the unit amount. | 2606 | * reservation has reserved cnt times the unit amount. |
@@ -2695,9 +2617,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2695 | if (ticket->t_cnt > 0) | 2617 | if (ticket->t_cnt > 0) |
2696 | ticket->t_cnt--; | 2618 | ticket->t_cnt--; |
2697 | 2619 | ||
2698 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, | 2620 | xlog_grant_sub_space(log, &log->l_reserve_head.grant, |
2699 | ticket->t_curr_res); | 2621 | ticket->t_curr_res); |
2700 | xlog_grant_sub_space(log, &log->l_grant_write_head, | 2622 | xlog_grant_sub_space(log, &log->l_write_head.grant, |
2701 | ticket->t_curr_res); | 2623 | ticket->t_curr_res); |
2702 | ticket->t_curr_res = ticket->t_unit_res; | 2624 | ticket->t_curr_res = ticket->t_unit_res; |
2703 | xlog_tic_reset_res(ticket); | 2625 | xlog_tic_reset_res(ticket); |
@@ -2708,7 +2630,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2708 | if (ticket->t_cnt > 0) | 2630 | if (ticket->t_cnt > 0) |
2709 | return; | 2631 | return; |
2710 | 2632 | ||
2711 | xlog_grant_add_space(log, &log->l_grant_reserve_head, | 2633 | xlog_grant_add_space(log, &log->l_reserve_head.grant, |
2712 | ticket->t_unit_res); | 2634 | ticket->t_unit_res); |
2713 | 2635 | ||
2714 | trace_xfs_log_regrant_reserve_exit(log, ticket); | 2636 | trace_xfs_log_regrant_reserve_exit(log, ticket); |
@@ -2754,14 +2676,13 @@ xlog_ungrant_log_space(xlog_t *log, | |||
2754 | bytes += ticket->t_unit_res*ticket->t_cnt; | 2676 | bytes += ticket->t_unit_res*ticket->t_cnt; |
2755 | } | 2677 | } |
2756 | 2678 | ||
2757 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); | 2679 | xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes); |
2758 | xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); | 2680 | xlog_grant_sub_space(log, &log->l_write_head.grant, bytes); |
2759 | 2681 | ||
2760 | trace_xfs_log_ungrant_exit(log, ticket); | 2682 | trace_xfs_log_ungrant_exit(log, ticket); |
2761 | 2683 | ||
2762 | xfs_log_move_tail(log->l_mp, 1); | 2684 | xfs_log_space_wake(log->l_mp); |
2763 | } /* xlog_ungrant_log_space */ | 2685 | } |
2764 | |||
2765 | 2686 | ||
2766 | /* | 2687 | /* |
2767 | * Flush iclog to disk if this is the last reference to the given iclog and | 2688 | * Flush iclog to disk if this is the last reference to the given iclog and |
@@ -3219,7 +3140,7 @@ xlog_ticket_alloc( | |||
3219 | int unit_bytes, | 3140 | int unit_bytes, |
3220 | int cnt, | 3141 | int cnt, |
3221 | char client, | 3142 | char client, |
3222 | uint xflags, | 3143 | bool permanent, |
3223 | int alloc_flags) | 3144 | int alloc_flags) |
3224 | { | 3145 | { |
3225 | struct xlog_ticket *tic; | 3146 | struct xlog_ticket *tic; |
@@ -3313,6 +3234,7 @@ xlog_ticket_alloc( | |||
3313 | } | 3234 | } |
3314 | 3235 | ||
3315 | atomic_set(&tic->t_ref, 1); | 3236 | atomic_set(&tic->t_ref, 1); |
3237 | tic->t_task = current; | ||
3316 | INIT_LIST_HEAD(&tic->t_queue); | 3238 | INIT_LIST_HEAD(&tic->t_queue); |
3317 | tic->t_unit_res = unit_bytes; | 3239 | tic->t_unit_res = unit_bytes; |
3318 | tic->t_curr_res = unit_bytes; | 3240 | tic->t_curr_res = unit_bytes; |
@@ -3322,9 +3244,8 @@ xlog_ticket_alloc( | |||
3322 | tic->t_clientid = client; | 3244 | tic->t_clientid = client; |
3323 | tic->t_flags = XLOG_TIC_INITED; | 3245 | tic->t_flags = XLOG_TIC_INITED; |
3324 | tic->t_trans_type = 0; | 3246 | tic->t_trans_type = 0; |
3325 | if (xflags & XFS_LOG_PERM_RESERV) | 3247 | if (permanent) |
3326 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3248 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3327 | init_waitqueue_head(&tic->t_wait); | ||
3328 | 3249 | ||
3329 | xlog_tic_reset_res(tic); | 3250 | xlog_tic_reset_res(tic); |
3330 | 3251 | ||
@@ -3380,7 +3301,7 @@ xlog_verify_grant_tail( | |||
3380 | int tail_cycle, tail_blocks; | 3301 | int tail_cycle, tail_blocks; |
3381 | int cycle, space; | 3302 | int cycle, space; |
3382 | 3303 | ||
3383 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); | 3304 | xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space); |
3384 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); | 3305 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); |
3385 | if (tail_cycle != cycle) { | 3306 | if (tail_cycle != cycle) { |
3386 | if (cycle - 1 != tail_cycle && | 3307 | if (cycle - 1 != tail_cycle && |
@@ -3582,7 +3503,6 @@ xfs_log_force_umount( | |||
3582 | struct xfs_mount *mp, | 3503 | struct xfs_mount *mp, |
3583 | int logerror) | 3504 | int logerror) |
3584 | { | 3505 | { |
3585 | xlog_ticket_t *tic; | ||
3586 | xlog_t *log; | 3506 | xlog_t *log; |
3587 | int retval; | 3507 | int retval; |
3588 | 3508 | ||
@@ -3650,15 +3570,8 @@ xfs_log_force_umount( | |||
3650 | * we don't enqueue anything once the SHUTDOWN flag is set, and this | 3570 | * we don't enqueue anything once the SHUTDOWN flag is set, and this |
3651 | * action is protected by the grant locks. | 3571 | * action is protected by the grant locks. |
3652 | */ | 3572 | */ |
3653 | spin_lock(&log->l_grant_reserve_lock); | 3573 | xlog_grant_head_wake_all(&log->l_reserve_head); |
3654 | list_for_each_entry(tic, &log->l_reserveq, t_queue) | 3574 | xlog_grant_head_wake_all(&log->l_write_head); |
3655 | wake_up(&tic->t_wait); | ||
3656 | spin_unlock(&log->l_grant_reserve_lock); | ||
3657 | |||
3658 | spin_lock(&log->l_grant_write_lock); | ||
3659 | list_for_each_entry(tic, &log->l_writeq, t_queue) | ||
3660 | wake_up(&tic->t_wait); | ||
3661 | spin_unlock(&log->l_grant_write_lock); | ||
3662 | 3575 | ||
3663 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3576 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
3664 | ASSERT(!logerror); | 3577 | ASSERT(!logerror); |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 2aee3b22d29c..2c622bedb302 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -53,15 +53,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) | |||
53 | #define XFS_LOG_REL_PERM_RESERV 0x1 | 53 | #define XFS_LOG_REL_PERM_RESERV 0x1 |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * Flags to xfs_log_reserve() | ||
57 | * | ||
58 | * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are | ||
59 | * performed against this type of reservation, the reservation | ||
60 | * is not decreased. Long running transactions should use this. | ||
61 | */ | ||
62 | #define XFS_LOG_PERM_RESERV 0x2 | ||
63 | |||
64 | /* | ||
65 | * Flags to xfs_log_force() | 56 | * Flags to xfs_log_force() |
66 | * | 57 | * |
67 | * XFS_LOG_SYNC: Synchronous force in-core log to disk | 58 | * XFS_LOG_SYNC: Synchronous force in-core log to disk |
@@ -160,8 +151,8 @@ int xfs_log_mount(struct xfs_mount *mp, | |||
160 | xfs_daddr_t start_block, | 151 | xfs_daddr_t start_block, |
161 | int num_bblocks); | 152 | int num_bblocks); |
162 | int xfs_log_mount_finish(struct xfs_mount *mp); | 153 | int xfs_log_mount_finish(struct xfs_mount *mp); |
163 | void xfs_log_move_tail(struct xfs_mount *mp, | 154 | xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
164 | xfs_lsn_t tail_lsn); | 155 | void xfs_log_space_wake(struct xfs_mount *mp); |
165 | int xfs_log_notify(struct xfs_mount *mp, | 156 | int xfs_log_notify(struct xfs_mount *mp, |
166 | struct xlog_in_core *iclog, | 157 | struct xlog_in_core *iclog, |
167 | xfs_log_callback_t *callback_entry); | 158 | xfs_log_callback_t *callback_entry); |
@@ -172,8 +163,9 @@ int xfs_log_reserve(struct xfs_mount *mp, | |||
172 | int count, | 163 | int count, |
173 | struct xlog_ticket **ticket, | 164 | struct xlog_ticket **ticket, |
174 | __uint8_t clientid, | 165 | __uint8_t clientid, |
175 | uint flags, | 166 | bool permanent, |
176 | uint t_type); | 167 | uint t_type); |
168 | int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); | ||
177 | int xfs_log_unmount_write(struct xfs_mount *mp); | 169 | int xfs_log_unmount_write(struct xfs_mount *mp); |
178 | void xfs_log_unmount(struct xfs_mount *mp); | 170 | void xfs_log_unmount(struct xfs_mount *mp); |
179 | int xfs_log_force_umount(struct xfs_mount *mp, int logerror); | 171 | int xfs_log_force_umount(struct xfs_mount *mp, int logerror); |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2d3b6a498d63..2152900b79d4 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -239,8 +239,8 @@ typedef struct xlog_res { | |||
239 | } xlog_res_t; | 239 | } xlog_res_t; |
240 | 240 | ||
241 | typedef struct xlog_ticket { | 241 | typedef struct xlog_ticket { |
242 | wait_queue_head_t t_wait; /* ticket wait queue */ | ||
243 | struct list_head t_queue; /* reserve/write queue */ | 242 | struct list_head t_queue; /* reserve/write queue */ |
243 | struct task_struct *t_task; /* task that owns this ticket */ | ||
244 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 244 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
245 | atomic_t t_ref; /* ticket reference count : 4 */ | 245 | atomic_t t_ref; /* ticket reference count : 4 */ |
246 | int t_curr_res; /* current reservation in bytes : 4 */ | 246 | int t_curr_res; /* current reservation in bytes : 4 */ |
@@ -470,6 +470,16 @@ struct xfs_cil { | |||
470 | #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) | 470 | #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) |
471 | 471 | ||
472 | /* | 472 | /* |
473 | * ticket grant locks, queues and accounting have their own cachlines | ||
474 | * as these are quite hot and can be operated on concurrently. | ||
475 | */ | ||
476 | struct xlog_grant_head { | ||
477 | spinlock_t lock ____cacheline_aligned_in_smp; | ||
478 | struct list_head waiters; | ||
479 | atomic64_t grant; | ||
480 | }; | ||
481 | |||
482 | /* | ||
473 | * The reservation head lsn is not made up of a cycle number and block number. | 483 | * The reservation head lsn is not made up of a cycle number and block number. |
474 | * Instead, it uses a cycle number and byte number. Logs don't expect to | 484 | * Instead, it uses a cycle number and byte number. Logs don't expect to |
475 | * overflow 31 bits worth of byte offset, so using a byte number will mean | 485 | * overflow 31 bits worth of byte offset, so using a byte number will mean |
@@ -520,17 +530,8 @@ typedef struct log { | |||
520 | /* lsn of 1st LR with unflushed * buffers */ | 530 | /* lsn of 1st LR with unflushed * buffers */ |
521 | atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; | 531 | atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; |
522 | 532 | ||
523 | /* | 533 | struct xlog_grant_head l_reserve_head; |
524 | * ticket grant locks, queues and accounting have their own cachlines | 534 | struct xlog_grant_head l_write_head; |
525 | * as these are quite hot and can be operated on concurrently. | ||
526 | */ | ||
527 | spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; | ||
528 | struct list_head l_reserveq; | ||
529 | atomic64_t l_grant_reserve_head; | ||
530 | |||
531 | spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; | ||
532 | struct list_head l_writeq; | ||
533 | atomic64_t l_grant_write_head; | ||
534 | 535 | ||
535 | /* The following field are used for debugging; need to hold icloglock */ | 536 | /* The following field are used for debugging; need to hold icloglock */ |
536 | #ifdef DEBUG | 537 | #ifdef DEBUG |
@@ -545,14 +546,13 @@ typedef struct log { | |||
545 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 546 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
546 | 547 | ||
547 | /* common routines */ | 548 | /* common routines */ |
548 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | ||
549 | extern int xlog_recover(xlog_t *log); | 549 | extern int xlog_recover(xlog_t *log); |
550 | extern int xlog_recover_finish(xlog_t *log); | 550 | extern int xlog_recover_finish(xlog_t *log); |
551 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 551 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
552 | 552 | ||
553 | extern kmem_zone_t *xfs_log_ticket_zone; | 553 | extern kmem_zone_t *xfs_log_ticket_zone; |
554 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, | 554 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, |
555 | int count, char client, uint xflags, | 555 | int count, char client, bool permanent, |
556 | int alloc_flags); | 556 | int alloc_flags); |
557 | 557 | ||
558 | 558 | ||
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0ed9ee77937c..8ecad5bad66c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -965,9 +965,9 @@ xlog_find_tail( | |||
965 | log->l_curr_cycle++; | 965 | log->l_curr_cycle++; |
966 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); | 966 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); |
967 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); | 967 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); |
968 | xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, | 968 | xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, |
969 | BBTOB(log->l_curr_block)); | 969 | BBTOB(log->l_curr_block)); |
970 | xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, | 970 | xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, |
971 | BBTOB(log->l_curr_block)); | 971 | BBTOB(log->l_curr_block)); |
972 | 972 | ||
973 | /* | 973 | /* |
@@ -3161,37 +3161,26 @@ xlog_recover_process_iunlinks( | |||
3161 | */ | 3161 | */ |
3162 | continue; | 3162 | continue; |
3163 | } | 3163 | } |
3164 | /* | ||
3165 | * Unlock the buffer so that it can be acquired in the normal | ||
3166 | * course of the transaction to truncate and free each inode. | ||
3167 | * Because we are not racing with anyone else here for the AGI | ||
3168 | * buffer, we don't even need to hold it locked to read the | ||
3169 | * initial unlinked bucket entries out of the buffer. We keep | ||
3170 | * buffer reference though, so that it stays pinned in memory | ||
3171 | * while we need the buffer. | ||
3172 | */ | ||
3164 | agi = XFS_BUF_TO_AGI(agibp); | 3173 | agi = XFS_BUF_TO_AGI(agibp); |
3174 | xfs_buf_unlock(agibp); | ||
3165 | 3175 | ||
3166 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { | 3176 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { |
3167 | agino = be32_to_cpu(agi->agi_unlinked[bucket]); | 3177 | agino = be32_to_cpu(agi->agi_unlinked[bucket]); |
3168 | while (agino != NULLAGINO) { | 3178 | while (agino != NULLAGINO) { |
3169 | /* | ||
3170 | * Release the agi buffer so that it can | ||
3171 | * be acquired in the normal course of the | ||
3172 | * transaction to truncate and free the inode. | ||
3173 | */ | ||
3174 | xfs_buf_relse(agibp); | ||
3175 | |||
3176 | agino = xlog_recover_process_one_iunlink(mp, | 3179 | agino = xlog_recover_process_one_iunlink(mp, |
3177 | agno, agino, bucket); | 3180 | agno, agino, bucket); |
3178 | |||
3179 | /* | ||
3180 | * Reacquire the agibuffer and continue around | ||
3181 | * the loop. This should never fail as we know | ||
3182 | * the buffer was good earlier on. | ||
3183 | */ | ||
3184 | error = xfs_read_agi(mp, NULL, agno, &agibp); | ||
3185 | ASSERT(error == 0); | ||
3186 | agi = XFS_BUF_TO_AGI(agibp); | ||
3187 | } | 3181 | } |
3188 | } | 3182 | } |
3189 | 3183 | xfs_buf_rele(agibp); | |
3190 | /* | ||
3191 | * Release the buffer for the current agi so we can | ||
3192 | * go on to the next one. | ||
3193 | */ | ||
3194 | xfs_buf_relse(agibp); | ||
3195 | } | 3184 | } |
3196 | 3185 | ||
3197 | mp->m_dmevmask = mp_dmevmask; | 3186 | mp->m_dmevmask = mp_dmevmask; |
@@ -3695,7 +3684,7 @@ xlog_do_recover( | |||
3695 | 3684 | ||
3696 | /* Convert superblock from on-disk format */ | 3685 | /* Convert superblock from on-disk format */ |
3697 | sbp = &log->l_mp->m_sb; | 3686 | sbp = &log->l_mp->m_sb; |
3698 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); | 3687 | xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp)); |
3699 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); | 3688 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); |
3700 | ASSERT(xfs_sb_good_version(sbp)); | 3689 | ASSERT(xfs_sb_good_version(sbp)); |
3701 | xfs_buf_relse(bp); | 3690 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d06afbc3540d..1ffead4b2296 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -158,7 +158,7 @@ xfs_uuid_mount( | |||
158 | 158 | ||
159 | out_duplicate: | 159 | out_duplicate: |
160 | mutex_unlock(&xfs_uuid_table_mutex); | 160 | mutex_unlock(&xfs_uuid_table_mutex); |
161 | xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); | 161 | xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid); |
162 | return XFS_ERROR(EINVAL); | 162 | return XFS_ERROR(EINVAL); |
163 | } | 163 | } |
164 | 164 | ||
@@ -553,9 +553,11 @@ out_unwind: | |||
553 | 553 | ||
554 | void | 554 | void |
555 | xfs_sb_from_disk( | 555 | xfs_sb_from_disk( |
556 | xfs_sb_t *to, | 556 | struct xfs_mount *mp, |
557 | xfs_dsb_t *from) | 557 | xfs_dsb_t *from) |
558 | { | 558 | { |
559 | struct xfs_sb *to = &mp->m_sb; | ||
560 | |||
559 | to->sb_magicnum = be32_to_cpu(from->sb_magicnum); | 561 | to->sb_magicnum = be32_to_cpu(from->sb_magicnum); |
560 | to->sb_blocksize = be32_to_cpu(from->sb_blocksize); | 562 | to->sb_blocksize = be32_to_cpu(from->sb_blocksize); |
561 | to->sb_dblocks = be64_to_cpu(from->sb_dblocks); | 563 | to->sb_dblocks = be64_to_cpu(from->sb_dblocks); |
@@ -693,7 +695,7 @@ reread: | |||
693 | * Initialize the mount structure from the superblock. | 695 | * Initialize the mount structure from the superblock. |
694 | * But first do some basic consistency checking. | 696 | * But first do some basic consistency checking. |
695 | */ | 697 | */ |
696 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | 698 | xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp)); |
697 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); | 699 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); |
698 | if (error) { | 700 | if (error) { |
699 | if (loud) | 701 | if (loud) |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 19f69e232509..9eba73887829 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -211,6 +211,9 @@ typedef struct xfs_mount { | |||
211 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 211 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
212 | int64_t m_low_space[XFS_LOWSP_MAX]; | 212 | int64_t m_low_space[XFS_LOWSP_MAX]; |
213 | /* low free space thresholds */ | 213 | /* low free space thresholds */ |
214 | |||
215 | struct workqueue_struct *m_data_workqueue; | ||
216 | struct workqueue_struct *m_unwritten_workqueue; | ||
214 | } xfs_mount_t; | 217 | } xfs_mount_t; |
215 | 218 | ||
216 | /* | 219 | /* |
@@ -395,7 +398,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *); | |||
395 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 398 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); |
396 | extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, | 399 | extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, |
397 | xfs_agnumber_t *); | 400 | xfs_agnumber_t *); |
398 | extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); | 401 | extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *); |
399 | extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); | 402 | extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); |
400 | 403 | ||
401 | #endif /* __XFS_MOUNT_H__ */ | 404 | #endif /* __XFS_MOUNT_H__ */ |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index c436def733bf..55c6afedc879 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -48,194 +48,189 @@ | |||
48 | * quota functionality, including maintaining the freelist and hash | 48 | * quota functionality, including maintaining the freelist and hash |
49 | * tables of dquots. | 49 | * tables of dquots. |
50 | */ | 50 | */ |
51 | struct mutex xfs_Gqm_lock; | ||
52 | struct xfs_qm *xfs_Gqm; | ||
53 | |||
54 | kmem_zone_t *qm_dqzone; | ||
55 | kmem_zone_t *qm_dqtrxzone; | ||
56 | |||
57 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); | ||
58 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | ||
59 | |||
60 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 51 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
61 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 52 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
62 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); | 53 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); |
63 | 54 | ||
64 | static struct shrinker xfs_qm_shaker = { | ||
65 | .shrink = xfs_qm_shake, | ||
66 | .seeks = DEFAULT_SEEKS, | ||
67 | }; | ||
68 | |||
69 | /* | 55 | /* |
70 | * Initialize the XQM structure. | 56 | * We use the batch lookup interface to iterate over the dquots as it |
71 | * Note that there is not one quota manager per file system. | 57 | * currently is the only interface into the radix tree code that allows |
58 | * fuzzy lookups instead of exact matches. Holding the lock over multiple | ||
59 | * operations is fine as all callers are used either during mount/umount | ||
60 | * or quotaoff. | ||
72 | */ | 61 | */ |
73 | STATIC struct xfs_qm * | 62 | #define XFS_DQ_LOOKUP_BATCH 32 |
74 | xfs_Gqm_init(void) | 63 | |
64 | STATIC int | ||
65 | xfs_qm_dquot_walk( | ||
66 | struct xfs_mount *mp, | ||
67 | int type, | ||
68 | int (*execute)(struct xfs_dquot *dqp)) | ||
75 | { | 69 | { |
76 | xfs_dqhash_t *udqhash, *gdqhash; | 70 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
77 | xfs_qm_t *xqm; | 71 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
78 | size_t hsize; | 72 | uint32_t next_index; |
79 | uint i; | 73 | int last_error = 0; |
74 | int skipped; | ||
75 | int nr_found; | ||
76 | |||
77 | restart: | ||
78 | skipped = 0; | ||
79 | next_index = 0; | ||
80 | nr_found = 0; | ||
81 | |||
82 | while (1) { | ||
83 | struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; | ||
84 | int error = 0; | ||
85 | int i; | ||
86 | |||
87 | mutex_lock(&qi->qi_tree_lock); | ||
88 | nr_found = radix_tree_gang_lookup(tree, (void **)batch, | ||
89 | next_index, XFS_DQ_LOOKUP_BATCH); | ||
90 | if (!nr_found) { | ||
91 | mutex_unlock(&qi->qi_tree_lock); | ||
92 | break; | ||
93 | } | ||
80 | 94 | ||
81 | /* | 95 | for (i = 0; i < nr_found; i++) { |
82 | * Initialize the dquot hash tables. | 96 | struct xfs_dquot *dqp = batch[i]; |
83 | */ | ||
84 | udqhash = kmem_zalloc_greedy(&hsize, | ||
85 | XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), | ||
86 | XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); | ||
87 | if (!udqhash) | ||
88 | goto out; | ||
89 | 97 | ||
90 | gdqhash = kmem_zalloc_large(hsize); | 98 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; |
91 | if (!gdqhash) | ||
92 | goto out_free_udqhash; | ||
93 | 99 | ||
94 | hsize /= sizeof(xfs_dqhash_t); | 100 | error = execute(batch[i]); |
101 | if (error == EAGAIN) { | ||
102 | skipped++; | ||
103 | continue; | ||
104 | } | ||
105 | if (error && last_error != EFSCORRUPTED) | ||
106 | last_error = error; | ||
107 | } | ||
95 | 108 | ||
96 | xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); | 109 | mutex_unlock(&qi->qi_tree_lock); |
97 | xqm->qm_dqhashmask = hsize - 1; | ||
98 | xqm->qm_usr_dqhtable = udqhash; | ||
99 | xqm->qm_grp_dqhtable = gdqhash; | ||
100 | ASSERT(xqm->qm_usr_dqhtable != NULL); | ||
101 | ASSERT(xqm->qm_grp_dqhtable != NULL); | ||
102 | 110 | ||
103 | for (i = 0; i < hsize; i++) { | 111 | /* bail out if the filesystem is corrupted. */ |
104 | xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); | 112 | if (last_error == EFSCORRUPTED) { |
105 | xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); | 113 | skipped = 0; |
114 | break; | ||
115 | } | ||
106 | } | 116 | } |
107 | 117 | ||
108 | /* | 118 | if (skipped) { |
109 | * Freelist of all dquots of all file systems | 119 | delay(1); |
110 | */ | 120 | goto restart; |
111 | INIT_LIST_HEAD(&xqm->qm_dqfrlist); | 121 | } |
112 | xqm->qm_dqfrlist_cnt = 0; | ||
113 | mutex_init(&xqm->qm_dqfrlist_lock); | ||
114 | |||
115 | /* | ||
116 | * dquot zone. we register our own low-memory callback. | ||
117 | */ | ||
118 | if (!qm_dqzone) { | ||
119 | xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), | ||
120 | "xfs_dquots"); | ||
121 | qm_dqzone = xqm->qm_dqzone; | ||
122 | } else | ||
123 | xqm->qm_dqzone = qm_dqzone; | ||
124 | |||
125 | register_shrinker(&xfs_qm_shaker); | ||
126 | |||
127 | /* | ||
128 | * The t_dqinfo portion of transactions. | ||
129 | */ | ||
130 | if (!qm_dqtrxzone) { | ||
131 | xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), | ||
132 | "xfs_dqtrx"); | ||
133 | qm_dqtrxzone = xqm->qm_dqtrxzone; | ||
134 | } else | ||
135 | xqm->qm_dqtrxzone = qm_dqtrxzone; | ||
136 | |||
137 | atomic_set(&xqm->qm_totaldquots, 0); | ||
138 | xqm->qm_nrefs = 0; | ||
139 | return xqm; | ||
140 | 122 | ||
141 | out_free_udqhash: | 123 | return last_error; |
142 | kmem_free_large(udqhash); | ||
143 | out: | ||
144 | return NULL; | ||
145 | } | 124 | } |
146 | 125 | ||
126 | |||
147 | /* | 127 | /* |
148 | * Destroy the global quota manager when its reference count goes to zero. | 128 | * Purge a dquot from all tracking data structures and free it. |
149 | */ | 129 | */ |
150 | STATIC void | 130 | STATIC int |
151 | xfs_qm_destroy( | 131 | xfs_qm_dqpurge( |
152 | struct xfs_qm *xqm) | 132 | struct xfs_dquot *dqp) |
153 | { | 133 | { |
154 | int hsize, i; | 134 | struct xfs_mount *mp = dqp->q_mount; |
135 | struct xfs_quotainfo *qi = mp->m_quotainfo; | ||
136 | struct xfs_dquot *gdqp = NULL; | ||
155 | 137 | ||
156 | ASSERT(xqm != NULL); | 138 | xfs_dqlock(dqp); |
157 | ASSERT(xqm->qm_nrefs == 0); | 139 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { |
140 | xfs_dqunlock(dqp); | ||
141 | return EAGAIN; | ||
142 | } | ||
158 | 143 | ||
159 | unregister_shrinker(&xfs_qm_shaker); | 144 | /* |
145 | * If this quota has a group hint attached, prepare for releasing it | ||
146 | * now. | ||
147 | */ | ||
148 | gdqp = dqp->q_gdquot; | ||
149 | if (gdqp) { | ||
150 | xfs_dqlock(gdqp); | ||
151 | dqp->q_gdquot = NULL; | ||
152 | } | ||
160 | 153 | ||
161 | mutex_lock(&xqm->qm_dqfrlist_lock); | 154 | dqp->dq_flags |= XFS_DQ_FREEING; |
162 | ASSERT(list_empty(&xqm->qm_dqfrlist)); | ||
163 | mutex_unlock(&xqm->qm_dqfrlist_lock); | ||
164 | 155 | ||
165 | hsize = xqm->qm_dqhashmask + 1; | 156 | /* |
166 | for (i = 0; i < hsize; i++) { | 157 | * If we're turning off quotas, we have to make sure that, for |
167 | xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); | 158 | * example, we don't delete quota disk blocks while dquots are |
168 | xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); | 159 | * in the process of getting written to those disk blocks. |
160 | * This dquot might well be on AIL, and we can't leave it there | ||
161 | * if we're turning off quotas. Basically, we need this flush | ||
162 | * lock, and are willing to block on it. | ||
163 | */ | ||
164 | if (!xfs_dqflock_nowait(dqp)) { | ||
165 | /* | ||
166 | * Block on the flush lock after nudging dquot buffer, | ||
167 | * if it is incore. | ||
168 | */ | ||
169 | xfs_dqflock_pushbuf_wait(dqp); | ||
169 | } | 170 | } |
170 | kmem_free_large(xqm->qm_usr_dqhtable); | ||
171 | kmem_free_large(xqm->qm_grp_dqhtable); | ||
172 | xqm->qm_usr_dqhtable = NULL; | ||
173 | xqm->qm_grp_dqhtable = NULL; | ||
174 | xqm->qm_dqhashmask = 0; | ||
175 | 171 | ||
176 | kmem_free(xqm); | ||
177 | } | ||
178 | |||
179 | /* | ||
180 | * Called at mount time to let XQM know that another file system is | ||
181 | * starting quotas. This isn't crucial information as the individual mount | ||
182 | * structures are pretty independent, but it helps the XQM keep a | ||
183 | * global view of what's going on. | ||
184 | */ | ||
185 | /* ARGSUSED */ | ||
186 | STATIC int | ||
187 | xfs_qm_hold_quotafs_ref( | ||
188 | struct xfs_mount *mp) | ||
189 | { | ||
190 | /* | 172 | /* |
191 | * Need to lock the xfs_Gqm structure for things like this. For example, | 173 | * If we are turning this type of quotas off, we don't care |
192 | * the structure could disappear between the entry to this routine and | 174 | * about the dirty metadata sitting in this dquot. OTOH, if |
193 | * a HOLD operation if not locked. | 175 | * we're unmounting, we do care, so we flush it and wait. |
194 | */ | 176 | */ |
195 | mutex_lock(&xfs_Gqm_lock); | 177 | if (XFS_DQ_IS_DIRTY(dqp)) { |
178 | int error; | ||
196 | 179 | ||
197 | if (!xfs_Gqm) { | 180 | /* |
198 | xfs_Gqm = xfs_Gqm_init(); | 181 | * We don't care about getting disk errors here. We need |
199 | if (!xfs_Gqm) { | 182 | * to purge this dquot anyway, so we go ahead regardless. |
200 | mutex_unlock(&xfs_Gqm_lock); | 183 | */ |
201 | return ENOMEM; | 184 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); |
202 | } | 185 | if (error) |
186 | xfs_warn(mp, "%s: dquot %p flush failed", | ||
187 | __func__, dqp); | ||
188 | xfs_dqflock(dqp); | ||
203 | } | 189 | } |
204 | 190 | ||
191 | ASSERT(atomic_read(&dqp->q_pincount) == 0); | ||
192 | ASSERT(XFS_FORCED_SHUTDOWN(mp) || | ||
193 | !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); | ||
194 | |||
195 | xfs_dqfunlock(dqp); | ||
196 | xfs_dqunlock(dqp); | ||
197 | |||
198 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | ||
199 | be32_to_cpu(dqp->q_core.d_id)); | ||
200 | qi->qi_dquots--; | ||
201 | |||
205 | /* | 202 | /* |
206 | * We can keep a list of all filesystems with quotas mounted for | 203 | * We move dquots to the freelist as soon as their reference count |
207 | * debugging and statistical purposes, but ... | 204 | * hits zero, so it really should be on the freelist here. |
208 | * Just take a reference and get out. | ||
209 | */ | 205 | */ |
210 | xfs_Gqm->qm_nrefs++; | 206 | mutex_lock(&qi->qi_lru_lock); |
211 | mutex_unlock(&xfs_Gqm_lock); | 207 | ASSERT(!list_empty(&dqp->q_lru)); |
208 | list_del_init(&dqp->q_lru); | ||
209 | qi->qi_lru_count--; | ||
210 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
211 | mutex_unlock(&qi->qi_lru_lock); | ||
212 | 212 | ||
213 | xfs_qm_dqdestroy(dqp); | ||
214 | |||
215 | if (gdqp) | ||
216 | xfs_qm_dqput(gdqp); | ||
213 | return 0; | 217 | return 0; |
214 | } | 218 | } |
215 | 219 | ||
216 | |||
217 | /* | 220 | /* |
218 | * Release the reference that a filesystem took at mount time, | 221 | * Purge the dquot cache. |
219 | * so that we know when we need to destroy the entire quota manager. | ||
220 | */ | 222 | */ |
221 | /* ARGSUSED */ | 223 | void |
222 | STATIC void | 224 | xfs_qm_dqpurge_all( |
223 | xfs_qm_rele_quotafs_ref( | 225 | struct xfs_mount *mp, |
224 | struct xfs_mount *mp) | 226 | uint flags) |
225 | { | 227 | { |
226 | ASSERT(xfs_Gqm); | 228 | if (flags & XFS_QMOPT_UQUOTA) |
227 | ASSERT(xfs_Gqm->qm_nrefs > 0); | 229 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge); |
228 | 230 | if (flags & XFS_QMOPT_GQUOTA) | |
229 | /* | 231 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge); |
230 | * Destroy the entire XQM. If somebody mounts with quotaon, this'll | 232 | if (flags & XFS_QMOPT_PQUOTA) |
231 | * be restarted. | 233 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge); |
232 | */ | ||
233 | mutex_lock(&xfs_Gqm_lock); | ||
234 | if (--xfs_Gqm->qm_nrefs == 0) { | ||
235 | xfs_qm_destroy(xfs_Gqm); | ||
236 | xfs_Gqm = NULL; | ||
237 | } | ||
238 | mutex_unlock(&xfs_Gqm_lock); | ||
239 | } | 234 | } |
240 | 235 | ||
241 | /* | 236 | /* |
@@ -376,175 +371,6 @@ xfs_qm_unmount_quotas( | |||
376 | } | 371 | } |
377 | } | 372 | } |
378 | 373 | ||
379 | /* | ||
380 | * Flush all dquots of the given file system to disk. The dquots are | ||
381 | * _not_ purged from memory here, just their data written to disk. | ||
382 | */ | ||
383 | STATIC int | ||
384 | xfs_qm_dqflush_all( | ||
385 | struct xfs_mount *mp) | ||
386 | { | ||
387 | struct xfs_quotainfo *q = mp->m_quotainfo; | ||
388 | int recl; | ||
389 | struct xfs_dquot *dqp; | ||
390 | int error; | ||
391 | |||
392 | if (!q) | ||
393 | return 0; | ||
394 | again: | ||
395 | mutex_lock(&q->qi_dqlist_lock); | ||
396 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { | ||
397 | xfs_dqlock(dqp); | ||
398 | if ((dqp->dq_flags & XFS_DQ_FREEING) || | ||
399 | !XFS_DQ_IS_DIRTY(dqp)) { | ||
400 | xfs_dqunlock(dqp); | ||
401 | continue; | ||
402 | } | ||
403 | |||
404 | /* XXX a sentinel would be better */ | ||
405 | recl = q->qi_dqreclaims; | ||
406 | if (!xfs_dqflock_nowait(dqp)) { | ||
407 | /* | ||
408 | * If we can't grab the flush lock then check | ||
409 | * to see if the dquot has been flushed delayed | ||
410 | * write. If so, grab its buffer and send it | ||
411 | * out immediately. We'll be able to acquire | ||
412 | * the flush lock when the I/O completes. | ||
413 | */ | ||
414 | xfs_dqflock_pushbuf_wait(dqp); | ||
415 | } | ||
416 | /* | ||
417 | * Let go of the mplist lock. We don't want to hold it | ||
418 | * across a disk write. | ||
419 | */ | ||
420 | mutex_unlock(&q->qi_dqlist_lock); | ||
421 | error = xfs_qm_dqflush(dqp, 0); | ||
422 | xfs_dqunlock(dqp); | ||
423 | if (error) | ||
424 | return error; | ||
425 | |||
426 | mutex_lock(&q->qi_dqlist_lock); | ||
427 | if (recl != q->qi_dqreclaims) { | ||
428 | mutex_unlock(&q->qi_dqlist_lock); | ||
429 | /* XXX restart limit */ | ||
430 | goto again; | ||
431 | } | ||
432 | } | ||
433 | |||
434 | mutex_unlock(&q->qi_dqlist_lock); | ||
435 | /* return ! busy */ | ||
436 | return 0; | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * Release the group dquot pointers the user dquots may be | ||
441 | * carrying around as a hint. mplist is locked on entry and exit. | ||
442 | */ | ||
443 | STATIC void | ||
444 | xfs_qm_detach_gdquots( | ||
445 | struct xfs_mount *mp) | ||
446 | { | ||
447 | struct xfs_quotainfo *q = mp->m_quotainfo; | ||
448 | struct xfs_dquot *dqp, *gdqp; | ||
449 | |||
450 | again: | ||
451 | ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); | ||
452 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { | ||
453 | xfs_dqlock(dqp); | ||
454 | if (dqp->dq_flags & XFS_DQ_FREEING) { | ||
455 | xfs_dqunlock(dqp); | ||
456 | mutex_unlock(&q->qi_dqlist_lock); | ||
457 | delay(1); | ||
458 | mutex_lock(&q->qi_dqlist_lock); | ||
459 | goto again; | ||
460 | } | ||
461 | |||
462 | gdqp = dqp->q_gdquot; | ||
463 | if (gdqp) | ||
464 | dqp->q_gdquot = NULL; | ||
465 | xfs_dqunlock(dqp); | ||
466 | |||
467 | if (gdqp) | ||
468 | xfs_qm_dqrele(gdqp); | ||
469 | } | ||
470 | } | ||
471 | |||
472 | /* | ||
473 | * Go through all the incore dquots of this file system and take them | ||
474 | * off the mplist and hashlist, if the dquot type matches the dqtype | ||
475 | * parameter. This is used when turning off quota accounting for | ||
476 | * users and/or groups, as well as when the filesystem is unmounting. | ||
477 | */ | ||
478 | STATIC int | ||
479 | xfs_qm_dqpurge_int( | ||
480 | struct xfs_mount *mp, | ||
481 | uint flags) | ||
482 | { | ||
483 | struct xfs_quotainfo *q = mp->m_quotainfo; | ||
484 | struct xfs_dquot *dqp, *n; | ||
485 | uint dqtype; | ||
486 | int nmisses = 0; | ||
487 | LIST_HEAD (dispose_list); | ||
488 | |||
489 | if (!q) | ||
490 | return 0; | ||
491 | |||
492 | dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; | ||
493 | dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; | ||
494 | dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; | ||
495 | |||
496 | mutex_lock(&q->qi_dqlist_lock); | ||
497 | |||
498 | /* | ||
499 | * In the first pass through all incore dquots of this filesystem, | ||
500 | * we release the group dquot pointers the user dquots may be | ||
501 | * carrying around as a hint. We need to do this irrespective of | ||
502 | * what's being turned off. | ||
503 | */ | ||
504 | xfs_qm_detach_gdquots(mp); | ||
505 | |||
506 | /* | ||
507 | * Try to get rid of all of the unwanted dquots. | ||
508 | */ | ||
509 | list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { | ||
510 | xfs_dqlock(dqp); | ||
511 | if ((dqp->dq_flags & dqtype) != 0 && | ||
512 | !(dqp->dq_flags & XFS_DQ_FREEING)) { | ||
513 | if (dqp->q_nrefs == 0) { | ||
514 | dqp->dq_flags |= XFS_DQ_FREEING; | ||
515 | list_move_tail(&dqp->q_mplist, &dispose_list); | ||
516 | } else | ||
517 | nmisses++; | ||
518 | } | ||
519 | xfs_dqunlock(dqp); | ||
520 | } | ||
521 | mutex_unlock(&q->qi_dqlist_lock); | ||
522 | |||
523 | list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist) | ||
524 | xfs_qm_dqpurge(dqp); | ||
525 | |||
526 | return nmisses; | ||
527 | } | ||
528 | |||
529 | int | ||
530 | xfs_qm_dqpurge_all( | ||
531 | xfs_mount_t *mp, | ||
532 | uint flags) | ||
533 | { | ||
534 | int ndquots; | ||
535 | |||
536 | /* | ||
537 | * Purge the dquot cache. | ||
538 | * None of the dquots should really be busy at this point. | ||
539 | */ | ||
540 | if (mp->m_quotainfo) { | ||
541 | while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { | ||
542 | delay(ndquots * 10); | ||
543 | } | ||
544 | } | ||
545 | return 0; | ||
546 | } | ||
547 | |||
548 | STATIC int | 374 | STATIC int |
549 | xfs_qm_dqattach_one( | 375 | xfs_qm_dqattach_one( |
550 | xfs_inode_t *ip, | 376 | xfs_inode_t *ip, |
@@ -783,14 +609,6 @@ xfs_qm_dqdetach( | |||
783 | } | 609 | } |
784 | 610 | ||
785 | /* | 611 | /* |
786 | * The hash chains and the mplist use the same xfs_dqhash structure as | ||
787 | * their list head, but we can take the mplist qh_lock and one of the | ||
788 | * hash qh_locks at the same time without any problem as they aren't | ||
789 | * related. | ||
790 | */ | ||
791 | static struct lock_class_key xfs_quota_mplist_class; | ||
792 | |||
793 | /* | ||
794 | * This initializes all the quota information that's kept in the | 612 | * This initializes all the quota information that's kept in the |
795 | * mount structure | 613 | * mount structure |
796 | */ | 614 | */ |
@@ -804,13 +622,6 @@ xfs_qm_init_quotainfo( | |||
804 | 622 | ||
805 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 623 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
806 | 624 | ||
807 | /* | ||
808 | * Tell XQM that we exist as soon as possible. | ||
809 | */ | ||
810 | if ((error = xfs_qm_hold_quotafs_ref(mp))) { | ||
811 | return error; | ||
812 | } | ||
813 | |||
814 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); | 625 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); |
815 | 626 | ||
816 | /* | 627 | /* |
@@ -823,11 +634,13 @@ xfs_qm_init_quotainfo( | |||
823 | return error; | 634 | return error; |
824 | } | 635 | } |
825 | 636 | ||
826 | INIT_LIST_HEAD(&qinf->qi_dqlist); | 637 | INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); |
827 | mutex_init(&qinf->qi_dqlist_lock); | 638 | INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); |
828 | lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); | 639 | mutex_init(&qinf->qi_tree_lock); |
829 | 640 | ||
830 | qinf->qi_dqreclaims = 0; | 641 | INIT_LIST_HEAD(&qinf->qi_lru_list); |
642 | qinf->qi_lru_count = 0; | ||
643 | mutex_init(&qinf->qi_lru_lock); | ||
831 | 644 | ||
832 | /* mutex used to serialize quotaoffs */ | 645 | /* mutex used to serialize quotaoffs */ |
833 | mutex_init(&qinf->qi_quotaofflock); | 646 | mutex_init(&qinf->qi_quotaofflock); |
@@ -894,6 +707,9 @@ xfs_qm_init_quotainfo( | |||
894 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; | 707 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; |
895 | } | 708 | } |
896 | 709 | ||
710 | qinf->qi_shrinker.shrink = xfs_qm_shake; | ||
711 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; | ||
712 | register_shrinker(&qinf->qi_shrinker); | ||
897 | return 0; | 713 | return 0; |
898 | } | 714 | } |
899 | 715 | ||
@@ -911,17 +727,8 @@ xfs_qm_destroy_quotainfo( | |||
911 | 727 | ||
912 | qi = mp->m_quotainfo; | 728 | qi = mp->m_quotainfo; |
913 | ASSERT(qi != NULL); | 729 | ASSERT(qi != NULL); |
914 | ASSERT(xfs_Gqm != NULL); | ||
915 | |||
916 | /* | ||
917 | * Release the reference that XQM kept, so that we know | ||
918 | * when the XQM structure should be freed. We cannot assume | ||
919 | * that xfs_Gqm is non-null after this point. | ||
920 | */ | ||
921 | xfs_qm_rele_quotafs_ref(mp); | ||
922 | 730 | ||
923 | ASSERT(list_empty(&qi->qi_dqlist)); | 731 | unregister_shrinker(&qi->qi_shrinker); |
924 | mutex_destroy(&qi->qi_dqlist_lock); | ||
925 | 732 | ||
926 | if (qi->qi_uquotaip) { | 733 | if (qi->qi_uquotaip) { |
927 | IRELE(qi->qi_uquotaip); | 734 | IRELE(qi->qi_uquotaip); |
@@ -936,30 +743,6 @@ xfs_qm_destroy_quotainfo( | |||
936 | mp->m_quotainfo = NULL; | 743 | mp->m_quotainfo = NULL; |
937 | } | 744 | } |
938 | 745 | ||
939 | |||
940 | |||
941 | /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ | ||
942 | |||
943 | /* ARGSUSED */ | ||
944 | STATIC void | ||
945 | xfs_qm_list_init( | ||
946 | xfs_dqlist_t *list, | ||
947 | char *str, | ||
948 | int n) | ||
949 | { | ||
950 | mutex_init(&list->qh_lock); | ||
951 | INIT_LIST_HEAD(&list->qh_list); | ||
952 | list->qh_version = 0; | ||
953 | list->qh_nelems = 0; | ||
954 | } | ||
955 | |||
956 | STATIC void | ||
957 | xfs_qm_list_destroy( | ||
958 | xfs_dqlist_t *list) | ||
959 | { | ||
960 | mutex_destroy(&(list->qh_lock)); | ||
961 | } | ||
962 | |||
963 | /* | 746 | /* |
964 | * Create an inode and return with a reference already taken, but unlocked | 747 | * Create an inode and return with a reference already taken, but unlocked |
965 | * This is how we create quota inodes | 748 | * This is how we create quota inodes |
@@ -1397,6 +1180,28 @@ error0: | |||
1397 | return error; | 1180 | return error; |
1398 | } | 1181 | } |
1399 | 1182 | ||
1183 | STATIC int | ||
1184 | xfs_qm_flush_one( | ||
1185 | struct xfs_dquot *dqp) | ||
1186 | { | ||
1187 | int error = 0; | ||
1188 | |||
1189 | xfs_dqlock(dqp); | ||
1190 | if (dqp->dq_flags & XFS_DQ_FREEING) | ||
1191 | goto out_unlock; | ||
1192 | if (!XFS_DQ_IS_DIRTY(dqp)) | ||
1193 | goto out_unlock; | ||
1194 | |||
1195 | if (!xfs_dqflock_nowait(dqp)) | ||
1196 | xfs_dqflock_pushbuf_wait(dqp); | ||
1197 | |||
1198 | error = xfs_qm_dqflush(dqp, 0); | ||
1199 | |||
1200 | out_unlock: | ||
1201 | xfs_dqunlock(dqp); | ||
1202 | return error; | ||
1203 | } | ||
1204 | |||
1400 | /* | 1205 | /* |
1401 | * Walk thru all the filesystem inodes and construct a consistent view | 1206 | * Walk thru all the filesystem inodes and construct a consistent view |
1402 | * of the disk quota world. If the quotacheck fails, disable quotas. | 1207 | * of the disk quota world. If the quotacheck fails, disable quotas. |
@@ -1405,7 +1210,7 @@ int | |||
1405 | xfs_qm_quotacheck( | 1210 | xfs_qm_quotacheck( |
1406 | xfs_mount_t *mp) | 1211 | xfs_mount_t *mp) |
1407 | { | 1212 | { |
1408 | int done, count, error; | 1213 | int done, count, error, error2; |
1409 | xfs_ino_t lastino; | 1214 | xfs_ino_t lastino; |
1410 | size_t structsz; | 1215 | size_t structsz; |
1411 | xfs_inode_t *uip, *gip; | 1216 | xfs_inode_t *uip, *gip; |
@@ -1419,12 +1224,6 @@ xfs_qm_quotacheck( | |||
1419 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); | 1224 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); |
1420 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1225 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1421 | 1226 | ||
1422 | /* | ||
1423 | * There should be no cached dquots. The (simplistic) quotacheck | ||
1424 | * algorithm doesn't like that. | ||
1425 | */ | ||
1426 | ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); | ||
1427 | |||
1428 | xfs_notice(mp, "Quotacheck needed: Please wait."); | 1227 | xfs_notice(mp, "Quotacheck needed: Please wait."); |
1429 | 1228 | ||
1430 | /* | 1229 | /* |
@@ -1463,12 +1262,21 @@ xfs_qm_quotacheck( | |||
1463 | } while (!done); | 1262 | } while (!done); |
1464 | 1263 | ||
1465 | /* | 1264 | /* |
1466 | * We've made all the changes that we need to make incore. | 1265 | * We've made all the changes that we need to make incore. Flush them |
1467 | * Flush them down to disk buffers if everything was updated | 1266 | * down to disk buffers if everything was updated successfully. |
1468 | * successfully. | ||
1469 | */ | 1267 | */ |
1470 | if (!error) | 1268 | if (XFS_IS_UQUOTA_ON(mp)) |
1471 | error = xfs_qm_dqflush_all(mp); | 1269 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one); |
1270 | if (XFS_IS_GQUOTA_ON(mp)) { | ||
1271 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one); | ||
1272 | if (!error) | ||
1273 | error = error2; | ||
1274 | } | ||
1275 | if (XFS_IS_PQUOTA_ON(mp)) { | ||
1276 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one); | ||
1277 | if (!error) | ||
1278 | error = error2; | ||
1279 | } | ||
1472 | 1280 | ||
1473 | /* | 1281 | /* |
1474 | * We can get this error if we couldn't do a dquot allocation inside | 1282 | * We can get this error if we couldn't do a dquot allocation inside |
@@ -1496,7 +1304,7 @@ xfs_qm_quotacheck( | |||
1496 | * quotachecked status, since we won't be doing accounting for | 1304 | * quotachecked status, since we won't be doing accounting for |
1497 | * that type anymore. | 1305 | * that type anymore. |
1498 | */ | 1306 | */ |
1499 | mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); | 1307 | mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD; |
1500 | mp->m_qflags |= flags; | 1308 | mp->m_qflags |= flags; |
1501 | 1309 | ||
1502 | error_return: | 1310 | error_return: |
@@ -1508,7 +1316,6 @@ xfs_qm_quotacheck( | |||
1508 | * We must turn off quotas. | 1316 | * We must turn off quotas. |
1509 | */ | 1317 | */ |
1510 | ASSERT(mp->m_quotainfo != NULL); | 1318 | ASSERT(mp->m_quotainfo != NULL); |
1511 | ASSERT(xfs_Gqm != NULL); | ||
1512 | xfs_qm_destroy_quotainfo(mp); | 1319 | xfs_qm_destroy_quotainfo(mp); |
1513 | if (xfs_mount_reset_sbqflags(mp)) { | 1320 | if (xfs_mount_reset_sbqflags(mp)) { |
1514 | xfs_warn(mp, | 1321 | xfs_warn(mp, |
@@ -1604,16 +1411,12 @@ xfs_qm_dqfree_one( | |||
1604 | struct xfs_mount *mp = dqp->q_mount; | 1411 | struct xfs_mount *mp = dqp->q_mount; |
1605 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 1412 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
1606 | 1413 | ||
1607 | mutex_lock(&dqp->q_hash->qh_lock); | 1414 | mutex_lock(&qi->qi_tree_lock); |
1608 | list_del_init(&dqp->q_hashlist); | 1415 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), |
1609 | dqp->q_hash->qh_version++; | 1416 | be32_to_cpu(dqp->q_core.d_id)); |
1610 | mutex_unlock(&dqp->q_hash->qh_lock); | ||
1611 | 1417 | ||
1612 | mutex_lock(&qi->qi_dqlist_lock); | ||
1613 | list_del_init(&dqp->q_mplist); | ||
1614 | qi->qi_dquots--; | 1418 | qi->qi_dquots--; |
1615 | qi->qi_dqreclaims++; | 1419 | mutex_unlock(&qi->qi_tree_lock); |
1616 | mutex_unlock(&qi->qi_dqlist_lock); | ||
1617 | 1420 | ||
1618 | xfs_qm_dqdestroy(dqp); | 1421 | xfs_qm_dqdestroy(dqp); |
1619 | } | 1422 | } |
@@ -1624,6 +1427,7 @@ xfs_qm_dqreclaim_one( | |||
1624 | struct list_head *dispose_list) | 1427 | struct list_head *dispose_list) |
1625 | { | 1428 | { |
1626 | struct xfs_mount *mp = dqp->q_mount; | 1429 | struct xfs_mount *mp = dqp->q_mount; |
1430 | struct xfs_quotainfo *qi = mp->m_quotainfo; | ||
1627 | int error; | 1431 | int error; |
1628 | 1432 | ||
1629 | if (!xfs_dqlock_nowait(dqp)) | 1433 | if (!xfs_dqlock_nowait(dqp)) |
@@ -1637,16 +1441,14 @@ xfs_qm_dqreclaim_one( | |||
1637 | xfs_dqunlock(dqp); | 1441 | xfs_dqunlock(dqp); |
1638 | 1442 | ||
1639 | trace_xfs_dqreclaim_want(dqp); | 1443 | trace_xfs_dqreclaim_want(dqp); |
1640 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); | 1444 | XFS_STATS_INC(xs_qm_dqwants); |
1641 | 1445 | ||
1642 | list_del_init(&dqp->q_freelist); | 1446 | list_del_init(&dqp->q_lru); |
1643 | xfs_Gqm->qm_dqfrlist_cnt--; | 1447 | qi->qi_lru_count--; |
1448 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
1644 | return; | 1449 | return; |
1645 | } | 1450 | } |
1646 | 1451 | ||
1647 | ASSERT(dqp->q_hash); | ||
1648 | ASSERT(!list_empty(&dqp->q_mplist)); | ||
1649 | |||
1650 | /* | 1452 | /* |
1651 | * Try to grab the flush lock. If this dquot is in the process of | 1453 | * Try to grab the flush lock. If this dquot is in the process of |
1652 | * getting flushed to disk, we don't want to reclaim it. | 1454 | * getting flushed to disk, we don't want to reclaim it. |
@@ -1688,11 +1490,12 @@ xfs_qm_dqreclaim_one( | |||
1688 | xfs_dqunlock(dqp); | 1490 | xfs_dqunlock(dqp); |
1689 | 1491 | ||
1690 | ASSERT(dqp->q_nrefs == 0); | 1492 | ASSERT(dqp->q_nrefs == 0); |
1691 | list_move_tail(&dqp->q_freelist, dispose_list); | 1493 | list_move_tail(&dqp->q_lru, dispose_list); |
1692 | xfs_Gqm->qm_dqfrlist_cnt--; | 1494 | qi->qi_lru_count--; |
1495 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
1693 | 1496 | ||
1694 | trace_xfs_dqreclaim_done(dqp); | 1497 | trace_xfs_dqreclaim_done(dqp); |
1695 | XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); | 1498 | XFS_STATS_INC(xs_qm_dqreclaims); |
1696 | return; | 1499 | return; |
1697 | 1500 | ||
1698 | out_busy: | 1501 | out_busy: |
@@ -1701,10 +1504,10 @@ out_busy: | |||
1701 | /* | 1504 | /* |
1702 | * Move the dquot to the tail of the list so that we don't spin on it. | 1505 | * Move the dquot to the tail of the list so that we don't spin on it. |
1703 | */ | 1506 | */ |
1704 | list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); | 1507 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); |
1705 | 1508 | ||
1706 | trace_xfs_dqreclaim_busy(dqp); | 1509 | trace_xfs_dqreclaim_busy(dqp); |
1707 | XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); | 1510 | XFS_STATS_INC(xs_qm_dqreclaim_misses); |
1708 | } | 1511 | } |
1709 | 1512 | ||
1710 | STATIC int | 1513 | STATIC int |
@@ -1712,6 +1515,8 @@ xfs_qm_shake( | |||
1712 | struct shrinker *shrink, | 1515 | struct shrinker *shrink, |
1713 | struct shrink_control *sc) | 1516 | struct shrink_control *sc) |
1714 | { | 1517 | { |
1518 | struct xfs_quotainfo *qi = | ||
1519 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | ||
1715 | int nr_to_scan = sc->nr_to_scan; | 1520 | int nr_to_scan = sc->nr_to_scan; |
1716 | LIST_HEAD (dispose_list); | 1521 | LIST_HEAD (dispose_list); |
1717 | struct xfs_dquot *dqp; | 1522 | struct xfs_dquot *dqp; |
@@ -1721,24 +1526,23 @@ xfs_qm_shake( | |||
1721 | if (!nr_to_scan) | 1526 | if (!nr_to_scan) |
1722 | goto out; | 1527 | goto out; |
1723 | 1528 | ||
1724 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); | 1529 | mutex_lock(&qi->qi_lru_lock); |
1725 | while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { | 1530 | while (!list_empty(&qi->qi_lru_list)) { |
1726 | if (nr_to_scan-- <= 0) | 1531 | if (nr_to_scan-- <= 0) |
1727 | break; | 1532 | break; |
1728 | dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, | 1533 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, |
1729 | q_freelist); | 1534 | q_lru); |
1730 | xfs_qm_dqreclaim_one(dqp, &dispose_list); | 1535 | xfs_qm_dqreclaim_one(dqp, &dispose_list); |
1731 | } | 1536 | } |
1732 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | 1537 | mutex_unlock(&qi->qi_lru_lock); |
1733 | 1538 | ||
1734 | while (!list_empty(&dispose_list)) { | 1539 | while (!list_empty(&dispose_list)) { |
1735 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, | 1540 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); |
1736 | q_freelist); | 1541 | list_del_init(&dqp->q_lru); |
1737 | list_del_init(&dqp->q_freelist); | ||
1738 | xfs_qm_dqfree_one(dqp); | 1542 | xfs_qm_dqfree_one(dqp); |
1739 | } | 1543 | } |
1740 | out: | 1544 | out: |
1741 | return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; | 1545 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; |
1742 | } | 1546 | } |
1743 | 1547 | ||
1744 | /* | 1548 | /* |
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9a9b997e1a0a..44b858b79d71 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
@@ -21,21 +21,10 @@ | |||
21 | #include "xfs_dquot_item.h" | 21 | #include "xfs_dquot_item.h" |
22 | #include "xfs_dquot.h" | 22 | #include "xfs_dquot.h" |
23 | #include "xfs_quota_priv.h" | 23 | #include "xfs_quota_priv.h" |
24 | #include "xfs_qm_stats.h" | ||
25 | 24 | ||
26 | struct xfs_qm; | ||
27 | struct xfs_inode; | 25 | struct xfs_inode; |
28 | 26 | ||
29 | extern struct mutex xfs_Gqm_lock; | 27 | extern struct kmem_zone *xfs_qm_dqtrxzone; |
30 | extern struct xfs_qm *xfs_Gqm; | ||
31 | extern kmem_zone_t *qm_dqzone; | ||
32 | extern kmem_zone_t *qm_dqtrxzone; | ||
33 | |||
34 | /* | ||
35 | * Dquot hashtable constants/threshold values. | ||
36 | */ | ||
37 | #define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) | ||
38 | #define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) | ||
39 | 28 | ||
40 | /* | 29 | /* |
41 | * This defines the unit of allocation of dquots. | 30 | * This defines the unit of allocation of dquots. |
@@ -48,36 +37,20 @@ extern kmem_zone_t *qm_dqtrxzone; | |||
48 | */ | 37 | */ |
49 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 | 38 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 |
50 | 39 | ||
51 | typedef xfs_dqhash_t xfs_dqlist_t; | ||
52 | |||
53 | /* | ||
54 | * Quota Manager (global) structure. Lives only in core. | ||
55 | */ | ||
56 | typedef struct xfs_qm { | ||
57 | xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ | ||
58 | xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ | ||
59 | uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ | ||
60 | struct list_head qm_dqfrlist; /* freelist of dquots */ | ||
61 | struct mutex qm_dqfrlist_lock; | ||
62 | int qm_dqfrlist_cnt; | ||
63 | atomic_t qm_totaldquots; /* total incore dquots */ | ||
64 | uint qm_nrefs; /* file systems with quota on */ | ||
65 | kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ | ||
66 | kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ | ||
67 | } xfs_qm_t; | ||
68 | |||
69 | /* | 40 | /* |
70 | * Various quota information for individual filesystems. | 41 | * Various quota information for individual filesystems. |
71 | * The mount structure keeps a pointer to this. | 42 | * The mount structure keeps a pointer to this. |
72 | */ | 43 | */ |
73 | typedef struct xfs_quotainfo { | 44 | typedef struct xfs_quotainfo { |
45 | struct radix_tree_root qi_uquota_tree; | ||
46 | struct radix_tree_root qi_gquota_tree; | ||
47 | struct mutex qi_tree_lock; | ||
74 | xfs_inode_t *qi_uquotaip; /* user quota inode */ | 48 | xfs_inode_t *qi_uquotaip; /* user quota inode */ |
75 | xfs_inode_t *qi_gquotaip; /* group quota inode */ | 49 | xfs_inode_t *qi_gquotaip; /* group quota inode */ |
76 | struct list_head qi_dqlist; /* all dquots in filesys */ | 50 | struct list_head qi_lru_list; |
77 | struct mutex qi_dqlist_lock; | 51 | struct mutex qi_lru_lock; |
52 | int qi_lru_count; | ||
78 | int qi_dquots; | 53 | int qi_dquots; |
79 | int qi_dqreclaims; /* a change here indicates | ||
80 | a removal in the dqlist */ | ||
81 | time_t qi_btimelimit; /* limit for blks timer */ | 54 | time_t qi_btimelimit; /* limit for blks timer */ |
82 | time_t qi_itimelimit; /* limit for inodes timer */ | 55 | time_t qi_itimelimit; /* limit for inodes timer */ |
83 | time_t qi_rtbtimelimit;/* limit for rt blks timer */ | 56 | time_t qi_rtbtimelimit;/* limit for rt blks timer */ |
@@ -93,8 +66,14 @@ typedef struct xfs_quotainfo { | |||
93 | xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ | 66 | xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ |
94 | xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ | 67 | xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ |
95 | xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ | 68 | xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ |
69 | struct shrinker qi_shrinker; | ||
96 | } xfs_quotainfo_t; | 70 | } xfs_quotainfo_t; |
97 | 71 | ||
72 | #define XFS_DQUOT_TREE(qi, type) \ | ||
73 | ((type & XFS_DQ_USER) ? \ | ||
74 | &((qi)->qi_uquota_tree) : \ | ||
75 | &((qi)->qi_gquota_tree)) | ||
76 | |||
98 | 77 | ||
99 | extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); | 78 | extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); |
100 | extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, | 79 | extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, |
@@ -130,7 +109,7 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); | |||
130 | extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); | 109 | extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); |
131 | 110 | ||
132 | /* dquot stuff */ | 111 | /* dquot stuff */ |
133 | extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); | 112 | extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); |
134 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); | 113 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); |
135 | 114 | ||
136 | /* quota ops */ | 115 | /* quota ops */ |
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index a0a829addca9..e6986b5d80d8 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c | |||
@@ -40,28 +40,28 @@ | |||
40 | STATIC void | 40 | STATIC void |
41 | xfs_fill_statvfs_from_dquot( | 41 | xfs_fill_statvfs_from_dquot( |
42 | struct kstatfs *statp, | 42 | struct kstatfs *statp, |
43 | xfs_disk_dquot_t *dp) | 43 | struct xfs_dquot *dqp) |
44 | { | 44 | { |
45 | __uint64_t limit; | 45 | __uint64_t limit; |
46 | 46 | ||
47 | limit = dp->d_blk_softlimit ? | 47 | limit = dqp->q_core.d_blk_softlimit ? |
48 | be64_to_cpu(dp->d_blk_softlimit) : | 48 | be64_to_cpu(dqp->q_core.d_blk_softlimit) : |
49 | be64_to_cpu(dp->d_blk_hardlimit); | 49 | be64_to_cpu(dqp->q_core.d_blk_hardlimit); |
50 | if (limit && statp->f_blocks > limit) { | 50 | if (limit && statp->f_blocks > limit) { |
51 | statp->f_blocks = limit; | 51 | statp->f_blocks = limit; |
52 | statp->f_bfree = statp->f_bavail = | 52 | statp->f_bfree = statp->f_bavail = |
53 | (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? | 53 | (statp->f_blocks > dqp->q_res_bcount) ? |
54 | (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; | 54 | (statp->f_blocks - dqp->q_res_bcount) : 0; |
55 | } | 55 | } |
56 | 56 | ||
57 | limit = dp->d_ino_softlimit ? | 57 | limit = dqp->q_core.d_ino_softlimit ? |
58 | be64_to_cpu(dp->d_ino_softlimit) : | 58 | be64_to_cpu(dqp->q_core.d_ino_softlimit) : |
59 | be64_to_cpu(dp->d_ino_hardlimit); | 59 | be64_to_cpu(dqp->q_core.d_ino_hardlimit); |
60 | if (limit && statp->f_files > limit) { | 60 | if (limit && statp->f_files > limit) { |
61 | statp->f_files = limit; | 61 | statp->f_files = limit; |
62 | statp->f_ffree = | 62 | statp->f_ffree = |
63 | (statp->f_files > be64_to_cpu(dp->d_icount)) ? | 63 | (statp->f_files > dqp->q_res_icount) ? |
64 | (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; | 64 | (statp->f_ffree - dqp->q_res_icount) : 0; |
65 | } | 65 | } |
66 | } | 66 | } |
67 | 67 | ||
@@ -82,7 +82,7 @@ xfs_qm_statvfs( | |||
82 | xfs_dquot_t *dqp; | 82 | xfs_dquot_t *dqp; |
83 | 83 | ||
84 | if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { | 84 | if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { |
85 | xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); | 85 | xfs_fill_statvfs_from_dquot(statp, dqp); |
86 | xfs_qm_dqput(dqp); | 86 | xfs_qm_dqput(dqp); |
87 | } | 87 | } |
88 | } | 88 | } |
@@ -156,21 +156,3 @@ xfs_qm_newmount( | |||
156 | 156 | ||
157 | return 0; | 157 | return 0; |
158 | } | 158 | } |
159 | |||
160 | void __init | ||
161 | xfs_qm_init(void) | ||
162 | { | ||
163 | printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); | ||
164 | mutex_init(&xfs_Gqm_lock); | ||
165 | xfs_qm_init_procfs(); | ||
166 | } | ||
167 | |||
168 | void __exit | ||
169 | xfs_qm_exit(void) | ||
170 | { | ||
171 | xfs_qm_cleanup_procfs(); | ||
172 | if (qm_dqzone) | ||
173 | kmem_zone_destroy(qm_dqzone); | ||
174 | if (qm_dqtrxzone) | ||
175 | kmem_zone_destroy(qm_dqtrxzone); | ||
176 | } | ||
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c deleted file mode 100644 index 5729ba570877..000000000000 --- a/fs/xfs/xfs_qm_stats.c +++ /dev/null | |||
@@ -1,105 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_alloc.h" | ||
27 | #include "xfs_quota.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_bmap_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_itable.h" | ||
32 | #include "xfs_bmap.h" | ||
33 | #include "xfs_rtalloc.h" | ||
34 | #include "xfs_error.h" | ||
35 | #include "xfs_attr.h" | ||
36 | #include "xfs_buf_item.h" | ||
37 | #include "xfs_qm.h" | ||
38 | |||
39 | struct xqmstats xqmstats; | ||
40 | |||
41 | static int xqm_proc_show(struct seq_file *m, void *v) | ||
42 | { | ||
43 | /* maximum; incore; ratio free to inuse; freelist */ | ||
44 | seq_printf(m, "%d\t%d\t%d\t%u\n", | ||
45 | 0, | ||
46 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, | ||
47 | 0, | ||
48 | xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int xqm_proc_open(struct inode *inode, struct file *file) | ||
53 | { | ||
54 | return single_open(file, xqm_proc_show, NULL); | ||
55 | } | ||
56 | |||
57 | static const struct file_operations xqm_proc_fops = { | ||
58 | .owner = THIS_MODULE, | ||
59 | .open = xqm_proc_open, | ||
60 | .read = seq_read, | ||
61 | .llseek = seq_lseek, | ||
62 | .release = single_release, | ||
63 | }; | ||
64 | |||
65 | static int xqmstat_proc_show(struct seq_file *m, void *v) | ||
66 | { | ||
67 | /* quota performance statistics */ | ||
68 | seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", | ||
69 | xqmstats.xs_qm_dqreclaims, | ||
70 | xqmstats.xs_qm_dqreclaim_misses, | ||
71 | xqmstats.xs_qm_dquot_dups, | ||
72 | xqmstats.xs_qm_dqcachemisses, | ||
73 | xqmstats.xs_qm_dqcachehits, | ||
74 | xqmstats.xs_qm_dqwants, | ||
75 | xqmstats.xs_qm_dqshake_reclaims, | ||
76 | xqmstats.xs_qm_dqinact_reclaims); | ||
77 | return 0; | ||
78 | } | ||
79 | |||
80 | static int xqmstat_proc_open(struct inode *inode, struct file *file) | ||
81 | { | ||
82 | return single_open(file, xqmstat_proc_show, NULL); | ||
83 | } | ||
84 | |||
85 | static const struct file_operations xqmstat_proc_fops = { | ||
86 | .owner = THIS_MODULE, | ||
87 | .open = xqmstat_proc_open, | ||
88 | .read = seq_read, | ||
89 | .llseek = seq_lseek, | ||
90 | .release = single_release, | ||
91 | }; | ||
92 | |||
93 | void | ||
94 | xfs_qm_init_procfs(void) | ||
95 | { | ||
96 | proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); | ||
97 | proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); | ||
98 | } | ||
99 | |||
100 | void | ||
101 | xfs_qm_cleanup_procfs(void) | ||
102 | { | ||
103 | remove_proc_entry("fs/xfs/xqm", NULL); | ||
104 | remove_proc_entry("fs/xfs/xqmstat", NULL); | ||
105 | } | ||
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h deleted file mode 100644 index 5b964fc0dc09..000000000000 --- a/fs/xfs/xfs_qm_stats.h +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2002 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_QM_STATS_H__ | ||
19 | #define __XFS_QM_STATS_H__ | ||
20 | |||
21 | #if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) | ||
22 | |||
23 | /* | ||
24 | * XQM global statistics | ||
25 | */ | ||
26 | struct xqmstats { | ||
27 | __uint32_t xs_qm_dqreclaims; | ||
28 | __uint32_t xs_qm_dqreclaim_misses; | ||
29 | __uint32_t xs_qm_dquot_dups; | ||
30 | __uint32_t xs_qm_dqcachemisses; | ||
31 | __uint32_t xs_qm_dqcachehits; | ||
32 | __uint32_t xs_qm_dqwants; | ||
33 | __uint32_t xs_qm_dqshake_reclaims; | ||
34 | __uint32_t xs_qm_dqinact_reclaims; | ||
35 | }; | ||
36 | |||
37 | extern struct xqmstats xqmstats; | ||
38 | |||
39 | # define XQM_STATS_INC(count) ( (count)++ ) | ||
40 | |||
41 | extern void xfs_qm_init_procfs(void); | ||
42 | extern void xfs_qm_cleanup_procfs(void); | ||
43 | |||
44 | #else | ||
45 | |||
46 | # define XQM_STATS_INC(count) do { } while (0) | ||
47 | |||
48 | static inline void xfs_qm_init_procfs(void) { }; | ||
49 | static inline void xfs_qm_cleanup_procfs(void) { }; | ||
50 | |||
51 | #endif | ||
52 | |||
53 | #endif /* __XFS_QM_STATS_H__ */ | ||
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 711a86e39ff0..c4f396e437a8 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -47,9 +47,6 @@ STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, | |||
47 | uint); | 47 | uint); |
48 | STATIC uint xfs_qm_export_flags(uint); | 48 | STATIC uint xfs_qm_export_flags(uint); |
49 | STATIC uint xfs_qm_export_qtype_flags(uint); | 49 | STATIC uint xfs_qm_export_qtype_flags(uint); |
50 | STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, | ||
51 | fs_disk_quota_t *); | ||
52 | |||
53 | 50 | ||
54 | /* | 51 | /* |
55 | * Turn off quota accounting and/or enforcement for all udquots and/or | 52 | * Turn off quota accounting and/or enforcement for all udquots and/or |
@@ -69,7 +66,6 @@ xfs_qm_scall_quotaoff( | |||
69 | int error; | 66 | int error; |
70 | uint inactivate_flags; | 67 | uint inactivate_flags; |
71 | xfs_qoff_logitem_t *qoffstart; | 68 | xfs_qoff_logitem_t *qoffstart; |
72 | int nculprits; | ||
73 | 69 | ||
74 | /* | 70 | /* |
75 | * No file system can have quotas enabled on disk but not in core. | 71 | * No file system can have quotas enabled on disk but not in core. |
@@ -175,18 +171,13 @@ xfs_qm_scall_quotaoff( | |||
175 | * This isn't protected by a particular lock directly, because we | 171 | * This isn't protected by a particular lock directly, because we |
176 | * don't want to take a mrlock every time we depend on quotas being on. | 172 | * don't want to take a mrlock every time we depend on quotas being on. |
177 | */ | 173 | */ |
178 | mp->m_qflags &= ~(flags); | 174 | mp->m_qflags &= ~flags; |
179 | 175 | ||
180 | /* | 176 | /* |
181 | * Go through all the dquots of this file system and purge them, | 177 | * Go through all the dquots of this file system and purge them, |
182 | * according to what was turned off. We may not be able to get rid | 178 | * according to what was turned off. |
183 | * of all dquots, because dquots can have temporary references that | ||
184 | * are not attached to inodes. eg. xfs_setattr, xfs_create. | ||
185 | * So, if we couldn't purge all the dquots from the filesystem, | ||
186 | * we can't get rid of the incore data structures. | ||
187 | */ | 179 | */ |
188 | while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) | 180 | xfs_qm_dqpurge_all(mp, dqtype); |
189 | delay(10 * nculprits); | ||
190 | 181 | ||
191 | /* | 182 | /* |
192 | * Transactions that had started before ACTIVE state bit was cleared | 183 | * Transactions that had started before ACTIVE state bit was cleared |
@@ -635,42 +626,6 @@ xfs_qm_scall_setqlim( | |||
635 | return error; | 626 | return error; |
636 | } | 627 | } |
637 | 628 | ||
638 | int | ||
639 | xfs_qm_scall_getquota( | ||
640 | xfs_mount_t *mp, | ||
641 | xfs_dqid_t id, | ||
642 | uint type, | ||
643 | fs_disk_quota_t *out) | ||
644 | { | ||
645 | xfs_dquot_t *dqp; | ||
646 | int error; | ||
647 | |||
648 | /* | ||
649 | * Try to get the dquot. We don't want it allocated on disk, so | ||
650 | * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't | ||
651 | * exist, we'll get ENOENT back. | ||
652 | */ | ||
653 | if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { | ||
654 | return (error); | ||
655 | } | ||
656 | |||
657 | /* | ||
658 | * If everything's NULL, this dquot doesn't quite exist as far as | ||
659 | * our utility programs are concerned. | ||
660 | */ | ||
661 | if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { | ||
662 | xfs_qm_dqput(dqp); | ||
663 | return XFS_ERROR(ENOENT); | ||
664 | } | ||
665 | /* | ||
666 | * Convert the disk dquot to the exportable format | ||
667 | */ | ||
668 | xfs_qm_export_dquot(mp, &dqp->q_core, out); | ||
669 | xfs_qm_dqput(dqp); | ||
670 | return (error ? XFS_ERROR(EFAULT) : 0); | ||
671 | } | ||
672 | |||
673 | |||
674 | STATIC int | 629 | STATIC int |
675 | xfs_qm_log_quotaoff_end( | 630 | xfs_qm_log_quotaoff_end( |
676 | xfs_mount_t *mp, | 631 | xfs_mount_t *mp, |
@@ -759,50 +714,66 @@ error0: | |||
759 | } | 714 | } |
760 | 715 | ||
761 | 716 | ||
762 | /* | 717 | int |
763 | * Translate an internal style on-disk-dquot to the exportable format. | 718 | xfs_qm_scall_getquota( |
764 | * The main differences are that the counters/limits are all in Basic | 719 | struct xfs_mount *mp, |
765 | * Blocks (BBs) instead of the internal FSBs, and all on-disk data has | 720 | xfs_dqid_t id, |
766 | * to be converted to the native endianness. | 721 | uint type, |
767 | */ | ||
768 | STATIC void | ||
769 | xfs_qm_export_dquot( | ||
770 | xfs_mount_t *mp, | ||
771 | xfs_disk_dquot_t *src, | ||
772 | struct fs_disk_quota *dst) | 722 | struct fs_disk_quota *dst) |
773 | { | 723 | { |
724 | struct xfs_dquot *dqp; | ||
725 | int error; | ||
726 | |||
727 | /* | ||
728 | * Try to get the dquot. We don't want it allocated on disk, so | ||
729 | * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't | ||
730 | * exist, we'll get ENOENT back. | ||
731 | */ | ||
732 | error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp); | ||
733 | if (error) | ||
734 | return error; | ||
735 | |||
736 | /* | ||
737 | * If everything's NULL, this dquot doesn't quite exist as far as | ||
738 | * our utility programs are concerned. | ||
739 | */ | ||
740 | if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { | ||
741 | error = XFS_ERROR(ENOENT); | ||
742 | goto out_put; | ||
743 | } | ||
744 | |||
774 | memset(dst, 0, sizeof(*dst)); | 745 | memset(dst, 0, sizeof(*dst)); |
775 | dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ | 746 | dst->d_version = FS_DQUOT_VERSION; |
776 | dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); | 747 | dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags); |
777 | dst->d_id = be32_to_cpu(src->d_id); | 748 | dst->d_id = be32_to_cpu(dqp->q_core.d_id); |
778 | dst->d_blk_hardlimit = | 749 | dst->d_blk_hardlimit = |
779 | XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); | 750 | XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); |
780 | dst->d_blk_softlimit = | 751 | dst->d_blk_softlimit = |
781 | XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); | 752 | XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit)); |
782 | dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); | 753 | dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); |
783 | dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); | 754 | dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); |
784 | dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); | 755 | dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount); |
785 | dst->d_icount = be64_to_cpu(src->d_icount); | 756 | dst->d_icount = dqp->q_res_icount; |
786 | dst->d_btimer = be32_to_cpu(src->d_btimer); | 757 | dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer); |
787 | dst->d_itimer = be32_to_cpu(src->d_itimer); | 758 | dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer); |
788 | dst->d_iwarns = be16_to_cpu(src->d_iwarns); | 759 | dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns); |
789 | dst->d_bwarns = be16_to_cpu(src->d_bwarns); | 760 | dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns); |
790 | dst->d_rtb_hardlimit = | 761 | dst->d_rtb_hardlimit = |
791 | XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); | 762 | XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); |
792 | dst->d_rtb_softlimit = | 763 | dst->d_rtb_softlimit = |
793 | XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); | 764 | XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); |
794 | dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); | 765 | dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount); |
795 | dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); | 766 | dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer); |
796 | dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); | 767 | dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns); |
797 | 768 | ||
798 | /* | 769 | /* |
799 | * Internally, we don't reset all the timers when quota enforcement | 770 | * Internally, we don't reset all the timers when quota enforcement |
800 | * gets turned off. No need to confuse the user level code, | 771 | * gets turned off. No need to confuse the user level code, |
801 | * so return zeroes in that case. | 772 | * so return zeroes in that case. |
802 | */ | 773 | */ |
803 | if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || | 774 | if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) || |
804 | (!XFS_IS_OQUOTA_ENFORCED(mp) && | 775 | (!XFS_IS_OQUOTA_ENFORCED(mp) && |
805 | (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { | 776 | (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { |
806 | dst->d_btimer = 0; | 777 | dst->d_btimer = 0; |
807 | dst->d_itimer = 0; | 778 | dst->d_itimer = 0; |
808 | dst->d_rtbtimer = 0; | 779 | dst->d_rtbtimer = 0; |
@@ -823,6 +794,9 @@ xfs_qm_export_dquot( | |||
823 | } | 794 | } |
824 | } | 795 | } |
825 | #endif | 796 | #endif |
797 | out_put: | ||
798 | xfs_qm_dqput(dqp); | ||
799 | return error; | ||
826 | } | 800 | } |
827 | 801 | ||
828 | STATIC uint | 802 | STATIC uint |
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 8a0807e0f979..b50ec5b95d5a 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -174,6 +174,8 @@ typedef struct xfs_qoff_logformat { | |||
174 | #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ | 174 | #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ |
175 | #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ | 175 | #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ |
176 | #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ | 176 | #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ |
177 | #define XFS_ALL_QUOTA_ACTIVE \ | ||
178 | (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) | ||
177 | 179 | ||
178 | /* | 180 | /* |
179 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees | 181 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees |
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h index 94a3d927d716..6d86219d93da 100644 --- a/fs/xfs/xfs_quota_priv.h +++ b/fs/xfs/xfs_quota_priv.h | |||
@@ -24,17 +24,6 @@ | |||
24 | */ | 24 | */ |
25 | #define XFS_DQITER_MAP_SIZE 10 | 25 | #define XFS_DQITER_MAP_SIZE 10 |
26 | 26 | ||
27 | /* | ||
28 | * Hash into a bucket in the dquot hash table, based on <mp, id>. | ||
29 | */ | ||
30 | #define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ | ||
31 | (__psunsigned_t)(id)) & \ | ||
32 | (xfs_Gqm->qm_dqhashmask - 1)) | ||
33 | #define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \ | ||
34 | (xfs_Gqm->qm_usr_dqhtable + \ | ||
35 | XFS_DQ_HASHVAL(mp, id)) : \ | ||
36 | (xfs_Gqm->qm_grp_dqhtable + \ | ||
37 | XFS_DQ_HASHVAL(mp, id))) | ||
38 | #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ | 27 | #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ |
39 | !dqp->q_core.d_blk_hardlimit && \ | 28 | !dqp->q_core.d_blk_hardlimit && \ |
40 | !dqp->q_core.d_blk_softlimit && \ | 29 | !dqp->q_core.d_blk_softlimit && \ |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 87323f1ded64..ca4f31534a0a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -183,6 +183,7 @@ error_cancel: | |||
183 | oblocks = map.br_startoff + map.br_blockcount; | 183 | oblocks = map.br_startoff + map.br_blockcount; |
184 | } | 184 | } |
185 | return 0; | 185 | return 0; |
186 | |||
186 | error: | 187 | error: |
187 | return error; | 188 | return error; |
188 | } | 189 | } |
@@ -2139,11 +2140,9 @@ xfs_rtfree_extent( | |||
2139 | xfs_buf_t *sumbp; /* summary file block buffer */ | 2140 | xfs_buf_t *sumbp; /* summary file block buffer */ |
2140 | 2141 | ||
2141 | mp = tp->t_mountp; | 2142 | mp = tp->t_mountp; |
2142 | /* | 2143 | |
2143 | * Synchronize by locking the bitmap inode. | 2144 | ASSERT(mp->m_rbmip->i_itemp != NULL); |
2144 | */ | 2145 | ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); |
2145 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | ||
2146 | xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); | ||
2147 | 2146 | ||
2148 | #if defined(__KERNEL__) && defined(DEBUG) | 2147 | #if defined(__KERNEL__) && defined(DEBUG) |
2149 | /* | 2148 | /* |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index cb6ae715814a..f429d9d5d325 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -529,7 +529,6 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) | |||
529 | #define XFS_BB_TO_FSB(mp,bb) \ | 529 | #define XFS_BB_TO_FSB(mp,bb) \ |
530 | (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) | 530 | (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) |
531 | #define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) | 531 | #define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) |
532 | #define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1)) | ||
533 | 532 | ||
534 | /* | 533 | /* |
535 | * File system block to byte conversions. | 534 | * File system block to byte conversions. |
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 76fdc5861932..ce372b7d5644 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c | |||
@@ -20,9 +20,18 @@ | |||
20 | 20 | ||
21 | DEFINE_PER_CPU(struct xfsstats, xfsstats); | 21 | DEFINE_PER_CPU(struct xfsstats, xfsstats); |
22 | 22 | ||
23 | static int counter_val(int idx) | ||
24 | { | ||
25 | int val = 0, cpu; | ||
26 | |||
27 | for_each_possible_cpu(cpu) | ||
28 | val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); | ||
29 | return val; | ||
30 | } | ||
31 | |||
23 | static int xfs_stat_proc_show(struct seq_file *m, void *v) | 32 | static int xfs_stat_proc_show(struct seq_file *m, void *v) |
24 | { | 33 | { |
25 | int c, i, j, val; | 34 | int i, j; |
26 | __uint64_t xs_xstrat_bytes = 0; | 35 | __uint64_t xs_xstrat_bytes = 0; |
27 | __uint64_t xs_write_bytes = 0; | 36 | __uint64_t xs_write_bytes = 0; |
28 | __uint64_t xs_read_bytes = 0; | 37 | __uint64_t xs_read_bytes = 0; |
@@ -50,20 +59,16 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) | |||
50 | { "abtc2", XFSSTAT_END_ABTC_V2 }, | 59 | { "abtc2", XFSSTAT_END_ABTC_V2 }, |
51 | { "bmbt2", XFSSTAT_END_BMBT_V2 }, | 60 | { "bmbt2", XFSSTAT_END_BMBT_V2 }, |
52 | { "ibt2", XFSSTAT_END_IBT_V2 }, | 61 | { "ibt2", XFSSTAT_END_IBT_V2 }, |
62 | /* we print both series of quota information together */ | ||
63 | { "qm", XFSSTAT_END_QM }, | ||
53 | }; | 64 | }; |
54 | 65 | ||
55 | /* Loop over all stats groups */ | 66 | /* Loop over all stats groups */ |
56 | for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { | 67 | for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { |
57 | seq_printf(m, "%s", xstats[i].desc); | 68 | seq_printf(m, "%s", xstats[i].desc); |
58 | /* inner loop does each group */ | 69 | /* inner loop does each group */ |
59 | while (j < xstats[i].endpoint) { | 70 | for (; j < xstats[i].endpoint; j++) |
60 | val = 0; | 71 | seq_printf(m, " %u", counter_val(j)); |
61 | /* sum over all cpus */ | ||
62 | for_each_possible_cpu(c) | ||
63 | val += *(((__u32*)&per_cpu(xfsstats, c) + j)); | ||
64 | seq_printf(m, " %u", val); | ||
65 | j++; | ||
66 | } | ||
67 | seq_putc(m, '\n'); | 72 | seq_putc(m, '\n'); |
68 | } | 73 | } |
69 | /* extra precision counters */ | 74 | /* extra precision counters */ |
@@ -97,6 +102,58 @@ static const struct file_operations xfs_stat_proc_fops = { | |||
97 | .release = single_release, | 102 | .release = single_release, |
98 | }; | 103 | }; |
99 | 104 | ||
105 | /* legacy quota interfaces */ | ||
106 | #ifdef CONFIG_XFS_QUOTA | ||
107 | static int xqm_proc_show(struct seq_file *m, void *v) | ||
108 | { | ||
109 | /* maximum; incore; ratio free to inuse; freelist */ | ||
110 | seq_printf(m, "%d\t%d\t%d\t%u\n", | ||
111 | 0, | ||
112 | counter_val(XFSSTAT_END_XQMSTAT), | ||
113 | 0, | ||
114 | counter_val(XFSSTAT_END_XQMSTAT + 1)); | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | static int xqm_proc_open(struct inode *inode, struct file *file) | ||
119 | { | ||
120 | return single_open(file, xqm_proc_show, NULL); | ||
121 | } | ||
122 | |||
123 | static const struct file_operations xqm_proc_fops = { | ||
124 | .owner = THIS_MODULE, | ||
125 | .open = xqm_proc_open, | ||
126 | .read = seq_read, | ||
127 | .llseek = seq_lseek, | ||
128 | .release = single_release, | ||
129 | }; | ||
130 | |||
131 | /* legacy quota stats interface no 2 */ | ||
132 | static int xqmstat_proc_show(struct seq_file *m, void *v) | ||
133 | { | ||
134 | int j; | ||
135 | |||
136 | seq_printf(m, "qm"); | ||
137 | for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) | ||
138 | seq_printf(m, " %u", counter_val(j)); | ||
139 | seq_putc(m, '\n'); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int xqmstat_proc_open(struct inode *inode, struct file *file) | ||
144 | { | ||
145 | return single_open(file, xqmstat_proc_show, NULL); | ||
146 | } | ||
147 | |||
148 | static const struct file_operations xqmstat_proc_fops = { | ||
149 | .owner = THIS_MODULE, | ||
150 | .open = xqmstat_proc_open, | ||
151 | .read = seq_read, | ||
152 | .llseek = seq_lseek, | ||
153 | .release = single_release, | ||
154 | }; | ||
155 | #endif /* CONFIG_XFS_QUOTA */ | ||
156 | |||
100 | int | 157 | int |
101 | xfs_init_procfs(void) | 158 | xfs_init_procfs(void) |
102 | { | 159 | { |
@@ -105,10 +162,24 @@ xfs_init_procfs(void) | |||
105 | 162 | ||
106 | if (!proc_create("fs/xfs/stat", 0, NULL, | 163 | if (!proc_create("fs/xfs/stat", 0, NULL, |
107 | &xfs_stat_proc_fops)) | 164 | &xfs_stat_proc_fops)) |
108 | goto out_remove_entry; | 165 | goto out_remove_xfs_dir; |
166 | #ifdef CONFIG_XFS_QUOTA | ||
167 | if (!proc_create("fs/xfs/xqmstat", 0, NULL, | ||
168 | &xqmstat_proc_fops)) | ||
169 | goto out_remove_stat_file; | ||
170 | if (!proc_create("fs/xfs/xqm", 0, NULL, | ||
171 | &xqm_proc_fops)) | ||
172 | goto out_remove_xqmstat_file; | ||
173 | #endif | ||
109 | return 0; | 174 | return 0; |
110 | 175 | ||
111 | out_remove_entry: | 176 | #ifdef CONFIG_XFS_QUOTA |
177 | out_remove_xqmstat_file: | ||
178 | remove_proc_entry("fs/xfs/xqmstat", NULL); | ||
179 | out_remove_stat_file: | ||
180 | remove_proc_entry("fs/xfs/stat", NULL); | ||
181 | #endif | ||
182 | out_remove_xfs_dir: | ||
112 | remove_proc_entry("fs/xfs", NULL); | 183 | remove_proc_entry("fs/xfs", NULL); |
113 | out: | 184 | out: |
114 | return -ENOMEM; | 185 | return -ENOMEM; |
@@ -117,6 +188,10 @@ xfs_init_procfs(void) | |||
117 | void | 188 | void |
118 | xfs_cleanup_procfs(void) | 189 | xfs_cleanup_procfs(void) |
119 | { | 190 | { |
191 | #ifdef CONFIG_XFS_QUOTA | ||
192 | remove_proc_entry("fs/xfs/xqm", NULL); | ||
193 | remove_proc_entry("fs/xfs/xqmstat", NULL); | ||
194 | #endif | ||
120 | remove_proc_entry("fs/xfs/stat", NULL); | 195 | remove_proc_entry("fs/xfs/stat", NULL); |
121 | remove_proc_entry("fs/xfs", NULL); | 196 | remove_proc_entry("fs/xfs", NULL); |
122 | } | 197 | } |
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 736854b1ca1a..c03ad38ceaeb 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h | |||
@@ -183,6 +183,16 @@ struct xfsstats { | |||
183 | __uint32_t xs_ibt_2_alloc; | 183 | __uint32_t xs_ibt_2_alloc; |
184 | __uint32_t xs_ibt_2_free; | 184 | __uint32_t xs_ibt_2_free; |
185 | __uint32_t xs_ibt_2_moves; | 185 | __uint32_t xs_ibt_2_moves; |
186 | #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) | ||
187 | __uint32_t xs_qm_dqreclaims; | ||
188 | __uint32_t xs_qm_dqreclaim_misses; | ||
189 | __uint32_t xs_qm_dquot_dups; | ||
190 | __uint32_t xs_qm_dqcachemisses; | ||
191 | __uint32_t xs_qm_dqcachehits; | ||
192 | __uint32_t xs_qm_dqwants; | ||
193 | #define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2) | ||
194 | __uint32_t xs_qm_dquot; | ||
195 | __uint32_t xs_qm_dquot_unused; | ||
186 | /* Extra precision counters */ | 196 | /* Extra precision counters */ |
187 | __uint64_t xs_xstrat_bytes; | 197 | __uint64_t xs_xstrat_bytes; |
188 | __uint64_t xs_write_bytes; | 198 | __uint64_t xs_write_bytes; |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index baf40e378d35..dab9a5f6dfd6 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -324,10 +324,9 @@ xfs_parseargs( | |||
324 | } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { | 324 | } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { |
325 | mp->m_flags |= XFS_MOUNT_FILESTREAMS; | 325 | mp->m_flags |= XFS_MOUNT_FILESTREAMS; |
326 | } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { | 326 | } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { |
327 | mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | | 327 | mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; |
328 | XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | | 328 | mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; |
329 | XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | | 329 | mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; |
330 | XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); | ||
331 | } else if (!strcmp(this_char, MNTOPT_QUOTA) || | 330 | } else if (!strcmp(this_char, MNTOPT_QUOTA) || |
332 | !strcmp(this_char, MNTOPT_UQUOTA) || | 331 | !strcmp(this_char, MNTOPT_UQUOTA) || |
333 | !strcmp(this_char, MNTOPT_USRQUOTA)) { | 332 | !strcmp(this_char, MNTOPT_USRQUOTA)) { |
@@ -760,6 +759,36 @@ xfs_setup_devices( | |||
760 | return 0; | 759 | return 0; |
761 | } | 760 | } |
762 | 761 | ||
762 | STATIC int | ||
763 | xfs_init_mount_workqueues( | ||
764 | struct xfs_mount *mp) | ||
765 | { | ||
766 | mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", | ||
767 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | ||
768 | if (!mp->m_data_workqueue) | ||
769 | goto out; | ||
770 | |||
771 | mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", | ||
772 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | ||
773 | if (!mp->m_unwritten_workqueue) | ||
774 | goto out_destroy_data_iodone_queue; | ||
775 | |||
776 | return 0; | ||
777 | |||
778 | out_destroy_data_iodone_queue: | ||
779 | destroy_workqueue(mp->m_data_workqueue); | ||
780 | out: | ||
781 | return -ENOMEM; | ||
782 | } | ||
783 | |||
784 | STATIC void | ||
785 | xfs_destroy_mount_workqueues( | ||
786 | struct xfs_mount *mp) | ||
787 | { | ||
788 | destroy_workqueue(mp->m_data_workqueue); | ||
789 | destroy_workqueue(mp->m_unwritten_workqueue); | ||
790 | } | ||
791 | |||
763 | /* Catch misguided souls that try to use this interface on XFS */ | 792 | /* Catch misguided souls that try to use this interface on XFS */ |
764 | STATIC struct inode * | 793 | STATIC struct inode * |
765 | xfs_fs_alloc_inode( | 794 | xfs_fs_alloc_inode( |
@@ -834,91 +863,58 @@ xfs_fs_inode_init_once( | |||
834 | } | 863 | } |
835 | 864 | ||
836 | /* | 865 | /* |
837 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that | 866 | * This is called by the VFS when dirtying inode metadata. This can happen |
838 | * we catch unlogged VFS level updates to the inode. | 867 | * for a few reasons, but we only care about timestamp updates, given that |
868 | * we handled the rest ourselves. In theory no other calls should happen, | ||
869 | * but for example generic_write_end() keeps dirtying the inode after | ||
870 | * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC, | ||
871 | * and skip this call otherwise. | ||
839 | * | 872 | * |
840 | * We need the barrier() to maintain correct ordering between unlogged | 873 | * We'll hopefull get a different method just for updating timestamps soon, |
841 | * updates and the transaction commit code that clears the i_update_core | 874 | * at which point this hack can go away, and maybe we'll also get real |
842 | * field. This requires all updates to be completed before marking the | 875 | * error handling here. |
843 | * inode dirty. | ||
844 | */ | 876 | */ |
845 | STATIC void | 877 | STATIC void |
846 | xfs_fs_dirty_inode( | 878 | xfs_fs_dirty_inode( |
847 | struct inode *inode, | ||
848 | int flags) | ||
849 | { | ||
850 | barrier(); | ||
851 | XFS_I(inode)->i_update_core = 1; | ||
852 | } | ||
853 | |||
854 | STATIC int | ||
855 | xfs_fs_write_inode( | ||
856 | struct inode *inode, | 879 | struct inode *inode, |
857 | struct writeback_control *wbc) | 880 | int flags) |
858 | { | 881 | { |
859 | struct xfs_inode *ip = XFS_I(inode); | 882 | struct xfs_inode *ip = XFS_I(inode); |
860 | struct xfs_mount *mp = ip->i_mount; | 883 | struct xfs_mount *mp = ip->i_mount; |
861 | int error = EAGAIN; | 884 | struct xfs_trans *tp; |
862 | 885 | int error; | |
863 | trace_xfs_write_inode(ip); | ||
864 | |||
865 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
866 | return -XFS_ERROR(EIO); | ||
867 | 886 | ||
868 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) { | 887 | if (flags != I_DIRTY_SYNC) |
869 | /* | 888 | return; |
870 | * Make sure the inode has made it it into the log. Instead | ||
871 | * of forcing it all the way to stable storage using a | ||
872 | * synchronous transaction we let the log force inside the | ||
873 | * ->sync_fs call do that for thus, which reduces the number | ||
874 | * of synchronous log forces dramatically. | ||
875 | */ | ||
876 | error = xfs_log_dirty_inode(ip, NULL, 0); | ||
877 | if (error) | ||
878 | goto out; | ||
879 | return 0; | ||
880 | } else { | ||
881 | if (!ip->i_update_core) | ||
882 | return 0; | ||
883 | 889 | ||
884 | /* | 890 | trace_xfs_dirty_inode(ip); |
885 | * We make this non-blocking if the inode is contended, return | ||
886 | * EAGAIN to indicate to the caller that they did not succeed. | ||
887 | * This prevents the flush path from blocking on inodes inside | ||
888 | * another operation right now, they get caught later by | ||
889 | * xfs_sync. | ||
890 | */ | ||
891 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) | ||
892 | goto out; | ||
893 | 891 | ||
894 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) | 892 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); |
895 | goto out_unlock; | 893 | error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); |
896 | 894 | if (error) { | |
897 | /* | 895 | xfs_trans_cancel(tp, 0); |
898 | * Now we have the flush lock and the inode is not pinned, we | 896 | goto trouble; |
899 | * can check if the inode is really clean as we know that | ||
900 | * there are no pending transaction completions, it is not | ||
901 | * waiting on the delayed write queue and there is no IO in | ||
902 | * progress. | ||
903 | */ | ||
904 | if (xfs_inode_clean(ip)) { | ||
905 | xfs_ifunlock(ip); | ||
906 | error = 0; | ||
907 | goto out_unlock; | ||
908 | } | ||
909 | error = xfs_iflush(ip, SYNC_TRYLOCK); | ||
910 | } | 897 | } |
911 | 898 | xfs_ilock(ip, XFS_ILOCK_EXCL); | |
912 | out_unlock: | ||
913 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
914 | out: | ||
915 | /* | 899 | /* |
916 | * if we failed to write out the inode then mark | 900 | * Grab all the latest timestamps from the Linux inode. |
917 | * it dirty again so we'll try again later. | ||
918 | */ | 901 | */ |
902 | ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; | ||
903 | ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; | ||
904 | ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; | ||
905 | ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; | ||
906 | ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; | ||
907 | ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; | ||
908 | |||
909 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
910 | xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); | ||
911 | error = xfs_trans_commit(tp, 0); | ||
919 | if (error) | 912 | if (error) |
920 | xfs_mark_inode_dirty_sync(ip); | 913 | goto trouble; |
921 | return -error; | 914 | return; |
915 | |||
916 | trouble: | ||
917 | xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino); | ||
922 | } | 918 | } |
923 | 919 | ||
924 | STATIC void | 920 | STATIC void |
@@ -954,6 +950,22 @@ xfs_fs_evict_inode( | |||
954 | xfs_inactive(ip); | 950 | xfs_inactive(ip); |
955 | } | 951 | } |
956 | 952 | ||
953 | /* | ||
954 | * We do an unlocked check for XFS_IDONTCACHE here because we are already | ||
955 | * serialised against cache hits here via the inode->i_lock and igrab() in | ||
956 | * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be | ||
957 | * racing with us, and it avoids needing to grab a spinlock here for every inode | ||
958 | * we drop the final reference on. | ||
959 | */ | ||
960 | STATIC int | ||
961 | xfs_fs_drop_inode( | ||
962 | struct inode *inode) | ||
963 | { | ||
964 | struct xfs_inode *ip = XFS_I(inode); | ||
965 | |||
966 | return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); | ||
967 | } | ||
968 | |||
957 | STATIC void | 969 | STATIC void |
958 | xfs_free_fsname( | 970 | xfs_free_fsname( |
959 | struct xfs_mount *mp) | 971 | struct xfs_mount *mp) |
@@ -983,6 +995,7 @@ xfs_fs_put_super( | |||
983 | xfs_unmountfs(mp); | 995 | xfs_unmountfs(mp); |
984 | xfs_freesb(mp); | 996 | xfs_freesb(mp); |
985 | xfs_icsb_destroy_counters(mp); | 997 | xfs_icsb_destroy_counters(mp); |
998 | xfs_destroy_mount_workqueues(mp); | ||
986 | xfs_close_devices(mp); | 999 | xfs_close_devices(mp); |
987 | xfs_free_fsname(mp); | 1000 | xfs_free_fsname(mp); |
988 | kfree(mp); | 1001 | kfree(mp); |
@@ -1309,10 +1322,14 @@ xfs_fs_fill_super( | |||
1309 | if (error) | 1322 | if (error) |
1310 | goto out_free_fsname; | 1323 | goto out_free_fsname; |
1311 | 1324 | ||
1312 | error = xfs_icsb_init_counters(mp); | 1325 | error = xfs_init_mount_workqueues(mp); |
1313 | if (error) | 1326 | if (error) |
1314 | goto out_close_devices; | 1327 | goto out_close_devices; |
1315 | 1328 | ||
1329 | error = xfs_icsb_init_counters(mp); | ||
1330 | if (error) | ||
1331 | goto out_destroy_workqueues; | ||
1332 | |||
1316 | error = xfs_readsb(mp, flags); | 1333 | error = xfs_readsb(mp, flags); |
1317 | if (error) | 1334 | if (error) |
1318 | goto out_destroy_counters; | 1335 | goto out_destroy_counters; |
@@ -1376,6 +1393,8 @@ xfs_fs_fill_super( | |||
1376 | xfs_freesb(mp); | 1393 | xfs_freesb(mp); |
1377 | out_destroy_counters: | 1394 | out_destroy_counters: |
1378 | xfs_icsb_destroy_counters(mp); | 1395 | xfs_icsb_destroy_counters(mp); |
1396 | out_destroy_workqueues: | ||
1397 | xfs_destroy_mount_workqueues(mp); | ||
1379 | out_close_devices: | 1398 | out_close_devices: |
1380 | xfs_close_devices(mp); | 1399 | xfs_close_devices(mp); |
1381 | out_free_fsname: | 1400 | out_free_fsname: |
@@ -1429,8 +1448,8 @@ static const struct super_operations xfs_super_operations = { | |||
1429 | .alloc_inode = xfs_fs_alloc_inode, | 1448 | .alloc_inode = xfs_fs_alloc_inode, |
1430 | .destroy_inode = xfs_fs_destroy_inode, | 1449 | .destroy_inode = xfs_fs_destroy_inode, |
1431 | .dirty_inode = xfs_fs_dirty_inode, | 1450 | .dirty_inode = xfs_fs_dirty_inode, |
1432 | .write_inode = xfs_fs_write_inode, | ||
1433 | .evict_inode = xfs_fs_evict_inode, | 1451 | .evict_inode = xfs_fs_evict_inode, |
1452 | .drop_inode = xfs_fs_drop_inode, | ||
1434 | .put_super = xfs_fs_put_super, | 1453 | .put_super = xfs_fs_put_super, |
1435 | .sync_fs = xfs_fs_sync_fs, | 1454 | .sync_fs = xfs_fs_sync_fs, |
1436 | .freeze_fs = xfs_fs_freeze, | 1455 | .freeze_fs = xfs_fs_freeze, |
@@ -1604,12 +1623,28 @@ xfs_init_workqueues(void) | |||
1604 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); | 1623 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); |
1605 | if (!xfs_syncd_wq) | 1624 | if (!xfs_syncd_wq) |
1606 | return -ENOMEM; | 1625 | return -ENOMEM; |
1626 | |||
1627 | /* | ||
1628 | * The allocation workqueue can be used in memory reclaim situations | ||
1629 | * (writepage path), and parallelism is only limited by the number of | ||
1630 | * AGs in all the filesystems mounted. Hence use the default large | ||
1631 | * max_active value for this workqueue. | ||
1632 | */ | ||
1633 | xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); | ||
1634 | if (!xfs_alloc_wq) | ||
1635 | goto out_destroy_syncd; | ||
1636 | |||
1607 | return 0; | 1637 | return 0; |
1638 | |||
1639 | out_destroy_syncd: | ||
1640 | destroy_workqueue(xfs_syncd_wq); | ||
1641 | return -ENOMEM; | ||
1608 | } | 1642 | } |
1609 | 1643 | ||
1610 | STATIC void | 1644 | STATIC void |
1611 | xfs_destroy_workqueues(void) | 1645 | xfs_destroy_workqueues(void) |
1612 | { | 1646 | { |
1647 | destroy_workqueue(xfs_alloc_wq); | ||
1613 | destroy_workqueue(xfs_syncd_wq); | 1648 | destroy_workqueue(xfs_syncd_wq); |
1614 | } | 1649 | } |
1615 | 1650 | ||
@@ -1651,13 +1686,17 @@ init_xfs_fs(void) | |||
1651 | if (error) | 1686 | if (error) |
1652 | goto out_cleanup_procfs; | 1687 | goto out_cleanup_procfs; |
1653 | 1688 | ||
1654 | vfs_initquota(); | 1689 | error = xfs_qm_init(); |
1690 | if (error) | ||
1691 | goto out_sysctl_unregister; | ||
1655 | 1692 | ||
1656 | error = register_filesystem(&xfs_fs_type); | 1693 | error = register_filesystem(&xfs_fs_type); |
1657 | if (error) | 1694 | if (error) |
1658 | goto out_sysctl_unregister; | 1695 | goto out_qm_exit; |
1659 | return 0; | 1696 | return 0; |
1660 | 1697 | ||
1698 | out_qm_exit: | ||
1699 | xfs_qm_exit(); | ||
1661 | out_sysctl_unregister: | 1700 | out_sysctl_unregister: |
1662 | xfs_sysctl_unregister(); | 1701 | xfs_sysctl_unregister(); |
1663 | out_cleanup_procfs: | 1702 | out_cleanup_procfs: |
@@ -1679,7 +1718,7 @@ init_xfs_fs(void) | |||
1679 | STATIC void __exit | 1718 | STATIC void __exit |
1680 | exit_xfs_fs(void) | 1719 | exit_xfs_fs(void) |
1681 | { | 1720 | { |
1682 | vfs_exitquota(); | 1721 | xfs_qm_exit(); |
1683 | unregister_filesystem(&xfs_fs_type); | 1722 | unregister_filesystem(&xfs_fs_type); |
1684 | xfs_sysctl_unregister(); | 1723 | xfs_sysctl_unregister(); |
1685 | xfs_cleanup_procfs(); | 1724 | xfs_cleanup_procfs(); |
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 50a3266c999e..09b0c26b2245 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h | |||
@@ -21,13 +21,11 @@ | |||
21 | #include <linux/exportfs.h> | 21 | #include <linux/exportfs.h> |
22 | 22 | ||
23 | #ifdef CONFIG_XFS_QUOTA | 23 | #ifdef CONFIG_XFS_QUOTA |
24 | extern void xfs_qm_init(void); | 24 | extern int xfs_qm_init(void); |
25 | extern void xfs_qm_exit(void); | 25 | extern void xfs_qm_exit(void); |
26 | # define vfs_initquota() xfs_qm_init() | ||
27 | # define vfs_exitquota() xfs_qm_exit() | ||
28 | #else | 26 | #else |
29 | # define vfs_initquota() do { } while (0) | 27 | # define xfs_qm_init() (0) |
30 | # define vfs_exitquota() do { } while (0) | 28 | # define xfs_qm_exit() do { } while (0) |
31 | #endif | 29 | #endif |
32 | 30 | ||
33 | #ifdef CONFIG_XFS_POSIX_ACL | 31 | #ifdef CONFIG_XFS_POSIX_ACL |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 40b75eecd2b4..205ebcb34d9e 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -336,32 +336,6 @@ xfs_sync_fsdata( | |||
336 | return error; | 336 | return error; |
337 | } | 337 | } |
338 | 338 | ||
339 | int | ||
340 | xfs_log_dirty_inode( | ||
341 | struct xfs_inode *ip, | ||
342 | struct xfs_perag *pag, | ||
343 | int flags) | ||
344 | { | ||
345 | struct xfs_mount *mp = ip->i_mount; | ||
346 | struct xfs_trans *tp; | ||
347 | int error; | ||
348 | |||
349 | if (!ip->i_update_core) | ||
350 | return 0; | ||
351 | |||
352 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
353 | error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); | ||
354 | if (error) { | ||
355 | xfs_trans_cancel(tp, 0); | ||
356 | return error; | ||
357 | } | ||
358 | |||
359 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
360 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
361 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
362 | return xfs_trans_commit(tp, 0); | ||
363 | } | ||
364 | |||
365 | /* | 339 | /* |
366 | * When remounting a filesystem read-only or freezing the filesystem, we have | 340 | * When remounting a filesystem read-only or freezing the filesystem, we have |
367 | * two phases to execute. This first phase is syncing the data before we | 341 | * two phases to execute. This first phase is syncing the data before we |
@@ -385,16 +359,6 @@ xfs_quiesce_data( | |||
385 | { | 359 | { |
386 | int error, error2 = 0; | 360 | int error, error2 = 0; |
387 | 361 | ||
388 | /* | ||
389 | * Log all pending size and timestamp updates. The vfs writeback | ||
390 | * code is supposed to do this, but due to its overagressive | ||
391 | * livelock detection it will skip inodes where appending writes | ||
392 | * were written out in the first non-blocking sync phase if their | ||
393 | * completion took long enough that it happened after taking the | ||
394 | * timestamp for the cut-off in the blocking phase. | ||
395 | */ | ||
396 | xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0); | ||
397 | |||
398 | /* force out the log */ | 362 | /* force out the log */ |
399 | xfs_log_force(mp, XFS_LOG_SYNC); | 363 | xfs_log_force(mp, XFS_LOG_SYNC); |
400 | 364 | ||
@@ -913,17 +877,15 @@ reclaim: | |||
913 | * can reference the inodes in the cache without taking references. | 877 | * can reference the inodes in the cache without taking references. |
914 | * | 878 | * |
915 | * We make that OK here by ensuring that we wait until the inode is | 879 | * We make that OK here by ensuring that we wait until the inode is |
916 | * unlocked after the lookup before we go ahead and free it. We get | 880 | * unlocked after the lookup before we go ahead and free it. |
917 | * both the ilock and the iolock because the code may need to drop the | ||
918 | * ilock one but will still hold the iolock. | ||
919 | */ | 881 | */ |
920 | xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 882 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
921 | xfs_qm_dqdetach(ip); | 883 | xfs_qm_dqdetach(ip); |
922 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 884 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
923 | 885 | ||
924 | xfs_inode_free(ip); | 886 | xfs_inode_free(ip); |
925 | return error; | ||
926 | 887 | ||
888 | return error; | ||
927 | } | 889 | } |
928 | 890 | ||
929 | /* | 891 | /* |
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h index fa965479d788..941202e7ac6e 100644 --- a/fs/xfs/xfs_sync.h +++ b/fs/xfs/xfs_sync.h | |||
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp); | |||
34 | 34 | ||
35 | void xfs_flush_inodes(struct xfs_inode *ip); | 35 | void xfs_flush_inodes(struct xfs_inode *ip); |
36 | 36 | ||
37 | int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags); | ||
38 | |||
39 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 37 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
40 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); | 38 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); |
41 | void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); | 39 | void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index bb134a819930..06838c42b2a0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr); | |||
580 | DEFINE_INODE_EVENT(xfs_dir_fsync); | 580 | DEFINE_INODE_EVENT(xfs_dir_fsync); |
581 | DEFINE_INODE_EVENT(xfs_file_fsync); | 581 | DEFINE_INODE_EVENT(xfs_file_fsync); |
582 | DEFINE_INODE_EVENT(xfs_destroy_inode); | 582 | DEFINE_INODE_EVENT(xfs_destroy_inode); |
583 | DEFINE_INODE_EVENT(xfs_write_inode); | 583 | DEFINE_INODE_EVENT(xfs_dirty_inode); |
584 | DEFINE_INODE_EVENT(xfs_evict_inode); | 584 | DEFINE_INODE_EVENT(xfs_evict_inode); |
585 | 585 | ||
586 | DEFINE_INODE_EVENT(xfs_dquot_dqalloc); | 586 | DEFINE_INODE_EVENT(xfs_dquot_dqalloc); |
@@ -627,16 +627,19 @@ DECLARE_EVENT_CLASS(xfs_namespace_class, | |||
627 | TP_STRUCT__entry( | 627 | TP_STRUCT__entry( |
628 | __field(dev_t, dev) | 628 | __field(dev_t, dev) |
629 | __field(xfs_ino_t, dp_ino) | 629 | __field(xfs_ino_t, dp_ino) |
630 | __field(int, namelen) | ||
630 | __dynamic_array(char, name, name->len) | 631 | __dynamic_array(char, name, name->len) |
631 | ), | 632 | ), |
632 | TP_fast_assign( | 633 | TP_fast_assign( |
633 | __entry->dev = VFS_I(dp)->i_sb->s_dev; | 634 | __entry->dev = VFS_I(dp)->i_sb->s_dev; |
634 | __entry->dp_ino = dp->i_ino; | 635 | __entry->dp_ino = dp->i_ino; |
636 | __entry->namelen = name->len; | ||
635 | memcpy(__get_str(name), name->name, name->len); | 637 | memcpy(__get_str(name), name->name, name->len); |
636 | ), | 638 | ), |
637 | TP_printk("dev %d:%d dp ino 0x%llx name %s", | 639 | TP_printk("dev %d:%d dp ino 0x%llx name %.*s", |
638 | MAJOR(__entry->dev), MINOR(__entry->dev), | 640 | MAJOR(__entry->dev), MINOR(__entry->dev), |
639 | __entry->dp_ino, | 641 | __entry->dp_ino, |
642 | __entry->namelen, | ||
640 | __get_str(name)) | 643 | __get_str(name)) |
641 | ) | 644 | ) |
642 | 645 | ||
@@ -658,6 +661,8 @@ TRACE_EVENT(xfs_rename, | |||
658 | __field(dev_t, dev) | 661 | __field(dev_t, dev) |
659 | __field(xfs_ino_t, src_dp_ino) | 662 | __field(xfs_ino_t, src_dp_ino) |
660 | __field(xfs_ino_t, target_dp_ino) | 663 | __field(xfs_ino_t, target_dp_ino) |
664 | __field(int, src_namelen) | ||
665 | __field(int, target_namelen) | ||
661 | __dynamic_array(char, src_name, src_name->len) | 666 | __dynamic_array(char, src_name, src_name->len) |
662 | __dynamic_array(char, target_name, target_name->len) | 667 | __dynamic_array(char, target_name, target_name->len) |
663 | ), | 668 | ), |
@@ -665,15 +670,20 @@ TRACE_EVENT(xfs_rename, | |||
665 | __entry->dev = VFS_I(src_dp)->i_sb->s_dev; | 670 | __entry->dev = VFS_I(src_dp)->i_sb->s_dev; |
666 | __entry->src_dp_ino = src_dp->i_ino; | 671 | __entry->src_dp_ino = src_dp->i_ino; |
667 | __entry->target_dp_ino = target_dp->i_ino; | 672 | __entry->target_dp_ino = target_dp->i_ino; |
673 | __entry->src_namelen = src_name->len; | ||
674 | __entry->target_namelen = target_name->len; | ||
668 | memcpy(__get_str(src_name), src_name->name, src_name->len); | 675 | memcpy(__get_str(src_name), src_name->name, src_name->len); |
669 | memcpy(__get_str(target_name), target_name->name, target_name->len); | 676 | memcpy(__get_str(target_name), target_name->name, |
677 | target_name->len); | ||
670 | ), | 678 | ), |
671 | TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" | 679 | TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" |
672 | " src name %s target name %s", | 680 | " src name %.*s target name %.*s", |
673 | MAJOR(__entry->dev), MINOR(__entry->dev), | 681 | MAJOR(__entry->dev), MINOR(__entry->dev), |
674 | __entry->src_dp_ino, | 682 | __entry->src_dp_ino, |
675 | __entry->target_dp_ino, | 683 | __entry->target_dp_ino, |
684 | __entry->src_namelen, | ||
676 | __get_str(src_name), | 685 | __get_str(src_name), |
686 | __entry->target_namelen, | ||
677 | __get_str(target_name)) | 687 | __get_str(target_name)) |
678 | ) | 688 | ) |
679 | 689 | ||
@@ -741,10 +751,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc); | |||
741 | DEFINE_DQUOT_EVENT(xfs_dqtobp_read); | 751 | DEFINE_DQUOT_EVENT(xfs_dqtobp_read); |
742 | DEFINE_DQUOT_EVENT(xfs_dqread); | 752 | DEFINE_DQUOT_EVENT(xfs_dqread); |
743 | DEFINE_DQUOT_EVENT(xfs_dqread_fail); | 753 | DEFINE_DQUOT_EVENT(xfs_dqread_fail); |
744 | DEFINE_DQUOT_EVENT(xfs_dqlookup_found); | ||
745 | DEFINE_DQUOT_EVENT(xfs_dqlookup_done); | ||
746 | DEFINE_DQUOT_EVENT(xfs_dqget_hit); | 754 | DEFINE_DQUOT_EVENT(xfs_dqget_hit); |
747 | DEFINE_DQUOT_EVENT(xfs_dqget_miss); | 755 | DEFINE_DQUOT_EVENT(xfs_dqget_miss); |
756 | DEFINE_DQUOT_EVENT(xfs_dqget_freeing); | ||
757 | DEFINE_DQUOT_EVENT(xfs_dqget_dup); | ||
748 | DEFINE_DQUOT_EVENT(xfs_dqput); | 758 | DEFINE_DQUOT_EVENT(xfs_dqput); |
749 | DEFINE_DQUOT_EVENT(xfs_dqput_wait); | 759 | DEFINE_DQUOT_EVENT(xfs_dqput_wait); |
750 | DEFINE_DQUOT_EVENT(xfs_dqput_free); | 760 | DEFINE_DQUOT_EVENT(xfs_dqput_free); |
@@ -782,12 +792,12 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
782 | __entry->curr_res = tic->t_curr_res; | 792 | __entry->curr_res = tic->t_curr_res; |
783 | __entry->unit_res = tic->t_unit_res; | 793 | __entry->unit_res = tic->t_unit_res; |
784 | __entry->flags = tic->t_flags; | 794 | __entry->flags = tic->t_flags; |
785 | __entry->reserveq = list_empty(&log->l_reserveq); | 795 | __entry->reserveq = list_empty(&log->l_reserve_head.waiters); |
786 | __entry->writeq = list_empty(&log->l_writeq); | 796 | __entry->writeq = list_empty(&log->l_write_head.waiters); |
787 | xlog_crack_grant_head(&log->l_grant_reserve_head, | 797 | xlog_crack_grant_head(&log->l_reserve_head.grant, |
788 | &__entry->grant_reserve_cycle, | 798 | &__entry->grant_reserve_cycle, |
789 | &__entry->grant_reserve_bytes); | 799 | &__entry->grant_reserve_bytes); |
790 | xlog_crack_grant_head(&log->l_grant_write_head, | 800 | xlog_crack_grant_head(&log->l_write_head.grant, |
791 | &__entry->grant_write_cycle, | 801 | &__entry->grant_write_cycle, |
792 | &__entry->grant_write_bytes); | 802 | &__entry->grant_write_bytes); |
793 | __entry->curr_cycle = log->l_curr_cycle; | 803 | __entry->curr_cycle = log->l_curr_cycle; |
@@ -826,20 +836,14 @@ DEFINE_EVENT(xfs_loggrant_class, name, \ | |||
826 | TP_ARGS(log, tic)) | 836 | TP_ARGS(log, tic)) |
827 | DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); | 837 | DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); |
828 | DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); | 838 | DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); |
829 | DEFINE_LOGGRANT_EVENT(xfs_log_reserve); | ||
830 | DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); | 839 | DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); |
831 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); | ||
832 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); | ||
833 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); | ||
834 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); | 840 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); |
835 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); | 841 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); |
836 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); | 842 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); |
837 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); | 843 | DEFINE_LOGGRANT_EVENT(xfs_log_reserve); |
838 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); | 844 | DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit); |
839 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); | 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep); | 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit); |
841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake); | ||
842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); | ||
843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); | 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); |
844 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); | 848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); |
845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); | 849 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); |
@@ -1414,7 +1418,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); | |||
1414 | DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); | 1418 | DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); |
1415 | DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); | 1419 | DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); |
1416 | 1420 | ||
1417 | DECLARE_EVENT_CLASS(xfs_dir2_class, | 1421 | DECLARE_EVENT_CLASS(xfs_da_class, |
1418 | TP_PROTO(struct xfs_da_args *args), | 1422 | TP_PROTO(struct xfs_da_args *args), |
1419 | TP_ARGS(args), | 1423 | TP_ARGS(args), |
1420 | TP_STRUCT__entry( | 1424 | TP_STRUCT__entry( |
@@ -1449,7 +1453,7 @@ DECLARE_EVENT_CLASS(xfs_dir2_class, | |||
1449 | ) | 1453 | ) |
1450 | 1454 | ||
1451 | #define DEFINE_DIR2_EVENT(name) \ | 1455 | #define DEFINE_DIR2_EVENT(name) \ |
1452 | DEFINE_EVENT(xfs_dir2_class, name, \ | 1456 | DEFINE_EVENT(xfs_da_class, name, \ |
1453 | TP_PROTO(struct xfs_da_args *args), \ | 1457 | TP_PROTO(struct xfs_da_args *args), \ |
1454 | TP_ARGS(args)) | 1458 | TP_ARGS(args)) |
1455 | DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); | 1459 | DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); |
@@ -1478,6 +1482,64 @@ DEFINE_DIR2_EVENT(xfs_dir2_node_replace); | |||
1478 | DEFINE_DIR2_EVENT(xfs_dir2_node_removename); | 1482 | DEFINE_DIR2_EVENT(xfs_dir2_node_removename); |
1479 | DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); | 1483 | DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); |
1480 | 1484 | ||
1485 | #define DEFINE_ATTR_EVENT(name) \ | ||
1486 | DEFINE_EVENT(xfs_da_class, name, \ | ||
1487 | TP_PROTO(struct xfs_da_args *args), \ | ||
1488 | TP_ARGS(args)) | ||
1489 | DEFINE_ATTR_EVENT(xfs_attr_sf_add); | ||
1490 | DEFINE_ATTR_EVENT(xfs_attr_sf_addname); | ||
1491 | DEFINE_ATTR_EVENT(xfs_attr_sf_create); | ||
1492 | DEFINE_ATTR_EVENT(xfs_attr_sf_lookup); | ||
1493 | DEFINE_ATTR_EVENT(xfs_attr_sf_remove); | ||
1494 | DEFINE_ATTR_EVENT(xfs_attr_sf_removename); | ||
1495 | DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); | ||
1496 | |||
1497 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add); | ||
1498 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old); | ||
1499 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new); | ||
1500 | DEFINE_ATTR_EVENT(xfs_attr_leaf_addname); | ||
1501 | DEFINE_ATTR_EVENT(xfs_attr_leaf_create); | ||
1502 | DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup); | ||
1503 | DEFINE_ATTR_EVENT(xfs_attr_leaf_replace); | ||
1504 | DEFINE_ATTR_EVENT(xfs_attr_leaf_removename); | ||
1505 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split); | ||
1506 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before); | ||
1507 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split_after); | ||
1508 | DEFINE_ATTR_EVENT(xfs_attr_leaf_clearflag); | ||
1509 | DEFINE_ATTR_EVENT(xfs_attr_leaf_setflag); | ||
1510 | DEFINE_ATTR_EVENT(xfs_attr_leaf_flipflags); | ||
1511 | DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf); | ||
1512 | DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node); | ||
1513 | DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance); | ||
1514 | DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance); | ||
1515 | |||
1516 | DEFINE_ATTR_EVENT(xfs_attr_node_addname); | ||
1517 | DEFINE_ATTR_EVENT(xfs_attr_node_lookup); | ||
1518 | DEFINE_ATTR_EVENT(xfs_attr_node_replace); | ||
1519 | DEFINE_ATTR_EVENT(xfs_attr_node_removename); | ||
1520 | |||
1521 | #define DEFINE_DA_EVENT(name) \ | ||
1522 | DEFINE_EVENT(xfs_da_class, name, \ | ||
1523 | TP_PROTO(struct xfs_da_args *args), \ | ||
1524 | TP_ARGS(args)) | ||
1525 | DEFINE_DA_EVENT(xfs_da_split); | ||
1526 | DEFINE_DA_EVENT(xfs_da_join); | ||
1527 | DEFINE_DA_EVENT(xfs_da_link_before); | ||
1528 | DEFINE_DA_EVENT(xfs_da_link_after); | ||
1529 | DEFINE_DA_EVENT(xfs_da_unlink_back); | ||
1530 | DEFINE_DA_EVENT(xfs_da_unlink_forward); | ||
1531 | DEFINE_DA_EVENT(xfs_da_root_split); | ||
1532 | DEFINE_DA_EVENT(xfs_da_root_join); | ||
1533 | DEFINE_DA_EVENT(xfs_da_node_add); | ||
1534 | DEFINE_DA_EVENT(xfs_da_node_create); | ||
1535 | DEFINE_DA_EVENT(xfs_da_node_split); | ||
1536 | DEFINE_DA_EVENT(xfs_da_node_remove); | ||
1537 | DEFINE_DA_EVENT(xfs_da_node_rebalance); | ||
1538 | DEFINE_DA_EVENT(xfs_da_node_unbalance); | ||
1539 | DEFINE_DA_EVENT(xfs_da_swap_lastblock); | ||
1540 | DEFINE_DA_EVENT(xfs_da_grow_inode); | ||
1541 | DEFINE_DA_EVENT(xfs_da_shrink_inode); | ||
1542 | |||
1481 | DECLARE_EVENT_CLASS(xfs_dir2_space_class, | 1543 | DECLARE_EVENT_CLASS(xfs_dir2_space_class, |
1482 | TP_PROTO(struct xfs_da_args *args, int idx), | 1544 | TP_PROTO(struct xfs_da_args *args, int idx), |
1483 | TP_ARGS(args, idx), | 1545 | TP_ARGS(args, idx), |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7adcdf15ae0c..103b00c90004 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -681,7 +681,6 @@ xfs_trans_reserve( | |||
681 | uint flags, | 681 | uint flags, |
682 | uint logcount) | 682 | uint logcount) |
683 | { | 683 | { |
684 | int log_flags; | ||
685 | int error = 0; | 684 | int error = 0; |
686 | int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; | 685 | int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
687 | 686 | ||
@@ -707,24 +706,32 @@ xfs_trans_reserve( | |||
707 | * Reserve the log space needed for this transaction. | 706 | * Reserve the log space needed for this transaction. |
708 | */ | 707 | */ |
709 | if (logspace > 0) { | 708 | if (logspace > 0) { |
710 | ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace)); | 709 | bool permanent = false; |
711 | ASSERT((tp->t_log_count == 0) || | 710 | |
712 | (tp->t_log_count == logcount)); | 711 | ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace); |
712 | ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount); | ||
713 | |||
713 | if (flags & XFS_TRANS_PERM_LOG_RES) { | 714 | if (flags & XFS_TRANS_PERM_LOG_RES) { |
714 | log_flags = XFS_LOG_PERM_RESERV; | ||
715 | tp->t_flags |= XFS_TRANS_PERM_LOG_RES; | 715 | tp->t_flags |= XFS_TRANS_PERM_LOG_RES; |
716 | permanent = true; | ||
716 | } else { | 717 | } else { |
717 | ASSERT(tp->t_ticket == NULL); | 718 | ASSERT(tp->t_ticket == NULL); |
718 | ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); | 719 | ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); |
719 | log_flags = 0; | ||
720 | } | 720 | } |
721 | 721 | ||
722 | error = xfs_log_reserve(tp->t_mountp, logspace, logcount, | 722 | if (tp->t_ticket != NULL) { |
723 | &tp->t_ticket, | 723 | ASSERT(flags & XFS_TRANS_PERM_LOG_RES); |
724 | XFS_TRANSACTION, log_flags, tp->t_type); | 724 | error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); |
725 | if (error) { | 725 | } else { |
726 | goto undo_blocks; | 726 | error = xfs_log_reserve(tp->t_mountp, logspace, |
727 | logcount, &tp->t_ticket, | ||
728 | XFS_TRANSACTION, permanent, | ||
729 | tp->t_type); | ||
727 | } | 730 | } |
731 | |||
732 | if (error) | ||
733 | goto undo_blocks; | ||
734 | |||
728 | tp->t_log_res = logspace; | 735 | tp->t_log_res = logspace; |
729 | tp->t_log_count = logcount; | 736 | tp->t_log_count = logcount; |
730 | } | 737 | } |
@@ -752,6 +759,8 @@ xfs_trans_reserve( | |||
752 | */ | 759 | */ |
753 | undo_log: | 760 | undo_log: |
754 | if (logspace > 0) { | 761 | if (logspace > 0) { |
762 | int log_flags; | ||
763 | |||
755 | if (flags & XFS_TRANS_PERM_LOG_RES) { | 764 | if (flags & XFS_TRANS_PERM_LOG_RES) { |
756 | log_flags = XFS_LOG_REL_PERM_RESERV; | 765 | log_flags = XFS_LOG_REL_PERM_RESERV; |
757 | } else { | 766 | } else { |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index ed9252bcdac9..1dead07f092c 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -611,50 +611,6 @@ xfs_ail_push_all( | |||
611 | } | 611 | } |
612 | 612 | ||
613 | /* | 613 | /* |
614 | * This is to be called when an item is unlocked that may have | ||
615 | * been in the AIL. It will wake up the first member of the AIL | ||
616 | * wait list if this item's unlocking might allow it to progress. | ||
617 | * If the item is in the AIL, then we need to get the AIL lock | ||
618 | * while doing our checking so we don't race with someone going | ||
619 | * to sleep waiting for this event in xfs_trans_push_ail(). | ||
620 | */ | ||
621 | void | ||
622 | xfs_trans_unlocked_item( | ||
623 | struct xfs_ail *ailp, | ||
624 | xfs_log_item_t *lip) | ||
625 | { | ||
626 | xfs_log_item_t *min_lip; | ||
627 | |||
628 | /* | ||
629 | * If we're forcibly shutting down, we may have | ||
630 | * unlocked log items arbitrarily. The last thing | ||
631 | * we want to do is to move the tail of the log | ||
632 | * over some potentially valid data. | ||
633 | */ | ||
634 | if (!(lip->li_flags & XFS_LI_IN_AIL) || | ||
635 | XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * This is the one case where we can call into xfs_ail_min() | ||
641 | * without holding the AIL lock because we only care about the | ||
642 | * case where we are at the tail of the AIL. If the object isn't | ||
643 | * at the tail, it doesn't matter what result we get back. This | ||
644 | * is slightly racy because since we were just unlocked, we could | ||
645 | * go to sleep between the call to xfs_ail_min and the call to | ||
646 | * xfs_log_move_tail, have someone else lock us, commit to us disk, | ||
647 | * move us out of the tail of the AIL, and then we wake up. However, | ||
648 | * the call to xfs_log_move_tail() doesn't do anything if there's | ||
649 | * not enough free space to wake people up so we're safe calling it. | ||
650 | */ | ||
651 | min_lip = xfs_ail_min(ailp); | ||
652 | |||
653 | if (min_lip == lip) | ||
654 | xfs_log_move_tail(ailp->xa_mount, 1); | ||
655 | } /* xfs_trans_unlocked_item */ | ||
656 | |||
657 | /* | ||
658 | * xfs_trans_ail_update - bulk AIL insertion operation. | 614 | * xfs_trans_ail_update - bulk AIL insertion operation. |
659 | * | 615 | * |
660 | * @xfs_trans_ail_update takes an array of log items that all need to be | 616 | * @xfs_trans_ail_update takes an array of log items that all need to be |
@@ -685,7 +641,6 @@ xfs_trans_ail_update_bulk( | |||
685 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | 641 | xfs_lsn_t lsn) __releases(ailp->xa_lock) |
686 | { | 642 | { |
687 | xfs_log_item_t *mlip; | 643 | xfs_log_item_t *mlip; |
688 | xfs_lsn_t tail_lsn; | ||
689 | int mlip_changed = 0; | 644 | int mlip_changed = 0; |
690 | int i; | 645 | int i; |
691 | LIST_HEAD(tmp); | 646 | LIST_HEAD(tmp); |
@@ -712,22 +667,12 @@ xfs_trans_ail_update_bulk( | |||
712 | 667 | ||
713 | if (!list_empty(&tmp)) | 668 | if (!list_empty(&tmp)) |
714 | xfs_ail_splice(ailp, cur, &tmp, lsn); | 669 | xfs_ail_splice(ailp, cur, &tmp, lsn); |
670 | spin_unlock(&ailp->xa_lock); | ||
715 | 671 | ||
716 | if (!mlip_changed) { | 672 | if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { |
717 | spin_unlock(&ailp->xa_lock); | 673 | xlog_assign_tail_lsn(ailp->xa_mount); |
718 | return; | 674 | xfs_log_space_wake(ailp->xa_mount); |
719 | } | 675 | } |
720 | |||
721 | /* | ||
722 | * It is not safe to access mlip after the AIL lock is dropped, so we | ||
723 | * must get a copy of li_lsn before we do so. This is especially | ||
724 | * important on 32-bit platforms where accessing and updating 64-bit | ||
725 | * values like li_lsn is not atomic. | ||
726 | */ | ||
727 | mlip = xfs_ail_min(ailp); | ||
728 | tail_lsn = mlip->li_lsn; | ||
729 | spin_unlock(&ailp->xa_lock); | ||
730 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
731 | } | 676 | } |
732 | 677 | ||
733 | /* | 678 | /* |
@@ -758,7 +703,6 @@ xfs_trans_ail_delete_bulk( | |||
758 | int nr_items) __releases(ailp->xa_lock) | 703 | int nr_items) __releases(ailp->xa_lock) |
759 | { | 704 | { |
760 | xfs_log_item_t *mlip; | 705 | xfs_log_item_t *mlip; |
761 | xfs_lsn_t tail_lsn; | ||
762 | int mlip_changed = 0; | 706 | int mlip_changed = 0; |
763 | int i; | 707 | int i; |
764 | 708 | ||
@@ -785,23 +729,12 @@ xfs_trans_ail_delete_bulk( | |||
785 | if (mlip == lip) | 729 | if (mlip == lip) |
786 | mlip_changed = 1; | 730 | mlip_changed = 1; |
787 | } | 731 | } |
732 | spin_unlock(&ailp->xa_lock); | ||
788 | 733 | ||
789 | if (!mlip_changed) { | 734 | if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { |
790 | spin_unlock(&ailp->xa_lock); | 735 | xlog_assign_tail_lsn(ailp->xa_mount); |
791 | return; | 736 | xfs_log_space_wake(ailp->xa_mount); |
792 | } | 737 | } |
793 | |||
794 | /* | ||
795 | * It is not safe to access mlip after the AIL lock is dropped, so we | ||
796 | * must get a copy of li_lsn before we do so. This is especially | ||
797 | * important on 32-bit platforms where accessing and updating 64-bit | ||
798 | * values like li_lsn is not atomic. It is possible we've emptied the | ||
799 | * AIL here, so if that is the case, pass an LSN of 0 to the tail move. | ||
800 | */ | ||
801 | mlip = xfs_ail_min(ailp); | ||
802 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
803 | spin_unlock(&ailp->xa_lock); | ||
804 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
805 | } | 738 | } |
806 | 739 | ||
807 | /* | 740 | /* |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 475a4ded4f41..1302d1d95a58 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -463,19 +463,7 @@ xfs_trans_brelse(xfs_trans_t *tp, | |||
463 | * Default to a normal brelse() call if the tp is NULL. | 463 | * Default to a normal brelse() call if the tp is NULL. |
464 | */ | 464 | */ |
465 | if (tp == NULL) { | 465 | if (tp == NULL) { |
466 | struct xfs_log_item *lip = bp->b_fspriv; | ||
467 | |||
468 | ASSERT(bp->b_transp == NULL); | 466 | ASSERT(bp->b_transp == NULL); |
469 | |||
470 | /* | ||
471 | * If there's a buf log item attached to the buffer, | ||
472 | * then let the AIL know that the buffer is being | ||
473 | * unlocked. | ||
474 | */ | ||
475 | if (lip != NULL && lip->li_type == XFS_LI_BUF) { | ||
476 | bip = bp->b_fspriv; | ||
477 | xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip); | ||
478 | } | ||
479 | xfs_buf_relse(bp); | 467 | xfs_buf_relse(bp); |
480 | return; | 468 | return; |
481 | } | 469 | } |
@@ -550,21 +538,10 @@ xfs_trans_brelse(xfs_trans_t *tp, | |||
550 | ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); | 538 | ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); |
551 | ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); | 539 | ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); |
552 | xfs_buf_item_relse(bp); | 540 | xfs_buf_item_relse(bp); |
553 | bip = NULL; | ||
554 | } | ||
555 | bp->b_transp = NULL; | ||
556 | |||
557 | /* | ||
558 | * If we've still got a buf log item on the buffer, then | ||
559 | * tell the AIL that the buffer is being unlocked. | ||
560 | */ | ||
561 | if (bip != NULL) { | ||
562 | xfs_trans_unlocked_item(bip->bli_item.li_ailp, | ||
563 | (xfs_log_item_t*)bip); | ||
564 | } | 541 | } |
565 | 542 | ||
543 | bp->b_transp = NULL; | ||
566 | xfs_buf_relse(bp); | 544 | xfs_buf_relse(bp); |
567 | return; | ||
568 | } | 545 | } |
569 | 546 | ||
570 | /* | 547 | /* |
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index c4ba366d24e6..279099717ed2 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -605,7 +605,7 @@ xfs_trans_dqresv( | |||
605 | time_t timer; | 605 | time_t timer; |
606 | xfs_qwarncnt_t warns; | 606 | xfs_qwarncnt_t warns; |
607 | xfs_qwarncnt_t warnlimit; | 607 | xfs_qwarncnt_t warnlimit; |
608 | xfs_qcnt_t count; | 608 | xfs_qcnt_t total_count; |
609 | xfs_qcnt_t *resbcountp; | 609 | xfs_qcnt_t *resbcountp; |
610 | xfs_quotainfo_t *q = mp->m_quotainfo; | 610 | xfs_quotainfo_t *q = mp->m_quotainfo; |
611 | 611 | ||
@@ -648,13 +648,12 @@ xfs_trans_dqresv( | |||
648 | * hardlimit or exceed the timelimit if we allocate | 648 | * hardlimit or exceed the timelimit if we allocate |
649 | * nblks. | 649 | * nblks. |
650 | */ | 650 | */ |
651 | if (hardlimit > 0ULL && | 651 | total_count = *resbcountp + nblks; |
652 | hardlimit < nblks + *resbcountp) { | 652 | if (hardlimit && total_count > hardlimit) { |
653 | xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); | 653 | xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); |
654 | goto error_return; | 654 | goto error_return; |
655 | } | 655 | } |
656 | if (softlimit > 0ULL && | 656 | if (softlimit && total_count > softlimit) { |
657 | softlimit < nblks + *resbcountp) { | ||
658 | if ((timer != 0 && get_seconds() > timer) || | 657 | if ((timer != 0 && get_seconds() > timer) || |
659 | (warns != 0 && warns >= warnlimit)) { | 658 | (warns != 0 && warns >= warnlimit)) { |
660 | xfs_quota_warn(mp, dqp, | 659 | xfs_quota_warn(mp, dqp, |
@@ -666,7 +665,7 @@ xfs_trans_dqresv( | |||
666 | } | 665 | } |
667 | } | 666 | } |
668 | if (ninos > 0) { | 667 | if (ninos > 0) { |
669 | count = be64_to_cpu(dqp->q_core.d_icount); | 668 | total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos; |
670 | timer = be32_to_cpu(dqp->q_core.d_itimer); | 669 | timer = be32_to_cpu(dqp->q_core.d_itimer); |
671 | warns = be16_to_cpu(dqp->q_core.d_iwarns); | 670 | warns = be16_to_cpu(dqp->q_core.d_iwarns); |
672 | warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; | 671 | warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; |
@@ -677,13 +676,11 @@ xfs_trans_dqresv( | |||
677 | if (!softlimit) | 676 | if (!softlimit) |
678 | softlimit = q->qi_isoftlimit; | 677 | softlimit = q->qi_isoftlimit; |
679 | 678 | ||
680 | if (hardlimit > 0ULL && | 679 | if (hardlimit && total_count > hardlimit) { |
681 | hardlimit < ninos + count) { | ||
682 | xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); | 680 | xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); |
683 | goto error_return; | 681 | goto error_return; |
684 | } | 682 | } |
685 | if (softlimit > 0ULL && | 683 | if (softlimit && total_count > softlimit) { |
686 | softlimit < ninos + count) { | ||
687 | if ((timer != 0 && get_seconds() > timer) || | 684 | if ((timer != 0 && get_seconds() > timer) || |
688 | (warns != 0 && warns >= warnlimit)) { | 685 | (warns != 0 && warns >= warnlimit)) { |
689 | xfs_quota_warn(mp, dqp, | 686 | xfs_quota_warn(mp, dqp, |
@@ -878,7 +875,7 @@ STATIC void | |||
878 | xfs_trans_alloc_dqinfo( | 875 | xfs_trans_alloc_dqinfo( |
879 | xfs_trans_t *tp) | 876 | xfs_trans_t *tp) |
880 | { | 877 | { |
881 | tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); | 878 | tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP); |
882 | } | 879 | } |
883 | 880 | ||
884 | void | 881 | void |
@@ -887,6 +884,6 @@ xfs_trans_free_dqinfo( | |||
887 | { | 884 | { |
888 | if (!tp->t_dqinfo) | 885 | if (!tp->t_dqinfo) |
889 | return; | 886 | return; |
890 | kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); | 887 | kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo); |
891 | tp->t_dqinfo = NULL; | 888 | tp->t_dqinfo = NULL; |
892 | } | 889 | } |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 32f0288ae10f..7a7442c03f2b 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -95,10 +95,14 @@ xfs_trans_ichgtime( | |||
95 | if ((flags & XFS_ICHGTIME_MOD) && | 95 | if ((flags & XFS_ICHGTIME_MOD) && |
96 | !timespec_equal(&inode->i_mtime, &tv)) { | 96 | !timespec_equal(&inode->i_mtime, &tv)) { |
97 | inode->i_mtime = tv; | 97 | inode->i_mtime = tv; |
98 | ip->i_d.di_mtime.t_sec = tv.tv_sec; | ||
99 | ip->i_d.di_mtime.t_nsec = tv.tv_nsec; | ||
98 | } | 100 | } |
99 | if ((flags & XFS_ICHGTIME_CHG) && | 101 | if ((flags & XFS_ICHGTIME_CHG) && |
100 | !timespec_equal(&inode->i_ctime, &tv)) { | 102 | !timespec_equal(&inode->i_ctime, &tv)) { |
101 | inode->i_ctime = tv; | 103 | inode->i_ctime = tv; |
104 | ip->i_d.di_ctime.t_sec = tv.tv_sec; | ||
105 | ip->i_d.di_ctime.t_nsec = tv.tv_nsec; | ||
102 | } | 106 | } |
103 | } | 107 | } |
104 | 108 | ||
@@ -126,12 +130,12 @@ xfs_trans_log_inode( | |||
126 | /* | 130 | /* |
127 | * Always OR in the bits from the ili_last_fields field. | 131 | * Always OR in the bits from the ili_last_fields field. |
128 | * This is to coordinate with the xfs_iflush() and xfs_iflush_done() | 132 | * This is to coordinate with the xfs_iflush() and xfs_iflush_done() |
129 | * routines in the eventual clearing of the ilf_fields bits. | 133 | * routines in the eventual clearing of the ili_fields bits. |
130 | * See the big comment in xfs_iflush() for an explanation of | 134 | * See the big comment in xfs_iflush() for an explanation of |
131 | * this coordination mechanism. | 135 | * this coordination mechanism. |
132 | */ | 136 | */ |
133 | flags |= ip->i_itemp->ili_last_fields; | 137 | flags |= ip->i_itemp->ili_last_fields; |
134 | ip->i_itemp->ili_format.ilf_fields |= flags; | 138 | ip->i_itemp->ili_fields |= flags; |
135 | } | 139 | } |
136 | 140 | ||
137 | #ifdef XFS_TRANS_DEBUG | 141 | #ifdef XFS_TRANS_DEBUG |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 44820b9fcb43..8ab2ced415f1 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -104,9 +104,6 @@ void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); | |||
104 | void xfs_ail_push_all(struct xfs_ail *); | 104 | void xfs_ail_push_all(struct xfs_ail *); |
105 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); | 105 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); |
106 | 106 | ||
107 | void xfs_trans_unlocked_item(struct xfs_ail *, | ||
108 | xfs_log_item_t *); | ||
109 | |||
110 | struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, | 107 | struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, |
111 | struct xfs_ail_cursor *cur, | 108 | struct xfs_ail_cursor *cur, |
112 | xfs_lsn_t lsn); | 109 | xfs_lsn_t lsn); |
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h index 7c220b4227bc..db14d0c08682 100644 --- a/fs/xfs/xfs_vnode.h +++ b/fs/xfs/xfs_vnode.h | |||
@@ -22,7 +22,6 @@ | |||
22 | 22 | ||
23 | struct file; | 23 | struct file; |
24 | struct xfs_inode; | 24 | struct xfs_inode; |
25 | struct xfs_iomap; | ||
26 | struct attrlist_cursor_kern; | 25 | struct attrlist_cursor_kern; |
27 | 26 | ||
28 | /* | 27 | /* |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 0c877cbde142..447e146b2ba6 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -10,7 +10,6 @@ struct kiocb; | |||
10 | struct pipe_inode_info; | 10 | struct pipe_inode_info; |
11 | struct uio; | 11 | struct uio; |
12 | struct xfs_inode; | 12 | struct xfs_inode; |
13 | struct xfs_iomap; | ||
14 | 13 | ||
15 | 14 | ||
16 | int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); | 15 | int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); |
@@ -49,8 +48,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, | |||
49 | int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); | 48 | int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); |
50 | int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, | 49 | int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, |
51 | int flags, struct attrlist_cursor_kern *cursor); | 50 | int flags, struct attrlist_cursor_kern *cursor); |
52 | int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, | ||
53 | int flags, struct xfs_iomap *iomapp, int *niomaps); | ||
54 | void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, | 51 | void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, |
55 | xfs_off_t last, int fiopt); | 52 | xfs_off_t last, int fiopt); |
56 | int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, | 53 | int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, |