diff options
Diffstat (limited to 'fs')
115 files changed, 1740 insertions, 996 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 358563689064..6406f896bf95 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c | |||
@@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) | |||
242 | } | 242 | } |
243 | kfree(wnames); | 243 | kfree(wnames); |
244 | fid_out: | 244 | fid_out: |
245 | v9fs_fid_add(dentry, fid); | 245 | if (!IS_ERR(fid)) |
246 | v9fs_fid_add(dentry, fid); | ||
246 | err_out: | 247 | err_out: |
247 | up_read(&v9ses->rename_sem); | 248 | up_read(&v9ses->rename_sem); |
248 | return fid; | 249 | return fid; |
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 16c8a2a98c1b..899f168fd19c 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
292 | 292 | ||
293 | fid = filp->private_data; | 293 | fid = filp->private_data; |
294 | P9_DPRINTK(P9_DEBUG_VFS, | 294 | P9_DPRINTK(P9_DEBUG_VFS, |
295 | "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); | 295 | "v9fs_dir_release: inode: %p filp: %p fid: %d\n", |
296 | inode, filp, fid ? fid->fid : -1); | ||
296 | filemap_write_and_wait(inode->i_mapping); | 297 | filemap_write_and_wait(inode->i_mapping); |
297 | p9_client_clunk(fid); | 298 | if (fid) |
299 | p9_client_clunk(fid); | ||
298 | return 0; | 300 | return 0; |
299 | } | 301 | } |
300 | 302 | ||
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c7c23eab9440..9e670d527646 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, | |||
730 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); | 730 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); |
731 | goto error; | 731 | goto error; |
732 | } | 732 | } |
733 | dentry->d_op = &v9fs_cached_dentry_operations; | 733 | if (v9ses->cache) |
734 | dentry->d_op = &v9fs_cached_dentry_operations; | ||
735 | else | ||
736 | dentry->d_op = &v9fs_dentry_operations; | ||
734 | d_instantiate(dentry, inode); | 737 | d_instantiate(dentry, inode); |
735 | err = v9fs_fid_add(dentry, fid); | 738 | err = v9fs_fid_add(dentry, fid); |
736 | if (err < 0) | 739 | if (err < 0) |
@@ -1128,6 +1131,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1128 | v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); | 1131 | v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); |
1129 | generic_fillattr(dentry->d_inode, stat); | 1132 | generic_fillattr(dentry->d_inode, stat); |
1130 | 1133 | ||
1134 | p9stat_free(st); | ||
1131 | kfree(st); | 1135 | kfree(st); |
1132 | return 0; | 1136 | return 0; |
1133 | } | 1137 | } |
@@ -1489,6 +1493,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | |||
1489 | 1493 | ||
1490 | retval = strnlen(buffer, buflen); | 1494 | retval = strnlen(buffer, buflen); |
1491 | done: | 1495 | done: |
1496 | p9stat_free(st); | ||
1492 | kfree(st); | 1497 | kfree(st); |
1493 | return retval; | 1498 | return retval; |
1494 | } | 1499 | } |
@@ -1942,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { | |||
1942 | .unlink = v9fs_vfs_unlink, | 1947 | .unlink = v9fs_vfs_unlink, |
1943 | .mkdir = v9fs_vfs_mkdir, | 1948 | .mkdir = v9fs_vfs_mkdir, |
1944 | .rmdir = v9fs_vfs_rmdir, | 1949 | .rmdir = v9fs_vfs_rmdir, |
1945 | .mknod = v9fs_vfs_mknod_dotl, | 1950 | .mknod = v9fs_vfs_mknod, |
1946 | .rename = v9fs_vfs_rename, | 1951 | .rename = v9fs_vfs_rename, |
1947 | .getattr = v9fs_vfs_getattr, | 1952 | .getattr = v9fs_vfs_getattr, |
1948 | .setattr = v9fs_vfs_setattr, | 1953 | .setattr = v9fs_vfs_setattr, |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index f9311077de68..1d12ba0ed3db 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
122 | fid = v9fs_session_init(v9ses, dev_name, data); | 122 | fid = v9fs_session_init(v9ses, dev_name, data); |
123 | if (IS_ERR(fid)) { | 123 | if (IS_ERR(fid)) { |
124 | retval = PTR_ERR(fid); | 124 | retval = PTR_ERR(fid); |
125 | /* | ||
126 | * we need to call session_close to tear down some | ||
127 | * of the data structure setup by session_init | ||
128 | */ | ||
125 | goto close_session; | 129 | goto close_session; |
126 | } | 130 | } |
127 | 131 | ||
@@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
144 | retval = -ENOMEM; | 148 | retval = -ENOMEM; |
145 | goto release_sb; | 149 | goto release_sb; |
146 | } | 150 | } |
147 | |||
148 | sb->s_root = root; | 151 | sb->s_root = root; |
149 | 152 | ||
150 | if (v9fs_proto_dotl(v9ses)) { | 153 | if (v9fs_proto_dotl(v9ses)) { |
@@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
152 | st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); | 155 | st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); |
153 | if (IS_ERR(st)) { | 156 | if (IS_ERR(st)) { |
154 | retval = PTR_ERR(st); | 157 | retval = PTR_ERR(st); |
155 | goto clunk_fid; | 158 | goto release_sb; |
156 | } | 159 | } |
157 | 160 | ||
158 | v9fs_stat2inode_dotl(st, root->d_inode); | 161 | v9fs_stat2inode_dotl(st, root->d_inode); |
@@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
162 | st = p9_client_stat(fid); | 165 | st = p9_client_stat(fid); |
163 | if (IS_ERR(st)) { | 166 | if (IS_ERR(st)) { |
164 | retval = PTR_ERR(st); | 167 | retval = PTR_ERR(st); |
165 | goto clunk_fid; | 168 | goto release_sb; |
166 | } | 169 | } |
167 | 170 | ||
168 | root->d_inode->i_ino = v9fs_qid2ino(&st->qid); | 171 | root->d_inode->i_ino = v9fs_qid2ino(&st->qid); |
@@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
174 | 177 | ||
175 | v9fs_fid_add(root, fid); | 178 | v9fs_fid_add(root, fid); |
176 | 179 | ||
177 | P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); | 180 | P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); |
178 | simple_set_mnt(mnt, sb); | 181 | simple_set_mnt(mnt, sb); |
179 | return 0; | 182 | return 0; |
180 | 183 | ||
181 | clunk_fid: | 184 | clunk_fid: |
182 | p9_client_clunk(fid); | 185 | p9_client_clunk(fid); |
183 | |||
184 | close_session: | 186 | close_session: |
185 | v9fs_session_close(v9ses); | 187 | v9fs_session_close(v9ses); |
186 | kfree(v9ses); | 188 | kfree(v9ses); |
187 | return retval; | 189 | return retval; |
188 | |||
189 | release_sb: | 190 | release_sb: |
191 | /* | ||
192 | * we will do the session_close and root dentry release | ||
193 | * in the below call. But we need to clunk fid, because we haven't | ||
194 | * attached the fid to dentry so it won't get clunked | ||
195 | * automatically. | ||
196 | */ | ||
197 | p9_client_clunk(fid); | ||
190 | deactivate_locked_super(sb); | 198 | deactivate_locked_super(sb); |
191 | return retval; | 199 | return retval; |
192 | } | 200 | } |
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) | |||
712 | */ | 712 | */ |
713 | ret = retry(iocb); | 713 | ret = retry(iocb); |
714 | 714 | ||
715 | if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) | 715 | if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { |
716 | /* | ||
717 | * There's no easy way to restart the syscall since other AIO's | ||
718 | * may be already running. Just fail this IO with EINTR. | ||
719 | */ | ||
720 | if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || | ||
721 | ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)) | ||
722 | ret = -EINTR; | ||
716 | aio_complete(iocb, ret, 0); | 723 | aio_complete(iocb, ret, 0); |
724 | } | ||
717 | out: | 725 | out: |
718 | spin_lock_irq(&ctx->ctx_lock); | 726 | spin_lock_irq(&ctx->ctx_lock); |
719 | 727 | ||
@@ -1659,6 +1667,9 @@ long do_io_submit(aio_context_t ctx_id, long nr, | |||
1659 | if (unlikely(nr < 0)) | 1667 | if (unlikely(nr < 0)) |
1660 | return -EINVAL; | 1668 | return -EINVAL; |
1661 | 1669 | ||
1670 | if (unlikely(nr > LONG_MAX/sizeof(*iocbpp))) | ||
1671 | nr = LONG_MAX/sizeof(*iocbpp); | ||
1672 | |||
1662 | if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) | 1673 | if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) |
1663 | return -EFAULT; | 1674 | return -EFAULT; |
1664 | 1675 | ||
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f96eff04e11a..a6395bdb26ae 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -134,10 +134,6 @@ static int aout_core_dump(struct coredump_params *cprm) | |||
134 | if (!dump_write(file, dump_start, dump_size)) | 134 | if (!dump_write(file, dump_start, dump_size)) |
135 | goto end_coredump; | 135 | goto end_coredump; |
136 | } | 136 | } |
137 | /* Finally dump the task struct. Not be used by gdb, but could be useful */ | ||
138 | set_fs(KERNEL_DS); | ||
139 | if (!dump_write(file, current, sizeof(*current))) | ||
140 | goto end_coredump; | ||
141 | end_coredump: | 137 | end_coredump: |
142 | set_fs(fs); | 138 | set_fs(fs); |
143 | return has_dumped; | 139 | return has_dumped; |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a7528b913936..fd0cc0bf9a40 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void) | |||
724 | { | 724 | { |
725 | int err = register_filesystem(&bm_fs_type); | 725 | int err = register_filesystem(&bm_fs_type); |
726 | if (!err) { | 726 | if (!err) { |
727 | err = register_binfmt(&misc_format); | 727 | err = insert_binfmt(&misc_format); |
728 | if (err) | 728 | if (err) |
729 | unregister_filesystem(&bm_fs_type); | 729 | unregister_filesystem(&bm_fs_type); |
730 | } | 730 | } |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 612a5c38d3c1..4d0ff5ee27b8 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -413,10 +413,10 @@ int bio_integrity_prep(struct bio *bio) | |||
413 | 413 | ||
414 | /* Allocate kernel buffer for protection data */ | 414 | /* Allocate kernel buffer for protection data */ |
415 | len = sectors * blk_integrity_tuple_size(bi); | 415 | len = sectors * blk_integrity_tuple_size(bi); |
416 | buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); | 416 | buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); |
417 | if (unlikely(buf == NULL)) { | 417 | if (unlikely(buf == NULL)) { |
418 | printk(KERN_ERR "could not allocate integrity buffer\n"); | 418 | printk(KERN_ERR "could not allocate integrity buffer\n"); |
419 | return -EIO; | 419 | return -ENOMEM; |
420 | } | 420 | } |
421 | 421 | ||
422 | end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 422 | end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27e..0fcd2640c23f 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig | |||
@@ -3,6 +3,7 @@ config CEPH_FS | |||
3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
4 | select LIBCRC32C | 4 | select LIBCRC32C |
5 | select CRYPTO_AES | 5 | select CRYPTO_AES |
6 | select CRYPTO | ||
6 | help | 7 | help |
7 | Choose Y or M here to include support for mounting the | 8 | Choose Y or M here to include support for mounting the |
8 | experimental Ceph distributed file system. Ceph is an extremely | 9 | experimental Ceph distributed file system. Ceph is an extremely |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5598a0d02295..efbc604001c8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page) | |||
87 | 87 | ||
88 | /* dirty the head */ | 88 | /* dirty the head */ |
89 | spin_lock(&inode->i_lock); | 89 | spin_lock(&inode->i_lock); |
90 | if (ci->i_wrbuffer_ref_head == 0) | 90 | if (ci->i_head_snapc == NULL) |
91 | ci->i_head_snapc = ceph_get_snap_context(snapc); | 91 | ci->i_head_snapc = ceph_get_snap_context(snapc); |
92 | ++ci->i_wrbuffer_ref_head; | 92 | ++ci->i_wrbuffer_ref_head; |
93 | if (ci->i_wrbuffer_ref == 0) | 93 | if (ci->i_wrbuffer_ref == 0) |
@@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page) | |||
105 | spin_lock_irq(&mapping->tree_lock); | 105 | spin_lock_irq(&mapping->tree_lock); |
106 | if (page->mapping) { /* Race with truncate? */ | 106 | if (page->mapping) { /* Race with truncate? */ |
107 | WARN_ON_ONCE(!PageUptodate(page)); | 107 | WARN_ON_ONCE(!PageUptodate(page)); |
108 | 108 | account_page_dirtied(page, page->mapping); | |
109 | if (mapping_cap_account_dirty(mapping)) { | ||
110 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
111 | __inc_bdi_stat(mapping->backing_dev_info, | ||
112 | BDI_RECLAIMABLE); | ||
113 | task_io_account_write(PAGE_CACHE_SIZE); | ||
114 | } | ||
115 | radix_tree_tag_set(&mapping->page_tree, | 109 | radix_tree_tag_set(&mapping->page_tree, |
116 | page_index(page), PAGECACHE_TAG_DIRTY); | 110 | page_index(page), PAGECACHE_TAG_DIRTY); |
117 | 111 | ||
@@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode, | |||
352 | break; | 346 | break; |
353 | } | 347 | } |
354 | } | 348 | } |
355 | if (!snapc && ci->i_head_snapc) { | 349 | if (!snapc && ci->i_wrbuffer_ref_head) { |
356 | snapc = ceph_get_snap_context(ci->i_head_snapc); | 350 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
357 | dout(" head snapc %p has %d dirty pages\n", | 351 | dout(" head snapc %p has %d dirty pages\n", |
358 | snapc, ci->i_wrbuffer_ref_head); | 352 | snapc, ci->i_wrbuffer_ref_head); |
@@ -417,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
417 | if (i_size < page_off + len) | 411 | if (i_size < page_off + len) |
418 | len = i_size - page_off; | 412 | len = i_size - page_off; |
419 | 413 | ||
420 | dout("writepage %p page %p index %lu on %llu~%u\n", | 414 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
421 | inode, page, page->index, page_off, len); | 415 | inode, page, page->index, page_off, len, snapc); |
422 | 416 | ||
423 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); |
424 | if (writeback_stat > | 418 | if (writeback_stat > |
@@ -772,7 +766,8 @@ get_more_pages: | |||
772 | /* ok */ | 766 | /* ok */ |
773 | if (locked_pages == 0) { | 767 | if (locked_pages == 0) { |
774 | /* prepare async write request */ | 768 | /* prepare async write request */ |
775 | offset = page->index << PAGE_CACHE_SHIFT; | 769 | offset = (unsigned long long)page->index |
770 | << PAGE_CACHE_SHIFT; | ||
776 | len = wsize; | 771 | len = wsize; |
777 | req = ceph_osdc_new_request(&client->osdc, | 772 | req = ceph_osdc_new_request(&client->osdc, |
778 | &ci->i_layout, | 773 | &ci->i_layout, |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 582e0b2caf8a..a2d002cbdec2 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) | |||
376 | 376 | ||
377 | th = get_ticket_handler(ac, service); | 377 | th = get_ticket_handler(ac, service); |
378 | 378 | ||
379 | if (!th) { | 379 | if (IS_ERR(th)) { |
380 | *pneed |= service; | 380 | *pneed |= service; |
381 | continue; | 381 | continue; |
382 | } | 382 | } |
@@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
399 | struct ceph_x_ticket_handler *th = | 399 | struct ceph_x_ticket_handler *th = |
400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | 400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); |
401 | 401 | ||
402 | if (IS_ERR(th)) | ||
403 | return PTR_ERR(th); | ||
404 | |||
402 | ceph_x_validate_tickets(ac, &need); | 405 | ceph_x_validate_tickets(ac, &need); |
403 | 406 | ||
404 | dout("build_request want %x have %x need %x\n", | 407 | dout("build_request want %x have %x need %x\n", |
@@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
450 | return -ERANGE; | 453 | return -ERANGE; |
451 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); | 454 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); |
452 | 455 | ||
453 | BUG_ON(!th); | ||
454 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); | 456 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); |
455 | if (ret) | 457 | if (ret) |
456 | return ret; | 458 | return ret; |
@@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, | |||
505 | 507 | ||
506 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: | 508 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: |
507 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | 509 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); |
508 | BUG_ON(!th); | 510 | if (IS_ERR(th)) |
511 | return PTR_ERR(th); | ||
509 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, | 512 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, |
510 | buf + sizeof(*head), end); | 513 | buf + sizeof(*head), end); |
511 | break; | 514 | break; |
@@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, | |||
563 | void *end = p + sizeof(au->reply_buf); | 566 | void *end = p + sizeof(au->reply_buf); |
564 | 567 | ||
565 | th = get_ticket_handler(ac, au->service); | 568 | th = get_ticket_handler(ac, au->service); |
566 | if (!th) | 569 | if (IS_ERR(th)) |
567 | return -EIO; /* hrm! */ | 570 | return PTR_ERR(th); |
568 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); | 571 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); |
569 | if (ret < 0) | 572 | if (ret < 0) |
570 | return ret; | 573 | return ret; |
@@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | |||
626 | struct ceph_x_ticket_handler *th; | 629 | struct ceph_x_ticket_handler *th; |
627 | 630 | ||
628 | th = get_ticket_handler(ac, peer_type); | 631 | th = get_ticket_handler(ac, peer_type); |
629 | if (th && !IS_ERR(th)) | 632 | if (!IS_ERR(th)) |
630 | remove_ticket_handler(ac, th); | 633 | remove_ticket_handler(ac, th); |
631 | } | 634 | } |
632 | 635 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bf182b03973..5e9da996a151 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) | |||
814 | used |= CEPH_CAP_PIN; | 814 | used |= CEPH_CAP_PIN; |
815 | if (ci->i_rd_ref) | 815 | if (ci->i_rd_ref) |
816 | used |= CEPH_CAP_FILE_RD; | 816 | used |= CEPH_CAP_FILE_RD; |
817 | if (ci->i_rdcache_ref || ci->i_rdcache_gen) | 817 | if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) |
818 | used |= CEPH_CAP_FILE_CACHE; | 818 | used |= CEPH_CAP_FILE_CACHE; |
819 | if (ci->i_wr_ref) | 819 | if (ci->i_wr_ref) |
820 | used |= CEPH_CAP_FILE_WR; | 820 | used |= CEPH_CAP_FILE_WR; |
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1082 | gid_t gid; | 1082 | gid_t gid; |
1083 | struct ceph_mds_session *session; | 1083 | struct ceph_mds_session *session; |
1084 | u64 xattr_version = 0; | 1084 | u64 xattr_version = 0; |
1085 | struct ceph_buffer *xattr_blob = NULL; | ||
1085 | int delayed = 0; | 1086 | int delayed = 0; |
1086 | u64 flush_tid = 0; | 1087 | u64 flush_tid = 0; |
1087 | int i; | 1088 | int i; |
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1142 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1143 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1143 | if (flushing & (1 << i)) | 1144 | if (flushing & (1 << i)) |
1144 | ci->i_cap_flush_tid[i] = flush_tid; | 1145 | ci->i_cap_flush_tid[i] = flush_tid; |
1146 | |||
1147 | follows = ci->i_head_snapc->seq; | ||
1148 | } else { | ||
1149 | follows = 0; | ||
1145 | } | 1150 | } |
1146 | 1151 | ||
1147 | keep = cap->implemented; | 1152 | keep = cap->implemented; |
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1155 | mtime = inode->i_mtime; | 1160 | mtime = inode->i_mtime; |
1156 | atime = inode->i_atime; | 1161 | atime = inode->i_atime; |
1157 | time_warp_seq = ci->i_time_warp_seq; | 1162 | time_warp_seq = ci->i_time_warp_seq; |
1158 | follows = ci->i_snap_realm->cached_context->seq; | ||
1159 | uid = inode->i_uid; | 1163 | uid = inode->i_uid; |
1160 | gid = inode->i_gid; | 1164 | gid = inode->i_gid; |
1161 | mode = inode->i_mode; | 1165 | mode = inode->i_mode; |
1162 | 1166 | ||
1163 | if (dropping & CEPH_CAP_XATTR_EXCL) { | 1167 | if (flushing & CEPH_CAP_XATTR_EXCL) { |
1164 | __ceph_build_xattrs_blob(ci); | 1168 | __ceph_build_xattrs_blob(ci); |
1165 | xattr_version = ci->i_xattrs.version + 1; | 1169 | xattr_blob = ci->i_xattrs.blob; |
1170 | xattr_version = ci->i_xattrs.version; | ||
1166 | } | 1171 | } |
1167 | 1172 | ||
1168 | spin_unlock(&inode->i_lock); | 1173 | spin_unlock(&inode->i_lock); |
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1170 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, | 1175 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, |
1171 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, | 1176 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, |
1172 | size, max_size, &mtime, &atime, time_warp_seq, | 1177 | size, max_size, &mtime, &atime, time_warp_seq, |
1173 | uid, gid, mode, | 1178 | uid, gid, mode, xattr_version, xattr_blob, |
1174 | xattr_version, | ||
1175 | (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL, | ||
1176 | follows); | 1179 | follows); |
1177 | if (ret < 0) { | 1180 | if (ret < 0) { |
1178 | dout("error sending cap msg, must requeue %p\n", inode); | 1181 | dout("error sending cap msg, must requeue %p\n", inode); |
@@ -1192,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1192 | * asynchronously back to the MDS once sync writes complete and dirty | 1195 | * asynchronously back to the MDS once sync writes complete and dirty |
1193 | * data is written out. | 1196 | * data is written out. |
1194 | * | 1197 | * |
1198 | * Unless @again is true, skip cap_snaps that were already sent to | ||
1199 | * the MDS (i.e., during this session). | ||
1200 | * | ||
1195 | * Called under i_lock. Takes s_mutex as needed. | 1201 | * Called under i_lock. Takes s_mutex as needed. |
1196 | */ | 1202 | */ |
1197 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1203 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1198 | struct ceph_mds_session **psession) | 1204 | struct ceph_mds_session **psession, |
1205 | int again) | ||
1199 | __releases(ci->vfs_inode->i_lock) | 1206 | __releases(ci->vfs_inode->i_lock) |
1200 | __acquires(ci->vfs_inode->i_lock) | 1207 | __acquires(ci->vfs_inode->i_lock) |
1201 | { | 1208 | { |
@@ -1224,7 +1231,7 @@ retry: | |||
1224 | * pages to be written out. | 1231 | * pages to be written out. |
1225 | */ | 1232 | */ |
1226 | if (capsnap->dirty_pages || capsnap->writing) | 1233 | if (capsnap->dirty_pages || capsnap->writing) |
1227 | continue; | 1234 | break; |
1228 | 1235 | ||
1229 | /* | 1236 | /* |
1230 | * if cap writeback already occurred, we should have dropped | 1237 | * if cap writeback already occurred, we should have dropped |
@@ -1237,6 +1244,13 @@ retry: | |||
1237 | dout("no auth cap (migrating?), doing nothing\n"); | 1244 | dout("no auth cap (migrating?), doing nothing\n"); |
1238 | goto out; | 1245 | goto out; |
1239 | } | 1246 | } |
1247 | |||
1248 | /* only flush each capsnap once */ | ||
1249 | if (!again && !list_empty(&capsnap->flushing_item)) { | ||
1250 | dout("already flushed %p, skipping\n", capsnap); | ||
1251 | continue; | ||
1252 | } | ||
1253 | |||
1240 | mds = ci->i_auth_cap->session->s_mds; | 1254 | mds = ci->i_auth_cap->session->s_mds; |
1241 | mseq = ci->i_auth_cap->mseq; | 1255 | mseq = ci->i_auth_cap->mseq; |
1242 | 1256 | ||
@@ -1273,8 +1287,8 @@ retry: | |||
1273 | &session->s_cap_snaps_flushing); | 1287 | &session->s_cap_snaps_flushing); |
1274 | spin_unlock(&inode->i_lock); | 1288 | spin_unlock(&inode->i_lock); |
1275 | 1289 | ||
1276 | dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", | 1290 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", |
1277 | inode, capsnap, next_follows, capsnap->size); | 1291 | inode, capsnap, capsnap->follows, capsnap->flush_tid); |
1278 | send_cap_msg(session, ceph_vino(inode).ino, 0, | 1292 | send_cap_msg(session, ceph_vino(inode).ino, 0, |
1279 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, | 1293 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, |
1280 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, | 1294 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, |
@@ -1282,7 +1296,7 @@ retry: | |||
1282 | &capsnap->mtime, &capsnap->atime, | 1296 | &capsnap->mtime, &capsnap->atime, |
1283 | capsnap->time_warp_seq, | 1297 | capsnap->time_warp_seq, |
1284 | capsnap->uid, capsnap->gid, capsnap->mode, | 1298 | capsnap->uid, capsnap->gid, capsnap->mode, |
1285 | 0, NULL, | 1299 | capsnap->xattr_version, capsnap->xattr_blob, |
1286 | capsnap->follows); | 1300 | capsnap->follows); |
1287 | 1301 | ||
1288 | next_follows = capsnap->follows + 1; | 1302 | next_follows = capsnap->follows + 1; |
@@ -1311,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1311 | struct inode *inode = &ci->vfs_inode; | 1325 | struct inode *inode = &ci->vfs_inode; |
1312 | 1326 | ||
1313 | spin_lock(&inode->i_lock); | 1327 | spin_lock(&inode->i_lock); |
1314 | __ceph_flush_snaps(ci, NULL); | 1328 | __ceph_flush_snaps(ci, NULL, 0); |
1315 | spin_unlock(&inode->i_lock); | 1329 | spin_unlock(&inode->i_lock); |
1316 | } | 1330 | } |
1317 | 1331 | ||
@@ -1332,7 +1346,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1332 | ceph_cap_string(was | mask)); | 1346 | ceph_cap_string(was | mask)); |
1333 | ci->i_dirty_caps |= mask; | 1347 | ci->i_dirty_caps |= mask; |
1334 | if (was == 0) { | 1348 | if (was == 0) { |
1335 | dout(" inode %p now dirty\n", &ci->vfs_inode); | 1349 | if (!ci->i_head_snapc) |
1350 | ci->i_head_snapc = ceph_get_snap_context( | ||
1351 | ci->i_snap_realm->cached_context); | ||
1352 | dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, | ||
1353 | ci->i_head_snapc); | ||
1336 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1354 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
1337 | spin_lock(&mdsc->cap_dirty_lock); | 1355 | spin_lock(&mdsc->cap_dirty_lock); |
1338 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | 1356 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
@@ -1470,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1470 | 1488 | ||
1471 | /* flush snaps first time around only */ | 1489 | /* flush snaps first time around only */ |
1472 | if (!list_empty(&ci->i_cap_snaps)) | 1490 | if (!list_empty(&ci->i_cap_snaps)) |
1473 | __ceph_flush_snaps(ci, &session); | 1491 | __ceph_flush_snaps(ci, &session, 0); |
1474 | goto retry_locked; | 1492 | goto retry_locked; |
1475 | retry: | 1493 | retry: |
1476 | spin_lock(&inode->i_lock); | 1494 | spin_lock(&inode->i_lock); |
@@ -1887,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, | |||
1887 | if (cap && cap->session == session) { | 1905 | if (cap && cap->session == session) { |
1888 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, | 1906 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, |
1889 | cap, capsnap); | 1907 | cap, capsnap); |
1890 | __ceph_flush_snaps(ci, &session); | 1908 | __ceph_flush_snaps(ci, &session, 1); |
1891 | } else { | 1909 | } else { |
1892 | pr_err("%p auth cap %p not mds%d ???\n", inode, | 1910 | pr_err("%p auth cap %p not mds%d ???\n", inode, |
1893 | cap, session->s_mds); | 1911 | cap, session->s_mds); |
@@ -2190,7 +2208,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2190 | 2208 | ||
2191 | if (ci->i_head_snapc == snapc) { | 2209 | if (ci->i_head_snapc == snapc) { |
2192 | ci->i_wrbuffer_ref_head -= nr; | 2210 | ci->i_wrbuffer_ref_head -= nr; |
2193 | if (!ci->i_wrbuffer_ref_head) { | 2211 | if (ci->i_wrbuffer_ref_head == 0 && |
2212 | ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { | ||
2213 | BUG_ON(!ci->i_head_snapc); | ||
2194 | ceph_put_snap_context(ci->i_head_snapc); | 2214 | ceph_put_snap_context(ci->i_head_snapc); |
2195 | ci->i_head_snapc = NULL; | 2215 | ci->i_head_snapc = NULL; |
2196 | } | 2216 | } |
@@ -2263,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2263 | { | 2283 | { |
2264 | struct ceph_inode_info *ci = ceph_inode(inode); | 2284 | struct ceph_inode_info *ci = ceph_inode(inode); |
2265 | int mds = session->s_mds; | 2285 | int mds = session->s_mds; |
2266 | int seq = le32_to_cpu(grant->seq); | 2286 | unsigned seq = le32_to_cpu(grant->seq); |
2287 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2267 | int newcaps = le32_to_cpu(grant->caps); | 2288 | int newcaps = le32_to_cpu(grant->caps); |
2268 | int issued, implemented, used, wanted, dirty; | 2289 | int issued, implemented, used, wanted, dirty; |
2269 | u64 size = le64_to_cpu(grant->size); | 2290 | u64 size = le64_to_cpu(grant->size); |
@@ -2275,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2275 | int revoked_rdcache = 0; | 2296 | int revoked_rdcache = 0; |
2276 | int queue_invalidate = 0; | 2297 | int queue_invalidate = 0; |
2277 | 2298 | ||
2278 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", | 2299 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", |
2279 | inode, cap, mds, seq, ceph_cap_string(newcaps)); | 2300 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); |
2280 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2301 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2281 | inode->i_size); | 2302 | inode->i_size); |
2282 | 2303 | ||
@@ -2372,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2372 | } | 2393 | } |
2373 | 2394 | ||
2374 | cap->seq = seq; | 2395 | cap->seq = seq; |
2396 | cap->issue_seq = issue_seq; | ||
2375 | 2397 | ||
2376 | /* file layout may have changed */ | 2398 | /* file layout may have changed */ |
2377 | ci->i_layout = grant->layout; | 2399 | ci->i_layout = grant->layout; |
@@ -2483,6 +2505,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2483 | dout(" inode %p now clean\n", inode); | 2505 | dout(" inode %p now clean\n", inode); |
2484 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 2506 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
2485 | drop = 1; | 2507 | drop = 1; |
2508 | if (ci->i_wrbuffer_ref_head == 0) { | ||
2509 | BUG_ON(!ci->i_head_snapc); | ||
2510 | ceph_put_snap_context(ci->i_head_snapc); | ||
2511 | ci->i_head_snapc = NULL; | ||
2512 | } | ||
2486 | } else { | 2513 | } else { |
2487 | BUG_ON(list_empty(&ci->i_dirty_item)); | 2514 | BUG_ON(list_empty(&ci->i_dirty_item)); |
2488 | } | 2515 | } |
@@ -2749,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2749 | if (op == CEPH_CAP_OP_IMPORT) | 2776 | if (op == CEPH_CAP_OP_IMPORT) |
2750 | __queue_cap_release(session, vino.ino, cap_id, | 2777 | __queue_cap_release(session, vino.ino, cap_id, |
2751 | mseq, seq); | 2778 | mseq, seq); |
2752 | 2779 | goto flush_cap_releases; | |
2753 | /* | ||
2754 | * send any full release message to try to move things | ||
2755 | * along for the mds (who clearly thinks we still have this | ||
2756 | * cap). | ||
2757 | */ | ||
2758 | ceph_add_cap_releases(mdsc, session); | ||
2759 | ceph_send_cap_releases(mdsc, session); | ||
2760 | goto done; | ||
2761 | } | 2780 | } |
2762 | 2781 | ||
2763 | /* these will work even if we don't have a cap yet */ | 2782 | /* these will work even if we don't have a cap yet */ |
@@ -2785,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2785 | dout(" no cap on %p ino %llx.%llx from mds%d\n", | 2804 | dout(" no cap on %p ino %llx.%llx from mds%d\n", |
2786 | inode, ceph_ino(inode), ceph_snap(inode), mds); | 2805 | inode, ceph_ino(inode), ceph_snap(inode), mds); |
2787 | spin_unlock(&inode->i_lock); | 2806 | spin_unlock(&inode->i_lock); |
2788 | goto done; | 2807 | goto flush_cap_releases; |
2789 | } | 2808 | } |
2790 | 2809 | ||
2791 | /* note that each of these drops i_lock for us */ | 2810 | /* note that each of these drops i_lock for us */ |
@@ -2809,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2809 | ceph_cap_op_name(op)); | 2828 | ceph_cap_op_name(op)); |
2810 | } | 2829 | } |
2811 | 2830 | ||
2831 | goto done; | ||
2832 | |||
2833 | flush_cap_releases: | ||
2834 | /* | ||
2835 | * send any full release message to try to move things | ||
2836 | * along for the mds (who clearly thinks we still have this | ||
2837 | * cap). | ||
2838 | */ | ||
2839 | ceph_add_cap_releases(mdsc, session); | ||
2840 | ceph_send_cap_releases(mdsc, session); | ||
2841 | |||
2812 | done: | 2842 | done: |
2813 | mutex_unlock(&session->s_mutex); | 2843 | mutex_unlock(&session->s_mutex); |
2814 | done_unlocked: | 2844 | done_unlocked: |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 360c4f22718d..6fd8b20a8611 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
171 | } else if (req->r_dentry) { | 171 | } else if (req->r_dentry) { |
172 | path = ceph_mdsc_build_path(req->r_dentry, &pathlen, | 172 | path = ceph_mdsc_build_path(req->r_dentry, &pathlen, |
173 | &pathbase, 0); | 173 | &pathbase, 0); |
174 | if (IS_ERR(path)) | ||
175 | path = NULL; | ||
174 | spin_lock(&req->r_dentry->d_lock); | 176 | spin_lock(&req->r_dentry->d_lock); |
175 | seq_printf(s, " #%llx/%.*s (%s)", | 177 | seq_printf(s, " #%llx/%.*s (%s)", |
176 | ceph_ino(req->r_dentry->d_parent->d_inode), | 178 | ceph_ino(req->r_dentry->d_parent->d_inode), |
@@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
187 | if (req->r_old_dentry) { | 189 | if (req->r_old_dentry) { |
188 | path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, | 190 | path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, |
189 | &pathbase, 0); | 191 | &pathbase, 0); |
192 | if (IS_ERR(path)) | ||
193 | path = NULL; | ||
190 | spin_lock(&req->r_old_dentry->d_lock); | 194 | spin_lock(&req->r_old_dentry->d_lock); |
191 | seq_printf(s, " #%llx/%.*s (%s)", | 195 | seq_printf(s, " #%llx/%.*s (%s)", |
192 | ceph_ino(req->r_old_dentry->d_parent->d_inode), | 196 | ceph_ino(req->r_old_dentry->d_parent->d_inode), |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 67bbb41d5526..a1986eb52045 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry) | |||
46 | else | 46 | else |
47 | dentry->d_op = &ceph_snap_dentry_ops; | 47 | dentry->d_op = &ceph_snap_dentry_ops; |
48 | 48 | ||
49 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS); | 49 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); |
50 | if (!di) | 50 | if (!di) |
51 | return -ENOMEM; /* oh well */ | 51 | return -ENOMEM; /* oh well */ |
52 | 52 | ||
@@ -1021,11 +1021,15 @@ out_touch: | |||
1021 | static void ceph_dentry_release(struct dentry *dentry) | 1021 | static void ceph_dentry_release(struct dentry *dentry) |
1022 | { | 1022 | { |
1023 | struct ceph_dentry_info *di = ceph_dentry(dentry); | 1023 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
1024 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1024 | struct inode *parent_inode = NULL; |
1025 | u64 snapid = ceph_snap(parent_inode); | 1025 | u64 snapid = CEPH_NOSNAP; |
1026 | 1026 | ||
1027 | if (!IS_ROOT(dentry)) { | ||
1028 | parent_inode = dentry->d_parent->d_inode; | ||
1029 | if (parent_inode) | ||
1030 | snapid = ceph_snap(parent_inode); | ||
1031 | } | ||
1027 | dout("dentry_release %p parent %p\n", dentry, parent_inode); | 1032 | dout("dentry_release %p parent %p\n", dentry, parent_inode); |
1028 | |||
1029 | if (parent_inode && snapid != CEPH_SNAPDIR) { | 1033 | if (parent_inode && snapid != CEPH_SNAPDIR) { |
1030 | struct ceph_inode_info *ci = ceph_inode(parent_inode); | 1034 | struct ceph_inode_info *ci = ceph_inode(parent_inode); |
1031 | 1035 | ||
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 4480cb1c63e7..e38423e82f2e 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -42,32 +42,37 @@ struct ceph_nfs_confh { | |||
42 | static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | 42 | static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, |
43 | int connectable) | 43 | int connectable) |
44 | { | 44 | { |
45 | int type; | ||
45 | struct ceph_nfs_fh *fh = (void *)rawfh; | 46 | struct ceph_nfs_fh *fh = (void *)rawfh; |
46 | struct ceph_nfs_confh *cfh = (void *)rawfh; | 47 | struct ceph_nfs_confh *cfh = (void *)rawfh; |
47 | struct dentry *parent = dentry->d_parent; | 48 | struct dentry *parent = dentry->d_parent; |
48 | struct inode *inode = dentry->d_inode; | 49 | struct inode *inode = dentry->d_inode; |
49 | int type; | 50 | int connected_handle_length = sizeof(*cfh)/4; |
51 | int handle_length = sizeof(*fh)/4; | ||
50 | 52 | ||
51 | /* don't re-export snaps */ | 53 | /* don't re-export snaps */ |
52 | if (ceph_snap(inode) != CEPH_NOSNAP) | 54 | if (ceph_snap(inode) != CEPH_NOSNAP) |
53 | return -EINVAL; | 55 | return -EINVAL; |
54 | 56 | ||
55 | if (*max_len >= sizeof(*cfh)) { | 57 | if (*max_len >= connected_handle_length) { |
56 | dout("encode_fh %p connectable\n", dentry); | 58 | dout("encode_fh %p connectable\n", dentry); |
57 | cfh->ino = ceph_ino(dentry->d_inode); | 59 | cfh->ino = ceph_ino(dentry->d_inode); |
58 | cfh->parent_ino = ceph_ino(parent->d_inode); | 60 | cfh->parent_ino = ceph_ino(parent->d_inode); |
59 | cfh->parent_name_hash = parent->d_name.hash; | 61 | cfh->parent_name_hash = parent->d_name.hash; |
60 | *max_len = sizeof(*cfh); | 62 | *max_len = connected_handle_length; |
61 | type = 2; | 63 | type = 2; |
62 | } else if (*max_len > sizeof(*fh)) { | 64 | } else if (*max_len >= handle_length) { |
63 | if (connectable) | 65 | if (connectable) { |
64 | return -ENOSPC; | 66 | *max_len = connected_handle_length; |
67 | return 255; | ||
68 | } | ||
65 | dout("encode_fh %p\n", dentry); | 69 | dout("encode_fh %p\n", dentry); |
66 | fh->ino = ceph_ino(dentry->d_inode); | 70 | fh->ino = ceph_ino(dentry->d_inode); |
67 | *max_len = sizeof(*fh); | 71 | *max_len = handle_length; |
68 | type = 1; | 72 | type = 1; |
69 | } else { | 73 | } else { |
70 | return -ENOSPC; | 74 | *max_len = handle_length; |
75 | return 255; | ||
71 | } | 76 | } |
72 | return type; | 77 | return type; |
73 | } | 78 | } |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c044a4f0457..66e4da6dba22 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -697,7 +697,7 @@ more: | |||
697 | * start_request so that a tid has been assigned. | 697 | * start_request so that a tid has been assigned. |
698 | */ | 698 | */ |
699 | spin_lock(&ci->i_unsafe_lock); | 699 | spin_lock(&ci->i_unsafe_lock); |
700 | list_add(&ci->i_unsafe_writes, &req->r_unsafe_item); | 700 | list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); |
701 | spin_unlock(&ci->i_unsafe_lock); | 701 | spin_unlock(&ci->i_unsafe_lock); |
702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
703 | } | 703 | } |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5d893d31e399..62377ec37edf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode, | |||
677 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | 677 | if (ci->i_files == 0 && ci->i_subdirs == 0 && |
678 | ceph_snap(inode) == CEPH_NOSNAP && | 678 | ceph_snap(inode) == CEPH_NOSNAP && |
679 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && | 679 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && |
680 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
680 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | 681 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { |
681 | dout(" marking %p complete (empty)\n", inode); | 682 | dout(" marking %p complete (empty)\n", inode); |
682 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 683 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
@@ -844,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) | |||
844 | * the caller) if we fail. | 845 | * the caller) if we fail. |
845 | */ | 846 | */ |
846 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | 847 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, |
847 | bool *prehash) | 848 | bool *prehash, bool set_offset) |
848 | { | 849 | { |
849 | struct dentry *realdn; | 850 | struct dentry *realdn; |
850 | 851 | ||
@@ -876,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
876 | } | 877 | } |
877 | if ((!prehash || *prehash) && d_unhashed(dn)) | 878 | if ((!prehash || *prehash) && d_unhashed(dn)) |
878 | d_rehash(dn); | 879 | d_rehash(dn); |
879 | ceph_set_dentry_offset(dn); | 880 | if (set_offset) |
881 | ceph_set_dentry_offset(dn); | ||
880 | out: | 882 | out: |
881 | return dn; | 883 | return dn; |
882 | } | 884 | } |
@@ -1061,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1061 | d_delete(dn); | 1063 | d_delete(dn); |
1062 | goto done; | 1064 | goto done; |
1063 | } | 1065 | } |
1064 | dn = splice_dentry(dn, in, &have_lease); | 1066 | dn = splice_dentry(dn, in, &have_lease, true); |
1065 | if (IS_ERR(dn)) { | 1067 | if (IS_ERR(dn)) { |
1066 | err = PTR_ERR(dn); | 1068 | err = PTR_ERR(dn); |
1067 | goto done; | 1069 | goto done; |
@@ -1104,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1104 | goto done; | 1106 | goto done; |
1105 | } | 1107 | } |
1106 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1108 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1107 | dn = splice_dentry(dn, in, NULL); | 1109 | dn = splice_dentry(dn, in, NULL, true); |
1108 | if (IS_ERR(dn)) { | 1110 | if (IS_ERR(dn)) { |
1109 | err = PTR_ERR(dn); | 1111 | err = PTR_ERR(dn); |
1110 | goto done; | 1112 | goto done; |
@@ -1229,14 +1231,14 @@ retry_lookup: | |||
1229 | in = dn->d_inode; | 1231 | in = dn->d_inode; |
1230 | } else { | 1232 | } else { |
1231 | in = ceph_get_inode(parent->d_sb, vino); | 1233 | in = ceph_get_inode(parent->d_sb, vino); |
1232 | if (in == NULL) { | 1234 | if (IS_ERR(in)) { |
1233 | dout("new_inode badness\n"); | 1235 | dout("new_inode badness\n"); |
1234 | d_delete(dn); | 1236 | d_delete(dn); |
1235 | dput(dn); | 1237 | dput(dn); |
1236 | err = -ENOMEM; | 1238 | err = PTR_ERR(in); |
1237 | goto out; | 1239 | goto out; |
1238 | } | 1240 | } |
1239 | dn = splice_dentry(dn, in, NULL); | 1241 | dn = splice_dentry(dn, in, NULL, false); |
1240 | if (IS_ERR(dn)) | 1242 | if (IS_ERR(dn)) |
1241 | dn = NULL; | 1243 | dn = NULL; |
1242 | } | 1244 | } |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ae85af06454f..ff4e753aae92 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
82 | length = fl->fl_end - fl->fl_start + 1; | 82 | length = fl->fl_end - fl->fl_start + 1; |
83 | 83 | ||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | 85 | (u64)fl->fl_pid, |
86 | (u64)(unsigned long)fl->fl_nspid, | ||
86 | lock_cmd, fl->fl_start, | 87 | lock_cmd, fl->fl_start, |
87 | length, wait); | 88 | length, wait); |
88 | if (!err) { | 89 | if (!err) { |
@@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
92 | /* undo! This should only happen if the kernel detects | 93 | /* undo! This should only happen if the kernel detects |
93 | * local deadlock. */ | 94 | * local deadlock. */ |
94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 95 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | 96 | (u64)fl->fl_pid, |
97 | (u64)(unsigned long)fl->fl_nspid, | ||
96 | CEPH_LOCK_UNLOCK, fl->fl_start, | 98 | CEPH_LOCK_UNLOCK, fl->fl_start, |
97 | length, 0); | 99 | length, 0); |
98 | dout("got %d on posix_lock_file, undid lock", err); | 100 | dout("got %d on posix_lock_file, undid lock", err); |
@@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
132 | length = fl->fl_end - fl->fl_start + 1; | 134 | length = fl->fl_end - fl->fl_start + 1; |
133 | 135 | ||
134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 136 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | 137 | file, (u64)fl->fl_pid, |
138 | (u64)(unsigned long)fl->fl_nspid, | ||
136 | lock_cmd, fl->fl_start, | 139 | lock_cmd, fl->fl_start, |
137 | length, wait); | 140 | length, wait); |
138 | if (!err) { | 141 | if (!err) { |
@@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
141 | ceph_lock_message(CEPH_LOCK_FLOCK, | 144 | ceph_lock_message(CEPH_LOCK_FLOCK, |
142 | CEPH_MDS_OP_SETFILELOCK, | 145 | CEPH_MDS_OP_SETFILELOCK, |
143 | file, (u64)fl->fl_pid, | 146 | file, (u64)fl->fl_pid, |
144 | (u64)fl->fl_nspid, | 147 | (u64)(unsigned long)fl->fl_nspid, |
145 | CEPH_LOCK_UNLOCK, fl->fl_start, | 148 | CEPH_LOCK_UNLOCK, fl->fl_start, |
146 | length, 0); | 149 | length, 0); |
147 | dout("got %d on flock_lock_file_wait, undid lock", err); | 150 | dout("got %d on flock_lock_file_wait, undid lock", err); |
@@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock, | |||
235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | 238 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); |
236 | cephlock->client = cpu_to_le64(0); | 239 | cephlock->client = cpu_to_le64(0); |
237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | 240 | cephlock->pid = cpu_to_le64(lock->fl_pid); |
238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | 241 | cephlock->pid_namespace = |
242 | cpu_to_le64((u64)(unsigned long)lock->fl_nspid); | ||
239 | 243 | ||
240 | switch (lock->fl_type) { | 244 | switch (lock->fl_type) { |
241 | case F_RDLCK: | 245 | case F_RDLCK: |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a75ddbf9fe37..fad95f8f2608 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
560 | * | 560 | * |
561 | * Called under mdsc->mutex. | 561 | * Called under mdsc->mutex. |
562 | */ | 562 | */ |
563 | struct dentry *get_nonsnap_parent(struct dentry *dentry) | ||
564 | { | ||
565 | while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) | ||
566 | dentry = dentry->d_parent; | ||
567 | return dentry; | ||
568 | } | ||
569 | |||
563 | static int __choose_mds(struct ceph_mds_client *mdsc, | 570 | static int __choose_mds(struct ceph_mds_client *mdsc, |
564 | struct ceph_mds_request *req) | 571 | struct ceph_mds_request *req) |
565 | { | 572 | { |
@@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
590 | if (req->r_inode) { | 597 | if (req->r_inode) { |
591 | inode = req->r_inode; | 598 | inode = req->r_inode; |
592 | } else if (req->r_dentry) { | 599 | } else if (req->r_dentry) { |
593 | if (req->r_dentry->d_inode) { | 600 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
601 | |||
602 | if (dir->i_sb != mdsc->client->sb) { | ||
603 | /* not this fs! */ | ||
604 | inode = req->r_dentry->d_inode; | ||
605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | ||
606 | /* direct snapped/virtual snapdir requests | ||
607 | * based on parent dir inode */ | ||
608 | struct dentry *dn = | ||
609 | get_nonsnap_parent(req->r_dentry->d_parent); | ||
610 | inode = dn->d_inode; | ||
611 | dout("__choose_mds using nonsnap parent %p\n", inode); | ||
612 | } else if (req->r_dentry->d_inode) { | ||
613 | /* dentry target */ | ||
594 | inode = req->r_dentry->d_inode; | 614 | inode = req->r_dentry->d_inode; |
595 | } else { | 615 | } else { |
596 | inode = req->r_dentry->d_parent->d_inode; | 616 | /* dir + name */ |
617 | inode = dir; | ||
597 | hash = req->r_dentry->d_name.hash; | 618 | hash = req->r_dentry->d_name.hash; |
598 | is_hash = true; | 619 | is_hash = true; |
599 | } | 620 | } |
600 | } | 621 | } |
622 | |||
601 | dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, | 623 | dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, |
602 | (int)hash, mode); | 624 | (int)hash, mode); |
603 | if (!inode) | 625 | if (!inode) |
@@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
2208 | pr_info("mds%d reconnect denied\n", session->s_mds); | 2230 | pr_info("mds%d reconnect denied\n", session->s_mds); |
2209 | remove_session_caps(session); | 2231 | remove_session_caps(session); |
2210 | wake = 1; /* for good measure */ | 2232 | wake = 1; /* for good measure */ |
2211 | complete_all(&mdsc->session_close_waiters); | 2233 | wake_up_all(&mdsc->session_close_wq); |
2212 | kick_requests(mdsc, mds); | 2234 | kick_requests(mdsc, mds); |
2213 | break; | 2235 | break; |
2214 | 2236 | ||
@@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2302 | path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); | 2324 | path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); |
2303 | if (IS_ERR(path)) { | 2325 | if (IS_ERR(path)) { |
2304 | err = PTR_ERR(path); | 2326 | err = PTR_ERR(path); |
2305 | BUG_ON(err); | 2327 | goto out_dput; |
2306 | } | 2328 | } |
2307 | } else { | 2329 | } else { |
2308 | path = NULL; | 2330 | path = NULL; |
@@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2310 | } | 2332 | } |
2311 | err = ceph_pagelist_encode_string(pagelist, path, pathlen); | 2333 | err = ceph_pagelist_encode_string(pagelist, path, pathlen); |
2312 | if (err) | 2334 | if (err) |
2313 | goto out; | 2335 | goto out_free; |
2314 | 2336 | ||
2315 | spin_lock(&inode->i_lock); | 2337 | spin_lock(&inode->i_lock); |
2316 | cap->seq = 0; /* reset cap seq */ | 2338 | cap->seq = 0; /* reset cap seq */ |
@@ -2352,10 +2374,13 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2352 | num_fcntl_locks, | 2374 | num_fcntl_locks, |
2353 | num_flock_locks); | 2375 | num_flock_locks); |
2354 | unlock_kernel(); | 2376 | unlock_kernel(); |
2377 | } else { | ||
2378 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2355 | } | 2379 | } |
2356 | 2380 | ||
2357 | out: | 2381 | out_free: |
2358 | kfree(path); | 2382 | kfree(path); |
2383 | out_dput: | ||
2359 | dput(dentry); | 2384 | dput(dentry); |
2360 | return err; | 2385 | return err; |
2361 | } | 2386 | } |
@@ -2876,7 +2901,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2876 | return -ENOMEM; | 2901 | return -ENOMEM; |
2877 | 2902 | ||
2878 | init_completion(&mdsc->safe_umount_waiters); | 2903 | init_completion(&mdsc->safe_umount_waiters); |
2879 | init_completion(&mdsc->session_close_waiters); | 2904 | init_waitqueue_head(&mdsc->session_close_wq); |
2880 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2905 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
2881 | mdsc->sessions = NULL; | 2906 | mdsc->sessions = NULL; |
2882 | mdsc->max_sessions = 0; | 2907 | mdsc->max_sessions = 0; |
@@ -3021,6 +3046,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
3021 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); | 3046 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); |
3022 | } | 3047 | } |
3023 | 3048 | ||
3049 | /* | ||
3050 | * true if all sessions are closed, or we force unmount | ||
3051 | */ | ||
3052 | bool done_closing_sessions(struct ceph_mds_client *mdsc) | ||
3053 | { | ||
3054 | int i, n = 0; | ||
3055 | |||
3056 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
3057 | return true; | ||
3058 | |||
3059 | mutex_lock(&mdsc->mutex); | ||
3060 | for (i = 0; i < mdsc->max_sessions; i++) | ||
3061 | if (mdsc->sessions[i]) | ||
3062 | n++; | ||
3063 | mutex_unlock(&mdsc->mutex); | ||
3064 | return n == 0; | ||
3065 | } | ||
3024 | 3066 | ||
3025 | /* | 3067 | /* |
3026 | * called after sb is ro. | 3068 | * called after sb is ro. |
@@ -3029,45 +3071,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3029 | { | 3071 | { |
3030 | struct ceph_mds_session *session; | 3072 | struct ceph_mds_session *session; |
3031 | int i; | 3073 | int i; |
3032 | int n; | ||
3033 | struct ceph_client *client = mdsc->client; | 3074 | struct ceph_client *client = mdsc->client; |
3034 | unsigned long started, timeout = client->mount_args->mount_timeout * HZ; | 3075 | unsigned long timeout = client->mount_args->mount_timeout * HZ; |
3035 | 3076 | ||
3036 | dout("close_sessions\n"); | 3077 | dout("close_sessions\n"); |
3037 | 3078 | ||
3038 | mutex_lock(&mdsc->mutex); | ||
3039 | |||
3040 | /* close sessions */ | 3079 | /* close sessions */ |
3041 | started = jiffies; | 3080 | mutex_lock(&mdsc->mutex); |
3042 | while (time_before(jiffies, started + timeout)) { | 3081 | for (i = 0; i < mdsc->max_sessions; i++) { |
3043 | dout("closing sessions\n"); | 3082 | session = __ceph_lookup_mds_session(mdsc, i); |
3044 | n = 0; | 3083 | if (!session) |
3045 | for (i = 0; i < mdsc->max_sessions; i++) { | 3084 | continue; |
3046 | session = __ceph_lookup_mds_session(mdsc, i); | ||
3047 | if (!session) | ||
3048 | continue; | ||
3049 | mutex_unlock(&mdsc->mutex); | ||
3050 | mutex_lock(&session->s_mutex); | ||
3051 | __close_session(mdsc, session); | ||
3052 | mutex_unlock(&session->s_mutex); | ||
3053 | ceph_put_mds_session(session); | ||
3054 | mutex_lock(&mdsc->mutex); | ||
3055 | n++; | ||
3056 | } | ||
3057 | if (n == 0) | ||
3058 | break; | ||
3059 | |||
3060 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
3061 | break; | ||
3062 | |||
3063 | dout("waiting for sessions to close\n"); | ||
3064 | mutex_unlock(&mdsc->mutex); | 3085 | mutex_unlock(&mdsc->mutex); |
3065 | wait_for_completion_timeout(&mdsc->session_close_waiters, | 3086 | mutex_lock(&session->s_mutex); |
3066 | timeout); | 3087 | __close_session(mdsc, session); |
3088 | mutex_unlock(&session->s_mutex); | ||
3089 | ceph_put_mds_session(session); | ||
3067 | mutex_lock(&mdsc->mutex); | 3090 | mutex_lock(&mdsc->mutex); |
3068 | } | 3091 | } |
3092 | mutex_unlock(&mdsc->mutex); | ||
3093 | |||
3094 | dout("waiting for sessions to close\n"); | ||
3095 | wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), | ||
3096 | timeout); | ||
3069 | 3097 | ||
3070 | /* tear down remaining sessions */ | 3098 | /* tear down remaining sessions */ |
3099 | mutex_lock(&mdsc->mutex); | ||
3071 | for (i = 0; i < mdsc->max_sessions; i++) { | 3100 | for (i = 0; i < mdsc->max_sessions; i++) { |
3072 | if (mdsc->sessions[i]) { | 3101 | if (mdsc->sessions[i]) { |
3073 | session = get_session(mdsc->sessions[i]); | 3102 | session = get_session(mdsc->sessions[i]); |
@@ -3080,9 +3109,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3080 | mutex_lock(&mdsc->mutex); | 3109 | mutex_lock(&mdsc->mutex); |
3081 | } | 3110 | } |
3082 | } | 3111 | } |
3083 | |||
3084 | WARN_ON(!list_empty(&mdsc->cap_delay_list)); | 3112 | WARN_ON(!list_empty(&mdsc->cap_delay_list)); |
3085 | |||
3086 | mutex_unlock(&mdsc->mutex); | 3113 | mutex_unlock(&mdsc->mutex); |
3087 | 3114 | ||
3088 | ceph_cleanup_empty_realms(mdsc); | 3115 | ceph_cleanup_empty_realms(mdsc); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ab7e89f5e344..c98267ce6d2a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -234,7 +234,8 @@ struct ceph_mds_client { | |||
234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
235 | 235 | ||
236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
237 | struct completion safe_umount_waiters, session_close_waiters; | 237 | struct completion safe_umount_waiters; |
238 | wait_queue_head_t session_close_wq; | ||
238 | struct list_head waiting_for_map; | 239 | struct list_head waiting_for_map; |
239 | 240 | ||
240 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ | 241 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index bed6391e52c7..3b5571b8ce22 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -549,7 +549,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, | |||
549 | */ | 549 | */ |
550 | static void __cancel_request(struct ceph_osd_request *req) | 550 | static void __cancel_request(struct ceph_osd_request *req) |
551 | { | 551 | { |
552 | if (req->r_sent) { | 552 | if (req->r_sent && req->r_osd) { |
553 | ceph_con_revoke(&req->r_osd->o_con, req->r_request); | 553 | ceph_con_revoke(&req->r_osd->o_con, req->r_request); |
554 | req->r_sent = 0; | 554 | req->r_sent = 0; |
555 | } | 555 | } |
@@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc, | |||
661 | reqhead->reassert_version = req->r_reassert_version; | 661 | reqhead->reassert_version = req->r_reassert_version; |
662 | 662 | ||
663 | req->r_stamp = jiffies; | 663 | req->r_stamp = jiffies; |
664 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); | 664 | list_move_tail(&req->r_req_lru_item, &osdc->req_lru); |
665 | 665 | ||
666 | ceph_msg_get(req->r_request); /* send consumes a ref */ | 666 | ceph_msg_get(req->r_request); /* send consumes a ref */ |
667 | ceph_con_send(&req->r_osd->o_con, req->r_request); | 667 | ceph_con_send(&req->r_osd->o_con, req->r_request); |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index b6859f47d364..46a368b6dce5 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c | |||
@@ -5,10 +5,18 @@ | |||
5 | 5 | ||
6 | #include "pagelist.h" | 6 | #include "pagelist.h" |
7 | 7 | ||
8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
9 | { | ||
10 | struct page *page = list_entry(pl->head.prev, struct page, | ||
11 | lru); | ||
12 | kunmap(page); | ||
13 | } | ||
14 | |||
8 | int ceph_pagelist_release(struct ceph_pagelist *pl) | 15 | int ceph_pagelist_release(struct ceph_pagelist *pl) |
9 | { | 16 | { |
10 | if (pl->mapped_tail) | 17 | if (pl->mapped_tail) |
11 | kunmap(pl->mapped_tail); | 18 | ceph_pagelist_unmap_tail(pl); |
19 | |||
12 | while (!list_empty(&pl->head)) { | 20 | while (!list_empty(&pl->head)) { |
13 | struct page *page = list_first_entry(&pl->head, struct page, | 21 | struct page *page = list_first_entry(&pl->head, struct page, |
14 | lru); | 22 | lru); |
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | |||
26 | pl->room += PAGE_SIZE; | 34 | pl->room += PAGE_SIZE; |
27 | list_add_tail(&page->lru, &pl->head); | 35 | list_add_tail(&page->lru, &pl->head); |
28 | if (pl->mapped_tail) | 36 | if (pl->mapped_tail) |
29 | kunmap(pl->mapped_tail); | 37 | ceph_pagelist_unmap_tail(pl); |
30 | pl->mapped_tail = kmap(page); | 38 | pl->mapped_tail = kmap(page); |
31 | return 0; | 39 | return 0; |
32 | } | 40 | } |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c0b26b6badba..190b6c4a6f2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
119 | INIT_LIST_HEAD(&realm->children); | 119 | INIT_LIST_HEAD(&realm->children); |
120 | INIT_LIST_HEAD(&realm->child_item); | 120 | INIT_LIST_HEAD(&realm->child_item); |
121 | INIT_LIST_HEAD(&realm->empty_item); | 121 | INIT_LIST_HEAD(&realm->empty_item); |
122 | INIT_LIST_HEAD(&realm->dirty_item); | ||
122 | INIT_LIST_HEAD(&realm->inodes_with_caps); | 123 | INIT_LIST_HEAD(&realm->inodes_with_caps); |
123 | spin_lock_init(&realm->inodes_with_caps_lock); | 124 | spin_lock_init(&realm->inodes_with_caps_lock); |
124 | __insert_snap_realm(&mdsc->snap_realms, realm); | 125 | __insert_snap_realm(&mdsc->snap_realms, realm); |
@@ -435,7 +436,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
435 | { | 436 | { |
436 | struct inode *inode = &ci->vfs_inode; | 437 | struct inode *inode = &ci->vfs_inode; |
437 | struct ceph_cap_snap *capsnap; | 438 | struct ceph_cap_snap *capsnap; |
438 | int used; | 439 | int used, dirty; |
439 | 440 | ||
440 | capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); | 441 | capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); |
441 | if (!capsnap) { | 442 | if (!capsnap) { |
@@ -445,6 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
445 | 446 | ||
446 | spin_lock(&inode->i_lock); | 447 | spin_lock(&inode->i_lock); |
447 | used = __ceph_caps_used(ci); | 448 | used = __ceph_caps_used(ci); |
449 | dirty = __ceph_caps_dirty(ci); | ||
448 | if (__ceph_have_pending_cap_snap(ci)) { | 450 | if (__ceph_have_pending_cap_snap(ci)) { |
449 | /* there is no point in queuing multiple "pending" cap_snaps, | 451 | /* there is no point in queuing multiple "pending" cap_snaps, |
450 | as no new writes are allowed to start when pending, so any | 452 | as no new writes are allowed to start when pending, so any |
@@ -452,27 +454,37 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
452 | cap_snap. lucky us. */ | 454 | cap_snap. lucky us. */ |
453 | dout("queue_cap_snap %p already pending\n", inode); | 455 | dout("queue_cap_snap %p already pending\n", inode); |
454 | kfree(capsnap); | 456 | kfree(capsnap); |
455 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { | 457 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || |
458 | (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| | ||
459 | CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { | ||
456 | struct ceph_snap_context *snapc = ci->i_head_snapc; | 460 | struct ceph_snap_context *snapc = ci->i_head_snapc; |
457 | 461 | ||
462 | dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, | ||
463 | capsnap, snapc); | ||
458 | igrab(inode); | 464 | igrab(inode); |
459 | 465 | ||
460 | atomic_set(&capsnap->nref, 1); | 466 | atomic_set(&capsnap->nref, 1); |
461 | capsnap->ci = ci; | 467 | capsnap->ci = ci; |
462 | INIT_LIST_HEAD(&capsnap->ci_item); | 468 | INIT_LIST_HEAD(&capsnap->ci_item); |
463 | INIT_LIST_HEAD(&capsnap->flushing_item); | 469 | INIT_LIST_HEAD(&capsnap->flushing_item); |
464 | 470 | ||
465 | capsnap->follows = snapc->seq - 1; | 471 | capsnap->follows = snapc->seq; |
466 | capsnap->issued = __ceph_caps_issued(ci, NULL); | 472 | capsnap->issued = __ceph_caps_issued(ci, NULL); |
467 | capsnap->dirty = __ceph_caps_dirty(ci); | 473 | capsnap->dirty = dirty; |
468 | 474 | ||
469 | capsnap->mode = inode->i_mode; | 475 | capsnap->mode = inode->i_mode; |
470 | capsnap->uid = inode->i_uid; | 476 | capsnap->uid = inode->i_uid; |
471 | capsnap->gid = inode->i_gid; | 477 | capsnap->gid = inode->i_gid; |
472 | 478 | ||
473 | /* fixme? */ | 479 | if (dirty & CEPH_CAP_XATTR_EXCL) { |
474 | capsnap->xattr_blob = NULL; | 480 | __ceph_build_xattrs_blob(ci); |
475 | capsnap->xattr_len = 0; | 481 | capsnap->xattr_blob = |
482 | ceph_buffer_get(ci->i_xattrs.blob); | ||
483 | capsnap->xattr_version = ci->i_xattrs.version; | ||
484 | } else { | ||
485 | capsnap->xattr_blob = NULL; | ||
486 | capsnap->xattr_version = 0; | ||
487 | } | ||
476 | 488 | ||
477 | /* dirty page count moved from _head to this cap_snap; | 489 | /* dirty page count moved from _head to this cap_snap; |
478 | all subsequent writes page dirties occur _after_ this | 490 | all subsequent writes page dirties occur _after_ this |
@@ -480,7 +492,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
480 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; | 492 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; |
481 | ci->i_wrbuffer_ref_head = 0; | 493 | ci->i_wrbuffer_ref_head = 0; |
482 | capsnap->context = snapc; | 494 | capsnap->context = snapc; |
483 | ci->i_head_snapc = NULL; | 495 | ci->i_head_snapc = |
496 | ceph_get_snap_context(ci->i_snap_realm->cached_context); | ||
497 | dout(" new snapc is %p\n", ci->i_head_snapc); | ||
484 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); | 498 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); |
485 | 499 | ||
486 | if (used & CEPH_CAP_FILE_WR) { | 500 | if (used & CEPH_CAP_FILE_WR) { |
@@ -539,6 +553,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
539 | return 1; /* caller may want to ceph_flush_snaps */ | 553 | return 1; /* caller may want to ceph_flush_snaps */ |
540 | } | 554 | } |
541 | 555 | ||
556 | /* | ||
557 | * Queue cap_snaps for snap writeback for this realm and its children. | ||
558 | * Called under snap_rwsem, so realm topology won't change. | ||
559 | */ | ||
560 | static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | ||
561 | { | ||
562 | struct ceph_inode_info *ci; | ||
563 | struct inode *lastinode = NULL; | ||
564 | struct ceph_snap_realm *child; | ||
565 | |||
566 | dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino); | ||
567 | |||
568 | spin_lock(&realm->inodes_with_caps_lock); | ||
569 | list_for_each_entry(ci, &realm->inodes_with_caps, | ||
570 | i_snap_realm_item) { | ||
571 | struct inode *inode = igrab(&ci->vfs_inode); | ||
572 | if (!inode) | ||
573 | continue; | ||
574 | spin_unlock(&realm->inodes_with_caps_lock); | ||
575 | if (lastinode) | ||
576 | iput(lastinode); | ||
577 | lastinode = inode; | ||
578 | ceph_queue_cap_snap(ci); | ||
579 | spin_lock(&realm->inodes_with_caps_lock); | ||
580 | } | ||
581 | spin_unlock(&realm->inodes_with_caps_lock); | ||
582 | if (lastinode) | ||
583 | iput(lastinode); | ||
584 | |||
585 | dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); | ||
586 | list_for_each_entry(child, &realm->children, child_item) | ||
587 | queue_realm_cap_snaps(child); | ||
588 | |||
589 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); | ||
590 | } | ||
542 | 591 | ||
543 | /* | 592 | /* |
544 | * Parse and apply a snapblob "snap trace" from the MDS. This specifies | 593 | * Parse and apply a snapblob "snap trace" from the MDS. This specifies |
@@ -556,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, | |||
556 | struct ceph_snap_realm *realm; | 605 | struct ceph_snap_realm *realm; |
557 | int invalidate = 0; | 606 | int invalidate = 0; |
558 | int err = -ENOMEM; | 607 | int err = -ENOMEM; |
608 | LIST_HEAD(dirty_realms); | ||
559 | 609 | ||
560 | dout("update_snap_trace deletion=%d\n", deletion); | 610 | dout("update_snap_trace deletion=%d\n", deletion); |
561 | more: | 611 | more: |
@@ -578,45 +628,6 @@ more: | |||
578 | } | 628 | } |
579 | } | 629 | } |
580 | 630 | ||
581 | if (le64_to_cpu(ri->seq) > realm->seq) { | ||
582 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
583 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
584 | /* | ||
585 | * if the realm seq has changed, queue a cap_snap for every | ||
586 | * inode with open caps. we do this _before_ we update | ||
587 | * the realm info so that we prepare for writeback under the | ||
588 | * _previous_ snap context. | ||
589 | * | ||
590 | * ...unless it's a snap deletion! | ||
591 | */ | ||
592 | if (!deletion) { | ||
593 | struct ceph_inode_info *ci; | ||
594 | struct inode *lastinode = NULL; | ||
595 | |||
596 | spin_lock(&realm->inodes_with_caps_lock); | ||
597 | list_for_each_entry(ci, &realm->inodes_with_caps, | ||
598 | i_snap_realm_item) { | ||
599 | struct inode *inode = igrab(&ci->vfs_inode); | ||
600 | if (!inode) | ||
601 | continue; | ||
602 | spin_unlock(&realm->inodes_with_caps_lock); | ||
603 | if (lastinode) | ||
604 | iput(lastinode); | ||
605 | lastinode = inode; | ||
606 | ceph_queue_cap_snap(ci); | ||
607 | spin_lock(&realm->inodes_with_caps_lock); | ||
608 | } | ||
609 | spin_unlock(&realm->inodes_with_caps_lock); | ||
610 | if (lastinode) | ||
611 | iput(lastinode); | ||
612 | dout("update_snap_trace cap_snaps queued\n"); | ||
613 | } | ||
614 | |||
615 | } else { | ||
616 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
617 | realm->ino, realm, realm->seq); | ||
618 | } | ||
619 | |||
620 | /* ensure the parent is correct */ | 631 | /* ensure the parent is correct */ |
621 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); | 632 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); |
622 | if (err < 0) | 633 | if (err < 0) |
@@ -624,6 +635,8 @@ more: | |||
624 | invalidate += err; | 635 | invalidate += err; |
625 | 636 | ||
626 | if (le64_to_cpu(ri->seq) > realm->seq) { | 637 | if (le64_to_cpu(ri->seq) > realm->seq) { |
638 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
639 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
627 | /* update realm parameters, snap lists */ | 640 | /* update realm parameters, snap lists */ |
628 | realm->seq = le64_to_cpu(ri->seq); | 641 | realm->seq = le64_to_cpu(ri->seq); |
629 | realm->created = le64_to_cpu(ri->created); | 642 | realm->created = le64_to_cpu(ri->created); |
@@ -641,9 +654,17 @@ more: | |||
641 | if (err < 0) | 654 | if (err < 0) |
642 | goto fail; | 655 | goto fail; |
643 | 656 | ||
657 | /* queue realm for cap_snap creation */ | ||
658 | list_add(&realm->dirty_item, &dirty_realms); | ||
659 | |||
644 | invalidate = 1; | 660 | invalidate = 1; |
645 | } else if (!realm->cached_context) { | 661 | } else if (!realm->cached_context) { |
662 | dout("update_snap_trace %llx %p seq %lld new\n", | ||
663 | realm->ino, realm, realm->seq); | ||
646 | invalidate = 1; | 664 | invalidate = 1; |
665 | } else { | ||
666 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
667 | realm->ino, realm, realm->seq); | ||
647 | } | 668 | } |
648 | 669 | ||
649 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, | 670 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
@@ -656,6 +677,14 @@ more: | |||
656 | if (invalidate) | 677 | if (invalidate) |
657 | rebuild_snap_realms(realm); | 678 | rebuild_snap_realms(realm); |
658 | 679 | ||
680 | /* | ||
681 | * queue cap snaps _after_ we've built the new snap contexts, | ||
682 | * so that i_head_snapc can be set appropriately. | ||
683 | */ | ||
684 | list_for_each_entry(realm, &dirty_realms, dirty_item) { | ||
685 | queue_realm_cap_snaps(realm); | ||
686 | } | ||
687 | |||
659 | __cleanup_empty_realms(mdsc); | 688 | __cleanup_empty_realms(mdsc); |
660 | return 0; | 689 | return 0; |
661 | 690 | ||
@@ -688,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
688 | igrab(inode); | 717 | igrab(inode); |
689 | spin_unlock(&mdsc->snap_flush_lock); | 718 | spin_unlock(&mdsc->snap_flush_lock); |
690 | spin_lock(&inode->i_lock); | 719 | spin_lock(&inode->i_lock); |
691 | __ceph_flush_snaps(ci, &session); | 720 | __ceph_flush_snaps(ci, &session, 0); |
692 | spin_unlock(&inode->i_lock); | 721 | spin_unlock(&inode->i_lock); |
693 | iput(inode); | 722 | iput(inode); |
694 | spin_lock(&mdsc->snap_flush_lock); | 723 | spin_lock(&mdsc->snap_flush_lock); |
@@ -789,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
789 | }; | 818 | }; |
790 | struct inode *inode = ceph_find_inode(sb, vino); | 819 | struct inode *inode = ceph_find_inode(sb, vino); |
791 | struct ceph_inode_info *ci; | 820 | struct ceph_inode_info *ci; |
821 | struct ceph_snap_realm *oldrealm; | ||
792 | 822 | ||
793 | if (!inode) | 823 | if (!inode) |
794 | continue; | 824 | continue; |
@@ -814,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
814 | dout(" will move %p to split realm %llx %p\n", | 844 | dout(" will move %p to split realm %llx %p\n", |
815 | inode, realm->ino, realm); | 845 | inode, realm->ino, realm); |
816 | /* | 846 | /* |
817 | * Remove the inode from the realm's inode | 847 | * Move the inode to the new realm |
818 | * list, but don't add it to the new realm | ||
819 | * yet. We don't want the cap_snap to be | ||
820 | * queued (again) by ceph_update_snap_trace() | ||
821 | * below. Queue it _now_, under the old context. | ||
822 | */ | 848 | */ |
823 | spin_lock(&realm->inodes_with_caps_lock); | 849 | spin_lock(&realm->inodes_with_caps_lock); |
824 | list_del_init(&ci->i_snap_realm_item); | 850 | list_del_init(&ci->i_snap_realm_item); |
851 | list_add(&ci->i_snap_realm_item, | ||
852 | &realm->inodes_with_caps); | ||
853 | oldrealm = ci->i_snap_realm; | ||
854 | ci->i_snap_realm = realm; | ||
825 | spin_unlock(&realm->inodes_with_caps_lock); | 855 | spin_unlock(&realm->inodes_with_caps_lock); |
826 | spin_unlock(&inode->i_lock); | 856 | spin_unlock(&inode->i_lock); |
827 | 857 | ||
828 | ceph_queue_cap_snap(ci); | 858 | ceph_get_snap_realm(mdsc, realm); |
859 | ceph_put_snap_realm(mdsc, oldrealm); | ||
829 | 860 | ||
830 | iput(inode); | 861 | iput(inode); |
831 | continue; | 862 | continue; |
@@ -853,43 +884,9 @@ skip_inode: | |||
853 | ceph_update_snap_trace(mdsc, p, e, | 884 | ceph_update_snap_trace(mdsc, p, e, |
854 | op == CEPH_SNAP_OP_DESTROY); | 885 | op == CEPH_SNAP_OP_DESTROY); |
855 | 886 | ||
856 | if (op == CEPH_SNAP_OP_SPLIT) { | 887 | if (op == CEPH_SNAP_OP_SPLIT) |
857 | /* | ||
858 | * ok, _now_ add the inodes into the new realm. | ||
859 | */ | ||
860 | for (i = 0; i < num_split_inos; i++) { | ||
861 | struct ceph_vino vino = { | ||
862 | .ino = le64_to_cpu(split_inos[i]), | ||
863 | .snap = CEPH_NOSNAP, | ||
864 | }; | ||
865 | struct inode *inode = ceph_find_inode(sb, vino); | ||
866 | struct ceph_inode_info *ci; | ||
867 | |||
868 | if (!inode) | ||
869 | continue; | ||
870 | ci = ceph_inode(inode); | ||
871 | spin_lock(&inode->i_lock); | ||
872 | if (list_empty(&ci->i_snap_realm_item)) { | ||
873 | struct ceph_snap_realm *oldrealm = | ||
874 | ci->i_snap_realm; | ||
875 | |||
876 | dout(" moving %p to split realm %llx %p\n", | ||
877 | inode, realm->ino, realm); | ||
878 | spin_lock(&realm->inodes_with_caps_lock); | ||
879 | list_add(&ci->i_snap_realm_item, | ||
880 | &realm->inodes_with_caps); | ||
881 | ci->i_snap_realm = realm; | ||
882 | spin_unlock(&realm->inodes_with_caps_lock); | ||
883 | ceph_get_snap_realm(mdsc, realm); | ||
884 | ceph_put_snap_realm(mdsc, oldrealm); | ||
885 | } | ||
886 | spin_unlock(&inode->i_lock); | ||
887 | iput(inode); | ||
888 | } | ||
889 | |||
890 | /* we took a reference when we created the realm, above */ | 888 | /* we took a reference when we created the realm, above */ |
891 | ceph_put_snap_realm(mdsc, realm); | 889 | ceph_put_snap_realm(mdsc, realm); |
892 | } | ||
893 | 890 | ||
894 | __cleanup_empty_realms(mdsc); | 891 | __cleanup_empty_realms(mdsc); |
895 | 892 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2482d696f0de..b87638e84c4b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -216,8 +216,7 @@ struct ceph_cap_snap { | |||
216 | uid_t uid; | 216 | uid_t uid; |
217 | gid_t gid; | 217 | gid_t gid; |
218 | 218 | ||
219 | void *xattr_blob; | 219 | struct ceph_buffer *xattr_blob; |
220 | int xattr_len; | ||
221 | u64 xattr_version; | 220 | u64 xattr_version; |
222 | 221 | ||
223 | u64 size; | 222 | u64 size; |
@@ -229,8 +228,11 @@ struct ceph_cap_snap { | |||
229 | 228 | ||
230 | static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) | 229 | static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) |
231 | { | 230 | { |
232 | if (atomic_dec_and_test(&capsnap->nref)) | 231 | if (atomic_dec_and_test(&capsnap->nref)) { |
232 | if (capsnap->xattr_blob) | ||
233 | ceph_buffer_put(capsnap->xattr_blob); | ||
233 | kfree(capsnap); | 234 | kfree(capsnap); |
235 | } | ||
234 | } | 236 | } |
235 | 237 | ||
236 | /* | 238 | /* |
@@ -342,7 +344,8 @@ struct ceph_inode_info { | |||
342 | unsigned i_cap_exporting_issued; | 344 | unsigned i_cap_exporting_issued; |
343 | struct ceph_cap_reservation i_cap_migration_resv; | 345 | struct ceph_cap_reservation i_cap_migration_resv; |
344 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ | 346 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ |
345 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */ | 347 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
348 | dirty|flushing caps */ | ||
346 | unsigned i_snap_caps; /* cap bits for snapped files */ | 349 | unsigned i_snap_caps; /* cap bits for snapped files */ |
347 | 350 | ||
348 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 351 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
@@ -687,6 +690,8 @@ struct ceph_snap_realm { | |||
687 | 690 | ||
688 | struct list_head empty_item; /* if i have ref==0 */ | 691 | struct list_head empty_item; /* if i have ref==0 */ |
689 | 692 | ||
693 | struct list_head dirty_item; /* if realm needs new context */ | ||
694 | |||
690 | /* the current set of snaps for this realm */ | 695 | /* the current set of snaps for this realm */ |
691 | struct ceph_snap_context *cached_context; | 696 | struct ceph_snap_context *cached_context; |
692 | 697 | ||
@@ -823,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | |||
823 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | 828 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, |
824 | struct ceph_snap_context *snapc); | 829 | struct ceph_snap_context *snapc); |
825 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, | 830 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, |
826 | struct ceph_mds_session **psession); | 831 | struct ceph_mds_session **psession, |
832 | int again); | ||
827 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 833 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
828 | struct ceph_mds_session *session); | 834 | struct ceph_mds_session *session); |
829 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); | 835 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 097a2654c00f..9578af610b73 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) | |||
485 | ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; | 485 | ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; |
486 | ci->i_xattrs.prealloc_blob = NULL; | 486 | ci->i_xattrs.prealloc_blob = NULL; |
487 | ci->i_xattrs.dirty = false; | 487 | ci->i_xattrs.dirty = false; |
488 | ci->i_xattrs.version++; | ||
488 | } | 489 | } |
489 | } | 490 | } |
490 | 491 | ||
diff --git a/fs/char_dev.c b/fs/char_dev.c index f80a4f25123c..143d393881cb 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = { | |||
40 | #endif | 40 | #endif |
41 | /* permit direct mmap, for read, write or exec */ | 41 | /* permit direct mmap, for read, write or exec */ |
42 | BDI_CAP_MAP_DIRECT | | 42 | BDI_CAP_MAP_DIRECT | |
43 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), | 43 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP | |
44 | /* no writeback happens */ | ||
45 | BDI_CAP_NO_ACCT_AND_WRITEBACK), | ||
44 | }; | 46 | }; |
45 | 47 | ||
46 | static struct kobj_map *cdev_map; | 48 | static struct kobj_map *cdev_map; |
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 650638275a6f..7fe6b52df507 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h | |||
@@ -30,6 +30,8 @@ | |||
30 | * This is a compressed table of upper and lower case conversion. | 30 | * This is a compressed table of upper and lower case conversion. |
31 | * | 31 | * |
32 | */ | 32 | */ |
33 | #ifndef _CIFS_UNICODE_H | ||
34 | #define _CIFS_UNICODE_H | ||
33 | 35 | ||
34 | #include <asm/byteorder.h> | 36 | #include <asm/byteorder.h> |
35 | #include <linux/types.h> | 37 | #include <linux/types.h> |
@@ -67,8 +69,8 @@ extern const struct UniCaseRange CifsUniUpperRange[]; | |||
67 | #endif /* UNIUPR_NOUPPER */ | 69 | #endif /* UNIUPR_NOUPPER */ |
68 | 70 | ||
69 | #ifndef UNIUPR_NOLOWER | 71 | #ifndef UNIUPR_NOLOWER |
70 | extern signed char UniLowerTable[512]; | 72 | extern signed char CifsUniLowerTable[512]; |
71 | extern struct UniCaseRange UniLowerRange[]; | 73 | extern const struct UniCaseRange CifsUniLowerRange[]; |
72 | #endif /* UNIUPR_NOLOWER */ | 74 | #endif /* UNIUPR_NOLOWER */ |
73 | 75 | ||
74 | #ifdef __KERNEL__ | 76 | #ifdef __KERNEL__ |
@@ -337,15 +339,15 @@ UniStrupr(register wchar_t *upin) | |||
337 | * UniTolower: Convert a unicode character to lower case | 339 | * UniTolower: Convert a unicode character to lower case |
338 | */ | 340 | */ |
339 | static inline wchar_t | 341 | static inline wchar_t |
340 | UniTolower(wchar_t uc) | 342 | UniTolower(register wchar_t uc) |
341 | { | 343 | { |
342 | register struct UniCaseRange *rp; | 344 | register const struct UniCaseRange *rp; |
343 | 345 | ||
344 | if (uc < sizeof(UniLowerTable)) { | 346 | if (uc < sizeof(CifsUniLowerTable)) { |
345 | /* Latin characters */ | 347 | /* Latin characters */ |
346 | return uc + UniLowerTable[uc]; /* Use base tables */ | 348 | return uc + CifsUniLowerTable[uc]; /* Use base tables */ |
347 | } else { | 349 | } else { |
348 | rp = UniLowerRange; /* Use range tables */ | 350 | rp = CifsUniLowerRange; /* Use range tables */ |
349 | while (rp->start) { | 351 | while (rp->start) { |
350 | if (uc < rp->start) /* Before start of range */ | 352 | if (uc < rp->start) /* Before start of range */ |
351 | return uc; /* Uppercase = input */ | 353 | return uc; /* Uppercase = input */ |
@@ -374,3 +376,5 @@ UniStrlwr(register wchar_t *upin) | |||
374 | } | 376 | } |
375 | 377 | ||
376 | #endif | 378 | #endif |
379 | |||
380 | #endif /* _CIFS_UNICODE_H */ | ||
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h index 18a9d978e519..0ac7c5a8633a 100644 --- a/fs/cifs/cifs_uniupr.h +++ b/fs/cifs/cifs_uniupr.h | |||
@@ -140,7 +140,7 @@ const struct UniCaseRange CifsUniUpperRange[] = { | |||
140 | /* | 140 | /* |
141 | * Latin lower case | 141 | * Latin lower case |
142 | */ | 142 | */ |
143 | static signed char CifsUniLowerTable[512] = { | 143 | signed char CifsUniLowerTable[512] = { |
144 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ | 144 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ |
145 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ | 145 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ |
146 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ | 146 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ |
@@ -242,12 +242,12 @@ static signed char UniCaseRangeLff20[27] = { | |||
242 | /* | 242 | /* |
243 | * Lower Case Range | 243 | * Lower Case Range |
244 | */ | 244 | */ |
245 | static const struct UniCaseRange CifsUniLowerRange[] = { | 245 | const struct UniCaseRange CifsUniLowerRange[] = { |
246 | 0x0380, 0x03ab, UniCaseRangeL0380, | 246 | {0x0380, 0x03ab, UniCaseRangeL0380}, |
247 | 0x0400, 0x042f, UniCaseRangeL0400, | 247 | {0x0400, 0x042f, UniCaseRangeL0400}, |
248 | 0x0490, 0x04cb, UniCaseRangeL0490, | 248 | {0x0490, 0x04cb, UniCaseRangeL0490}, |
249 | 0x1e00, 0x1ff7, UniCaseRangeL1e00, | 249 | {0x1e00, 0x1ff7, UniCaseRangeL1e00}, |
250 | 0xff20, 0xff3a, UniCaseRangeLff20, | 250 | {0xff20, 0xff3a, UniCaseRangeLff20}, |
251 | 0, 0, 0 | 251 | {0} |
252 | }; | 252 | }; |
253 | #endif | 253 | #endif |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 847628dfdc44..35042d8f7338 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -223,63 +223,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | |||
223 | return 0; | 223 | return 0; |
224 | } | 224 | } |
225 | 225 | ||
226 | int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses, | ||
227 | const struct nls_table *nls_info) | ||
228 | { | ||
229 | char temp_hash[16]; | ||
230 | struct HMACMD5Context ctx; | ||
231 | char *ucase_buf; | ||
232 | __le16 *unicode_buf; | ||
233 | unsigned int i, user_name_len, dom_name_len; | ||
234 | |||
235 | if (ses == NULL) | ||
236 | return -EINVAL; | ||
237 | |||
238 | E_md4hash(ses->password, temp_hash); | ||
239 | |||
240 | hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); | ||
241 | user_name_len = strlen(ses->userName); | ||
242 | if (user_name_len > MAX_USERNAME_SIZE) | ||
243 | return -EINVAL; | ||
244 | if (ses->domainName == NULL) | ||
245 | return -EINVAL; /* BB should we use CIFS_LINUX_DOM */ | ||
246 | dom_name_len = strlen(ses->domainName); | ||
247 | if (dom_name_len > MAX_USERNAME_SIZE) | ||
248 | return -EINVAL; | ||
249 | |||
250 | ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); | ||
251 | if (ucase_buf == NULL) | ||
252 | return -ENOMEM; | ||
253 | unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); | ||
254 | if (unicode_buf == NULL) { | ||
255 | kfree(ucase_buf); | ||
256 | return -ENOMEM; | ||
257 | } | ||
258 | |||
259 | for (i = 0; i < user_name_len; i++) | ||
260 | ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]]; | ||
261 | ucase_buf[i] = 0; | ||
262 | user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, | ||
263 | MAX_USERNAME_SIZE*2, nls_info); | ||
264 | unicode_buf[user_name_len] = 0; | ||
265 | user_name_len++; | ||
266 | |||
267 | for (i = 0; i < dom_name_len; i++) | ||
268 | ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]]; | ||
269 | ucase_buf[i] = 0; | ||
270 | dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, | ||
271 | MAX_USERNAME_SIZE*2, nls_info); | ||
272 | |||
273 | unicode_buf[user_name_len + dom_name_len] = 0; | ||
274 | hmac_md5_update((const unsigned char *) unicode_buf, | ||
275 | (user_name_len+dom_name_len)*2, &ctx); | ||
276 | |||
277 | hmac_md5_final(ses->server->ntlmv2_hash, &ctx); | ||
278 | kfree(ucase_buf); | ||
279 | kfree(unicode_buf); | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 226 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
284 | void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, | 227 | void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, |
285 | char *lnm_session_key) | 228 | char *lnm_session_key) |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1f5450814087..1d60c655e3e0 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); | |||
87 | extern int decode_negTokenInit(unsigned char *security_blob, int length, | 87 | extern int decode_negTokenInit(unsigned char *security_blob, int length, |
88 | struct TCP_Server_Info *server); | 88 | struct TCP_Server_Info *server); |
89 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); | 89 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); |
90 | extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); | ||
90 | extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, | 91 | extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, |
91 | unsigned short int port); | 92 | const unsigned short int port); |
92 | extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); | 93 | extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); |
93 | extern void header_assemble(struct smb_hdr *, char /* command */ , | 94 | extern void header_assemble(struct smb_hdr *, char /* command */ , |
94 | const struct cifsTconInfo *, int /* length of | 95 | const struct cifsTconInfo *, int /* length of |
@@ -365,8 +366,6 @@ extern int cifs_verify_signature(struct smb_hdr *, | |||
365 | __u32 expected_sequence_number); | 366 | __u32 expected_sequence_number); |
366 | extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | 367 | extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, |
367 | const char *pass); | 368 | const char *pass); |
368 | extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, | ||
369 | const struct nls_table *); | ||
370 | extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); | 369 | extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); |
371 | extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, | 370 | extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, |
372 | const struct nls_table *); | 371 | const struct nls_table *); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c65c3419dd37..7e83b356cc9e 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -232,7 +232,7 @@ static int | |||
232 | small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | 232 | small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, |
233 | void **request_buf) | 233 | void **request_buf) |
234 | { | 234 | { |
235 | int rc = 0; | 235 | int rc; |
236 | 236 | ||
237 | rc = cifs_reconnect_tcon(tcon, smb_command); | 237 | rc = cifs_reconnect_tcon(tcon, smb_command); |
238 | if (rc) | 238 | if (rc) |
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | |||
250 | if (tcon != NULL) | 250 | if (tcon != NULL) |
251 | cifs_stats_inc(&tcon->num_smbs_sent); | 251 | cifs_stats_inc(&tcon->num_smbs_sent); |
252 | 252 | ||
253 | return rc; | 253 | return 0; |
254 | } | 254 | } |
255 | 255 | ||
256 | int | 256 | int |
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct, | |||
281 | 281 | ||
282 | /* If the return code is zero, this function must fill in request_buf pointer */ | 282 | /* If the return code is zero, this function must fill in request_buf pointer */ |
283 | static int | 283 | static int |
284 | smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | 284 | __smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, |
285 | void **request_buf /* returned */ , | 285 | void **request_buf, void **response_buf) |
286 | void **response_buf /* returned */ ) | ||
287 | { | 286 | { |
288 | int rc = 0; | ||
289 | |||
290 | rc = cifs_reconnect_tcon(tcon, smb_command); | ||
291 | if (rc) | ||
292 | return rc; | ||
293 | |||
294 | *request_buf = cifs_buf_get(); | 287 | *request_buf = cifs_buf_get(); |
295 | if (*request_buf == NULL) { | 288 | if (*request_buf == NULL) { |
296 | /* BB should we add a retry in here if not a writepage? */ | 289 | /* BB should we add a retry in here if not a writepage? */ |
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | |||
309 | if (tcon != NULL) | 302 | if (tcon != NULL) |
310 | cifs_stats_inc(&tcon->num_smbs_sent); | 303 | cifs_stats_inc(&tcon->num_smbs_sent); |
311 | 304 | ||
312 | return rc; | 305 | return 0; |
306 | } | ||
307 | |||
308 | /* If the return code is zero, this function must fill in request_buf pointer */ | ||
309 | static int | ||
310 | smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | ||
311 | void **request_buf, void **response_buf) | ||
312 | { | ||
313 | int rc; | ||
314 | |||
315 | rc = cifs_reconnect_tcon(tcon, smb_command); | ||
316 | if (rc) | ||
317 | return rc; | ||
318 | |||
319 | return __smb_init(smb_command, wct, tcon, request_buf, response_buf); | ||
320 | } | ||
321 | |||
322 | static int | ||
323 | smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, | ||
324 | void **request_buf, void **response_buf) | ||
325 | { | ||
326 | if (tcon->ses->need_reconnect || tcon->need_reconnect) | ||
327 | return -EHOSTDOWN; | ||
328 | |||
329 | return __smb_init(smb_command, wct, tcon, request_buf, response_buf); | ||
313 | } | 330 | } |
314 | 331 | ||
315 | static int validate_t2(struct smb_t2_rsp *pSMB) | 332 | static int validate_t2(struct smb_t2_rsp *pSMB) |
@@ -4534,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon) | |||
4534 | 4551 | ||
4535 | cFYI(1, "In QFSUnixInfo"); | 4552 | cFYI(1, "In QFSUnixInfo"); |
4536 | QFSUnixRetry: | 4553 | QFSUnixRetry: |
4537 | rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, | 4554 | rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, |
4538 | (void **) &pSMBr); | 4555 | (void **) &pSMB, (void **) &pSMBr); |
4539 | if (rc) | 4556 | if (rc) |
4540 | return rc; | 4557 | return rc; |
4541 | 4558 | ||
@@ -4604,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap) | |||
4604 | cFYI(1, "In SETFSUnixInfo"); | 4621 | cFYI(1, "In SETFSUnixInfo"); |
4605 | SETFSUnixRetry: | 4622 | SETFSUnixRetry: |
4606 | /* BB switch to small buf init to save memory */ | 4623 | /* BB switch to small buf init to save memory */ |
4607 | rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, | 4624 | rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, |
4608 | (void **) &pSMBr); | 4625 | (void **) &pSMB, (void **) &pSMBr); |
4609 | if (rc) | 4626 | if (rc) |
4610 | return rc; | 4627 | return rc; |
4611 | 4628 | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 95c2ea67edfb..88c84a38bccb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -400,7 +400,9 @@ incomplete_rcv: | |||
400 | cFYI(1, "call to reconnect done"); | 400 | cFYI(1, "call to reconnect done"); |
401 | csocket = server->ssocket; | 401 | csocket = server->ssocket; |
402 | continue; | 402 | continue; |
403 | } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { | 403 | } else if (length == -ERESTARTSYS || |
404 | length == -EAGAIN || | ||
405 | length == -EINTR) { | ||
404 | msleep(1); /* minimum sleep to prevent looping | 406 | msleep(1); /* minimum sleep to prevent looping |
405 | allowing socket to clear and app threads to set | 407 | allowing socket to clear and app threads to set |
406 | tcpStatus CifsNeedReconnect if server hung */ | 408 | tcpStatus CifsNeedReconnect if server hung */ |
@@ -414,18 +416,6 @@ incomplete_rcv: | |||
414 | } else | 416 | } else |
415 | continue; | 417 | continue; |
416 | } else if (length <= 0) { | 418 | } else if (length <= 0) { |
417 | if (server->tcpStatus == CifsNew) { | ||
418 | cFYI(1, "tcp session abend after SMBnegprot"); | ||
419 | /* some servers kill the TCP session rather than | ||
420 | returning an SMB negprot error, in which | ||
421 | case reconnecting here is not going to help, | ||
422 | and so simply return error to mount */ | ||
423 | break; | ||
424 | } | ||
425 | if (!try_to_freeze() && (length == -EINTR)) { | ||
426 | cFYI(1, "cifsd thread killed"); | ||
427 | break; | ||
428 | } | ||
429 | cFYI(1, "Reconnect after unexpected peek error %d", | 419 | cFYI(1, "Reconnect after unexpected peek error %d", |
430 | length); | 420 | length); |
431 | cifs_reconnect(server); | 421 | cifs_reconnect(server); |
@@ -466,27 +456,19 @@ incomplete_rcv: | |||
466 | an error on SMB negprot response */ | 456 | an error on SMB negprot response */ |
467 | cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", | 457 | cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", |
468 | pdu_length); | 458 | pdu_length); |
469 | if (server->tcpStatus == CifsNew) { | 459 | /* give server a second to clean up */ |
470 | /* if nack on negprot (rather than | 460 | msleep(1000); |
471 | ret of smb negprot error) reconnecting | 461 | /* always try 445 first on reconnect since we get NACK |
472 | not going to help, ret error to mount */ | 462 | * on some if we ever connected to port 139 (the NACK |
473 | break; | 463 | * is since we do not begin with RFC1001 session |
474 | } else { | 464 | * initialize frame) |
475 | /* give server a second to | 465 | */ |
476 | clean up before reconnect attempt */ | 466 | cifs_set_port((struct sockaddr *) |
477 | msleep(1000); | 467 | &server->addr.sockAddr, CIFS_PORT); |
478 | /* always try 445 first on reconnect | 468 | cifs_reconnect(server); |
479 | since we get NACK on some if we ever | 469 | csocket = server->ssocket; |
480 | connected to port 139 (the NACK is | 470 | wake_up(&server->response_q); |
481 | since we do not begin with RFC1001 | 471 | continue; |
482 | session initialize frame) */ | ||
483 | server->addr.sockAddr.sin_port = | ||
484 | htons(CIFS_PORT); | ||
485 | cifs_reconnect(server); | ||
486 | csocket = server->ssocket; | ||
487 | wake_up(&server->response_q); | ||
488 | continue; | ||
489 | } | ||
490 | } else if (temp != (char) 0) { | 472 | } else if (temp != (char) 0) { |
491 | cERROR(1, "Unknown RFC 1002 frame"); | 473 | cERROR(1, "Unknown RFC 1002 frame"); |
492 | cifs_dump_mem(" Received Data: ", (char *)smb_buffer, | 474 | cifs_dump_mem(" Received Data: ", (char *)smb_buffer, |
@@ -522,8 +504,7 @@ incomplete_rcv: | |||
522 | total_read += length) { | 504 | total_read += length) { |
523 | length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, | 505 | length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, |
524 | pdu_length - total_read, 0); | 506 | pdu_length - total_read, 0); |
525 | if ((server->tcpStatus == CifsExiting) || | 507 | if (server->tcpStatus == CifsExiting) { |
526 | (length == -EINTR)) { | ||
527 | /* then will exit */ | 508 | /* then will exit */ |
528 | reconnect = 2; | 509 | reconnect = 2; |
529 | break; | 510 | break; |
@@ -534,8 +515,9 @@ incomplete_rcv: | |||
534 | /* Now we will reread sock */ | 515 | /* Now we will reread sock */ |
535 | reconnect = 1; | 516 | reconnect = 1; |
536 | break; | 517 | break; |
537 | } else if ((length == -ERESTARTSYS) || | 518 | } else if (length == -ERESTARTSYS || |
538 | (length == -EAGAIN)) { | 519 | length == -EAGAIN || |
520 | length == -EINTR) { | ||
539 | msleep(1); /* minimum sleep to prevent looping, | 521 | msleep(1); /* minimum sleep to prevent looping, |
540 | allowing socket to clear and app | 522 | allowing socket to clear and app |
541 | threads to set tcpStatus | 523 | threads to set tcpStatus |
@@ -1673,7 +1655,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
1673 | MAX_USERNAME_SIZE)) | 1655 | MAX_USERNAME_SIZE)) |
1674 | continue; | 1656 | continue; |
1675 | if (strlen(vol->username) != 0 && | 1657 | if (strlen(vol->username) != 0 && |
1676 | strncmp(ses->password, vol->password, | 1658 | ses->password != NULL && |
1659 | strncmp(ses->password, | ||
1660 | vol->password ? vol->password : "", | ||
1677 | MAX_PASSWORD_SIZE)) | 1661 | MAX_PASSWORD_SIZE)) |
1678 | continue; | 1662 | continue; |
1679 | } | 1663 | } |
@@ -1722,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
1722 | if (ses) { | 1706 | if (ses) { |
1723 | cFYI(1, "Existing smb sess found (status=%d)", ses->status); | 1707 | cFYI(1, "Existing smb sess found (status=%d)", ses->status); |
1724 | 1708 | ||
1725 | /* existing SMB ses has a server reference already */ | ||
1726 | cifs_put_tcp_session(server); | ||
1727 | |||
1728 | mutex_lock(&ses->session_mutex); | 1709 | mutex_lock(&ses->session_mutex); |
1729 | rc = cifs_negotiate_protocol(xid, ses); | 1710 | rc = cifs_negotiate_protocol(xid, ses); |
1730 | if (rc) { | 1711 | if (rc) { |
@@ -1747,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
1747 | } | 1728 | } |
1748 | } | 1729 | } |
1749 | mutex_unlock(&ses->session_mutex); | 1730 | mutex_unlock(&ses->session_mutex); |
1731 | |||
1732 | /* existing SMB ses has a server reference already */ | ||
1733 | cifs_put_tcp_session(server); | ||
1750 | FreeXid(xid); | 1734 | FreeXid(xid); |
1751 | return ses; | 1735 | return ses; |
1752 | } | 1736 | } |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 578d88c5b46e..f9ed0751cc12 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -305,8 +305,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
305 | full_path = build_path_from_dentry(direntry); | 305 | full_path = build_path_from_dentry(direntry); |
306 | if (full_path == NULL) { | 306 | if (full_path == NULL) { |
307 | rc = -ENOMEM; | 307 | rc = -ENOMEM; |
308 | FreeXid(xid); | 308 | goto cifs_create_out; |
309 | return rc; | ||
310 | } | 309 | } |
311 | 310 | ||
312 | if (oplockEnabled) | 311 | if (oplockEnabled) |
@@ -365,9 +364,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
365 | 364 | ||
366 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); | 365 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); |
367 | if (buf == NULL) { | 366 | if (buf == NULL) { |
368 | kfree(full_path); | 367 | rc = -ENOMEM; |
369 | FreeXid(xid); | 368 | goto cifs_create_out; |
370 | return -ENOMEM; | ||
371 | } | 369 | } |
372 | 370 | ||
373 | /* | 371 | /* |
@@ -496,6 +494,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
496 | struct cifsTconInfo *pTcon; | 494 | struct cifsTconInfo *pTcon; |
497 | char *full_path = NULL; | 495 | char *full_path = NULL; |
498 | struct inode *newinode = NULL; | 496 | struct inode *newinode = NULL; |
497 | int oplock = 0; | ||
498 | u16 fileHandle; | ||
499 | FILE_ALL_INFO *buf = NULL; | ||
500 | unsigned int bytes_written; | ||
501 | struct win_dev *pdev; | ||
499 | 502 | ||
500 | if (!old_valid_dev(device_number)) | 503 | if (!old_valid_dev(device_number)) |
501 | return -EINVAL; | 504 | return -EINVAL; |
@@ -506,9 +509,12 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
506 | pTcon = cifs_sb->tcon; | 509 | pTcon = cifs_sb->tcon; |
507 | 510 | ||
508 | full_path = build_path_from_dentry(direntry); | 511 | full_path = build_path_from_dentry(direntry); |
509 | if (full_path == NULL) | 512 | if (full_path == NULL) { |
510 | rc = -ENOMEM; | 513 | rc = -ENOMEM; |
511 | else if (pTcon->unix_ext) { | 514 | goto mknod_out; |
515 | } | ||
516 | |||
517 | if (pTcon->unix_ext) { | ||
512 | struct cifs_unix_set_info_args args = { | 518 | struct cifs_unix_set_info_args args = { |
513 | .mode = mode & ~current_umask(), | 519 | .mode = mode & ~current_umask(), |
514 | .ctime = NO_CHANGE_64, | 520 | .ctime = NO_CHANGE_64, |
@@ -527,87 +533,78 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
527 | cifs_sb->local_nls, | 533 | cifs_sb->local_nls, |
528 | cifs_sb->mnt_cifs_flags & | 534 | cifs_sb->mnt_cifs_flags & |
529 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 535 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
536 | if (rc) | ||
537 | goto mknod_out; | ||
530 | 538 | ||
531 | if (!rc) { | 539 | rc = cifs_get_inode_info_unix(&newinode, full_path, |
532 | rc = cifs_get_inode_info_unix(&newinode, full_path, | ||
533 | inode->i_sb, xid); | 540 | inode->i_sb, xid); |
534 | if (pTcon->nocase) | 541 | if (pTcon->nocase) |
535 | direntry->d_op = &cifs_ci_dentry_ops; | 542 | direntry->d_op = &cifs_ci_dentry_ops; |
536 | else | 543 | else |
537 | direntry->d_op = &cifs_dentry_ops; | 544 | direntry->d_op = &cifs_dentry_ops; |
538 | if (rc == 0) | ||
539 | d_instantiate(direntry, newinode); | ||
540 | } | ||
541 | } else { | ||
542 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { | ||
543 | int oplock = 0; | ||
544 | u16 fileHandle; | ||
545 | FILE_ALL_INFO *buf; | ||
546 | 545 | ||
547 | cFYI(1, "sfu compat create special file"); | 546 | if (rc == 0) |
547 | d_instantiate(direntry, newinode); | ||
548 | goto mknod_out; | ||
549 | } | ||
548 | 550 | ||
549 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); | 551 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) |
550 | if (buf == NULL) { | 552 | goto mknod_out; |
551 | kfree(full_path); | ||
552 | rc = -ENOMEM; | ||
553 | FreeXid(xid); | ||
554 | return rc; | ||
555 | } | ||
556 | 553 | ||
557 | rc = CIFSSMBOpen(xid, pTcon, full_path, | 554 | |
558 | FILE_CREATE, /* fail if exists */ | 555 | cFYI(1, "sfu compat create special file"); |
559 | GENERIC_WRITE /* BB would | 556 | |
560 | WRITE_OWNER | WRITE_DAC be better? */, | 557 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); |
561 | /* Create a file and set the | 558 | if (buf == NULL) { |
562 | file attribute to SYSTEM */ | 559 | kfree(full_path); |
563 | CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, | 560 | rc = -ENOMEM; |
564 | &fileHandle, &oplock, buf, | 561 | FreeXid(xid); |
565 | cifs_sb->local_nls, | 562 | return rc; |
566 | cifs_sb->mnt_cifs_flags & | ||
567 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
568 | |||
569 | /* BB FIXME - add handling for backlevel servers | ||
570 | which need legacy open and check for all | ||
571 | calls to SMBOpen for fallback to SMBLeagcyOpen */ | ||
572 | if (!rc) { | ||
573 | /* BB Do not bother to decode buf since no | ||
574 | local inode yet to put timestamps in, | ||
575 | but we can reuse it safely */ | ||
576 | unsigned int bytes_written; | ||
577 | struct win_dev *pdev; | ||
578 | pdev = (struct win_dev *)buf; | ||
579 | if (S_ISCHR(mode)) { | ||
580 | memcpy(pdev->type, "IntxCHR", 8); | ||
581 | pdev->major = | ||
582 | cpu_to_le64(MAJOR(device_number)); | ||
583 | pdev->minor = | ||
584 | cpu_to_le64(MINOR(device_number)); | ||
585 | rc = CIFSSMBWrite(xid, pTcon, | ||
586 | fileHandle, | ||
587 | sizeof(struct win_dev), | ||
588 | 0, &bytes_written, (char *)pdev, | ||
589 | NULL, 0); | ||
590 | } else if (S_ISBLK(mode)) { | ||
591 | memcpy(pdev->type, "IntxBLK", 8); | ||
592 | pdev->major = | ||
593 | cpu_to_le64(MAJOR(device_number)); | ||
594 | pdev->minor = | ||
595 | cpu_to_le64(MINOR(device_number)); | ||
596 | rc = CIFSSMBWrite(xid, pTcon, | ||
597 | fileHandle, | ||
598 | sizeof(struct win_dev), | ||
599 | 0, &bytes_written, (char *)pdev, | ||
600 | NULL, 0); | ||
601 | } /* else if(S_ISFIFO */ | ||
602 | CIFSSMBClose(xid, pTcon, fileHandle); | ||
603 | d_drop(direntry); | ||
604 | } | ||
605 | kfree(buf); | ||
606 | /* add code here to set EAs */ | ||
607 | } | ||
608 | } | 563 | } |
609 | 564 | ||
565 | /* FIXME: would WRITE_OWNER | WRITE_DAC be better? */ | ||
566 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, | ||
567 | GENERIC_WRITE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, | ||
568 | &fileHandle, &oplock, buf, cifs_sb->local_nls, | ||
569 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
570 | if (rc) | ||
571 | goto mknod_out; | ||
572 | |||
573 | /* BB Do not bother to decode buf since no local inode yet to put | ||
574 | * timestamps in, but we can reuse it safely */ | ||
575 | |||
576 | pdev = (struct win_dev *)buf; | ||
577 | if (S_ISCHR(mode)) { | ||
578 | memcpy(pdev->type, "IntxCHR", 8); | ||
579 | pdev->major = | ||
580 | cpu_to_le64(MAJOR(device_number)); | ||
581 | pdev->minor = | ||
582 | cpu_to_le64(MINOR(device_number)); | ||
583 | rc = CIFSSMBWrite(xid, pTcon, | ||
584 | fileHandle, | ||
585 | sizeof(struct win_dev), | ||
586 | 0, &bytes_written, (char *)pdev, | ||
587 | NULL, 0); | ||
588 | } else if (S_ISBLK(mode)) { | ||
589 | memcpy(pdev->type, "IntxBLK", 8); | ||
590 | pdev->major = | ||
591 | cpu_to_le64(MAJOR(device_number)); | ||
592 | pdev->minor = | ||
593 | cpu_to_le64(MINOR(device_number)); | ||
594 | rc = CIFSSMBWrite(xid, pTcon, | ||
595 | fileHandle, | ||
596 | sizeof(struct win_dev), | ||
597 | 0, &bytes_written, (char *)pdev, | ||
598 | NULL, 0); | ||
599 | } /* else if (S_ISFIFO) */ | ||
600 | CIFSSMBClose(xid, pTcon, fileHandle); | ||
601 | d_drop(direntry); | ||
602 | |||
603 | /* FIXME: add code here to set EAs */ | ||
604 | |||
605 | mknod_out: | ||
610 | kfree(full_path); | 606 | kfree(full_path); |
607 | kfree(buf); | ||
611 | FreeXid(xid); | 608 | FreeXid(xid); |
612 | return rc; | 609 | return rc; |
613 | } | 610 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index db11fdef0e92..de748c652d11 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -242,8 +242,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
242 | full_path = build_path_from_dentry(file->f_path.dentry); | 242 | full_path = build_path_from_dentry(file->f_path.dentry); |
243 | if (full_path == NULL) { | 243 | if (full_path == NULL) { |
244 | rc = -ENOMEM; | 244 | rc = -ENOMEM; |
245 | FreeXid(xid); | 245 | goto out; |
246 | return rc; | ||
247 | } | 246 | } |
248 | 247 | ||
249 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", | 248 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4bc47e5b5f29..53cce8cc2224 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -801,6 +801,8 @@ retry_iget5_locked: | |||
801 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | 801 | inode->i_flags |= S_NOATIME | S_NOCMTIME; |
802 | if (inode->i_state & I_NEW) { | 802 | if (inode->i_state & I_NEW) { |
803 | inode->i_ino = hash; | 803 | inode->i_ino = hash; |
804 | if (S_ISREG(inode->i_mode)) | ||
805 | inode->i_data.backing_dev_info = sb->s_bdi; | ||
804 | #ifdef CONFIG_CIFS_FSCACHE | 806 | #ifdef CONFIG_CIFS_FSCACHE |
805 | /* initialize per-inode cache cookie pointer */ | 807 | /* initialize per-inode cache cookie pointer */ |
806 | CIFS_I(inode)->fscache = NULL; | 808 | CIFS_I(inode)->fscache = NULL; |
@@ -834,7 +836,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | |||
834 | xid, NULL); | 836 | xid, NULL); |
835 | 837 | ||
836 | if (!inode) | 838 | if (!inode) |
837 | return ERR_PTR(-ENOMEM); | 839 | return ERR_PTR(rc); |
838 | 840 | ||
839 | #ifdef CONFIG_CIFS_FSCACHE | 841 | #ifdef CONFIG_CIFS_FSCACHE |
840 | /* populate tcon->resource_id */ | 842 | /* populate tcon->resource_id */ |
@@ -1462,29 +1464,18 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1462 | { | 1464 | { |
1463 | char *fromName = NULL; | 1465 | char *fromName = NULL; |
1464 | char *toName = NULL; | 1466 | char *toName = NULL; |
1465 | struct cifs_sb_info *cifs_sb_source; | 1467 | struct cifs_sb_info *cifs_sb; |
1466 | struct cifs_sb_info *cifs_sb_target; | ||
1467 | struct cifsTconInfo *tcon; | 1468 | struct cifsTconInfo *tcon; |
1468 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | 1469 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; |
1469 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1470 | FILE_UNIX_BASIC_INFO *info_buf_target; |
1470 | int xid, rc, tmprc; | 1471 | int xid, rc, tmprc; |
1471 | 1472 | ||
1472 | cifs_sb_target = CIFS_SB(target_dir->i_sb); | 1473 | cifs_sb = CIFS_SB(source_dir->i_sb); |
1473 | cifs_sb_source = CIFS_SB(source_dir->i_sb); | 1474 | tcon = cifs_sb->tcon; |
1474 | tcon = cifs_sb_source->tcon; | ||
1475 | 1475 | ||
1476 | xid = GetXid(); | 1476 | xid = GetXid(); |
1477 | 1477 | ||
1478 | /* | 1478 | /* |
1479 | * BB: this might be allowed if same server, but different share. | ||
1480 | * Consider adding support for this | ||
1481 | */ | ||
1482 | if (tcon != cifs_sb_target->tcon) { | ||
1483 | rc = -EXDEV; | ||
1484 | goto cifs_rename_exit; | ||
1485 | } | ||
1486 | |||
1487 | /* | ||
1488 | * we already have the rename sem so we do not need to | 1479 | * we already have the rename sem so we do not need to |
1489 | * grab it again here to protect the path integrity | 1480 | * grab it again here to protect the path integrity |
1490 | */ | 1481 | */ |
@@ -1519,17 +1510,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1519 | info_buf_target = info_buf_source + 1; | 1510 | info_buf_target = info_buf_source + 1; |
1520 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, | 1511 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, |
1521 | info_buf_source, | 1512 | info_buf_source, |
1522 | cifs_sb_source->local_nls, | 1513 | cifs_sb->local_nls, |
1523 | cifs_sb_source->mnt_cifs_flags & | 1514 | cifs_sb->mnt_cifs_flags & |
1524 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1515 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1525 | if (tmprc != 0) | 1516 | if (tmprc != 0) |
1526 | goto unlink_target; | 1517 | goto unlink_target; |
1527 | 1518 | ||
1528 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, | 1519 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, |
1529 | toName, info_buf_target, | 1520 | info_buf_target, |
1530 | cifs_sb_target->local_nls, | 1521 | cifs_sb->local_nls, |
1531 | /* remap based on source sb */ | 1522 | cifs_sb->mnt_cifs_flags & |
1532 | cifs_sb_source->mnt_cifs_flags & | ||
1533 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1523 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1534 | 1524 | ||
1535 | if (tmprc == 0 && (info_buf_source->UniqueId == | 1525 | if (tmprc == 0 && (info_buf_source->UniqueId == |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index f97851119e6c..9aad47a2d62f 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) | |||
206 | } | 206 | } |
207 | 207 | ||
208 | int | 208 | int |
209 | cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, | 209 | cifs_set_port(struct sockaddr *addr, const unsigned short int port) |
210 | const unsigned short int port) | ||
211 | { | 210 | { |
212 | if (!cifs_convert_address(dst, src, len)) | 211 | switch (addr->sa_family) { |
213 | return 0; | ||
214 | |||
215 | switch (dst->sa_family) { | ||
216 | case AF_INET: | 212 | case AF_INET: |
217 | ((struct sockaddr_in *)dst)->sin_port = htons(port); | 213 | ((struct sockaddr_in *)addr)->sin_port = htons(port); |
218 | break; | 214 | break; |
219 | case AF_INET6: | 215 | case AF_INET6: |
220 | ((struct sockaddr_in6 *)dst)->sin6_port = htons(port); | 216 | ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); |
221 | break; | 217 | break; |
222 | default: | 218 | default: |
223 | return 0; | 219 | return 0; |
224 | } | 220 | } |
225 | |||
226 | return 1; | 221 | return 1; |
227 | } | 222 | } |
228 | 223 | ||
224 | int | ||
225 | cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, | ||
226 | const unsigned short int port) | ||
227 | { | ||
228 | if (!cifs_convert_address(dst, src, len)) | ||
229 | return 0; | ||
230 | return cifs_set_port(dst, port); | ||
231 | } | ||
232 | |||
229 | /***************************************************************************** | 233 | /***************************************************************************** |
230 | convert a NT status code to a dos class/code | 234 | convert a NT status code to a dos class/code |
231 | *****************************************************************************/ | 235 | *****************************************************************************/ |
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index de89645777c7..116af7546cf0 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, | |||
184 | } | 184 | } |
185 | 185 | ||
186 | /* adjust outsize. is this useful ?? */ | 186 | /* adjust outsize. is this useful ?? */ |
187 | req->uc_outSize = nbytes; | 187 | req->uc_outSize = nbytes; |
188 | req->uc_flags |= REQ_WRITE; | 188 | req->uc_flags |= CODA_REQ_WRITE; |
189 | count = nbytes; | 189 | count = nbytes; |
190 | 190 | ||
191 | /* Convert filedescriptor into a file handle */ | 191 | /* Convert filedescriptor into a file handle */ |
diff --git a/fs/compat.c b/fs/compat.c index 718c7062aec1..0644a154672b 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1153 | { | 1153 | { |
1154 | compat_ssize_t tot_len; | 1154 | compat_ssize_t tot_len; |
1155 | struct iovec iovstack[UIO_FASTIOV]; | 1155 | struct iovec iovstack[UIO_FASTIOV]; |
1156 | struct iovec *iov; | 1156 | struct iovec *iov = iovstack; |
1157 | ssize_t ret; | 1157 | ssize_t ret; |
1158 | io_fn_t fn; | 1158 | io_fn_t fn; |
1159 | iov_fn_t fnv; | 1159 | iov_fn_t fnv; |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 51f270b479b6..48d74c7391d1 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio) | |||
634 | int ret = 0; | 634 | int ret = 0; |
635 | 635 | ||
636 | if (dio->bio) { | 636 | if (dio->bio) { |
637 | loff_t cur_offset = dio->block_in_file << dio->blkbits; | 637 | loff_t cur_offset = dio->cur_page_fs_offset; |
638 | loff_t bio_next_offset = dio->logical_offset_in_bio + | 638 | loff_t bio_next_offset = dio->logical_offset_in_bio + |
639 | dio->bio->bi_size; | 639 | dio->bio->bi_size; |
640 | 640 | ||
@@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio) | |||
659 | * Submit now if the underlying fs is about to perform a | 659 | * Submit now if the underlying fs is about to perform a |
660 | * metadata read | 660 | * metadata read |
661 | */ | 661 | */ |
662 | if (dio->boundary) | 662 | else if (dio->boundary) |
663 | dio_bio_submit(dio); | 663 | dio_bio_submit(dio); |
664 | } | 664 | } |
665 | 665 | ||
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a2e3b562e65d..cbadc1bee6e7 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -1793,7 +1793,7 @@ struct kmem_cache *ecryptfs_key_tfm_cache; | |||
1793 | static struct list_head key_tfm_list; | 1793 | static struct list_head key_tfm_list; |
1794 | struct mutex key_tfm_list_mutex; | 1794 | struct mutex key_tfm_list_mutex; |
1795 | 1795 | ||
1796 | int ecryptfs_init_crypto(void) | 1796 | int __init ecryptfs_init_crypto(void) |
1797 | { | 1797 | { |
1798 | mutex_init(&key_tfm_list_mutex); | 1798 | mutex_init(&key_tfm_list_mutex); |
1799 | INIT_LIST_HEAD(&key_tfm_list); | 1799 | INIT_LIST_HEAD(&key_tfm_list); |
@@ -2169,7 +2169,6 @@ int ecryptfs_encrypt_and_encode_filename( | |||
2169 | (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE | 2169 | (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE |
2170 | + encoded_name_no_prefix_size); | 2170 | + encoded_name_no_prefix_size); |
2171 | (*encoded_name)[(*encoded_name_size)] = '\0'; | 2171 | (*encoded_name)[(*encoded_name_size)] = '\0'; |
2172 | (*encoded_name_size)++; | ||
2173 | } else { | 2172 | } else { |
2174 | rc = -EOPNOTSUPP; | 2173 | rc = -EOPNOTSUPP; |
2175 | } | 2174 | } |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 6c55113e7222..3fbc94203380 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -349,7 +349,7 @@ out: | |||
349 | 349 | ||
350 | /** | 350 | /** |
351 | * ecryptfs_new_lower_dentry | 351 | * ecryptfs_new_lower_dentry |
352 | * @ename: The name of the new dentry. | 352 | * @name: The name of the new dentry. |
353 | * @lower_dir_dentry: Parent directory of the new dentry. | 353 | * @lower_dir_dentry: Parent directory of the new dentry. |
354 | * @nd: nameidata from last lookup. | 354 | * @nd: nameidata from last lookup. |
355 | * | 355 | * |
@@ -386,20 +386,19 @@ ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry, | |||
386 | * ecryptfs_lookup_one_lower | 386 | * ecryptfs_lookup_one_lower |
387 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up | 387 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up |
388 | * @lower_dir_dentry: lower parent directory | 388 | * @lower_dir_dentry: lower parent directory |
389 | * @name: lower file name | ||
389 | * | 390 | * |
390 | * Get the lower dentry from vfs. If lower dentry does not exist yet, | 391 | * Get the lower dentry from vfs. If lower dentry does not exist yet, |
391 | * create it. | 392 | * create it. |
392 | */ | 393 | */ |
393 | static struct dentry * | 394 | static struct dentry * |
394 | ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, | 395 | ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, |
395 | struct dentry *lower_dir_dentry) | 396 | struct dentry *lower_dir_dentry, struct qstr *name) |
396 | { | 397 | { |
397 | struct nameidata nd; | 398 | struct nameidata nd; |
398 | struct vfsmount *lower_mnt; | 399 | struct vfsmount *lower_mnt; |
399 | struct qstr *name; | ||
400 | int err; | 400 | int err; |
401 | 401 | ||
402 | name = &ecryptfs_dentry->d_name; | ||
403 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( | 402 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( |
404 | ecryptfs_dentry->d_parent)); | 403 | ecryptfs_dentry->d_parent)); |
405 | err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); | 404 | err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); |
@@ -434,6 +433,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
434 | size_t encrypted_and_encoded_name_size; | 433 | size_t encrypted_and_encoded_name_size; |
435 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; | 434 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; |
436 | struct dentry *lower_dir_dentry, *lower_dentry; | 435 | struct dentry *lower_dir_dentry, *lower_dentry; |
436 | struct qstr lower_name; | ||
437 | int rc = 0; | 437 | int rc = 0; |
438 | 438 | ||
439 | ecryptfs_dentry->d_op = &ecryptfs_dops; | 439 | ecryptfs_dentry->d_op = &ecryptfs_dops; |
@@ -444,9 +444,17 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
444 | goto out_d_drop; | 444 | goto out_d_drop; |
445 | } | 445 | } |
446 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 446 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
447 | 447 | lower_name.name = ecryptfs_dentry->d_name.name; | |
448 | lower_name.len = ecryptfs_dentry->d_name.len; | ||
449 | lower_name.hash = ecryptfs_dentry->d_name.hash; | ||
450 | if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { | ||
451 | rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, | ||
452 | &lower_name); | ||
453 | if (rc < 0) | ||
454 | goto out_d_drop; | ||
455 | } | ||
448 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, | 456 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, |
449 | lower_dir_dentry); | 457 | lower_dir_dentry, &lower_name); |
450 | if (IS_ERR(lower_dentry)) { | 458 | if (IS_ERR(lower_dentry)) { |
451 | rc = PTR_ERR(lower_dentry); | 459 | rc = PTR_ERR(lower_dentry); |
452 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " | 460 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " |
@@ -471,8 +479,17 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
471 | "filename; rc = [%d]\n", __func__, rc); | 479 | "filename; rc = [%d]\n", __func__, rc); |
472 | goto out_d_drop; | 480 | goto out_d_drop; |
473 | } | 481 | } |
482 | lower_name.name = encrypted_and_encoded_name; | ||
483 | lower_name.len = encrypted_and_encoded_name_size; | ||
484 | lower_name.hash = full_name_hash(lower_name.name, lower_name.len); | ||
485 | if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { | ||
486 | rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, | ||
487 | &lower_name); | ||
488 | if (rc < 0) | ||
489 | goto out_d_drop; | ||
490 | } | ||
474 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, | 491 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, |
475 | lower_dir_dentry); | 492 | lower_dir_dentry, &lower_name); |
476 | if (IS_ERR(lower_dentry)) { | 493 | if (IS_ERR(lower_dentry)) { |
477 | rc = PTR_ERR(lower_dentry); | 494 | rc = PTR_ERR(lower_dentry); |
478 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " | 495 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 89c5476506ef..73811cfa2ea4 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -515,6 +515,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, | |||
515 | if (!s) { | 515 | if (!s) { |
516 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " | 516 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " |
517 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); | 517 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); |
518 | rc = -ENOMEM; | ||
518 | goto out; | 519 | goto out; |
519 | } | 520 | } |
520 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; | 521 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; |
@@ -806,6 +807,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, | |||
806 | if (!s) { | 807 | if (!s) { |
807 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " | 808 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " |
808 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); | 809 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); |
810 | rc = -ENOMEM; | ||
809 | goto out; | 811 | goto out; |
810 | } | 812 | } |
811 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; | 813 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; |
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index d8c3a373aafa..0851ab6980f5 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c | |||
@@ -86,7 +86,7 @@ out: | |||
86 | return 0; | 86 | return 0; |
87 | } | 87 | } |
88 | 88 | ||
89 | int ecryptfs_init_kthread(void) | 89 | int __init ecryptfs_init_kthread(void) |
90 | { | 90 | { |
91 | int rc = 0; | 91 | int rc = 0; |
92 | 92 | ||
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index bcb68c0cb1f0..ab2248090515 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -473,7 +473,7 @@ sleep: | |||
473 | return rc; | 473 | return rc; |
474 | } | 474 | } |
475 | 475 | ||
476 | int ecryptfs_init_messaging(void) | 476 | int __init ecryptfs_init_messaging(void) |
477 | { | 477 | { |
478 | int i; | 478 | int i; |
479 | int rc = 0; | 479 | int rc = 0; |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 3745f612bcd4..00208c3d7e92 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -500,7 +500,7 @@ static struct miscdevice ecryptfs_miscdev = { | |||
500 | * | 500 | * |
501 | * Returns zero on success; non-zero otherwise | 501 | * Returns zero on success; non-zero otherwise |
502 | */ | 502 | */ |
503 | int ecryptfs_init_ecryptfs_miscdev(void) | 503 | int __init ecryptfs_init_ecryptfs_miscdev(void) |
504 | { | 504 | { |
505 | int rc; | 505 | int rc; |
506 | 506 | ||
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max) | |||
376 | argv++; | 376 | argv++; |
377 | if (i++ >= max) | 377 | if (i++ >= max) |
378 | return -E2BIG; | 378 | return -E2BIG; |
379 | |||
380 | if (fatal_signal_pending(current)) | ||
381 | return -ERESTARTNOHAND; | ||
379 | cond_resched(); | 382 | cond_resched(); |
380 | } | 383 | } |
381 | } | 384 | } |
@@ -419,6 +422,12 @@ static int copy_strings(int argc, const char __user *const __user *argv, | |||
419 | while (len > 0) { | 422 | while (len > 0) { |
420 | int offset, bytes_to_copy; | 423 | int offset, bytes_to_copy; |
421 | 424 | ||
425 | if (fatal_signal_pending(current)) { | ||
426 | ret = -ERESTARTNOHAND; | ||
427 | goto out; | ||
428 | } | ||
429 | cond_resched(); | ||
430 | |||
422 | offset = pos % PAGE_SIZE; | 431 | offset = pos % PAGE_SIZE; |
423 | if (offset == 0) | 432 | if (offset == 0) |
424 | offset = PAGE_SIZE; | 433 | offset = PAGE_SIZE; |
@@ -594,6 +603,11 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
594 | #else | 603 | #else |
595 | stack_top = arch_align_stack(stack_top); | 604 | stack_top = arch_align_stack(stack_top); |
596 | stack_top = PAGE_ALIGN(stack_top); | 605 | stack_top = PAGE_ALIGN(stack_top); |
606 | |||
607 | if (unlikely(stack_top < mmap_min_addr) || | ||
608 | unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) | ||
609 | return -ENOMEM; | ||
610 | |||
597 | stack_shift = vma->vm_end - stack_top; | 611 | stack_shift = vma->vm_end - stack_top; |
598 | 612 | ||
599 | bprm->p -= stack_shift; | 613 | bprm->p -= stack_shift; |
@@ -2000,3 +2014,41 @@ fail_creds: | |||
2000 | fail: | 2014 | fail: |
2001 | return; | 2015 | return; |
2002 | } | 2016 | } |
2017 | |||
2018 | /* | ||
2019 | * Core dumping helper functions. These are the only things you should | ||
2020 | * do on a core-file: use only these functions to write out all the | ||
2021 | * necessary info. | ||
2022 | */ | ||
2023 | int dump_write(struct file *file, const void *addr, int nr) | ||
2024 | { | ||
2025 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
2026 | } | ||
2027 | |||
2028 | int dump_seek(struct file *file, loff_t off) | ||
2029 | { | ||
2030 | int ret = 1; | ||
2031 | |||
2032 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
2033 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
2034 | return 0; | ||
2035 | } else { | ||
2036 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
2037 | |||
2038 | if (!buf) | ||
2039 | return 0; | ||
2040 | while (off > 0) { | ||
2041 | unsigned long n = off; | ||
2042 | |||
2043 | if (n > PAGE_SIZE) | ||
2044 | n = PAGE_SIZE; | ||
2045 | if (!dump_write(file, buf, n)) { | ||
2046 | ret = 0; | ||
2047 | break; | ||
2048 | } | ||
2049 | off -= n; | ||
2050 | } | ||
2051 | free_page((unsigned long)buf); | ||
2052 | } | ||
2053 | return ret; | ||
2054 | } | ||
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index eb7368ebd8cd..3eadd97324b1 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -54,6 +54,9 @@ struct page_collect { | |||
54 | unsigned nr_pages; | 54 | unsigned nr_pages; |
55 | unsigned long length; | 55 | unsigned long length; |
56 | loff_t pg_first; /* keep 64bit also in 32-arches */ | 56 | loff_t pg_first; /* keep 64bit also in 32-arches */ |
57 | bool read_4_write; /* This means two things: that the read is sync | ||
58 | * And the pages should not be unlocked. | ||
59 | */ | ||
57 | }; | 60 | }; |
58 | 61 | ||
59 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 62 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
@@ -71,6 +74,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | |||
71 | pcol->nr_pages = 0; | 74 | pcol->nr_pages = 0; |
72 | pcol->length = 0; | 75 | pcol->length = 0; |
73 | pcol->pg_first = -1; | 76 | pcol->pg_first = -1; |
77 | pcol->read_4_write = false; | ||
74 | } | 78 | } |
75 | 79 | ||
76 | static void _pcol_reset(struct page_collect *pcol) | 80 | static void _pcol_reset(struct page_collect *pcol) |
@@ -347,7 +351,8 @@ static int readpage_strip(void *data, struct page *page) | |||
347 | if (PageError(page)) | 351 | if (PageError(page)) |
348 | ClearPageError(page); | 352 | ClearPageError(page); |
349 | 353 | ||
350 | unlock_page(page); | 354 | if (!pcol->read_4_write) |
355 | unlock_page(page); | ||
351 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | 356 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," |
352 | " splitting\n", inode->i_ino, page->index); | 357 | " splitting\n", inode->i_ino, page->index); |
353 | 358 | ||
@@ -428,6 +433,7 @@ static int _readpage(struct page *page, bool is_sync) | |||
428 | /* readpage_strip might call read_exec(,is_sync==false) at several | 433 | /* readpage_strip might call read_exec(,is_sync==false) at several |
429 | * places but not if we have a single page. | 434 | * places but not if we have a single page. |
430 | */ | 435 | */ |
436 | pcol.read_4_write = is_sync; | ||
431 | ret = readpage_strip(&pcol, page); | 437 | ret = readpage_strip(&pcol, page); |
432 | if (ret) { | 438 | if (ret) { |
433 | EXOFS_ERR("_readpage => %d\n", ret); | 439 | EXOFS_ERR("_readpage => %d\n", ret); |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 6769fd0f35b8..f8cc34f542c3 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync); | |||
769 | 769 | ||
770 | static int __init fcntl_init(void) | 770 | static int __init fcntl_init(void) |
771 | { | 771 | { |
772 | /* please add new bits here to ensure allocation uniqueness */ | 772 | /* |
773 | BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | 773 | * Please add new bits here to ensure allocation uniqueness. |
774 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY | ||
775 | * is defined as O_NONBLOCK on some platforms and not on others. | ||
776 | */ | ||
777 | BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | ||
774 | O_RDONLY | O_WRONLY | O_RDWR | | 778 | O_RDONLY | O_WRONLY | O_RDWR | |
775 | O_CREAT | O_EXCL | O_NOCTTY | | 779 | O_CREAT | O_EXCL | O_NOCTTY | |
776 | O_TRUNC | O_APPEND | O_NONBLOCK | | 780 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ |
777 | __O_SYNC | O_DSYNC | FASYNC | | 781 | __O_SYNC | O_DSYNC | FASYNC | |
778 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | 782 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | |
779 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | 783 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7d9d06ba184b..ab38fef1c9a1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -52,8 +52,6 @@ struct wb_writeback_work { | |||
52 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
53 | #include <trace/events/writeback.h> | 53 | #include <trace/events/writeback.h> |
54 | 54 | ||
55 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
56 | |||
57 | /* | 55 | /* |
58 | * We don't actually have pdflush, but this one is exported though /proc... | 56 | * We don't actually have pdflush, but this one is exported though /proc... |
59 | */ | 57 | */ |
@@ -71,6 +69,16 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
71 | return test_bit(BDI_writeback_running, &bdi->state); | 69 | return test_bit(BDI_writeback_running, &bdi->state); |
72 | } | 70 | } |
73 | 71 | ||
72 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) | ||
73 | { | ||
74 | struct super_block *sb = inode->i_sb; | ||
75 | |||
76 | if (strcmp(sb->s_type->name, "bdev") == 0) | ||
77 | return inode->i_mapping->backing_dev_info; | ||
78 | |||
79 | return sb->s_bdi; | ||
80 | } | ||
81 | |||
74 | static void bdi_queue_work(struct backing_dev_info *bdi, | 82 | static void bdi_queue_work(struct backing_dev_info *bdi, |
75 | struct wb_writeback_work *work) | 83 | struct wb_writeback_work *work) |
76 | { | 84 | { |
@@ -808,7 +816,7 @@ int bdi_writeback_thread(void *data) | |||
808 | wb->last_active = jiffies; | 816 | wb->last_active = jiffies; |
809 | 817 | ||
810 | set_current_state(TASK_INTERRUPTIBLE); | 818 | set_current_state(TASK_INTERRUPTIBLE); |
811 | if (!list_empty(&bdi->work_list)) { | 819 | if (!list_empty(&bdi->work_list) || kthread_should_stop()) { |
812 | __set_current_state(TASK_RUNNING); | 820 | __set_current_state(TASK_RUNNING); |
813 | continue; | 821 | continue; |
814 | } | 822 | } |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 69ad053ffd78..cde755cca564 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc) | |||
276 | * Called with fc->lock, unlocks it | 276 | * Called with fc->lock, unlocks it |
277 | */ | 277 | */ |
278 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) | 278 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) |
279 | __releases(&fc->lock) | 279 | __releases(fc->lock) |
280 | { | 280 | { |
281 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; | 281 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; |
282 | req->end = NULL; | 282 | req->end = NULL; |
@@ -306,8 +306,8 @@ __releases(&fc->lock) | |||
306 | 306 | ||
307 | static void wait_answer_interruptible(struct fuse_conn *fc, | 307 | static void wait_answer_interruptible(struct fuse_conn *fc, |
308 | struct fuse_req *req) | 308 | struct fuse_req *req) |
309 | __releases(&fc->lock) | 309 | __releases(fc->lock) |
310 | __acquires(&fc->lock) | 310 | __acquires(fc->lock) |
311 | { | 311 | { |
312 | if (signal_pending(current)) | 312 | if (signal_pending(current)) |
313 | return; | 313 | return; |
@@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) | |||
325 | } | 325 | } |
326 | 326 | ||
327 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) | 327 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) |
328 | __releases(&fc->lock) | 328 | __releases(fc->lock) |
329 | __acquires(&fc->lock) | 329 | __acquires(fc->lock) |
330 | { | 330 | { |
331 | if (!fc->no_interrupt) { | 331 | if (!fc->no_interrupt) { |
332 | /* Any signal may interrupt this */ | 332 | /* Any signal may interrupt this */ |
@@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc) | |||
905 | 905 | ||
906 | /* Wait until a request is available on the pending list */ | 906 | /* Wait until a request is available on the pending list */ |
907 | static void request_wait(struct fuse_conn *fc) | 907 | static void request_wait(struct fuse_conn *fc) |
908 | __releases(&fc->lock) | 908 | __releases(fc->lock) |
909 | __acquires(&fc->lock) | 909 | __acquires(fc->lock) |
910 | { | 910 | { |
911 | DECLARE_WAITQUEUE(wait, current); | 911 | DECLARE_WAITQUEUE(wait, current); |
912 | 912 | ||
@@ -934,7 +934,7 @@ __acquires(&fc->lock) | |||
934 | */ | 934 | */ |
935 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, | 935 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, |
936 | size_t nbytes, struct fuse_req *req) | 936 | size_t nbytes, struct fuse_req *req) |
937 | __releases(&fc->lock) | 937 | __releases(fc->lock) |
938 | { | 938 | { |
939 | struct fuse_in_header ih; | 939 | struct fuse_in_header ih; |
940 | struct fuse_interrupt_in arg; | 940 | struct fuse_interrupt_in arg; |
@@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | |||
1354 | loff_t file_size; | 1354 | loff_t file_size; |
1355 | unsigned int num; | 1355 | unsigned int num; |
1356 | unsigned int offset; | 1356 | unsigned int offset; |
1357 | size_t total_len; | 1357 | size_t total_len = 0; |
1358 | 1358 | ||
1359 | req = fuse_get_req(fc); | 1359 | req = fuse_get_req(fc); |
1360 | if (IS_ERR(req)) | 1360 | if (IS_ERR(req)) |
@@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) | |||
1720 | * This function releases and reacquires fc->lock | 1720 | * This function releases and reacquires fc->lock |
1721 | */ | 1721 | */ |
1722 | static void end_requests(struct fuse_conn *fc, struct list_head *head) | 1722 | static void end_requests(struct fuse_conn *fc, struct list_head *head) |
1723 | __releases(&fc->lock) | 1723 | __releases(fc->lock) |
1724 | __acquires(&fc->lock) | 1724 | __acquires(fc->lock) |
1725 | { | 1725 | { |
1726 | while (!list_empty(head)) { | 1726 | while (!list_empty(head)) { |
1727 | struct fuse_req *req; | 1727 | struct fuse_req *req; |
@@ -1744,8 +1744,8 @@ __acquires(&fc->lock) | |||
1744 | * locked). | 1744 | * locked). |
1745 | */ | 1745 | */ |
1746 | static void end_io_requests(struct fuse_conn *fc) | 1746 | static void end_io_requests(struct fuse_conn *fc) |
1747 | __releases(&fc->lock) | 1747 | __releases(fc->lock) |
1748 | __acquires(&fc->lock) | 1748 | __acquires(fc->lock) |
1749 | { | 1749 | { |
1750 | while (!list_empty(&fc->io)) { | 1750 | while (!list_empty(&fc->io)) { |
1751 | struct fuse_req *req = | 1751 | struct fuse_req *req = |
@@ -1769,6 +1769,16 @@ __acquires(&fc->lock) | |||
1769 | } | 1769 | } |
1770 | } | 1770 | } |
1771 | 1771 | ||
1772 | static void end_queued_requests(struct fuse_conn *fc) | ||
1773 | __releases(fc->lock) | ||
1774 | __acquires(fc->lock) | ||
1775 | { | ||
1776 | fc->max_background = UINT_MAX; | ||
1777 | flush_bg_queue(fc); | ||
1778 | end_requests(fc, &fc->pending); | ||
1779 | end_requests(fc, &fc->processing); | ||
1780 | } | ||
1781 | |||
1772 | /* | 1782 | /* |
1773 | * Abort all requests. | 1783 | * Abort all requests. |
1774 | * | 1784 | * |
@@ -1795,8 +1805,7 @@ void fuse_abort_conn(struct fuse_conn *fc) | |||
1795 | fc->connected = 0; | 1805 | fc->connected = 0; |
1796 | fc->blocked = 0; | 1806 | fc->blocked = 0; |
1797 | end_io_requests(fc); | 1807 | end_io_requests(fc); |
1798 | end_requests(fc, &fc->pending); | 1808 | end_queued_requests(fc); |
1799 | end_requests(fc, &fc->processing); | ||
1800 | wake_up_all(&fc->waitq); | 1809 | wake_up_all(&fc->waitq); |
1801 | wake_up_all(&fc->blocked_waitq); | 1810 | wake_up_all(&fc->blocked_waitq); |
1802 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | 1811 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); |
@@ -1811,8 +1820,9 @@ int fuse_dev_release(struct inode *inode, struct file *file) | |||
1811 | if (fc) { | 1820 | if (fc) { |
1812 | spin_lock(&fc->lock); | 1821 | spin_lock(&fc->lock); |
1813 | fc->connected = 0; | 1822 | fc->connected = 0; |
1814 | end_requests(fc, &fc->pending); | 1823 | fc->blocked = 0; |
1815 | end_requests(fc, &fc->processing); | 1824 | end_queued_requests(fc); |
1825 | wake_up_all(&fc->blocked_waitq); | ||
1816 | spin_unlock(&fc->lock); | 1826 | spin_unlock(&fc->lock); |
1817 | fuse_conn_put(fc); | 1827 | fuse_conn_put(fc); |
1818 | } | 1828 | } |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 147c1f71bdb9..c8224587123f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) | |||
1144 | 1144 | ||
1145 | /* Called under fc->lock, may release and reacquire it */ | 1145 | /* Called under fc->lock, may release and reacquire it */ |
1146 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) | 1146 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) |
1147 | __releases(&fc->lock) | 1147 | __releases(fc->lock) |
1148 | __acquires(&fc->lock) | 1148 | __acquires(fc->lock) |
1149 | { | 1149 | { |
1150 | struct fuse_inode *fi = get_fuse_inode(req->inode); | 1150 | struct fuse_inode *fi = get_fuse_inode(req->inode); |
1151 | loff_t size = i_size_read(req->inode); | 1151 | loff_t size = i_size_read(req->inode); |
@@ -1183,8 +1183,8 @@ __acquires(&fc->lock) | |||
1183 | * Called with fc->lock | 1183 | * Called with fc->lock |
1184 | */ | 1184 | */ |
1185 | void fuse_flush_writepages(struct inode *inode) | 1185 | void fuse_flush_writepages(struct inode *inode) |
1186 | __releases(&fc->lock) | 1186 | __releases(fc->lock) |
1187 | __acquires(&fc->lock) | 1187 | __acquires(fc->lock) |
1188 | { | 1188 | { |
1189 | struct fuse_conn *fc = get_fuse_conn(inode); | 1189 | struct fuse_conn *fc = get_fuse_conn(inode); |
1190 | struct fuse_inode *fi = get_fuse_inode(inode); | 1190 | struct fuse_inode *fi = get_fuse_inode(inode); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9c65170e932e..eb01f3575e10 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -923,7 +923,7 @@ int gfs2_logd(void *data) | |||
923 | 923 | ||
924 | do { | 924 | do { |
925 | prepare_to_wait(&sdp->sd_logd_waitq, &wait, | 925 | prepare_to_wait(&sdp->sd_logd_waitq, &wait, |
926 | TASK_UNINTERRUPTIBLE); | 926 | TASK_INTERRUPTIBLE); |
927 | if (!gfs2_ail_flush_reqd(sdp) && | 927 | if (!gfs2_ail_flush_reqd(sdp) && |
928 | !gfs2_jrnl_flush_reqd(sdp) && | 928 | !gfs2_jrnl_flush_reqd(sdp) && |
929 | !kthread_should_stop()) | 929 | !kthread_should_stop()) |
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index e20ee85955d1..f3f3578393a4 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) | |||
115 | 115 | ||
116 | inode_inc_link_count(dir); | 116 | inode_inc_link_count(dir); |
117 | 117 | ||
118 | inode = minix_new_inode(dir, mode, &err); | 118 | inode = minix_new_inode(dir, S_IFDIR | mode, &err); |
119 | if (!inode) | 119 | if (!inode) |
120 | goto out_dir; | 120 | goto out_dir; |
121 | 121 | ||
diff --git a/fs/namespace.c b/fs/namespace.c index de402eb6eafb..a72eaabfe8f2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1484,13 +1484,30 @@ out_unlock: | |||
1484 | } | 1484 | } |
1485 | 1485 | ||
1486 | /* | 1486 | /* |
1487 | * Sanity check the flags to change_mnt_propagation. | ||
1488 | */ | ||
1489 | |||
1490 | static int flags_to_propagation_type(int flags) | ||
1491 | { | ||
1492 | int type = flags & ~MS_REC; | ||
1493 | |||
1494 | /* Fail if any non-propagation flags are set */ | ||
1495 | if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) | ||
1496 | return 0; | ||
1497 | /* Only one propagation flag should be set */ | ||
1498 | if (!is_power_of_2(type)) | ||
1499 | return 0; | ||
1500 | return type; | ||
1501 | } | ||
1502 | |||
1503 | /* | ||
1487 | * recursively change the type of the mountpoint. | 1504 | * recursively change the type of the mountpoint. |
1488 | */ | 1505 | */ |
1489 | static int do_change_type(struct path *path, int flag) | 1506 | static int do_change_type(struct path *path, int flag) |
1490 | { | 1507 | { |
1491 | struct vfsmount *m, *mnt = path->mnt; | 1508 | struct vfsmount *m, *mnt = path->mnt; |
1492 | int recurse = flag & MS_REC; | 1509 | int recurse = flag & MS_REC; |
1493 | int type = flag & ~MS_REC; | 1510 | int type; |
1494 | int err = 0; | 1511 | int err = 0; |
1495 | 1512 | ||
1496 | if (!capable(CAP_SYS_ADMIN)) | 1513 | if (!capable(CAP_SYS_ADMIN)) |
@@ -1499,6 +1516,10 @@ static int do_change_type(struct path *path, int flag) | |||
1499 | if (path->dentry != path->mnt->mnt_root) | 1516 | if (path->dentry != path->mnt->mnt_root) |
1500 | return -EINVAL; | 1517 | return -EINVAL; |
1501 | 1518 | ||
1519 | type = flags_to_propagation_type(flag); | ||
1520 | if (!type) | ||
1521 | return -EINVAL; | ||
1522 | |||
1502 | down_write(&namespace_sem); | 1523 | down_write(&namespace_sem); |
1503 | if (type == MS_SHARED) { | 1524 | if (type == MS_SHARED) { |
1504 | err = invent_group_ids(mnt, recurse); | 1525 | err = invent_group_ids(mnt, recurse); |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6c2aad49d731..f7e13db613cb 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -63,6 +63,7 @@ config NFS_V3_ACL | |||
63 | config NFS_V4 | 63 | config NFS_V4 |
64 | bool "NFS client support for NFS version 4" | 64 | bool "NFS client support for NFS version 4" |
65 | depends on NFS_FS | 65 | depends on NFS_FS |
66 | select SUNRPC_GSS | ||
66 | help | 67 | help |
67 | This option enables support for version 4 of the NFS protocol | 68 | This option enables support for version 4 of the NFS protocol |
68 | (RFC 3530) in the kernel's NFS client. | 69 | (RFC 3530) in the kernel's NFS client. |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4e7df2adb212..e7340729af89 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, | |||
275 | sin1->sin6_scope_id != sin2->sin6_scope_id) | 275 | sin1->sin6_scope_id != sin2->sin6_scope_id) |
276 | return 0; | 276 | return 0; |
277 | 277 | ||
278 | return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr); | 278 | return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); |
279 | } | 279 | } |
280 | #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ | 280 | #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ |
281 | static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, | 281 | static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index eb51bd6201da..05bf3c0dc751 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) | |||
723 | default: | 723 | default: |
724 | BUG(); | 724 | BUG(); |
725 | } | 725 | } |
726 | if (res < 0) | ||
727 | dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" | ||
728 | " - error %d!\n", | ||
729 | __func__, res); | ||
730 | return res; | 726 | return res; |
731 | } | 727 | } |
732 | 728 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ec3966e4706b..f4cbf0c306c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
431 | goto out_err; | 431 | goto out_err; |
432 | 432 | ||
433 | error = server->nfs_client->rpc_ops->statfs(server, fh, &res); | 433 | error = server->nfs_client->rpc_ops->statfs(server, fh, &res); |
434 | if (unlikely(error == -ESTALE)) { | ||
435 | struct dentry *pd_dentry; | ||
434 | 436 | ||
437 | pd_dentry = dget_parent(dentry); | ||
438 | if (pd_dentry != NULL) { | ||
439 | nfs_zap_caches(pd_dentry->d_inode); | ||
440 | dput(pd_dentry); | ||
441 | } | ||
442 | } | ||
435 | nfs_free_fattr(res.fattr); | 443 | nfs_free_fattr(res.fattr); |
436 | if (error < 0) | 444 | if (error < 0) |
437 | goto out_err; | 445 | goto out_err; |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 95932f523aef..4264377552e2 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -69,6 +69,7 @@ config NFSD_V4 | |||
69 | depends on NFSD && PROC_FS && EXPERIMENTAL | 69 | depends on NFSD && PROC_FS && EXPERIMENTAL |
70 | select NFSD_V3 | 70 | select NFSD_V3 |
71 | select FS_POSIX_ACL | 71 | select FS_POSIX_ACL |
72 | select SUNRPC_GSS | ||
72 | help | 73 | help |
73 | This option enables support in your system's NFS server for | 74 | This option enables support in your system's NFS server for |
74 | version 4 of the NFS protocol (RFC 3530). | 75 | version 4 of the NFS protocol (RFC 3530). |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e7357104cfd..cf0d2ffb3c84 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { | |||
440 | 440 | ||
441 | static int nfs4_access_to_omode(u32 access) | 441 | static int nfs4_access_to_omode(u32 access) |
442 | { | 442 | { |
443 | switch (access) { | 443 | switch (access & NFS4_SHARE_ACCESS_BOTH) { |
444 | case NFS4_SHARE_ACCESS_READ: | 444 | case NFS4_SHARE_ACCESS_READ: |
445 | return O_RDONLY; | 445 | return O_RDONLY; |
446 | case NFS4_SHARE_ACCESS_WRITE: | 446 | case NFS4_SHARE_ACCESS_WRITE: |
@@ -2450,14 +2450,13 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, | |||
2450 | static __be32 | 2450 | static __be32 |
2451 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) | 2451 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) |
2452 | { | 2452 | { |
2453 | u32 op_share_access, new_access; | 2453 | u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; |
2454 | bool new_access; | ||
2454 | __be32 status; | 2455 | __be32 status; |
2455 | 2456 | ||
2456 | set_access(&new_access, stp->st_access_bmap); | 2457 | new_access = !test_bit(op_share_access, &stp->st_access_bmap); |
2457 | new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK; | ||
2458 | |||
2459 | if (new_access) { | 2458 | if (new_access) { |
2460 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access); | 2459 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, op_share_access); |
2461 | if (status) | 2460 | if (status) |
2462 | return status; | 2461 | return status; |
2463 | } | 2462 | } |
@@ -2470,7 +2469,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c | |||
2470 | return status; | 2469 | return status; |
2471 | } | 2470 | } |
2472 | /* remember the open */ | 2471 | /* remember the open */ |
2473 | op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; | ||
2474 | __set_bit(op_share_access, &stp->st_access_bmap); | 2472 | __set_bit(op_share_access, &stp->st_access_bmap); |
2475 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 2473 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); |
2476 | 2474 | ||
@@ -2983,7 +2981,6 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
2983 | *filpp = find_readable_file(stp->st_file); | 2981 | *filpp = find_readable_file(stp->st_file); |
2984 | else | 2982 | else |
2985 | *filpp = find_writeable_file(stp->st_file); | 2983 | *filpp = find_writeable_file(stp->st_file); |
2986 | BUG_ON(!*filpp); /* assured by check_openmode */ | ||
2987 | } | 2984 | } |
2988 | } | 2985 | } |
2989 | status = nfs_ok; | 2986 | status = nfs_ok; |
@@ -3561,7 +3558,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3561 | struct nfs4_stateowner *open_sop = NULL; | 3558 | struct nfs4_stateowner *open_sop = NULL; |
3562 | struct nfs4_stateowner *lock_sop = NULL; | 3559 | struct nfs4_stateowner *lock_sop = NULL; |
3563 | struct nfs4_stateid *lock_stp; | 3560 | struct nfs4_stateid *lock_stp; |
3564 | struct file *filp; | 3561 | struct nfs4_file *fp; |
3562 | struct file *filp = NULL; | ||
3565 | struct file_lock file_lock; | 3563 | struct file_lock file_lock; |
3566 | struct file_lock conflock; | 3564 | struct file_lock conflock; |
3567 | __be32 status = 0; | 3565 | __be32 status = 0; |
@@ -3591,7 +3589,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3591 | * lock stateid. | 3589 | * lock stateid. |
3592 | */ | 3590 | */ |
3593 | struct nfs4_stateid *open_stp = NULL; | 3591 | struct nfs4_stateid *open_stp = NULL; |
3594 | struct nfs4_file *fp; | ||
3595 | 3592 | ||
3596 | status = nfserr_stale_clientid; | 3593 | status = nfserr_stale_clientid; |
3597 | if (!nfsd4_has_session(cstate) && | 3594 | if (!nfsd4_has_session(cstate) && |
@@ -3634,6 +3631,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3634 | if (status) | 3631 | if (status) |
3635 | goto out; | 3632 | goto out; |
3636 | lock_sop = lock->lk_replay_owner; | 3633 | lock_sop = lock->lk_replay_owner; |
3634 | fp = lock_stp->st_file; | ||
3637 | } | 3635 | } |
3638 | /* lock->lk_replay_owner and lock_stp have been created or found */ | 3636 | /* lock->lk_replay_owner and lock_stp have been created or found */ |
3639 | 3637 | ||
@@ -3648,13 +3646,19 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3648 | switch (lock->lk_type) { | 3646 | switch (lock->lk_type) { |
3649 | case NFS4_READ_LT: | 3647 | case NFS4_READ_LT: |
3650 | case NFS4_READW_LT: | 3648 | case NFS4_READW_LT: |
3651 | filp = find_readable_file(lock_stp->st_file); | 3649 | if (find_readable_file(lock_stp->st_file)) { |
3650 | nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ); | ||
3651 | filp = find_readable_file(lock_stp->st_file); | ||
3652 | } | ||
3652 | file_lock.fl_type = F_RDLCK; | 3653 | file_lock.fl_type = F_RDLCK; |
3653 | cmd = F_SETLK; | 3654 | cmd = F_SETLK; |
3654 | break; | 3655 | break; |
3655 | case NFS4_WRITE_LT: | 3656 | case NFS4_WRITE_LT: |
3656 | case NFS4_WRITEW_LT: | 3657 | case NFS4_WRITEW_LT: |
3657 | filp = find_writeable_file(lock_stp->st_file); | 3658 | if (find_writeable_file(lock_stp->st_file)) { |
3659 | nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE); | ||
3660 | filp = find_writeable_file(lock_stp->st_file); | ||
3661 | } | ||
3658 | file_lock.fl_type = F_WRLCK; | 3662 | file_lock.fl_type = F_WRLCK; |
3659 | cmd = F_SETLK; | 3663 | cmd = F_SETLK; |
3660 | break; | 3664 | break; |
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index cdfb8c6a4206..c16f8d8331b5 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h | |||
@@ -196,8 +196,6 @@ fh_lock(struct svc_fh *fhp) | |||
196 | static inline void | 196 | static inline void |
197 | fh_unlock(struct svc_fh *fhp) | 197 | fh_unlock(struct svc_fh *fhp) |
198 | { | 198 | { |
199 | BUG_ON(!fhp->fh_dentry); | ||
200 | |||
201 | if (fhp->fh_locked) { | 199 | if (fhp->fh_locked) { |
202 | fill_post_wcc(fhp); | 200 | fill_post_wcc(fhp); |
203 | mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); | 201 | mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 7731a75971dd..322518c88e4b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -363,23 +363,23 @@ struct nfs4_file { | |||
363 | * at all? */ | 363 | * at all? */ |
364 | static inline struct file *find_writeable_file(struct nfs4_file *f) | 364 | static inline struct file *find_writeable_file(struct nfs4_file *f) |
365 | { | 365 | { |
366 | if (f->fi_fds[O_RDWR]) | 366 | if (f->fi_fds[O_WRONLY]) |
367 | return f->fi_fds[O_RDWR]; | 367 | return f->fi_fds[O_WRONLY]; |
368 | return f->fi_fds[O_WRONLY]; | 368 | return f->fi_fds[O_RDWR]; |
369 | } | 369 | } |
370 | 370 | ||
371 | static inline struct file *find_readable_file(struct nfs4_file *f) | 371 | static inline struct file *find_readable_file(struct nfs4_file *f) |
372 | { | 372 | { |
373 | if (f->fi_fds[O_RDWR]) | 373 | if (f->fi_fds[O_RDONLY]) |
374 | return f->fi_fds[O_RDWR]; | 374 | return f->fi_fds[O_RDONLY]; |
375 | return f->fi_fds[O_RDONLY]; | 375 | return f->fi_fds[O_RDWR]; |
376 | } | 376 | } |
377 | 377 | ||
378 | static inline struct file *find_any_file(struct nfs4_file *f) | 378 | static inline struct file *find_any_file(struct nfs4_file *f) |
379 | { | 379 | { |
380 | if (f->fi_fds[O_RDWR]) | 380 | if (f->fi_fds[O_RDWR]) |
381 | return f->fi_fds[O_RDWR]; | 381 | return f->fi_fds[O_RDWR]; |
382 | else if (f->fi_fds[O_RDWR]) | 382 | else if (f->fi_fds[O_WRONLY]) |
383 | return f->fi_fds[O_WRONLY]; | 383 | return f->fi_fds[O_WRONLY]; |
384 | else | 384 | else |
385 | return f->fi_fds[O_RDONLY]; | 385 | return f->fi_fds[O_RDONLY]; |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 96360a83cb91..661a6cf8e826 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -2033,15 +2033,17 @@ out: | |||
2033 | __be32 | 2033 | __be32 |
2034 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) | 2034 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) |
2035 | { | 2035 | { |
2036 | struct path path = { | ||
2037 | .mnt = fhp->fh_export->ex_path.mnt, | ||
2038 | .dentry = fhp->fh_dentry, | ||
2039 | }; | ||
2040 | __be32 err; | 2036 | __be32 err; |
2041 | 2037 | ||
2042 | err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); | 2038 | err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); |
2043 | if (!err && vfs_statfs(&path, stat)) | 2039 | if (!err) { |
2044 | err = nfserr_io; | 2040 | struct path path = { |
2041 | .mnt = fhp->fh_export->ex_path.mnt, | ||
2042 | .dentry = fhp->fh_dentry, | ||
2043 | }; | ||
2044 | if (vfs_statfs(&path, stat)) | ||
2045 | err = nfserr_io; | ||
2046 | } | ||
2045 | return err; | 2047 | return err; |
2046 | } | 2048 | } |
2047 | 2049 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d97310f07bef..d27715103376 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
446 | nilfs_mdt_destroy(nilfs->ns_cpfile); | 446 | nilfs_mdt_destroy(nilfs->ns_cpfile); |
447 | nilfs_mdt_destroy(nilfs->ns_sufile); | 447 | nilfs_mdt_destroy(nilfs->ns_sufile); |
448 | nilfs_mdt_destroy(nilfs->ns_dat); | 448 | nilfs_mdt_destroy(nilfs->ns_dat); |
449 | nilfs_mdt_destroy(nilfs->ns_gc_dat); | ||
449 | 450 | ||
450 | failed: | 451 | failed: |
451 | nilfs_clear_recovery_info(&ri); | 452 | nilfs_clear_recovery_info(&ri); |
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 22c629eedd82..b388443c3a09 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig | |||
@@ -3,4 +3,4 @@ config FSNOTIFY | |||
3 | 3 | ||
4 | source "fs/notify/dnotify/Kconfig" | 4 | source "fs/notify/dnotify/Kconfig" |
5 | source "fs/notify/inotify/Kconfig" | 5 | source "fs/notify/inotify/Kconfig" |
6 | source "fs/notify/fanotify/Kconfig" | 6 | #source "fs/notify/fanotify/Kconfig" |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 756566fe8449..85366c78cc37 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -165,9 +165,6 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, | |||
165 | "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, | 165 | "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, |
166 | inode_mark, vfsmnt_mark, event_mask, data, data_type); | 166 | inode_mark, vfsmnt_mark, event_mask, data, data_type); |
167 | 167 | ||
168 | pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n", | ||
169 | __func__, group, vfsmnt_mark, inode_mark, event_mask); | ||
170 | |||
171 | /* sorry, fanotify only gives a damn about files and dirs */ | 168 | /* sorry, fanotify only gives a damn about files and dirs */ |
172 | if (!S_ISREG(to_tell->i_mode) && | 169 | if (!S_ISREG(to_tell->i_mode) && |
173 | !S_ISDIR(to_tell->i_mode)) | 170 | !S_ISDIR(to_tell->i_mode)) |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 032b837fcd11..5ed8e58d7bfc 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
195 | re->fd = fd; | 195 | re->fd = fd; |
196 | 196 | ||
197 | mutex_lock(&group->fanotify_data.access_mutex); | 197 | mutex_lock(&group->fanotify_data.access_mutex); |
198 | |||
199 | if (group->fanotify_data.bypass_perm) { | ||
200 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
201 | kmem_cache_free(fanotify_response_event_cache, re); | ||
202 | event->response = FAN_ALLOW; | ||
203 | return 0; | ||
204 | } | ||
205 | |||
198 | list_add_tail(&re->list, &group->fanotify_data.access_list); | 206 | list_add_tail(&re->list, &group->fanotify_data.access_list); |
199 | mutex_unlock(&group->fanotify_data.access_mutex); | 207 | mutex_unlock(&group->fanotify_data.access_mutex); |
200 | 208 | ||
@@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t | |||
364 | static int fanotify_release(struct inode *ignored, struct file *file) | 372 | static int fanotify_release(struct inode *ignored, struct file *file) |
365 | { | 373 | { |
366 | struct fsnotify_group *group = file->private_data; | 374 | struct fsnotify_group *group = file->private_data; |
375 | struct fanotify_response_event *re, *lre; | ||
367 | 376 | ||
368 | pr_debug("%s: file=%p group=%p\n", __func__, file, group); | 377 | pr_debug("%s: file=%p group=%p\n", __func__, file, group); |
369 | 378 | ||
379 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
380 | mutex_lock(&group->fanotify_data.access_mutex); | ||
381 | |||
382 | group->fanotify_data.bypass_perm = true; | ||
383 | |||
384 | list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { | ||
385 | pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, | ||
386 | re, re->event); | ||
387 | |||
388 | list_del_init(&re->list); | ||
389 | re->event->response = FAN_ALLOW; | ||
390 | |||
391 | kmem_cache_free(fanotify_response_event_cache, re); | ||
392 | } | ||
393 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
394 | |||
395 | wake_up(&group->fanotify_data.access_waitq); | ||
396 | #endif | ||
370 | /* matches the fanotify_init->fsnotify_alloc_group */ | 397 | /* matches the fanotify_init->fsnotify_alloc_group */ |
371 | fsnotify_put_group(group); | 398 | fsnotify_put_group(group); |
372 | 399 | ||
@@ -614,7 +641,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
614 | __func__, flags, event_f_flags); | 641 | __func__, flags, event_f_flags); |
615 | 642 | ||
616 | if (!capable(CAP_SYS_ADMIN)) | 643 | if (!capable(CAP_SYS_ADMIN)) |
617 | return -EACCES; | 644 | return -EPERM; |
618 | 645 | ||
619 | if (flags & ~FAN_ALL_INIT_FLAGS) | 646 | if (flags & ~FAN_ALL_INIT_FLAGS) |
620 | return -EINVAL; | 647 | return -EINVAL; |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 3970392b2722..36802420d69a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -148,13 +148,14 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
148 | const unsigned char *file_name, | 148 | const unsigned char *file_name, |
149 | struct fsnotify_event **event) | 149 | struct fsnotify_event **event) |
150 | { | 150 | { |
151 | struct fsnotify_group *group = inode_mark->group; | 151 | struct fsnotify_group *group = NULL; |
152 | __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); | 152 | __u32 inode_test_mask = 0; |
153 | __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); | 153 | __u32 vfsmount_test_mask = 0; |
154 | 154 | ||
155 | pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" | 155 | if (unlikely(!inode_mark && !vfsmount_mark)) { |
156 | " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, | 156 | BUG(); |
157 | mnt, inode_mark, mask, data, data_is, cookie, *event); | 157 | return 0; |
158 | } | ||
158 | 159 | ||
159 | /* clear ignored on inode modification */ | 160 | /* clear ignored on inode modification */ |
160 | if (mask & FS_MODIFY) { | 161 | if (mask & FS_MODIFY) { |
@@ -168,18 +169,29 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
168 | 169 | ||
169 | /* does the inode mark tell us to do something? */ | 170 | /* does the inode mark tell us to do something? */ |
170 | if (inode_mark) { | 171 | if (inode_mark) { |
172 | group = inode_mark->group; | ||
173 | inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
171 | inode_test_mask &= inode_mark->mask; | 174 | inode_test_mask &= inode_mark->mask; |
172 | inode_test_mask &= ~inode_mark->ignored_mask; | 175 | inode_test_mask &= ~inode_mark->ignored_mask; |
173 | } | 176 | } |
174 | 177 | ||
175 | /* does the vfsmount_mark tell us to do something? */ | 178 | /* does the vfsmount_mark tell us to do something? */ |
176 | if (vfsmount_mark) { | 179 | if (vfsmount_mark) { |
180 | vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
181 | group = vfsmount_mark->group; | ||
177 | vfsmount_test_mask &= vfsmount_mark->mask; | 182 | vfsmount_test_mask &= vfsmount_mark->mask; |
178 | vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; | 183 | vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; |
179 | if (inode_mark) | 184 | if (inode_mark) |
180 | vfsmount_test_mask &= ~inode_mark->ignored_mask; | 185 | vfsmount_test_mask &= ~inode_mark->ignored_mask; |
181 | } | 186 | } |
182 | 187 | ||
188 | pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" | ||
189 | " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" | ||
190 | " data=%p data_is=%d cookie=%d event=%p\n", | ||
191 | __func__, group, to_tell, mnt, mask, inode_mark, | ||
192 | inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, | ||
193 | data_is, cookie, *event); | ||
194 | |||
183 | if (!inode_test_mask && !vfsmount_test_mask) | 195 | if (!inode_test_mask && !vfsmount_test_mask) |
184 | return 0; | 196 | return 0; |
185 | 197 | ||
@@ -207,13 +219,12 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
207 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | 219 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, |
208 | const unsigned char *file_name, u32 cookie) | 220 | const unsigned char *file_name, u32 cookie) |
209 | { | 221 | { |
210 | struct hlist_node *inode_node, *vfsmount_node; | 222 | struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; |
211 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; | 223 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; |
212 | struct fsnotify_group *inode_group, *vfsmount_group; | 224 | struct fsnotify_group *inode_group, *vfsmount_group; |
213 | struct fsnotify_event *event = NULL; | 225 | struct fsnotify_event *event = NULL; |
214 | struct vfsmount *mnt; | 226 | struct vfsmount *mnt; |
215 | int idx, ret = 0; | 227 | int idx, ret = 0; |
216 | bool used_inode = false, used_vfsmount = false; | ||
217 | /* global tests shouldn't care about events on child only the specific event */ | 228 | /* global tests shouldn't care about events on child only the specific event */ |
218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); | 229 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); |
219 | 230 | ||
@@ -238,57 +249,50 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
238 | (test_mask & to_tell->i_fsnotify_mask)) | 249 | (test_mask & to_tell->i_fsnotify_mask)) |
239 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, | 250 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, |
240 | &fsnotify_mark_srcu); | 251 | &fsnotify_mark_srcu); |
241 | else | ||
242 | inode_node = NULL; | ||
243 | 252 | ||
244 | if (mnt) { | 253 | if (mnt && ((mask & FS_MODIFY) || |
245 | if ((mask & FS_MODIFY) || | 254 | (test_mask & mnt->mnt_fsnotify_mask))) { |
246 | (test_mask & mnt->mnt_fsnotify_mask)) | 255 | vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, |
247 | vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, | 256 | &fsnotify_mark_srcu); |
248 | &fsnotify_mark_srcu); | 257 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, |
249 | else | 258 | &fsnotify_mark_srcu); |
250 | vfsmount_node = NULL; | ||
251 | } else { | ||
252 | mnt = NULL; | ||
253 | vfsmount_node = NULL; | ||
254 | } | 259 | } |
255 | 260 | ||
256 | while (inode_node || vfsmount_node) { | 261 | while (inode_node || vfsmount_node) { |
262 | inode_group = vfsmount_group = NULL; | ||
263 | |||
257 | if (inode_node) { | 264 | if (inode_node) { |
258 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), | 265 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), |
259 | struct fsnotify_mark, i.i_list); | 266 | struct fsnotify_mark, i.i_list); |
260 | inode_group = inode_mark->group; | 267 | inode_group = inode_mark->group; |
261 | } else | 268 | } |
262 | inode_group = (void *)-1; | ||
263 | 269 | ||
264 | if (vfsmount_node) { | 270 | if (vfsmount_node) { |
265 | vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), | 271 | vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), |
266 | struct fsnotify_mark, m.m_list); | 272 | struct fsnotify_mark, m.m_list); |
267 | vfsmount_group = vfsmount_mark->group; | 273 | vfsmount_group = vfsmount_mark->group; |
268 | } else | 274 | } |
269 | vfsmount_group = (void *)-1; | ||
270 | 275 | ||
271 | if (inode_group < vfsmount_group) { | 276 | if (inode_group > vfsmount_group) { |
272 | /* handle inode */ | 277 | /* handle inode */ |
273 | send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, | 278 | send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, |
274 | data_is, cookie, file_name, &event); | 279 | data_is, cookie, file_name, &event); |
275 | used_inode = true; | 280 | /* we didn't use the vfsmount_mark */ |
276 | } else if (vfsmount_group < inode_group) { | 281 | vfsmount_group = NULL; |
282 | } else if (vfsmount_group > inode_group) { | ||
277 | send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, | 283 | send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, |
278 | data_is, cookie, file_name, &event); | 284 | data_is, cookie, file_name, &event); |
279 | used_vfsmount = true; | 285 | inode_group = NULL; |
280 | } else { | 286 | } else { |
281 | send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, | 287 | send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, |
282 | mask, data, data_is, cookie, file_name, | 288 | mask, data, data_is, cookie, file_name, |
283 | &event); | 289 | &event); |
284 | used_vfsmount = true; | ||
285 | used_inode = true; | ||
286 | } | 290 | } |
287 | 291 | ||
288 | if (used_inode) | 292 | if (inode_group) |
289 | inode_node = srcu_dereference(inode_node->next, | 293 | inode_node = srcu_dereference(inode_node->next, |
290 | &fsnotify_mark_srcu); | 294 | &fsnotify_mark_srcu); |
291 | if (used_vfsmount) | 295 | if (vfsmount_group) |
292 | vfsmount_node = srcu_dereference(vfsmount_node->next, | 296 | vfsmount_node = srcu_dereference(vfsmount_node->next, |
293 | &fsnotify_mark_srcu); | 297 | &fsnotify_mark_srcu); |
294 | } | 298 | } |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a76e0aa5cd3f..391915093fe1 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | |||
209 | } | 209 | } |
210 | 210 | ||
211 | inode->i_mode = new_mode; | 211 | inode->i_mode = new_mode; |
212 | inode->i_ctime = CURRENT_TIME; | ||
212 | di->i_mode = cpu_to_le16(inode->i_mode); | 213 | di->i_mode = cpu_to_le16(inode->i_mode); |
214 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
215 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
213 | 216 | ||
214 | ocfs2_journal_dirty(handle, di_bh); | 217 | ocfs2_journal_dirty(handle, di_bh); |
215 | 218 | ||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 215e12ce1d85..592fae5007d1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, | |||
6672 | last_page_bytes = PAGE_ALIGN(end); | 6672 | last_page_bytes = PAGE_ALIGN(end); |
6673 | index = start >> PAGE_CACHE_SHIFT; | 6673 | index = start >> PAGE_CACHE_SHIFT; |
6674 | do { | 6674 | do { |
6675 | pages[numpages] = grab_cache_page(mapping, index); | 6675 | pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); |
6676 | if (!pages[numpages]) { | 6676 | if (!pages[numpages]) { |
6677 | ret = -ENOMEM; | 6677 | ret = -ENOMEM; |
6678 | mlog_errno(ret); | 6678 | mlog_errno(ret); |
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index ec6d12339593..c7ee03c22226 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
439 | 439 | ||
440 | ocfs2_blockcheck_inc_failure(stats); | 440 | ocfs2_blockcheck_inc_failure(stats); |
441 | mlog(ML_ERROR, | 441 | mlog(ML_ERROR, |
442 | "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", | 442 | "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", |
443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); |
444 | 444 | ||
445 | /* Ok, try ECC fixups */ | 445 | /* Ok, try ECC fixups */ |
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
453 | goto out; | 453 | goto out; |
454 | } | 454 | } |
455 | 455 | ||
456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", | 456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", |
457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); |
458 | 458 | ||
459 | rc = -EIO; | 459 | rc = -EIO; |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1361997cf205..cbe2f057cc28 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, | |||
977 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | 977 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, |
978 | size_t caller_veclen, u8 target_node, int *status) | 978 | size_t caller_veclen, u8 target_node, int *status) |
979 | { | 979 | { |
980 | int ret; | 980 | int ret = 0; |
981 | struct o2net_msg *msg = NULL; | 981 | struct o2net_msg *msg = NULL; |
982 | size_t veclen, caller_bytes = 0; | 982 | size_t veclen, caller_bytes = 0; |
983 | struct kvec *vec = NULL; | 983 | struct kvec *vec = NULL; |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f04ebcfffc4a..c49f6de0e7ab 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3931 | goto out_commit; | 3931 | goto out_commit; |
3932 | } | 3932 | } |
3933 | 3933 | ||
3934 | cpos = split_hash; | ||
3935 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | ||
3936 | data_ac, meta_ac, new_dx_leaves, | ||
3937 | num_dx_leaves); | ||
3938 | if (ret) { | ||
3939 | mlog_errno(ret); | ||
3940 | goto out_commit; | ||
3941 | } | ||
3942 | |||
3934 | for (i = 0; i < num_dx_leaves; i++) { | 3943 | for (i = 0; i < num_dx_leaves; i++) { |
3935 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), | 3944 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3936 | orig_dx_leaves[i], | 3945 | orig_dx_leaves[i], |
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3939 | mlog_errno(ret); | 3948 | mlog_errno(ret); |
3940 | goto out_commit; | 3949 | goto out_commit; |
3941 | } | 3950 | } |
3942 | } | ||
3943 | 3951 | ||
3944 | cpos = split_hash; | 3952 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3945 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | 3953 | new_dx_leaves[i], |
3946 | data_ac, meta_ac, new_dx_leaves, | 3954 | OCFS2_JOURNAL_ACCESS_WRITE); |
3947 | num_dx_leaves); | 3955 | if (ret) { |
3948 | if (ret) { | 3956 | mlog_errno(ret); |
3949 | mlog_errno(ret); | 3957 | goto out_commit; |
3950 | goto out_commit; | 3958 | } |
3951 | } | 3959 | } |
3952 | 3960 | ||
3953 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, | 3961 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4b6ae2c13b47..765298908f1d 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | |||
1030 | struct dlm_lock_resource *res); | 1030 | struct dlm_lock_resource *res); |
1031 | void dlm_clean_master_list(struct dlm_ctxt *dlm, | 1031 | void dlm_clean_master_list(struct dlm_ctxt *dlm, |
1032 | u8 dead_node); | 1032 | u8 dead_node); |
1033 | void dlm_force_free_mles(struct dlm_ctxt *dlm); | ||
1033 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 1034 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
1034 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); | 1035 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); |
1035 | int __dlm_lockres_unused(struct dlm_lock_resource *res); | 1036 | int __dlm_lockres_unused(struct dlm_lock_resource *res); |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5efdd37dfe48..901ca52bf86b 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | |||
636 | spin_lock(&dlm->track_lock); | 636 | spin_lock(&dlm->track_lock); |
637 | if (oldres) | 637 | if (oldres) |
638 | track_list = &oldres->tracking; | 638 | track_list = &oldres->tracking; |
639 | else | 639 | else { |
640 | track_list = &dlm->tracking_list; | 640 | track_list = &dlm->tracking_list; |
641 | if (list_empty(track_list)) { | ||
642 | dl = NULL; | ||
643 | spin_unlock(&dlm->track_lock); | ||
644 | goto bail; | ||
645 | } | ||
646 | } | ||
641 | 647 | ||
642 | list_for_each_entry(res, track_list, tracking) { | 648 | list_for_each_entry(res, track_list, tracking) { |
643 | if (&res->tracking == &dlm->tracking_list) | 649 | if (&res->tracking == &dlm->tracking_list) |
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | |||
660 | } else | 666 | } else |
661 | dl = NULL; | 667 | dl = NULL; |
662 | 668 | ||
669 | bail: | ||
663 | /* passed to seq_show */ | 670 | /* passed to seq_show */ |
664 | return dl; | 671 | return dl; |
665 | } | 672 | } |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5abef0..11a5c87fd7f7 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
693 | 693 | ||
694 | dlm_mark_domain_leaving(dlm); | 694 | dlm_mark_domain_leaving(dlm); |
695 | dlm_leave_domain(dlm); | 695 | dlm_leave_domain(dlm); |
696 | dlm_force_free_mles(dlm); | ||
696 | dlm_complete_dlm_shutdown(dlm); | 697 | dlm_complete_dlm_shutdown(dlm); |
697 | } | 698 | } |
698 | dlm_put(dlm); | 699 | dlm_put(dlm); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ffb4c68dafa4..f564b0e5f80d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, | |||
3433 | wake_up(&res->wq); | 3433 | wake_up(&res->wq); |
3434 | wake_up(&dlm->migration_wq); | 3434 | wake_up(&dlm->migration_wq); |
3435 | } | 3435 | } |
3436 | |||
3437 | void dlm_force_free_mles(struct dlm_ctxt *dlm) | ||
3438 | { | ||
3439 | int i; | ||
3440 | struct hlist_head *bucket; | ||
3441 | struct dlm_master_list_entry *mle; | ||
3442 | struct hlist_node *tmp, *list; | ||
3443 | |||
3444 | /* | ||
3445 | * We notified all other nodes that we are exiting the domain and | ||
3446 | * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still | ||
3447 | * around we force free them and wake any processes that are waiting | ||
3448 | * on the mles | ||
3449 | */ | ||
3450 | spin_lock(&dlm->spinlock); | ||
3451 | spin_lock(&dlm->master_lock); | ||
3452 | |||
3453 | BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); | ||
3454 | BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); | ||
3455 | |||
3456 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | ||
3457 | bucket = dlm_master_hash(dlm, i); | ||
3458 | hlist_for_each_safe(list, tmp, bucket) { | ||
3459 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
3460 | master_hash_node); | ||
3461 | if (mle->type != DLM_MLE_BLOCK) { | ||
3462 | mlog(ML_ERROR, "bad mle: %p\n", mle); | ||
3463 | dlm_print_one_mle(mle); | ||
3464 | } | ||
3465 | atomic_set(&mle->woken, 1); | ||
3466 | wake_up(&mle->wq); | ||
3467 | |||
3468 | __dlm_unlink_mle(dlm, mle); | ||
3469 | __dlm_mle_detach_hb_events(dlm, mle); | ||
3470 | __dlm_put_mle(mle); | ||
3471 | } | ||
3472 | } | ||
3473 | spin_unlock(&dlm->master_lock); | ||
3474 | spin_unlock(&dlm->spinlock); | ||
3475 | } | ||
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d1ce48e1b3d6..1d596d8c4a4a 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -84,6 +84,7 @@ enum { | |||
84 | OI_LS_PARENT, | 84 | OI_LS_PARENT, |
85 | OI_LS_RENAME1, | 85 | OI_LS_RENAME1, |
86 | OI_LS_RENAME2, | 86 | OI_LS_RENAME2, |
87 | OI_LS_REFLINK_TARGET, | ||
87 | }; | 88 | }; |
88 | 89 | ||
89 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 90 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 81296b4e3646..9a03c151b5ce 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
37 | #include <linux/falloc.h> | 37 | #include <linux/falloc.h> |
38 | #include <linux/quotaops.h> | 38 | #include <linux/quotaops.h> |
39 | #include <linux/blkdev.h> | ||
39 | 40 | ||
40 | #define MLOG_MASK_PREFIX ML_INODE | 41 | #define MLOG_MASK_PREFIX ML_INODE |
41 | #include <cluster/masklog.h> | 42 | #include <cluster/masklog.h> |
@@ -190,8 +191,16 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
190 | if (err) | 191 | if (err) |
191 | goto bail; | 192 | goto bail; |
192 | 193 | ||
193 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 194 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { |
195 | /* | ||
196 | * We still have to flush drive's caches to get data to the | ||
197 | * platter | ||
198 | */ | ||
199 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | ||
200 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | ||
201 | NULL, BLKDEV_IFL_WAIT); | ||
194 | goto bail; | 202 | goto bail; |
203 | } | ||
195 | 204 | ||
196 | journal = osb->journal->j_journal; | 205 | journal = osb->journal->j_journal; |
197 | err = jbd2_journal_force_commit(journal); | 206 | err = jbd2_journal_force_commit(journal); |
@@ -774,7 +783,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
774 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | 783 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); |
775 | BUG_ON(abs_from & (inode->i_blkbits - 1)); | 784 | BUG_ON(abs_from & (inode->i_blkbits - 1)); |
776 | 785 | ||
777 | page = grab_cache_page(mapping, index); | 786 | page = find_or_create_page(mapping, index, GFP_NOFS); |
778 | if (!page) { | 787 | if (!page) { |
779 | ret = -ENOMEM; | 788 | ret = -ENOMEM; |
780 | mlog_errno(ret); | 789 | mlog_errno(ret); |
@@ -2329,7 +2338,7 @@ out_dio: | |||
2329 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2338 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
2330 | 2339 | ||
2331 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || | 2340 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
2332 | ((file->f_flags & O_DIRECT) && has_refcount)) { | 2341 | ((file->f_flags & O_DIRECT) && !direct_io)) { |
2333 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2342 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
2334 | pos + count - 1); | 2343 | pos + count - 1); |
2335 | if (ret < 0) | 2344 | if (ret < 0) |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0492464916b1..eece3e05d9d0 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
488 | OCFS2_BH_IGNORE_CACHE); | 488 | OCFS2_BH_IGNORE_CACHE); |
489 | } else { | 489 | } else { |
490 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); | 490 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); |
491 | if (!status) | 491 | /* |
492 | * If buffer is in jbd, then its checksum may not have been | ||
493 | * computed as yet. | ||
494 | */ | ||
495 | if (!status && !buffer_jbd(bh)) | ||
492 | status = ocfs2_validate_inode_block(osb->sb, bh); | 496 | status = ocfs2_validate_inode_block(osb->sb, bh); |
493 | } | 497 | } |
494 | if (status < 0) { | 498 | if (status < 0) { |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af2b8fe1f139..4c18f4ad93b4 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -74,9 +74,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
74 | /* | 74 | /* |
75 | * Another node might have truncated while we were waiting on | 75 | * Another node might have truncated while we were waiting on |
76 | * cluster locks. | 76 | * cluster locks. |
77 | * We don't check size == 0 before the shift. This is borrowed | ||
78 | * from do_generic_file_read. | ||
77 | */ | 79 | */ |
78 | last_index = size >> PAGE_CACHE_SHIFT; | 80 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; |
79 | if (page->index > last_index) { | 81 | if (unlikely(!size || page->index > last_index)) { |
80 | ret = -EINVAL; | 82 | ret = -EINVAL; |
81 | goto out; | 83 | goto out; |
82 | } | 84 | } |
@@ -107,7 +109,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
107 | * because the "write" would invalidate their data. | 109 | * because the "write" would invalidate their data. |
108 | */ | 110 | */ |
109 | if (page->index == last_index) | 111 | if (page->index == last_index) |
110 | len = size & ~PAGE_CACHE_MASK; | 112 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; |
111 | 113 | ||
112 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | 114 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, |
113 | &fsdata, di_bh, page); | 115 | &fsdata, di_bh, page); |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f171b51a74f7..a00dda2e4f16 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -472,32 +472,23 @@ leave: | |||
472 | return status; | 472 | return status; |
473 | } | 473 | } |
474 | 474 | ||
475 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 475 | static int __ocfs2_mknod_locked(struct inode *dir, |
476 | struct inode *dir, | 476 | struct inode *inode, |
477 | struct inode *inode, | 477 | dev_t dev, |
478 | dev_t dev, | 478 | struct buffer_head **new_fe_bh, |
479 | struct buffer_head **new_fe_bh, | 479 | struct buffer_head *parent_fe_bh, |
480 | struct buffer_head *parent_fe_bh, | 480 | handle_t *handle, |
481 | handle_t *handle, | 481 | struct ocfs2_alloc_context *inode_ac, |
482 | struct ocfs2_alloc_context *inode_ac) | 482 | u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) |
483 | { | 483 | { |
484 | int status = 0; | 484 | int status = 0; |
485 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
485 | struct ocfs2_dinode *fe = NULL; | 486 | struct ocfs2_dinode *fe = NULL; |
486 | struct ocfs2_extent_list *fel; | 487 | struct ocfs2_extent_list *fel; |
487 | u64 suballoc_loc, fe_blkno = 0; | ||
488 | u16 suballoc_bit; | ||
489 | u16 feat; | 488 | u16 feat; |
490 | 489 | ||
491 | *new_fe_bh = NULL; | 490 | *new_fe_bh = NULL; |
492 | 491 | ||
493 | status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, | ||
494 | inode_ac, &suballoc_loc, | ||
495 | &suballoc_bit, &fe_blkno); | ||
496 | if (status < 0) { | ||
497 | mlog_errno(status); | ||
498 | goto leave; | ||
499 | } | ||
500 | |||
501 | /* populate as many fields early on as possible - many of | 492 | /* populate as many fields early on as possible - many of |
502 | * these are used by the support functions here and in | 493 | * these are used by the support functions here and in |
503 | * callers. */ | 494 | * callers. */ |
@@ -591,6 +582,34 @@ leave: | |||
591 | return status; | 582 | return status; |
592 | } | 583 | } |
593 | 584 | ||
585 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | ||
586 | struct inode *dir, | ||
587 | struct inode *inode, | ||
588 | dev_t dev, | ||
589 | struct buffer_head **new_fe_bh, | ||
590 | struct buffer_head *parent_fe_bh, | ||
591 | handle_t *handle, | ||
592 | struct ocfs2_alloc_context *inode_ac) | ||
593 | { | ||
594 | int status = 0; | ||
595 | u64 suballoc_loc, fe_blkno = 0; | ||
596 | u16 suballoc_bit; | ||
597 | |||
598 | *new_fe_bh = NULL; | ||
599 | |||
600 | status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, | ||
601 | inode_ac, &suballoc_loc, | ||
602 | &suballoc_bit, &fe_blkno); | ||
603 | if (status < 0) { | ||
604 | mlog_errno(status); | ||
605 | return status; | ||
606 | } | ||
607 | |||
608 | return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, | ||
609 | parent_fe_bh, handle, inode_ac, | ||
610 | fe_blkno, suballoc_loc, suballoc_bit); | ||
611 | } | ||
612 | |||
594 | static int ocfs2_mkdir(struct inode *dir, | 613 | static int ocfs2_mkdir(struct inode *dir, |
595 | struct dentry *dentry, | 614 | struct dentry *dentry, |
596 | int mode) | 615 | int mode) |
@@ -1852,61 +1871,117 @@ bail: | |||
1852 | return status; | 1871 | return status; |
1853 | } | 1872 | } |
1854 | 1873 | ||
1855 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 1874 | static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb, |
1856 | struct inode **ret_orphan_dir, | 1875 | struct inode **ret_orphan_dir, |
1857 | u64 blkno, | 1876 | struct buffer_head **ret_orphan_dir_bh) |
1858 | char *name, | ||
1859 | struct ocfs2_dir_lookup_result *lookup) | ||
1860 | { | 1877 | { |
1861 | struct inode *orphan_dir_inode; | 1878 | struct inode *orphan_dir_inode; |
1862 | struct buffer_head *orphan_dir_bh = NULL; | 1879 | struct buffer_head *orphan_dir_bh = NULL; |
1863 | int status = 0; | 1880 | int ret = 0; |
1864 | |||
1865 | status = ocfs2_blkno_stringify(blkno, name); | ||
1866 | if (status < 0) { | ||
1867 | mlog_errno(status); | ||
1868 | return status; | ||
1869 | } | ||
1870 | 1881 | ||
1871 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1882 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
1872 | ORPHAN_DIR_SYSTEM_INODE, | 1883 | ORPHAN_DIR_SYSTEM_INODE, |
1873 | osb->slot_num); | 1884 | osb->slot_num); |
1874 | if (!orphan_dir_inode) { | 1885 | if (!orphan_dir_inode) { |
1875 | status = -ENOENT; | 1886 | ret = -ENOENT; |
1876 | mlog_errno(status); | 1887 | mlog_errno(ret); |
1877 | return status; | 1888 | return ret; |
1878 | } | 1889 | } |
1879 | 1890 | ||
1880 | mutex_lock(&orphan_dir_inode->i_mutex); | 1891 | mutex_lock(&orphan_dir_inode->i_mutex); |
1881 | 1892 | ||
1882 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 1893 | ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
1883 | if (status < 0) { | 1894 | if (ret < 0) { |
1884 | mlog_errno(status); | 1895 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1885 | goto leave; | 1896 | iput(orphan_dir_inode); |
1897 | |||
1898 | mlog_errno(ret); | ||
1899 | return ret; | ||
1886 | } | 1900 | } |
1887 | 1901 | ||
1888 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | 1902 | *ret_orphan_dir = orphan_dir_inode; |
1889 | orphan_dir_bh, name, | 1903 | *ret_orphan_dir_bh = orphan_dir_bh; |
1890 | OCFS2_ORPHAN_NAMELEN, lookup); | ||
1891 | if (status < 0) { | ||
1892 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
1893 | 1904 | ||
1894 | mlog_errno(status); | 1905 | return 0; |
1895 | goto leave; | 1906 | } |
1907 | |||
1908 | static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, | ||
1909 | struct buffer_head *orphan_dir_bh, | ||
1910 | u64 blkno, | ||
1911 | char *name, | ||
1912 | struct ocfs2_dir_lookup_result *lookup) | ||
1913 | { | ||
1914 | int ret; | ||
1915 | struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); | ||
1916 | |||
1917 | ret = ocfs2_blkno_stringify(blkno, name); | ||
1918 | if (ret < 0) { | ||
1919 | mlog_errno(ret); | ||
1920 | return ret; | ||
1921 | } | ||
1922 | |||
1923 | ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | ||
1924 | orphan_dir_bh, name, | ||
1925 | OCFS2_ORPHAN_NAMELEN, lookup); | ||
1926 | if (ret < 0) { | ||
1927 | mlog_errno(ret); | ||
1928 | return ret; | ||
1929 | } | ||
1930 | |||
1931 | return 0; | ||
1932 | } | ||
1933 | |||
1934 | /** | ||
1935 | * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for | ||
1936 | * insertion of an orphan. | ||
1937 | * @osb: ocfs2 file system | ||
1938 | * @ret_orphan_dir: Orphan dir inode - returned locked! | ||
1939 | * @blkno: Actual block number of the inode to be inserted into orphan dir. | ||
1940 | * @lookup: dir lookup result, to be passed back into functions like | ||
1941 | * ocfs2_orphan_add | ||
1942 | * | ||
1943 | * Returns zero on success and the ret_orphan_dir, name and lookup | ||
1944 | * fields will be populated. | ||
1945 | * | ||
1946 | * Returns non-zero on failure. | ||
1947 | */ | ||
1948 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | ||
1949 | struct inode **ret_orphan_dir, | ||
1950 | u64 blkno, | ||
1951 | char *name, | ||
1952 | struct ocfs2_dir_lookup_result *lookup) | ||
1953 | { | ||
1954 | struct inode *orphan_dir_inode = NULL; | ||
1955 | struct buffer_head *orphan_dir_bh = NULL; | ||
1956 | int ret = 0; | ||
1957 | |||
1958 | ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode, | ||
1959 | &orphan_dir_bh); | ||
1960 | if (ret < 0) { | ||
1961 | mlog_errno(ret); | ||
1962 | return ret; | ||
1963 | } | ||
1964 | |||
1965 | ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, | ||
1966 | blkno, name, lookup); | ||
1967 | if (ret < 0) { | ||
1968 | mlog_errno(ret); | ||
1969 | goto out; | ||
1896 | } | 1970 | } |
1897 | 1971 | ||
1898 | *ret_orphan_dir = orphan_dir_inode; | 1972 | *ret_orphan_dir = orphan_dir_inode; |
1899 | 1973 | ||
1900 | leave: | 1974 | out: |
1901 | if (status) { | 1975 | brelse(orphan_dir_bh); |
1976 | |||
1977 | if (ret) { | ||
1978 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
1902 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1979 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1903 | iput(orphan_dir_inode); | 1980 | iput(orphan_dir_inode); |
1904 | } | 1981 | } |
1905 | 1982 | ||
1906 | brelse(orphan_dir_bh); | 1983 | mlog_exit(ret); |
1907 | 1984 | return ret; | |
1908 | mlog_exit(status); | ||
1909 | return status; | ||
1910 | } | 1985 | } |
1911 | 1986 | ||
1912 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 1987 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
@@ -2053,6 +2128,99 @@ leave: | |||
2053 | return status; | 2128 | return status; |
2054 | } | 2129 | } |
2055 | 2130 | ||
2131 | /** | ||
2132 | * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly | ||
2133 | * allocated file. This is different from the typical 'add to orphan dir' | ||
2134 | * operation in that the inode does not yet exist. This is a problem because | ||
2135 | * the orphan dir stringifies the inode block number to come up with it's | ||
2136 | * dirent. Obviously if the inode does not yet exist we have a chicken and egg | ||
2137 | * problem. This function works around it by calling deeper into the orphan | ||
2138 | * and suballoc code than other callers. Use this only by necessity. | ||
2139 | * @dir: The directory which this inode will ultimately wind up under - not the | ||
2140 | * orphan dir! | ||
2141 | * @dir_bh: buffer_head the @dir inode block | ||
2142 | * @orphan_name: string of length (CFS2_ORPHAN_NAMELEN + 1). Will be filled | ||
2143 | * with the string to be used for orphan dirent. Pass back to the orphan dir | ||
2144 | * code. | ||
2145 | * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan | ||
2146 | * dir code. | ||
2147 | * @ret_di_blkno: block number where the new inode will be allocated. | ||
2148 | * @orphan_insert: Dir insert context to be passed back into orphan dir code. | ||
2149 | * @ret_inode_ac: Inode alloc context to be passed back to the allocator. | ||
2150 | * | ||
2151 | * Returns zero on success and the ret_orphan_dir, name and lookup | ||
2152 | * fields will be populated. | ||
2153 | * | ||
2154 | * Returns non-zero on failure. | ||
2155 | */ | ||
2156 | static int ocfs2_prep_new_orphaned_file(struct inode *dir, | ||
2157 | struct buffer_head *dir_bh, | ||
2158 | char *orphan_name, | ||
2159 | struct inode **ret_orphan_dir, | ||
2160 | u64 *ret_di_blkno, | ||
2161 | struct ocfs2_dir_lookup_result *orphan_insert, | ||
2162 | struct ocfs2_alloc_context **ret_inode_ac) | ||
2163 | { | ||
2164 | int ret; | ||
2165 | u64 di_blkno; | ||
2166 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2167 | struct inode *orphan_dir = NULL; | ||
2168 | struct buffer_head *orphan_dir_bh = NULL; | ||
2169 | struct ocfs2_alloc_context *inode_ac = NULL; | ||
2170 | |||
2171 | ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh); | ||
2172 | if (ret < 0) { | ||
2173 | mlog_errno(ret); | ||
2174 | return ret; | ||
2175 | } | ||
2176 | |||
2177 | /* reserve an inode spot */ | ||
2178 | ret = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2179 | if (ret < 0) { | ||
2180 | if (ret != -ENOSPC) | ||
2181 | mlog_errno(ret); | ||
2182 | goto out; | ||
2183 | } | ||
2184 | |||
2185 | ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac, | ||
2186 | &di_blkno); | ||
2187 | if (ret) { | ||
2188 | mlog_errno(ret); | ||
2189 | goto out; | ||
2190 | } | ||
2191 | |||
2192 | ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, | ||
2193 | di_blkno, orphan_name, orphan_insert); | ||
2194 | if (ret < 0) { | ||
2195 | mlog_errno(ret); | ||
2196 | goto out; | ||
2197 | } | ||
2198 | |||
2199 | out: | ||
2200 | if (ret == 0) { | ||
2201 | *ret_orphan_dir = orphan_dir; | ||
2202 | *ret_di_blkno = di_blkno; | ||
2203 | *ret_inode_ac = inode_ac; | ||
2204 | /* | ||
2205 | * orphan_name and orphan_insert are already up to | ||
2206 | * date via prepare_orphan_dir | ||
2207 | */ | ||
2208 | } else { | ||
2209 | /* Unroll reserve_new_inode* */ | ||
2210 | if (inode_ac) | ||
2211 | ocfs2_free_alloc_context(inode_ac); | ||
2212 | |||
2213 | /* Unroll orphan dir locking */ | ||
2214 | mutex_unlock(&orphan_dir->i_mutex); | ||
2215 | ocfs2_inode_unlock(orphan_dir, 1); | ||
2216 | iput(orphan_dir); | ||
2217 | } | ||
2218 | |||
2219 | brelse(orphan_dir_bh); | ||
2220 | |||
2221 | return 0; | ||
2222 | } | ||
2223 | |||
2056 | int ocfs2_create_inode_in_orphan(struct inode *dir, | 2224 | int ocfs2_create_inode_in_orphan(struct inode *dir, |
2057 | int mode, | 2225 | int mode, |
2058 | struct inode **new_inode) | 2226 | struct inode **new_inode) |
@@ -2068,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2068 | struct buffer_head *new_di_bh = NULL; | 2236 | struct buffer_head *new_di_bh = NULL; |
2069 | struct ocfs2_alloc_context *inode_ac = NULL; | 2237 | struct ocfs2_alloc_context *inode_ac = NULL; |
2070 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | 2238 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; |
2239 | u64 uninitialized_var(di_blkno), suballoc_loc; | ||
2240 | u16 suballoc_bit; | ||
2071 | 2241 | ||
2072 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); | 2242 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); |
2073 | if (status < 0) { | 2243 | if (status < 0) { |
@@ -2076,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2076 | return status; | 2246 | return status; |
2077 | } | 2247 | } |
2078 | 2248 | ||
2079 | /* | 2249 | status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh, |
2080 | * We give the orphan dir the root blkno to fake an orphan name, | 2250 | orphan_name, &orphan_dir, |
2081 | * and allocate enough space for our insertion. | 2251 | &di_blkno, &orphan_insert, &inode_ac); |
2082 | */ | ||
2083 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | ||
2084 | osb->root_blkno, | ||
2085 | orphan_name, &orphan_insert); | ||
2086 | if (status < 0) { | ||
2087 | mlog_errno(status); | ||
2088 | goto leave; | ||
2089 | } | ||
2090 | |||
2091 | /* reserve an inode spot */ | ||
2092 | status = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2093 | if (status < 0) { | 2252 | if (status < 0) { |
2094 | if (status != -ENOSPC) | 2253 | if (status != -ENOSPC) |
2095 | mlog_errno(status); | 2254 | mlog_errno(status); |
@@ -2116,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2116 | goto leave; | 2275 | goto leave; |
2117 | did_quota_inode = 1; | 2276 | did_quota_inode = 1; |
2118 | 2277 | ||
2119 | inode->i_nlink = 0; | 2278 | status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac, |
2120 | /* do the real work now. */ | 2279 | &suballoc_loc, |
2121 | status = ocfs2_mknod_locked(osb, dir, inode, | 2280 | &suballoc_bit, di_blkno); |
2122 | 0, &new_di_bh, parent_di_bh, handle, | ||
2123 | inode_ac); | ||
2124 | if (status < 0) { | 2281 | if (status < 0) { |
2125 | mlog_errno(status); | 2282 | mlog_errno(status); |
2126 | goto leave; | 2283 | goto leave; |
2127 | } | 2284 | } |
2128 | 2285 | ||
2129 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); | 2286 | inode->i_nlink = 0; |
2287 | /* do the real work now. */ | ||
2288 | status = __ocfs2_mknod_locked(dir, inode, | ||
2289 | 0, &new_di_bh, parent_di_bh, handle, | ||
2290 | inode_ac, di_blkno, suballoc_loc, | ||
2291 | suballoc_bit); | ||
2130 | if (status < 0) { | 2292 | if (status < 0) { |
2131 | mlog_errno(status); | 2293 | mlog_errno(status); |
2132 | goto leave; | 2294 | goto leave; |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 33f1c9a8258d..fa31d05e41b7 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -235,18 +235,31 @@ | |||
235 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) | 235 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) |
236 | 236 | ||
237 | /* Inode attributes, keep in sync with EXT2 */ | 237 | /* Inode attributes, keep in sync with EXT2 */ |
238 | #define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ | 238 | #define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */ |
239 | #define OCFS2_UNRM_FL (0x00000002) /* Undelete */ | 239 | #define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */ |
240 | #define OCFS2_COMPR_FL (0x00000004) /* Compress file */ | 240 | #define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */ |
241 | #define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ | 241 | #define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ |
242 | #define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ | 242 | #define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ |
243 | #define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ | 243 | #define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ |
244 | #define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ | 244 | #define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ |
245 | #define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ | 245 | #define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ |
246 | #define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ | 246 | /* Reserved for compression usage... */ |
247 | 247 | #define OCFS2_DIRTY_FL FS_DIRTY_FL | |
248 | #define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ | 248 | #define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ |
249 | #define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ | 249 | #define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ |
250 | #define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ | ||
251 | /* End compression flags --- maybe not all used */ | ||
252 | #define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */ | ||
253 | #define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ | ||
254 | #define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ | ||
255 | #define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ | ||
256 | #define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ | ||
257 | #define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ | ||
258 | #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ | ||
259 | #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ | ||
260 | |||
261 | #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ | ||
262 | #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ | ||
250 | 263 | ||
251 | /* | 264 | /* |
252 | * Extent record flags (e_node.leaf.flags) | 265 | * Extent record flags (e_node.leaf.flags) |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..5d241505690b 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -23,10 +23,10 @@ | |||
23 | /* | 23 | /* |
24 | * ioctl commands | 24 | * ioctl commands |
25 | */ | 25 | */ |
26 | #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) | 26 | #define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS |
27 | #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) | 27 | #define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS |
28 | #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) | 28 | #define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS |
29 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | 29 | #define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Space reservation / allocation / free ioctls and argument structure | 32 | * Space reservation / allocation / free ioctls and argument structure |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 73a11ccfd4c2..efdd75607406 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -2960,7 +2960,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2960 | if (map_end & (PAGE_CACHE_SIZE - 1)) | 2960 | if (map_end & (PAGE_CACHE_SIZE - 1)) |
2961 | to = map_end & (PAGE_CACHE_SIZE - 1); | 2961 | to = map_end & (PAGE_CACHE_SIZE - 1); |
2962 | 2962 | ||
2963 | page = grab_cache_page(mapping, page_index); | 2963 | page = find_or_create_page(mapping, page_index, GFP_NOFS); |
2964 | 2964 | ||
2965 | /* | 2965 | /* |
2966 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page | 2966 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page |
@@ -3179,7 +3179,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
3179 | if (map_end > end) | 3179 | if (map_end > end) |
3180 | map_end = end; | 3180 | map_end = end; |
3181 | 3181 | ||
3182 | page = grab_cache_page(context->inode->i_mapping, page_index); | 3182 | page = find_or_create_page(context->inode->i_mapping, |
3183 | page_index, GFP_NOFS); | ||
3183 | BUG_ON(!page); | 3184 | BUG_ON(!page); |
3184 | 3185 | ||
3185 | wait_on_page_writeback(page); | 3186 | wait_on_page_writeback(page); |
@@ -4200,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, | |||
4200 | goto out; | 4201 | goto out; |
4201 | } | 4202 | } |
4202 | 4203 | ||
4203 | mutex_lock(&new_inode->i_mutex); | 4204 | mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); |
4204 | ret = ocfs2_inode_lock(new_inode, &new_bh, 1); | 4205 | ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, |
4206 | OI_LS_REFLINK_TARGET); | ||
4205 | if (ret) { | 4207 | if (ret) { |
4206 | mlog_errno(ret); | 4208 | mlog_errno(ret); |
4207 | goto out_unlock; | 4209 | goto out_unlock; |
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e4259b80..3e78db361bc7 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c | |||
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, | |||
732 | struct ocfs2_alloc_reservation *resv, | 732 | struct ocfs2_alloc_reservation *resv, |
733 | int *cstart, int *clen) | 733 | int *cstart, int *clen) |
734 | { | 734 | { |
735 | unsigned int wanted = *clen; | ||
736 | |||
737 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) | 735 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) |
738 | return -ENOSPC; | 736 | return -ENOSPC; |
739 | 737 | ||
740 | spin_lock(&resv_lock); | 738 | spin_lock(&resv_lock); |
741 | 739 | ||
742 | /* | ||
743 | * We don't want to over-allocate for temporary | ||
744 | * windows. Otherwise, we run the risk of fragmenting the | ||
745 | * allocation space. | ||
746 | */ | ||
747 | wanted = ocfs2_resv_window_bits(resmap, resv); | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | if (ocfs2_resv_empty(resv)) { | 740 | if (ocfs2_resv_empty(resv)) { |
752 | mlog(0, "empty reservation, find new window\n"); | 741 | /* |
742 | * We don't want to over-allocate for temporary | ||
743 | * windows. Otherwise, we run the risk of fragmenting the | ||
744 | * allocation space. | ||
745 | */ | ||
746 | unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); | ||
753 | 747 | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | mlog(0, "empty reservation, find new window\n"); | ||
754 | /* | 752 | /* |
755 | * Try to get a window here. If it works, we must fall | 753 | * Try to get a window here. If it works, we must fall |
756 | * through and test the bitmap . This avoids some | 754 | * through and test the bitmap . This avoids some |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a8e6a95a353f..849c2f0e0a0e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result { | |||
57 | u64 sr_bg_blkno; /* The bg we allocated from. Set | 57 | u64 sr_bg_blkno; /* The bg we allocated from. Set |
58 | to 0 when a block group is | 58 | to 0 when a block group is |
59 | contiguous. */ | 59 | contiguous. */ |
60 | u64 sr_bg_stable_blkno; /* | ||
61 | * Doesn't change, always | ||
62 | * set to target block | ||
63 | * group descriptor | ||
64 | * block. | ||
65 | */ | ||
60 | u64 sr_blkno; /* The first allocated block */ | 66 | u64 sr_blkno; /* The first allocated block */ |
61 | unsigned int sr_bit_offset; /* The bit in the bg */ | 67 | unsigned int sr_bit_offset; /* The bit in the bg */ |
62 | unsigned int sr_bits; /* How many bits we claimed */ | 68 | unsigned int sr_bits; /* How many bits we claimed */ |
63 | }; | 69 | }; |
64 | 70 | ||
71 | static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) | ||
72 | { | ||
73 | if (res->sr_blkno == 0) | ||
74 | return 0; | ||
75 | |||
76 | if (res->sr_bg_blkno) | ||
77 | return res->sr_bg_blkno; | ||
78 | |||
79 | return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); | ||
80 | } | ||
81 | |||
65 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 82 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
66 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 83 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
67 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 84 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
@@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |||
138 | brelse(ac->ac_bh); | 155 | brelse(ac->ac_bh); |
139 | ac->ac_bh = NULL; | 156 | ac->ac_bh = NULL; |
140 | ac->ac_resv = NULL; | 157 | ac->ac_resv = NULL; |
158 | if (ac->ac_find_loc_priv) { | ||
159 | kfree(ac->ac_find_loc_priv); | ||
160 | ac->ac_find_loc_priv = NULL; | ||
161 | } | ||
141 | } | 162 | } |
142 | 163 | ||
143 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 164 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) |
@@ -336,7 +357,7 @@ out: | |||
336 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | 357 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, |
337 | struct ocfs2_group_desc *bg, | 358 | struct ocfs2_group_desc *bg, |
338 | struct ocfs2_chain_list *cl, | 359 | struct ocfs2_chain_list *cl, |
339 | u64 p_blkno, u32 clusters) | 360 | u64 p_blkno, unsigned int clusters) |
340 | { | 361 | { |
341 | struct ocfs2_extent_list *el = &bg->bg_list; | 362 | struct ocfs2_extent_list *el = &bg->bg_list; |
342 | struct ocfs2_extent_rec *rec; | 363 | struct ocfs2_extent_rec *rec; |
@@ -348,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | |||
348 | rec->e_blkno = cpu_to_le64(p_blkno); | 369 | rec->e_blkno = cpu_to_le64(p_blkno); |
349 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / | 370 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / |
350 | le16_to_cpu(cl->cl_bpc)); | 371 | le16_to_cpu(cl->cl_bpc)); |
351 | rec->e_leaf_clusters = cpu_to_le32(clusters); | 372 | rec->e_leaf_clusters = cpu_to_le16(clusters); |
352 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); | 373 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); |
353 | le16_add_cpu(&bg->bg_free_bits_count, | 374 | le16_add_cpu(&bg->bg_free_bits_count, |
354 | clusters * le16_to_cpu(cl->cl_bpc)); | 375 | clusters * le16_to_cpu(cl->cl_bpc)); |
@@ -1678,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1678 | if (!ret) | 1699 | if (!ret) |
1679 | ocfs2_bg_discontig_fix_result(ac, gd, res); | 1700 | ocfs2_bg_discontig_fix_result(ac, gd, res); |
1680 | 1701 | ||
1702 | /* | ||
1703 | * sr_bg_blkno might have been changed by | ||
1704 | * ocfs2_bg_discontig_fix_result | ||
1705 | */ | ||
1706 | res->sr_bg_stable_blkno = group_bh->b_blocknr; | ||
1707 | |||
1708 | if (ac->ac_find_loc_only) | ||
1709 | goto out_loc_only; | ||
1710 | |||
1681 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, | 1711 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, |
1682 | res->sr_bits, | 1712 | res->sr_bits, |
1683 | le16_to_cpu(gd->bg_chain)); | 1713 | le16_to_cpu(gd->bg_chain)); |
@@ -1691,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1691 | if (ret < 0) | 1721 | if (ret < 0) |
1692 | mlog_errno(ret); | 1722 | mlog_errno(ret); |
1693 | 1723 | ||
1724 | out_loc_only: | ||
1694 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | 1725 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); |
1695 | 1726 | ||
1696 | out: | 1727 | out: |
@@ -1708,7 +1739,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1708 | { | 1739 | { |
1709 | int status; | 1740 | int status; |
1710 | u16 chain; | 1741 | u16 chain; |
1711 | u32 tmp_used; | ||
1712 | u64 next_group; | 1742 | u64 next_group; |
1713 | struct inode *alloc_inode = ac->ac_inode; | 1743 | struct inode *alloc_inode = ac->ac_inode; |
1714 | struct buffer_head *group_bh = NULL; | 1744 | struct buffer_head *group_bh = NULL; |
@@ -1770,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1770 | if (!status) | 1800 | if (!status) |
1771 | ocfs2_bg_discontig_fix_result(ac, bg, res); | 1801 | ocfs2_bg_discontig_fix_result(ac, bg, res); |
1772 | 1802 | ||
1803 | /* | ||
1804 | * sr_bg_blkno might have been changed by | ||
1805 | * ocfs2_bg_discontig_fix_result | ||
1806 | */ | ||
1807 | res->sr_bg_stable_blkno = group_bh->b_blocknr; | ||
1773 | 1808 | ||
1774 | /* | 1809 | /* |
1775 | * Keep track of previous block descriptor read. When | 1810 | * Keep track of previous block descriptor read. When |
@@ -1796,22 +1831,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1796 | } | 1831 | } |
1797 | } | 1832 | } |
1798 | 1833 | ||
1799 | /* Ok, claim our bits now: set the info on dinode, chainlist | 1834 | if (ac->ac_find_loc_only) |
1800 | * and then the group */ | 1835 | goto out_loc_only; |
1801 | status = ocfs2_journal_access_di(handle, | 1836 | |
1802 | INODE_CACHE(alloc_inode), | 1837 | status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, |
1803 | ac->ac_bh, | 1838 | ac->ac_bh, res->sr_bits, |
1804 | OCFS2_JOURNAL_ACCESS_WRITE); | 1839 | chain); |
1805 | if (status < 0) { | 1840 | if (status) { |
1806 | mlog_errno(status); | 1841 | mlog_errno(status); |
1807 | goto bail; | 1842 | goto bail; |
1808 | } | 1843 | } |
1809 | 1844 | ||
1810 | tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); | ||
1811 | fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used); | ||
1812 | le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits); | ||
1813 | ocfs2_journal_dirty(handle, ac->ac_bh); | ||
1814 | |||
1815 | status = ocfs2_block_group_set_bits(handle, | 1845 | status = ocfs2_block_group_set_bits(handle, |
1816 | alloc_inode, | 1846 | alloc_inode, |
1817 | bg, | 1847 | bg, |
@@ -1826,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1826 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, | 1856 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, |
1827 | (unsigned long long)le64_to_cpu(fe->i_blkno)); | 1857 | (unsigned long long)le64_to_cpu(fe->i_blkno)); |
1828 | 1858 | ||
1859 | out_loc_only: | ||
1829 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | 1860 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
1830 | bail: | 1861 | bail: |
1831 | brelse(group_bh); | 1862 | brelse(group_bh); |
@@ -1845,6 +1876,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1845 | int status; | 1876 | int status; |
1846 | u16 victim, i; | 1877 | u16 victim, i; |
1847 | u16 bits_left = 0; | 1878 | u16 bits_left = 0; |
1879 | u64 hint = ac->ac_last_group; | ||
1848 | struct ocfs2_chain_list *cl; | 1880 | struct ocfs2_chain_list *cl; |
1849 | struct ocfs2_dinode *fe; | 1881 | struct ocfs2_dinode *fe; |
1850 | 1882 | ||
@@ -1872,7 +1904,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1872 | goto bail; | 1904 | goto bail; |
1873 | } | 1905 | } |
1874 | 1906 | ||
1875 | res->sr_bg_blkno = ac->ac_last_group; | 1907 | res->sr_bg_blkno = hint; |
1876 | if (res->sr_bg_blkno) { | 1908 | if (res->sr_bg_blkno) { |
1877 | /* Attempt to short-circuit the usual search mechanism | 1909 | /* Attempt to short-circuit the usual search mechanism |
1878 | * by jumping straight to the most recently used | 1910 | * by jumping straight to the most recently used |
@@ -1896,8 +1928,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1896 | 1928 | ||
1897 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, | 1929 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1898 | res, &bits_left); | 1930 | res, &bits_left); |
1899 | if (!status) | 1931 | if (!status) { |
1932 | hint = ocfs2_group_from_res(res); | ||
1900 | goto set_hint; | 1933 | goto set_hint; |
1934 | } | ||
1901 | if (status < 0 && status != -ENOSPC) { | 1935 | if (status < 0 && status != -ENOSPC) { |
1902 | mlog_errno(status); | 1936 | mlog_errno(status); |
1903 | goto bail; | 1937 | goto bail; |
@@ -1920,8 +1954,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1920 | ac->ac_chain = i; | 1954 | ac->ac_chain = i; |
1921 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, | 1955 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1922 | res, &bits_left); | 1956 | res, &bits_left); |
1923 | if (!status) | 1957 | if (!status) { |
1958 | hint = ocfs2_group_from_res(res); | ||
1924 | break; | 1959 | break; |
1960 | } | ||
1925 | if (status < 0 && status != -ENOSPC) { | 1961 | if (status < 0 && status != -ENOSPC) { |
1926 | mlog_errno(status); | 1962 | mlog_errno(status); |
1927 | goto bail; | 1963 | goto bail; |
@@ -1936,7 +1972,7 @@ set_hint: | |||
1936 | if (bits_left < min_bits) | 1972 | if (bits_left < min_bits) |
1937 | ac->ac_last_group = 0; | 1973 | ac->ac_last_group = 0; |
1938 | else | 1974 | else |
1939 | ac->ac_last_group = res->sr_bg_blkno; | 1975 | ac->ac_last_group = hint; |
1940 | } | 1976 | } |
1941 | 1977 | ||
1942 | bail: | 1978 | bail: |
@@ -2016,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, | |||
2016 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; | 2052 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; |
2017 | } | 2053 | } |
2018 | 2054 | ||
2055 | int ocfs2_find_new_inode_loc(struct inode *dir, | ||
2056 | struct buffer_head *parent_fe_bh, | ||
2057 | struct ocfs2_alloc_context *ac, | ||
2058 | u64 *fe_blkno) | ||
2059 | { | ||
2060 | int ret; | ||
2061 | handle_t *handle = NULL; | ||
2062 | struct ocfs2_suballoc_result *res; | ||
2063 | |||
2064 | BUG_ON(!ac); | ||
2065 | BUG_ON(ac->ac_bits_given != 0); | ||
2066 | BUG_ON(ac->ac_bits_wanted != 1); | ||
2067 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); | ||
2068 | |||
2069 | res = kzalloc(sizeof(*res), GFP_NOFS); | ||
2070 | if (res == NULL) { | ||
2071 | ret = -ENOMEM; | ||
2072 | mlog_errno(ret); | ||
2073 | goto out; | ||
2074 | } | ||
2075 | |||
2076 | ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); | ||
2077 | |||
2078 | /* | ||
2079 | * The handle started here is for chain relink. Alternatively, | ||
2080 | * we could just disable relink for these calls. | ||
2081 | */ | ||
2082 | handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); | ||
2083 | if (IS_ERR(handle)) { | ||
2084 | ret = PTR_ERR(handle); | ||
2085 | handle = NULL; | ||
2086 | mlog_errno(ret); | ||
2087 | goto out; | ||
2088 | } | ||
2089 | |||
2090 | /* | ||
2091 | * This will instruct ocfs2_claim_suballoc_bits and | ||
2092 | * ocfs2_search_one_group to search but save actual allocation | ||
2093 | * for later. | ||
2094 | */ | ||
2095 | ac->ac_find_loc_only = 1; | ||
2096 | |||
2097 | ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); | ||
2098 | if (ret < 0) { | ||
2099 | mlog_errno(ret); | ||
2100 | goto out; | ||
2101 | } | ||
2102 | |||
2103 | ac->ac_find_loc_priv = res; | ||
2104 | *fe_blkno = res->sr_blkno; | ||
2105 | |||
2106 | out: | ||
2107 | if (handle) | ||
2108 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); | ||
2109 | |||
2110 | if (ret) | ||
2111 | kfree(res); | ||
2112 | |||
2113 | return ret; | ||
2114 | } | ||
2115 | |||
2116 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, | ||
2117 | struct inode *dir, | ||
2118 | struct ocfs2_alloc_context *ac, | ||
2119 | u64 *suballoc_loc, | ||
2120 | u16 *suballoc_bit, | ||
2121 | u64 di_blkno) | ||
2122 | { | ||
2123 | int ret; | ||
2124 | u16 chain; | ||
2125 | struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; | ||
2126 | struct buffer_head *bg_bh = NULL; | ||
2127 | struct ocfs2_group_desc *bg; | ||
2128 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; | ||
2129 | |||
2130 | /* | ||
2131 | * Since di_blkno is being passed back in, we check for any | ||
2132 | * inconsistencies which may have happened between | ||
2133 | * calls. These are code bugs as di_blkno is not expected to | ||
2134 | * change once returned from ocfs2_find_new_inode_loc() | ||
2135 | */ | ||
2136 | BUG_ON(res->sr_blkno != di_blkno); | ||
2137 | |||
2138 | ret = ocfs2_read_group_descriptor(ac->ac_inode, di, | ||
2139 | res->sr_bg_stable_blkno, &bg_bh); | ||
2140 | if (ret) { | ||
2141 | mlog_errno(ret); | ||
2142 | goto out; | ||
2143 | } | ||
2144 | |||
2145 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; | ||
2146 | chain = le16_to_cpu(bg->bg_chain); | ||
2147 | |||
2148 | ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, | ||
2149 | ac->ac_bh, res->sr_bits, | ||
2150 | chain); | ||
2151 | if (ret) { | ||
2152 | mlog_errno(ret); | ||
2153 | goto out; | ||
2154 | } | ||
2155 | |||
2156 | ret = ocfs2_block_group_set_bits(handle, | ||
2157 | ac->ac_inode, | ||
2158 | bg, | ||
2159 | bg_bh, | ||
2160 | res->sr_bit_offset, | ||
2161 | res->sr_bits); | ||
2162 | if (ret < 0) { | ||
2163 | mlog_errno(ret); | ||
2164 | goto out; | ||
2165 | } | ||
2166 | |||
2167 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, | ||
2168 | (unsigned long long)di_blkno); | ||
2169 | |||
2170 | atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); | ||
2171 | |||
2172 | BUG_ON(res->sr_bits != 1); | ||
2173 | |||
2174 | *suballoc_loc = res->sr_bg_blkno; | ||
2175 | *suballoc_bit = res->sr_bit_offset; | ||
2176 | ac->ac_bits_given++; | ||
2177 | ocfs2_save_inode_ac_group(dir, ac); | ||
2178 | |||
2179 | out: | ||
2180 | brelse(bg_bh); | ||
2181 | |||
2182 | return ret; | ||
2183 | } | ||
2184 | |||
2019 | int ocfs2_claim_new_inode(handle_t *handle, | 2185 | int ocfs2_claim_new_inode(handle_t *handle, |
2020 | struct inode *dir, | 2186 | struct inode *dir, |
2021 | struct buffer_head *parent_fe_bh, | 2187 | struct buffer_head *parent_fe_bh, |
@@ -2567,7 +2733,8 @@ out: | |||
2567 | * suballoc_bit. | 2733 | * suballoc_bit. |
2568 | */ | 2734 | */ |
2569 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | 2735 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, |
2570 | u16 *suballoc_slot, u16 *suballoc_bit) | 2736 | u16 *suballoc_slot, u64 *group_blkno, |
2737 | u16 *suballoc_bit) | ||
2571 | { | 2738 | { |
2572 | int status; | 2739 | int status; |
2573 | struct buffer_head *inode_bh = NULL; | 2740 | struct buffer_head *inode_bh = NULL; |
@@ -2604,6 +2771,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | |||
2604 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); | 2771 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); |
2605 | if (suballoc_bit) | 2772 | if (suballoc_bit) |
2606 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); | 2773 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); |
2774 | if (group_blkno) | ||
2775 | *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); | ||
2607 | 2776 | ||
2608 | bail: | 2777 | bail: |
2609 | brelse(inode_bh); | 2778 | brelse(inode_bh); |
@@ -2621,7 +2790,8 @@ bail: | |||
2621 | */ | 2790 | */ |
2622 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | 2791 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, |
2623 | struct inode *suballoc, | 2792 | struct inode *suballoc, |
2624 | struct buffer_head *alloc_bh, u64 blkno, | 2793 | struct buffer_head *alloc_bh, |
2794 | u64 group_blkno, u64 blkno, | ||
2625 | u16 bit, int *res) | 2795 | u16 bit, int *res) |
2626 | { | 2796 | { |
2627 | struct ocfs2_dinode *alloc_di; | 2797 | struct ocfs2_dinode *alloc_di; |
@@ -2642,10 +2812,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | |||
2642 | goto bail; | 2812 | goto bail; |
2643 | } | 2813 | } |
2644 | 2814 | ||
2645 | if (alloc_di->i_suballoc_loc) | 2815 | bg_blkno = group_blkno ? group_blkno : |
2646 | bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); | 2816 | ocfs2_which_suballoc_group(blkno, bit); |
2647 | else | ||
2648 | bg_blkno = ocfs2_which_suballoc_group(blkno, bit); | ||
2649 | status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, | 2817 | status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, |
2650 | &group_bh); | 2818 | &group_bh); |
2651 | if (status < 0) { | 2819 | if (status < 0) { |
@@ -2680,6 +2848,7 @@ bail: | |||
2680 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | 2848 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) |
2681 | { | 2849 | { |
2682 | int status; | 2850 | int status; |
2851 | u64 group_blkno = 0; | ||
2683 | u16 suballoc_bit = 0, suballoc_slot = 0; | 2852 | u16 suballoc_bit = 0, suballoc_slot = 0; |
2684 | struct inode *inode_alloc_inode; | 2853 | struct inode *inode_alloc_inode; |
2685 | struct buffer_head *alloc_bh = NULL; | 2854 | struct buffer_head *alloc_bh = NULL; |
@@ -2687,7 +2856,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2687 | mlog_entry("blkno: %llu", (unsigned long long)blkno); | 2856 | mlog_entry("blkno: %llu", (unsigned long long)blkno); |
2688 | 2857 | ||
2689 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, | 2858 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, |
2690 | &suballoc_bit); | 2859 | &group_blkno, &suballoc_bit); |
2691 | if (status < 0) { | 2860 | if (status < 0) { |
2692 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); | 2861 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); |
2693 | goto bail; | 2862 | goto bail; |
@@ -2715,7 +2884,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2715 | } | 2884 | } |
2716 | 2885 | ||
2717 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, | 2886 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, |
2718 | blkno, suballoc_bit, res); | 2887 | group_blkno, blkno, suballoc_bit, res); |
2719 | if (status < 0) | 2888 | if (status < 0) |
2720 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); | 2889 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); |
2721 | 2890 | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a017dd3ee7d9..b8afabfeede4 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context { | |||
56 | u64 ac_max_block; /* Highest block number to allocate. 0 is | 56 | u64 ac_max_block; /* Highest block number to allocate. 0 is |
57 | is the same as ~0 - unlimited */ | 57 | is the same as ~0 - unlimited */ |
58 | 58 | ||
59 | int ac_find_loc_only; /* hack for reflink operation ordering */ | ||
60 | struct ocfs2_suballoc_result *ac_find_loc_priv; /* */ | ||
61 | |||
59 | struct ocfs2_alloc_reservation *ac_resv; | 62 | struct ocfs2_alloc_reservation *ac_resv; |
60 | }; | 63 | }; |
61 | 64 | ||
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | |||
197 | struct ocfs2_alloc_context **meta_ac); | 200 | struct ocfs2_alloc_context **meta_ac); |
198 | 201 | ||
199 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); | 202 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); |
203 | |||
204 | |||
205 | |||
206 | /* | ||
207 | * The following two interfaces are for ocfs2_create_inode_in_orphan(). | ||
208 | */ | ||
209 | int ocfs2_find_new_inode_loc(struct inode *dir, | ||
210 | struct buffer_head *parent_fe_bh, | ||
211 | struct ocfs2_alloc_context *ac, | ||
212 | u64 *fe_blkno); | ||
213 | |||
214 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, | ||
215 | struct inode *dir, | ||
216 | struct ocfs2_alloc_context *ac, | ||
217 | u64 *suballoc_loc, | ||
218 | u16 *suballoc_bit, | ||
219 | u64 di_blkno); | ||
220 | |||
200 | #endif /* _CHAINALLOC_H_ */ | 221 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 32499d213fc4..9975457c981f 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, | |||
128 | } | 128 | } |
129 | 129 | ||
130 | /* Fast symlinks can't be large */ | 130 | /* Fast symlinks can't be large */ |
131 | len = strlen(target); | 131 | len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); |
132 | link = kzalloc(len + 1, GFP_NOFS); | 132 | link = kzalloc(len + 1, GFP_NOFS); |
133 | if (!link) { | 133 | if (!link) { |
134 | status = -ENOMEM; | 134 | status = -ENOMEM; |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f61801..06fa5e77c40e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, | |||
1286 | xis.inode_bh = xbs.inode_bh = di_bh; | 1286 | xis.inode_bh = xbs.inode_bh = di_bh; |
1287 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1287 | di = (struct ocfs2_dinode *)di_bh->b_data; |
1288 | 1288 | ||
1289 | down_read(&oi->ip_xattr_sem); | ||
1290 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | 1289 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, |
1291 | buffer_size, &xis); | 1290 | buffer_size, &xis); |
1292 | if (ret == -ENODATA && di->i_xattr_loc) | 1291 | if (ret == -ENODATA && di->i_xattr_loc) |
1293 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | 1292 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, |
1294 | buffer_size, &xbs); | 1293 | buffer_size, &xbs); |
1295 | up_read(&oi->ip_xattr_sem); | ||
1296 | 1294 | ||
1297 | return ret; | 1295 | return ret; |
1298 | } | 1296 | } |
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, | |||
1316 | mlog_errno(ret); | 1314 | mlog_errno(ret); |
1317 | return ret; | 1315 | return ret; |
1318 | } | 1316 | } |
1317 | down_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1319 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, | 1318 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, |
1320 | name, buffer, buffer_size); | 1319 | name, buffer, buffer_size); |
1320 | up_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1321 | 1321 | ||
1322 | ocfs2_inode_unlock(inode, 0); | 1322 | ocfs2_inode_unlock(inode, 0); |
1323 | 1323 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index a1c43e7c8a7b..8e4addaa5424 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2675 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2675 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2676 | ONE("status", S_IRUGO, proc_pid_status), | 2676 | ONE("status", S_IRUGO, proc_pid_status), |
2677 | ONE("personality", S_IRUSR, proc_pid_personality), | 2677 | ONE("personality", S_IRUSR, proc_pid_personality), |
2678 | INF("limits", S_IRUSR, proc_pid_limits), | 2678 | INF("limits", S_IRUGO, proc_pid_limits), |
2679 | #ifdef CONFIG_SCHED_DEBUG | 2679 | #ifdef CONFIG_SCHED_DEBUG |
2680 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2680 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
2681 | #endif | 2681 | #endif |
@@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3011 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3011 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3012 | ONE("status", S_IRUGO, proc_pid_status), | 3012 | ONE("status", S_IRUGO, proc_pid_status), |
3013 | ONE("personality", S_IRUSR, proc_pid_personality), | 3013 | ONE("personality", S_IRUSR, proc_pid_personality), |
3014 | INF("limits", S_IRUSR, proc_pid_limits), | 3014 | INF("limits", S_IRUGO, proc_pid_limits), |
3015 | #ifdef CONFIG_SCHED_DEBUG | 3015 | #ifdef CONFIG_SCHED_DEBUG |
3016 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 3016 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
3017 | #endif | 3017 | #endif |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 180cf5a0bd67..3b8b45660331 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page) | |||
146 | u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); | 146 | u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); |
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR | 149 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED |
150 | u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); | 150 | u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); |
151 | #endif | 151 | #endif |
152 | 152 | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f1c1c4..1dbca4e8cc16 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
224 | /* We don't show the stack guard page in /proc/maps */ | 224 | /* We don't show the stack guard page in /proc/maps */ |
225 | start = vma->vm_start; | 225 | start = vma->vm_start; |
226 | if (vma->vm_flags & VM_GROWSDOWN) | 226 | if (vma->vm_flags & VM_GROWSDOWN) |
227 | start += PAGE_SIZE; | 227 | if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) |
228 | start += PAGE_SIZE; | ||
228 | 229 | ||
229 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", | 230 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
230 | start, | 231 | start, |
@@ -362,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
362 | mss->referenced += PAGE_SIZE; | 363 | mss->referenced += PAGE_SIZE; |
363 | mapcount = page_mapcount(page); | 364 | mapcount = page_mapcount(page); |
364 | if (mapcount >= 2) { | 365 | if (mapcount >= 2) { |
365 | if (pte_dirty(ptent)) | 366 | if (pte_dirty(ptent) || PageDirty(page)) |
366 | mss->shared_dirty += PAGE_SIZE; | 367 | mss->shared_dirty += PAGE_SIZE; |
367 | else | 368 | else |
368 | mss->shared_clean += PAGE_SIZE; | 369 | mss->shared_clean += PAGE_SIZE; |
369 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; | 370 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; |
370 | } else { | 371 | } else { |
371 | if (pte_dirty(ptent)) | 372 | if (pte_dirty(ptent) || PageDirty(page)) |
372 | mss->private_dirty += PAGE_SIZE; | 373 | mss->private_dirty += PAGE_SIZE; |
373 | else | 374 | else |
374 | mss->private_clean += PAGE_SIZE; | 375 | mss->private_clean += PAGE_SIZE; |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 91c817ff02c3..2367fb3f70bc 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
163 | 163 | ||
164 | static const struct file_operations proc_vmcore_operations = { | 164 | static const struct file_operations proc_vmcore_operations = { |
165 | .read = read_vmcore, | 165 | .read = read_vmcore, |
166 | .llseek = generic_file_llseek, | 166 | .llseek = default_llseek, |
167 | }; | 167 | }; |
168 | 168 | ||
169 | static struct vmcore* __init get_new_element(void) | 169 | static struct vmcore* __init get_new_element(void) |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index f53505de0712..5cbb81e134ac 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page, | |||
170 | int reiserfs_unpack(struct inode *inode, struct file *filp) | 170 | int reiserfs_unpack(struct inode *inode, struct file *filp) |
171 | { | 171 | { |
172 | int retval = 0; | 172 | int retval = 0; |
173 | int depth; | ||
173 | int index; | 174 | int index; |
174 | struct page *page; | 175 | struct page *page; |
175 | struct address_space *mapping; | 176 | struct address_space *mapping; |
@@ -188,8 +189,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
188 | /* we need to make sure nobody is changing the file size beneath | 189 | /* we need to make sure nobody is changing the file size beneath |
189 | ** us | 190 | ** us |
190 | */ | 191 | */ |
191 | mutex_lock(&inode->i_mutex); | 192 | reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); |
192 | reiserfs_write_lock(inode->i_sb); | 193 | depth = reiserfs_write_lock_once(inode->i_sb); |
193 | 194 | ||
194 | write_from = inode->i_size & (blocksize - 1); | 195 | write_from = inode->i_size & (blocksize - 1); |
195 | /* if we are on a block boundary, we are already unpacked. */ | 196 | /* if we are on a block boundary, we are already unpacked. */ |
@@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
224 | 225 | ||
225 | out: | 226 | out: |
226 | mutex_unlock(&inode->i_mutex); | 227 | mutex_unlock(&inode->i_mutex); |
227 | reiserfs_write_unlock(inode->i_sb); | 228 | reiserfs_write_unlock_once(inode->i_sb, depth); |
228 | return retval; | 229 | return retval; |
229 | } | 230 | } |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1b27b5688f62..da3fefe91a8f 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
340 | char *p; | 340 | char *p; |
341 | 341 | ||
342 | p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); | 342 | p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); |
343 | if (p) | 343 | if (!IS_ERR(p)) |
344 | memmove(last_sysfs_file, p, strlen(p) + 1); | 344 | memmove(last_sysfs_file, p, strlen(p) + 1); |
345 | 345 | ||
346 | /* need attr_sd for attr and ops, its parent for kobj */ | 346 | /* need attr_sd for attr and ops, its parent for kobj */ |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 15412fe15c3a..b552f816de15 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -852,8 +852,8 @@ xfs_convert_page( | |||
852 | SetPageUptodate(page); | 852 | SetPageUptodate(page); |
853 | 853 | ||
854 | if (count) { | 854 | if (count) { |
855 | wbc->nr_to_write--; | 855 | if (--wbc->nr_to_write <= 0 && |
856 | if (wbc->nr_to_write <= 0) | 856 | wbc->sync_mode == WB_SYNC_NONE) |
857 | done = 1; | 857 | done = 1; |
858 | } | 858 | } |
859 | xfs_start_page_writeback(page, !page_dirty, count); | 859 | xfs_start_page_writeback(page, !page_dirty, count); |
@@ -1068,7 +1068,7 @@ xfs_vm_writepage( | |||
1068 | * by themselves. | 1068 | * by themselves. |
1069 | */ | 1069 | */ |
1070 | if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) | 1070 | if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) |
1071 | goto out_fail; | 1071 | goto redirty; |
1072 | 1072 | ||
1073 | /* | 1073 | /* |
1074 | * We need a transaction if there are delalloc or unwritten buffers | 1074 | * We need a transaction if there are delalloc or unwritten buffers |
@@ -1080,7 +1080,7 @@ xfs_vm_writepage( | |||
1080 | */ | 1080 | */ |
1081 | xfs_count_page_state(page, &delalloc, &unwritten); | 1081 | xfs_count_page_state(page, &delalloc, &unwritten); |
1082 | if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) | 1082 | if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) |
1083 | goto out_fail; | 1083 | goto redirty; |
1084 | 1084 | ||
1085 | /* Is this page beyond the end of the file? */ | 1085 | /* Is this page beyond the end of the file? */ |
1086 | offset = i_size_read(inode); | 1086 | offset = i_size_read(inode); |
@@ -1245,12 +1245,15 @@ error: | |||
1245 | if (iohead) | 1245 | if (iohead) |
1246 | xfs_cancel_ioend(iohead); | 1246 | xfs_cancel_ioend(iohead); |
1247 | 1247 | ||
1248 | if (err == -EAGAIN) | ||
1249 | goto redirty; | ||
1250 | |||
1248 | xfs_aops_discard_page(page); | 1251 | xfs_aops_discard_page(page); |
1249 | ClearPageUptodate(page); | 1252 | ClearPageUptodate(page); |
1250 | unlock_page(page); | 1253 | unlock_page(page); |
1251 | return err; | 1254 | return err; |
1252 | 1255 | ||
1253 | out_fail: | 1256 | redirty: |
1254 | redirty_page_for_writepage(wbc, page); | 1257 | redirty_page_for_writepage(wbc, page); |
1255 | unlock_page(page); | 1258 | unlock_page(page); |
1256 | return 0; | 1259 | return 0; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b93ea3342281..1846a0dd7035 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -440,12 +440,7 @@ _xfs_buf_find( | |||
440 | ASSERT(btp == bp->b_target); | 440 | ASSERT(btp == bp->b_target); |
441 | if (bp->b_file_offset == range_base && | 441 | if (bp->b_file_offset == range_base && |
442 | bp->b_buffer_length == range_length) { | 442 | bp->b_buffer_length == range_length) { |
443 | /* | ||
444 | * If we look at something, bring it to the | ||
445 | * front of the list for next time. | ||
446 | */ | ||
447 | atomic_inc(&bp->b_hold); | 443 | atomic_inc(&bp->b_hold); |
448 | list_move(&bp->b_hash_list, &hash->bh_list); | ||
449 | goto found; | 444 | goto found; |
450 | } | 445 | } |
451 | } | 446 | } |
@@ -1431,8 +1426,7 @@ xfs_alloc_bufhash( | |||
1431 | { | 1426 | { |
1432 | unsigned int i; | 1427 | unsigned int i; |
1433 | 1428 | ||
1434 | btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ | 1429 | btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ |
1435 | btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; | ||
1436 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | 1430 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * |
1437 | sizeof(xfs_bufhash_t)); | 1431 | sizeof(xfs_bufhash_t)); |
1438 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1432 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { |
@@ -1926,7 +1920,8 @@ xfs_buf_init(void) | |||
1926 | if (!xfs_buf_zone) | 1920 | if (!xfs_buf_zone) |
1927 | goto out; | 1921 | goto out; |
1928 | 1922 | ||
1929 | xfslogd_workqueue = create_workqueue("xfslogd"); | 1923 | xfslogd_workqueue = alloc_workqueue("xfslogd", |
1924 | WQ_RESCUER | WQ_HIGHPRI, 1); | ||
1930 | if (!xfslogd_workqueue) | 1925 | if (!xfslogd_workqueue) |
1931 | goto out_free_buf_zone; | 1926 | goto out_free_buf_zone; |
1932 | 1927 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index d533d64e2c3e..9d021c73ea52 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -128,7 +128,6 @@ typedef struct xfs_buftarg { | |||
128 | size_t bt_smask; | 128 | size_t bt_smask; |
129 | 129 | ||
130 | /* per device buffer hash table */ | 130 | /* per device buffer hash table */ |
131 | uint bt_hashmask; | ||
132 | uint bt_hashshift; | 131 | uint bt_hashshift; |
133 | xfs_bufhash_t *bt_hash; | 132 | xfs_bufhash_t *bt_hash; |
134 | 133 | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 237f5ffb2ee8..3b9e626f7cd1 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr( | |||
785 | { | 785 | { |
786 | struct fsxattr fa; | 786 | struct fsxattr fa; |
787 | 787 | ||
788 | memset(&fa, 0, sizeof(struct fsxattr)); | ||
789 | |||
788 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 790 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
789 | fa.fsx_xflags = xfs_ip2xflags(ip); | 791 | fa.fsx_xflags = xfs_ip2xflags(ip); |
790 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; | 792 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; |
@@ -907,6 +909,13 @@ xfs_ioctl_setattr( | |||
907 | return XFS_ERROR(EIO); | 909 | return XFS_ERROR(EIO); |
908 | 910 | ||
909 | /* | 911 | /* |
912 | * Disallow 32bit project ids because on-disk structure | ||
913 | * is 16bit only. | ||
914 | */ | ||
915 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) | ||
916 | return XFS_ERROR(EINVAL); | ||
917 | |||
918 | /* | ||
910 | * If disk quotas is on, we make sure that the dquots do exist on disk, | 919 | * If disk quotas is on, we make sure that the dquots do exist on disk, |
911 | * before we start any other transactions. Trying to do this later | 920 | * before we start any other transactions. Trying to do this later |
912 | * is messy. We don't care to take a readlock to look at the ids | 921 | * is messy. We don't care to take a readlock to look at the ids |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 68be25dcd301..b1fc2a6bfe83 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -664,7 +664,7 @@ xfs_vn_fiemap( | |||
664 | fieinfo->fi_extents_max + 1; | 664 | fieinfo->fi_extents_max + 1; |
665 | bm.bmv_count = min_t(__s32, bm.bmv_count, | 665 | bm.bmv_count = min_t(__s32, bm.bmv_count, |
666 | (PAGE_SIZE * 16 / sizeof(struct getbmapx))); | 666 | (PAGE_SIZE * 16 / sizeof(struct getbmapx))); |
667 | bm.bmv_iflags = BMV_IF_PREALLOC; | 667 | bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; |
668 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) | 668 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) |
669 | bm.bmv_iflags |= BMV_IF_ATTRFORK; | 669 | bm.bmv_iflags |= BMV_IF_ATTRFORK; |
670 | if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) | 670 | if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 5fa7a30cc3f0..08fd3102128c 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1225,6 +1225,7 @@ xfs_fs_statfs( | |||
1225 | struct xfs_inode *ip = XFS_I(dentry->d_inode); | 1225 | struct xfs_inode *ip = XFS_I(dentry->d_inode); |
1226 | __uint64_t fakeinos, id; | 1226 | __uint64_t fakeinos, id; |
1227 | xfs_extlen_t lsize; | 1227 | xfs_extlen_t lsize; |
1228 | __int64_t ffree; | ||
1228 | 1229 | ||
1229 | statp->f_type = XFS_SB_MAGIC; | 1230 | statp->f_type = XFS_SB_MAGIC; |
1230 | statp->f_namelen = MAXNAMELEN - 1; | 1231 | statp->f_namelen = MAXNAMELEN - 1; |
@@ -1248,7 +1249,11 @@ xfs_fs_statfs( | |||
1248 | statp->f_files = min_t(typeof(statp->f_files), | 1249 | statp->f_files = min_t(typeof(statp->f_files), |
1249 | statp->f_files, | 1250 | statp->f_files, |
1250 | mp->m_maxicount); | 1251 | mp->m_maxicount); |
1251 | statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | 1252 | |
1253 | /* make sure statp->f_ffree does not underflow */ | ||
1254 | ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | ||
1255 | statp->f_ffree = max_t(__int64_t, ffree, 0); | ||
1256 | |||
1252 | spin_unlock(&mp->m_sb_lock); | 1257 | spin_unlock(&mp->m_sb_lock); |
1253 | 1258 | ||
1254 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || | 1259 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || |
@@ -1401,7 +1406,7 @@ xfs_fs_freeze( | |||
1401 | 1406 | ||
1402 | xfs_save_resvblks(mp); | 1407 | xfs_save_resvblks(mp); |
1403 | xfs_quiesce_attr(mp); | 1408 | xfs_quiesce_attr(mp); |
1404 | return -xfs_fs_log_dummy(mp); | 1409 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); |
1405 | } | 1410 | } |
1406 | 1411 | ||
1407 | STATIC int | 1412 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index dfcbd98d1599..81976ffed7d6 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "xfs_inode_item.h" | 34 | #include "xfs_inode_item.h" |
35 | #include "xfs_quota.h" | 35 | #include "xfs_quota.h" |
36 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
37 | #include "xfs_fsops.h" | ||
37 | 38 | ||
38 | #include <linux/kthread.h> | 39 | #include <linux/kthread.h> |
39 | #include <linux/freezer.h> | 40 | #include <linux/freezer.h> |
@@ -341,38 +342,6 @@ xfs_sync_attr( | |||
341 | } | 342 | } |
342 | 343 | ||
343 | STATIC int | 344 | STATIC int |
344 | xfs_commit_dummy_trans( | ||
345 | struct xfs_mount *mp, | ||
346 | uint flags) | ||
347 | { | ||
348 | struct xfs_inode *ip = mp->m_rootip; | ||
349 | struct xfs_trans *tp; | ||
350 | int error; | ||
351 | |||
352 | /* | ||
353 | * Put a dummy transaction in the log to tell recovery | ||
354 | * that all others are OK. | ||
355 | */ | ||
356 | tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); | ||
357 | error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); | ||
358 | if (error) { | ||
359 | xfs_trans_cancel(tp, 0); | ||
360 | return error; | ||
361 | } | ||
362 | |||
363 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
364 | |||
365 | xfs_trans_ijoin(tp, ip); | ||
366 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
367 | error = xfs_trans_commit(tp, 0); | ||
368 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
369 | |||
370 | /* the log force ensures this transaction is pushed to disk */ | ||
371 | xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); | ||
372 | return error; | ||
373 | } | ||
374 | |||
375 | STATIC int | ||
376 | xfs_sync_fsdata( | 345 | xfs_sync_fsdata( |
377 | struct xfs_mount *mp) | 346 | struct xfs_mount *mp) |
378 | { | 347 | { |
@@ -432,7 +401,7 @@ xfs_quiesce_data( | |||
432 | 401 | ||
433 | /* mark the log as covered if needed */ | 402 | /* mark the log as covered if needed */ |
434 | if (xfs_log_need_covered(mp)) | 403 | if (xfs_log_need_covered(mp)) |
435 | error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); | 404 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); |
436 | 405 | ||
437 | /* flush data-only devices */ | 406 | /* flush data-only devices */ |
438 | if (mp->m_rtdev_targp) | 407 | if (mp->m_rtdev_targp) |
@@ -563,7 +532,7 @@ xfs_flush_inodes( | |||
563 | /* | 532 | /* |
564 | * Every sync period we need to unpin all items, reclaim inodes and sync | 533 | * Every sync period we need to unpin all items, reclaim inodes and sync |
565 | * disk quotas. We might need to cover the log to indicate that the | 534 | * disk quotas. We might need to cover the log to indicate that the |
566 | * filesystem is idle. | 535 | * filesystem is idle and not frozen. |
567 | */ | 536 | */ |
568 | STATIC void | 537 | STATIC void |
569 | xfs_sync_worker( | 538 | xfs_sync_worker( |
@@ -577,8 +546,9 @@ xfs_sync_worker( | |||
577 | xfs_reclaim_inodes(mp, 0); | 546 | xfs_reclaim_inodes(mp, 0); |
578 | /* dgc: errors ignored here */ | 547 | /* dgc: errors ignored here */ |
579 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | 548 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); |
580 | if (xfs_log_need_covered(mp)) | 549 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
581 | error = xfs_commit_dummy_trans(mp, 0); | 550 | xfs_log_need_covered(mp)) |
551 | error = xfs_fs_log_dummy(mp, 0); | ||
582 | } | 552 | } |
583 | mp->m_sync_seq++; | 553 | mp->m_sync_seq++; |
584 | wake_up(&mp->m_wait_single_sync_task); | 554 | wake_up(&mp->m_wait_single_sync_task); |
@@ -698,14 +668,11 @@ xfs_inode_set_reclaim_tag( | |||
698 | xfs_perag_put(pag); | 668 | xfs_perag_put(pag); |
699 | } | 669 | } |
700 | 670 | ||
701 | void | 671 | STATIC void |
702 | __xfs_inode_clear_reclaim_tag( | 672 | __xfs_inode_clear_reclaim( |
703 | xfs_mount_t *mp, | ||
704 | xfs_perag_t *pag, | 673 | xfs_perag_t *pag, |
705 | xfs_inode_t *ip) | 674 | xfs_inode_t *ip) |
706 | { | 675 | { |
707 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
708 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
709 | pag->pag_ici_reclaimable--; | 676 | pag->pag_ici_reclaimable--; |
710 | if (!pag->pag_ici_reclaimable) { | 677 | if (!pag->pag_ici_reclaimable) { |
711 | /* clear the reclaim tag from the perag radix tree */ | 678 | /* clear the reclaim tag from the perag radix tree */ |
@@ -719,6 +686,17 @@ __xfs_inode_clear_reclaim_tag( | |||
719 | } | 686 | } |
720 | } | 687 | } |
721 | 688 | ||
689 | void | ||
690 | __xfs_inode_clear_reclaim_tag( | ||
691 | xfs_mount_t *mp, | ||
692 | xfs_perag_t *pag, | ||
693 | xfs_inode_t *ip) | ||
694 | { | ||
695 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
696 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
697 | __xfs_inode_clear_reclaim(pag, ip); | ||
698 | } | ||
699 | |||
722 | /* | 700 | /* |
723 | * Inodes in different states need to be treated differently, and the return | 701 | * Inodes in different states need to be treated differently, and the return |
724 | * value of xfs_iflush is not sufficient to get this right. The following table | 702 | * value of xfs_iflush is not sufficient to get this right. The following table |
@@ -868,6 +846,7 @@ reclaim: | |||
868 | if (!radix_tree_delete(&pag->pag_ici_root, | 846 | if (!radix_tree_delete(&pag->pag_ici_root, |
869 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 847 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
870 | ASSERT(0); | 848 | ASSERT(0); |
849 | __xfs_inode_clear_reclaim(pag, ip); | ||
871 | write_unlock(&pag->pag_ici_lock); | 850 | write_unlock(&pag->pag_ici_lock); |
872 | 851 | ||
873 | /* | 852 | /* |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 23f14e595c18..f90dadd5a968 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5533,12 +5533,24 @@ xfs_getbmap( | |||
5533 | map[i].br_startblock)) | 5533 | map[i].br_startblock)) |
5534 | goto out_free_map; | 5534 | goto out_free_map; |
5535 | 5535 | ||
5536 | nexleft--; | ||
5537 | bmv->bmv_offset = | 5536 | bmv->bmv_offset = |
5538 | out[cur_ext].bmv_offset + | 5537 | out[cur_ext].bmv_offset + |
5539 | out[cur_ext].bmv_length; | 5538 | out[cur_ext].bmv_length; |
5540 | bmv->bmv_length = | 5539 | bmv->bmv_length = |
5541 | max_t(__int64_t, 0, bmvend - bmv->bmv_offset); | 5540 | max_t(__int64_t, 0, bmvend - bmv->bmv_offset); |
5541 | |||
5542 | /* | ||
5543 | * In case we don't want to return the hole, | ||
5544 | * don't increase cur_ext so that we can reuse | ||
5545 | * it in the next loop. | ||
5546 | */ | ||
5547 | if ((iflags & BMV_IF_NO_HOLES) && | ||
5548 | map[i].br_startblock == HOLESTARTBLOCK) { | ||
5549 | memset(&out[cur_ext], 0, sizeof(out[cur_ext])); | ||
5550 | continue; | ||
5551 | } | ||
5552 | |||
5553 | nexleft--; | ||
5542 | bmv->bmv_entries++; | 5554 | bmv->bmv_entries++; |
5543 | cur_ext++; | 5555 | cur_ext++; |
5544 | } | 5556 | } |
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 7cf7220e7d5f..87c2e9d02288 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -114,8 +114,10 @@ struct getbmapx { | |||
114 | #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ | 114 | #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ |
115 | #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ | 115 | #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ |
116 | #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ | 116 | #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ |
117 | #define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ | ||
117 | #define BMV_IF_VALID \ | 118 | #define BMV_IF_VALID \ |
118 | (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) | 119 | (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ |
120 | BMV_IF_DELALLOC|BMV_IF_NO_HOLES) | ||
119 | 121 | ||
120 | /* bmv_oflags values - returned for each non-header segment */ | 122 | /* bmv_oflags values - returned for each non-header segment */ |
121 | #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ | 123 | #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index dbca5f5c37ba..43b1d5699335 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -604,31 +604,36 @@ out: | |||
604 | return 0; | 604 | return 0; |
605 | } | 605 | } |
606 | 606 | ||
607 | /* | ||
608 | * Dump a transaction into the log that contains no real change. This is needed | ||
609 | * to be able to make the log dirty or stamp the current tail LSN into the log | ||
610 | * during the covering operation. | ||
611 | * | ||
612 | * We cannot use an inode here for this - that will push dirty state back up | ||
613 | * into the VFS and then periodic inode flushing will prevent log covering from | ||
614 | * making progress. Hence we log a field in the superblock instead. | ||
615 | */ | ||
607 | int | 616 | int |
608 | xfs_fs_log_dummy( | 617 | xfs_fs_log_dummy( |
609 | xfs_mount_t *mp) | 618 | xfs_mount_t *mp, |
619 | int flags) | ||
610 | { | 620 | { |
611 | xfs_trans_t *tp; | 621 | xfs_trans_t *tp; |
612 | xfs_inode_t *ip; | ||
613 | int error; | 622 | int error; |
614 | 623 | ||
615 | tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); | 624 | tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); |
616 | error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); | 625 | error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, |
626 | XFS_DEFAULT_LOG_COUNT); | ||
617 | if (error) { | 627 | if (error) { |
618 | xfs_trans_cancel(tp, 0); | 628 | xfs_trans_cancel(tp, 0); |
619 | return error; | 629 | return error; |
620 | } | 630 | } |
621 | 631 | ||
622 | ip = mp->m_rootip; | 632 | /* log the UUID because it is an unchanging field */ |
623 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 633 | xfs_mod_sb(tp, XFS_SB_UUID); |
624 | 634 | if (flags & SYNC_WAIT) | |
625 | xfs_trans_ijoin(tp, ip); | 635 | xfs_trans_set_sync(tp); |
626 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 636 | return xfs_trans_commit(tp, 0); |
627 | xfs_trans_set_sync(tp); | ||
628 | error = xfs_trans_commit(tp, 0); | ||
629 | |||
630 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
631 | return error; | ||
632 | } | 637 | } |
633 | 638 | ||
634 | int | 639 | int |
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 88435e0a77c9..a786c5212c1e 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h | |||
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); | |||
25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, | 25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, |
26 | xfs_fsop_resblks_t *outval); | 26 | xfs_fsop_resblks_t *outval); |
27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); | 27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); |
28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp); | 28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); |
29 | 29 | ||
30 | #endif /* __XFS_FSOPS_H__ */ | 30 | #endif /* __XFS_FSOPS_H__ */ |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index abf80ae1e95b..5371d2dc360e 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -1213,7 +1213,6 @@ xfs_imap_lookup( | |||
1213 | struct xfs_inobt_rec_incore rec; | 1213 | struct xfs_inobt_rec_incore rec; |
1214 | struct xfs_btree_cur *cur; | 1214 | struct xfs_btree_cur *cur; |
1215 | struct xfs_buf *agbp; | 1215 | struct xfs_buf *agbp; |
1216 | xfs_agino_t startino; | ||
1217 | int error; | 1216 | int error; |
1218 | int i; | 1217 | int i; |
1219 | 1218 | ||
@@ -1227,13 +1226,13 @@ xfs_imap_lookup( | |||
1227 | } | 1226 | } |
1228 | 1227 | ||
1229 | /* | 1228 | /* |
1230 | * derive and lookup the exact inode record for the given agino. If the | 1229 | * Lookup the inode record for the given agino. If the record cannot be |
1231 | * record cannot be found, then it's an invalid inode number and we | 1230 | * found, then it's an invalid inode number and we should abort. Once |
1232 | * should abort. | 1231 | * we have a record, we need to ensure it contains the inode number |
1232 | * we are looking up. | ||
1233 | */ | 1233 | */ |
1234 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1234 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1235 | startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); | 1235 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); |
1236 | error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); | ||
1237 | if (!error) { | 1236 | if (!error) { |
1238 | if (i) | 1237 | if (i) |
1239 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1238 | error = xfs_inobt_get_rec(cur, &rec, &i); |
@@ -1246,6 +1245,11 @@ xfs_imap_lookup( | |||
1246 | if (error) | 1245 | if (error) |
1247 | return error; | 1246 | return error; |
1248 | 1247 | ||
1248 | /* check that the returned record contains the required inode */ | ||
1249 | if (rec.ir_startino > agino || | ||
1250 | rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) | ||
1251 | return EINVAL; | ||
1252 | |||
1249 | /* for untrusted inodes check it is allocated first */ | 1253 | /* for untrusted inodes check it is allocated first */ |
1250 | if ((flags & XFS_IGET_UNTRUSTED) && | 1254 | if ((flags & XFS_IGET_UNTRUSTED) && |
1251 | (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) | 1255 | (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 68415cb4f23c..34798f391c49 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove( | |||
1914 | return 0; | 1914 | return 0; |
1915 | } | 1915 | } |
1916 | 1916 | ||
1917 | /* | ||
1918 | * A big issue when freeing the inode cluster is is that we _cannot_ skip any | ||
1919 | * inodes that are in memory - they all must be marked stale and attached to | ||
1920 | * the cluster buffer. | ||
1921 | */ | ||
1917 | STATIC void | 1922 | STATIC void |
1918 | xfs_ifree_cluster( | 1923 | xfs_ifree_cluster( |
1919 | xfs_inode_t *free_ip, | 1924 | xfs_inode_t *free_ip, |
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster( | |||
1945 | } | 1950 | } |
1946 | 1951 | ||
1947 | for (j = 0; j < nbufs; j++, inum += ninodes) { | 1952 | for (j = 0; j < nbufs; j++, inum += ninodes) { |
1948 | int found = 0; | ||
1949 | |||
1950 | blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), | 1953 | blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), |
1951 | XFS_INO_TO_AGBNO(mp, inum)); | 1954 | XFS_INO_TO_AGBNO(mp, inum)); |
1952 | 1955 | ||
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster( | |||
1965 | /* | 1968 | /* |
1966 | * Walk the inodes already attached to the buffer and mark them | 1969 | * Walk the inodes already attached to the buffer and mark them |
1967 | * stale. These will all have the flush locks held, so an | 1970 | * stale. These will all have the flush locks held, so an |
1968 | * in-memory inode walk can't lock them. | 1971 | * in-memory inode walk can't lock them. By marking them all |
1972 | * stale first, we will not attempt to lock them in the loop | ||
1973 | * below as the XFS_ISTALE flag will be set. | ||
1969 | */ | 1974 | */ |
1970 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 1975 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
1971 | while (lip) { | 1976 | while (lip) { |
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster( | |||
1977 | &iip->ili_flush_lsn, | 1982 | &iip->ili_flush_lsn, |
1978 | &iip->ili_item.li_lsn); | 1983 | &iip->ili_item.li_lsn); |
1979 | xfs_iflags_set(iip->ili_inode, XFS_ISTALE); | 1984 | xfs_iflags_set(iip->ili_inode, XFS_ISTALE); |
1980 | found++; | ||
1981 | } | 1985 | } |
1982 | lip = lip->li_bio_list; | 1986 | lip = lip->li_bio_list; |
1983 | } | 1987 | } |
1984 | 1988 | ||
1989 | |||
1985 | /* | 1990 | /* |
1986 | * For each inode in memory attempt to add it to the inode | 1991 | * For each inode in memory attempt to add it to the inode |
1987 | * buffer and set it up for being staled on buffer IO | 1992 | * buffer and set it up for being staled on buffer IO |
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster( | |||
1993 | * even trying to lock them. | 1998 | * even trying to lock them. |
1994 | */ | 1999 | */ |
1995 | for (i = 0; i < ninodes; i++) { | 2000 | for (i = 0; i < ninodes; i++) { |
2001 | retry: | ||
1996 | read_lock(&pag->pag_ici_lock); | 2002 | read_lock(&pag->pag_ici_lock); |
1997 | ip = radix_tree_lookup(&pag->pag_ici_root, | 2003 | ip = radix_tree_lookup(&pag->pag_ici_root, |
1998 | XFS_INO_TO_AGINO(mp, (inum + i))); | 2004 | XFS_INO_TO_AGINO(mp, (inum + i))); |
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster( | |||
2003 | continue; | 2009 | continue; |
2004 | } | 2010 | } |
2005 | 2011 | ||
2006 | /* don't try to lock/unlock the current inode */ | 2012 | /* |
2013 | * Don't try to lock/unlock the current inode, but we | ||
2014 | * _cannot_ skip the other inodes that we did not find | ||
2015 | * in the list attached to the buffer and are not | ||
2016 | * already marked stale. If we can't lock it, back off | ||
2017 | * and retry. | ||
2018 | */ | ||
2007 | if (ip != free_ip && | 2019 | if (ip != free_ip && |
2008 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2020 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
2009 | read_unlock(&pag->pag_ici_lock); | 2021 | read_unlock(&pag->pag_ici_lock); |
2010 | continue; | 2022 | delay(1); |
2023 | goto retry; | ||
2011 | } | 2024 | } |
2012 | read_unlock(&pag->pag_ici_lock); | 2025 | read_unlock(&pag->pag_ici_lock); |
2013 | 2026 | ||
2014 | if (!xfs_iflock_nowait(ip)) { | 2027 | xfs_iflock(ip); |
2015 | if (ip != free_ip) | ||
2016 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
2017 | continue; | ||
2018 | } | ||
2019 | |||
2020 | xfs_iflags_set(ip, XFS_ISTALE); | 2028 | xfs_iflags_set(ip, XFS_ISTALE); |
2021 | if (xfs_inode_clean(ip)) { | ||
2022 | ASSERT(ip != free_ip); | ||
2023 | xfs_ifunlock(ip); | ||
2024 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
2025 | continue; | ||
2026 | } | ||
2027 | 2029 | ||
2030 | /* | ||
2031 | * we don't need to attach clean inodes or those only | ||
2032 | * with unlogged changes (which we throw away, anyway). | ||
2033 | */ | ||
2028 | iip = ip->i_itemp; | 2034 | iip = ip->i_itemp; |
2029 | if (!iip) { | 2035 | if (!iip || xfs_inode_clean(ip)) { |
2030 | /* inode with unlogged changes only */ | ||
2031 | ASSERT(ip != free_ip); | 2036 | ASSERT(ip != free_ip); |
2032 | ip->i_update_core = 0; | 2037 | ip->i_update_core = 0; |
2033 | xfs_ifunlock(ip); | 2038 | xfs_ifunlock(ip); |
2034 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 2039 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
2035 | continue; | 2040 | continue; |
2036 | } | 2041 | } |
2037 | found++; | ||
2038 | 2042 | ||
2039 | iip->ili_last_fields = iip->ili_format.ilf_fields; | 2043 | iip->ili_last_fields = iip->ili_format.ilf_fields; |
2040 | iip->ili_format.ilf_fields = 0; | 2044 | iip->ili_format.ilf_fields = 0; |
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster( | |||
2049 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 2053 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
2050 | } | 2054 | } |
2051 | 2055 | ||
2052 | if (found) | 2056 | xfs_trans_stale_inode_buf(tp, bp); |
2053 | xfs_trans_stale_inode_buf(tp, bp); | ||
2054 | xfs_trans_binval(tp, bp); | 2057 | xfs_trans_binval(tp, bp); |
2055 | } | 2058 | } |
2056 | 2059 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 430a8fc02c1f..ba8e36e0b4e7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -3002,7 +3002,8 @@ _xfs_log_force( | |||
3002 | 3002 | ||
3003 | XFS_STATS_INC(xs_log_force); | 3003 | XFS_STATS_INC(xs_log_force); |
3004 | 3004 | ||
3005 | xlog_cil_push(log, 1); | 3005 | if (log->l_cilp) |
3006 | xlog_cil_force(log); | ||
3006 | 3007 | ||
3007 | spin_lock(&log->l_icloglock); | 3008 | spin_lock(&log->l_icloglock); |
3008 | 3009 | ||
@@ -3154,7 +3155,7 @@ _xfs_log_force_lsn( | |||
3154 | XFS_STATS_INC(xs_log_force); | 3155 | XFS_STATS_INC(xs_log_force); |
3155 | 3156 | ||
3156 | if (log->l_cilp) { | 3157 | if (log->l_cilp) { |
3157 | lsn = xlog_cil_push_lsn(log, lsn); | 3158 | lsn = xlog_cil_force_lsn(log, lsn); |
3158 | if (lsn == NULLCOMMITLSN) | 3159 | if (lsn == NULLCOMMITLSN) |
3159 | return 0; | 3160 | return 0; |
3160 | } | 3161 | } |
@@ -3711,7 +3712,7 @@ xfs_log_force_umount( | |||
3711 | * call below. | 3712 | * call below. |
3712 | */ | 3713 | */ |
3713 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) | 3714 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) |
3714 | xlog_cil_push(log, 1); | 3715 | xlog_cil_force(log); |
3715 | 3716 | ||
3716 | /* | 3717 | /* |
3717 | * We must hold both the GRANT lock and the LOG lock, | 3718 | * We must hold both the GRANT lock and the LOG lock, |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 31e4ea2d19ac..7e206fc1fa36 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -68,6 +68,7 @@ xlog_cil_init( | |||
68 | ctx->sequence = 1; | 68 | ctx->sequence = 1; |
69 | ctx->cil = cil; | 69 | ctx->cil = cil; |
70 | cil->xc_ctx = ctx; | 70 | cil->xc_ctx = ctx; |
71 | cil->xc_current_sequence = ctx->sequence; | ||
71 | 72 | ||
72 | cil->xc_log = log; | 73 | cil->xc_log = log; |
73 | log->l_cilp = cil; | 74 | log->l_cilp = cil; |
@@ -269,15 +270,10 @@ xlog_cil_insert( | |||
269 | static void | 270 | static void |
270 | xlog_cil_format_items( | 271 | xlog_cil_format_items( |
271 | struct log *log, | 272 | struct log *log, |
272 | struct xfs_log_vec *log_vector, | 273 | struct xfs_log_vec *log_vector) |
273 | struct xlog_ticket *ticket, | ||
274 | xfs_lsn_t *start_lsn) | ||
275 | { | 274 | { |
276 | struct xfs_log_vec *lv; | 275 | struct xfs_log_vec *lv; |
277 | 276 | ||
278 | if (start_lsn) | ||
279 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
280 | |||
281 | ASSERT(log_vector); | 277 | ASSERT(log_vector); |
282 | for (lv = log_vector; lv; lv = lv->lv_next) { | 278 | for (lv = log_vector; lv; lv = lv->lv_next) { |
283 | void *ptr; | 279 | void *ptr; |
@@ -301,9 +297,24 @@ xlog_cil_format_items( | |||
301 | ptr += vec->i_len; | 297 | ptr += vec->i_len; |
302 | } | 298 | } |
303 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); | 299 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); |
300 | } | ||
301 | } | ||
304 | 302 | ||
303 | static void | ||
304 | xlog_cil_insert_items( | ||
305 | struct log *log, | ||
306 | struct xfs_log_vec *log_vector, | ||
307 | struct xlog_ticket *ticket, | ||
308 | xfs_lsn_t *start_lsn) | ||
309 | { | ||
310 | struct xfs_log_vec *lv; | ||
311 | |||
312 | if (start_lsn) | ||
313 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
314 | |||
315 | ASSERT(log_vector); | ||
316 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
305 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | 317 | xlog_cil_insert(log, ticket, lv->lv_item, lv); |
306 | } | ||
307 | } | 318 | } |
308 | 319 | ||
309 | static void | 320 | static void |
@@ -321,80 +332,6 @@ xlog_cil_free_logvec( | |||
321 | } | 332 | } |
322 | 333 | ||
323 | /* | 334 | /* |
324 | * Commit a transaction with the given vector to the Committed Item List. | ||
325 | * | ||
326 | * To do this, we need to format the item, pin it in memory if required and | ||
327 | * account for the space used by the transaction. Once we have done that we | ||
328 | * need to release the unused reservation for the transaction, attach the | ||
329 | * transaction to the checkpoint context so we carry the busy extents through | ||
330 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
331 | * | ||
332 | * For more specific information about the order of operations in | ||
333 | * xfs_log_commit_cil() please refer to the comments in | ||
334 | * xfs_trans_commit_iclog(). | ||
335 | * | ||
336 | * Called with the context lock already held in read mode to lock out | ||
337 | * background commit, returns without it held once background commits are | ||
338 | * allowed again. | ||
339 | */ | ||
340 | int | ||
341 | xfs_log_commit_cil( | ||
342 | struct xfs_mount *mp, | ||
343 | struct xfs_trans *tp, | ||
344 | struct xfs_log_vec *log_vector, | ||
345 | xfs_lsn_t *commit_lsn, | ||
346 | int flags) | ||
347 | { | ||
348 | struct log *log = mp->m_log; | ||
349 | int log_flags = 0; | ||
350 | int push = 0; | ||
351 | |||
352 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
353 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
354 | |||
355 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
356 | xlog_cil_free_logvec(log_vector); | ||
357 | return XFS_ERROR(EIO); | ||
358 | } | ||
359 | |||
360 | /* lock out background commit */ | ||
361 | down_read(&log->l_cilp->xc_ctx_lock); | ||
362 | xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
363 | |||
364 | /* check we didn't blow the reservation */ | ||
365 | if (tp->t_ticket->t_curr_res < 0) | ||
366 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
367 | |||
368 | /* attach the transaction to the CIL if it has any busy extents */ | ||
369 | if (!list_empty(&tp->t_busy)) { | ||
370 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
371 | list_splice_init(&tp->t_busy, | ||
372 | &log->l_cilp->xc_ctx->busy_extents); | ||
373 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
374 | } | ||
375 | |||
376 | tp->t_commit_lsn = *commit_lsn; | ||
377 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
378 | xfs_trans_unreserve_and_mod_sb(tp); | ||
379 | |||
380 | /* check for background commit before unlock */ | ||
381 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
382 | push = 1; | ||
383 | up_read(&log->l_cilp->xc_ctx_lock); | ||
384 | |||
385 | /* | ||
386 | * We need to push CIL every so often so we don't cache more than we | ||
387 | * can fit in the log. The limit really is that a checkpoint can't be | ||
388 | * more than half the log (the current checkpoint is not allowed to | ||
389 | * overwrite the previous checkpoint), but commit latency and memory | ||
390 | * usage limit this to a smaller size in most cases. | ||
391 | */ | ||
392 | if (push) | ||
393 | xlog_cil_push(log, 0); | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * Mark all items committed and clear busy extents. We free the log vector | 335 | * Mark all items committed and clear busy extents. We free the log vector |
399 | * chains in a separate pass so that we unpin the log items as quickly as | 336 | * chains in a separate pass so that we unpin the log items as quickly as |
400 | * possible. | 337 | * possible. |
@@ -427,13 +364,23 @@ xlog_cil_committed( | |||
427 | } | 364 | } |
428 | 365 | ||
429 | /* | 366 | /* |
430 | * Push the Committed Item List to the log. If the push_now flag is not set, | 367 | * Push the Committed Item List to the log. If @push_seq flag is zero, then it |
431 | * then it is a background flush and so we can chose to ignore it. | 368 | * is a background flush and so we can chose to ignore it. Otherwise, if the |
369 | * current sequence is the same as @push_seq we need to do a flush. If | ||
370 | * @push_seq is less than the current sequence, then it has already been | ||
371 | * flushed and we don't need to do anything - the caller will wait for it to | ||
372 | * complete if necessary. | ||
373 | * | ||
374 | * @push_seq is a value rather than a flag because that allows us to do an | ||
375 | * unlocked check of the sequence number for a match. Hence we can allows log | ||
376 | * forces to run racily and not issue pushes for the same sequence twice. If we | ||
377 | * get a race between multiple pushes for the same sequence they will block on | ||
378 | * the first one and then abort, hence avoiding needless pushes. | ||
432 | */ | 379 | */ |
433 | int | 380 | STATIC int |
434 | xlog_cil_push( | 381 | xlog_cil_push( |
435 | struct log *log, | 382 | struct log *log, |
436 | int push_now) | 383 | xfs_lsn_t push_seq) |
437 | { | 384 | { |
438 | struct xfs_cil *cil = log->l_cilp; | 385 | struct xfs_cil *cil = log->l_cilp; |
439 | struct xfs_log_vec *lv; | 386 | struct xfs_log_vec *lv; |
@@ -453,12 +400,20 @@ xlog_cil_push( | |||
453 | if (!cil) | 400 | if (!cil) |
454 | return 0; | 401 | return 0; |
455 | 402 | ||
403 | ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); | ||
404 | |||
456 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 405 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
457 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 406 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
458 | 407 | ||
459 | /* lock out transaction commit, but don't block on background push */ | 408 | /* |
409 | * Lock out transaction commit, but don't block for background pushes | ||
410 | * unless we are well over the CIL space limit. See the definition of | ||
411 | * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic | ||
412 | * used here. | ||
413 | */ | ||
460 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | 414 | if (!down_write_trylock(&cil->xc_ctx_lock)) { |
461 | if (!push_now) | 415 | if (!push_seq && |
416 | cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) | ||
462 | goto out_free_ticket; | 417 | goto out_free_ticket; |
463 | down_write(&cil->xc_ctx_lock); | 418 | down_write(&cil->xc_ctx_lock); |
464 | } | 419 | } |
@@ -469,7 +424,11 @@ xlog_cil_push( | |||
469 | goto out_skip; | 424 | goto out_skip; |
470 | 425 | ||
471 | /* check for spurious background flush */ | 426 | /* check for spurious background flush */ |
472 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | 427 | if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) |
428 | goto out_skip; | ||
429 | |||
430 | /* check for a previously pushed seqeunce */ | ||
431 | if (push_seq && push_seq < cil->xc_ctx->sequence) | ||
473 | goto out_skip; | 432 | goto out_skip; |
474 | 433 | ||
475 | /* | 434 | /* |
@@ -515,6 +474,13 @@ xlog_cil_push( | |||
515 | cil->xc_ctx = new_ctx; | 474 | cil->xc_ctx = new_ctx; |
516 | 475 | ||
517 | /* | 476 | /* |
477 | * mirror the new sequence into the cil structure so that we can do | ||
478 | * unlocked checks against the current sequence in log forces without | ||
479 | * risking deferencing a freed context pointer. | ||
480 | */ | ||
481 | cil->xc_current_sequence = new_ctx->sequence; | ||
482 | |||
483 | /* | ||
518 | * The switch is now done, so we can drop the context lock and move out | 484 | * The switch is now done, so we can drop the context lock and move out |
519 | * of a shared context. We can't just go straight to the commit record, | 485 | * of a shared context. We can't just go straight to the commit record, |
520 | * though - we need to synchronise with previous and future commits so | 486 | * though - we need to synchronise with previous and future commits so |
@@ -626,6 +592,102 @@ out_abort: | |||
626 | } | 592 | } |
627 | 593 | ||
628 | /* | 594 | /* |
595 | * Commit a transaction with the given vector to the Committed Item List. | ||
596 | * | ||
597 | * To do this, we need to format the item, pin it in memory if required and | ||
598 | * account for the space used by the transaction. Once we have done that we | ||
599 | * need to release the unused reservation for the transaction, attach the | ||
600 | * transaction to the checkpoint context so we carry the busy extents through | ||
601 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
602 | * | ||
603 | * For more specific information about the order of operations in | ||
604 | * xfs_log_commit_cil() please refer to the comments in | ||
605 | * xfs_trans_commit_iclog(). | ||
606 | * | ||
607 | * Called with the context lock already held in read mode to lock out | ||
608 | * background commit, returns without it held once background commits are | ||
609 | * allowed again. | ||
610 | */ | ||
611 | int | ||
612 | xfs_log_commit_cil( | ||
613 | struct xfs_mount *mp, | ||
614 | struct xfs_trans *tp, | ||
615 | struct xfs_log_vec *log_vector, | ||
616 | xfs_lsn_t *commit_lsn, | ||
617 | int flags) | ||
618 | { | ||
619 | struct log *log = mp->m_log; | ||
620 | int log_flags = 0; | ||
621 | int push = 0; | ||
622 | |||
623 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
624 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
625 | |||
626 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
627 | xlog_cil_free_logvec(log_vector); | ||
628 | return XFS_ERROR(EIO); | ||
629 | } | ||
630 | |||
631 | /* | ||
632 | * do all the hard work of formatting items (including memory | ||
633 | * allocation) outside the CIL context lock. This prevents stalling CIL | ||
634 | * pushes when we are low on memory and a transaction commit spends a | ||
635 | * lot of time in memory reclaim. | ||
636 | */ | ||
637 | xlog_cil_format_items(log, log_vector); | ||
638 | |||
639 | /* lock out background commit */ | ||
640 | down_read(&log->l_cilp->xc_ctx_lock); | ||
641 | xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
642 | |||
643 | /* check we didn't blow the reservation */ | ||
644 | if (tp->t_ticket->t_curr_res < 0) | ||
645 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
646 | |||
647 | /* attach the transaction to the CIL if it has any busy extents */ | ||
648 | if (!list_empty(&tp->t_busy)) { | ||
649 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
650 | list_splice_init(&tp->t_busy, | ||
651 | &log->l_cilp->xc_ctx->busy_extents); | ||
652 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
653 | } | ||
654 | |||
655 | tp->t_commit_lsn = *commit_lsn; | ||
656 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
657 | xfs_trans_unreserve_and_mod_sb(tp); | ||
658 | |||
659 | /* | ||
660 | * Once all the items of the transaction have been copied to the CIL, | ||
661 | * the items can be unlocked and freed. | ||
662 | * | ||
663 | * This needs to be done before we drop the CIL context lock because we | ||
664 | * have to update state in the log items and unlock them before they go | ||
665 | * to disk. If we don't, then the CIL checkpoint can race with us and | ||
666 | * we can run checkpoint completion before we've updated and unlocked | ||
667 | * the log items. This affects (at least) processing of stale buffers, | ||
668 | * inodes and EFIs. | ||
669 | */ | ||
670 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
671 | |||
672 | /* check for background commit before unlock */ | ||
673 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
674 | push = 1; | ||
675 | |||
676 | up_read(&log->l_cilp->xc_ctx_lock); | ||
677 | |||
678 | /* | ||
679 | * We need to push CIL every so often so we don't cache more than we | ||
680 | * can fit in the log. The limit really is that a checkpoint can't be | ||
681 | * more than half the log (the current checkpoint is not allowed to | ||
682 | * overwrite the previous checkpoint), but commit latency and memory | ||
683 | * usage limit this to a smaller size in most cases. | ||
684 | */ | ||
685 | if (push) | ||
686 | xlog_cil_push(log, 0); | ||
687 | return 0; | ||
688 | } | ||
689 | |||
690 | /* | ||
629 | * Conditionally push the CIL based on the sequence passed in. | 691 | * Conditionally push the CIL based on the sequence passed in. |
630 | * | 692 | * |
631 | * We only need to push if we haven't already pushed the sequence | 693 | * We only need to push if we haven't already pushed the sequence |
@@ -639,39 +701,34 @@ out_abort: | |||
639 | * commit lsn is there. It'll be empty, so this is broken for now. | 701 | * commit lsn is there. It'll be empty, so this is broken for now. |
640 | */ | 702 | */ |
641 | xfs_lsn_t | 703 | xfs_lsn_t |
642 | xlog_cil_push_lsn( | 704 | xlog_cil_force_lsn( |
643 | struct log *log, | 705 | struct log *log, |
644 | xfs_lsn_t push_seq) | 706 | xfs_lsn_t sequence) |
645 | { | 707 | { |
646 | struct xfs_cil *cil = log->l_cilp; | 708 | struct xfs_cil *cil = log->l_cilp; |
647 | struct xfs_cil_ctx *ctx; | 709 | struct xfs_cil_ctx *ctx; |
648 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; | 710 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; |
649 | 711 | ||
650 | restart: | 712 | ASSERT(sequence <= cil->xc_current_sequence); |
651 | down_write(&cil->xc_ctx_lock); | 713 | |
652 | ASSERT(push_seq <= cil->xc_ctx->sequence); | 714 | /* |
653 | 715 | * check to see if we need to force out the current context. | |
654 | /* check to see if we need to force out the current context */ | 716 | * xlog_cil_push() handles racing pushes for the same sequence, |
655 | if (push_seq == cil->xc_ctx->sequence) { | 717 | * so no need to deal with it here. |
656 | up_write(&cil->xc_ctx_lock); | 718 | */ |
657 | xlog_cil_push(log, 1); | 719 | if (sequence == cil->xc_current_sequence) |
658 | goto restart; | 720 | xlog_cil_push(log, sequence); |
659 | } | ||
660 | 721 | ||
661 | /* | 722 | /* |
662 | * See if we can find a previous sequence still committing. | 723 | * See if we can find a previous sequence still committing. |
663 | * We can drop the flush lock as soon as we have the cil lock | ||
664 | * because we are now only comparing contexts protected by | ||
665 | * the cil lock. | ||
666 | * | ||
667 | * We need to wait for all previous sequence commits to complete | 724 | * We need to wait for all previous sequence commits to complete |
668 | * before allowing the force of push_seq to go ahead. Hence block | 725 | * before allowing the force of push_seq to go ahead. Hence block |
669 | * on commits for those as well. | 726 | * on commits for those as well. |
670 | */ | 727 | */ |
728 | restart: | ||
671 | spin_lock(&cil->xc_cil_lock); | 729 | spin_lock(&cil->xc_cil_lock); |
672 | up_write(&cil->xc_ctx_lock); | ||
673 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | 730 | list_for_each_entry(ctx, &cil->xc_committing, committing) { |
674 | if (ctx->sequence > push_seq) | 731 | if (ctx->sequence > sequence) |
675 | continue; | 732 | continue; |
676 | if (!ctx->commit_lsn) { | 733 | if (!ctx->commit_lsn) { |
677 | /* | 734 | /* |
@@ -681,7 +738,7 @@ restart: | |||
681 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 738 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); |
682 | goto restart; | 739 | goto restart; |
683 | } | 740 | } |
684 | if (ctx->sequence != push_seq) | 741 | if (ctx->sequence != sequence) |
685 | continue; | 742 | continue; |
686 | /* found it! */ | 743 | /* found it! */ |
687 | commit_lsn = ctx->commit_lsn; | 744 | commit_lsn = ctx->commit_lsn; |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 8c072618965c..edcdfe01617f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -422,16 +422,17 @@ struct xfs_cil { | |||
422 | struct rw_semaphore xc_ctx_lock; | 422 | struct rw_semaphore xc_ctx_lock; |
423 | struct list_head xc_committing; | 423 | struct list_head xc_committing; |
424 | sv_t xc_commit_wait; | 424 | sv_t xc_commit_wait; |
425 | xfs_lsn_t xc_current_sequence; | ||
425 | }; | 426 | }; |
426 | 427 | ||
427 | /* | 428 | /* |
428 | * The amount of log space we should the CIL to aggregate is difficult to size. | 429 | * The amount of log space we allow the CIL to aggregate is difficult to size. |
429 | * Whatever we chose we have to make we can get a reservation for the log space | 430 | * Whatever we choose, we have to make sure we can get a reservation for the |
430 | * effectively, that it is large enough to capture sufficient relogging to | 431 | * log space effectively, that it is large enough to capture sufficient |
431 | * reduce log buffer IO significantly, but it is not too large for the log or | 432 | * relogging to reduce log buffer IO significantly, but it is not too large for |
432 | * induces too much latency when writing out through the iclogs. We track both | 433 | * the log or induces too much latency when writing out through the iclogs. We |
433 | * space consumed and the number of vectors in the checkpoint context, so we | 434 | * track both space consumed and the number of vectors in the checkpoint |
434 | * need to decide which to use for limiting. | 435 | * context, so we need to decide which to use for limiting. |
435 | * | 436 | * |
436 | * Every log buffer we write out during a push needs a header reserved, which | 437 | * Every log buffer we write out during a push needs a header reserved, which |
437 | * is at least one sector and more for v2 logs. Hence we need a reservation of | 438 | * is at least one sector and more for v2 logs. Hence we need a reservation of |
@@ -458,16 +459,21 @@ struct xfs_cil { | |||
458 | * checkpoint transaction ticket is specific to the checkpoint context, rather | 459 | * checkpoint transaction ticket is specific to the checkpoint context, rather |
459 | * than the CIL itself. | 460 | * than the CIL itself. |
460 | * | 461 | * |
461 | * With dynamic reservations, we can basically make up arbitrary limits for the | 462 | * With dynamic reservations, we can effectively make up arbitrary limits for |
462 | * checkpoint size so long as they don't violate any other size rules. Hence | 463 | * the checkpoint size so long as they don't violate any other size rules. |
463 | * the initial maximum size for the checkpoint transaction will be set to a | 464 | * Recovery imposes a rule that no transaction exceed half the log, so we are |
464 | * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit | 465 | * limited by that. Furthermore, the log transaction reservation subsystem |
465 | * right now based on the latency of writing out a large amount of data through | 466 | * tries to keep 25% of the log free, so we need to keep below that limit or we |
466 | * the circular iclog buffers. | 467 | * risk running out of free log space to start any new transactions. |
468 | * | ||
469 | * In order to keep background CIL push efficient, we will set a lower | ||
470 | * threshold at which background pushing is attempted without blocking current | ||
471 | * transaction commits. A separate, higher bound defines when CIL pushes are | ||
472 | * enforced to ensure we stay within our maximum checkpoint size bounds. | ||
473 | * threshold, yet give us plenty of space for aggregation on large logs. | ||
467 | */ | 474 | */ |
468 | 475 | #define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) | |
469 | #define XLOG_CIL_SPACE_LIMIT(log) \ | 476 | #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) |
470 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
471 | 477 | ||
472 | /* | 478 | /* |
473 | * The reservation head lsn is not made up of a cycle number and block number. | 479 | * The reservation head lsn is not made up of a cycle number and block number. |
@@ -562,8 +568,16 @@ int xlog_cil_init(struct log *log); | |||
562 | void xlog_cil_init_post_recovery(struct log *log); | 568 | void xlog_cil_init_post_recovery(struct log *log); |
563 | void xlog_cil_destroy(struct log *log); | 569 | void xlog_cil_destroy(struct log *log); |
564 | 570 | ||
565 | int xlog_cil_push(struct log *log, int push_now); | 571 | /* |
566 | xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); | 572 | * CIL force routines |
573 | */ | ||
574 | xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); | ||
575 | |||
576 | static inline void | ||
577 | xlog_cil_force(struct log *log) | ||
578 | { | ||
579 | xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); | ||
580 | } | ||
567 | 581 | ||
568 | /* | 582 | /* |
569 | * Unmount record type is used as a pseudo transaction type for the ticket. | 583 | * Unmount record type is used as a pseudo transaction type for the ticket. |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fdca7416c754..1c47edaea0d2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1167,7 +1167,7 @@ xfs_trans_del_item( | |||
1167 | * Unlock all of the items of a transaction and free all the descriptors | 1167 | * Unlock all of the items of a transaction and free all the descriptors |
1168 | * of that transaction. | 1168 | * of that transaction. |
1169 | */ | 1169 | */ |
1170 | STATIC void | 1170 | void |
1171 | xfs_trans_free_items( | 1171 | xfs_trans_free_items( |
1172 | struct xfs_trans *tp, | 1172 | struct xfs_trans *tp, |
1173 | xfs_lsn_t commit_lsn, | 1173 | xfs_lsn_t commit_lsn, |
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil( | |||
1653 | return error; | 1653 | return error; |
1654 | 1654 | ||
1655 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1655 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1656 | |||
1657 | /* xfs_trans_free_items() unlocks them first */ | ||
1658 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
1659 | xfs_trans_free(tp); | 1656 | xfs_trans_free(tp); |
1660 | return 0; | 1657 | return 0; |
1661 | } | 1658 | } |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index e2d93d8ead7b..62da86c90de5 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -25,7 +25,8 @@ struct xfs_trans; | |||
25 | 25 | ||
26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); | 26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); |
27 | void xfs_trans_del_item(struct xfs_log_item *); | 27 | void xfs_trans_del_item(struct xfs_log_item *); |
28 | 28 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, | |
29 | int flags); | ||
29 | void xfs_trans_item_committed(struct xfs_log_item *lip, | 30 | void xfs_trans_item_committed(struct xfs_log_item *lip, |
30 | xfs_lsn_t commit_lsn, int aborted); | 31 | xfs_lsn_t commit_lsn, int aborted); |
31 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 66d585c6917c..4c7c7bfb2b2f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space( | |||
2299 | e = allocatesize_fsb; | 2299 | e = allocatesize_fsb; |
2300 | } | 2300 | } |
2301 | 2301 | ||
2302 | /* | ||
2303 | * The transaction reservation is limited to a 32-bit block | ||
2304 | * count, hence we need to limit the number of blocks we are | ||
2305 | * trying to reserve to avoid an overflow. We can't allocate | ||
2306 | * more than @nimaps extents, and an extent is limited on disk | ||
2307 | * to MAXEXTLEN (21 bits), so use that to enforce the limit. | ||
2308 | */ | ||
2309 | resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); | ||
2302 | if (unlikely(rt)) { | 2310 | if (unlikely(rt)) { |
2303 | resrtextents = qblocks = (uint)(e - s); | 2311 | resrtextents = qblocks = resblks; |
2304 | resrtextents /= mp->m_sb.sb_rextsize; | 2312 | resrtextents /= mp->m_sb.sb_rextsize; |
2305 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); | 2313 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); |
2306 | quota_flag = XFS_QMOPT_RES_RTBLKS; | 2314 | quota_flag = XFS_QMOPT_RES_RTBLKS; |
2307 | } else { | 2315 | } else { |
2308 | resrtextents = 0; | 2316 | resrtextents = 0; |
2309 | resblks = qblocks = \ | 2317 | resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); |
2310 | XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); | ||
2311 | quota_flag = XFS_QMOPT_RES_REGBLKS; | 2318 | quota_flag = XFS_QMOPT_RES_REGBLKS; |
2312 | } | 2319 | } |
2313 | 2320 | ||