aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yan, Zheng <zyan@redhat.com> 2017-12-14 22:15:36 -0500
committer Ilya Dryomov <idryomov@gmail.com> 2018-01-29 12:36:08 -0500
commit 5d988308283ecf062fa88f20ae05c52cce0bcdca (patch)
tree fdc84f449e10eab2f74123d38fc0f3dcf26e5b54
parent 5495c2d04f85da09512f5f346ed24dc0261d905d (diff)
ceph: track read contexts in ceph_file_info
Previously, ceph_read_iter() used current->journal_info to pass context info to ceph_readpages(), so that ceph_readpages() could distinguish read(2) from readahead(2)/fadvise(2)/madvise(2). The problem is that a page fault can happen while copying data to userspace memory. The page fault may call another filesystem's page_mkwrite() if the userspace memory is mapped to a file, and that other filesystem may also want to use current->journal_info. The fix is to define an on-stack data structure in ceph_read_iter() and add it to a context list in ceph_file_info. ceph_readpages() then searches the list to find whether there is a context that belongs to the current thread.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- fs/ceph/addr.c 19
-rw-r--r-- fs/ceph/file.c 10
-rw-r--r-- fs/ceph/super.h 46
3 files changed, 66 insertions, 9 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index dbf07051aacd..78a1208b878e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -299,7 +299,8 @@ unlock:
299 * start an async read(ahead) operation. return nr_pages we submitted 299 * start an async read(ahead) operation. return nr_pages we submitted
300 * a read for on success, or negative error code. 300 * a read for on success, or negative error code.
301 */ 301 */
302static int start_read(struct inode *inode, struct list_head *page_list, int max) 302static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
303 struct list_head *page_list, int max)
303{ 304{
304 struct ceph_osd_client *osdc = 305 struct ceph_osd_client *osdc =
305 &ceph_inode_to_client(inode)->client->osdc; 306 &ceph_inode_to_client(inode)->client->osdc;
@@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
316 int got = 0; 317 int got = 0;
317 int ret = 0; 318 int ret = 0;
318 319
319 if (!current->journal_info) { 320 if (!rw_ctx) {
320 /* caller of readpages does not hold buffer and read caps 321 /* caller of readpages does not hold buffer and read caps
321 * (fadvise, madvise and readahead cases) */ 322 * (fadvise, madvise and readahead cases) */
322 int want = CEPH_CAP_FILE_CACHE; 323 int want = CEPH_CAP_FILE_CACHE;
@@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
437{ 438{
438 struct inode *inode = file_inode(file); 439 struct inode *inode = file_inode(file);
439 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 440 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
441 struct ceph_file_info *ci = file->private_data;
442 struct ceph_rw_context *rw_ctx;
440 int rc = 0; 443 int rc = 0;
441 int max = 0; 444 int max = 0;
442 445
@@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
449 if (rc == 0) 452 if (rc == 0)
450 goto out; 453 goto out;
451 454
455 rw_ctx = ceph_find_rw_context(ci);
452 max = fsc->mount_options->rsize >> PAGE_SHIFT; 456 max = fsc->mount_options->rsize >> PAGE_SHIFT;
453 dout("readpages %p file %p nr_pages %d max %d\n", 457 dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
454 inode, file, nr_pages, max); 458 inode, file, rw_ctx, nr_pages, max);
455 while (!list_empty(page_list)) { 459 while (!list_empty(page_list)) {
456 rc = start_read(inode, page_list, max); 460 rc = start_read(inode, rw_ctx, page_list, max);
457 if (rc < 0) 461 if (rc < 0)
458 goto out; 462 goto out;
459 } 463 }
@@ -1450,9 +1454,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
1450 1454
1451 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || 1455 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
1452 ci->i_inline_version == CEPH_INLINE_NONE) { 1456 ci->i_inline_version == CEPH_INLINE_NONE) {
1453 current->journal_info = vma->vm_file; 1457 CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
1458 ceph_add_rw_context(fi, &rw_ctx);
1454 ret = filemap_fault(vmf); 1459 ret = filemap_fault(vmf);
1455 current->journal_info = NULL; 1460 ceph_del_rw_context(fi, &rw_ctx);
1456 } else 1461 } else
1457 ret = -EAGAIN; 1462 ret = -EAGAIN;
1458 1463
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 770dd3b413e4..6639926eed4e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
181 return -ENOMEM; 181 return -ENOMEM;
182 } 182 }
183 cf->fmode = fmode; 183 cf->fmode = fmode;
184
185 spin_lock_init(&cf->rw_contexts_lock);
186 INIT_LIST_HEAD(&cf->rw_contexts);
187
184 cf->next_offset = 2; 188 cf->next_offset = 2;
185 cf->readdir_cache_idx = -1; 189 cf->readdir_cache_idx = -1;
186 file->private_data = cf; 190 file->private_data = cf;
@@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file)
464 ceph_mdsc_put_request(cf->last_readdir); 468 ceph_mdsc_put_request(cf->last_readdir);
465 kfree(cf->last_name); 469 kfree(cf->last_name);
466 kfree(cf->dir_info); 470 kfree(cf->dir_info);
471 WARN_ON(!list_empty(&cf->rw_contexts));
467 kmem_cache_free(ceph_file_cachep, cf); 472 kmem_cache_free(ceph_file_cachep, cf);
468 473
469 /* wake up anyone waiting for caps on this inode */ 474 /* wake up anyone waiting for caps on this inode */
@@ -1199,12 +1204,13 @@ again:
1199 retry_op = READ_INLINE; 1204 retry_op = READ_INLINE;
1200 } 1205 }
1201 } else { 1206 } else {
1207 CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
1202 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 1208 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
1203 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 1209 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
1204 ceph_cap_string(got)); 1210 ceph_cap_string(got));
1205 current->journal_info = filp; 1211 ceph_add_rw_context(fi, &rw_ctx);
1206 ret = generic_file_read_iter(iocb, to); 1212 ret = generic_file_read_iter(iocb, to);
1207 current->journal_info = NULL; 1213 ceph_del_rw_context(fi, &rw_ctx);
1208 } 1214 }
1209 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 1215 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
1210 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 1216 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75701c199b2b..601100da738f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -669,6 +669,9 @@ struct ceph_file_info {
669 short fmode; /* initialized on open */ 669 short fmode; /* initialized on open */
670 short flags; /* CEPH_F_* */ 670 short flags; /* CEPH_F_* */
671 671
672 spinlock_t rw_contexts_lock;
673 struct list_head rw_contexts;
674
672 /* readdir: position within the dir */ 675 /* readdir: position within the dir */
673 u32 frag; 676 u32 frag;
674 struct ceph_mds_request *last_readdir; 677 struct ceph_mds_request *last_readdir;
@@ -685,6 +688,49 @@ struct ceph_file_info {
685 int dir_info_len; 688 int dir_info_len;
686}; 689};
687 690
691struct ceph_rw_context {
692 struct list_head list;
693 struct task_struct *thread;
694 int caps;
695};
696
697#define CEPH_DEFINE_RW_CONTEXT(_name, _caps) \
698 struct ceph_rw_context _name = { \
699 .thread = current, \
700 .caps = _caps, \
701 }
702
703static inline void ceph_add_rw_context(struct ceph_file_info *cf,
704 struct ceph_rw_context *ctx)
705{
706 spin_lock(&cf->rw_contexts_lock);
707 list_add(&ctx->list, &cf->rw_contexts);
708 spin_unlock(&cf->rw_contexts_lock);
709}
710
711static inline void ceph_del_rw_context(struct ceph_file_info *cf,
712 struct ceph_rw_context *ctx)
713{
714 spin_lock(&cf->rw_contexts_lock);
715 list_del(&ctx->list);
716 spin_unlock(&cf->rw_contexts_lock);
717}
718
719static inline struct ceph_rw_context*
720ceph_find_rw_context(struct ceph_file_info *cf)
721{
722 struct ceph_rw_context *ctx, *found = NULL;
723 spin_lock(&cf->rw_contexts_lock);
724 list_for_each_entry(ctx, &cf->rw_contexts, list) {
725 if (ctx->thread == current) {
726 found = ctx;
727 break;
728 }
729 }
730 spin_unlock(&cf->rw_contexts_lock);
731 return found;
732}
733
688struct ceph_readdir_cache_control { 734struct ceph_readdir_cache_control {
689 struct page *page; 735 struct page *page;
690 struct dentry **dentries; 736 struct dentry **dentries;