diff options
author | Yan, Zheng <zyan@redhat.com> | 2017-12-14 22:15:36 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2018-01-29 12:36:08 -0500 |
commit | 5d988308283ecf062fa88f20ae05c52cce0bcdca (patch) | |
tree | fdc84f449e10eab2f74123d38fc0f3dcf26e5b54 /fs/ceph/file.c | |
parent | 5495c2d04f85da09512f5f346ed24dc0261d905d (diff) |
ceph: track read contexts in ceph_file_info
Previously ceph_read_iter() used current->journal_info to pass context info
to ceph_readpages(), so that ceph_readpages() could distinguish read(2)
from readahead(2)/fadvise(2)/madvise(2). The problem is that a page fault
can happen when copying data to userspace memory. The page fault may call
another filesystem's page_mkwrite() if the userspace memory is mapped to a
file. The latter filesystem may also want to use current->journal_info.
The fix is to define an on-stack data structure in ceph_read_iter() and add
it to the context list in ceph_file_info. ceph_readpages() searches the list
to find whether there is a context that belongs to the current thread.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 770dd3b413e4..6639926eed4e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
181 | return -ENOMEM; | 181 | return -ENOMEM; |
182 | } | 182 | } |
183 | cf->fmode = fmode; | 183 | cf->fmode = fmode; |
184 | |||
185 | spin_lock_init(&cf->rw_contexts_lock); | ||
186 | INIT_LIST_HEAD(&cf->rw_contexts); | ||
187 | |||
184 | cf->next_offset = 2; | 188 | cf->next_offset = 2; |
185 | cf->readdir_cache_idx = -1; | 189 | cf->readdir_cache_idx = -1; |
186 | file->private_data = cf; | 190 | file->private_data = cf; |
@@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file) | |||
464 | ceph_mdsc_put_request(cf->last_readdir); | 468 | ceph_mdsc_put_request(cf->last_readdir); |
465 | kfree(cf->last_name); | 469 | kfree(cf->last_name); |
466 | kfree(cf->dir_info); | 470 | kfree(cf->dir_info); |
471 | WARN_ON(!list_empty(&cf->rw_contexts)); | ||
467 | kmem_cache_free(ceph_file_cachep, cf); | 472 | kmem_cache_free(ceph_file_cachep, cf); |
468 | 473 | ||
469 | /* wake up anyone waiting for caps on this inode */ | 474 | /* wake up anyone waiting for caps on this inode */ |
@@ -1199,12 +1204,13 @@ again: | |||
1199 | retry_op = READ_INLINE; | 1204 | retry_op = READ_INLINE; |
1200 | } | 1205 | } |
1201 | } else { | 1206 | } else { |
1207 | CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); | ||
1202 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 1208 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
1203 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, | 1209 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
1204 | ceph_cap_string(got)); | 1210 | ceph_cap_string(got)); |
1205 | current->journal_info = filp; | 1211 | ceph_add_rw_context(fi, &rw_ctx); |
1206 | ret = generic_file_read_iter(iocb, to); | 1212 | ret = generic_file_read_iter(iocb, to); |
1207 | current->journal_info = NULL; | 1213 | ceph_del_rw_context(fi, &rw_ctx); |
1208 | } | 1214 | } |
1209 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", | 1215 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", |
1210 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); | 1216 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); |