From 7421ab8041d98363edfb85955fa3b9849ffae366 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 7 Nov 2010 09:07:15 -0800 Subject: ceph: fix open for write on clustered mds Normally when we open a file we already have a cap, and simply update the wanted set. However, if we open a file for write, but don't have an auth cap, that doesn't work; we need to open a new cap with the auth MDS. Only reuse existing caps if we are opening for read or the existing cap is auth. Signed-off-by: Sage Weil --- fs/ceph/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf3690..87ee944724f8 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) } /* - * No need to block if we have any caps. Update wanted set + * No need to block if we have caps on the auth MDS (for + * write) or any MDS (for read). Update wanted set * asynchronously. */ spin_lock(&inode->i_lock); - if (__ceph_is_any_real_caps(ci)) { + if (__ceph_is_any_real_caps(ci) && + (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { int mds_wanted = __ceph_caps_mds_wanted(ci); int issued = __ceph_caps_issued(ci, NULL); -- cgit v1.2.2 From e98b6fed84d0f0155d7b398e0dfeac74c792f2d0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:24:53 -0800 Subject: ceph: fix comment, remove extraneous args The offset/length arguments aren't used. Signed-off-by: Sage Weil --- fs/ceph/file.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 87ee944724f8..603fd00af0a6 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -376,21 +376,19 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, dout("sync_read on file %p %llu~%u %s\n", file, off, len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); - if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages, off, len); - - /* - * flush any page cache pages in this range. this - * will make concurrent normal and O_DIRECT io slow, - * but it will at least behave sensibly when they are - * in sequence. - */ - } else { + if (file->f_flags & O_DIRECT) + pages = ceph_get_direct_page_vector(data, num_pages); + else pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); - } if (IS_ERR(pages)) return PTR_ERR(pages); + /* + * flush any page cache pages in this range. this + * will make concurrent normal and sync io slow, + * but it will at least behave sensibly when they are + * in sequence. + */ ret = filemap_write_and_wait(inode->i_mapping); if (ret < 0) goto done; -- cgit v1.2.2 From b7495fc2ff941db6a118a93ab8d61149e3f4cef8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:43:12 -0800 Subject: ceph: make page alignment explicit in osd interface We used to infer alignment of IOs within a page based on the file offset, which assumed they matched. This broke with direct IO that was not aligned to pages (e.g., 512-byte aligned IO). We were also trusting the alignment specified in the OSD reply, which could have been adjusted by the server. Explicitly specify the page alignment when setting up OSD IO requests. Signed-off-by: Sage Weil --- fs/ceph/file.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 603fd00af0a6..8d79b8912e31 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -282,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file) static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, - int *checkeof) + int *checkeof, bool align_to_pages) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; + int io_align, page_align; int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; @@ -302,14 +303,19 @@ static int striped_read(struct inode *inode, page_pos = pages; pages_left = num_pages; read = 0; + io_align = off & ~PAGE_MASK; more: + if (align_to_pages) + page_align = (pos - io_align) & ~PAGE_MASK; + else + page_align = pos & ~PAGE_MASK; this_len = left; ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, - page_pos, pages_left); + page_pos, pages_left, page_align); hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; if (ret == -ENOENT) @@ -393,7 +399,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, if (ret < 0) goto done; - ret = striped_read(inode, off, len, pages, num_pages, checkeof); + ret = striped_read(inode, off, len, pages, num_pages, checkeof, + file->f_flags & O_DIRECT); if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) ret = ceph_copy_page_vector_to_user(pages, data, off, ret); @@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, int flags; int do_sync = 0; int check_caps = 0; + int page_align, io_align; int ret; struct timespec mtime = CURRENT_TIME; @@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, else pos = *offset; + io_align = pos & ~PAGE_MASK; + ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, */ more: len = left; + if (file->f_flags & O_DIRECT) + /* write from beginning of first page, regardless of + io alignment */ + page_align = (pos - io_align) & ~PAGE_MASK; + else + page_align = pos & ~PAGE_MASK; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), pos, &len, CEPH_OSD_OP_WRITE, flags, ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2); + &mtime, false, 2, page_align); if (!req) return -ENOMEM; num_pages = calc_pages_for(pos, len); if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages, pos, len); + pages = ceph_get_direct_page_vector(data, num_pages); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; -- cgit v1.2.2