about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/ceph/addr.c 4
-rw-r--r-- fs/ceph/caps.c 41
-rw-r--r-- fs/ceph/dir.c 2
-rw-r--r-- fs/ceph/file.c 35
-rw-r--r-- fs/ceph/inode.c 17
-rw-r--r-- fs/ceph/ioctl.c 12
-rw-r--r-- fs/ceph/mds_client.c 34
-rw-r--r-- fs/ceph/super.h 2
8 files changed, 119 insertions, 28 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5318a3b704f6..722585cd5c7e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -150,10 +150,6 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
150 struct ceph_inode_info *ci; 150 struct ceph_inode_info *ci;
151 struct ceph_snap_context *snapc = page_snap_context(page); 151 struct ceph_snap_context *snapc = page_snap_context(page);
152 152
153 BUG_ON(!PageLocked(page));
154 BUG_ON(!PagePrivate(page));
155 BUG_ON(!page->mapping);
156
157 inode = page->mapping->host; 153 inode = page->mapping->host;
158 154
159 /* 155 /*
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 25442b40c25a..430121a795bd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2334,6 +2334,38 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2334} 2334}
2335 2335
2336/* 2336/*
2337 * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
2338 */
2339static void invalidate_aliases(struct inode *inode)
2340{
2341 struct dentry *dn, *prev = NULL;
2342
2343 dout("invalidate_aliases inode %p\n", inode);
2344 d_prune_aliases(inode);
2345 /*
2346 * For a non-directory inode, d_find_alias() only returns a
2347 * connected dentry. After calling d_delete(), the dentry
2348 * becomes disconnected.
2349 *
2350 * For a directory inode, d_find_alias() can only return a
2351 * disconnected dentry. But a directory inode should have
2352 * one alias at most.
2353 */
2354 while ((dn = d_find_alias(inode))) {
2355 if (dn == prev) {
2356 dput(dn);
2357 break;
2358 }
2359 d_delete(dn);
2360 if (prev)
2361 dput(prev);
2362 prev = dn;
2363 }
2364 if (prev)
2365 dput(prev);
2366}
2367
2368/*
2337 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2369 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2338 * actually be a revocation if it specifies a smaller cap set.) 2370 * actually be a revocation if it specifies a smaller cap set.)
2339 * 2371 *
@@ -2363,6 +2395,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2363 int writeback = 0; 2395 int writeback = 0;
2364 int revoked_rdcache = 0; 2396 int revoked_rdcache = 0;
2365 int queue_invalidate = 0; 2397 int queue_invalidate = 0;
2398 int deleted_inode = 0;
2366 2399
2367 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2400 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2368 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2401 inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2407,8 +2440,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2407 from_kgid(&init_user_ns, inode->i_gid)); 2440 from_kgid(&init_user_ns, inode->i_gid));
2408 } 2441 }
2409 2442
2410 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2443 if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
2411 set_nlink(inode, le32_to_cpu(grant->nlink)); 2444 set_nlink(inode, le32_to_cpu(grant->nlink));
2445 if (inode->i_nlink == 0 &&
2446 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
2447 deleted_inode = 1;
2448 }
2412 2449
2413 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { 2450 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
2414 int len = le32_to_cpu(grant->xattr_len); 2451 int len = le32_to_cpu(grant->xattr_len);
@@ -2517,6 +2554,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2517 ceph_queue_writeback(inode); 2554 ceph_queue_writeback(inode);
2518 if (queue_invalidate) 2555 if (queue_invalidate)
2519 ceph_queue_invalidate(inode); 2556 ceph_queue_invalidate(inode);
2557 if (deleted_inode)
2558 invalidate_aliases(inode);
2520 if (wake) 2559 if (wake)
2521 wake_up_all(&ci->i_cap_wq); 2560 wake_up_all(&ci->i_cap_wq);
2522 2561
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a40ceda47a32..868b61d56cac 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -793,6 +793,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
793 req->r_locked_dir = dir; 793 req->r_locked_dir = dir;
794 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 794 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
795 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 795 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
796 /* release LINK_SHARED on source inode (mds will lock it) */
797 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
796 err = ceph_mdsc_do_request(mdsc, dir, req); 798 err = ceph_mdsc_do_request(mdsc, dir, req);
797 if (err) { 799 if (err) {
798 d_drop(dentry); 800 d_drop(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 2ddf061c1c4a..bc0735498d29 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -313,9 +313,9 @@ static int striped_read(struct inode *inode,
313{ 313{
314 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 314 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
315 struct ceph_inode_info *ci = ceph_inode(inode); 315 struct ceph_inode_info *ci = ceph_inode(inode);
316 u64 pos, this_len; 316 u64 pos, this_len, left;
317 int io_align, page_align; 317 int io_align, page_align;
318 int left, pages_left; 318 int pages_left;
319 int read; 319 int read;
320 struct page **page_pos; 320 struct page **page_pos;
321 int ret; 321 int ret;
@@ -346,7 +346,7 @@ more:
346 ret = 0; 346 ret = 0;
347 hit_stripe = this_len < left; 347 hit_stripe = this_len < left;
348 was_short = ret >= 0 && ret < this_len; 348 was_short = ret >= 0 && ret < this_len;
349 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, 349 dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
350 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); 350 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
351 351
352 if (ret > 0) { 352 if (ret > 0) {
@@ -378,7 +378,7 @@ more:
378 if (pos + left > inode->i_size) 378 if (pos + left > inode->i_size)
379 left = inode->i_size - pos; 379 left = inode->i_size - pos;
380 380
381 dout("zero tail %d\n", left); 381 dout("zero tail %llu\n", left);
382 ceph_zero_page_vector_range(page_align + read, left, 382 ceph_zero_page_vector_range(page_align + read, left,
383 pages); 383 pages);
384 read += left; 384 read += left;
@@ -659,7 +659,6 @@ again:
659 659
660 if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || 660 if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
661 (iocb->ki_filp->f_flags & O_DIRECT) || 661 (iocb->ki_filp->f_flags & O_DIRECT) ||
662 (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
663 (fi->flags & CEPH_F_SYNC)) 662 (fi->flags & CEPH_F_SYNC))
664 /* hmm, this isn't really async... */ 663 /* hmm, this isn't really async... */
665 ret = ceph_sync_read(filp, base, len, ppos, &checkeof); 664 ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
@@ -711,13 +710,11 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
711 &ceph_sb_to_client(inode->i_sb)->client->osdc; 710 &ceph_sb_to_client(inode->i_sb)->client->osdc;
712 ssize_t count, written = 0; 711 ssize_t count, written = 0;
713 int err, want, got; 712 int err, want, got;
714 bool hold_mutex;
715 713
716 if (ceph_snap(inode) != CEPH_NOSNAP) 714 if (ceph_snap(inode) != CEPH_NOSNAP)
717 return -EROFS; 715 return -EROFS;
718 716
719 mutex_lock(&inode->i_mutex); 717 mutex_lock(&inode->i_mutex);
720 hold_mutex = true;
721 718
722 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); 719 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
723 if (err) 720 if (err)
@@ -763,18 +760,24 @@ retry_snap:
763 760
764 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || 761 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
765 (iocb->ki_filp->f_flags & O_DIRECT) || 762 (iocb->ki_filp->f_flags & O_DIRECT) ||
766 (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
767 (fi->flags & CEPH_F_SYNC)) { 763 (fi->flags & CEPH_F_SYNC)) {
768 mutex_unlock(&inode->i_mutex); 764 mutex_unlock(&inode->i_mutex);
769 written = ceph_sync_write(file, iov->iov_base, count, 765 written = ceph_sync_write(file, iov->iov_base, count,
770 pos, &iocb->ki_pos); 766 pos, &iocb->ki_pos);
767 if (written == -EOLDSNAPC) {
768 dout("aio_write %p %llx.%llx %llu~%u"
769 "got EOLDSNAPC, retrying\n",
770 inode, ceph_vinop(inode),
771 pos, (unsigned)iov->iov_len);
772 mutex_lock(&inode->i_mutex);
773 goto retry_snap;
774 }
771 } else { 775 } else {
772 written = generic_file_buffered_write(iocb, iov, nr_segs, 776 written = generic_file_buffered_write(iocb, iov, nr_segs,
773 pos, &iocb->ki_pos, 777 pos, &iocb->ki_pos,
774 count, 0); 778 count, 0);
775 mutex_unlock(&inode->i_mutex); 779 mutex_unlock(&inode->i_mutex);
776 } 780 }
777 hold_mutex = false;
778 781
779 if (written >= 0) { 782 if (written >= 0) {
780 int dirty; 783 int dirty;
@@ -798,18 +801,12 @@ retry_snap:
798 written = err; 801 written = err;
799 } 802 }
800 803
801 if (written == -EOLDSNAPC) { 804 goto out_unlocked;
802 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", 805
803 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
804 mutex_lock(&inode->i_mutex);
805 hold_mutex = true;
806 goto retry_snap;
807 }
808out: 806out:
809 if (hold_mutex) 807 mutex_unlock(&inode->i_mutex);
810 mutex_unlock(&inode->i_mutex); 808out_unlocked:
811 current->backing_dev_info = NULL; 809 current->backing_dev_info = NULL;
812
813 return written ? written : err; 810 return written ? written : err;
814} 811}
815 812
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f3a2abf28a77..98b6e50bde04 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -61,6 +61,14 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
61 return inode; 61 return inode;
62} 62}
63 63
64struct inode *ceph_lookup_inode(struct super_block *sb, struct ceph_vino vino)
65{
66 struct inode *inode;
67 ino_t t = ceph_vino_to_ino(vino);
68 inode = ilookup5_nowait(sb, t, ceph_ino_compare, &vino);
69 return inode;
70}
71
64/* 72/*
65 * get/construct snapdir inode for a given directory 73 * get/construct snapdir inode for a given directory
66 */ 74 */
@@ -1465,7 +1473,14 @@ static void ceph_vmtruncate_work(struct work_struct *work)
1465 struct inode *inode = &ci->vfs_inode; 1473 struct inode *inode = &ci->vfs_inode;
1466 1474
1467 dout("vmtruncate_work %p\n", inode); 1475 dout("vmtruncate_work %p\n", inode);
1468 mutex_lock(&inode->i_mutex); 1476 if (!mutex_trylock(&inode->i_mutex)) {
1477 /*
1478 * the i_mutex can be held by a writer who is waiting for
1479 * caps. wake up waiters, they will do pending vmtruncate.
1480 */
1481 wake_up_all(&ci->i_cap_wq);
1482 mutex_lock(&inode->i_mutex);
1483 }
1469 __ceph_do_pending_vmtruncate(inode); 1484 __ceph_do_pending_vmtruncate(inode);
1470 mutex_unlock(&inode->i_mutex); 1485 mutex_unlock(&inode->i_mutex);
1471 iput(inode); 1486 iput(inode);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index e0b4ef31d3c8..669622fd1ae3 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -196,8 +196,10 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
196 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, 196 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
197 &dl.object_no, &dl.object_offset, 197 &dl.object_no, &dl.object_offset,
198 &olen); 198 &olen);
199 if (r < 0) 199 if (r < 0) {
200 up_read(&osdc->map_sem);
200 return -EIO; 201 return -EIO;
202 }
201 dl.file_offset -= dl.object_offset; 203 dl.file_offset -= dl.object_offset;
202 dl.object_size = ceph_file_layout_object_size(ci->i_layout); 204 dl.object_size = ceph_file_layout_object_size(ci->i_layout);
203 dl.block_size = ceph_file_layout_su(ci->i_layout); 205 dl.block_size = ceph_file_layout_su(ci->i_layout);
@@ -209,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
209 snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", 211 snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
210 ceph_ino(inode), dl.object_no); 212 ceph_ino(inode), dl.object_no);
211 213
212 ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap, 214 r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
213 ceph_file_layout_pg_pool(ci->i_layout)); 215 ceph_file_layout_pg_pool(ci->i_layout));
216 if (r < 0) {
217 up_read(&osdc->map_sem);
218 return r;
219 }
214 220
215 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); 221 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
216 if (dl.osd >= 0) { 222 if (dl.osd >= 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 187bf214444d..603786b564be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -414,6 +414,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
414{ 414{
415 struct ceph_mds_session *s; 415 struct ceph_mds_session *s;
416 416
417 if (mds >= mdsc->mdsmap->m_max_mds)
418 return ERR_PTR(-EINVAL);
419
417 s = kzalloc(sizeof(*s), GFP_NOFS); 420 s = kzalloc(sizeof(*s), GFP_NOFS);
418 if (!s) 421 if (!s)
419 return ERR_PTR(-ENOMEM); 422 return ERR_PTR(-ENOMEM);
@@ -1028,6 +1031,37 @@ static void remove_session_caps(struct ceph_mds_session *session)
1028{ 1031{
1029 dout("remove_session_caps on %p\n", session); 1032 dout("remove_session_caps on %p\n", session);
1030 iterate_session_caps(session, remove_session_caps_cb, NULL); 1033 iterate_session_caps(session, remove_session_caps_cb, NULL);
1034
1035 spin_lock(&session->s_cap_lock);
1036 if (session->s_nr_caps > 0) {
1037 struct super_block *sb = session->s_mdsc->fsc->sb;
1038 struct inode *inode;
1039 struct ceph_cap *cap, *prev = NULL;
1040 struct ceph_vino vino;
1041 /*
1042 * iterate_session_caps() skips inodes that are being
1043 * deleted, we need to wait until deletions are complete.
1044 * __wait_on_freeing_inode() is designed for the job,
1045 * but it is not exported, so use lookup inode function
1046 * to access it.
1047 */
1048 while (!list_empty(&session->s_caps)) {
1049 cap = list_entry(session->s_caps.next,
1050 struct ceph_cap, session_caps);
1051 if (cap == prev)
1052 break;
1053 prev = cap;
1054 vino = cap->ci->i_vino;
1055 spin_unlock(&session->s_cap_lock);
1056
1057 inode = ceph_lookup_inode(sb, vino);
1058 iput(inode);
1059
1060 spin_lock(&session->s_cap_lock);
1061 }
1062 }
1063 spin_unlock(&session->s_cap_lock);
1064
1031 BUG_ON(session->s_nr_caps > 0); 1065 BUG_ON(session->s_nr_caps > 0);
1032 BUG_ON(!list_empty(&session->s_cap_flushing)); 1066 BUG_ON(!list_empty(&session->s_cap_flushing));
1033 cleanup_cap_releases(session); 1067 cleanup_cap_releases(session);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index cbded572345e..afcd62a68916 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -677,6 +677,8 @@ extern void ceph_destroy_inode(struct inode *inode);
677 677
678extern struct inode *ceph_get_inode(struct super_block *sb, 678extern struct inode *ceph_get_inode(struct super_block *sb,
679 struct ceph_vino vino); 679 struct ceph_vino vino);
680extern struct inode *ceph_lookup_inode(struct super_block *sb,
681 struct ceph_vino vino);
680extern struct inode *ceph_get_snapdir(struct inode *parent); 682extern struct inode *ceph_get_snapdir(struct inode *parent);
681extern int ceph_fill_file_size(struct inode *inode, int issued, 683extern int ceph_fill_file_size(struct inode *inode, int issued,
682 u32 truncate_seq, u64 truncate_size, u64 size); 684 u32 truncate_seq, u64 truncate_size, u64 size);