aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-05-25 14:46:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-05-25 14:46:31 -0400
commit 57bb55957432f20fd6e5bb5ddfbd9987439157ec (patch)
tree de4adeffd13a5394b84f04c6f60582b63685adc9
parent 2a651c7f8d377cf88271374315cbb5fe82eac784 (diff)
parent db3540522e955c1ebb391f4f5324dff4f20ecd09 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
  ceph: fix cap flush race reentrancy
  libceph: subscribe to osdmap when cluster is full
  libceph: handle new osdmap down/state change encoding
  rbd: handle online resize of underlying rbd image
  ceph: avoid inode lookup on nfs fh reconnect
  ceph: use LOOKUPINO to make unconnected nfs fh more reliable
  rbd: use snprintf for disk->disk_name
  rbd: cleanup: make kfree match kmalloc
  rbd: warn on update_snaps failure on notify
  ceph: check return value for start_request in writepages
  ceph: remove useless check
  libceph: add missing breaks in addr_set_port
  libceph: fix TAG_WAIT case
  ceph: fix broken comparison in readdir loop
  libceph: fix osdmap timestamp assignment
  ceph: fix rare potential cap leak
  libceph: use snprintf for unknown addrs
  libceph: use snprintf for formatting object name
  ceph: use snprintf for dirstat content
  libceph: fix uninitialized value when no get_authorizer method is set
  ...
-rw-r--r-- drivers/block/rbd.c 27
-rw-r--r-- fs/ceph/addr.c 5
-rw-r--r-- fs/ceph/caps.c 61
-rw-r--r-- fs/ceph/dir.c 7
-rw-r--r-- fs/ceph/export.c 25
-rw-r--r-- fs/ceph/mds_client.c 7
-rw-r--r-- fs/ceph/mds_client.h 1
-rw-r--r-- include/linux/ceph/ceph_fs.h 1
-rw-r--r-- net/ceph/messenger.c 82
-rw-r--r-- net/ceph/osd_client.c 19
-rw-r--r-- net/ceph/osdmap.c 13
11 files changed, 173 insertions, 75 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 9712fad82bc6..1278098624e6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1191,14 +1191,19 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev,
1191static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 1191static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
1192{ 1192{
1193 struct rbd_device *dev = (struct rbd_device *)data; 1193 struct rbd_device *dev = (struct rbd_device *)data;
1194 int rc;
1195
1194 if (!dev) 1196 if (!dev)
1195 return; 1197 return;
1196 1198
1197 dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, 1199 dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
1198 notify_id, (int)opcode); 1200 notify_id, (int)opcode);
1199 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1201 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1200 __rbd_update_snaps(dev); 1202 rc = __rbd_update_snaps(dev);
1201 mutex_unlock(&ctl_mutex); 1203 mutex_unlock(&ctl_mutex);
1204 if (rc)
1205 pr_warning(DRV_NAME "%d got notification but failed to update"
1206 " snaps: %d\n", dev->major, rc);
1202 1207
1203 rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); 1208 rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
1204} 1209}
@@ -1597,7 +1602,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
1597 int name_len = strlen(snap_name); 1602 int name_len = strlen(snap_name);
1598 u64 new_snapid; 1603 u64 new_snapid;
1599 int ret; 1604 int ret;
1600 void *data, *data_start, *data_end; 1605 void *data, *p, *e;
1601 u64 ver; 1606 u64 ver;
1602 1607
1603 /* we should create a snapshot only if we're pointing at the head */ 1608 /* we should create a snapshot only if we're pointing at the head */
@@ -1614,16 +1619,16 @@ static int rbd_header_add_snap(struct rbd_device *dev,
1614 if (!data) 1619 if (!data)
1615 return -ENOMEM; 1620 return -ENOMEM;
1616 1621
1617 data_start = data; 1622 p = data;
1618 data_end = data + name_len + 16; 1623 e = data + name_len + 16;
1619 1624
1620 ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); 1625 ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
1621 ceph_encode_64_safe(&data, data_end, new_snapid, bad); 1626 ceph_encode_64_safe(&p, e, new_snapid, bad);
1622 1627
1623 ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", 1628 ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
1624 data_start, data - data_start, &ver); 1629 data, p - data, &ver);
1625 1630
1626 kfree(data_start); 1631 kfree(data);
1627 1632
1628 if (ret < 0) 1633 if (ret < 0)
1629 return ret; 1634 return ret;
@@ -1659,6 +1664,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev)
1659 if (ret < 0) 1664 if (ret < 0)
1660 return ret; 1665 return ret;
1661 1666
1667 /* resized? */
1668 set_capacity(rbd_dev->disk, h.image_size / 512ULL);
1669
1662 down_write(&rbd_dev->header.snap_rwsem); 1670 down_write(&rbd_dev->header.snap_rwsem);
1663 1671
1664 snap_seq = rbd_dev->header.snapc->seq; 1672 snap_seq = rbd_dev->header.snapc->seq;
@@ -1716,7 +1724,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1716 if (!disk) 1724 if (!disk)
1717 goto out; 1725 goto out;
1718 1726
1719 sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); 1727 snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d",
1728 rbd_dev->id);
1720 disk->major = rbd_dev->major; 1729 disk->major = rbd_dev->major;
1721 disk->first_minor = 0; 1730 disk->first_minor = 0;
1722 disk->fops = &rbd_bd_ops; 1731 disk->fops = &rbd_bd_ops;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 38b8ab554924..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -848,7 +848,8 @@ get_more_pages:
848 op->payload_len = cpu_to_le32(len); 848 op->payload_len = cpu_to_le32(len);
849 req->r_request->hdr.data_len = cpu_to_le32(len); 849 req->r_request->hdr.data_len = cpu_to_le32(len);
850 850
851 ceph_osdc_start_request(&fsc->client->osdc, req, true); 851 rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
852 BUG_ON(rc);
852 req = NULL; 853 req = NULL;
853 854
854 /* continue? */ 855 /* continue? */
@@ -880,8 +881,6 @@ release_pvec_pages:
880out: 881out:
881 if (req) 882 if (req)
882 ceph_osdc_put_request(req); 883 ceph_osdc_put_request(req);
883 if (rc > 0)
884 rc = 0; /* vfs expects us to return 0 */
885 ceph_put_snap_context(snapc); 884 ceph_put_snap_context(snapc);
886 dout("writepages done, rc = %d\n", rc); 885 dout("writepages done, rc = %d\n", rc);
887 return rc; 886 return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2a5404c1c42f..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
569 list_add_tail(&cap->session_caps, &session->s_caps); 569 list_add_tail(&cap->session_caps, &session->s_caps);
570 session->s_nr_caps++; 570 session->s_nr_caps++;
571 spin_unlock(&session->s_cap_lock); 571 spin_unlock(&session->s_cap_lock);
572 } 572 } else if (new_cap)
573 ceph_put_cap(mdsc, new_cap);
573 574
574 if (!ci->i_snap_realm) { 575 if (!ci->i_snap_realm) {
575 /* 576 /*
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2634 struct ceph_mds_session *session, 2635 struct ceph_mds_session *session,
2635 int *open_target_sessions) 2636 int *open_target_sessions)
2636{ 2637{
2638 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
2637 struct ceph_inode_info *ci = ceph_inode(inode); 2639 struct ceph_inode_info *ci = ceph_inode(inode);
2638 int mds = session->s_mds; 2640 int mds = session->s_mds;
2639 unsigned mseq = le32_to_cpu(ex->migrate_seq); 2641 unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2670 * export targets, so that we get the matching IMPORT 2672 * export targets, so that we get the matching IMPORT
2671 */ 2673 */
2672 *open_target_sessions = 1; 2674 *open_target_sessions = 1;
2675
2676 /*
2677 * we can't flush dirty caps that we've seen the
2678 * EXPORT but no IMPORT for
2679 */
2680 spin_lock(&mdsc->cap_dirty_lock);
2681 if (!list_empty(&ci->i_dirty_item)) {
2682 dout(" moving %p to cap_dirty_migrating\n",
2683 inode);
2684 list_move(&ci->i_dirty_item,
2685 &mdsc->cap_dirty_migrating);
2686 }
2687 spin_unlock(&mdsc->cap_dirty_lock);
2673 } 2688 }
2674 __ceph_remove_cap(cap); 2689 __ceph_remove_cap(cap);
2675 } 2690 }
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2707 ci->i_cap_exporting_issued = 0; 2722 ci->i_cap_exporting_issued = 0;
2708 ci->i_cap_exporting_mseq = 0; 2723 ci->i_cap_exporting_mseq = 0;
2709 ci->i_cap_exporting_mds = -1; 2724 ci->i_cap_exporting_mds = -1;
2725
2726 spin_lock(&mdsc->cap_dirty_lock);
2727 if (!list_empty(&ci->i_dirty_item)) {
2728 dout(" moving %p back to cap_dirty\n", inode);
2729 list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
2730 }
2731 spin_unlock(&mdsc->cap_dirty_lock);
2710 } else { 2732 } else {
2711 dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", 2733 dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
2712 inode, ci, mds, mseq); 2734 inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
2910 */ 2932 */
2911void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) 2933void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2912{ 2934{
2913 struct ceph_inode_info *ci, *nci = NULL; 2935 struct ceph_inode_info *ci;
2914 struct inode *inode, *ninode = NULL; 2936 struct inode *inode;
2915 struct list_head *p, *n;
2916 2937
2917 dout("flush_dirty_caps\n"); 2938 dout("flush_dirty_caps\n");
2918 spin_lock(&mdsc->cap_dirty_lock); 2939 spin_lock(&mdsc->cap_dirty_lock);
2919 list_for_each_safe(p, n, &mdsc->cap_dirty) { 2940 while (!list_empty(&mdsc->cap_dirty)) {
2920 if (nci) { 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
2921 ci = nci; 2942 i_dirty_item);
2922 inode = ninode; 2943 inode = igrab(&ci->vfs_inode);
2923 ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; 2944 dout("flush_dirty_caps %p\n", inode);
2924 dout("flush_dirty_caps inode %p (was next inode)\n",
2925 inode);
2926 } else {
2927 ci = list_entry(p, struct ceph_inode_info,
2928 i_dirty_item);
2929 inode = igrab(&ci->vfs_inode);
2930 BUG_ON(!inode);
2931 dout("flush_dirty_caps inode %p\n", inode);
2932 }
2933 if (n != &mdsc->cap_dirty) {
2934 nci = list_entry(n, struct ceph_inode_info,
2935 i_dirty_item);
2936 ninode = igrab(&nci->vfs_inode);
2937 BUG_ON(!ninode);
2938 nci->i_ceph_flags |= CEPH_I_NOFLUSH;
2939 dout("flush_dirty_caps next inode %p, noflush\n",
2940 ninode);
2941 } else {
2942 nci = NULL;
2943 ninode = NULL;
2944 }
2945 spin_unlock(&mdsc->cap_dirty_lock); 2945 spin_unlock(&mdsc->cap_dirty_lock);
2946 if (inode) { 2946 if (inode) {
2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2951 spin_lock(&mdsc->cap_dirty_lock); 2951 spin_lock(&mdsc->cap_dirty_lock);
2952 } 2952 }
2953 spin_unlock(&mdsc->cap_dirty_lock); 2953 spin_unlock(&mdsc->cap_dirty_lock);
2954 dout("flush_dirty_caps done\n");
2954} 2955}
2955 2956
2956/* 2957/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1a867a3601ae..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -360,7 +360,7 @@ more:
360 rinfo = &fi->last_readdir->r_reply_info; 360 rinfo = &fi->last_readdir->r_reply_info;
361 dout("readdir frag %x num %d off %d chunkoff %d\n", frag, 361 dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
362 rinfo->dir_nr, off, fi->offset); 362 rinfo->dir_nr, off, fi->offset);
363 while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) { 363 while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
364 u64 pos = ceph_make_fpos(frag, off); 364 u64 pos = ceph_make_fpos(frag, off);
365 struct ceph_mds_reply_inode *in = 365 struct ceph_mds_reply_inode *in =
366 rinfo->dir_in[off - fi->offset].in; 366 rinfo->dir_in[off - fi->offset].in;
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1066 struct inode *inode = file->f_dentry->d_inode; 1066 struct inode *inode = file->f_dentry->d_inode;
1067 struct ceph_inode_info *ci = ceph_inode(inode); 1067 struct ceph_inode_info *ci = ceph_inode(inode);
1068 int left; 1068 int left;
1069 const int bufsize = 1024;
1069 1070
1070 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1071 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
1071 return -EISDIR; 1072 return -EISDIR;
1072 1073
1073 if (!cf->dir_info) { 1074 if (!cf->dir_info) {
1074 cf->dir_info = kmalloc(1024, GFP_NOFS); 1075 cf->dir_info = kmalloc(bufsize, GFP_NOFS);
1075 if (!cf->dir_info) 1076 if (!cf->dir_info)
1076 return -ENOMEM; 1077 return -ENOMEM;
1077 cf->dir_info_len = 1078 cf->dir_info_len =
1078 sprintf(cf->dir_info, 1079 snprintf(cf->dir_info, bufsize,
1079 "entries: %20lld\n" 1080 "entries: %20lld\n"
1080 " files: %20lld\n" 1081 " files: %20lld\n"
1081 " subdirs: %20lld\n" 1082 " subdirs: %20lld\n"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
86static struct dentry *__fh_to_dentry(struct super_block *sb, 86static struct dentry *__fh_to_dentry(struct super_block *sb,
87 struct ceph_nfs_fh *fh) 87 struct ceph_nfs_fh *fh)
88{ 88{
89 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
89 struct inode *inode; 90 struct inode *inode;
90 struct dentry *dentry; 91 struct dentry *dentry;
91 struct ceph_vino vino; 92 struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
95 vino.ino = fh->ino; 96 vino.ino = fh->ino;
96 vino.snap = CEPH_NOSNAP; 97 vino.snap = CEPH_NOSNAP;
97 inode = ceph_find_inode(sb, vino); 98 inode = ceph_find_inode(sb, vino);
98 if (!inode) 99 if (!inode) {
99 return ERR_PTR(-ESTALE); 100 struct ceph_mds_request *req;
101
102 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
103 USE_ANY_MDS);
104 if (IS_ERR(req))
105 return ERR_CAST(req);
106
107 req->r_ino1 = vino;
108 req->r_num_caps = 1;
109 err = ceph_mdsc_do_request(mdsc, NULL, req);
110 inode = req->r_target_inode;
111 if (inode)
112 igrab(inode);
113 ceph_mdsc_put_request(req);
114 if (!inode)
115 return ERR_PTR(-ESTALE);
116 }
100 117
101 dentry = d_obtain_alias(inode); 118 dentry = d_obtain_alias(inode);
102 if (IS_ERR(dentry)) { 119 if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
148 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); 165 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
149 req->r_num_caps = 1; 166 req->r_num_caps = 1;
150 err = ceph_mdsc_do_request(mdsc, NULL, req); 167 err = ceph_mdsc_do_request(mdsc, NULL, req);
168 inode = req->r_target_inode;
169 if (inode)
170 igrab(inode);
151 ceph_mdsc_put_request(req); 171 ceph_mdsc_put_request(req);
152 inode = ceph_find_inode(sb, vino);
153 if (!inode) 172 if (!inode)
154 return ERR_PTR(err ? err : -ESTALE); 173 return ERR_PTR(err ? err : -ESTALE);
155 } 174 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d0fae4ce9ba5..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
578 if (dir) { 578 if (dir) {
579 struct ceph_inode_info *ci = ceph_inode(dir); 579 struct ceph_inode_info *ci = ceph_inode(dir);
580 580
581 ihold(dir);
581 spin_lock(&ci->i_unsafe_lock); 582 spin_lock(&ci->i_unsafe_lock);
582 req->r_unsafe_dir = dir; 583 req->r_unsafe_dir = dir;
583 list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); 584 list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
598 spin_lock(&ci->i_unsafe_lock); 599 spin_lock(&ci->i_unsafe_lock);
599 list_del_init(&req->r_unsafe_dir_item); 600 list_del_init(&req->r_unsafe_dir_item);
600 spin_unlock(&ci->i_unsafe_lock); 601 spin_unlock(&ci->i_unsafe_lock);
602
603 iput(req->r_unsafe_dir);
604 req->r_unsafe_dir = NULL;
601 } 605 }
602 606
603 ceph_mdsc_put_request(req); 607 ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2691{ 2695{
2692 struct super_block *sb = mdsc->fsc->sb; 2696 struct super_block *sb = mdsc->fsc->sb;
2693 struct inode *inode; 2697 struct inode *inode;
2694 struct ceph_inode_info *ci;
2695 struct dentry *parent, *dentry; 2698 struct dentry *parent, *dentry;
2696 struct ceph_dentry_info *di; 2699 struct ceph_dentry_info *di;
2697 int mds = session->s_mds; 2700 int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2728 dout("handle_lease no inode %llx\n", vino.ino); 2731 dout("handle_lease no inode %llx\n", vino.ino);
2729 goto release; 2732 goto release;
2730 } 2733 }
2731 ci = ceph_inode(inode);
2732 2734
2733 /* dentry */ 2735 /* dentry */
2734 parent = d_find_alias(inode); 2736 parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
3002 spin_lock_init(&mdsc->snap_flush_lock); 3004 spin_lock_init(&mdsc->snap_flush_lock);
3003 mdsc->cap_flush_seq = 0; 3005 mdsc->cap_flush_seq = 0;
3004 INIT_LIST_HEAD(&mdsc->cap_dirty); 3006 INIT_LIST_HEAD(&mdsc->cap_dirty);
3007 INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
3005 mdsc->num_cap_flushing = 0; 3008 mdsc->num_cap_flushing = 0;
3006 spin_lock_init(&mdsc->cap_dirty_lock); 3009 spin_lock_init(&mdsc->cap_dirty_lock);
3007 init_waitqueue_head(&mdsc->cap_flushing_wq); 3010 init_waitqueue_head(&mdsc->cap_flushing_wq);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
278 278
279 u64 cap_flush_seq; 279 u64 cap_flush_seq;
280 struct list_head cap_dirty; /* inodes with dirty caps */ 280 struct list_head cap_dirty; /* inodes with dirty caps */
281 struct list_head cap_dirty_migrating; /* ...that are migration... */
281 int num_cap_flushing; /* # caps we are flushing */ 282 int num_cap_flushing; /* # caps we are flushing */
282 spinlock_t cap_dirty_lock; /* protects above items */ 283 spinlock_t cap_dirty_lock; /* protects above items */
283 wait_queue_head_t cap_flushing_wq; 284 wait_queue_head_t cap_flushing_wq;
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index b8e995fbd867..b8c60694b2b0 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -313,6 +313,7 @@ enum {
313 CEPH_MDS_OP_GETATTR = 0x00101, 313 CEPH_MDS_OP_GETATTR = 0x00101,
314 CEPH_MDS_OP_LOOKUPHASH = 0x00102, 314 CEPH_MDS_OP_LOOKUPHASH = 0x00102,
315 CEPH_MDS_OP_LOOKUPPARENT = 0x00103, 315 CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
316 CEPH_MDS_OP_LOOKUPINO = 0x00104,
316 317
317 CEPH_MDS_OP_SETXATTR = 0x01105, 318 CEPH_MDS_OP_SETXATTR = 0x01105,
318 CEPH_MDS_OP_RMXATTR = 0x01106, 319 CEPH_MDS_OP_RMXATTR = 0x01106,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e15a82ccc05f..78b55f49de7c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
76 break; 76 break;
77 77
78 default: 78 default:
79 sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); 79 snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)",
80 (int)ss->ss_family);
80 } 81 }
81 82
82 return s; 83 return s;
@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con)
598 * Connection negotiation. 599 * Connection negotiation.
599 */ 600 */
600 601
601static void prepare_connect_authorizer(struct ceph_connection *con) 602static int prepare_connect_authorizer(struct ceph_connection *con)
602{ 603{
603 void *auth_buf; 604 void *auth_buf;
604 int auth_len = 0; 605 int auth_len = 0;
@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con)
612 con->auth_retry); 613 con->auth_retry);
613 mutex_lock(&con->mutex); 614 mutex_lock(&con->mutex);
614 615
616 if (test_bit(CLOSED, &con->state) ||
617 test_bit(OPENING, &con->state))
618 return -EAGAIN;
619
615 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); 620 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
616 con->out_connect.authorizer_len = cpu_to_le32(auth_len); 621 con->out_connect.authorizer_len = cpu_to_le32(auth_len);
617 622
618 con->out_kvec[con->out_kvec_left].iov_base = auth_buf; 623 if (auth_len) {
619 con->out_kvec[con->out_kvec_left].iov_len = auth_len; 624 con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
620 con->out_kvec_left++; 625 con->out_kvec[con->out_kvec_left].iov_len = auth_len;
621 con->out_kvec_bytes += auth_len; 626 con->out_kvec_left++;
627 con->out_kvec_bytes += auth_len;
628 }
629 return 0;
622} 630}
623 631
624/* 632/*
@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr,
640 set_bit(WRITE_PENDING, &con->state); 648 set_bit(WRITE_PENDING, &con->state);
641} 649}
642 650
643static void prepare_write_connect(struct ceph_messenger *msgr, 651static int prepare_write_connect(struct ceph_messenger *msgr,
644 struct ceph_connection *con, 652 struct ceph_connection *con,
645 int after_banner) 653 int after_banner)
646{ 654{
647 unsigned global_seq = get_global_seq(con->msgr, 0); 655 unsigned global_seq = get_global_seq(con->msgr, 0);
648 int proto; 656 int proto;
@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
683 con->out_more = 0; 691 con->out_more = 0;
684 set_bit(WRITE_PENDING, &con->state); 692 set_bit(WRITE_PENDING, &con->state);
685 693
686 prepare_connect_authorizer(con); 694 return prepare_connect_authorizer(con);
687} 695}
688 696
689 697
@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
1065 switch (ss->ss_family) { 1073 switch (ss->ss_family) {
1066 case AF_INET: 1074 case AF_INET:
1067 ((struct sockaddr_in *)ss)->sin_port = htons(p); 1075 ((struct sockaddr_in *)ss)->sin_port = htons(p);
1076 break;
1068 case AF_INET6: 1077 case AF_INET6:
1069 ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); 1078 ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
1079 break;
1070 } 1080 }
1071} 1081}
1072 1082
@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con)
1216 u64 sup_feat = con->msgr->supported_features; 1226 u64 sup_feat = con->msgr->supported_features;
1217 u64 req_feat = con->msgr->required_features; 1227 u64 req_feat = con->msgr->required_features;
1218 u64 server_feat = le64_to_cpu(con->in_reply.features); 1228 u64 server_feat = le64_to_cpu(con->in_reply.features);
1229 int ret;
1219 1230
1220 dout("process_connect on %p tag %d\n", con, (int)con->in_tag); 1231 dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
1221 1232
@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con)
1250 return -1; 1261 return -1;
1251 } 1262 }
1252 con->auth_retry = 1; 1263 con->auth_retry = 1;
1253 prepare_write_connect(con->msgr, con, 0); 1264 ret = prepare_write_connect(con->msgr, con, 0);
1265 if (ret < 0)
1266 return ret;
1254 prepare_read_connect(con); 1267 prepare_read_connect(con);
1255 break; 1268 break;
1256 1269
@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con)
1277 if (con->ops->peer_reset) 1290 if (con->ops->peer_reset)
1278 con->ops->peer_reset(con); 1291 con->ops->peer_reset(con);
1279 mutex_lock(&con->mutex); 1292 mutex_lock(&con->mutex);
1293 if (test_bit(CLOSED, &con->state) ||
1294 test_bit(OPENING, &con->state))
1295 return -EAGAIN;
1280 break; 1296 break;
1281 1297
1282 case CEPH_MSGR_TAG_RETRY_SESSION: 1298 case CEPH_MSGR_TAG_RETRY_SESSION:
@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con)
1341 * to WAIT. This shouldn't happen if we are the 1357 * to WAIT. This shouldn't happen if we are the
1342 * client. 1358 * client.
1343 */ 1359 */
1344 pr_err("process_connect peer connecting WAIT\n"); 1360 pr_err("process_connect got WAIT as client\n");
1361 con->error_msg = "protocol error, got WAIT as client";
1362 return -1;
1345 1363
1346 default: 1364 default:
1347 pr_err("connect protocol error, will retry\n"); 1365 pr_err("connect protocol error, will retry\n");
@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con)
1810more: 1828more:
1811 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, 1829 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
1812 con->in_base_pos); 1830 con->in_base_pos);
1831
1832 /*
1833 * process_connect and process_message drop and re-take
1834 * con->mutex. make sure we handle a racing close or reopen.
1835 */
1836 if (test_bit(CLOSED, &con->state) ||
1837 test_bit(OPENING, &con->state)) {
1838 ret = -EAGAIN;
1839 goto out;
1840 }
1841
1813 if (test_bit(CONNECTING, &con->state)) { 1842 if (test_bit(CONNECTING, &con->state)) {
1814 if (!test_bit(NEGOTIATING, &con->state)) { 1843 if (!test_bit(NEGOTIATING, &con->state)) {
1815 dout("try_read connecting\n"); 1844 dout("try_read connecting\n");
@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work)
1938{ 1967{
1939 struct ceph_connection *con = container_of(work, struct ceph_connection, 1968 struct ceph_connection *con = container_of(work, struct ceph_connection,
1940 work.work); 1969 work.work);
1970 int ret;
1941 1971
1942 mutex_lock(&con->mutex); 1972 mutex_lock(&con->mutex);
1973restart:
1943 if (test_and_clear_bit(BACKOFF, &con->state)) { 1974 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 dout("con_work %p backing off\n", con); 1975 dout("con_work %p backing off\n", con);
1945 if (queue_delayed_work(ceph_msgr_wq, &con->work, 1976 if (queue_delayed_work(ceph_msgr_wq, &con->work,
@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work)
1969 con_close_socket(con); 2000 con_close_socket(con);
1970 } 2001 }
1971 2002
1972 if (test_and_clear_bit(SOCK_CLOSED, &con->state) || 2003 if (test_and_clear_bit(SOCK_CLOSED, &con->state))
1973 try_read(con) < 0 || 2004 goto fault;
1974 try_write(con) < 0) { 2005
1975 mutex_unlock(&con->mutex); 2006 ret = try_read(con);
1976 ceph_fault(con); /* error/fault path */ 2007 if (ret == -EAGAIN)
1977 goto done_unlocked; 2008 goto restart;
1978 } 2009 if (ret < 0)
2010 goto fault;
2011
2012 ret = try_write(con);
2013 if (ret == -EAGAIN)
2014 goto restart;
2015 if (ret < 0)
2016 goto fault;
1979 2017
1980done: 2018done:
1981 mutex_unlock(&con->mutex); 2019 mutex_unlock(&con->mutex);
1982done_unlocked: 2020done_unlocked:
1983 con->ops->put(con); 2021 con->ops->put(con);
2022 return;
2023
2024fault:
2025 mutex_unlock(&con->mutex);
2026 ceph_fault(con); /* error/fault path */
2027 goto done_unlocked;
1984} 2028}
1985 2029
1986 2030
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6b5dda1cb5df..6ea2b892f44b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc,
124 ceph_calc_raw_layout(osdc, layout, vino.snap, off, 124 ceph_calc_raw_layout(osdc, layout, vino.snap, off,
125 plen, &bno, req, op); 125 plen, &bno, req, op);
126 126
127 sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); 127 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
128 req->r_oid_len = strlen(req->r_oid); 128 req->r_oid_len = strlen(req->r_oid);
129} 129}
130 130
@@ -1421,6 +1421,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1421done: 1421done:
1422 downgrade_write(&osdc->map_sem); 1422 downgrade_write(&osdc->map_sem);
1423 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); 1423 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
1424
1425 /*
1426 * subscribe to subsequent osdmap updates if full to ensure
1427 * we find out when we are no longer full and stop returning
1428 * ENOSPC.
1429 */
1430 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
1431 ceph_monc_request_next_osdmap(&osdc->client->monc);
1432
1424 send_queued(osdc); 1433 send_queued(osdc);
1425 up_read(&osdc->map_sem); 1434 up_read(&osdc->map_sem);
1426 wake_up_all(&osdc->client->auth_wq); 1435 wake_up_all(&osdc->client->auth_wq);
@@ -1677,8 +1686,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1677 */ 1686 */
1678 if (req->r_sent == 0) { 1687 if (req->r_sent == 0) {
1679 rc = __map_request(osdc, req); 1688 rc = __map_request(osdc, req);
1680 if (rc < 0) 1689 if (rc < 0) {
1690 if (nofail) {
1691 dout("osdc_start_request failed map, "
1692 " will retry %lld\n", req->r_tid);
1693 rc = 0;
1694 }
1681 goto out_unlock; 1695 goto out_unlock;
1696 }
1682 if (req->r_osd == NULL) { 1697 if (req->r_osd == NULL) {
1683 dout("send_request %p no up osds in pg\n", req); 1698 dout("send_request %p no up osds in pg\n", req);
1684 ceph_monc_request_next_osdmap(&osdc->client->monc); 1699 ceph_monc_request_next_osdmap(&osdc->client->monc);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 71603ac3dff5..e97c3588c3ec 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
765 } 765 }
766 766
767 map->epoch++; 767 map->epoch++;
768 map->modified = map->modified; 768 map->modified = modified;
769 if (newcrush) { 769 if (newcrush) {
770 if (map->crush) 770 if (map->crush)
771 crush_destroy(map->crush); 771 crush_destroy(map->crush);
@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
830 map->osd_addr[osd] = addr; 830 map->osd_addr[osd] = addr;
831 } 831 }
832 832
833 /* new_down */ 833 /* new_state */
834 ceph_decode_32_safe(p, end, len, bad); 834 ceph_decode_32_safe(p, end, len, bad);
835 while (len--) { 835 while (len--) {
836 u32 osd; 836 u32 osd;
837 u8 xorstate;
837 ceph_decode_32_safe(p, end, osd, bad); 838 ceph_decode_32_safe(p, end, osd, bad);
839 xorstate = **(u8 **)p;
838 (*p)++; /* clean flag */ 840 (*p)++; /* clean flag */
839 pr_info("osd%d down\n", osd); 841 if (xorstate == 0)
842 xorstate = CEPH_OSD_UP;
843 if (xorstate & CEPH_OSD_UP)
844 pr_info("osd%d down\n", osd);
840 if (osd < map->max_osd) 845 if (osd < map->max_osd)
841 map->osd_state[osd] &= ~CEPH_OSD_UP; 846 map->osd_state[osd] ^= xorstate;
842 } 847 }
843 848
844 /* new_weight */ 849 /* new_weight */