diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-25 14:46:31 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-25 14:46:31 -0400 |
commit | 57bb55957432f20fd6e5bb5ddfbd9987439157ec (patch) | |
tree | de4adeffd13a5394b84f04c6f60582b63685adc9 | |
parent | 2a651c7f8d377cf88271374315cbb5fe82eac784 (diff) | |
parent | db3540522e955c1ebb391f4f5324dff4f20ecd09 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
ceph: fix cap flush race reentrancy
libceph: subscribe to osdmap when cluster is full
libceph: handle new osdmap down/state change encoding
rbd: handle online resize of underlying rbd image
ceph: avoid inode lookup on nfs fh reconnect
ceph: use LOOKUPINO to make unconnected nfs fh more reliable
rbd: use snprintf for disk->disk_name
rbd: cleanup: make kfree match kmalloc
rbd: warn on update_snaps failure on notify
ceph: check return value for start_request in writepages
ceph: remove useless check
libceph: add missing breaks in addr_set_port
libceph: fix TAG_WAIT case
ceph: fix broken comparison in readdir loop
libceph: fix osdmap timestamp assignment
ceph: fix rare potential cap leak
libceph: use snprintf for unknown addrs
libceph: use snprintf for formatting object name
ceph: use snprintf for dirstat content
libceph: fix uninitialized value when no get_authorizer method is set
...
-rw-r--r-- | drivers/block/rbd.c | 27 | ||||
-rw-r--r-- | fs/ceph/addr.c | 5 | ||||
-rw-r--r-- | fs/ceph/caps.c | 61 | ||||
-rw-r--r-- | fs/ceph/dir.c | 7 | ||||
-rw-r--r-- | fs/ceph/export.c | 25 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 7 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 1 | ||||
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 1 | ||||
-rw-r--r-- | net/ceph/messenger.c | 82 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 19 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 13 |
11 files changed, 173 insertions, 75 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9712fad82bc6..1278098624e6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -1191,14 +1191,19 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, | |||
1191 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | 1191 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) |
1192 | { | 1192 | { |
1193 | struct rbd_device *dev = (struct rbd_device *)data; | 1193 | struct rbd_device *dev = (struct rbd_device *)data; |
1194 | int rc; | ||
1195 | |||
1194 | if (!dev) | 1196 | if (!dev) |
1195 | return; | 1197 | return; |
1196 | 1198 | ||
1197 | dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, | 1199 | dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, |
1198 | notify_id, (int)opcode); | 1200 | notify_id, (int)opcode); |
1199 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 1201 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
1200 | __rbd_update_snaps(dev); | 1202 | rc = __rbd_update_snaps(dev); |
1201 | mutex_unlock(&ctl_mutex); | 1203 | mutex_unlock(&ctl_mutex); |
1204 | if (rc) | ||
1205 | pr_warning(DRV_NAME "%d got notification but failed to update" | ||
1206 | " snaps: %d\n", dev->major, rc); | ||
1202 | 1207 | ||
1203 | rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); | 1208 | rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); |
1204 | } | 1209 | } |
@@ -1597,7 +1602,7 @@ static int rbd_header_add_snap(struct rbd_device *dev, | |||
1597 | int name_len = strlen(snap_name); | 1602 | int name_len = strlen(snap_name); |
1598 | u64 new_snapid; | 1603 | u64 new_snapid; |
1599 | int ret; | 1604 | int ret; |
1600 | void *data, *data_start, *data_end; | 1605 | void *data, *p, *e; |
1601 | u64 ver; | 1606 | u64 ver; |
1602 | 1607 | ||
1603 | /* we should create a snapshot only if we're pointing at the head */ | 1608 | /* we should create a snapshot only if we're pointing at the head */ |
@@ -1614,16 +1619,16 @@ static int rbd_header_add_snap(struct rbd_device *dev, | |||
1614 | if (!data) | 1619 | if (!data) |
1615 | return -ENOMEM; | 1620 | return -ENOMEM; |
1616 | 1621 | ||
1617 | data_start = data; | 1622 | p = data; |
1618 | data_end = data + name_len + 16; | 1623 | e = data + name_len + 16; |
1619 | 1624 | ||
1620 | ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); | 1625 | ceph_encode_string_safe(&p, e, snap_name, name_len, bad); |
1621 | ceph_encode_64_safe(&data, data_end, new_snapid, bad); | 1626 | ceph_encode_64_safe(&p, e, new_snapid, bad); |
1622 | 1627 | ||
1623 | ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", | 1628 | ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", |
1624 | data_start, data - data_start, &ver); | 1629 | data, p - data, &ver); |
1625 | 1630 | ||
1626 | kfree(data_start); | 1631 | kfree(data); |
1627 | 1632 | ||
1628 | if (ret < 0) | 1633 | if (ret < 0) |
1629 | return ret; | 1634 | return ret; |
@@ -1659,6 +1664,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) | |||
1659 | if (ret < 0) | 1664 | if (ret < 0) |
1660 | return ret; | 1665 | return ret; |
1661 | 1666 | ||
1667 | /* resized? */ | ||
1668 | set_capacity(rbd_dev->disk, h.image_size / 512ULL); | ||
1669 | |||
1662 | down_write(&rbd_dev->header.snap_rwsem); | 1670 | down_write(&rbd_dev->header.snap_rwsem); |
1663 | 1671 | ||
1664 | snap_seq = rbd_dev->header.snapc->seq; | 1672 | snap_seq = rbd_dev->header.snapc->seq; |
@@ -1716,7 +1724,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1716 | if (!disk) | 1724 | if (!disk) |
1717 | goto out; | 1725 | goto out; |
1718 | 1726 | ||
1719 | sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); | 1727 | snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d", |
1728 | rbd_dev->id); | ||
1720 | disk->major = rbd_dev->major; | 1729 | disk->major = rbd_dev->major; |
1721 | disk->first_minor = 0; | 1730 | disk->first_minor = 0; |
1722 | disk->fops = &rbd_bd_ops; | 1731 | disk->fops = &rbd_bd_ops; |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 38b8ab554924..33da49dc3cc6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -848,7 +848,8 @@ get_more_pages: | |||
848 | op->payload_len = cpu_to_le32(len); | 848 | op->payload_len = cpu_to_le32(len); |
849 | req->r_request->hdr.data_len = cpu_to_le32(len); | 849 | req->r_request->hdr.data_len = cpu_to_le32(len); |
850 | 850 | ||
851 | ceph_osdc_start_request(&fsc->client->osdc, req, true); | 851 | rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); |
852 | BUG_ON(rc); | ||
852 | req = NULL; | 853 | req = NULL; |
853 | 854 | ||
854 | /* continue? */ | 855 | /* continue? */ |
@@ -880,8 +881,6 @@ release_pvec_pages: | |||
880 | out: | 881 | out: |
881 | if (req) | 882 | if (req) |
882 | ceph_osdc_put_request(req); | 883 | ceph_osdc_put_request(req); |
883 | if (rc > 0) | ||
884 | rc = 0; /* vfs expects us to return 0 */ | ||
885 | ceph_put_snap_context(snapc); | 884 | ceph_put_snap_context(snapc); |
886 | dout("writepages done, rc = %d\n", rc); | 885 | dout("writepages done, rc = %d\n", rc); |
887 | return rc; | 886 | return rc; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2a5404c1c42f..1f72b00447c4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -569,7 +569,8 @@ retry: | |||
569 | list_add_tail(&cap->session_caps, &session->s_caps); | 569 | list_add_tail(&cap->session_caps, &session->s_caps); |
570 | session->s_nr_caps++; | 570 | session->s_nr_caps++; |
571 | spin_unlock(&session->s_cap_lock); | 571 | spin_unlock(&session->s_cap_lock); |
572 | } | 572 | } else if (new_cap) |
573 | ceph_put_cap(mdsc, new_cap); | ||
573 | 574 | ||
574 | if (!ci->i_snap_realm) { | 575 | if (!ci->i_snap_realm) { |
575 | /* | 576 | /* |
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2634 | struct ceph_mds_session *session, | 2635 | struct ceph_mds_session *session, |
2635 | int *open_target_sessions) | 2636 | int *open_target_sessions) |
2636 | { | 2637 | { |
2638 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
2637 | struct ceph_inode_info *ci = ceph_inode(inode); | 2639 | struct ceph_inode_info *ci = ceph_inode(inode); |
2638 | int mds = session->s_mds; | 2640 | int mds = session->s_mds; |
2639 | unsigned mseq = le32_to_cpu(ex->migrate_seq); | 2641 | unsigned mseq = le32_to_cpu(ex->migrate_seq); |
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2670 | * export targets, so that we get the matching IMPORT | 2672 | * export targets, so that we get the matching IMPORT |
2671 | */ | 2673 | */ |
2672 | *open_target_sessions = 1; | 2674 | *open_target_sessions = 1; |
2675 | |||
2676 | /* | ||
2677 | * we can't flush dirty caps that we've seen the | ||
2678 | * EXPORT but no IMPORT for | ||
2679 | */ | ||
2680 | spin_lock(&mdsc->cap_dirty_lock); | ||
2681 | if (!list_empty(&ci->i_dirty_item)) { | ||
2682 | dout(" moving %p to cap_dirty_migrating\n", | ||
2683 | inode); | ||
2684 | list_move(&ci->i_dirty_item, | ||
2685 | &mdsc->cap_dirty_migrating); | ||
2686 | } | ||
2687 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2673 | } | 2688 | } |
2674 | __ceph_remove_cap(cap); | 2689 | __ceph_remove_cap(cap); |
2675 | } | 2690 | } |
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2707 | ci->i_cap_exporting_issued = 0; | 2722 | ci->i_cap_exporting_issued = 0; |
2708 | ci->i_cap_exporting_mseq = 0; | 2723 | ci->i_cap_exporting_mseq = 0; |
2709 | ci->i_cap_exporting_mds = -1; | 2724 | ci->i_cap_exporting_mds = -1; |
2725 | |||
2726 | spin_lock(&mdsc->cap_dirty_lock); | ||
2727 | if (!list_empty(&ci->i_dirty_item)) { | ||
2728 | dout(" moving %p back to cap_dirty\n", inode); | ||
2729 | list_move(&ci->i_dirty_item, &mdsc->cap_dirty); | ||
2730 | } | ||
2731 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2710 | } else { | 2732 | } else { |
2711 | dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", | 2733 | dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", |
2712 | inode, ci, mds, mseq); | 2734 | inode, ci, mds, mseq); |
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | |||
2910 | */ | 2932 | */ |
2911 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | 2933 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) |
2912 | { | 2934 | { |
2913 | struct ceph_inode_info *ci, *nci = NULL; | 2935 | struct ceph_inode_info *ci; |
2914 | struct inode *inode, *ninode = NULL; | 2936 | struct inode *inode; |
2915 | struct list_head *p, *n; | ||
2916 | 2937 | ||
2917 | dout("flush_dirty_caps\n"); | 2938 | dout("flush_dirty_caps\n"); |
2918 | spin_lock(&mdsc->cap_dirty_lock); | 2939 | spin_lock(&mdsc->cap_dirty_lock); |
2919 | list_for_each_safe(p, n, &mdsc->cap_dirty) { | 2940 | while (!list_empty(&mdsc->cap_dirty)) { |
2920 | if (nci) { | 2941 | ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, |
2921 | ci = nci; | 2942 | i_dirty_item); |
2922 | inode = ninode; | 2943 | inode = igrab(&ci->vfs_inode); |
2923 | ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; | 2944 | dout("flush_dirty_caps %p\n", inode); |
2924 | dout("flush_dirty_caps inode %p (was next inode)\n", | ||
2925 | inode); | ||
2926 | } else { | ||
2927 | ci = list_entry(p, struct ceph_inode_info, | ||
2928 | i_dirty_item); | ||
2929 | inode = igrab(&ci->vfs_inode); | ||
2930 | BUG_ON(!inode); | ||
2931 | dout("flush_dirty_caps inode %p\n", inode); | ||
2932 | } | ||
2933 | if (n != &mdsc->cap_dirty) { | ||
2934 | nci = list_entry(n, struct ceph_inode_info, | ||
2935 | i_dirty_item); | ||
2936 | ninode = igrab(&nci->vfs_inode); | ||
2937 | BUG_ON(!ninode); | ||
2938 | nci->i_ceph_flags |= CEPH_I_NOFLUSH; | ||
2939 | dout("flush_dirty_caps next inode %p, noflush\n", | ||
2940 | ninode); | ||
2941 | } else { | ||
2942 | nci = NULL; | ||
2943 | ninode = NULL; | ||
2944 | } | ||
2945 | spin_unlock(&mdsc->cap_dirty_lock); | 2945 | spin_unlock(&mdsc->cap_dirty_lock); |
2946 | if (inode) { | 2946 | if (inode) { |
2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, | 2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, |
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | |||
2951 | spin_lock(&mdsc->cap_dirty_lock); | 2951 | spin_lock(&mdsc->cap_dirty_lock); |
2952 | } | 2952 | } |
2953 | spin_unlock(&mdsc->cap_dirty_lock); | 2953 | spin_unlock(&mdsc->cap_dirty_lock); |
2954 | dout("flush_dirty_caps done\n"); | ||
2954 | } | 2955 | } |
2955 | 2956 | ||
2956 | /* | 2957 | /* |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1a867a3601ae..33729e822bb9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -360,7 +360,7 @@ more: | |||
360 | rinfo = &fi->last_readdir->r_reply_info; | 360 | rinfo = &fi->last_readdir->r_reply_info; |
361 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, | 361 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, |
362 | rinfo->dir_nr, off, fi->offset); | 362 | rinfo->dir_nr, off, fi->offset); |
363 | while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) { | 363 | while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { |
364 | u64 pos = ceph_make_fpos(frag, off); | 364 | u64 pos = ceph_make_fpos(frag, off); |
365 | struct ceph_mds_reply_inode *in = | 365 | struct ceph_mds_reply_inode *in = |
366 | rinfo->dir_in[off - fi->offset].in; | 366 | rinfo->dir_in[off - fi->offset].in; |
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1066 | struct inode *inode = file->f_dentry->d_inode; | 1066 | struct inode *inode = file->f_dentry->d_inode; |
1067 | struct ceph_inode_info *ci = ceph_inode(inode); | 1067 | struct ceph_inode_info *ci = ceph_inode(inode); |
1068 | int left; | 1068 | int left; |
1069 | const int bufsize = 1024; | ||
1069 | 1070 | ||
1070 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 1071 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1071 | return -EISDIR; | 1072 | return -EISDIR; |
1072 | 1073 | ||
1073 | if (!cf->dir_info) { | 1074 | if (!cf->dir_info) { |
1074 | cf->dir_info = kmalloc(1024, GFP_NOFS); | 1075 | cf->dir_info = kmalloc(bufsize, GFP_NOFS); |
1075 | if (!cf->dir_info) | 1076 | if (!cf->dir_info) |
1076 | return -ENOMEM; | 1077 | return -ENOMEM; |
1077 | cf->dir_info_len = | 1078 | cf->dir_info_len = |
1078 | sprintf(cf->dir_info, | 1079 | snprintf(cf->dir_info, bufsize, |
1079 | "entries: %20lld\n" | 1080 | "entries: %20lld\n" |
1080 | " files: %20lld\n" | 1081 | " files: %20lld\n" |
1081 | " subdirs: %20lld\n" | 1082 | " subdirs: %20lld\n" |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e41056174bf8..a610d3d67488 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
86 | static struct dentry *__fh_to_dentry(struct super_block *sb, | 86 | static struct dentry *__fh_to_dentry(struct super_block *sb, |
87 | struct ceph_nfs_fh *fh) | 87 | struct ceph_nfs_fh *fh) |
88 | { | 88 | { |
89 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | ||
89 | struct inode *inode; | 90 | struct inode *inode; |
90 | struct dentry *dentry; | 91 | struct dentry *dentry; |
91 | struct ceph_vino vino; | 92 | struct ceph_vino vino; |
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
95 | vino.ino = fh->ino; | 96 | vino.ino = fh->ino; |
96 | vino.snap = CEPH_NOSNAP; | 97 | vino.snap = CEPH_NOSNAP; |
97 | inode = ceph_find_inode(sb, vino); | 98 | inode = ceph_find_inode(sb, vino); |
98 | if (!inode) | 99 | if (!inode) { |
99 | return ERR_PTR(-ESTALE); | 100 | struct ceph_mds_request *req; |
101 | |||
102 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, | ||
103 | USE_ANY_MDS); | ||
104 | if (IS_ERR(req)) | ||
105 | return ERR_CAST(req); | ||
106 | |||
107 | req->r_ino1 = vino; | ||
108 | req->r_num_caps = 1; | ||
109 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
110 | inode = req->r_target_inode; | ||
111 | if (inode) | ||
112 | igrab(inode); | ||
113 | ceph_mdsc_put_request(req); | ||
114 | if (!inode) | ||
115 | return ERR_PTR(-ESTALE); | ||
116 | } | ||
100 | 117 | ||
101 | dentry = d_obtain_alias(inode); | 118 | dentry = d_obtain_alias(inode); |
102 | if (IS_ERR(dentry)) { | 119 | if (IS_ERR(dentry)) { |
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
148 | snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); | 165 | snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); |
149 | req->r_num_caps = 1; | 166 | req->r_num_caps = 1; |
150 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 167 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
168 | inode = req->r_target_inode; | ||
169 | if (inode) | ||
170 | igrab(inode); | ||
151 | ceph_mdsc_put_request(req); | 171 | ceph_mdsc_put_request(req); |
152 | inode = ceph_find_inode(sb, vino); | ||
153 | if (!inode) | 172 | if (!inode) |
154 | return ERR_PTR(err ? err : -ESTALE); | 173 | return ERR_PTR(err ? err : -ESTALE); |
155 | } | 174 | } |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d0fae4ce9ba5..79743d146be6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
578 | if (dir) { | 578 | if (dir) { |
579 | struct ceph_inode_info *ci = ceph_inode(dir); | 579 | struct ceph_inode_info *ci = ceph_inode(dir); |
580 | 580 | ||
581 | ihold(dir); | ||
581 | spin_lock(&ci->i_unsafe_lock); | 582 | spin_lock(&ci->i_unsafe_lock); |
582 | req->r_unsafe_dir = dir; | 583 | req->r_unsafe_dir = dir; |
583 | list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); | 584 | list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); |
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
598 | spin_lock(&ci->i_unsafe_lock); | 599 | spin_lock(&ci->i_unsafe_lock); |
599 | list_del_init(&req->r_unsafe_dir_item); | 600 | list_del_init(&req->r_unsafe_dir_item); |
600 | spin_unlock(&ci->i_unsafe_lock); | 601 | spin_unlock(&ci->i_unsafe_lock); |
602 | |||
603 | iput(req->r_unsafe_dir); | ||
604 | req->r_unsafe_dir = NULL; | ||
601 | } | 605 | } |
602 | 606 | ||
603 | ceph_mdsc_put_request(req); | 607 | ceph_mdsc_put_request(req); |
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2691 | { | 2695 | { |
2692 | struct super_block *sb = mdsc->fsc->sb; | 2696 | struct super_block *sb = mdsc->fsc->sb; |
2693 | struct inode *inode; | 2697 | struct inode *inode; |
2694 | struct ceph_inode_info *ci; | ||
2695 | struct dentry *parent, *dentry; | 2698 | struct dentry *parent, *dentry; |
2696 | struct ceph_dentry_info *di; | 2699 | struct ceph_dentry_info *di; |
2697 | int mds = session->s_mds; | 2700 | int mds = session->s_mds; |
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2728 | dout("handle_lease no inode %llx\n", vino.ino); | 2731 | dout("handle_lease no inode %llx\n", vino.ino); |
2729 | goto release; | 2732 | goto release; |
2730 | } | 2733 | } |
2731 | ci = ceph_inode(inode); | ||
2732 | 2734 | ||
2733 | /* dentry */ | 2735 | /* dentry */ |
2734 | parent = d_find_alias(inode); | 2736 | parent = d_find_alias(inode); |
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
3002 | spin_lock_init(&mdsc->snap_flush_lock); | 3004 | spin_lock_init(&mdsc->snap_flush_lock); |
3003 | mdsc->cap_flush_seq = 0; | 3005 | mdsc->cap_flush_seq = 0; |
3004 | INIT_LIST_HEAD(&mdsc->cap_dirty); | 3006 | INIT_LIST_HEAD(&mdsc->cap_dirty); |
3007 | INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); | ||
3005 | mdsc->num_cap_flushing = 0; | 3008 | mdsc->num_cap_flushing = 0; |
3006 | spin_lock_init(&mdsc->cap_dirty_lock); | 3009 | spin_lock_init(&mdsc->cap_dirty_lock); |
3007 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 3010 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4e3a9cc0bba6..7d8a0d662d56 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -278,6 +278,7 @@ struct ceph_mds_client { | |||
278 | 278 | ||
279 | u64 cap_flush_seq; | 279 | u64 cap_flush_seq; |
280 | struct list_head cap_dirty; /* inodes with dirty caps */ | 280 | struct list_head cap_dirty; /* inodes with dirty caps */ |
281 | struct list_head cap_dirty_migrating; /* ...that are migration... */ | ||
281 | int num_cap_flushing; /* # caps we are flushing */ | 282 | int num_cap_flushing; /* # caps we are flushing */ |
282 | spinlock_t cap_dirty_lock; /* protects above items */ | 283 | spinlock_t cap_dirty_lock; /* protects above items */ |
283 | wait_queue_head_t cap_flushing_wq; | 284 | wait_queue_head_t cap_flushing_wq; |
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index b8e995fbd867..b8c60694b2b0 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
@@ -313,6 +313,7 @@ enum { | |||
313 | CEPH_MDS_OP_GETATTR = 0x00101, | 313 | CEPH_MDS_OP_GETATTR = 0x00101, |
314 | CEPH_MDS_OP_LOOKUPHASH = 0x00102, | 314 | CEPH_MDS_OP_LOOKUPHASH = 0x00102, |
315 | CEPH_MDS_OP_LOOKUPPARENT = 0x00103, | 315 | CEPH_MDS_OP_LOOKUPPARENT = 0x00103, |
316 | CEPH_MDS_OP_LOOKUPINO = 0x00104, | ||
316 | 317 | ||
317 | CEPH_MDS_OP_SETXATTR = 0x01105, | 318 | CEPH_MDS_OP_SETXATTR = 0x01105, |
318 | CEPH_MDS_OP_RMXATTR = 0x01106, | 319 | CEPH_MDS_OP_RMXATTR = 0x01106, |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e15a82ccc05f..78b55f49de7c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss) | |||
76 | break; | 76 | break; |
77 | 77 | ||
78 | default: | 78 | default: |
79 | sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); | 79 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", |
80 | (int)ss->ss_family); | ||
80 | } | 81 | } |
81 | 82 | ||
82 | return s; | 83 | return s; |
@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) | |||
598 | * Connection negotiation. | 599 | * Connection negotiation. |
599 | */ | 600 | */ |
600 | 601 | ||
601 | static void prepare_connect_authorizer(struct ceph_connection *con) | 602 | static int prepare_connect_authorizer(struct ceph_connection *con) |
602 | { | 603 | { |
603 | void *auth_buf; | 604 | void *auth_buf; |
604 | int auth_len = 0; | 605 | int auth_len = 0; |
@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con) | |||
612 | con->auth_retry); | 613 | con->auth_retry); |
613 | mutex_lock(&con->mutex); | 614 | mutex_lock(&con->mutex); |
614 | 615 | ||
616 | if (test_bit(CLOSED, &con->state) || | ||
617 | test_bit(OPENING, &con->state)) | ||
618 | return -EAGAIN; | ||
619 | |||
615 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); | 620 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); |
616 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); | 621 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); |
617 | 622 | ||
618 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; | 623 | if (auth_len) { |
619 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; | 624 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; |
620 | con->out_kvec_left++; | 625 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; |
621 | con->out_kvec_bytes += auth_len; | 626 | con->out_kvec_left++; |
627 | con->out_kvec_bytes += auth_len; | ||
628 | } | ||
629 | return 0; | ||
622 | } | 630 | } |
623 | 631 | ||
624 | /* | 632 | /* |
@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr, | |||
640 | set_bit(WRITE_PENDING, &con->state); | 648 | set_bit(WRITE_PENDING, &con->state); |
641 | } | 649 | } |
642 | 650 | ||
643 | static void prepare_write_connect(struct ceph_messenger *msgr, | 651 | static int prepare_write_connect(struct ceph_messenger *msgr, |
644 | struct ceph_connection *con, | 652 | struct ceph_connection *con, |
645 | int after_banner) | 653 | int after_banner) |
646 | { | 654 | { |
647 | unsigned global_seq = get_global_seq(con->msgr, 0); | 655 | unsigned global_seq = get_global_seq(con->msgr, 0); |
648 | int proto; | 656 | int proto; |
@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
683 | con->out_more = 0; | 691 | con->out_more = 0; |
684 | set_bit(WRITE_PENDING, &con->state); | 692 | set_bit(WRITE_PENDING, &con->state); |
685 | 693 | ||
686 | prepare_connect_authorizer(con); | 694 | return prepare_connect_authorizer(con); |
687 | } | 695 | } |
688 | 696 | ||
689 | 697 | ||
@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) | |||
1065 | switch (ss->ss_family) { | 1073 | switch (ss->ss_family) { |
1066 | case AF_INET: | 1074 | case AF_INET: |
1067 | ((struct sockaddr_in *)ss)->sin_port = htons(p); | 1075 | ((struct sockaddr_in *)ss)->sin_port = htons(p); |
1076 | break; | ||
1068 | case AF_INET6: | 1077 | case AF_INET6: |
1069 | ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); | 1078 | ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); |
1079 | break; | ||
1070 | } | 1080 | } |
1071 | } | 1081 | } |
1072 | 1082 | ||
@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con) | |||
1216 | u64 sup_feat = con->msgr->supported_features; | 1226 | u64 sup_feat = con->msgr->supported_features; |
1217 | u64 req_feat = con->msgr->required_features; | 1227 | u64 req_feat = con->msgr->required_features; |
1218 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1228 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1229 | int ret; | ||
1219 | 1230 | ||
1220 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1231 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
1221 | 1232 | ||
@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con) | |||
1250 | return -1; | 1261 | return -1; |
1251 | } | 1262 | } |
1252 | con->auth_retry = 1; | 1263 | con->auth_retry = 1; |
1253 | prepare_write_connect(con->msgr, con, 0); | 1264 | ret = prepare_write_connect(con->msgr, con, 0); |
1265 | if (ret < 0) | ||
1266 | return ret; | ||
1254 | prepare_read_connect(con); | 1267 | prepare_read_connect(con); |
1255 | break; | 1268 | break; |
1256 | 1269 | ||
@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con) | |||
1277 | if (con->ops->peer_reset) | 1290 | if (con->ops->peer_reset) |
1278 | con->ops->peer_reset(con); | 1291 | con->ops->peer_reset(con); |
1279 | mutex_lock(&con->mutex); | 1292 | mutex_lock(&con->mutex); |
1293 | if (test_bit(CLOSED, &con->state) || | ||
1294 | test_bit(OPENING, &con->state)) | ||
1295 | return -EAGAIN; | ||
1280 | break; | 1296 | break; |
1281 | 1297 | ||
1282 | case CEPH_MSGR_TAG_RETRY_SESSION: | 1298 | case CEPH_MSGR_TAG_RETRY_SESSION: |
@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con) | |||
1341 | * to WAIT. This shouldn't happen if we are the | 1357 | * to WAIT. This shouldn't happen if we are the |
1342 | * client. | 1358 | * client. |
1343 | */ | 1359 | */ |
1344 | pr_err("process_connect peer connecting WAIT\n"); | 1360 | pr_err("process_connect got WAIT as client\n"); |
1361 | con->error_msg = "protocol error, got WAIT as client"; | ||
1362 | return -1; | ||
1345 | 1363 | ||
1346 | default: | 1364 | default: |
1347 | pr_err("connect protocol error, will retry\n"); | 1365 | pr_err("connect protocol error, will retry\n"); |
@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con) | |||
1810 | more: | 1828 | more: |
1811 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | 1829 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, |
1812 | con->in_base_pos); | 1830 | con->in_base_pos); |
1831 | |||
1832 | /* | ||
1833 | * process_connect and process_message drop and re-take | ||
1834 | * con->mutex. make sure we handle a racing close or reopen. | ||
1835 | */ | ||
1836 | if (test_bit(CLOSED, &con->state) || | ||
1837 | test_bit(OPENING, &con->state)) { | ||
1838 | ret = -EAGAIN; | ||
1839 | goto out; | ||
1840 | } | ||
1841 | |||
1813 | if (test_bit(CONNECTING, &con->state)) { | 1842 | if (test_bit(CONNECTING, &con->state)) { |
1814 | if (!test_bit(NEGOTIATING, &con->state)) { | 1843 | if (!test_bit(NEGOTIATING, &con->state)) { |
1815 | dout("try_read connecting\n"); | 1844 | dout("try_read connecting\n"); |
@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work) | |||
1938 | { | 1967 | { |
1939 | struct ceph_connection *con = container_of(work, struct ceph_connection, | 1968 | struct ceph_connection *con = container_of(work, struct ceph_connection, |
1940 | work.work); | 1969 | work.work); |
1970 | int ret; | ||
1941 | 1971 | ||
1942 | mutex_lock(&con->mutex); | 1972 | mutex_lock(&con->mutex); |
1973 | restart: | ||
1943 | if (test_and_clear_bit(BACKOFF, &con->state)) { | 1974 | if (test_and_clear_bit(BACKOFF, &con->state)) { |
1944 | dout("con_work %p backing off\n", con); | 1975 | dout("con_work %p backing off\n", con); |
1945 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | 1976 | if (queue_delayed_work(ceph_msgr_wq, &con->work, |
@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work) | |||
1969 | con_close_socket(con); | 2000 | con_close_socket(con); |
1970 | } | 2001 | } |
1971 | 2002 | ||
1972 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || | 2003 | if (test_and_clear_bit(SOCK_CLOSED, &con->state)) |
1973 | try_read(con) < 0 || | 2004 | goto fault; |
1974 | try_write(con) < 0) { | 2005 | |
1975 | mutex_unlock(&con->mutex); | 2006 | ret = try_read(con); |
1976 | ceph_fault(con); /* error/fault path */ | 2007 | if (ret == -EAGAIN) |
1977 | goto done_unlocked; | 2008 | goto restart; |
1978 | } | 2009 | if (ret < 0) |
2010 | goto fault; | ||
2011 | |||
2012 | ret = try_write(con); | ||
2013 | if (ret == -EAGAIN) | ||
2014 | goto restart; | ||
2015 | if (ret < 0) | ||
2016 | goto fault; | ||
1979 | 2017 | ||
1980 | done: | 2018 | done: |
1981 | mutex_unlock(&con->mutex); | 2019 | mutex_unlock(&con->mutex); |
1982 | done_unlocked: | 2020 | done_unlocked: |
1983 | con->ops->put(con); | 2021 | con->ops->put(con); |
2022 | return; | ||
2023 | |||
2024 | fault: | ||
2025 | mutex_unlock(&con->mutex); | ||
2026 | ceph_fault(con); /* error/fault path */ | ||
2027 | goto done_unlocked; | ||
1984 | } | 2028 | } |
1985 | 2029 | ||
1986 | 2030 | ||
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6b5dda1cb5df..6ea2b892f44b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc, | |||
124 | ceph_calc_raw_layout(osdc, layout, vino.snap, off, | 124 | ceph_calc_raw_layout(osdc, layout, vino.snap, off, |
125 | plen, &bno, req, op); | 125 | plen, &bno, req, op); |
126 | 126 | ||
127 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); | 127 | snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); |
128 | req->r_oid_len = strlen(req->r_oid); | 128 | req->r_oid_len = strlen(req->r_oid); |
129 | } | 129 | } |
130 | 130 | ||
@@ -1421,6 +1421,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) | |||
1421 | done: | 1421 | done: |
1422 | downgrade_write(&osdc->map_sem); | 1422 | downgrade_write(&osdc->map_sem); |
1423 | ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); | 1423 | ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); |
1424 | |||
1425 | /* | ||
1426 | * subscribe to subsequent osdmap updates if full to ensure | ||
1427 | * we find out when we are no longer full and stop returning | ||
1428 | * ENOSPC. | ||
1429 | */ | ||
1430 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) | ||
1431 | ceph_monc_request_next_osdmap(&osdc->client->monc); | ||
1432 | |||
1424 | send_queued(osdc); | 1433 | send_queued(osdc); |
1425 | up_read(&osdc->map_sem); | 1434 | up_read(&osdc->map_sem); |
1426 | wake_up_all(&osdc->client->auth_wq); | 1435 | wake_up_all(&osdc->client->auth_wq); |
@@ -1677,8 +1686,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
1677 | */ | 1686 | */ |
1678 | if (req->r_sent == 0) { | 1687 | if (req->r_sent == 0) { |
1679 | rc = __map_request(osdc, req); | 1688 | rc = __map_request(osdc, req); |
1680 | if (rc < 0) | 1689 | if (rc < 0) { |
1690 | if (nofail) { | ||
1691 | dout("osdc_start_request failed map, " | ||
1692 | " will retry %lld\n", req->r_tid); | ||
1693 | rc = 0; | ||
1694 | } | ||
1681 | goto out_unlock; | 1695 | goto out_unlock; |
1696 | } | ||
1682 | if (req->r_osd == NULL) { | 1697 | if (req->r_osd == NULL) { |
1683 | dout("send_request %p no up osds in pg\n", req); | 1698 | dout("send_request %p no up osds in pg\n", req); |
1684 | ceph_monc_request_next_osdmap(&osdc->client->monc); | 1699 | ceph_monc_request_next_osdmap(&osdc->client->monc); |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 71603ac3dff5..e97c3588c3ec 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
765 | } | 765 | } |
766 | 766 | ||
767 | map->epoch++; | 767 | map->epoch++; |
768 | map->modified = map->modified; | 768 | map->modified = modified; |
769 | if (newcrush) { | 769 | if (newcrush) { |
770 | if (map->crush) | 770 | if (map->crush) |
771 | crush_destroy(map->crush); | 771 | crush_destroy(map->crush); |
@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
830 | map->osd_addr[osd] = addr; | 830 | map->osd_addr[osd] = addr; |
831 | } | 831 | } |
832 | 832 | ||
833 | /* new_down */ | 833 | /* new_state */ |
834 | ceph_decode_32_safe(p, end, len, bad); | 834 | ceph_decode_32_safe(p, end, len, bad); |
835 | while (len--) { | 835 | while (len--) { |
836 | u32 osd; | 836 | u32 osd; |
837 | u8 xorstate; | ||
837 | ceph_decode_32_safe(p, end, osd, bad); | 838 | ceph_decode_32_safe(p, end, osd, bad); |
839 | xorstate = **(u8 **)p; | ||
838 | (*p)++; /* clean flag */ | 840 | (*p)++; /* clean flag */ |
839 | pr_info("osd%d down\n", osd); | 841 | if (xorstate == 0) |
842 | xorstate = CEPH_OSD_UP; | ||
843 | if (xorstate & CEPH_OSD_UP) | ||
844 | pr_info("osd%d down\n", osd); | ||
840 | if (osd < map->max_osd) | 845 | if (osd < map->max_osd) |
841 | map->osd_state[osd] &= ~CEPH_OSD_UP; | 846 | map->osd_state[osd] ^= xorstate; |
842 | } | 847 | } |
843 | 848 | ||
844 | /* new_weight */ | 849 | /* new_weight */ |