aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2015-02-17 11:37:15 -0500
committerIlya Dryomov <idryomov@gmail.com>2015-02-19 06:27:50 -0500
commit7eb71e0351fbb1b242ae70abb7bb17107fe2f792 (patch)
treea5b06178c59e77df73f6c86dbf4b160f18dbf62c
parent7ad18afad02f9802f1eeade91cf880b97e7a9902 (diff)
libceph: fix double __remove_osd() problem
It turns out it's possible to get __remove_osd() called twice on the same OSD. That doesn't sit well with rb_erase() - depending on the shape of the tree we can get a NULL dereference, a soft lockup or a random crash at some point in the future as we end up touching freed memory. One scenario that I was able to reproduce is as follows: <osd3 is idle, on the osd lru list> <con reset - osd3> con_fault_finish() osd_reset() <osdmap - osd3 down> ceph_osdc_handle_map() <takes map_sem> kick_requests() <takes request_mutex> reset_changed_osds() __reset_osd() __remove_osd() <releases request_mutex> <releases map_sem> <takes map_sem> <takes request_mutex> __kick_osd_requests() __reset_osd() __remove_osd() <-- !!! A case can be made that osd refcounting is imperfect and reworking it would be a proper resolution, but for now Sage and I decided to fix this by adding a safe guard around __remove_osd(). Fixes: http://tracker.ceph.com/issues/8087 Cc: Sage Weil <sage@redhat.com> Cc: stable@vger.kernel.org # 3.9+: 7c6e6fc53e73: libceph: assert both regular and lingering lists in __remove_osd() Cc: stable@vger.kernel.org # 3.9+: cc9f1f518cec: libceph: change from BUG to WARN for __remove_osd() asserts Cc: stable@vger.kernel.org # 3.9+ Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Sage Weil <sage@redhat.com> Reviewed-by: Alex Elder <elder@linaro.org>
-rw-r--r--net/ceph/osd_client.c26
1 files changed, 18 insertions, 8 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 53299c7b0ca4..f693a2f8ac86 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1048,14 +1048,24 @@ static void put_osd(struct ceph_osd *osd)
1048 */ 1048 */
1049static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) 1049static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
1050{ 1050{
1051 dout("__remove_osd %p\n", osd); 1051 dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
1052 WARN_ON(!list_empty(&osd->o_requests)); 1052 WARN_ON(!list_empty(&osd->o_requests));
1053 WARN_ON(!list_empty(&osd->o_linger_requests)); 1053 WARN_ON(!list_empty(&osd->o_linger_requests));
1054 1054
1055 rb_erase(&osd->o_node, &osdc->osds);
1056 list_del_init(&osd->o_osd_lru); 1055 list_del_init(&osd->o_osd_lru);
1057 ceph_con_close(&osd->o_con); 1056 rb_erase(&osd->o_node, &osdc->osds);
1058 put_osd(osd); 1057 RB_CLEAR_NODE(&osd->o_node);
1058}
1059
1060static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
1061{
1062 dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
1063
1064 if (!RB_EMPTY_NODE(&osd->o_node)) {
1065 ceph_con_close(&osd->o_con);
1066 __remove_osd(osdc, osd);
1067 put_osd(osd);
1068 }
1059} 1069}
1060 1070
1061static void remove_all_osds(struct ceph_osd_client *osdc) 1071static void remove_all_osds(struct ceph_osd_client *osdc)
@@ -1065,7 +1075,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
1065 while (!RB_EMPTY_ROOT(&osdc->osds)) { 1075 while (!RB_EMPTY_ROOT(&osdc->osds)) {
1066 struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), 1076 struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
1067 struct ceph_osd, o_node); 1077 struct ceph_osd, o_node);
1068 __remove_osd(osdc, osd); 1078 remove_osd(osdc, osd);
1069 } 1079 }
1070 mutex_unlock(&osdc->request_mutex); 1080 mutex_unlock(&osdc->request_mutex);
1071} 1081}
@@ -1106,7 +1116,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc)
1106 list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { 1116 list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
1107 if (time_before(jiffies, osd->lru_ttl)) 1117 if (time_before(jiffies, osd->lru_ttl))
1108 break; 1118 break;
1109 __remove_osd(osdc, osd); 1119 remove_osd(osdc, osd);
1110 } 1120 }
1111 mutex_unlock(&osdc->request_mutex); 1121 mutex_unlock(&osdc->request_mutex);
1112} 1122}
@@ -1121,8 +1131,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
1121 dout("__reset_osd %p osd%d\n", osd, osd->o_osd); 1131 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
1122 if (list_empty(&osd->o_requests) && 1132 if (list_empty(&osd->o_requests) &&
1123 list_empty(&osd->o_linger_requests)) { 1133 list_empty(&osd->o_linger_requests)) {
1124 __remove_osd(osdc, osd); 1134 remove_osd(osdc, osd);
1125
1126 return -ENODEV; 1135 return -ENODEV;
1127 } 1136 }
1128 1137
@@ -1926,6 +1935,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
1926{ 1935{
1927 struct rb_node *p, *n; 1936 struct rb_node *p, *n;
1928 1937
1938 dout("%s %p\n", __func__, osdc);
1929 for (p = rb_first(&osdc->osds); p; p = n) { 1939 for (p = rb_first(&osdc->osds); p; p = n) {
1930 struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); 1940 struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
1931 1941