diff options
| author | Ilya Dryomov <idryomov@gmail.com> | 2015-02-17 11:37:15 -0500 |
|---|---|---|
| committer | Ilya Dryomov <idryomov@gmail.com> | 2015-02-19 06:27:50 -0500 |
| commit | 7eb71e0351fbb1b242ae70abb7bb17107fe2f792 (patch) | |
| tree | a5b06178c59e77df73f6c86dbf4b160f18dbf62c | |
| parent | 7ad18afad02f9802f1eeade91cf880b97e7a9902 (diff) | |
libceph: fix double __remove_osd() problem
It turns out it's possible to get __remove_osd() called twice on the
same OSD. That doesn't sit well with rb_erase() - depending on the
shape of the tree we can get a NULL dereference, a soft lockup or
a random crash at some point in the future as we end up touching freed
memory. One scenario that I was able to reproduce is as follows:
<osd3 is idle, on the osd lru list>
<con reset - osd3>
con_fault_finish()
osd_reset()
<osdmap - osd3 down>
ceph_osdc_handle_map()
<takes map_sem>
kick_requests()
<takes request_mutex>
reset_changed_osds()
__reset_osd()
__remove_osd()
<releases request_mutex>
<releases map_sem>
<takes map_sem>
<takes request_mutex>
__kick_osd_requests()
__reset_osd()
__remove_osd() <-- !!!
A case can be made that osd refcounting is imperfect and reworking it
would be a proper resolution, but for now Sage and I decided to fix
this by adding a safe guard around __remove_osd().
Fixes: http://tracker.ceph.com/issues/8087
Cc: Sage Weil <sage@redhat.com>
Cc: stable@vger.kernel.org # 3.9+: 7c6e6fc53e73: libceph: assert both regular and lingering lists in __remove_osd()
Cc: stable@vger.kernel.org # 3.9+: cc9f1f518cec: libceph: change from BUG to WARN for __remove_osd() asserts
Cc: stable@vger.kernel.org # 3.9+
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Reviewed-by: Alex Elder <elder@linaro.org>
| -rw-r--r-- | net/ceph/osd_client.c | 26 |
1 files changed, 18 insertions, 8 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 53299c7b0ca4..f693a2f8ac86 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -1048,14 +1048,24 @@ static void put_osd(struct ceph_osd *osd) | |||
| 1048 | */ | 1048 | */ |
| 1049 | static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 1049 | static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
| 1050 | { | 1050 | { |
| 1051 | dout("__remove_osd %p\n", osd); | 1051 | dout("%s %p osd%d\n", __func__, osd, osd->o_osd); |
| 1052 | WARN_ON(!list_empty(&osd->o_requests)); | 1052 | WARN_ON(!list_empty(&osd->o_requests)); |
| 1053 | WARN_ON(!list_empty(&osd->o_linger_requests)); | 1053 | WARN_ON(!list_empty(&osd->o_linger_requests)); |
| 1054 | 1054 | ||
| 1055 | rb_erase(&osd->o_node, &osdc->osds); | ||
| 1056 | list_del_init(&osd->o_osd_lru); | 1055 | list_del_init(&osd->o_osd_lru); |
| 1057 | ceph_con_close(&osd->o_con); | 1056 | rb_erase(&osd->o_node, &osdc->osds); |
| 1058 | put_osd(osd); | 1057 | RB_CLEAR_NODE(&osd->o_node); |
| 1058 | } | ||
| 1059 | |||
| 1060 | static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | ||
| 1061 | { | ||
| 1062 | dout("%s %p osd%d\n", __func__, osd, osd->o_osd); | ||
| 1063 | |||
| 1064 | if (!RB_EMPTY_NODE(&osd->o_node)) { | ||
| 1065 | ceph_con_close(&osd->o_con); | ||
| 1066 | __remove_osd(osdc, osd); | ||
| 1067 | put_osd(osd); | ||
| 1068 | } | ||
| 1059 | } | 1069 | } |
| 1060 | 1070 | ||
| 1061 | static void remove_all_osds(struct ceph_osd_client *osdc) | 1071 | static void remove_all_osds(struct ceph_osd_client *osdc) |
| @@ -1065,7 +1075,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc) | |||
| 1065 | while (!RB_EMPTY_ROOT(&osdc->osds)) { | 1075 | while (!RB_EMPTY_ROOT(&osdc->osds)) { |
| 1066 | struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), | 1076 | struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), |
| 1067 | struct ceph_osd, o_node); | 1077 | struct ceph_osd, o_node); |
| 1068 | __remove_osd(osdc, osd); | 1078 | remove_osd(osdc, osd); |
| 1069 | } | 1079 | } |
| 1070 | mutex_unlock(&osdc->request_mutex); | 1080 | mutex_unlock(&osdc->request_mutex); |
| 1071 | } | 1081 | } |
| @@ -1106,7 +1116,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc) | |||
| 1106 | list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { | 1116 | list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { |
| 1107 | if (time_before(jiffies, osd->lru_ttl)) | 1117 | if (time_before(jiffies, osd->lru_ttl)) |
| 1108 | break; | 1118 | break; |
| 1109 | __remove_osd(osdc, osd); | 1119 | remove_osd(osdc, osd); |
| 1110 | } | 1120 | } |
| 1111 | mutex_unlock(&osdc->request_mutex); | 1121 | mutex_unlock(&osdc->request_mutex); |
| 1112 | } | 1122 | } |
| @@ -1121,8 +1131,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | |||
| 1121 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); | 1131 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); |
| 1122 | if (list_empty(&osd->o_requests) && | 1132 | if (list_empty(&osd->o_requests) && |
| 1123 | list_empty(&osd->o_linger_requests)) { | 1133 | list_empty(&osd->o_linger_requests)) { |
| 1124 | __remove_osd(osdc, osd); | 1134 | remove_osd(osdc, osd); |
| 1125 | |||
| 1126 | return -ENODEV; | 1135 | return -ENODEV; |
| 1127 | } | 1136 | } |
| 1128 | 1137 | ||
| @@ -1926,6 +1935,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) | |||
| 1926 | { | 1935 | { |
| 1927 | struct rb_node *p, *n; | 1936 | struct rb_node *p, *n; |
| 1928 | 1937 | ||
| 1938 | dout("%s %p\n", __func__, osdc); | ||
| 1929 | for (p = rb_first(&osdc->osds); p; p = n) { | 1939 | for (p = rb_first(&osdc->osds); p; p = n) { |
| 1930 | struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); | 1940 | struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); |
| 1931 | 1941 | ||
