aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-01-02 20:32:49 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-01-02 20:32:49 -0500
commit58890c06691462ca29900d1116b28c7a3e131252 (patch)
tree2fe512c5a664176de8078f06264da850b1460fb5 /net/ceph
parent42288fe366c4f1ce7522bc9f27d0bc2a81c55264 (diff)
parent0fa6ebc600bc8e830551aee47a0e929e818a1868 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil: "Two of Alex's patches deal with a race when reseting server connections for open RBD images, one demotes some non-fatal BUGs to WARNs, and my patch fixes a protocol feature bit failure path." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: libceph: fix protocol feature mismatch failure path libceph: WARN, don't BUG on unexpected connection states libceph: always reset osds when kicking libceph: move linger requests sooner in kick_requests()
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/messenger.c23
-rw-r--r--net/ceph/osd_client.c34
2 files changed, 29 insertions, 28 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 4d111fd2b492..5ccf87ed8d68 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -506,6 +506,7 @@ static void reset_connection(struct ceph_connection *con)
506{ 506{
507 /* reset connection, out_queue, msg_ and connect_seq */ 507 /* reset connection, out_queue, msg_ and connect_seq */
508 /* discard existing out_queue and msg_seq */ 508 /* discard existing out_queue and msg_seq */
509 dout("reset_connection %p\n", con);
509 ceph_msg_remove_list(&con->out_queue); 510 ceph_msg_remove_list(&con->out_queue);
510 ceph_msg_remove_list(&con->out_sent); 511 ceph_msg_remove_list(&con->out_sent);
511 512
@@ -561,7 +562,7 @@ void ceph_con_open(struct ceph_connection *con,
561 mutex_lock(&con->mutex); 562 mutex_lock(&con->mutex);
562 dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); 563 dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
563 564
564 BUG_ON(con->state != CON_STATE_CLOSED); 565 WARN_ON(con->state != CON_STATE_CLOSED);
565 con->state = CON_STATE_PREOPEN; 566 con->state = CON_STATE_PREOPEN;
566 567
567 con->peer_name.type = (__u8) entity_type; 568 con->peer_name.type = (__u8) entity_type;
@@ -1506,13 +1507,6 @@ static int process_banner(struct ceph_connection *con)
1506 return 0; 1507 return 0;
1507} 1508}
1508 1509
1509static void fail_protocol(struct ceph_connection *con)
1510{
1511 reset_connection(con);
1512 BUG_ON(con->state != CON_STATE_NEGOTIATING);
1513 con->state = CON_STATE_CLOSED;
1514}
1515
1516static int process_connect(struct ceph_connection *con) 1510static int process_connect(struct ceph_connection *con)
1517{ 1511{
1518 u64 sup_feat = con->msgr->supported_features; 1512 u64 sup_feat = con->msgr->supported_features;
@@ -1530,7 +1524,7 @@ static int process_connect(struct ceph_connection *con)
1530 ceph_pr_addr(&con->peer_addr.in_addr), 1524 ceph_pr_addr(&con->peer_addr.in_addr),
1531 sup_feat, server_feat, server_feat & ~sup_feat); 1525 sup_feat, server_feat, server_feat & ~sup_feat);
1532 con->error_msg = "missing required protocol features"; 1526 con->error_msg = "missing required protocol features";
1533 fail_protocol(con); 1527 reset_connection(con);
1534 return -1; 1528 return -1;
1535 1529
1536 case CEPH_MSGR_TAG_BADPROTOVER: 1530 case CEPH_MSGR_TAG_BADPROTOVER:
@@ -1541,7 +1535,7 @@ static int process_connect(struct ceph_connection *con)
1541 le32_to_cpu(con->out_connect.protocol_version), 1535 le32_to_cpu(con->out_connect.protocol_version),
1542 le32_to_cpu(con->in_reply.protocol_version)); 1536 le32_to_cpu(con->in_reply.protocol_version));
1543 con->error_msg = "protocol version mismatch"; 1537 con->error_msg = "protocol version mismatch";
1544 fail_protocol(con); 1538 reset_connection(con);
1545 return -1; 1539 return -1;
1546 1540
1547 case CEPH_MSGR_TAG_BADAUTHORIZER: 1541 case CEPH_MSGR_TAG_BADAUTHORIZER:
@@ -1631,11 +1625,11 @@ static int process_connect(struct ceph_connection *con)
1631 ceph_pr_addr(&con->peer_addr.in_addr), 1625 ceph_pr_addr(&con->peer_addr.in_addr),
1632 req_feat, server_feat, req_feat & ~server_feat); 1626 req_feat, server_feat, req_feat & ~server_feat);
1633 con->error_msg = "missing required protocol features"; 1627 con->error_msg = "missing required protocol features";
1634 fail_protocol(con); 1628 reset_connection(con);
1635 return -1; 1629 return -1;
1636 } 1630 }
1637 1631
1638 BUG_ON(con->state != CON_STATE_NEGOTIATING); 1632 WARN_ON(con->state != CON_STATE_NEGOTIATING);
1639 con->state = CON_STATE_OPEN; 1633 con->state = CON_STATE_OPEN;
1640 1634
1641 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); 1635 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
@@ -2132,7 +2126,6 @@ more:
2132 if (ret < 0) 2126 if (ret < 0)
2133 goto out; 2127 goto out;
2134 2128
2135 BUG_ON(con->state != CON_STATE_CONNECTING);
2136 con->state = CON_STATE_NEGOTIATING; 2129 con->state = CON_STATE_NEGOTIATING;
2137 2130
2138 /* 2131 /*
@@ -2160,7 +2153,7 @@ more:
2160 goto more; 2153 goto more;
2161 } 2154 }
2162 2155
2163 BUG_ON(con->state != CON_STATE_OPEN); 2156 WARN_ON(con->state != CON_STATE_OPEN);
2164 2157
2165 if (con->in_base_pos < 0) { 2158 if (con->in_base_pos < 0) {
2166 /* 2159 /*
@@ -2382,7 +2375,7 @@ static void ceph_fault(struct ceph_connection *con)
2382 dout("fault %p state %lu to peer %s\n", 2375 dout("fault %p state %lu to peer %s\n",
2383 con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); 2376 con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
2384 2377
2385 BUG_ON(con->state != CON_STATE_CONNECTING && 2378 WARN_ON(con->state != CON_STATE_CONNECTING &&
2386 con->state != CON_STATE_NEGOTIATING && 2379 con->state != CON_STATE_NEGOTIATING &&
2387 con->state != CON_STATE_OPEN); 2380 con->state != CON_STATE_OPEN);
2388 2381
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 780caf6b0491..eb9a44478764 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1270,7 +1270,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
1270 * Requeue requests whose mapping to an OSD has changed. If requests map to 1270 * Requeue requests whose mapping to an OSD has changed. If requests map to
1271 * no osd, request a new map. 1271 * no osd, request a new map.
1272 * 1272 *
1273 * Caller should hold map_sem for read and request_mutex. 1273 * Caller should hold map_sem for read.
1274 */ 1274 */
1275static void kick_requests(struct ceph_osd_client *osdc, int force_resend) 1275static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1276{ 1276{
@@ -1284,6 +1284,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1284 for (p = rb_first(&osdc->requests); p; ) { 1284 for (p = rb_first(&osdc->requests); p; ) {
1285 req = rb_entry(p, struct ceph_osd_request, r_node); 1285 req = rb_entry(p, struct ceph_osd_request, r_node);
1286 p = rb_next(p); 1286 p = rb_next(p);
1287
1288 /*
1289 * For linger requests that have not yet been
1290 * registered, move them to the linger list; they'll
1291 * be sent to the osd in the loop below. Unregister
1292 * the request before re-registering it as a linger
1293 * request to ensure the __map_request() below
1294 * will decide it needs to be sent.
1295 */
1296 if (req->r_linger && list_empty(&req->r_linger_item)) {
1297 dout("%p tid %llu restart on osd%d\n",
1298 req, req->r_tid,
1299 req->r_osd ? req->r_osd->o_osd : -1);
1300 __unregister_request(osdc, req);
1301 __register_linger_request(osdc, req);
1302 continue;
1303 }
1304
1287 err = __map_request(osdc, req, force_resend); 1305 err = __map_request(osdc, req, force_resend);
1288 if (err < 0) 1306 if (err < 0)
1289 continue; /* error */ 1307 continue; /* error */
@@ -1298,17 +1316,6 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1298 req->r_flags |= CEPH_OSD_FLAG_RETRY; 1316 req->r_flags |= CEPH_OSD_FLAG_RETRY;
1299 } 1317 }
1300 } 1318 }
1301 if (req->r_linger && list_empty(&req->r_linger_item)) {
1302 /*
1303 * register as a linger so that we will
1304 * re-submit below and get a new tid
1305 */
1306 dout("%p tid %llu restart on osd%d\n",
1307 req, req->r_tid,
1308 req->r_osd ? req->r_osd->o_osd : -1);
1309 __register_linger_request(osdc, req);
1310 __unregister_request(osdc, req);
1311 }
1312 } 1319 }
1313 1320
1314 list_for_each_entry_safe(req, nreq, &osdc->req_linger, 1321 list_for_each_entry_safe(req, nreq, &osdc->req_linger,
@@ -1316,6 +1323,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1316 dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); 1323 dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
1317 1324
1318 err = __map_request(osdc, req, force_resend); 1325 err = __map_request(osdc, req, force_resend);
1326 dout("__map_request returned %d\n", err);
1319 if (err == 0) 1327 if (err == 0)
1320 continue; /* no change and no osd was specified */ 1328 continue; /* no change and no osd was specified */
1321 if (err < 0) 1329 if (err < 0)
@@ -1337,6 +1345,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1337 dout("%d requests for down osds, need new map\n", needmap); 1345 dout("%d requests for down osds, need new map\n", needmap);
1338 ceph_monc_request_next_osdmap(&osdc->client->monc); 1346 ceph_monc_request_next_osdmap(&osdc->client->monc);
1339 } 1347 }
1348 reset_changed_osds(osdc);
1340} 1349}
1341 1350
1342 1351
@@ -1393,7 +1402,6 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1393 osdc->osdmap = newmap; 1402 osdc->osdmap = newmap;
1394 } 1403 }
1395 kick_requests(osdc, 0); 1404 kick_requests(osdc, 0);
1396 reset_changed_osds(osdc);
1397 } else { 1405 } else {
1398 dout("ignoring incremental map %u len %d\n", 1406 dout("ignoring incremental map %u len %d\n",
1399 epoch, maplen); 1407 epoch, maplen);