diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2016-04-28 10:07:23 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2016-05-25 18:36:26 -0400 |
commit | 63244fa123a755e4bbaee03022b68613c71d1332 (patch) | |
tree | 9e8e983a7ddcd9c03e67abb56a81f90ef24fe75d /net | |
parent | 04812acf572ef41fd51c11e0bf3385f34c0e1b5b (diff) |
libceph: introduce ceph_osd_request_target, calc_target()
Introduce ceph_osd_request_target, which contains all mapping-related
fields of ceph_osd_request, and calc_target(), which calculates the
mappings and populates it.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net')
-rw-r--r-- | net/ceph/osd_client.c | 157 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 121 |
2 files changed, 276 insertions, 2 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0ff400a56cd6..cff3a7e29233 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -299,6 +299,30 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, | |||
299 | } | 299 | } |
300 | 300 | ||
301 | /* | 301 | /* |
302 | * Assumes @t is zero-initialized. | ||
303 | */ | ||
304 | static void target_init(struct ceph_osd_request_target *t) | ||
305 | { | ||
306 | ceph_oid_init(&t->base_oid); | ||
307 | ceph_oloc_init(&t->base_oloc); | ||
308 | ceph_oid_init(&t->target_oid); | ||
309 | ceph_oloc_init(&t->target_oloc); | ||
310 | |||
311 | ceph_osds_init(&t->acting); | ||
312 | ceph_osds_init(&t->up); | ||
313 | t->size = -1; | ||
314 | t->min_size = -1; | ||
315 | |||
316 | t->osd = CEPH_HOMELESS_OSD; | ||
317 | } | ||
318 | |||
319 | static void target_destroy(struct ceph_osd_request_target *t) | ||
320 | { | ||
321 | ceph_oid_destroy(&t->base_oid); | ||
322 | ceph_oid_destroy(&t->target_oid); | ||
323 | } | ||
324 | |||
325 | /* | ||
302 | * requests | 326 | * requests |
303 | */ | 327 | */ |
304 | static void ceph_osdc_release_request(struct kref *kref) | 328 | static void ceph_osdc_release_request(struct kref *kref) |
@@ -1273,6 +1297,11 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, | |||
1273 | } | 1297 | } |
1274 | EXPORT_SYMBOL(ceph_osdc_set_request_linger); | 1298 | EXPORT_SYMBOL(ceph_osdc_set_request_linger); |
1275 | 1299 | ||
1300 | static bool __pool_full(struct ceph_pg_pool_info *pi) | ||
1301 | { | ||
1302 | return pi->flags & CEPH_POOL_FLAG_FULL; | ||
1303 | } | ||
1304 | |||
1276 | /* | 1305 | /* |
1277 | * Returns whether a request should be blocked from being sent | 1306 | * Returns whether a request should be blocked from being sent |
1278 | * based on the current osdmap and osd_client settings. | 1307 | * based on the current osdmap and osd_client settings. |
@@ -1289,6 +1318,20 @@ static bool __req_should_be_paused(struct ceph_osd_client *osdc, | |||
1289 | (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr); | 1318 | (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr); |
1290 | } | 1319 | } |
1291 | 1320 | ||
1321 | static bool target_should_be_paused(struct ceph_osd_client *osdc, | ||
1322 | const struct ceph_osd_request_target *t, | ||
1323 | struct ceph_pg_pool_info *pi) | ||
1324 | { | ||
1325 | bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); | ||
1326 | bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || | ||
1327 | ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || | ||
1328 | __pool_full(pi); | ||
1329 | |||
1330 | WARN_ON(pi->id != t->base_oloc.pool); | ||
1331 | return (t->flags & CEPH_OSD_FLAG_READ && pauserd) || | ||
1332 | (t->flags & CEPH_OSD_FLAG_WRITE && pausewr); | ||
1333 | } | ||
1334 | |||
1292 | /* | 1335 | /* |
1293 | * Calculate mapping of a request to a PG. Takes tiering into account. | 1336 | * Calculate mapping of a request to a PG. Takes tiering into account. |
1294 | */ | 1337 | */ |
@@ -1328,6 +1371,116 @@ static int __calc_request_pg(struct ceph_osdmap *osdmap, | |||
1328 | &req->r_target_oloc, pg_out); | 1371 | &req->r_target_oloc, pg_out); |
1329 | } | 1372 | } |
1330 | 1373 | ||
1374 | enum calc_target_result { | ||
1375 | CALC_TARGET_NO_ACTION = 0, | ||
1376 | CALC_TARGET_NEED_RESEND, | ||
1377 | CALC_TARGET_POOL_DNE, | ||
1378 | }; | ||
1379 | |||
1380 | static enum calc_target_result calc_target(struct ceph_osd_client *osdc, | ||
1381 | struct ceph_osd_request_target *t, | ||
1382 | u32 *last_force_resend, | ||
1383 | bool any_change) | ||
1384 | { | ||
1385 | struct ceph_pg_pool_info *pi; | ||
1386 | struct ceph_pg pgid, last_pgid; | ||
1387 | struct ceph_osds up, acting; | ||
1388 | bool force_resend = false; | ||
1389 | bool need_check_tiering = false; | ||
1390 | bool need_resend = false; | ||
1391 | bool sort_bitwise = ceph_osdmap_flag(osdc->osdmap, | ||
1392 | CEPH_OSDMAP_SORTBITWISE); | ||
1393 | enum calc_target_result ct_res; | ||
1394 | int ret; | ||
1395 | |||
1396 | pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool); | ||
1397 | if (!pi) { | ||
1398 | t->osd = CEPH_HOMELESS_OSD; | ||
1399 | ct_res = CALC_TARGET_POOL_DNE; | ||
1400 | goto out; | ||
1401 | } | ||
1402 | |||
1403 | if (osdc->osdmap->epoch == pi->last_force_request_resend) { | ||
1404 | if (last_force_resend && | ||
1405 | *last_force_resend < pi->last_force_request_resend) { | ||
1406 | *last_force_resend = pi->last_force_request_resend; | ||
1407 | force_resend = true; | ||
1408 | } else if (!last_force_resend) { | ||
1409 | force_resend = true; | ||
1410 | } | ||
1411 | } | ||
1412 | if (ceph_oid_empty(&t->target_oid) || force_resend) { | ||
1413 | ceph_oid_copy(&t->target_oid, &t->base_oid); | ||
1414 | need_check_tiering = true; | ||
1415 | } | ||
1416 | if (ceph_oloc_empty(&t->target_oloc) || force_resend) { | ||
1417 | ceph_oloc_copy(&t->target_oloc, &t->base_oloc); | ||
1418 | need_check_tiering = true; | ||
1419 | } | ||
1420 | |||
1421 | if (need_check_tiering && | ||
1422 | (t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) { | ||
1423 | if (t->flags & CEPH_OSD_FLAG_READ && pi->read_tier >= 0) | ||
1424 | t->target_oloc.pool = pi->read_tier; | ||
1425 | if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0) | ||
1426 | t->target_oloc.pool = pi->write_tier; | ||
1427 | } | ||
1428 | |||
1429 | ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid, | ||
1430 | &t->target_oloc, &pgid); | ||
1431 | if (ret) { | ||
1432 | WARN_ON(ret != -ENOENT); | ||
1433 | t->osd = CEPH_HOMELESS_OSD; | ||
1434 | ct_res = CALC_TARGET_POOL_DNE; | ||
1435 | goto out; | ||
1436 | } | ||
1437 | last_pgid.pool = pgid.pool; | ||
1438 | last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask); | ||
1439 | |||
1440 | ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting); | ||
1441 | if (any_change && | ||
1442 | ceph_is_new_interval(&t->acting, | ||
1443 | &acting, | ||
1444 | &t->up, | ||
1445 | &up, | ||
1446 | t->size, | ||
1447 | pi->size, | ||
1448 | t->min_size, | ||
1449 | pi->min_size, | ||
1450 | t->pg_num, | ||
1451 | pi->pg_num, | ||
1452 | t->sort_bitwise, | ||
1453 | sort_bitwise, | ||
1454 | &last_pgid)) | ||
1455 | force_resend = true; | ||
1456 | |||
1457 | if (t->paused && !target_should_be_paused(osdc, t, pi)) { | ||
1458 | t->paused = false; | ||
1459 | need_resend = true; | ||
1460 | } | ||
1461 | |||
1462 | if (ceph_pg_compare(&t->pgid, &pgid) || | ||
1463 | ceph_osds_changed(&t->acting, &acting, any_change) || | ||
1464 | force_resend) { | ||
1465 | t->pgid = pgid; /* struct */ | ||
1466 | ceph_osds_copy(&t->acting, &acting); | ||
1467 | ceph_osds_copy(&t->up, &up); | ||
1468 | t->size = pi->size; | ||
1469 | t->min_size = pi->min_size; | ||
1470 | t->pg_num = pi->pg_num; | ||
1471 | t->pg_num_mask = pi->pg_num_mask; | ||
1472 | t->sort_bitwise = sort_bitwise; | ||
1473 | |||
1474 | t->osd = acting.primary; | ||
1475 | need_resend = true; | ||
1476 | } | ||
1477 | |||
1478 | ct_res = need_resend ? CALC_TARGET_NEED_RESEND : CALC_TARGET_NO_ACTION; | ||
1479 | out: | ||
1480 | dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd); | ||
1481 | return ct_res; | ||
1482 | } | ||
1483 | |||
1331 | static void __enqueue_request(struct ceph_osd_request *req) | 1484 | static void __enqueue_request(struct ceph_osd_request *req) |
1332 | { | 1485 | { |
1333 | struct ceph_osd_client *osdc = req->r_osdc; | 1486 | struct ceph_osd_client *osdc = req->r_osdc; |
@@ -1805,12 +1958,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) | |||
1805 | redir.oloc.pool = -1; | 1958 | redir.oloc.pool = -1; |
1806 | } | 1959 | } |
1807 | 1960 | ||
1808 | if (redir.oloc.pool != -1) { | 1961 | if (!ceph_oloc_empty(&redir.oloc)) { |
1809 | dout("redirect pool %lld\n", redir.oloc.pool); | 1962 | dout("redirect pool %lld\n", redir.oloc.pool); |
1810 | 1963 | ||
1811 | __unregister_request(osdc, req); | 1964 | __unregister_request(osdc, req); |
1812 | 1965 | ||
1813 | req->r_target_oloc = redir.oloc; /* struct */ | 1966 | ceph_oloc_copy(&req->r_target_oloc, &redir.oloc); |
1814 | 1967 | ||
1815 | /* | 1968 | /* |
1816 | * Start redirect requests with nofail=true. If | 1969 | * Start redirect requests with nofail=true. If |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 66c3ebead92f..7d4a5b43085e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -1521,6 +1521,32 @@ void ceph_oid_destroy(struct ceph_object_id *oid) | |||
1521 | } | 1521 | } |
1522 | EXPORT_SYMBOL(ceph_oid_destroy); | 1522 | EXPORT_SYMBOL(ceph_oid_destroy); |
1523 | 1523 | ||
1524 | /* | ||
1525 | * osds only | ||
1526 | */ | ||
1527 | static bool __osds_equal(const struct ceph_osds *lhs, | ||
1528 | const struct ceph_osds *rhs) | ||
1529 | { | ||
1530 | if (lhs->size == rhs->size && | ||
1531 | !memcmp(lhs->osds, rhs->osds, rhs->size * sizeof(rhs->osds[0]))) | ||
1532 | return true; | ||
1533 | |||
1534 | return false; | ||
1535 | } | ||
1536 | |||
1537 | /* | ||
1538 | * osds + primary | ||
1539 | */ | ||
1540 | static bool osds_equal(const struct ceph_osds *lhs, | ||
1541 | const struct ceph_osds *rhs) | ||
1542 | { | ||
1543 | if (__osds_equal(lhs, rhs) && | ||
1544 | lhs->primary == rhs->primary) | ||
1545 | return true; | ||
1546 | |||
1547 | return false; | ||
1548 | } | ||
1549 | |||
1524 | static bool osds_valid(const struct ceph_osds *set) | 1550 | static bool osds_valid(const struct ceph_osds *set) |
1525 | { | 1551 | { |
1526 | /* non-empty set */ | 1552 | /* non-empty set */ |
@@ -1553,6 +1579,101 @@ void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src) | |||
1553 | dest->primary = src->primary; | 1579 | dest->primary = src->primary; |
1554 | } | 1580 | } |
1555 | 1581 | ||
1582 | static bool is_split(const struct ceph_pg *pgid, | ||
1583 | u32 old_pg_num, | ||
1584 | u32 new_pg_num) | ||
1585 | { | ||
1586 | int old_bits = calc_bits_of(old_pg_num); | ||
1587 | int old_mask = (1 << old_bits) - 1; | ||
1588 | int n; | ||
1589 | |||
1590 | WARN_ON(pgid->seed >= old_pg_num); | ||
1591 | if (new_pg_num <= old_pg_num) | ||
1592 | return false; | ||
1593 | |||
1594 | for (n = 1; ; n++) { | ||
1595 | int next_bit = n << (old_bits - 1); | ||
1596 | u32 s = next_bit | pgid->seed; | ||
1597 | |||
1598 | if (s < old_pg_num || s == pgid->seed) | ||
1599 | continue; | ||
1600 | if (s >= new_pg_num) | ||
1601 | break; | ||
1602 | |||
1603 | s = ceph_stable_mod(s, old_pg_num, old_mask); | ||
1604 | if (s == pgid->seed) | ||
1605 | return true; | ||
1606 | } | ||
1607 | |||
1608 | return false; | ||
1609 | } | ||
1610 | |||
1611 | bool ceph_is_new_interval(const struct ceph_osds *old_acting, | ||
1612 | const struct ceph_osds *new_acting, | ||
1613 | const struct ceph_osds *old_up, | ||
1614 | const struct ceph_osds *new_up, | ||
1615 | int old_size, | ||
1616 | int new_size, | ||
1617 | int old_min_size, | ||
1618 | int new_min_size, | ||
1619 | u32 old_pg_num, | ||
1620 | u32 new_pg_num, | ||
1621 | bool old_sort_bitwise, | ||
1622 | bool new_sort_bitwise, | ||
1623 | const struct ceph_pg *pgid) | ||
1624 | { | ||
1625 | return !osds_equal(old_acting, new_acting) || | ||
1626 | !osds_equal(old_up, new_up) || | ||
1627 | old_size != new_size || | ||
1628 | old_min_size != new_min_size || | ||
1629 | is_split(pgid, old_pg_num, new_pg_num) || | ||
1630 | old_sort_bitwise != new_sort_bitwise; | ||
1631 | } | ||
1632 | |||
1633 | static int calc_pg_rank(int osd, const struct ceph_osds *acting) | ||
1634 | { | ||
1635 | int i; | ||
1636 | |||
1637 | for (i = 0; i < acting->size; i++) { | ||
1638 | if (acting->osds[i] == osd) | ||
1639 | return i; | ||
1640 | } | ||
1641 | |||
1642 | return -1; | ||
1643 | } | ||
1644 | |||
1645 | static bool primary_changed(const struct ceph_osds *old_acting, | ||
1646 | const struct ceph_osds *new_acting) | ||
1647 | { | ||
1648 | if (!old_acting->size && !new_acting->size) | ||
1649 | return false; /* both still empty */ | ||
1650 | |||
1651 | if (!old_acting->size ^ !new_acting->size) | ||
1652 | return true; /* was empty, now not, or vice versa */ | ||
1653 | |||
1654 | if (old_acting->primary != new_acting->primary) | ||
1655 | return true; /* primary changed */ | ||
1656 | |||
1657 | if (calc_pg_rank(old_acting->primary, old_acting) != | ||
1658 | calc_pg_rank(new_acting->primary, new_acting)) | ||
1659 | return true; | ||
1660 | |||
1661 | return false; /* same primary (tho replicas may have changed) */ | ||
1662 | } | ||
1663 | |||
1664 | bool ceph_osds_changed(const struct ceph_osds *old_acting, | ||
1665 | const struct ceph_osds *new_acting, | ||
1666 | bool any_change) | ||
1667 | { | ||
1668 | if (primary_changed(old_acting, new_acting)) | ||
1669 | return true; | ||
1670 | |||
1671 | if (any_change && !__osds_equal(old_acting, new_acting)) | ||
1672 | return true; | ||
1673 | |||
1674 | return false; | ||
1675 | } | ||
1676 | |||
1556 | /* | 1677 | /* |
1557 | * calculate file layout from given offset, length. | 1678 | * calculate file layout from given offset, length. |
1558 | * fill in correct oid, logical length, and object extent | 1679 | * fill in correct oid, logical length, and object extent |