aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2016-04-28 10:07:23 -0400
committerIlya Dryomov <idryomov@gmail.com>2016-05-25 18:36:26 -0400
commit63244fa123a755e4bbaee03022b68613c71d1332 (patch)
tree9e8e983a7ddcd9c03e67abb56a81f90ef24fe75d /net
parent04812acf572ef41fd51c11e0bf3385f34c0e1b5b (diff)
libceph: introduce ceph_osd_request_target, calc_target()
Introduce ceph_osd_request_target, which contains all mapping-related fields of ceph_osd_request, and calc_target(), which calculates the mappings and populates it. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net')
-rw-r--r--net/ceph/osd_client.c157
-rw-r--r--net/ceph/osdmap.c121
2 files changed, 276 insertions, 2 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 0ff400a56cd6..cff3a7e29233 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -299,6 +299,30 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
299} 299}
300 300
301/* 301/*
302 * Assumes @t is zero-initialized.
303 */
304static void target_init(struct ceph_osd_request_target *t)
305{
306 ceph_oid_init(&t->base_oid);
307 ceph_oloc_init(&t->base_oloc);
308 ceph_oid_init(&t->target_oid);
309 ceph_oloc_init(&t->target_oloc);
310
311 ceph_osds_init(&t->acting);
312 ceph_osds_init(&t->up);
313 t->size = -1;
314 t->min_size = -1;
315
316 t->osd = CEPH_HOMELESS_OSD;
317}
318
319static void target_destroy(struct ceph_osd_request_target *t)
320{
321 ceph_oid_destroy(&t->base_oid);
322 ceph_oid_destroy(&t->target_oid);
323}
324
325/*
302 * requests 326 * requests
303 */ 327 */
304static void ceph_osdc_release_request(struct kref *kref) 328static void ceph_osdc_release_request(struct kref *kref)
@@ -1273,6 +1297,11 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
1273} 1297}
1274EXPORT_SYMBOL(ceph_osdc_set_request_linger); 1298EXPORT_SYMBOL(ceph_osdc_set_request_linger);
1275 1299
1300static bool __pool_full(struct ceph_pg_pool_info *pi)
1301{
1302 return pi->flags & CEPH_POOL_FLAG_FULL;
1303}
1304
1276/* 1305/*
1277 * Returns whether a request should be blocked from being sent 1306 * Returns whether a request should be blocked from being sent
1278 * based on the current osdmap and osd_client settings. 1307 * based on the current osdmap and osd_client settings.
@@ -1289,6 +1318,20 @@ static bool __req_should_be_paused(struct ceph_osd_client *osdc,
1289 (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr); 1318 (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr);
1290} 1319}
1291 1320
1321static bool target_should_be_paused(struct ceph_osd_client *osdc,
1322 const struct ceph_osd_request_target *t,
1323 struct ceph_pg_pool_info *pi)
1324{
1325 bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD);
1326 bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) ||
1327 ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) ||
1328 __pool_full(pi);
1329
1330 WARN_ON(pi->id != t->base_oloc.pool);
1331 return (t->flags & CEPH_OSD_FLAG_READ && pauserd) ||
1332 (t->flags & CEPH_OSD_FLAG_WRITE && pausewr);
1333}
1334
1292/* 1335/*
1293 * Calculate mapping of a request to a PG. Takes tiering into account. 1336 * Calculate mapping of a request to a PG. Takes tiering into account.
1294 */ 1337 */
@@ -1328,6 +1371,116 @@ static int __calc_request_pg(struct ceph_osdmap *osdmap,
1328 &req->r_target_oloc, pg_out); 1371 &req->r_target_oloc, pg_out);
1329} 1372}
1330 1373
/*
 * Outcome of calc_target().
 */
enum calc_target_result {
	/* mapping unchanged, nothing to do */
	CALC_TARGET_NO_ACTION = 0,
	/* mapping changed or target got unpaused */
	CALC_TARGET_NEED_RESEND,
	/* pool lookup failed, target is homeless */
	CALC_TARGET_POOL_DNE,
};
1379
1380static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
1381 struct ceph_osd_request_target *t,
1382 u32 *last_force_resend,
1383 bool any_change)
1384{
1385 struct ceph_pg_pool_info *pi;
1386 struct ceph_pg pgid, last_pgid;
1387 struct ceph_osds up, acting;
1388 bool force_resend = false;
1389 bool need_check_tiering = false;
1390 bool need_resend = false;
1391 bool sort_bitwise = ceph_osdmap_flag(osdc->osdmap,
1392 CEPH_OSDMAP_SORTBITWISE);
1393 enum calc_target_result ct_res;
1394 int ret;
1395
1396 pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
1397 if (!pi) {
1398 t->osd = CEPH_HOMELESS_OSD;
1399 ct_res = CALC_TARGET_POOL_DNE;
1400 goto out;
1401 }
1402
1403 if (osdc->osdmap->epoch == pi->last_force_request_resend) {
1404 if (last_force_resend &&
1405 *last_force_resend < pi->last_force_request_resend) {
1406 *last_force_resend = pi->last_force_request_resend;
1407 force_resend = true;
1408 } else if (!last_force_resend) {
1409 force_resend = true;
1410 }
1411 }
1412 if (ceph_oid_empty(&t->target_oid) || force_resend) {
1413 ceph_oid_copy(&t->target_oid, &t->base_oid);
1414 need_check_tiering = true;
1415 }
1416 if (ceph_oloc_empty(&t->target_oloc) || force_resend) {
1417 ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
1418 need_check_tiering = true;
1419 }
1420
1421 if (need_check_tiering &&
1422 (t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
1423 if (t->flags & CEPH_OSD_FLAG_READ && pi->read_tier >= 0)
1424 t->target_oloc.pool = pi->read_tier;
1425 if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0)
1426 t->target_oloc.pool = pi->write_tier;
1427 }
1428
1429 ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid,
1430 &t->target_oloc, &pgid);
1431 if (ret) {
1432 WARN_ON(ret != -ENOENT);
1433 t->osd = CEPH_HOMELESS_OSD;
1434 ct_res = CALC_TARGET_POOL_DNE;
1435 goto out;
1436 }
1437 last_pgid.pool = pgid.pool;
1438 last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
1439
1440 ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting);
1441 if (any_change &&
1442 ceph_is_new_interval(&t->acting,
1443 &acting,
1444 &t->up,
1445 &up,
1446 t->size,
1447 pi->size,
1448 t->min_size,
1449 pi->min_size,
1450 t->pg_num,
1451 pi->pg_num,
1452 t->sort_bitwise,
1453 sort_bitwise,
1454 &last_pgid))
1455 force_resend = true;
1456
1457 if (t->paused && !target_should_be_paused(osdc, t, pi)) {
1458 t->paused = false;
1459 need_resend = true;
1460 }
1461
1462 if (ceph_pg_compare(&t->pgid, &pgid) ||
1463 ceph_osds_changed(&t->acting, &acting, any_change) ||
1464 force_resend) {
1465 t->pgid = pgid; /* struct */
1466 ceph_osds_copy(&t->acting, &acting);
1467 ceph_osds_copy(&t->up, &up);
1468 t->size = pi->size;
1469 t->min_size = pi->min_size;
1470 t->pg_num = pi->pg_num;
1471 t->pg_num_mask = pi->pg_num_mask;
1472 t->sort_bitwise = sort_bitwise;
1473
1474 t->osd = acting.primary;
1475 need_resend = true;
1476 }
1477
1478 ct_res = need_resend ? CALC_TARGET_NEED_RESEND : CALC_TARGET_NO_ACTION;
1479out:
1480 dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
1481 return ct_res;
1482}
1483
1331static void __enqueue_request(struct ceph_osd_request *req) 1484static void __enqueue_request(struct ceph_osd_request *req)
1332{ 1485{
1333 struct ceph_osd_client *osdc = req->r_osdc; 1486 struct ceph_osd_client *osdc = req->r_osdc;
@@ -1805,12 +1958,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1805 redir.oloc.pool = -1; 1958 redir.oloc.pool = -1;
1806 } 1959 }
1807 1960
1808 if (redir.oloc.pool != -1) { 1961 if (!ceph_oloc_empty(&redir.oloc)) {
1809 dout("redirect pool %lld\n", redir.oloc.pool); 1962 dout("redirect pool %lld\n", redir.oloc.pool);
1810 1963
1811 __unregister_request(osdc, req); 1964 __unregister_request(osdc, req);
1812 1965
1813 req->r_target_oloc = redir.oloc; /* struct */ 1966 ceph_oloc_copy(&req->r_target_oloc, &redir.oloc);
1814 1967
1815 /* 1968 /*
1816 * Start redirect requests with nofail=true. If 1969 * Start redirect requests with nofail=true. If
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 66c3ebead92f..7d4a5b43085e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1521,6 +1521,32 @@ void ceph_oid_destroy(struct ceph_object_id *oid)
1521} 1521}
1522EXPORT_SYMBOL(ceph_oid_destroy); 1522EXPORT_SYMBOL(ceph_oid_destroy);
1523 1523
1524/*
1525 * osds only
1526 */
1527static bool __osds_equal(const struct ceph_osds *lhs,
1528 const struct ceph_osds *rhs)
1529{
1530 if (lhs->size == rhs->size &&
1531 !memcmp(lhs->osds, rhs->osds, rhs->size * sizeof(rhs->osds[0])))
1532 return true;
1533
1534 return false;
1535}
1536
1537/*
1538 * osds + primary
1539 */
1540static bool osds_equal(const struct ceph_osds *lhs,
1541 const struct ceph_osds *rhs)
1542{
1543 if (__osds_equal(lhs, rhs) &&
1544 lhs->primary == rhs->primary)
1545 return true;
1546
1547 return false;
1548}
1549
1524static bool osds_valid(const struct ceph_osds *set) 1550static bool osds_valid(const struct ceph_osds *set)
1525{ 1551{
1526 /* non-empty set */ 1552 /* non-empty set */
@@ -1553,6 +1579,101 @@ void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
1553 dest->primary = src->primary; 1579 dest->primary = src->primary;
1554} 1580}
1555 1581
1582static bool is_split(const struct ceph_pg *pgid,
1583 u32 old_pg_num,
1584 u32 new_pg_num)
1585{
1586 int old_bits = calc_bits_of(old_pg_num);
1587 int old_mask = (1 << old_bits) - 1;
1588 int n;
1589
1590 WARN_ON(pgid->seed >= old_pg_num);
1591 if (new_pg_num <= old_pg_num)
1592 return false;
1593
1594 for (n = 1; ; n++) {
1595 int next_bit = n << (old_bits - 1);
1596 u32 s = next_bit | pgid->seed;
1597
1598 if (s < old_pg_num || s == pgid->seed)
1599 continue;
1600 if (s >= new_pg_num)
1601 break;
1602
1603 s = ceph_stable_mod(s, old_pg_num, old_mask);
1604 if (s == pgid->seed)
1605 return true;
1606 }
1607
1608 return false;
1609}
1610
1611bool ceph_is_new_interval(const struct ceph_osds *old_acting,
1612 const struct ceph_osds *new_acting,
1613 const struct ceph_osds *old_up,
1614 const struct ceph_osds *new_up,
1615 int old_size,
1616 int new_size,
1617 int old_min_size,
1618 int new_min_size,
1619 u32 old_pg_num,
1620 u32 new_pg_num,
1621 bool old_sort_bitwise,
1622 bool new_sort_bitwise,
1623 const struct ceph_pg *pgid)
1624{
1625 return !osds_equal(old_acting, new_acting) ||
1626 !osds_equal(old_up, new_up) ||
1627 old_size != new_size ||
1628 old_min_size != new_min_size ||
1629 is_split(pgid, old_pg_num, new_pg_num) ||
1630 old_sort_bitwise != new_sort_bitwise;
1631}
1632
1633static int calc_pg_rank(int osd, const struct ceph_osds *acting)
1634{
1635 int i;
1636
1637 for (i = 0; i < acting->size; i++) {
1638 if (acting->osds[i] == osd)
1639 return i;
1640 }
1641
1642 return -1;
1643}
1644
1645static bool primary_changed(const struct ceph_osds *old_acting,
1646 const struct ceph_osds *new_acting)
1647{
1648 if (!old_acting->size && !new_acting->size)
1649 return false; /* both still empty */
1650
1651 if (!old_acting->size ^ !new_acting->size)
1652 return true; /* was empty, now not, or vice versa */
1653
1654 if (old_acting->primary != new_acting->primary)
1655 return true; /* primary changed */
1656
1657 if (calc_pg_rank(old_acting->primary, old_acting) !=
1658 calc_pg_rank(new_acting->primary, new_acting))
1659 return true;
1660
1661 return false; /* same primary (tho replicas may have changed) */
1662}
1663
/*
 * Returns true if the acting set changed in a way the caller cares
 * about: always on a primary change, and additionally on any difference
 * in the osd arrays when @any_change is set.
 */
bool ceph_osds_changed(const struct ceph_osds *old_acting,
		       const struct ceph_osds *new_acting,
		       bool any_change)
{
	return primary_changed(old_acting, new_acting) ||
	       (any_change && !__osds_equal(old_acting, new_acting));
}
1676
1556/* 1677/*
1557 * calculate file layout from given offset, length. 1678 * calculate file layout from given offset, length.
1558 * fill in correct oid, logical length, and object extent 1679 * fill in correct oid, logical length, and object extent