aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ilya Dryomov <ilya.dryomov@inktank.com> 2014-03-24 11:12:49 -0400
committer: Sage Weil <sage@inktank.com> 2014-04-05 00:08:17 -0400
commit: 47ec1f3cc46dde00deb34922dbffdeda254ad76d (patch)
tree: 1dbfa1b18638f70684a531ba53c16e40a7e3f320
parent: 5e8d4d36bf23bb7baf027c479d54395840219928 (diff)
libceph: add support for osd primary affinity
Respond to non-default primary_affinity values accordingly. (Primary affinity allows the admin to shift 'primary responsibility' away from specific osds, effectively shifting around the read side of the workload and whatever overhead is incurred by peering and writes by virtue of being the primary.)

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
-rw-r--r--net/ceph/osdmap.c68
1 files changed, 68 insertions, 0 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 20a38a37794c..ae8f367c5291 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1596,6 +1596,72 @@ static int raw_to_up_osds(struct ceph_osdmap *osdmap,
1596 return len; 1596 return len;
1597} 1597}
1598 1598
1599static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
1600 struct ceph_pg_pool_info *pool,
1601 int *osds, int len, int *primary)
1602{
1603 int i;
1604 int pos = -1;
1605
1606 /*
1607 * Do we have any non-default primary_affinity values for these
1608 * osds?
1609 */
1610 if (!osdmap->osd_primary_affinity)
1611 return;
1612
1613 for (i = 0; i < len; i++) {
1614 if (osds[i] != CRUSH_ITEM_NONE &&
1615 osdmap->osd_primary_affinity[i] !=
1616 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
1617 break;
1618 }
1619 }
1620 if (i == len)
1621 return;
1622
1623 /*
1624 * Pick the primary. Feed both the seed (for the pg) and the
1625 * osd into the hash/rng so that a proportional fraction of an
1626 * osd's pgs get rejected as primary.
1627 */
1628 for (i = 0; i < len; i++) {
1629 int osd;
1630 u32 aff;
1631
1632 osd = osds[i];
1633 if (osd == CRUSH_ITEM_NONE)
1634 continue;
1635
1636 aff = osdmap->osd_primary_affinity[osd];
1637 if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
1638 (crush_hash32_2(CRUSH_HASH_RJENKINS1,
1639 pps, osd) >> 16) >= aff) {
1640 /*
1641 * We chose not to use this primary. Note it
1642 * anyway as a fallback in case we don't pick
1643 * anyone else, but keep looking.
1644 */
1645 if (pos < 0)
1646 pos = i;
1647 } else {
1648 pos = i;
1649 break;
1650 }
1651 }
1652 if (pos < 0)
1653 return;
1654
1655 *primary = osds[pos];
1656
1657 if (ceph_can_shift_osds(pool) && pos > 0) {
1658 /* move the new primary to the front */
1659 for (i = pos; i > 0; i--)
1660 osds[i] = osds[i - 1];
1661 osds[0] = *primary;
1662 }
1663}
1664
1599/* 1665/*
1600 * Given up set, apply pg_temp and primary_temp mappings. 1666 * Given up set, apply pg_temp and primary_temp mappings.
1601 * 1667 *
@@ -1698,6 +1764,8 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1698 1764
1699 len = raw_to_up_osds(osdmap, pool, osds, len, primary); 1765 len = raw_to_up_osds(osdmap, pool, osds, len, primary);
1700 1766
1767 apply_primary_affinity(osdmap, pps, pool, osds, len, primary);
1768
1701 len = apply_temps(osdmap, pool, pgid, osds, len, primary); 1769 len = apply_temps(osdmap, pool, pgid, osds, len, primary);
1702 1770
1703 return len; 1771 return len;