aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--net/ceph/osdmap.c68
1 files changed, 68 insertions, 0 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 20a38a37794c..ae8f367c5291 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1596,6 +1596,72 @@ static int raw_to_up_osds(struct ceph_osdmap *osdmap,
1596 return len; 1596 return len;
1597} 1597}
1598 1598
1599static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
1600 struct ceph_pg_pool_info *pool,
1601 int *osds, int len, int *primary)
1602{
1603 int i;
1604 int pos = -1;
1605
1606 /*
1607 * Do we have any non-default primary_affinity values for these
1608 * osds?
1609 */
1610 if (!osdmap->osd_primary_affinity)
1611 return;
1612
1613 for (i = 0; i < len; i++) {
1614 if (osds[i] != CRUSH_ITEM_NONE &&
1615 osdmap->osd_primary_affinity[i] !=
1616 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
1617 break;
1618 }
1619 }
1620 if (i == len)
1621 return;
1622
1623 /*
1624 * Pick the primary. Feed both the seed (for the pg) and the
1625 * osd into the hash/rng so that a proportional fraction of an
1626 * osd's pgs get rejected as primary.
1627 */
1628 for (i = 0; i < len; i++) {
1629 int osd;
1630 u32 aff;
1631
1632 osd = osds[i];
1633 if (osd == CRUSH_ITEM_NONE)
1634 continue;
1635
1636 aff = osdmap->osd_primary_affinity[osd];
1637 if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
1638 (crush_hash32_2(CRUSH_HASH_RJENKINS1,
1639 pps, osd) >> 16) >= aff) {
1640 /*
1641 * We chose not to use this primary. Note it
1642 * anyway as a fallback in case we don't pick
1643 * anyone else, but keep looking.
1644 */
1645 if (pos < 0)
1646 pos = i;
1647 } else {
1648 pos = i;
1649 break;
1650 }
1651 }
1652 if (pos < 0)
1653 return;
1654
1655 *primary = osds[pos];
1656
1657 if (ceph_can_shift_osds(pool) && pos > 0) {
1658 /* move the new primary to the front */
1659 for (i = pos; i > 0; i--)
1660 osds[i] = osds[i - 1];
1661 osds[0] = *primary;
1662 }
1663}
1664
1599/* 1665/*
1600 * Given up set, apply pg_temp and primary_temp mappings. 1666 * Given up set, apply pg_temp and primary_temp mappings.
1601 * 1667 *
@@ -1698,6 +1764,8 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1698 1764
1699 len = raw_to_up_osds(osdmap, pool, osds, len, primary); 1765 len = raw_to_up_osds(osdmap, pool, osds, len, primary);
1700 1766
1767 apply_primary_affinity(osdmap, pps, pool, osds, len, primary);
1768
1701 len = apply_temps(osdmap, pool, pgid, osds, len, primary); 1769 len = apply_temps(osdmap, pool, pgid, osds, len, primary);
1702 1770
1703 return len; 1771 return len;