aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2016-04-28 10:07:22 -0400
committerIlya Dryomov <idryomov@gmail.com>2016-05-25 18:36:25 -0400
commit6f3bfd45cd233eea0b07e3cabc0386b5de9321d2 (patch)
treecda9593b00d971b10ebeb9279ad1893978236df8 /net
parentd9591f5e28686277d9312d3c7422faf1368b305e (diff)
libceph: ceph_osds, ceph_pg_to_up_acting_osds()
Knowing just the acting set isn't enough; we need to be able to record the up set as well to detect interval changes. This means returning (up[], up_len, up_primary, acting[], acting_len, acting_primary) and passing it around. Introduce and switch to ceph_osds to help with that. Rename ceph_calc_pg_acting() to ceph_pg_to_up_acting_osds() and return both up and acting sets from it. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net')
-rw-r--r--net/ceph/osd_client.c36
-rw-r--r--net/ceph/osdmap.c304
2 files changed, 197 insertions, 143 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index cb9f1953f5fb..0ff400a56cd6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1358,8 +1358,7 @@ static int __map_request(struct ceph_osd_client *osdc,
1358 struct ceph_osd_request *req, int force_resend) 1358 struct ceph_osd_request *req, int force_resend)
1359{ 1359{
1360 struct ceph_pg pgid; 1360 struct ceph_pg pgid;
1361 int acting[CEPH_PG_MAX_SIZE]; 1361 struct ceph_osds up, acting;
1362 int num, o;
1363 int err; 1362 int err;
1364 bool was_paused; 1363 bool was_paused;
1365 1364
@@ -1372,9 +1371,7 @@ static int __map_request(struct ceph_osd_client *osdc,
1372 } 1371 }
1373 req->r_pgid = pgid; 1372 req->r_pgid = pgid;
1374 1373
1375 num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o); 1374 ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting);
1376 if (num < 0)
1377 num = 0;
1378 1375
1379 was_paused = req->r_paused; 1376 was_paused = req->r_paused;
1380 req->r_paused = __req_should_be_paused(osdc, req); 1377 req->r_paused = __req_should_be_paused(osdc, req);
@@ -1382,21 +1379,23 @@ static int __map_request(struct ceph_osd_client *osdc,
1382 force_resend = 1; 1379 force_resend = 1;
1383 1380
1384 if ((!force_resend && 1381 if ((!force_resend &&
1385 req->r_osd && req->r_osd->o_osd == o && 1382 req->r_osd && req->r_osd->o_osd == acting.primary &&
1386 req->r_sent >= req->r_osd->o_incarnation && 1383 req->r_sent >= req->r_osd->o_incarnation &&
1387 req->r_num_pg_osds == num && 1384 req->r_num_pg_osds == acting.size &&
1388 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || 1385 memcmp(req->r_pg_osds, acting.osds,
1389 (req->r_osd == NULL && o == -1) || 1386 acting.size * sizeof(acting.osds[0])) == 0) ||
1387 (req->r_osd == NULL && acting.primary == -1) ||
1390 req->r_paused) 1388 req->r_paused)
1391 return 0; /* no change */ 1389 return 0; /* no change */
1392 1390
1393 dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", 1391 dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
1394 req->r_tid, pgid.pool, pgid.seed, o, 1392 req->r_tid, pgid.pool, pgid.seed, acting.primary,
1395 req->r_osd ? req->r_osd->o_osd : -1); 1393 req->r_osd ? req->r_osd->o_osd : -1);
1396 1394
1397 /* record full pg acting set */ 1395 /* record full pg acting set */
1398 memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); 1396 memcpy(req->r_pg_osds, acting.osds,
1399 req->r_num_pg_osds = num; 1397 acting.size * sizeof(acting.osds[0]));
1398 req->r_num_pg_osds = acting.size;
1400 1399
1401 if (req->r_osd) { 1400 if (req->r_osd) {
1402 __cancel_request(req); 1401 __cancel_request(req);
@@ -1405,21 +1404,22 @@ static int __map_request(struct ceph_osd_client *osdc,
1405 req->r_osd = NULL; 1404 req->r_osd = NULL;
1406 } 1405 }
1407 1406
1408 req->r_osd = lookup_osd(&osdc->osds, o); 1407 req->r_osd = lookup_osd(&osdc->osds, acting.primary);
1409 if (!req->r_osd && o >= 0) { 1408 if (!req->r_osd && acting.primary >= 0) {
1410 err = -ENOMEM; 1409 err = -ENOMEM;
1411 req->r_osd = create_osd(osdc, o); 1410 req->r_osd = create_osd(osdc, acting.primary);
1412 if (!req->r_osd) { 1411 if (!req->r_osd) {
1413 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1412 list_move(&req->r_req_lru_item, &osdc->req_notarget);
1414 goto out; 1413 goto out;
1415 } 1414 }
1416 1415
1417 dout("map_request osd %p is osd%d\n", req->r_osd, o); 1416 dout("map_request osd %p is osd%d\n", req->r_osd,
1417 acting.primary);
1418 insert_osd(&osdc->osds, req->r_osd); 1418 insert_osd(&osdc->osds, req->r_osd);
1419 1419
1420 ceph_con_open(&req->r_osd->o_con, 1420 ceph_con_open(&req->r_osd->o_con,
1421 CEPH_ENTITY_TYPE_OSD, o, 1421 CEPH_ENTITY_TYPE_OSD, acting.primary,
1422 &osdc->osdmap->osd_addr[o]); 1422 &osdc->osdmap->osd_addr[acting.primary]);
1423 } 1423 }
1424 1424
1425 __enqueue_request(req); 1425 __enqueue_request(req);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 6267839cb246..f5fc8fc63879 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1474,6 +1474,38 @@ void ceph_oid_destroy(struct ceph_object_id *oid)
1474} 1474}
1475EXPORT_SYMBOL(ceph_oid_destroy); 1475EXPORT_SYMBOL(ceph_oid_destroy);
1476 1476
1477static bool osds_valid(const struct ceph_osds *set)
1478{
1479 /* non-empty set */
1480 if (set->size > 0 && set->primary >= 0)
1481 return true;
1482
1483 /* empty can_shift_osds set */
1484 if (!set->size && set->primary == -1)
1485 return true;
1486
1487 /* empty !can_shift_osds set - all NONE */
1488 if (set->size > 0 && set->primary == -1) {
1489 int i;
1490
1491 for (i = 0; i < set->size; i++) {
1492 if (set->osds[i] != CRUSH_ITEM_NONE)
1493 break;
1494 }
1495 if (i == set->size)
1496 return true;
1497 }
1498
1499 return false;
1500}
1501
1502void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
1503{
1504 memcpy(dest->osds, src->osds, src->size * sizeof(src->osds[0]));
1505 dest->size = src->size;
1506 dest->primary = src->primary;
1507}
1508
1477/* 1509/*
1478 * calculate file layout from given offset, length. 1510 * calculate file layout from given offset, length.
1479 * fill in correct oid, logical length, and object extent 1511 * fill in correct oid, logical length, and object extent
@@ -1571,6 +1603,46 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
1571} 1603}
1572EXPORT_SYMBOL(ceph_object_locator_to_pg); 1604EXPORT_SYMBOL(ceph_object_locator_to_pg);
1573 1605
1606/*
1607 * Map a raw PG (full precision ps) into an actual PG.
1608 */
1609static void raw_pg_to_pg(struct ceph_pg_pool_info *pi,
1610 const struct ceph_pg *raw_pgid,
1611 struct ceph_pg *pgid)
1612{
1613 pgid->pool = raw_pgid->pool;
1614 pgid->seed = ceph_stable_mod(raw_pgid->seed, pi->pg_num,
1615 pi->pg_num_mask);
1616}
1617
1618/*
1619 * Map a raw PG (full precision ps) into a placement ps (placement
1620 * seed). Include pool id in that value so that different pools don't
1621 * use the same seeds.
1622 */
1623static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
1624 const struct ceph_pg *raw_pgid)
1625{
1626 if (pi->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
1627 /* hash pool id and seed so that pool PGs do not overlap */
1628 return crush_hash32_2(CRUSH_HASH_RJENKINS1,
1629 ceph_stable_mod(raw_pgid->seed,
1630 pi->pgp_num,
1631 pi->pgp_num_mask),
1632 raw_pgid->pool);
1633 } else {
1634 /*
1635 * legacy behavior: add ps and pool together. this is
1636 * not a great approach because the PGs from each pool
1637 * will overlap on top of each other: 0.5 == 1.4 ==
1638 * 2.3 == ...
1639 */
1640 return ceph_stable_mod(raw_pgid->seed, pi->pgp_num,
1641 pi->pgp_num_mask) +
1642 (unsigned)raw_pgid->pool;
1643 }
1644}
1645
1574static int do_crush(struct ceph_osdmap *map, int ruleno, int x, 1646static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
1575 int *result, int result_max, 1647 int *result, int result_max,
1576 const __u32 *weight, int weight_max) 1648 const __u32 *weight, int weight_max)
@@ -1588,84 +1660,92 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
1588} 1660}
1589 1661
1590/* 1662/*
1591 * Calculate raw (crush) set for given pgid. 1663 * Calculate raw set (CRUSH output) for given PG. The result may
1664 * contain nonexistent OSDs. ->primary is undefined for a raw set.
1592 * 1665 *
1593 * Return raw set length, or error. 1666 * Placement seed (CRUSH input) is returned through @ppps.
1594 */ 1667 */
1595static int pg_to_raw_osds(struct ceph_osdmap *osdmap, 1668static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
1596 struct ceph_pg_pool_info *pool, 1669 struct ceph_pg_pool_info *pi,
1597 struct ceph_pg pgid, u32 pps, int *osds) 1670 const struct ceph_pg *raw_pgid,
1671 struct ceph_osds *raw,
1672 u32 *ppps)
1598{ 1673{
1674 u32 pps = raw_pg_to_pps(pi, raw_pgid);
1599 int ruleno; 1675 int ruleno;
1600 int len; 1676 int len;
1601 1677
1602 /* crush */ 1678 ceph_osds_init(raw);
1603 ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, 1679 if (ppps)
1604 pool->type, pool->size); 1680 *ppps = pps;
1681
1682 ruleno = crush_find_rule(osdmap->crush, pi->crush_ruleset, pi->type,
1683 pi->size);
1605 if (ruleno < 0) { 1684 if (ruleno < 0) {
1606 pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n", 1685 pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
1607 pgid.pool, pool->crush_ruleset, pool->type, 1686 pi->id, pi->crush_ruleset, pi->type, pi->size);
1608 pool->size); 1687 return;
1609 return -ENOENT;
1610 } 1688 }
1611 1689
1612 len = do_crush(osdmap, ruleno, pps, osds, 1690 len = do_crush(osdmap, ruleno, pps, raw->osds,
1613 min_t(int, pool->size, CEPH_PG_MAX_SIZE), 1691 min_t(int, pi->size, ARRAY_SIZE(raw->osds)),
1614 osdmap->osd_weight, osdmap->max_osd); 1692 osdmap->osd_weight, osdmap->max_osd);
1615 if (len < 0) { 1693 if (len < 0) {
1616 pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", 1694 pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
1617 len, ruleno, pgid.pool, pool->crush_ruleset, 1695 len, ruleno, pi->id, pi->crush_ruleset, pi->type,
1618 pool->type, pool->size); 1696 pi->size);
1619 return len; 1697 return;
1620 } 1698 }
1621 1699
1622 return len; 1700 raw->size = len;
1623} 1701}
1624 1702
1625/* 1703/*
1626 * Given raw set, calculate up set and up primary. 1704 * Given raw set, calculate up set and up primary. By definition of an
1705 * up set, the result won't contain nonexistent or down OSDs.
1627 * 1706 *
1628 * Return up set length. *primary is set to up primary osd id, or -1 1707 * This is done in-place - on return @set is the up set. If it's
1629 * if up set is empty. 1708 * empty, ->primary will remain undefined.
1630 */ 1709 */
1631static int raw_to_up_osds(struct ceph_osdmap *osdmap, 1710static void raw_to_up_osds(struct ceph_osdmap *osdmap,
1632 struct ceph_pg_pool_info *pool, 1711 struct ceph_pg_pool_info *pi,
1633 int *osds, int len, int *primary) 1712 struct ceph_osds *set)
1634{ 1713{
1635 int up_primary = -1;
1636 int i; 1714 int i;
1637 1715
1638 if (ceph_can_shift_osds(pool)) { 1716 /* ->primary is undefined for a raw set */
1717 BUG_ON(set->primary != -1);
1718
1719 if (ceph_can_shift_osds(pi)) {
1639 int removed = 0; 1720 int removed = 0;
1640 1721
1641 for (i = 0; i < len; i++) { 1722 /* shift left */
1642 if (ceph_osd_is_down(osdmap, osds[i])) { 1723 for (i = 0; i < set->size; i++) {
1724 if (ceph_osd_is_down(osdmap, set->osds[i])) {
1643 removed++; 1725 removed++;
1644 continue; 1726 continue;
1645 } 1727 }
1646 if (removed) 1728 if (removed)
1647 osds[i - removed] = osds[i]; 1729 set->osds[i - removed] = set->osds[i];
1648 } 1730 }
1649 1731 set->size -= removed;
1650 len -= removed; 1732 if (set->size > 0)
1651 if (len > 0) 1733 set->primary = set->osds[0];
1652 up_primary = osds[0];
1653 } else { 1734 } else {
1654 for (i = len - 1; i >= 0; i--) { 1735 /* set down/dne devices to NONE */
1655 if (ceph_osd_is_down(osdmap, osds[i])) 1736 for (i = set->size - 1; i >= 0; i--) {
1656 osds[i] = CRUSH_ITEM_NONE; 1737 if (ceph_osd_is_down(osdmap, set->osds[i]))
1738 set->osds[i] = CRUSH_ITEM_NONE;
1657 else 1739 else
1658 up_primary = osds[i]; 1740 set->primary = set->osds[i];
1659 } 1741 }
1660 } 1742 }
1661
1662 *primary = up_primary;
1663 return len;
1664} 1743}
1665 1744
1666static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, 1745static void apply_primary_affinity(struct ceph_osdmap *osdmap,
1667 struct ceph_pg_pool_info *pool, 1746 struct ceph_pg_pool_info *pi,
1668 int *osds, int len, int *primary) 1747 u32 pps,
1748 struct ceph_osds *up)
1669{ 1749{
1670 int i; 1750 int i;
1671 int pos = -1; 1751 int pos = -1;
@@ -1677,8 +1757,8 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
1677 if (!osdmap->osd_primary_affinity) 1757 if (!osdmap->osd_primary_affinity)
1678 return; 1758 return;
1679 1759
1680 for (i = 0; i < len; i++) { 1760 for (i = 0; i < up->size; i++) {
1681 int osd = osds[i]; 1761 int osd = up->osds[i];
1682 1762
1683 if (osd != CRUSH_ITEM_NONE && 1763 if (osd != CRUSH_ITEM_NONE &&
1684 osdmap->osd_primary_affinity[osd] != 1764 osdmap->osd_primary_affinity[osd] !=
@@ -1686,7 +1766,7 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
1686 break; 1766 break;
1687 } 1767 }
1688 } 1768 }
1689 if (i == len) 1769 if (i == up->size)
1690 return; 1770 return;
1691 1771
1692 /* 1772 /*
@@ -1694,8 +1774,8 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
1694 * osd into the hash/rng so that a proportional fraction of an 1774 * osd into the hash/rng so that a proportional fraction of an
1695 * osd's pgs get rejected as primary. 1775 * osd's pgs get rejected as primary.
1696 */ 1776 */
1697 for (i = 0; i < len; i++) { 1777 for (i = 0; i < up->size; i++) {
1698 int osd = osds[i]; 1778 int osd = up->osds[i];
1699 u32 aff; 1779 u32 aff;
1700 1780
1701 if (osd == CRUSH_ITEM_NONE) 1781 if (osd == CRUSH_ITEM_NONE)
@@ -1720,123 +1800,99 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
1720 if (pos < 0) 1800 if (pos < 0)
1721 return; 1801 return;
1722 1802
1723 *primary = osds[pos]; 1803 up->primary = up->osds[pos];
1724 1804
1725 if (ceph_can_shift_osds(pool) && pos > 0) { 1805 if (ceph_can_shift_osds(pi) && pos > 0) {
1726 /* move the new primary to the front */ 1806 /* move the new primary to the front */
1727 for (i = pos; i > 0; i--) 1807 for (i = pos; i > 0; i--)
1728 osds[i] = osds[i - 1]; 1808 up->osds[i] = up->osds[i - 1];
1729 osds[0] = *primary; 1809 up->osds[0] = up->primary;
1730 } 1810 }
1731} 1811}
1732 1812
1733/* 1813/*
1734 * Given up set, apply pg_temp and primary_temp mappings. 1814 * Get pg_temp and primary_temp mappings for given PG.
1735 * 1815 *
1736 * Return acting set length. *primary is set to acting primary osd id, 1816 * Note that a PG may have none, only pg_temp, only primary_temp or
1737 * or -1 if acting set is empty. 1817 * both pg_temp and primary_temp mappings. This means @temp isn't
1818 * always a valid OSD set on return: in the "only primary_temp" case,
1819 * @temp will have its ->primary >= 0 but ->size == 0.
1738 */ 1820 */
1739static int apply_temps(struct ceph_osdmap *osdmap, 1821static void get_temp_osds(struct ceph_osdmap *osdmap,
1740 struct ceph_pg_pool_info *pool, struct ceph_pg pgid, 1822 struct ceph_pg_pool_info *pi,
1741 int *osds, int len, int *primary) 1823 const struct ceph_pg *raw_pgid,
1824 struct ceph_osds *temp)
1742{ 1825{
1826 struct ceph_pg pgid;
1743 struct ceph_pg_mapping *pg; 1827 struct ceph_pg_mapping *pg;
1744 int temp_len;
1745 int temp_primary;
1746 int i; 1828 int i;
1747 1829
1748 /* raw_pg -> pg */ 1830 raw_pg_to_pg(pi, raw_pgid, &pgid);
1749 pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, 1831 ceph_osds_init(temp);
1750 pool->pg_num_mask);
1751 1832
1752 /* pg_temp? */ 1833 /* pg_temp? */
1753 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); 1834 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
1754 if (pg) { 1835 if (pg) {
1755 temp_len = 0;
1756 temp_primary = -1;
1757
1758 for (i = 0; i < pg->pg_temp.len; i++) { 1836 for (i = 0; i < pg->pg_temp.len; i++) {
1759 if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) { 1837 if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
1760 if (ceph_can_shift_osds(pool)) 1838 if (ceph_can_shift_osds(pi))
1761 continue; 1839 continue;
1762 else 1840
1763 osds[temp_len++] = CRUSH_ITEM_NONE; 1841 temp->osds[temp->size++] = CRUSH_ITEM_NONE;
1764 } else { 1842 } else {
1765 osds[temp_len++] = pg->pg_temp.osds[i]; 1843 temp->osds[temp->size++] = pg->pg_temp.osds[i];
1766 } 1844 }
1767 } 1845 }
1768 1846
1769 /* apply pg_temp's primary */ 1847 /* apply pg_temp's primary */
1770 for (i = 0; i < temp_len; i++) { 1848 for (i = 0; i < temp->size; i++) {
1771 if (osds[i] != CRUSH_ITEM_NONE) { 1849 if (temp->osds[i] != CRUSH_ITEM_NONE) {
1772 temp_primary = osds[i]; 1850 temp->primary = temp->osds[i];
1773 break; 1851 break;
1774 } 1852 }
1775 } 1853 }
1776 } else {
1777 temp_len = len;
1778 temp_primary = *primary;
1779 } 1854 }
1780 1855
1781 /* primary_temp? */ 1856 /* primary_temp? */
1782 pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid); 1857 pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
1783 if (pg) 1858 if (pg)
1784 temp_primary = pg->primary_temp.osd; 1859 temp->primary = pg->primary_temp.osd;
1785
1786 *primary = temp_primary;
1787 return temp_len;
1788} 1860}
1789 1861
1790/* 1862/*
1791 * Calculate acting set for given pgid. 1863 * Map a PG to its acting set as well as its up set.
1792 * 1864 *
1793 * Return acting set length, or error. *primary is set to acting 1865 * Acting set is used for data mapping purposes, while up set can be
1794 * primary osd id, or -1 if acting set is empty or on error. 1866 * recorded for detecting interval changes and deciding whether to
1867 * resend a request.
1795 */ 1868 */
1796int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, 1869void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
1797 int *osds, int *primary) 1870 const struct ceph_pg *raw_pgid,
1871 struct ceph_osds *up,
1872 struct ceph_osds *acting)
1798{ 1873{
1799 struct ceph_pg_pool_info *pool; 1874 struct ceph_pg_pool_info *pi;
1800 u32 pps; 1875 u32 pps;
1801 int len;
1802 1876
1803 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); 1877 pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
1804 if (!pool) { 1878 if (!pi) {
1805 *primary = -1; 1879 ceph_osds_init(up);
1806 return -ENOENT; 1880 ceph_osds_init(acting);
1881 goto out;
1807 } 1882 }
1808 1883
1809 if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { 1884 pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
1810 /* hash pool id and seed so that pool PGs do not overlap */ 1885 raw_to_up_osds(osdmap, pi, up);
1811 pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, 1886 apply_primary_affinity(osdmap, pi, pps, up);
1812 ceph_stable_mod(pgid.seed, pool->pgp_num, 1887 get_temp_osds(osdmap, pi, raw_pgid, acting);
1813 pool->pgp_num_mask), 1888 if (!acting->size) {
1814 pgid.pool); 1889 memcpy(acting->osds, up->osds, up->size * sizeof(up->osds[0]));
1815 } else { 1890 acting->size = up->size;
1816 /* 1891 if (acting->primary == -1)
1817 * legacy behavior: add ps and pool together. this is 1892 acting->primary = up->primary;
1818 * not a great approach because the PGs from each pool
1819 * will overlap on top of each other: 0.5 == 1.4 ==
1820 * 2.3 == ...
1821 */
1822 pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
1823 pool->pgp_num_mask) +
1824 (unsigned)pgid.pool;
1825 }
1826
1827 len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds);
1828 if (len < 0) {
1829 *primary = -1;
1830 return len;
1831 } 1893 }
1832 1894out:
1833 len = raw_to_up_osds(osdmap, pool, osds, len, primary); 1895 WARN_ON(!osds_valid(up) || !osds_valid(acting));
1834
1835 apply_primary_affinity(osdmap, pps, pool, osds, len, primary);
1836
1837 len = apply_temps(osdmap, pool, pgid, osds, len, primary);
1838
1839 return len;
1840} 1896}
1841 1897
1842/* 1898/*
@@ -1844,11 +1900,9 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1844 */ 1900 */
1845int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) 1901int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1846{ 1902{
1847 int osds[CEPH_PG_MAX_SIZE]; 1903 struct ceph_osds up, acting;
1848 int primary;
1849
1850 ceph_calc_pg_acting(osdmap, pgid, osds, &primary);
1851 1904
1852 return primary; 1905 ceph_pg_to_up_acting_osds(osdmap, &pgid, &up, &acting);
1906 return acting.primary;
1853} 1907}
1854EXPORT_SYMBOL(ceph_calc_pg_primary); 1908EXPORT_SYMBOL(ceph_calc_pg_primary);