aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- kernel/cpuset.c 80
1 files changed, 72 insertions, 8 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b8..214806deca99 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1611,17 +1611,81 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
1611 return 0; 1611 return 0;
1612} 1612}
1613 1613
1614/*
1615 * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
1616 * ancestor to the specified cpuset. Call while holding cpuset_sem.
1617 * If no ancestor is mem_exclusive (an unusual configuration), then
1618 * returns the root cpuset.
1619 */
1620static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
1621{
1622 while (!is_mem_exclusive(cs) && cs->parent)
1623 cs = cs->parent;
1624 return cs;
1625}
1626
1614/** 1627/**
1615 * cpuset_zone_allowed - is zone z allowed in current->mems_allowed 1628 * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
1616 * @z: zone in question 1629 * @z: is this zone on an allowed node?
1630 * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
1617 * 1631 *
1618 * Is zone z allowed in current->mems_allowed, or is 1632 * If we're in interrupt, yes, we can always allocate. If zone
1619 * the CPU in interrupt context? (zone is always allowed in this case) 1633 * z's node is in our task's mems_allowed, yes. If it's not a
1620 */ 1634 * __GFP_HARDWALL request and this zone's node is in the nearest
1621int cpuset_zone_allowed(struct zone *z) 1635 * mem_exclusive cpuset ancestor to this task's cpuset, yes.
1636 * Otherwise, no.
1637 *
1638 * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
1639 * and do not allow allocations outside the current task's cpuset.
1640 * GFP_KERNEL allocations are not so marked, so can escape to the
1641 * nearest mem_exclusive ancestor cpuset.
1642 *
1643 * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages()
1644 * routine only calls here with __GFP_HARDWALL bit _not_ set if
1645 * it's a GFP_KERNEL allocation, and all nodes in the current task's
1646 * mems_allowed came up empty on the first pass over the zonelist.
1647 * So only GFP_KERNEL allocations, if all nodes in the cpuset are
1648 * short of memory, might require taking the cpuset_sem semaphore.
1649 *
1650 * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
1651 * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
1652 * hardwall cpusets - no allocation on a node outside the cpuset is
1653 * allowed (unless in interrupt, of course).
1654 *
1655 * The second loop doesn't even call here for GFP_ATOMIC requests
1656 * (if the __alloc_pages() local variable 'wait' is set). That check
1657 * and the checks below have the combined effect in the second loop of
1658 * the __alloc_pages() routine that:
1659 * in_interrupt - any node ok (current task context irrelevant)
1660 * GFP_ATOMIC - any node ok
1661 * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
1662 * GFP_USER - only nodes in current task's mems_allowed ok.
1663 **/
1664
1665int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
1622{ 1666{
1623 return in_interrupt() || 1667 int node; /* node that zone z is on */
1624 node_isset(z->zone_pgdat->node_id, current->mems_allowed); 1668 const struct cpuset *cs; /* current cpuset ancestors */
1669 int allowed = 1; /* is allocation in zone z allowed? */
1670
1671 if (in_interrupt())
1672 return 1;
1673 node = z->zone_pgdat->node_id;
1674 if (node_isset(node, current->mems_allowed))
1675 return 1;
1676 if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
1677 return 0;
1678
1679 /* Not hardwall and node outside mems_allowed: scan up cpusets */
1680 down(&cpuset_sem);
1681 cs = current->cpuset;
1682 if (!cs)
1683 goto done; /* current task exiting */
1684 cs = nearest_exclusive_ancestor(cs);
1685 allowed = node_isset(node, cs->mems_allowed);
1686done:
1687 up(&cpuset_sem);
1688 return allowed;
1625} 1689}
1626 1690
1627/* 1691/*