diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 80 |
1 files changed, 72 insertions, 8 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8ab1b4e518b8..214806deca99 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1611,17 +1611,81 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) | |||
1611 | return 0; | 1611 | return 0; |
1612 | } | 1612 | } |
1613 | 1613 | ||
1614 | /* | ||
1615 | * nearest_exclusive_ancestor() - Returns cs itself if it is mem_exclusive, | ||
1616 | * otherwise its nearest mem_exclusive ancestor. Call while holding cpuset_sem. | ||
1617 | * If no cpuset on the way up is mem_exclusive (an unusual configuration), | ||
1618 | * returns the last cpuset reached, i.e. the one with no parent (the root). | ||
1619 | */ | ||
1620 | static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | ||
1621 | { | ||
1622 | while (!is_mem_exclusive(cs) && cs->parent) | ||
1623 | cs = cs->parent; | ||
1624 | return cs; | ||
1625 | } | ||
1626 | |||
1614 | /** | 1627 | /** |
1615 | * cpuset_zone_allowed - is zone z allowed in current->mems_allowed | 1628 | * cpuset_zone_allowed - Can we allocate memory on zone z's memory node? |
1616 | * @z: zone in question | 1629 | * @z: is this zone on an allowed node? |
1630 | * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL) | ||
1617 | * | 1631 | * |
1618 | * Is zone z allowed in current->mems_allowed, or is | 1632 | * If we're in interrupt, yes, we can always allocate. If zone |
1619 | * the CPU in interrupt context? (zone is always allowed in this case) | 1633 | * z's node is in our task's mems_allowed, yes. If it's not a |
1620 | */ | 1634 | * __GFP_HARDWALL request and this zone's node is in the nearest |
1621 | int cpuset_zone_allowed(struct zone *z) | 1635 | * mem_exclusive cpuset ancestor to this task's cpuset, yes. |
1636 | * Otherwise, no. | ||
1637 | * | ||
1638 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, | ||
1639 | * and do not allow allocations outside the current task's cpuset. | ||
1640 | * GFP_KERNEL allocations are not so marked, so can escape to the | ||
1641 | * nearest mem_exclusive ancestor cpuset. | ||
1642 | * | ||
1643 | * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages() | ||
1644 | * routine only calls here with __GFP_HARDWALL bit _not_ set if | ||
1645 | * it's a GFP_KERNEL allocation, and all nodes in the current task's | ||
1646 | * mems_allowed came up empty on the first pass over the zonelist. | ||
1647 | * So only GFP_KERNEL allocations, if all nodes in the cpuset are | ||
1648 | * short of memory, might require taking the cpuset_sem semaphore. | ||
1649 | * | ||
1650 | * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() | ||
1651 | * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing | ||
1652 | * hardwall cpusets - no allocation on a node outside the cpuset is | ||
1653 | * allowed (unless in interrupt, of course). | ||
1654 | * | ||
1655 | * The second loop doesn't even call here for GFP_ATOMIC requests (it calls | ||
1656 | * here only if the __alloc_pages() local variable 'wait' is set). That check | ||
1657 | * and the checks below have the combined effect in the second loop of | ||
1658 | * the __alloc_pages() routine that: | ||
1659 | * in_interrupt - any node ok (current task context irrelevant) | ||
1660 | * GFP_ATOMIC - any node ok | ||
1661 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok | ||
1662 | * GFP_USER - only nodes in current task's mems_allowed ok. | ||
1663 | **/ | ||
1664 | |||
1665 | int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) | ||
1622 | { | 1666 | { |
1623 | return in_interrupt() || | 1667 | int node; /* node that zone z is on */ |
1624 | node_isset(z->zone_pgdat->node_id, current->mems_allowed); | 1668 | const struct cpuset *cs; /* current cpuset ancestors */ |
1669 | int allowed = 1; /* is allocation in zone z allowed? */ | ||
1670 | |||
1671 | if (in_interrupt()) | ||
1672 | return 1; | ||
1673 | node = z->zone_pgdat->node_id; | ||
1674 | if (node_isset(node, current->mems_allowed)) | ||
1675 | return 1; | ||
1676 | if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ | ||
1677 | return 0; | ||
1678 | |||
1679 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ | ||
1680 | down(&cpuset_sem); | ||
1681 | cs = current->cpuset; | ||
1682 | if (!cs) | ||
1683 | goto done; /* current task exiting */ | ||
1684 | cs = nearest_exclusive_ancestor(cs); | ||
1685 | allowed = node_isset(node, cs->mems_allowed); | ||
1686 | done: | ||
1687 | up(&cpuset_sem); | ||
1688 | return allowed; | ||
1625 | } | 1689 | } |
1626 | 1690 | ||
1627 | /* | 1691 | /* |