Diffstat (limited to 'kernel/cpuset.c')
 -rw-r--r--  kernel/cpuset.c  125
 1 file changed, 105 insertions, 20 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b8..1f06e7690106 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
  */
 
-/*
- * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
- * Disable letting 'cpu_exclusive' cpusets define dynamic sched
- * domains, until the sched domain can handle partial nodes.
- * Remove this #if hackery when sched domains fixed.
- */
-#if 0
 static void update_cpu_domains(struct cpuset *cur)
 {
 	struct cpuset *c, *par = cur->parent;
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur)
 	partition_sched_domains(&pspan, &cspan);
 	unlock_cpu_hotplug();
 }
-#else
-static void update_cpu_domains(struct cpuset *cur)
-{
-}
-#endif
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 0;
 }
 
+/*
+ * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
+ * ancestor to the specified cpuset. Call while holding cpuset_sem.
+ * If no ancestor is mem_exclusive (an unusual configuration), then
+ * returns the root cpuset.
+ */
+static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+{
+	while (!is_mem_exclusive(cs) && cs->parent)
+		cs = cs->parent;
+	return cs;
+}
+
 /**
- * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
- * @z: zone in question
+ * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
  *
- * Is zone z allowed in current->mems_allowed, or is
- * the CPU in interrupt context? (zone is always allowed in this case)
- */
-int cpuset_zone_allowed(struct zone *z)
+ * If we're in interrupt, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. If it's not a
+ * __GFP_HARDWALL request and this zone's nodes is in the nearest
+ * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * Otherwise, no.
+ *
+ * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
+ * and do not allow allocations outside the current tasks cpuset.
+ * GFP_KERNEL allocations are not so marked, so can escape to the
+ * nearest mem_exclusive ancestor cpuset.
+ *
+ * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages()
+ * routine only calls here with __GFP_HARDWALL bit _not_ set if
+ * it's a GFP_KERNEL allocation, and all nodes in the current tasks
+ * mems_allowed came up empty on the first pass over the zonelist.
+ * So only GFP_KERNEL allocations, if all nodes in the cpuset are
+ * short of memory, might require taking the cpuset_sem semaphore.
+ *
+ * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
+ * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
+ * hardwall cpusets - no allocation on a node outside the cpuset is
+ * allowed (unless in interrupt, of course).
+ *
+ * The second loop doesn't even call here for GFP_ATOMIC requests
+ * (if the __alloc_pages() local variable 'wait' is set). That check
+ * and the checks below have the combined affect in the second loop of
+ * the __alloc_pages() routine that:
+ *	in_interrupt - any node ok (current task context irrelevant)
+ *	GFP_ATOMIC   - any node ok
+ *	GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
+ *	GFP_USER     - only nodes in current tasks mems allowed ok.
+ **/
+
+int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 {
-	return in_interrupt() ||
-		node_isset(z->zone_pgdat->node_id, current->mems_allowed);
+	int node;			/* node that zone z is on */
+	const struct cpuset *cs;	/* current cpuset ancestors */
+	int allowed = 1;		/* is allocation in zone z allowed? */
+
+	if (in_interrupt())
+		return 1;
+	node = z->zone_pgdat->node_id;
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
+		return 0;
+
+	/* Not hardwall and node outside mems_allowed: scan up cpusets */
+	down(&cpuset_sem);
+	cs = current->cpuset;
+	if (!cs)
+		goto done;		/* current task exiting */
+	cs = nearest_exclusive_ancestor(cs);
+	allowed = node_isset(node, cs->mems_allowed);
+done:
+	up(&cpuset_sem);
+	return allowed;
+}
+
+/**
+ * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
+ * @p: pointer to task_struct of some other task.
+ *
+ * Description: Return true if the nearest mem_exclusive ancestor
+ * cpusets of tasks @p and current overlap. Used by oom killer to
+ * determine if task @p's memory usage might impact the memory
+ * available to the current task.
+ *
+ * Acquires cpuset_sem - not suitable for calling from a fast path.
+ **/
+
+int cpuset_excl_nodes_overlap(const struct task_struct *p)
+{
+	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
+	int overlap = 0;		/* do cpusets overlap? */
+
+	down(&cpuset_sem);
+	cs1 = current->cpuset;
+	if (!cs1)
+		goto done;		/* current task exiting */
+	cs2 = p->cpuset;
+	if (!cs2)
+		goto done;		/* task p is exiting */
+	cs1 = nearest_exclusive_ancestor(cs1);
+	cs2 = nearest_exclusive_ancestor(cs2);
+	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
+done:
+	up(&cpuset_sem);
+
+	return overlap;
 }
 
 /*
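
The new cpuset_zone_allowed() above boils down to a small decision table: interrupt context and nodes already in the task's own mems_allowed are always allowed, __GFP_HARDWALL (GFP_USER-style) requests stop there, and anything else may escape to the nearest mem_exclusive ancestor cpuset. The user-space sketch below models only that decision, not the kernel code: struct cpuset is reduced to a parent pointer, a mem_exclusive flag and a node bitmask, and GFP_HARDWALL, node_allowed() and the example hierarchy are made-up names for illustration, not kernel APIs.

/* Build: cc -std=c99 -o cpuset_model cpuset_model.c */
#include <stdio.h>
#include <stdbool.h>

#define GFP_HARDWALL 0x1		/* stand-in for the kernel's __GFP_HARDWALL bit */

struct cpuset {
	struct cpuset *parent;		/* NULL for the root cpuset */
	bool mem_exclusive;		/* is this cpuset mem_exclusive? */
	unsigned long mems_allowed;	/* bit n set => memory node n allowed */
};

/* Walk up until a mem_exclusive cpuset (or the root) is found. */
static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
{
	while (!cs->mem_exclusive && cs->parent)
		cs = cs->parent;
	return cs;
}

/*
 * Model of the patched decision, minus the in_interrupt() fast path and
 * the cpuset_sem locking:
 *   node in the task's own mems_allowed   -> allowed
 *   GFP_HARDWALL (GFP_USER-style) request -> denied
 *   otherwise (GFP_KERNEL-style)          -> allowed iff the node is in the
 *                                            nearest mem_exclusive ancestor
 */
static bool node_allowed(const struct cpuset *task_cs, int node, int gfp_mask)
{
	if (task_cs->mems_allowed & (1UL << node))
		return true;
	if (gfp_mask & GFP_HARDWALL)	/* hardwall request: stop here */
		return false;
	return (nearest_exclusive_ancestor(task_cs)->mems_allowed
		& (1UL << node)) != 0;
}

int main(void)
{
	/* mem_exclusive root spans nodes 0-3; the task's cpuset holds node 0 only */
	struct cpuset root  = { NULL,  true,  0xfUL };
	struct cpuset child = { &root, false, 0x1UL };

	printf("node 1, GFP_USER-like:   %d\n", node_allowed(&child, 1, GFP_HARDWALL));
	printf("node 1, GFP_KERNEL-like: %d\n", node_allowed(&child, 1, 0));
	return 0;
}

Run against the child cpuset confined to node 0, this prints 0 for the hardwall request on node 1 and 1 for the kernel-style request, mirroring the GFP_USER/GFP_KERNEL split described in the comment block.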
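
cpuset_excl_nodes_overlap() applies the same ancestor walk to two tasks at once, so the oom killer can tell whether killing @p could free memory usable by current. A second self-contained sketch under the same simplified model; excl_nodes_overlap() and the sibling cpusets here are illustrative names, not kernel code.

/* Build: cc -std=c99 -o excl_overlap excl_overlap.c */
#include <stdio.h>
#include <stdbool.h>

struct cpuset {
	struct cpuset *parent;		/* NULL for the root cpuset */
	bool mem_exclusive;		/* is this cpuset mem_exclusive? */
	unsigned long mems_allowed;	/* bit n set => memory node n allowed */
};

static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
{
	while (!cs->mem_exclusive && cs->parent)
		cs = cs->parent;
	return cs;
}

/* Do the two tasks' nearest mem_exclusive ancestors share any memory node? */
static bool excl_nodes_overlap(const struct cpuset *cs1, const struct cpuset *cs2)
{
	cs1 = nearest_exclusive_ancestor(cs1);
	cs2 = nearest_exclusive_ancestor(cs2);
	return (cs1->mems_allowed & cs2->mems_allowed) != 0;
}

int main(void)
{
	struct cpuset root  = { NULL,  false, 0xffUL };	/* nodes 0-7 */
	struct cpuset left  = { &root, true,  0x0fUL };	/* mem_exclusive: nodes 0-3 */
	struct cpuset right = { &root, true,  0xf0UL };	/* mem_exclusive: nodes 4-7 */
	struct cpuset task  = { &left, false, 0x01UL };	/* a task confined under 'left' */

	printf("task vs right: %d\n", excl_nodes_overlap(&task, &right));	/* prints 0 */
	printf("task vs left:  %d\n", excl_nodes_overlap(&task, &left));	/* prints 1 */
	return 0;
}

Tasks confined under different mem_exclusive siblings report no overlap, which is exactly the case where the oom killer should look for a victim elsewhere.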
