Diffstat (limited to 'kernel/cpuset.c')
 kernel/cpuset.c | 125
 1 file changed, 105 insertions(+), 20 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b8..1f06e7690106 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
  */
 
-/*
- * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
- * Disable letting 'cpu_exclusive' cpusets define dynamic sched
- * domains, until the sched domain can handle partial nodes.
- * Remove this #if hackery when sched domains fixed.
- */
-#if 0
 static void update_cpu_domains(struct cpuset *cur)
 {
 	struct cpuset *c, *par = cur->parent;
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur)
 	partition_sched_domains(&pspan, &cspan);
 	unlock_cpu_hotplug();
 }
-#else
-static void update_cpu_domains(struct cpuset *cur)
-{
-}
-#endif
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 0;
 }
 
+/*
+ * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
+ * ancestor to the specified cpuset.  Call while holding cpuset_sem.
+ * If no ancestor is mem_exclusive (an unusual configuration), then
+ * returns the root cpuset.
+ */
+static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+{
+	while (!is_mem_exclusive(cs) && cs->parent)
+		cs = cs->parent;
+	return cs;
+}
+
 /**
- * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
- * @z: zone in question
+ * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
  *
- * Is zone z allowed in current->mems_allowed, or is
- * the CPU in interrupt context? (zone is always allowed in this case)
- */
-int cpuset_zone_allowed(struct zone *z)
+ * If we're in interrupt, yes, we can always allocate.  If zone
+ * z's node is in our task's mems_allowed, yes.  If it's not a
+ * __GFP_HARDWALL request and this zone's node is in the nearest
+ * mem_exclusive ancestor of this task's cpuset, yes.
+ * Otherwise, no.
+ *
+ * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
+ * and do not allow allocations outside the current task's cpuset.
+ * GFP_KERNEL allocations are not so marked, so can escape to the
+ * nearest mem_exclusive ancestor cpuset.
+ *
+ * Scanning up parent cpusets requires cpuset_sem.  The __alloc_pages()
+ * routine only calls here with the __GFP_HARDWALL bit _not_ set if
+ * it's a GFP_KERNEL allocation, and all nodes in the current task's
+ * mems_allowed came up empty on the first pass over the zonelist.
+ * So only GFP_KERNEL allocations, if all nodes in the cpuset are
+ * short of memory, might require taking the cpuset_sem semaphore.
+ *
+ * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
+ * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
+ * hardwall cpusets - no allocation on a node outside the cpuset is
+ * allowed (unless in interrupt, of course).
+ *
+ * The second loop doesn't even call here for GFP_ATOMIC requests
+ * (it only calls here if the __alloc_pages() local variable 'wait'
+ * is set).  That check and the checks below have the combined
+ * effect, in the second loop of the __alloc_pages() routine, that:
+ *   in_interrupt - any node ok (current task context irrelevant)
+ *   GFP_ATOMIC   - any node ok
+ *   GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
+ *   GFP_USER     - only nodes in the current task's mems_allowed ok.
+ **/
+
+int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 {
-	return in_interrupt() ||
-		node_isset(z->zone_pgdat->node_id, current->mems_allowed);
+	int node;			/* node that zone z is on */
+	const struct cpuset *cs;	/* current cpuset ancestors */
+	int allowed = 1;		/* is allocation in zone z allowed? */
+
+	if (in_interrupt())
+		return 1;
+	node = z->zone_pgdat->node_id;
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
+		return 0;
+
+	/* Not hardwall and node outside mems_allowed: scan up cpusets */
+	down(&cpuset_sem);
+	cs = current->cpuset;
+	if (!cs)
+		goto done;		/* current task exiting */
+	cs = nearest_exclusive_ancestor(cs);
+	allowed = node_isset(node, cs->mems_allowed);
+done:
+	up(&cpuset_sem);
+	return allowed;
+}
+
+/**
+ * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
+ * @p: pointer to task_struct of some other task.
+ *
+ * Description: Return true if the nearest mem_exclusive ancestor
+ * cpusets of tasks @p and current overlap.  Used by the oom killer to
+ * determine if task @p's memory usage might impact the memory
+ * available to the current task.
+ *
+ * Acquires cpuset_sem - not suitable for calling from a fast path.
+ **/
+
+int cpuset_excl_nodes_overlap(const struct task_struct *p)
+{
+	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
+	int overlap = 0;		/* do cpusets overlap? */
+
+	down(&cpuset_sem);
+	cs1 = current->cpuset;
+	if (!cs1)
+		goto done;		/* current task exiting */
+	cs2 = p->cpuset;
+	if (!cs2)
+		goto done;		/* task p is exiting */
+	cs1 = nearest_exclusive_ancestor(cs1);
+	cs2 = nearest_exclusive_ancestor(cs2);
+	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
+done:
+	up(&cpuset_sem);
+
+	return overlap;
 }
 
 /*
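
The ancestor walk added by this patch is simple enough to model outside the kernel. Below is a minimal user-space sketch of what nearest_exclusive_ancestor() does; struct toy_cpuset and its fields are illustrative stand-ins, not the kernel's struct cpuset, and the kernel version additionally depends on cpuset_sem to keep the parent chain stable while it walks.

#include <stdio.h>

struct toy_cpuset {
	const char *name;
	int mem_exclusive;		/* models the cpuset's mem_exclusive flag */
	struct toy_cpuset *parent;	/* NULL only at the root */
};

/* Walk up until a mem_exclusive cpuset is found; fall back to the root. */
static const struct toy_cpuset *
toy_nearest_exclusive_ancestor(const struct toy_cpuset *cs)
{
	while (!cs->mem_exclusive && cs->parent)
		cs = cs->parent;
	return cs;
}

int main(void)
{
	struct toy_cpuset root  = { "root",  0, NULL };
	struct toy_cpuset batch = { "batch", 1, &root };
	struct toy_cpuset job   = { "job",   0, &batch };

	/* "job" is not mem_exclusive, so the walk stops at "batch". */
	printf("%s\n", toy_nearest_exclusive_ancestor(&job)->name);
	/* No mem_exclusive cpuset on the chain: the root is returned. */
	printf("%s\n", toy_nearest_exclusive_ancestor(&root)->name);
	return 0;
}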
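The four-row table at the end of the cpuset_zone_allowed() comment can be checked directly. The sketch below is a user-space model of that decision order under stated assumptions: toy_nodemask_t stands in for nodemask_t, TOY_GFP_HARDWALL models __GFP_HARDWALL, and the nearest mem_exclusive ancestor's mems_allowed is passed in precomputed rather than found under cpuset_sem.

#include <stdio.h>

#define TOY_GFP_HARDWALL 0x1u		/* models __GFP_HARDWALL */

typedef unsigned long toy_nodemask_t;	/* one bit per node; models nodemask_t */

static int toy_node_isset(int node, toy_nodemask_t mask)
{
	return (mask >> node) & 1UL;
}

/*
 * Mirrors the decision order of the new cpuset_zone_allowed(), minus
 * the locking: interrupt context first, then the task's own
 * mems_allowed, then the hardwall bit, then the exclusive ancestor.
 */
static int toy_zone_allowed(toy_nodemask_t task_mems,
			    toy_nodemask_t excl_ancestor_mems,
			    int node, unsigned int gfp_mask, int in_interrupt)
{
	if (in_interrupt)
		return 1;		/* any node ok in interrupt */
	if (toy_node_isset(node, task_mems))
		return 1;		/* node directly allowed */
	if (gfp_mask & TOY_GFP_HARDWALL)
		return 0;		/* GFP_USER-style: stop at the wall */
	/* GFP_KERNEL-style: may escape to the mem_exclusive ancestor. */
	return toy_node_isset(node, excl_ancestor_mems);
}

int main(void)
{
	toy_nodemask_t mine = 0x1;	/* task allowed node 0 only */
	toy_nodemask_t excl = 0x3;	/* exclusive ancestor spans nodes 0-1 */

	printf("%d\n", toy_zone_allowed(mine, excl, 1, TOY_GFP_HARDWALL, 0)); /* 0 */
	printf("%d\n", toy_zone_allowed(mine, excl, 1, 0, 0));                /* 1 */
	printf("%d\n", toy_zone_allowed(mine, excl, 2, 0, 0));                /* 0 */
	printf("%d\n", toy_zone_allowed(mine, excl, 2, TOY_GFP_HARDWALL, 1)); /* 1 */
	return 0;
}

The two-pass scheme in __alloc_pages() falls out of the hardwall bit: the first pass over the zonelist always has __GFP_HARDWALL set, so only a second-pass GFP_KERNEL request ever reaches the ancestor check.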
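Likewise, once both tasks' nearest mem_exclusive ancestors are in hand, cpuset_excl_nodes_overlap() reduces to a nodemask intersection. A user-space model, with a flat bitmask standing in for nodemask_t and nodes_intersects():

#include <stdio.h>

typedef unsigned long toy_nodemask_t;	/* one bit per node; models nodemask_t */

/* Models nodes_intersects(): true if some node is set in both masks. */
static int toy_nodes_intersects(toy_nodemask_t a, toy_nodemask_t b)
{
	return (a & b) != 0;
}

int main(void)
{
	toy_nodemask_t mine   = 0x3;	/* my exclusive ancestor: nodes 0-1 */
	toy_nodemask_t theirs = 0x6;	/* p's exclusive ancestor: nodes 1-2 */
	toy_nodemask_t other  = 0x8;	/* a third ancestor: node 3 only */

	printf("%d\n", toy_nodes_intersects(mine, theirs));	/* 1: share node 1 */
	printf("%d\n", toy_nodes_intersects(mine, other));	/* 0: disjoint */
	return 0;
}

This is why the oom killer cares: if the two masks are disjoint, killing @p cannot free memory on any node the current task is permitted to use.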