diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-06 08:23:39 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-06 08:23:39 -0400 |
commit | 68083e05d72d94f347293d8cc0067050ba904bfa (patch) | |
tree | 842e71365bd90866be7add181661a4039d891564 /kernel/sched.c | |
parent | 7baac8b91f9871ba8cb09af84de4ae1d86d07812 (diff) | |
parent | b7279469d66b55119784b8b9529c99c1955fe747 (diff) |
Merge commit 'v2.6.26-rc9' into cpus4096
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 521 |
1 files changed, 96 insertions, 425 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 814d6e17f1e1..e6795e39c8ab 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -136,7 +136,7 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | |||
136 | 136 | ||
137 | static inline int rt_policy(int policy) | 137 | static inline int rt_policy(int policy) |
138 | { | 138 | { |
139 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) | 139 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
140 | return 1; | 140 | return 1; |
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
@@ -312,12 +312,15 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
312 | #endif | 312 | #endif |
313 | 313 | ||
314 | /* | 314 | /* |
315 | * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems. | 315 | * A weight of 0 or 1 can cause arithmetics problems. |
316 | * A weight of a cfs_rq is the sum of weights of which entities | ||
317 | * are queued on this cfs_rq, so a weight of a entity should not be | ||
318 | * too large, so as the shares value of a task group. | ||
316 | * (The default weight is 1024 - so there's no practical | 319 | * (The default weight is 1024 - so there's no practical |
317 | * limitation from this.) | 320 | * limitation from this.) |
318 | */ | 321 | */ |
319 | #define MIN_SHARES 2 | 322 | #define MIN_SHARES 2 |
320 | #define MAX_SHARES (ULONG_MAX - 1) | 323 | #define MAX_SHARES (1UL << 18) |
321 | 324 | ||
322 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | 325 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; |
323 | #endif | 326 | #endif |
@@ -398,43 +401,6 @@ struct cfs_rq { | |||
398 | */ | 401 | */ |
399 | struct list_head leaf_cfs_rq_list; | 402 | struct list_head leaf_cfs_rq_list; |
400 | struct task_group *tg; /* group that "owns" this runqueue */ | 403 | struct task_group *tg; /* group that "owns" this runqueue */ |
401 | |||
402 | #ifdef CONFIG_SMP | ||
403 | unsigned long task_weight; | ||
404 | unsigned long shares; | ||
405 | /* | ||
406 | * We need space to build a sched_domain wide view of the full task | ||
407 | * group tree, in order to avoid depending on dynamic memory allocation | ||
408 | * during the load balancing we place this in the per cpu task group | ||
409 | * hierarchy. This limits the load balancing to one instance per cpu, | ||
410 | * but more should not be needed anyway. | ||
411 | */ | ||
412 | struct aggregate_struct { | ||
413 | /* | ||
414 | * load = weight(cpus) * f(tg) | ||
415 | * | ||
416 | * Where f(tg) is the recursive weight fraction assigned to | ||
417 | * this group. | ||
418 | */ | ||
419 | unsigned long load; | ||
420 | |||
421 | /* | ||
422 | * part of the group weight distributed to this span. | ||
423 | */ | ||
424 | unsigned long shares; | ||
425 | |||
426 | /* | ||
427 | * The sum of all runqueue weights within this span. | ||
428 | */ | ||
429 | unsigned long rq_weight; | ||
430 | |||
431 | /* | ||
432 | * Weight contributed by tasks; this is the part we can | ||
433 | * influence by moving tasks around. | ||
434 | */ | ||
435 | unsigned long task_weight; | ||
436 | } aggregate; | ||
437 | #endif | ||
438 | #endif | 404 | #endif |
439 | }; | 405 | }; |
440 | 406 | ||
@@ -1161,6 +1127,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
1161 | return HRTIMER_NORESTART; | 1127 | return HRTIMER_NORESTART; |
1162 | } | 1128 | } |
1163 | 1129 | ||
1130 | #ifdef CONFIG_SMP | ||
1164 | static void hotplug_hrtick_disable(int cpu) | 1131 | static void hotplug_hrtick_disable(int cpu) |
1165 | { | 1132 | { |
1166 | struct rq *rq = cpu_rq(cpu); | 1133 | struct rq *rq = cpu_rq(cpu); |
@@ -1216,6 +1183,7 @@ static void init_hrtick(void) | |||
1216 | { | 1183 | { |
1217 | hotcpu_notifier(hotplug_hrtick, 0); | 1184 | hotcpu_notifier(hotplug_hrtick, 0); |
1218 | } | 1185 | } |
1186 | #endif /* CONFIG_SMP */ | ||
1219 | 1187 | ||
1220 | static void init_rq_hrtick(struct rq *rq) | 1188 | static void init_rq_hrtick(struct rq *rq) |
1221 | { | 1189 | { |
@@ -1368,17 +1336,19 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
1368 | */ | 1336 | */ |
1369 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1337 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
1370 | 1338 | ||
1371 | /* | ||
1372 | * delta *= weight / lw | ||
1373 | */ | ||
1374 | static unsigned long | 1339 | static unsigned long |
1375 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1340 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
1376 | struct load_weight *lw) | 1341 | struct load_weight *lw) |
1377 | { | 1342 | { |
1378 | u64 tmp; | 1343 | u64 tmp; |
1379 | 1344 | ||
1380 | if (!lw->inv_weight) | 1345 | if (!lw->inv_weight) { |
1381 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1); | 1346 | if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) |
1347 | lw->inv_weight = 1; | ||
1348 | else | ||
1349 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2) | ||
1350 | / (lw->weight+1); | ||
1351 | } | ||
1382 | 1352 | ||
1383 | tmp = (u64)delta_exec * weight; | 1353 | tmp = (u64)delta_exec * weight; |
1384 | /* | 1354 | /* |
@@ -1393,6 +1363,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1393 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1363 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
1394 | } | 1364 | } |
1395 | 1365 | ||
1366 | static inline unsigned long | ||
1367 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
1368 | { | ||
1369 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
1370 | } | ||
1371 | |||
1396 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1372 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
1397 | { | 1373 | { |
1398 | lw->weight += inc; | 1374 | lw->weight += inc; |
@@ -1505,326 +1481,6 @@ static unsigned long source_load(int cpu, int type); | |||
1505 | static unsigned long target_load(int cpu, int type); | 1481 | static unsigned long target_load(int cpu, int type); |
1506 | static unsigned long cpu_avg_load_per_task(int cpu); | 1482 | static unsigned long cpu_avg_load_per_task(int cpu); |
1507 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1483 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1508 | |||
1509 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1510 | |||
1511 | /* | ||
1512 | * Group load balancing. | ||
1513 | * | ||
1514 | * We calculate a few balance domain wide aggregate numbers; load and weight. | ||
1515 | * Given the pictures below, and assuming each item has equal weight: | ||
1516 | * | ||
1517 | * root 1 - thread | ||
1518 | * / | \ A - group | ||
1519 | * A 1 B | ||
1520 | * /|\ / \ | ||
1521 | * C 2 D 3 4 | ||
1522 | * | | | ||
1523 | * 5 6 | ||
1524 | * | ||
1525 | * load: | ||
1526 | * A and B get 1/3-rd of the total load. C and D get 1/3-rd of A's 1/3-rd, | ||
1527 | * which equals 1/9-th of the total load. | ||
1528 | * | ||
1529 | * shares: | ||
1530 | * The weight of this group on the selected cpus. | ||
1531 | * | ||
1532 | * rq_weight: | ||
1533 | * Direct sum of all the cpu's their rq weight, e.g. A would get 3 while | ||
1534 | * B would get 2. | ||
1535 | * | ||
1536 | * task_weight: | ||
1537 | * Part of the rq_weight contributed by tasks; all groups except B would | ||
1538 | * get 1, B gets 2. | ||
1539 | */ | ||
1540 | |||
1541 | static inline struct aggregate_struct * | ||
1542 | aggregate(struct task_group *tg, struct sched_domain *sd) | ||
1543 | { | ||
1544 | return &tg->cfs_rq[sd->first_cpu]->aggregate; | ||
1545 | } | ||
1546 | |||
1547 | typedef void (*aggregate_func)(struct task_group *, struct sched_domain *); | ||
1548 | |||
1549 | /* | ||
1550 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
1551 | * leaving it for the final time. | ||
1552 | */ | ||
1553 | static | ||
1554 | void aggregate_walk_tree(aggregate_func down, aggregate_func up, | ||
1555 | struct sched_domain *sd) | ||
1556 | { | ||
1557 | struct task_group *parent, *child; | ||
1558 | |||
1559 | rcu_read_lock(); | ||
1560 | parent = &root_task_group; | ||
1561 | down: | ||
1562 | (*down)(parent, sd); | ||
1563 | list_for_each_entry_rcu(child, &parent->children, siblings) { | ||
1564 | parent = child; | ||
1565 | goto down; | ||
1566 | |||
1567 | up: | ||
1568 | continue; | ||
1569 | } | ||
1570 | (*up)(parent, sd); | ||
1571 | |||
1572 | child = parent; | ||
1573 | parent = parent->parent; | ||
1574 | if (parent) | ||
1575 | goto up; | ||
1576 | rcu_read_unlock(); | ||
1577 | } | ||
1578 | |||
1579 | /* | ||
1580 | * Calculate the aggregate runqueue weight. | ||
1581 | */ | ||
1582 | static | ||
1583 | void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd) | ||
1584 | { | ||
1585 | unsigned long rq_weight = 0; | ||
1586 | unsigned long task_weight = 0; | ||
1587 | int i; | ||
1588 | |||
1589 | for_each_cpu_mask(i, sd->span) { | ||
1590 | rq_weight += tg->cfs_rq[i]->load.weight; | ||
1591 | task_weight += tg->cfs_rq[i]->task_weight; | ||
1592 | } | ||
1593 | |||
1594 | aggregate(tg, sd)->rq_weight = rq_weight; | ||
1595 | aggregate(tg, sd)->task_weight = task_weight; | ||
1596 | } | ||
1597 | |||
1598 | /* | ||
1599 | * Compute the weight of this group on the given cpus. | ||
1600 | */ | ||
1601 | static | ||
1602 | void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd) | ||
1603 | { | ||
1604 | unsigned long shares = 0; | ||
1605 | int i; | ||
1606 | |||
1607 | for_each_cpu_mask(i, sd->span) | ||
1608 | shares += tg->cfs_rq[i]->shares; | ||
1609 | |||
1610 | if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares) | ||
1611 | shares = tg->shares; | ||
1612 | |||
1613 | aggregate(tg, sd)->shares = shares; | ||
1614 | } | ||
1615 | |||
1616 | /* | ||
1617 | * Compute the load fraction assigned to this group, relies on the aggregate | ||
1618 | * weight and this group's parent's load, i.e. top-down. | ||
1619 | */ | ||
1620 | static | ||
1621 | void aggregate_group_load(struct task_group *tg, struct sched_domain *sd) | ||
1622 | { | ||
1623 | unsigned long load; | ||
1624 | |||
1625 | if (!tg->parent) { | ||
1626 | int i; | ||
1627 | |||
1628 | load = 0; | ||
1629 | for_each_cpu_mask(i, sd->span) | ||
1630 | load += cpu_rq(i)->load.weight; | ||
1631 | |||
1632 | } else { | ||
1633 | load = aggregate(tg->parent, sd)->load; | ||
1634 | |||
1635 | /* | ||
1636 | * shares is our weight in the parent's rq so | ||
1637 | * shares/parent->rq_weight gives our fraction of the load | ||
1638 | */ | ||
1639 | load *= aggregate(tg, sd)->shares; | ||
1640 | load /= aggregate(tg->parent, sd)->rq_weight + 1; | ||
1641 | } | ||
1642 | |||
1643 | aggregate(tg, sd)->load = load; | ||
1644 | } | ||
1645 | |||
1646 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
1647 | |||
1648 | /* | ||
1649 | * Calculate and set the cpu's group shares. | ||
1650 | */ | ||
1651 | static void | ||
1652 | __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd, | ||
1653 | int tcpu) | ||
1654 | { | ||
1655 | int boost = 0; | ||
1656 | unsigned long shares; | ||
1657 | unsigned long rq_weight; | ||
1658 | |||
1659 | if (!tg->se[tcpu]) | ||
1660 | return; | ||
1661 | |||
1662 | rq_weight = tg->cfs_rq[tcpu]->load.weight; | ||
1663 | |||
1664 | /* | ||
1665 | * If there are currently no tasks on the cpu pretend there is one of | ||
1666 | * average load so that when a new task gets to run here it will not | ||
1667 | * get delayed by group starvation. | ||
1668 | */ | ||
1669 | if (!rq_weight) { | ||
1670 | boost = 1; | ||
1671 | rq_weight = NICE_0_LOAD; | ||
1672 | } | ||
1673 | |||
1674 | /* | ||
1675 | * \Sum shares * rq_weight | ||
1676 | * shares = ----------------------- | ||
1677 | * \Sum rq_weight | ||
1678 | * | ||
1679 | */ | ||
1680 | shares = aggregate(tg, sd)->shares * rq_weight; | ||
1681 | shares /= aggregate(tg, sd)->rq_weight + 1; | ||
1682 | |||
1683 | /* | ||
1684 | * record the actual number of shares, not the boosted amount. | ||
1685 | */ | ||
1686 | tg->cfs_rq[tcpu]->shares = boost ? 0 : shares; | ||
1687 | |||
1688 | if (shares < MIN_SHARES) | ||
1689 | shares = MIN_SHARES; | ||
1690 | else if (shares > MAX_SHARES) | ||
1691 | shares = MAX_SHARES; | ||
1692 | |||
1693 | __set_se_shares(tg->se[tcpu], shares); | ||
1694 | } | ||
1695 | |||
1696 | /* | ||
1697 | * Re-adjust the weights on the cpu the task came from and on the cpu the | ||
1698 | * task went to. | ||
1699 | */ | ||
1700 | static void | ||
1701 | __move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
1702 | int scpu, int dcpu) | ||
1703 | { | ||
1704 | unsigned long shares; | ||
1705 | |||
1706 | shares = tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
1707 | |||
1708 | __update_group_shares_cpu(tg, sd, scpu); | ||
1709 | __update_group_shares_cpu(tg, sd, dcpu); | ||
1710 | |||
1711 | /* | ||
1712 | * ensure we never loose shares due to rounding errors in the | ||
1713 | * above redistribution. | ||
1714 | */ | ||
1715 | shares -= tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
1716 | if (shares) | ||
1717 | tg->cfs_rq[dcpu]->shares += shares; | ||
1718 | } | ||
1719 | |||
1720 | /* | ||
1721 | * Because changing a group's shares changes the weight of the super-group | ||
1722 | * we need to walk up the tree and change all shares until we hit the root. | ||
1723 | */ | ||
1724 | static void | ||
1725 | move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
1726 | int scpu, int dcpu) | ||
1727 | { | ||
1728 | while (tg) { | ||
1729 | __move_group_shares(tg, sd, scpu, dcpu); | ||
1730 | tg = tg->parent; | ||
1731 | } | ||
1732 | } | ||
1733 | |||
1734 | static | ||
1735 | void aggregate_group_set_shares(struct task_group *tg, struct sched_domain *sd) | ||
1736 | { | ||
1737 | unsigned long shares = aggregate(tg, sd)->shares; | ||
1738 | int i; | ||
1739 | |||
1740 | for_each_cpu_mask(i, sd->span) { | ||
1741 | struct rq *rq = cpu_rq(i); | ||
1742 | unsigned long flags; | ||
1743 | |||
1744 | spin_lock_irqsave(&rq->lock, flags); | ||
1745 | __update_group_shares_cpu(tg, sd, i); | ||
1746 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1747 | } | ||
1748 | |||
1749 | aggregate_group_shares(tg, sd); | ||
1750 | |||
1751 | /* | ||
1752 | * ensure we never loose shares due to rounding errors in the | ||
1753 | * above redistribution. | ||
1754 | */ | ||
1755 | shares -= aggregate(tg, sd)->shares; | ||
1756 | if (shares) { | ||
1757 | tg->cfs_rq[sd->first_cpu]->shares += shares; | ||
1758 | aggregate(tg, sd)->shares += shares; | ||
1759 | } | ||
1760 | } | ||
1761 | |||
1762 | /* | ||
1763 | * Calculate the accumulative weight and recursive load of each task group | ||
1764 | * while walking down the tree. | ||
1765 | */ | ||
1766 | static | ||
1767 | void aggregate_get_down(struct task_group *tg, struct sched_domain *sd) | ||
1768 | { | ||
1769 | aggregate_group_weight(tg, sd); | ||
1770 | aggregate_group_shares(tg, sd); | ||
1771 | aggregate_group_load(tg, sd); | ||
1772 | } | ||
1773 | |||
1774 | /* | ||
1775 | * Rebalance the cpu shares while walking back up the tree. | ||
1776 | */ | ||
1777 | static | ||
1778 | void aggregate_get_up(struct task_group *tg, struct sched_domain *sd) | ||
1779 | { | ||
1780 | aggregate_group_set_shares(tg, sd); | ||
1781 | } | ||
1782 | |||
1783 | static DEFINE_PER_CPU(spinlock_t, aggregate_lock); | ||
1784 | |||
1785 | static void __init init_aggregate(void) | ||
1786 | { | ||
1787 | int i; | ||
1788 | |||
1789 | for_each_possible_cpu(i) | ||
1790 | spin_lock_init(&per_cpu(aggregate_lock, i)); | ||
1791 | } | ||
1792 | |||
1793 | static int get_aggregate(struct sched_domain *sd) | ||
1794 | { | ||
1795 | if (!spin_trylock(&per_cpu(aggregate_lock, sd->first_cpu))) | ||
1796 | return 0; | ||
1797 | |||
1798 | aggregate_walk_tree(aggregate_get_down, aggregate_get_up, sd); | ||
1799 | return 1; | ||
1800 | } | ||
1801 | |||
1802 | static void put_aggregate(struct sched_domain *sd) | ||
1803 | { | ||
1804 | spin_unlock(&per_cpu(aggregate_lock, sd->first_cpu)); | ||
1805 | } | ||
1806 | |||
1807 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1808 | { | ||
1809 | cfs_rq->shares = shares; | ||
1810 | } | ||
1811 | |||
1812 | #else | ||
1813 | |||
1814 | static inline void init_aggregate(void) | ||
1815 | { | ||
1816 | } | ||
1817 | |||
1818 | static inline int get_aggregate(struct sched_domain *sd) | ||
1819 | { | ||
1820 | return 0; | ||
1821 | } | ||
1822 | |||
1823 | static inline void put_aggregate(struct sched_domain *sd) | ||
1824 | { | ||
1825 | } | ||
1826 | #endif | ||
1827 | |||
1828 | #else /* CONFIG_SMP */ | 1484 | #else /* CONFIG_SMP */ |
1829 | 1485 | ||
1830 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1486 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -1845,14 +1501,26 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1845 | 1501 | ||
1846 | #define sched_class_highest (&rt_sched_class) | 1502 | #define sched_class_highest (&rt_sched_class) |
1847 | 1503 | ||
1848 | static void inc_nr_running(struct rq *rq) | 1504 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
1505 | { | ||
1506 | update_load_add(&rq->load, p->se.load.weight); | ||
1507 | } | ||
1508 | |||
1509 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
1510 | { | ||
1511 | update_load_sub(&rq->load, p->se.load.weight); | ||
1512 | } | ||
1513 | |||
1514 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
1849 | { | 1515 | { |
1850 | rq->nr_running++; | 1516 | rq->nr_running++; |
1517 | inc_load(rq, p); | ||
1851 | } | 1518 | } |
1852 | 1519 | ||
1853 | static void dec_nr_running(struct rq *rq) | 1520 | static void dec_nr_running(struct task_struct *p, struct rq *rq) |
1854 | { | 1521 | { |
1855 | rq->nr_running--; | 1522 | rq->nr_running--; |
1523 | dec_load(rq, p); | ||
1856 | } | 1524 | } |
1857 | 1525 | ||
1858 | static void set_load_weight(struct task_struct *p) | 1526 | static void set_load_weight(struct task_struct *p) |
@@ -1944,7 +1612,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1944 | rq->nr_uninterruptible--; | 1612 | rq->nr_uninterruptible--; |
1945 | 1613 | ||
1946 | enqueue_task(rq, p, wakeup); | 1614 | enqueue_task(rq, p, wakeup); |
1947 | inc_nr_running(rq); | 1615 | inc_nr_running(p, rq); |
1948 | } | 1616 | } |
1949 | 1617 | ||
1950 | /* | 1618 | /* |
@@ -1956,7 +1624,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
1956 | rq->nr_uninterruptible++; | 1624 | rq->nr_uninterruptible++; |
1957 | 1625 | ||
1958 | dequeue_task(rq, p, sleep); | 1626 | dequeue_task(rq, p, sleep); |
1959 | dec_nr_running(rq); | 1627 | dec_nr_running(p, rq); |
1960 | } | 1628 | } |
1961 | 1629 | ||
1962 | /** | 1630 | /** |
@@ -2609,7 +2277,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2609 | * management (if any): | 2277 | * management (if any): |
2610 | */ | 2278 | */ |
2611 | p->sched_class->task_new(rq, p); | 2279 | p->sched_class->task_new(rq, p); |
2612 | inc_nr_running(rq); | 2280 | inc_nr_running(p, rq); |
2613 | } | 2281 | } |
2614 | check_preempt_curr(rq, p); | 2282 | check_preempt_curr(rq, p); |
2615 | #ifdef CONFIG_SMP | 2283 | #ifdef CONFIG_SMP |
@@ -3600,12 +3268,9 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3600 | unsigned long imbalance; | 3268 | unsigned long imbalance; |
3601 | struct rq *busiest; | 3269 | struct rq *busiest; |
3602 | unsigned long flags; | 3270 | unsigned long flags; |
3603 | int unlock_aggregate; | ||
3604 | 3271 | ||
3605 | cpus_setall(*cpus); | 3272 | cpus_setall(*cpus); |
3606 | 3273 | ||
3607 | unlock_aggregate = get_aggregate(sd); | ||
3608 | |||
3609 | /* | 3274 | /* |
3610 | * When power savings policy is enabled for the parent domain, idle | 3275 | * When power savings policy is enabled for the parent domain, idle |
3611 | * sibling can pick up load irrespective of busy siblings. In this case, | 3276 | * sibling can pick up load irrespective of busy siblings. In this case, |
@@ -3721,9 +3386,8 @@ redo: | |||
3721 | 3386 | ||
3722 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3387 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3723 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3388 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3724 | ld_moved = -1; | 3389 | return -1; |
3725 | 3390 | return ld_moved; | |
3726 | goto out; | ||
3727 | 3391 | ||
3728 | out_balanced: | 3392 | out_balanced: |
3729 | schedstat_inc(sd, lb_balanced[idle]); | 3393 | schedstat_inc(sd, lb_balanced[idle]); |
@@ -3738,13 +3402,8 @@ out_one_pinned: | |||
3738 | 3402 | ||
3739 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3403 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3740 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3404 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3741 | ld_moved = -1; | 3405 | return -1; |
3742 | else | 3406 | return 0; |
3743 | ld_moved = 0; | ||
3744 | out: | ||
3745 | if (unlock_aggregate) | ||
3746 | put_aggregate(sd); | ||
3747 | return ld_moved; | ||
3748 | } | 3407 | } |
3749 | 3408 | ||
3750 | /* | 3409 | /* |
@@ -4430,7 +4089,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4430 | * schedule() atomically, we ignore that path for now. | 4089 | * schedule() atomically, we ignore that path for now. |
4431 | * Otherwise, whine if we are scheduling when we should not be. | 4090 | * Otherwise, whine if we are scheduling when we should not be. |
4432 | */ | 4091 | */ |
4433 | if (unlikely(in_atomic_preempt_off()) && unlikely(!prev->exit_state)) | 4092 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
4434 | __schedule_bug(prev); | 4093 | __schedule_bug(prev); |
4435 | 4094 | ||
4436 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4095 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
@@ -4510,12 +4169,10 @@ need_resched_nonpreemptible: | |||
4510 | clear_tsk_need_resched(prev); | 4169 | clear_tsk_need_resched(prev); |
4511 | 4170 | ||
4512 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4171 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
4513 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | 4172 | if (unlikely(signal_pending_state(prev->state, prev))) |
4514 | signal_pending(prev))) { | ||
4515 | prev->state = TASK_RUNNING; | 4173 | prev->state = TASK_RUNNING; |
4516 | } else { | 4174 | else |
4517 | deactivate_task(rq, prev, 1); | 4175 | deactivate_task(rq, prev, 1); |
4518 | } | ||
4519 | switch_count = &prev->nvcsw; | 4176 | switch_count = &prev->nvcsw; |
4520 | } | 4177 | } |
4521 | 4178 | ||
@@ -4741,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state) | |||
4741 | signal_pending(current)) || | 4398 | signal_pending(current)) || |
4742 | (state == TASK_KILLABLE && | 4399 | (state == TASK_KILLABLE && |
4743 | fatal_signal_pending(current))) { | 4400 | fatal_signal_pending(current))) { |
4744 | __remove_wait_queue(&x->wait, &wait); | 4401 | timeout = -ERESTARTSYS; |
4745 | return -ERESTARTSYS; | 4402 | break; |
4746 | } | 4403 | } |
4747 | __set_current_state(state); | 4404 | __set_current_state(state); |
4748 | spin_unlock_irq(&x->wait.lock); | 4405 | spin_unlock_irq(&x->wait.lock); |
4749 | timeout = schedule_timeout(timeout); | 4406 | timeout = schedule_timeout(timeout); |
4750 | spin_lock_irq(&x->wait.lock); | 4407 | spin_lock_irq(&x->wait.lock); |
4751 | if (!timeout) { | 4408 | } while (!x->done && timeout); |
4752 | __remove_wait_queue(&x->wait, &wait); | ||
4753 | return timeout; | ||
4754 | } | ||
4755 | } while (!x->done); | ||
4756 | __remove_wait_queue(&x->wait, &wait); | 4409 | __remove_wait_queue(&x->wait, &wait); |
4410 | if (!x->done) | ||
4411 | return timeout; | ||
4757 | } | 4412 | } |
4758 | x->done--; | 4413 | x->done--; |
4759 | return timeout; | 4414 | return timeout ?: 1; |
4760 | } | 4415 | } |
4761 | 4416 | ||
4762 | static long __sched | 4417 | static long __sched |
@@ -4931,8 +4586,10 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4931 | goto out_unlock; | 4586 | goto out_unlock; |
4932 | } | 4587 | } |
4933 | on_rq = p->se.on_rq; | 4588 | on_rq = p->se.on_rq; |
4934 | if (on_rq) | 4589 | if (on_rq) { |
4935 | dequeue_task(rq, p, 0); | 4590 | dequeue_task(rq, p, 0); |
4591 | dec_load(rq, p); | ||
4592 | } | ||
4936 | 4593 | ||
4937 | p->static_prio = NICE_TO_PRIO(nice); | 4594 | p->static_prio = NICE_TO_PRIO(nice); |
4938 | set_load_weight(p); | 4595 | set_load_weight(p); |
@@ -4942,6 +4599,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4942 | 4599 | ||
4943 | if (on_rq) { | 4600 | if (on_rq) { |
4944 | enqueue_task(rq, p, 0); | 4601 | enqueue_task(rq, p, 0); |
4602 | inc_load(rq, p); | ||
4945 | /* | 4603 | /* |
4946 | * If the task increased its priority or is running and | 4604 | * If the task increased its priority or is running and |
4947 | * lowered its priority, then reschedule its CPU: | 4605 | * lowered its priority, then reschedule its CPU: |
@@ -6229,6 +5887,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu) | |||
6229 | next = pick_next_task(rq, rq->curr); | 5887 | next = pick_next_task(rq, rq->curr); |
6230 | if (!next) | 5888 | if (!next) |
6231 | break; | 5889 | break; |
5890 | next->sched_class->put_prev_task(rq, next); | ||
6232 | migrate_dead(dead_cpu, next); | 5891 | migrate_dead(dead_cpu, next); |
6233 | 5892 | ||
6234 | } | 5893 | } |
@@ -7219,7 +6878,12 @@ static int default_relax_domain_level = -1; | |||
7219 | 6878 | ||
7220 | static int __init setup_relax_domain_level(char *str) | 6879 | static int __init setup_relax_domain_level(char *str) |
7221 | { | 6880 | { |
7222 | default_relax_domain_level = simple_strtoul(str, NULL, 0); | 6881 | unsigned long val; |
6882 | |||
6883 | val = simple_strtoul(str, NULL, 0); | ||
6884 | if (val < SD_LV_MAX) | ||
6885 | default_relax_domain_level = val; | ||
6886 | |||
7223 | return 1; | 6887 | return 1; |
7224 | } | 6888 | } |
7225 | __setup("relax_domain_level=", setup_relax_domain_level); | 6889 | __setup("relax_domain_level=", setup_relax_domain_level); |
@@ -7316,7 +6980,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7316 | SD_INIT(sd, ALLNODES); | 6980 | SD_INIT(sd, ALLNODES); |
7317 | set_domain_attribute(sd, attr); | 6981 | set_domain_attribute(sd, attr); |
7318 | sd->span = *cpu_map; | 6982 | sd->span = *cpu_map; |
7319 | sd->first_cpu = first_cpu(sd->span); | ||
7320 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); | 6983 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); |
7321 | p = sd; | 6984 | p = sd; |
7322 | sd_allnodes = 1; | 6985 | sd_allnodes = 1; |
@@ -7327,7 +6990,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7327 | SD_INIT(sd, NODE); | 6990 | SD_INIT(sd, NODE); |
7328 | set_domain_attribute(sd, attr); | 6991 | set_domain_attribute(sd, attr); |
7329 | sched_domain_node_span(cpu_to_node(i), &sd->span); | 6992 | sched_domain_node_span(cpu_to_node(i), &sd->span); |
7330 | sd->first_cpu = first_cpu(sd->span); | ||
7331 | sd->parent = p; | 6993 | sd->parent = p; |
7332 | if (p) | 6994 | if (p) |
7333 | p->child = sd; | 6995 | p->child = sd; |
@@ -7339,7 +7001,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7339 | SD_INIT(sd, CPU); | 7001 | SD_INIT(sd, CPU); |
7340 | set_domain_attribute(sd, attr); | 7002 | set_domain_attribute(sd, attr); |
7341 | sd->span = *nodemask; | 7003 | sd->span = *nodemask; |
7342 | sd->first_cpu = first_cpu(sd->span); | ||
7343 | sd->parent = p; | 7004 | sd->parent = p; |
7344 | if (p) | 7005 | if (p) |
7345 | p->child = sd; | 7006 | p->child = sd; |
@@ -7351,7 +7012,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7351 | SD_INIT(sd, MC); | 7012 | SD_INIT(sd, MC); |
7352 | set_domain_attribute(sd, attr); | 7013 | set_domain_attribute(sd, attr); |
7353 | sd->span = cpu_coregroup_map(i); | 7014 | sd->span = cpu_coregroup_map(i); |
7354 | sd->first_cpu = first_cpu(sd->span); | ||
7355 | cpus_and(sd->span, sd->span, *cpu_map); | 7015 | cpus_and(sd->span, sd->span, *cpu_map); |
7356 | sd->parent = p; | 7016 | sd->parent = p; |
7357 | p->child = sd; | 7017 | p->child = sd; |
@@ -7364,7 +7024,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7364 | SD_INIT(sd, SIBLING); | 7024 | SD_INIT(sd, SIBLING); |
7365 | set_domain_attribute(sd, attr); | 7025 | set_domain_attribute(sd, attr); |
7366 | sd->span = per_cpu(cpu_sibling_map, i); | 7026 | sd->span = per_cpu(cpu_sibling_map, i); |
7367 | sd->first_cpu = first_cpu(sd->span); | ||
7368 | cpus_and(sd->span, sd->span, *cpu_map); | 7027 | cpus_and(sd->span, sd->span, *cpu_map); |
7369 | sd->parent = p; | 7028 | sd->parent = p; |
7370 | p->child = sd; | 7029 | p->child = sd; |
@@ -7568,8 +7227,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
7568 | 7227 | ||
7569 | static cpumask_t *doms_cur; /* current sched domains */ | 7228 | static cpumask_t *doms_cur; /* current sched domains */ |
7570 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 7229 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
7571 | static struct sched_domain_attr *dattr_cur; /* attribues of custom domains | 7230 | static struct sched_domain_attr *dattr_cur; |
7572 | in 'doms_cur' */ | 7231 | /* attribues of custom domains in 'doms_cur' */ |
7573 | 7232 | ||
7574 | /* | 7233 | /* |
7575 | * Special case: If a kmalloc of a doms_cur partition (array of | 7234 | * Special case: If a kmalloc of a doms_cur partition (array of |
@@ -7583,6 +7242,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void) | |||
7583 | } | 7242 | } |
7584 | 7243 | ||
7585 | /* | 7244 | /* |
7245 | * Free current domain masks. | ||
7246 | * Called after all cpus are attached to NULL domain. | ||
7247 | */ | ||
7248 | static void free_sched_domains(void) | ||
7249 | { | ||
7250 | ndoms_cur = 0; | ||
7251 | if (doms_cur != &fallback_doms) | ||
7252 | kfree(doms_cur); | ||
7253 | doms_cur = &fallback_doms; | ||
7254 | } | ||
7255 | |||
7256 | /* | ||
7586 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 7257 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
7587 | * For now this just excludes isolated cpus, but could be used to | 7258 | * For now this just excludes isolated cpus, but could be used to |
7588 | * exclude other special cases in the future. | 7259 | * exclude other special cases in the future. |
@@ -7729,6 +7400,7 @@ int arch_reinit_sched_domains(void) | |||
7729 | get_online_cpus(); | 7400 | get_online_cpus(); |
7730 | mutex_lock(&sched_domains_mutex); | 7401 | mutex_lock(&sched_domains_mutex); |
7731 | detach_destroy_domains(&cpu_online_map); | 7402 | detach_destroy_domains(&cpu_online_map); |
7403 | free_sched_domains(); | ||
7732 | err = arch_init_sched_domains(&cpu_online_map); | 7404 | err = arch_init_sched_domains(&cpu_online_map); |
7733 | mutex_unlock(&sched_domains_mutex); | 7405 | mutex_unlock(&sched_domains_mutex); |
7734 | put_online_cpus(); | 7406 | put_online_cpus(); |
@@ -7814,6 +7486,7 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
7814 | case CPU_DOWN_PREPARE: | 7486 | case CPU_DOWN_PREPARE: |
7815 | case CPU_DOWN_PREPARE_FROZEN: | 7487 | case CPU_DOWN_PREPARE_FROZEN: |
7816 | detach_destroy_domains(&cpu_online_map); | 7488 | detach_destroy_domains(&cpu_online_map); |
7489 | free_sched_domains(); | ||
7817 | return NOTIFY_OK; | 7490 | return NOTIFY_OK; |
7818 | 7491 | ||
7819 | case CPU_UP_CANCELED: | 7492 | case CPU_UP_CANCELED: |
@@ -7832,8 +7505,16 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
7832 | return NOTIFY_DONE; | 7505 | return NOTIFY_DONE; |
7833 | } | 7506 | } |
7834 | 7507 | ||
7508 | #ifndef CONFIG_CPUSETS | ||
7509 | /* | ||
7510 | * Create default domain partitioning if cpusets are disabled. | ||
7511 | * Otherwise we let cpusets rebuild the domains based on the | ||
7512 | * current setup. | ||
7513 | */ | ||
7514 | |||
7835 | /* The hotplug lock is already held by cpu_up/cpu_down */ | 7515 | /* The hotplug lock is already held by cpu_up/cpu_down */ |
7836 | arch_init_sched_domains(&cpu_online_map); | 7516 | arch_init_sched_domains(&cpu_online_map); |
7517 | #endif | ||
7837 | 7518 | ||
7838 | return NOTIFY_OK; | 7519 | return NOTIFY_OK; |
7839 | } | 7520 | } |
@@ -7973,7 +7654,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | |||
7973 | else | 7654 | else |
7974 | rt_se->rt_rq = parent->my_q; | 7655 | rt_se->rt_rq = parent->my_q; |
7975 | 7656 | ||
7976 | rt_se->rt_rq = &rq->rt; | ||
7977 | rt_se->my_q = rt_rq; | 7657 | rt_se->my_q = rt_rq; |
7978 | rt_se->parent = parent; | 7658 | rt_se->parent = parent; |
7979 | INIT_LIST_HEAD(&rt_se->run_list); | 7659 | INIT_LIST_HEAD(&rt_se->run_list); |
@@ -8034,7 +7714,6 @@ void __init sched_init(void) | |||
8034 | } | 7714 | } |
8035 | 7715 | ||
8036 | #ifdef CONFIG_SMP | 7716 | #ifdef CONFIG_SMP |
8037 | init_aggregate(); | ||
8038 | init_defrootdomain(); | 7717 | init_defrootdomain(); |
8039 | #endif | 7718 | #endif |
8040 | 7719 | ||
@@ -8599,11 +8278,14 @@ void sched_move_task(struct task_struct *tsk) | |||
8599 | #endif | 8278 | #endif |
8600 | 8279 | ||
8601 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8280 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8602 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) | 8281 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
8603 | { | 8282 | { |
8604 | struct cfs_rq *cfs_rq = se->cfs_rq; | 8283 | struct cfs_rq *cfs_rq = se->cfs_rq; |
8284 | struct rq *rq = cfs_rq->rq; | ||
8605 | int on_rq; | 8285 | int on_rq; |
8606 | 8286 | ||
8287 | spin_lock_irq(&rq->lock); | ||
8288 | |||
8607 | on_rq = se->on_rq; | 8289 | on_rq = se->on_rq; |
8608 | if (on_rq) | 8290 | if (on_rq) |
8609 | dequeue_entity(cfs_rq, se, 0); | 8291 | dequeue_entity(cfs_rq, se, 0); |
@@ -8613,17 +8295,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) | |||
8613 | 8295 | ||
8614 | if (on_rq) | 8296 | if (on_rq) |
8615 | enqueue_entity(cfs_rq, se, 0); | 8297 | enqueue_entity(cfs_rq, se, 0); |
8616 | } | ||
8617 | 8298 | ||
8618 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8299 | spin_unlock_irq(&rq->lock); |
8619 | { | ||
8620 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8621 | struct rq *rq = cfs_rq->rq; | ||
8622 | unsigned long flags; | ||
8623 | |||
8624 | spin_lock_irqsave(&rq->lock, flags); | ||
8625 | __set_se_shares(se, shares); | ||
8626 | spin_unlock_irqrestore(&rq->lock, flags); | ||
8627 | } | 8300 | } |
8628 | 8301 | ||
8629 | static DEFINE_MUTEX(shares_mutex); | 8302 | static DEFINE_MUTEX(shares_mutex); |
@@ -8662,13 +8335,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8662 | * w/o tripping rebalance_share or load_balance_fair. | 8335 | * w/o tripping rebalance_share or load_balance_fair. |
8663 | */ | 8336 | */ |
8664 | tg->shares = shares; | 8337 | tg->shares = shares; |
8665 | for_each_possible_cpu(i) { | 8338 | for_each_possible_cpu(i) |
8666 | /* | ||
8667 | * force a rebalance | ||
8668 | */ | ||
8669 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | ||
8670 | set_se_shares(tg->se[i], shares); | 8339 | set_se_shares(tg->se[i], shares); |
8671 | } | ||
8672 | 8340 | ||
8673 | /* | 8341 | /* |
8674 | * Enable load balance activity on this group, by inserting it back on | 8342 | * Enable load balance activity on this group, by inserting it back on |
@@ -8707,7 +8375,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
8707 | #ifdef CONFIG_CGROUP_SCHED | 8375 | #ifdef CONFIG_CGROUP_SCHED |
8708 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8376 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
8709 | { | 8377 | { |
8710 | struct task_group *tgi, *parent = tg->parent; | 8378 | struct task_group *tgi, *parent = tg ? tg->parent : NULL; |
8711 | unsigned long total = 0; | 8379 | unsigned long total = 0; |
8712 | 8380 | ||
8713 | if (!parent) { | 8381 | if (!parent) { |
@@ -8834,6 +8502,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
8834 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; | 8502 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; |
8835 | rt_runtime = tg->rt_bandwidth.rt_runtime; | 8503 | rt_runtime = tg->rt_bandwidth.rt_runtime; |
8836 | 8504 | ||
8505 | if (rt_period == 0) | ||
8506 | return -EINVAL; | ||
8507 | |||
8837 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | 8508 | return tg_set_bandwidth(tg, rt_period, rt_runtime); |
8838 | } | 8509 | } |
8839 | 8510 | ||