| author | Ingo Molnar <mingo@elte.hu> | 2008-06-16 05:16:46 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-06-16 05:16:46 -0400 |
| commit | cb9aa97c21c59ad01c9514d7faf45dc166fba226 (patch) | |
| tree | 66a530f154db78b85f5b1406ebc51401df8d3913 /kernel/sched.c | |
| parent | 668a6c3654560aef8741642478973e205a4f02bf (diff) | |
| parent | 066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff) | |
Merge branch 'linus' into tracing/mmiotrace-mergefixups
Diffstat (limited to 'kernel/sched.c')
| -rw-r--r-- | kernel/sched.c | 469 |
1 file changed, 55 insertions, 414 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index e2e985eeee78..c994d12abbf6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -137,7 +137,7 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | |||
| 137 | 137 | ||
| 138 | static inline int rt_policy(int policy) | 138 | static inline int rt_policy(int policy) |
| 139 | { | 139 | { |
| 140 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) | 140 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
| 141 | return 1; | 141 | return 1; |
| 142 | return 0; | 142 | return 0; |
| 143 | } | 143 | } |
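A small aside on the first hunk: the merged version hints the whole disjunction once instead of annotating each comparison separately. A minimal userspace sketch of the resulting predicate (the `unlikely()` macro is re-created here for illustration; `SCHED_FIFO`/`SCHED_RR` come from the standard `<sched.h>`):

```c
#include <sched.h>   /* SCHED_FIFO, SCHED_RR, SCHED_OTHER */
#include <stdio.h>

/* Illustrative stand-in for the kernel's branch-prediction hint. */
#define unlikely(x) __builtin_expect(!!(x), 0)

/* Mirrors the post-merge rt_policy(): one hint covers the whole test. */
static inline int rt_policy(int policy)
{
	if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
		return 1;
	return 0;
}

int main(void)
{
	printf("SCHED_FIFO  -> %d\n", rt_policy(SCHED_FIFO));   /* 1 */
	printf("SCHED_OTHER -> %d\n", rt_policy(SCHED_OTHER));  /* 0 */
	return 0;
}
```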
| @@ -313,12 +313,15 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
| 313 | #endif | 313 | #endif |
| 314 | 314 | ||
| 315 | /* | 315 | /* |
| 316 | * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems. | 316 | * A weight of 0 or 1 can cause arithmetics problems. |
| 317 | * A weight of a cfs_rq is the sum of weights of which entities | ||
| 318 | * are queued on this cfs_rq, so a weight of a entity should not be | ||
| 319 | * too large, so as the shares value of a task group. | ||
| 317 | * (The default weight is 1024 - so there's no practical | 320 | * (The default weight is 1024 - so there's no practical |
| 318 | * limitation from this.) | 321 | * limitation from this.) |
| 319 | */ | 322 | */ |
| 320 | #define MIN_SHARES 2 | 323 | #define MIN_SHARES 2 |
| 321 | #define MAX_SHARES (ULONG_MAX - 1) | 324 | #define MAX_SHARES (1UL << 18) |
| 322 | 325 | ||
| 323 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | 326 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; |
| 324 | #endif | 327 | #endif |
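The merge replaces the nearly unbounded MAX_SHARES (`ULONG_MAX - 1`) with `1UL << 18`, keeping group shares far away from values where later weight arithmetic could overflow. A minimal sketch of the clamp this implies when a shares value is set for a group (the clamp helper itself is illustrative, not the kernel's code):

```c
#include <stdio.h>

#define MIN_SHARES  2UL
#define MAX_SHARES  (1UL << 18)   /* post-merge upper bound, was ULONG_MAX - 1 */

/* Illustrative clamp: keep a requested shares value inside [MIN_SHARES, MAX_SHARES]. */
static unsigned long clamp_shares(unsigned long shares)
{
	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > MAX_SHARES)
		shares = MAX_SHARES;
	return shares;
}

int main(void)
{
	printf("%lu\n", clamp_shares(0));          /* 2      */
	printf("%lu\n", clamp_shares(1024));       /* 1024   */
	printf("%lu\n", clamp_shares(1UL << 30));  /* 262144 */
	return 0;
}
```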
| @@ -399,43 +402,6 @@ struct cfs_rq { | |||
| 399 | */ | 402 | */ |
| 400 | struct list_head leaf_cfs_rq_list; | 403 | struct list_head leaf_cfs_rq_list; |
| 401 | struct task_group *tg; /* group that "owns" this runqueue */ | 404 | struct task_group *tg; /* group that "owns" this runqueue */ |
| 402 | |||
| 403 | #ifdef CONFIG_SMP | ||
| 404 | unsigned long task_weight; | ||
| 405 | unsigned long shares; | ||
| 406 | /* | ||
| 407 | * We need space to build a sched_domain wide view of the full task | ||
| 408 | * group tree, in order to avoid depending on dynamic memory allocation | ||
| 409 | * during the load balancing we place this in the per cpu task group | ||
| 410 | * hierarchy. This limits the load balancing to one instance per cpu, | ||
| 411 | * but more should not be needed anyway. | ||
| 412 | */ | ||
| 413 | struct aggregate_struct { | ||
| 414 | /* | ||
| 415 | * load = weight(cpus) * f(tg) | ||
| 416 | * | ||
| 417 | * Where f(tg) is the recursive weight fraction assigned to | ||
| 418 | * this group. | ||
| 419 | */ | ||
| 420 | unsigned long load; | ||
| 421 | |||
| 422 | /* | ||
| 423 | * part of the group weight distributed to this span. | ||
| 424 | */ | ||
| 425 | unsigned long shares; | ||
| 426 | |||
| 427 | /* | ||
| 428 | * The sum of all runqueue weights within this span. | ||
| 429 | */ | ||
| 430 | unsigned long rq_weight; | ||
| 431 | |||
| 432 | /* | ||
| 433 | * Weight contributed by tasks; this is the part we can | ||
| 434 | * influence by moving tasks around. | ||
| 435 | */ | ||
| 436 | unsigned long task_weight; | ||
| 437 | } aggregate; | ||
| 438 | #endif | ||
| 439 | #endif | 405 | #endif |
| 440 | }; | 406 | }; |
| 441 | 407 | ||
| @@ -1387,17 +1353,19 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
| 1387 | */ | 1353 | */ |
| 1388 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1354 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
| 1389 | 1355 | ||
| 1390 | /* | ||
| 1391 | * delta *= weight / lw | ||
| 1392 | */ | ||
| 1393 | static unsigned long | 1356 | static unsigned long |
| 1394 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1357 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
| 1395 | struct load_weight *lw) | 1358 | struct load_weight *lw) |
| 1396 | { | 1359 | { |
| 1397 | u64 tmp; | 1360 | u64 tmp; |
| 1398 | 1361 | ||
| 1399 | if (!lw->inv_weight) | 1362 | if (!lw->inv_weight) { |
| 1400 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1); | 1363 | if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) |
| 1364 | lw->inv_weight = 1; | ||
| 1365 | else | ||
| 1366 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2) | ||
| 1367 | / (lw->weight+1); | ||
| 1368 | } | ||
| 1401 | 1369 | ||
| 1402 | tmp = (u64)delta_exec * weight; | 1370 | tmp = (u64)delta_exec * weight; |
| 1403 | /* | 1371 | /* |
| @@ -1412,6 +1380,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
| 1412 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1380 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
| 1413 | } | 1381 | } |
| 1414 | 1382 | ||
| 1383 | static inline unsigned long | ||
| 1384 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
| 1385 | { | ||
| 1386 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
| 1387 | } | ||
| 1388 | |||
| 1415 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1389 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
| 1416 | { | 1390 | { |
| 1417 | lw->weight += inc; | 1391 | lw->weight += inc; |
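For context on this hunk: calc_delta_mine() computes `delta_exec * weight / lw->weight` in fixed point, using a cached reciprocal (`inv_weight` is roughly `WMULT_CONST / weight`) and the rounding shift from the SRR() macro shown above. The merged code adds an early `inv_weight = 1` path for 64-bit kernels when the weight already exceeds WMULT_CONST, and introduces calc_delta_fair() as a NICE_0_LOAD convenience wrapper. Below is a standalone model of the arithmetic, assuming `WMULT_CONST = 2^32`, `WMULT_SHIFT = 32`, and `NICE_0_LOAD = 1024` (the 64-bit definitions of this era); the extra overflow branch the kernel has for very large `tmp` is omitted for brevity:

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed constants: 32-bit fixed-point reciprocal scale, nice-0 weight. */
#define WMULT_CONST   (1ULL << 32)
#define WMULT_SHIFT   32
#define NICE_0_LOAD   1024UL

/* Shift right with rounding, as in the kernel's SRR() macro. */
#define SRR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

/* Illustrative model of calc_delta_mine(): delta_exec * weight / lw_weight. */
static unsigned long calc_delta(unsigned long delta_exec, unsigned long weight,
				unsigned long lw_weight)
{
	/* Cached reciprocal: inv_weight ~= WMULT_CONST / lw_weight. */
	uint64_t inv_weight = 1 + (WMULT_CONST - lw_weight / 2) / (lw_weight + 1);
	uint64_t tmp = (uint64_t)delta_exec * weight;

	return (unsigned long)SRR(tmp * inv_weight, WMULT_SHIFT);
}

int main(void)
{
	/* A nice-0 task on a queue of total weight 2048 gets about half the delta. */
	printf("%lu\n", calc_delta(1000000, NICE_0_LOAD, 2048));  /* ~500000 */
	return 0;
}
```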
| @@ -1524,326 +1498,6 @@ static unsigned long source_load(int cpu, int type); | |||
| 1524 | static unsigned long target_load(int cpu, int type); | 1498 | static unsigned long target_load(int cpu, int type); |
| 1525 | static unsigned long cpu_avg_load_per_task(int cpu); | 1499 | static unsigned long cpu_avg_load_per_task(int cpu); |
| 1526 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1500 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
| 1527 | |||
| 1528 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1529 | |||
| 1530 | /* | ||
| 1531 | * Group load balancing. | ||
| 1532 | * | ||
| 1533 | * We calculate a few balance domain wide aggregate numbers; load and weight. | ||
| 1534 | * Given the pictures below, and assuming each item has equal weight: | ||
| 1535 | * | ||
| 1536 | * root 1 - thread | ||
| 1537 | * / | \ A - group | ||
| 1538 | * A 1 B | ||
| 1539 | * /|\ / \ | ||
| 1540 | * C 2 D 3 4 | ||
| 1541 | * | | | ||
| 1542 | * 5 6 | ||
| 1543 | * | ||
| 1544 | * load: | ||
| 1545 | * A and B get 1/3-rd of the total load. C and D get 1/3-rd of A's 1/3-rd, | ||
| 1546 | * which equals 1/9-th of the total load. | ||
| 1547 | * | ||
| 1548 | * shares: | ||
| 1549 | * The weight of this group on the selected cpus. | ||
| 1550 | * | ||
| 1551 | * rq_weight: | ||
| 1552 | * Direct sum of all the cpu's their rq weight, e.g. A would get 3 while | ||
| 1553 | * B would get 2. | ||
| 1554 | * | ||
| 1555 | * task_weight: | ||
| 1556 | * Part of the rq_weight contributed by tasks; all groups except B would | ||
| 1557 | * get 1, B gets 2. | ||
| 1558 | */ | ||
| 1559 | |||
| 1560 | static inline struct aggregate_struct * | ||
| 1561 | aggregate(struct task_group *tg, struct sched_domain *sd) | ||
| 1562 | { | ||
| 1563 | return &tg->cfs_rq[sd->first_cpu]->aggregate; | ||
| 1564 | } | ||
| 1565 | |||
| 1566 | typedef void (*aggregate_func)(struct task_group *, struct sched_domain *); | ||
| 1567 | |||
| 1568 | /* | ||
| 1569 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
| 1570 | * leaving it for the final time. | ||
| 1571 | */ | ||
| 1572 | static | ||
| 1573 | void aggregate_walk_tree(aggregate_func down, aggregate_func up, | ||
| 1574 | struct sched_domain *sd) | ||
| 1575 | { | ||
| 1576 | struct task_group *parent, *child; | ||
| 1577 | |||
| 1578 | rcu_read_lock(); | ||
| 1579 | parent = &root_task_group; | ||
| 1580 | down: | ||
| 1581 | (*down)(parent, sd); | ||
| 1582 | list_for_each_entry_rcu(child, &parent->children, siblings) { | ||
| 1583 | parent = child; | ||
| 1584 | goto down; | ||
| 1585 | |||
| 1586 | up: | ||
| 1587 | continue; | ||
| 1588 | } | ||
| 1589 | (*up)(parent, sd); | ||
| 1590 | |||
| 1591 | child = parent; | ||
| 1592 | parent = parent->parent; | ||
| 1593 | if (parent) | ||
| 1594 | goto up; | ||
| 1595 | rcu_read_unlock(); | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | /* | ||
| 1599 | * Calculate the aggregate runqueue weight. | ||
| 1600 | */ | ||
| 1601 | static | ||
| 1602 | void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd) | ||
| 1603 | { | ||
| 1604 | unsigned long rq_weight = 0; | ||
| 1605 | unsigned long task_weight = 0; | ||
| 1606 | int i; | ||
| 1607 | |||
| 1608 | for_each_cpu_mask(i, sd->span) { | ||
| 1609 | rq_weight += tg->cfs_rq[i]->load.weight; | ||
| 1610 | task_weight += tg->cfs_rq[i]->task_weight; | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | aggregate(tg, sd)->rq_weight = rq_weight; | ||
| 1614 | aggregate(tg, sd)->task_weight = task_weight; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | /* | ||
| 1618 | * Compute the weight of this group on the given cpus. | ||
| 1619 | */ | ||
| 1620 | static | ||
| 1621 | void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd) | ||
| 1622 | { | ||
| 1623 | unsigned long shares = 0; | ||
| 1624 | int i; | ||
| 1625 | |||
| 1626 | for_each_cpu_mask(i, sd->span) | ||
| 1627 | shares += tg->cfs_rq[i]->shares; | ||
| 1628 | |||
| 1629 | if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares) | ||
| 1630 | shares = tg->shares; | ||
| 1631 | |||
| 1632 | aggregate(tg, sd)->shares = shares; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | /* | ||
| 1636 | * Compute the load fraction assigned to this group, relies on the aggregate | ||
| 1637 | * weight and this group's parent's load, i.e. top-down. | ||
| 1638 | */ | ||
| 1639 | static | ||
| 1640 | void aggregate_group_load(struct task_group *tg, struct sched_domain *sd) | ||
| 1641 | { | ||
| 1642 | unsigned long load; | ||
| 1643 | |||
| 1644 | if (!tg->parent) { | ||
| 1645 | int i; | ||
| 1646 | |||
| 1647 | load = 0; | ||
| 1648 | for_each_cpu_mask(i, sd->span) | ||
| 1649 | load += cpu_rq(i)->load.weight; | ||
| 1650 | |||
| 1651 | } else { | ||
| 1652 | load = aggregate(tg->parent, sd)->load; | ||
| 1653 | |||
| 1654 | /* | ||
| 1655 | * shares is our weight in the parent's rq so | ||
| 1656 | * shares/parent->rq_weight gives our fraction of the load | ||
| 1657 | */ | ||
| 1658 | load *= aggregate(tg, sd)->shares; | ||
| 1659 | load /= aggregate(tg->parent, sd)->rq_weight + 1; | ||
| 1660 | } | ||
| 1661 | |||
| 1662 | aggregate(tg, sd)->load = load; | ||
| 1663 | } | ||
| 1664 | |||
| 1665 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
| 1666 | |||
| 1667 | /* | ||
| 1668 | * Calculate and set the cpu's group shares. | ||
| 1669 | */ | ||
| 1670 | static void | ||
| 1671 | __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd, | ||
| 1672 | int tcpu) | ||
| 1673 | { | ||
| 1674 | int boost = 0; | ||
| 1675 | unsigned long shares; | ||
| 1676 | unsigned long rq_weight; | ||
| 1677 | |||
| 1678 | if (!tg->se[tcpu]) | ||
| 1679 | return; | ||
| 1680 | |||
| 1681 | rq_weight = tg->cfs_rq[tcpu]->load.weight; | ||
| 1682 | |||
| 1683 | /* | ||
| 1684 | * If there are currently no tasks on the cpu pretend there is one of | ||
| 1685 | * average load so that when a new task gets to run here it will not | ||
| 1686 | * get delayed by group starvation. | ||
| 1687 | */ | ||
| 1688 | if (!rq_weight) { | ||
| 1689 | boost = 1; | ||
| 1690 | rq_weight = NICE_0_LOAD; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | /* | ||
| 1694 | * \Sum shares * rq_weight | ||
| 1695 | * shares = ----------------------- | ||
| 1696 | * \Sum rq_weight | ||
| 1697 | * | ||
| 1698 | */ | ||
| 1699 | shares = aggregate(tg, sd)->shares * rq_weight; | ||
| 1700 | shares /= aggregate(tg, sd)->rq_weight + 1; | ||
| 1701 | |||
| 1702 | /* | ||
| 1703 | * record the actual number of shares, not the boosted amount. | ||
| 1704 | */ | ||
| 1705 | tg->cfs_rq[tcpu]->shares = boost ? 0 : shares; | ||
| 1706 | |||
| 1707 | if (shares < MIN_SHARES) | ||
| 1708 | shares = MIN_SHARES; | ||
| 1709 | else if (shares > MAX_SHARES) | ||
| 1710 | shares = MAX_SHARES; | ||
| 1711 | |||
| 1712 | __set_se_shares(tg->se[tcpu], shares); | ||
| 1713 | } | ||
| 1714 | |||
| 1715 | /* | ||
| 1716 | * Re-adjust the weights on the cpu the task came from and on the cpu the | ||
| 1717 | * task went to. | ||
| 1718 | */ | ||
| 1719 | static void | ||
| 1720 | __move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
| 1721 | int scpu, int dcpu) | ||
| 1722 | { | ||
| 1723 | unsigned long shares; | ||
| 1724 | |||
| 1725 | shares = tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
| 1726 | |||
| 1727 | __update_group_shares_cpu(tg, sd, scpu); | ||
| 1728 | __update_group_shares_cpu(tg, sd, dcpu); | ||
| 1729 | |||
| 1730 | /* | ||
| 1731 | * ensure we never loose shares due to rounding errors in the | ||
| 1732 | * above redistribution. | ||
| 1733 | */ | ||
| 1734 | shares -= tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
| 1735 | if (shares) | ||
| 1736 | tg->cfs_rq[dcpu]->shares += shares; | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | /* | ||
| 1740 | * Because changing a group's shares changes the weight of the super-group | ||
| 1741 | * we need to walk up the tree and change all shares until we hit the root. | ||
| 1742 | */ | ||
| 1743 | static void | ||
| 1744 | move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
| 1745 | int scpu, int dcpu) | ||
| 1746 | { | ||
| 1747 | while (tg) { | ||
| 1748 | __move_group_shares(tg, sd, scpu, dcpu); | ||
| 1749 | tg = tg->parent; | ||
| 1750 | } | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | static | ||
| 1754 | void aggregate_group_set_shares(struct task_group *tg, struct sched_domain *sd) | ||
| 1755 | { | ||
| 1756 | unsigned long shares = aggregate(tg, sd)->shares; | ||
| 1757 | int i; | ||
| 1758 | |||
| 1759 | for_each_cpu_mask(i, sd->span) { | ||
| 1760 | struct rq *rq = cpu_rq(i); | ||
| 1761 | unsigned long flags; | ||
| 1762 | |||
| 1763 | spin_lock_irqsave(&rq->lock, flags); | ||
| 1764 | __update_group_shares_cpu(tg, sd, i); | ||
| 1765 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | aggregate_group_shares(tg, sd); | ||
| 1769 | |||
| 1770 | /* | ||
| 1771 | * ensure we never loose shares due to rounding errors in the | ||
| 1772 | * above redistribution. | ||
| 1773 | */ | ||
| 1774 | shares -= aggregate(tg, sd)->shares; | ||
| 1775 | if (shares) { | ||
| 1776 | tg->cfs_rq[sd->first_cpu]->shares += shares; | ||
| 1777 | aggregate(tg, sd)->shares += shares; | ||
| 1778 | } | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | /* | ||
| 1782 | * Calculate the accumulative weight and recursive load of each task group | ||
| 1783 | * while walking down the tree. | ||
| 1784 | */ | ||
| 1785 | static | ||
| 1786 | void aggregate_get_down(struct task_group *tg, struct sched_domain *sd) | ||
| 1787 | { | ||
| 1788 | aggregate_group_weight(tg, sd); | ||
| 1789 | aggregate_group_shares(tg, sd); | ||
| 1790 | aggregate_group_load(tg, sd); | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | /* | ||
| 1794 | * Rebalance the cpu shares while walking back up the tree. | ||
| 1795 | */ | ||
| 1796 | static | ||
| 1797 | void aggregate_get_up(struct task_group *tg, struct sched_domain *sd) | ||
| 1798 | { | ||
| 1799 | aggregate_group_set_shares(tg, sd); | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | static DEFINE_PER_CPU(spinlock_t, aggregate_lock); | ||
| 1803 | |||
| 1804 | static void __init init_aggregate(void) | ||
| 1805 | { | ||
| 1806 | int i; | ||
| 1807 | |||
| 1808 | for_each_possible_cpu(i) | ||
| 1809 | spin_lock_init(&per_cpu(aggregate_lock, i)); | ||
| 1810 | } | ||
| 1811 | |||
| 1812 | static int get_aggregate(struct sched_domain *sd) | ||
| 1813 | { | ||
| 1814 | if (!spin_trylock(&per_cpu(aggregate_lock, sd->first_cpu))) | ||
| 1815 | return 0; | ||
| 1816 | |||
| 1817 | aggregate_walk_tree(aggregate_get_down, aggregate_get_up, sd); | ||
| 1818 | return 1; | ||
| 1819 | } | ||
| 1820 | |||
| 1821 | static void put_aggregate(struct sched_domain *sd) | ||
| 1822 | { | ||
| 1823 | spin_unlock(&per_cpu(aggregate_lock, sd->first_cpu)); | ||
| 1824 | } | ||
| 1825 | |||
| 1826 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
| 1827 | { | ||
| 1828 | cfs_rq->shares = shares; | ||
| 1829 | } | ||
| 1830 | |||
| 1831 | #else | ||
| 1832 | |||
| 1833 | static inline void init_aggregate(void) | ||
| 1834 | { | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | static inline int get_aggregate(struct sched_domain *sd) | ||
| 1838 | { | ||
| 1839 | return 0; | ||
| 1840 | } | ||
| 1841 | |||
| 1842 | static inline void put_aggregate(struct sched_domain *sd) | ||
| 1843 | { | ||
| 1844 | } | ||
| 1845 | #endif | ||
| 1846 | |||
| 1847 | #else /* CONFIG_SMP */ | 1501 | #else /* CONFIG_SMP */ |
| 1848 | 1502 | ||
| 1849 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1503 | #ifdef CONFIG_FAIR_GROUP_SCHED |
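The large block deleted by this merge (the sched-domain-wide aggregate machinery) redistributed a group's shares across CPUs in proportion to each CPU's runqueue weight, essentially `shares_i = tg->shares * rq_weight_i / Σ rq_weight`, clamped to [MIN_SHARES, MAX_SHARES], with idle CPUs boosted to an average load. A small standalone model of that redistribution may help when reading the deleted __update_group_shares_cpu(); all names below are illustrative:

```c
#include <stdio.h>

#define MIN_SHARES  2UL
#define MAX_SHARES  (1UL << 18)
#define NICE_0_LOAD 1024UL

/*
 * Model of the deleted per-cpu share calculation:
 *   shares_i = group_shares * rq_weight_i / sum(rq_weight)
 * An idle cpu is "boosted" to NICE_0_LOAD so a task waking there
 * does not start with a starved group weight.
 */
static void distribute_shares(unsigned long group_shares,
			      const unsigned long *rq_weight,
			      unsigned long *out, int ncpus)
{
	unsigned long total = 0;
	int i;

	for (i = 0; i < ncpus; i++)
		total += rq_weight[i] ? rq_weight[i] : NICE_0_LOAD;

	for (i = 0; i < ncpus; i++) {
		unsigned long w = rq_weight[i] ? rq_weight[i] : NICE_0_LOAD;
		unsigned long s = group_shares * w / (total + 1);

		if (s < MIN_SHARES)
			s = MIN_SHARES;
		else if (s > MAX_SHARES)
			s = MAX_SHARES;
		out[i] = s;
	}
}

int main(void)
{
	unsigned long rq_weight[4] = { 2048, 1024, 0, 1024 }; /* cpu2 idle */
	unsigned long shares[4];
	int i;

	distribute_shares(1024, rq_weight, shares, 4);
	for (i = 0; i < 4; i++)
		printf("cpu%d: %lu\n", i, shares[i]);  /* roughly a 2:1:1:1 split */
	return 0;
}
```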
| @@ -1864,14 +1518,26 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
| 1864 | 1518 | ||
| 1865 | #define sched_class_highest (&rt_sched_class) | 1519 | #define sched_class_highest (&rt_sched_class) |
| 1866 | 1520 | ||
| 1867 | static void inc_nr_running(struct rq *rq) | 1521 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
| 1522 | { | ||
| 1523 | update_load_add(&rq->load, p->se.load.weight); | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
| 1527 | { | ||
| 1528 | update_load_sub(&rq->load, p->se.load.weight); | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
| 1868 | { | 1532 | { |
| 1869 | rq->nr_running++; | 1533 | rq->nr_running++; |
| 1534 | inc_load(rq, p); | ||
| 1870 | } | 1535 | } |
| 1871 | 1536 | ||
| 1872 | static void dec_nr_running(struct rq *rq) | 1537 | static void dec_nr_running(struct task_struct *p, struct rq *rq) |
| 1873 | { | 1538 | { |
| 1874 | rq->nr_running--; | 1539 | rq->nr_running--; |
| 1540 | dec_load(rq, p); | ||
| 1875 | } | 1541 | } |
| 1876 | 1542 | ||
| 1877 | static void set_load_weight(struct task_struct *p) | 1543 | static void set_load_weight(struct task_struct *p) |
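After this merge the runqueue's aggregate load is again maintained directly: inc_nr_running()/dec_nr_running() take the task and fold its load weight into rq->load via the new inc_load()/dec_load() helpers. A compact standalone model of that bookkeeping (the struct names here are illustrative, not the kernel's):

```c
#include <stdio.h>

struct load_weight { unsigned long weight; };
struct task  { struct load_weight load; };
struct runq  { unsigned long nr_running; struct load_weight load; };

/* Models update_load_add()/update_load_sub() on the aggregate rq load. */
static void inc_load(struct runq *rq, const struct task *p)
{
	rq->load.weight += p->load.weight;
}

static void dec_load(struct runq *rq, const struct task *p)
{
	rq->load.weight -= p->load.weight;
}

/* Post-merge shape: counting a task also adjusts the rq load in one place. */
static void inc_nr_running(struct task *p, struct runq *rq)
{
	rq->nr_running++;
	inc_load(rq, p);
}

static void dec_nr_running(struct task *p, struct runq *rq)
{
	rq->nr_running--;
	dec_load(rq, p);
}

int main(void)
{
	struct runq rq = { 0, { 0 } };
	struct task a = { { 1024 } }, b = { { 335 } };  /* roughly nice 0 and nice +5 */

	inc_nr_running(&a, &rq);
	inc_nr_running(&b, &rq);
	printf("nr=%lu load=%lu\n", rq.nr_running, rq.load.weight); /* 2, 1359 */
	dec_nr_running(&b, &rq);
	printf("nr=%lu load=%lu\n", rq.nr_running, rq.load.weight); /* 1, 1024 */
	return 0;
}
```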
| @@ -1963,7 +1629,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 1963 | rq->nr_uninterruptible--; | 1629 | rq->nr_uninterruptible--; |
| 1964 | 1630 | ||
| 1965 | enqueue_task(rq, p, wakeup); | 1631 | enqueue_task(rq, p, wakeup); |
| 1966 | inc_nr_running(rq); | 1632 | inc_nr_running(p, rq); |
| 1967 | } | 1633 | } |
| 1968 | 1634 | ||
| 1969 | /* | 1635 | /* |
| @@ -1975,7 +1641,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
| 1975 | rq->nr_uninterruptible++; | 1641 | rq->nr_uninterruptible++; |
| 1976 | 1642 | ||
| 1977 | dequeue_task(rq, p, sleep); | 1643 | dequeue_task(rq, p, sleep); |
| 1978 | dec_nr_running(rq); | 1644 | dec_nr_running(p, rq); |
| 1979 | } | 1645 | } |
| 1980 | 1646 | ||
| 1981 | /** | 1647 | /** |
| @@ -2631,7 +2297,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2631 | * management (if any): | 2297 | * management (if any): |
| 2632 | */ | 2298 | */ |
| 2633 | p->sched_class->task_new(rq, p); | 2299 | p->sched_class->task_new(rq, p); |
| 2634 | inc_nr_running(rq); | 2300 | inc_nr_running(p, rq); |
| 2635 | } | 2301 | } |
| 2636 | trace_mark(kernel_sched_wakeup_new, | 2302 | trace_mark(kernel_sched_wakeup_new, |
| 2637 | "pid %d state %ld ## rq %p task %p rq->curr %p", | 2303 | "pid %d state %ld ## rq %p task %p rq->curr %p", |
| @@ -3630,12 +3296,9 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 3630 | unsigned long imbalance; | 3296 | unsigned long imbalance; |
| 3631 | struct rq *busiest; | 3297 | struct rq *busiest; |
| 3632 | unsigned long flags; | 3298 | unsigned long flags; |
| 3633 | int unlock_aggregate; | ||
| 3634 | 3299 | ||
| 3635 | cpus_setall(*cpus); | 3300 | cpus_setall(*cpus); |
| 3636 | 3301 | ||
| 3637 | unlock_aggregate = get_aggregate(sd); | ||
| 3638 | |||
| 3639 | /* | 3302 | /* |
| 3640 | * When power savings policy is enabled for the parent domain, idle | 3303 | * When power savings policy is enabled for the parent domain, idle |
| 3641 | * sibling can pick up load irrespective of busy siblings. In this case, | 3304 | * sibling can pick up load irrespective of busy siblings. In this case, |
| @@ -3751,9 +3414,8 @@ redo: | |||
| 3751 | 3414 | ||
| 3752 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3415 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
| 3753 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3416 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 3754 | ld_moved = -1; | 3417 | return -1; |
| 3755 | 3418 | return ld_moved; | |
| 3756 | goto out; | ||
| 3757 | 3419 | ||
| 3758 | out_balanced: | 3420 | out_balanced: |
| 3759 | schedstat_inc(sd, lb_balanced[idle]); | 3421 | schedstat_inc(sd, lb_balanced[idle]); |
| @@ -3768,13 +3430,8 @@ out_one_pinned: | |||
| 3768 | 3430 | ||
| 3769 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3431 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
| 3770 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3432 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 3771 | ld_moved = -1; | 3433 | return -1; |
| 3772 | else | 3434 | return 0; |
| 3773 | ld_moved = 0; | ||
| 3774 | out: | ||
| 3775 | if (unlock_aggregate) | ||
| 3776 | put_aggregate(sd); | ||
| 3777 | return ld_moved; | ||
| 3778 | } | 3435 | } |
| 3779 | 3436 | ||
| 3780 | /* | 3437 | /* |
| @@ -4481,7 +4138,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 4481 | * schedule() atomically, we ignore that path for now. | 4138 | * schedule() atomically, we ignore that path for now. |
| 4482 | * Otherwise, whine if we are scheduling when we should not be. | 4139 | * Otherwise, whine if we are scheduling when we should not be. |
| 4483 | */ | 4140 | */ |
| 4484 | if (unlikely(in_atomic_preempt_off()) && unlikely(!prev->exit_state)) | 4141 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
| 4485 | __schedule_bug(prev); | 4142 | __schedule_bug(prev); |
| 4486 | 4143 | ||
| 4487 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4144 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
| @@ -4561,12 +4218,10 @@ need_resched_nonpreemptible: | |||
| 4561 | clear_tsk_need_resched(prev); | 4218 | clear_tsk_need_resched(prev); |
| 4562 | 4219 | ||
| 4563 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4220 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
| 4564 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | 4221 | if (unlikely(signal_pending_state(prev->state, prev))) |
| 4565 | signal_pending(prev))) { | ||
| 4566 | prev->state = TASK_RUNNING; | 4222 | prev->state = TASK_RUNNING; |
| 4567 | } else { | 4223 | else |
| 4568 | deactivate_task(rq, prev, 1); | 4224 | deactivate_task(rq, prev, 1); |
| 4569 | } | ||
| 4570 | switch_count = &prev->nvcsw; | 4225 | switch_count = &prev->nvcsw; |
| 4571 | } | 4226 | } |
| 4572 | 4227 | ||
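In the schedule() hunk above, the open-coded "interruptible sleep with a signal pending" test becomes a single signal_pending_state() call, which also handles TASK_KILLABLE sleeps (woken only by fatal signals). A hedged userspace model of the decision that helper makes; the exact definition lives in include/linux/sched.h, and the state-bit values below are assumed from the headers of this era:

```c
#include <stdio.h>

/* Task-state bits, values assumed from this era's include/linux/sched.h. */
#define TASK_INTERRUPTIBLE   0x0001
#define TASK_UNINTERRUPTIBLE 0x0002
#define TASK_WAKEKILL        0x0080
#define TASK_KILLABLE        (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)

/*
 * Model of what signal_pending_state() decides for schedule(): an
 * interruptible sleeper is woken by any pending signal, a killable
 * sleeper only by a fatal one, and other sleep states ignore signals.
 * (Sketch only - the real helper inspects the task's signal queues.)
 */
static int signal_pending_state(long state, int any_signal, int fatal_signal)
{
	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
		return 0;
	if (!any_signal)
		return 0;
	return (state & TASK_INTERRUPTIBLE) ? 1 : fatal_signal;
}

int main(void)
{
	printf("%d\n", signal_pending_state(TASK_INTERRUPTIBLE, 1, 0));   /* 1 */
	printf("%d\n", signal_pending_state(TASK_KILLABLE, 1, 0));        /* 0 */
	printf("%d\n", signal_pending_state(TASK_KILLABLE, 1, 1));        /* 1 */
	printf("%d\n", signal_pending_state(TASK_UNINTERRUPTIBLE, 1, 1)); /* 0 */
	return 0;
}
```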
| @@ -4982,8 +4637,10 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4982 | goto out_unlock; | 4637 | goto out_unlock; |
| 4983 | } | 4638 | } |
| 4984 | on_rq = p->se.on_rq; | 4639 | on_rq = p->se.on_rq; |
| 4985 | if (on_rq) | 4640 | if (on_rq) { |
| 4986 | dequeue_task(rq, p, 0); | 4641 | dequeue_task(rq, p, 0); |
| 4642 | dec_load(rq, p); | ||
| 4643 | } | ||
| 4987 | 4644 | ||
| 4988 | p->static_prio = NICE_TO_PRIO(nice); | 4645 | p->static_prio = NICE_TO_PRIO(nice); |
| 4989 | set_load_weight(p); | 4646 | set_load_weight(p); |
| @@ -4993,6 +4650,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4993 | 4650 | ||
| 4994 | if (on_rq) { | 4651 | if (on_rq) { |
| 4995 | enqueue_task(rq, p, 0); | 4652 | enqueue_task(rq, p, 0); |
| 4653 | inc_load(rq, p); | ||
| 4996 | /* | 4654 | /* |
| 4997 | * If the task increased its priority or is running and | 4655 | * If the task increased its priority or is running and |
| 4998 | * lowered its priority, then reschedule its CPU: | 4656 | * lowered its priority, then reschedule its CPU: |
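Because rq->load is again maintained by hand, set_user_nice() must subtract the task's old weight from the runqueue load before changing the priority and add the new weight back afterwards. Below is a standalone model of that dequeue / re-weight / enqueue sequence, reusing the illustrative inc_load()/dec_load() shape from the earlier sketch; the weight values are the scheduler's nice-level weight table for nice -5..+5:

```c
#include <stdio.h>

/* Subset of the scheduler's nice-to-weight table (nice -5 .. +5). */
static const unsigned long prio_to_weight[] = {
	3121, 2501, 1991, 1586, 1277,   /* nice -5 .. -1 */
	1024,                           /* nice  0       */
	 820,  655,  526,  423,  335,   /* nice +1 .. +5 */
};

struct task { long nice; unsigned long weight; int on_rq; };
struct runq { unsigned long load; };

static void set_load_weight(struct task *p)
{
	p->weight = prio_to_weight[p->nice + 5];
}

/* Model of the set_user_nice() sequence after this merge. */
static void set_user_nice(struct runq *rq, struct task *p, long nice)
{
	if (p->on_rq)
		rq->load -= p->weight;   /* dequeue_task + dec_load */

	p->nice = nice;
	set_load_weight(p);

	if (p->on_rq)
		rq->load += p->weight;   /* enqueue_task + inc_load */
}

int main(void)
{
	struct task p = { 0, 1024, 1 };
	struct runq rq = { 2048 };       /* p plus another nice-0 task */

	set_user_nice(&rq, &p, 5);
	printf("weight=%lu rq.load=%lu\n", p.weight, rq.load); /* 335, 1359 */
	return 0;
}
```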
| @@ -7367,7 +7025,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7367 | SD_INIT(sd, ALLNODES); | 7025 | SD_INIT(sd, ALLNODES); |
| 7368 | set_domain_attribute(sd, attr); | 7026 | set_domain_attribute(sd, attr); |
| 7369 | sd->span = *cpu_map; | 7027 | sd->span = *cpu_map; |
| 7370 | sd->first_cpu = first_cpu(sd->span); | ||
| 7371 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); | 7028 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); |
| 7372 | p = sd; | 7029 | p = sd; |
| 7373 | sd_allnodes = 1; | 7030 | sd_allnodes = 1; |
| @@ -7378,7 +7035,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7378 | SD_INIT(sd, NODE); | 7035 | SD_INIT(sd, NODE); |
| 7379 | set_domain_attribute(sd, attr); | 7036 | set_domain_attribute(sd, attr); |
| 7380 | sched_domain_node_span(cpu_to_node(i), &sd->span); | 7037 | sched_domain_node_span(cpu_to_node(i), &sd->span); |
| 7381 | sd->first_cpu = first_cpu(sd->span); | ||
| 7382 | sd->parent = p; | 7038 | sd->parent = p; |
| 7383 | if (p) | 7039 | if (p) |
| 7384 | p->child = sd; | 7040 | p->child = sd; |
| @@ -7390,7 +7046,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7390 | SD_INIT(sd, CPU); | 7046 | SD_INIT(sd, CPU); |
| 7391 | set_domain_attribute(sd, attr); | 7047 | set_domain_attribute(sd, attr); |
| 7392 | sd->span = *nodemask; | 7048 | sd->span = *nodemask; |
| 7393 | sd->first_cpu = first_cpu(sd->span); | ||
| 7394 | sd->parent = p; | 7049 | sd->parent = p; |
| 7395 | if (p) | 7050 | if (p) |
| 7396 | p->child = sd; | 7051 | p->child = sd; |
| @@ -7402,7 +7057,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7402 | SD_INIT(sd, MC); | 7057 | SD_INIT(sd, MC); |
| 7403 | set_domain_attribute(sd, attr); | 7058 | set_domain_attribute(sd, attr); |
| 7404 | sd->span = cpu_coregroup_map(i); | 7059 | sd->span = cpu_coregroup_map(i); |
| 7405 | sd->first_cpu = first_cpu(sd->span); | ||
| 7406 | cpus_and(sd->span, sd->span, *cpu_map); | 7060 | cpus_and(sd->span, sd->span, *cpu_map); |
| 7407 | sd->parent = p; | 7061 | sd->parent = p; |
| 7408 | p->child = sd; | 7062 | p->child = sd; |
| @@ -7415,7 +7069,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7415 | SD_INIT(sd, SIBLING); | 7069 | SD_INIT(sd, SIBLING); |
| 7416 | set_domain_attribute(sd, attr); | 7070 | set_domain_attribute(sd, attr); |
| 7417 | sd->span = per_cpu(cpu_sibling_map, i); | 7071 | sd->span = per_cpu(cpu_sibling_map, i); |
| 7418 | sd->first_cpu = first_cpu(sd->span); | ||
| 7419 | cpus_and(sd->span, sd->span, *cpu_map); | 7072 | cpus_and(sd->span, sd->span, *cpu_map); |
| 7420 | sd->parent = p; | 7073 | sd->parent = p; |
| 7421 | p->child = sd; | 7074 | p->child = sd; |
| @@ -7619,8 +7272,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 7619 | 7272 | ||
| 7620 | static cpumask_t *doms_cur; /* current sched domains */ | 7273 | static cpumask_t *doms_cur; /* current sched domains */ |
| 7621 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 7274 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
| 7622 | static struct sched_domain_attr *dattr_cur; /* attribues of custom domains | 7275 | static struct sched_domain_attr *dattr_cur; |
| 7623 | in 'doms_cur' */ | 7276 | /* attribues of custom domains in 'doms_cur' */ |
| 7624 | 7277 | ||
| 7625 | /* | 7278 | /* |
| 7626 | * Special case: If a kmalloc of a doms_cur partition (array of | 7279 | * Special case: If a kmalloc of a doms_cur partition (array of |
| @@ -8085,7 +7738,6 @@ void __init sched_init(void) | |||
| 8085 | } | 7738 | } |
| 8086 | 7739 | ||
| 8087 | #ifdef CONFIG_SMP | 7740 | #ifdef CONFIG_SMP |
| 8088 | init_aggregate(); | ||
| 8089 | init_defrootdomain(); | 7741 | init_defrootdomain(); |
| 8090 | #endif | 7742 | #endif |
| 8091 | 7743 | ||
| @@ -8650,11 +8302,14 @@ void sched_move_task(struct task_struct *tsk) | |||
| 8650 | #endif | 8302 | #endif |
| 8651 | 8303 | ||
| 8652 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8304 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 8653 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) | 8305 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
| 8654 | { | 8306 | { |
| 8655 | struct cfs_rq *cfs_rq = se->cfs_rq; | 8307 | struct cfs_rq *cfs_rq = se->cfs_rq; |
| 8308 | struct rq *rq = cfs_rq->rq; | ||
| 8656 | int on_rq; | 8309 | int on_rq; |
| 8657 | 8310 | ||
| 8311 | spin_lock_irq(&rq->lock); | ||
| 8312 | |||
| 8658 | on_rq = se->on_rq; | 8313 | on_rq = se->on_rq; |
| 8659 | if (on_rq) | 8314 | if (on_rq) |
| 8660 | dequeue_entity(cfs_rq, se, 0); | 8315 | dequeue_entity(cfs_rq, se, 0); |
| @@ -8664,17 +8319,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) | |||
| 8664 | 8319 | ||
| 8665 | if (on_rq) | 8320 | if (on_rq) |
| 8666 | enqueue_entity(cfs_rq, se, 0); | 8321 | enqueue_entity(cfs_rq, se, 0); |
| 8667 | } | ||
| 8668 | 8322 | ||
| 8669 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8323 | spin_unlock_irq(&rq->lock); |
| 8670 | { | ||
| 8671 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
| 8672 | struct rq *rq = cfs_rq->rq; | ||
| 8673 | unsigned long flags; | ||
| 8674 | |||
| 8675 | spin_lock_irqsave(&rq->lock, flags); | ||
| 8676 | __set_se_shares(se, shares); | ||
| 8677 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 8678 | } | 8324 | } |
| 8679 | 8325 | ||
| 8680 | static DEFINE_MUTEX(shares_mutex); | 8326 | static DEFINE_MUTEX(shares_mutex); |
| @@ -8713,13 +8359,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 8713 | * w/o tripping rebalance_share or load_balance_fair. | 8359 | * w/o tripping rebalance_share or load_balance_fair. |
| 8714 | */ | 8360 | */ |
| 8715 | tg->shares = shares; | 8361 | tg->shares = shares; |
| 8716 | for_each_possible_cpu(i) { | 8362 | for_each_possible_cpu(i) |
| 8717 | /* | ||
| 8718 | * force a rebalance | ||
| 8719 | */ | ||
| 8720 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | ||
| 8721 | set_se_shares(tg->se[i], shares); | 8363 | set_se_shares(tg->se[i], shares); |
| 8722 | } | ||
| 8723 | 8364 | ||
| 8724 | /* | 8365 | /* |
| 8725 | * Enable load balance activity on this group, by inserting it back on | 8366 | * Enable load balance activity on this group, by inserting it back on |
