 include/linux/sched.h |   23 +++++++++++++++++++++--
 kernel/sched.c        |   28 ++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f2f440221b70..c34a718e20dd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -788,9 +788,28 @@ enum cpu_idle_type {
 };
 
 /*
- * Increase resolution of nice-level calculations:
+ * Increase resolution of nice-level calculations for 64-bit architectures.
+ * The extra resolution improves shares distribution and load balancing of
+ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
+ * hierarchies, especially on larger systems. This is not a user-visible change
+ * and does not change the user-interface for setting shares/weights.
+ *
+ * We increase resolution only if we have enough bits to allow this increased
+ * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
+ * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
+ * increased costs.
  */
-#define SCHED_LOAD_SHIFT	10
+#if BITS_PER_LONG > 32
+# define SCHED_LOAD_RESOLUTION	10
+# define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
+# define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
+#else
+# define SCHED_LOAD_RESOLUTION	0
+# define scale_load(w)		(w)
+# define scale_load_down(w)	(w)
+#endif
+
+#define SCHED_LOAD_SHIFT	(10 + SCHED_LOAD_RESOLUTION)
 #define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
 
 /*
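For a concrete sense of what the new macros do, here is a minimal user-space sketch (not kernel code) that copies the BITS_PER_LONG > 32 definitions above; the value 1024 is the scheduler's nice-0 weight from prio_to_weight[]:

#include <stdio.h>

/* User-space copy of the new 64-bit definitions, for illustration only. */
#define SCHED_LOAD_RESOLUTION	10
#define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
#define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
#define SCHED_LOAD_SHIFT	(10 + SCHED_LOAD_RESOLUTION)
#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)

int main(void)
{
	unsigned long nice_0_weight = 1024;	/* prio_to_weight[] entry for nice 0 */

	printf("internal weight:  %lu\n", scale_load(nice_0_weight));	/* 1048576 */
	printf("round-trip:       %lu\n",
	       scale_load_down(scale_load(nice_0_weight)));		/* 1024 */
	printf("SCHED_LOAD_SCALE: %ld\n", SCHED_LOAD_SCALE);		/* 1 << 20 */
	return 0;
}

On 32-bit builds SCHED_LOAD_RESOLUTION is 0, so both macros collapse to the identity and SCHED_LOAD_SCALE stays at 1 << 10.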
diff --git a/kernel/sched.c b/kernel/sched.c
index 375e9c677d58..bb504e1839e5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock);
  * limitation from this.)
  */
 #define MIN_SHARES	2
-#define MAX_SHARES	(1UL << 18)
+#define MAX_SHARES	(1UL << (18 + SCHED_LOAD_RESOLUTION))
 
 static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
 #endif
@@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 {
 	u64 tmp;
 
-	tmp = (u64)delta_exec * weight;
+	/*
+	 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
+	 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
+	 * 2^SCHED_LOAD_RESOLUTION.
+	 */
+	if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
+		tmp = (u64)delta_exec * scale_load_down(weight);
+	else
+		tmp = (u64)delta_exec;
 
 	if (!lw->inv_weight) {
-		if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+		unsigned long w = scale_load_down(lw->weight);
+
+		if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
 			lw->inv_weight = 1;
+		else if (unlikely(!w))
+			lw->inv_weight = WMULT_CONST;
 		else
-			lw->inv_weight = WMULT_CONST / lw->weight;
+			lw->inv_weight = WMULT_CONST / w;
 	}
 
 	/*
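The hunk above keeps calc_delta_mine() operating in the old 10-bit domain: both weight and lw->weight are scaled back down before the multiply and the inverse-weight divide, and a group weight at or below 2^SCHED_LOAD_RESOLUTION (possible because MIN_SHARES is 2) is treated as 1. Below is a simplified user-space sketch of that arithmetic, assuming the kernel's 64-bit WMULT_CONST of 2^32 and ignoring its rounding and overflow guards:

#include <stdio.h>

#define SCHED_LOAD_RESOLUTION	10
#define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
#define WMULT_CONST		(1ULL << 32)	/* assumed 64-bit value from kernel/sched.c */
#define WMULT_SHIFT		32

/* Simplified model of calc_delta_mine(): delta_exec * weight / lw_weight. */
static unsigned long long calc_delta_sketch(unsigned long delta_exec,
					    unsigned long weight,
					    unsigned long lw_weight)
{
	unsigned long long tmp, inv_weight;
	unsigned long w = scale_load_down(lw_weight);

	/* Group entities may carry a weight below 2^SCHED_LOAD_RESOLUTION. */
	if (weight > (1UL << SCHED_LOAD_RESOLUTION))
		tmp = (unsigned long long)delta_exec * scale_load_down(weight);
	else
		tmp = (unsigned long long)delta_exec;

	if (w >= WMULT_CONST)
		inv_weight = 1;
	else if (!w)
		inv_weight = WMULT_CONST;
	else
		inv_weight = WMULT_CONST / w;

	return (tmp * inv_weight) >> WMULT_SHIFT;
}

int main(void)
{
	/* A nice-0 entity (weight 1024 << 10) on a queue of total weight 2048 << 10. */
	printf("%llu\n", calc_delta_sketch(1000000, 1024UL << 10, 2048UL << 10));
	return 0;
}

Running it, an entity holding half of the queue's weight is credited half of the delta (500000 out of 1000000), exactly as before the resolution change.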
@@ -1785,12 +1797,12 @@ static void set_load_weight(struct task_struct *p)
 	 * SCHED_IDLE tasks get minimal weight:
 	 */
 	if (p->policy == SCHED_IDLE) {
-		load->weight = WEIGHT_IDLEPRIO;
+		load->weight = scale_load(WEIGHT_IDLEPRIO);
 		load->inv_weight = WMULT_IDLEPRIO;
 		return;
 	}
 
-	load->weight = prio_to_weight[prio];
+	load->weight = scale_load(prio_to_weight[prio]);
 	load->inv_weight = prio_to_wmult[prio];
 }
 
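set_load_weight() now stores the scaled weight in load->weight while leaving the prio_to_wmult[] and WMULT_IDLEPRIO inverse values untouched; that stays consistent because calc_delta_mine() scales the weight back down before using inv_weight. The extra bits matter most at the low end of the table. A small illustrative sketch (the table values 1024 and 15 are the prio_to_weight[] entries for nice 0 and nice +19):

#include <stdio.h>

#define SCHED_LOAD_RESOLUTION	10
#define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)

int main(void)
{
	/* Two entries from the scheduler's prio_to_weight[] table. */
	unsigned long nice_0  = 1024;	/* nice   0 */
	unsigned long nice_19 = 15;	/* nice +19 */

	/* set_load_weight() now stores these scaled values in load->weight. */
	printf("nice  0: %4lu -> %lu\n", nice_0,  scale_load(nice_0));	/* 1048576 */
	printf("nice 19: %4lu -> %lu\n", nice_19, scale_load(nice_19));	/* 15360 */
	return 0;
}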
@@ -8809,14 +8821,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
 {
-	return sched_group_set_shares(cgroup_tg(cgrp), shareval);
+	return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
 }
 
 static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
 
-	return (u64) tg->shares;
+	return (u64) scale_load_down(tg->shares);
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
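Because cpu_shares_write_u64() scales the value up and cpu_shares_read_u64() scales it back down, the cpu.shares value a cgroup user writes is exactly what they read back; only tg->shares (now bounded by MAX_SHARES, i.e. 1UL << 28 on 64-bit) carries the extra resolution. A hypothetical round-trip sketch, with tg_shares standing in for tg->shares:

#include <stdio.h>

#define SCHED_LOAD_RESOLUTION	10
#define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
#define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)

static unsigned long tg_shares;			/* stand-in for tg->shares */

static void shares_write(unsigned long long shareval)
{
	tg_shares = scale_load(shareval);	/* kept internally in high resolution */
}

static unsigned long long shares_read(void)
{
	return scale_load_down(tg_shares);	/* reported back in user units */
}

int main(void)
{
	shares_write(2048);			/* e.g. echo 2048 > cpu.shares */
	printf("cpu.shares reads back as %llu\n", shares_read());	/* 2048 */
	printf("internal tg->shares is   %lu\n", tg_shares);		/* 2097152 */
	return 0;
}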