diff options
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | kernel/sched.c | 51 | ||||
-rw-r--r-- | kernel/sched_fair.c | 62 | ||||
-rw-r--r-- | kernel/sched_features.h | 2 | ||||
-rw-r--r-- | kernel/sched_stats.h | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 |
6 files changed, 79 insertions, 50 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c38db536e07..10bff55b0824 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -287,7 +287,6 @@ extern void trap_init(void); | |||
287 | extern void account_process_tick(struct task_struct *task, int user); | 287 | extern void account_process_tick(struct task_struct *task, int user); |
288 | extern void update_process_times(int user); | 288 | extern void update_process_times(int user); |
289 | extern void scheduler_tick(void); | 289 | extern void scheduler_tick(void); |
290 | extern void hrtick_resched(void); | ||
291 | 290 | ||
292 | extern void sched_show_task(struct task_struct *p); | 291 | extern void sched_show_task(struct task_struct *p); |
293 | 292 | ||
@@ -1665,6 +1664,7 @@ extern unsigned int sysctl_sched_features; | |||
1665 | extern unsigned int sysctl_sched_migration_cost; | 1664 | extern unsigned int sysctl_sched_migration_cost; |
1666 | extern unsigned int sysctl_sched_nr_migrate; | 1665 | extern unsigned int sysctl_sched_nr_migrate; |
1667 | extern unsigned int sysctl_sched_shares_ratelimit; | 1666 | extern unsigned int sysctl_sched_shares_ratelimit; |
1667 | extern unsigned int sysctl_sched_shares_thresh; | ||
1668 | 1668 | ||
1669 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 1669 | int sched_nr_latency_handler(struct ctl_table *table, int write, |
1670 | struct file *file, void __user *buffer, size_t *length, | 1670 | struct file *file, void __user *buffer, size_t *length, |
diff --git a/kernel/sched.c b/kernel/sched.c index d906f72b42d2..945a97b9600d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
819 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 819 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
820 | 820 | ||
821 | /* | 821 | /* |
822 | * Inject some fuzziness into changing the per-cpu group shares | ||
823 | * this avoids remote rq-locks at the expense of fairness. | ||
824 | * default: 4 | ||
825 | */ | ||
826 | unsigned int sysctl_sched_shares_thresh = 4; | ||
827 | |||
828 | /* | ||
822 | * period over which we measure -rt task cpu usage in us. | 829 | * period over which we measure -rt task cpu usage in us. |
823 | * default: 1s | 830 | * default: 1s |
824 | */ | 831 | */ |
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); | |||
1454 | * Calculate and set the cpu's group shares. | 1461 | * Calculate and set the cpu's group shares. |
1455 | */ | 1462 | */ |
1456 | static void | 1463 | static void |
1457 | __update_group_shares_cpu(struct task_group *tg, int cpu, | 1464 | update_group_shares_cpu(struct task_group *tg, int cpu, |
1458 | unsigned long sd_shares, unsigned long sd_rq_weight) | 1465 | unsigned long sd_shares, unsigned long sd_rq_weight) |
1459 | { | 1466 | { |
1460 | int boost = 0; | 1467 | int boost = 0; |
1461 | unsigned long shares; | 1468 | unsigned long shares; |
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1486 | * | 1493 | * |
1487 | */ | 1494 | */ |
1488 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | 1495 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); |
1496 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | ||
1489 | 1497 | ||
1490 | /* | 1498 | if (abs(shares - tg->se[cpu]->load.weight) > |
1491 | * record the actual number of shares, not the boosted amount. | 1499 | sysctl_sched_shares_thresh) { |
1492 | */ | 1500 | struct rq *rq = cpu_rq(cpu); |
1493 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1501 | unsigned long flags; |
1494 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1495 | 1502 | ||
1496 | if (shares < MIN_SHARES) | 1503 | spin_lock_irqsave(&rq->lock, flags); |
1497 | shares = MIN_SHARES; | 1504 | /* |
1498 | else if (shares > MAX_SHARES) | 1505 | * record the actual number of shares, not the boosted amount. |
1499 | shares = MAX_SHARES; | 1506 | */ |
1507 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1508 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1500 | 1509 | ||
1501 | __set_se_shares(tg->se[cpu], shares); | 1510 | __set_se_shares(tg->se[cpu], shares); |
1511 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1512 | } | ||
1502 | } | 1513 | } |
1503 | 1514 | ||
1504 | /* | 1515 | /* |
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1527 | if (!rq_weight) | 1538 | if (!rq_weight) |
1528 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | 1539 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; |
1529 | 1540 | ||
1530 | for_each_cpu_mask(i, sd->span) { | 1541 | for_each_cpu_mask(i, sd->span) |
1531 | struct rq *rq = cpu_rq(i); | 1542 | update_group_shares_cpu(tg, i, shares, rq_weight); |
1532 | unsigned long flags; | ||
1533 | |||
1534 | spin_lock_irqsave(&rq->lock, flags); | ||
1535 | __update_group_shares_cpu(tg, i, shares, rq_weight); | ||
1536 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1537 | } | ||
1538 | 1543 | ||
1539 | return 0; | 1544 | return 0; |
1540 | } | 1545 | } |
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible: | |||
4443 | if (sched_feat(HRTICK)) | 4448 | if (sched_feat(HRTICK)) |
4444 | hrtick_clear(rq); | 4449 | hrtick_clear(rq); |
4445 | 4450 | ||
4446 | /* | 4451 | spin_lock_irq(&rq->lock); |
4447 | * Do the rq-clock update outside the rq lock: | ||
4448 | */ | ||
4449 | local_irq_disable(); | ||
4450 | update_rq_clock(rq); | 4452 | update_rq_clock(rq); |
4451 | spin_lock(&rq->lock); | ||
4452 | clear_tsk_need_resched(prev); | 4453 | clear_tsk_need_resched(prev); |
4453 | 4454 | ||
4454 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4455 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f604dae71316..9573c33688b8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL; | |||
73 | 73 | ||
74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
75 | 75 | ||
76 | static const struct sched_class fair_sched_class; | ||
77 | |||
76 | /************************************************************** | 78 | /************************************************************** |
77 | * CFS operations on generic schedulable entities: | 79 | * CFS operations on generic schedulable entities: |
78 | */ | 80 | */ |
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 336 | #endif |
335 | 337 | ||
336 | /* | 338 | /* |
337 | * delta *= w / rw | 339 | * delta *= P[w / rw] |
338 | */ | 340 | */ |
339 | static inline unsigned long | 341 | static inline unsigned long |
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | 342 | calc_delta_weight(unsigned long delta, struct sched_entity *se) |
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se) | |||
348 | } | 350 | } |
349 | 351 | ||
350 | /* | 352 | /* |
351 | * delta *= rw / w | 353 | * delta /= w |
352 | */ | 354 | */ |
353 | static inline unsigned long | 355 | static inline unsigned long |
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | 356 | calc_delta_fair(unsigned long delta, struct sched_entity *se) |
355 | { | 357 | { |
356 | for_each_sched_entity(se) { | 358 | if (unlikely(se->load.weight != NICE_0_LOAD)) |
357 | delta = calc_delta_mine(delta, | 359 | delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load); |
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | 360 | ||
361 | return delta; | 361 | return delta; |
362 | } | 362 | } |
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running) | |||
386 | * We calculate the wall-time slice from the period by taking a part | 386 | * We calculate the wall-time slice from the period by taking a part |
387 | * proportional to the weight. | 387 | * proportional to the weight. |
388 | * | 388 | * |
389 | * s = p*w/rw | 389 | * s = p*P[w/rw] |
390 | */ | 390 | */ |
391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
392 | { | 392 | { |
393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 393 | unsigned long nr_running = cfs_rq->nr_running; |
394 | |||
395 | if (unlikely(!se->on_rq)) | ||
396 | nr_running++; | ||
397 | |||
398 | return calc_delta_weight(__sched_period(nr_running), se); | ||
394 | } | 399 | } |
395 | 400 | ||
396 | /* | 401 | /* |
397 | * We calculate the vruntime slice of a to-be-inserted task | 402 | * We calculate the vruntime slice of a to-be-inserted task |
398 | * | 403 | * |
399 | * vs = s*rw/w = p | 404 | * vs = s/w |
400 | */ | 405 | */ |
401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 406 | static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
402 | { | 407 | { |
403 | unsigned long nr_running = cfs_rq->nr_running; | 408 | return calc_delta_fair(sched_slice(cfs_rq, se), se); |
404 | |||
405 | if (!se->on_rq) | ||
406 | nr_running++; | ||
407 | |||
408 | return __sched_period(nr_running); | ||
409 | } | 409 | } |
410 | 410 | ||
411 | /* | 411 | /* |
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
628 | * stays open at the end. | 628 | * stays open at the end. |
629 | */ | 629 | */ |
630 | if (initial && sched_feat(START_DEBIT)) | 630 | if (initial && sched_feat(START_DEBIT)) |
631 | vruntime += sched_vslice_add(cfs_rq, se); | 631 | vruntime += sched_vslice(cfs_rq, se); |
632 | 632 | ||
633 | if (!initial) { | 633 | if (!initial) { |
634 | /* sleeps up to a single latency don't count. */ | 634 | /* sleeps up to a single latency don't count. */ |
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
748 | struct rq *rq = rq_of(cfs_rq); | 748 | struct rq *rq = rq_of(cfs_rq); |
749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; | 749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; |
750 | 750 | ||
751 | if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) { | 751 | if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { |
752 | cfs_rq->pair_start = rq->clock; | 752 | cfs_rq->pair_start = rq->clock; |
753 | return se; | 753 | return se; |
754 | } | 754 | } |
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
849 | hrtick_start(rq, delta); | 849 | hrtick_start(rq, delta); |
850 | } | 850 | } |
851 | } | 851 | } |
852 | |||
853 | /* | ||
854 | * called from enqueue/dequeue and updates the hrtick when the | ||
855 | * current task is from our class and nr_running is low enough | ||
856 | * to matter. | ||
857 | */ | ||
858 | static void hrtick_update(struct rq *rq) | ||
859 | { | ||
860 | struct task_struct *curr = rq->curr; | ||
861 | |||
862 | if (curr->sched_class != &fair_sched_class) | ||
863 | return; | ||
864 | |||
865 | if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency) | ||
866 | hrtick_start_fair(rq, curr); | ||
867 | } | ||
852 | #else /* !CONFIG_SCHED_HRTICK */ | 868 | #else /* !CONFIG_SCHED_HRTICK */ |
853 | static inline void | 869 | static inline void |
854 | hrtick_start_fair(struct rq *rq, struct task_struct *p) | 870 | hrtick_start_fair(struct rq *rq, struct task_struct *p) |
855 | { | 871 | { |
856 | } | 872 | } |
873 | |||
874 | static inline void hrtick_update(struct rq *rq) | ||
875 | { | ||
876 | } | ||
857 | #endif | 877 | #endif |
858 | 878 | ||
859 | /* | 879 | /* |
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | |||
874 | wakeup = 1; | 894 | wakeup = 1; |
875 | } | 895 | } |
876 | 896 | ||
877 | hrtick_start_fair(rq, rq->curr); | 897 | hrtick_update(rq); |
878 | } | 898 | } |
879 | 899 | ||
880 | /* | 900 | /* |
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | |||
896 | sleep = 1; | 916 | sleep = 1; |
897 | } | 917 | } |
898 | 918 | ||
899 | hrtick_start_fair(rq, rq->curr); | 919 | hrtick_update(rq); |
900 | } | 920 | } |
901 | 921 | ||
902 | /* | 922 | /* |
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p) | |||
1002 | 1022 | ||
1003 | #ifdef CONFIG_SMP | 1023 | #ifdef CONFIG_SMP |
1004 | 1024 | ||
1005 | static const struct sched_class fair_sched_class; | ||
1006 | |||
1007 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1025 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1008 | /* | 1026 | /* |
1009 | * effective_load() calculates the load change as seen from the root_task_group | 1027 | * effective_load() calculates the load change as seen from the root_task_group |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 7c9e8f4a049f..fda016218296 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1) | |||
5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
6 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) | 6 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) |
7 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 7 | SCHED_FEAT(SYNC_WAKEUPS, 1) |
8 | SCHED_FEAT(HRTICK, 1) | 8 | SCHED_FEAT(HRTICK, 0) |
9 | SCHED_FEAT(DOUBLE_TICK, 0) | 9 | SCHED_FEAT(DOUBLE_TICK, 0) |
10 | SCHED_FEAT(ASYM_GRAN, 1) | 10 | SCHED_FEAT(ASYM_GRAN, 1) |
11 | SCHED_FEAT(LB_BIAS, 1) | 11 | SCHED_FEAT(LB_BIAS, 1) |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index b8c156979cf2..2df9d297d292 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -9,7 +9,7 @@ | |||
9 | static int show_schedstat(struct seq_file *seq, void *v) | 9 | static int show_schedstat(struct seq_file *seq, void *v) |
10 | { | 10 | { |
11 | int cpu; | 11 | int cpu; |
12 | int mask_len = NR_CPUS/32 * 9; | 12 | int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; |
13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); | 13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); |
14 | 14 | ||
15 | if (mask_str == NULL) | 15 | if (mask_str == NULL) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b3cc73931d1f..a13bd4dfaeb1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = { | |||
276 | }, | 276 | }, |
277 | { | 277 | { |
278 | .ctl_name = CTL_UNNUMBERED, | 278 | .ctl_name = CTL_UNNUMBERED, |
279 | .procname = "sched_shares_thresh", | ||
280 | .data = &sysctl_sched_shares_thresh, | ||
281 | .maxlen = sizeof(unsigned int), | ||
282 | .mode = 0644, | ||
283 | .proc_handler = &proc_dointvec_minmax, | ||
284 | .strategy = &sysctl_intvec, | ||
285 | .extra1 = &zero, | ||
286 | }, | ||
287 | { | ||
288 | .ctl_name = CTL_UNNUMBERED, | ||
279 | .procname = "sched_child_runs_first", | 289 | .procname = "sched_child_runs_first", |
280 | .data = &sysctl_sched_child_runs_first, | 290 | .data = &sysctl_sched_child_runs_first, |
281 | .maxlen = sizeof(unsigned int), | 291 | .maxlen = sizeof(unsigned int), |