| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-23 12:37:16 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-23 12:37:16 -0400 |
| commit | 133e887f90208d339088dd60cb1d08a72ba27288 (patch) | |
| tree | 4f6bab9d13df7fac50bc7d699d8486f441deb428 | |
| parent | e82cff752f57810a2259415ad2e9087c2d69484c (diff) | |
| parent | 0c4b83da58ec2e96ce9c44c211d6eac5f9dae478 (diff) | |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched: disable the hrtick for now
sched: revert back to per-rq vruntime
sched: fair scheduler should not resched rt tasks
sched: optimize group load balancer
sched: minor fast-path overhead reduction
sched: fix the wrong mask_len, cleanup
sched: kill unused scheduler decl.
sched: fix the wrong mask_len
sched: only update rq->clock while holding rq->lock
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | include/linux/sched.h | 2 |
| -rw-r--r-- | kernel/sched.c | 51 |
| -rw-r--r-- | kernel/sched_fair.c | 62 |
| -rw-r--r-- | kernel/sched_features.h | 2 |
| -rw-r--r-- | kernel/sched_stats.h | 2 |
| -rw-r--r-- | kernel/sysctl.c | 10 |

6 files changed, 79 insertions(+), 50 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c38db536e07..10bff55b0824 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -287,7 +287,6 @@ extern void trap_init(void);
 extern void account_process_tick(struct task_struct *task, int user);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
-extern void hrtick_resched(void);
 
 extern void sched_show_task(struct task_struct *p);
 
@@ -1665,6 +1664,7 @@ extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index d906f72b42d2..945a97b9600d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 
 /*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  * Calculate and set the cpu's group shares.
  */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
-			  unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
 	int boost = 0;
 	unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *
 	 */
 	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-	tg->cfs_rq[cpu]->rq_weight = rq_weight;
+	if (abs(shares - tg->se[cpu]->load.weight) >
+			sysctl_sched_shares_thresh) {
+		struct rq *rq = cpu_rq(cpu);
+		unsigned long flags;
 
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
+		spin_lock_irqsave(&rq->lock, flags);
+		/*
+		 * record the actual number of shares, not the boosted amount.
+		 */
+		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+		tg->cfs_rq[cpu]->rq_weight = rq_weight;
 
-	__set_se_shares(tg->se[cpu], shares);
+		__set_se_shares(tg->se[cpu], shares);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 /*
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!rq_weight)
 		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, i, shares, rq_weight);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
+	for_each_cpu_mask(i, sd->span)
+		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
 }
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
 
-	/*
-	 * Do the rq-clock update outside the rq lock:
-	 */
-	local_irq_disable();
+	spin_lock_irq(&rq->lock);
 	update_rq_clock(rq);
-	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
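The heart of the "optimize group load balancer" change above is that update_group_shares_cpu() now takes the remote rq->lock only when the freshly computed shares value differs from the currently installed weight by more than sysctl_sched_shares_thresh. A minimal user-space sketch of that gating pattern; the names (maybe_update(), locked_update()) and the printf stand-ins are illustrative, not kernel API:

```c
#include <stdio.h>
#include <stdlib.h>

static long shares_thresh = 4;	/* mirrors sysctl_sched_shares_thresh's default */

/* stands in for the spin_lock_irqsave() + __set_se_shares() path */
static void locked_update(long *current_weight, long shares)
{
	*current_weight = shares;
	printf("took rq->lock, set shares to %ld\n", shares);
}

/* gate the expensive update on the threshold, as update_group_shares_cpu() now does */
static void maybe_update(long *current_weight, long shares)
{
	if (labs(shares - *current_weight) > shares_thresh)
		locked_update(current_weight, shares);
	else
		printf("delta %ld within thresh, skipped remote lock\n",
		       labs(shares - *current_weight));
}

int main(void)
{
	long weight = 1024;

	maybe_update(&weight, 1026);	/* |2|  <= 4: skipped */
	maybe_update(&weight, 1040);	/* |16| >  4: applied */
	return 0;
}
```

The trade-off is the one the new comment states: small shares errors are tolerated in exchange for fewer cross-CPU lock acquisitions.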
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f604dae71316..9573c33688b8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+static const struct sched_class fair_sched_class;
+
 /**************************************************************
  * CFS operations on generic schedulable entities:
  */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 
 /*
- * delta *= w / rw
+ * delta *= P[w / rw]
  */
 static inline unsigned long
 calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
 }
 
 /*
- * delta *= rw / w
+ * delta /= w
  */
 static inline unsigned long
 calc_delta_fair(unsigned long delta, struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
-		delta = calc_delta_mine(delta,
-				cfs_rq_of(se)->load.weight, &se->load);
-	}
+	if (unlikely(se->load.weight != NICE_0_LOAD))
+		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
 
 	return delta;
 }
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
  * We calculate the wall-time slice from the period by taking a part
  * proportional to the weight.
  *
- * s = p*w/rw
+ * s = p*P[w/rw]
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+	unsigned long nr_running = cfs_rq->nr_running;
+
+	if (unlikely(!se->on_rq))
+		nr_running++;
+
+	return calc_delta_weight(__sched_period(nr_running), se);
 }
 
 /*
  * We calculate the vruntime slice of a to be inserted task
  *
- * vs = s*rw/w = p
+ * vs = s/w
  */
-static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long nr_running = cfs_rq->nr_running;
-
-	if (!se->on_rq)
-		nr_running++;
-
-	return __sched_period(nr_running);
+	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
 /*
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	 * stays open at the end.
 	 */
 	if (initial && sched_feat(START_DEBIT))
-		vruntime += sched_vslice_add(cfs_rq, se);
+		vruntime += sched_vslice(cfs_rq, se);
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	struct rq *rq = rq_of(cfs_rq);
 	u64 pair_slice = rq->clock - cfs_rq->pair_start;
 
-	if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
+	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
 		cfs_rq->pair_start = rq->clock;
 		return se;
 	}
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 		hrtick_start(rq, delta);
 	}
 }
+
+/*
+ * called from enqueue/dequeue and updates the hrtick when the
+ * current task is from our class and nr_running is low enough
+ * to matter.
+ */
+static void hrtick_update(struct rq *rq)
+{
+	struct task_struct *curr = rq->curr;
+
+	if (curr->sched_class != &fair_sched_class)
+		return;
+
+	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+		hrtick_start_fair(rq, curr);
+}
 #else /* !CONFIG_SCHED_HRTICK */
 static inline void
 hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 }
+
+static inline void hrtick_update(struct rq *rq)
+{
+}
 #endif
 
 /*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 		wakeup = 1;
 	}
 
-	hrtick_start_fair(rq, rq->curr);
+	hrtick_update(rq);
 }
 
 /*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 		sleep = 1;
 	}
 
-	hrtick_start_fair(rq, rq->curr);
+	hrtick_update(rq);
 }
 
 /*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_SMP
 
-static const struct sched_class fair_sched_class;
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
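The calc_delta_fair() rewrite is the "revert back to per-rq vruntime" part of the series: instead of walking the group hierarchy, vruntime now advances at wall speed for a NICE_0_LOAD entity and scales by NICE_0_LOAD/w otherwise, and sched_vslice() becomes sched_slice() pushed through the same scaling, matching its vs = s/w comment. A rough user-space model of that scaling, using plain 64-bit arithmetic in place of the kernel's fixed-point calc_delta_mine() (NICE_0_LOAD is 1024 in this kernel):

```c
#include <stdio.h>

#define NICE_0_LOAD 1024ULL	/* the 2.6.27-era kernel value */

/*
 * Model of the reverted calc_delta_fair(): scale a runtime delta
 * by NICE_0_LOAD/w; a nice-0 entity passes through unchanged.
 */
static unsigned long long calc_delta_fair(unsigned long long delta,
					  unsigned long long weight)
{
	if (weight != NICE_0_LOAD)
		delta = delta * NICE_0_LOAD / weight;
	return delta;
}

int main(void)
{
	/* 1 ms of wall time charged as vruntime to different weights */
	printf("w=1024: %llu ns\n", calc_delta_fair(1000000, 1024)); /* 1000000 */
	printf("w=2048: %llu ns\n", calc_delta_fair(1000000, 2048)); /*  500000 */
	printf("w= 512: %llu ns\n", calc_delta_fair(1000000, 512));  /* 2000000 */
	return 0;
}
```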
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 7c9e8f4a049f..fda016218296 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 SCHED_FEAT(CACHE_HOT_BUDDY, 1)
 SCHED_FEAT(SYNC_WAKEUPS, 1)
-SCHED_FEAT(HRTICK, 1)
+SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)
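The flip from 1 to 0 implements "sched: disable the hrtick for now" from the changelog above; on kernels built with CONFIG_SCHED_DEBUG, the feature can presumably still be toggled at runtime by writing HRTICK or NO_HRTICK to the sched_features debugfs file.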
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index b8c156979cf2..2df9d297d292 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -9,7 +9,7 @@
 static int show_schedstat(struct seq_file *seq, void *v)
 {
 	int cpu;
-	int mask_len = NR_CPUS/32 * 9;
+	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
 	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
 
 	if (mask_str == NULL)
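The one-liner matters because integer division truncates: with NR_CPUS below 32, the old expression computed a mask_len of zero, so show_schedstat() allocated a zero-byte buffer for the cpumask string. A tiny standalone demonstration, with DIV_ROUND_UP expanded the way the kernel headers define it:

```c
#include <stdio.h>

/* expanded as in the kernel's <linux/kernel.h> */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int nr_cpus = 8;	/* a hypothetical small SMP config */

	/* old: truncating division yields a zero-byte buffer */
	printf("old mask_len: %d\n", nr_cpus / 32 * 9);
	/* new: round up, so 1..32 CPUs still get one 9-char chunk */
	printf("new mask_len: %d\n", DIV_ROUND_UP(nr_cpus, 32) * 9);
	return 0;
}
```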
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b3cc73931d1f..a13bd4dfaeb1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
 		.maxlen		= sizeof(unsigned int),
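The new kern_table entry exposes the threshold as /proc/sys/kernel/sched_shares_thresh, writable by root and clamped to non-negative values through proc_dointvec_minmax with extra1 = &zero. A small sketch of reading it from user space, assuming a kernel that carries this patch; it is ordinary procfs file I/O:

```c
#include <stdio.h>

int main(void)
{
	unsigned int thresh;
	FILE *f = fopen("/proc/sys/kernel/sched_shares_thresh", "r");

	if (!f) {
		perror("fopen");	/* kernel predates or lacks this patch */
		return 1;
	}
	if (fscanf(f, "%u", &thresh) == 1)
		printf("sched_shares_thresh = %u\n", thresh);
	fclose(f);
	return 0;
}
```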
