aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-10-23 12:37:16 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-10-23 12:37:16 -0400
commit 133e887f90208d339088dd60cb1d08a72ba27288 (patch)
tree 4f6bab9d13df7fac50bc7d699d8486f441deb428
parent e82cff752f57810a2259415ad2e9087c2d69484c (diff)
parent 0c4b83da58ec2e96ce9c44c211d6eac5f9dae478 (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: disable the hrtick for now
  sched: revert back to per-rq vruntime
  sched: fair scheduler should not resched rt tasks
  sched: optimize group load balancer
  sched: minor fast-path overhead reduction
  sched: fix the wrong mask_len, cleanup
  sched: kill unused scheduler decl.
  sched: fix the wrong mask_len
  sched: only update rq->clock while holding rq->lock
-rw-r--r-- include/linux/sched.h 2
-rw-r--r-- kernel/sched.c 51
-rw-r--r-- kernel/sched_fair.c 62
-rw-r--r-- kernel/sched_features.h 2
-rw-r--r-- kernel/sched_stats.h 2
-rw-r--r-- kernel/sysctl.c 10
6 files changed, 79 insertions, 50 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c38db536e07..10bff55b0824 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -287,7 +287,6 @@ extern void trap_init(void);
287extern void account_process_tick(struct task_struct *task, int user); 287extern void account_process_tick(struct task_struct *task, int user);
288extern void update_process_times(int user); 288extern void update_process_times(int user);
289extern void scheduler_tick(void); 289extern void scheduler_tick(void);
290extern void hrtick_resched(void);
291 290
292extern void sched_show_task(struct task_struct *p); 291extern void sched_show_task(struct task_struct *p);
293 292
@@ -1665,6 +1664,7 @@ extern unsigned int sysctl_sched_features;
1665extern unsigned int sysctl_sched_migration_cost; 1664extern unsigned int sysctl_sched_migration_cost;
1666extern unsigned int sysctl_sched_nr_migrate; 1665extern unsigned int sysctl_sched_nr_migrate;
1667extern unsigned int sysctl_sched_shares_ratelimit; 1666extern unsigned int sysctl_sched_shares_ratelimit;
1667extern unsigned int sysctl_sched_shares_thresh;
1668 1668
1669int sched_nr_latency_handler(struct ctl_table *table, int write, 1669int sched_nr_latency_handler(struct ctl_table *table, int write,
1670 struct file *file, void __user *buffer, size_t *length, 1670 struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index d906f72b42d2..945a97b9600d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
819unsigned int sysctl_sched_shares_ratelimit = 250000; 819unsigned int sysctl_sched_shares_ratelimit = 250000;
820 820
821/* 821/*
822 * Inject some fuzzyness into changing the per-cpu group shares
823 * this avoids remote rq-locks at the expense of fairness.
824 * default: 4
825 */
826unsigned int sysctl_sched_shares_thresh = 4;
827
828/*
822 * period over which we measure -rt task cpu usage in us. 829 * period over which we measure -rt task cpu usage in us.
823 * default: 1s 830 * default: 1s
824 */ 831 */
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
1454 * Calculate and set the cpu's group shares. 1461 * Calculate and set the cpu's group shares.
1455 */ 1462 */
1456static void 1463static void
1457__update_group_shares_cpu(struct task_group *tg, int cpu, 1464update_group_shares_cpu(struct task_group *tg, int cpu,
1458 unsigned long sd_shares, unsigned long sd_rq_weight) 1465 unsigned long sd_shares, unsigned long sd_rq_weight)
1459{ 1466{
1460 int boost = 0; 1467 int boost = 0;
1461 unsigned long shares; 1468 unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
1486 * 1493 *
1487 */ 1494 */
1488 shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); 1495 shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
1496 shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
1489 1497
1490 /* 1498 if (abs(shares - tg->se[cpu]->load.weight) >
1491 * record the actual number of shares, not the boosted amount. 1499 sysctl_sched_shares_thresh) {
1492 */ 1500 struct rq *rq = cpu_rq(cpu);
1493 tg->cfs_rq[cpu]->shares = boost ? 0 : shares; 1501 unsigned long flags;
1494 tg->cfs_rq[cpu]->rq_weight = rq_weight;
1495 1502
1496 if (shares < MIN_SHARES) 1503 spin_lock_irqsave(&rq->lock, flags);
1497 shares = MIN_SHARES; 1504 /*
1498 else if (shares > MAX_SHARES) 1505 * record the actual number of shares, not the boosted amount.
1499 shares = MAX_SHARES; 1506 */
1507 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1508 tg->cfs_rq[cpu]->rq_weight = rq_weight;
1500 1509
1501 __set_se_shares(tg->se[cpu], shares); 1510 __set_se_shares(tg->se[cpu], shares);
1511 spin_unlock_irqrestore(&rq->lock, flags);
1512 }
1502} 1513}
1503 1514
1504/* 1515/*
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
1527 if (!rq_weight) 1538 if (!rq_weight)
1528 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; 1539 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
1529 1540
1530 for_each_cpu_mask(i, sd->span) { 1541 for_each_cpu_mask(i, sd->span)
1531 struct rq *rq = cpu_rq(i); 1542 update_group_shares_cpu(tg, i, shares, rq_weight);
1532 unsigned long flags;
1533
1534 spin_lock_irqsave(&rq->lock, flags);
1535 __update_group_shares_cpu(tg, i, shares, rq_weight);
1536 spin_unlock_irqrestore(&rq->lock, flags);
1537 }
1538 1543
1539 return 0; 1544 return 0;
1540} 1545}
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
4443 if (sched_feat(HRTICK)) 4448 if (sched_feat(HRTICK))
4444 hrtick_clear(rq); 4449 hrtick_clear(rq);
4445 4450
4446 /* 4451 spin_lock_irq(&rq->lock);
4447 * Do the rq-clock update outside the rq lock:
4448 */
4449 local_irq_disable();
4450 update_rq_clock(rq); 4452 update_rq_clock(rq);
4451 spin_lock(&rq->lock);
4452 clear_tsk_need_resched(prev); 4453 clear_tsk_need_resched(prev);
4453 4454
4454 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 4455 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f604dae71316..9573c33688b8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
73 73
74const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 74const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
75 75
76static const struct sched_class fair_sched_class;
77
76/************************************************************** 78/**************************************************************
77 * CFS operations on generic schedulable entities: 79 * CFS operations on generic schedulable entities:
78 */ 80 */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
334#endif 336#endif
335 337
336/* 338/*
337 * delta *= w / rw 339 * delta *= P[w / rw]
338 */ 340 */
339static inline unsigned long 341static inline unsigned long
340calc_delta_weight(unsigned long delta, struct sched_entity *se) 342calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
348} 350}
349 351
350/* 352/*
351 * delta *= rw / w 353 * delta /= w
352 */ 354 */
353static inline unsigned long 355static inline unsigned long
354calc_delta_fair(unsigned long delta, struct sched_entity *se) 356calc_delta_fair(unsigned long delta, struct sched_entity *se)
355{ 357{
356 for_each_sched_entity(se) { 358 if (unlikely(se->load.weight != NICE_0_LOAD))
357 delta = calc_delta_mine(delta, 359 delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
358 cfs_rq_of(se)->load.weight, &se->load);
359 }
360 360
361 return delta; 361 return delta;
362} 362}
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
386 * We calculate the wall-time slice from the period by taking a part 386 * We calculate the wall-time slice from the period by taking a part
387 * proportional to the weight. 387 * proportional to the weight.
388 * 388 *
389 * s = p*w/rw 389 * s = p*P[w/rw]
390 */ 390 */
391static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) 391static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
392{ 392{
393 return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); 393 unsigned long nr_running = cfs_rq->nr_running;
394
395 if (unlikely(!se->on_rq))
396 nr_running++;
397
398 return calc_delta_weight(__sched_period(nr_running), se);
394} 399}
395 400
396/* 401/*
397 * We calculate the vruntime slice of a to be inserted task 402 * We calculate the vruntime slice of a to be inserted task
398 * 403 *
399 * vs = s*rw/w = p 404 * vs = s/w
400 */ 405 */
401static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) 406static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
402{ 407{
403 unsigned long nr_running = cfs_rq->nr_running; 408 return calc_delta_fair(sched_slice(cfs_rq, se), se);
404
405 if (!se->on_rq)
406 nr_running++;
407
408 return __sched_period(nr_running);
409} 409}
410 410
411/* 411/*
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
628 * stays open at the end. 628 * stays open at the end.
629 */ 629 */
630 if (initial && sched_feat(START_DEBIT)) 630 if (initial && sched_feat(START_DEBIT))
631 vruntime += sched_vslice_add(cfs_rq, se); 631 vruntime += sched_vslice(cfs_rq, se);
632 632
633 if (!initial) { 633 if (!initial) {
634 /* sleeps upto a single latency don't count. */ 634 /* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
748 struct rq *rq = rq_of(cfs_rq); 748 struct rq *rq = rq_of(cfs_rq);
749 u64 pair_slice = rq->clock - cfs_rq->pair_start; 749 u64 pair_slice = rq->clock - cfs_rq->pair_start;
750 750
751 if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) { 751 if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
752 cfs_rq->pair_start = rq->clock; 752 cfs_rq->pair_start = rq->clock;
753 return se; 753 return se;
754 } 754 }
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
849 hrtick_start(rq, delta); 849 hrtick_start(rq, delta);
850 } 850 }
851} 851}
852
853/*
854 * called from enqueue/dequeue and updates the hrtick when the
855 * current task is from our class and nr_running is low enough
856 * to matter.
857 */
858static void hrtick_update(struct rq *rq)
859{
860 struct task_struct *curr = rq->curr;
861
862 if (curr->sched_class != &fair_sched_class)
863 return;
864
865 if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
866 hrtick_start_fair(rq, curr);
867}
852#else /* !CONFIG_SCHED_HRTICK */ 868#else /* !CONFIG_SCHED_HRTICK */
853static inline void 869static inline void
854hrtick_start_fair(struct rq *rq, struct task_struct *p) 870hrtick_start_fair(struct rq *rq, struct task_struct *p)
855{ 871{
856} 872}
873
874static inline void hrtick_update(struct rq *rq)
875{
876}
857#endif 877#endif
858 878
859/* 879/*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
874 wakeup = 1; 894 wakeup = 1;
875 } 895 }
876 896
877 hrtick_start_fair(rq, rq->curr); 897 hrtick_update(rq);
878} 898}
879 899
880/* 900/*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
896 sleep = 1; 916 sleep = 1;
897 } 917 }
898 918
899 hrtick_start_fair(rq, rq->curr); 919 hrtick_update(rq);
900} 920}
901 921
902/* 922/*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
1002 1022
1003#ifdef CONFIG_SMP 1023#ifdef CONFIG_SMP
1004 1024
1005static const struct sched_class fair_sched_class;
1006
1007#ifdef CONFIG_FAIR_GROUP_SCHED 1025#ifdef CONFIG_FAIR_GROUP_SCHED
1008/* 1026/*
1009 * effective_load() calculates the load change as seen from the root_task_group 1027 * effective_load() calculates the load change as seen from the root_task_group
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 7c9e8f4a049f..fda016218296 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
5SCHED_FEAT(AFFINE_WAKEUPS, 1) 5SCHED_FEAT(AFFINE_WAKEUPS, 1)
6SCHED_FEAT(CACHE_HOT_BUDDY, 1) 6SCHED_FEAT(CACHE_HOT_BUDDY, 1)
7SCHED_FEAT(SYNC_WAKEUPS, 1) 7SCHED_FEAT(SYNC_WAKEUPS, 1)
8SCHED_FEAT(HRTICK, 1) 8SCHED_FEAT(HRTICK, 0)
9SCHED_FEAT(DOUBLE_TICK, 0) 9SCHED_FEAT(DOUBLE_TICK, 0)
10SCHED_FEAT(ASYM_GRAN, 1) 10SCHED_FEAT(ASYM_GRAN, 1)
11SCHED_FEAT(LB_BIAS, 1) 11SCHED_FEAT(LB_BIAS, 1)
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index b8c156979cf2..2df9d297d292 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -9,7 +9,7 @@
9static int show_schedstat(struct seq_file *seq, void *v) 9static int show_schedstat(struct seq_file *seq, void *v)
10{ 10{
11 int cpu; 11 int cpu;
12 int mask_len = NR_CPUS/32 * 9; 12 int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
13 char *mask_str = kmalloc(mask_len, GFP_KERNEL); 13 char *mask_str = kmalloc(mask_len, GFP_KERNEL);
14 14
15 if (mask_str == NULL) 15 if (mask_str == NULL)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b3cc73931d1f..a13bd4dfaeb1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
276 }, 276 },
277 { 277 {
278 .ctl_name = CTL_UNNUMBERED, 278 .ctl_name = CTL_UNNUMBERED,
279 .procname = "sched_shares_thresh",
280 .data = &sysctl_sched_shares_thresh,
281 .maxlen = sizeof(unsigned int),
282 .mode = 0644,
283 .proc_handler = &proc_dointvec_minmax,
284 .strategy = &sysctl_intvec,
285 .extra1 = &zero,
286 },
287 {
288 .ctl_name = CTL_UNNUMBERED,
279 .procname = "sched_child_runs_first", 289 .procname = "sched_child_runs_first",
280 .data = &sysctl_sched_child_runs_first, 290 .data = &sysctl_sched_child_runs_first,
281 .maxlen = sizeof(unsigned int), 291 .maxlen = sizeof(unsigned int),