diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 11 | ||||
-rw-r--r-- | kernel/audit_tree.c | 5 | ||||
-rw-r--r-- | kernel/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 130 | ||||
-rw-r--r-- | kernel/module.c | 18 | ||||
-rw-r--r-- | kernel/relay.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 447 | ||||
-rw-r--r-- | kernel/sched_clock.c | 18 | ||||
-rw-r--r-- | kernel/sched_debug.c | 5 | ||||
-rw-r--r-- | kernel/sched_fair.c | 254 | ||||
-rw-r--r-- | kernel/sched_rt.c | 4 | ||||
-rw-r--r-- | kernel/sched_stats.h | 1 | ||||
-rw-r--r-- | kernel/signal.c | 51 | ||||
-rw-r--r-- | kernel/stop_machine.c | 7 | ||||
-rw-r--r-- | kernel/sys.c | 6 | ||||
-rw-r--r-- | kernel/sysctl.c | 5 |
17 files changed, 233 insertions, 740 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index b7d3709cc452..e8692a5748c2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -572,16 +572,17 @@ void audit_send_reply(int pid, int seq, int type, int done, int multi, | |||
572 | 572 | ||
573 | skb = audit_make_reply(pid, seq, type, done, multi, payload, size); | 573 | skb = audit_make_reply(pid, seq, type, done, multi, payload, size); |
574 | if (!skb) | 574 | if (!skb) |
575 | return; | 575 | goto out; |
576 | 576 | ||
577 | reply->pid = pid; | 577 | reply->pid = pid; |
578 | reply->skb = skb; | 578 | reply->skb = skb; |
579 | 579 | ||
580 | tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); | 580 | tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); |
581 | if (IS_ERR(tsk)) { | 581 | if (!IS_ERR(tsk)) |
582 | kfree(reply); | 582 | return; |
583 | kfree_skb(skb); | 583 | kfree_skb(skb); |
584 | } | 584 | out: |
585 | kfree(reply); | ||
585 | } | 586 | } |
586 | 587 | ||
587 | /* | 588 | /* |
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 9ef5e0aacc3c..f7921a2ecf16 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c | |||
@@ -172,10 +172,9 @@ static void insert_hash(struct audit_chunk *chunk) | |||
172 | struct audit_chunk *audit_tree_lookup(const struct inode *inode) | 172 | struct audit_chunk *audit_tree_lookup(const struct inode *inode) |
173 | { | 173 | { |
174 | struct list_head *list = chunk_hash(inode); | 174 | struct list_head *list = chunk_hash(inode); |
175 | struct list_head *pos; | 175 | struct audit_chunk *p; |
176 | 176 | ||
177 | list_for_each_rcu(pos, list) { | 177 | list_for_each_entry_rcu(p, list, hash) { |
178 | struct audit_chunk *p = container_of(pos, struct audit_chunk, hash); | ||
179 | if (p->watch.inode == inode) { | 178 | if (p->watch.inode == inode) { |
180 | get_inotify_watch(&p->watch); | 179 | get_inotify_watch(&p->watch); |
181 | return p; | 180 | return p; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fbc6fc8949b4..15ac0e1e4f4d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -2903,7 +2903,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) | |||
2903 | cg = tsk->cgroups; | 2903 | cg = tsk->cgroups; |
2904 | parent = task_cgroup(tsk, subsys->subsys_id); | 2904 | parent = task_cgroup(tsk, subsys->subsys_id); |
2905 | 2905 | ||
2906 | snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid); | 2906 | snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid); |
2907 | 2907 | ||
2908 | /* Pin the hierarchy */ | 2908 | /* Pin the hierarchy */ |
2909 | atomic_inc(&parent->root->sb->s_active); | 2909 | atomic_inc(&parent->root->sb->s_active); |
diff --git a/kernel/exit.c b/kernel/exit.c index 1510f78a0ffa..8f6185e69b69 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -126,6 +126,12 @@ static void __exit_signal(struct task_struct *tsk) | |||
126 | 126 | ||
127 | __unhash_process(tsk); | 127 | __unhash_process(tsk); |
128 | 128 | ||
129 | /* | ||
130 | * Do this under ->siglock, we can race with another thread | ||
131 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | ||
132 | */ | ||
133 | flush_sigqueue(&tsk->pending); | ||
134 | |||
129 | tsk->signal = NULL; | 135 | tsk->signal = NULL; |
130 | tsk->sighand = NULL; | 136 | tsk->sighand = NULL; |
131 | spin_unlock(&sighand->siglock); | 137 | spin_unlock(&sighand->siglock); |
@@ -133,7 +139,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
133 | 139 | ||
134 | __cleanup_sighand(sighand); | 140 | __cleanup_sighand(sighand); |
135 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | 141 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); |
136 | flush_sigqueue(&tsk->pending); | ||
137 | if (sig) { | 142 | if (sig) { |
138 | flush_sigqueue(&sig->shared_pending); | 143 | flush_sigqueue(&sig->shared_pending); |
139 | taskstats_tgid_free(sig); | 144 | taskstats_tgid_free(sig); |
diff --git a/kernel/fork.c b/kernel/fork.c index 933e60ebccae..19908b26cf80 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -660,136 +660,6 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) | |||
660 | return 0; | 660 | return 0; |
661 | } | 661 | } |
662 | 662 | ||
663 | static int count_open_files(struct fdtable *fdt) | ||
664 | { | ||
665 | int size = fdt->max_fds; | ||
666 | int i; | ||
667 | |||
668 | /* Find the last open fd */ | ||
669 | for (i = size/(8*sizeof(long)); i > 0; ) { | ||
670 | if (fdt->open_fds->fds_bits[--i]) | ||
671 | break; | ||
672 | } | ||
673 | i = (i+1) * 8 * sizeof(long); | ||
674 | return i; | ||
675 | } | ||
676 | |||
677 | static struct files_struct *alloc_files(void) | ||
678 | { | ||
679 | struct files_struct *newf; | ||
680 | struct fdtable *fdt; | ||
681 | |||
682 | newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); | ||
683 | if (!newf) | ||
684 | goto out; | ||
685 | |||
686 | atomic_set(&newf->count, 1); | ||
687 | |||
688 | spin_lock_init(&newf->file_lock); | ||
689 | newf->next_fd = 0; | ||
690 | fdt = &newf->fdtab; | ||
691 | fdt->max_fds = NR_OPEN_DEFAULT; | ||
692 | fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | ||
693 | fdt->open_fds = (fd_set *)&newf->open_fds_init; | ||
694 | fdt->fd = &newf->fd_array[0]; | ||
695 | INIT_RCU_HEAD(&fdt->rcu); | ||
696 | fdt->next = NULL; | ||
697 | rcu_assign_pointer(newf->fdt, fdt); | ||
698 | out: | ||
699 | return newf; | ||
700 | } | ||
701 | |||
702 | /* | ||
703 | * Allocate a new files structure and copy contents from the | ||
704 | * passed in files structure. | ||
705 | * errorp will be valid only when the returned files_struct is NULL. | ||
706 | */ | ||
707 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | ||
708 | { | ||
709 | struct files_struct *newf; | ||
710 | struct file **old_fds, **new_fds; | ||
711 | int open_files, size, i; | ||
712 | struct fdtable *old_fdt, *new_fdt; | ||
713 | |||
714 | *errorp = -ENOMEM; | ||
715 | newf = alloc_files(); | ||
716 | if (!newf) | ||
717 | goto out; | ||
718 | |||
719 | spin_lock(&oldf->file_lock); | ||
720 | old_fdt = files_fdtable(oldf); | ||
721 | new_fdt = files_fdtable(newf); | ||
722 | open_files = count_open_files(old_fdt); | ||
723 | |||
724 | /* | ||
725 | * Check whether we need to allocate a larger fd array and fd set. | ||
726 | * Note: we're not a clone task, so the open count won't change. | ||
727 | */ | ||
728 | if (open_files > new_fdt->max_fds) { | ||
729 | new_fdt->max_fds = 0; | ||
730 | spin_unlock(&oldf->file_lock); | ||
731 | spin_lock(&newf->file_lock); | ||
732 | *errorp = expand_files(newf, open_files-1); | ||
733 | spin_unlock(&newf->file_lock); | ||
734 | if (*errorp < 0) | ||
735 | goto out_release; | ||
736 | new_fdt = files_fdtable(newf); | ||
737 | /* | ||
738 | * Reacquire the oldf lock and a pointer to its fd table | ||
739 | * who knows it may have a new bigger fd table. We need | ||
740 | * the latest pointer. | ||
741 | */ | ||
742 | spin_lock(&oldf->file_lock); | ||
743 | old_fdt = files_fdtable(oldf); | ||
744 | } | ||
745 | |||
746 | old_fds = old_fdt->fd; | ||
747 | new_fds = new_fdt->fd; | ||
748 | |||
749 | memcpy(new_fdt->open_fds->fds_bits, | ||
750 | old_fdt->open_fds->fds_bits, open_files/8); | ||
751 | memcpy(new_fdt->close_on_exec->fds_bits, | ||
752 | old_fdt->close_on_exec->fds_bits, open_files/8); | ||
753 | |||
754 | for (i = open_files; i != 0; i--) { | ||
755 | struct file *f = *old_fds++; | ||
756 | if (f) { | ||
757 | get_file(f); | ||
758 | } else { | ||
759 | /* | ||
760 | * The fd may be claimed in the fd bitmap but not yet | ||
761 | * instantiated in the files array if a sibling thread | ||
762 | * is partway through open(). So make sure that this | ||
763 | * fd is available to the new process. | ||
764 | */ | ||
765 | FD_CLR(open_files - i, new_fdt->open_fds); | ||
766 | } | ||
767 | rcu_assign_pointer(*new_fds++, f); | ||
768 | } | ||
769 | spin_unlock(&oldf->file_lock); | ||
770 | |||
771 | /* compute the remainder to be cleared */ | ||
772 | size = (new_fdt->max_fds - open_files) * sizeof(struct file *); | ||
773 | |||
774 | /* This is long word aligned thus could use a optimized version */ | ||
775 | memset(new_fds, 0, size); | ||
776 | |||
777 | if (new_fdt->max_fds > open_files) { | ||
778 | int left = (new_fdt->max_fds-open_files)/8; | ||
779 | int start = open_files / (8 * sizeof(unsigned long)); | ||
780 | |||
781 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); | ||
782 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | ||
783 | } | ||
784 | |||
785 | return newf; | ||
786 | |||
787 | out_release: | ||
788 | kmem_cache_free(files_cachep, newf); | ||
789 | out: | ||
790 | return NULL; | ||
791 | } | ||
792 | |||
793 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | 663 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) |
794 | { | 664 | { |
795 | struct files_struct *oldf, *newf; | 665 | struct files_struct *oldf, *newf; |
diff --git a/kernel/module.c b/kernel/module.c index f5e9491ef7ac..5f80478b746d 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -1337,7 +1337,19 @@ out_unreg: | |||
1337 | kobject_put(&mod->mkobj.kobj); | 1337 | kobject_put(&mod->mkobj.kobj); |
1338 | return err; | 1338 | return err; |
1339 | } | 1339 | } |
1340 | #endif | 1340 | |
1341 | static void mod_sysfs_fini(struct module *mod) | ||
1342 | { | ||
1343 | kobject_put(&mod->mkobj.kobj); | ||
1344 | } | ||
1345 | |||
1346 | #else /* CONFIG_SYSFS */ | ||
1347 | |||
1348 | static void mod_sysfs_fini(struct module *mod) | ||
1349 | { | ||
1350 | } | ||
1351 | |||
1352 | #endif /* CONFIG_SYSFS */ | ||
1341 | 1353 | ||
1342 | static void mod_kobject_remove(struct module *mod) | 1354 | static void mod_kobject_remove(struct module *mod) |
1343 | { | 1355 | { |
@@ -1345,7 +1357,7 @@ static void mod_kobject_remove(struct module *mod) | |||
1345 | module_param_sysfs_remove(mod); | 1357 | module_param_sysfs_remove(mod); |
1346 | kobject_put(mod->mkobj.drivers_dir); | 1358 | kobject_put(mod->mkobj.drivers_dir); |
1347 | kobject_put(mod->holders_dir); | 1359 | kobject_put(mod->holders_dir); |
1348 | kobject_put(&mod->mkobj.kobj); | 1360 | mod_sysfs_fini(mod); |
1349 | } | 1361 | } |
1350 | 1362 | ||
1351 | /* | 1363 | /* |
@@ -1780,7 +1792,7 @@ static struct module *load_module(void __user *umod, | |||
1780 | 1792 | ||
1781 | /* Sanity checks against insmoding binaries or wrong arch, | 1793 | /* Sanity checks against insmoding binaries or wrong arch, |
1782 | weird elf version */ | 1794 | weird elf version */ |
1783 | if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 | 1795 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 |
1784 | || hdr->e_type != ET_REL | 1796 | || hdr->e_type != ET_REL |
1785 | || !elf_check_arch(hdr) | 1797 | || !elf_check_arch(hdr) |
1786 | || hdr->e_shentsize != sizeof(*sechdrs)) { | 1798 | || hdr->e_shentsize != sizeof(*sechdrs)) { |
diff --git a/kernel/relay.c b/kernel/relay.c index bc24dcdc570f..7de644cdec43 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, | |||
1191 | ret = 0; | 1191 | ret = 0; |
1192 | spliced = 0; | 1192 | spliced = 0; |
1193 | 1193 | ||
1194 | while (len) { | 1194 | while (len && !spliced) { |
1195 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); | 1195 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); |
1196 | if (ret < 0) | 1196 | if (ret < 0) |
1197 | break; | 1197 | break; |
diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a91539..bfb8ad8ed171 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -136,7 +136,7 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | |||
136 | 136 | ||
137 | static inline int rt_policy(int policy) | 137 | static inline int rt_policy(int policy) |
138 | { | 138 | { |
139 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) | 139 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
140 | return 1; | 140 | return 1; |
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
@@ -398,43 +398,6 @@ struct cfs_rq { | |||
398 | */ | 398 | */ |
399 | struct list_head leaf_cfs_rq_list; | 399 | struct list_head leaf_cfs_rq_list; |
400 | struct task_group *tg; /* group that "owns" this runqueue */ | 400 | struct task_group *tg; /* group that "owns" this runqueue */ |
401 | |||
402 | #ifdef CONFIG_SMP | ||
403 | unsigned long task_weight; | ||
404 | unsigned long shares; | ||
405 | /* | ||
406 | * We need space to build a sched_domain wide view of the full task | ||
407 | * group tree, in order to avoid depending on dynamic memory allocation | ||
408 | * during the load balancing we place this in the per cpu task group | ||
409 | * hierarchy. This limits the load balancing to one instance per cpu, | ||
410 | * but more should not be needed anyway. | ||
411 | */ | ||
412 | struct aggregate_struct { | ||
413 | /* | ||
414 | * load = weight(cpus) * f(tg) | ||
415 | * | ||
416 | * Where f(tg) is the recursive weight fraction assigned to | ||
417 | * this group. | ||
418 | */ | ||
419 | unsigned long load; | ||
420 | |||
421 | /* | ||
422 | * part of the group weight distributed to this span. | ||
423 | */ | ||
424 | unsigned long shares; | ||
425 | |||
426 | /* | ||
427 | * The sum of all runqueue weights within this span. | ||
428 | */ | ||
429 | unsigned long rq_weight; | ||
430 | |||
431 | /* | ||
432 | * Weight contributed by tasks; this is the part we can | ||
433 | * influence by moving tasks around. | ||
434 | */ | ||
435 | unsigned long task_weight; | ||
436 | } aggregate; | ||
437 | #endif | ||
438 | #endif | 401 | #endif |
439 | }; | 402 | }; |
440 | 403 | ||
@@ -1368,9 +1331,6 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
1368 | */ | 1331 | */ |
1369 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1332 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
1370 | 1333 | ||
1371 | /* | ||
1372 | * delta *= weight / lw | ||
1373 | */ | ||
1374 | static unsigned long | 1334 | static unsigned long |
1375 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1335 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
1376 | struct load_weight *lw) | 1336 | struct load_weight *lw) |
@@ -1393,6 +1353,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1393 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1353 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
1394 | } | 1354 | } |
1395 | 1355 | ||
1356 | static inline unsigned long | ||
1357 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
1358 | { | ||
1359 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
1360 | } | ||
1361 | |||
1396 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1362 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
1397 | { | 1363 | { |
1398 | lw->weight += inc; | 1364 | lw->weight += inc; |
@@ -1505,326 +1471,6 @@ static unsigned long source_load(int cpu, int type); | |||
1505 | static unsigned long target_load(int cpu, int type); | 1471 | static unsigned long target_load(int cpu, int type); |
1506 | static unsigned long cpu_avg_load_per_task(int cpu); | 1472 | static unsigned long cpu_avg_load_per_task(int cpu); |
1507 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1473 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1508 | |||
1509 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1510 | |||
1511 | /* | ||
1512 | * Group load balancing. | ||
1513 | * | ||
1514 | * We calculate a few balance domain wide aggregate numbers; load and weight. | ||
1515 | * Given the pictures below, and assuming each item has equal weight: | ||
1516 | * | ||
1517 | * root 1 - thread | ||
1518 | * / | \ A - group | ||
1519 | * A 1 B | ||
1520 | * /|\ / \ | ||
1521 | * C 2 D 3 4 | ||
1522 | * | | | ||
1523 | * 5 6 | ||
1524 | * | ||
1525 | * load: | ||
1526 | * A and B get 1/3-rd of the total load. C and D get 1/3-rd of A's 1/3-rd, | ||
1527 | * which equals 1/9-th of the total load. | ||
1528 | * | ||
1529 | * shares: | ||
1530 | * The weight of this group on the selected cpus. | ||
1531 | * | ||
1532 | * rq_weight: | ||
1533 | * Direct sum of all the cpu's their rq weight, e.g. A would get 3 while | ||
1534 | * B would get 2. | ||
1535 | * | ||
1536 | * task_weight: | ||
1537 | * Part of the rq_weight contributed by tasks; all groups except B would | ||
1538 | * get 1, B gets 2. | ||
1539 | */ | ||
1540 | |||
1541 | static inline struct aggregate_struct * | ||
1542 | aggregate(struct task_group *tg, struct sched_domain *sd) | ||
1543 | { | ||
1544 | return &tg->cfs_rq[sd->first_cpu]->aggregate; | ||
1545 | } | ||
1546 | |||
1547 | typedef void (*aggregate_func)(struct task_group *, struct sched_domain *); | ||
1548 | |||
1549 | /* | ||
1550 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
1551 | * leaving it for the final time. | ||
1552 | */ | ||
1553 | static | ||
1554 | void aggregate_walk_tree(aggregate_func down, aggregate_func up, | ||
1555 | struct sched_domain *sd) | ||
1556 | { | ||
1557 | struct task_group *parent, *child; | ||
1558 | |||
1559 | rcu_read_lock(); | ||
1560 | parent = &root_task_group; | ||
1561 | down: | ||
1562 | (*down)(parent, sd); | ||
1563 | list_for_each_entry_rcu(child, &parent->children, siblings) { | ||
1564 | parent = child; | ||
1565 | goto down; | ||
1566 | |||
1567 | up: | ||
1568 | continue; | ||
1569 | } | ||
1570 | (*up)(parent, sd); | ||
1571 | |||
1572 | child = parent; | ||
1573 | parent = parent->parent; | ||
1574 | if (parent) | ||
1575 | goto up; | ||
1576 | rcu_read_unlock(); | ||
1577 | } | ||
1578 | |||
1579 | /* | ||
1580 | * Calculate the aggregate runqueue weight. | ||
1581 | */ | ||
1582 | static | ||
1583 | void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd) | ||
1584 | { | ||
1585 | unsigned long rq_weight = 0; | ||
1586 | unsigned long task_weight = 0; | ||
1587 | int i; | ||
1588 | |||
1589 | for_each_cpu_mask(i, sd->span) { | ||
1590 | rq_weight += tg->cfs_rq[i]->load.weight; | ||
1591 | task_weight += tg->cfs_rq[i]->task_weight; | ||
1592 | } | ||
1593 | |||
1594 | aggregate(tg, sd)->rq_weight = rq_weight; | ||
1595 | aggregate(tg, sd)->task_weight = task_weight; | ||
1596 | } | ||
1597 | |||
1598 | /* | ||
1599 | * Compute the weight of this group on the given cpus. | ||
1600 | */ | ||
1601 | static | ||
1602 | void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd) | ||
1603 | { | ||
1604 | unsigned long shares = 0; | ||
1605 | int i; | ||
1606 | |||
1607 | for_each_cpu_mask(i, sd->span) | ||
1608 | shares += tg->cfs_rq[i]->shares; | ||
1609 | |||
1610 | if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares) | ||
1611 | shares = tg->shares; | ||
1612 | |||
1613 | aggregate(tg, sd)->shares = shares; | ||
1614 | } | ||
1615 | |||
1616 | /* | ||
1617 | * Compute the load fraction assigned to this group, relies on the aggregate | ||
1618 | * weight and this group's parent's load, i.e. top-down. | ||
1619 | */ | ||
1620 | static | ||
1621 | void aggregate_group_load(struct task_group *tg, struct sched_domain *sd) | ||
1622 | { | ||
1623 | unsigned long load; | ||
1624 | |||
1625 | if (!tg->parent) { | ||
1626 | int i; | ||
1627 | |||
1628 | load = 0; | ||
1629 | for_each_cpu_mask(i, sd->span) | ||
1630 | load += cpu_rq(i)->load.weight; | ||
1631 | |||
1632 | } else { | ||
1633 | load = aggregate(tg->parent, sd)->load; | ||
1634 | |||
1635 | /* | ||
1636 | * shares is our weight in the parent's rq so | ||
1637 | * shares/parent->rq_weight gives our fraction of the load | ||
1638 | */ | ||
1639 | load *= aggregate(tg, sd)->shares; | ||
1640 | load /= aggregate(tg->parent, sd)->rq_weight + 1; | ||
1641 | } | ||
1642 | |||
1643 | aggregate(tg, sd)->load = load; | ||
1644 | } | ||
1645 | |||
1646 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
1647 | |||
1648 | /* | ||
1649 | * Calculate and set the cpu's group shares. | ||
1650 | */ | ||
1651 | static void | ||
1652 | __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd, | ||
1653 | int tcpu) | ||
1654 | { | ||
1655 | int boost = 0; | ||
1656 | unsigned long shares; | ||
1657 | unsigned long rq_weight; | ||
1658 | |||
1659 | if (!tg->se[tcpu]) | ||
1660 | return; | ||
1661 | |||
1662 | rq_weight = tg->cfs_rq[tcpu]->load.weight; | ||
1663 | |||
1664 | /* | ||
1665 | * If there are currently no tasks on the cpu pretend there is one of | ||
1666 | * average load so that when a new task gets to run here it will not | ||
1667 | * get delayed by group starvation. | ||
1668 | */ | ||
1669 | if (!rq_weight) { | ||
1670 | boost = 1; | ||
1671 | rq_weight = NICE_0_LOAD; | ||
1672 | } | ||
1673 | |||
1674 | /* | ||
1675 | * \Sum shares * rq_weight | ||
1676 | * shares = ----------------------- | ||
1677 | * \Sum rq_weight | ||
1678 | * | ||
1679 | */ | ||
1680 | shares = aggregate(tg, sd)->shares * rq_weight; | ||
1681 | shares /= aggregate(tg, sd)->rq_weight + 1; | ||
1682 | |||
1683 | /* | ||
1684 | * record the actual number of shares, not the boosted amount. | ||
1685 | */ | ||
1686 | tg->cfs_rq[tcpu]->shares = boost ? 0 : shares; | ||
1687 | |||
1688 | if (shares < MIN_SHARES) | ||
1689 | shares = MIN_SHARES; | ||
1690 | else if (shares > MAX_SHARES) | ||
1691 | shares = MAX_SHARES; | ||
1692 | |||
1693 | __set_se_shares(tg->se[tcpu], shares); | ||
1694 | } | ||
1695 | |||
1696 | /* | ||
1697 | * Re-adjust the weights on the cpu the task came from and on the cpu the | ||
1698 | * task went to. | ||
1699 | */ | ||
1700 | static void | ||
1701 | __move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
1702 | int scpu, int dcpu) | ||
1703 | { | ||
1704 | unsigned long shares; | ||
1705 | |||
1706 | shares = tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
1707 | |||
1708 | __update_group_shares_cpu(tg, sd, scpu); | ||
1709 | __update_group_shares_cpu(tg, sd, dcpu); | ||
1710 | |||
1711 | /* | ||
1712 | * ensure we never loose shares due to rounding errors in the | ||
1713 | * above redistribution. | ||
1714 | */ | ||
1715 | shares -= tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
1716 | if (shares) | ||
1717 | tg->cfs_rq[dcpu]->shares += shares; | ||
1718 | } | ||
1719 | |||
1720 | /* | ||
1721 | * Because changing a group's shares changes the weight of the super-group | ||
1722 | * we need to walk up the tree and change all shares until we hit the root. | ||
1723 | */ | ||
1724 | static void | ||
1725 | move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
1726 | int scpu, int dcpu) | ||
1727 | { | ||
1728 | while (tg) { | ||
1729 | __move_group_shares(tg, sd, scpu, dcpu); | ||
1730 | tg = tg->parent; | ||
1731 | } | ||
1732 | } | ||
1733 | |||
1734 | static | ||
1735 | void aggregate_group_set_shares(struct task_group *tg, struct sched_domain *sd) | ||
1736 | { | ||
1737 | unsigned long shares = aggregate(tg, sd)->shares; | ||
1738 | int i; | ||
1739 | |||
1740 | for_each_cpu_mask(i, sd->span) { | ||
1741 | struct rq *rq = cpu_rq(i); | ||
1742 | unsigned long flags; | ||
1743 | |||
1744 | spin_lock_irqsave(&rq->lock, flags); | ||
1745 | __update_group_shares_cpu(tg, sd, i); | ||
1746 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1747 | } | ||
1748 | |||
1749 | aggregate_group_shares(tg, sd); | ||
1750 | |||
1751 | /* | ||
1752 | * ensure we never loose shares due to rounding errors in the | ||
1753 | * above redistribution. | ||
1754 | */ | ||
1755 | shares -= aggregate(tg, sd)->shares; | ||
1756 | if (shares) { | ||
1757 | tg->cfs_rq[sd->first_cpu]->shares += shares; | ||
1758 | aggregate(tg, sd)->shares += shares; | ||
1759 | } | ||
1760 | } | ||
1761 | |||
1762 | /* | ||
1763 | * Calculate the accumulative weight and recursive load of each task group | ||
1764 | * while walking down the tree. | ||
1765 | */ | ||
1766 | static | ||
1767 | void aggregate_get_down(struct task_group *tg, struct sched_domain *sd) | ||
1768 | { | ||
1769 | aggregate_group_weight(tg, sd); | ||
1770 | aggregate_group_shares(tg, sd); | ||
1771 | aggregate_group_load(tg, sd); | ||
1772 | } | ||
1773 | |||
1774 | /* | ||
1775 | * Rebalance the cpu shares while walking back up the tree. | ||
1776 | */ | ||
1777 | static | ||
1778 | void aggregate_get_up(struct task_group *tg, struct sched_domain *sd) | ||
1779 | { | ||
1780 | aggregate_group_set_shares(tg, sd); | ||
1781 | } | ||
1782 | |||
1783 | static DEFINE_PER_CPU(spinlock_t, aggregate_lock); | ||
1784 | |||
1785 | static void __init init_aggregate(void) | ||
1786 | { | ||
1787 | int i; | ||
1788 | |||
1789 | for_each_possible_cpu(i) | ||
1790 | spin_lock_init(&per_cpu(aggregate_lock, i)); | ||
1791 | } | ||
1792 | |||
1793 | static int get_aggregate(struct sched_domain *sd) | ||
1794 | { | ||
1795 | if (!spin_trylock(&per_cpu(aggregate_lock, sd->first_cpu))) | ||
1796 | return 0; | ||
1797 | |||
1798 | aggregate_walk_tree(aggregate_get_down, aggregate_get_up, sd); | ||
1799 | return 1; | ||
1800 | } | ||
1801 | |||
1802 | static void put_aggregate(struct sched_domain *sd) | ||
1803 | { | ||
1804 | spin_unlock(&per_cpu(aggregate_lock, sd->first_cpu)); | ||
1805 | } | ||
1806 | |||
1807 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1808 | { | ||
1809 | cfs_rq->shares = shares; | ||
1810 | } | ||
1811 | |||
1812 | #else | ||
1813 | |||
1814 | static inline void init_aggregate(void) | ||
1815 | { | ||
1816 | } | ||
1817 | |||
1818 | static inline int get_aggregate(struct sched_domain *sd) | ||
1819 | { | ||
1820 | return 0; | ||
1821 | } | ||
1822 | |||
1823 | static inline void put_aggregate(struct sched_domain *sd) | ||
1824 | { | ||
1825 | } | ||
1826 | #endif | ||
1827 | |||
1828 | #else /* CONFIG_SMP */ | 1474 | #else /* CONFIG_SMP */ |
1829 | 1475 | ||
1830 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1476 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -1845,14 +1491,26 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1845 | 1491 | ||
1846 | #define sched_class_highest (&rt_sched_class) | 1492 | #define sched_class_highest (&rt_sched_class) |
1847 | 1493 | ||
1848 | static void inc_nr_running(struct rq *rq) | 1494 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
1495 | { | ||
1496 | update_load_add(&rq->load, p->se.load.weight); | ||
1497 | } | ||
1498 | |||
1499 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
1500 | { | ||
1501 | update_load_sub(&rq->load, p->se.load.weight); | ||
1502 | } | ||
1503 | |||
1504 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
1849 | { | 1505 | { |
1850 | rq->nr_running++; | 1506 | rq->nr_running++; |
1507 | inc_load(rq, p); | ||
1851 | } | 1508 | } |
1852 | 1509 | ||
1853 | static void dec_nr_running(struct rq *rq) | 1510 | static void dec_nr_running(struct task_struct *p, struct rq *rq) |
1854 | { | 1511 | { |
1855 | rq->nr_running--; | 1512 | rq->nr_running--; |
1513 | dec_load(rq, p); | ||
1856 | } | 1514 | } |
1857 | 1515 | ||
1858 | static void set_load_weight(struct task_struct *p) | 1516 | static void set_load_weight(struct task_struct *p) |
@@ -1944,7 +1602,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1944 | rq->nr_uninterruptible--; | 1602 | rq->nr_uninterruptible--; |
1945 | 1603 | ||
1946 | enqueue_task(rq, p, wakeup); | 1604 | enqueue_task(rq, p, wakeup); |
1947 | inc_nr_running(rq); | 1605 | inc_nr_running(p, rq); |
1948 | } | 1606 | } |
1949 | 1607 | ||
1950 | /* | 1608 | /* |
@@ -1956,7 +1614,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
1956 | rq->nr_uninterruptible++; | 1614 | rq->nr_uninterruptible++; |
1957 | 1615 | ||
1958 | dequeue_task(rq, p, sleep); | 1616 | dequeue_task(rq, p, sleep); |
1959 | dec_nr_running(rq); | 1617 | dec_nr_running(p, rq); |
1960 | } | 1618 | } |
1961 | 1619 | ||
1962 | /** | 1620 | /** |
@@ -2609,7 +2267,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2609 | * management (if any): | 2267 | * management (if any): |
2610 | */ | 2268 | */ |
2611 | p->sched_class->task_new(rq, p); | 2269 | p->sched_class->task_new(rq, p); |
2612 | inc_nr_running(rq); | 2270 | inc_nr_running(p, rq); |
2613 | } | 2271 | } |
2614 | check_preempt_curr(rq, p); | 2272 | check_preempt_curr(rq, p); |
2615 | #ifdef CONFIG_SMP | 2273 | #ifdef CONFIG_SMP |
@@ -3600,12 +3258,9 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3600 | unsigned long imbalance; | 3258 | unsigned long imbalance; |
3601 | struct rq *busiest; | 3259 | struct rq *busiest; |
3602 | unsigned long flags; | 3260 | unsigned long flags; |
3603 | int unlock_aggregate; | ||
3604 | 3261 | ||
3605 | cpus_setall(*cpus); | 3262 | cpus_setall(*cpus); |
3606 | 3263 | ||
3607 | unlock_aggregate = get_aggregate(sd); | ||
3608 | |||
3609 | /* | 3264 | /* |
3610 | * When power savings policy is enabled for the parent domain, idle | 3265 | * When power savings policy is enabled for the parent domain, idle |
3611 | * sibling can pick up load irrespective of busy siblings. In this case, | 3266 | * sibling can pick up load irrespective of busy siblings. In this case, |
@@ -3721,9 +3376,8 @@ redo: | |||
3721 | 3376 | ||
3722 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3377 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3723 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3378 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3724 | ld_moved = -1; | 3379 | return -1; |
3725 | 3380 | return ld_moved; | |
3726 | goto out; | ||
3727 | 3381 | ||
3728 | out_balanced: | 3382 | out_balanced: |
3729 | schedstat_inc(sd, lb_balanced[idle]); | 3383 | schedstat_inc(sd, lb_balanced[idle]); |
@@ -3738,13 +3392,8 @@ out_one_pinned: | |||
3738 | 3392 | ||
3739 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3393 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3740 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3394 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3741 | ld_moved = -1; | 3395 | return -1; |
3742 | else | 3396 | return 0; |
3743 | ld_moved = 0; | ||
3744 | out: | ||
3745 | if (unlock_aggregate) | ||
3746 | put_aggregate(sd); | ||
3747 | return ld_moved; | ||
3748 | } | 3397 | } |
3749 | 3398 | ||
3750 | /* | 3399 | /* |
@@ -4430,7 +4079,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4430 | * schedule() atomically, we ignore that path for now. | 4079 | * schedule() atomically, we ignore that path for now. |
4431 | * Otherwise, whine if we are scheduling when we should not be. | 4080 | * Otherwise, whine if we are scheduling when we should not be. |
4432 | */ | 4081 | */ |
4433 | if (unlikely(in_atomic_preempt_off()) && unlikely(!prev->exit_state)) | 4082 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
4434 | __schedule_bug(prev); | 4083 | __schedule_bug(prev); |
4435 | 4084 | ||
4436 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4085 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
@@ -4931,8 +4580,10 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4931 | goto out_unlock; | 4580 | goto out_unlock; |
4932 | } | 4581 | } |
4933 | on_rq = p->se.on_rq; | 4582 | on_rq = p->se.on_rq; |
4934 | if (on_rq) | 4583 | if (on_rq) { |
4935 | dequeue_task(rq, p, 0); | 4584 | dequeue_task(rq, p, 0); |
4585 | dec_load(rq, p); | ||
4586 | } | ||
4936 | 4587 | ||
4937 | p->static_prio = NICE_TO_PRIO(nice); | 4588 | p->static_prio = NICE_TO_PRIO(nice); |
4938 | set_load_weight(p); | 4589 | set_load_weight(p); |
@@ -4942,6 +4593,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4942 | 4593 | ||
4943 | if (on_rq) { | 4594 | if (on_rq) { |
4944 | enqueue_task(rq, p, 0); | 4595 | enqueue_task(rq, p, 0); |
4596 | inc_load(rq, p); | ||
4945 | /* | 4597 | /* |
4946 | * If the task increased its priority or is running and | 4598 | * If the task increased its priority or is running and |
4947 | * lowered its priority, then reschedule its CPU: | 4599 | * lowered its priority, then reschedule its CPU: |
@@ -7316,7 +6968,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7316 | SD_INIT(sd, ALLNODES); | 6968 | SD_INIT(sd, ALLNODES); |
7317 | set_domain_attribute(sd, attr); | 6969 | set_domain_attribute(sd, attr); |
7318 | sd->span = *cpu_map; | 6970 | sd->span = *cpu_map; |
7319 | sd->first_cpu = first_cpu(sd->span); | ||
7320 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); | 6971 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); |
7321 | p = sd; | 6972 | p = sd; |
7322 | sd_allnodes = 1; | 6973 | sd_allnodes = 1; |
@@ -7327,7 +6978,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7327 | SD_INIT(sd, NODE); | 6978 | SD_INIT(sd, NODE); |
7328 | set_domain_attribute(sd, attr); | 6979 | set_domain_attribute(sd, attr); |
7329 | sched_domain_node_span(cpu_to_node(i), &sd->span); | 6980 | sched_domain_node_span(cpu_to_node(i), &sd->span); |
7330 | sd->first_cpu = first_cpu(sd->span); | ||
7331 | sd->parent = p; | 6981 | sd->parent = p; |
7332 | if (p) | 6982 | if (p) |
7333 | p->child = sd; | 6983 | p->child = sd; |
@@ -7339,7 +6989,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7339 | SD_INIT(sd, CPU); | 6989 | SD_INIT(sd, CPU); |
7340 | set_domain_attribute(sd, attr); | 6990 | set_domain_attribute(sd, attr); |
7341 | sd->span = *nodemask; | 6991 | sd->span = *nodemask; |
7342 | sd->first_cpu = first_cpu(sd->span); | ||
7343 | sd->parent = p; | 6992 | sd->parent = p; |
7344 | if (p) | 6993 | if (p) |
7345 | p->child = sd; | 6994 | p->child = sd; |
@@ -7351,7 +7000,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7351 | SD_INIT(sd, MC); | 7000 | SD_INIT(sd, MC); |
7352 | set_domain_attribute(sd, attr); | 7001 | set_domain_attribute(sd, attr); |
7353 | sd->span = cpu_coregroup_map(i); | 7002 | sd->span = cpu_coregroup_map(i); |
7354 | sd->first_cpu = first_cpu(sd->span); | ||
7355 | cpus_and(sd->span, sd->span, *cpu_map); | 7003 | cpus_and(sd->span, sd->span, *cpu_map); |
7356 | sd->parent = p; | 7004 | sd->parent = p; |
7357 | p->child = sd; | 7005 | p->child = sd; |
@@ -7364,7 +7012,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7364 | SD_INIT(sd, SIBLING); | 7012 | SD_INIT(sd, SIBLING); |
7365 | set_domain_attribute(sd, attr); | 7013 | set_domain_attribute(sd, attr); |
7366 | sd->span = per_cpu(cpu_sibling_map, i); | 7014 | sd->span = per_cpu(cpu_sibling_map, i); |
7367 | sd->first_cpu = first_cpu(sd->span); | ||
7368 | cpus_and(sd->span, sd->span, *cpu_map); | 7015 | cpus_and(sd->span, sd->span, *cpu_map); |
7369 | sd->parent = p; | 7016 | sd->parent = p; |
7370 | p->child = sd; | 7017 | p->child = sd; |
@@ -7568,8 +7215,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
7568 | 7215 | ||
7569 | static cpumask_t *doms_cur; /* current sched domains */ | 7216 | static cpumask_t *doms_cur; /* current sched domains */ |
7570 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 7217 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
7571 | static struct sched_domain_attr *dattr_cur; /* attribues of custom domains | 7218 | static struct sched_domain_attr *dattr_cur; |
7572 | in 'doms_cur' */ | 7219 | /* attribues of custom domains in 'doms_cur' */ |
7573 | 7220 | ||
7574 | /* | 7221 | /* |
7575 | * Special case: If a kmalloc of a doms_cur partition (array of | 7222 | * Special case: If a kmalloc of a doms_cur partition (array of |
@@ -8034,7 +7681,6 @@ void __init sched_init(void) | |||
8034 | } | 7681 | } |
8035 | 7682 | ||
8036 | #ifdef CONFIG_SMP | 7683 | #ifdef CONFIG_SMP |
8037 | init_aggregate(); | ||
8038 | init_defrootdomain(); | 7684 | init_defrootdomain(); |
8039 | #endif | 7685 | #endif |
8040 | 7686 | ||
@@ -8599,11 +8245,14 @@ void sched_move_task(struct task_struct *tsk) | |||
8599 | #endif | 8245 | #endif |
8600 | 8246 | ||
8601 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8247 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8602 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) | 8248 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
8603 | { | 8249 | { |
8604 | struct cfs_rq *cfs_rq = se->cfs_rq; | 8250 | struct cfs_rq *cfs_rq = se->cfs_rq; |
8251 | struct rq *rq = cfs_rq->rq; | ||
8605 | int on_rq; | 8252 | int on_rq; |
8606 | 8253 | ||
8254 | spin_lock_irq(&rq->lock); | ||
8255 | |||
8607 | on_rq = se->on_rq; | 8256 | on_rq = se->on_rq; |
8608 | if (on_rq) | 8257 | if (on_rq) |
8609 | dequeue_entity(cfs_rq, se, 0); | 8258 | dequeue_entity(cfs_rq, se, 0); |
@@ -8613,17 +8262,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) | |||
8613 | 8262 | ||
8614 | if (on_rq) | 8263 | if (on_rq) |
8615 | enqueue_entity(cfs_rq, se, 0); | 8264 | enqueue_entity(cfs_rq, se, 0); |
8616 | } | ||
8617 | 8265 | ||
8618 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8266 | spin_unlock_irq(&rq->lock); |
8619 | { | ||
8620 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8621 | struct rq *rq = cfs_rq->rq; | ||
8622 | unsigned long flags; | ||
8623 | |||
8624 | spin_lock_irqsave(&rq->lock, flags); | ||
8625 | __set_se_shares(se, shares); | ||
8626 | spin_unlock_irqrestore(&rq->lock, flags); | ||
8627 | } | 8267 | } |
8628 | 8268 | ||
8629 | static DEFINE_MUTEX(shares_mutex); | 8269 | static DEFINE_MUTEX(shares_mutex); |
@@ -8662,13 +8302,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8662 | * w/o tripping rebalance_share or load_balance_fair. | 8302 | * w/o tripping rebalance_share or load_balance_fair. |
8663 | */ | 8303 | */ |
8664 | tg->shares = shares; | 8304 | tg->shares = shares; |
8665 | for_each_possible_cpu(i) { | 8305 | for_each_possible_cpu(i) |
8666 | /* | ||
8667 | * force a rebalance | ||
8668 | */ | ||
8669 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | ||
8670 | set_se_shares(tg->se[i], shares); | 8306 | set_se_shares(tg->se[i], shares); |
8671 | } | ||
8672 | 8307 | ||
8673 | /* | 8308 | /* |
8674 | * Enable load balance activity on this group, by inserting it back on | 8309 | * Enable load balance activity on this group, by inserting it back on |
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9c597e37f7de..ce05271219ab 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -59,22 +59,26 @@ static inline struct sched_clock_data *cpu_sdc(int cpu) | |||
59 | return &per_cpu(sched_clock_data, cpu); | 59 | return &per_cpu(sched_clock_data, cpu); |
60 | } | 60 | } |
61 | 61 | ||
62 | static __read_mostly int sched_clock_running; | ||
63 | |||
62 | void sched_clock_init(void) | 64 | void sched_clock_init(void) |
63 | { | 65 | { |
64 | u64 ktime_now = ktime_to_ns(ktime_get()); | 66 | u64 ktime_now = ktime_to_ns(ktime_get()); |
65 | u64 now = 0; | 67 | unsigned long now_jiffies = jiffies; |
66 | int cpu; | 68 | int cpu; |
67 | 69 | ||
68 | for_each_possible_cpu(cpu) { | 70 | for_each_possible_cpu(cpu) { |
69 | struct sched_clock_data *scd = cpu_sdc(cpu); | 71 | struct sched_clock_data *scd = cpu_sdc(cpu); |
70 | 72 | ||
71 | scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 73 | scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; |
72 | scd->prev_jiffies = jiffies; | 74 | scd->prev_jiffies = now_jiffies; |
73 | scd->prev_raw = now; | 75 | scd->prev_raw = 0; |
74 | scd->tick_raw = now; | 76 | scd->tick_raw = 0; |
75 | scd->tick_gtod = ktime_now; | 77 | scd->tick_gtod = ktime_now; |
76 | scd->clock = ktime_now; | 78 | scd->clock = ktime_now; |
77 | } | 79 | } |
80 | |||
81 | sched_clock_running = 1; | ||
78 | } | 82 | } |
79 | 83 | ||
80 | /* | 84 | /* |
@@ -136,6 +140,9 @@ u64 sched_clock_cpu(int cpu) | |||
136 | struct sched_clock_data *scd = cpu_sdc(cpu); | 140 | struct sched_clock_data *scd = cpu_sdc(cpu); |
137 | u64 now, clock; | 141 | u64 now, clock; |
138 | 142 | ||
143 | if (unlikely(!sched_clock_running)) | ||
144 | return 0ull; | ||
145 | |||
139 | WARN_ON_ONCE(!irqs_disabled()); | 146 | WARN_ON_ONCE(!irqs_disabled()); |
140 | now = sched_clock(); | 147 | now = sched_clock(); |
141 | 148 | ||
@@ -174,6 +181,9 @@ void sched_clock_tick(void) | |||
174 | struct sched_clock_data *scd = this_scd(); | 181 | struct sched_clock_data *scd = this_scd(); |
175 | u64 now, now_gtod; | 182 | u64 now, now_gtod; |
176 | 183 | ||
184 | if (unlikely(!sched_clock_running)) | ||
185 | return; | ||
186 | |||
177 | WARN_ON_ONCE(!irqs_disabled()); | 187 | WARN_ON_ONCE(!irqs_disabled()); |
178 | 188 | ||
179 | now = sched_clock(); | 189 | now = sched_clock(); |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5f06118fbc31..8bb713040ac9 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -167,11 +167,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
167 | #endif | 167 | #endif |
168 | SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", | 168 | SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", |
169 | cfs_rq->nr_spread_over); | 169 | cfs_rq->nr_spread_over); |
170 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
171 | #ifdef CONFIG_SMP | ||
172 | SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); | ||
173 | #endif | ||
174 | #endif | ||
175 | } | 170 | } |
176 | 171 | ||
177 | static void print_cpu(struct seq_file *m, int cpu) | 172 | static void print_cpu(struct seq_file *m, int cpu) |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd39c4b8..08ae848b71d4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 334 | #endif |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * delta *= w / rw | ||
338 | */ | ||
339 | static inline unsigned long | ||
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
341 | { | ||
342 | for_each_sched_entity(se) { | ||
343 | delta = calc_delta_mine(delta, | ||
344 | se->load.weight, &cfs_rq_of(se)->load); | ||
345 | } | ||
346 | |||
347 | return delta; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * delta *= rw / w | ||
352 | */ | ||
353 | static inline unsigned long | ||
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
355 | { | ||
356 | for_each_sched_entity(se) { | ||
357 | delta = calc_delta_mine(delta, | ||
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | |||
361 | return delta; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * The idea is to set a period in which each task runs once. | 337 | * The idea is to set a period in which each task runs once. |
366 | * | 338 | * |
367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
@@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running) | |||
390 | */ | 362 | */ |
391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
392 | { | 364 | { |
393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 365 | u64 slice = __sched_period(cfs_rq->nr_running); |
366 | |||
367 | for_each_sched_entity(se) { | ||
368 | cfs_rq = cfs_rq_of(se); | ||
369 | |||
370 | slice *= se->load.weight; | ||
371 | do_div(slice, cfs_rq->load.weight); | ||
372 | } | ||
373 | |||
374 | |||
375 | return slice; | ||
394 | } | 376 | } |
395 | 377 | ||
396 | /* | 378 | /* |
397 | * We calculate the vruntime slice of a to be inserted task | 379 | * We calculate the vruntime slice of a to be inserted task |
398 | * | 380 | * |
399 | * vs = s*rw/w = p | 381 | * vs = s/w = p/rw |
400 | */ | 382 | */ |
401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
402 | { | 384 | { |
403 | unsigned long nr_running = cfs_rq->nr_running; | 385 | unsigned long nr_running = cfs_rq->nr_running; |
386 | unsigned long weight; | ||
387 | u64 vslice; | ||
404 | 388 | ||
405 | if (!se->on_rq) | 389 | if (!se->on_rq) |
406 | nr_running++; | 390 | nr_running++; |
407 | 391 | ||
408 | return __sched_period(nr_running); | 392 | vslice = __sched_period(nr_running); |
409 | } | ||
410 | |||
411 | /* | ||
412 | * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in | ||
413 | * that it favours >=0 over <0. | ||
414 | * | ||
415 | * -20 | | ||
416 | * | | ||
417 | * 0 --------+------- | ||
418 | * .' | ||
419 | * 19 .' | ||
420 | * | ||
421 | */ | ||
422 | static unsigned long | ||
423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
424 | { | ||
425 | struct load_weight lw = { | ||
426 | .weight = NICE_0_LOAD, | ||
427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
428 | }; | ||
429 | 393 | ||
430 | for_each_sched_entity(se) { | 394 | for_each_sched_entity(se) { |
431 | struct load_weight *se_lw = &se->load; | 395 | cfs_rq = cfs_rq_of(se); |
432 | 396 | ||
433 | if (se->load.weight < NICE_0_LOAD) | 397 | weight = cfs_rq->load.weight; |
434 | se_lw = &lw; | 398 | if (!se->on_rq) |
399 | weight += se->load.weight; | ||
435 | 400 | ||
436 | delta = calc_delta_mine(delta, | 401 | vslice *= NICE_0_LOAD; |
437 | cfs_rq_of(se)->load.weight, se_lw); | 402 | do_div(vslice, weight); |
438 | } | 403 | } |
439 | 404 | ||
440 | return delta; | 405 | return vslice; |
441 | } | 406 | } |
442 | 407 | ||
443 | /* | 408 | /* |
@@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
454 | 419 | ||
455 | curr->sum_exec_runtime += delta_exec; | 420 | curr->sum_exec_runtime += delta_exec; |
456 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 421 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
457 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 422 | delta_exec_weighted = delta_exec; |
423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
425 | &curr->load); | ||
426 | } | ||
458 | curr->vruntime += delta_exec_weighted; | 427 | curr->vruntime += delta_exec_weighted; |
459 | } | 428 | } |
460 | 429 | ||
@@ -541,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
541 | * Scheduling class queueing methods: | 510 | * Scheduling class queueing methods: |
542 | */ | 511 | */ |
543 | 512 | ||
544 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
545 | static void | ||
546 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
547 | { | ||
548 | cfs_rq->task_weight += weight; | ||
549 | } | ||
550 | #else | ||
551 | static inline void | ||
552 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
553 | { | ||
554 | } | ||
555 | #endif | ||
556 | |||
557 | static void | 513 | static void |
558 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 514 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
559 | { | 515 | { |
560 | update_load_add(&cfs_rq->load, se->load.weight); | 516 | update_load_add(&cfs_rq->load, se->load.weight); |
561 | if (!parent_entity(se)) | ||
562 | inc_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
563 | if (entity_is_task(se)) | ||
564 | add_cfs_task_weight(cfs_rq, se->load.weight); | ||
565 | cfs_rq->nr_running++; | 517 | cfs_rq->nr_running++; |
566 | se->on_rq = 1; | 518 | se->on_rq = 1; |
567 | list_add(&se->group_node, &cfs_rq->tasks); | 519 | list_add(&se->group_node, &cfs_rq->tasks); |
@@ -571,10 +523,6 @@ static void | |||
571 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 523 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
572 | { | 524 | { |
573 | update_load_sub(&cfs_rq->load, se->load.weight); | 525 | update_load_sub(&cfs_rq->load, se->load.weight); |
574 | if (!parent_entity(se)) | ||
575 | dec_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
576 | if (entity_is_task(se)) | ||
577 | add_cfs_task_weight(cfs_rq, -se->load.weight); | ||
578 | cfs_rq->nr_running--; | 526 | cfs_rq->nr_running--; |
579 | se->on_rq = 0; | 527 | se->on_rq = 0; |
580 | list_del_init(&se->group_node); | 528 | list_del_init(&se->group_node); |
@@ -661,17 +609,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
661 | 609 | ||
662 | if (!initial) { | 610 | if (!initial) { |
663 | /* sleeps upto a single latency don't count. */ | 611 | /* sleeps upto a single latency don't count. */ |
664 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 612 | if (sched_feat(NEW_FAIR_SLEEPERS)) |
665 | unsigned long thresh = sysctl_sched_latency; | 613 | vruntime -= sysctl_sched_latency; |
666 | |||
667 | /* | ||
668 | * convert the sleeper threshold into virtual time | ||
669 | */ | ||
670 | if (sched_feat(NORMALIZED_SLEEPER)) | ||
671 | thresh = calc_delta_fair(thresh, se); | ||
672 | |||
673 | vruntime -= thresh; | ||
674 | } | ||
675 | 614 | ||
676 | /* ensure we never gain time by being placed backwards. */ | 615 | /* ensure we never gain time by being placed backwards. */ |
677 | vruntime = max_vruntime(se->vruntime, vruntime); | 616 | vruntime = max_vruntime(se->vruntime, vruntime); |
@@ -1057,16 +996,27 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
1057 | struct task_struct *curr = this_rq->curr; | 996 | struct task_struct *curr = this_rq->curr; |
1058 | unsigned long tl = this_load; | 997 | unsigned long tl = this_load; |
1059 | unsigned long tl_per_task; | 998 | unsigned long tl_per_task; |
999 | int balanced; | ||
1060 | 1000 | ||
1061 | if (!(this_sd->flags & SD_WAKE_AFFINE)) | 1001 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) |
1062 | return 0; | 1002 | return 0; |
1063 | 1003 | ||
1064 | /* | 1004 | /* |
1005 | * If sync wakeup then subtract the (maximum possible) | ||
1006 | * effect of the currently running task from the load | ||
1007 | * of the current CPU: | ||
1008 | */ | ||
1009 | if (sync) | ||
1010 | tl -= current->se.load.weight; | ||
1011 | |||
1012 | balanced = 100*(tl + p->se.load.weight) <= imbalance*load; | ||
1013 | |||
1014 | /* | ||
1065 | * If the currently running task will sleep within | 1015 | * If the currently running task will sleep within |
1066 | * a reasonable amount of time then attract this newly | 1016 | * a reasonable amount of time then attract this newly |
1067 | * woken task: | 1017 | * woken task: |
1068 | */ | 1018 | */ |
1069 | if (sync && curr->sched_class == &fair_sched_class) { | 1019 | if (sync && balanced && curr->sched_class == &fair_sched_class) { |
1070 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && | 1020 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && |
1071 | p->se.avg_overlap < sysctl_sched_migration_cost) | 1021 | p->se.avg_overlap < sysctl_sched_migration_cost) |
1072 | return 1; | 1022 | return 1; |
@@ -1075,16 +1025,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
1075 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1025 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
1076 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1026 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
1077 | 1027 | ||
1078 | /* | ||
1079 | * If sync wakeup then subtract the (maximum possible) | ||
1080 | * effect of the currently running task from the load | ||
1081 | * of the current CPU: | ||
1082 | */ | ||
1083 | if (sync) | ||
1084 | tl -= current->se.load.weight; | ||
1085 | |||
1086 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || | 1028 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || |
1087 | 100*(tl + p->se.load.weight) <= imbalance*load) { | 1029 | balanced) { |
1088 | /* | 1030 | /* |
1089 | * This domain has SD_WAKE_AFFINE and | 1031 | * This domain has SD_WAKE_AFFINE and |
1090 | * p is cache cold in this domain, and | 1032 | * p is cache cold in this domain, and |
@@ -1169,10 +1111,11 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
1169 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1111 | unsigned long gran = sysctl_sched_wakeup_granularity; |
1170 | 1112 | ||
1171 | /* | 1113 | /* |
1172 | * More easily preempt - nice tasks, while not making it harder for | 1114 | * More easily preempt - nice tasks, while not making |
1173 | * + nice tasks. | 1115 | * it harder for + nice tasks. |
1174 | */ | 1116 | */ |
1175 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); | 1117 | if (unlikely(se->load.weight > NICE_0_LOAD)) |
1118 | gran = calc_delta_fair(gran, &se->load); | ||
1176 | 1119 | ||
1177 | return gran; | 1120 | return gran; |
1178 | } | 1121 | } |
@@ -1366,90 +1309,75 @@ static struct task_struct *load_balance_next_fair(void *arg) | |||
1366 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); | 1309 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); |
1367 | } | 1310 | } |
1368 | 1311 | ||
1369 | static unsigned long | 1312 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1370 | __load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1313 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) |
1371 | unsigned long max_load_move, struct sched_domain *sd, | ||
1372 | enum cpu_idle_type idle, int *all_pinned, int *this_best_prio, | ||
1373 | struct cfs_rq *cfs_rq) | ||
1374 | { | 1314 | { |
1375 | struct rq_iterator cfs_rq_iterator; | 1315 | struct sched_entity *curr; |
1316 | struct task_struct *p; | ||
1376 | 1317 | ||
1377 | cfs_rq_iterator.start = load_balance_start_fair; | 1318 | if (!cfs_rq->nr_running || !first_fair(cfs_rq)) |
1378 | cfs_rq_iterator.next = load_balance_next_fair; | 1319 | return MAX_PRIO; |
1379 | cfs_rq_iterator.arg = cfs_rq; | 1320 | |
1321 | curr = cfs_rq->curr; | ||
1322 | if (!curr) | ||
1323 | curr = __pick_next_entity(cfs_rq); | ||
1324 | |||
1325 | p = task_of(curr); | ||
1380 | 1326 | ||
1381 | return balance_tasks(this_rq, this_cpu, busiest, | 1327 | return p->prio; |
1382 | max_load_move, sd, idle, all_pinned, | ||
1383 | this_best_prio, &cfs_rq_iterator); | ||
1384 | } | 1328 | } |
1329 | #endif | ||
1385 | 1330 | ||
1386 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1387 | static unsigned long | 1331 | static unsigned long |
1388 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1332 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1389 | unsigned long max_load_move, | 1333 | unsigned long max_load_move, |
1390 | struct sched_domain *sd, enum cpu_idle_type idle, | 1334 | struct sched_domain *sd, enum cpu_idle_type idle, |
1391 | int *all_pinned, int *this_best_prio) | 1335 | int *all_pinned, int *this_best_prio) |
1392 | { | 1336 | { |
1337 | struct cfs_rq *busy_cfs_rq; | ||
1393 | long rem_load_move = max_load_move; | 1338 | long rem_load_move = max_load_move; |
1394 | int busiest_cpu = cpu_of(busiest); | 1339 | struct rq_iterator cfs_rq_iterator; |
1395 | struct task_group *tg; | ||
1396 | |||
1397 | rcu_read_lock(); | ||
1398 | list_for_each_entry(tg, &task_groups, list) { | ||
1399 | long imbalance; | ||
1400 | unsigned long this_weight, busiest_weight; | ||
1401 | long rem_load, max_load, moved_load; | ||
1402 | |||
1403 | /* | ||
1404 | * empty group | ||
1405 | */ | ||
1406 | if (!aggregate(tg, sd)->task_weight) | ||
1407 | continue; | ||
1408 | |||
1409 | rem_load = rem_load_move * aggregate(tg, sd)->rq_weight; | ||
1410 | rem_load /= aggregate(tg, sd)->load + 1; | ||
1411 | |||
1412 | this_weight = tg->cfs_rq[this_cpu]->task_weight; | ||
1413 | busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight; | ||
1414 | 1340 | ||
1415 | imbalance = (busiest_weight - this_weight) / 2; | 1341 | cfs_rq_iterator.start = load_balance_start_fair; |
1342 | cfs_rq_iterator.next = load_balance_next_fair; | ||
1416 | 1343 | ||
1417 | if (imbalance < 0) | 1344 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { |
1418 | imbalance = busiest_weight; | 1345 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1346 | struct cfs_rq *this_cfs_rq; | ||
1347 | long imbalance; | ||
1348 | unsigned long maxload; | ||
1419 | 1349 | ||
1420 | max_load = max(rem_load, imbalance); | 1350 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); |
1421 | moved_load = __load_balance_fair(this_rq, this_cpu, busiest, | ||
1422 | max_load, sd, idle, all_pinned, this_best_prio, | ||
1423 | tg->cfs_rq[busiest_cpu]); | ||
1424 | 1351 | ||
1425 | if (!moved_load) | 1352 | imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; |
1353 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | ||
1354 | if (imbalance <= 0) | ||
1426 | continue; | 1355 | continue; |
1427 | 1356 | ||
1428 | move_group_shares(tg, sd, busiest_cpu, this_cpu); | 1357 | /* Don't pull more than imbalance/2 */ |
1358 | imbalance /= 2; | ||
1359 | maxload = min(rem_load_move, imbalance); | ||
1429 | 1360 | ||
1430 | moved_load *= aggregate(tg, sd)->load; | 1361 | *this_best_prio = cfs_rq_best_prio(this_cfs_rq); |
1431 | moved_load /= aggregate(tg, sd)->rq_weight + 1; | 1362 | #else |
1363 | # define maxload rem_load_move | ||
1364 | #endif | ||
1365 | /* | ||
1366 | * pass busy_cfs_rq argument into | ||
1367 | * load_balance_[start|next]_fair iterators | ||
1368 | */ | ||
1369 | cfs_rq_iterator.arg = busy_cfs_rq; | ||
1370 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, | ||
1371 | maxload, sd, idle, all_pinned, | ||
1372 | this_best_prio, | ||
1373 | &cfs_rq_iterator); | ||
1432 | 1374 | ||
1433 | rem_load_move -= moved_load; | 1375 | if (rem_load_move <= 0) |
1434 | if (rem_load_move < 0) | ||
1435 | break; | 1376 | break; |
1436 | } | 1377 | } |
1437 | rcu_read_unlock(); | ||
1438 | 1378 | ||
1439 | return max_load_move - rem_load_move; | 1379 | return max_load_move - rem_load_move; |
1440 | } | 1380 | } |
1441 | #else | ||
1442 | static unsigned long | ||
1443 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
1444 | unsigned long max_load_move, | ||
1445 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
1446 | int *all_pinned, int *this_best_prio) | ||
1447 | { | ||
1448 | return __load_balance_fair(this_rq, this_cpu, busiest, | ||
1449 | max_load_move, sd, idle, all_pinned, | ||
1450 | this_best_prio, &busiest->cfs); | ||
1451 | } | ||
1452 | #endif | ||
1453 | 1381 | ||
1454 | static int | 1382 | static int |
1455 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1383 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
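The load_balance_fair() rewrite above goes back to walking the busiest runqueue's leaf cfs_rq list: queues that already carry less load than their counterpart on this CPU are skipped, and at most half of the per-queue weight difference is pulled, capped by the amount still left to move. The sketch below models only that pulling policy with plain arrays; pull_load() and its parameters are illustrative names, not kernel interfaces.

/* Per-queue balancing policy: pull at most half of each queue's load
 * difference, and stop once the requested amount has been moved. */
#include <stdio.h>

#define NR_QUEUES 3

static long pull_load(long want, const long busy[], const long local[])
{
	long moved = 0;

	for (int i = 0; i < NR_QUEUES && moved < want; i++) {
		long imbalance = busy[i] - local[i];

		if (imbalance <= 0)		/* local side already heavier, skip */
			continue;

		long max_pull = imbalance / 2;	/* never pull past the midpoint */
		long pull = want - moved;

		if (pull > max_pull)
			pull = max_pull;
		moved += pull;
	}
	return moved;
}

int main(void)
{
	long busy[NR_QUEUES]  = { 400, 100, 900 };
	long local[NR_QUEUES] = { 100, 300, 100 };

	printf("moved %ld of 500\n", pull_load(500, busy, local));
	return 0;
}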
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 060e87b0cb1c..3432d573205d 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -513,8 +513,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
513 | */ | 513 | */ |
514 | for_each_sched_rt_entity(rt_se) | 514 | for_each_sched_rt_entity(rt_se) |
515 | enqueue_rt_entity(rt_se); | 515 | enqueue_rt_entity(rt_se); |
516 | |||
517 | inc_cpu_load(rq, p->se.load.weight); | ||
518 | } | 516 | } |
519 | 517 | ||
520 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 518 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
@@ -534,8 +532,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | |||
534 | if (rt_rq && rt_rq->rt_nr_running) | 532 | if (rt_rq && rt_rq->rt_nr_running) |
535 | enqueue_rt_entity(rt_se); | 533 | enqueue_rt_entity(rt_se); |
536 | } | 534 | } |
537 | |||
538 | dec_cpu_load(rq, p->se.load.weight); | ||
539 | } | 535 | } |
540 | 536 | ||
541 | /* | 537 | /* |
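Both sched_rt.c hunks remove the symmetric inc_cpu_load()/dec_cpu_load() calls from the RT enqueue and dequeue paths, presumably so that rq-wide load tracking is left to the fair-class accounting once the group-balancing code above is reverted; the diff itself only shows the paired removal. The toy below illustrates the general rule these paths follow, namely that enqueue and dequeue accounting must stay symmetric and a counter should be owned by exactly one layer. It is not kernel code.

/* Toy model of paired enqueue/dequeue accounting. */
#include <assert.h>
#include <stdio.h>

struct toy_rq {
	unsigned int nr_running;
	unsigned long load;	/* sum of the weights of queued tasks */
};

static void toy_enqueue(struct toy_rq *rq, unsigned long weight)
{
	rq->nr_running++;
	rq->load += weight;
}

static void toy_dequeue(struct toy_rq *rq, unsigned long weight)
{
	assert(rq->nr_running > 0 && rq->load >= weight);
	rq->nr_running--;
	rq->load -= weight;
}

int main(void)
{
	struct toy_rq rq = { 0, 0 };

	toy_enqueue(&rq, 1024);
	toy_enqueue(&rq, 3121);
	toy_dequeue(&rq, 1024);
	printf("nr_running=%u load=%lu\n", rq.nr_running, rq.load);
	return 0;
}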
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 5bae2e0c3ff2..a38878e0e49d 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -67,6 +67,7 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
67 | preempt_enable(); | 67 | preempt_enable(); |
68 | #endif | 68 | #endif |
69 | } | 69 | } |
70 | kfree(mask_str); | ||
70 | return 0; | 71 | return 0; |
71 | } | 72 | } |
72 | 73 | ||
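The one-line sched_stats.h change frees mask_str, the buffer show_schedstat() allocates for formatting scheduler-domain cpumasks, which previously leaked on every read of /proc/schedstat. Below is a minimal userspace sketch of the pattern being fixed (allocate a scratch buffer, use it, release it on the way out); show_stats() and its arguments are illustrative.

/* A scratch buffer allocated at the top of a show() routine must be released
 * before returning; the missing free() is the leak the hunk plugs. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int show_stats(FILE *out, const char *cpumask)
{
	char *mask_str = malloc(strlen(cpumask) + 1);

	if (!mask_str)
		return -1;

	strcpy(mask_str, cpumask);		/* stands in for cpumask formatting */
	fprintf(out, "domain mask: %s\n", mask_str);

	free(mask_str);				/* the fix: release the buffer */
	return 0;
}

int main(void)
{
	return show_stats(stdout, "00000000,0000000f");
}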
diff --git a/kernel/signal.c b/kernel/signal.c index 72bb4f51f963..6c0958e52ea7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -231,6 +231,40 @@ void flush_signals(struct task_struct *t) | |||
231 | spin_unlock_irqrestore(&t->sighand->siglock, flags); | 231 | spin_unlock_irqrestore(&t->sighand->siglock, flags); |
232 | } | 232 | } |
233 | 233 | ||
234 | static void __flush_itimer_signals(struct sigpending *pending) | ||
235 | { | ||
236 | sigset_t signal, retain; | ||
237 | struct sigqueue *q, *n; | ||
238 | |||
239 | signal = pending->signal; | ||
240 | sigemptyset(&retain); | ||
241 | |||
242 | list_for_each_entry_safe(q, n, &pending->list, list) { | ||
243 | int sig = q->info.si_signo; | ||
244 | |||
245 | if (likely(q->info.si_code != SI_TIMER)) { | ||
246 | sigaddset(&retain, sig); | ||
247 | } else { | ||
248 | sigdelset(&signal, sig); | ||
249 | list_del_init(&q->list); | ||
250 | __sigqueue_free(q); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | sigorsets(&pending->signal, &signal, &retain); | ||
255 | } | ||
256 | |||
257 | void flush_itimer_signals(void) | ||
258 | { | ||
259 | struct task_struct *tsk = current; | ||
260 | unsigned long flags; | ||
261 | |||
262 | spin_lock_irqsave(&tsk->sighand->siglock, flags); | ||
263 | __flush_itimer_signals(&tsk->pending); | ||
264 | __flush_itimer_signals(&tsk->signal->shared_pending); | ||
265 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); | ||
266 | } | ||
267 | |||
234 | void ignore_signals(struct task_struct *t) | 268 | void ignore_signals(struct task_struct *t) |
235 | { | 269 | { |
236 | int i; | 270 | int i; |
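The new __flush_itimer_signals()/flush_itimer_signals() helpers above walk both the private and the shared pending queues, free only the SI_TIMER entries, and rebuild the pending mask so it matches what is still queued, letting a caller discard stale interval-timer signals without touching anything else. The userspace sketch below shows the same shape, selective removal from a list plus a recomputed summary bitmask; the types and flush_timer_sigs() are illustrative, not the kernel's sigpending machinery.

/* Selective flush: drop list entries matching a predicate and rebuild the
 * summary bitmask from what survives. */
#include <stdio.h>
#include <stdlib.h>

struct pending_sig {
	int signo;
	int from_timer;			/* stands in for si_code == SI_TIMER */
	struct pending_sig *next;
};

struct pending {
	unsigned long mask;		/* bit n set => signal n queued */
	struct pending_sig *head;
};

static void flush_timer_sigs(struct pending *p)
{
	struct pending_sig **link = &p->head;
	unsigned long mask = 0;

	while (*link) {
		struct pending_sig *q = *link;

		if (q->from_timer) {	/* timer-generated: unlink and free */
			*link = q->next;
			free(q);
		} else {		/* anything else stays queued */
			mask |= 1UL << q->signo;
			link = &q->next;
		}
	}
	p->mask = mask;
}

int main(void)
{
	struct pending p = { (1UL << 14) | (1UL << 10), NULL };
	struct pending_sig *a = malloc(sizeof(*a));
	struct pending_sig *b = malloc(sizeof(*b));

	if (!a || !b)
		return 1;
	*a = (struct pending_sig){ .signo = 14, .from_timer = 1, .next = NULL };
	*b = (struct pending_sig){ .signo = 10, .from_timer = 0, .next = a };
	p.head = b;

	flush_timer_sigs(&p);
	printf("mask after flush: %#lx\n", p.mask);	/* only bit 10 left */
	return 0;
}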
@@ -1240,17 +1274,22 @@ void sigqueue_free(struct sigqueue *q) | |||
1240 | 1274 | ||
1241 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1275 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1242 | /* | 1276 | /* |
1243 | * If the signal is still pending remove it from the | 1277 | * We must hold ->siglock while testing q->list |
1244 | * pending queue. We must hold ->siglock while testing | 1278 | * to serialize with collect_signal() or with |
1245 | * q->list to serialize with collect_signal(). | 1279 | * __exit_signal()->flush_sigqueue(). |
1246 | */ | 1280 | */ |
1247 | spin_lock_irqsave(lock, flags); | 1281 | spin_lock_irqsave(lock, flags); |
1282 | q->flags &= ~SIGQUEUE_PREALLOC; | ||
1283 | /* | ||
1284 | * If it is queued it will be freed when dequeued, | ||
1285 | * like the "regular" sigqueue. | ||
1286 | */ | ||
1248 | if (!list_empty(&q->list)) | 1287 | if (!list_empty(&q->list)) |
1249 | list_del_init(&q->list); | 1288 | q = NULL; |
1250 | spin_unlock_irqrestore(lock, flags); | 1289 | spin_unlock_irqrestore(lock, flags); |
1251 | 1290 | ||
1252 | q->flags &= ~SIGQUEUE_PREALLOC; | 1291 | if (q) |
1253 | __sigqueue_free(q); | 1292 | __sigqueue_free(q); |
1254 | } | 1293 | } |
1255 | 1294 | ||
1256 | int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | 1295 | int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) |
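The sigqueue_free() change moves the SIGQUEUE_PREALLOC clearing under ->siglock and skips the free entirely when the entry is still queued; a queued entry is now released by the normal dequeue path instead, so the pending bit and its payload can no longer go out of sync. The toy below captures that ownership hand-off (free on release only if nothing will dequeue it, otherwise let the consumer free it); the struct and function names are invented for the sketch and a pthread mutex stands in for the siglock.

/* Ownership hand-off under a lock: whichever side drops the last reference
 * (a producer whose entry was never queued, or the consumer that dequeues it)
 * performs the free. */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct entry {
	bool queued;			/* protected by lock */
	bool preallocated;		/* producer still holds a reference */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void producer_release(struct entry *e)
{
	bool free_it;

	pthread_mutex_lock(&lock);
	e->preallocated = false;	/* consumer owns it from here on */
	free_it = !e->queued;		/* free only if nothing will dequeue it */
	pthread_mutex_unlock(&lock);

	if (free_it)
		free(e);
}

static void consumer_dequeue(struct entry *e)
{
	bool free_it;

	pthread_mutex_lock(&lock);
	e->queued = false;
	free_it = !e->preallocated;	/* producer already gave up its reference */
	pthread_mutex_unlock(&lock);

	if (free_it)
		free(e);
}

int main(void)
{
	struct entry *e = calloc(1, sizeof(*e));

	if (!e)
		return 1;
	e->preallocated = true;
	e->queued = true;		/* pretend it sits on a pending list */

	producer_release(e);		/* does not free: still queued */
	consumer_dequeue(e);		/* frees here */
	return 0;
}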
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 0101aeef7ed7..b7350bbfb076 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -62,8 +62,7 @@ static int stopmachine(void *cpu) | |||
62 | * help our sisters onto their CPUs. */ | 62 | * help our sisters onto their CPUs. */ |
63 | if (!prepared && !irqs_disabled) | 63 | if (!prepared && !irqs_disabled) |
64 | yield(); | 64 | yield(); |
65 | else | 65 | cpu_relax(); |
66 | cpu_relax(); | ||
67 | } | 66 | } |
68 | 67 | ||
69 | /* Ack: we are exiting. */ | 68 | /* Ack: we are exiting. */ |
@@ -106,8 +105,10 @@ static int stop_machine(void) | |||
106 | } | 105 | } |
107 | 106 | ||
108 | /* Wait for them all to come to life. */ | 107 | /* Wait for them all to come to life. */ |
109 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) | 108 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) { |
110 | yield(); | 109 | yield(); |
110 | cpu_relax(); | ||
111 | } | ||
111 | 112 | ||
112 | /* If some failed, kill them all. */ | 113 | /* If some failed, kill them all. */ |
113 | if (ret < 0) { | 114 | if (ret < 0) { |
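The two stop_machine.c hunks make both busy-wait loops call cpu_relax() on every iteration, with yield() layered on top where it was already used, presumably so the spinning CPU always issues a pause hint rather than only when yield() is skipped; the diff shows only the mechanical change, the rationale is inferred. A compact sketch of that loop shape is below; wait_for_ack() is an invented name and cpu_relax() is modeled with a plain compiler barrier rather than a real pause instruction.

/* Spin-wait shape after the change: relax on every pass, yield when allowed. */
#include <sched.h>
#include <stdatomic.h>

#define cpu_relax() __asm__ __volatile__("" ::: "memory")

static void wait_for_ack(atomic_int *ack, int expected, int may_yield)
{
	while (atomic_load(ack) != expected) {
		if (may_yield)
			sched_yield();	/* give other runnable threads a turn */
		cpu_relax();		/* always hint that we are busy-waiting */
	}
}

int main(void)
{
	atomic_int ack = 3;

	wait_for_ack(&ack, 3, 1);	/* already acked, returns immediately */
	return 0;
}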
diff --git a/kernel/sys.c b/kernel/sys.c index 895d2d4c9493..14e97282eb6c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1652,7 +1652,7 @@ asmlinkage long sys_umask(int mask) | |||
1652 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | 1652 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, |
1653 | unsigned long arg4, unsigned long arg5) | 1653 | unsigned long arg4, unsigned long arg5) |
1654 | { | 1654 | { |
1655 | long uninitialized_var(error); | 1655 | long error = 0; |
1656 | 1656 | ||
1657 | if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) | 1657 | if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) |
1658 | return error; | 1658 | return error; |
@@ -1701,9 +1701,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1701 | error = PR_TIMING_STATISTICAL; | 1701 | error = PR_TIMING_STATISTICAL; |
1702 | break; | 1702 | break; |
1703 | case PR_SET_TIMING: | 1703 | case PR_SET_TIMING: |
1704 | if (arg2 == PR_TIMING_STATISTICAL) | 1704 | if (arg2 != PR_TIMING_STATISTICAL) |
1705 | error = 0; | ||
1706 | else | ||
1707 | error = -EINVAL; | 1705 | error = -EINVAL; |
1708 | break; | 1706 | break; |
1709 | 1707 | ||
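The sys.c hunks are two small cleanups to sys_prctl(): error now starts at 0 instead of being marked uninitialized_var(), so options that succeed without touching it return success, and the PR_SET_TIMING arm is inverted so that only the unsupported case assigns -EINVAL. A minimal sketch of that default-to-success style follows; TIMING_STATISTICAL and set_timing() are illustrative stand-ins, not the prctl interface.

/* Default-to-success option handling: only failed validation sets an error. */
#include <errno.h>

#define TIMING_STATISTICAL 0		/* stand-in for PR_TIMING_STATISTICAL */

static long set_timing(int mode)
{
	long error = 0;			/* success unless proven otherwise */

	if (mode != TIMING_STATISTICAL)	/* only statistical timing is supported */
		error = -EINVAL;
	return error;
}

int main(void)
{
	return set_timing(TIMING_STATISTICAL) ? 1 : 0;
}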
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d7ffdc59816a..29116652dca8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -81,6 +81,7 @@ extern int compat_log; | |||
81 | extern int maps_protect; | 81 | extern int maps_protect; |
82 | extern int sysctl_stat_interval; | 82 | extern int sysctl_stat_interval; |
83 | extern int latencytop_enabled; | 83 | extern int latencytop_enabled; |
84 | extern int sysctl_nr_open_min, sysctl_nr_open_max; | ||
84 | 85 | ||
85 | /* Constants used for minimum and maximum */ | 86 | /* Constants used for minimum and maximum */ |
86 | #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) | 87 | #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) |
@@ -1190,7 +1191,9 @@ static struct ctl_table fs_table[] = { | |||
1190 | .data = &sysctl_nr_open, | 1191 | .data = &sysctl_nr_open, |
1191 | .maxlen = sizeof(int), | 1192 | .maxlen = sizeof(int), |
1192 | .mode = 0644, | 1193 | .mode = 0644, |
1193 | .proc_handler = &proc_dointvec, | 1194 | .proc_handler = &proc_dointvec_minmax, |
1195 | .extra1 = &sysctl_nr_open_min, | ||
1196 | .extra2 = &sysctl_nr_open_max, | ||
1194 | }, | 1197 | }, |
1195 | { | 1198 | { |
1196 | .ctl_name = FS_DENTRY, | 1199 | .ctl_name = FS_DENTRY, |
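The sysctl.c hunk switches fs.nr_open to proc_dointvec_minmax with bounds taken from sysctl_nr_open_min and sysctl_nr_open_max, which are only declared extern here (they are defined next to sysctl_nr_open itself, outside this diff), so writes outside the allowed range are no longer applied. The sketch below shows the clamped-tunable idea with made-up bounds and a made-up setter; the real limits and the exact rejection behaviour belong to the kernel handler, not to this code.

/* Bounded tunable: out-of-range writes are refused instead of stored.
 * The bounds here are illustrative, not the kernel's actual limits. */
#include <errno.h>
#include <stdio.h>

static int nr_open = 1024 * 1024;
static const int nr_open_min = 64;
static const int nr_open_max = 1 << 20;

static int set_nr_open(int val)
{
	if (val < nr_open_min || val > nr_open_max)
		return -EINVAL;		/* reject instead of silently clamping */
	nr_open = val;
	return 0;
}

int main(void)
{
	printf("%d\n", set_nr_open(16));	/* below the minimum: refused */
	printf("%d\n", set_nr_open(2048));	/* accepted */
	printf("nr_open=%d\n", nr_open);
	return 0;
}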