commit     c3e228d59bd2054fd57f7f146ef0f6fb0e1996b7
tree       1817910408fa7b5027e463725a6eb54933c71928 /kernel
parent     7eb9ba5ed312ec6ed9d22259c5da1acb7cf4bd29
parent     cfaf025112d3856637ff34a767ef785ef5cf2ca9
author     Ingo Molnar <mingo@kernel.org>  2012-06-11 04:51:35 -0400
committer  Ingo Molnar <mingo@kernel.org>  2012-06-11 04:51:35 -0400
Merge tag 'v3.5-rc2' into perf/core
Merge in Linux 3.5-rc2 - to pick up fixes.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')

 kernel/cgroup.c           |  17
 kernel/sched/core.c       | 187
 kernel/sched/fair.c       |  29
 kernel/sched/rt.c         |   2
 kernel/sched/sched.h      |   2
 kernel/sys.c              |  60
 kernel/time/timekeeping.c |   2

 7 files changed, 214 insertions(+), 85 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0f3527d6184a..72fcd3069a90 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                mutex_unlock(&cgroup_mutex);

                /*
-                * Drop the active superblock reference that we took when we
-                * created the cgroup
+                * We want to drop the active superblock reference from the
+                * cgroup creation after all the dentry refs are gone -
+                * kill_sb gets mighty unhappy otherwise. Mark
+                * dentry->d_fsdata with cgroup_diput() to tell
+                * cgroup_d_release() to call deactivate_super().
                 */
-               deactivate_super(cgrp->root->sb);
+               dentry->d_fsdata = cgroup_diput;

                /*
                 * if we're getting rid of the cgroup, refcount should ensure
@@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d)
        return 1;
 }

+static void cgroup_d_release(struct dentry *dentry)
+{
+       /* did cgroup_diput() tell me to deactivate super? */
+       if (dentry->d_fsdata == cgroup_diput)
+               deactivate_super(dentry->d_sb);
+}
+
 static void remove_dir(struct dentry *d)
 {
        struct dentry *parent = dget(d->d_parent);
@@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
        static const struct dentry_operations cgroup_dops = {
                .d_iput = cgroup_diput,
                .d_delete = cgroup_delete,
+               .d_release = cgroup_d_release,
        };

        struct inode *inode =
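The interesting trick in this file is using a function's own address as a one-word flag: cgroup_diput() cannot call deactivate_super() itself (dentry references are still live, and kill_sb would complain), so it tags dentry->d_fsdata and lets the final ->d_release callback finish the job. A minimal user-space sketch of the same pattern, with invented names (struct obj, marker) rather than kernel API:

    #include <stdio.h>

    struct obj {
            void *marker;           /* plays the role of dentry->d_fsdata */
    };

    /* Runs first, while dropping the reference is not yet safe. */
    static void put_inode(struct obj *o)
    {
            /* Defer the expensive drop: tag the object with our own address. */
            o->marker = (void *)put_inode;
    }

    /* Runs last, when the final reference is gone and dropping is safe. */
    static void put_final(struct obj *o)
    {
            if (o->marker == (void *)put_inode) /* did put_inode() tag us? */
                    printf("deactivate_super() would run here\n");
    }

    int main(void)
    {
            struct obj o = { 0 };

            put_inode(&o);          /* cgroup_diput() analogue */
            put_final(&o);          /* cgroup_d_release() analogue */
            return 0;
    }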
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c46958e26121..d5594a4268d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5556,15 +5556,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */

 #ifdef CONFIG_SCHED_DEBUG

-static __read_mostly int sched_domain_debug_enabled;
+static __read_mostly int sched_debug_enabled;

-static int __init sched_domain_debug_setup(char *str)
+static int __init sched_debug_setup(char *str)
 {
-       sched_domain_debug_enabled = 1;
+       sched_debug_enabled = 1;

        return 0;
 }
-early_param("sched_debug", sched_domain_debug_setup);
+early_param("sched_debug", sched_debug_setup);
+
+static inline bool sched_debug(void)
+{
+       return sched_debug_enabled;
+}

 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                  struct cpumask *groupmask)
@@ -5604,7 +5609,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }

-               if (!group->sgp->power) {
+               /*
+                * Even though we initialize ->power to something semi-sane,
+                * we leave power_orig unset. This allows us to detect if
+                * domain iteration is still funny without causing /0 traps.
+                */
+               if (!group->sgp->power_orig) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: domain->cpu_power not "
                                        "set\n");
@@ -5652,7 +5662,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
        int level = 0;

-       if (!sched_domain_debug_enabled)
+       if (!sched_debug_enabled)
                return;

        if (!sd) {
@@ -5673,6 +5683,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
+static inline bool sched_debug(void)
+{
+       return false;
+}
 #endif /* CONFIG_SCHED_DEBUG */

 static int sd_degenerate(struct sched_domain *sd)
@@ -5994,6 +6008,44 @@ struct sched_domain_topology_level {
        struct sd_data      data;
 };

+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+       const struct cpumask *span = sched_domain_span(sd);
+       struct sd_data *sdd = sd->private;
+       struct sched_domain *sibling;
+       int i;
+
+       for_each_cpu(i, span) {
+               sibling = *per_cpu_ptr(sdd->sd, i);
+               if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+                       continue;
+
+               cpumask_set_cpu(i, sched_group_mask(sg));
+       }
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+       return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
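Conceptually, group_balance_cpu() is just "lowest bit set in the AND of two cpumasks". A hedged stand-alone sketch using plain unsigned long bitmasks in place of struct cpumask (first_and_bit() is an illustrative helper, not the kernel's cpumask_first_and()):

    #include <stdio.h>

    /* Illustrative stand-in for cpumask_first_and(): lowest bit set in a & b. */
    static int first_and_bit(unsigned long a, unsigned long b)
    {
            unsigned long both = a & b;

            return both ? __builtin_ctzl(both) : -1; /* -1: empty intersection */
    }

    int main(void)
    {
            unsigned long group_cpus = 0x0f;    /* cpus 0-3 belong to the group */
            unsigned long group_mask = 0x0c;    /* but only cpus 2,3 may iterate it */

            /* group_balance_cpu() analogue: prints "balance cpu = 2" */
            printf("balance cpu = %d\n", first_and_bit(group_cpus, group_mask));
            return 0;
    }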
@@ -6012,6 +6064,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                if (cpumask_test_cpu(i, covered))
                        continue;

+               child = *per_cpu_ptr(sdd->sd, i);
+
+               /* See the comment near build_group_mask(). */
+               if (!cpumask_test_cpu(i, sched_domain_span(child)))
+                       continue;
+
                sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                GFP_KERNEL, cpu_to_node(cpu));
@@ -6019,8 +6077,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                        goto fail;

                sg_span = sched_group_cpus(sg);
-
-               child = *per_cpu_ptr(sdd->sd, i);
                if (child->child) {
                        child = child->child;
                        cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6086,24 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                cpumask_or(covered, covered, sg_span);

                sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-               atomic_inc(&sg->sgp->ref);
+               if (atomic_inc_return(&sg->sgp->ref) == 1)
+                       build_group_mask(sd, sg);

+               /*
+                * Initialize sgp->power such that even if we mess up the
+                * domains and no possible iteration will get us here, we won't
+                * die on a /0 trap.
+                */
+               sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+
+               /*
+                * Make sure the first group of this domain contains the
+                * canonical balance cpu. Otherwise the sched_domain iteration
+                * breaks. See update_sg_lb_stats().
+                */
                if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-                   cpumask_first(sg_span) == cpu) {
-                       WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+                   group_balance_cpu(sg) == cpu)
                        groups = sg;
-               }

                if (!first)
                        first = sg;
@@ -6109,6 +6176,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)

                cpumask_clear(sched_group_cpus(sg));
                sg->sgp->power = 0;
+               cpumask_setall(sched_group_mask(sg));

                for_each_cpu(j, span) {
                        if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6218,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
                sg = sg->next;
        } while (sg != sd->groups);

-       if (cpu != group_first_cpu(sg))
+       if (cpu != group_balance_cpu(sg))
                return;

        update_group_power(sd, cpu);
@@ -6200,11 +6268,8 @@ int sched_domain_level_max;

 static int __init setup_relax_domain_level(char *str)
 {
-       unsigned long val;
-
-       val = simple_strtoul(str, NULL, 0);
-       if (val < sched_domain_level_max)
-               default_relax_domain_level = val;
+       if (kstrtoint(str, 0, &default_relax_domain_level))
+               pr_warn("Unable to set relax_domain_level\n");

        return 1;
 }
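The old parser had two problems: simple_strtoul() reports no errors, and sched_domain_level_max is not yet known when this early parameter runs, so the bound check was meaningless. kstrtoint() rejects trailing garbage and overflow outright. A user-space analogue of that stricter parsing, with parse_int() as an illustrative stand-in:

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative analogue of kstrtoint(): base 0, reject garbage and overflow. */
    static int parse_int(const char *s, int *out)
    {
            char *end;
            long val;

            errno = 0;
            val = strtol(s, &end, 0);
            if (errno || end == s || *end || val < INT_MIN || val > INT_MAX)
                    return -EINVAL;

            *out = (int)val;
            return 0;
    }

    int main(void)
    {
            int level;

            if (parse_int("2bogus", &level)) /* simple_strtoul() would accept "2" */
                    fprintf(stderr, "Unable to set relax_domain_level\n");
            return 0;
    }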
@@ -6314,14 +6379,13 @@ static struct sched_domain_topology_level *sched_domain_topology = default_topology
 #ifdef CONFIG_NUMA

 static int sched_domains_numa_levels;
-static int sched_domains_numa_scale;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;

 static inline int sd_local_flags(int level)
 {
-       if (sched_domains_numa_distance[level] > REMOTE_DISTANCE)
+       if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
                return 0;

        return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
@@ -6379,6 +6443,42 @@ static const struct cpumask *sd_numa_mask(int cpu)
        return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
 }

+static void sched_numa_warn(const char *str)
+{
+       static int done = false;
+       int i,j;
+
+       if (done)
+               return;
+
+       done = true;
+
+       printk(KERN_WARNING "ERROR: %s\n\n", str);
+
+       for (i = 0; i < nr_node_ids; i++) {
+               printk(KERN_WARNING "  ");
+               for (j = 0; j < nr_node_ids; j++)
+                       printk(KERN_CONT "%02d ", node_distance(i,j));
+               printk(KERN_CONT "\n");
+       }
+       printk(KERN_WARNING "\n");
+}
+
+static bool find_numa_distance(int distance)
+{
+       int i;
+
+       if (distance == node_distance(0, 0))
+               return true;
+
+       for (i = 0; i < sched_domains_numa_levels; i++) {
+               if (sched_domains_numa_distance[i] == distance)
+                       return true;
+       }
+
+       return false;
+}
+
 static void sched_init_numa(void)
 {
        int next_distance, curr_distance = node_distance(0, 0);
@@ -6386,7 +6486,6 @@ static void sched_init_numa(void)
        int level = 0;
        int i, j, k;

-       sched_domains_numa_scale = curr_distance;
        sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
        if (!sched_domains_numa_distance)
                return;
@@ -6397,23 +6496,41 @@ static void sched_init_numa(void)
         *
         * Assumes node_distance(0,j) includes all distances in
         * node_distance(i,j) in order to avoid cubic time.
-        *
-        * XXX: could be optimized to O(n log n) by using sort()
         */
        next_distance = curr_distance;
        for (i = 0; i < nr_node_ids; i++) {
                for (j = 0; j < nr_node_ids; j++) {
-                       int distance = node_distance(0, j);
-                       if (distance > curr_distance &&
-                                       (distance < next_distance ||
-                                        next_distance == curr_distance))
-                               next_distance = distance;
+                       for (k = 0; k < nr_node_ids; k++) {
+                               int distance = node_distance(i, k);
+
+                               if (distance > curr_distance &&
+                                   (distance < next_distance ||
+                                    next_distance == curr_distance))
+                                       next_distance = distance;
+
+                               /*
+                                * While not a strong assumption it would be nice to know
+                                * about cases where if node A is connected to B, B is not
+                                * equally connected to A.
+                                */
+                               if (sched_debug() && node_distance(k, i) != distance)
+                                       sched_numa_warn("Node-distance not symmetric");
+
+                               if (sched_debug() && i && !find_numa_distance(distance))
+                                       sched_numa_warn("Node-0 not representative");
+                       }
+                       if (next_distance != curr_distance) {
+                               sched_domains_numa_distance[level++] = next_distance;
+                               sched_domains_numa_levels = level;
+                               curr_distance = next_distance;
+                       } else break;
                }
-               if (next_distance != curr_distance) {
-                       sched_domains_numa_distance[level++] = next_distance;
-                       sched_domains_numa_levels = level;
-                       curr_distance = next_distance;
-               } else break;
+
+               /*
+                * In case of sched_debug() we verify the above assumption.
+                */
+               if (!sched_debug())
+                       break;
        }
        /*
         * 'level' contains the number of unique distances, excluding the
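The new k-loop makes the scan cubic but, when booted with sched_debug, verifies two assumptions: the firmware's SLIT-style distance table is symmetric, and row 0 contains every distance that occurs anywhere. A self-contained sketch of the symmetry check over a hard-coded table (all values invented for illustration):

    #include <stdio.h>

    #define NR_NODES 3

    /* An invented SLIT-style table; distance[i][j] is node i's view of node j. */
    static const int distance[NR_NODES][NR_NODES] = {
            { 10, 20, 30 },
            { 20, 10, 20 },
            { 31, 20, 10 },         /* 31 != 30: asymmetric on purpose */
    };

    int main(void)
    {
            int i, k;

            for (i = 0; i < NR_NODES; i++)
                    for (k = 0; k < NR_NODES; k++)
                            /* the check added above: d(i,k) should equal d(k,i) */
                            if (distance[i][k] != distance[k][i])
                                    printf("Node-distance not symmetric: (%d,%d)\n",
                                           i, k);
            return 0;
    }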
@@ -6525,7 +6642,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)

                        *per_cpu_ptr(sdd->sg, j) = sg;

-                       sgp = kzalloc_node(sizeof(struct sched_group_power),
+                       sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
                                        GFP_KERNEL, cpu_to_node(j));
                        if (!sgp)
                                return -ENOMEM;
@@ -6578,7 +6695,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
        if (!sd)
                return child;

-       set_domain_attribute(sd, attr);
        cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
        if (child) {
                sd->level = child->level + 1;
@@ -6586,6 +6702,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                child->parent = sd;
        }
        sd->child = child;
+       set_domain_attribute(sd, attr);

        return sd;
 }
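The set_domain_attribute() move is subtler than it looks: that function compares the requested relax level against sd->level, roughly (quoted from the same file, trimmed):

    if (request < sd->level) {
            /* turn off idle balance on this domain */
            sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
    }

and sd->level is only assigned from the child linkage just above the new call site, so the old placement always compared against a zero level.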
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2a2d236f27b..c099cc6eebe3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3602,7 +3602,7 @@ void update_group_power(struct sched_domain *sd, int cpu)
                } while (group != child->groups);
        }

-       sdg->sgp->power = power;
+       sdg->sgp->power_orig = sdg->sgp->power = power;
 }

 /*
@@ -3632,7 +3632,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)

 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
- * @sd: The sched_domain whose statistics are to be updated.
+ * @env: The load balancing environment.
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
@@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
        int i;

        if (local_group)
-               balance_cpu = group_first_cpu(group);
+               balance_cpu = group_balance_cpu(group);

        /* Tally up the load of all CPUs in the group */
        max_cpu_load = 0;
@@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,

                /* Bias balancing toward cpus of our domain */
                if (local_group) {
-                       if (idle_cpu(i) && !first_idle_cpu) {
+                       if (idle_cpu(i) && !first_idle_cpu &&
+                                       cpumask_test_cpu(i, sched_group_mask(group))) {
                                first_idle_cpu = 1;
                                balance_cpu = i;
                        }
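This hunk is the consumer side of the new iteration mask: with overlapping groups, an idle cpu outside sched_group_mask() must not be elected balance_cpu, otherwise the choice made here could disagree with group_balance_cpu(), which (per the comment added in core.c above) is what the sched_domain iteration relies on.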
@@ -3741,11 +3742,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,

 /**
  * update_sd_pick_busiest - return 1 on busiest group
- * @sd: sched_domain whose statistics are to be checked
+ * @env: The load balancing environment.
  * @sds: sched_domain statistics
  * @sg: sched_group candidate to be checked for being the busiest
  * @sgs: sched_group statistics
- * @this_cpu: the current cpu
  *
  * Determine if @sg is a busier group than the previously selected
  * busiest group.
@@ -3783,9 +3783,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,

 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
- * @sd: sched_domain whose statistics are to be updated.
- * @this_cpu: Cpu for which load balance is currently performed.
- * @idle: Idle status of this_cpu
+ * @env: The load balancing environment.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
@@ -3874,10 +3872,8 @@ static inline void update_sd_lb_stats(struct lb_env *env,
  * Returns 1 when packing is required and a task should be moved to
  * this CPU. The amount of the imbalance is returned in *imbalance.
  *
- * @sd: The sched_domain whose packing is to be checked.
+ * @env: The load balancing environment.
  * @sds: Statistics of the sched_domain which is to be packed
- * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
- * @imbalance: returns amount of imbalanced due to packing.
  */
 static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 {
@@ -3903,9 +3899,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
  * fix_small_imbalance - Calculate the minor imbalance that exists
  *                      amongst the groups of a sched_domain, during
  *                      load balancing.
+ * @env: The load balancing environment.
  * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
- * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
- * @imbalance: Variable to store the imbalance.
  */
 static inline
 void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
@@ -4048,11 +4043,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
  * Also calculates the amount of weighted load which should be moved
  * to restore balance.
  *
- * @sd: The sched_domain whose busiest group is to be returned.
- * @this_cpu: The cpu for which load balancing is currently being performed.
- * @imbalance: Variable which stores amount of weighted load which should
- *             be moved to restore balance/put a group to idle.
- * @idle: The idle status of this_cpu.
+ * @env: The load balancing environment.
  * @cpus: The set of CPUs under consideration for load-balancing.
  * @balance: Pointer to a variable indicating if this_cpu
  *     is the appropriate cpu to perform load balancing at this_level.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2a4e8dffbd6b..573e1ca01102 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1562,7 +1562,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
                                     task_running(rq, task) ||
                                     !task->on_rq)) {

-                               raw_spin_unlock(&lowest_rq->lock);
+                               double_unlock_balance(rq, lowest_rq);
                                lowest_rq = NULL;
                                break;
                        }
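For context: lowest_rq was taken with double_lock_balance(), which may drop and retake rq->lock and annotates it with a nesting subclass, so it has to be released through the matching helper. In kernels of this era the counterpart looks roughly like this (kernel/sched/sched.h):

    static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
            __releases(busiest->lock)
    {
            raw_spin_unlock(&busiest->lock);
            lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
    }

A bare raw_spin_unlock() dropped the lock but left rq's lockdep subclass stale, which could produce false lockdep reports.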
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ba9dccfd24ce..6d52cea7f33d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_id);

+extern int group_balance_cpu(struct sched_group *sg);
+
 #endif /* CONFIG_SMP */

 #include "stats.h"
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..f0ec44dcd415 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
 }

 #ifdef CONFIG_CHECKPOINT_RESTORE
-static bool vma_flags_mismatch(struct vm_area_struct *vma,
-                              unsigned long required,
-                              unsigned long banned)
-{
-       return (vma->vm_flags & required) != required ||
-               (vma->vm_flags & banned);
-}
-
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+       struct vm_area_struct *vma;
        struct file *exe_file;
        struct dentry *dentry;
        int err;

-       /*
-        * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-        * remain. So perform a quick test first.
-        */
-       if (mm->num_exe_file_vmas)
-               return -EBUSY;
-
        exe_file = fget(fd);
        if (!exe_file)
                return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
        if (err)
                goto exit;

+       down_write(&mm->mmap_sem);
+
+       /*
+        * Forbid mm->exe_file change if there are mapped other files.
+        */
+       err = -EBUSY;
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+                                               &exe_file->f_path))
+                       goto exit_unlock;
+       }
+
        /*
         * The symlink can be changed only once, just to disallow arbitrary
         * transitions malicious software might bring in. This means one
         * could make a snapshot over all processes running and monitor
         * /proc/pid/exe changes to notice unusual activity if needed.
         */
-       down_write(&mm->mmap_sem);
-       if (likely(!mm->exe_file))
-               set_mm_exe_file(mm, exe_file);
-       else
-               err = -EBUSY;
+       err = -EPERM;
+       if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+               goto exit_unlock;
+
+       set_mm_exe_file(mm, exe_file);
+exit_unlock:
        up_write(&mm->mmap_sem);

 exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
        if (opt == PR_SET_MM_EXE_FILE)
                return prctl_set_mm_exe_file(mm, (unsigned int)addr);

-       if (addr >= TASK_SIZE)
+       if (addr >= TASK_SIZE || addr < mmap_min_addr)
                return -EINVAL;

        error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
                        error = -EFAULT;
                        goto out;
                }
-#ifdef CONFIG_STACK_GROWSUP
-               if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
-#else
-               if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
-#endif
-                       goto out;
                if (opt == PR_SET_MM_START_STACK)
                        mm->start_stack = addr;
                else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ out:
        up_read(&mm->mmap_sem);
        return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+       return put_user(me->clear_child_tid, tid_addr);
+}
+
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
                        unsigned long arg4, unsigned long arg5)
 {
        return -EINVAL;
 }
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+       return -EINVAL;
+}
 #endif

 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                        else
                                return -EINVAL;
                        break;
+               case PR_GET_TID_ADDRESS:
+                       error = prctl_get_tid_address(me, (int __user **)arg2);
+                       break;
                default:
                        return -EINVAL;
                }
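A hedged usage sketch for the new option from user space: PR_GET_TID_ADDRESS is 40 in the uapi headers of this era, and the call fails with EINVAL on kernels built without CONFIG_CHECKPOINT_RESTORE.

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_TID_ADDRESS
    #define PR_GET_TID_ADDRESS 40   /* value in the uapi headers of this era */
    #endif

    int main(void)
    {
            int *tid_addr = NULL;

            /* The kernel put_user()s current->clear_child_tid into *arg2. */
            if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0))
                    perror("prctl");  /* EINVAL without CONFIG_CHECKPOINT_RESTORE */
            else
                    printf("clear_child_tid address: %p\n", (void *)tid_addr);
            return 0;
    }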
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6e46cacf5969..6f46a00a1e8a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -962,6 +962,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
                timekeeper.xtime.tv_sec++;
                leap = second_overflow(timekeeper.xtime.tv_sec);
                timekeeper.xtime.tv_sec += leap;
+               timekeeper.wall_to_monotonic.tv_sec -= leap;
        }

        /* Accumulate raw time */
@@ -1077,6 +1078,7 @@ static void update_wall_time(void)
                timekeeper.xtime.tv_sec++;
                leap = second_overflow(timekeeper.xtime.tv_sec);
                timekeeper.xtime.tv_sec += leap;
+               timekeeper.wall_to_monotonic.tv_sec -= leap;
        }

        timekeeping_update(false);
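Why both lines must move together: CLOCK_MONOTONIC is derived as xtime + wall_to_monotonic, so the two values must absorb a leap second in opposite directions or monotonic time steps by one second. A toy calculation (offset value invented for illustration):

    #include <stdio.h>

    int main(void)
    {
            long xtime = 1341100799;         /* 2012-06-30 23:59:59 UTC */
            long wall_to_mono = -1341100000; /* invented offset */
            int leap = -1;                   /* leap second being inserted */

            long mono_before = xtime + wall_to_mono;

            xtime += leap;          /* what the code already did */
            wall_to_mono -= leap;   /* the line these hunks add */

            /* Prints the same value twice; drop the second update and
             * monotonic time would step back by one second. */
            printf("monotonic: before=%ld after=%ld\n",
                   mono_before, xtime + wall_to_mono);
            return 0;
    }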