author     Ingo Molnar <mingo@kernel.org>    2012-06-11 04:51:35 -0400
committer  Ingo Molnar <mingo@kernel.org>    2012-06-11 04:51:35 -0400
commit     c3e228d59bd2054fd57f7f146ef0f6fb0e1996b7 (patch)
tree       1817910408fa7b5027e463725a6eb54933c71928 /kernel
parent     7eb9ba5ed312ec6ed9d22259c5da1acb7cf4bd29 (diff)
parent     cfaf025112d3856637ff34a767ef785ef5cf2ca9 (diff)
Merge tag 'v3.5-rc2' into perf/core
Merge in Linux 3.5-rc2 - to pick up fixes.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c             17
-rw-r--r--  kernel/sched/core.c        187
-rw-r--r--  kernel/sched/fair.c         29
-rw-r--r--  kernel/sched/rt.c            2
-rw-r--r--  kernel/sched/sched.h         2
-rw-r--r--  kernel/sys.c                60
-rw-r--r--  kernel/time/timekeeping.c    2

7 files changed, 214 insertions, 85 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0f3527d6184a..72fcd3069a90 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		mutex_unlock(&cgroup_mutex);
 
 		/*
-		 * Drop the active superblock reference that we took when we
-		 * created the cgroup
+		 * We want to drop the active superblock reference from the
+		 * cgroup creation after all the dentry refs are gone -
+		 * kill_sb gets mighty unhappy otherwise. Mark
+		 * dentry->d_fsdata with cgroup_diput() to tell
+		 * cgroup_d_release() to call deactivate_super().
 		 */
-		deactivate_super(cgrp->root->sb);
+		dentry->d_fsdata = cgroup_diput;
 
 		/*
 		 * if we're getting rid of the cgroup, refcount should ensure
@@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d)
 	return 1;
 }
 
+static void cgroup_d_release(struct dentry *dentry)
+{
+	/* did cgroup_diput() tell me to deactivate super? */
+	if (dentry->d_fsdata == cgroup_diput)
+		deactivate_super(dentry->d_sb);
+}
+
 static void remove_dir(struct dentry *d)
 {
 	struct dentry *parent = dget(d->d_parent);
@@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
 	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
 		.d_delete = cgroup_delete,
+		.d_release = cgroup_d_release,
 	};
 
 	struct inode *inode =
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c46958e26121..d5594a4268d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5556,15 +5556,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static __read_mostly int sched_domain_debug_enabled;
+static __read_mostly int sched_debug_enabled;
 
-static int __init sched_domain_debug_setup(char *str)
+static int __init sched_debug_setup(char *str)
 {
-	sched_domain_debug_enabled = 1;
+	sched_debug_enabled = 1;
 
 	return 0;
 }
-early_param("sched_debug", sched_domain_debug_setup);
+early_param("sched_debug", sched_debug_setup);
+
+static inline bool sched_debug(void)
+{
+	return sched_debug_enabled;
+}
 
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				  struct cpumask *groupmask)
@@ -5604,7 +5609,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!group->sgp->power) {
+		/*
+		 * Even though we initialize ->power to something semi-sane,
+		 * we leave power_orig unset. This allows us to detect if
+		 * domain iteration is still funny without causing /0 traps.
+		 */
+		if (!group->sgp->power_orig) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -5652,7 +5662,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
 	int level = 0;
 
-	if (!sched_domain_debug_enabled)
+	if (!sched_debug_enabled)
 		return;
 
 	if (!sd) {
@@ -5673,6 +5683,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
+static inline bool sched_debug(void)
+{
+	return false;
+}
 #endif /* CONFIG_SCHED_DEBUG */
 
 static int sd_degenerate(struct sched_domain *sd)
@@ -5994,6 +6008,44 @@ struct sched_domain_topology_level {
 	struct sd_data data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+	const struct cpumask *span = sched_domain_span(sd);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+			continue;
+
+		cpumask_set_cpu(i, sched_group_mask(sg));
+	}
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -6012,6 +6064,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
+		child = *per_cpu_ptr(sdd->sd, i);
+
+		/* See the comment near build_group_mask(). */
+		if (!cpumask_test_cpu(i, sched_domain_span(child)))
+			continue;
+
 		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6019,8 +6077,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 			goto fail;
 
 		sg_span = sched_group_cpus(sg);
-
-		child = *per_cpu_ptr(sdd->sd, i);
 		if (child->child) {
 			child = child->child;
 			cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6086,24 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		cpumask_or(covered, covered, sg_span);
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		atomic_inc(&sg->sgp->ref);
+		if (atomic_inc_return(&sg->sgp->ref) == 1)
+			build_group_mask(sd, sg);
 
+		/*
+		 * Initialize sgp->power such that even if we mess up the
+		 * domains and no possible iteration will get us here, we won't
+		 * die on a /0 trap.
+		 */
+		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+
+		/*
+		 * Make sure the first group of this domain contains the
+		 * canonical balance cpu. Otherwise the sched_domain iteration
+		 * breaks. See update_sg_lb_stats().
+		 */
 		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    cpumask_first(sg_span) == cpu) {
-			WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+		    group_balance_cpu(sg) == cpu)
 			groups = sg;
-		}
 
 		if (!first)
 			first = sg;
@@ -6109,6 +6176,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6218,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		sg = sg->next;
 	} while (sg != sd->groups);
 
-	if (cpu != group_first_cpu(sg))
+	if (cpu != group_balance_cpu(sg))
 		return;
 
 	update_group_power(sd, cpu);
@@ -6200,11 +6268,8 @@ int sched_domain_level_max;
 
 static int __init setup_relax_domain_level(char *str)
 {
-	unsigned long val;
-
-	val = simple_strtoul(str, NULL, 0);
-	if (val < sched_domain_level_max)
-		default_relax_domain_level = val;
+	if (kstrtoint(str, 0, &default_relax_domain_level))
+		pr_warn("Unable to set relax_domain_level\n");
 
 	return 1;
 }
@@ -6314,14 +6379,13 @@ static struct sched_domain_topology_level *sched_domain_topology = default_topol
 #ifdef CONFIG_NUMA
 
 static int sched_domains_numa_levels;
-static int sched_domains_numa_scale;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
 
 static inline int sd_local_flags(int level)
 {
-	if (sched_domains_numa_distance[level] > REMOTE_DISTANCE)
+	if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
 		return 0;
 
 	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
@@ -6379,6 +6443,42 @@ static const struct cpumask *sd_numa_mask(int cpu)
 	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
 }
 
+static void sched_numa_warn(const char *str)
+{
+	static int done = false;
+	int i,j;
+
+	if (done)
+		return;
+
+	done = true;
+
+	printk(KERN_WARNING "ERROR: %s\n\n", str);
+
+	for (i = 0; i < nr_node_ids; i++) {
+		printk(KERN_WARNING "  ");
+		for (j = 0; j < nr_node_ids; j++)
+			printk(KERN_CONT "%02d ", node_distance(i,j));
+		printk(KERN_CONT "\n");
+	}
+	printk(KERN_WARNING "\n");
+}
+
+static bool find_numa_distance(int distance)
+{
+	int i;
+
+	if (distance == node_distance(0, 0))
+		return true;
+
+	for (i = 0; i < sched_domains_numa_levels; i++) {
+		if (sched_domains_numa_distance[i] == distance)
+			return true;
+	}
+
+	return false;
+}
+
 static void sched_init_numa(void)
 {
 	int next_distance, curr_distance = node_distance(0, 0);
@@ -6386,7 +6486,6 @@ static void sched_init_numa(void)
 	int level = 0;
 	int i, j, k;
 
-	sched_domains_numa_scale = curr_distance;
 	sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
 	if (!sched_domains_numa_distance)
 		return;
@@ -6397,23 +6496,41 @@ static void sched_init_numa(void)
 	 *
	 * Assumes node_distance(0,j) includes all distances in
 	 * node_distance(i,j) in order to avoid cubic time.
-	 *
-	 * XXX: could be optimized to O(n log n) by using sort()
 	 */
 	next_distance = curr_distance;
 	for (i = 0; i < nr_node_ids; i++) {
 		for (j = 0; j < nr_node_ids; j++) {
-			int distance = node_distance(0, j);
-			if (distance > curr_distance &&
-					(distance < next_distance ||
-					 next_distance == curr_distance))
-				next_distance = distance;
+			for (k = 0; k < nr_node_ids; k++) {
+				int distance = node_distance(i, k);
+
+				if (distance > curr_distance &&
+				    (distance < next_distance ||
+				     next_distance == curr_distance))
+					next_distance = distance;
+
+				/*
+				 * While not a strong assumption it would be nice to know
+				 * about cases where if node A is connected to B, B is not
+				 * equally connected to A.
+				 */
+				if (sched_debug() && node_distance(k, i) != distance)
+					sched_numa_warn("Node-distance not symmetric");
+
+				if (sched_debug() && i && !find_numa_distance(distance))
+					sched_numa_warn("Node-0 not representative");
+			}
+			if (next_distance != curr_distance) {
+				sched_domains_numa_distance[level++] = next_distance;
+				sched_domains_numa_levels = level;
+				curr_distance = next_distance;
+			} else break;
 		}
-		if (next_distance != curr_distance) {
-			sched_domains_numa_distance[level++] = next_distance;
-			sched_domains_numa_levels = level;
-			curr_distance = next_distance;
-		} else break;
+
+		/*
+		 * In case of sched_debug() we verify the above assumption.
+		 */
+		if (!sched_debug())
+			break;
 	}
 	/*
 	 * 'level' contains the number of unique distances, excluding the
@@ -6525,7 +6642,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 		*per_cpu_ptr(sdd->sg, j) = sg;
 
-		sgp = kzalloc_node(sizeof(struct sched_group_power),
+		sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(j));
 		if (!sgp)
 			return -ENOMEM;
@@ -6578,7 +6695,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 	if (!sd)
 		return child;
 
-	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
 	if (child) {
 		sd->level = child->level + 1;
@@ -6586,6 +6702,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		child->parent = sd;
 	}
 	sd->child = child;
+	set_domain_attribute(sd, attr);
 
 	return sd;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2a2d236f27b..c099cc6eebe3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3602,7 +3602,7 @@ void update_group_power(struct sched_domain *sd, int cpu)
 		} while (group != child->groups);
 	}
 
-	sdg->sgp->power = power;
+	sdg->sgp->power_orig = sdg->sgp->power = power;
 }
 
 /*
@@ -3632,7 +3632,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
- * @sd: The sched_domain whose statistics are to be updated.
+ * @env: The load balancing environment.
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
@@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	int i;
 
 	if (local_group)
-		balance_cpu = group_first_cpu(group);
+		balance_cpu = group_balance_cpu(group);
 
 	/* Tally up the load of all CPUs in the group */
 	max_cpu_load = 0;
@@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
-			if (idle_cpu(i) && !first_idle_cpu) {
+			if (idle_cpu(i) && !first_idle_cpu &&
+					cpumask_test_cpu(i, sched_group_mask(group))) {
 				first_idle_cpu = 1;
 				balance_cpu = i;
 			}
@@ -3741,11 +3742,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 /**
  * update_sd_pick_busiest - return 1 on busiest group
- * @sd: sched_domain whose statistics are to be checked
+ * @env: The load balancing environment.
  * @sds: sched_domain statistics
 * @sg: sched_group candidate to be checked for being the busiest
 * @sgs: sched_group statistics
- * @this_cpu: the current cpu
 *
 * Determine if @sg is a busier group than the previously selected
 * busiest group.
@@ -3783,9 +3783,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
- * @sd: sched_domain whose statistics are to be updated.
- * @this_cpu: Cpu for which load balance is currently performed.
- * @idle: Idle status of this_cpu
+ * @env: The load balancing environment.
 * @cpus: Set of cpus considered for load balancing.
 * @balance: Should we balance.
 * @sds: variable to hold the statistics for this sched_domain.
@@ -3874,10 +3872,8 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 * Returns 1 when packing is required and a task should be moved to
 * this CPU. The amount of the imbalance is returned in *imbalance.
 *
- * @sd: The sched_domain whose packing is to be checked.
+ * @env: The load balancing environment.
 * @sds: Statistics of the sched_domain which is to be packed
- * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
- * @imbalance: returns amount of imbalanced due to packing.
 */
 static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 {
@@ -3903,9 +3899,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 * fix_small_imbalance - Calculate the minor imbalance that exists
 *			amongst the groups of a sched_domain, during
 *			load balancing.
+ * @env: The load balancing environment.
 * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
- * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
- * @imbalance: Variable to store the imbalance.
 */
 static inline
 void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
@@ -4048,11 +4043,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 * Also calculates the amount of weighted load which should be moved
 * to restore balance.
 *
- * @sd: The sched_domain whose busiest group is to be returned.
- * @this_cpu: The cpu for which load balancing is currently being performed.
- * @imbalance: Variable which stores amount of weighted load which should
- *		be moved to restore balance/put a group to idle.
- * @idle: The idle status of this_cpu.
+ * @env: The load balancing environment.
 * @cpus: The set of CPUs under consideration for load-balancing.
 * @balance: Pointer to a variable indicating if this_cpu
 *	is the appropriate cpu to perform load balancing at this_level.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2a4e8dffbd6b..573e1ca01102 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1562,7 +1562,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 				     task_running(rq, task) ||
 				     !task->on_rq)) {
 
-				raw_spin_unlock(&lowest_rq->lock);
+				double_unlock_balance(rq, lowest_rq);
 				lowest_rq = NULL;
 				break;
 			}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ba9dccfd24ce..6d52cea7f33d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_id);
 
+extern int group_balance_cpu(struct sched_group *sg);
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..f0ec44dcd415 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
-static bool vma_flags_mismatch(struct vm_area_struct *vma,
-			       unsigned long required,
-			       unsigned long banned)
-{
-	return (vma->vm_flags & required) != required ||
-	       (vma->vm_flags & banned);
-}
-
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+	struct vm_area_struct *vma;
 	struct file *exe_file;
 	struct dentry *dentry;
 	int err;
 
-	/*
-	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-	 * remain. So perform a quick test first.
-	 */
-	if (mm->num_exe_file_vmas)
-		return -EBUSY;
-
 	exe_file = fget(fd);
 	if (!exe_file)
 		return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Forbid mm->exe_file change if there are mapped other files.
+	 */
+	err = -EBUSY;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+						&exe_file->f_path))
+			goto exit_unlock;
+	}
+
 	/*
 	 * The symlink can be changed only once, just to disallow arbitrary
 	 * transitions malicious software might bring in. This means one
 	 * could make a snapshot over all processes running and monitor
 	 * /proc/pid/exe changes to notice unusual activity if needed.
 	 */
-	down_write(&mm->mmap_sem);
-	if (likely(!mm->exe_file))
-		set_mm_exe_file(mm, exe_file);
-	else
-		err = -EBUSY;
+	err = -EPERM;
+	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+		goto exit_unlock;
+
+	set_mm_exe_file(mm, exe_file);
+exit_unlock:
 	up_write(&mm->mmap_sem);
 
 exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	if (opt == PR_SET_MM_EXE_FILE)
 		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
-	if (addr >= TASK_SIZE)
+	if (addr >= TASK_SIZE || addr < mmap_min_addr)
 		return -EINVAL;
 
 	error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
 			error = -EFAULT;
 			goto out;
 		}
-#ifdef CONFIG_STACK_GROWSUP
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
-#else
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
-#endif
-			goto out;
 		if (opt == PR_SET_MM_START_STACK)
 			mm->start_stack = addr;
 		else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ out:
 	up_read(&mm->mmap_sem);
 	return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return put_user(me->clear_child_tid, tid_addr);
+}
+
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	return -EINVAL;
 }
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return -EINVAL;
+}
 #endif
 
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			else
 				return -EINVAL;
 			break;
+		case PR_GET_TID_ADDRESS:
+			error = prctl_get_tid_address(me, (int __user **)arg2);
+			break;
 		default:
 			return -EINVAL;
 		}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6e46cacf5969..6f46a00a1e8a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -962,6 +962,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
 		timekeeper.xtime.tv_sec++;
 		leap = second_overflow(timekeeper.xtime.tv_sec);
 		timekeeper.xtime.tv_sec += leap;
+		timekeeper.wall_to_monotonic.tv_sec -= leap;
 	}
 
 	/* Accumulate raw time */
@@ -1077,6 +1078,7 @@ static void update_wall_time(void)
 		timekeeper.xtime.tv_sec++;
 		leap = second_overflow(timekeeper.xtime.tv_sec);
 		timekeeper.xtime.tv_sec += leap;
+		timekeeper.wall_to_monotonic.tv_sec -= leap;
 	}
 
 	timekeeping_update(false);