diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 19 | ||||
-rw-r--r-- | kernel/auditsc.c | 7 | ||||
-rw-r--r-- | kernel/cgroup.c | 157 | ||||
-rw-r--r-- | kernel/cpuset.c | 4 | ||||
-rw-r--r-- | kernel/exit.c | 98 | ||||
-rw-r--r-- | kernel/futex.c | 50 | ||||
-rw-r--r-- | kernel/futex_compat.c | 9 | ||||
-rw-r--r-- | kernel/irq/chip.c | 20 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 3 | ||||
-rw-r--r-- | kernel/kprobes.c | 52 | ||||
-rw-r--r-- | kernel/lockdep.c | 8 | ||||
-rw-r--r-- | kernel/marker.c | 13 | ||||
-rw-r--r-- | kernel/module.c | 16 | ||||
-rw-r--r-- | kernel/power/disk.c | 4 | ||||
-rw-r--r-- | kernel/power/process.c | 29 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 42 | ||||
-rw-r--r-- | kernel/printk.c | 2 | ||||
-rw-r--r-- | kernel/rcupreempt.c | 233 | ||||
-rw-r--r-- | kernel/res_counter.c | 1 | ||||
-rw-r--r-- | kernel/sched.c | 358 | ||||
-rw-r--r-- | kernel/sched_fair.c | 142 | ||||
-rw-r--r-- | kernel/sched_rt.c | 10 | ||||
-rw-r--r-- | kernel/signal.c | 16 | ||||
-rw-r--r-- | kernel/softirq.c | 1 | ||||
-rw-r--r-- | kernel/softlockup.c | 13 | ||||
-rw-r--r-- | kernel/sysctl.c | 18 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 3 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 4 |
28 files changed, 713 insertions, 619 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 2eeea9a14240..10c4930c2bbf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -170,7 +170,9 @@ void audit_panic(const char *message) | |||
170 | printk(KERN_ERR "audit: %s\n", message); | 170 | printk(KERN_ERR "audit: %s\n", message); |
171 | break; | 171 | break; |
172 | case AUDIT_FAIL_PANIC: | 172 | case AUDIT_FAIL_PANIC: |
173 | panic("audit: %s\n", message); | 173 | /* test audit_pid since printk is always lossy, why bother? */ |
174 | if (audit_pid) | ||
175 | panic("audit: %s\n", message); | ||
174 | break; | 176 | break; |
175 | } | 177 | } |
176 | } | 178 | } |
@@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy) | |||
352 | if (err < 0) { | 354 | if (err < 0) { |
353 | BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ | 355 | BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ |
354 | printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); | 356 | printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); |
357 | audit_log_lost("auditd dissapeared\n"); | ||
355 | audit_pid = 0; | 358 | audit_pid = 0; |
356 | } | 359 | } |
357 | } else { | 360 | } else { |
@@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab) | |||
1350 | if (!audit_rate_check()) { | 1353 | if (!audit_rate_check()) { |
1351 | audit_log_lost("rate limit exceeded"); | 1354 | audit_log_lost("rate limit exceeded"); |
1352 | } else { | 1355 | } else { |
1356 | struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); | ||
1353 | if (audit_pid) { | 1357 | if (audit_pid) { |
1354 | struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); | ||
1355 | nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0); | 1358 | nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0); |
1356 | skb_queue_tail(&audit_skb_queue, ab->skb); | 1359 | skb_queue_tail(&audit_skb_queue, ab->skb); |
1357 | ab->skb = NULL; | 1360 | ab->skb = NULL; |
1358 | wake_up_interruptible(&kauditd_wait); | 1361 | wake_up_interruptible(&kauditd_wait); |
1359 | } else if (printk_ratelimit()) { | 1362 | } else if (nlh->nlmsg_type != AUDIT_EOE) { |
1360 | struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); | 1363 | if (printk_ratelimit()) { |
1361 | printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0)); | 1364 | printk(KERN_NOTICE "type=%d %s\n", |
1362 | } else { | 1365 | nlh->nlmsg_type, |
1363 | audit_log_lost("printk limit exceeded\n"); | 1366 | ab->skb->data + NLMSG_SPACE(0)); |
1367 | } else | ||
1368 | audit_log_lost("printk limit exceeded\n"); | ||
1364 | } | 1369 | } |
1365 | } | 1370 | } |
1366 | audit_buffer_free(ab); | 1371 | audit_buffer_free(ab); |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ac6d9b23b018..782262e4107d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -1000,9 +1000,10 @@ static int audit_log_single_execve_arg(struct audit_context *context, | |||
1000 | * for strings that are too long, we should not have created | 1000 | * for strings that are too long, we should not have created |
1001 | * any. | 1001 | * any. |
1002 | */ | 1002 | */ |
1003 | if (unlikely((len = -1) || len > MAX_ARG_STRLEN - 1)) { | 1003 | if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) { |
1004 | WARN_ON(1); | 1004 | WARN_ON(1); |
1005 | send_sig(SIGKILL, current, 0); | 1005 | send_sig(SIGKILL, current, 0); |
1006 | return -1; | ||
1006 | } | 1007 | } |
1007 | 1008 | ||
1008 | /* walk the whole argument looking for non-ascii chars */ | 1009 | /* walk the whole argument looking for non-ascii chars */ |
@@ -1020,6 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, | |||
1020 | if (ret) { | 1021 | if (ret) { |
1021 | WARN_ON(1); | 1022 | WARN_ON(1); |
1022 | send_sig(SIGKILL, current, 0); | 1023 | send_sig(SIGKILL, current, 0); |
1024 | return -1; | ||
1023 | } | 1025 | } |
1024 | buf[to_send] = '\0'; | 1026 | buf[to_send] = '\0'; |
1025 | has_cntl = audit_string_contains_control(buf, to_send); | 1027 | has_cntl = audit_string_contains_control(buf, to_send); |
@@ -1068,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, | |||
1068 | * so we can be sure nothing was lost. | 1070 | * so we can be sure nothing was lost. |
1069 | */ | 1071 | */ |
1070 | if ((i == 0) && (too_long)) | 1072 | if ((i == 0) && (too_long)) |
1071 | audit_log_format(*ab, "a%d_len=%ld ", arg_num, | 1073 | audit_log_format(*ab, "a%d_len=%zu ", arg_num, |
1072 | has_cntl ? 2*len : len); | 1074 | has_cntl ? 2*len : len); |
1073 | 1075 | ||
1074 | /* | 1076 | /* |
@@ -1083,6 +1085,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, | |||
1083 | if (ret) { | 1085 | if (ret) { |
1084 | WARN_ON(1); | 1086 | WARN_ON(1); |
1085 | send_sig(SIGKILL, current, 0); | 1087 | send_sig(SIGKILL, current, 0); |
1088 | return -1; | ||
1086 | } | 1089 | } |
1087 | buf[to_send] = '\0'; | 1090 | buf[to_send] = '\0'; |
1088 | 1091 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4766bb65e4d9..e9c2fb01e89b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -113,9 +113,9 @@ static int root_count; | |||
113 | #define dummytop (&rootnode.top_cgroup) | 113 | #define dummytop (&rootnode.top_cgroup) |
114 | 114 | ||
115 | /* This flag indicates whether tasks in the fork and exit paths should | 115 | /* This flag indicates whether tasks in the fork and exit paths should |
116 | * take callback_mutex and check for fork/exit handlers to call. This | 116 | * check for fork/exit handlers to call. This avoids us having to do |
117 | * avoids us having to do extra work in the fork/exit path if none of the | 117 | * extra work in the fork/exit path if none of the subsystems need to |
118 | * subsystems need to be called. | 118 | * be called. |
119 | */ | 119 | */ |
120 | static int need_forkexit_callback; | 120 | static int need_forkexit_callback; |
121 | 121 | ||
@@ -307,7 +307,6 @@ static inline void put_css_set_taskexit(struct css_set *cg) | |||
307 | * template: location in which to build the desired set of subsystem | 307 | * template: location in which to build the desired set of subsystem |
308 | * state objects for the new cgroup group | 308 | * state objects for the new cgroup group |
309 | */ | 309 | */ |
310 | |||
311 | static struct css_set *find_existing_css_set( | 310 | static struct css_set *find_existing_css_set( |
312 | struct css_set *oldcg, | 311 | struct css_set *oldcg, |
313 | struct cgroup *cgrp, | 312 | struct cgroup *cgrp, |
@@ -320,7 +319,7 @@ static struct css_set *find_existing_css_set( | |||
320 | /* Built the set of subsystem state objects that we want to | 319 | /* Built the set of subsystem state objects that we want to |
321 | * see in the new css_set */ | 320 | * see in the new css_set */ |
322 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 321 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
323 | if (root->subsys_bits & (1ull << i)) { | 322 | if (root->subsys_bits & (1UL << i)) { |
324 | /* Subsystem is in this hierarchy. So we want | 323 | /* Subsystem is in this hierarchy. So we want |
325 | * the subsystem state from the new | 324 | * the subsystem state from the new |
326 | * cgroup */ | 325 | * cgroup */ |
@@ -354,7 +353,6 @@ static struct css_set *find_existing_css_set( | |||
354 | * and chains them on tmp through their cgrp_link_list fields. Returns 0 on | 353 | * and chains them on tmp through their cgrp_link_list fields. Returns 0 on |
355 | * success or a negative error | 354 | * success or a negative error |
356 | */ | 355 | */ |
357 | |||
358 | static int allocate_cg_links(int count, struct list_head *tmp) | 356 | static int allocate_cg_links(int count, struct list_head *tmp) |
359 | { | 357 | { |
360 | struct cg_cgroup_link *link; | 358 | struct cg_cgroup_link *link; |
@@ -396,7 +394,6 @@ static void free_cg_links(struct list_head *tmp) | |||
396 | * substituted into the appropriate hierarchy. Must be called with | 394 | * substituted into the appropriate hierarchy. Must be called with |
397 | * cgroup_mutex held | 395 | * cgroup_mutex held |
398 | */ | 396 | */ |
399 | |||
400 | static struct css_set *find_css_set( | 397 | static struct css_set *find_css_set( |
401 | struct css_set *oldcg, struct cgroup *cgrp) | 398 | struct css_set *oldcg, struct cgroup *cgrp) |
402 | { | 399 | { |
@@ -473,7 +470,6 @@ static struct css_set *find_css_set( | |||
473 | /* Link this cgroup group into the list */ | 470 | /* Link this cgroup group into the list */ |
474 | list_add(&res->list, &init_css_set.list); | 471 | list_add(&res->list, &init_css_set.list); |
475 | css_set_count++; | 472 | css_set_count++; |
476 | INIT_LIST_HEAD(&res->tasks); | ||
477 | write_unlock(&css_set_lock); | 473 | write_unlock(&css_set_lock); |
478 | 474 | ||
479 | return res; | 475 | return res; |
@@ -507,8 +503,8 @@ static struct css_set *find_css_set( | |||
507 | * critical pieces of code here. The exception occurs on cgroup_exit(), | 503 | * critical pieces of code here. The exception occurs on cgroup_exit(), |
508 | * when a task in a notify_on_release cgroup exits. Then cgroup_mutex | 504 | * when a task in a notify_on_release cgroup exits. Then cgroup_mutex |
509 | * is taken, and if the cgroup count is zero, a usermode call made | 505 | * is taken, and if the cgroup count is zero, a usermode call made |
510 | * to /sbin/cgroup_release_agent with the name of the cgroup (path | 506 | * to the release agent with the name of the cgroup (path relative to |
511 | * relative to the root of cgroup file system) as the argument. | 507 | * the root of cgroup file system) as the argument. |
512 | * | 508 | * |
513 | * A cgroup can only be deleted if both its 'count' of using tasks | 509 | * A cgroup can only be deleted if both its 'count' of using tasks |
514 | * is zero, and its list of 'children' cgroups is empty. Since all | 510 | * is zero, and its list of 'children' cgroups is empty. Since all |
@@ -521,7 +517,7 @@ static struct css_set *find_css_set( | |||
521 | * | 517 | * |
522 | * The need for this exception arises from the action of | 518 | * The need for this exception arises from the action of |
523 | * cgroup_attach_task(), which overwrites one tasks cgroup pointer with | 519 | * cgroup_attach_task(), which overwrites one tasks cgroup pointer with |
524 | * another. It does so using cgroup_mutexe, however there are | 520 | * another. It does so using cgroup_mutex, however there are |
525 | * several performance critical places that need to reference | 521 | * several performance critical places that need to reference |
526 | * task->cgroup without the expense of grabbing a system global | 522 | * task->cgroup without the expense of grabbing a system global |
527 | * mutex. Therefore except as noted below, when dereferencing or, as | 523 | * mutex. Therefore except as noted below, when dereferencing or, as |
@@ -537,7 +533,6 @@ static struct css_set *find_css_set( | |||
537 | * cgroup_lock - lock out any changes to cgroup structures | 533 | * cgroup_lock - lock out any changes to cgroup structures |
538 | * | 534 | * |
539 | */ | 535 | */ |
540 | |||
541 | void cgroup_lock(void) | 536 | void cgroup_lock(void) |
542 | { | 537 | { |
543 | mutex_lock(&cgroup_mutex); | 538 | mutex_lock(&cgroup_mutex); |
@@ -548,7 +543,6 @@ void cgroup_lock(void) | |||
548 | * | 543 | * |
549 | * Undo the lock taken in a previous cgroup_lock() call. | 544 | * Undo the lock taken in a previous cgroup_lock() call. |
550 | */ | 545 | */ |
551 | |||
552 | void cgroup_unlock(void) | 546 | void cgroup_unlock(void) |
553 | { | 547 | { |
554 | mutex_unlock(&cgroup_mutex); | 548 | mutex_unlock(&cgroup_mutex); |
@@ -590,7 +584,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | |||
590 | * Call subsys's pre_destroy handler. | 584 | * Call subsys's pre_destroy handler. |
591 | * This is called before css refcnt check. | 585 | * This is called before css refcnt check. |
592 | */ | 586 | */ |
593 | |||
594 | static void cgroup_call_pre_destroy(struct cgroup *cgrp) | 587 | static void cgroup_call_pre_destroy(struct cgroup *cgrp) |
595 | { | 588 | { |
596 | struct cgroup_subsys *ss; | 589 | struct cgroup_subsys *ss; |
@@ -600,7 +593,6 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
600 | return; | 593 | return; |
601 | } | 594 | } |
602 | 595 | ||
603 | |||
604 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 596 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
605 | { | 597 | { |
606 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 598 | /* is dentry a directory ? if so, kfree() associated cgroup */ |
@@ -696,7 +688,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
696 | added_bits = final_bits & ~root->actual_subsys_bits; | 688 | added_bits = final_bits & ~root->actual_subsys_bits; |
697 | /* Check that any added subsystems are currently free */ | 689 | /* Check that any added subsystems are currently free */ |
698 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 690 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
699 | unsigned long long bit = 1ull << i; | 691 | unsigned long bit = 1UL << i; |
700 | struct cgroup_subsys *ss = subsys[i]; | 692 | struct cgroup_subsys *ss = subsys[i]; |
701 | if (!(bit & added_bits)) | 693 | if (!(bit & added_bits)) |
702 | continue; | 694 | continue; |
@@ -927,7 +919,6 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
927 | if (!inode) | 919 | if (!inode) |
928 | return -ENOMEM; | 920 | return -ENOMEM; |
929 | 921 | ||
930 | inode->i_op = &simple_dir_inode_operations; | ||
931 | inode->i_fop = &simple_dir_operations; | 922 | inode->i_fop = &simple_dir_operations; |
932 | inode->i_op = &cgroup_dir_inode_operations; | 923 | inode->i_op = &cgroup_dir_inode_operations; |
933 | /* directories start off with i_nlink == 2 (for "." entry) */ | 924 | /* directories start off with i_nlink == 2 (for "." entry) */ |
@@ -961,8 +952,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
961 | } | 952 | } |
962 | 953 | ||
963 | root = kzalloc(sizeof(*root), GFP_KERNEL); | 954 | root = kzalloc(sizeof(*root), GFP_KERNEL); |
964 | if (!root) | 955 | if (!root) { |
956 | if (opts.release_agent) | ||
957 | kfree(opts.release_agent); | ||
965 | return -ENOMEM; | 958 | return -ENOMEM; |
959 | } | ||
966 | 960 | ||
967 | init_cgroup_root(root); | 961 | init_cgroup_root(root); |
968 | root->subsys_bits = opts.subsys_bits; | 962 | root->subsys_bits = opts.subsys_bits; |
@@ -1129,8 +1123,13 @@ static inline struct cftype *__d_cft(struct dentry *dentry) | |||
1129 | return dentry->d_fsdata; | 1123 | return dentry->d_fsdata; |
1130 | } | 1124 | } |
1131 | 1125 | ||
1132 | /* | 1126 | /** |
1133 | * Called with cgroup_mutex held. Writes path of cgroup into buf. | 1127 | * cgroup_path - generate the path of a cgroup |
1128 | * @cgrp: the cgroup in question | ||
1129 | * @buf: the buffer to write the path into | ||
1130 | * @buflen: the length of the buffer | ||
1131 | * | ||
1132 | * Called with cgroup_mutex held. Writes path of cgroup into buf. | ||
1134 | * Returns 0 on success, -errno on error. | 1133 | * Returns 0 on success, -errno on error. |
1135 | */ | 1134 | */ |
1136 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | 1135 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) |
@@ -1188,11 +1187,13 @@ static void get_first_subsys(const struct cgroup *cgrp, | |||
1188 | *subsys_id = test_ss->subsys_id; | 1187 | *subsys_id = test_ss->subsys_id; |
1189 | } | 1188 | } |
1190 | 1189 | ||
1191 | /* | 1190 | /** |
1192 | * Attach task 'tsk' to cgroup 'cgrp' | 1191 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' |
1192 | * @cgrp: the cgroup the task is attaching to | ||
1193 | * @tsk: the task to be attached | ||
1193 | * | 1194 | * |
1194 | * Call holding cgroup_mutex. May take task_lock of | 1195 | * Call holding cgroup_mutex. May take task_lock of |
1195 | * the task 'pid' during call. | 1196 | * the task 'tsk' during call. |
1196 | */ | 1197 | */ |
1197 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1198 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
1198 | { | 1199 | { |
@@ -1293,7 +1294,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) | |||
1293 | } | 1294 | } |
1294 | 1295 | ||
1295 | /* The various types of files and directories in a cgroup file system */ | 1296 | /* The various types of files and directories in a cgroup file system */ |
1296 | |||
1297 | enum cgroup_filetype { | 1297 | enum cgroup_filetype { |
1298 | FILE_ROOT, | 1298 | FILE_ROOT, |
1299 | FILE_DIR, | 1299 | FILE_DIR, |
@@ -1584,12 +1584,11 @@ static int cgroup_create_file(struct dentry *dentry, int mode, | |||
1584 | } | 1584 | } |
1585 | 1585 | ||
1586 | /* | 1586 | /* |
1587 | * cgroup_create_dir - create a directory for an object. | 1587 | * cgroup_create_dir - create a directory for an object. |
1588 | * cgrp: the cgroup we create the directory for. | 1588 | * @cgrp: the cgroup we create the directory for. It must have a valid |
1589 | * It must have a valid ->parent field | 1589 | * ->parent field. And we are going to fill its ->dentry field. |
1590 | * And we are going to fill its ->dentry field. | 1590 | * @dentry: dentry of the new cgroup |
1591 | * dentry: dentry of the new cgroup | 1591 | * @mode: mode to set on new directory. |
1592 | * mode: mode to set on new directory. | ||
1593 | */ | 1592 | */ |
1594 | static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | 1593 | static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, |
1595 | int mode) | 1594 | int mode) |
@@ -1651,8 +1650,12 @@ int cgroup_add_files(struct cgroup *cgrp, | |||
1651 | return 0; | 1650 | return 0; |
1652 | } | 1651 | } |
1653 | 1652 | ||
1654 | /* Count the number of tasks in a cgroup. */ | 1653 | /** |
1655 | 1654 | * cgroup_task_count - count the number of tasks in a cgroup. | |
1655 | * @cgrp: the cgroup in question | ||
1656 | * | ||
1657 | * Return the number of tasks in the cgroup. | ||
1658 | */ | ||
1656 | int cgroup_task_count(const struct cgroup *cgrp) | 1659 | int cgroup_task_count(const struct cgroup *cgrp) |
1657 | { | 1660 | { |
1658 | int count = 0; | 1661 | int count = 0; |
@@ -1962,12 +1965,13 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) | |||
1962 | } | 1965 | } |
1963 | 1966 | ||
1964 | /** | 1967 | /** |
1965 | * Build and fill cgroupstats so that taskstats can export it to user | 1968 | * cgroupstats_build - build and fill cgroupstats |
1966 | * space. | ||
1967 | * | ||
1968 | * @stats: cgroupstats to fill information into | 1969 | * @stats: cgroupstats to fill information into |
1969 | * @dentry: A dentry entry belonging to the cgroup for which stats have | 1970 | * @dentry: A dentry entry belonging to the cgroup for which stats have |
1970 | * been requested. | 1971 | * been requested. |
1972 | * | ||
1973 | * Build and fill cgroupstats so that taskstats can export it to user | ||
1974 | * space. | ||
1971 | */ | 1975 | */ |
1972 | int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | 1976 | int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) |
1973 | { | 1977 | { |
@@ -2199,14 +2203,13 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2199 | } | 2203 | } |
2200 | 2204 | ||
2201 | /* | 2205 | /* |
2202 | * cgroup_create - create a cgroup | 2206 | * cgroup_create - create a cgroup |
2203 | * parent: cgroup that will be parent of the new cgroup. | 2207 | * @parent: cgroup that will be parent of the new cgroup |
2204 | * name: name of the new cgroup. Will be strcpy'ed. | 2208 | * @dentry: dentry of the new cgroup |
2205 | * mode: mode to set on new inode | 2209 | * @mode: mode to set on new inode |
2206 | * | 2210 | * |
2207 | * Must be called with the mutex on the parent inode held | 2211 | * Must be called with the mutex on the parent inode held |
2208 | */ | 2212 | */ |
2209 | |||
2210 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 2213 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
2211 | int mode) | 2214 | int mode) |
2212 | { | 2215 | { |
@@ -2229,7 +2232,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2229 | 2232 | ||
2230 | mutex_lock(&cgroup_mutex); | 2233 | mutex_lock(&cgroup_mutex); |
2231 | 2234 | ||
2232 | cgrp->flags = 0; | ||
2233 | INIT_LIST_HEAD(&cgrp->sibling); | 2235 | INIT_LIST_HEAD(&cgrp->sibling); |
2234 | INIT_LIST_HEAD(&cgrp->children); | 2236 | INIT_LIST_HEAD(&cgrp->children); |
2235 | INIT_LIST_HEAD(&cgrp->css_sets); | 2237 | INIT_LIST_HEAD(&cgrp->css_sets); |
@@ -2239,6 +2241,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2239 | cgrp->root = parent->root; | 2241 | cgrp->root = parent->root; |
2240 | cgrp->top_cgroup = parent->top_cgroup; | 2242 | cgrp->top_cgroup = parent->top_cgroup; |
2241 | 2243 | ||
2244 | if (notify_on_release(parent)) | ||
2245 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ||
2246 | |||
2242 | for_each_subsys(root, ss) { | 2247 | for_each_subsys(root, ss) { |
2243 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); | 2248 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); |
2244 | if (IS_ERR(css)) { | 2249 | if (IS_ERR(css)) { |
@@ -2349,13 +2354,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2349 | parent = cgrp->parent; | 2354 | parent = cgrp->parent; |
2350 | root = cgrp->root; | 2355 | root = cgrp->root; |
2351 | sb = root->sb; | 2356 | sb = root->sb; |
2357 | |||
2352 | /* | 2358 | /* |
2353 | * Call pre_destroy handlers of subsys | 2359 | * Call pre_destroy handlers of subsys. Notify subsystems |
2360 | * that an rmdir() request has arrived. | ||
2354 | */ | 2361 | */ |
2355 | cgroup_call_pre_destroy(cgrp); | 2362 | cgroup_call_pre_destroy(cgrp); |
2356 | /* | ||
2357 | * Notify subsyses that rmdir() request comes. | ||
2358 | */ | ||
2359 | 2363 | ||
2360 | if (cgroup_has_css_refs(cgrp)) { | 2364 | if (cgroup_has_css_refs(cgrp)) { |
2361 | mutex_unlock(&cgroup_mutex); | 2365 | mutex_unlock(&cgroup_mutex); |
@@ -2431,8 +2435,10 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2431 | } | 2435 | } |
2432 | 2436 | ||
2433 | /** | 2437 | /** |
2434 | * cgroup_init_early - initialize cgroups at system boot, and | 2438 | * cgroup_init_early - cgroup initialization at system boot |
2435 | * initialize any subsystems that request early init. | 2439 | * |
2440 | * Initialize cgroups at system boot, and initialize any | ||
2441 | * subsystems that request early init. | ||
2436 | */ | 2442 | */ |
2437 | int __init cgroup_init_early(void) | 2443 | int __init cgroup_init_early(void) |
2438 | { | 2444 | { |
@@ -2474,8 +2480,10 @@ int __init cgroup_init_early(void) | |||
2474 | } | 2480 | } |
2475 | 2481 | ||
2476 | /** | 2482 | /** |
2477 | * cgroup_init - register cgroup filesystem and /proc file, and | 2483 | * cgroup_init - cgroup initialization |
2478 | * initialize any subsystems that didn't request early init. | 2484 | * |
2485 | * Register cgroup filesystem and /proc file, and initialize | ||
2486 | * any subsystems that didn't request early init. | ||
2479 | */ | 2487 | */ |
2480 | int __init cgroup_init(void) | 2488 | int __init cgroup_init(void) |
2481 | { | 2489 | { |
@@ -2618,7 +2626,7 @@ static struct file_operations proc_cgroupstats_operations = { | |||
2618 | 2626 | ||
2619 | /** | 2627 | /** |
2620 | * cgroup_fork - attach newly forked task to its parents cgroup. | 2628 | * cgroup_fork - attach newly forked task to its parents cgroup. |
2621 | * @tsk: pointer to task_struct of forking parent process. | 2629 | * @child: pointer to task_struct of the newly forked child process. |
2622 | * | 2630 | * |
2623 | * Description: A task inherits its parent's cgroup at fork(). | 2631 | * Description: A task inherits its parent's cgroup at fork(). |
2624 | * | 2632 | * |
@@ -2642,9 +2650,12 @@ void cgroup_fork(struct task_struct *child) | |||
2642 | } | 2650 | } |
2643 | 2651 | ||
2644 | /** | 2652 | /** |
2645 | * cgroup_fork_callbacks - called on a new task very soon before | 2653 | * cgroup_fork_callbacks - run fork callbacks |
2646 | * adding it to the tasklist. No need to take any locks since no-one | 2654 | * @child: the new task |
2647 | * can be operating on this task | 2655 | * |
2656 | * Called on a new task very soon before adding it to the | ||
2657 | * tasklist. No need to take any locks since no-one can | ||
2658 | * be operating on this task. | ||
2648 | */ | 2659 | */ |
2649 | void cgroup_fork_callbacks(struct task_struct *child) | 2660 | void cgroup_fork_callbacks(struct task_struct *child) |
2650 | { | 2661 | { |
@@ -2659,11 +2670,14 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
2659 | } | 2670 | } |
2660 | 2671 | ||
2661 | /** | 2672 | /** |
2662 | * cgroup_post_fork - called on a new task after adding it to the | 2673 | * cgroup_post_fork - called on a new task after adding it to the task list |
2663 | * task list. Adds the task to the list running through its css_set | 2674 | * @child: the task in question |
2664 | * if necessary. Has to be after the task is visible on the task list | 2675 | * |
2665 | * in case we race with the first call to cgroup_iter_start() - to | 2676 | * Adds the task to the list running through its css_set if necessary. |
2666 | * guarantee that the new task ends up on its list. */ | 2677 | * Has to be after the task is visible on the task list in case we race |
2678 | * with the first call to cgroup_iter_start() - to guarantee that the | ||
2679 | * new task ends up on its list. | ||
2680 | */ | ||
2667 | void cgroup_post_fork(struct task_struct *child) | 2681 | void cgroup_post_fork(struct task_struct *child) |
2668 | { | 2682 | { |
2669 | if (use_task_css_set_links) { | 2683 | if (use_task_css_set_links) { |
@@ -2676,6 +2690,7 @@ void cgroup_post_fork(struct task_struct *child) | |||
2676 | /** | 2690 | /** |
2677 | * cgroup_exit - detach cgroup from exiting task | 2691 | * cgroup_exit - detach cgroup from exiting task |
2678 | * @tsk: pointer to task_struct of exiting process | 2692 | * @tsk: pointer to task_struct of exiting process |
2693 | * @run_callbacks: run exit callbacks? | ||
2679 | * | 2694 | * |
2680 | * Description: Detach cgroup from @tsk and release it. | 2695 | * Description: Detach cgroup from @tsk and release it. |
2681 | * | 2696 | * |
@@ -2706,7 +2721,6 @@ void cgroup_post_fork(struct task_struct *child) | |||
2706 | * top_cgroup isn't going away, and either task has PF_EXITING set, | 2721 | * top_cgroup isn't going away, and either task has PF_EXITING set, |
2707 | * which wards off any cgroup_attach_task() attempts, or task is a failed | 2722 | * which wards off any cgroup_attach_task() attempts, or task is a failed |
2708 | * fork, never visible to cgroup_attach_task. | 2723 | * fork, never visible to cgroup_attach_task. |
2709 | * | ||
2710 | */ | 2724 | */ |
2711 | void cgroup_exit(struct task_struct *tsk, int run_callbacks) | 2725 | void cgroup_exit(struct task_struct *tsk, int run_callbacks) |
2712 | { | 2726 | { |
@@ -2743,9 +2757,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
2743 | } | 2757 | } |
2744 | 2758 | ||
2745 | /** | 2759 | /** |
2746 | * cgroup_clone - duplicate the current cgroup in the hierarchy | 2760 | * cgroup_clone - clone the cgroup the given subsystem is attached to |
2747 | * that the given subsystem is attached to, and move this task into | 2761 | * @tsk: the task to be moved |
2748 | * the new child | 2762 | * @subsys: the given subsystem |
2763 | * | ||
2764 | * Duplicate the current cgroup in the hierarchy that the given | ||
2765 | * subsystem is attached to, and move this task into the new | ||
2766 | * child. | ||
2749 | */ | 2767 | */ |
2750 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) | 2768 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) |
2751 | { | 2769 | { |
@@ -2858,9 +2876,12 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) | |||
2858 | return ret; | 2876 | return ret; |
2859 | } | 2877 | } |
2860 | 2878 | ||
2861 | /* | 2879 | /** |
2862 | * See if "cgrp" is a descendant of the current task's cgroup in | 2880 | * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp |
2863 | * the appropriate hierarchy | 2881 | * @cgrp: the cgroup in question |
2882 | * | ||
2883 | * See if @cgrp is a descendant of the current task's cgroup in | ||
2884 | * the appropriate hierarchy. | ||
2864 | * | 2885 | * |
2865 | * If we are sending in dummytop, then presumably we are creating | 2886 | * If we are sending in dummytop, then presumably we are creating |
2866 | * the top cgroup in the subsystem. | 2887 | * the top cgroup in the subsystem. |
@@ -2939,9 +2960,7 @@ void __css_put(struct cgroup_subsys_state *css) | |||
2939 | * release agent task. We don't bother to wait because the caller of | 2960 | * release agent task. We don't bother to wait because the caller of |
2940 | * this routine has no use for the exit status of the release agent | 2961 | * this routine has no use for the exit status of the release agent |
2941 | * task, so no sense holding our caller up for that. | 2962 | * task, so no sense holding our caller up for that. |
2942 | * | ||
2943 | */ | 2963 | */ |
2944 | |||
2945 | static void cgroup_release_agent(struct work_struct *work) | 2964 | static void cgroup_release_agent(struct work_struct *work) |
2946 | { | 2965 | { |
2947 | BUG_ON(work != &release_agent_work); | 2966 | BUG_ON(work != &release_agent_work); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3e296ed81d4d..a1b61f414228 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -322,8 +322,8 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
322 | * Call without callback_mutex or task_lock() held. May be | 322 | * Call without callback_mutex or task_lock() held. May be |
323 | * called with or without cgroup_mutex held. Thanks in part to | 323 | * called with or without cgroup_mutex held. Thanks in part to |
324 | * 'the_top_cpuset_hack', the task's cpuset pointer will never | 324 | * 'the_top_cpuset_hack', the task's cpuset pointer will never |
325 | * be NULL. This routine also might acquire callback_mutex and | 325 | * be NULL. This routine also might acquire callback_mutex during |
326 | * current->mm->mmap_sem during call. | 326 | * call. |
327 | * | 327 | * |
328 | * Reading current->cpuset->mems_generation doesn't need task_lock | 328 | * Reading current->cpuset->mems_generation doesn't need task_lock |
329 | * to guard the current->cpuset derefence, because it is guarded | 329 | * to guard the current->cpuset derefence, because it is guarded |
diff --git a/kernel/exit.c b/kernel/exit.c index 506a957b665a..53872bf993fa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -214,20 +214,19 @@ struct pid *session_of_pgrp(struct pid *pgrp) | |||
214 | static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) | 214 | static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) |
215 | { | 215 | { |
216 | struct task_struct *p; | 216 | struct task_struct *p; |
217 | int ret = 1; | ||
218 | 217 | ||
219 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 218 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { |
220 | if (p == ignored_task | 219 | if ((p == ignored_task) || |
221 | || p->exit_state | 220 | (p->exit_state && thread_group_empty(p)) || |
222 | || is_global_init(p->real_parent)) | 221 | is_global_init(p->real_parent)) |
223 | continue; | 222 | continue; |
223 | |||
224 | if (task_pgrp(p->real_parent) != pgrp && | 224 | if (task_pgrp(p->real_parent) != pgrp && |
225 | task_session(p->real_parent) == task_session(p)) { | 225 | task_session(p->real_parent) == task_session(p)) |
226 | ret = 0; | 226 | return 0; |
227 | break; | ||
228 | } | ||
229 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 227 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); |
230 | return ret; /* (sighing) "Often!" */ | 228 | |
229 | return 1; | ||
231 | } | 230 | } |
232 | 231 | ||
233 | int is_current_pgrp_orphaned(void) | 232 | int is_current_pgrp_orphaned(void) |
@@ -255,6 +254,37 @@ static int has_stopped_jobs(struct pid *pgrp) | |||
255 | return retval; | 254 | return retval; |
256 | } | 255 | } |
257 | 256 | ||
257 | /* | ||
258 | * Check to see if any process groups have become orphaned as | ||
259 | * a result of our exiting, and if they have any stopped jobs, | ||
260 | * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | ||
261 | */ | ||
262 | static void | ||
263 | kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) | ||
264 | { | ||
265 | struct pid *pgrp = task_pgrp(tsk); | ||
266 | struct task_struct *ignored_task = tsk; | ||
267 | |||
268 | if (!parent) | ||
269 | /* exit: our father is in a different pgrp than | ||
270 | * we are and we were the only connection outside. | ||
271 | */ | ||
272 | parent = tsk->real_parent; | ||
273 | else | ||
274 | /* reparent: our child is in a different pgrp than | ||
275 | * we are, and it was the only connection outside. | ||
276 | */ | ||
277 | ignored_task = NULL; | ||
278 | |||
279 | if (task_pgrp(parent) != pgrp && | ||
280 | task_session(parent) == task_session(tsk) && | ||
281 | will_become_orphaned_pgrp(pgrp, ignored_task) && | ||
282 | has_stopped_jobs(pgrp)) { | ||
283 | __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); | ||
284 | __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); | ||
285 | } | ||
286 | } | ||
287 | |||
258 | /** | 288 | /** |
259 | * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd | 289 | * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd |
260 | * | 290 | * |
@@ -635,22 +665,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
635 | p->exit_signal != -1 && thread_group_empty(p)) | 665 | p->exit_signal != -1 && thread_group_empty(p)) |
636 | do_notify_parent(p, p->exit_signal); | 666 | do_notify_parent(p, p->exit_signal); |
637 | 667 | ||
638 | /* | 668 | kill_orphaned_pgrp(p, father); |
639 | * process group orphan check | ||
640 | * Case ii: Our child is in a different pgrp | ||
641 | * than we are, and it was the only connection | ||
642 | * outside, so the child pgrp is now orphaned. | ||
643 | */ | ||
644 | if ((task_pgrp(p) != task_pgrp(father)) && | ||
645 | (task_session(p) == task_session(father))) { | ||
646 | struct pid *pgrp = task_pgrp(p); | ||
647 | |||
648 | if (will_become_orphaned_pgrp(pgrp, NULL) && | ||
649 | has_stopped_jobs(pgrp)) { | ||
650 | __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); | ||
651 | __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); | ||
652 | } | ||
653 | } | ||
654 | } | 669 | } |
655 | 670 | ||
656 | /* | 671 | /* |
@@ -735,11 +750,9 @@ static void forget_original_parent(struct task_struct *father) | |||
735 | * Send signals to all our closest relatives so that they know | 750 | * Send signals to all our closest relatives so that they know |
736 | * to properly mourn us.. | 751 | * to properly mourn us.. |
737 | */ | 752 | */ |
738 | static void exit_notify(struct task_struct *tsk) | 753 | static void exit_notify(struct task_struct *tsk, int group_dead) |
739 | { | 754 | { |
740 | int state; | 755 | int state; |
741 | struct task_struct *t; | ||
742 | struct pid *pgrp; | ||
743 | 756 | ||
744 | /* | 757 | /* |
745 | * This does two things: | 758 | * This does two things: |
@@ -753,25 +766,8 @@ static void exit_notify(struct task_struct *tsk) | |||
753 | exit_task_namespaces(tsk); | 766 | exit_task_namespaces(tsk); |
754 | 767 | ||
755 | write_lock_irq(&tasklist_lock); | 768 | write_lock_irq(&tasklist_lock); |
756 | /* | 769 | if (group_dead) |
757 | * Check to see if any process groups have become orphaned | 770 | kill_orphaned_pgrp(tsk->group_leader, NULL); |
758 | * as a result of our exiting, and if they have any stopped | ||
759 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | ||
760 | * | ||
761 | * Case i: Our father is in a different pgrp than we are | ||
762 | * and we were the only connection outside, so our pgrp | ||
763 | * is about to become orphaned. | ||
764 | */ | ||
765 | t = tsk->real_parent; | ||
766 | |||
767 | pgrp = task_pgrp(tsk); | ||
768 | if ((task_pgrp(t) != pgrp) && | ||
769 | (task_session(t) == task_session(tsk)) && | ||
770 | will_become_orphaned_pgrp(pgrp, tsk) && | ||
771 | has_stopped_jobs(pgrp)) { | ||
772 | __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); | ||
773 | __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); | ||
774 | } | ||
775 | 771 | ||
776 | /* Let father know we died | 772 | /* Let father know we died |
777 | * | 773 | * |
@@ -788,8 +784,8 @@ static void exit_notify(struct task_struct *tsk) | |||
788 | * the same after a fork. | 784 | * the same after a fork. |
789 | */ | 785 | */ |
790 | if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && | 786 | if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && |
791 | ( tsk->parent_exec_id != t->self_exec_id || | 787 | (tsk->parent_exec_id != tsk->real_parent->self_exec_id || |
792 | tsk->self_exec_id != tsk->parent_exec_id) | 788 | tsk->self_exec_id != tsk->parent_exec_id) |
793 | && !capable(CAP_KILL)) | 789 | && !capable(CAP_KILL)) |
794 | tsk->exit_signal = SIGCHLD; | 790 | tsk->exit_signal = SIGCHLD; |
795 | 791 | ||
@@ -986,7 +982,7 @@ NORET_TYPE void do_exit(long code) | |||
986 | module_put(tsk->binfmt->module); | 982 | module_put(tsk->binfmt->module); |
987 | 983 | ||
988 | proc_exit_connector(tsk); | 984 | proc_exit_connector(tsk); |
989 | exit_notify(tsk); | 985 | exit_notify(tsk, group_dead); |
990 | #ifdef CONFIG_NUMA | 986 | #ifdef CONFIG_NUMA |
991 | mpol_free(tsk->mempolicy); | 987 | mpol_free(tsk->mempolicy); |
992 | tsk->mempolicy = NULL; | 988 | tsk->mempolicy = NULL; |
@@ -1382,7 +1378,7 @@ unlock_sig: | |||
1382 | if (!retval && infop) | 1378 | if (!retval && infop) |
1383 | retval = put_user(0, &infop->si_errno); | 1379 | retval = put_user(0, &infop->si_errno); |
1384 | if (!retval && infop) | 1380 | if (!retval && infop) |
1385 | retval = put_user(why, &infop->si_code); | 1381 | retval = put_user((short)why, &infop->si_code); |
1386 | if (!retval && infop) | 1382 | if (!retval && infop) |
1387 | retval = put_user(exit_code, &infop->si_status); | 1383 | retval = put_user(exit_code, &infop->si_status); |
1388 | if (!retval && infop) | 1384 | if (!retval && infop) |
diff --git a/kernel/futex.c b/kernel/futex.c index 221f2128a437..06968cd79200 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -60,6 +60,8 @@ | |||
60 | 60 | ||
61 | #include "rtmutex_common.h" | 61 | #include "rtmutex_common.h" |
62 | 62 | ||
63 | int __read_mostly futex_cmpxchg_enabled; | ||
64 | |||
63 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) | 65 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) |
64 | 66 | ||
65 | /* | 67 | /* |
@@ -469,6 +471,8 @@ void exit_pi_state_list(struct task_struct *curr) | |||
469 | struct futex_hash_bucket *hb; | 471 | struct futex_hash_bucket *hb; |
470 | union futex_key key; | 472 | union futex_key key; |
471 | 473 | ||
474 | if (!futex_cmpxchg_enabled) | ||
475 | return; | ||
472 | /* | 476 | /* |
473 | * We are a ZOMBIE and nobody can enqueue itself on | 477 | * We are a ZOMBIE and nobody can enqueue itself on |
474 | * pi_state_list anymore, but we have to be careful | 478 | * pi_state_list anymore, but we have to be careful |
@@ -1870,6 +1874,8 @@ asmlinkage long | |||
1870 | sys_set_robust_list(struct robust_list_head __user *head, | 1874 | sys_set_robust_list(struct robust_list_head __user *head, |
1871 | size_t len) | 1875 | size_t len) |
1872 | { | 1876 | { |
1877 | if (!futex_cmpxchg_enabled) | ||
1878 | return -ENOSYS; | ||
1873 | /* | 1879 | /* |
1874 | * The kernel knows only one size for now: | 1880 | * The kernel knows only one size for now: |
1875 | */ | 1881 | */ |
@@ -1894,6 +1900,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, | |||
1894 | struct robust_list_head __user *head; | 1900 | struct robust_list_head __user *head; |
1895 | unsigned long ret; | 1901 | unsigned long ret; |
1896 | 1902 | ||
1903 | if (!futex_cmpxchg_enabled) | ||
1904 | return -ENOSYS; | ||
1905 | |||
1897 | if (!pid) | 1906 | if (!pid) |
1898 | head = current->robust_list; | 1907 | head = current->robust_list; |
1899 | else { | 1908 | else { |
@@ -1997,6 +2006,9 @@ void exit_robust_list(struct task_struct *curr) | |||
1997 | unsigned long futex_offset; | 2006 | unsigned long futex_offset; |
1998 | int rc; | 2007 | int rc; |
1999 | 2008 | ||
2009 | if (!futex_cmpxchg_enabled) | ||
2010 | return; | ||
2011 | |||
2000 | /* | 2012 | /* |
2001 | * Fetch the list head (which was registered earlier, via | 2013 | * Fetch the list head (which was registered earlier, via |
2002 | * sys_set_robust_list()): | 2014 | * sys_set_robust_list()): |
@@ -2051,7 +2063,7 @@ void exit_robust_list(struct task_struct *curr) | |||
2051 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | 2063 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
2052 | u32 __user *uaddr2, u32 val2, u32 val3) | 2064 | u32 __user *uaddr2, u32 val2, u32 val3) |
2053 | { | 2065 | { |
2054 | int ret; | 2066 | int ret = -ENOSYS; |
2055 | int cmd = op & FUTEX_CMD_MASK; | 2067 | int cmd = op & FUTEX_CMD_MASK; |
2056 | struct rw_semaphore *fshared = NULL; | 2068 | struct rw_semaphore *fshared = NULL; |
2057 | 2069 | ||
@@ -2083,13 +2095,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
2083 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); | 2095 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); |
2084 | break; | 2096 | break; |
2085 | case FUTEX_LOCK_PI: | 2097 | case FUTEX_LOCK_PI: |
2086 | ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); | 2098 | if (futex_cmpxchg_enabled) |
2099 | ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); | ||
2087 | break; | 2100 | break; |
2088 | case FUTEX_UNLOCK_PI: | 2101 | case FUTEX_UNLOCK_PI: |
2089 | ret = futex_unlock_pi(uaddr, fshared); | 2102 | if (futex_cmpxchg_enabled) |
2103 | ret = futex_unlock_pi(uaddr, fshared); | ||
2090 | break; | 2104 | break; |
2091 | case FUTEX_TRYLOCK_PI: | 2105 | case FUTEX_TRYLOCK_PI: |
2092 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); | 2106 | if (futex_cmpxchg_enabled) |
2107 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); | ||
2093 | break; | 2108 | break; |
2094 | default: | 2109 | default: |
2095 | ret = -ENOSYS; | 2110 | ret = -ENOSYS; |
@@ -2145,8 +2160,29 @@ static struct file_system_type futex_fs_type = { | |||
2145 | 2160 | ||
2146 | static int __init init(void) | 2161 | static int __init init(void) |
2147 | { | 2162 | { |
2148 | int i = register_filesystem(&futex_fs_type); | 2163 | u32 curval; |
2164 | int i; | ||
2165 | |||
2166 | /* | ||
2167 | * This will fail and we want it. Some arch implementations do | ||
2168 | * runtime detection of the futex_atomic_cmpxchg_inatomic() | ||
2169 | * functionality. We want to know that before we call in any | ||
2170 | * of the complex code paths. Also we want to prevent | ||
2171 | * registration of robust lists in that case. NULL is | ||
2172 | * guaranteed to fault and we get -EFAULT on functional | ||
2173 | * implementation, the non functional ones will return | ||
2174 | * -ENOSYS. | ||
2175 | */ | ||
2176 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); | ||
2177 | if (curval == -EFAULT) | ||
2178 | futex_cmpxchg_enabled = 1; | ||
2149 | 2179 | ||
2180 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | ||
2181 | plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); | ||
2182 | spin_lock_init(&futex_queues[i].lock); | ||
2183 | } | ||
2184 | |||
2185 | i = register_filesystem(&futex_fs_type); | ||
2150 | if (i) | 2186 | if (i) |
2151 | return i; | 2187 | return i; |
2152 | 2188 | ||
@@ -2156,10 +2192,6 @@ static int __init init(void) | |||
2156 | return PTR_ERR(futex_mnt); | 2192 | return PTR_ERR(futex_mnt); |
2157 | } | 2193 | } |
2158 | 2194 | ||
2159 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | ||
2160 | plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); | ||
2161 | spin_lock_init(&futex_queues[i].lock); | ||
2162 | } | ||
2163 | return 0; | 2195 | return 0; |
2164 | } | 2196 | } |
2165 | __initcall(init); | 2197 | __initcall(init); |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 7d5e4b016f39..ff90f049f8f6 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
54 | compat_long_t futex_offset; | 54 | compat_long_t futex_offset; |
55 | int rc; | 55 | int rc; |
56 | 56 | ||
57 | if (!futex_cmpxchg_enabled) | ||
58 | return; | ||
59 | |||
57 | /* | 60 | /* |
58 | * Fetch the list head (which was registered earlier, via | 61 | * Fetch the list head (which was registered earlier, via |
59 | * sys_set_robust_list()): | 62 | * sys_set_robust_list()): |
@@ -115,6 +118,9 @@ asmlinkage long | |||
115 | compat_sys_set_robust_list(struct compat_robust_list_head __user *head, | 118 | compat_sys_set_robust_list(struct compat_robust_list_head __user *head, |
116 | compat_size_t len) | 119 | compat_size_t len) |
117 | { | 120 | { |
121 | if (!futex_cmpxchg_enabled) | ||
122 | return -ENOSYS; | ||
123 | |||
118 | if (unlikely(len != sizeof(*head))) | 124 | if (unlikely(len != sizeof(*head))) |
119 | return -EINVAL; | 125 | return -EINVAL; |
120 | 126 | ||
@@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, | |||
130 | struct compat_robust_list_head __user *head; | 136 | struct compat_robust_list_head __user *head; |
131 | unsigned long ret; | 137 | unsigned long ret; |
132 | 138 | ||
139 | if (!futex_cmpxchg_enabled) | ||
140 | return -ENOSYS; | ||
141 | |||
133 | if (!pid) | 142 | if (!pid) |
134 | head = current->compat_robust_list; | 143 | head = current->compat_robust_list; |
135 | else { | 144 | else { |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index cc54c6276356..fdb3fbe2b0c4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -246,6 +246,17 @@ static unsigned int default_startup(unsigned int irq) | |||
246 | } | 246 | } |
247 | 247 | ||
248 | /* | 248 | /* |
249 | * default shutdown function | ||
250 | */ | ||
251 | static void default_shutdown(unsigned int irq) | ||
252 | { | ||
253 | struct irq_desc *desc = irq_desc + irq; | ||
254 | |||
255 | desc->chip->mask(irq); | ||
256 | desc->status |= IRQ_MASKED; | ||
257 | } | ||
258 | |||
259 | /* | ||
249 | * Fixup enable/disable function pointers | 260 | * Fixup enable/disable function pointers |
250 | */ | 261 | */ |
251 | void irq_chip_set_defaults(struct irq_chip *chip) | 262 | void irq_chip_set_defaults(struct irq_chip *chip) |
@@ -256,8 +267,15 @@ void irq_chip_set_defaults(struct irq_chip *chip) | |||
256 | chip->disable = default_disable; | 267 | chip->disable = default_disable; |
257 | if (!chip->startup) | 268 | if (!chip->startup) |
258 | chip->startup = default_startup; | 269 | chip->startup = default_startup; |
270 | /* | ||
271 | * We use chip->disable, when the user provided its own. When | ||
272 | * we have default_disable set for chip->disable, then we need | ||
273 | * to use default_shutdown, otherwise the irq line is not | ||
274 | * disabled on free_irq(): | ||
275 | */ | ||
259 | if (!chip->shutdown) | 276 | if (!chip->shutdown) |
260 | chip->shutdown = chip->disable; | 277 | chip->shutdown = chip->disable != default_disable ? |
278 | chip->disable : default_shutdown; | ||
261 | if (!chip->name) | 279 | if (!chip->name) |
262 | chip->name = chip->typename; | 280 | chip->name = chip->typename; |
263 | if (!chip->end) | 281 | if (!chip->end) |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index a6b2bc831dd0..088dabbf2d6a 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * This file contains spurious interrupt handling. | 6 | * This file contains spurious interrupt handling. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/jiffies.h> | ||
9 | #include <linux/irq.h> | 10 | #include <linux/irq.h> |
10 | #include <linux/module.h> | 11 | #include <linux/module.h> |
11 | #include <linux/kallsyms.h> | 12 | #include <linux/kallsyms.h> |
@@ -179,7 +180,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
179 | * otherwise the couter becomes a doomsday timer for otherwise | 180 | * otherwise the couter becomes a doomsday timer for otherwise |
180 | * working systems | 181 | * working systems |
181 | */ | 182 | */ |
182 | if (jiffies - desc->last_unhandled > HZ/10) | 183 | if (time_after(jiffies, desc->last_unhandled + HZ/10)) |
183 | desc->irqs_unhandled = 1; | 184 | desc->irqs_unhandled = 1; |
184 | else | 185 | else |
185 | desc->irqs_unhandled++; | 186 | desc->irqs_unhandled++; |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7a86e6432338..fcfb580c3afc 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -498,27 +498,36 @@ static int __kprobes in_kprobes_functions(unsigned long addr) | |||
498 | return 0; | 498 | return 0; |
499 | } | 499 | } |
500 | 500 | ||
501 | /* | ||
502 | * If we have a symbol_name argument, look it up and add the offset field | ||
503 | * to it. This way, we can specify a relative address to a symbol. | ||
504 | */ | ||
505 | static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) | ||
506 | { | ||
507 | kprobe_opcode_t *addr = p->addr; | ||
508 | if (p->symbol_name) { | ||
509 | if (addr) | ||
510 | return NULL; | ||
511 | kprobe_lookup_name(p->symbol_name, addr); | ||
512 | } | ||
513 | |||
514 | if (!addr) | ||
515 | return NULL; | ||
516 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); | ||
517 | } | ||
518 | |||
501 | static int __kprobes __register_kprobe(struct kprobe *p, | 519 | static int __kprobes __register_kprobe(struct kprobe *p, |
502 | unsigned long called_from) | 520 | unsigned long called_from) |
503 | { | 521 | { |
504 | int ret = 0; | 522 | int ret = 0; |
505 | struct kprobe *old_p; | 523 | struct kprobe *old_p; |
506 | struct module *probed_mod; | 524 | struct module *probed_mod; |
525 | kprobe_opcode_t *addr; | ||
507 | 526 | ||
508 | /* | 527 | addr = kprobe_addr(p); |
509 | * If we have a symbol_name argument look it up, | 528 | if (!addr) |
510 | * and add it to the address. That way the addr | ||
511 | * field can either be global or relative to a symbol. | ||
512 | */ | ||
513 | if (p->symbol_name) { | ||
514 | if (p->addr) | ||
515 | return -EINVAL; | ||
516 | kprobe_lookup_name(p->symbol_name, p->addr); | ||
517 | } | ||
518 | |||
519 | if (!p->addr) | ||
520 | return -EINVAL; | 529 | return -EINVAL; |
521 | p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset); | 530 | p->addr = addr; |
522 | 531 | ||
523 | if (!kernel_text_address((unsigned long) p->addr) || | 532 | if (!kernel_text_address((unsigned long) p->addr) || |
524 | in_kprobes_functions((unsigned long) p->addr)) | 533 | in_kprobes_functions((unsigned long) p->addr)) |
@@ -678,8 +687,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp) | |||
678 | unregister_kprobe(&jp->kp); | 687 | unregister_kprobe(&jp->kp); |
679 | } | 688 | } |
680 | 689 | ||
681 | #ifdef ARCH_SUPPORTS_KRETPROBES | 690 | #ifdef CONFIG_KRETPROBES |
682 | |||
683 | /* | 691 | /* |
684 | * This kprobe pre_handler is registered with every kretprobe. When probe | 692 | * This kprobe pre_handler is registered with every kretprobe. When probe |
685 | * hits it will set up the return probe. | 693 | * hits it will set up the return probe. |
@@ -722,12 +730,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
722 | int ret = 0; | 730 | int ret = 0; |
723 | struct kretprobe_instance *inst; | 731 | struct kretprobe_instance *inst; |
724 | int i; | 732 | int i; |
725 | void *addr = rp->kp.addr; | 733 | void *addr; |
726 | 734 | ||
727 | if (kretprobe_blacklist_size) { | 735 | if (kretprobe_blacklist_size) { |
728 | if (addr == NULL) | 736 | addr = kprobe_addr(&rp->kp); |
729 | kprobe_lookup_name(rp->kp.symbol_name, addr); | 737 | if (!addr) |
730 | addr += rp->kp.offset; | 738 | return -EINVAL; |
731 | 739 | ||
732 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { | 740 | for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { |
733 | if (kretprobe_blacklist[i].addr == addr) | 741 | if (kretprobe_blacklist[i].addr == addr) |
@@ -769,8 +777,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
769 | return ret; | 777 | return ret; |
770 | } | 778 | } |
771 | 779 | ||
772 | #else /* ARCH_SUPPORTS_KRETPROBES */ | 780 | #else /* CONFIG_KRETPROBES */ |
773 | |||
774 | int __kprobes register_kretprobe(struct kretprobe *rp) | 781 | int __kprobes register_kretprobe(struct kretprobe *rp) |
775 | { | 782 | { |
776 | return -ENOSYS; | 783 | return -ENOSYS; |
@@ -781,8 +788,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
781 | { | 788 | { |
782 | return 0; | 789 | return 0; |
783 | } | 790 | } |
784 | 791 | #endif /* CONFIG_KRETPROBES */ | |
785 | #endif /* ARCH_SUPPORTS_KRETPROBES */ | ||
786 | 792 | ||
787 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 793 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
788 | { | 794 | { |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3574379f4d62..81a4e4a3f087 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -779,6 +779,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
779 | * parallel walking of the hash-list safe: | 779 | * parallel walking of the hash-list safe: |
780 | */ | 780 | */ |
781 | list_add_tail_rcu(&class->hash_entry, hash_head); | 781 | list_add_tail_rcu(&class->hash_entry, hash_head); |
782 | /* | ||
783 | * Add it to the global list of classes: | ||
784 | */ | ||
785 | list_add_tail_rcu(&class->lock_entry, &all_lock_classes); | ||
782 | 786 | ||
783 | if (verbose(class)) { | 787 | if (verbose(class)) { |
784 | graph_unlock(); | 788 | graph_unlock(); |
@@ -2282,10 +2286,6 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
2282 | return 0; | 2286 | return 0; |
2283 | break; | 2287 | break; |
2284 | case LOCK_USED: | 2288 | case LOCK_USED: |
2285 | /* | ||
2286 | * Add it to the global list of classes: | ||
2287 | */ | ||
2288 | list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); | ||
2289 | debug_atomic_dec(&nr_unused_locks); | 2289 | debug_atomic_dec(&nr_unused_locks); |
2290 | break; | 2290 | break; |
2291 | default: | 2291 | default: |
diff --git a/kernel/marker.c b/kernel/marker.c index c4c2cd8b61f5..48a4ea5afffd 100644 --- a/kernel/marker.c +++ b/kernel/marker.c | |||
@@ -61,8 +61,8 @@ struct marker_entry { | |||
61 | int refcount; /* Number of times armed. 0 if disarmed. */ | 61 | int refcount; /* Number of times armed. 0 if disarmed. */ |
62 | struct rcu_head rcu; | 62 | struct rcu_head rcu; |
63 | void *oldptr; | 63 | void *oldptr; |
64 | char rcu_pending:1; | 64 | unsigned char rcu_pending:1; |
65 | char ptype:1; | 65 | unsigned char ptype:1; |
66 | char name[0]; /* Contains name'\0'format'\0' */ | 66 | char name[0]; /* Contains name'\0'format'\0' */ |
67 | }; | 67 | }; |
68 | 68 | ||
@@ -698,14 +698,12 @@ int marker_probe_unregister(const char *name, | |||
698 | { | 698 | { |
699 | struct marker_entry *entry; | 699 | struct marker_entry *entry; |
700 | struct marker_probe_closure *old; | 700 | struct marker_probe_closure *old; |
701 | int ret = 0; | 701 | int ret = -ENOENT; |
702 | 702 | ||
703 | mutex_lock(&markers_mutex); | 703 | mutex_lock(&markers_mutex); |
704 | entry = get_marker(name); | 704 | entry = get_marker(name); |
705 | if (!entry) { | 705 | if (!entry) |
706 | ret = -ENOENT; | ||
707 | goto end; | 706 | goto end; |
708 | } | ||
709 | if (entry->rcu_pending) | 707 | if (entry->rcu_pending) |
710 | rcu_barrier(); | 708 | rcu_barrier(); |
711 | old = marker_entry_remove_probe(entry, probe, probe_private); | 709 | old = marker_entry_remove_probe(entry, probe, probe_private); |
@@ -713,12 +711,15 @@ int marker_probe_unregister(const char *name, | |||
713 | marker_update_probes(); /* may update entry */ | 711 | marker_update_probes(); /* may update entry */ |
714 | mutex_lock(&markers_mutex); | 712 | mutex_lock(&markers_mutex); |
715 | entry = get_marker(name); | 713 | entry = get_marker(name); |
714 | if (!entry) | ||
715 | goto end; | ||
716 | entry->oldptr = old; | 716 | entry->oldptr = old; |
717 | entry->rcu_pending = 1; | 717 | entry->rcu_pending = 1; |
718 | /* write rcu_pending before calling the RCU callback */ | 718 | /* write rcu_pending before calling the RCU callback */ |
719 | smp_wmb(); | 719 | smp_wmb(); |
720 | call_rcu(&entry->rcu, free_old_closure); | 720 | call_rcu(&entry->rcu, free_old_closure); |
721 | remove_marker(name); /* Ignore busy error message */ | 721 | remove_marker(name); /* Ignore busy error message */ |
722 | ret = 0; | ||
722 | end: | 723 | end: |
723 | mutex_unlock(&markers_mutex); | 724 | mutex_unlock(&markers_mutex); |
724 | return ret; | 725 | return ret; |
diff --git a/kernel/module.c b/kernel/module.c index 92595bad3812..be4807fb90e4 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -987,12 +987,11 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | |||
987 | return ret; | 987 | return ret; |
988 | } | 988 | } |
989 | 989 | ||
990 | |||
991 | /* | 990 | /* |
992 | * /sys/module/foo/sections stuff | 991 | * /sys/module/foo/sections stuff |
993 | * J. Corbet <corbet@lwn.net> | 992 | * J. Corbet <corbet@lwn.net> |
994 | */ | 993 | */ |
995 | #ifdef CONFIG_KALLSYMS | 994 | #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) |
996 | static ssize_t module_sect_show(struct module_attribute *mattr, | 995 | static ssize_t module_sect_show(struct module_attribute *mattr, |
997 | struct module *mod, char *buf) | 996 | struct module *mod, char *buf) |
998 | { | 997 | { |
@@ -1188,7 +1187,7 @@ static inline void add_notes_attrs(struct module *mod, unsigned int nsect, | |||
1188 | static inline void remove_notes_attrs(struct module *mod) | 1187 | static inline void remove_notes_attrs(struct module *mod) |
1189 | { | 1188 | { |
1190 | } | 1189 | } |
1191 | #endif /* CONFIG_KALLSYMS */ | 1190 | #endif |
1192 | 1191 | ||
1193 | #ifdef CONFIG_SYSFS | 1192 | #ifdef CONFIG_SYSFS |
1194 | int module_add_modinfo_attrs(struct module *mod) | 1193 | int module_add_modinfo_attrs(struct module *mod) |
@@ -1231,9 +1230,7 @@ void module_remove_modinfo_attrs(struct module *mod) | |||
1231 | } | 1230 | } |
1232 | kfree(mod->modinfo_attrs); | 1231 | kfree(mod->modinfo_attrs); |
1233 | } | 1232 | } |
1234 | #endif | ||
1235 | 1233 | ||
1236 | #ifdef CONFIG_SYSFS | ||
1237 | int mod_sysfs_init(struct module *mod) | 1234 | int mod_sysfs_init(struct module *mod) |
1238 | { | 1235 | { |
1239 | int err; | 1236 | int err; |
@@ -1936,8 +1933,15 @@ static struct module *load_module(void __user *umod, | |||
1936 | /* Set up license info based on the info section */ | 1933 | /* Set up license info based on the info section */ |
1937 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); | 1934 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); |
1938 | 1935 | ||
1936 | /* | ||
1937 | * ndiswrapper is under GPL by itself, but loads proprietary modules. | ||
1938 | * Don't use add_taint_module(), as it would prevent ndiswrapper from | ||
1939 | * using GPL-only symbols it needs. | ||
1940 | */ | ||
1939 | if (strcmp(mod->name, "ndiswrapper") == 0) | 1941 | if (strcmp(mod->name, "ndiswrapper") == 0) |
1940 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 1942 | add_taint(TAINT_PROPRIETARY_MODULE); |
1943 | |||
1944 | /* driverloader was caught wrongly pretending to be under GPL */ | ||
1941 | if (strcmp(mod->name, "driverloader") == 0) | 1945 | if (strcmp(mod->name, "driverloader") == 0) |
1942 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 1946 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); |
1943 | 1947 | ||
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 859a8e59773a..14a656cdc652 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -391,7 +391,7 @@ int hibernation_platform_enter(void) | |||
391 | goto Close; | 391 | goto Close; |
392 | 392 | ||
393 | suspend_console(); | 393 | suspend_console(); |
394 | error = device_suspend(PMSG_SUSPEND); | 394 | error = device_suspend(PMSG_HIBERNATE); |
395 | if (error) | 395 | if (error) |
396 | goto Resume_console; | 396 | goto Resume_console; |
397 | 397 | ||
@@ -404,7 +404,7 @@ int hibernation_platform_enter(void) | |||
404 | goto Finish; | 404 | goto Finish; |
405 | 405 | ||
406 | local_irq_disable(); | 406 | local_irq_disable(); |
407 | error = device_power_down(PMSG_SUSPEND); | 407 | error = device_power_down(PMSG_HIBERNATE); |
408 | if (!error) { | 408 | if (!error) { |
409 | hibernation_ops->enter(); | 409 | hibernation_ops->enter(); |
410 | /* We should never get here */ | 410 | /* We should never get here */ |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 7c2118f9597f..f1d0b345c9ba 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -75,22 +75,15 @@ void refrigerator(void) | |||
75 | __set_current_state(save); | 75 | __set_current_state(save); |
76 | } | 76 | } |
77 | 77 | ||
78 | static void fake_signal_wake_up(struct task_struct *p, int resume) | 78 | static void fake_signal_wake_up(struct task_struct *p) |
79 | { | 79 | { |
80 | unsigned long flags; | 80 | unsigned long flags; |
81 | 81 | ||
82 | spin_lock_irqsave(&p->sighand->siglock, flags); | 82 | spin_lock_irqsave(&p->sighand->siglock, flags); |
83 | signal_wake_up(p, resume); | 83 | signal_wake_up(p, 0); |
84 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 84 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void send_fake_signal(struct task_struct *p) | ||
88 | { | ||
89 | if (task_is_stopped(p)) | ||
90 | force_sig_specific(SIGSTOP, p); | ||
91 | fake_signal_wake_up(p, task_is_stopped(p)); | ||
92 | } | ||
93 | |||
94 | static int has_mm(struct task_struct *p) | 87 | static int has_mm(struct task_struct *p) |
95 | { | 88 | { |
96 | return (p->mm && !(p->flags & PF_BORROWED_MM)); | 89 | return (p->mm && !(p->flags & PF_BORROWED_MM)); |
@@ -121,7 +114,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only) | |||
121 | if (freezing(p)) { | 114 | if (freezing(p)) { |
122 | if (has_mm(p)) { | 115 | if (has_mm(p)) { |
123 | if (!signal_pending(p)) | 116 | if (!signal_pending(p)) |
124 | fake_signal_wake_up(p, 0); | 117 | fake_signal_wake_up(p); |
125 | } else { | 118 | } else { |
126 | if (with_mm_only) | 119 | if (with_mm_only) |
127 | ret = 0; | 120 | ret = 0; |
@@ -135,7 +128,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only) | |||
135 | } else { | 128 | } else { |
136 | if (has_mm(p)) { | 129 | if (has_mm(p)) { |
137 | set_freeze_flag(p); | 130 | set_freeze_flag(p); |
138 | send_fake_signal(p); | 131 | fake_signal_wake_up(p); |
139 | } else { | 132 | } else { |
140 | if (with_mm_only) { | 133 | if (with_mm_only) { |
141 | ret = 0; | 134 | ret = 0; |
@@ -182,15 +175,17 @@ static int try_to_freeze_tasks(int freeze_user_space) | |||
182 | if (frozen(p) || !freezeable(p)) | 175 | if (frozen(p) || !freezeable(p)) |
183 | continue; | 176 | continue; |
184 | 177 | ||
185 | if (task_is_traced(p) && frozen(p->parent)) { | ||
186 | cancel_freezing(p); | ||
187 | continue; | ||
188 | } | ||
189 | |||
190 | if (!freeze_task(p, freeze_user_space)) | 178 | if (!freeze_task(p, freeze_user_space)) |
191 | continue; | 179 | continue; |
192 | 180 | ||
193 | if (!freezer_should_skip(p)) | 181 | /* |
182 | * Now that we've done set_freeze_flag, don't | ||
183 | * perturb a task in TASK_STOPPED or TASK_TRACED. | ||
184 | * It is "frozen enough". If the task does wake | ||
185 | * up, it will immediately call try_to_freeze. | ||
186 | */ | ||
187 | if (!task_is_stopped_or_traced(p) && | ||
188 | !freezer_should_skip(p)) | ||
194 | todo++; | 189 | todo++; |
195 | } while_each_thread(g, p); | 190 | } while_each_thread(g, p); |
196 | read_unlock(&tasklist_lock); | 191 | read_unlock(&tasklist_lock); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 95250d7c8d91..72a020cabb4c 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -875,8 +875,8 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | |||
875 | #endif /* CONFIG_HIGHMEM */ | 875 | #endif /* CONFIG_HIGHMEM */ |
876 | 876 | ||
877 | /** | 877 | /** |
878 | * saveable - Determine whether a non-highmem page should be included in | 878 | * saveable_page - Determine whether a non-highmem page should be included |
879 | * the suspend image. | 879 | * in the suspend image. |
880 | * | 880 | * |
881 | * We should save the page if it isn't Nosave, and is not in the range | 881 | * We should save the page if it isn't Nosave, and is not in the range |
882 | * of pages statically defined as 'unsaveable', and it isn't a part of | 882 | * of pages statically defined as 'unsaveable', and it isn't a part of |
@@ -897,7 +897,8 @@ static struct page *saveable_page(unsigned long pfn) | |||
897 | if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) | 897 | if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) |
898 | return NULL; | 898 | return NULL; |
899 | 899 | ||
900 | if (PageReserved(page) && pfn_is_nosave(pfn)) | 900 | if (PageReserved(page) |
901 | && (!kernel_page_present(page) || pfn_is_nosave(pfn))) | ||
901 | return NULL; | 902 | return NULL; |
902 | 903 | ||
903 | return page; | 904 | return page; |
@@ -938,6 +939,25 @@ static inline void do_copy_page(long *dst, long *src) | |||
938 | *dst++ = *src++; | 939 | *dst++ = *src++; |
939 | } | 940 | } |
940 | 941 | ||
942 | |||
943 | /** | ||
944 | * safe_copy_page - check if the page we are going to copy is marked as | ||
945 | * present in the kernel page tables (this always is the case if | ||
946 | * CONFIG_DEBUG_PAGEALLOC is not set and in that case | ||
947 | * kernel_page_present() always returns 'true'). | ||
948 | */ | ||
949 | static void safe_copy_page(void *dst, struct page *s_page) | ||
950 | { | ||
951 | if (kernel_page_present(s_page)) { | ||
952 | do_copy_page(dst, page_address(s_page)); | ||
953 | } else { | ||
954 | kernel_map_pages(s_page, 1, 1); | ||
955 | do_copy_page(dst, page_address(s_page)); | ||
956 | kernel_map_pages(s_page, 1, 0); | ||
957 | } | ||
958 | } | ||
959 | |||
960 | |||
941 | #ifdef CONFIG_HIGHMEM | 961 | #ifdef CONFIG_HIGHMEM |
942 | static inline struct page * | 962 | static inline struct page * |
943 | page_is_saveable(struct zone *zone, unsigned long pfn) | 963 | page_is_saveable(struct zone *zone, unsigned long pfn) |
@@ -946,8 +966,7 @@ page_is_saveable(struct zone *zone, unsigned long pfn) | |||
946 | saveable_highmem_page(pfn) : saveable_page(pfn); | 966 | saveable_highmem_page(pfn) : saveable_page(pfn); |
947 | } | 967 | } |
948 | 968 | ||
949 | static inline void | 969 | static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
950 | copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | ||
951 | { | 970 | { |
952 | struct page *s_page, *d_page; | 971 | struct page *s_page, *d_page; |
953 | void *src, *dst; | 972 | void *src, *dst; |
@@ -961,29 +980,26 @@ copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
961 | kunmap_atomic(src, KM_USER0); | 980 | kunmap_atomic(src, KM_USER0); |
962 | kunmap_atomic(dst, KM_USER1); | 981 | kunmap_atomic(dst, KM_USER1); |
963 | } else { | 982 | } else { |
964 | src = page_address(s_page); | ||
965 | if (PageHighMem(d_page)) { | 983 | if (PageHighMem(d_page)) { |
966 | /* Page pointed to by src may contain some kernel | 984 | /* Page pointed to by src may contain some kernel |
967 | * data modified by kmap_atomic() | 985 | * data modified by kmap_atomic() |
968 | */ | 986 | */ |
969 | do_copy_page(buffer, src); | 987 | safe_copy_page(buffer, s_page); |
970 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); | 988 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); |
971 | memcpy(dst, buffer, PAGE_SIZE); | 989 | memcpy(dst, buffer, PAGE_SIZE); |
972 | kunmap_atomic(dst, KM_USER0); | 990 | kunmap_atomic(dst, KM_USER0); |
973 | } else { | 991 | } else { |
974 | dst = page_address(d_page); | 992 | safe_copy_page(page_address(d_page), s_page); |
975 | do_copy_page(dst, src); | ||
976 | } | 993 | } |
977 | } | 994 | } |
978 | } | 995 | } |
979 | #else | 996 | #else |
980 | #define page_is_saveable(zone, pfn) saveable_page(pfn) | 997 | #define page_is_saveable(zone, pfn) saveable_page(pfn) |
981 | 998 | ||
982 | static inline void | 999 | static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
983 | copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | ||
984 | { | 1000 | { |
985 | do_copy_page(page_address(pfn_to_page(dst_pfn)), | 1001 | safe_copy_page(page_address(pfn_to_page(dst_pfn)), |
986 | page_address(pfn_to_page(src_pfn))); | 1002 | pfn_to_page(src_pfn)); |
987 | } | 1003 | } |
988 | #endif /* CONFIG_HIGHMEM */ | 1004 | #endif /* CONFIG_HIGHMEM */ |
989 | 1005 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index bee36100f110..9adc2a473e6e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -666,7 +666,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
666 | } | 666 | } |
667 | /* Emit the output into the temporary buffer */ | 667 | /* Emit the output into the temporary buffer */ |
668 | printed_len += vscnprintf(printk_buf + printed_len, | 668 | printed_len += vscnprintf(printk_buf + printed_len, |
669 | sizeof(printk_buf), fmt, args); | 669 | sizeof(printk_buf) - printed_len, fmt, args); |
670 | 670 | ||
671 | /* | 671 | /* |
672 | * Copy the output into log_buf. If the caller didn't provide | 672 | * Copy the output into log_buf. If the caller didn't provide |
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 987cfb7ade89..e9517014b57c 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c | |||
@@ -23,6 +23,10 @@ | |||
23 | * to Suparna Bhattacharya for pushing me completely away | 23 | * to Suparna Bhattacharya for pushing me completely away |
24 | * from atomic instructions on the read side. | 24 | * from atomic instructions on the read side. |
25 | * | 25 | * |
26 | * - Added handling of Dynamic Ticks | ||
27 | * Copyright 2007 - Paul E. McKenney <paulmck@us.ibm.com> | ||
28 | * - Steven Rostedt <srostedt@redhat.com> | ||
29 | * | ||
26 | * Papers: http://www.rdrop.com/users/paulmck/RCU | 30 | * Papers: http://www.rdrop.com/users/paulmck/RCU |
27 | * | 31 | * |
28 | * Design Document: http://lwn.net/Articles/253651/ | 32 | * Design Document: http://lwn.net/Articles/253651/ |
@@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) | |||
409 | } | 413 | } |
410 | } | 414 | } |
411 | 415 | ||
416 | #ifdef CONFIG_NO_HZ | ||
417 | |||
418 | DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; | ||
419 | static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); | ||
420 | static DEFINE_PER_CPU(int, rcu_update_flag); | ||
421 | |||
422 | /** | ||
423 | * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. | ||
424 | * | ||
425 | * If the CPU was idle with dynamic ticks active, this updates the | ||
426 | * dynticks_progress_counter to let the RCU handling know that the | ||
427 | * CPU is active. | ||
428 | */ | ||
429 | void rcu_irq_enter(void) | ||
430 | { | ||
431 | int cpu = smp_processor_id(); | ||
432 | |||
433 | if (per_cpu(rcu_update_flag, cpu)) | ||
434 | per_cpu(rcu_update_flag, cpu)++; | ||
435 | |||
436 | /* | ||
437 | * Only update if we are coming from a stopped ticks mode | ||
438 | * (dynticks_progress_counter is even). | ||
439 | */ | ||
440 | if (!in_interrupt() && | ||
441 | (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { | ||
442 | /* | ||
443 | * The following might seem like we could have a race | ||
444 | * with NMI/SMIs. But this really isn't a problem. | ||
445 | * Here we do a read/modify/write, and the race happens | ||
446 | * when an NMI/SMI comes in after the read and before | ||
447 | * the write. But NMI/SMIs will increment this counter | ||
448 | * twice before returning, so the zero bit will not | ||
449 | * be corrupted by the NMI/SMI which is the most important | ||
450 | * part. | ||
451 | * | ||
452 | * The only thing is that we would bring back the counter | ||
453 | * to a position that it was in during the NMI/SMI. | ||
454 | * But the zero bit would be set, so the rest of the | ||
455 | * counter would again be ignored. | ||
456 | * | ||
457 | * On return from the IRQ, the counter may have the zero | ||
458 | * bit be 0 and the counter the same as the return from | ||
459 | * the NMI/SMI. If the state machine was so unlucky to | ||
460 | * see that, it still doesn't matter, since all | ||
461 | * RCU read-side critical sections on this CPU would | ||
462 | * have already completed. | ||
463 | */ | ||
464 | per_cpu(dynticks_progress_counter, cpu)++; | ||
465 | /* | ||
466 | * The following memory barrier ensures that any | ||
467 | * rcu_read_lock() primitives in the irq handler | ||
468 | * are seen by other CPUs to follow the above | ||
469 | * increment to dynticks_progress_counter. This is | ||
470 | * required in order for other CPUs to correctly | ||
471 | * determine when it is safe to advance the RCU | ||
472 | * grace-period state machine. | ||
473 | */ | ||
474 | smp_mb(); /* see above block comment. */ | ||
475 | /* | ||
476 | * Since we can't determine the dynamic tick mode from | ||
477 | * the dynticks_progress_counter after this routine, | ||
478 | * we use a second flag to acknowledge that we came | ||
479 | * from an idle state with ticks stopped. | ||
480 | */ | ||
481 | per_cpu(rcu_update_flag, cpu)++; | ||
482 | /* | ||
483 | * If we take an NMI/SMI now, they will also increment | ||
484 | * the rcu_update_flag, and will not update the | ||
485 | * dynticks_progress_counter on exit. That is for | ||
486 | * this IRQ to do. | ||
487 | */ | ||
488 | } | ||
489 | } | ||
490 | |||
491 | /** | ||
492 | * rcu_irq_exit - Called from exiting Hard irq context. | ||
493 | * | ||
494 | * If the CPU was idle with dynamic ticks active, update the | ||
495 | * dynticks_progress_counter to let the RCU handling be | ||
496 | * aware that the CPU is going back to idle with no ticks. | ||
497 | */ | ||
498 | void rcu_irq_exit(void) | ||
499 | { | ||
500 | int cpu = smp_processor_id(); | ||
501 | |||
502 | /* | ||
503 | * rcu_update_flag is set if we interrupted the CPU | ||
504 | * when it was idle with ticks stopped. | ||
505 | * Once this occurs, we keep track of interrupt nesting | ||
506 | * because a NMI/SMI could also come in, and we still | ||
507 | * only want the IRQ that started the increment of the | ||
508 | * dynticks_progress_counter to be the one that modifies | ||
509 | * it on exit. | ||
510 | */ | ||
511 | if (per_cpu(rcu_update_flag, cpu)) { | ||
512 | if (--per_cpu(rcu_update_flag, cpu)) | ||
513 | return; | ||
514 | |||
515 | /* This must match the interrupt nesting */ | ||
516 | WARN_ON(in_interrupt()); | ||
517 | |||
518 | /* | ||
519 | * If an NMI/SMI happens now we are still | ||
520 | * protected by the dynticks_progress_counter being odd. | ||
521 | */ | ||
522 | |||
523 | /* | ||
524 | * The following memory barrier ensures that any | ||
525 | * rcu_read_unlock() primitives in the irq handler | ||
526 | * are seen by other CPUs to precede the following | ||
527 | * increment to dynticks_progress_counter. This | ||
528 | * is required in order for other CPUs to determine | ||
529 | * when it is safe to advance the RCU grace-period | ||
530 | * state machine. | ||
531 | */ | ||
532 | smp_mb(); /* see above block comment. */ | ||
533 | per_cpu(dynticks_progress_counter, cpu)++; | ||
534 | WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); | ||
535 | } | ||
536 | } | ||
537 | |||
538 | static void dyntick_save_progress_counter(int cpu) | ||
539 | { | ||
540 | per_cpu(rcu_dyntick_snapshot, cpu) = | ||
541 | per_cpu(dynticks_progress_counter, cpu); | ||
542 | } | ||
543 | |||
544 | static inline int | ||
545 | rcu_try_flip_waitack_needed(int cpu) | ||
546 | { | ||
547 | long curr; | ||
548 | long snap; | ||
549 | |||
550 | curr = per_cpu(dynticks_progress_counter, cpu); | ||
551 | snap = per_cpu(rcu_dyntick_snapshot, cpu); | ||
552 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
553 | |||
554 | /* | ||
555 | * If the CPU remained in dynticks mode for the entire time | ||
556 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
557 | * then it cannot be in the middle of an rcu_read_lock(), so | ||
558 | * the next rcu_read_lock() it executes must use the new value | ||
559 | * of the counter. So we can safely pretend that this CPU | ||
560 | * already acknowledged the counter. | ||
561 | */ | ||
562 | |||
563 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
564 | return 0; | ||
565 | |||
566 | /* | ||
567 | * If the CPU passed through or entered a dynticks idle phase with | ||
568 | * no active irq handlers, then, as above, we can safely pretend | ||
569 | * that this CPU already acknowledged the counter. | ||
570 | */ | ||
571 | |||
572 | if ((curr - snap) > 2 || (snap & 0x1) == 0) | ||
573 | return 0; | ||
574 | |||
575 | /* We need this CPU to explicitly acknowledge the counter flip. */ | ||
576 | |||
577 | return 1; | ||
578 | } | ||
579 | |||
580 | static inline int | ||
581 | rcu_try_flip_waitmb_needed(int cpu) | ||
582 | { | ||
583 | long curr; | ||
584 | long snap; | ||
585 | |||
586 | curr = per_cpu(dynticks_progress_counter, cpu); | ||
587 | snap = per_cpu(rcu_dyntick_snapshot, cpu); | ||
588 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
589 | |||
590 | /* | ||
591 | * If the CPU remained in dynticks mode for the entire time | ||
592 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
593 | * then it cannot have executed an RCU read-side critical section | ||
594 | * during that time, so there is no need for it to execute a | ||
595 | * memory barrier. | ||
596 | */ | ||
597 | |||
598 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
599 | return 0; | ||
600 | |||
601 | /* | ||
602 | * If the CPU either entered or exited an outermost interrupt, | ||
603 | * SMI, NMI, or whatever handler, then we know that it executed | ||
604 | * a memory barrier when doing so. So we don't need another one. | ||
605 | */ | ||
606 | if (curr != snap) | ||
607 | return 0; | ||
608 | |||
609 | /* We need the CPU to execute a memory barrier. */ | ||
610 | |||
611 | return 1; | ||
612 | } | ||
613 | |||
614 | #else /* !CONFIG_NO_HZ */ | ||
615 | |||
616 | # define dyntick_save_progress_counter(cpu) do { } while (0) | ||
617 | # define rcu_try_flip_waitack_needed(cpu) (1) | ||
618 | # define rcu_try_flip_waitmb_needed(cpu) (1) | ||
619 | |||
620 | #endif /* CONFIG_NO_HZ */ | ||
621 | |||
412 | /* | 622 | /* |
413 | * Get here when RCU is idle. Decide whether we need to | 623 | * Get here when RCU is idle. Decide whether we need to |
414 | * move out of idle state, and return non-zero if so. | 624 | * move out of idle state, and return non-zero if so. |
@@ -447,8 +657,10 @@ rcu_try_flip_idle(void) | |||
447 | 657 | ||
448 | /* Now ask each CPU for acknowledgement of the flip. */ | 658 | /* Now ask each CPU for acknowledgement of the flip. */ |
449 | 659 | ||
450 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 660 | for_each_cpu_mask(cpu, rcu_cpu_online_map) { |
451 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; | 661 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; |
662 | dyntick_save_progress_counter(cpu); | ||
663 | } | ||
452 | 664 | ||
453 | return 1; | 665 | return 1; |
454 | } | 666 | } |
@@ -464,7 +676,8 @@ rcu_try_flip_waitack(void) | |||
464 | 676 | ||
465 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); | 677 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); |
466 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 678 | for_each_cpu_mask(cpu, rcu_cpu_online_map) |
467 | if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { | 679 | if (rcu_try_flip_waitack_needed(cpu) && |
680 | per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { | ||
468 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); | 681 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); |
469 | return 0; | 682 | return 0; |
470 | } | 683 | } |
@@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void) | |||
509 | smp_mb(); /* ^^^^^^^^^^^^ */ | 722 | smp_mb(); /* ^^^^^^^^^^^^ */ |
510 | 723 | ||
511 | /* Call for a memory barrier from each CPU. */ | 724 | /* Call for a memory barrier from each CPU. */ |
512 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 725 | for_each_cpu_mask(cpu, rcu_cpu_online_map) { |
513 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; | 726 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; |
727 | dyntick_save_progress_counter(cpu); | ||
728 | } | ||
514 | 729 | ||
515 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); | 730 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); |
516 | return 1; | 731 | return 1; |
@@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void) | |||
528 | 743 | ||
529 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); | 744 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); |
530 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 745 | for_each_cpu_mask(cpu, rcu_cpu_online_map) |
531 | if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { | 746 | if (rcu_try_flip_waitmb_needed(cpu) && |
747 | per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { | ||
532 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); | 748 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); |
533 | return 0; | 749 | return 0; |
534 | } | 750 | } |
@@ -702,8 +918,9 @@ void rcu_offline_cpu(int cpu) | |||
702 | * fix. | 918 | * fix. |
703 | */ | 919 | */ |
704 | 920 | ||
921 | local_irq_save(flags); | ||
705 | rdp = RCU_DATA_ME(); | 922 | rdp = RCU_DATA_ME(); |
706 | spin_lock_irqsave(&rdp->lock, flags); | 923 | spin_lock(&rdp->lock); |
707 | *rdp->nexttail = list; | 924 | *rdp->nexttail = list; |
708 | if (list) | 925 | if (list) |
709 | rdp->nexttail = tail; | 926 | rdp->nexttail = tail; |
@@ -735,9 +952,11 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
735 | { | 952 | { |
736 | unsigned long flags; | 953 | unsigned long flags; |
737 | struct rcu_head *next, *list; | 954 | struct rcu_head *next, *list; |
738 | struct rcu_data *rdp = RCU_DATA_ME(); | 955 | struct rcu_data *rdp; |
739 | 956 | ||
740 | spin_lock_irqsave(&rdp->lock, flags); | 957 | local_irq_save(flags); |
958 | rdp = RCU_DATA_ME(); | ||
959 | spin_lock(&rdp->lock); | ||
741 | list = rdp->donelist; | 960 | list = rdp->donelist; |
742 | if (list == NULL) { | 961 | if (list == NULL) { |
743 | spin_unlock_irqrestore(&rdp->lock, flags); | 962 | spin_unlock_irqrestore(&rdp->lock, flags); |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 16cbec2d5d60..efbfc0fc232f 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -113,6 +113,7 @@ ssize_t res_counter_write(struct res_counter *counter, int member, | |||
113 | 113 | ||
114 | ret = -EINVAL; | 114 | ret = -EINVAL; |
115 | 115 | ||
116 | strstrip(buf); | ||
116 | if (write_strategy) { | 117 | if (write_strategy) { |
117 | if (write_strategy(buf, &tmp)) { | 118 | if (write_strategy(buf, &tmp)) { |
118 | goto out_free; | 119 | goto out_free; |
diff --git a/kernel/sched.c b/kernel/sched.c index f28f19e65b59..b02e4fc25645 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -174,41 +174,6 @@ struct task_group { | |||
174 | struct sched_entity **se; | 174 | struct sched_entity **se; |
175 | /* runqueue "owned" by this group on each cpu */ | 175 | /* runqueue "owned" by this group on each cpu */ |
176 | struct cfs_rq **cfs_rq; | 176 | struct cfs_rq **cfs_rq; |
177 | |||
178 | /* | ||
179 | * shares assigned to a task group governs how much of cpu bandwidth | ||
180 | * is allocated to the group. The more shares a group has, the more is | ||
181 | * the cpu bandwidth allocated to it. | ||
182 | * | ||
183 | * For ex, lets say that there are three task groups, A, B and C which | ||
184 | * have been assigned shares 1000, 2000 and 3000 respectively. Then, | ||
185 | * cpu bandwidth allocated by the scheduler to task groups A, B and C | ||
186 | * should be: | ||
187 | * | ||
188 | * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66% | ||
189 | * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33% | ||
190 | * Bw(C) = 3000/(1000+2000+3000) * 100 = 50% | ||
191 | * | ||
192 | * The weight assigned to a task group's schedulable entities on every | ||
193 | * cpu (task_group.se[a_cpu]->load.weight) is derived from the task | ||
194 | * group's shares. For ex: lets say that task group A has been | ||
195 | * assigned shares of 1000 and there are two CPUs in a system. Then, | ||
196 | * | ||
197 | * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000; | ||
198 | * | ||
199 | * Note: It's not necessary that each of a task's group schedulable | ||
200 | * entity have the same weight on all CPUs. If the group | ||
201 | * has 2 of its tasks on CPU0 and 1 task on CPU1, then a | ||
202 | * better distribution of weight could be: | ||
203 | * | ||
204 | * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333 | ||
205 | * tg_A->se[1]->load.weight = 1/2 * 2000 = 667 | ||
206 | * | ||
207 | * rebalance_shares() is responsible for distributing the shares of a | ||
208 | * task groups like this among the group's schedulable entities across | ||
209 | * cpus. | ||
210 | * | ||
211 | */ | ||
212 | unsigned long shares; | 177 | unsigned long shares; |
213 | #endif | 178 | #endif |
214 | 179 | ||
@@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
250 | static DEFINE_MUTEX(doms_cur_mutex); | 215 | static DEFINE_MUTEX(doms_cur_mutex); |
251 | 216 | ||
252 | #ifdef CONFIG_FAIR_GROUP_SCHED | 217 | #ifdef CONFIG_FAIR_GROUP_SCHED |
253 | #ifdef CONFIG_SMP | ||
254 | /* kernel thread that runs rebalance_shares() periodically */ | ||
255 | static struct task_struct *lb_monitor_task; | ||
256 | static int load_balance_monitor(void *unused); | ||
257 | #endif | ||
258 | |||
259 | static void set_se_shares(struct sched_entity *se, unsigned long shares); | ||
260 | |||
261 | #ifdef CONFIG_USER_SCHED | 218 | #ifdef CONFIG_USER_SCHED |
262 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 219 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
263 | #else | 220 | #else |
264 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 221 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
265 | #endif | 222 | #endif |
266 | 223 | ||
267 | #define MIN_GROUP_SHARES 2 | ||
268 | |||
269 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | 224 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; |
270 | #endif | 225 | #endif |
271 | 226 | ||
@@ -668,6 +623,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
668 | */ | 623 | */ |
669 | unsigned int sysctl_sched_rt_period = 1000000; | 624 | unsigned int sysctl_sched_rt_period = 1000000; |
670 | 625 | ||
626 | static __read_mostly int scheduler_running; | ||
627 | |||
671 | /* | 628 | /* |
672 | * part of the period that we allow rt tasks to run in us. | 629 | * part of the period that we allow rt tasks to run in us. |
673 | * default: 0.95s | 630 | * default: 0.95s |
@@ -689,14 +646,16 @@ unsigned long long cpu_clock(int cpu) | |||
689 | unsigned long flags; | 646 | unsigned long flags; |
690 | struct rq *rq; | 647 | struct rq *rq; |
691 | 648 | ||
692 | local_irq_save(flags); | ||
693 | rq = cpu_rq(cpu); | ||
694 | /* | 649 | /* |
695 | * Only call sched_clock() if the scheduler has already been | 650 | * Only call sched_clock() if the scheduler has already been |
696 | * initialized (some code might call cpu_clock() very early): | 651 | * initialized (some code might call cpu_clock() very early): |
697 | */ | 652 | */ |
698 | if (rq->idle) | 653 | if (unlikely(!scheduler_running)) |
699 | update_rq_clock(rq); | 654 | return 0; |
655 | |||
656 | local_irq_save(flags); | ||
657 | rq = cpu_rq(cpu); | ||
658 | update_rq_clock(rq); | ||
700 | now = rq->clock; | 659 | now = rq->clock; |
701 | local_irq_restore(flags); | 660 | local_irq_restore(flags); |
702 | 661 | ||
@@ -1241,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | |||
1241 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1200 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
1242 | #endif | 1201 | #endif |
1243 | 1202 | ||
1244 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | ||
1245 | { | ||
1246 | update_load_add(&rq->load, load); | ||
1247 | } | ||
1248 | |||
1249 | static inline void dec_cpu_load(struct rq *rq, unsigned long load) | ||
1250 | { | ||
1251 | update_load_sub(&rq->load, load); | ||
1252 | } | ||
1253 | |||
1254 | #ifdef CONFIG_SMP | 1203 | #ifdef CONFIG_SMP |
1255 | static unsigned long source_load(int cpu, int type); | 1204 | static unsigned long source_load(int cpu, int type); |
1256 | static unsigned long target_load(int cpu, int type); | 1205 | static unsigned long target_load(int cpu, int type); |
@@ -1268,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | |||
1268 | 1217 | ||
1269 | #define sched_class_highest (&rt_sched_class) | 1218 | #define sched_class_highest (&rt_sched_class) |
1270 | 1219 | ||
1271 | static void inc_nr_running(struct rq *rq) | 1220 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
1221 | { | ||
1222 | update_load_add(&rq->load, p->se.load.weight); | ||
1223 | } | ||
1224 | |||
1225 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
1226 | { | ||
1227 | update_load_sub(&rq->load, p->se.load.weight); | ||
1228 | } | ||
1229 | |||
1230 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
1272 | { | 1231 | { |
1273 | rq->nr_running++; | 1232 | rq->nr_running++; |
1233 | inc_load(rq, p); | ||
1274 | } | 1234 | } |
1275 | 1235 | ||
1276 | static void dec_nr_running(struct rq *rq) | 1236 | static void dec_nr_running(struct task_struct *p, struct rq *rq) |
1277 | { | 1237 | { |
1278 | rq->nr_running--; | 1238 | rq->nr_running--; |
1239 | dec_load(rq, p); | ||
1279 | } | 1240 | } |
1280 | 1241 | ||
1281 | static void set_load_weight(struct task_struct *p) | 1242 | static void set_load_weight(struct task_struct *p) |
@@ -1367,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1367 | rq->nr_uninterruptible--; | 1328 | rq->nr_uninterruptible--; |
1368 | 1329 | ||
1369 | enqueue_task(rq, p, wakeup); | 1330 | enqueue_task(rq, p, wakeup); |
1370 | inc_nr_running(rq); | 1331 | inc_nr_running(p, rq); |
1371 | } | 1332 | } |
1372 | 1333 | ||
1373 | /* | 1334 | /* |
@@ -1379,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
1379 | rq->nr_uninterruptible++; | 1340 | rq->nr_uninterruptible++; |
1380 | 1341 | ||
1381 | dequeue_task(rq, p, sleep); | 1342 | dequeue_task(rq, p, sleep); |
1382 | dec_nr_running(rq); | 1343 | dec_nr_running(p, rq); |
1383 | } | 1344 | } |
1384 | 1345 | ||
1385 | /** | 1346 | /** |
@@ -1831,6 +1792,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1831 | long old_state; | 1792 | long old_state; |
1832 | struct rq *rq; | 1793 | struct rq *rq; |
1833 | 1794 | ||
1795 | smp_wmb(); | ||
1834 | rq = task_rq_lock(p, &flags); | 1796 | rq = task_rq_lock(p, &flags); |
1835 | old_state = p->state; | 1797 | old_state = p->state; |
1836 | if (!(old_state & state)) | 1798 | if (!(old_state & state)) |
@@ -2018,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2018 | * management (if any): | 1980 | * management (if any): |
2019 | */ | 1981 | */ |
2020 | p->sched_class->task_new(rq, p); | 1982 | p->sched_class->task_new(rq, p); |
2021 | inc_nr_running(rq); | 1983 | inc_nr_running(p, rq); |
2022 | } | 1984 | } |
2023 | check_preempt_curr(rq, p); | 1985 | check_preempt_curr(rq, p); |
2024 | #ifdef CONFIG_SMP | 1986 | #ifdef CONFIG_SMP |
@@ -3766,7 +3728,7 @@ void scheduler_tick(void) | |||
3766 | 3728 | ||
3767 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) | 3729 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) |
3768 | 3730 | ||
3769 | void add_preempt_count(int val) | 3731 | void __kprobes add_preempt_count(int val) |
3770 | { | 3732 | { |
3771 | /* | 3733 | /* |
3772 | * Underflow? | 3734 | * Underflow? |
@@ -3782,7 +3744,7 @@ void add_preempt_count(int val) | |||
3782 | } | 3744 | } |
3783 | EXPORT_SYMBOL(add_preempt_count); | 3745 | EXPORT_SYMBOL(add_preempt_count); |
3784 | 3746 | ||
3785 | void sub_preempt_count(int val) | 3747 | void __kprobes sub_preempt_count(int val) |
3786 | { | 3748 | { |
3787 | /* | 3749 | /* |
3788 | * Underflow? | 3750 | * Underflow? |
@@ -3884,7 +3846,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev) | |||
3884 | asmlinkage void __sched schedule(void) | 3846 | asmlinkage void __sched schedule(void) |
3885 | { | 3847 | { |
3886 | struct task_struct *prev, *next; | 3848 | struct task_struct *prev, *next; |
3887 | long *switch_count; | 3849 | unsigned long *switch_count; |
3888 | struct rq *rq; | 3850 | struct rq *rq; |
3889 | int cpu; | 3851 | int cpu; |
3890 | 3852 | ||
@@ -4357,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4357 | goto out_unlock; | 4319 | goto out_unlock; |
4358 | } | 4320 | } |
4359 | on_rq = p->se.on_rq; | 4321 | on_rq = p->se.on_rq; |
4360 | if (on_rq) | 4322 | if (on_rq) { |
4361 | dequeue_task(rq, p, 0); | 4323 | dequeue_task(rq, p, 0); |
4324 | dec_load(rq, p); | ||
4325 | } | ||
4362 | 4326 | ||
4363 | p->static_prio = NICE_TO_PRIO(nice); | 4327 | p->static_prio = NICE_TO_PRIO(nice); |
4364 | set_load_weight(p); | 4328 | set_load_weight(p); |
@@ -4368,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4368 | 4332 | ||
4369 | if (on_rq) { | 4333 | if (on_rq) { |
4370 | enqueue_task(rq, p, 0); | 4334 | enqueue_task(rq, p, 0); |
4335 | inc_load(rq, p); | ||
4371 | /* | 4336 | /* |
4372 | * If the task increased its priority or is running and | 4337 | * If the task increased its priority or is running and |
4373 | * lowered its priority, then reschedule its CPU: | 4338 | * lowered its priority, then reschedule its CPU: |
@@ -4457,7 +4422,7 @@ int task_nice(const struct task_struct *p) | |||
4457 | { | 4422 | { |
4458 | return TASK_NICE(p); | 4423 | return TASK_NICE(p); |
4459 | } | 4424 | } |
4460 | EXPORT_SYMBOL_GPL(task_nice); | 4425 | EXPORT_SYMBOL(task_nice); |
4461 | 4426 | ||
4462 | /** | 4427 | /** |
4463 | * idle_cpu - is a given cpu idle currently? | 4428 | * idle_cpu - is a given cpu idle currently? |
@@ -5135,7 +5100,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
5135 | time_slice = 0; | 5100 | time_slice = 0; |
5136 | if (p->policy == SCHED_RR) { | 5101 | if (p->policy == SCHED_RR) { |
5137 | time_slice = DEF_TIMESLICE; | 5102 | time_slice = DEF_TIMESLICE; |
5138 | } else { | 5103 | } else if (p->policy != SCHED_FIFO) { |
5139 | struct sched_entity *se = &p->se; | 5104 | struct sched_entity *se = &p->se; |
5140 | unsigned long flags; | 5105 | unsigned long flags; |
5141 | struct rq *rq; | 5106 | struct rq *rq; |
@@ -5848,6 +5813,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5848 | /* Must be high prio: stop_machine expects to yield to it. */ | 5813 | /* Must be high prio: stop_machine expects to yield to it. */ |
5849 | rq = task_rq_lock(p, &flags); | 5814 | rq = task_rq_lock(p, &flags); |
5850 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | 5815 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
5816 | |||
5817 | /* Update our root-domain */ | ||
5818 | if (rq->rd) { | ||
5819 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | ||
5820 | cpu_set(cpu, rq->rd->online); | ||
5821 | } | ||
5822 | |||
5851 | task_rq_unlock(rq, &flags); | 5823 | task_rq_unlock(rq, &flags); |
5852 | cpu_rq(cpu)->migration_thread = p; | 5824 | cpu_rq(cpu)->migration_thread = p; |
5853 | break; | 5825 | break; |
@@ -5856,15 +5828,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5856 | case CPU_ONLINE_FROZEN: | 5828 | case CPU_ONLINE_FROZEN: |
5857 | /* Strictly unnecessary, as first user will wake it. */ | 5829 | /* Strictly unnecessary, as first user will wake it. */ |
5858 | wake_up_process(cpu_rq(cpu)->migration_thread); | 5830 | wake_up_process(cpu_rq(cpu)->migration_thread); |
5859 | |||
5860 | /* Update our root-domain */ | ||
5861 | rq = cpu_rq(cpu); | ||
5862 | spin_lock_irqsave(&rq->lock, flags); | ||
5863 | if (rq->rd) { | ||
5864 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | ||
5865 | cpu_set(cpu, rq->rd->online); | ||
5866 | } | ||
5867 | spin_unlock_irqrestore(&rq->lock, flags); | ||
5868 | break; | 5831 | break; |
5869 | 5832 | ||
5870 | #ifdef CONFIG_HOTPLUG_CPU | 5833 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -6140,8 +6103,6 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6140 | rq->rd = rd; | 6103 | rq->rd = rd; |
6141 | 6104 | ||
6142 | cpu_set(rq->cpu, rd->span); | 6105 | cpu_set(rq->cpu, rd->span); |
6143 | if (cpu_isset(rq->cpu, cpu_online_map)) | ||
6144 | cpu_set(rq->cpu, rd->online); | ||
6145 | 6106 | ||
6146 | for (class = sched_class_highest; class; class = class->next) { | 6107 | for (class = sched_class_highest; class; class = class->next) { |
6147 | if (class->join_domain) | 6108 | if (class->join_domain) |
@@ -7082,21 +7043,6 @@ void __init sched_init_smp(void) | |||
7082 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | 7043 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) |
7083 | BUG(); | 7044 | BUG(); |
7084 | sched_init_granularity(); | 7045 | sched_init_granularity(); |
7085 | |||
7086 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7087 | if (nr_cpu_ids == 1) | ||
7088 | return; | ||
7089 | |||
7090 | lb_monitor_task = kthread_create(load_balance_monitor, NULL, | ||
7091 | "group_balance"); | ||
7092 | if (!IS_ERR(lb_monitor_task)) { | ||
7093 | lb_monitor_task->flags |= PF_NOFREEZE; | ||
7094 | wake_up_process(lb_monitor_task); | ||
7095 | } else { | ||
7096 | printk(KERN_ERR "Could not create load balance monitor thread" | ||
7097 | "(error = %ld) \n", PTR_ERR(lb_monitor_task)); | ||
7098 | } | ||
7099 | #endif | ||
7100 | } | 7046 | } |
7101 | #else | 7047 | #else |
7102 | void __init sched_init_smp(void) | 7048 | void __init sched_init_smp(void) |
@@ -7283,6 +7229,8 @@ void __init sched_init(void) | |||
7283 | * During early bootup we pretend to be a normal task: | 7229 | * During early bootup we pretend to be a normal task: |
7284 | */ | 7230 | */ |
7285 | current->sched_class = &fair_sched_class; | 7231 | current->sched_class = &fair_sched_class; |
7232 | |||
7233 | scheduler_running = 1; | ||
7286 | } | 7234 | } |
7287 | 7235 | ||
7288 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 7236 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
@@ -7417,157 +7365,6 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
7417 | 7365 | ||
7418 | #ifdef CONFIG_GROUP_SCHED | 7366 | #ifdef CONFIG_GROUP_SCHED |
7419 | 7367 | ||
7420 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP | ||
7421 | /* | ||
7422 | * distribute shares of all task groups among their schedulable entities, | ||
7423 | * to reflect load distribution across cpus. | ||
7424 | */ | ||
7425 | static int rebalance_shares(struct sched_domain *sd, int this_cpu) | ||
7426 | { | ||
7427 | struct cfs_rq *cfs_rq; | ||
7428 | struct rq *rq = cpu_rq(this_cpu); | ||
7429 | cpumask_t sdspan = sd->span; | ||
7430 | int balanced = 1; | ||
7431 | |||
7432 | /* Walk thr' all the task groups that we have */ | ||
7433 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
7434 | int i; | ||
7435 | unsigned long total_load = 0, total_shares; | ||
7436 | struct task_group *tg = cfs_rq->tg; | ||
7437 | |||
7438 | /* Gather total task load of this group across cpus */ | ||
7439 | for_each_cpu_mask(i, sdspan) | ||
7440 | total_load += tg->cfs_rq[i]->load.weight; | ||
7441 | |||
7442 | /* Nothing to do if this group has no load */ | ||
7443 | if (!total_load) | ||
7444 | continue; | ||
7445 | |||
7446 | /* | ||
7447 | * tg->shares represents the number of cpu shares the task group | ||
7448 | * is eligible to hold on a single cpu. On N cpus, it is | ||
7449 | * eligible to hold (N * tg->shares) number of cpu shares. | ||
7450 | */ | ||
7451 | total_shares = tg->shares * cpus_weight(sdspan); | ||
7452 | |||
7453 | /* | ||
7454 | * redistribute total_shares across cpus as per the task load | ||
7455 | * distribution. | ||
7456 | */ | ||
7457 | for_each_cpu_mask(i, sdspan) { | ||
7458 | unsigned long local_load, local_shares; | ||
7459 | |||
7460 | local_load = tg->cfs_rq[i]->load.weight; | ||
7461 | local_shares = (local_load * total_shares) / total_load; | ||
7462 | if (!local_shares) | ||
7463 | local_shares = MIN_GROUP_SHARES; | ||
7464 | if (local_shares == tg->se[i]->load.weight) | ||
7465 | continue; | ||
7466 | |||
7467 | spin_lock_irq(&cpu_rq(i)->lock); | ||
7468 | set_se_shares(tg->se[i], local_shares); | ||
7469 | spin_unlock_irq(&cpu_rq(i)->lock); | ||
7470 | balanced = 0; | ||
7471 | } | ||
7472 | } | ||
7473 | |||
7474 | return balanced; | ||
7475 | } | ||
7476 | |||
7477 | /* | ||
7478 | * How frequently should we rebalance_shares() across cpus? | ||
7479 | * | ||
7480 | * The more frequently we rebalance shares, the more accurate is the fairness | ||
7481 | * of cpu bandwidth distribution between task groups. However higher frequency | ||
7482 | * also implies increased scheduling overhead. | ||
7483 | * | ||
7484 | * sysctl_sched_min_bal_int_shares represents the minimum interval between | ||
7485 | * consecutive calls to rebalance_shares() in the same sched domain. | ||
7486 | * | ||
7487 | * sysctl_sched_max_bal_int_shares represents the maximum interval between | ||
7488 | * consecutive calls to rebalance_shares() in the same sched domain. | ||
7489 | * | ||
7490 | * These settings allows for the appropriate trade-off between accuracy of | ||
7491 | * fairness and the associated overhead. | ||
7492 | * | ||
7493 | */ | ||
7494 | |||
7495 | /* default: 8ms, units: milliseconds */ | ||
7496 | const_debug unsigned int sysctl_sched_min_bal_int_shares = 8; | ||
7497 | |||
7498 | /* default: 128ms, units: milliseconds */ | ||
7499 | const_debug unsigned int sysctl_sched_max_bal_int_shares = 128; | ||
7500 | |||
7501 | /* kernel thread that runs rebalance_shares() periodically */ | ||
7502 | static int load_balance_monitor(void *unused) | ||
7503 | { | ||
7504 | unsigned int timeout = sysctl_sched_min_bal_int_shares; | ||
7505 | struct sched_param schedparm; | ||
7506 | int ret; | ||
7507 | |||
7508 | /* | ||
7509 | * We don't want this thread's execution to be limited by the shares | ||
7510 | * assigned to default group (init_task_group). Hence make it run | ||
7511 | * as a SCHED_RR RT task at the lowest priority. | ||
7512 | */ | ||
7513 | schedparm.sched_priority = 1; | ||
7514 | ret = sched_setscheduler(current, SCHED_RR, &schedparm); | ||
7515 | if (ret) | ||
7516 | printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance" | ||
7517 | " monitor thread (error = %d) \n", ret); | ||
7518 | |||
7519 | while (!kthread_should_stop()) { | ||
7520 | int i, cpu, balanced = 1; | ||
7521 | |||
7522 | /* Prevent cpus going down or coming up */ | ||
7523 | get_online_cpus(); | ||
7524 | /* lockout changes to doms_cur[] array */ | ||
7525 | lock_doms_cur(); | ||
7526 | /* | ||
7527 | * Enter a rcu read-side critical section to safely walk rq->sd | ||
7528 | * chain on various cpus and to walk task group list | ||
7529 | * (rq->leaf_cfs_rq_list) in rebalance_shares(). | ||
7530 | */ | ||
7531 | rcu_read_lock(); | ||
7532 | |||
7533 | for (i = 0; i < ndoms_cur; i++) { | ||
7534 | cpumask_t cpumap = doms_cur[i]; | ||
7535 | struct sched_domain *sd = NULL, *sd_prev = NULL; | ||
7536 | |||
7537 | cpu = first_cpu(cpumap); | ||
7538 | |||
7539 | /* Find the highest domain at which to balance shares */ | ||
7540 | for_each_domain(cpu, sd) { | ||
7541 | if (!(sd->flags & SD_LOAD_BALANCE)) | ||
7542 | continue; | ||
7543 | sd_prev = sd; | ||
7544 | } | ||
7545 | |||
7546 | sd = sd_prev; | ||
7547 | /* sd == NULL? No load balance reqd in this domain */ | ||
7548 | if (!sd) | ||
7549 | continue; | ||
7550 | |||
7551 | balanced &= rebalance_shares(sd, cpu); | ||
7552 | } | ||
7553 | |||
7554 | rcu_read_unlock(); | ||
7555 | |||
7556 | unlock_doms_cur(); | ||
7557 | put_online_cpus(); | ||
7558 | |||
7559 | if (!balanced) | ||
7560 | timeout = sysctl_sched_min_bal_int_shares; | ||
7561 | else if (timeout < sysctl_sched_max_bal_int_shares) | ||
7562 | timeout *= 2; | ||
7563 | |||
7564 | msleep_interruptible(timeout); | ||
7565 | } | ||
7566 | |||
7567 | return 0; | ||
7568 | } | ||
7569 | #endif /* CONFIG_SMP */ | ||
7570 | |||
7571 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7368 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7572 | static void free_fair_sched_group(struct task_group *tg) | 7369 | static void free_fair_sched_group(struct task_group *tg) |
7573 | { | 7370 | { |
@@ -7824,6 +7621,11 @@ void sched_move_task(struct task_struct *tsk) | |||
7824 | 7621 | ||
7825 | set_task_rq(tsk, task_cpu(tsk)); | 7622 | set_task_rq(tsk, task_cpu(tsk)); |
7826 | 7623 | ||
7624 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7625 | if (tsk->sched_class->moved_group) | ||
7626 | tsk->sched_class->moved_group(tsk); | ||
7627 | #endif | ||
7628 | |||
7827 | if (on_rq) { | 7629 | if (on_rq) { |
7828 | if (unlikely(running)) | 7630 | if (unlikely(running)) |
7829 | tsk->sched_class->set_curr_task(rq); | 7631 | tsk->sched_class->set_curr_task(rq); |
@@ -7834,29 +7636,25 @@ void sched_move_task(struct task_struct *tsk) | |||
7834 | } | 7636 | } |
7835 | 7637 | ||
7836 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7638 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7837 | /* rq->lock to be locked by caller */ | ||
7838 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 7639 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
7839 | { | 7640 | { |
7840 | struct cfs_rq *cfs_rq = se->cfs_rq; | 7641 | struct cfs_rq *cfs_rq = se->cfs_rq; |
7841 | struct rq *rq = cfs_rq->rq; | 7642 | struct rq *rq = cfs_rq->rq; |
7842 | int on_rq; | 7643 | int on_rq; |
7843 | 7644 | ||
7844 | if (!shares) | 7645 | spin_lock_irq(&rq->lock); |
7845 | shares = MIN_GROUP_SHARES; | ||
7846 | 7646 | ||
7847 | on_rq = se->on_rq; | 7647 | on_rq = se->on_rq; |
7848 | if (on_rq) { | 7648 | if (on_rq) |
7849 | dequeue_entity(cfs_rq, se, 0); | 7649 | dequeue_entity(cfs_rq, se, 0); |
7850 | dec_cpu_load(rq, se->load.weight); | ||
7851 | } | ||
7852 | 7650 | ||
7853 | se->load.weight = shares; | 7651 | se->load.weight = shares; |
7854 | se->load.inv_weight = div64_64((1ULL<<32), shares); | 7652 | se->load.inv_weight = div64_64((1ULL<<32), shares); |
7855 | 7653 | ||
7856 | if (on_rq) { | 7654 | if (on_rq) |
7857 | enqueue_entity(cfs_rq, se, 0); | 7655 | enqueue_entity(cfs_rq, se, 0); |
7858 | inc_cpu_load(rq, se->load.weight); | 7656 | |
7859 | } | 7657 | spin_unlock_irq(&rq->lock); |
7860 | } | 7658 | } |
7861 | 7659 | ||
7862 | static DEFINE_MUTEX(shares_mutex); | 7660 | static DEFINE_MUTEX(shares_mutex); |
@@ -7866,18 +7664,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7866 | int i; | 7664 | int i; |
7867 | unsigned long flags; | 7665 | unsigned long flags; |
7868 | 7666 | ||
7667 | /* | ||
7668 | * A weight of 0 or 1 can cause arithmetics problems. | ||
7669 | * (The default weight is 1024 - so there's no practical | ||
7670 | * limitation from this.) | ||
7671 | */ | ||
7672 | if (shares < 2) | ||
7673 | shares = 2; | ||
7674 | |||
7869 | mutex_lock(&shares_mutex); | 7675 | mutex_lock(&shares_mutex); |
7870 | if (tg->shares == shares) | 7676 | if (tg->shares == shares) |
7871 | goto done; | 7677 | goto done; |
7872 | 7678 | ||
7873 | if (shares < MIN_GROUP_SHARES) | ||
7874 | shares = MIN_GROUP_SHARES; | ||
7875 | |||
7876 | /* | ||
7877 | * Prevent any load balance activity (rebalance_shares, | ||
7878 | * load_balance_fair) from referring to this group first, | ||
7879 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. | ||
7880 | */ | ||
7881 | spin_lock_irqsave(&task_group_lock, flags); | 7679 | spin_lock_irqsave(&task_group_lock, flags); |
7882 | for_each_possible_cpu(i) | 7680 | for_each_possible_cpu(i) |
7883 | unregister_fair_sched_group(tg, i); | 7681 | unregister_fair_sched_group(tg, i); |
@@ -7891,11 +7689,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7891 | * w/o tripping rebalance_share or load_balance_fair. | 7689 | * w/o tripping rebalance_share or load_balance_fair. |
7892 | */ | 7690 | */ |
7893 | tg->shares = shares; | 7691 | tg->shares = shares; |
7894 | for_each_possible_cpu(i) { | 7692 | for_each_possible_cpu(i) |
7895 | spin_lock_irq(&cpu_rq(i)->lock); | ||
7896 | set_se_shares(tg->se[i], shares); | 7693 | set_se_shares(tg->se[i], shares); |
7897 | spin_unlock_irq(&cpu_rq(i)->lock); | ||
7898 | } | ||
7899 | 7694 | ||
7900 | /* | 7695 | /* |
7901 | * Enable load balance activity on this group, by inserting it back on | 7696 | * Enable load balance activity on this group, by inserting it back on |
@@ -7927,9 +7722,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
7927 | if (runtime == RUNTIME_INF) | 7722 | if (runtime == RUNTIME_INF) |
7928 | return 1ULL << 16; | 7723 | return 1ULL << 16; |
7929 | 7724 | ||
7930 | runtime *= (1ULL << 16); | 7725 | return div64_64(runtime << 16, period); |
7931 | div64_64(runtime, period); | ||
7932 | return runtime; | ||
7933 | } | 7726 | } |
7934 | 7727 | ||
7935 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 7728 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
@@ -7953,25 +7746,40 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | |||
7953 | return total + to_ratio(period, runtime) < global_ratio; | 7746 | return total + to_ratio(period, runtime) < global_ratio; |
7954 | } | 7747 | } |
7955 | 7748 | ||
7749 | /* Must be called with tasklist_lock held */ | ||
7750 | static inline int tg_has_rt_tasks(struct task_group *tg) | ||
7751 | { | ||
7752 | struct task_struct *g, *p; | ||
7753 | do_each_thread(g, p) { | ||
7754 | if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) | ||
7755 | return 1; | ||
7756 | } while_each_thread(g, p); | ||
7757 | return 0; | ||
7758 | } | ||
7759 | |||
7956 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | 7760 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) |
7957 | { | 7761 | { |
7958 | u64 rt_runtime, rt_period; | 7762 | u64 rt_runtime, rt_period; |
7959 | int err = 0; | 7763 | int err = 0; |
7960 | 7764 | ||
7961 | rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; | 7765 | rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; |
7962 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | 7766 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; |
7963 | if (rt_runtime_us == -1) | 7767 | if (rt_runtime_us == -1) |
7964 | rt_runtime = rt_period; | 7768 | rt_runtime = RUNTIME_INF; |
7965 | 7769 | ||
7966 | mutex_lock(&rt_constraints_mutex); | 7770 | mutex_lock(&rt_constraints_mutex); |
7771 | read_lock(&tasklist_lock); | ||
7772 | if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) { | ||
7773 | err = -EBUSY; | ||
7774 | goto unlock; | ||
7775 | } | ||
7967 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { | 7776 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { |
7968 | err = -EINVAL; | 7777 | err = -EINVAL; |
7969 | goto unlock; | 7778 | goto unlock; |
7970 | } | 7779 | } |
7971 | if (rt_runtime_us == -1) | ||
7972 | rt_runtime = RUNTIME_INF; | ||
7973 | tg->rt_runtime = rt_runtime; | 7780 | tg->rt_runtime = rt_runtime; |
7974 | unlock: | 7781 | unlock: |
7782 | read_unlock(&tasklist_lock); | ||
7975 | mutex_unlock(&rt_constraints_mutex); | 7783 | mutex_unlock(&rt_constraints_mutex); |
7976 | 7784 | ||
7977 | return err; | 7785 | return err; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6c091d6e159d..e2a530515619 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -202,17 +202,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | |||
202 | 202 | ||
203 | static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 203 | static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
204 | { | 204 | { |
205 | struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; | 205 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
206 | struct sched_entity *se = NULL; | ||
207 | struct rb_node *parent; | ||
208 | 206 | ||
209 | while (*link) { | 207 | if (!last) |
210 | parent = *link; | 208 | return NULL; |
211 | se = rb_entry(parent, struct sched_entity, run_node); | ||
212 | link = &parent->rb_right; | ||
213 | } | ||
214 | 209 | ||
215 | return se; | 210 | return rb_entry(last, struct sched_entity, run_node); |
216 | } | 211 | } |
217 | 212 | ||
218 | /************************************************************** | 213 | /************************************************************** |
@@ -732,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
732 | return se->parent; | 727 | return se->parent; |
733 | } | 728 | } |
734 | 729 | ||
735 | #define GROUP_IMBALANCE_PCT 20 | ||
736 | |||
737 | #else /* CONFIG_FAIR_GROUP_SCHED */ | 730 | #else /* CONFIG_FAIR_GROUP_SCHED */ |
738 | 731 | ||
739 | #define for_each_sched_entity(se) \ | 732 | #define for_each_sched_entity(se) \ |
@@ -824,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
824 | static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | 817 | static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) |
825 | { | 818 | { |
826 | struct cfs_rq *cfs_rq; | 819 | struct cfs_rq *cfs_rq; |
827 | struct sched_entity *se = &p->se, | 820 | struct sched_entity *se = &p->se; |
828 | *topse = NULL; /* Highest schedulable entity */ | ||
829 | int incload = 1; | ||
830 | 821 | ||
831 | for_each_sched_entity(se) { | 822 | for_each_sched_entity(se) { |
832 | topse = se; | 823 | if (se->on_rq) |
833 | if (se->on_rq) { | ||
834 | incload = 0; | ||
835 | break; | 824 | break; |
836 | } | ||
837 | cfs_rq = cfs_rq_of(se); | 825 | cfs_rq = cfs_rq_of(se); |
838 | enqueue_entity(cfs_rq, se, wakeup); | 826 | enqueue_entity(cfs_rq, se, wakeup); |
839 | wakeup = 1; | 827 | wakeup = 1; |
840 | } | 828 | } |
841 | /* Increment cpu load if we just enqueued the first task of a group on | ||
842 | * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs | ||
843 | * at the highest grouping level. | ||
844 | */ | ||
845 | if (incload) | ||
846 | inc_cpu_load(rq, topse->load.weight); | ||
847 | 829 | ||
848 | hrtick_start_fair(rq, rq->curr); | 830 | hrtick_start_fair(rq, rq->curr); |
849 | } | 831 | } |
@@ -856,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | |||
856 | static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | 838 | static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) |
857 | { | 839 | { |
858 | struct cfs_rq *cfs_rq; | 840 | struct cfs_rq *cfs_rq; |
859 | struct sched_entity *se = &p->se, | 841 | struct sched_entity *se = &p->se; |
860 | *topse = NULL; /* Highest schedulable entity */ | ||
861 | int decload = 1; | ||
862 | 842 | ||
863 | for_each_sched_entity(se) { | 843 | for_each_sched_entity(se) { |
864 | topse = se; | ||
865 | cfs_rq = cfs_rq_of(se); | 844 | cfs_rq = cfs_rq_of(se); |
866 | dequeue_entity(cfs_rq, se, sleep); | 845 | dequeue_entity(cfs_rq, se, sleep); |
867 | /* Don't dequeue parent if it has other entities besides us */ | 846 | /* Don't dequeue parent if it has other entities besides us */ |
868 | if (cfs_rq->load.weight) { | 847 | if (cfs_rq->load.weight) |
869 | if (parent_entity(se)) | ||
870 | decload = 0; | ||
871 | break; | 848 | break; |
872 | } | ||
873 | sleep = 1; | 849 | sleep = 1; |
874 | } | 850 | } |
875 | /* Decrement cpu load if we just dequeued the last task of a group on | ||
876 | * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs | ||
877 | * at the highest grouping level. | ||
878 | */ | ||
879 | if (decload) | ||
880 | dec_cpu_load(rq, topse->load.weight); | ||
881 | 851 | ||
882 | hrtick_start_fair(rq, rq->curr); | 852 | hrtick_start_fair(rq, rq->curr); |
883 | } | 853 | } |
@@ -1191,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg) | |||
1191 | return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); | 1161 | return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); |
1192 | } | 1162 | } |
1193 | 1163 | ||
1164 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1165 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | ||
1166 | { | ||
1167 | struct sched_entity *curr; | ||
1168 | struct task_struct *p; | ||
1169 | |||
1170 | if (!cfs_rq->nr_running || !first_fair(cfs_rq)) | ||
1171 | return MAX_PRIO; | ||
1172 | |||
1173 | curr = cfs_rq->curr; | ||
1174 | if (!curr) | ||
1175 | curr = __pick_next_entity(cfs_rq); | ||
1176 | |||
1177 | p = task_of(curr); | ||
1178 | |||
1179 | return p->prio; | ||
1180 | } | ||
1181 | #endif | ||
1182 | |||
1194 | static unsigned long | 1183 | static unsigned long |
1195 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1184 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1196 | unsigned long max_load_move, | 1185 | unsigned long max_load_move, |
@@ -1200,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1200 | struct cfs_rq *busy_cfs_rq; | 1189 | struct cfs_rq *busy_cfs_rq; |
1201 | long rem_load_move = max_load_move; | 1190 | long rem_load_move = max_load_move; |
1202 | struct rq_iterator cfs_rq_iterator; | 1191 | struct rq_iterator cfs_rq_iterator; |
1203 | unsigned long load_moved; | ||
1204 | 1192 | ||
1205 | cfs_rq_iterator.start = load_balance_start_fair; | 1193 | cfs_rq_iterator.start = load_balance_start_fair; |
1206 | cfs_rq_iterator.next = load_balance_next_fair; | 1194 | cfs_rq_iterator.next = load_balance_next_fair; |
1207 | 1195 | ||
1208 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { | 1196 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { |
1209 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1197 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1210 | struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu]; | 1198 | struct cfs_rq *this_cfs_rq; |
1211 | unsigned long maxload, task_load, group_weight; | 1199 | long imbalance; |
1212 | unsigned long thisload, per_task_load; | 1200 | unsigned long maxload; |
1213 | struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu]; | ||
1214 | |||
1215 | task_load = busy_cfs_rq->load.weight; | ||
1216 | group_weight = se->load.weight; | ||
1217 | 1201 | ||
1218 | /* | 1202 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); |
1219 | * 'group_weight' is contributed by tasks of total weight | ||
1220 | * 'task_load'. To move 'rem_load_move' worth of weight only, | ||
1221 | * we need to move a maximum task load of: | ||
1222 | * | ||
1223 | * maxload = (remload / group_weight) * task_load; | ||
1224 | */ | ||
1225 | maxload = (rem_load_move * task_load) / group_weight; | ||
1226 | 1203 | ||
1227 | if (!maxload || !task_load) | 1204 | imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; |
1205 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | ||
1206 | if (imbalance <= 0) | ||
1228 | continue; | 1207 | continue; |
1229 | 1208 | ||
1230 | per_task_load = task_load / busy_cfs_rq->nr_running; | 1209 | /* Don't pull more than imbalance/2 */ |
1231 | /* | 1210 | imbalance /= 2; |
1232 | * balance_tasks will try to forcibly move atleast one task if | 1211 | maxload = min(rem_load_move, imbalance); |
1233 | * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if | ||
1234 | * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load. | ||
1235 | */ | ||
1236 | if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load) | ||
1237 | continue; | ||
1238 | 1212 | ||
1239 | /* Disable priority-based load balance */ | 1213 | *this_best_prio = cfs_rq_best_prio(this_cfs_rq); |
1240 | *this_best_prio = 0; | ||
1241 | thisload = this_cfs_rq->load.weight; | ||
1242 | #else | 1214 | #else |
1243 | # define maxload rem_load_move | 1215 | # define maxload rem_load_move |
1244 | #endif | 1216 | #endif |
@@ -1247,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1247 | * load_balance_[start|next]_fair iterators | 1219 | * load_balance_[start|next]_fair iterators |
1248 | */ | 1220 | */ |
1249 | cfs_rq_iterator.arg = busy_cfs_rq; | 1221 | cfs_rq_iterator.arg = busy_cfs_rq; |
1250 | load_moved = balance_tasks(this_rq, this_cpu, busiest, | 1222 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, |
1251 | maxload, sd, idle, all_pinned, | 1223 | maxload, sd, idle, all_pinned, |
1252 | this_best_prio, | 1224 | this_best_prio, |
1253 | &cfs_rq_iterator); | 1225 | &cfs_rq_iterator); |
1254 | 1226 | ||
1255 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1256 | /* | ||
1257 | * load_moved holds the task load that was moved. The | ||
1258 | * effective (group) weight moved would be: | ||
1259 | * load_moved_eff = load_moved/task_load * group_weight; | ||
1260 | */ | ||
1261 | load_moved = (group_weight * load_moved) / task_load; | ||
1262 | |||
1263 | /* Adjust shares on both cpus to reflect load_moved */ | ||
1264 | group_weight -= load_moved; | ||
1265 | set_se_shares(se, group_weight); | ||
1266 | |||
1267 | se = busy_cfs_rq->tg->se[this_cpu]; | ||
1268 | if (!thisload) | ||
1269 | group_weight = load_moved; | ||
1270 | else | ||
1271 | group_weight = se->load.weight + load_moved; | ||
1272 | set_se_shares(se, group_weight); | ||
1273 | #endif | ||
1274 | |||
1275 | rem_load_move -= load_moved; | ||
1276 | |||
1277 | if (rem_load_move <= 0) | 1227 | if (rem_load_move <= 0) |
1278 | break; | 1228 | break; |
1279 | } | 1229 | } |
@@ -1403,6 +1353,16 @@ static void set_curr_task_fair(struct rq *rq) | |||
1403 | set_next_entity(cfs_rq_of(se), se); | 1353 | set_next_entity(cfs_rq_of(se), se); |
1404 | } | 1354 | } |
1405 | 1355 | ||
1356 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1357 | static void moved_group_fair(struct task_struct *p) | ||
1358 | { | ||
1359 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | ||
1360 | |||
1361 | update_curr(cfs_rq); | ||
1362 | place_entity(cfs_rq, &p->se, 1); | ||
1363 | } | ||
1364 | #endif | ||
1365 | |||
1406 | /* | 1366 | /* |
1407 | * All the scheduling class methods: | 1367 | * All the scheduling class methods: |
1408 | */ | 1368 | */ |
@@ -1431,6 +1391,10 @@ static const struct sched_class fair_sched_class = { | |||
1431 | 1391 | ||
1432 | .prio_changed = prio_changed_fair, | 1392 | .prio_changed = prio_changed_fair, |
1433 | .switched_to = switched_to_fair, | 1393 | .switched_to = switched_to_fair, |
1394 | |||
1395 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1396 | .moved_group = moved_group_fair, | ||
1397 | #endif | ||
1434 | }; | 1398 | }; |
1435 | 1399 | ||
1436 | #ifdef CONFIG_SCHED_DEBUG | 1400 | #ifdef CONFIG_SCHED_DEBUG |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f54792b175b2..0a6d2e516420 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -393,8 +393,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
393 | */ | 393 | */ |
394 | for_each_sched_rt_entity(rt_se) | 394 | for_each_sched_rt_entity(rt_se) |
395 | enqueue_rt_entity(rt_se); | 395 | enqueue_rt_entity(rt_se); |
396 | |||
397 | inc_cpu_load(rq, p->se.load.weight); | ||
398 | } | 396 | } |
399 | 397 | ||
400 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 398 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
@@ -414,8 +412,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | |||
414 | if (rt_rq && rt_rq->rt_nr_running) | 412 | if (rt_rq && rt_rq->rt_nr_running) |
415 | enqueue_rt_entity(rt_se); | 413 | enqueue_rt_entity(rt_se); |
416 | } | 414 | } |
417 | |||
418 | dec_cpu_load(rq, p->se.load.weight); | ||
419 | } | 415 | } |
420 | 416 | ||
421 | /* | 417 | /* |
@@ -1111,9 +1107,11 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p, | |||
1111 | pull_rt_task(rq); | 1107 | pull_rt_task(rq); |
1112 | /* | 1108 | /* |
1113 | * If there's a higher priority task waiting to run | 1109 | * If there's a higher priority task waiting to run |
1114 | * then reschedule. | 1110 | * then reschedule. Note, the above pull_rt_task |
1111 | * can release the rq lock and p could migrate. | ||
1112 | * Only reschedule if p is still on the same runqueue. | ||
1115 | */ | 1113 | */ |
1116 | if (p->prio > rq->rt.highest_prio) | 1114 | if (p->prio > rq->rt.highest_prio && rq->curr == p) |
1117 | resched_task(p); | 1115 | resched_task(p); |
1118 | #else | 1116 | #else |
1119 | /* For UP simply resched on drop of prio */ | 1117 | /* For UP simply resched on drop of prio */ |
diff --git a/kernel/signal.c b/kernel/signal.c index 84917fe507f7..6af1210092c3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1623,7 +1623,6 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | |||
1623 | /* Let the debugger run. */ | 1623 | /* Let the debugger run. */ |
1624 | __set_current_state(TASK_TRACED); | 1624 | __set_current_state(TASK_TRACED); |
1625 | spin_unlock_irq(¤t->sighand->siglock); | 1625 | spin_unlock_irq(¤t->sighand->siglock); |
1626 | try_to_freeze(); | ||
1627 | read_lock(&tasklist_lock); | 1626 | read_lock(&tasklist_lock); |
1628 | if (!unlikely(killed) && may_ptrace_stop()) { | 1627 | if (!unlikely(killed) && may_ptrace_stop()) { |
1629 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1628 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
@@ -1641,6 +1640,13 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | |||
1641 | } | 1640 | } |
1642 | 1641 | ||
1643 | /* | 1642 | /* |
1643 | * While in TASK_TRACED, we were considered "frozen enough". | ||
1644 | * Now that we woke up, it's crucial if we're supposed to be | ||
1645 | * frozen that we freeze now before running anything substantial. | ||
1646 | */ | ||
1647 | try_to_freeze(); | ||
1648 | |||
1649 | /* | ||
1644 | * We are back. Now reacquire the siglock before touching | 1650 | * We are back. Now reacquire the siglock before touching |
1645 | * last_siginfo, so that we are sure to have synchronized with | 1651 | * last_siginfo, so that we are sure to have synchronized with |
1646 | * any signal-sending on another CPU that wants to examine it. | 1652 | * any signal-sending on another CPU that wants to examine it. |
@@ -1757,9 +1763,15 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, | |||
1757 | sigset_t *mask = &current->blocked; | 1763 | sigset_t *mask = &current->blocked; |
1758 | int signr = 0; | 1764 | int signr = 0; |
1759 | 1765 | ||
1766 | relock: | ||
1767 | /* | ||
1768 | * We'll jump back here after any time we were stopped in TASK_STOPPED. | ||
1769 | * While in TASK_STOPPED, we were considered "frozen enough". | ||
1770 | * Now that we woke up, it's crucial if we're supposed to be | ||
1771 | * frozen that we freeze now before running anything substantial. | ||
1772 | */ | ||
1760 | try_to_freeze(); | 1773 | try_to_freeze(); |
1761 | 1774 | ||
1762 | relock: | ||
1763 | spin_lock_irq(&current->sighand->siglock); | 1775 | spin_lock_irq(&current->sighand->siglock); |
1764 | for (;;) { | 1776 | for (;;) { |
1765 | struct k_sigaction *ka; | 1777 | struct k_sigaction *ka; |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 5b3aea5f471e..31e9f2a47928 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -313,6 +313,7 @@ void irq_exit(void) | |||
313 | /* Make sure that timer wheel updates are propagated */ | 313 | /* Make sure that timer wheel updates are propagated */ |
314 | if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) | 314 | if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) |
315 | tick_nohz_stop_sched_tick(); | 315 | tick_nohz_stop_sched_tick(); |
316 | rcu_irq_exit(); | ||
316 | #endif | 317 | #endif |
317 | preempt_enable_no_resched(); | 318 | preempt_enable_no_resched(); |
318 | } | 319 | } |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 7c2da88db4ed..01b6522fd92b 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -216,26 +216,27 @@ static int watchdog(void *__bind_cpu) | |||
216 | /* initialize timestamp */ | 216 | /* initialize timestamp */ |
217 | touch_softlockup_watchdog(); | 217 | touch_softlockup_watchdog(); |
218 | 218 | ||
219 | set_current_state(TASK_INTERRUPTIBLE); | ||
219 | /* | 220 | /* |
220 | * Run briefly once per second to reset the softlockup timestamp. | 221 | * Run briefly once per second to reset the softlockup timestamp. |
221 | * If this gets delayed for more than 60 seconds then the | 222 | * If this gets delayed for more than 60 seconds then the |
222 | * debug-printout triggers in softlockup_tick(). | 223 | * debug-printout triggers in softlockup_tick(). |
223 | */ | 224 | */ |
224 | while (!kthread_should_stop()) { | 225 | while (!kthread_should_stop()) { |
225 | set_current_state(TASK_INTERRUPTIBLE); | ||
226 | touch_softlockup_watchdog(); | 226 | touch_softlockup_watchdog(); |
227 | schedule(); | 227 | schedule(); |
228 | 228 | ||
229 | if (kthread_should_stop()) | 229 | if (kthread_should_stop()) |
230 | break; | 230 | break; |
231 | 231 | ||
232 | if (this_cpu != check_cpu) | 232 | if (this_cpu == check_cpu) { |
233 | continue; | 233 | if (sysctl_hung_task_timeout_secs) |
234 | 234 | check_hung_uninterruptible_tasks(this_cpu); | |
235 | if (sysctl_hung_task_timeout_secs) | 235 | } |
236 | check_hung_uninterruptible_tasks(this_cpu); | ||
237 | 236 | ||
237 | set_current_state(TASK_INTERRUPTIBLE); | ||
238 | } | 238 | } |
239 | __set_current_state(TASK_RUNNING); | ||
239 | 240 | ||
240 | return 0; | 241 | return 0; |
241 | } | 242 | } |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8b7e95411795..b2a2d6889bab 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -311,24 +311,6 @@ static struct ctl_table kern_table[] = { | |||
311 | .mode = 0644, | 311 | .mode = 0644, |
312 | .proc_handler = &proc_dointvec, | 312 | .proc_handler = &proc_dointvec, |
313 | }, | 313 | }, |
314 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | ||
315 | { | ||
316 | .ctl_name = CTL_UNNUMBERED, | ||
317 | .procname = "sched_min_bal_int_shares", | ||
318 | .data = &sysctl_sched_min_bal_int_shares, | ||
319 | .maxlen = sizeof(unsigned int), | ||
320 | .mode = 0644, | ||
321 | .proc_handler = &proc_dointvec, | ||
322 | }, | ||
323 | { | ||
324 | .ctl_name = CTL_UNNUMBERED, | ||
325 | .procname = "sched_max_bal_int_shares", | ||
326 | .data = &sysctl_sched_max_bal_int_shares, | ||
327 | .maxlen = sizeof(unsigned int), | ||
328 | .mode = 0644, | ||
329 | .proc_handler = &proc_dointvec, | ||
330 | }, | ||
331 | #endif | ||
332 | #endif | 314 | #endif |
333 | { | 315 | { |
334 | .ctl_name = CTL_UNNUMBERED, | 316 | .ctl_name = CTL_UNNUMBERED, |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fa9bb73dbdb4..2968298f8f36 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void) | |||
282 | ts->idle_tick = ts->sched_timer.expires; | 282 | ts->idle_tick = ts->sched_timer.expires; |
283 | ts->tick_stopped = 1; | 283 | ts->tick_stopped = 1; |
284 | ts->idle_jiffies = last_jiffies; | 284 | ts->idle_jiffies = last_jiffies; |
285 | rcu_enter_nohz(); | ||
285 | } | 286 | } |
286 | 287 | ||
287 | /* | 288 | /* |
@@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void) | |||
375 | return; | 376 | return; |
376 | } | 377 | } |
377 | 378 | ||
379 | rcu_exit_nohz(); | ||
380 | |||
378 | /* Update jiffies first */ | 381 | /* Update jiffies first */ |
379 | select_nohz_load_balancer(0); | 382 | select_nohz_load_balancer(0); |
380 | now = ktime_get(); | 383 | now = ktime_get(); |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index d3d94c1a0fd2..67fe8fc21fb1 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -65,9 +65,9 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | |||
65 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | 65 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); |
66 | #endif | 66 | #endif |
67 | SEQ_printf(m, "\n"); | 67 | SEQ_printf(m, "\n"); |
68 | SEQ_printf(m, " # expires at %Lu nsecs [in %Lu nsecs]\n", | 68 | SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", |
69 | (unsigned long long)ktime_to_ns(timer->expires), | 69 | (unsigned long long)ktime_to_ns(timer->expires), |
70 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); | 70 | (long long)(ktime_to_ns(timer->expires) - now)); |
71 | } | 71 | } |
72 | 72 | ||
73 | static void | 73 | static void |