Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c            |  19
-rw-r--r--  kernel/auditsc.c          |   7
-rw-r--r--  kernel/cgroup.c           | 157
-rw-r--r--  kernel/cpuset.c           |   4
-rw-r--r--  kernel/exit.c             |  98
-rw-r--r--  kernel/futex.c            |  50
-rw-r--r--  kernel/futex_compat.c     |   9
-rw-r--r--  kernel/irq/chip.c         |  20
-rw-r--r--  kernel/irq/spurious.c     |   3
-rw-r--r--  kernel/kprobes.c          |  52
-rw-r--r--  kernel/lockdep.c          |   8
-rw-r--r--  kernel/marker.c           |  13
-rw-r--r--  kernel/module.c           |  16
-rw-r--r--  kernel/power/disk.c       |   4
-rw-r--r--  kernel/power/process.c    |  29
-rw-r--r--  kernel/power/snapshot.c   |  42
-rw-r--r--  kernel/printk.c           |   2
-rw-r--r--  kernel/rcupreempt.c       | 233
-rw-r--r--  kernel/res_counter.c      |   1
-rw-r--r--  kernel/sched.c            | 358
-rw-r--r--  kernel/sched_fair.c       | 142
-rw-r--r--  kernel/sched_rt.c         |  10
-rw-r--r--  kernel/signal.c           |  16
-rw-r--r--  kernel/softirq.c          |   1
-rw-r--r--  kernel/softlockup.c       |  13
-rw-r--r--  kernel/sysctl.c           |  18
-rw-r--r--  kernel/time/tick-sched.c  |   3
-rw-r--r--  kernel/time/timer_list.c  |   4
28 files changed, 713 insertions, 619 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 2eeea9a14240..10c4930c2bbf 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -170,7 +170,9 @@ void audit_panic(const char *message)
         printk(KERN_ERR "audit: %s\n", message);
         break;
     case AUDIT_FAIL_PANIC:
-        panic("audit: %s\n", message);
+        /* test audit_pid since printk is always losey, why bother? */
+        if (audit_pid)
+            panic("audit: %s\n", message);
         break;
     }
 }
@@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy)
             if (err < 0) {
                 BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */
                 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+                audit_log_lost("auditd dissapeared\n");
                 audit_pid = 0;
             }
         } else {
@@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab)
     if (!audit_rate_check()) {
         audit_log_lost("rate limit exceeded");
     } else {
+        struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
         if (audit_pid) {
-            struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
             nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
             skb_queue_tail(&audit_skb_queue, ab->skb);
             ab->skb = NULL;
             wake_up_interruptible(&kauditd_wait);
-        } else if (printk_ratelimit()) {
-            struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-            printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));
-        } else {
-            audit_log_lost("printk limit exceeded\n");
+        } else if (nlh->nlmsg_type != AUDIT_EOE) {
+            if (printk_ratelimit()) {
+                printk(KERN_NOTICE "type=%d %s\n",
+                    nlh->nlmsg_type,
+                    ab->skb->data + NLMSG_SPACE(0));
+            } else
+                audit_log_lost("printk limit exceeded\n");
         }
     }
     audit_buffer_free(ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ac6d9b23b018..782262e4107d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1000,9 +1000,10 @@ static int audit_log_single_execve_arg(struct audit_context *context,
      * for strings that are too long, we should not have created
      * any.
      */
-    if (unlikely((len = -1) || len > MAX_ARG_STRLEN - 1)) {
+    if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) {
         WARN_ON(1);
         send_sig(SIGKILL, current, 0);
+        return -1;
     }
 
     /* walk the whole argument looking for non-ascii chars */
@@ -1020,6 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
         if (ret) {
             WARN_ON(1);
             send_sig(SIGKILL, current, 0);
+            return -1;
         }
         buf[to_send] = '\0';
         has_cntl = audit_string_contains_control(buf, to_send);
@@ -1068,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
      * so we can be sure nothing was lost.
      */
     if ((i == 0) && (too_long))
-        audit_log_format(*ab, "a%d_len=%ld ", arg_num,
+        audit_log_format(*ab, "a%d_len=%zu ", arg_num,
                  has_cntl ? 2*len : len);
 
     /*
@@ -1083,6 +1085,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
         if (ret) {
             WARN_ON(1);
             send_sig(SIGKILL, current, 0);
+            return -1;
         }
         buf[to_send] = '\0';
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4766bb65e4d9..e9c2fb01e89b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -113,9 +113,9 @@ static int root_count;
 #define dummytop (&rootnode.top_cgroup)
 
 /* This flag indicates whether tasks in the fork and exit paths should
- * take callback_mutex and check for fork/exit handlers to call. This
- * avoids us having to do extra work in the fork/exit path if none of the
- * subsystems need to be called.
+ * check for fork/exit handlers to call. This avoids us having to do
+ * extra work in the fork/exit path if none of the subsystems need to
+ * be called.
  */
 static int need_forkexit_callback;
 
@@ -307,7 +307,6 @@ static inline void put_css_set_taskexit(struct css_set *cg)
  * template: location in which to build the desired set of subsystem
  * state objects for the new cgroup group
  */
-
 static struct css_set *find_existing_css_set(
     struct css_set *oldcg,
     struct cgroup *cgrp,
@@ -320,7 +319,7 @@ static struct css_set *find_existing_css_set(
     /* Built the set of subsystem state objects that we want to
      * see in the new css_set */
     for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-        if (root->subsys_bits & (1ull << i)) {
+        if (root->subsys_bits & (1UL << i)) {
             /* Subsystem is in this hierarchy. So we want
              * the subsystem state from the new
              * cgroup */
@@ -354,7 +353,6 @@ static struct css_set *find_existing_css_set(
  * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
  * success or a negative error
  */
-
 static int allocate_cg_links(int count, struct list_head *tmp)
 {
     struct cg_cgroup_link *link;
@@ -396,7 +394,6 @@ static void free_cg_links(struct list_head *tmp)
  * substituted into the appropriate hierarchy. Must be called with
  * cgroup_mutex held
  */
-
 static struct css_set *find_css_set(
     struct css_set *oldcg, struct cgroup *cgrp)
 {
@@ -473,7 +470,6 @@ static struct css_set *find_css_set(
     /* Link this cgroup group into the list */
     list_add(&res->list, &init_css_set.list);
     css_set_count++;
-    INIT_LIST_HEAD(&res->tasks);
     write_unlock(&css_set_lock);
 
     return res;
@@ -507,8 +503,8 @@ static struct css_set *find_css_set(
  * critical pieces of code here. The exception occurs on cgroup_exit(),
  * when a task in a notify_on_release cgroup exits. Then cgroup_mutex
  * is taken, and if the cgroup count is zero, a usermode call made
- * to /sbin/cgroup_release_agent with the name of the cgroup (path
- * relative to the root of cgroup file system) as the argument.
+ * to the release agent with the name of the cgroup (path relative to
+ * the root of cgroup file system) as the argument.
  *
  * A cgroup can only be deleted if both its 'count' of using tasks
  * is zero, and its list of 'children' cgroups is empty. Since all
@@ -521,7 +517,7 @@ static struct css_set *find_css_set(
  *
  * The need for this exception arises from the action of
  * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
- * another. It does so using cgroup_mutexe, however there are
+ * another. It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
 * task->cgroup without the expense of grabbing a system global
 * mutex. Therefore except as noted below, when dereferencing or, as
@@ -537,7 +533,6 @@ static struct css_set *find_css_set(
 * cgroup_lock - lock out any changes to cgroup structures
 *
 */
-
 void cgroup_lock(void)
 {
     mutex_lock(&cgroup_mutex);
@@ -548,7 +543,6 @@ void cgroup_lock(void)
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
-
 void cgroup_unlock(void)
 {
     mutex_unlock(&cgroup_mutex);
@@ -590,7 +584,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
 * Call subsys's pre_destroy handler.
 * This is called before css refcnt check.
 */
-
 static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
     struct cgroup_subsys *ss;
@@ -600,7 +593,6 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
     return;
 }
 
-
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
     /* is dentry a directory ? if so, kfree() associated cgroup */
@@ -696,7 +688,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
     added_bits = final_bits & ~root->actual_subsys_bits;
     /* Check that any added subsystems are currently free */
     for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-        unsigned long long bit = 1ull << i;
+        unsigned long bit = 1UL << i;
         struct cgroup_subsys *ss = subsys[i];
         if (!(bit & added_bits))
             continue;
@@ -927,7 +919,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
     if (!inode)
         return -ENOMEM;
 
-    inode->i_op = &simple_dir_inode_operations;
     inode->i_fop = &simple_dir_operations;
     inode->i_op = &cgroup_dir_inode_operations;
     /* directories start off with i_nlink == 2 (for "." entry) */
@@ -961,8 +952,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
     }
 
     root = kzalloc(sizeof(*root), GFP_KERNEL);
-    if (!root)
+    if (!root) {
+        if (opts.release_agent)
+            kfree(opts.release_agent);
         return -ENOMEM;
+    }
 
     init_cgroup_root(root);
     root->subsys_bits = opts.subsys_bits;
@@ -1129,8 +1123,13 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
     return dentry->d_fsdata;
 }
 
-/*
- * Called with cgroup_mutex held. Writes path of cgroup into buf.
+/**
+ * cgroup_path - generate the path of a cgroup
+ * @cgrp: the cgroup in question
+ * @buf: the buffer to write the path into
+ * @buflen: the length of the buffer
+ *
+ * Called with cgroup_mutex held. Writes path of cgroup into buf.
  * Returns 0 on success, -errno on error.
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
@@ -1188,11 +1187,13 @@ static void get_first_subsys(const struct cgroup *cgrp,
     *subsys_id = test_ss->subsys_id;
 }
 
-/*
- * Attach task 'tsk' to cgroup 'cgrp'
+/**
+ * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
+ * @cgrp: the cgroup the task is attaching to
+ * @tsk: the task to be attached
  *
  * Call holding cgroup_mutex. May take task_lock of
- * the task 'pid' during call.
+ * the task 'tsk' during call.
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
@@ -1293,7 +1294,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
 }
 
 /* The various types of files and directories in a cgroup file system */
-
 enum cgroup_filetype {
     FILE_ROOT,
     FILE_DIR,
@@ -1584,12 +1584,11 @@ static int cgroup_create_file(struct dentry *dentry, int mode,
 }
 
 /*
  * cgroup_create_dir - create a directory for an object.
- * cgrp: the cgroup we create the directory for.
- * It must have a valid ->parent field
- * And we are going to fill its ->dentry field.
- * dentry: dentry of the new cgroup
- * mode: mode to set on new directory.
+ * @cgrp: the cgroup we create the directory for. It must have a valid
+ *        ->parent field. And we are going to fill its ->dentry field.
+ * @dentry: dentry of the new cgroup
+ * @mode: mode to set on new directory.
  */
 static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
                 int mode)
@@ -1651,8 +1650,12 @@ int cgroup_add_files(struct cgroup *cgrp,
     return 0;
 }
 
-/* Count the number of tasks in a cgroup. */
-
+/**
+ * cgroup_task_count - count the number of tasks in a cgroup.
+ * @cgrp: the cgroup in question
+ *
+ * Return the number of tasks in the cgroup.
+ */
 int cgroup_task_count(const struct cgroup *cgrp)
 {
     int count = 0;
@@ -1962,12 +1965,13 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
 }
 
 /**
- * Build and fill cgroupstats so that taskstats can export it to user
- * space.
- *
+ * cgroupstats_build - build and fill cgroupstats
  * @stats: cgroupstats to fill information into
  * @dentry: A dentry entry belonging to the cgroup for which stats have
  * been requested.
+ *
+ * Build and fill cgroupstats so that taskstats can export it to user
+ * space.
  */
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 {
@@ -2199,14 +2203,13 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 }
 
 /*
  * cgroup_create - create a cgroup
- * parent: cgroup that will be parent of the new cgroup.
- * name: name of the new cgroup. Will be strcpy'ed.
- * mode: mode to set on new inode
+ * @parent: cgroup that will be parent of the new cgroup
+ * @dentry: dentry of the new cgroup
+ * @mode: mode to set on new inode
  *
  * Must be called with the mutex on the parent inode held
  */
-
 static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                 int mode)
 {
@@ -2229,7 +2232,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
     mutex_lock(&cgroup_mutex);
 
-    cgrp->flags = 0;
     INIT_LIST_HEAD(&cgrp->sibling);
     INIT_LIST_HEAD(&cgrp->children);
     INIT_LIST_HEAD(&cgrp->css_sets);
@@ -2239,6 +2241,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
     cgrp->root = parent->root;
     cgrp->top_cgroup = parent->top_cgroup;
 
+    if (notify_on_release(parent))
+        set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+
     for_each_subsys(root, ss) {
         struct cgroup_subsys_state *css = ss->create(ss, cgrp);
         if (IS_ERR(css)) {
@@ -2349,13 +2354,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
     parent = cgrp->parent;
     root = cgrp->root;
     sb = root->sb;
+
     /*
-     * Call pre_destroy handlers of subsys
+     * Call pre_destroy handlers of subsys. Notify subsystems
+     * that rmdir() request comes.
      */
     cgroup_call_pre_destroy(cgrp);
-    /*
-     * Notify subsyses that rmdir() request comes.
-     */
 
     if (cgroup_has_css_refs(cgrp)) {
         mutex_unlock(&cgroup_mutex);
@@ -2431,8 +2435,10 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss)
 }
 
 /**
- * cgroup_init_early - initialize cgroups at system boot, and
- * initialize any subsystems that request early init.
+ * cgroup_init_early - cgroup initialization at system boot
+ *
+ * Initialize cgroups at system boot, and initialize any
+ * subsystems that request early init.
  */
 int __init cgroup_init_early(void)
 {
@@ -2474,8 +2480,10 @@ int __init cgroup_init_early(void)
 }
 
 /**
- * cgroup_init - register cgroup filesystem and /proc file, and
- * initialize any subsystems that didn't request early init.
+ * cgroup_init - cgroup initialization
+ *
+ * Register cgroup filesystem and /proc file, and initialize
+ * any subsystems that didn't request early init.
 */
 int __init cgroup_init(void)
 {
@@ -2618,7 +2626,7 @@ static struct file_operations proc_cgroupstats_operations = {
 
 /**
  * cgroup_fork - attach newly forked task to its parents cgroup.
- * @tsk: pointer to task_struct of forking parent process.
+ * @child: pointer to task_struct of forking parent process.
  *
  * Description: A task inherits its parent's cgroup at fork().
  *
@@ -2642,9 +2650,12 @@ void cgroup_fork(struct task_struct *child)
 }
 
 /**
- * cgroup_fork_callbacks - called on a new task very soon before
- * adding it to the tasklist. No need to take any locks since no-one
- * can be operating on this task
+ * cgroup_fork_callbacks - run fork callbacks
+ * @child: the new task
+ *
+ * Called on a new task very soon before adding it to the
+ * tasklist. No need to take any locks since no-one can
+ * be operating on this task.
  */
 void cgroup_fork_callbacks(struct task_struct *child)
 {
@@ -2659,11 +2670,14 @@ void cgroup_fork_callbacks(struct task_struct *child)
 }
 
 /**
- * cgroup_post_fork - called on a new task after adding it to the
- * task list. Adds the task to the list running through its css_set
- * if necessary. Has to be after the task is visible on the task list
- * in case we race with the first call to cgroup_iter_start() - to
- * guarantee that the new task ends up on its list. */
+ * cgroup_post_fork - called on a new task after adding it to the task list
+ * @child: the task in question
+ *
+ * Adds the task to the list running through its css_set if necessary.
+ * Has to be after the task is visible on the task list in case we race
+ * with the first call to cgroup_iter_start() - to guarantee that the
+ * new task ends up on its list.
+ */
 void cgroup_post_fork(struct task_struct *child)
 {
     if (use_task_css_set_links) {
@@ -2676,6 +2690,7 @@ void cgroup_post_fork(struct task_struct *child)
 /**
  * cgroup_exit - detach cgroup from exiting task
  * @tsk: pointer to task_struct of exiting process
+ * @run_callback: run exit callbacks?
  *
  * Description: Detach cgroup from @tsk and release it.
  *
@@ -2706,7 +2721,6 @@ void cgroup_post_fork(struct task_struct *child)
  * top_cgroup isn't going away, and either task has PF_EXITING set,
  * which wards off any cgroup_attach_task() attempts, or task is a failed
  * fork, never visible to cgroup_attach_task.
- *
  */
 void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 {
@@ -2743,9 +2757,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 }
 
 /**
- * cgroup_clone - duplicate the current cgroup in the hierarchy
- * that the given subsystem is attached to, and move this task into
- * the new child
+ * cgroup_clone - clone the cgroup the given subsystem is attached to
+ * @tsk: the task to be moved
+ * @subsys: the given subsystem
+ *
+ * Duplicate the current cgroup in the hierarchy that the given
+ * subsystem is attached to, and move this task into the new
+ * child.
  */
 int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 {
@@ -2858,9 +2876,12 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
     return ret;
 }
 
-/*
- * See if "cgrp" is a descendant of the current task's cgroup in
- * the appropriate hierarchy
+/**
+ * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp
+ * @cgrp: the cgroup in question
+ *
+ * See if @cgrp is a descendant of the current task's cgroup in
+ * the appropriate hierarchy.
  *
  * If we are sending in dummytop, then presumably we are creating
  * the top cgroup in the subsystem.
@@ -2939,9 +2960,7 @@ void __css_put(struct cgroup_subsys_state *css)
 * release agent task. We don't bother to wait because the caller of
 * this routine has no use for the exit status of the release agent
 * task, so no sense holding our caller up for that.
- *
 */
-
 static void cgroup_release_agent(struct work_struct *work)
 {
     BUG_ON(work != &release_agent_work);
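For reference, the kernel-doc blocks added above spell out the calling conventions of helpers such as cgroup_path() and cgroup_lock()/cgroup_unlock(). A minimal sketch of a caller honouring that contract follows; the function name and buffer size are hypothetical and not part of the patch:

#include <linux/cgroup.h>
#include <linux/kernel.h>

/* Print the hierarchy-relative path of a cgroup to the kernel log. */
static void demo_print_cgroup_path(struct cgroup *cgrp)
{
        char buf[256];

        cgroup_lock();          /* cgroup_path() must be called with cgroup_mutex held */
        if (!cgroup_path(cgrp, buf, sizeof(buf)))
                printk(KERN_INFO "cgroup path: %s\n", buf);
        cgroup_unlock();
}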
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e296ed81d4d..a1b61f414228 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -322,8 +322,8 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Call without callback_mutex or task_lock() held. May be
  * called with or without cgroup_mutex held. Thanks in part to
  * 'the_top_cpuset_hack', the task's cpuset pointer will never
- * be NULL. This routine also might acquire callback_mutex and
- * current->mm->mmap_sem during call.
+ * be NULL. This routine also might acquire callback_mutex during
+ * call.
  *
  * Reading current->cpuset->mems_generation doesn't need task_lock
  * to guard the current->cpuset derefence, because it is guarded
diff --git a/kernel/exit.c b/kernel/exit.c
index 506a957b665a..53872bf993fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -214,20 +214,19 @@ struct pid *session_of_pgrp(struct pid *pgrp)
 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
 {
     struct task_struct *p;
-    int ret = 1;
 
     do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
-        if (p == ignored_task
-                || p->exit_state
-                || is_global_init(p->real_parent))
+        if ((p == ignored_task) ||
+            (p->exit_state && thread_group_empty(p)) ||
+            is_global_init(p->real_parent))
             continue;
+
         if (task_pgrp(p->real_parent) != pgrp &&
-            task_session(p->real_parent) == task_session(p)) {
-            ret = 0;
-            break;
-        }
+            task_session(p->real_parent) == task_session(p))
+            return 0;
     } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
-    return ret; /* (sighing) "Often!" */
+
+    return 1;
 }
 
 int is_current_pgrp_orphaned(void)
@@ -255,6 +254,37 @@ static int has_stopped_jobs(struct pid *pgrp)
     return retval;
 }
 
+/*
+ * Check to see if any process groups have become orphaned as
+ * a result of our exiting, and if they have any stopped jobs,
+ * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ */
+static void
+kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
+{
+    struct pid *pgrp = task_pgrp(tsk);
+    struct task_struct *ignored_task = tsk;
+
+    if (!parent)
+        /* exit: our father is in a different pgrp than
+         * we are and we were the only connection outside.
+         */
+        parent = tsk->real_parent;
+    else
+        /* reparent: our child is in a different pgrp than
+         * we are, and it was the only connection outside.
+         */
+        ignored_task = NULL;
+
+    if (task_pgrp(parent) != pgrp &&
+        task_session(parent) == task_session(tsk) &&
+        will_become_orphaned_pgrp(pgrp, ignored_task) &&
+        has_stopped_jobs(pgrp)) {
+        __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
+        __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
+    }
+}
+
 /**
  * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
  *
@@ -635,22 +665,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
         p->exit_signal != -1 && thread_group_empty(p))
         do_notify_parent(p, p->exit_signal);
 
-    /*
-     * process group orphan check
-     * Case ii: Our child is in a different pgrp
-     * than we are, and it was the only connection
-     * outside, so the child pgrp is now orphaned.
-     */
-    if ((task_pgrp(p) != task_pgrp(father)) &&
-        (task_session(p) == task_session(father))) {
-        struct pid *pgrp = task_pgrp(p);
-
-        if (will_become_orphaned_pgrp(pgrp, NULL) &&
-            has_stopped_jobs(pgrp)) {
-            __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
-            __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
-        }
-    }
+    kill_orphaned_pgrp(p, father);
 }
 
 /*
@@ -735,11 +750,9 @@ static void forget_original_parent(struct task_struct *father)
  * Send signals to all our closest relatives so that they know
  * to properly mourn us..
  */
-static void exit_notify(struct task_struct *tsk)
+static void exit_notify(struct task_struct *tsk, int group_dead)
 {
     int state;
-    struct task_struct *t;
-    struct pid *pgrp;
 
     /*
      * This does two things:
@@ -753,25 +766,8 @@ static void exit_notify(struct task_struct *tsk)
     exit_task_namespaces(tsk);
 
     write_lock_irq(&tasklist_lock);
-    /*
-     * Check to see if any process groups have become orphaned
-     * as a result of our exiting, and if they have any stopped
-     * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
-     *
-     * Case i: Our father is in a different pgrp than we are
-     * and we were the only connection outside, so our pgrp
-     * is about to become orphaned.
-     */
-    t = tsk->real_parent;
-
-    pgrp = task_pgrp(tsk);
-    if ((task_pgrp(t) != pgrp) &&
-        (task_session(t) == task_session(tsk)) &&
-        will_become_orphaned_pgrp(pgrp, tsk) &&
-        has_stopped_jobs(pgrp)) {
-        __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
-        __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
-    }
+    if (group_dead)
+        kill_orphaned_pgrp(tsk->group_leader, NULL);
 
     /* Let father know we died
      *
@@ -788,8 +784,8 @@ static void exit_notify(struct task_struct *tsk)
      * the same after a fork.
      */
     if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
-        ( tsk->parent_exec_id != t->self_exec_id ||
-          tsk->self_exec_id != tsk->parent_exec_id)
+        (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
+         tsk->self_exec_id != tsk->parent_exec_id)
         && !capable(CAP_KILL))
         tsk->exit_signal = SIGCHLD;
 
@@ -986,7 +982,7 @@ NORET_TYPE void do_exit(long code)
         module_put(tsk->binfmt->module);
 
     proc_exit_connector(tsk);
-    exit_notify(tsk);
+    exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
     mpol_free(tsk->mempolicy);
     tsk->mempolicy = NULL;
@@ -1382,7 +1378,7 @@ unlock_sig:
     if (!retval && infop)
         retval = put_user(0, &infop->si_errno);
     if (!retval && infop)
-        retval = put_user(why, &infop->si_code);
+        retval = put_user((short)why, &infop->si_code);
     if (!retval && infop)
         retval = put_user(exit_code, &infop->si_status);
     if (!retval && infop)
diff --git a/kernel/futex.c b/kernel/futex.c
index 221f2128a437..06968cd79200 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,8 @@
 
 #include "rtmutex_common.h"
 
+int __read_mostly futex_cmpxchg_enabled;
+
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 /*
@@ -469,6 +471,8 @@ void exit_pi_state_list(struct task_struct *curr)
     struct futex_hash_bucket *hb;
     union futex_key key;
 
+    if (!futex_cmpxchg_enabled)
+        return;
     /*
      * We are a ZOMBIE and nobody can enqueue itself on
      * pi_state_list anymore, but we have to be careful
@@ -1870,6 +1874,8 @@ asmlinkage long
 sys_set_robust_list(struct robust_list_head __user *head,
             size_t len)
 {
+    if (!futex_cmpxchg_enabled)
+        return -ENOSYS;
     /*
      * The kernel knows only one size for now:
      */
@@ -1894,6 +1900,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
     struct robust_list_head __user *head;
     unsigned long ret;
 
+    if (!futex_cmpxchg_enabled)
+        return -ENOSYS;
+
     if (!pid)
         head = current->robust_list;
     else {
@@ -1997,6 +2006,9 @@ void exit_robust_list(struct task_struct *curr)
     unsigned long futex_offset;
     int rc;
 
+    if (!futex_cmpxchg_enabled)
+        return;
+
     /*
      * Fetch the list head (which was registered earlier, via
      * sys_set_robust_list()):
@@ -2051,7 +2063,7 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
         u32 __user *uaddr2, u32 val2, u32 val3)
 {
-    int ret;
+    int ret = -ENOSYS;
     int cmd = op & FUTEX_CMD_MASK;
     struct rw_semaphore *fshared = NULL;
 
@@ -2083,13 +2095,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
         ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
         break;
     case FUTEX_LOCK_PI:
-        ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
+        if (futex_cmpxchg_enabled)
+            ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
         break;
     case FUTEX_UNLOCK_PI:
-        ret = futex_unlock_pi(uaddr, fshared);
+        if (futex_cmpxchg_enabled)
+            ret = futex_unlock_pi(uaddr, fshared);
         break;
     case FUTEX_TRYLOCK_PI:
-        ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
+        if (futex_cmpxchg_enabled)
+            ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
         break;
     default:
         ret = -ENOSYS;
@@ -2145,8 +2160,29 @@ static struct file_system_type futex_fs_type = {
 
 static int __init init(void)
 {
-    int i = register_filesystem(&futex_fs_type);
+    u32 curval;
+    int i;
+
+    /*
+     * This will fail and we want it. Some arch implementations do
+     * runtime detection of the futex_atomic_cmpxchg_inatomic()
+     * functionality. We want to know that before we call in any
+     * of the complex code paths. Also we want to prevent
+     * registration of robust lists in that case. NULL is
+     * guaranteed to fault and we get -EFAULT on functional
+     * implementation, the non functional ones will return
+     * -ENOSYS.
+     */
+    curval = cmpxchg_futex_value_locked(NULL, 0, 0);
+    if (curval == -EFAULT)
+        futex_cmpxchg_enabled = 1;
 
+    for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
+        plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
+        spin_lock_init(&futex_queues[i].lock);
+    }
+
+    i = register_filesystem(&futex_fs_type);
     if (i)
         return i;
 
@@ -2156,10 +2192,6 @@ static int __init init(void)
         return PTR_ERR(futex_mnt);
     }
 
-    for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
-        plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
-        spin_lock_init(&futex_queues[i].lock);
-    }
     return 0;
 }
 __initcall(init);
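With the boot-time probe above, PI and robust futex operations are refused with -ENOSYS on architectures whose futex_atomic_cmpxchg_inatomic() does not work. That is observable from user space; a minimal probe program sketching the idea (illustrative only, not part of the patch, and header availability may vary by libc):

#include <errno.h>
#include <linux/futex.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        unsigned int uval = 0;  /* an unheld PI futex word */

        /* On kernels with this change, an arch without a working atomic
         * cmpxchg answers every PI futex op with ENOSYS; any other outcome
         * (success, or EPERM for a futex we do not hold) means PI futexes
         * are available. */
        if (syscall(SYS_futex, &uval, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0) == -1
            && errno == ENOSYS)
                printf("PI futexes not supported on this kernel/arch\n");
        else
                printf("PI futexes supported\n");
        return 0;
}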
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 7d5e4b016f39..ff90f049f8f6 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task_struct *curr)
     compat_long_t futex_offset;
     int rc;
 
+    if (!futex_cmpxchg_enabled)
+        return;
+
     /*
      * Fetch the list head (which was registered earlier, via
      * sys_set_robust_list()):
@@ -115,6 +118,9 @@ asmlinkage long
 compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
                compat_size_t len)
 {
+    if (!futex_cmpxchg_enabled)
+        return -ENOSYS;
+
     if (unlikely(len != sizeof(*head)))
         return -EINVAL;
 
@@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
     struct compat_robust_list_head __user *head;
     unsigned long ret;
 
+    if (!futex_cmpxchg_enabled)
+        return -ENOSYS;
+
     if (!pid)
         head = current->compat_robust_list;
     else {
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index cc54c6276356..fdb3fbe2b0c4 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -246,6 +246,17 @@ static unsigned int default_startup(unsigned int irq)
 }
 
 /*
+ * default shutdown function
+ */
+static void default_shutdown(unsigned int irq)
+{
+    struct irq_desc *desc = irq_desc + irq;
+
+    desc->chip->mask(irq);
+    desc->status |= IRQ_MASKED;
+}
+
+/*
  * Fixup enable/disable function pointers
  */
 void irq_chip_set_defaults(struct irq_chip *chip)
@@ -256,8 +267,15 @@ void irq_chip_set_defaults(struct irq_chip *chip)
         chip->disable = default_disable;
     if (!chip->startup)
         chip->startup = default_startup;
+    /*
+     * We use chip->disable, when the user provided its own. When
+     * we have default_disable set for chip->disable, then we need
+     * to use default_shutdown, otherwise the irq line is not
+     * disabled on free_irq():
+     */
     if (!chip->shutdown)
-        chip->shutdown = chip->disable;
+        chip->shutdown = chip->disable != default_disable ?
+            chip->disable : default_shutdown;
     if (!chip->name)
         chip->name = chip->typename;
     if (!chip->end)
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index a6b2bc831dd0..088dabbf2d6a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -6,6 +6,7 @@
  * This file contains spurious interrupt handling.
  */
 
+#include <linux/jiffies.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
@@ -179,7 +180,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
      * otherwise the couter becomes a doomsday timer for otherwise
      * working systems
      */
-    if (jiffies - desc->last_unhandled > HZ/10)
+    if (time_after(jiffies, desc->last_unhandled + HZ/10))
         desc->irqs_unhandled = 1;
     else
         desc->irqs_unhandled++;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7a86e6432338..fcfb580c3afc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -498,27 +498,36 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
     return 0;
 }
 
+/*
+ * If we have a symbol_name argument, look it up and add the offset field
+ * to it. This way, we can specify a relative address to a symbol.
+ */
+static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
+{
+    kprobe_opcode_t *addr = p->addr;
+    if (p->symbol_name) {
+        if (addr)
+            return NULL;
+        kprobe_lookup_name(p->symbol_name, addr);
+    }
+
+    if (!addr)
+        return NULL;
+    return (kprobe_opcode_t *)(((char *)addr) + p->offset);
+}
+
 static int __kprobes __register_kprobe(struct kprobe *p,
     unsigned long called_from)
 {
     int ret = 0;
     struct kprobe *old_p;
     struct module *probed_mod;
+    kprobe_opcode_t *addr;
 
-    /*
-     * If we have a symbol_name argument look it up,
-     * and add it to the address. That way the addr
-     * field can either be global or relative to a symbol.
-     */
-    if (p->symbol_name) {
-        if (p->addr)
-            return -EINVAL;
-        kprobe_lookup_name(p->symbol_name, p->addr);
-    }
-
-    if (!p->addr)
+    addr = kprobe_addr(p);
+    if (!addr)
         return -EINVAL;
-    p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
+    p->addr = addr;
 
     if (!kernel_text_address((unsigned long) p->addr) ||
         in_kprobes_functions((unsigned long) p->addr))
@@ -678,8 +687,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
     unregister_kprobe(&jp->kp);
 }
 
-#ifdef ARCH_SUPPORTS_KRETPROBES
-
+#ifdef CONFIG_KRETPROBES
 /*
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
@@ -722,12 +730,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
     int ret = 0;
     struct kretprobe_instance *inst;
     int i;
-    void *addr = rp->kp.addr;
+    void *addr;
 
     if (kretprobe_blacklist_size) {
-        if (addr == NULL)
-            kprobe_lookup_name(rp->kp.symbol_name, addr);
-        addr += rp->kp.offset;
+        addr = kprobe_addr(&rp->kp);
+        if (!addr)
+            return -EINVAL;
 
         for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
             if (kretprobe_blacklist[i].addr == addr)
@@ -769,8 +777,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
     return ret;
 }
 
-#else /* ARCH_SUPPORTS_KRETPROBES */
-
+#else /* CONFIG_KRETPROBES */
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
     return -ENOSYS;
@@ -781,8 +788,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 {
     return 0;
 }
-
-#endif /* ARCH_SUPPORTS_KRETPROBES */
+#endif /* CONFIG_KRETPROBES */
 
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
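With kprobe_addr() factored out, both register_kprobe() and register_kretprobe() resolve a probe point from either an explicit address or symbol_name + offset, and reject a probe that sets both. A sketch of the symbol-based form in a module (the target symbol and message are illustrative, not taken from the patch):

#include <linux/kprobes.h>
#include <linux/module.h>

static int demo_pre(struct kprobe *p, struct pt_regs *regs)
{
        printk(KERN_INFO "kprobe hit at %s+0x%x\n",
               p->symbol_name, p->offset);
        return 0;
}

static struct kprobe demo_kp = {
        .symbol_name = "do_fork",       /* resolved via kprobe_lookup_name() */
        .offset      = 0,               /* setting .addr as well is rejected with -EINVAL */
        .pre_handler = demo_pre,
};

static int __init demo_init(void)
{
        return register_kprobe(&demo_kp);
}

static void __exit demo_exit(void)
{
        unregister_kprobe(&demo_kp);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");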
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3574379f4d62..81a4e4a3f087 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -779,6 +779,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
      * parallel walking of the hash-list safe:
      */
     list_add_tail_rcu(&class->hash_entry, hash_head);
+    /*
+     * Add it to the global list of classes:
+     */
+    list_add_tail_rcu(&class->lock_entry, &all_lock_classes);
 
     if (verbose(class)) {
         graph_unlock();
@@ -2282,10 +2286,6 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
             return 0;
         break;
     case LOCK_USED:
-        /*
-         * Add it to the global list of classes:
-         */
-        list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
         debug_atomic_dec(&nr_unused_locks);
         break;
     default:
diff --git a/kernel/marker.c b/kernel/marker.c index c4c2cd8b61f5..48a4ea5afffd 100644 --- a/kernel/marker.c +++ b/kernel/marker.c | |||
| @@ -61,8 +61,8 @@ struct marker_entry { | |||
| 61 | int refcount; /* Number of times armed. 0 if disarmed. */ | 61 | int refcount; /* Number of times armed. 0 if disarmed. */ |
| 62 | struct rcu_head rcu; | 62 | struct rcu_head rcu; |
| 63 | void *oldptr; | 63 | void *oldptr; |
| 64 | char rcu_pending:1; | 64 | unsigned char rcu_pending:1; |
| 65 | char ptype:1; | 65 | unsigned char ptype:1; |
| 66 | char name[0]; /* Contains name'\0'format'\0' */ | 66 | char name[0]; /* Contains name'\0'format'\0' */ |
| 67 | }; | 67 | }; |
| 68 | 68 | ||
| @@ -698,14 +698,12 @@ int marker_probe_unregister(const char *name, | |||
| 698 | { | 698 | { |
| 699 | struct marker_entry *entry; | 699 | struct marker_entry *entry; |
| 700 | struct marker_probe_closure *old; | 700 | struct marker_probe_closure *old; |
| 701 | int ret = 0; | 701 | int ret = -ENOENT; |
| 702 | 702 | ||
| 703 | mutex_lock(&markers_mutex); | 703 | mutex_lock(&markers_mutex); |
| 704 | entry = get_marker(name); | 704 | entry = get_marker(name); |
| 705 | if (!entry) { | 705 | if (!entry) |
| 706 | ret = -ENOENT; | ||
| 707 | goto end; | 706 | goto end; |
| 708 | } | ||
| 709 | if (entry->rcu_pending) | 707 | if (entry->rcu_pending) |
| 710 | rcu_barrier(); | 708 | rcu_barrier(); |
| 711 | old = marker_entry_remove_probe(entry, probe, probe_private); | 709 | old = marker_entry_remove_probe(entry, probe, probe_private); |
| @@ -713,12 +711,15 @@ int marker_probe_unregister(const char *name, | |||
| 713 | marker_update_probes(); /* may update entry */ | 711 | marker_update_probes(); /* may update entry */ |
| 714 | mutex_lock(&markers_mutex); | 712 | mutex_lock(&markers_mutex); |
| 715 | entry = get_marker(name); | 713 | entry = get_marker(name); |
| 714 | if (!entry) | ||
| 715 | goto end; | ||
| 716 | entry->oldptr = old; | 716 | entry->oldptr = old; |
| 717 | entry->rcu_pending = 1; | 717 | entry->rcu_pending = 1; |
| 718 | /* write rcu_pending before calling the RCU callback */ | 718 | /* write rcu_pending before calling the RCU callback */ |
| 719 | smp_wmb(); | 719 | smp_wmb(); |
| 720 | call_rcu(&entry->rcu, free_old_closure); | 720 | call_rcu(&entry->rcu, free_old_closure); |
| 721 | remove_marker(name); /* Ignore busy error message */ | 721 | remove_marker(name); /* Ignore busy error message */ |
| 722 | ret = 0; | ||
| 722 | end: | 723 | end: |
| 723 | mutex_unlock(&markers_mutex); | 724 | mutex_unlock(&markers_mutex); |
| 724 | return ret; | 725 | return ret; |
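The marker_probe_unregister() fix above defaults the return value to -ENOENT and re-validates the entry after markers_mutex is re-taken, since marker_update_probes() runs with the mutex dropped and the entry can disappear in the meantime. A stripped-down, single-threaded sketch of that control flow follows; the lock helpers are empty stand-ins, not real locking.

#include <stdio.h>
#include <errno.h>

/* Trivial stand-ins for markers_mutex; real code uses an actual mutex. */
static void registry_lock(void)   { }
static void registry_unlock(void) { }

static int entry_present = 1;		/* the entry may vanish while unlocked */

/* Work that must run without the registry lock held (marker_update_probes()
 * in the patch); by the time the lock is re-taken, the entry may be gone. */
static void update_outside_lock(void) { }

/*
 * Shape of the fixed unregister path: start from -ENOENT, re-validate the
 * entry every time the lock is re-taken, and flip to success only at the end.
 */
static int unregister_entry(void)
{
	int ret = -ENOENT;

	registry_lock();
	if (!entry_present)
		goto out;

	registry_unlock();
	update_outside_lock();
	registry_lock();

	if (!entry_present)		/* re-check after re-locking */
		goto out;

	entry_present = 0;		/* actual teardown happens here */
	ret = 0;
out:
	registry_unlock();
	return ret;
}

int main(void)
{
	printf("first unregister:  %d\n", unregister_entry());
	printf("second unregister: %d\n", unregister_entry());
	return 0;
}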
diff --git a/kernel/module.c b/kernel/module.c index 92595bad3812..be4807fb90e4 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -987,12 +987,11 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | |||
| 987 | return ret; | 987 | return ret; |
| 988 | } | 988 | } |
| 989 | 989 | ||
| 990 | |||
| 991 | /* | 990 | /* |
| 992 | * /sys/module/foo/sections stuff | 991 | * /sys/module/foo/sections stuff |
| 993 | * J. Corbet <corbet@lwn.net> | 992 | * J. Corbet <corbet@lwn.net> |
| 994 | */ | 993 | */ |
| 995 | #ifdef CONFIG_KALLSYMS | 994 | #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) |
| 996 | static ssize_t module_sect_show(struct module_attribute *mattr, | 995 | static ssize_t module_sect_show(struct module_attribute *mattr, |
| 997 | struct module *mod, char *buf) | 996 | struct module *mod, char *buf) |
| 998 | { | 997 | { |
| @@ -1188,7 +1187,7 @@ static inline void add_notes_attrs(struct module *mod, unsigned int nsect, | |||
| 1188 | static inline void remove_notes_attrs(struct module *mod) | 1187 | static inline void remove_notes_attrs(struct module *mod) |
| 1189 | { | 1188 | { |
| 1190 | } | 1189 | } |
| 1191 | #endif /* CONFIG_KALLSYMS */ | 1190 | #endif |
| 1192 | 1191 | ||
| 1193 | #ifdef CONFIG_SYSFS | 1192 | #ifdef CONFIG_SYSFS |
| 1194 | int module_add_modinfo_attrs(struct module *mod) | 1193 | int module_add_modinfo_attrs(struct module *mod) |
| @@ -1231,9 +1230,7 @@ void module_remove_modinfo_attrs(struct module *mod) | |||
| 1231 | } | 1230 | } |
| 1232 | kfree(mod->modinfo_attrs); | 1231 | kfree(mod->modinfo_attrs); |
| 1233 | } | 1232 | } |
| 1234 | #endif | ||
| 1235 | 1233 | ||
| 1236 | #ifdef CONFIG_SYSFS | ||
| 1237 | int mod_sysfs_init(struct module *mod) | 1234 | int mod_sysfs_init(struct module *mod) |
| 1238 | { | 1235 | { |
| 1239 | int err; | 1236 | int err; |
| @@ -1936,8 +1933,15 @@ static struct module *load_module(void __user *umod, | |||
| 1936 | /* Set up license info based on the info section */ | 1933 | /* Set up license info based on the info section */ |
| 1937 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); | 1934 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); |
| 1938 | 1935 | ||
| 1936 | /* | ||
| 1937 | * ndiswrapper is under GPL by itself, but loads proprietary modules. | ||
| 1938 | * Don't use add_taint_module(), as it would prevent ndiswrapper from | ||
| 1939 | * using GPL-only symbols it needs. | ||
| 1940 | */ | ||
| 1939 | if (strcmp(mod->name, "ndiswrapper") == 0) | 1941 | if (strcmp(mod->name, "ndiswrapper") == 0) |
| 1940 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 1942 | add_taint(TAINT_PROPRIETARY_MODULE); |
| 1943 | |||
| 1944 | /* driverloader was caught wrongly pretending to be under GPL */ | ||
| 1941 | if (strcmp(mod->name, "driverloader") == 0) | 1945 | if (strcmp(mod->name, "driverloader") == 0) |
| 1942 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 1946 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); |
| 1943 | 1947 | ||
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 859a8e59773a..14a656cdc652 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
| @@ -391,7 +391,7 @@ int hibernation_platform_enter(void) | |||
| 391 | goto Close; | 391 | goto Close; |
| 392 | 392 | ||
| 393 | suspend_console(); | 393 | suspend_console(); |
| 394 | error = device_suspend(PMSG_SUSPEND); | 394 | error = device_suspend(PMSG_HIBERNATE); |
| 395 | if (error) | 395 | if (error) |
| 396 | goto Resume_console; | 396 | goto Resume_console; |
| 397 | 397 | ||
| @@ -404,7 +404,7 @@ int hibernation_platform_enter(void) | |||
| 404 | goto Finish; | 404 | goto Finish; |
| 405 | 405 | ||
| 406 | local_irq_disable(); | 406 | local_irq_disable(); |
| 407 | error = device_power_down(PMSG_SUSPEND); | 407 | error = device_power_down(PMSG_HIBERNATE); |
| 408 | if (!error) { | 408 | if (!error) { |
| 409 | hibernation_ops->enter(); | 409 | hibernation_ops->enter(); |
| 410 | /* We should never get here */ | 410 | /* We should never get here */ |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 7c2118f9597f..f1d0b345c9ba 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -75,22 +75,15 @@ void refrigerator(void) | |||
| 75 | __set_current_state(save); | 75 | __set_current_state(save); |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | static void fake_signal_wake_up(struct task_struct *p, int resume) | 78 | static void fake_signal_wake_up(struct task_struct *p) |
| 79 | { | 79 | { |
| 80 | unsigned long flags; | 80 | unsigned long flags; |
| 81 | 81 | ||
| 82 | spin_lock_irqsave(&p->sighand->siglock, flags); | 82 | spin_lock_irqsave(&p->sighand->siglock, flags); |
| 83 | signal_wake_up(p, resume); | 83 | signal_wake_up(p, 0); |
| 84 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 84 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | static void send_fake_signal(struct task_struct *p) | ||
| 88 | { | ||
| 89 | if (task_is_stopped(p)) | ||
| 90 | force_sig_specific(SIGSTOP, p); | ||
| 91 | fake_signal_wake_up(p, task_is_stopped(p)); | ||
| 92 | } | ||
| 93 | |||
| 94 | static int has_mm(struct task_struct *p) | 87 | static int has_mm(struct task_struct *p) |
| 95 | { | 88 | { |
| 96 | return (p->mm && !(p->flags & PF_BORROWED_MM)); | 89 | return (p->mm && !(p->flags & PF_BORROWED_MM)); |
| @@ -121,7 +114,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only) | |||
| 121 | if (freezing(p)) { | 114 | if (freezing(p)) { |
| 122 | if (has_mm(p)) { | 115 | if (has_mm(p)) { |
| 123 | if (!signal_pending(p)) | 116 | if (!signal_pending(p)) |
| 124 | fake_signal_wake_up(p, 0); | 117 | fake_signal_wake_up(p); |
| 125 | } else { | 118 | } else { |
| 126 | if (with_mm_only) | 119 | if (with_mm_only) |
| 127 | ret = 0; | 120 | ret = 0; |
| @@ -135,7 +128,7 @@ static int freeze_task(struct task_struct *p, int with_mm_only) | |||
| 135 | } else { | 128 | } else { |
| 136 | if (has_mm(p)) { | 129 | if (has_mm(p)) { |
| 137 | set_freeze_flag(p); | 130 | set_freeze_flag(p); |
| 138 | send_fake_signal(p); | 131 | fake_signal_wake_up(p); |
| 139 | } else { | 132 | } else { |
| 140 | if (with_mm_only) { | 133 | if (with_mm_only) { |
| 141 | ret = 0; | 134 | ret = 0; |
| @@ -182,15 +175,17 @@ static int try_to_freeze_tasks(int freeze_user_space) | |||
| 182 | if (frozen(p) || !freezeable(p)) | 175 | if (frozen(p) || !freezeable(p)) |
| 183 | continue; | 176 | continue; |
| 184 | 177 | ||
| 185 | if (task_is_traced(p) && frozen(p->parent)) { | ||
| 186 | cancel_freezing(p); | ||
| 187 | continue; | ||
| 188 | } | ||
| 189 | |||
| 190 | if (!freeze_task(p, freeze_user_space)) | 178 | if (!freeze_task(p, freeze_user_space)) |
| 191 | continue; | 179 | continue; |
| 192 | 180 | ||
| 193 | if (!freezer_should_skip(p)) | 181 | /* |
| 182 | * Now that we've done set_freeze_flag, don't | ||
| 183 | * perturb a task in TASK_STOPPED or TASK_TRACED. | ||
| 184 | * It is "frozen enough". If the task does wake | ||
| 185 | * up, it will immediately call try_to_freeze. | ||
| 186 | */ | ||
| 187 | if (!task_is_stopped_or_traced(p) && | ||
| 188 | !freezer_should_skip(p)) | ||
| 194 | todo++; | 189 | todo++; |
| 195 | } while_each_thread(g, p); | 190 | } while_each_thread(g, p); |
| 196 | read_unlock(&tasklist_lock); | 191 | read_unlock(&tasklist_lock); |
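The reworked loop above counts a task as still "todo" only if it is neither frozen, nor stopped/traced, nor marked as skippable; a stopped or traced task is frozen enough and will freeze itself if it wakes up. A toy model of that counting rule, with states and helpers invented for the sketch:

#include <stdio.h>

enum state { RUNNING, STOPPED, TRACED, FROZEN };

static int still_todo(enum state s)
{
	if (s == FROZEN)
		return 0;		/* already properly frozen */
	if (s == STOPPED || s == TRACED)
		return 0;		/* "frozen enough"; freezes on wakeup */
	return 1;			/* running task still needs to freeze */
}

int main(void)
{
	enum state tasks[] = { RUNNING, STOPPED, TRACED, FROZEN, RUNNING };
	int todo = 0;

	for (unsigned i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++)
		todo += still_todo(tasks[i]);

	printf("tasks still to freeze: %d\n", todo);
	return 0;
}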
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 95250d7c8d91..72a020cabb4c 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -875,8 +875,8 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | |||
| 875 | #endif /* CONFIG_HIGHMEM */ | 875 | #endif /* CONFIG_HIGHMEM */ |
| 876 | 876 | ||
| 877 | /** | 877 | /** |
| 878 | * saveable - Determine whether a non-highmem page should be included in | 878 | * saveable_page - Determine whether a non-highmem page should be included |
| 879 | * the suspend image. | 879 | * in the suspend image. |
| 880 | * | 880 | * |
| 881 | * We should save the page if it isn't Nosave, and is not in the range | 881 | * We should save the page if it isn't Nosave, and is not in the range |
| 882 | * of pages statically defined as 'unsaveable', and it isn't a part of | 882 | * of pages statically defined as 'unsaveable', and it isn't a part of |
| @@ -897,7 +897,8 @@ static struct page *saveable_page(unsigned long pfn) | |||
| 897 | if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) | 897 | if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) |
| 898 | return NULL; | 898 | return NULL; |
| 899 | 899 | ||
| 900 | if (PageReserved(page) && pfn_is_nosave(pfn)) | 900 | if (PageReserved(page) |
| 901 | && (!kernel_page_present(page) || pfn_is_nosave(pfn))) | ||
| 901 | return NULL; | 902 | return NULL; |
| 902 | 903 | ||
| 903 | return page; | 904 | return page; |
| @@ -938,6 +939,25 @@ static inline void do_copy_page(long *dst, long *src) | |||
| 938 | *dst++ = *src++; | 939 | *dst++ = *src++; |
| 939 | } | 940 | } |
| 940 | 941 | ||
| 942 | |||
| 943 | /** | ||
| 944 | * safe_copy_page - check if the page we are going to copy is marked as | ||
| 945 | * present in the kernel page tables (this always is the case if | ||
| 946 | * CONFIG_DEBUG_PAGEALLOC is not set and in that case | ||
| 947 | * kernel_page_present() always returns 'true'). | ||
| 948 | */ | ||
| 949 | static void safe_copy_page(void *dst, struct page *s_page) | ||
| 950 | { | ||
| 951 | if (kernel_page_present(s_page)) { | ||
| 952 | do_copy_page(dst, page_address(s_page)); | ||
| 953 | } else { | ||
| 954 | kernel_map_pages(s_page, 1, 1); | ||
| 955 | do_copy_page(dst, page_address(s_page)); | ||
| 956 | kernel_map_pages(s_page, 1, 0); | ||
| 957 | } | ||
| 958 | } | ||
| 959 | |||
| 960 | |||
| 941 | #ifdef CONFIG_HIGHMEM | 961 | #ifdef CONFIG_HIGHMEM |
| 942 | static inline struct page * | 962 | static inline struct page * |
| 943 | page_is_saveable(struct zone *zone, unsigned long pfn) | 963 | page_is_saveable(struct zone *zone, unsigned long pfn) |
| @@ -946,8 +966,7 @@ page_is_saveable(struct zone *zone, unsigned long pfn) | |||
| 946 | saveable_highmem_page(pfn) : saveable_page(pfn); | 966 | saveable_highmem_page(pfn) : saveable_page(pfn); |
| 947 | } | 967 | } |
| 948 | 968 | ||
| 949 | static inline void | 969 | static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
| 950 | copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | ||
| 951 | { | 970 | { |
| 952 | struct page *s_page, *d_page; | 971 | struct page *s_page, *d_page; |
| 953 | void *src, *dst; | 972 | void *src, *dst; |
| @@ -961,29 +980,26 @@ copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
| 961 | kunmap_atomic(src, KM_USER0); | 980 | kunmap_atomic(src, KM_USER0); |
| 962 | kunmap_atomic(dst, KM_USER1); | 981 | kunmap_atomic(dst, KM_USER1); |
| 963 | } else { | 982 | } else { |
| 964 | src = page_address(s_page); | ||
| 965 | if (PageHighMem(d_page)) { | 983 | if (PageHighMem(d_page)) { |
| 966 | /* Page pointed to by src may contain some kernel | 984 | /* Page pointed to by src may contain some kernel |
| 967 | * data modified by kmap_atomic() | 985 | * data modified by kmap_atomic() |
| 968 | */ | 986 | */ |
| 969 | do_copy_page(buffer, src); | 987 | safe_copy_page(buffer, s_page); |
| 970 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); | 988 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); |
| 971 | memcpy(dst, buffer, PAGE_SIZE); | 989 | memcpy(dst, buffer, PAGE_SIZE); |
| 972 | kunmap_atomic(dst, KM_USER0); | 990 | kunmap_atomic(dst, KM_USER0); |
| 973 | } else { | 991 | } else { |
| 974 | dst = page_address(d_page); | 992 | safe_copy_page(page_address(d_page), s_page); |
| 975 | do_copy_page(dst, src); | ||
| 976 | } | 993 | } |
| 977 | } | 994 | } |
| 978 | } | 995 | } |
| 979 | #else | 996 | #else |
| 980 | #define page_is_saveable(zone, pfn) saveable_page(pfn) | 997 | #define page_is_saveable(zone, pfn) saveable_page(pfn) |
| 981 | 998 | ||
| 982 | static inline void | 999 | static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
| 983 | copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | ||
| 984 | { | 1000 | { |
| 985 | do_copy_page(page_address(pfn_to_page(dst_pfn)), | 1001 | safe_copy_page(page_address(pfn_to_page(dst_pfn)), |
| 986 | page_address(pfn_to_page(src_pfn))); | 1002 | pfn_to_page(src_pfn)); |
| 987 | } | 1003 | } |
| 988 | #endif /* CONFIG_HIGHMEM */ | 1004 | #endif /* CONFIG_HIGHMEM */ |
| 989 | 1005 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index bee36100f110..9adc2a473e6e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -666,7 +666,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
| 666 | } | 666 | } |
| 667 | /* Emit the output into the temporary buffer */ | 667 | /* Emit the output into the temporary buffer */ |
| 668 | printed_len += vscnprintf(printk_buf + printed_len, | 668 | printed_len += vscnprintf(printk_buf + printed_len, |
| 669 | sizeof(printk_buf), fmt, args); | 669 | sizeof(printk_buf) - printed_len, fmt, args); |
| 670 | 670 | ||
| 671 | /* | 671 | /* |
| 672 | * Copy the output into log_buf. If the caller didn't provide | 672 | * Copy the output into log_buf. If the caller didn't provide |
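The one-line printk fix above is the classic "remaining space" bug: when appending at printk_buf + printed_len, the length handed to vscnprintf() must shrink by printed_len, or a second pass can write past the end of the buffer. A user-space analogue using snprintf() is below; unlike the kernel's vscnprintf(), snprintf() reports the would-be length rather than the stored length, hence the clamp at the end.

#include <stdio.h>

int main(void)
{
	char buf[32];
	size_t used = 0;

	/* Always pass the *remaining* space, not sizeof(buf). */
	used += snprintf(buf + used, sizeof(buf) - used, "<%d>", 6);
	used += snprintf(buf + used, sizeof(buf) - used,
			 "a fairly long message that would overflow");

	/* snprintf() reports what it wanted to write; clamp for the demo. */
	if (used >= sizeof(buf))
		used = sizeof(buf) - 1;

	printf("stored %zu bytes: \"%s\"\n", used, buf);
	return 0;
}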
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 987cfb7ade89..e9517014b57c 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c | |||
| @@ -23,6 +23,10 @@ | |||
| 23 | * to Suparna Bhattacharya for pushing me completely away | 23 | * to Suparna Bhattacharya for pushing me completely away |
| 24 | * from atomic instructions on the read side. | 24 | * from atomic instructions on the read side. |
| 25 | * | 25 | * |
| 26 | * - Added handling of Dynamic Ticks | ||
| 27 | * Copyright 2007 - Paul E. McKenney <paulmck@us.ibm.com> | ||
| 28 | * - Steven Rostedt <srostedt@redhat.com> | ||
| 29 | * | ||
| 26 | * Papers: http://www.rdrop.com/users/paulmck/RCU | 30 | * Papers: http://www.rdrop.com/users/paulmck/RCU |
| 27 | * | 31 | * |
| 28 | * Design Document: http://lwn.net/Articles/253651/ | 32 | * Design Document: http://lwn.net/Articles/253651/ |
| @@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) | |||
| 409 | } | 413 | } |
| 410 | } | 414 | } |
| 411 | 415 | ||
| 416 | #ifdef CONFIG_NO_HZ | ||
| 417 | |||
| 418 | DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; | ||
| 419 | static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); | ||
| 420 | static DEFINE_PER_CPU(int, rcu_update_flag); | ||
| 421 | |||
| 422 | /** | ||
| 423 | * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. | ||
| 424 | * | ||
| 425 | * If the CPU was idle with dynamic ticks active, this updates the | ||
| 426 | * dynticks_progress_counter to let the RCU handling know that the | ||
| 427 | * CPU is active. | ||
| 428 | */ | ||
| 429 | void rcu_irq_enter(void) | ||
| 430 | { | ||
| 431 | int cpu = smp_processor_id(); | ||
| 432 | |||
| 433 | if (per_cpu(rcu_update_flag, cpu)) | ||
| 434 | per_cpu(rcu_update_flag, cpu)++; | ||
| 435 | |||
| 436 | /* | ||
| 437 | * Only update if we are coming from a stopped ticks mode | ||
| 438 | * (dynticks_progress_counter is even). | ||
| 439 | */ | ||
| 440 | if (!in_interrupt() && | ||
| 441 | (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { | ||
| 442 | /* | ||
| 443 | * The following might seem like we could have a race | ||
| 444 | * with NMI/SMIs. But this really isn't a problem. | ||
| 445 | * Here we do a read/modify/write, and the race happens | ||
| 446 | * when an NMI/SMI comes in after the read and before | ||
| 447 | * the write. But NMI/SMIs will increment this counter | ||
| 448 | * twice before returning, so the zero bit will not | ||
| 449 | * be corrupted by the NMI/SMI which is the most important | ||
| 450 | * part. | ||
| 451 | * | ||
| 452 | * The only thing is that we would bring back the counter | ||
| 453 | * to a position that it was in during the NMI/SMI. | ||
| 454 | * But the zero bit would be set, so the rest of the | ||
| 455 | * counter would again be ignored. | ||
| 456 | * | ||
| 457 | * On return from the IRQ, the counter may have the zero | ||
| 458 | * bit be 0 and the counter the same as the return from | ||
| 459 | * the NMI/SMI. If the state machine was so unlucky to | ||
| 460 | * see that, it still doesn't matter, since all | ||
| 461 | * RCU read-side critical sections on this CPU would | ||
| 462 | * have already completed. | ||
| 463 | */ | ||
| 464 | per_cpu(dynticks_progress_counter, cpu)++; | ||
| 465 | /* | ||
| 466 | * The following memory barrier ensures that any | ||
| 467 | * rcu_read_lock() primitives in the irq handler | ||
| 468 | * are seen by other CPUs to follow the above | ||
| 469 | * increment to dynticks_progress_counter. This is | ||
| 470 | * required in order for other CPUs to correctly | ||
| 471 | * determine when it is safe to advance the RCU | ||
| 472 | * grace-period state machine. | ||
| 473 | */ | ||
| 474 | smp_mb(); /* see above block comment. */ | ||
| 475 | /* | ||
| 476 | * Since we can't determine the dynamic tick mode from | ||
| 477 | * the dynticks_progress_counter after this routine, | ||
| 478 | * we use a second flag to acknowledge that we came | ||
| 479 | * from an idle state with ticks stopped. | ||
| 480 | */ | ||
| 481 | per_cpu(rcu_update_flag, cpu)++; | ||
| 482 | /* | ||
| 483 | * If we take an NMI/SMI now, they will also increment | ||
| 484 | * the rcu_update_flag, and will not update the | ||
| 485 | * dynticks_progress_counter on exit. That is for | ||
| 486 | * this IRQ to do. | ||
| 487 | */ | ||
| 488 | } | ||
| 489 | } | ||
| 490 | |||
| 491 | /** | ||
| 492 | * rcu_irq_exit - Called from exiting Hard irq context. | ||
| 493 | * | ||
| 494 | * If the CPU was idle with dynamic ticks active, update the | ||
| 495 | * dynticks_progress_counter to let the RCU handling be | ||
| 496 | * aware that the CPU is going back to idle with no ticks. | ||
| 497 | */ | ||
| 498 | void rcu_irq_exit(void) | ||
| 499 | { | ||
| 500 | int cpu = smp_processor_id(); | ||
| 501 | |||
| 502 | /* | ||
| 503 | * rcu_update_flag is set if we interrupted the CPU | ||
| 504 | * when it was idle with ticks stopped. | ||
| 505 | * Once this occurs, we keep track of interrupt nesting | ||
| 506 | * because an NMI/SMI could also come in, and we still | ||
| 507 | * only want the IRQ that started the increment of the | ||
| 508 | * dynticks_progress_counter to be the one that modifies | ||
| 509 | * it on exit. | ||
| 510 | */ | ||
| 511 | if (per_cpu(rcu_update_flag, cpu)) { | ||
| 512 | if (--per_cpu(rcu_update_flag, cpu)) | ||
| 513 | return; | ||
| 514 | |||
| 515 | /* This must match the interrupt nesting */ | ||
| 516 | WARN_ON(in_interrupt()); | ||
| 517 | |||
| 518 | /* | ||
| 519 | * If an NMI/SMI happens now we are still | ||
| 520 | * protected by the dynticks_progress_counter being odd. | ||
| 521 | */ | ||
| 522 | |||
| 523 | /* | ||
| 524 | * The following memory barrier ensures that any | ||
| 525 | * rcu_read_unlock() primitives in the irq handler | ||
| 526 | * are seen by other CPUs to precede the following | ||
| 527 | * increment to dynticks_progress_counter. This | ||
| 528 | * is required in order for other CPUs to determine | ||
| 529 | * when it is safe to advance the RCU grace-period | ||
| 530 | * state machine. | ||
| 531 | */ | ||
| 532 | smp_mb(); /* see above block comment. */ | ||
| 533 | per_cpu(dynticks_progress_counter, cpu)++; | ||
| 534 | WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | static void dyntick_save_progress_counter(int cpu) | ||
| 539 | { | ||
| 540 | per_cpu(rcu_dyntick_snapshot, cpu) = | ||
| 541 | per_cpu(dynticks_progress_counter, cpu); | ||
| 542 | } | ||
| 543 | |||
| 544 | static inline int | ||
| 545 | rcu_try_flip_waitack_needed(int cpu) | ||
| 546 | { | ||
| 547 | long curr; | ||
| 548 | long snap; | ||
| 549 | |||
| 550 | curr = per_cpu(dynticks_progress_counter, cpu); | ||
| 551 | snap = per_cpu(rcu_dyntick_snapshot, cpu); | ||
| 552 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
| 553 | |||
| 554 | /* | ||
| 555 | * If the CPU remained in dynticks mode for the entire time | ||
| 556 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
| 557 | * then it cannot be in the middle of an rcu_read_lock(), so | ||
| 558 | * the next rcu_read_lock() it executes must use the new value | ||
| 559 | * of the counter. So we can safely pretend that this CPU | ||
| 560 | * already acknowledged the counter. | ||
| 561 | */ | ||
| 562 | |||
| 563 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
| 564 | return 0; | ||
| 565 | |||
| 566 | /* | ||
| 567 | * If the CPU passed through or entered a dynticks idle phase with | ||
| 568 | * no active irq handlers, then, as above, we can safely pretend | ||
| 569 | * that this CPU already acknowledged the counter. | ||
| 570 | */ | ||
| 571 | |||
| 572 | if ((curr - snap) > 2 || (snap & 0x1) == 0) | ||
| 573 | return 0; | ||
| 574 | |||
| 575 | /* We need this CPU to explicitly acknowledge the counter flip. */ | ||
| 576 | |||
| 577 | return 1; | ||
| 578 | } | ||
| 579 | |||
| 580 | static inline int | ||
| 581 | rcu_try_flip_waitmb_needed(int cpu) | ||
| 582 | { | ||
| 583 | long curr; | ||
| 584 | long snap; | ||
| 585 | |||
| 586 | curr = per_cpu(dynticks_progress_counter, cpu); | ||
| 587 | snap = per_cpu(rcu_dyntick_snapshot, cpu); | ||
| 588 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
| 589 | |||
| 590 | /* | ||
| 591 | * If the CPU remained in dynticks mode for the entire time | ||
| 592 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
| 593 | * then it cannot have executed an RCU read-side critical section | ||
| 594 | * during that time, so there is no need for it to execute a | ||
| 595 | * memory barrier. | ||
| 596 | */ | ||
| 597 | |||
| 598 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
| 599 | return 0; | ||
| 600 | |||
| 601 | /* | ||
| 602 | * If the CPU either entered or exited an outermost interrupt, | ||
| 603 | * SMI, NMI, or whatever handler, then we know that it executed | ||
| 604 | * a memory barrier when doing so. So we don't need another one. | ||
| 605 | */ | ||
| 606 | if (curr != snap) | ||
| 607 | return 0; | ||
| 608 | |||
| 609 | /* We need the CPU to execute a memory barrier. */ | ||
| 610 | |||
| 611 | return 1; | ||
| 612 | } | ||
| 613 | |||
| 614 | #else /* !CONFIG_NO_HZ */ | ||
| 615 | |||
| 616 | # define dyntick_save_progress_counter(cpu) do { } while (0) | ||
| 617 | # define rcu_try_flip_waitack_needed(cpu) (1) | ||
| 618 | # define rcu_try_flip_waitmb_needed(cpu) (1) | ||
| 619 | |||
| 620 | #endif /* CONFIG_NO_HZ */ | ||
| 621 | |||
| 412 | /* | 622 | /* |
| 413 | * Get here when RCU is idle. Decide whether we need to | 623 | * Get here when RCU is idle. Decide whether we need to |
| 414 | * move out of idle state, and return non-zero if so. | 624 | * move out of idle state, and return non-zero if so. |
| @@ -447,8 +657,10 @@ rcu_try_flip_idle(void) | |||
| 447 | 657 | ||
| 448 | /* Now ask each CPU for acknowledgement of the flip. */ | 658 | /* Now ask each CPU for acknowledgement of the flip. */ |
| 449 | 659 | ||
| 450 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 660 | for_each_cpu_mask(cpu, rcu_cpu_online_map) { |
| 451 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; | 661 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; |
| 662 | dyntick_save_progress_counter(cpu); | ||
| 663 | } | ||
| 452 | 664 | ||
| 453 | return 1; | 665 | return 1; |
| 454 | } | 666 | } |
| @@ -464,7 +676,8 @@ rcu_try_flip_waitack(void) | |||
| 464 | 676 | ||
| 465 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); | 677 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); |
| 466 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 678 | for_each_cpu_mask(cpu, rcu_cpu_online_map) |
| 467 | if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { | 679 | if (rcu_try_flip_waitack_needed(cpu) && |
| 680 | per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { | ||
| 468 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); | 681 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); |
| 469 | return 0; | 682 | return 0; |
| 470 | } | 683 | } |
| @@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void) | |||
| 509 | smp_mb(); /* ^^^^^^^^^^^^ */ | 722 | smp_mb(); /* ^^^^^^^^^^^^ */ |
| 510 | 723 | ||
| 511 | /* Call for a memory barrier from each CPU. */ | 724 | /* Call for a memory barrier from each CPU. */ |
| 512 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 725 | for_each_cpu_mask(cpu, rcu_cpu_online_map) { |
| 513 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; | 726 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; |
| 727 | dyntick_save_progress_counter(cpu); | ||
| 728 | } | ||
| 514 | 729 | ||
| 515 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); | 730 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); |
| 516 | return 1; | 731 | return 1; |
| @@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void) | |||
| 528 | 743 | ||
| 529 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); | 744 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); |
| 530 | for_each_cpu_mask(cpu, rcu_cpu_online_map) | 745 | for_each_cpu_mask(cpu, rcu_cpu_online_map) |
| 531 | if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { | 746 | if (rcu_try_flip_waitmb_needed(cpu) && |
| 747 | per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { | ||
| 532 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); | 748 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); |
| 533 | return 0; | 749 | return 0; |
| 534 | } | 750 | } |
| @@ -702,8 +918,9 @@ void rcu_offline_cpu(int cpu) | |||
| 702 | * fix. | 918 | * fix. |
| 703 | */ | 919 | */ |
| 704 | 920 | ||
| 921 | local_irq_save(flags); | ||
| 705 | rdp = RCU_DATA_ME(); | 922 | rdp = RCU_DATA_ME(); |
| 706 | spin_lock_irqsave(&rdp->lock, flags); | 923 | spin_lock(&rdp->lock); |
| 707 | *rdp->nexttail = list; | 924 | *rdp->nexttail = list; |
| 708 | if (list) | 925 | if (list) |
| 709 | rdp->nexttail = tail; | 926 | rdp->nexttail = tail; |
| @@ -735,9 +952,11 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
| 735 | { | 952 | { |
| 736 | unsigned long flags; | 953 | unsigned long flags; |
| 737 | struct rcu_head *next, *list; | 954 | struct rcu_head *next, *list; |
| 738 | struct rcu_data *rdp = RCU_DATA_ME(); | 955 | struct rcu_data *rdp; |
| 739 | 956 | ||
| 740 | spin_lock_irqsave(&rdp->lock, flags); | 957 | local_irq_save(flags); |
| 958 | rdp = RCU_DATA_ME(); | ||
| 959 | spin_lock(&rdp->lock); | ||
| 741 | list = rdp->donelist; | 960 | list = rdp->donelist; |
| 742 | if (list == NULL) { | 961 | if (list == NULL) { |
| 743 | spin_unlock_irqrestore(&rdp->lock, flags); | 962 | spin_unlock_irqrestore(&rdp->lock, flags); |
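The dynticks handshake added above relies on a per-CPU counter that stays even while a CPU idles with the tick stopped and goes odd inside an interrupt handler; the grace-period state machine snapshots it and later decides whether the CPU can be skipped. The following is a toy, single-threaded model of just that counter arithmetic; the names are invented, and memory barriers, per-CPU placement and NMI nesting are deliberately ignored.

#include <stdio.h>

static long progress;		/* stands in for dynticks_progress_counter */
static long snapshot;		/* stands in for rcu_dyntick_snapshot */

static void irq_enter(void)  { if ((progress & 1) == 0) progress++; }
static void irq_exit(void)   { progress++; }	/* back to even == idle */

static void save_snapshot(void) { snapshot = progress; }

/* May the state machine skip waiting for an acknowledgement from this CPU? */
static int ack_not_needed(void)
{
	long curr = progress, snap = snapshot;

	if (curr == snap && (curr & 1) == 0)	/* idle the whole time */
		return 1;
	if (curr - snap > 2 || (snap & 1) == 0)	/* passed through idle */
		return 1;
	return 0;				/* must wait for this CPU */
}

int main(void)
{
	/* CPU busy (odd counter) and unchanged: must wait for it. */
	progress = 1;
	save_snapshot();
	printf("busy, unchanged:  skip=%d\n", ack_not_needed());

	/* CPU idle with the tick stopped (even) and unchanged: skip it. */
	progress = 2;
	save_snapshot();
	printf("idle, unchanged:  skip=%d\n", ack_not_needed());

	/* Idle at snapshot time, then handled an irq: still safe to skip. */
	irq_enter();
	irq_exit();
	printf("idle then irq:    skip=%d\n", ack_not_needed());
	return 0;
}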
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 16cbec2d5d60..efbfc0fc232f 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
| @@ -113,6 +113,7 @@ ssize_t res_counter_write(struct res_counter *counter, int member, | |||
| 113 | 113 | ||
| 114 | ret = -EINVAL; | 114 | ret = -EINVAL; |
| 115 | 115 | ||
| 116 | strstrip(buf); | ||
| 116 | if (write_strategy) { | 117 | if (write_strategy) { |
| 117 | if (write_strategy(buf, &tmp)) { | 118 | if (write_strategy(buf, &tmp)) { |
| 118 | goto out_free; | 119 | goto out_free; |
diff --git a/kernel/sched.c b/kernel/sched.c index f28f19e65b59..b02e4fc25645 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -174,41 +174,6 @@ struct task_group { | |||
| 174 | struct sched_entity **se; | 174 | struct sched_entity **se; |
| 175 | /* runqueue "owned" by this group on each cpu */ | 175 | /* runqueue "owned" by this group on each cpu */ |
| 176 | struct cfs_rq **cfs_rq; | 176 | struct cfs_rq **cfs_rq; |
| 177 | |||
| 178 | /* | ||
| 179 | * shares assigned to a task group governs how much of cpu bandwidth | ||
| 180 | * is allocated to the group. The more shares a group has, the more is | ||
| 181 | * the cpu bandwidth allocated to it. | ||
| 182 | * | ||
| 183 | * For ex, lets say that there are three task groups, A, B and C which | ||
| 184 | * have been assigned shares 1000, 2000 and 3000 respectively. Then, | ||
| 185 | * cpu bandwidth allocated by the scheduler to task groups A, B and C | ||
| 186 | * should be: | ||
| 187 | * | ||
| 188 | * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66% | ||
| 189 | * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33% | ||
| 190 | * Bw(C) = 3000/(1000+2000+3000) * 100 = 50% | ||
| 191 | * | ||
| 192 | * The weight assigned to a task group's schedulable entities on every | ||
| 193 | * cpu (task_group.se[a_cpu]->load.weight) is derived from the task | ||
| 194 | * group's shares. For ex: lets say that task group A has been | ||
| 195 | * assigned shares of 1000 and there are two CPUs in a system. Then, | ||
| 196 | * | ||
| 197 | * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000; | ||
| 198 | * | ||
| 199 | * Note: It's not necessary that each of a task's group schedulable | ||
| 200 | * entity have the same weight on all CPUs. If the group | ||
| 201 | * has 2 of its tasks on CPU0 and 1 task on CPU1, then a | ||
| 202 | * better distribution of weight could be: | ||
| 203 | * | ||
| 204 | * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333 | ||
| 205 | * tg_A->se[1]->load.weight = 1/2 * 2000 = 667 | ||
| 206 | * | ||
| 207 | * rebalance_shares() is responsible for distributing the shares of a | ||
| 208 | * task groups like this among the group's schedulable entities across | ||
| 209 | * cpus. | ||
| 210 | * | ||
| 211 | */ | ||
| 212 | unsigned long shares; | 177 | unsigned long shares; |
| 213 | #endif | 178 | #endif |
| 214 | 179 | ||
| @@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
| 250 | static DEFINE_MUTEX(doms_cur_mutex); | 215 | static DEFINE_MUTEX(doms_cur_mutex); |
| 251 | 216 | ||
| 252 | #ifdef CONFIG_FAIR_GROUP_SCHED | 217 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 253 | #ifdef CONFIG_SMP | ||
| 254 | /* kernel thread that runs rebalance_shares() periodically */ | ||
| 255 | static struct task_struct *lb_monitor_task; | ||
| 256 | static int load_balance_monitor(void *unused); | ||
| 257 | #endif | ||
| 258 | |||
| 259 | static void set_se_shares(struct sched_entity *se, unsigned long shares); | ||
| 260 | |||
| 261 | #ifdef CONFIG_USER_SCHED | 218 | #ifdef CONFIG_USER_SCHED |
| 262 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 219 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
| 263 | #else | 220 | #else |
| 264 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 221 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
| 265 | #endif | 222 | #endif |
| 266 | 223 | ||
| 267 | #define MIN_GROUP_SHARES 2 | ||
| 268 | |||
| 269 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | 224 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; |
| 270 | #endif | 225 | #endif |
| 271 | 226 | ||
| @@ -668,6 +623,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
| 668 | */ | 623 | */ |
| 669 | unsigned int sysctl_sched_rt_period = 1000000; | 624 | unsigned int sysctl_sched_rt_period = 1000000; |
| 670 | 625 | ||
| 626 | static __read_mostly int scheduler_running; | ||
| 627 | |||
| 671 | /* | 628 | /* |
| 672 | * part of the period that we allow rt tasks to run in us. | 629 | * part of the period that we allow rt tasks to run in us. |
| 673 | * default: 0.95s | 630 | * default: 0.95s |
| @@ -689,14 +646,16 @@ unsigned long long cpu_clock(int cpu) | |||
| 689 | unsigned long flags; | 646 | unsigned long flags; |
| 690 | struct rq *rq; | 647 | struct rq *rq; |
| 691 | 648 | ||
| 692 | local_irq_save(flags); | ||
| 693 | rq = cpu_rq(cpu); | ||
| 694 | /* | 649 | /* |
| 695 | * Only call sched_clock() if the scheduler has already been | 650 | * Only call sched_clock() if the scheduler has already been |
| 696 | * initialized (some code might call cpu_clock() very early): | 651 | * initialized (some code might call cpu_clock() very early): |
| 697 | */ | 652 | */ |
| 698 | if (rq->idle) | 653 | if (unlikely(!scheduler_running)) |
| 699 | update_rq_clock(rq); | 654 | return 0; |
| 655 | |||
| 656 | local_irq_save(flags); | ||
| 657 | rq = cpu_rq(cpu); | ||
| 658 | update_rq_clock(rq); | ||
| 700 | now = rq->clock; | 659 | now = rq->clock; |
| 701 | local_irq_restore(flags); | 660 | local_irq_restore(flags); |
| 702 | 661 | ||
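The cpu_clock() hunk above replaces the indirect "is rq->idle set up yet?" probe with an explicit scheduler_running flag that sched_init() sets at the very end; very early callers simply get 0 before any runqueue state is touched. A minimal sketch of that guard pattern, with names shortened and locking omitted:

#include <stdio.h>

static int scheduler_running;		/* set once at the end of init */
static unsigned long long fake_clock;	/* stands in for rq->clock */

static unsigned long long cpu_clock_demo(void)
{
	if (!scheduler_running)		/* very early callers just get 0 */
		return 0;
	return ++fake_clock;		/* normal path: update and read the clock */
}

int main(void)
{
	printf("before init: %llu\n", cpu_clock_demo());
	scheduler_running = 1;		/* "sched_init() finished" */
	printf("after init:  %llu\n", cpu_clock_demo());
	return 0;
}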
| @@ -1241,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | |||
| 1241 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1200 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
| 1242 | #endif | 1201 | #endif |
| 1243 | 1202 | ||
| 1244 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | ||
| 1245 | { | ||
| 1246 | update_load_add(&rq->load, load); | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | static inline void dec_cpu_load(struct rq *rq, unsigned long load) | ||
| 1250 | { | ||
| 1251 | update_load_sub(&rq->load, load); | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | #ifdef CONFIG_SMP | 1203 | #ifdef CONFIG_SMP |
| 1255 | static unsigned long source_load(int cpu, int type); | 1204 | static unsigned long source_load(int cpu, int type); |
| 1256 | static unsigned long target_load(int cpu, int type); | 1205 | static unsigned long target_load(int cpu, int type); |
| @@ -1268,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | |||
| 1268 | 1217 | ||
| 1269 | #define sched_class_highest (&rt_sched_class) | 1218 | #define sched_class_highest (&rt_sched_class) |
| 1270 | 1219 | ||
| 1271 | static void inc_nr_running(struct rq *rq) | 1220 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
| 1221 | { | ||
| 1222 | update_load_add(&rq->load, p->se.load.weight); | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
| 1226 | { | ||
| 1227 | update_load_sub(&rq->load, p->se.load.weight); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
| 1272 | { | 1231 | { |
| 1273 | rq->nr_running++; | 1232 | rq->nr_running++; |
| 1233 | inc_load(rq, p); | ||
| 1274 | } | 1234 | } |
| 1275 | 1235 | ||
| 1276 | static void dec_nr_running(struct rq *rq) | 1236 | static void dec_nr_running(struct task_struct *p, struct rq *rq) |
| 1277 | { | 1237 | { |
| 1278 | rq->nr_running--; | 1238 | rq->nr_running--; |
| 1239 | dec_load(rq, p); | ||
| 1279 | } | 1240 | } |
| 1280 | 1241 | ||
| 1281 | static void set_load_weight(struct task_struct *p) | 1242 | static void set_load_weight(struct task_struct *p) |
| @@ -1367,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 1367 | rq->nr_uninterruptible--; | 1328 | rq->nr_uninterruptible--; |
| 1368 | 1329 | ||
| 1369 | enqueue_task(rq, p, wakeup); | 1330 | enqueue_task(rq, p, wakeup); |
| 1370 | inc_nr_running(rq); | 1331 | inc_nr_running(p, rq); |
| 1371 | } | 1332 | } |
| 1372 | 1333 | ||
| 1373 | /* | 1334 | /* |
| @@ -1379,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
| 1379 | rq->nr_uninterruptible++; | 1340 | rq->nr_uninterruptible++; |
| 1380 | 1341 | ||
| 1381 | dequeue_task(rq, p, sleep); | 1342 | dequeue_task(rq, p, sleep); |
| 1382 | dec_nr_running(rq); | 1343 | dec_nr_running(p, rq); |
| 1383 | } | 1344 | } |
| 1384 | 1345 | ||
| 1385 | /** | 1346 | /** |
| @@ -1831,6 +1792,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 1831 | long old_state; | 1792 | long old_state; |
| 1832 | struct rq *rq; | 1793 | struct rq *rq; |
| 1833 | 1794 | ||
| 1795 | smp_wmb(); | ||
| 1834 | rq = task_rq_lock(p, &flags); | 1796 | rq = task_rq_lock(p, &flags); |
| 1835 | old_state = p->state; | 1797 | old_state = p->state; |
| 1836 | if (!(old_state & state)) | 1798 | if (!(old_state & state)) |
| @@ -2018,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2018 | * management (if any): | 1980 | * management (if any): |
| 2019 | */ | 1981 | */ |
| 2020 | p->sched_class->task_new(rq, p); | 1982 | p->sched_class->task_new(rq, p); |
| 2021 | inc_nr_running(rq); | 1983 | inc_nr_running(p, rq); |
| 2022 | } | 1984 | } |
| 2023 | check_preempt_curr(rq, p); | 1985 | check_preempt_curr(rq, p); |
| 2024 | #ifdef CONFIG_SMP | 1986 | #ifdef CONFIG_SMP |
| @@ -3766,7 +3728,7 @@ void scheduler_tick(void) | |||
| 3766 | 3728 | ||
| 3767 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) | 3729 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) |
| 3768 | 3730 | ||
| 3769 | void add_preempt_count(int val) | 3731 | void __kprobes add_preempt_count(int val) |
| 3770 | { | 3732 | { |
| 3771 | /* | 3733 | /* |
| 3772 | * Underflow? | 3734 | * Underflow? |
| @@ -3782,7 +3744,7 @@ void add_preempt_count(int val) | |||
| 3782 | } | 3744 | } |
| 3783 | EXPORT_SYMBOL(add_preempt_count); | 3745 | EXPORT_SYMBOL(add_preempt_count); |
| 3784 | 3746 | ||
| 3785 | void sub_preempt_count(int val) | 3747 | void __kprobes sub_preempt_count(int val) |
| 3786 | { | 3748 | { |
| 3787 | /* | 3749 | /* |
| 3788 | * Underflow? | 3750 | * Underflow? |
| @@ -3884,7 +3846,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev) | |||
| 3884 | asmlinkage void __sched schedule(void) | 3846 | asmlinkage void __sched schedule(void) |
| 3885 | { | 3847 | { |
| 3886 | struct task_struct *prev, *next; | 3848 | struct task_struct *prev, *next; |
| 3887 | long *switch_count; | 3849 | unsigned long *switch_count; |
| 3888 | struct rq *rq; | 3850 | struct rq *rq; |
| 3889 | int cpu; | 3851 | int cpu; |
| 3890 | 3852 | ||
| @@ -4357,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4357 | goto out_unlock; | 4319 | goto out_unlock; |
| 4358 | } | 4320 | } |
| 4359 | on_rq = p->se.on_rq; | 4321 | on_rq = p->se.on_rq; |
| 4360 | if (on_rq) | 4322 | if (on_rq) { |
| 4361 | dequeue_task(rq, p, 0); | 4323 | dequeue_task(rq, p, 0); |
| 4324 | dec_load(rq, p); | ||
| 4325 | } | ||
| 4362 | 4326 | ||
| 4363 | p->static_prio = NICE_TO_PRIO(nice); | 4327 | p->static_prio = NICE_TO_PRIO(nice); |
| 4364 | set_load_weight(p); | 4328 | set_load_weight(p); |
| @@ -4368,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4368 | 4332 | ||
| 4369 | if (on_rq) { | 4333 | if (on_rq) { |
| 4370 | enqueue_task(rq, p, 0); | 4334 | enqueue_task(rq, p, 0); |
| 4335 | inc_load(rq, p); | ||
| 4371 | /* | 4336 | /* |
| 4372 | * If the task increased its priority or is running and | 4337 | * If the task increased its priority or is running and |
| 4373 | * lowered its priority, then reschedule its CPU: | 4338 | * lowered its priority, then reschedule its CPU: |
| @@ -4457,7 +4422,7 @@ int task_nice(const struct task_struct *p) | |||
| 4457 | { | 4422 | { |
| 4458 | return TASK_NICE(p); | 4423 | return TASK_NICE(p); |
| 4459 | } | 4424 | } |
| 4460 | EXPORT_SYMBOL_GPL(task_nice); | 4425 | EXPORT_SYMBOL(task_nice); |
| 4461 | 4426 | ||
| 4462 | /** | 4427 | /** |
| 4463 | * idle_cpu - is a given cpu idle currently? | 4428 | * idle_cpu - is a given cpu idle currently? |
| @@ -5135,7 +5100,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
| 5135 | time_slice = 0; | 5100 | time_slice = 0; |
| 5136 | if (p->policy == SCHED_RR) { | 5101 | if (p->policy == SCHED_RR) { |
| 5137 | time_slice = DEF_TIMESLICE; | 5102 | time_slice = DEF_TIMESLICE; |
| 5138 | } else { | 5103 | } else if (p->policy != SCHED_FIFO) { |
| 5139 | struct sched_entity *se = &p->se; | 5104 | struct sched_entity *se = &p->se; |
| 5140 | unsigned long flags; | 5105 | unsigned long flags; |
| 5141 | struct rq *rq; | 5106 | struct rq *rq; |
| @@ -5848,6 +5813,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5848 | /* Must be high prio: stop_machine expects to yield to it. */ | 5813 | /* Must be high prio: stop_machine expects to yield to it. */ |
| 5849 | rq = task_rq_lock(p, &flags); | 5814 | rq = task_rq_lock(p, &flags); |
| 5850 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | 5815 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
| 5816 | |||
| 5817 | /* Update our root-domain */ | ||
| 5818 | if (rq->rd) { | ||
| 5819 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | ||
| 5820 | cpu_set(cpu, rq->rd->online); | ||
| 5821 | } | ||
| 5822 | |||
| 5851 | task_rq_unlock(rq, &flags); | 5823 | task_rq_unlock(rq, &flags); |
| 5852 | cpu_rq(cpu)->migration_thread = p; | 5824 | cpu_rq(cpu)->migration_thread = p; |
| 5853 | break; | 5825 | break; |
| @@ -5856,15 +5828,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5856 | case CPU_ONLINE_FROZEN: | 5828 | case CPU_ONLINE_FROZEN: |
| 5857 | /* Strictly unnecessary, as first user will wake it. */ | 5829 | /* Strictly unnecessary, as first user will wake it. */ |
| 5858 | wake_up_process(cpu_rq(cpu)->migration_thread); | 5830 | wake_up_process(cpu_rq(cpu)->migration_thread); |
| 5859 | |||
| 5860 | /* Update our root-domain */ | ||
| 5861 | rq = cpu_rq(cpu); | ||
| 5862 | spin_lock_irqsave(&rq->lock, flags); | ||
| 5863 | if (rq->rd) { | ||
| 5864 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | ||
| 5865 | cpu_set(cpu, rq->rd->online); | ||
| 5866 | } | ||
| 5867 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 5868 | break; | 5831 | break; |
| 5869 | 5832 | ||
| 5870 | #ifdef CONFIG_HOTPLUG_CPU | 5833 | #ifdef CONFIG_HOTPLUG_CPU |
| @@ -6140,8 +6103,6 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
| 6140 | rq->rd = rd; | 6103 | rq->rd = rd; |
| 6141 | 6104 | ||
| 6142 | cpu_set(rq->cpu, rd->span); | 6105 | cpu_set(rq->cpu, rd->span); |
| 6143 | if (cpu_isset(rq->cpu, cpu_online_map)) | ||
| 6144 | cpu_set(rq->cpu, rd->online); | ||
| 6145 | 6106 | ||
| 6146 | for (class = sched_class_highest; class; class = class->next) { | 6107 | for (class = sched_class_highest; class; class = class->next) { |
| 6147 | if (class->join_domain) | 6108 | if (class->join_domain) |
| @@ -7082,21 +7043,6 @@ void __init sched_init_smp(void) | |||
| 7082 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | 7043 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) |
| 7083 | BUG(); | 7044 | BUG(); |
| 7084 | sched_init_granularity(); | 7045 | sched_init_granularity(); |
| 7085 | |||
| 7086 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 7087 | if (nr_cpu_ids == 1) | ||
| 7088 | return; | ||
| 7089 | |||
| 7090 | lb_monitor_task = kthread_create(load_balance_monitor, NULL, | ||
| 7091 | "group_balance"); | ||
| 7092 | if (!IS_ERR(lb_monitor_task)) { | ||
| 7093 | lb_monitor_task->flags |= PF_NOFREEZE; | ||
| 7094 | wake_up_process(lb_monitor_task); | ||
| 7095 | } else { | ||
| 7096 | printk(KERN_ERR "Could not create load balance monitor thread" | ||
| 7097 | "(error = %ld) \n", PTR_ERR(lb_monitor_task)); | ||
| 7098 | } | ||
| 7099 | #endif | ||
| 7100 | } | 7046 | } |
| 7101 | #else | 7047 | #else |
| 7102 | void __init sched_init_smp(void) | 7048 | void __init sched_init_smp(void) |
| @@ -7283,6 +7229,8 @@ void __init sched_init(void) | |||
| 7283 | * During early bootup we pretend to be a normal task: | 7229 | * During early bootup we pretend to be a normal task: |
| 7284 | */ | 7230 | */ |
| 7285 | current->sched_class = &fair_sched_class; | 7231 | current->sched_class = &fair_sched_class; |
| 7232 | |||
| 7233 | scheduler_running = 1; | ||
| 7286 | } | 7234 | } |
| 7287 | 7235 | ||
| 7288 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 7236 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
| @@ -7417,157 +7365,6 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
| 7417 | 7365 | ||
| 7418 | #ifdef CONFIG_GROUP_SCHED | 7366 | #ifdef CONFIG_GROUP_SCHED |
| 7419 | 7367 | ||
| 7420 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP | ||
| 7421 | /* | ||
| 7422 | * distribute shares of all task groups among their schedulable entities, | ||
| 7423 | * to reflect load distribution across cpus. | ||
| 7424 | */ | ||
| 7425 | static int rebalance_shares(struct sched_domain *sd, int this_cpu) | ||
| 7426 | { | ||
| 7427 | struct cfs_rq *cfs_rq; | ||
| 7428 | struct rq *rq = cpu_rq(this_cpu); | ||
| 7429 | cpumask_t sdspan = sd->span; | ||
| 7430 | int balanced = 1; | ||
| 7431 | |||
| 7432 | /* Walk thr' all the task groups that we have */ | ||
| 7433 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
| 7434 | int i; | ||
| 7435 | unsigned long total_load = 0, total_shares; | ||
| 7436 | struct task_group *tg = cfs_rq->tg; | ||
| 7437 | |||
| 7438 | /* Gather total task load of this group across cpus */ | ||
| 7439 | for_each_cpu_mask(i, sdspan) | ||
| 7440 | total_load += tg->cfs_rq[i]->load.weight; | ||
| 7441 | |||
| 7442 | /* Nothing to do if this group has no load */ | ||
| 7443 | if (!total_load) | ||
| 7444 | continue; | ||
| 7445 | |||
| 7446 | /* | ||
| 7447 | * tg->shares represents the number of cpu shares the task group | ||
| 7448 | * is eligible to hold on a single cpu. On N cpus, it is | ||
| 7449 | * eligible to hold (N * tg->shares) number of cpu shares. | ||
| 7450 | */ | ||
| 7451 | total_shares = tg->shares * cpus_weight(sdspan); | ||
| 7452 | |||
| 7453 | /* | ||
| 7454 | * redistribute total_shares across cpus as per the task load | ||
| 7455 | * distribution. | ||
| 7456 | */ | ||
| 7457 | for_each_cpu_mask(i, sdspan) { | ||
| 7458 | unsigned long local_load, local_shares; | ||
| 7459 | |||
| 7460 | local_load = tg->cfs_rq[i]->load.weight; | ||
| 7461 | local_shares = (local_load * total_shares) / total_load; | ||
| 7462 | if (!local_shares) | ||
| 7463 | local_shares = MIN_GROUP_SHARES; | ||
| 7464 | if (local_shares == tg->se[i]->load.weight) | ||
| 7465 | continue; | ||
| 7466 | |||
| 7467 | spin_lock_irq(&cpu_rq(i)->lock); | ||
| 7468 | set_se_shares(tg->se[i], local_shares); | ||
| 7469 | spin_unlock_irq(&cpu_rq(i)->lock); | ||
| 7470 | balanced = 0; | ||
| 7471 | } | ||
| 7472 | } | ||
| 7473 | |||
| 7474 | return balanced; | ||
| 7475 | } | ||
| 7476 | |||
| 7477 | /* | ||
| 7478 | * How frequently should we rebalance_shares() across cpus? | ||
| 7479 | * | ||
| 7480 | * The more frequently we rebalance shares, the more accurate is the fairness | ||
| 7481 | * of cpu bandwidth distribution between task groups. However higher frequency | ||
| 7482 | * also implies increased scheduling overhead. | ||
| 7483 | * | ||
| 7484 | * sysctl_sched_min_bal_int_shares represents the minimum interval between | ||
| 7485 | * consecutive calls to rebalance_shares() in the same sched domain. | ||
| 7486 | * | ||
| 7487 | * sysctl_sched_max_bal_int_shares represents the maximum interval between | ||
| 7488 | * consecutive calls to rebalance_shares() in the same sched domain. | ||
| 7489 | * | ||
| 7490 | * These settings allows for the appropriate trade-off between accuracy of | ||
| 7491 | * fairness and the associated overhead. | ||
| 7492 | * | ||
| 7493 | */ | ||
| 7494 | |||
| 7495 | /* default: 8ms, units: milliseconds */ | ||
| 7496 | const_debug unsigned int sysctl_sched_min_bal_int_shares = 8; | ||
| 7497 | |||
| 7498 | /* default: 128ms, units: milliseconds */ | ||
| 7499 | const_debug unsigned int sysctl_sched_max_bal_int_shares = 128; | ||
| 7500 | |||
| 7501 | /* kernel thread that runs rebalance_shares() periodically */ | ||
| 7502 | static int load_balance_monitor(void *unused) | ||
| 7503 | { | ||
| 7504 | unsigned int timeout = sysctl_sched_min_bal_int_shares; | ||
| 7505 | struct sched_param schedparm; | ||
| 7506 | int ret; | ||
| 7507 | |||
| 7508 | /* | ||
| 7509 | * We don't want this thread's execution to be limited by the shares | ||
| 7510 | * assigned to default group (init_task_group). Hence make it run | ||
| 7511 | * as a SCHED_RR RT task at the lowest priority. | ||
| 7512 | */ | ||
| 7513 | schedparm.sched_priority = 1; | ||
| 7514 | ret = sched_setscheduler(current, SCHED_RR, &schedparm); | ||
| 7515 | if (ret) | ||
| 7516 | printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance" | ||
| 7517 | " monitor thread (error = %d) \n", ret); | ||
| 7518 | |||
| 7519 | while (!kthread_should_stop()) { | ||
| 7520 | int i, cpu, balanced = 1; | ||
| 7521 | |||
| 7522 | /* Prevent cpus going down or coming up */ | ||
| 7523 | get_online_cpus(); | ||
| 7524 | /* lockout changes to doms_cur[] array */ | ||
| 7525 | lock_doms_cur(); | ||
| 7526 | /* | ||
| 7527 | * Enter a rcu read-side critical section to safely walk rq->sd | ||
| 7528 | * chain on various cpus and to walk task group list | ||
| 7529 | * (rq->leaf_cfs_rq_list) in rebalance_shares(). | ||
| 7530 | */ | ||
| 7531 | rcu_read_lock(); | ||
| 7532 | |||
| 7533 | for (i = 0; i < ndoms_cur; i++) { | ||
| 7534 | cpumask_t cpumap = doms_cur[i]; | ||
| 7535 | struct sched_domain *sd = NULL, *sd_prev = NULL; | ||
| 7536 | |||
| 7537 | cpu = first_cpu(cpumap); | ||
| 7538 | |||
| 7539 | /* Find the highest domain at which to balance shares */ | ||
| 7540 | for_each_domain(cpu, sd) { | ||
| 7541 | if (!(sd->flags & SD_LOAD_BALANCE)) | ||
| 7542 | continue; | ||
| 7543 | sd_prev = sd; | ||
| 7544 | } | ||
| 7545 | |||
| 7546 | sd = sd_prev; | ||
| 7547 | /* sd == NULL? No load balance reqd in this domain */ | ||
| 7548 | if (!sd) | ||
| 7549 | continue; | ||
| 7550 | |||
| 7551 | balanced &= rebalance_shares(sd, cpu); | ||
| 7552 | } | ||
| 7553 | |||
| 7554 | rcu_read_unlock(); | ||
| 7555 | |||
| 7556 | unlock_doms_cur(); | ||
| 7557 | put_online_cpus(); | ||
| 7558 | |||
| 7559 | if (!balanced) | ||
| 7560 | timeout = sysctl_sched_min_bal_int_shares; | ||
| 7561 | else if (timeout < sysctl_sched_max_bal_int_shares) | ||
| 7562 | timeout *= 2; | ||
| 7563 | |||
| 7564 | msleep_interruptible(timeout); | ||
| 7565 | } | ||
| 7566 | |||
| 7567 | return 0; | ||
| 7568 | } | ||
| 7569 | #endif /* CONFIG_SMP */ | ||
| 7570 | |||
| 7571 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7368 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7572 | static void free_fair_sched_group(struct task_group *tg) | 7369 | static void free_fair_sched_group(struct task_group *tg) |
| 7573 | { | 7370 | { |
| @@ -7824,6 +7621,11 @@ void sched_move_task(struct task_struct *tsk) | |||
| 7824 | 7621 | ||
| 7825 | set_task_rq(tsk, task_cpu(tsk)); | 7622 | set_task_rq(tsk, task_cpu(tsk)); |
| 7826 | 7623 | ||
| 7624 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 7625 | if (tsk->sched_class->moved_group) | ||
| 7626 | tsk->sched_class->moved_group(tsk); | ||
| 7627 | #endif | ||
| 7628 | |||
| 7827 | if (on_rq) { | 7629 | if (on_rq) { |
| 7828 | if (unlikely(running)) | 7630 | if (unlikely(running)) |
| 7829 | tsk->sched_class->set_curr_task(rq); | 7631 | tsk->sched_class->set_curr_task(rq); |
| @@ -7834,29 +7636,25 @@ void sched_move_task(struct task_struct *tsk) | |||
| 7834 | } | 7636 | } |
| 7835 | 7637 | ||
| 7836 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7638 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7837 | /* rq->lock to be locked by caller */ | ||
| 7838 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 7639 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
| 7839 | { | 7640 | { |
| 7840 | struct cfs_rq *cfs_rq = se->cfs_rq; | 7641 | struct cfs_rq *cfs_rq = se->cfs_rq; |
| 7841 | struct rq *rq = cfs_rq->rq; | 7642 | struct rq *rq = cfs_rq->rq; |
| 7842 | int on_rq; | 7643 | int on_rq; |
| 7843 | 7644 | ||
| 7844 | if (!shares) | 7645 | spin_lock_irq(&rq->lock); |
| 7845 | shares = MIN_GROUP_SHARES; | ||
| 7846 | 7646 | ||
| 7847 | on_rq = se->on_rq; | 7647 | on_rq = se->on_rq; |
| 7848 | if (on_rq) { | 7648 | if (on_rq) |
| 7849 | dequeue_entity(cfs_rq, se, 0); | 7649 | dequeue_entity(cfs_rq, se, 0); |
| 7850 | dec_cpu_load(rq, se->load.weight); | ||
| 7851 | } | ||
| 7852 | 7650 | ||
| 7853 | se->load.weight = shares; | 7651 | se->load.weight = shares; |
| 7854 | se->load.inv_weight = div64_64((1ULL<<32), shares); | 7652 | se->load.inv_weight = div64_64((1ULL<<32), shares); |
| 7855 | 7653 | ||
| 7856 | if (on_rq) { | 7654 | if (on_rq) |
| 7857 | enqueue_entity(cfs_rq, se, 0); | 7655 | enqueue_entity(cfs_rq, se, 0); |
| 7858 | inc_cpu_load(rq, se->load.weight); | 7656 | |
| 7859 | } | 7657 | spin_unlock_irq(&rq->lock); |
| 7860 | } | 7658 | } |
| 7861 | 7659 | ||
| 7862 | static DEFINE_MUTEX(shares_mutex); | 7660 | static DEFINE_MUTEX(shares_mutex); |
| @@ -7866,18 +7664,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 7866 | int i; | 7664 | int i; |
| 7867 | unsigned long flags; | 7665 | unsigned long flags; |
| 7868 | 7666 | ||
| 7667 | /* | ||
| 7668 | * A weight of 0 or 1 can cause arithmetics problems. | ||
| 7669 | * (The default weight is 1024 - so there's no practical | ||
| 7670 | * limitation from this.) | ||
| 7671 | */ | ||
| 7672 | if (shares < 2) | ||
| 7673 | shares = 2; | ||
| 7674 | |||
| 7869 | mutex_lock(&shares_mutex); | 7675 | mutex_lock(&shares_mutex); |
| 7870 | if (tg->shares == shares) | 7676 | if (tg->shares == shares) |
| 7871 | goto done; | 7677 | goto done; |
| 7872 | 7678 | ||
| 7873 | if (shares < MIN_GROUP_SHARES) | ||
| 7874 | shares = MIN_GROUP_SHARES; | ||
| 7875 | |||
| 7876 | /* | ||
| 7877 | * Prevent any load balance activity (rebalance_shares, | ||
| 7878 | * load_balance_fair) from referring to this group first, | ||
| 7879 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. | ||
| 7880 | */ | ||
| 7881 | spin_lock_irqsave(&task_group_lock, flags); | 7679 | spin_lock_irqsave(&task_group_lock, flags); |
| 7882 | for_each_possible_cpu(i) | 7680 | for_each_possible_cpu(i) |
| 7883 | unregister_fair_sched_group(tg, i); | 7681 | unregister_fair_sched_group(tg, i); |
| @@ -7891,11 +7689,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 7891 | * w/o tripping rebalance_share or load_balance_fair. | 7689 | * w/o tripping rebalance_share or load_balance_fair. |
| 7892 | */ | 7690 | */ |
| 7893 | tg->shares = shares; | 7691 | tg->shares = shares; |
| 7894 | for_each_possible_cpu(i) { | 7692 | for_each_possible_cpu(i) |
| 7895 | spin_lock_irq(&cpu_rq(i)->lock); | ||
| 7896 | set_se_shares(tg->se[i], shares); | 7693 | set_se_shares(tg->se[i], shares); |
| 7897 | spin_unlock_irq(&cpu_rq(i)->lock); | ||
| 7898 | } | ||
| 7899 | 7694 | ||
| 7900 | /* | 7695 | /* |
| 7901 | * Enable load balance activity on this group, by inserting it back on | 7696 | * Enable load balance activity on this group, by inserting it back on |
| @@ -7927,9 +7722,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
| 7927 | if (runtime == RUNTIME_INF) | 7722 | if (runtime == RUNTIME_INF) |
| 7928 | return 1ULL << 16; | 7723 | return 1ULL << 16; |
| 7929 | 7724 | ||
| 7930 | runtime *= (1ULL << 16); | 7725 | return div64_64(runtime << 16, period); |
| 7931 | div64_64(runtime, period); | ||
| 7932 | return runtime; | ||
| 7933 | } | 7726 | } |
| 7934 | 7727 | ||
| 7935 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 7728 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
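The old to_ratio() scaled runtime by 2^16 but then discarded the quotient that div64_64() returned and handed back the scaled product; the replacement returns div64_64(runtime << 16, period) directly. A small userspace sketch of the same 16.16 fixed-point ratio, with plain 64-bit division standing in for div64_64() and a stand-in RUNTIME_INF value:

    #include <stdint.h>
    #include <stdio.h>

    #define RUNTIME_INF ((uint64_t)~0ULL)      /* stand-in, not the kernel constant */

    /* 16.16 fixed-point ratio of runtime to period, as the fixed helper computes it */
    static uint64_t to_ratio(uint64_t period, uint64_t runtime)
    {
        if (runtime == RUNTIME_INF)
            return 1ULL << 16;
        return (runtime << 16) / period;       /* plain division stands in for div64_64() */
    }

    int main(void)
    {
        /* 950ms of runtime in a 1s period: a little below 1.0, i.e. below 65536 */
        printf("%llu\n", (unsigned long long)to_ratio(1000000000ULL, 950000000ULL));
        return 0;
    }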
| @@ -7953,25 +7746,40 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | |||
| 7953 | return total + to_ratio(period, runtime) < global_ratio; | 7746 | return total + to_ratio(period, runtime) < global_ratio; |
| 7954 | } | 7747 | } |
| 7955 | 7748 | ||
| 7749 | /* Must be called with tasklist_lock held */ | ||
| 7750 | static inline int tg_has_rt_tasks(struct task_group *tg) | ||
| 7751 | { | ||
| 7752 | struct task_struct *g, *p; | ||
| 7753 | do_each_thread(g, p) { | ||
| 7754 | if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) | ||
| 7755 | return 1; | ||
| 7756 | } while_each_thread(g, p); | ||
| 7757 | return 0; | ||
| 7758 | } | ||
| 7759 | |||
| 7956 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | 7760 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) |
| 7957 | { | 7761 | { |
| 7958 | u64 rt_runtime, rt_period; | 7762 | u64 rt_runtime, rt_period; |
| 7959 | int err = 0; | 7763 | int err = 0; |
| 7960 | 7764 | ||
| 7961 | rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; | 7765 | rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; |
| 7962 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | 7766 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; |
| 7963 | if (rt_runtime_us == -1) | 7767 | if (rt_runtime_us == -1) |
| 7964 | rt_runtime = rt_period; | 7768 | rt_runtime = RUNTIME_INF; |
| 7965 | 7769 | ||
| 7966 | mutex_lock(&rt_constraints_mutex); | 7770 | mutex_lock(&rt_constraints_mutex); |
| 7771 | read_lock(&tasklist_lock); | ||
| 7772 | if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) { | ||
| 7773 | err = -EBUSY; | ||
| 7774 | goto unlock; | ||
| 7775 | } | ||
| 7967 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { | 7776 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { |
| 7968 | err = -EINVAL; | 7777 | err = -EINVAL; |
| 7969 | goto unlock; | 7778 | goto unlock; |
| 7970 | } | 7779 | } |
| 7971 | if (rt_runtime_us == -1) | ||
| 7972 | rt_runtime = RUNTIME_INF; | ||
| 7973 | tg->rt_runtime = rt_runtime; | 7780 | tg->rt_runtime = rt_runtime; |
| 7974 | unlock: | 7781 | unlock: |
| 7782 | read_unlock(&tasklist_lock); | ||
| 7975 | mutex_unlock(&rt_constraints_mutex); | 7783 | mutex_unlock(&rt_constraints_mutex); |
| 7976 | 7784 | ||
| 7977 | return err; | 7785 | return err; |
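sched_group_set_rt_runtime() now converts both values to nanoseconds up front, treats -1 as RUNTIME_INF, and refuses with -EBUSY to drop a group's bandwidth to zero while the group still has realtime tasks (that check walks the task list under tasklist_lock). A rough userspace sketch of just the validation ordering; the helpers and globals below are stand-ins, not kernel code:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_USEC 1000ULL
    #define RUNTIME_INF   ((uint64_t)~0ULL)    /* stand-in for the kernel constant */

    static int group_has_rt_tasks = 1;         /* pretend tg_has_rt_tasks() said yes */

    static int fits_global_limit(uint64_t period, uint64_t runtime)
    {
        (void)period; (void)runtime;           /* __rt_schedulable() stand-in */
        return 1;
    }

    static int set_rt_runtime(uint64_t *tg_rt_runtime, long rt_runtime_us,
                              unsigned int rt_period_us)
    {
        uint64_t rt_period  = (uint64_t)rt_period_us * NSEC_PER_USEC;
        uint64_t rt_runtime = (uint64_t)rt_runtime_us * NSEC_PER_USEC;

        if (rt_runtime_us == -1)
            rt_runtime = RUNTIME_INF;          /* -1 means "no limit" */

        /* refuse to starve a group that still has realtime tasks */
        if (rt_runtime_us == 0 && group_has_rt_tasks)
            return -EBUSY;
        if (!fits_global_limit(rt_period, rt_runtime))
            return -EINVAL;

        *tg_rt_runtime = rt_runtime;
        return 0;
    }

    int main(void)
    {
        uint64_t rt_runtime = 0;
        printf("%d\n", set_rt_runtime(&rt_runtime, 950000, 1000000));  /* 0: accepted */
        printf("%d\n", set_rt_runtime(&rt_runtime, 0, 1000000));       /* -EBUSY */
        return 0;
    }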
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6c091d6e159d..e2a530515619 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -202,17 +202,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | |||
| 202 | 202 | ||
| 203 | static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 203 | static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
| 204 | { | 204 | { |
| 205 | struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; | 205 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
| 206 | struct sched_entity *se = NULL; | ||
| 207 | struct rb_node *parent; | ||
| 208 | 206 | ||
| 209 | while (*link) { | 207 | if (!last) |
| 210 | parent = *link; | 208 | return NULL; |
| 211 | se = rb_entry(parent, struct sched_entity, run_node); | ||
| 212 | link = &parent->rb_right; | ||
| 213 | } | ||
| 214 | 209 | ||
| 215 | return se; | 210 | return rb_entry(last, struct sched_entity, run_node); |
| 216 | } | 211 | } |
| 217 | 212 | ||
| 218 | /************************************************************** | 213 | /************************************************************** |
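The open-coded walk down the right spine of the rbtree is exactly what rb_last() provides, so __pick_last_entity() shrinks to rb_last() plus rb_entry(). The same "rightmost node is the largest key" idea on a plain binary search tree, as a tiny illustration:

    #include <stddef.h>
    #include <stdio.h>

    struct node { int key; struct node *left, *right; };

    /* rightmost node of a search tree == largest key; rb_last() does this walk
     * over the kernel's red-black tree, which is why the manual loop can go. */
    static struct node *tree_last(struct node *root)
    {
        if (!root)
            return NULL;
        while (root->right)
            root = root->right;
        return root;
    }

    int main(void)
    {
        struct node c = { 30, NULL, NULL };
        struct node b = { 20, NULL, &c };
        struct node a = { 10, NULL, &b };
        printf("%d\n", tree_last(&a)->key);    /* prints 30 */
        return 0;
    }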
| @@ -732,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
| 732 | return se->parent; | 727 | return se->parent; |
| 733 | } | 728 | } |
| 734 | 729 | ||
| 735 | #define GROUP_IMBALANCE_PCT 20 | ||
| 736 | |||
| 737 | #else /* CONFIG_FAIR_GROUP_SCHED */ | 730 | #else /* CONFIG_FAIR_GROUP_SCHED */ |
| 738 | 731 | ||
| 739 | #define for_each_sched_entity(se) \ | 732 | #define for_each_sched_entity(se) \ |
| @@ -824,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
| 824 | static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | 817 | static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) |
| 825 | { | 818 | { |
| 826 | struct cfs_rq *cfs_rq; | 819 | struct cfs_rq *cfs_rq; |
| 827 | struct sched_entity *se = &p->se, | 820 | struct sched_entity *se = &p->se; |
| 828 | *topse = NULL; /* Highest schedulable entity */ | ||
| 829 | int incload = 1; | ||
| 830 | 821 | ||
| 831 | for_each_sched_entity(se) { | 822 | for_each_sched_entity(se) { |
| 832 | topse = se; | 823 | if (se->on_rq) |
| 833 | if (se->on_rq) { | ||
| 834 | incload = 0; | ||
| 835 | break; | 824 | break; |
| 836 | } | ||
| 837 | cfs_rq = cfs_rq_of(se); | 825 | cfs_rq = cfs_rq_of(se); |
| 838 | enqueue_entity(cfs_rq, se, wakeup); | 826 | enqueue_entity(cfs_rq, se, wakeup); |
| 839 | wakeup = 1; | 827 | wakeup = 1; |
| 840 | } | 828 | } |
| 841 | /* Increment cpu load if we just enqueued the first task of a group on | ||
| 842 | * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs | ||
| 843 | * at the highest grouping level. | ||
| 844 | */ | ||
| 845 | if (incload) | ||
| 846 | inc_cpu_load(rq, topse->load.weight); | ||
| 847 | 829 | ||
| 848 | hrtick_start_fair(rq, rq->curr); | 830 | hrtick_start_fair(rq, rq->curr); |
| 849 | } | 831 | } |
| @@ -856,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 856 | static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | 838 | static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) |
| 857 | { | 839 | { |
| 858 | struct cfs_rq *cfs_rq; | 840 | struct cfs_rq *cfs_rq; |
| 859 | struct sched_entity *se = &p->se, | 841 | struct sched_entity *se = &p->se; |
| 860 | *topse = NULL; /* Highest schedulable entity */ | ||
| 861 | int decload = 1; | ||
| 862 | 842 | ||
| 863 | for_each_sched_entity(se) { | 843 | for_each_sched_entity(se) { |
| 864 | topse = se; | ||
| 865 | cfs_rq = cfs_rq_of(se); | 844 | cfs_rq = cfs_rq_of(se); |
| 866 | dequeue_entity(cfs_rq, se, sleep); | 845 | dequeue_entity(cfs_rq, se, sleep); |
| 867 | /* Don't dequeue parent if it has other entities besides us */ | 846 | /* Don't dequeue parent if it has other entities besides us */ |
| 868 | if (cfs_rq->load.weight) { | 847 | if (cfs_rq->load.weight) |
| 869 | if (parent_entity(se)) | ||
| 870 | decload = 0; | ||
| 871 | break; | 848 | break; |
| 872 | } | ||
| 873 | sleep = 1; | 849 | sleep = 1; |
| 874 | } | 850 | } |
| 875 | /* Decrement cpu load if we just dequeued the last task of a group on | ||
| 876 | * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs | ||
| 877 | * at the highest grouping level. | ||
| 878 | */ | ||
| 879 | if (decload) | ||
| 880 | dec_cpu_load(rq, topse->load.weight); | ||
| 881 | 851 | ||
| 882 | hrtick_start_fair(rq, rq->curr); | 852 | hrtick_start_fair(rq, rq->curr); |
| 883 | } | 853 | } |
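With the per-group cpu-load bookkeeping (topse/incload/decload) gone, enqueue_task_fair() and dequeue_task_fair() reduce to a plain walk up the group hierarchy that stops at the first entity which is already queued (or whose queue still carries weight). A toy sketch of the enqueue-side walk; toy_se and its fields are illustrative only:

    #include <stddef.h>
    #include <stdio.h>

    struct toy_se {
        int on_rq;                  /* already queued at this level? */
        struct toy_se *parent;      /* enclosing group entity, NULL at the top */
    };

    /* simplified shape of enqueue_task_fair() after the patch: queue each level,
     * but stop as soon as a level is already on its runqueue */
    static void toy_enqueue_task(struct toy_se *se)
    {
        for (; se; se = se->parent) {
            if (se->on_rq)
                break;
            se->on_rq = 1;
        }
    }

    int main(void)
    {
        struct toy_se group = { 1, NULL };      /* the group is already on the rq */
        struct toy_se task  = { 0, &group };
        toy_enqueue_task(&task);
        printf("task=%d group=%d\n", task.on_rq, group.on_rq);   /* 1 1 */
        return 0;
    }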
| @@ -1191,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg) | |||
| 1191 | return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); | 1161 | return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); |
| 1192 | } | 1162 | } |
| 1193 | 1163 | ||
| 1164 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1165 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | ||
| 1166 | { | ||
| 1167 | struct sched_entity *curr; | ||
| 1168 | struct task_struct *p; | ||
| 1169 | |||
| 1170 | if (!cfs_rq->nr_running || !first_fair(cfs_rq)) | ||
| 1171 | return MAX_PRIO; | ||
| 1172 | |||
| 1173 | curr = cfs_rq->curr; | ||
| 1174 | if (!curr) | ||
| 1175 | curr = __pick_next_entity(cfs_rq); | ||
| 1176 | |||
| 1177 | p = task_of(curr); | ||
| 1178 | |||
| 1179 | return p->prio; | ||
| 1180 | } | ||
| 1181 | #endif | ||
| 1182 | |||
| 1194 | static unsigned long | 1183 | static unsigned long |
| 1195 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1184 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
| 1196 | unsigned long max_load_move, | 1185 | unsigned long max_load_move, |
| @@ -1200,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 1200 | struct cfs_rq *busy_cfs_rq; | 1189 | struct cfs_rq *busy_cfs_rq; |
| 1201 | long rem_load_move = max_load_move; | 1190 | long rem_load_move = max_load_move; |
| 1202 | struct rq_iterator cfs_rq_iterator; | 1191 | struct rq_iterator cfs_rq_iterator; |
| 1203 | unsigned long load_moved; | ||
| 1204 | 1192 | ||
| 1205 | cfs_rq_iterator.start = load_balance_start_fair; | 1193 | cfs_rq_iterator.start = load_balance_start_fair; |
| 1206 | cfs_rq_iterator.next = load_balance_next_fair; | 1194 | cfs_rq_iterator.next = load_balance_next_fair; |
| 1207 | 1195 | ||
| 1208 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { | 1196 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { |
| 1209 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1197 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 1210 | struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu]; | 1198 | struct cfs_rq *this_cfs_rq; |
| 1211 | unsigned long maxload, task_load, group_weight; | 1199 | long imbalance; |
| 1212 | unsigned long thisload, per_task_load; | 1200 | unsigned long maxload; |
| 1213 | struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu]; | ||
| 1214 | |||
| 1215 | task_load = busy_cfs_rq->load.weight; | ||
| 1216 | group_weight = se->load.weight; | ||
| 1217 | 1201 | ||
| 1218 | /* | 1202 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); |
| 1219 | * 'group_weight' is contributed by tasks of total weight | ||
| 1220 | * 'task_load'. To move 'rem_load_move' worth of weight only, | ||
| 1221 | * we need to move a maximum task load of: | ||
| 1222 | * | ||
| 1223 | * maxload = (remload / group_weight) * task_load; | ||
| 1224 | */ | ||
| 1225 | maxload = (rem_load_move * task_load) / group_weight; | ||
| 1226 | 1203 | ||
| 1227 | if (!maxload || !task_load) | 1204 | imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; |
| 1205 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | ||
| 1206 | if (imbalance <= 0) | ||
| 1228 | continue; | 1207 | continue; |
| 1229 | 1208 | ||
| 1230 | per_task_load = task_load / busy_cfs_rq->nr_running; | 1209 | /* Don't pull more than imbalance/2 */ |
| 1231 | /* | 1210 | imbalance /= 2; |
| 1232 | * balance_tasks will try to forcibly move atleast one task if | 1211 | maxload = min(rem_load_move, imbalance); |
| 1233 | * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if | ||
| 1234 | * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load. | ||
| 1235 | */ | ||
| 1236 | if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load) | ||
| 1237 | continue; | ||
| 1238 | 1212 | ||
| 1239 | /* Disable priority-based load balance */ | 1213 | *this_best_prio = cfs_rq_best_prio(this_cfs_rq); |
| 1240 | *this_best_prio = 0; | ||
| 1241 | thisload = this_cfs_rq->load.weight; | ||
| 1242 | #else | 1214 | #else |
| 1243 | # define maxload rem_load_move | 1215 | # define maxload rem_load_move |
| 1244 | #endif | 1216 | #endif |
| @@ -1247,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 1247 | * load_balance_[start|next]_fair iterators | 1219 | * load_balance_[start|next]_fair iterators |
| 1248 | */ | 1220 | */ |
| 1249 | cfs_rq_iterator.arg = busy_cfs_rq; | 1221 | cfs_rq_iterator.arg = busy_cfs_rq; |
| 1250 | load_moved = balance_tasks(this_rq, this_cpu, busiest, | 1222 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, |
| 1251 | maxload, sd, idle, all_pinned, | 1223 | maxload, sd, idle, all_pinned, |
| 1252 | this_best_prio, | 1224 | this_best_prio, |
| 1253 | &cfs_rq_iterator); | 1225 | &cfs_rq_iterator); |
| 1254 | 1226 | ||
| 1255 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1256 | /* | ||
| 1257 | * load_moved holds the task load that was moved. The | ||
| 1258 | * effective (group) weight moved would be: | ||
| 1259 | * load_moved_eff = load_moved/task_load * group_weight; | ||
| 1260 | */ | ||
| 1261 | load_moved = (group_weight * load_moved) / task_load; | ||
| 1262 | |||
| 1263 | /* Adjust shares on both cpus to reflect load_moved */ | ||
| 1264 | group_weight -= load_moved; | ||
| 1265 | set_se_shares(se, group_weight); | ||
| 1266 | |||
| 1267 | se = busy_cfs_rq->tg->se[this_cpu]; | ||
| 1268 | if (!thisload) | ||
| 1269 | group_weight = load_moved; | ||
| 1270 | else | ||
| 1271 | group_weight = se->load.weight + load_moved; | ||
| 1272 | set_se_shares(se, group_weight); | ||
| 1273 | #endif | ||
| 1274 | |||
| 1275 | rem_load_move -= load_moved; | ||
| 1276 | |||
| 1277 | if (rem_load_move <= 0) | 1227 | if (rem_load_move <= 0) |
| 1278 | break; | 1228 | break; |
| 1279 | } | 1229 | } |
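The group-aware branch of load_balance_fair() now sizes its pull from the raw weight gap between the busy group queue and this CPU's queue for the same group: skip the group if we are not lighter, pull at most half the gap, and never more than the load still left to move. A minimal sketch of that calculation:

    #include <stdio.h>

    /* how much weight to try to pull from a busier group queue, per the new logic */
    static long fair_group_maxload(long busy_weight, long this_weight, long rem_load_move)
    {
        long imbalance = busy_weight - this_weight;

        if (imbalance <= 0)
            return 0;                           /* we're not lighter: don't pull */
        imbalance /= 2;                         /* pull at most half the gap */
        return imbalance < rem_load_move ? imbalance : rem_load_move;
    }

    int main(void)
    {
        printf("%ld\n", fair_group_maxload(3072, 1024, 4096));   /* -> 1024 */
        printf("%ld\n", fair_group_maxload(1024, 2048, 4096));   /* -> 0, skip */
        return 0;
    }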
| @@ -1403,6 +1353,16 @@ static void set_curr_task_fair(struct rq *rq) | |||
| 1403 | set_next_entity(cfs_rq_of(se), se); | 1353 | set_next_entity(cfs_rq_of(se), se); |
| 1404 | } | 1354 | } |
| 1405 | 1355 | ||
| 1356 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1357 | static void moved_group_fair(struct task_struct *p) | ||
| 1358 | { | ||
| 1359 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | ||
| 1360 | |||
| 1361 | update_curr(cfs_rq); | ||
| 1362 | place_entity(cfs_rq, &p->se, 1); | ||
| 1363 | } | ||
| 1364 | #endif | ||
| 1365 | |||
| 1406 | /* | 1366 | /* |
| 1407 | * All the scheduling class methods: | 1367 | * All the scheduling class methods: |
| 1408 | */ | 1368 | */ |
| @@ -1431,6 +1391,10 @@ static const struct sched_class fair_sched_class = { | |||
| 1431 | 1391 | ||
| 1432 | .prio_changed = prio_changed_fair, | 1392 | .prio_changed = prio_changed_fair, |
| 1433 | .switched_to = switched_to_fair, | 1393 | .switched_to = switched_to_fair, |
| 1394 | |||
| 1395 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1396 | .moved_group = moved_group_fair, | ||
| 1397 | #endif | ||
| 1434 | }; | 1398 | }; |
| 1435 | 1399 | ||
| 1436 | #ifdef CONFIG_SCHED_DEBUG | 1400 | #ifdef CONFIG_SCHED_DEBUG |
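The new moved_group hook is an optional member of the sched_class method table: sched_move_task() calls it only when the class provides one, and the fair class uses it to update and re-place the entity after a cgroup move. A stripped-down sketch of that optional-callback pattern; the struct and function names below are illustrative, not the kernel's:

    #include <stddef.h>
    #include <stdio.h>

    struct task;                                /* opaque for the sketch */

    /* trimmed-down stand-in for a sched_class-style method table */
    struct toy_sched_class {
        void (*moved_group)(struct task *p);    /* optional hook, may be NULL */
    };

    static void fair_moved_group(struct task *p)
    {
        (void)p;
        printf("re-placing entity on its new group's queue\n");
    }

    static const struct toy_sched_class toy_fair_class = {
        .moved_group = fair_moved_group,
    };

    static const struct toy_sched_class toy_other_class = {
        .moved_group = NULL,                    /* class without the hook */
    };

    /* the mover only invokes the hook if the class provides one */
    static void toy_move_task(const struct toy_sched_class *class, struct task *p)
    {
        if (class->moved_group)
            class->moved_group(p);
    }

    int main(void)
    {
        toy_move_task(&toy_fair_class, NULL);   /* prints */
        toy_move_task(&toy_other_class, NULL);  /* silently skips */
        return 0;
    }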
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f54792b175b2..0a6d2e516420 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -393,8 +393,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 393 | */ | 393 | */ |
| 394 | for_each_sched_rt_entity(rt_se) | 394 | for_each_sched_rt_entity(rt_se) |
| 395 | enqueue_rt_entity(rt_se); | 395 | enqueue_rt_entity(rt_se); |
| 396 | |||
| 397 | inc_cpu_load(rq, p->se.load.weight); | ||
| 398 | } | 396 | } |
| 399 | 397 | ||
| 400 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 398 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
| @@ -414,8 +412,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | |||
| 414 | if (rt_rq && rt_rq->rt_nr_running) | 412 | if (rt_rq && rt_rq->rt_nr_running) |
| 415 | enqueue_rt_entity(rt_se); | 413 | enqueue_rt_entity(rt_se); |
| 416 | } | 414 | } |
| 417 | |||
| 418 | dec_cpu_load(rq, p->se.load.weight); | ||
| 419 | } | 415 | } |
| 420 | 416 | ||
| 421 | /* | 417 | /* |
| @@ -1111,9 +1107,11 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p, | |||
| 1111 | pull_rt_task(rq); | 1107 | pull_rt_task(rq); |
| 1112 | /* | 1108 | /* |
| 1113 | * If there's a higher priority task waiting to run | 1109 | * If there's a higher priority task waiting to run |
| 1114 | * then reschedule. | 1110 | * then reschedule. Note, the above pull_rt_task |
| 1111 | * can release the rq lock and p could migrate. | ||
| 1112 | * Only reschedule if p is still on the same runqueue. | ||
| 1115 | */ | 1113 | */ |
| 1116 | if (p->prio > rq->rt.highest_prio) | 1114 | if (p->prio > rq->rt.highest_prio && rq->curr == p) |
| 1117 | resched_task(p); | 1115 | resched_task(p); |
| 1118 | #else | 1116 | #else |
| 1119 | /* For UP simply resched on drop of prio */ | 1117 | /* For UP simply resched on drop of prio */ |
diff --git a/kernel/signal.c b/kernel/signal.c index 84917fe507f7..6af1210092c3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -1623,7 +1623,6 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | |||
| 1623 | /* Let the debugger run. */ | 1623 | /* Let the debugger run. */ |
| 1624 | __set_current_state(TASK_TRACED); | 1624 | __set_current_state(TASK_TRACED); |
| 1625 | spin_unlock_irq(¤t->sighand->siglock); | 1625 | spin_unlock_irq(¤t->sighand->siglock); |
| 1626 | try_to_freeze(); | ||
| 1627 | read_lock(&tasklist_lock); | 1626 | read_lock(&tasklist_lock); |
| 1628 | if (!unlikely(killed) && may_ptrace_stop()) { | 1627 | if (!unlikely(killed) && may_ptrace_stop()) { |
| 1629 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1628 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
| @@ -1641,6 +1640,13 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | |||
| 1641 | } | 1640 | } |
| 1642 | 1641 | ||
| 1643 | /* | 1642 | /* |
| 1643 | * While in TASK_TRACED, we were considered "frozen enough". | ||
| 1644 | * Now that we woke up, it's crucial if we're supposed to be | ||
| 1645 | * frozen that we freeze now before running anything substantial. | ||
| 1646 | */ | ||
| 1647 | try_to_freeze(); | ||
| 1648 | |||
| 1649 | /* | ||
| 1644 | * We are back. Now reacquire the siglock before touching | 1650 | * We are back. Now reacquire the siglock before touching |
| 1645 | * last_siginfo, so that we are sure to have synchronized with | 1651 | * last_siginfo, so that we are sure to have synchronized with |
| 1646 | * any signal-sending on another CPU that wants to examine it. | 1652 | * any signal-sending on another CPU that wants to examine it. |
| @@ -1757,9 +1763,15 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, | |||
| 1757 | sigset_t *mask = ¤t->blocked; | 1763 | sigset_t *mask = ¤t->blocked; |
| 1758 | int signr = 0; | 1764 | int signr = 0; |
| 1759 | 1765 | ||
| 1766 | relock: | ||
| 1767 | /* | ||
| 1768 | * We'll jump back here after any time we were stopped in TASK_STOPPED. | ||
| 1769 | * While in TASK_STOPPED, we were considered "frozen enough". | ||
| 1770 | * Now that we woke up, it's crucial if we're supposed to be | ||
| 1771 | * frozen that we freeze now before running anything substantial. | ||
| 1772 | */ | ||
| 1760 | try_to_freeze(); | 1773 | try_to_freeze(); |
| 1761 | 1774 | ||
| 1762 | relock: | ||
| 1763 | spin_lock_irq(¤t->sighand->siglock); | 1775 | spin_lock_irq(¤t->sighand->siglock); |
| 1764 | for (;;) { | 1776 | for (;;) { |
| 1765 | struct k_sigaction *ka; | 1777 | struct k_sigaction *ka; |
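Moving try_to_freeze() under the relock label (and after the TASK_TRACED wakeup in ptrace_stop()) means the freeze check now runs every time the task comes back from a stop, before it does anything substantial. A loose sketch of that loop shape; everything below is a stand-in, not the kernel's signal code:

    #include <stdio.h>

    static int freeze_requested = 1;

    static void maybe_freeze(void)              /* stand-in for try_to_freeze() */
    {
        if (freeze_requested) {
            printf("freezing before doing anything substantial\n");
            freeze_requested = 0;
        }
    }

    static int dequeue_signal_stub(void) { return 7; }   /* pretend one was pending */

    /* shape of the delivery loop after the change: the freeze point sits under
     * the relock label, so it is re-checked on every pass, including right
     * after waking from a stop */
    static int get_signal_sketch(void)
    {
        int signr;
    relock:
        maybe_freeze();
        /* ... lock siglock, handle group stop ... */
        signr = dequeue_signal_stub();
        if (signr == 0)
            goto relock;                        /* stopped and woken: loop again */
        return signr;
    }

    int main(void)
    {
        printf("signr=%d\n", get_signal_sketch());
        return 0;
    }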
diff --git a/kernel/softirq.c b/kernel/softirq.c index 5b3aea5f471e..31e9f2a47928 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -313,6 +313,7 @@ void irq_exit(void) | |||
| 313 | /* Make sure that timer wheel updates are propagated */ | 313 | /* Make sure that timer wheel updates are propagated */ |
| 314 | if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) | 314 | if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) |
| 315 | tick_nohz_stop_sched_tick(); | 315 | tick_nohz_stop_sched_tick(); |
| 316 | rcu_irq_exit(); | ||
| 316 | #endif | 317 | #endif |
| 317 | preempt_enable_no_resched(); | 318 | preempt_enable_no_resched(); |
| 318 | } | 319 | } |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 7c2da88db4ed..01b6522fd92b 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -216,26 +216,27 @@ static int watchdog(void *__bind_cpu) | |||
| 216 | /* initialize timestamp */ | 216 | /* initialize timestamp */ |
| 217 | touch_softlockup_watchdog(); | 217 | touch_softlockup_watchdog(); |
| 218 | 218 | ||
| 219 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 219 | /* | 220 | /* |
| 220 | * Run briefly once per second to reset the softlockup timestamp. | 221 | * Run briefly once per second to reset the softlockup timestamp. |
| 221 | * If this gets delayed for more than 60 seconds then the | 222 | * If this gets delayed for more than 60 seconds then the |
| 222 | * debug-printout triggers in softlockup_tick(). | 223 | * debug-printout triggers in softlockup_tick(). |
| 223 | */ | 224 | */ |
| 224 | while (!kthread_should_stop()) { | 225 | while (!kthread_should_stop()) { |
| 225 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 226 | touch_softlockup_watchdog(); | 226 | touch_softlockup_watchdog(); |
| 227 | schedule(); | 227 | schedule(); |
| 228 | 228 | ||
| 229 | if (kthread_should_stop()) | 229 | if (kthread_should_stop()) |
| 230 | break; | 230 | break; |
| 231 | 231 | ||
| 232 | if (this_cpu != check_cpu) | 232 | if (this_cpu == check_cpu) { |
| 233 | continue; | 233 | if (sysctl_hung_task_timeout_secs) |
| 234 | 234 | check_hung_uninterruptible_tasks(this_cpu); | |
| 235 | if (sysctl_hung_task_timeout_secs) | 235 | } |
| 236 | check_hung_uninterruptible_tasks(this_cpu); | ||
| 237 | 236 | ||
| 237 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 238 | } | 238 | } |
| 239 | __set_current_state(TASK_RUNNING); | ||
| 239 | 240 | ||
| 240 | return 0; | 241 | return 0; |
| 241 | } | 242 | } |
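The reshuffled watchdog loop declares its intent to sleep (TASK_INTERRUPTIBLE) before the final stop check and the call to schedule(), so a wakeup arriving in between is not lost, and it restores TASK_RUNNING on the way out. The closest userspace analogue is the standard condition-variable idiom of re-checking the stop flag under the lock before sleeping; a small sketch with pthreads standing in for the kthread machinery:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
    static bool should_stop;
    static int  touches;

    static void *toy_watchdog(void *unused)
    {
        (void)unused;
        pthread_mutex_lock(&lock);
        while (!should_stop) {                  /* flag checked under the lock... */
            touches++;                          /* "touch the watchdog" */
            pthread_cond_wait(&cond, &lock);    /* ...so the wakeup can't be lost */
        }
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, toy_watchdog, NULL);

        pthread_mutex_lock(&lock);
        should_stop = true;                     /* like kthread_stop()... */
        pthread_cond_broadcast(&cond);          /* ...plus the wakeup */
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        printf("touches=%d\n", touches);
        return 0;
    }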
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8b7e95411795..b2a2d6889bab 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -311,24 +311,6 @@ static struct ctl_table kern_table[] = { | |||
| 311 | .mode = 0644, | 311 | .mode = 0644, |
| 312 | .proc_handler = &proc_dointvec, | 312 | .proc_handler = &proc_dointvec, |
| 313 | }, | 313 | }, |
| 314 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | ||
| 315 | { | ||
| 316 | .ctl_name = CTL_UNNUMBERED, | ||
| 317 | .procname = "sched_min_bal_int_shares", | ||
| 318 | .data = &sysctl_sched_min_bal_int_shares, | ||
| 319 | .maxlen = sizeof(unsigned int), | ||
| 320 | .mode = 0644, | ||
| 321 | .proc_handler = &proc_dointvec, | ||
| 322 | }, | ||
| 323 | { | ||
| 324 | .ctl_name = CTL_UNNUMBERED, | ||
| 325 | .procname = "sched_max_bal_int_shares", | ||
| 326 | .data = &sysctl_sched_max_bal_int_shares, | ||
| 327 | .maxlen = sizeof(unsigned int), | ||
| 328 | .mode = 0644, | ||
| 329 | .proc_handler = &proc_dointvec, | ||
| 330 | }, | ||
| 331 | #endif | ||
| 332 | #endif | 314 | #endif |
| 333 | { | 315 | { |
| 334 | .ctl_name = CTL_UNNUMBERED, | 316 | .ctl_name = CTL_UNNUMBERED, |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fa9bb73dbdb4..2968298f8f36 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void) | |||
| 282 | ts->idle_tick = ts->sched_timer.expires; | 282 | ts->idle_tick = ts->sched_timer.expires; |
| 283 | ts->tick_stopped = 1; | 283 | ts->tick_stopped = 1; |
| 284 | ts->idle_jiffies = last_jiffies; | 284 | ts->idle_jiffies = last_jiffies; |
| 285 | rcu_enter_nohz(); | ||
| 285 | } | 286 | } |
| 286 | 287 | ||
| 287 | /* | 288 | /* |
| @@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void) | |||
| 375 | return; | 376 | return; |
| 376 | } | 377 | } |
| 377 | 378 | ||
| 379 | rcu_exit_nohz(); | ||
| 380 | |||
| 378 | /* Update jiffies first */ | 381 | /* Update jiffies first */ |
| 379 | select_nohz_load_balancer(0); | 382 | select_nohz_load_balancer(0); |
| 380 | now = ktime_get(); | 383 | now = ktime_get(); |
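Between softirq.c and tick-sched.c, RCU is now told when a CPU stops taking scheduler ticks and when it resumes (rcu_enter_nohz() on tick stop, rcu_exit_nohz() on restart, rcu_irq_exit() on the irq-exit path), and those notifications have to stay balanced. A toy model of the enter/exit pairing; the _stub names are mine, not kernel API:

    #include <assert.h>
    #include <stdio.h>

    /* toy depth counter: every "enter nohz" must be matched by an "exit nohz" */
    static int nohz_depth;

    static void rcu_enter_nohz_stub(void) { nohz_depth++; }
    static void rcu_exit_nohz_stub(void)  { assert(nohz_depth > 0); nohz_depth--; }

    static void stop_sched_tick(void)    { rcu_enter_nohz_stub(); }   /* tick stopped */
    static void restart_sched_tick(void) { rcu_exit_nohz_stub(); }    /* tick back */

    int main(void)
    {
        stop_sched_tick();
        restart_sched_tick();
        printf("balanced: depth=%d\n", nohz_depth);   /* must end at 0 */
        return 0;
    }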
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index d3d94c1a0fd2..67fe8fc21fb1 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
| @@ -65,9 +65,9 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | |||
| 65 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | 65 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); |
| 66 | #endif | 66 | #endif |
| 67 | SEQ_printf(m, "\n"); | 67 | SEQ_printf(m, "\n"); |
| 68 | SEQ_printf(m, " # expires at %Lu nsecs [in %Lu nsecs]\n", | 68 | SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", |
| 69 | (unsigned long long)ktime_to_ns(timer->expires), | 69 | (unsigned long long)ktime_to_ns(timer->expires), |
| 70 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); | 70 | (long long)(ktime_to_ns(timer->expires) - now)); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | static void | 73 | static void |
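The timer_list change is purely a printf signedness fix: the "in N nsecs" delta can be negative for a timer that has already expired, and casting it to unsigned long long for %Lu turns that into a huge bogus number. A two-liner demonstrating the difference:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t expires = 1000, now = 1500;     /* timer already expired 500ns ago */
        int64_t delta = expires - now;

        printf("in %llu nsecs (old, unsigned: garbage)\n", (unsigned long long)delta);
        printf("in %lld nsecs (new, signed: -500)\n", (long long)delta);
        return 0;
    }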
