Diffstat (limited to 'kernel')
70 files changed, 3112 insertions, 1738 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 642d4277c2ea..2a999836ca18 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,11 +4,12 @@
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
             exit.o itimer.o time.o softirq.o resource.o \
-            sysctl.o capability.o ptrace.o timer.o user.o \
+            sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
             signal.o sys.o kmod.o workqueue.o pid.o \
             rcupdate.o extable.o params.o posix-timers.o \
             kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-            hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o
+            hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
+            utsname.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
@@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
-obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 
diff --git a/kernel/audit.c b/kernel/audit.c
index d13276d41410..eb0f9165b401 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -58,6 +58,7 @@
 #include <linux/selinux.h>
 #include <linux/inotify.h>
 #include <linux/freezer.h>
+#include <linux/tty.h>
 
 #include "audit.h"
 
@@ -391,6 +392,7 @@ static int kauditd_thread(void *dummy)
 {
        struct sk_buff *skb;
 
+       set_freezable();
        while (!kthread_should_stop()) {
                skb = skb_dequeue(&audit_skb_queue);
                wake_up(&audit_backlog_wait);
@@ -423,6 +425,31 @@ static int kauditd_thread(void *dummy)
        return 0;
 }
 
+static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
+{
+       struct task_struct *tsk;
+       int err;
+
+       read_lock(&tasklist_lock);
+       tsk = find_task_by_pid(pid);
+       err = -ESRCH;
+       if (!tsk)
+               goto out;
+       err = 0;
+
+       spin_lock_irq(&tsk->sighand->siglock);
+       if (!tsk->signal->audit_tty)
+               err = -EPERM;
+       spin_unlock_irq(&tsk->sighand->siglock);
+       if (err)
+               goto out;
+
+       tty_audit_push_task(tsk, loginuid);
+out:
+       read_unlock(&tasklist_lock);
+       return err;
+}
+
 int audit_send_list(void *_dest)
 {
        struct audit_netlink_list *dest = _dest;
@@ -511,6 +538,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
        case AUDIT_DEL:
        case AUDIT_DEL_RULE:
        case AUDIT_SIGNAL_INFO:
+       case AUDIT_TTY_GET:
+       case AUDIT_TTY_SET:
                if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
                        err = -EPERM;
                break;
@@ -622,6 +651,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                err = audit_filter_user(&NETLINK_CB(skb), msg_type);
                if (err == 1) {
                        err = 0;
+                       if (msg_type == AUDIT_USER_TTY) {
+                               err = audit_prepare_user_tty(pid, loginuid);
+                               if (err)
+                                       break;
+                       }
                        ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
                        if (ab) {
                                audit_log_format(ab,
@@ -638,8 +672,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                                 " subj=%s", ctx);
                                kfree(ctx);
                        }
-                       audit_log_format(ab, " msg='%.1024s'",
-                                        (char *)data);
+                       if (msg_type != AUDIT_USER_TTY)
+                               audit_log_format(ab, " msg='%.1024s'",
+                                                (char *)data);
+                       else {
+                               int size;
+
+                               audit_log_format(ab, " msg=");
+                               size = nlmsg_len(nlh);
+                               audit_log_n_untrustedstring(ab, size,
+                                                           data);
+                       }
                        audit_set_pid(ab, pid);
                        audit_log_end(ab);
                }
@@ -730,6 +773,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                 0, 0, sig_data, sizeof(*sig_data) + len);
                kfree(sig_data);
                break;
+       case AUDIT_TTY_GET: {
+               struct audit_tty_status s;
+               struct task_struct *tsk;
+
+               read_lock(&tasklist_lock);
+               tsk = find_task_by_pid(pid);
+               if (!tsk)
+                       err = -ESRCH;
+               else {
+                       spin_lock_irq(&tsk->sighand->siglock);
+                       s.enabled = tsk->signal->audit_tty != 0;
+                       spin_unlock_irq(&tsk->sighand->siglock);
+               }
+               read_unlock(&tasklist_lock);
+               audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0,
+                                &s, sizeof(s));
+               break;
+       }
+       case AUDIT_TTY_SET: {
+               struct audit_tty_status *s;
+               struct task_struct *tsk;
+
+               if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
+                       return -EINVAL;
+               s = data;
+               if (s->enabled != 0 && s->enabled != 1)
+                       return -EINVAL;
+               read_lock(&tasklist_lock);
+               tsk = find_task_by_pid(pid);
+               if (!tsk)
+                       err = -ESRCH;
+               else {
+                       spin_lock_irq(&tsk->sighand->siglock);
+                       tsk->signal->audit_tty = s->enabled != 0;
+                       spin_unlock_irq(&tsk->sighand->siglock);
+               }
+               read_unlock(&tasklist_lock);
+               break;
+       }
        default:
                err = -EINVAL;
                break;
@@ -1185,7 +1267,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 }
 
 /**
- * audit_log_n_unstrustedstring - log a string that may contain random characters
+ * audit_log_n_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @len: lenth of string (not including trailing null)
  * @string: string to be logged
@@ -1201,25 +1283,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
                                        const char *string)
 {
-       const unsigned char *p = string;
+       const unsigned char *p;
 
-       while (*p) {
+       for (p = string; p < (const unsigned char *)string + len && *p; p++) {
                if (*p == '"' || *p < 0x21 || *p > 0x7f) {
                        audit_log_hex(ab, string, len);
                        return string + len + 1;
                }
-               p++;
        }
        audit_log_n_string(ab, len, string);
        return p + 1;
 }
 
 /**
- * audit_log_unstrustedstring - log a string that may contain random characters
+ * audit_log_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @string: string to be logged
  *
- * Same as audit_log_n_unstrustedstring(), except that strlen is used to
+ * Same as audit_log_n_untrustedstring(), except that strlen is used to
  * determine string length.
  */
 const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
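
Note: the audit_log_n_untrustedstring() change above matters because the old loop trusted a terminating NUL and could scan past len. A minimal userspace sketch of the same bounded classification (illustrative names only, not the kernel API):

#include <stddef.h>
#include <stdio.h>

/* Return 1 if the buffer may be logged as plain quoted text, or 0 if it
 * must fall back to hex encoding (quotes, control or non-ASCII bytes),
 * never reading past len, which is the bound the patch introduces. */
static int printable_for_audit(const char *s, size_t len)
{
        const unsigned char *p = (const unsigned char *)s;
        size_t i;

        for (i = 0; i < len && p[i]; i++) {
                if (p[i] == '"' || p[i] < 0x21 || p[i] > 0x7f)
                        return 0;
        }
        return 1;
}

int main(void)
{
        printf("%d\n", printable_for_audit("ls", 2));      /* 1: clean */
        printf("%d\n", printable_for_audit("a b", 3));     /* 0: space < 0x21 */
        printf("%d\n", printable_for_audit("say\"hi", 6)); /* 0: quote char */
        return 0;
}
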
diff --git a/kernel/audit.h b/kernel/audit.h
index 815d6f5c04ee..95877435c347 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
 extern void             audit_send_reply(int pid, int seq, int type,
                                          int done, int multi,
                                          void *payload, int size);
-extern void             audit_log_lost(const char *message);
 extern void             audit_panic(const char *message);
 
 struct audit_netlink_list {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index ce61f423542c..359645cff5b2 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -304,7 +304,7 @@ int __init audit_register_class(int class, unsigned *list)
 
 int audit_match_class(int class, unsigned syscall)
 {
-       if (unlikely(syscall >= AUDIT_BITMASK_SIZE * sizeof(__u32)))
+       if (unlikely(syscall >= AUDIT_BITMASK_SIZE * 32))
                return 0;
        if (unlikely(class >= AUDIT_SYSCALL_CLASSES || !classes[class]))
                return 0;
@@ -456,6 +456,13 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
                case AUDIT_DEVMINOR:
                case AUDIT_EXIT:
                case AUDIT_SUCCESS:
+                       /* bit ops are only useful on syscall args */
+                       if (f->op == AUDIT_BIT_MASK ||
+                           f->op == AUDIT_BIT_TEST) {
+                               err = -EINVAL;
+                               goto exit_free;
+                       }
+                       break;
                case AUDIT_ARG0:
                case AUDIT_ARG1:
                case AUDIT_ARG2:
@@ -1210,8 +1217,8 @@ static inline int audit_add_rule(struct audit_entry *entry,
        struct audit_entry *e;
        struct audit_field *inode_f = entry->rule.inode_f;
        struct audit_watch *watch = entry->rule.watch;
-       struct nameidata *ndp, *ndw;
-       int h, err, putnd_needed = 0;
+       struct nameidata *ndp = NULL, *ndw = NULL;
+       int h, err;
 #ifdef CONFIG_AUDITSYSCALL
        int dont_count = 0;
 
@@ -1239,7 +1246,6 @@ static inline int audit_add_rule(struct audit_entry *entry,
                err = audit_get_nd(watch->path, &ndp, &ndw);
                if (err)
                        goto error;
-               putnd_needed = 1;
        }
 
        mutex_lock(&audit_filter_mutex);
@@ -1269,14 +1275,11 @@ static inline int audit_add_rule(struct audit_entry *entry,
 #endif
        mutex_unlock(&audit_filter_mutex);
 
-       if (putnd_needed)
-               audit_put_nd(ndp, ndw);
-
+       audit_put_nd(ndp, ndw);         /* NULL args OK */
        return 0;
 
 error:
-       if (putnd_needed)
-               audit_put_nd(ndp, ndw);
+       audit_put_nd(ndp, ndw);         /* NULL args OK */
        if (watch)
                audit_put_watch(watch); /* tmp watch, matches initial get */
        return err;
@@ -1570,6 +1573,10 @@ int audit_comparator(const u32 left, const u32 op, const u32 right)
                return (left > right);
        case AUDIT_GREATER_THAN_OR_EQUAL:
                return (left >= right);
+       case AUDIT_BIT_MASK:
+               return (left & right);
+       case AUDIT_BIT_TEST:
+               return ((left & right) == right);
        }
        BUG();
        return 0;
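
Note: the two comparator arms added above differ subtly: AUDIT_BIT_MASK matches when the operands share any set bit, while AUDIT_BIT_TEST requires every bit of the rule value to be present. A standalone sketch of the same logic (plain C, hypothetical helper names):

#include <assert.h>
#include <stdint.h>

static int bit_mask(uint32_t left, uint32_t right)
{
        return (left & right) != 0;     /* any common bit */
}

static int bit_test(uint32_t left, uint32_t right)
{
        return (left & right) == right; /* all bits of right present */
}

int main(void)
{
        assert(bit_mask(0x6, 0x2));     /* 0110 & 0010 != 0 */
        assert(!bit_mask(0x6, 0x1));
        assert(bit_test(0x7, 0x5));     /* 0111 contains 0101 */
        assert(!bit_test(0x6, 0x5));    /* 0110 lacks bit 0 */
        return 0;
}
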
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e36481ed61b4..bde1124d5908 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -71,9 +71,6 @@
 
 extern struct list_head audit_filter_list[];
 
-/* No syscall auditing will take place unless audit_enabled != 0. */
-extern int audit_enabled;
-
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname(). */
 #define AUDIT_NAMES    20
@@ -156,7 +153,7 @@ struct audit_aux_data_execve {
        struct audit_aux_data   d;
        int argc;
        int envc;
-       char mem[0];
+       struct mm_struct *mm;
 };
 
 struct audit_aux_data_socketcall {
@@ -176,12 +173,6 @@ struct audit_aux_data_fd_pair {
        int     fd[2];
 };
 
-struct audit_aux_data_path {
-       struct audit_aux_data   d;
-       struct dentry           *dentry;
-       struct vfsmount         *mnt;
-};
-
 struct audit_aux_data_pids {
        struct audit_aux_data   d;
        pid_t                   target_pid[AUDIT_AUX_PIDS];
@@ -657,12 +648,6 @@ static inline void audit_free_aux(struct audit_context *context)
        struct audit_aux_data *aux;
 
        while ((aux = context->aux)) {
-               if (aux->type == AUDIT_AVC_PATH) {
-                       struct audit_aux_data_path *axi = (void *)aux;
-                       dput(axi->dentry);
-                       mntput(axi->mnt);
-               }
-
                context->aux = aux->next;
                kfree(aux);
        }
@@ -834,6 +819,55 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
        return rc;
 }
 
+static void audit_log_execve_info(struct audit_buffer *ab,
+               struct audit_aux_data_execve *axi)
+{
+       int i;
+       long len, ret;
+       const char __user *p = (const char __user *)axi->mm->arg_start;
+       char *buf;
+
+       if (axi->mm != current->mm)
+               return; /* execve failed, no additional info */
+
+       for (i = 0; i < axi->argc; i++, p += len) {
+               len = strnlen_user(p, MAX_ARG_STRLEN);
+               /*
+                * We just created this mm, if we can't find the strings
+                * we just copied into it something is _very_ wrong. Similar
+                * for strings that are too long, we should not have created
+                * any.
+                */
+               if (!len || len > MAX_ARG_STRLEN) {
+                       WARN_ON(1);
+                       send_sig(SIGKILL, current, 0);
+               }
+
+               buf = kmalloc(len, GFP_KERNEL);
+               if (!buf) {
+                       audit_panic("out of memory for argv string\n");
+                       break;
+               }
+
+               ret = copy_from_user(buf, p, len);
+               /*
+                * There is no reason for this copy to be short. We just
+                * copied them here, and the mm hasn't been exposed to user-
+                * space yet.
+                */
+               if (ret) {
+                       WARN_ON(1);
+                       send_sig(SIGKILL, current, 0);
+               }
+
+               audit_log_format(ab, "a%d=", i);
+               audit_log_untrustedstring(ab, buf);
+               audit_log_format(ab, "\n");
+
+               kfree(buf);
+       }
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
        int i, call_panic = 0;
@@ -949,7 +983,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
        case AUDIT_IPC: {
                struct audit_aux_data_ipcctl *axi = (void *)aux;
                audit_log_format(ab,
-                        "ouid=%u ogid=%u mode=%x",
+                        "ouid=%u ogid=%u mode=%#o",
                         axi->uid, axi->gid, axi->mode);
                if (axi->osid != 0) {
                        char *ctx = NULL;
@@ -968,19 +1002,13 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
        case AUDIT_IPC_SET_PERM: {
                struct audit_aux_data_ipcctl *axi = (void *)aux;
                audit_log_format(ab,
-                       "qbytes=%lx ouid=%u ogid=%u mode=%x",
+                       "qbytes=%lx ouid=%u ogid=%u mode=%#o",
                        axi->qbytes, axi->uid, axi->gid, axi->mode);
                break; }
 
        case AUDIT_EXECVE: {
                struct audit_aux_data_execve *axi = (void *)aux;
-               int i;
-               const char *p;
-               for (i = 0, p = axi->mem; i < axi->argc; i++) {
-                       audit_log_format(ab, "a%d=", i);
-                       p = audit_log_untrustedstring(ab, p);
-                       audit_log_format(ab, "\n");
-               }
+               audit_log_execve_info(ab, axi);
                break; }
 
        case AUDIT_SOCKETCALL: {
@@ -998,11 +1026,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
                audit_log_hex(ab, axs->a, axs->len);
                break; }
 
-       case AUDIT_AVC_PATH: {
-               struct audit_aux_data_path *axi = (void *)aux;
-               audit_log_d_path(ab, "path=", axi->dentry, axi->mnt);
-               break; }
-
        case AUDIT_FD_PAIR: {
                struct audit_aux_data_fd_pair *axs = (void *)aux;
                audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
@@ -1824,32 +1847,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
        return 0;
 }
 
+int audit_argv_kb = 32;
+
 int audit_bprm(struct linux_binprm *bprm)
 {
        struct audit_aux_data_execve *ax;
        struct audit_context *context = current->audit_context;
-       unsigned long p, next;
-       void *to;
 
        if (likely(!audit_enabled || !context || context->dummy))
                return 0;
 
-       ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
-                               GFP_KERNEL);
+       /*
+        * Even though the stack code doesn't limit the arg+env size any more,
+        * the audit code requires that _all_ arguments be logged in a single
+        * netlink skb. Hence cap it :-(
+        */
+       if (bprm->argv_len > (audit_argv_kb << 10))
+               return -E2BIG;
+
+       ax = kmalloc(sizeof(*ax), GFP_KERNEL);
        if (!ax)
                return -ENOMEM;
 
        ax->argc = bprm->argc;
        ax->envc = bprm->envc;
-       for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
-               struct page *page = bprm->page[p / PAGE_SIZE];
-               void *kaddr = kmap(page);
-               next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
-               memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
-               to += next - p;
-               kunmap(page);
-       }
-
+       ax->mm = bprm->mm;
        ax->d.type = AUDIT_EXECVE;
        ax->d.next = context->aux;
        context->aux = (void *)ax;
@@ -1952,36 +1974,6 @@ void __audit_ptrace(struct task_struct *t)
 }
 
 /**
- * audit_avc_path - record the granting or denial of permissions
- * @dentry: dentry to record
- * @mnt: mnt to record
- *
- * Returns 0 for success or NULL context or < 0 on error.
- *
- * Called from security/selinux/avc.c::avc_audit()
- */
-int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt)
-{
-       struct audit_aux_data_path *ax;
-       struct audit_context *context = current->audit_context;
-
-       if (likely(!context))
-               return 0;
-
-       ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-       if (!ax)
-               return -ENOMEM;
-
-       ax->dentry = dget(dentry);
-       ax->mnt = mntget(mnt);
-
-       ax->d.type = AUDIT_AVC_PATH;
-       ax->d.next = context->aux;
-       context->aux = (void *)ax;
-       return 0;
-}
-
-/**
  * audit_signal_info - record signal info for shutting down audit subsystem
  * @sig: signal value
  * @t: task being signaled
@@ -2040,7 +2032,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 
 /**
  * audit_core_dumps - record information about processes that end abnormally
- * @sig: signal value
+ * @signr: signal value
  *
  * If a process ends with a core dump, something fishy is going on and we
  * should record the event for investigation.
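
Note: with this rework, audit_bprm() only records bprm->mm, and audit_log_execve_info() later walks the argv strings in place. They sit back-to-back, each NUL-terminated, starting at mm->arg_start, and each step advances by the strnlen_user() result, which includes the terminator. A userspace sketch of that walk (hypothetical names; a flat buffer stands in for the process address space):

#include <stdio.h>
#include <string.h>

/* Walk argc NUL-terminated strings packed back-to-back, the layout
 * audit_log_execve_info() reads from mm->arg_start. */
static void log_packed_argv(const char *base, int argc)
{
        const char *p = base;
        int i;

        for (i = 0; i < argc; i++) {
                size_t len = strlen(p) + 1;  /* like strnlen_user(): count the NUL */

                printf("a%d=%s\n", i, p);
                p += len;                    /* next string starts after the NUL */
        }
}

int main(void)
{
        const char argv_area[] = "ls\0-l\0/tmp";  /* three packed strings */

        log_packed_argv(argv_area, 3);
        return 0;
}
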
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 208cf3497c10..181ae7086029 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
        write_unlock_irq(&tasklist_lock);
 }
 
+struct take_cpu_down_param {
+       unsigned long mod;
+       void *hcpu;
+};
+
 /* Take this CPU down. */
-static int take_cpu_down(void *unused)
+static int take_cpu_down(void *_param)
 {
+       struct take_cpu_down_param *param = _param;
        int err;
 
+       raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+                               param->hcpu);
        /* Ensure this CPU doesn't handle any more interrupts. */
        err = __cpu_disable();
        if (err < 0)
@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        cpumask_t old_allowed, tmp;
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+       struct take_cpu_down_param tcd_param = {
+               .mod = mod,
+               .hcpu = hcpu,
+       };
 
        if (num_online_cpus() == 1)
                return -EBUSY;
@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        set_cpus_allowed(current, tmp);
 
        mutex_lock(&cpu_bitmask_lock);
-       p = __stop_machine_run(take_cpu_down, NULL, cpu);
+       p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
        mutex_unlock(&cpu_bitmask_lock);
 
        if (IS_ERR(p) || cpu_online(cpu)) {
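
Note: __stop_machine_run() hands its callback a single void *, so the patch bundles mod and hcpu into take_cpu_down_param rather than adding globals. A generic sketch of that pack/unpack idiom (hypothetical names; run_on_cpu() merely stands in for __stop_machine_run()):

#include <stdio.h>

struct down_args {                      /* bundle everything the callback needs */
        unsigned long mod;
        void *hcpu;
};

static int take_down(void *_arg)
{
        struct down_args *arg = _arg;   /* unpack on the far side */

        printf("mod=%lu cpu=%ld\n", arg->mod, (long)arg->hcpu);
        return 0;
}

static int run_on_cpu(int (*fn)(void *), void *arg)
{
        return fn(arg);                 /* callback API only forwards one pointer */
}

int main(void)
{
        struct down_args args = { .mod = 0, .hcpu = (void *)(long)2 };

        return run_on_cpu(take_down, &args);
}
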
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4c49188cc49b..57e6448b171e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -516,7 +516,7 @@ static void cpuset_release_agent(const char *pathbuf)
        envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
        envp[i] = NULL;
 
-       call_usermodehelper(argv[0], argv, envp, 0);
+       call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
        kfree(pathbuf);
 }
 
@@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
                mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
                if (!mmarray)
                        goto done;
-               write_lock_irq(&tasklist_lock);         /* block fork */
+               read_lock(&tasklist_lock);              /* block fork */
                if (atomic_read(&cs->count) <= ntasks)
                        break;                          /* got enough */
-               write_unlock_irq(&tasklist_lock);       /* try again */
+               read_unlock(&tasklist_lock);            /* try again */
                kfree(mmarray);
        }
 
@@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
                        continue;
                mmarray[n++] = mm;
        } while_each_thread(g, p);
-       write_unlock_irq(&tasklist_lock);
+       read_unlock(&tasklist_lock);
 
        /*
         * Now that we've dropped the tasklist spinlock, we can
@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
 static int cpuset_handle_cpuhp(struct notifier_block *nb,
                                unsigned long phase, void *cpu)
 {
+       if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
+               return NOTIFY_DONE;
+
        common_cpu_mem_hotplug_unplug();
        return 0;
 }
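
Note: the update_nodemask() hunk keeps the existing allocate-then-verify loop, it just no longer needs the tasklist lock held for writing: allocate the array outside the lock, re-check the task count under it, and retry with a fresh allocation if forks raced in. A standalone sketch of that loop shape (a pthread mutex stands in for tasklist_lock; names and counters are hypothetical):

#include <stdlib.h>
#include <pthread.h>

static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
static int task_count = 4;              /* may grow between iterations */

/* Allocate outside the lock, then re-check the count under it; retry
 * with a fresh allocation if tasks were added in the meantime. */
static int *snapshot_tasks(int *out_n)
{
        int *arr;
        int n;

        for (;;) {
                pthread_mutex_lock(&count_lock);
                n = task_count;
                pthread_mutex_unlock(&count_lock);

                arr = malloc(n * sizeof(*arr));
                if (!arr)
                        return NULL;

                pthread_mutex_lock(&count_lock);
                if (task_count <= n)
                        break;          /* got enough; fill under the lock */
                pthread_mutex_unlock(&count_lock);
                free(arr);              /* raced with a fork; try again */
        }
        for (int i = 0; i < task_count; i++)
                arr[i] = i;             /* stand-in for gathering mm pointers */
        *out_n = task_count;
        pthread_mutex_unlock(&count_lock);
        return arr;
}

int main(void)
{
        int n;
        int *arr = snapshot_tasks(&n);

        free(arr);
        return arr ? 0 : 1;
}
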
diff --git a/kernel/exit.c b/kernel/exit.c
index ca6a11b73023..464c2b172f07 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,6 +31,7 @@
 #include <linux/mempolicy.h>
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
+#include <linux/freezer.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
@@ -44,6 +45,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/freezer.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -387,6 +389,11 @@ void daemonize(const char *name, ...)
         * they would be locked into memory.
         */
        exit_mm(current);
+       /*
+        * We don't want to have TIF_FREEZE set if the system-wide hibernation
+        * or suspend transition begins right now.
+        */
+       current->flags |= PF_NOFREEZE;
 
        set_special_pids(1, 1);
        proc_clear_tty(current);
@@ -588,6 +595,8 @@ static void exit_mm(struct task_struct * tsk)
        tsk->mm = NULL;
        up_read(&mm->mmap_sem);
        enter_lazy_tlb(mm, current);
+       /* We don't want this task to be frozen prematurely */
+       clear_freeze_flag(tsk);
        task_unlock(tsk);
        mmput(mm);
 }
@@ -858,6 +867,34 @@ static void exit_notify(struct task_struct *tsk)
        release_task(tsk);
 }
 
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static void check_stack_usage(void)
+{
+       static DEFINE_SPINLOCK(low_water_lock);
+       static int lowest_to_date = THREAD_SIZE;
+       unsigned long *n = end_of_stack(current);
+       unsigned long free;
+
+       while (*n == 0)
+               n++;
+       free = (unsigned long)n - (unsigned long)end_of_stack(current);
+
+       if (free >= lowest_to_date)
+               return;
+
+       spin_lock(&low_water_lock);
+       if (free < lowest_to_date) {
+               printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
+                               "left\n",
+                               current->comm, free);
+               lowest_to_date = free;
+       }
+       spin_unlock(&low_water_lock);
+}
+#else
+static inline void check_stack_usage(void) {}
+#endif
+
 fastcall NORET_TYPE void do_exit(long code)
 {
        struct task_struct *tsk = current;
@@ -937,6 +974,8 @@ fastcall NORET_TYPE void do_exit(long code)
        if (unlikely(tsk->compat_robust_list))
                compat_exit_robust_list(tsk);
 #endif
+       if (group_dead)
+               tty_audit_exit();
        if (unlikely(tsk->audit_context))
                audit_free(tsk);
 
@@ -949,6 +988,7 @@ fastcall NORET_TYPE void do_exit(long code)
        exit_sem(tsk);
        __exit_files(tsk);
        __exit_fs(tsk);
+       check_stack_usage();
        exit_thread();
        cpuset_exit(tsk);
        exit_keys(tsk);
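
Note: check_stack_usage() above leans on the fact that a fresh thread stack is zero-filled, so scanning upward from end_of_stack() for the first nonzero word tells you how many bytes were never touched. The same watermark scan in a self-contained userspace sketch (a plain array simulates the stack; names are illustrative):

#include <stdio.h>

#define THREAD_SIZE 8192

/* The first nonzero word above the stack's far end marks the deepest
 * excursion; everything below it was never written. */
static unsigned long bytes_never_used(const unsigned long *stack_end,
                                      size_t words)
{
        const unsigned long *n = stack_end;

        while (words-- && *n == 0)
                n++;
        return (unsigned long)((const char *)n - (const char *)stack_end);
}

int main(void)
{
        unsigned long stack[THREAD_SIZE / sizeof(unsigned long)] = { 0 };

        /* pretend the deepest frame landed 1 KiB above the end */
        stack[1024 / sizeof(unsigned long)] = 0xdeadbeef;
        printf("greatest stack depth: %lu bytes left\n",
               bytes_never_used(stack, sizeof(stack) / sizeof(stack[0])));
        return 0;
}
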
diff --git a/kernel/fork.c b/kernel/fork.c
index da3a155bba0d..7332e236d367 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
+#include <linux/tty.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -136,7 +137,7 @@ void __init fork_init(unsigned long mempages)
        /* create a slab on which task_structs can be allocated */
        task_struct_cachep =
                kmem_cache_create("task_struct", sizeof(struct task_struct),
-                       ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+                       ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
 #endif
 
        /*
@@ -333,6 +334,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
+       mm->flags = (current->mm) ? current->mm->flags
+                                 : MMF_DUMP_FILTER_DEFAULT;
        mm->core_waiters = 0;
        mm->nr_ptes = 0;
        set_mm_counter(mm, file_rss, 0);
@@ -897,6 +900,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
        }
        acct_init_pacct(&sig->pacct);
 
+       tty_audit_fork(sig);
+
        return 0;
 }
 
@@ -920,7 +925,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
 
-       new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
+       new_flags &= ~PF_SUPERPRIV;
        new_flags |= PF_FORKNOEXEC;
        if (!(clone_flags & CLONE_PTRACE))
                p->ptrace = 0;
@@ -999,7 +1004,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (atomic_read(&p->user->processes) >=
                        p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-                               p->user != &root_user)
+                   p->user != current->nsproxy->user_ns->root_user)
                        goto bad_fork_free;
        }
 
@@ -1059,6 +1064,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        p->lock_depth = -1;             /* -1 = no lock */
        do_posix_clock_monotonic_gettime(&p->start_time);
+       p->real_start_time = p->start_time;
+       monotonic_to_bootbased(&p->real_start_time);
        p->security = NULL;
        p->io_context = NULL;
        p->io_wait = NULL;
@@ -1439,22 +1446,22 @@ void __init proc_caches_init(void)
        sighand_cachep = kmem_cache_create("sighand_cache",
                        sizeof(struct sighand_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
-                       sighand_ctor, NULL);
+                       sighand_ctor);
        signal_cachep = kmem_cache_create("signal_cache",
                        sizeof(struct signal_struct), 0,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
        files_cachep = kmem_cache_create("files_cache",
                        sizeof(struct files_struct), 0,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
        fs_cachep = kmem_cache_create("fs_cache",
                        sizeof(struct fs_struct), 0,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
        vm_area_cachep = kmem_cache_create("vm_area_struct",
                        sizeof(struct vm_area_struct), 0,
-                       SLAB_PANIC, NULL, NULL);
+                       SLAB_PANIC, NULL);
        mm_cachep = kmem_cache_create("mm_struct",
                        sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 }
 
 /*
@@ -1601,7 +1608,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
        err = -EINVAL;
        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                               CLONE_NEWUTS|CLONE_NEWIPC))
+                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
                goto bad_unshare_out;
 
        if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/futex.c b/kernel/futex.c
index 45490bec5831..a12425051ee9 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 static struct vfsmount *futex_mnt;
 
 /*
+ * Take mm->mmap_sem, when futex is shared
+ */
+static inline void futex_lock_mm(struct rw_semaphore *fshared)
+{
+       if (fshared)
+               down_read(fshared);
+}
+
+/*
+ * Release mm->mmap_sem, when the futex is shared
+ */
+static inline void futex_unlock_mm(struct rw_semaphore *fshared)
+{
+       if (fshared)
+               up_read(fshared);
+}
+
+/*
  * We hash on the keys returned from get_futex_key (see below).
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key)
 }
 EXPORT_SYMBOL_GPL(drop_futex_key_refs);
 
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+{
+       u32 curval;
+
+       pagefault_disable();
+       curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+       pagefault_enable();
+
+       return curval;
+}
+
+static int get_futex_value_locked(u32 *dest, u32 __user *from)
 {
        int ret;
 
@@ -317,15 +346,20 @@ static int futex_handle_fault(unsigned long address,
        vma = find_vma(mm, address);
        if (vma && address >= vma->vm_start &&
            (vma->vm_flags & VM_WRITE)) {
-               switch (handle_mm_fault(mm, vma, address, 1)) {
-               case VM_FAULT_MINOR:
-                       ret = 0;
-                       current->min_flt++;
-                       break;
-               case VM_FAULT_MAJOR:
+               int fault;
+               fault = handle_mm_fault(mm, vma, address, 1);
+               if (unlikely((fault & VM_FAULT_ERROR))) {
+#if 0
+                       /* XXX: let's do this when we verify it is OK */
+                       if (ret & VM_FAULT_OOM)
+                               ret = -ENOMEM;
+#endif
+               } else {
                        ret = 0;
-                       current->maj_flt++;
-                       break;
+                       if (fault & VM_FAULT_MAJOR)
+                               current->maj_flt++;
+                       else
+                               current->min_flt++;
                }
        }
        if (!fshared)
@@ -620,9 +654,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 
        newval = FUTEX_WAITERS | new_owner->pid;
 
-       pagefault_disable();
-       curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-       pagefault_enable();
+       curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
        if (curval == -EFAULT)
                ret = -EFAULT;
@@ -659,9 +691,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
         * There is no waiter, so we unlock the futex. The owner died
         * bit has not to be preserved here. We are the owner:
         */
-       pagefault_disable();
-       oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
-       pagefault_enable();
+       oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
 
        if (oldval == -EFAULT)
                return oldval;
@@ -700,8 +730,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
        union futex_key key;
        int ret;
 
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
@@ -725,8 +754,7 @@
 
        spin_unlock(&hb->lock);
 out:
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
        return ret;
 }
 
@@ -746,8 +774,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
        int ret, op_ret, attempt = 0;
 
 retryfull:
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
@@ -793,7 +820,7 @@ retry:
         */
        if (attempt++) {
                ret = futex_handle_fault((unsigned long)uaddr2,
-                                       fshared, attempt);
+                                        fshared, attempt);
                if (ret)
                        goto out;
                goto retry;
@@ -803,8 +830,7 @@ retry:
         * If we would have faulted, release mmap_sem,
         * fault it in and start all over again.
         */
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        ret = get_user(dummy, uaddr2);
        if (ret)
@@ -841,8 +867,8 @@ retry:
        if (hb1 != hb2)
                spin_unlock(&hb2->lock);
 out:
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
+
        return ret;
 }
 
@@ -861,8 +887,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
        int ret, drop_count = 0;
 
 retry:
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
@@ -890,8 +915,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
                 * If we would have faulted, release mmap_sem, fault
                 * it in and start all over again.
                 */
-               if (fshared)
-                       up_read(fshared);
+               futex_unlock_mm(fshared);
 
                ret = get_user(curval, uaddr1);
 
@@ -944,8 +968,7 @@ out_unlock:
        drop_futex_key_refs(&key1);
 
 out:
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
        return ret;
 }
 
@@ -1113,10 +1136,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
        while (!ret) {
                newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-               pagefault_disable();
-               curval = futex_atomic_cmpxchg_inatomic(uaddr,
-                                                      uval, newval);
-               pagefault_enable();
+               curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
                if (curval == -EFAULT)
                        ret = -EFAULT;
@@ -1134,6 +1154,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 #define ARG3_SHARED  1
 
 static long futex_wait_restart(struct restart_block *restart);
+
 static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                      u32 val, ktime_t *abs_time)
 {
@@ -1148,8 +1169,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 
        q.pi_state = NULL;
 retry:
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
@@ -1186,8 +1206,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
         * If we would have faulted, release mmap_sem, fault it in and
         * start all over again.
         */
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        ret = get_user(uval, uaddr);
 
@@ -1206,8 +1225,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
         * Now the futex is queued and we have checked the data, we
         * don't want to hold mmap_sem while we sleep.
         */
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        /*
        * There might have been scheduling since the queue_me(), as we
@@ -1285,8 +1303,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 
 out_release_sem:
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
        return ret;
 }
 
@@ -1333,8 +1350,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
        q.pi_state = NULL;
 retry:
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
@@ -1353,9 +1369,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         */
        newval = current->pid;
 
-       pagefault_disable();
-       curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
-       pagefault_enable();
+       curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;
@@ -1398,9 +1412,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                lock_taken = 1;
        }
 
-       pagefault_disable();
-       curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-       pagefault_enable();
+       curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;
@@ -1428,8 +1440,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                 * exit to complete.
                 */
                queue_unlock(&q, hb);
-               if (fshared)
-                       up_read(fshared);
+               futex_unlock_mm(fshared);
                cond_resched();
                goto retry;
 
@@ -1465,8 +1476,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         * Now the futex is queued and we have checked the data, we
         * don't want to hold mmap_sem while we sleep.
         */
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        WARN_ON(!q.pi_state);
        /*
@@ -1480,8 +1490,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                ret = ret ? 0 : -EWOULDBLOCK;
        }
 
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
        spin_lock(q.lock_ptr);
 
        if (!ret) {
@@ -1518,8 +1527,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
@@ -1527,8 +1535,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 
 out_release_sem:
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
        return ret;
 
 uaddr_faulted:
@@ -1550,8 +1557,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                goto retry_unlocked;
        }
 
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
@@ -1585,8 +1591,7 @@ retry:
        /*
         * First take all the futex related locks:
         */
-       if (fshared)
-               down_read(fshared);
+       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
@@ -1601,11 +1606,9 @@ retry_unlocked:
         * again. If it succeeds then we can return without waking
         * anyone else up:
         */
-       if (!(uval & FUTEX_OWNER_DIED)) {
-               pagefault_disable();
-               uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-               pagefault_enable();
-       }
+       if (!(uval & FUTEX_OWNER_DIED))
+               uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+
 
        if (unlikely(uval == -EFAULT))
                goto pi_faulted;
@@ -1647,8 +1650,7 @@ retry_unlocked:
 out_unlock:
        spin_unlock(&hb->lock);
 out:
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        return ret;
 
@@ -1671,8 +1673,7 @@ pi_faulted:
                goto retry_unlocked;
        }
 
-       if (fshared)
-               up_read(fshared);
+       futex_unlock_mm(fshared);
 
        ret = get_user(uval, uaddr);
       if (!ret && (uval != -EFAULT))
| @@ -1729,8 +1730,8 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
| 1729 | 1730 | ||
| 1730 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { | 1731 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { |
| 1731 | printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " | 1732 | printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " |
| 1732 | "will be removed from the kernel in June 2007\n", | 1733 | "will be removed from the kernel in June 2007\n", |
| 1733 | current->comm); | 1734 | current->comm); |
| 1734 | } | 1735 | } |
| 1735 | 1736 | ||
| 1736 | ret = -EINVAL; | 1737 | ret = -EINVAL; |
| @@ -1908,10 +1909,8 @@ retry: | |||
| 1908 | * Wake robust non-PI futexes here. The wakeup of | 1909 | * Wake robust non-PI futexes here. The wakeup of |
| 1909 | * PI futexes happens in exit_pi_state(): | 1910 | * PI futexes happens in exit_pi_state(): |
| 1910 | */ | 1911 | */ |
| 1911 | if (!pi) { | 1912 | if (!pi && (uval & FUTEX_WAITERS)) |
| 1912 | if (uval & FUTEX_WAITERS) | ||
| 1913 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); | 1913 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); |
| 1914 | } | ||
| 1915 | } | 1914 | } |
| 1916 | return 0; | 1915 | return 0; |
| 1917 | } | 1916 | } |
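
The futex hunks above are mechanical: every open-coded `if (fshared) down_read(fshared)` / `up_read(fshared)` pair becomes futex_lock_mm()/futex_unlock_mm(), and each pagefault_disable()/futex_atomic_cmpxchg_inatomic()/pagefault_enable() triple becomes cmpxchg_futex_value_locked(). A minimal sketch of those helpers, inferred directly from the call sites they replace (their definitions are presumably added earlier in this patch, outside the hunks shown):

static inline void futex_lock_mm(struct rw_semaphore *fshared)
{
	/* fshared is NULL for process-private futexes: nothing to take */
	if (fshared)
		down_read(fshared);
}

static inline void futex_unlock_mm(struct rw_semaphore *fshared)
{
	if (fshared)
		up_read(fshared);
}

static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
{
	u32 curval;

	/* same atomic user access as before, with page faults disabled */
	pagefault_disable();
	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
	pagefault_enable();

	return curval;
}

Centralizing the NULL check keeps the private-futex fast path (no mmap_sem traffic at all) in one place instead of a dozen.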
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 23c03f43e196..eb1ddebd2c04 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
| @@ -558,7 +558,8 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
| 558 | */ | 558 | */ |
| 559 | static int hrtimer_switch_to_hres(void) | 559 | static int hrtimer_switch_to_hres(void) |
| 560 | { | 560 | { |
| 561 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | 561 | int cpu = smp_processor_id(); |
| 562 | struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); | ||
| 562 | unsigned long flags; | 563 | unsigned long flags; |
| 563 | 564 | ||
| 564 | if (base->hres_active) | 565 | if (base->hres_active) |
| @@ -568,6 +569,8 @@ static int hrtimer_switch_to_hres(void) | |||
| 568 | 569 | ||
| 569 | if (tick_init_highres()) { | 570 | if (tick_init_highres()) { |
| 570 | local_irq_restore(flags); | 571 | local_irq_restore(flags); |
| 572 | printk(KERN_WARNING "Could not switch to high resolution " | ||
| 573 | "mode on CPU %d\n", cpu); | ||
| 571 | return 0; | 574 | return 0; |
| 572 | } | 575 | } |
| 573 | base->hres_active = 1; | 576 | base->hres_active = 1; |
| @@ -683,6 +686,7 @@ static void enqueue_hrtimer(struct hrtimer *timer, | |||
| 683 | struct rb_node **link = &base->active.rb_node; | 686 | struct rb_node **link = &base->active.rb_node; |
| 684 | struct rb_node *parent = NULL; | 687 | struct rb_node *parent = NULL; |
| 685 | struct hrtimer *entry; | 688 | struct hrtimer *entry; |
| 689 | int leftmost = 1; | ||
| 686 | 690 | ||
| 687 | /* | 691 | /* |
| 688 | * Find the right place in the rbtree: | 692 | * Find the right place in the rbtree: |
| @@ -694,18 +698,19 @@ static void enqueue_hrtimer(struct hrtimer *timer, | |||
| 694 | * We don't care about collisions. Nodes with | 698 | * We don't care about collisions. Nodes with |
| 695 | * the same expiry time stay together. | 699 | * the same expiry time stay together. |
| 696 | */ | 700 | */ |
| 697 | if (timer->expires.tv64 < entry->expires.tv64) | 701 | if (timer->expires.tv64 < entry->expires.tv64) { |
| 698 | link = &(*link)->rb_left; | 702 | link = &(*link)->rb_left; |
| 699 | else | 703 | } else { |
| 700 | link = &(*link)->rb_right; | 704 | link = &(*link)->rb_right; |
| 705 | leftmost = 0; | ||
| 706 | } | ||
| 701 | } | 707 | } |
| 702 | 708 | ||
| 703 | /* | 709 | /* |
| 704 | * Insert the timer to the rbtree and check whether it | 710 | * Insert the timer to the rbtree and check whether it |
| 705 | * replaces the first pending timer | 711 | * replaces the first pending timer |
| 706 | */ | 712 | */ |
| 707 | if (!base->first || timer->expires.tv64 < | 713 | if (leftmost) { |
| 708 | rb_entry(base->first, struct hrtimer, node)->expires.tv64) { | ||
| 709 | /* | 714 | /* |
| 710 | * Reprogram the clock event device. When the timer is already | 715 | * Reprogram the clock event device. When the timer is already |
| 711 | * expired hrtimer_enqueue_reprogram has either called the | 716 | * expired hrtimer_enqueue_reprogram has either called the |
| @@ -1406,7 +1411,7 @@ static void migrate_hrtimers(int cpu) | |||
| 1406 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | 1411 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, |
| 1407 | unsigned long action, void *hcpu) | 1412 | unsigned long action, void *hcpu) |
| 1408 | { | 1413 | { |
| 1409 | long cpu = (long)hcpu; | 1414 | unsigned int cpu = (long)hcpu; |
| 1410 | 1415 | ||
| 1411 | switch (action) { | 1416 | switch (action) { |
| 1412 | 1417 | ||
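
The enqueue_hrtimer() change above tracks, during the rbtree descent, whether the new timer ever had to step right; if it never did, it is the new leftmost (soonest-expiring) entry and the clock event device needs reprogramming. That replaces the old post-insert comparison against base->first. The pattern in isolation, as a generic sketch (the function and callback names are illustrative, not kernel API):

#include <linux/rbtree.h>

/* sketch: insert a node into an rbtree ordered by 'key', reporting
 * whether it became the leftmost entry -- any step to the right
 * during the descent proves something sorts before it */
static int rb_insert_report_leftmost(struct rb_root *root,
				     struct rb_node *node, u64 key,
				     u64 (*key_of)(struct rb_node *))
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		if (key < key_of(parent)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;	/* an earlier entry exists */
		}
	}
	rb_link_node(node, parent, link);
	rb_insert_color(node, root);
	return leftmost;
}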
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index b4f1674fca79..50b81b98046a 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -19,7 +19,15 @@ static struct proc_dir_entry *root_irq_dir; | |||
| 19 | static int irq_affinity_read_proc(char *page, char **start, off_t off, | 19 | static int irq_affinity_read_proc(char *page, char **start, off_t off, |
| 20 | int count, int *eof, void *data) | 20 | int count, int *eof, void *data) |
| 21 | { | 21 | { |
| 22 | int len = cpumask_scnprintf(page, count, irq_desc[(long)data].affinity); | 22 | struct irq_desc *desc = irq_desc + (long)data; |
| 23 | cpumask_t *mask = &desc->affinity; | ||
| 24 | int len; | ||
| 25 | |||
| 26 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 27 | if (desc->status & IRQ_MOVE_PENDING) | ||
| 28 | mask = &desc->pending_mask; | ||
| 29 | #endif | ||
| 30 | len = cpumask_scnprintf(page, count, *mask); | ||
| 23 | 31 | ||
| 24 | if (count - len < 2) | 32 | if (count - len < 2) |
| 25 | return -EINVAL; | 33 | return -EINVAL; |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index bd9e272d55e9..32b161972fad 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
| @@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 172 | irqreturn_t action_ret) | 172 | irqreturn_t action_ret) |
| 173 | { | 173 | { |
| 174 | if (unlikely(action_ret != IRQ_HANDLED)) { | 174 | if (unlikely(action_ret != IRQ_HANDLED)) { |
| 175 | desc->irqs_unhandled++; | 175 | /* |
| 176 | * If we are seeing only the odd spurious IRQ caused by | ||
| 177 | * bus asynchronicity then don't eventually trigger an error; | ||
| 178 | * otherwise the counter becomes a doomsday timer for otherwise | ||
| 179 | * working systems | ||
| 180 | */ | ||
| 181 | if (jiffies - desc->last_unhandled > HZ/10) | ||
| 182 | desc->irqs_unhandled = 1; | ||
| 183 | else | ||
| 184 | desc->irqs_unhandled++; | ||
| 185 | desc->last_unhandled = jiffies; | ||
| 176 | if (unlikely(action_ret != IRQ_NONE)) | 186 | if (unlikely(action_ret != IRQ_NONE)) |
| 177 | report_bad_irq(irq, desc, action_ret); | 187 | report_bad_irq(irq, desc, action_ret); |
| 178 | } | 188 | } |
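
The effect: an unhandled interrupt only advances the counter when it follows the previous one within HZ/10 jiffies (100ms); a lone glitch resets the count to 1, so only a sustained stream can ever reach the threshold at which the line is disabled. The decay logic in isolation (the threshold value is an assumption for illustration, not taken from this hunk):

/* sketch: the counter grows only while unhandled IRQs arrive less
 * than 100ms apart; an isolated glitch restarts it at 1 */
static unsigned int irqs_unhandled;
static unsigned long last_unhandled;

static int note_unhandled(unsigned long now)
{
	if (now - last_unhandled > HZ / 10)
		irqs_unhandled = 1;	/* stale history: start over */
	else
		irqs_unhandled++;	/* part of an ongoing burst */
	last_unhandled = now;

	return irqs_unhandled > 99900;	/* assumed disable threshold */
}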
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fed54418626c..474219a41929 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
| @@ -152,7 +152,7 @@ static unsigned int get_symbol_offset(unsigned long pos) | |||
| 152 | /* Lookup the address for this symbol. Returns 0 if not found. */ | 152 | /* Lookup the address for this symbol. Returns 0 if not found. */ |
| 153 | unsigned long kallsyms_lookup_name(const char *name) | 153 | unsigned long kallsyms_lookup_name(const char *name) |
| 154 | { | 154 | { |
| 155 | char namebuf[KSYM_NAME_LEN+1]; | 155 | char namebuf[KSYM_NAME_LEN]; |
| 156 | unsigned long i; | 156 | unsigned long i; |
| 157 | unsigned int off; | 157 | unsigned int off; |
| 158 | 158 | ||
| @@ -248,7 +248,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
| 248 | { | 248 | { |
| 249 | const char *msym; | 249 | const char *msym; |
| 250 | 250 | ||
| 251 | namebuf[KSYM_NAME_LEN] = 0; | 251 | namebuf[KSYM_NAME_LEN - 1] = 0; |
| 252 | namebuf[0] = 0; | 252 | namebuf[0] = 0; |
| 253 | 253 | ||
| 254 | if (is_ksym_addr(addr)) { | 254 | if (is_ksym_addr(addr)) { |
| @@ -265,7 +265,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
| 265 | /* see if it's in a module */ | 265 | /* see if it's in a module */ |
| 266 | msym = module_address_lookup(addr, symbolsize, offset, modname); | 266 | msym = module_address_lookup(addr, symbolsize, offset, modname); |
| 267 | if (msym) | 267 | if (msym) |
| 268 | return strncpy(namebuf, msym, KSYM_NAME_LEN); | 268 | return strncpy(namebuf, msym, KSYM_NAME_LEN - 1); |
| 269 | 269 | ||
| 270 | return NULL; | 270 | return NULL; |
| 271 | } | 271 | } |
| @@ -273,7 +273,7 @@ const char *kallsyms_lookup(unsigned long addr, | |||
| 273 | int lookup_symbol_name(unsigned long addr, char *symname) | 273 | int lookup_symbol_name(unsigned long addr, char *symname) |
| 274 | { | 274 | { |
| 275 | symname[0] = '\0'; | 275 | symname[0] = '\0'; |
| 276 | symname[KSYM_NAME_LEN] = '\0'; | 276 | symname[KSYM_NAME_LEN - 1] = '\0'; |
| 277 | 277 | ||
| 278 | if (is_ksym_addr(addr)) { | 278 | if (is_ksym_addr(addr)) { |
| 279 | unsigned long pos; | 279 | unsigned long pos; |
| @@ -291,7 +291,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
| 291 | unsigned long *offset, char *modname, char *name) | 291 | unsigned long *offset, char *modname, char *name) |
| 292 | { | 292 | { |
| 293 | name[0] = '\0'; | 293 | name[0] = '\0'; |
| 294 | name[KSYM_NAME_LEN] = '\0'; | 294 | name[KSYM_NAME_LEN - 1] = '\0'; |
| 295 | 295 | ||
| 296 | if (is_ksym_addr(addr)) { | 296 | if (is_ksym_addr(addr)) { |
| 297 | unsigned long pos; | 297 | unsigned long pos; |
| @@ -312,18 +312,17 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
| 312 | char *modname; | 312 | char *modname; |
| 313 | const char *name; | 313 | const char *name; |
| 314 | unsigned long offset, size; | 314 | unsigned long offset, size; |
| 315 | char namebuf[KSYM_NAME_LEN+1]; | 315 | char namebuf[KSYM_NAME_LEN]; |
| 316 | 316 | ||
| 317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | 317 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); |
| 318 | if (!name) | 318 | if (!name) |
| 319 | return sprintf(buffer, "0x%lx", address); | 319 | return sprintf(buffer, "0x%lx", address); |
| 320 | else { | 320 | |
| 321 | if (modname) | 321 | if (modname) |
| 322 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, | 322 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, |
| 323 | size, modname); | 323 | size, modname); |
| 324 | else | 324 | else |
| 325 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); | 325 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); |
| 326 | } | ||
| 327 | } | 326 | } |
| 328 | 327 | ||
| 329 | /* Look up a kernel symbol and print it to the kernel messages. */ | 328 | /* Look up a kernel symbol and print it to the kernel messages. */ |
| @@ -343,8 +342,8 @@ struct kallsym_iter | |||
| 343 | unsigned long value; | 342 | unsigned long value; |
| 344 | unsigned int nameoff; /* If iterating in core kernel symbols */ | 343 | unsigned int nameoff; /* If iterating in core kernel symbols */ |
| 345 | char type; | 344 | char type; |
| 346 | char name[KSYM_NAME_LEN+1]; | 345 | char name[KSYM_NAME_LEN]; |
| 347 | char module_name[MODULE_NAME_LEN + 1]; | 346 | char module_name[MODULE_NAME_LEN]; |
| 348 | int exported; | 347 | int exported; |
| 349 | }; | 348 | }; |
| 350 | 349 | ||
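
These hunks flip the convention for KSYM_NAME_LEN: it now counts the terminating NUL, so buffers are declared char[KSYM_NAME_LEN], the terminator lives at index KSYM_NAME_LEN - 1, and strncpy() copy lengths shrink by one to match. The two conventions side by side, as a sketch:

#include <linux/kallsyms.h>
#include <linux/string.h>

void old_convention(const char *name)
{
	char buf[KSYM_NAME_LEN + 1];	/* old: length excluded the NUL */

	buf[KSYM_NAME_LEN] = '\0';
	strncpy(buf, name, KSYM_NAME_LEN);
}

void new_convention(const char *name)
{
	char buf[KSYM_NAME_LEN];	/* new: declared size is the size */

	buf[KSYM_NAME_LEN - 1] = '\0';
	strncpy(buf, name, KSYM_NAME_LEN - 1);	/* NUL stays intact */
}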
diff --git a/kernel/kfifo.c b/kernel/kfifo.c index cee419143fd4..bc41ad0f24f8 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
| 26 | #include <linux/kfifo.h> | 26 | #include <linux/kfifo.h> |
| 27 | #include <linux/log2.h> | ||
| 27 | 28 | ||
| 28 | /** | 29 | /** |
| 29 | * kfifo_init - allocates a new FIFO using a preallocated buffer | 30 | * kfifo_init - allocates a new FIFO using a preallocated buffer |
| @@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, | |||
| 41 | struct kfifo *fifo; | 42 | struct kfifo *fifo; |
| 42 | 43 | ||
| 43 | /* size must be a power of 2 */ | 44 | /* size must be a power of 2 */ |
| 44 | BUG_ON(size & (size - 1)); | 45 | BUG_ON(!is_power_of_2(size)); |
| 45 | 46 | ||
| 46 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); | 47 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); |
| 47 | if (!fifo) | 48 | if (!fifo) |
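
is_power_of_2() from the newly included <linux/log2.h> expresses the same bit trick the BUG_ON used to open-code, with one behavioral difference: size 0 also satisfies (size & (size - 1)) == 0, so the old check silently accepted it, while the new one trips the BUG_ON. The helper is roughly:

/* from <linux/log2.h>: true only for 1, 2, 4, 8, ... -- note that
 * n == 0 is explicitly rejected, unlike the open-coded test */
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}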
diff --git a/kernel/kmod.c b/kernel/kmod.c index 4d32eb077179..beedbdc64608 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -33,6 +33,8 @@ | |||
| 33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
| 34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
| 35 | #include <linux/resource.h> | 35 | #include <linux/resource.h> |
| 36 | #include <linux/notifier.h> | ||
| 37 | #include <linux/suspend.h> | ||
| 36 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
| 37 | 39 | ||
| 38 | extern int max_threads; | 40 | extern int max_threads; |
| @@ -119,9 +121,10 @@ struct subprocess_info { | |||
| 119 | char **argv; | 121 | char **argv; |
| 120 | char **envp; | 122 | char **envp; |
| 121 | struct key *ring; | 123 | struct key *ring; |
| 122 | int wait; | 124 | enum umh_wait wait; |
| 123 | int retval; | 125 | int retval; |
| 124 | struct file *stdin; | 126 | struct file *stdin; |
| 127 | void (*cleanup)(char **argv, char **envp); | ||
| 125 | }; | 128 | }; |
| 126 | 129 | ||
| 127 | /* | 130 | /* |
| @@ -180,6 +183,14 @@ static int ____call_usermodehelper(void *data) | |||
| 180 | do_exit(0); | 183 | do_exit(0); |
| 181 | } | 184 | } |
| 182 | 185 | ||
| 186 | void call_usermodehelper_freeinfo(struct subprocess_info *info) | ||
| 187 | { | ||
| 188 | if (info->cleanup) | ||
| 189 | (*info->cleanup)(info->argv, info->envp); | ||
| 190 | kfree(info); | ||
| 191 | } | ||
| 192 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); | ||
| 193 | |||
| 183 | /* Keventd can't block, but this (a child) can. */ | 194 | /* Keventd can't block, but this (a child) can. */ |
| 184 | static int wait_for_helper(void *data) | 195 | static int wait_for_helper(void *data) |
| 185 | { | 196 | { |
| @@ -216,8 +227,8 @@ static int wait_for_helper(void *data) | |||
| 216 | sub_info->retval = ret; | 227 | sub_info->retval = ret; |
| 217 | } | 228 | } |
| 218 | 229 | ||
| 219 | if (sub_info->wait < 0) | 230 | if (sub_info->wait == UMH_NO_WAIT) |
| 220 | kfree(sub_info); | 231 | call_usermodehelper_freeinfo(sub_info); |
| 221 | else | 232 | else |
| 222 | complete(sub_info->complete); | 233 | complete(sub_info->complete); |
| 223 | return 0; | 234 | return 0; |
| @@ -229,34 +240,204 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 229 | struct subprocess_info *sub_info = | 240 | struct subprocess_info *sub_info = |
| 230 | container_of(work, struct subprocess_info, work); | 241 | container_of(work, struct subprocess_info, work); |
| 231 | pid_t pid; | 242 | pid_t pid; |
| 232 | int wait = sub_info->wait; | 243 | enum umh_wait wait = sub_info->wait; |
| 233 | 244 | ||
| 234 | /* CLONE_VFORK: wait until the usermode helper has execve'd | 245 | /* CLONE_VFORK: wait until the usermode helper has execve'd |
| 235 | * successfully. We need the data structures to stay around | 246 | * successfully. We need the data structures to stay around |
| 236 | * until that is done. */ | 247 | * until that is done. */ |
| 237 | if (wait) | 248 | if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT) |
| 238 | pid = kernel_thread(wait_for_helper, sub_info, | 249 | pid = kernel_thread(wait_for_helper, sub_info, |
| 239 | CLONE_FS | CLONE_FILES | SIGCHLD); | 250 | CLONE_FS | CLONE_FILES | SIGCHLD); |
| 240 | else | 251 | else |
| 241 | pid = kernel_thread(____call_usermodehelper, sub_info, | 252 | pid = kernel_thread(____call_usermodehelper, sub_info, |
| 242 | CLONE_VFORK | SIGCHLD); | 253 | CLONE_VFORK | SIGCHLD); |
| 243 | 254 | ||
| 244 | if (wait < 0) | 255 | switch (wait) { |
| 245 | return; | 256 | case UMH_NO_WAIT: |
| 257 | break; | ||
| 246 | 258 | ||
| 247 | if (pid < 0) { | 259 | case UMH_WAIT_PROC: |
| 260 | if (pid > 0) | ||
| 261 | break; | ||
| 248 | sub_info->retval = pid; | 262 | sub_info->retval = pid; |
| 263 | /* FALLTHROUGH */ | ||
| 264 | |||
| 265 | case UMH_WAIT_EXEC: | ||
| 249 | complete(sub_info->complete); | 266 | complete(sub_info->complete); |
| 250 | } else if (!wait) | 267 | } |
| 251 | complete(sub_info->complete); | 268 | } |
| 269 | |||
| 270 | #ifdef CONFIG_PM | ||
| 271 | /* | ||
| 272 | * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY | ||
| 273 | * (used for preventing user land processes from being created after the user | ||
| 274 | * land has been frozen during a system-wide hibernation or suspend operation). | ||
| 275 | */ | ||
| 276 | static int usermodehelper_disabled; | ||
| 277 | |||
| 278 | /* Number of helpers running */ | ||
| 279 | static atomic_t running_helpers = ATOMIC_INIT(0); | ||
| 280 | |||
| 281 | /* | ||
| 282 | * Wait queue head used by usermodehelper_pm_callback() to wait for all running | ||
| 283 | * helpers to finish. | ||
| 284 | */ | ||
| 285 | static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); | ||
| 286 | |||
| 287 | /* | ||
| 288 | * Time to wait for running_helpers to become zero before the setting of | ||
| 289 | * usermodehelper_disabled in usermodehelper_pm_callback() fails | ||
| 290 | */ | ||
| 291 | #define RUNNING_HELPERS_TIMEOUT (5 * HZ) | ||
| 292 | |||
| 293 | static int usermodehelper_pm_callback(struct notifier_block *nfb, | ||
| 294 | unsigned long action, | ||
| 295 | void *ignored) | ||
| 296 | { | ||
| 297 | long retval; | ||
| 298 | |||
| 299 | switch (action) { | ||
| 300 | case PM_HIBERNATION_PREPARE: | ||
| 301 | case PM_SUSPEND_PREPARE: | ||
| 302 | usermodehelper_disabled = 1; | ||
| 303 | smp_mb(); | ||
| 304 | /* | ||
| 305 | * From now on call_usermodehelper_exec() won't start any new | ||
| 306 | * helpers, so it is sufficient if running_helpers turns out to | ||
| 307 | * be zero at one point (it may be increased later, but that | ||
| 308 | * doesn't matter). | ||
| 309 | */ | ||
| 310 | retval = wait_event_timeout(running_helpers_waitq, | ||
| 311 | atomic_read(&running_helpers) == 0, | ||
| 312 | RUNNING_HELPERS_TIMEOUT); | ||
| 313 | if (retval) { | ||
| 314 | return NOTIFY_OK; | ||
| 315 | } else { | ||
| 316 | usermodehelper_disabled = 0; | ||
| 317 | return NOTIFY_BAD; | ||
| 318 | } | ||
| 319 | case PM_POST_HIBERNATION: | ||
| 320 | case PM_POST_SUSPEND: | ||
| 321 | usermodehelper_disabled = 0; | ||
| 322 | return NOTIFY_OK; | ||
| 323 | } | ||
| 324 | |||
| 325 | return NOTIFY_DONE; | ||
| 326 | } | ||
| 327 | |||
| 328 | static void helper_lock(void) | ||
| 329 | { | ||
| 330 | atomic_inc(&running_helpers); | ||
| 331 | smp_mb__after_atomic_inc(); | ||
| 332 | } | ||
| 333 | |||
| 334 | static void helper_unlock(void) | ||
| 335 | { | ||
| 336 | if (atomic_dec_and_test(&running_helpers)) | ||
| 337 | wake_up(&running_helpers_waitq); | ||
| 338 | } | ||
| 339 | |||
| 340 | static void register_pm_notifier_callback(void) | ||
| 341 | { | ||
| 342 | pm_notifier(usermodehelper_pm_callback, 0); | ||
| 252 | } | 343 | } |
| 344 | #else /* CONFIG_PM */ | ||
| 345 | #define usermodehelper_disabled 0 | ||
| 346 | |||
| 347 | static inline void helper_lock(void) {} | ||
| 348 | static inline void helper_unlock(void) {} | ||
| 349 | static inline void register_pm_notifier_callback(void) {} | ||
| 350 | #endif /* CONFIG_PM */ | ||
| 253 | 351 | ||
| 254 | /** | 352 | /** |
| 255 | * call_usermodehelper_keys - start a usermode application | 353 | * call_usermodehelper_setup - prepare to call a usermode helper |
| 256 | * @path: pathname for the application | 354 | * @path: path to usermode executable |
| 257 | * @argv: null-terminated argument list | 355 | * @argv: arg vector for process |
| 258 | * @envp: null-terminated environment list | 356 | * @envp: environment for process |
| 259 | * @session_keyring: session keyring for process (NULL for an empty keyring) | 357 | * |
| 358 | * Returns either NULL on allocation failure, or a subprocess_info | ||
| 359 | * structure. This should be passed to call_usermodehelper_exec to | ||
| 360 | * exec the process and free the structure. | ||
| 361 | */ | ||
| 362 | struct subprocess_info *call_usermodehelper_setup(char *path, | ||
| 363 | char **argv, char **envp) | ||
| 364 | { | ||
| 365 | struct subprocess_info *sub_info; | ||
| 366 | sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); | ||
| 367 | if (!sub_info) | ||
| 368 | goto out; | ||
| 369 | |||
| 370 | INIT_WORK(&sub_info->work, __call_usermodehelper); | ||
| 371 | sub_info->path = path; | ||
| 372 | sub_info->argv = argv; | ||
| 373 | sub_info->envp = envp; | ||
| 374 | |||
| 375 | out: | ||
| 376 | return sub_info; | ||
| 377 | } | ||
| 378 | EXPORT_SYMBOL(call_usermodehelper_setup); | ||
| 379 | |||
| 380 | /** | ||
| 381 | * call_usermodehelper_setkeys - set the session keys for usermode helper | ||
| 382 | * @info: a subprocess_info returned by call_usermodehelper_setup | ||
| 383 | * @session_keyring: the session keyring for the process | ||
| 384 | */ | ||
| 385 | void call_usermodehelper_setkeys(struct subprocess_info *info, | ||
| 386 | struct key *session_keyring) | ||
| 387 | { | ||
| 388 | info->ring = session_keyring; | ||
| 389 | } | ||
| 390 | EXPORT_SYMBOL(call_usermodehelper_setkeys); | ||
| 391 | |||
| 392 | /** | ||
| 393 | * call_usermodehelper_setcleanup - set a cleanup function | ||
| 394 | * @info: a subprocess_info returned by call_usermodehelper_setup | ||
| 395 | * @cleanup: a cleanup function | ||
| 396 | * | ||
| 397 | * The cleanup function is called just before the subprocess_info is | ||
| 398 | * about to be freed. This can be used for freeing the argv and envp. | ||
| 399 | * The function must be runnable in either a process context or the | ||
| 400 | * context in which call_usermodehelper_exec is called. | ||
| 401 | */ | ||
| 402 | void call_usermodehelper_setcleanup(struct subprocess_info *info, | ||
| 403 | void (*cleanup)(char **argv, char **envp)) | ||
| 404 | { | ||
| 405 | info->cleanup = cleanup; | ||
| 406 | } | ||
| 407 | EXPORT_SYMBOL(call_usermodehelper_setcleanup); | ||
| 408 | |||
| 409 | /** | ||
| 410 | * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin | ||
| 411 | * @sub_info: a subprocess_info returned by call_usermodehelper_setup | ||
| 412 | * @filp: set to the write-end of a pipe | ||
| 413 | * | ||
| 414 | * This constructs a pipe, and sets the read end to be the stdin of the | ||
| 415 | * subprocess, and returns the write-end in *@filp. | ||
| 416 | */ | ||
| 417 | int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, | ||
| 418 | struct file **filp) | ||
| 419 | { | ||
| 420 | struct file *f; | ||
| 421 | |||
| 422 | f = create_write_pipe(); | ||
| 423 | if (IS_ERR(f)) | ||
| 424 | return PTR_ERR(f); | ||
| 425 | *filp = f; | ||
| 426 | |||
| 427 | f = create_read_pipe(f); | ||
| 428 | if (IS_ERR(f)) { | ||
| 429 | free_write_pipe(*filp); | ||
| 430 | return PTR_ERR(f); | ||
| 431 | } | ||
| 432 | sub_info->stdin = f; | ||
| 433 | |||
| 434 | return 0; | ||
| 435 | } | ||
| 436 | EXPORT_SYMBOL(call_usermodehelper_stdinpipe); | ||
| 437 | |||
| 438 | /** | ||
| 439 | * call_usermodehelper_exec - start a usermode application | ||
| 439 | * @sub_info: information about the subprocess | ||
| 260 | * @wait: wait for the application to finish and return status. | 441 | * @wait: wait for the application to finish and return status. |
| 261 | * when -1 don't wait at all, but you get no useful error back when | 442 | * when -1 don't wait at all, but you get no useful error back when |
| 262 | * the program couldn't be exec'ed. This makes it safe to call | 443 | * the program couldn't be exec'ed. This makes it safe to call |
| @@ -265,81 +446,70 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 265 | * Runs a user-space application. The application is started | 446 | * Runs a user-space application. The application is started |
| 266 | * asynchronously if wait is not set, and runs as a child of keventd. | 447 | * asynchronously if wait is not set, and runs as a child of keventd. |
| 267 | * (ie. it runs with full root capabilities). | 448 | * (ie. it runs with full root capabilities). |
| 268 | * | ||
| 269 | * Must be called from process context. Returns a negative error code | ||
| 270 | * if program was not execed successfully, or 0. | ||
| 271 | */ | 449 | */ |
| 272 | int call_usermodehelper_keys(char *path, char **argv, char **envp, | 450 | int call_usermodehelper_exec(struct subprocess_info *sub_info, |
| 273 | struct key *session_keyring, int wait) | 451 | enum umh_wait wait) |
| 274 | { | 452 | { |
| 275 | DECLARE_COMPLETION_ONSTACK(done); | 453 | DECLARE_COMPLETION_ONSTACK(done); |
| 276 | struct subprocess_info *sub_info; | ||
| 277 | int retval; | 454 | int retval; |
| 278 | 455 | ||
| 279 | if (!khelper_wq) | 456 | helper_lock(); |
| 280 | return -EBUSY; | 457 | if (sub_info->path[0] == '\0') { |
| 281 | 458 | retval = 0; | |
| 282 | if (path[0] == '\0') | 459 | goto out; |
| 283 | return 0; | 460 | } |
| 284 | 461 | ||
| 285 | sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); | 462 | if (!khelper_wq || usermodehelper_disabled) { |
| 286 | if (!sub_info) | 463 | retval = -EBUSY; |
| 287 | return -ENOMEM; | 464 | goto out; |
| 465 | } | ||
| 288 | 466 | ||
| 289 | INIT_WORK(&sub_info->work, __call_usermodehelper); | ||
| 290 | sub_info->complete = &done; | 467 | sub_info->complete = &done; |
| 291 | sub_info->path = path; | ||
| 292 | sub_info->argv = argv; | ||
| 293 | sub_info->envp = envp; | ||
| 294 | sub_info->ring = session_keyring; | ||
| 295 | sub_info->wait = wait; | 468 | sub_info->wait = wait; |
| 296 | 469 | ||
| 297 | queue_work(khelper_wq, &sub_info->work); | 470 | queue_work(khelper_wq, &sub_info->work); |
| 298 | if (wait < 0) /* task has freed sub_info */ | 471 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ |
| 299 | return 0; | 472 | return 0; |
| 300 | wait_for_completion(&done); | 473 | wait_for_completion(&done); |
| 301 | retval = sub_info->retval; | 474 | retval = sub_info->retval; |
| 302 | kfree(sub_info); | 475 | |
| 476 | out: | ||
| 477 | call_usermodehelper_freeinfo(sub_info); | ||
| 478 | helper_unlock(); | ||
| 303 | return retval; | 479 | return retval; |
| 304 | } | 480 | } |
| 305 | EXPORT_SYMBOL(call_usermodehelper_keys); | 481 | EXPORT_SYMBOL(call_usermodehelper_exec); |
| 306 | 482 | ||
| 483 | /** | ||
| 484 | * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin | ||
| 485 | * @path: path to usermode executable | ||
| 486 | * @argv: arg vector for process | ||
| 487 | * @envp: environment for process | ||
| 488 | * @filp: set to the write-end of a pipe | ||
| 489 | * | ||
| 490 | * This is a simple wrapper which executes a usermode-helper function | ||
| 491 | * with a pipe as stdin. It is implemented entirely in terms of | ||
| 492 | * lower-level call_usermodehelper_* functions. | ||
| 493 | */ | ||
| 307 | int call_usermodehelper_pipe(char *path, char **argv, char **envp, | 494 | int call_usermodehelper_pipe(char *path, char **argv, char **envp, |
| 308 | struct file **filp) | 495 | struct file **filp) |
| 309 | { | 496 | { |
| 310 | DECLARE_COMPLETION(done); | 497 | struct subprocess_info *sub_info; |
| 311 | struct subprocess_info sub_info = { | 498 | int ret; |
| 312 | .work = __WORK_INITIALIZER(sub_info.work, | ||
| 313 | __call_usermodehelper), | ||
| 314 | .complete = &done, | ||
| 315 | .path = path, | ||
| 316 | .argv = argv, | ||
| 317 | .envp = envp, | ||
| 318 | .retval = 0, | ||
| 319 | }; | ||
| 320 | struct file *f; | ||
| 321 | |||
| 322 | if (!khelper_wq) | ||
| 323 | return -EBUSY; | ||
| 324 | 499 | ||
| 325 | if (path[0] == '\0') | 500 | sub_info = call_usermodehelper_setup(path, argv, envp); |
| 326 | return 0; | 501 | if (sub_info == NULL) |
| 502 | return -ENOMEM; | ||
| 327 | 503 | ||
| 328 | f = create_write_pipe(); | 504 | ret = call_usermodehelper_stdinpipe(sub_info, filp); |
| 329 | if (IS_ERR(f)) | 505 | if (ret < 0) |
| 330 | return PTR_ERR(f); | 506 | goto out; |
| 331 | *filp = f; | ||
| 332 | 507 | ||
| 333 | f = create_read_pipe(f); | 508 | return call_usermodehelper_exec(sub_info, 1); |
| 334 | if (IS_ERR(f)) { | ||
| 335 | free_write_pipe(*filp); | ||
| 336 | return PTR_ERR(f); | ||
| 337 | } | ||
| 338 | sub_info.stdin = f; | ||
| 339 | 509 | ||
| 340 | queue_work(khelper_wq, &sub_info.work); | 510 | out: |
| 341 | wait_for_completion(&done); | 511 | call_usermodehelper_freeinfo(sub_info); |
| 342 | return sub_info.retval; | 512 | return ret; |
| 343 | } | 513 | } |
| 344 | EXPORT_SYMBOL(call_usermodehelper_pipe); | 514 | EXPORT_SYMBOL(call_usermodehelper_pipe); |
| 345 | 515 | ||
| @@ -347,4 +517,5 @@ void __init usermodehelper_init(void) | |||
| 347 | { | 517 | { |
| 348 | khelper_wq = create_singlethread_workqueue("khelper"); | 518 | khelper_wq = create_singlethread_workqueue("khelper"); |
| 349 | BUG_ON(!khelper_wq); | 519 | BUG_ON(!khelper_wq); |
| 520 | register_pm_notifier_callback(); | ||
| 350 | } | 521 | } |
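
The net result is that the old monolithic call_usermodehelper_keys() becomes a family of composable calls: call_usermodehelper_setup() allocates the subprocess_info, the optional setkeys/setcleanup/stdinpipe calls decorate it, and call_usermodehelper_exec() queues it and consumes it on every path (it frees the structure itself, or hands that job to wait_for_helper() for UMH_NO_WAIT). A sketch of how the old entry point can be rebuilt on the new API, mirroring the call_usermodehelper_pipe() rewrite above (the wrapper name is illustrative):

/* sketch: the old all-in-one helper expressed with the split API */
static int run_usermodehelper_keys(char *path, char **argv, char **envp,
				   struct key *session_keyring,
				   enum umh_wait wait)
{
	struct subprocess_info *info;

	info = call_usermodehelper_setup(path, argv, envp);
	if (info == NULL)
		return -ENOMEM;

	call_usermodehelper_setkeys(info, session_keyring);

	/* exec consumes info: it is freed on every return path */
	return call_usermodehelper_exec(info, wait);
}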
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9e47d8c493f3..3e9f513a728d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -675,9 +675,18 @@ static struct notifier_block kprobe_exceptions_nb = { | |||
| 675 | .priority = 0x7fffffff /* we need to be notified first */ | 675 | .priority = 0x7fffffff /* we need to be notified first */ |
| 676 | }; | 676 | }; |
| 677 | 677 | ||
| 678 | unsigned long __weak arch_deref_entry_point(void *entry) | ||
| 679 | { | ||
| 680 | return (unsigned long)entry; | ||
| 681 | } | ||
| 678 | 682 | ||
| 679 | int __kprobes register_jprobe(struct jprobe *jp) | 683 | int __kprobes register_jprobe(struct jprobe *jp) |
| 680 | { | 684 | { |
| 685 | unsigned long addr = arch_deref_entry_point(jp->entry); | ||
| 686 | |||
| 687 | if (!kernel_text_address(addr)) | ||
| 688 | return -EINVAL; | ||
| 689 | |||
| 681 | /* Todo: Verify probepoint is a function entry point */ | 690 | /* Todo: Verify probepoint is a function entry point */ |
| 682 | jp->kp.pre_handler = setjmp_pre_handler; | 691 | jp->kp.pre_handler = setjmp_pre_handler; |
| 683 | jp->kp.break_handler = longjmp_break_handler; | 692 | jp->kp.break_handler = longjmp_break_handler; |
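
register_jprobe() can now reject a jprobe whose entry does not point at kernel text. arch_deref_entry_point() defaults to the weak identity function above so that architectures whose C function pointers are descriptors rather than code addresses (ia64, for example) can override it. A plausible override for such an ABI (the struct layout here is an assumption for illustration):

/* assumed function-descriptor layout: {code address, global pointer} */
struct fdesc {
	unsigned long ip;	/* address of the actual code */
	unsigned long gp;	/* global/TOC pointer */
};

unsigned long arch_deref_entry_point(void *entry)
{
	/* resolve the descriptor to the code address that
	 * kernel_text_address() can validate */
	return ((struct fdesc *)entry)->ip;
}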
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 559deca5ed15..d0e5c48e18c7 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
| @@ -62,6 +62,28 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page) | |||
| 62 | KERNEL_ATTR_RO(kexec_crash_loaded); | 62 | KERNEL_ATTR_RO(kexec_crash_loaded); |
| 63 | #endif /* CONFIG_KEXEC */ | 63 | #endif /* CONFIG_KEXEC */ |
| 64 | 64 | ||
| 65 | /* | ||
| 66 | * Make /sys/kernel/notes give the raw contents of our kernel .notes section. | ||
| 67 | */ | ||
| 68 | extern const void __start_notes __attribute__((weak)); | ||
| 69 | extern const void __stop_notes __attribute__((weak)); | ||
| 70 | #define notes_size (&__stop_notes - &__start_notes) | ||
| 71 | |||
| 72 | static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, | ||
| 73 | char *buf, loff_t off, size_t count) | ||
| 74 | { | ||
| 75 | memcpy(buf, &__start_notes + off, count); | ||
| 76 | return count; | ||
| 77 | } | ||
| 78 | |||
| 79 | static struct bin_attribute notes_attr = { | ||
| 80 | .attr = { | ||
| 81 | .name = "notes", | ||
| 82 | .mode = S_IRUGO, | ||
| 83 | }, | ||
| 84 | .read = ¬es_read, | ||
| 85 | }; | ||
| 86 | |||
| 65 | decl_subsys(kernel, NULL, NULL); | 87 | decl_subsys(kernel, NULL, NULL); |
| 66 | EXPORT_SYMBOL_GPL(kernel_subsys); | 88 | EXPORT_SYMBOL_GPL(kernel_subsys); |
| 67 | 89 | ||
| @@ -88,6 +110,12 @@ static int __init ksysfs_init(void) | |||
| 88 | error = sysfs_create_group(&kernel_subsys.kobj, | 110 | error = sysfs_create_group(&kernel_subsys.kobj, |
| 89 | &kernel_attr_group); | 111 | &kernel_attr_group); |
| 90 | 112 | ||
| 113 | if (!error && notes_size > 0) { | ||
| 114 | notes_attr.size = notes_size; | ||
| 115 | error = sysfs_create_bin_file(&kernel_subsys.kobj, | ||
| 116 | ¬es_attr); | ||
| 117 | } | ||
| 118 | |||
| 91 | return error; | 119 | return error; |
| 92 | } | 120 | } |
| 93 | 121 | ||
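
sysfs invokes notes_read() with the caller's offset and a count already clamped to notes_attr.size, so the bare memcpy() is safe; the file is a byte-for-byte image of the kernel's ELF .notes section. A sketch of a userspace consumer, assuming the path created above:

/* sketch (userspace): dump the raw note records from /sys/kernel/notes */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/notes", "rb");
	unsigned char buf[4096];
	size_t n;

	if (!f) {
		perror("/sys/kernel/notes");
		return 1;
	}
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);	/* raw ELF note entries */
	fclose(f);
	return 0;
}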
diff --git a/kernel/kthread.c b/kernel/kthread.c index bbd51b81a3e8..a404f7ee7395 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
| @@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k) | |||
| 215 | EXPORT_SYMBOL(kthread_stop); | 215 | EXPORT_SYMBOL(kthread_stop); |
| 216 | 216 | ||
| 217 | 217 | ||
| 218 | static __init void kthreadd_setup(void) | 218 | static noinline __init_refok void kthreadd_setup(void) |
| 219 | { | 219 | { |
| 220 | struct task_struct *tsk = current; | 220 | struct task_struct *tsk = current; |
| 221 | 221 | ||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 1a5ff2211d88..734da579ad13 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -5,7 +5,8 @@ | |||
| 5 | * | 5 | * |
| 6 | * Started by Ingo Molnar: | 6 | * Started by Ingo Molnar: |
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
| 9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 9 | * | 10 | * |
| 10 | * this code maps all the lock dependencies as they occur in a live kernel | 11 | * this code maps all the lock dependencies as they occur in a live kernel |
| 11 | * and will warn about the following classes of locking bugs: | 12 | * and will warn about the following classes of locking bugs: |
| @@ -37,11 +38,26 @@ | |||
| 37 | #include <linux/debug_locks.h> | 38 | #include <linux/debug_locks.h> |
| 38 | #include <linux/irqflags.h> | 39 | #include <linux/irqflags.h> |
| 39 | #include <linux/utsname.h> | 40 | #include <linux/utsname.h> |
| 41 | #include <linux/hash.h> | ||
| 40 | 42 | ||
| 41 | #include <asm/sections.h> | 43 | #include <asm/sections.h> |
| 42 | 44 | ||
| 43 | #include "lockdep_internals.h" | 45 | #include "lockdep_internals.h" |
| 44 | 46 | ||
| 47 | #ifdef CONFIG_PROVE_LOCKING | ||
| 48 | int prove_locking = 1; | ||
| 49 | module_param(prove_locking, int, 0644); | ||
| 50 | #else | ||
| 51 | #define prove_locking 0 | ||
| 52 | #endif | ||
| 53 | |||
| 54 | #ifdef CONFIG_LOCK_STAT | ||
| 55 | int lock_stat = 1; | ||
| 56 | module_param(lock_stat, int, 0644); | ||
| 57 | #else | ||
| 58 | #define lock_stat 0 | ||
| 59 | #endif | ||
| 60 | |||
| 45 | /* | 61 | /* |
| 46 | * lockdep_lock: protects the lockdep graph, the hashes and the | 62 | * lockdep_lock: protects the lockdep graph, the hashes and the |
| 47 | * class/list/hash allocators. | 63 | * class/list/hash allocators. |
| @@ -96,23 +112,6 @@ unsigned long nr_list_entries; | |||
| 96 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; | 112 | static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; |
| 97 | 113 | ||
| 98 | /* | 114 | /* |
| 99 | * Allocate a lockdep entry. (assumes the graph_lock held, returns | ||
| 100 | * with NULL on failure) | ||
| 101 | */ | ||
| 102 | static struct lock_list *alloc_list_entry(void) | ||
| 103 | { | ||
| 104 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { | ||
| 105 | if (!debug_locks_off_graph_unlock()) | ||
| 106 | return NULL; | ||
| 107 | |||
| 108 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); | ||
| 109 | printk("turning off the locking correctness validator.\n"); | ||
| 110 | return NULL; | ||
| 111 | } | ||
| 112 | return list_entries + nr_list_entries++; | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * All data structures here are protected by the global debug_lock. | 115 | * All data structures here are protected by the global debug_lock. |
| 117 | * | 116 | * |
| 118 | * Mutex key structs only get allocated, once during bootup, and never | 117 | * Mutex key structs only get allocated, once during bootup, and never |
| @@ -121,6 +120,117 @@ static struct lock_list *alloc_list_entry(void) | |||
| 121 | unsigned long nr_lock_classes; | 120 | unsigned long nr_lock_classes; |
| 122 | static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; | 121 | static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; |
| 123 | 122 | ||
| 123 | #ifdef CONFIG_LOCK_STAT | ||
| 124 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); | ||
| 125 | |||
| 126 | static int lock_contention_point(struct lock_class *class, unsigned long ip) | ||
| 127 | { | ||
| 128 | int i; | ||
| 129 | |||
| 130 | for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { | ||
| 131 | if (class->contention_point[i] == 0) { | ||
| 132 | class->contention_point[i] = ip; | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | if (class->contention_point[i] == ip) | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | |||
| 139 | return i; | ||
| 140 | } | ||
| 141 | |||
| 142 | static void lock_time_inc(struct lock_time *lt, s64 time) | ||
| 143 | { | ||
| 144 | if (time > lt->max) | ||
| 145 | lt->max = time; | ||
| 146 | |||
| 147 | if (time < lt->min || !lt->min) | ||
| 148 | lt->min = time; | ||
| 149 | |||
| 150 | lt->total += time; | ||
| 151 | lt->nr++; | ||
| 152 | } | ||
| 153 | |||
| 154 | static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) | ||
| 155 | { | ||
| 156 | dst->min += src->min; | ||
| 157 | dst->max += src->max; | ||
| 158 | dst->total += src->total; | ||
| 159 | dst->nr += src->nr; | ||
| 160 | } | ||
| 161 | |||
| 162 | struct lock_class_stats lock_stats(struct lock_class *class) | ||
| 163 | { | ||
| 164 | struct lock_class_stats stats; | ||
| 165 | int cpu, i; | ||
| 166 | |||
| 167 | memset(&stats, 0, sizeof(struct lock_class_stats)); | ||
| 168 | for_each_possible_cpu(cpu) { | ||
| 169 | struct lock_class_stats *pcs = | ||
| 170 | &per_cpu(lock_stats, cpu)[class - lock_classes]; | ||
| 171 | |||
| 172 | for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) | ||
| 173 | stats.contention_point[i] += pcs->contention_point[i]; | ||
| 174 | |||
| 175 | lock_time_add(&pcs->read_waittime, &stats.read_waittime); | ||
| 176 | lock_time_add(&pcs->write_waittime, &stats.write_waittime); | ||
| 177 | |||
| 178 | lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); | ||
| 179 | lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); | ||
| 180 | |||
| 181 | for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) | ||
| 182 | stats.bounces[i] += pcs->bounces[i]; | ||
| 183 | } | ||
| 184 | |||
| 185 | return stats; | ||
| 186 | } | ||
| 187 | |||
| 188 | void clear_lock_stats(struct lock_class *class) | ||
| 189 | { | ||
| 190 | int cpu; | ||
| 191 | |||
| 192 | for_each_possible_cpu(cpu) { | ||
| 193 | struct lock_class_stats *cpu_stats = | ||
| 194 | &per_cpu(lock_stats, cpu)[class - lock_classes]; | ||
| 195 | |||
| 196 | memset(cpu_stats, 0, sizeof(struct lock_class_stats)); | ||
| 197 | } | ||
| 198 | memset(class->contention_point, 0, sizeof(class->contention_point)); | ||
| 199 | } | ||
| 200 | |||
| 201 | static struct lock_class_stats *get_lock_stats(struct lock_class *class) | ||
| 202 | { | ||
| 203 | return &get_cpu_var(lock_stats)[class - lock_classes]; | ||
| 204 | } | ||
| 205 | |||
| 206 | static void put_lock_stats(struct lock_class_stats *stats) | ||
| 207 | { | ||
| 208 | put_cpu_var(lock_stats); | ||
| 209 | } | ||
| 210 | |||
| 211 | static void lock_release_holdtime(struct held_lock *hlock) | ||
| 212 | { | ||
| 213 | struct lock_class_stats *stats; | ||
| 214 | s64 holdtime; | ||
| 215 | |||
| 216 | if (!lock_stat) | ||
| 217 | return; | ||
| 218 | |||
| 219 | holdtime = sched_clock() - hlock->holdtime_stamp; | ||
| 220 | |||
| 221 | stats = get_lock_stats(hlock->class); | ||
| 222 | if (hlock->read) | ||
| 223 | lock_time_inc(&stats->read_holdtime, holdtime); | ||
| 224 | else | ||
| 225 | lock_time_inc(&stats->write_holdtime, holdtime); | ||
| 226 | put_lock_stats(stats); | ||
| 227 | } | ||
| 228 | #else | ||
| 229 | static inline void lock_release_holdtime(struct held_lock *hlock) | ||
| 230 | { | ||
| 231 | } | ||
| 232 | #endif | ||
| 233 | |||
| 124 | /* | 234 | /* |
| 125 | * We keep a global list of all lock classes. The list only grows, | 235 | * We keep a global list of all lock classes. The list only grows, |
| 126 | * never shrinks. The list is only accessed with the lockdep | 236 | * never shrinks. The list is only accessed with the lockdep |
| @@ -133,24 +243,18 @@ LIST_HEAD(all_lock_classes); | |||
| 133 | */ | 243 | */ |
| 134 | #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) | 244 | #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) |
| 135 | #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) | 245 | #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) |
| 136 | #define CLASSHASH_MASK (CLASSHASH_SIZE - 1) | 246 | #define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) |
| 137 | #define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) | ||
| 138 | #define classhashentry(key) (classhash_table + __classhashfn((key))) | 247 | #define classhashentry(key) (classhash_table + __classhashfn((key))) |
| 139 | 248 | ||
| 140 | static struct list_head classhash_table[CLASSHASH_SIZE]; | 249 | static struct list_head classhash_table[CLASSHASH_SIZE]; |
| 141 | 250 | ||
| 142 | unsigned long nr_lock_chains; | ||
| 143 | static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; | ||
| 144 | |||
| 145 | /* | 251 | /* |
| 146 | * We put the lock dependency chains into a hash-table as well, to cache | 252 | * We put the lock dependency chains into a hash-table as well, to cache |
| 147 | * their existence: | 253 | * their existence: |
| 148 | */ | 254 | */ |
| 149 | #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) | 255 | #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) |
| 150 | #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) | 256 | #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) |
| 151 | #define CHAINHASH_MASK (CHAINHASH_SIZE - 1) | 257 | #define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) |
| 152 | #define __chainhashfn(chain) \ | ||
| 153 | (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) | ||
| 154 | #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) | 258 | #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) |
| 155 | 259 | ||
| 156 | static struct list_head chainhash_table[CHAINHASH_SIZE]; | 260 | static struct list_head chainhash_table[CHAINHASH_SIZE]; |
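
Both ad-hoc hash functions give way to hash_long() from the newly included <linux/hash.h>, the multiplicative (golden-ratio) hash: the old fold-and-mask only combined two shifted slices of the key, while the multiply spreads every key bit across the bucket index. The substitution, side by side:

#include <linux/hash.h>

/* old: add a shifted copy of the key, then mask to the table size */
#define OLD_CLASSHASHFN(key) \
	((((unsigned long)(key) >> CLASSHASH_BITS) + \
	  (unsigned long)(key)) & CLASSHASH_MASK)

/* new: hash_long(val, bits) multiplies by a golden-ratio constant and
 * keeps the top bits, returning a value in [0, 1 << bits) */
#define NEW_CLASSHASHFN(key) hash_long((unsigned long)(key), CLASSHASH_BITS)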
| @@ -223,26 +327,6 @@ static int verbose(struct lock_class *class) | |||
| 223 | return 0; | 327 | return 0; |
| 224 | } | 328 | } |
| 225 | 329 | ||
| 226 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 227 | |||
| 228 | static int hardirq_verbose(struct lock_class *class) | ||
| 229 | { | ||
| 230 | #if HARDIRQ_VERBOSE | ||
| 231 | return class_filter(class); | ||
| 232 | #endif | ||
| 233 | return 0; | ||
| 234 | } | ||
| 235 | |||
| 236 | static int softirq_verbose(struct lock_class *class) | ||
| 237 | { | ||
| 238 | #if SOFTIRQ_VERBOSE | ||
| 239 | return class_filter(class); | ||
| 240 | #endif | ||
| 241 | return 0; | ||
| 242 | } | ||
| 243 | |||
| 244 | #endif | ||
| 245 | |||
| 246 | /* | 330 | /* |
| 247 | * Stack-trace: tightly packed array of stack backtrace | 331 | * Stack-trace: tightly packed array of stack backtrace |
| 248 | * addresses. Protected by the graph_lock. | 332 | * addresses. Protected by the graph_lock. |
| @@ -291,6 +375,11 @@ unsigned int max_recursion_depth; | |||
| 291 | * about it later on, in lockdep_info(). | 375 | * about it later on, in lockdep_info(). |
| 292 | */ | 376 | */ |
| 293 | static int lockdep_init_error; | 377 | static int lockdep_init_error; |
| 378 | static unsigned long lockdep_init_trace_data[20]; | ||
| 379 | static struct stack_trace lockdep_init_trace = { | ||
| 380 | .max_entries = ARRAY_SIZE(lockdep_init_trace_data), | ||
| 381 | .entries = lockdep_init_trace_data, | ||
| 382 | }; | ||
| 294 | 383 | ||
| 295 | /* | 384 | /* |
| 296 | * Various lockdep statistics: | 385 | * Various lockdep statistics: |
| @@ -379,7 +468,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4 | |||
| 379 | 468 | ||
| 380 | static void print_lock_name(struct lock_class *class) | 469 | static void print_lock_name(struct lock_class *class) |
| 381 | { | 470 | { |
| 382 | char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4; | 471 | char str[KSYM_NAME_LEN], c1, c2, c3, c4; |
| 383 | const char *name; | 472 | const char *name; |
| 384 | 473 | ||
| 385 | get_usage_chars(class, &c1, &c2, &c3, &c4); | 474 | get_usage_chars(class, &c1, &c2, &c3, &c4); |
| @@ -401,7 +490,7 @@ static void print_lock_name(struct lock_class *class) | |||
| 401 | static void print_lockdep_cache(struct lockdep_map *lock) | 490 | static void print_lockdep_cache(struct lockdep_map *lock) |
| 402 | { | 491 | { |
| 403 | const char *name; | 492 | const char *name; |
| 404 | char str[KSYM_NAME_LEN + 1]; | 493 | char str[KSYM_NAME_LEN]; |
| 405 | 494 | ||
| 406 | name = lock->name; | 495 | name = lock->name; |
| 407 | if (!name) | 496 | if (!name) |
| @@ -482,6 +571,262 @@ static void print_lock_dependencies(struct lock_class *class, int depth) | |||
| 482 | } | 571 | } |
| 483 | } | 572 | } |
| 484 | 573 | ||
| 574 | static void print_kernel_version(void) | ||
| 575 | { | ||
| 576 | printk("%s %.*s\n", init_utsname()->release, | ||
| 577 | (int)strcspn(init_utsname()->version, " "), | ||
| 578 | init_utsname()->version); | ||
| 579 | } | ||
| 580 | |||
| 581 | static int very_verbose(struct lock_class *class) | ||
| 582 | { | ||
| 583 | #if VERY_VERBOSE | ||
| 584 | return class_filter(class); | ||
| 585 | #endif | ||
| 586 | return 0; | ||
| 587 | } | ||
| 588 | |||
| 589 | /* | ||
| 590 | * Is this the address of a static object: | ||
| 591 | */ | ||
| 592 | static int static_obj(void *obj) | ||
| 593 | { | ||
| 594 | unsigned long start = (unsigned long) &_stext, | ||
| 595 | end = (unsigned long) &_end, | ||
| 596 | addr = (unsigned long) obj; | ||
| 597 | #ifdef CONFIG_SMP | ||
| 598 | int i; | ||
| 599 | #endif | ||
| 600 | |||
| 601 | /* | ||
| 602 | * static variable? | ||
| 603 | */ | ||
| 604 | if ((addr >= start) && (addr < end)) | ||
| 605 | return 1; | ||
| 606 | |||
| 607 | #ifdef CONFIG_SMP | ||
| 608 | /* | ||
| 609 | * percpu var? | ||
| 610 | */ | ||
| 611 | for_each_possible_cpu(i) { | ||
| 612 | start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); | ||
| 613 | end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM | ||
| 614 | + per_cpu_offset(i); | ||
| 615 | |||
| 616 | if ((addr >= start) && (addr < end)) | ||
| 617 | return 1; | ||
| 618 | } | ||
| 619 | #endif | ||
| 620 | |||
| 621 | /* | ||
| 622 | * module var? | ||
| 623 | */ | ||
| 624 | return is_module_address(addr); | ||
| 625 | } | ||
| 626 | |||
| 627 | /* | ||
| 628 | * To make lock name printouts unique, we calculate a unique | ||
| 629 | * class->name_version generation counter: | ||
| 630 | */ | ||
| 631 | static int count_matching_names(struct lock_class *new_class) | ||
| 632 | { | ||
| 633 | struct lock_class *class; | ||
| 634 | int count = 0; | ||
| 635 | |||
| 636 | if (!new_class->name) | ||
| 637 | return 0; | ||
| 638 | |||
| 639 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
| 640 | if (new_class->key - new_class->subclass == class->key) | ||
| 641 | return class->name_version; | ||
| 642 | if (class->name && !strcmp(class->name, new_class->name)) | ||
| 643 | count = max(count, class->name_version); | ||
| 644 | } | ||
| 645 | |||
| 646 | return count + 1; | ||
| 647 | } | ||
| 648 | |||
| 649 | /* | ||
| 650 | * Register a lock's class in the hash-table, if the class is not present | ||
| 651 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
| 652 | * itself, so actual lookup of the hash should be once per lock object. | ||
| 653 | */ | ||
| 654 | static inline struct lock_class * | ||
| 655 | look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | ||
| 656 | { | ||
| 657 | struct lockdep_subclass_key *key; | ||
| 658 | struct list_head *hash_head; | ||
| 659 | struct lock_class *class; | ||
| 660 | |||
| 661 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 662 | /* | ||
| 663 | * If the architecture calls into lockdep before initializing | ||
| 664 | * the hashes then we'll warn about it later. (we cannot printk | ||
| 665 | * right now) | ||
| 666 | */ | ||
| 667 | if (unlikely(!lockdep_initialized)) { | ||
| 668 | lockdep_init(); | ||
| 669 | lockdep_init_error = 1; | ||
| 670 | save_stack_trace(&lockdep_init_trace); | ||
| 671 | } | ||
| 672 | #endif | ||
| 673 | |||
| 674 | /* | ||
| 675 | * Static locks do not have their class-keys yet - for them the key | ||
| 676 | * is the lock object itself: | ||
| 677 | */ | ||
| 678 | if (unlikely(!lock->key)) | ||
| 679 | lock->key = (void *)lock; | ||
| 680 | |||
| 681 | /* | ||
| 682 | * NOTE: the class-key must be unique. For dynamic locks, a static | ||
| 683 | * lock_class_key variable is passed in through the mutex_init() | ||
| 684 | * (or spin_lock_init()) call - which acts as the key. For static | ||
| 685 | * locks we use the lock object itself as the key. | ||
| 686 | */ | ||
| 687 | BUILD_BUG_ON(sizeof(struct lock_class_key) > | ||
| 688 | sizeof(struct lockdep_map)); | ||
| 689 | |||
| 690 | key = lock->key->subkeys + subclass; | ||
| 691 | |||
| 692 | hash_head = classhashentry(key); | ||
| 693 | |||
| 694 | /* | ||
| 695 | * We can walk the hash lockfree, because the hash only | ||
| 696 | * grows, and we are careful when adding entries to the end: | ||
| 697 | */ | ||
| 698 | list_for_each_entry(class, hash_head, hash_entry) { | ||
| 699 | if (class->key == key) { | ||
| 700 | WARN_ON_ONCE(class->name != lock->name); | ||
| 701 | return class; | ||
| 702 | } | ||
| 703 | } | ||
| 704 | |||
| 705 | return NULL; | ||
| 706 | } | ||
| 707 | |||
| 708 | /* | ||
| 709 | * Register a lock's class in the hash-table, if the class is not present | ||
| 710 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
| 711 | * itself, so actual lookup of the hash should be once per lock object. | ||
| 712 | */ | ||
| 713 | static inline struct lock_class * | ||
| 714 | register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | ||
| 715 | { | ||
| 716 | struct lockdep_subclass_key *key; | ||
| 717 | struct list_head *hash_head; | ||
| 718 | struct lock_class *class; | ||
| 719 | unsigned long flags; | ||
| 720 | |||
| 721 | class = look_up_lock_class(lock, subclass); | ||
| 722 | if (likely(class)) | ||
| 723 | return class; | ||
| 724 | |||
| 725 | /* | ||
| 726 | * Debug-check: all keys must be persistent! | ||
| 727 | */ | ||
| 728 | if (!static_obj(lock->key)) { | ||
| 729 | debug_locks_off(); | ||
| 730 | printk("INFO: trying to register non-static key.\n"); | ||
| 731 | printk("the code is fine but needs lockdep annotation.\n"); | ||
| 732 | printk("turning off the locking correctness validator.\n"); | ||
| 733 | dump_stack(); | ||
| 734 | |||
| 735 | return NULL; | ||
| 736 | } | ||
| 737 | |||
| 738 | key = lock->key->subkeys + subclass; | ||
| 739 | hash_head = classhashentry(key); | ||
| 740 | |||
| 741 | raw_local_irq_save(flags); | ||
| 742 | if (!graph_lock()) { | ||
| 743 | raw_local_irq_restore(flags); | ||
| 744 | return NULL; | ||
| 745 | } | ||
| 746 | /* | ||
| 747 | * We have to do the hash-walk again, to avoid races | ||
| 748 | * with another CPU: | ||
| 749 | */ | ||
| 750 | list_for_each_entry(class, hash_head, hash_entry) | ||
| 751 | if (class->key == key) | ||
| 752 | goto out_unlock_set; | ||
| 753 | /* | ||
| 754 | * Allocate a new key from the static array, and add it to | ||
| 755 | * the hash: | ||
| 756 | */ | ||
| 757 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | ||
| 758 | if (!debug_locks_off_graph_unlock()) { | ||
| 759 | raw_local_irq_restore(flags); | ||
| 760 | return NULL; | ||
| 761 | } | ||
| 762 | raw_local_irq_restore(flags); | ||
| 763 | |||
| 764 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | ||
| 765 | printk("turning off the locking correctness validator.\n"); | ||
| 766 | return NULL; | ||
| 767 | } | ||
| 768 | class = lock_classes + nr_lock_classes++; | ||
| 769 | debug_atomic_inc(&nr_unused_locks); | ||
| 770 | class->key = key; | ||
| 771 | class->name = lock->name; | ||
| 772 | class->subclass = subclass; | ||
| 773 | INIT_LIST_HEAD(&class->lock_entry); | ||
| 774 | INIT_LIST_HEAD(&class->locks_before); | ||
| 775 | INIT_LIST_HEAD(&class->locks_after); | ||
| 776 | class->name_version = count_matching_names(class); | ||
| 777 | /* | ||
| 778 | * We use RCU's safe list-add method to make | ||
| 779 | * parallel walking of the hash-list safe: | ||
| 780 | */ | ||
| 781 | list_add_tail_rcu(&class->hash_entry, hash_head); | ||
| 782 | |||
| 783 | if (verbose(class)) { | ||
| 784 | graph_unlock(); | ||
| 785 | raw_local_irq_restore(flags); | ||
| 786 | |||
| 787 | printk("\nnew class %p: %s", class->key, class->name); | ||
| 788 | if (class->name_version > 1) | ||
| 789 | printk("#%d", class->name_version); | ||
| 790 | printk("\n"); | ||
| 791 | dump_stack(); | ||
| 792 | |||
| 793 | raw_local_irq_save(flags); | ||
| 794 | if (!graph_lock()) { | ||
| 795 | raw_local_irq_restore(flags); | ||
| 796 | return NULL; | ||
| 797 | } | ||
| 798 | } | ||
| 799 | out_unlock_set: | ||
| 800 | graph_unlock(); | ||
| 801 | raw_local_irq_restore(flags); | ||
| 802 | |||
| 803 | if (!subclass || force) | ||
| 804 | lock->class_cache = class; | ||
| 805 | |||
| 806 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | ||
| 807 | return NULL; | ||
| 808 | |||
| 809 | return class; | ||
| 810 | } | ||
| 811 | |||
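The look_up_lock_class()/register_lock_class() pair above is a classic check, lock, re-check pattern: an optimistic lockless walk of the hash, then the same walk again under graph_lock before allocating. A minimal user-space sketch of the same idea, with all names (table, table_lock, find_entry, register_entry) hypothetical and the kernel's RCU-safe list walk elided:

#include <pthread.h>
#include <stddef.h>

#define MAX_ENTRIES 64

struct entry { const void *key; };

static struct entry table[MAX_ENTRIES];
static size_t nr_entries;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Walk the table without the lock; entries are only ever appended. */
static struct entry *find_entry(const void *key)
{
	for (size_t i = 0; i < nr_entries; i++)
		if (table[i].key == key)
			return &table[i];
	return NULL;
}

static struct entry *register_entry(const void *key)
{
	struct entry *e = find_entry(key);	/* optimistic, lock-free lookup */

	if (e)
		return e;

	pthread_mutex_lock(&table_lock);
	e = find_entry(key);			/* re-check: another thread may have raced us */
	if (!e && nr_entries < MAX_ENTRIES) {
		e = &table[nr_entries];
		e->key = key;
		nr_entries++;			/* publish last, after the entry is filled in */
	}
	pthread_mutex_unlock(&table_lock);
	return e;
}

(The kernel version makes the unlocked walk safe with list_add_tail_rcu(); this sketch relies on append-only growth instead, so it is illustrative only.)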
| 812 | #ifdef CONFIG_PROVE_LOCKING | ||
| 813 | /* | ||
| 814 | * Allocate a lockdep entry. (assumes the graph_lock is held, returns | ||
| 815 | * NULL on failure) | ||
| 816 | */ | ||
| 817 | static struct lock_list *alloc_list_entry(void) | ||
| 818 | { | ||
| 819 | if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { | ||
| 820 | if (!debug_locks_off_graph_unlock()) | ||
| 821 | return NULL; | ||
| 822 | |||
| 823 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); | ||
| 824 | printk("turning off the locking correctness validator.\n"); | ||
| 825 | return NULL; | ||
| 826 | } | ||
| 827 | return list_entries + nr_list_entries++; | ||
| 828 | } | ||
| 829 | |||
| 485 | /* | 830 | /* |
| 486 | * Add a new dependency to the head of the list: | 831 | * Add a new dependency to the head of the list: |
| 487 | */ | 832 | */ |
| @@ -542,13 +887,6 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth) | |||
| 542 | return 0; | 887 | return 0; |
| 543 | } | 888 | } |
| 544 | 889 | ||
| 545 | static void print_kernel_version(void) | ||
| 546 | { | ||
| 547 | printk("%s %.*s\n", init_utsname()->release, | ||
| 548 | (int)strcspn(init_utsname()->version, " "), | ||
| 549 | init_utsname()->version); | ||
| 550 | } | ||
| 551 | |||
| 552 | /* | 890 | /* |
| 553 | * When a circular dependency is detected, print the | 891 | * When a circular dependency is detected, print the |
| 554 | * header first: | 892 | * header first: |
| @@ -640,15 +978,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) | |||
| 640 | return 1; | 978 | return 1; |
| 641 | } | 979 | } |
| 642 | 980 | ||
| 643 | static int very_verbose(struct lock_class *class) | ||
| 644 | { | ||
| 645 | #if VERY_VERBOSE | ||
| 646 | return class_filter(class); | ||
| 647 | #endif | ||
| 648 | return 0; | ||
| 649 | } | ||
| 650 | #ifdef CONFIG_TRACE_IRQFLAGS | 981 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 651 | |||
| 652 | /* | 982 | /* |
| 653 | * Forwards and backwards subgraph searching, for the purposes of | 983 | * Forwards and backwards subgraph searching, for the purposes of |
| 654 | * proving that two subgraphs can be connected by a new dependency | 984 | * proving that two subgraphs can be connected by a new dependency |
| @@ -821,6 +1151,78 @@ check_usage(struct task_struct *curr, struct held_lock *prev, | |||
| 821 | bit_backwards, bit_forwards, irqclass); | 1151 | bit_backwards, bit_forwards, irqclass); |
| 822 | } | 1152 | } |
| 823 | 1153 | ||
| 1154 | static int | ||
| 1155 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | ||
| 1156 | struct held_lock *next) | ||
| 1157 | { | ||
| 1158 | /* | ||
| 1159 | * Prove that the new dependency does not connect a hardirq-safe | ||
| 1160 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
| 1161 | * the backwards-subgraph starting at <prev>, and the | ||
| 1162 | * forwards-subgraph starting at <next>: | ||
| 1163 | */ | ||
| 1164 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, | ||
| 1165 | LOCK_ENABLED_HARDIRQS, "hard")) | ||
| 1166 | return 0; | ||
| 1167 | |||
| 1168 | /* | ||
| 1169 | * Prove that the new dependency does not connect a hardirq-safe-read | ||
| 1170 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
| 1171 | * the backwards-subgraph starting at <prev>, and the | ||
| 1172 | * forwards-subgraph starting at <next>: | ||
| 1173 | */ | ||
| 1174 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, | ||
| 1175 | LOCK_ENABLED_HARDIRQS, "hard-read")) | ||
| 1176 | return 0; | ||
| 1177 | |||
| 1178 | /* | ||
| 1179 | * Prove that the new dependency does not connect a softirq-safe | ||
| 1180 | * lock with a softirq-unsafe lock - to achieve this we search | ||
| 1181 | * the backwards-subgraph starting at <prev>, and the | ||
| 1182 | * forwards-subgraph starting at <next>: | ||
| 1183 | */ | ||
| 1184 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, | ||
| 1185 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
| 1186 | return 0; | ||
| 1187 | /* | ||
| 1188 | * Prove that the new dependency does not connect a softirq-safe-read | ||
| 1189 | * lock with a softirq-unsafe lock - to achieve this we search | ||
| 1190 | * the backwards-subgraph starting at <prev>, and the | ||
| 1191 | * forwards-subgraph starting at <next>: | ||
| 1192 | */ | ||
| 1193 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, | ||
| 1194 | LOCK_ENABLED_SOFTIRQS, "soft-read")) | ||
| 1195 | return 0; | ||
| 1196 | |||
| 1197 | return 1; | ||
| 1198 | } | ||
| 1199 | |||
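The four subgraph searches in check_prev_add_irq() differ only in their (safe bit, unsafe bit, label) triple, so the same logic can be driven from a table. A sketch of that refactoring (the helper name and table are hypothetical, reusing only constants that appear in this patch):

static const struct {
	enum lock_usage_bit safe, unsafe;
	const char *name;
} irq_checks[] = {
	{ LOCK_USED_IN_HARDIRQ,      LOCK_ENABLED_HARDIRQS, "hard"      },
	{ LOCK_USED_IN_HARDIRQ_READ, LOCK_ENABLED_HARDIRQS, "hard-read" },
	{ LOCK_USED_IN_SOFTIRQ,      LOCK_ENABLED_SOFTIRQS, "soft"      },
	{ LOCK_USED_IN_SOFTIRQ_READ, LOCK_ENABLED_SOFTIRQS, "soft-read" },
};

static int check_prev_add_irq_table(struct task_struct *curr,
				    struct held_lock *prev,
				    struct held_lock *next)
{
	int i;

	/* Each entry proves one safe -> unsafe combination is impossible. */
	for (i = 0; i < ARRAY_SIZE(irq_checks); i++)
		if (!check_usage(curr, prev, next, irq_checks[i].safe,
				 irq_checks[i].unsafe, irq_checks[i].name))
			return 0;
	return 1;
}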
| 1200 | static void inc_chains(void) | ||
| 1201 | { | ||
| 1202 | if (current->hardirq_context) | ||
| 1203 | nr_hardirq_chains++; | ||
| 1204 | else { | ||
| 1205 | if (current->softirq_context) | ||
| 1206 | nr_softirq_chains++; | ||
| 1207 | else | ||
| 1208 | nr_process_chains++; | ||
| 1209 | } | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | #else | ||
| 1213 | |||
| 1214 | static inline int | ||
| 1215 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | ||
| 1216 | struct held_lock *next) | ||
| 1217 | { | ||
| 1218 | return 1; | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | static inline void inc_chains(void) | ||
| 1222 | { | ||
| 1223 | nr_process_chains++; | ||
| 1224 | } | ||
| 1225 | |||
| 824 | #endif | 1226 | #endif |
| 825 | 1227 | ||
| 826 | static int | 1228 | static int |
| @@ -922,47 +1324,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 922 | if (!(check_noncircular(next->class, 0))) | 1324 | if (!(check_noncircular(next->class, 0))) |
| 923 | return print_circular_bug_tail(); | 1325 | return print_circular_bug_tail(); |
| 924 | 1326 | ||
| 925 | #ifdef CONFIG_TRACE_IRQFLAGS | 1327 | if (!check_prev_add_irq(curr, prev, next)) |
| 926 | /* | ||
| 927 | * Prove that the new dependency does not connect a hardirq-safe | ||
| 928 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
| 929 | * the backwards-subgraph starting at <prev>, and the | ||
| 930 | * forwards-subgraph starting at <next>: | ||
| 931 | */ | ||
| 932 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, | ||
| 933 | LOCK_ENABLED_HARDIRQS, "hard")) | ||
| 934 | return 0; | 1328 | return 0; |
| 935 | 1329 | ||
| 936 | /* | 1330 | /* |
| 937 | * Prove that the new dependency does not connect a hardirq-safe-read | ||
| 938 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
| 939 | * the backwards-subgraph starting at <prev>, and the | ||
| 940 | * forwards-subgraph starting at <next>: | ||
| 941 | */ | ||
| 942 | if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, | ||
| 943 | LOCK_ENABLED_HARDIRQS, "hard-read")) | ||
| 944 | return 0; | ||
| 945 | |||
| 946 | /* | ||
| 947 | * Prove that the new dependency does not connect a softirq-safe | ||
| 948 | * lock with a softirq-unsafe lock - to achieve this we search | ||
| 949 | * the backwards-subgraph starting at <prev>, and the | ||
| 950 | * forwards-subgraph starting at <next>: | ||
| 951 | */ | ||
| 952 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, | ||
| 953 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
| 954 | return 0; | ||
| 955 | /* | ||
| 956 | * Prove that the new dependency does not connect a softirq-safe-read | ||
| 957 | * lock with a softirq-unsafe lock - to achieve this we search | ||
| 958 | * the backwards-subgraph starting at <prev>, and the | ||
| 959 | * forwards-subgraph starting at <next>: | ||
| 960 | */ | ||
| 961 | if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, | ||
| 962 | LOCK_ENABLED_SOFTIRQS, "soft")) | ||
| 963 | return 0; | ||
| 964 | #endif | ||
| 965 | /* | ||
| 966 | * For recursive read-locks we do all the dependency checks, | 1331 | * For recursive read-locks we do all the dependency checks, |
| 967 | * but we don't store read-triggered dependencies (only | 1332 | * but we don't store read-triggered dependencies (only |
| 968 | * write-triggered dependencies). This ensures that only the | 1333 | * write-triggered dependencies). This ensures that only the |
| @@ -1088,224 +1453,8 @@ out_bug: | |||
| 1088 | return 0; | 1453 | return 0; |
| 1089 | } | 1454 | } |
| 1090 | 1455 | ||
| 1091 | 1456 | unsigned long nr_lock_chains; | |
| 1092 | /* | 1457 | static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; |
| 1093 | * Is this the address of a static object: | ||
| 1094 | */ | ||
| 1095 | static int static_obj(void *obj) | ||
| 1096 | { | ||
| 1097 | unsigned long start = (unsigned long) &_stext, | ||
| 1098 | end = (unsigned long) &_end, | ||
| 1099 | addr = (unsigned long) obj; | ||
| 1100 | #ifdef CONFIG_SMP | ||
| 1101 | int i; | ||
| 1102 | #endif | ||
| 1103 | |||
| 1104 | /* | ||
| 1105 | * static variable? | ||
| 1106 | */ | ||
| 1107 | if ((addr >= start) && (addr < end)) | ||
| 1108 | return 1; | ||
| 1109 | |||
| 1110 | #ifdef CONFIG_SMP | ||
| 1111 | /* | ||
| 1112 | * percpu var? | ||
| 1113 | */ | ||
| 1114 | for_each_possible_cpu(i) { | ||
| 1115 | start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); | ||
| 1116 | end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM | ||
| 1117 | + per_cpu_offset(i); | ||
| 1118 | |||
| 1119 | if ((addr >= start) && (addr < end)) | ||
| 1120 | return 1; | ||
| 1121 | } | ||
| 1122 | #endif | ||
| 1123 | |||
| 1124 | /* | ||
| 1125 | * module var? | ||
| 1126 | */ | ||
| 1127 | return is_module_address(addr); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | /* | ||
| 1131 | * To make lock name printouts unique, we calculate a unique | ||
| 1132 | * class->name_version generation counter: | ||
| 1133 | */ | ||
| 1134 | static int count_matching_names(struct lock_class *new_class) | ||
| 1135 | { | ||
| 1136 | struct lock_class *class; | ||
| 1137 | int count = 0; | ||
| 1138 | |||
| 1139 | if (!new_class->name) | ||
| 1140 | return 0; | ||
| 1141 | |||
| 1142 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
| 1143 | if (new_class->key - new_class->subclass == class->key) | ||
| 1144 | return class->name_version; | ||
| 1145 | if (class->name && !strcmp(class->name, new_class->name)) | ||
| 1146 | count = max(count, class->name_version); | ||
| 1147 | } | ||
| 1148 | |||
| 1149 | return count + 1; | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | /* | ||
| 1153 | * Register a lock's class in the hash-table, if the class is not present | ||
| 1154 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
| 1155 | * itself, so actual lookup of the hash should be once per lock object. | ||
| 1156 | */ | ||
| 1157 | static inline struct lock_class * | ||
| 1158 | look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | ||
| 1159 | { | ||
| 1160 | struct lockdep_subclass_key *key; | ||
| 1161 | struct list_head *hash_head; | ||
| 1162 | struct lock_class *class; | ||
| 1163 | |||
| 1164 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 1165 | /* | ||
| 1166 | * If the architecture calls into lockdep before initializing | ||
| 1167 | * the hashes then we'll warn about it later. (we cannot printk | ||
| 1168 | * right now) | ||
| 1169 | */ | ||
| 1170 | if (unlikely(!lockdep_initialized)) { | ||
| 1171 | lockdep_init(); | ||
| 1172 | lockdep_init_error = 1; | ||
| 1173 | } | ||
| 1174 | #endif | ||
| 1175 | |||
| 1176 | /* | ||
| 1177 | * Static locks do not have their class-keys yet - for them the key | ||
| 1178 | * is the lock object itself: | ||
| 1179 | */ | ||
| 1180 | if (unlikely(!lock->key)) | ||
| 1181 | lock->key = (void *)lock; | ||
| 1182 | |||
| 1183 | /* | ||
| 1184 | * NOTE: the class-key must be unique. For dynamic locks, a static | ||
| 1185 | * lock_class_key variable is passed in through the mutex_init() | ||
| 1186 | * (or spin_lock_init()) call - which acts as the key. For static | ||
| 1187 | * locks we use the lock object itself as the key. | ||
| 1188 | */ | ||
| 1189 | BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); | ||
| 1190 | |||
| 1191 | key = lock->key->subkeys + subclass; | ||
| 1192 | |||
| 1193 | hash_head = classhashentry(key); | ||
| 1194 | |||
| 1195 | /* | ||
| 1196 | * We can walk the hash lockfree, because the hash only | ||
| 1197 | * grows, and we are careful when adding entries to the end: | ||
| 1198 | */ | ||
| 1199 | list_for_each_entry(class, hash_head, hash_entry) | ||
| 1200 | if (class->key == key) | ||
| 1201 | return class; | ||
| 1202 | |||
| 1203 | return NULL; | ||
| 1204 | } | ||
| 1205 | |||
| 1206 | /* | ||
| 1207 | * Register a lock's class in the hash-table, if the class is not present | ||
| 1208 | * yet. Otherwise we look it up. We cache the result in the lock object | ||
| 1209 | * itself, so actual lookup of the hash should be once per lock object. | ||
| 1210 | */ | ||
| 1211 | static inline struct lock_class * | ||
| 1212 | register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | ||
| 1213 | { | ||
| 1214 | struct lockdep_subclass_key *key; | ||
| 1215 | struct list_head *hash_head; | ||
| 1216 | struct lock_class *class; | ||
| 1217 | unsigned long flags; | ||
| 1218 | |||
| 1219 | class = look_up_lock_class(lock, subclass); | ||
| 1220 | if (likely(class)) | ||
| 1221 | return class; | ||
| 1222 | |||
| 1223 | /* | ||
| 1224 | * Debug-check: all keys must be persistent! | ||
| 1225 | */ | ||
| 1226 | if (!static_obj(lock->key)) { | ||
| 1227 | debug_locks_off(); | ||
| 1228 | printk("INFO: trying to register non-static key.\n"); | ||
| 1229 | printk("the code is fine but needs lockdep annotation.\n"); | ||
| 1230 | printk("turning off the locking correctness validator.\n"); | ||
| 1231 | dump_stack(); | ||
| 1232 | |||
| 1233 | return NULL; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | key = lock->key->subkeys + subclass; | ||
| 1237 | hash_head = classhashentry(key); | ||
| 1238 | |||
| 1239 | raw_local_irq_save(flags); | ||
| 1240 | if (!graph_lock()) { | ||
| 1241 | raw_local_irq_restore(flags); | ||
| 1242 | return NULL; | ||
| 1243 | } | ||
| 1244 | /* | ||
| 1245 | * We have to do the hash-walk again, to avoid races | ||
| 1246 | * with another CPU: | ||
| 1247 | */ | ||
| 1248 | list_for_each_entry(class, hash_head, hash_entry) | ||
| 1249 | if (class->key == key) | ||
| 1250 | goto out_unlock_set; | ||
| 1251 | /* | ||
| 1252 | * Allocate a new key from the static array, and add it to | ||
| 1253 | * the hash: | ||
| 1254 | */ | ||
| 1255 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | ||
| 1256 | if (!debug_locks_off_graph_unlock()) { | ||
| 1257 | raw_local_irq_restore(flags); | ||
| 1258 | return NULL; | ||
| 1259 | } | ||
| 1260 | raw_local_irq_restore(flags); | ||
| 1261 | |||
| 1262 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | ||
| 1263 | printk("turning off the locking correctness validator.\n"); | ||
| 1264 | return NULL; | ||
| 1265 | } | ||
| 1266 | class = lock_classes + nr_lock_classes++; | ||
| 1267 | debug_atomic_inc(&nr_unused_locks); | ||
| 1268 | class->key = key; | ||
| 1269 | class->name = lock->name; | ||
| 1270 | class->subclass = subclass; | ||
| 1271 | INIT_LIST_HEAD(&class->lock_entry); | ||
| 1272 | INIT_LIST_HEAD(&class->locks_before); | ||
| 1273 | INIT_LIST_HEAD(&class->locks_after); | ||
| 1274 | class->name_version = count_matching_names(class); | ||
| 1275 | /* | ||
| 1276 | * We use RCU's safe list-add method to make | ||
| 1277 | * parallel walking of the hash-list safe: | ||
| 1278 | */ | ||
| 1279 | list_add_tail_rcu(&class->hash_entry, hash_head); | ||
| 1280 | |||
| 1281 | if (verbose(class)) { | ||
| 1282 | graph_unlock(); | ||
| 1283 | raw_local_irq_restore(flags); | ||
| 1284 | |||
| 1285 | printk("\nnew class %p: %s", class->key, class->name); | ||
| 1286 | if (class->name_version > 1) | ||
| 1287 | printk("#%d", class->name_version); | ||
| 1288 | printk("\n"); | ||
| 1289 | dump_stack(); | ||
| 1290 | |||
| 1291 | raw_local_irq_save(flags); | ||
| 1292 | if (!graph_lock()) { | ||
| 1293 | raw_local_irq_restore(flags); | ||
| 1294 | return NULL; | ||
| 1295 | } | ||
| 1296 | } | ||
| 1297 | out_unlock_set: | ||
| 1298 | graph_unlock(); | ||
| 1299 | raw_local_irq_restore(flags); | ||
| 1300 | |||
| 1301 | if (!subclass || force) | ||
| 1302 | lock->class_cache = class; | ||
| 1303 | |||
| 1304 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | ||
| 1305 | return NULL; | ||
| 1306 | |||
| 1307 | return class; | ||
| 1308 | } | ||
| 1309 | 1458 | ||
| 1310 | /* | 1459 | /* |
| 1311 | * Look up a dependency chain. If the key is not present yet then | 1460 | * Look up a dependency chain. If the key is not present yet then |
| @@ -1366,21 +1515,72 @@ cache_hit: | |||
| 1366 | chain->chain_key = chain_key; | 1515 | chain->chain_key = chain_key; |
| 1367 | list_add_tail_rcu(&chain->entry, hash_head); | 1516 | list_add_tail_rcu(&chain->entry, hash_head); |
| 1368 | debug_atomic_inc(&chain_lookup_misses); | 1517 | debug_atomic_inc(&chain_lookup_misses); |
| 1369 | #ifdef CONFIG_TRACE_IRQFLAGS | 1518 | inc_chains(); |
| 1370 | if (current->hardirq_context) | 1519 | |
| 1371 | nr_hardirq_chains++; | 1520 | return 1; |
| 1372 | else { | 1521 | } |
| 1373 | if (current->softirq_context) | 1522 | |
| 1374 | nr_softirq_chains++; | 1523 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, |
| 1375 | else | 1524 | struct held_lock *hlock, int chain_head) |
| 1376 | nr_process_chains++; | 1525 | { |
| 1377 | } | 1526 | /* |
| 1378 | #else | 1527 | * Trylock needs to maintain the stack of held locks, but it |
| 1379 | nr_process_chains++; | 1528 | * does not add new dependencies, because trylock can be done |
| 1380 | #endif | 1529 | * in any order. |
| 1530 | * | ||
| 1531 | * We look up the chain_key and do the O(N^2) check and update of | ||
| 1532 | * the dependencies only if this is a new dependency chain. | ||
| 1533 | * (If lookup_chain_cache() returns with 1 it acquires | ||
| 1534 | * graph_lock for us) | ||
| 1535 | */ | ||
| 1536 | if (!hlock->trylock && (hlock->check == 2) && | ||
| 1537 | lookup_chain_cache(curr->curr_chain_key, hlock->class)) { | ||
| 1538 | /* | ||
| 1539 | * Check whether last held lock: | ||
| 1540 | * | ||
| 1541 | * - is irq-safe, if this lock is irq-unsafe | ||
| 1542 | * - is softirq-safe, if this lock is hardirq-unsafe | ||
| 1543 | * | ||
| 1544 | * And check whether the new lock's dependency graph | ||
| 1545 | * could lead back to the previous lock. | ||
| 1546 | * | ||
| 1547 | * Any of these scenarios could lead to a deadlock. If | ||
| 1548 | * all validations pass, we add the new dependency: | ||
| 1549 | */ | ||
| 1550 | int ret = check_deadlock(curr, hlock, lock, hlock->read); | ||
| 1551 | |||
| 1552 | if (!ret) | ||
| 1553 | return 0; | ||
| 1554 | /* | ||
| 1555 | * Mark recursive read, as we jump over it when | ||
| 1556 | * building dependencies (just like we jump over | ||
| 1557 | * trylock entries): | ||
| 1558 | */ | ||
| 1559 | if (ret == 2) | ||
| 1560 | hlock->read = 2; | ||
| 1561 | /* | ||
| 1562 | * Add dependency only if this lock is not the head | ||
| 1563 | * of the chain, and if it's not a secondary read-lock: | ||
| 1564 | */ | ||
| 1565 | if (!chain_head && ret != 2) | ||
| 1566 | if (!check_prevs_add(curr, hlock)) | ||
| 1567 | return 0; | ||
| 1568 | graph_unlock(); | ||
| 1569 | } else | ||
| 1570 | /* after lookup_chain_cache(): */ | ||
| 1571 | if (unlikely(!debug_locks)) | ||
| 1572 | return 0; | ||
| 1381 | 1573 | ||
| 1382 | return 1; | 1574 | return 1; |
| 1383 | } | 1575 | } |
| 1576 | #else | ||
| 1577 | static inline int validate_chain(struct task_struct *curr, | ||
| 1578 | struct lockdep_map *lock, struct held_lock *hlock, | ||
| 1579 | int chain_head) | ||
| 1580 | { | ||
| 1581 | return 1; | ||
| 1582 | } | ||
| 1583 | #endif | ||
| 1384 | 1584 | ||
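The chain cache that validate_chain() consults is pure memoization: if the current chain key has been seen before, the O(N^2) dependency checks for this exact sequence of lock classes were already done and can be skipped. A stripped-down sketch of such a cache (fixed-size and hypothetical; unlike the kernel's, it evicts on collision, which only costs re-validation, never correctness):

#include <stdint.h>

#define CHAIN_SLOTS 1024

static uint64_t chain_cache[CHAIN_SLOTS];

/* Returns 1 if the key is new (caller must run the full checks), 0 on a hit. */
static int chain_cache_check(uint64_t key)
{
	uint64_t *slot = &chain_cache[key % CHAIN_SLOTS];

	if (*slot == key)
		return 0;	/* seen before: skip re-validation */
	*slot = key;		/* remember it for next time */
	return 1;
}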
| 1385 | /* | 1585 | /* |
| 1386 | * We are building curr_chain_key incrementally, so double-check | 1586 | * We are building curr_chain_key incrementally, so double-check |
| @@ -1425,6 +1625,57 @@ static void check_chain_key(struct task_struct *curr) | |||
| 1425 | #endif | 1625 | #endif |
| 1426 | } | 1626 | } |
| 1427 | 1627 | ||
| 1628 | static int | ||
| 1629 | print_usage_bug(struct task_struct *curr, struct held_lock *this, | ||
| 1630 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) | ||
| 1631 | { | ||
| 1632 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||
| 1633 | return 0; | ||
| 1634 | |||
| 1635 | printk("\n=================================\n"); | ||
| 1636 | printk( "[ INFO: inconsistent lock state ]\n"); | ||
| 1637 | print_kernel_version(); | ||
| 1638 | printk( "---------------------------------\n"); | ||
| 1639 | |||
| 1640 | printk("inconsistent {%s} -> {%s} usage.\n", | ||
| 1641 | usage_str[prev_bit], usage_str[new_bit]); | ||
| 1642 | |||
| 1643 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", | ||
| 1644 | curr->comm, curr->pid, | ||
| 1645 | trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, | ||
| 1646 | trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, | ||
| 1647 | trace_hardirqs_enabled(curr), | ||
| 1648 | trace_softirqs_enabled(curr)); | ||
| 1649 | print_lock(this); | ||
| 1650 | |||
| 1651 | printk("{%s} state was registered at:\n", usage_str[prev_bit]); | ||
| 1652 | print_stack_trace(this->class->usage_traces + prev_bit, 1); | ||
| 1653 | |||
| 1654 | print_irqtrace_events(curr); | ||
| 1655 | printk("\nother info that might help us debug this:\n"); | ||
| 1656 | lockdep_print_held_locks(curr); | ||
| 1657 | |||
| 1658 | printk("\nstack backtrace:\n"); | ||
| 1659 | dump_stack(); | ||
| 1660 | |||
| 1661 | return 0; | ||
| 1662 | } | ||
| 1663 | |||
| 1664 | /* | ||
| 1665 | * Print out an error if an invalid bit is set: | ||
| 1666 | */ | ||
| 1667 | static inline int | ||
| 1668 | valid_state(struct task_struct *curr, struct held_lock *this, | ||
| 1669 | enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) | ||
| 1670 | { | ||
| 1671 | if (unlikely(this->class->usage_mask & (1 << bad_bit))) | ||
| 1672 | return print_usage_bug(curr, this, bad_bit, new_bit); | ||
| 1673 | return 1; | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
| 1677 | enum lock_usage_bit new_bit); | ||
| 1678 | |||
| 1428 | #ifdef CONFIG_TRACE_IRQFLAGS | 1679 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 1429 | 1680 | ||
| 1430 | /* | 1681 | /* |
| @@ -1518,90 +1769,30 @@ void print_irqtrace_events(struct task_struct *curr) | |||
| 1518 | print_ip_sym(curr->softirq_disable_ip); | 1769 | print_ip_sym(curr->softirq_disable_ip); |
| 1519 | } | 1770 | } |
| 1520 | 1771 | ||
| 1521 | #endif | 1772 | static int hardirq_verbose(struct lock_class *class) |
| 1522 | |||
| 1523 | static int | ||
| 1524 | print_usage_bug(struct task_struct *curr, struct held_lock *this, | ||
| 1525 | enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) | ||
| 1526 | { | 1773 | { |
| 1527 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1774 | #if HARDIRQ_VERBOSE |
| 1528 | return 0; | 1775 | return class_filter(class); |
| 1529 | 1776 | #endif | |
| 1530 | printk("\n=================================\n"); | ||
| 1531 | printk( "[ INFO: inconsistent lock state ]\n"); | ||
| 1532 | print_kernel_version(); | ||
| 1533 | printk( "---------------------------------\n"); | ||
| 1534 | |||
| 1535 | printk("inconsistent {%s} -> {%s} usage.\n", | ||
| 1536 | usage_str[prev_bit], usage_str[new_bit]); | ||
| 1537 | |||
| 1538 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", | ||
| 1539 | curr->comm, curr->pid, | ||
| 1540 | trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, | ||
| 1541 | trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, | ||
| 1542 | trace_hardirqs_enabled(curr), | ||
| 1543 | trace_softirqs_enabled(curr)); | ||
| 1544 | print_lock(this); | ||
| 1545 | |||
| 1546 | printk("{%s} state was registered at:\n", usage_str[prev_bit]); | ||
| 1547 | print_stack_trace(this->class->usage_traces + prev_bit, 1); | ||
| 1548 | |||
| 1549 | print_irqtrace_events(curr); | ||
| 1550 | printk("\nother info that might help us debug this:\n"); | ||
| 1551 | lockdep_print_held_locks(curr); | ||
| 1552 | |||
| 1553 | printk("\nstack backtrace:\n"); | ||
| 1554 | dump_stack(); | ||
| 1555 | |||
| 1556 | return 0; | 1777 | return 0; |
| 1557 | } | 1778 | } |
| 1558 | 1779 | ||
| 1559 | /* | 1780 | static int softirq_verbose(struct lock_class *class) |
| 1560 | * Print out an error if an invalid bit is set: | ||
| 1561 | */ | ||
| 1562 | static inline int | ||
| 1563 | valid_state(struct task_struct *curr, struct held_lock *this, | ||
| 1564 | enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) | ||
| 1565 | { | 1781 | { |
| 1566 | if (unlikely(this->class->usage_mask & (1 << bad_bit))) | 1782 | #if SOFTIRQ_VERBOSE |
| 1567 | return print_usage_bug(curr, this, bad_bit, new_bit); | 1783 | return class_filter(class); |
| 1568 | return 1; | 1784 | #endif |
| 1785 | return 0; | ||
| 1569 | } | 1786 | } |
| 1570 | 1787 | ||
| 1571 | #define STRICT_READ_CHECKS 1 | 1788 | #define STRICT_READ_CHECKS 1 |
| 1572 | 1789 | ||
| 1573 | /* | 1790 | static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, |
| 1574 | * Mark a lock with a usage bit, and validate the state transition: | 1791 | enum lock_usage_bit new_bit) |
| 1575 | */ | ||
| 1576 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
| 1577 | enum lock_usage_bit new_bit) | ||
| 1578 | { | 1792 | { |
| 1579 | unsigned int new_mask = 1 << new_bit, ret = 1; | 1793 | int ret = 1; |
| 1580 | |||
| 1581 | /* | ||
| 1582 | * If already set then do not dirty the cacheline, | ||
| 1583 | * nor do any checks: | ||
| 1584 | */ | ||
| 1585 | if (likely(this->class->usage_mask & new_mask)) | ||
| 1586 | return 1; | ||
| 1587 | |||
| 1588 | if (!graph_lock()) | ||
| 1589 | return 0; | ||
| 1590 | /* | ||
| 1591 | * Make sure we didnt race: | ||
| 1592 | */ | ||
| 1593 | if (unlikely(this->class->usage_mask & new_mask)) { | ||
| 1594 | graph_unlock(); | ||
| 1595 | return 1; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | this->class->usage_mask |= new_mask; | ||
| 1599 | 1794 | ||
| 1600 | if (!save_trace(this->class->usage_traces + new_bit)) | 1795 | switch(new_bit) { |
| 1601 | return 0; | ||
| 1602 | |||
| 1603 | switch (new_bit) { | ||
| 1604 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 1605 | case LOCK_USED_IN_HARDIRQ: | 1796 | case LOCK_USED_IN_HARDIRQ: |
| 1606 | if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) | 1797 | if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) |
| 1607 | return 0; | 1798 | return 0; |
| @@ -1760,37 +1951,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 1760 | if (softirq_verbose(this->class)) | 1951 | if (softirq_verbose(this->class)) |
| 1761 | ret = 2; | 1952 | ret = 2; |
| 1762 | break; | 1953 | break; |
| 1763 | #endif | ||
| 1764 | case LOCK_USED: | ||
| 1765 | /* | ||
| 1766 | * Add it to the global list of classes: | ||
| 1767 | */ | ||
| 1768 | list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); | ||
| 1769 | debug_atomic_dec(&nr_unused_locks); | ||
| 1770 | break; | ||
| 1771 | default: | 1954 | default: |
| 1772 | if (!debug_locks_off_graph_unlock()) | ||
| 1773 | return 0; | ||
| 1774 | WARN_ON(1); | 1955 | WARN_ON(1); |
| 1775 | return 0; | 1956 | break; |
| 1776 | } | ||
| 1777 | |||
| 1778 | graph_unlock(); | ||
| 1779 | |||
| 1780 | /* | ||
| 1781 | * We must printk outside of the graph_lock: | ||
| 1782 | */ | ||
| 1783 | if (ret == 2) { | ||
| 1784 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); | ||
| 1785 | print_lock(this); | ||
| 1786 | print_irqtrace_events(curr); | ||
| 1787 | dump_stack(); | ||
| 1788 | } | 1957 | } |
| 1789 | 1958 | ||
| 1790 | return ret; | 1959 | return ret; |
| 1791 | } | 1960 | } |
| 1792 | 1961 | ||
| 1793 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 1794 | /* | 1962 | /* |
| 1795 | * Mark all held locks with a usage bit: | 1963 | * Mark all held locks with a usage bit: |
| 1796 | */ | 1964 | */ |
| @@ -1973,9 +2141,176 @@ void trace_softirqs_off(unsigned long ip) | |||
| 1973 | debug_atomic_inc(&redundant_softirqs_off); | 2141 | debug_atomic_inc(&redundant_softirqs_off); |
| 1974 | } | 2142 | } |
| 1975 | 2143 | ||
| 2144 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | ||
| 2145 | { | ||
| 2146 | /* | ||
| 2147 | * If non-trylock use in a hardirq or softirq context, then | ||
| 2148 | * mark the lock as used in these contexts: | ||
| 2149 | */ | ||
| 2150 | if (!hlock->trylock) { | ||
| 2151 | if (hlock->read) { | ||
| 2152 | if (curr->hardirq_context) | ||
| 2153 | if (!mark_lock(curr, hlock, | ||
| 2154 | LOCK_USED_IN_HARDIRQ_READ)) | ||
| 2155 | return 0; | ||
| 2156 | if (curr->softirq_context) | ||
| 2157 | if (!mark_lock(curr, hlock, | ||
| 2158 | LOCK_USED_IN_SOFTIRQ_READ)) | ||
| 2159 | return 0; | ||
| 2160 | } else { | ||
| 2161 | if (curr->hardirq_context) | ||
| 2162 | if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) | ||
| 2163 | return 0; | ||
| 2164 | if (curr->softirq_context) | ||
| 2165 | if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) | ||
| 2166 | return 0; | ||
| 2167 | } | ||
| 2168 | } | ||
| 2169 | if (!hlock->hardirqs_off) { | ||
| 2170 | if (hlock->read) { | ||
| 2171 | if (!mark_lock(curr, hlock, | ||
| 2172 | LOCK_ENABLED_HARDIRQS_READ)) | ||
| 2173 | return 0; | ||
| 2174 | if (curr->softirqs_enabled) | ||
| 2175 | if (!mark_lock(curr, hlock, | ||
| 2176 | LOCK_ENABLED_SOFTIRQS_READ)) | ||
| 2177 | return 0; | ||
| 2178 | } else { | ||
| 2179 | if (!mark_lock(curr, hlock, | ||
| 2180 | LOCK_ENABLED_HARDIRQS)) | ||
| 2181 | return 0; | ||
| 2182 | if (curr->softirqs_enabled) | ||
| 2183 | if (!mark_lock(curr, hlock, | ||
| 2184 | LOCK_ENABLED_SOFTIRQS)) | ||
| 2185 | return 0; | ||
| 2186 | } | ||
| 2187 | } | ||
| 2188 | |||
| 2189 | return 1; | ||
| 2190 | } | ||
| 2191 | |||
| 2192 | static int separate_irq_context(struct task_struct *curr, | ||
| 2193 | struct held_lock *hlock) | ||
| 2194 | { | ||
| 2195 | unsigned int depth = curr->lockdep_depth; | ||
| 2196 | |||
| 2197 | /* | ||
| 2198 | * Keep track of points where we cross into an interrupt context: | ||
| 2199 | */ | ||
| 2200 | hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + | ||
| 2201 | curr->softirq_context; | ||
| 2202 | if (depth) { | ||
| 2203 | struct held_lock *prev_hlock; | ||
| 2204 | |||
| 2205 | prev_hlock = curr->held_locks + depth-1; | ||
| 2206 | /* | ||
| 2207 | * If we cross into another context, reset the | ||
| 2208 | * hash key (this also prevents the checking and the | ||
| 2209 | * adding of the dependency to 'prev'): | ||
| 2210 | */ | ||
| 2211 | if (prev_hlock->irq_context != hlock->irq_context) | ||
| 2212 | return 1; | ||
| 2213 | } | ||
| 2214 | return 0; | ||
| 2215 | } | ||
| 2216 | |||
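The irq_context encoding in separate_irq_context() packs the interrupt state into a small integer: the hardirq flag contributes 2 and curr->softirq_context (a nesting count, in practice 0 or 1) contributes the rest, so 0 means plain process context, 1 softirq, 2 hardirq, and 3 a hardirq that interrupted a softirq. Whenever this value differs between two adjacent held locks the chain is cut, so dependencies never span an interrupt boundary.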
| 2217 | #else | ||
| 2218 | |||
| 2219 | static inline | ||
| 2220 | int mark_lock_irq(struct task_struct *curr, struct held_lock *this, | ||
| 2221 | enum lock_usage_bit new_bit) | ||
| 2222 | { | ||
| 2223 | WARN_ON(1); | ||
| 2224 | return 1; | ||
| 2225 | } | ||
| 2226 | |||
| 2227 | static inline int mark_irqflags(struct task_struct *curr, | ||
| 2228 | struct held_lock *hlock) | ||
| 2229 | { | ||
| 2230 | return 1; | ||
| 2231 | } | ||
| 2232 | |||
| 2233 | static inline int separate_irq_context(struct task_struct *curr, | ||
| 2234 | struct held_lock *hlock) | ||
| 2235 | { | ||
| 2236 | return 0; | ||
| 2237 | } | ||
| 2238 | |||
| 1976 | #endif | 2239 | #endif |
| 1977 | 2240 | ||
| 1978 | /* | 2241 | /* |
| 2242 | * Mark a lock with a usage bit, and validate the state transition: | ||
| 2243 | */ | ||
| 2244 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
| 2245 | enum lock_usage_bit new_bit) | ||
| 2246 | { | ||
| 2247 | unsigned int new_mask = 1 << new_bit, ret = 1; | ||
| 2248 | |||
| 2249 | /* | ||
| 2250 | * If already set then do not dirty the cacheline, | ||
| 2251 | * nor do any checks: | ||
| 2252 | */ | ||
| 2253 | if (likely(this->class->usage_mask & new_mask)) | ||
| 2254 | return 1; | ||
| 2255 | |||
| 2256 | if (!graph_lock()) | ||
| 2257 | return 0; | ||
| 2258 | /* | ||
| 2259 | * Make sure we didn't race: | ||
| 2260 | */ | ||
| 2261 | if (unlikely(this->class->usage_mask & new_mask)) { | ||
| 2262 | graph_unlock(); | ||
| 2263 | return 1; | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | this->class->usage_mask |= new_mask; | ||
| 2267 | |||
| 2268 | if (!save_trace(this->class->usage_traces + new_bit)) | ||
| 2269 | return 0; | ||
| 2270 | |||
| 2271 | switch (new_bit) { | ||
| 2272 | case LOCK_USED_IN_HARDIRQ: | ||
| 2273 | case LOCK_USED_IN_SOFTIRQ: | ||
| 2274 | case LOCK_USED_IN_HARDIRQ_READ: | ||
| 2275 | case LOCK_USED_IN_SOFTIRQ_READ: | ||
| 2276 | case LOCK_ENABLED_HARDIRQS: | ||
| 2277 | case LOCK_ENABLED_SOFTIRQS: | ||
| 2278 | case LOCK_ENABLED_HARDIRQS_READ: | ||
| 2279 | case LOCK_ENABLED_SOFTIRQS_READ: | ||
| 2280 | ret = mark_lock_irq(curr, this, new_bit); | ||
| 2281 | if (!ret) | ||
| 2282 | return 0; | ||
| 2283 | break; | ||
| 2284 | case LOCK_USED: | ||
| 2285 | /* | ||
| 2286 | * Add it to the global list of classes: | ||
| 2287 | */ | ||
| 2288 | list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); | ||
| 2289 | debug_atomic_dec(&nr_unused_locks); | ||
| 2290 | break; | ||
| 2291 | default: | ||
| 2292 | if (!debug_locks_off_graph_unlock()) | ||
| 2293 | return 0; | ||
| 2294 | WARN_ON(1); | ||
| 2295 | return 0; | ||
| 2296 | } | ||
| 2297 | |||
| 2298 | graph_unlock(); | ||
| 2299 | |||
| 2300 | /* | ||
| 2301 | * We must printk outside of the graph_lock: | ||
| 2302 | */ | ||
| 2303 | if (ret == 2) { | ||
| 2304 | printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); | ||
| 2305 | print_lock(this); | ||
| 2306 | print_irqtrace_events(curr); | ||
| 2307 | dump_stack(); | ||
| 2308 | } | ||
| 2309 | |||
| 2310 | return ret; | ||
| 2311 | } | ||
| 2312 | |||
| 2313 | /* | ||
| 1979 | * Initialize a lock instance's lock-class mapping info: | 2314 | * Initialize a lock instance's lock-class mapping info: |
| 1980 | */ | 2315 | */ |
| 1981 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2316 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
| @@ -1999,6 +2334,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
| 1999 | lock->name = name; | 2334 | lock->name = name; |
| 2000 | lock->key = key; | 2335 | lock->key = key; |
| 2001 | lock->class_cache = NULL; | 2336 | lock->class_cache = NULL; |
| 2337 | #ifdef CONFIG_LOCK_STAT | ||
| 2338 | lock->cpu = raw_smp_processor_id(); | ||
| 2339 | #endif | ||
| 2002 | if (subclass) | 2340 | if (subclass) |
| 2003 | register_lock_class(lock, subclass, 1); | 2341 | register_lock_class(lock, subclass, 1); |
| 2004 | } | 2342 | } |
| @@ -2020,6 +2358,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2020 | int chain_head = 0; | 2358 | int chain_head = 0; |
| 2021 | u64 chain_key; | 2359 | u64 chain_key; |
| 2022 | 2360 | ||
| 2361 | if (!prove_locking) | ||
| 2362 | check = 1; | ||
| 2363 | |||
| 2023 | if (unlikely(!debug_locks)) | 2364 | if (unlikely(!debug_locks)) |
| 2024 | return 0; | 2365 | return 0; |
| 2025 | 2366 | ||
| @@ -2070,57 +2411,18 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2070 | hlock->read = read; | 2411 | hlock->read = read; |
| 2071 | hlock->check = check; | 2412 | hlock->check = check; |
| 2072 | hlock->hardirqs_off = hardirqs_off; | 2413 | hlock->hardirqs_off = hardirqs_off; |
| 2073 | 2414 | #ifdef CONFIG_LOCK_STAT | |
| 2074 | if (check != 2) | 2415 | hlock->waittime_stamp = 0; |
| 2075 | goto out_calc_hash; | 2416 | hlock->holdtime_stamp = sched_clock(); |
| 2076 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 2077 | /* | ||
| 2078 | * If non-trylock use in a hardirq or softirq context, then | ||
| 2079 | * mark the lock as used in these contexts: | ||
| 2080 | */ | ||
| 2081 | if (!trylock) { | ||
| 2082 | if (read) { | ||
| 2083 | if (curr->hardirq_context) | ||
| 2084 | if (!mark_lock(curr, hlock, | ||
| 2085 | LOCK_USED_IN_HARDIRQ_READ)) | ||
| 2086 | return 0; | ||
| 2087 | if (curr->softirq_context) | ||
| 2088 | if (!mark_lock(curr, hlock, | ||
| 2089 | LOCK_USED_IN_SOFTIRQ_READ)) | ||
| 2090 | return 0; | ||
| 2091 | } else { | ||
| 2092 | if (curr->hardirq_context) | ||
| 2093 | if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) | ||
| 2094 | return 0; | ||
| 2095 | if (curr->softirq_context) | ||
| 2096 | if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) | ||
| 2097 | return 0; | ||
| 2098 | } | ||
| 2099 | } | ||
| 2100 | if (!hardirqs_off) { | ||
| 2101 | if (read) { | ||
| 2102 | if (!mark_lock(curr, hlock, | ||
| 2103 | LOCK_ENABLED_HARDIRQS_READ)) | ||
| 2104 | return 0; | ||
| 2105 | if (curr->softirqs_enabled) | ||
| 2106 | if (!mark_lock(curr, hlock, | ||
| 2107 | LOCK_ENABLED_SOFTIRQS_READ)) | ||
| 2108 | return 0; | ||
| 2109 | } else { | ||
| 2110 | if (!mark_lock(curr, hlock, | ||
| 2111 | LOCK_ENABLED_HARDIRQS)) | ||
| 2112 | return 0; | ||
| 2113 | if (curr->softirqs_enabled) | ||
| 2114 | if (!mark_lock(curr, hlock, | ||
| 2115 | LOCK_ENABLED_SOFTIRQS)) | ||
| 2116 | return 0; | ||
| 2117 | } | ||
| 2118 | } | ||
| 2119 | #endif | 2417 | #endif |
| 2418 | |||
| 2419 | if (check == 2 && !mark_irqflags(curr, hlock)) | ||
| 2420 | return 0; | ||
| 2421 | |||
| 2120 | /* mark it as used: */ | 2422 | /* mark it as used: */ |
| 2121 | if (!mark_lock(curr, hlock, LOCK_USED)) | 2423 | if (!mark_lock(curr, hlock, LOCK_USED)) |
| 2122 | return 0; | 2424 | return 0; |
| 2123 | out_calc_hash: | 2425 | |
| 2124 | /* | 2426 | /* |
| 2125 | * Calculate the chain hash: it's the combined hash of all the | 2427 | * Calculate the chain hash: it's the combined hash of all the |
| 2126 | * lock keys along the dependency chain. We save the hash value | 2428 | * lock keys along the dependency chain. We save the hash value |
| @@ -2143,77 +2445,15 @@ out_calc_hash: | |||
| 2143 | } | 2445 | } |
| 2144 | 2446 | ||
| 2145 | hlock->prev_chain_key = chain_key; | 2447 | hlock->prev_chain_key = chain_key; |
| 2146 | 2448 | if (separate_irq_context(curr, hlock)) { | |
| 2147 | #ifdef CONFIG_TRACE_IRQFLAGS | 2449 | chain_key = 0; |
| 2148 | /* | 2450 | chain_head = 1; |
| 2149 | * Keep track of points where we cross into an interrupt context: | ||
| 2150 | */ | ||
| 2151 | hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + | ||
| 2152 | curr->softirq_context; | ||
| 2153 | if (depth) { | ||
| 2154 | struct held_lock *prev_hlock; | ||
| 2155 | |||
| 2156 | prev_hlock = curr->held_locks + depth-1; | ||
| 2157 | /* | ||
| 2158 | * If we cross into another context, reset the | ||
| 2159 | * hash key (this also prevents the checking and the | ||
| 2160 | * adding of the dependency to 'prev'): | ||
| 2161 | */ | ||
| 2162 | if (prev_hlock->irq_context != hlock->irq_context) { | ||
| 2163 | chain_key = 0; | ||
| 2164 | chain_head = 1; | ||
| 2165 | } | ||
| 2166 | } | 2451 | } |
| 2167 | #endif | ||
| 2168 | chain_key = iterate_chain_key(chain_key, id); | 2452 | chain_key = iterate_chain_key(chain_key, id); |
| 2169 | curr->curr_chain_key = chain_key; | 2453 | curr->curr_chain_key = chain_key; |
| 2170 | 2454 | ||
| 2171 | /* | 2455 | if (!validate_chain(curr, lock, hlock, chain_head)) |
| 2172 | * Trylock needs to maintain the stack of held locks, but it | 2456 | return 0; |
| 2173 | * does not add new dependencies, because trylock can be done | ||
| 2174 | * in any order. | ||
| 2175 | * | ||
| 2176 | * We look up the chain_key and do the O(N^2) check and update of | ||
| 2177 | * the dependencies only if this is a new dependency chain. | ||
| 2178 | * (If lookup_chain_cache() returns with 1 it acquires | ||
| 2179 | * graph_lock for us) | ||
| 2180 | */ | ||
| 2181 | if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) { | ||
| 2182 | /* | ||
| 2183 | * Check whether last held lock: | ||
| 2184 | * | ||
| 2185 | * - is irq-safe, if this lock is irq-unsafe | ||
| 2186 | * - is softirq-safe, if this lock is hardirq-unsafe | ||
| 2187 | * | ||
| 2188 | * And check whether the new lock's dependency graph | ||
| 2189 | * could lead back to the previous lock. | ||
| 2190 | * | ||
| 2191 | * any of these scenarios could lead to a deadlock. If | ||
| 2192 | * All validations | ||
| 2193 | */ | ||
| 2194 | int ret = check_deadlock(curr, hlock, lock, read); | ||
| 2195 | |||
| 2196 | if (!ret) | ||
| 2197 | return 0; | ||
| 2198 | /* | ||
| 2199 | * Mark recursive read, as we jump over it when | ||
| 2200 | * building dependencies (just like we jump over | ||
| 2201 | * trylock entries): | ||
| 2202 | */ | ||
| 2203 | if (ret == 2) | ||
| 2204 | hlock->read = 2; | ||
| 2205 | /* | ||
| 2206 | * Add dependency only if this lock is not the head | ||
| 2207 | * of the chain, and if it's not a secondary read-lock: | ||
| 2208 | */ | ||
| 2209 | if (!chain_head && ret != 2) | ||
| 2210 | if (!check_prevs_add(curr, hlock)) | ||
| 2211 | return 0; | ||
| 2212 | graph_unlock(); | ||
| 2213 | } else | ||
| 2214 | /* after lookup_chain_cache(): */ | ||
| 2215 | if (unlikely(!debug_locks)) | ||
| 2216 | return 0; | ||
| 2217 | 2457 | ||
| 2218 | curr->lockdep_depth++; | 2458 | curr->lockdep_depth++; |
| 2219 | check_chain_key(curr); | 2459 | check_chain_key(curr); |
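The chain key folded via iterate_chain_key() above is a rolling 64-bit hash: each new class id is mixed into the running key, so the same sequence of lock classes always yields the same key. A sketch of one such fold, rotate-and-xor (the rotation width here is illustrative, not the kernel's exact iterate_chain_key() definition):

#include <stdint.h>

#define CLASS_BITS 13

static uint64_t fold_chain_key(uint64_t key, uint64_t class_id)
{
	/* rotate left by CLASS_BITS, then mix in the next class id */
	return ((key << CLASS_BITS) | (key >> (64 - CLASS_BITS))) ^ class_id;
}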
| @@ -2315,6 +2555,8 @@ lock_release_non_nested(struct task_struct *curr, | |||
| 2315 | return print_unlock_inbalance_bug(curr, lock, ip); | 2555 | return print_unlock_inbalance_bug(curr, lock, ip); |
| 2316 | 2556 | ||
| 2317 | found_it: | 2557 | found_it: |
| 2558 | lock_release_holdtime(hlock); | ||
| 2559 | |||
| 2318 | /* | 2560 | /* |
| 2319 | * We have the right lock to unlock, 'hlock' points to it. | 2561 | * We have the right lock to unlock, 'hlock' points to it. |
| 2320 | * Now we remove it from the stack, and add back the other | 2562 | * Now we remove it from the stack, and add back the other |
| @@ -2367,6 +2609,8 @@ static int lock_release_nested(struct task_struct *curr, | |||
| 2367 | 2609 | ||
| 2368 | curr->curr_chain_key = hlock->prev_chain_key; | 2610 | curr->curr_chain_key = hlock->prev_chain_key; |
| 2369 | 2611 | ||
| 2612 | lock_release_holdtime(hlock); | ||
| 2613 | |||
| 2370 | #ifdef CONFIG_DEBUG_LOCKDEP | 2614 | #ifdef CONFIG_DEBUG_LOCKDEP |
| 2371 | hlock->prev_chain_key = 0; | 2615 | hlock->prev_chain_key = 0; |
| 2372 | hlock->class = NULL; | 2616 | hlock->class = NULL; |
| @@ -2441,6 +2685,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2441 | { | 2685 | { |
| 2442 | unsigned long flags; | 2686 | unsigned long flags; |
| 2443 | 2687 | ||
| 2688 | if (unlikely(!lock_stat && !prove_locking)) | ||
| 2689 | return; | ||
| 2690 | |||
| 2444 | if (unlikely(current->lockdep_recursion)) | 2691 | if (unlikely(current->lockdep_recursion)) |
| 2445 | return; | 2692 | return; |
| 2446 | 2693 | ||
| @@ -2460,6 +2707,9 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
| 2460 | { | 2707 | { |
| 2461 | unsigned long flags; | 2708 | unsigned long flags; |
| 2462 | 2709 | ||
| 2710 | if (unlikely(!lock_stat && !prove_locking)) | ||
| 2711 | return; | ||
| 2712 | |||
| 2463 | if (unlikely(current->lockdep_recursion)) | 2713 | if (unlikely(current->lockdep_recursion)) |
| 2464 | return; | 2714 | return; |
| 2465 | 2715 | ||
| @@ -2473,6 +2723,166 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
| 2473 | 2723 | ||
| 2474 | EXPORT_SYMBOL_GPL(lock_release); | 2724 | EXPORT_SYMBOL_GPL(lock_release); |
| 2475 | 2725 | ||
| 2726 | #ifdef CONFIG_LOCK_STAT | ||
| 2727 | static int | ||
| 2728 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | ||
| 2729 | unsigned long ip) | ||
| 2730 | { | ||
| 2731 | if (!debug_locks_off()) | ||
| 2732 | return 0; | ||
| 2733 | if (debug_locks_silent) | ||
| 2734 | return 0; | ||
| 2735 | |||
| 2736 | printk("\n=================================\n"); | ||
| 2737 | printk( "[ BUG: bad contention detected! ]\n"); | ||
| 2738 | printk( "---------------------------------\n"); | ||
| 2739 | printk("%s/%d is trying to contend lock (", | ||
| 2740 | curr->comm, curr->pid); | ||
| 2741 | print_lockdep_cache(lock); | ||
| 2742 | printk(") at:\n"); | ||
| 2743 | print_ip_sym(ip); | ||
| 2744 | printk("but there are no locks held!\n"); | ||
| 2745 | printk("\nother info that might help us debug this:\n"); | ||
| 2746 | lockdep_print_held_locks(curr); | ||
| 2747 | |||
| 2748 | printk("\nstack backtrace:\n"); | ||
| 2749 | dump_stack(); | ||
| 2750 | |||
| 2751 | return 0; | ||
| 2752 | } | ||
| 2753 | |||
| 2754 | static void | ||
| 2755 | __lock_contended(struct lockdep_map *lock, unsigned long ip) | ||
| 2756 | { | ||
| 2757 | struct task_struct *curr = current; | ||
| 2758 | struct held_lock *hlock, *prev_hlock; | ||
| 2759 | struct lock_class_stats *stats; | ||
| 2760 | unsigned int depth; | ||
| 2761 | int i, point; | ||
| 2762 | |||
| 2763 | depth = curr->lockdep_depth; | ||
| 2764 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
| 2765 | return; | ||
| 2766 | |||
| 2767 | prev_hlock = NULL; | ||
| 2768 | for (i = depth-1; i >= 0; i--) { | ||
| 2769 | hlock = curr->held_locks + i; | ||
| 2770 | /* | ||
| 2771 | * We must not cross into another context: | ||
| 2772 | */ | ||
| 2773 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
| 2774 | break; | ||
| 2775 | if (hlock->instance == lock) | ||
| 2776 | goto found_it; | ||
| 2777 | prev_hlock = hlock; | ||
| 2778 | } | ||
| 2779 | print_lock_contention_bug(curr, lock, ip); | ||
| 2780 | return; | ||
| 2781 | |||
| 2782 | found_it: | ||
| 2783 | hlock->waittime_stamp = sched_clock(); | ||
| 2784 | |||
| 2785 | point = lock_contention_point(hlock->class, ip); | ||
| 2786 | |||
| 2787 | stats = get_lock_stats(hlock->class); | ||
| 2788 | if (point < ARRAY_SIZE(stats->contention_point)) | ||
| 2789 | stats->contention_point[point]++; | ||
| 2790 | if (lock->cpu != smp_processor_id()) | ||
| 2791 | stats->bounces[bounce_contended + !!hlock->read]++; | ||
| 2792 | put_lock_stats(stats); | ||
| 2793 | } | ||
| 2794 | |||
| 2795 | static void | ||
| 2796 | __lock_acquired(struct lockdep_map *lock) | ||
| 2797 | { | ||
| 2798 | struct task_struct *curr = current; | ||
| 2799 | struct held_lock *hlock, *prev_hlock; | ||
| 2800 | struct lock_class_stats *stats; | ||
| 2801 | unsigned int depth; | ||
| 2802 | u64 now; | ||
| 2803 | s64 waittime = 0; | ||
| 2804 | int i, cpu; | ||
| 2805 | |||
| 2806 | depth = curr->lockdep_depth; | ||
| 2807 | if (DEBUG_LOCKS_WARN_ON(!depth)) | ||
| 2808 | return; | ||
| 2809 | |||
| 2810 | prev_hlock = NULL; | ||
| 2811 | for (i = depth-1; i >= 0; i--) { | ||
| 2812 | hlock = curr->held_locks + i; | ||
| 2813 | /* | ||
| 2814 | * We must not cross into another context: | ||
| 2815 | */ | ||
| 2816 | if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) | ||
| 2817 | break; | ||
| 2818 | if (hlock->instance == lock) | ||
| 2819 | goto found_it; | ||
| 2820 | prev_hlock = hlock; | ||
| 2821 | } | ||
| 2822 | print_lock_contention_bug(curr, lock, _RET_IP_); | ||
| 2823 | return; | ||
| 2824 | |||
| 2825 | found_it: | ||
| 2826 | cpu = smp_processor_id(); | ||
| 2827 | if (hlock->waittime_stamp) { | ||
| 2828 | now = sched_clock(); | ||
| 2829 | waittime = now - hlock->waittime_stamp; | ||
| 2830 | hlock->holdtime_stamp = now; | ||
| 2831 | } | ||
| 2832 | |||
| 2833 | stats = get_lock_stats(hlock->class); | ||
| 2834 | if (waittime) { | ||
| 2835 | if (hlock->read) | ||
| 2836 | lock_time_inc(&stats->read_waittime, waittime); | ||
| 2837 | else | ||
| 2838 | lock_time_inc(&stats->write_waittime, waittime); | ||
| 2839 | } | ||
| 2840 | if (lock->cpu != cpu) | ||
| 2841 | stats->bounces[bounce_acquired + !!hlock->read]++; | ||
| 2842 | put_lock_stats(stats); | ||
| 2843 | |||
| 2844 | lock->cpu = cpu; | ||
| 2845 | } | ||
| 2846 | |||
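The __lock_contended()/__lock_acquired() pair above reduces to timestamp arithmetic: stamp when we start waiting, stamp again when we get the lock, and charge the difference as wait time (hold time then runs from acquisition to release). A user-space sketch with clock_gettime() standing in for sched_clock(); all names are hypothetical:

#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

struct lock_times {
	uint64_t wait_ns;	/* total time spent blocked on the lock */
	uint64_t hold_ns;	/* total time the lock was held */
	uint64_t contended_at;	/* 0 when the last acquisition was uncontended */
	uint64_t acquired_at;
};

static void on_contended(struct lock_times *t) { t->contended_at = now_ns(); }

static void on_acquired(struct lock_times *t)
{
	t->acquired_at = now_ns();
	if (t->contended_at) {
		t->wait_ns += t->acquired_at - t->contended_at;
		t->contended_at = 0;
	}
}

static void on_released(struct lock_times *t)
{
	t->hold_ns += now_ns() - t->acquired_at;
}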
| 2847 | void lock_contended(struct lockdep_map *lock, unsigned long ip) | ||
| 2848 | { | ||
| 2849 | unsigned long flags; | ||
| 2850 | |||
| 2851 | if (unlikely(!lock_stat)) | ||
| 2852 | return; | ||
| 2853 | |||
| 2854 | if (unlikely(current->lockdep_recursion)) | ||
| 2855 | return; | ||
| 2856 | |||
| 2857 | raw_local_irq_save(flags); | ||
| 2858 | check_flags(flags); | ||
| 2859 | current->lockdep_recursion = 1; | ||
| 2860 | __lock_contended(lock, ip); | ||
| 2861 | current->lockdep_recursion = 0; | ||
| 2862 | raw_local_irq_restore(flags); | ||
| 2863 | } | ||
| 2864 | EXPORT_SYMBOL_GPL(lock_contended); | ||
| 2865 | |||
| 2866 | void lock_acquired(struct lockdep_map *lock) | ||
| 2867 | { | ||
| 2868 | unsigned long flags; | ||
| 2869 | |||
| 2870 | if (unlikely(!lock_stat)) | ||
| 2871 | return; | ||
| 2872 | |||
| 2873 | if (unlikely(current->lockdep_recursion)) | ||
| 2874 | return; | ||
| 2875 | |||
| 2876 | raw_local_irq_save(flags); | ||
| 2877 | check_flags(flags); | ||
| 2878 | current->lockdep_recursion = 1; | ||
| 2879 | __lock_acquired(lock); | ||
| 2880 | current->lockdep_recursion = 0; | ||
| 2881 | raw_local_irq_restore(flags); | ||
| 2882 | } | ||
| 2883 | EXPORT_SYMBOL_GPL(lock_acquired); | ||
| 2884 | #endif | ||
| 2885 | |||
| 2476 | /* | 2886 | /* |
| 2477 | * Used by the testsuite, sanitize the validator state | 2887 | * Used by the testsuite, sanitize the validator state |
| 2478 | * after a simulated failure: | 2888 | * after a simulated failure: |
| @@ -2636,8 +3046,11 @@ void __init lockdep_info(void) | |||
| 2636 | sizeof(struct held_lock) * MAX_LOCK_DEPTH); | 3046 | sizeof(struct held_lock) * MAX_LOCK_DEPTH); |
| 2637 | 3047 | ||
| 2638 | #ifdef CONFIG_DEBUG_LOCKDEP | 3048 | #ifdef CONFIG_DEBUG_LOCKDEP |
| 2639 | if (lockdep_init_error) | 3049 | if (lockdep_init_error) { |
| 2640 | printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); | 3050 | printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n"); |
| 3051 | printk("Call stack leading to lockdep invocation was:\n"); | ||
| 3052 | print_stack_trace(&lockdep_init_trace, 0); | ||
| 3053 | } | ||
| 2641 | #endif | 3054 | #endif |
| 2642 | } | 3055 | } |
| 2643 | 3056 | ||
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 58f35e586ee3..9f17af4a2490 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
| @@ -5,7 +5,8 @@ | |||
| 5 | * | 5 | * |
| 6 | * Started by Ingo Molnar: | 6 | * Started by Ingo Molnar: |
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
| 9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 9 | * | 10 | * |
| 10 | * Code for /proc/lockdep and /proc/lockdep_stats: | 11 | * Code for /proc/lockdep and /proc/lockdep_stats: |
| 11 | * | 12 | * |
| @@ -15,6 +16,10 @@ | |||
| 15 | #include <linux/seq_file.h> | 16 | #include <linux/seq_file.h> |
| 16 | #include <linux/kallsyms.h> | 17 | #include <linux/kallsyms.h> |
| 17 | #include <linux/debug_locks.h> | 18 | #include <linux/debug_locks.h> |
| 19 | #include <linux/vmalloc.h> | ||
| 20 | #include <linux/sort.h> | ||
| 21 | #include <asm/uaccess.h> | ||
| 22 | #include <asm/div64.h> | ||
| 18 | 23 | ||
| 19 | #include "lockdep_internals.h" | 24 | #include "lockdep_internals.h" |
| 20 | 25 | ||
| @@ -271,8 +276,10 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
| 271 | if (nr_list_entries) | 276 | if (nr_list_entries) |
| 272 | factor = sum_forward_deps / nr_list_entries; | 277 | factor = sum_forward_deps / nr_list_entries; |
| 273 | 278 | ||
| 279 | #ifdef CONFIG_PROVE_LOCKING | ||
| 274 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", | 280 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", |
| 275 | nr_lock_chains, MAX_LOCKDEP_CHAINS); | 281 | nr_lock_chains, MAX_LOCKDEP_CHAINS); |
| 282 | #endif | ||
| 276 | 283 | ||
| 277 | #ifdef CONFIG_TRACE_IRQFLAGS | 284 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 278 | seq_printf(m, " in-hardirq chains: %11u\n", | 285 | seq_printf(m, " in-hardirq chains: %11u\n", |
| @@ -342,6 +349,292 @@ static const struct file_operations proc_lockdep_stats_operations = { | |||
| 342 | .release = seq_release, | 349 | .release = seq_release, |
| 343 | }; | 350 | }; |
| 344 | 351 | ||
| 352 | #ifdef CONFIG_LOCK_STAT | ||
| 353 | |||
| 354 | struct lock_stat_data { | ||
| 355 | struct lock_class *class; | ||
| 356 | struct lock_class_stats stats; | ||
| 357 | }; | ||
| 358 | |||
| 359 | struct lock_stat_seq { | ||
| 360 | struct lock_stat_data *iter; | ||
| 361 | struct lock_stat_data *iter_end; | ||
| 362 | struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; | ||
| 363 | }; | ||
| 364 | |||
| 365 | /* | ||
| 366 | * sort on absolute number of contentions | ||
| 367 | */ | ||
| 368 | static int lock_stat_cmp(const void *l, const void *r) | ||
| 369 | { | ||
| 370 | const struct lock_stat_data *dl = l, *dr = r; | ||
| 371 | unsigned long nl, nr; | ||
| 372 | |||
| 373 | nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; | ||
| 374 | nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; | ||
| 375 | |||
| 376 | return nr - nl; | ||
| 377 | } | ||
| 378 | |||
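Note the comparator convention here: sort() places l before r when the return value is negative, so returning nr - nl (right-hand total minus left-hand total) orders the classes by descending contention count.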
| 379 | static void seq_line(struct seq_file *m, char c, int offset, int length) | ||
| 380 | { | ||
| 381 | int i; | ||
| 382 | |||
| 383 | for (i = 0; i < offset; i++) | ||
| 384 | seq_puts(m, " "); | ||
| 385 | for (i = 0; i < length; i++) | ||
| 386 | seq_printf(m, "%c", c); | ||
| 387 | seq_puts(m, "\n"); | ||
| 388 | } | ||
| 389 | |||
| 390 | static void snprint_time(char *buf, size_t bufsiz, s64 nr) | ||
| 391 | { | ||
| 392 | unsigned long rem; | ||
| 393 | |||
| 394 | rem = do_div(nr, 1000); /* XXX: do_div_signed */ | ||
| 395 | snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); | ||
| 396 | } | ||
| 397 | |||
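Worked example of the rounding in snprint_time(): the recorded sched_clock() deltas are nanoseconds, so for nr = 1234567 the do_div() leaves nr = 1234 and rem = 567; ((567 + 5) / 10) rounds to 57, and the buffer ends up holding "1234.57", i.e. microseconds with two rounded decimal places.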
| 398 | static void seq_time(struct seq_file *m, s64 time) | ||
| 399 | { | ||
| 400 | char num[15]; | ||
| 401 | |||
| 402 | snprint_time(num, sizeof(num), time); | ||
| 403 | seq_printf(m, " %14s", num); | ||
| 404 | } | ||
| 405 | |||
| 406 | static void seq_lock_time(struct seq_file *m, struct lock_time *lt) | ||
| 407 | { | ||
| 408 | seq_printf(m, "%14lu", lt->nr); | ||
| 409 | seq_time(m, lt->min); | ||
| 410 | seq_time(m, lt->max); | ||
| 411 | seq_time(m, lt->total); | ||
| 412 | } | ||
| 413 | |||
| 414 | static void seq_stats(struct seq_file *m, struct lock_stat_data *data) | ||
| 415 | { | ||
| 416 | char name[39]; | ||
| 417 | struct lock_class *class; | ||
| 418 | struct lock_class_stats *stats; | ||
| 419 | int i, namelen; | ||
| 420 | |||
| 421 | class = data->class; | ||
| 422 | stats = &data->stats; | ||
| 423 | |||
| 424 | namelen = 38; | ||
| 425 | if (class->name_version > 1) | ||
| 426 | namelen -= 2; /* XXX truncates versions > 9 */ | ||
| 427 | if (class->subclass) | ||
| 428 | namelen -= 2; | ||
| 429 | |||
| 430 | if (!class->name) { | ||
| 431 | char str[KSYM_NAME_LEN]; | ||
| 432 | const char *key_name; | ||
| 433 | |||
| 434 | key_name = __get_key_name(class->key, str); | ||
| 435 | snprintf(name, namelen, "%s", key_name); | ||
| 436 | } else { | ||
| 437 | snprintf(name, namelen, "%s", class->name); | ||
| 438 | } | ||
| 439 | namelen = strlen(name); | ||
| 440 | if (class->name_version > 1) { | ||
| 441 | snprintf(name+namelen, 3, "#%d", class->name_version); | ||
| 442 | namelen += 2; | ||
| 443 | } | ||
| 444 | if (class->subclass) { | ||
| 445 | snprintf(name+namelen, 3, "/%d", class->subclass); | ||
| 446 | namelen += 2; | ||
| 447 | } | ||
| 448 | |||
| 449 | if (stats->write_holdtime.nr) { | ||
| 450 | if (stats->read_holdtime.nr) | ||
| 451 | seq_printf(m, "%38s-W:", name); | ||
| 452 | else | ||
| 453 | seq_printf(m, "%40s:", name); | ||
| 454 | |||
| 455 | seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); | ||
| 456 | seq_lock_time(m, &stats->write_waittime); | ||
| 457 | seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); | ||
| 458 | seq_lock_time(m, &stats->write_holdtime); | ||
| 459 | seq_puts(m, "\n"); | ||
| 460 | } | ||
| 461 | |||
| 462 | if (stats->read_holdtime.nr) { | ||
| 463 | seq_printf(m, "%38s-R:", name); | ||
| 464 | seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); | ||
| 465 | seq_lock_time(m, &stats->read_waittime); | ||
| 466 | seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); | ||
| 467 | seq_lock_time(m, &stats->read_holdtime); | ||
| 468 | seq_puts(m, "\n"); | ||
| 469 | } | ||
| 470 | |||
| 471 | if (stats->read_waittime.nr + stats->write_waittime.nr == 0) | ||
| 472 | return; | ||
| 473 | |||
| 474 | if (stats->read_holdtime.nr) | ||
| 475 | namelen += 2; | ||
| 476 | |||
| 477 | for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { | ||
| 478 | char sym[KSYM_SYMBOL_LEN]; | ||
| 479 | char ip[32]; | ||
| 480 | |||
| 481 | if (class->contention_point[i] == 0) | ||
| 482 | break; | ||
| 483 | |||
| 484 | if (!i) | ||
| 485 | seq_line(m, '-', 40-namelen, namelen); | ||
| 486 | |||
| 487 | sprint_symbol(sym, class->contention_point[i]); | ||
| 488 | snprintf(ip, sizeof(ip), "[<%p>]", | ||
| 489 | (void *)class->contention_point[i]); | ||
| 490 | seq_printf(m, "%40s %14lu %29s %s\n", name, | ||
| 491 | stats->contention_point[i], | ||
| 492 | ip, sym); | ||
| 493 | } | ||
| 494 | if (i) { | ||
| 495 | seq_puts(m, "\n"); | ||
| 496 | seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); | ||
| 497 | seq_puts(m, "\n"); | ||
| 498 | } | ||
| 499 | } | ||
| 500 | |||
| 501 | static void seq_header(struct seq_file *m) | ||
| 502 | { | ||
| 503 | seq_printf(m, "lock_stat version 0.2\n"); | ||
| 504 | seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); | ||
| 505 | seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " | ||
| 506 | "%14s %14s\n", | ||
| 507 | "class name", | ||
| 508 | "con-bounces", | ||
| 509 | "contentions", | ||
| 510 | "waittime-min", | ||
| 511 | "waittime-max", | ||
| 512 | "waittime-total", | ||
| 513 | "acq-bounces", | ||
| 514 | "acquisitions", | ||
| 515 | "holdtime-min", | ||
| 516 | "holdtime-max", | ||
| 517 | "holdtime-total"); | ||
| 518 | seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); | ||
| 519 | seq_printf(m, "\n"); | ||
| 520 | } | ||
| 521 | |||
| 522 | static void *ls_start(struct seq_file *m, loff_t *pos) | ||
| 523 | { | ||
| 524 | struct lock_stat_seq *data = m->private; | ||
| 525 | |||
| 526 | if (data->iter == data->stats) | ||
| 527 | seq_header(m); | ||
| 528 | |||
| 529 | if (data->iter == data->iter_end) | ||
| 530 | data->iter = NULL; | ||
| 531 | |||
| 532 | return data->iter; | ||
| 533 | } | ||
| 534 | |||
| 535 | static void *ls_next(struct seq_file *m, void *v, loff_t *pos) | ||
| 536 | { | ||
| 537 | struct lock_stat_seq *data = m->private; | ||
| 538 | |||
| 539 | (*pos)++; | ||
| 540 | |||
| 541 | data->iter = v; | ||
| 542 | data->iter++; | ||
| 543 | if (data->iter == data->iter_end) | ||
| 544 | data->iter = NULL; | ||
| 545 | |||
| 546 | return data->iter; | ||
| 547 | } | ||
| 548 | |||
| 549 | static void ls_stop(struct seq_file *m, void *v) | ||
| 550 | { | ||
| 551 | } | ||
| 552 | |||
| 553 | static int ls_show(struct seq_file *m, void *v) | ||
| 554 | { | ||
| 555 | struct lock_stat_seq *data = m->private; | ||
| 556 | |||
| 557 | seq_stats(m, data->iter); | ||
| 558 | return 0; | ||
| 559 | } | ||
| 560 | |||
| 561 | static struct seq_operations lockstat_ops = { | ||
| 562 | .start = ls_start, | ||
| 563 | .next = ls_next, | ||
| 564 | .stop = ls_stop, | ||
| 565 | .show = ls_show, | ||
| 566 | }; | ||
| 567 | |||
| 568 | static int lock_stat_open(struct inode *inode, struct file *file) | ||
| 569 | { | ||
| 570 | int res; | ||
| 571 | struct lock_class *class; | ||
| 572 | struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); | ||
| 573 | |||
| 574 | if (!data) | ||
| 575 | return -ENOMEM; | ||
| 576 | |||
| 577 | res = seq_open(file, &lockstat_ops); | ||
| 578 | if (!res) { | ||
| 579 | struct lock_stat_data *iter = data->stats; | ||
| 580 | struct seq_file *m = file->private_data; | ||
| 581 | |||
| 582 | data->iter = iter; | ||
| 583 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | ||
| 584 | iter->class = class; | ||
| 585 | iter->stats = lock_stats(class); | ||
| 586 | iter++; | ||
| 587 | } | ||
| 588 | data->iter_end = iter; | ||
| 589 | |||
| 590 | sort(data->stats, data->iter_end - data->iter, | ||
| 591 | sizeof(struct lock_stat_data), | ||
| 592 | lock_stat_cmp, NULL); | ||
| 593 | |||
| 594 | m->private = data; | ||
| 595 | } else | ||
| 596 | vfree(data); | ||
| 597 | |||
| 598 | return res; | ||
| 599 | } | ||
| 600 | |||
| 601 | static ssize_t lock_stat_write(struct file *file, const char __user *buf, | ||
| 602 | size_t count, loff_t *ppos) | ||
| 603 | { | ||
| 604 | struct lock_class *class; | ||
| 605 | char c; | ||
| 606 | |||
| 607 | if (count) { | ||
| 608 | if (get_user(c, buf)) | ||
| 609 | return -EFAULT; | ||
| 610 | |||
| 611 | if (c != '0') | ||
| 612 | return count; | ||
| 613 | |||
| 614 | list_for_each_entry(class, &all_lock_classes, lock_entry) | ||
| 615 | clear_lock_stats(class); | ||
| 616 | } | ||
| 617 | return count; | ||
| 618 | } | ||
| 619 | |||
| 620 | static int lock_stat_release(struct inode *inode, struct file *file) | ||
| 621 | { | ||
| 622 | struct seq_file *seq = file->private_data; | ||
| 623 | |||
| 624 | vfree(seq->private); | ||
| 625 | seq->private = NULL; | ||
| 626 | return seq_release(inode, file); | ||
| 627 | } | ||
| 628 | |||
| 629 | static const struct file_operations proc_lock_stat_operations = { | ||
| 630 | .open = lock_stat_open, | ||
| 631 | .write = lock_stat_write, | ||
| 632 | .read = seq_read, | ||
| 633 | .llseek = seq_lseek, | ||
| 634 | .release = lock_stat_release, | ||
| 635 | }; | ||
| 636 | #endif /* CONFIG_LOCK_STAT */ | ||
| 637 | |||
| 345 | static int __init lockdep_proc_init(void) | 638 | static int __init lockdep_proc_init(void) |
| 346 | { | 639 | { |
| 347 | struct proc_dir_entry *entry; | 640 | struct proc_dir_entry *entry; |
| @@ -354,6 +647,12 @@ static int __init lockdep_proc_init(void) | |||
| 354 | if (entry) | 647 | if (entry) |
| 355 | entry->proc_fops = &proc_lockdep_stats_operations; | 648 | entry->proc_fops = &proc_lockdep_stats_operations; |
| 356 | 649 | ||
| 650 | #ifdef CONFIG_LOCK_STAT | ||
| 651 | entry = create_proc_entry("lock_stat", S_IRUSR, NULL); | ||
| 652 | if (entry) | ||
| 653 | entry->proc_fops = &proc_lock_stat_operations; | ||
| 654 | #endif | ||
| 655 | |||
| 357 | return 0; | 656 | return 0; |
| 358 | } | 657 | } |
| 359 | 658 | ||
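With the entry registered, the interface is: reading /proc/lock_stat dumps the sorted statistics, and a write whose first character is '0' clears them, per lock_stat_write() above. A hypothetical userspace snippet (the entry is created with mode S_IRUSR, so the write path assumes a root caller that can bypass the file mode):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/proc/lock_stat", O_WRONLY);

		if (fd >= 0) {
			write(fd, "0", 1);	/* leading '0' resets all lock statistics */
			close(fd);
		}
		/* ... run the workload, then read /proc/lock_stat ... */
		return 0;
	}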
diff --git a/kernel/module.c b/kernel/module.c index 015d60cfd90e..33c04ad51175 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -61,10 +61,8 @@ extern int module_sysfs_initialized; | |||
| 61 | /* If this is set, the section belongs in the init part of the module */ | 61 | /* If this is set, the section belongs in the init part of the module */ |
| 62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
| 63 | 63 | ||
| 64 | /* Protects module list */ | 64 | /* List of modules, protected by module_mutex or preempt_disable |
| 65 | static DEFINE_SPINLOCK(modlist_lock); | 65 | * (add/delete uses stop_machine). */ |
| 66 | |||
| 67 | /* List of modules, protected by module_mutex AND modlist_lock */ | ||
| 68 | static DEFINE_MUTEX(module_mutex); | 66 | static DEFINE_MUTEX(module_mutex); |
| 69 | static LIST_HEAD(modules); | 67 | static LIST_HEAD(modules); |
| 70 | 68 | ||
| @@ -760,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod) | |||
| 760 | void __symbol_put(const char *symbol) | 758 | void __symbol_put(const char *symbol) |
| 761 | { | 759 | { |
| 762 | struct module *owner; | 760 | struct module *owner; |
| 763 | unsigned long flags; | ||
| 764 | const unsigned long *crc; | 761 | const unsigned long *crc; |
| 765 | 762 | ||
| 766 | spin_lock_irqsave(&modlist_lock, flags); | 763 | preempt_disable(); |
| 767 | if (!__find_symbol(symbol, &owner, &crc, 1)) | 764 | if (!__find_symbol(symbol, &owner, &crc, 1)) |
| 768 | BUG(); | 765 | BUG(); |
| 769 | module_put(owner); | 766 | module_put(owner); |
| 770 | spin_unlock_irqrestore(&modlist_lock, flags); | 767 | preempt_enable(); |
| 771 | } | 768 | } |
| 772 | EXPORT_SYMBOL(__symbol_put); | 769 | EXPORT_SYMBOL(__symbol_put); |
| 773 | 770 | ||
| @@ -1228,14 +1225,14 @@ static void free_module(struct module *mod) | |||
| 1228 | void *__symbol_get(const char *symbol) | 1225 | void *__symbol_get(const char *symbol) |
| 1229 | { | 1226 | { |
| 1230 | struct module *owner; | 1227 | struct module *owner; |
| 1231 | unsigned long value, flags; | 1228 | unsigned long value; |
| 1232 | const unsigned long *crc; | 1229 | const unsigned long *crc; |
| 1233 | 1230 | ||
| 1234 | spin_lock_irqsave(&modlist_lock, flags); | 1231 | preempt_disable(); |
| 1235 | value = __find_symbol(symbol, &owner, &crc, 1); | 1232 | value = __find_symbol(symbol, &owner, &crc, 1); |
| 1236 | if (value && !strong_try_module_get(owner)) | 1233 | if (value && !strong_try_module_get(owner)) |
| 1237 | value = 0; | 1234 | value = 0; |
| 1238 | spin_unlock_irqrestore(&modlist_lock, flags); | 1235 | preempt_enable(); |
| 1239 | 1236 | ||
| 1240 | return (void *)value; | 1237 | return (void *)value; |
| 1241 | } | 1238 | } |
| @@ -2136,7 +2133,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
| 2136 | sym = get_ksymbol(mod, addr, NULL, NULL); | 2133 | sym = get_ksymbol(mod, addr, NULL, NULL); |
| 2137 | if (!sym) | 2134 | if (!sym) |
| 2138 | goto out; | 2135 | goto out; |
| 2139 | strlcpy(symname, sym, KSYM_NAME_LEN + 1); | 2136 | strlcpy(symname, sym, KSYM_NAME_LEN); |
| 2140 | mutex_unlock(&module_mutex); | 2137 | mutex_unlock(&module_mutex); |
| 2141 | return 0; | 2138 | return 0; |
| 2142 | } | 2139 | } |
| @@ -2161,9 +2158,9 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
| 2161 | if (!sym) | 2158 | if (!sym) |
| 2162 | goto out; | 2159 | goto out; |
| 2163 | if (modname) | 2160 | if (modname) |
| 2164 | strlcpy(modname, mod->name, MODULE_NAME_LEN + 1); | 2161 | strlcpy(modname, mod->name, MODULE_NAME_LEN); |
| 2165 | if (name) | 2162 | if (name) |
| 2166 | strlcpy(name, sym, KSYM_NAME_LEN + 1); | 2163 | strlcpy(name, sym, KSYM_NAME_LEN); |
| 2167 | mutex_unlock(&module_mutex); | 2164 | mutex_unlock(&module_mutex); |
| 2168 | return 0; | 2165 | return 0; |
| 2169 | } | 2166 | } |
| @@ -2184,8 +2181,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | |||
| 2184 | *value = mod->symtab[symnum].st_value; | 2181 | *value = mod->symtab[symnum].st_value; |
| 2185 | *type = mod->symtab[symnum].st_info; | 2182 | *type = mod->symtab[symnum].st_info; |
| 2186 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, | 2183 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, |
| 2187 | KSYM_NAME_LEN + 1); | 2184 | KSYM_NAME_LEN); |
| 2188 | strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); | 2185 | strlcpy(module_name, mod->name, MODULE_NAME_LEN); |
| 2189 | *exported = is_exported(name, mod); | 2186 | *exported = is_exported(name, mod); |
| 2190 | mutex_unlock(&module_mutex); | 2187 | mutex_unlock(&module_mutex); |
| 2191 | return 0; | 2188 | return 0; |
| @@ -2232,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
| 2232 | /* Called by the /proc file system to return a list of modules. */ | 2229 | /* Called by the /proc file system to return a list of modules. */ |
| 2233 | static void *m_start(struct seq_file *m, loff_t *pos) | 2230 | static void *m_start(struct seq_file *m, loff_t *pos) |
| 2234 | { | 2231 | { |
| 2235 | struct list_head *i; | ||
| 2236 | loff_t n = 0; | ||
| 2237 | |||
| 2238 | mutex_lock(&module_mutex); | 2232 | mutex_lock(&module_mutex); |
| 2239 | list_for_each(i, &modules) { | 2233 | return seq_list_start(&modules, *pos); |
| 2240 | if (n++ == *pos) | ||
| 2241 | break; | ||
| 2242 | } | ||
| 2243 | if (i == &modules) | ||
| 2244 | return NULL; | ||
| 2245 | return i; | ||
| 2246 | } | 2234 | } |
| 2247 | 2235 | ||
| 2248 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | 2236 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) |
| 2249 | { | 2237 | { |
| 2250 | struct list_head *i = p; | 2238 | return seq_list_next(p, &modules, pos); |
| 2251 | (*pos)++; | ||
| 2252 | if (i->next == &modules) | ||
| 2253 | return NULL; | ||
| 2254 | return i->next; | ||
| 2255 | } | 2239 | } |
| 2256 | 2240 | ||
| 2257 | static void m_stop(struct seq_file *m, void *p) | 2241 | static void m_stop(struct seq_file *m, void *p) |
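These two hunks replace the hand-rolled list cursor with the seq_list_* helpers, which encapsulate exactly the position bookkeeping being deleted. A minimal sketch of the resulting pattern for an arbitrary list (names are stand-ins; the lock taken in start() is dropped in the matching stop()):

	static LIST_HEAD(widget_list);
	static DEFINE_MUTEX(widget_mutex);

	static void *w_start(struct seq_file *m, loff_t *pos)
	{
		mutex_lock(&widget_mutex);
		return seq_list_start(&widget_list, *pos);
	}

	static void *w_next(struct seq_file *m, void *p, loff_t *pos)
	{
		return seq_list_next(p, &widget_list, pos);
	}

	static void w_stop(struct seq_file *m, void *p)
	{
		mutex_unlock(&widget_mutex);
	}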
| @@ -2321,11 +2305,10 @@ const struct seq_operations modules_op = { | |||
| 2321 | /* Given an address, look for it in the module exception tables. */ | 2305 | /* Given an address, look for it in the module exception tables. */ |
| 2322 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2306 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
| 2323 | { | 2307 | { |
| 2324 | unsigned long flags; | ||
| 2325 | const struct exception_table_entry *e = NULL; | 2308 | const struct exception_table_entry *e = NULL; |
| 2326 | struct module *mod; | 2309 | struct module *mod; |
| 2327 | 2310 | ||
| 2328 | spin_lock_irqsave(&modlist_lock, flags); | 2311 | preempt_disable(); |
| 2329 | list_for_each_entry(mod, &modules, list) { | 2312 | list_for_each_entry(mod, &modules, list) { |
| 2330 | if (mod->num_exentries == 0) | 2313 | if (mod->num_exentries == 0) |
| 2331 | continue; | 2314 | continue; |
| @@ -2336,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
| 2336 | if (e) | 2319 | if (e) |
| 2337 | break; | 2320 | break; |
| 2338 | } | 2321 | } |
| 2339 | spin_unlock_irqrestore(&modlist_lock, flags); | 2322 | preempt_enable(); |
| 2340 | 2323 | ||
| 2341 | /* Now, if we found one, we are running inside it now, hence | 2324 | /* Now, if we found one, we are running inside it now, hence |
| 2342 | we cannot unload the module, hence no refcnt needed. */ | 2325 | we cannot unload the module, hence no refcnt needed. */ |
| @@ -2348,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
| 2348 | */ | 2331 | */ |
| 2349 | int is_module_address(unsigned long addr) | 2332 | int is_module_address(unsigned long addr) |
| 2350 | { | 2333 | { |
| 2351 | unsigned long flags; | ||
| 2352 | struct module *mod; | 2334 | struct module *mod; |
| 2353 | 2335 | ||
| 2354 | spin_lock_irqsave(&modlist_lock, flags); | 2336 | preempt_disable(); |
| 2355 | 2337 | ||
| 2356 | list_for_each_entry(mod, &modules, list) { | 2338 | list_for_each_entry(mod, &modules, list) { |
| 2357 | if (within(addr, mod->module_core, mod->core_size)) { | 2339 | if (within(addr, mod->module_core, mod->core_size)) { |
| 2358 | spin_unlock_irqrestore(&modlist_lock, flags); | 2340 | preempt_enable(); |
| 2359 | return 1; | 2341 | return 1; |
| 2360 | } | 2342 | } |
| 2361 | } | 2343 | } |
| 2362 | 2344 | ||
| 2363 | spin_unlock_irqrestore(&modlist_lock, flags); | 2345 | preempt_enable(); |
| 2364 | 2346 | ||
| 2365 | return 0; | 2347 | return 0; |
| 2366 | } | 2348 | } |
| 2367 | 2349 | ||
| 2368 | 2350 | ||
| 2369 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ | 2351 | /* Is this a valid kernel address? */ |
| 2370 | struct module *__module_text_address(unsigned long addr) | 2352 | struct module *__module_text_address(unsigned long addr) |
| 2371 | { | 2353 | { |
| 2372 | struct module *mod; | 2354 | struct module *mod; |
| @@ -2381,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr) | |||
| 2381 | struct module *module_text_address(unsigned long addr) | 2363 | struct module *module_text_address(unsigned long addr) |
| 2382 | { | 2364 | { |
| 2383 | struct module *mod; | 2365 | struct module *mod; |
| 2384 | unsigned long flags; | ||
| 2385 | 2366 | ||
| 2386 | spin_lock_irqsave(&modlist_lock, flags); | 2367 | preempt_disable(); |
| 2387 | mod = __module_text_address(addr); | 2368 | mod = __module_text_address(addr); |
| 2388 | spin_unlock_irqrestore(&modlist_lock, flags); | 2369 | preempt_enable(); |
| 2389 | 2370 | ||
| 2390 | return mod; | 2371 | return mod; |
| 2391 | } | 2372 | } |
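All of the conversions in this file follow one pattern: the module list is only mutated under module_mutex via stop_machine(), so a pure reader merely has to stay on its CPU while walking the list; no IRQ-safe spinlock is required. A condensed reader-side sketch, mirroring is_module_address() above (within() is the helper already defined in module.c):

	static int addr_in_some_module(unsigned long addr)
	{
		struct module *mod;
		int ret = 0;

		preempt_disable();	/* stop_machine() cannot run meanwhile */
		list_for_each_entry(mod, &modules, list) {
			if (within(addr, mod->module_core, mod->core_size)) {
				ret = 1;
				break;
			}
		}
		preempt_enable();
		return ret;
	}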
diff --git a/kernel/mutex.c b/kernel/mutex.c index 303eab18484b..691b86564dd9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
| @@ -139,6 +139,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
| 139 | list_add_tail(&waiter.list, &lock->wait_list); | 139 | list_add_tail(&waiter.list, &lock->wait_list); |
| 140 | waiter.task = task; | 140 | waiter.task = task; |
| 141 | 141 | ||
| 142 | old_val = atomic_xchg(&lock->count, -1); | ||
| 143 | if (old_val == 1) | ||
| 144 | goto done; | ||
| 145 | |||
| 146 | lock_contended(&lock->dep_map, _RET_IP_); | ||
| 147 | |||
| 142 | for (;;) { | 148 | for (;;) { |
| 143 | /* | 149 | /* |
| 144 | * Lets try to take the lock again - this is needed even if | 150 | * Lets try to take the lock again - this is needed even if |
| @@ -174,6 +180,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
| 174 | spin_lock_mutex(&lock->wait_lock, flags); | 180 | spin_lock_mutex(&lock->wait_lock, flags); |
| 175 | } | 181 | } |
| 176 | 182 | ||
| 183 | done: | ||
| 184 | lock_acquired(&lock->dep_map); | ||
| 177 | /* got the lock - rejoice! */ | 185 | /* got the lock - rejoice! */ |
| 178 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); | 186 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); |
| 179 | debug_mutex_set_owner(lock, task_thread_info(task)); | 187 | debug_mutex_set_owner(lock, task_thread_info(task)); |
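The new fast path piggy-backs on the mutex counter convention (1 unlocked, 0 locked, negative contended): atomic_xchg() marks the lock contended and simultaneously reports the previous state, so a lock that turned out to be free is taken without recording a contention event. A userspace analogue of that single step (illustrative only):

	#include <stdatomic.h>

	/* 1 = unlocked, 0 = locked, -1 = locked with waiters */
	static atomic_int count = ATOMIC_VAR_INIT(1);

	static int try_fast_acquire(void)
	{
		/* mark contended and learn the previous state in one atomic step */
		return atomic_exchange(&count, -1) == 1;
	}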
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 9e83b589f754..a4fb7d46971f 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
| @@ -21,6 +21,8 @@ | |||
| 21 | #include <linux/utsname.h> | 21 | #include <linux/utsname.h> |
| 22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
| 23 | 23 | ||
| 24 | static struct kmem_cache *nsproxy_cachep; | ||
| 25 | |||
| 24 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | 26 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); |
| 25 | 27 | ||
| 26 | static inline void get_nsproxy(struct nsproxy *ns) | 28 | static inline void get_nsproxy(struct nsproxy *ns) |
| @@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | |||
| 43 | { | 45 | { |
| 44 | struct nsproxy *ns; | 46 | struct nsproxy *ns; |
| 45 | 47 | ||
| 46 | ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); | 48 | ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); |
| 47 | if (ns) | 49 | if (ns) { |
| 50 | memcpy(ns, orig, sizeof(struct nsproxy)); | ||
| 48 | atomic_set(&ns->count, 1); | 51 | atomic_set(&ns->count, 1); |
| 52 | } | ||
| 49 | return ns; | 53 | return ns; |
| 50 | } | 54 | } |
| 51 | 55 | ||
| @@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) | |||
| 54 | * Return the newly created nsproxy. Do not attach this to the task, | 58 | * Return the newly created nsproxy. Do not attach this to the task, |
| 55 | * leave it to the caller to do proper locking and attach it to task. | 59 | * leave it to the caller to do proper locking and attach it to task. |
| 56 | */ | 60 | */ |
| 57 | static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, | 61 | static struct nsproxy *create_new_namespaces(unsigned long flags, |
| 58 | struct fs_struct *new_fs) | 62 | struct task_struct *tsk, struct fs_struct *new_fs) |
| 59 | { | 63 | { |
| 60 | struct nsproxy *new_nsp; | 64 | struct nsproxy *new_nsp; |
| 65 | int err; | ||
| 61 | 66 | ||
| 62 | new_nsp = clone_nsproxy(tsk->nsproxy); | 67 | new_nsp = clone_nsproxy(tsk->nsproxy); |
| 63 | if (!new_nsp) | 68 | if (!new_nsp) |
| 64 | return ERR_PTR(-ENOMEM); | 69 | return ERR_PTR(-ENOMEM); |
| 65 | 70 | ||
| 66 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); | 71 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); |
| 67 | if (IS_ERR(new_nsp->mnt_ns)) | 72 | if (IS_ERR(new_nsp->mnt_ns)) { |
| 73 | err = PTR_ERR(new_nsp->mnt_ns); | ||
| 68 | goto out_ns; | 74 | goto out_ns; |
| 75 | } | ||
| 69 | 76 | ||
| 70 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); | 77 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); |
| 71 | if (IS_ERR(new_nsp->uts_ns)) | 78 | if (IS_ERR(new_nsp->uts_ns)) { |
| 79 | err = PTR_ERR(new_nsp->uts_ns); | ||
| 72 | goto out_uts; | 80 | goto out_uts; |
| 81 | } | ||
| 73 | 82 | ||
| 74 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); | 83 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); |
| 75 | if (IS_ERR(new_nsp->ipc_ns)) | 84 | if (IS_ERR(new_nsp->ipc_ns)) { |
| 85 | err = PTR_ERR(new_nsp->ipc_ns); | ||
| 76 | goto out_ipc; | 86 | goto out_ipc; |
| 87 | } | ||
| 77 | 88 | ||
| 78 | new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); | 89 | new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); |
| 79 | if (IS_ERR(new_nsp->pid_ns)) | 90 | if (IS_ERR(new_nsp->pid_ns)) { |
| 91 | err = PTR_ERR(new_nsp->pid_ns); | ||
| 80 | goto out_pid; | 92 | goto out_pid; |
| 93 | } | ||
| 94 | |||
| 95 | new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); | ||
| 96 | if (IS_ERR(new_nsp->user_ns)) { | ||
| 97 | err = PTR_ERR(new_nsp->user_ns); | ||
| 98 | goto out_user; | ||
| 99 | } | ||
| 81 | 100 | ||
| 82 | return new_nsp; | 101 | return new_nsp; |
| 83 | 102 | ||
| 103 | out_user: | ||
| 104 | if (new_nsp->pid_ns) | ||
| 105 | put_pid_ns(new_nsp->pid_ns); | ||
| 84 | out_pid: | 106 | out_pid: |
| 85 | if (new_nsp->ipc_ns) | 107 | if (new_nsp->ipc_ns) |
| 86 | put_ipc_ns(new_nsp->ipc_ns); | 108 | put_ipc_ns(new_nsp->ipc_ns); |
| @@ -91,15 +113,15 @@ out_uts: | |||
| 91 | if (new_nsp->mnt_ns) | 113 | if (new_nsp->mnt_ns) |
| 92 | put_mnt_ns(new_nsp->mnt_ns); | 114 | put_mnt_ns(new_nsp->mnt_ns); |
| 93 | out_ns: | 115 | out_ns: |
| 94 | kfree(new_nsp); | 116 | kmem_cache_free(nsproxy_cachep, new_nsp); |
| 95 | return ERR_PTR(-ENOMEM); | 117 | return ERR_PTR(err); |
| 96 | } | 118 | } |
| 97 | 119 | ||
| 98 | /* | 120 | /* |
| 99 | * called from clone. This now handles copy for nsproxy and all | 121 | * called from clone. This now handles copy for nsproxy and all |
| 100 | * namespaces therein. | 122 | * namespaces therein. |
| 101 | */ | 123 | */ |
| 102 | int copy_namespaces(int flags, struct task_struct *tsk) | 124 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) |
| 103 | { | 125 | { |
| 104 | struct nsproxy *old_ns = tsk->nsproxy; | 126 | struct nsproxy *old_ns = tsk->nsproxy; |
| 105 | struct nsproxy *new_ns; | 127 | struct nsproxy *new_ns; |
| @@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) | |||
| 110 | 132 | ||
| 111 | get_nsproxy(old_ns); | 133 | get_nsproxy(old_ns); |
| 112 | 134 | ||
| 113 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | 135 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER))) |
| 114 | return 0; | 136 | return 0; |
| 115 | 137 | ||
| 116 | if (!capable(CAP_SYS_ADMIN)) { | 138 | if (!capable(CAP_SYS_ADMIN)) { |
| @@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns) | |||
| 140 | put_ipc_ns(ns->ipc_ns); | 162 | put_ipc_ns(ns->ipc_ns); |
| 141 | if (ns->pid_ns) | 163 | if (ns->pid_ns) |
| 142 | put_pid_ns(ns->pid_ns); | 164 | put_pid_ns(ns->pid_ns); |
| 143 | kfree(ns); | 165 | if (ns->user_ns) |
| 166 | put_user_ns(ns->user_ns); | ||
| 167 | kmem_cache_free(nsproxy_cachep, ns); | ||
| 144 | } | 168 | } |
| 145 | 169 | ||
| 146 | /* | 170 | /* |
| @@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
| 152 | { | 176 | { |
| 153 | int err = 0; | 177 | int err = 0; |
| 154 | 178 | ||
| 155 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) | 179 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
| 180 | CLONE_NEWUSER))) | ||
| 156 | return 0; | 181 | return 0; |
| 157 | 182 | ||
| 158 | #ifndef CONFIG_IPC_NS | ||
| 159 | if (unshare_flags & CLONE_NEWIPC) | ||
| 160 | return -EINVAL; | ||
| 161 | #endif | ||
| 162 | |||
| 163 | #ifndef CONFIG_UTS_NS | ||
| 164 | if (unshare_flags & CLONE_NEWUTS) | ||
| 165 | return -EINVAL; | ||
| 166 | #endif | ||
| 167 | |||
| 168 | if (!capable(CAP_SYS_ADMIN)) | 183 | if (!capable(CAP_SYS_ADMIN)) |
| 169 | return -EPERM; | 184 | return -EPERM; |
| 170 | 185 | ||
| @@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
| 174 | err = PTR_ERR(*new_nsp); | 189 | err = PTR_ERR(*new_nsp); |
| 175 | return err; | 190 | return err; |
| 176 | } | 191 | } |
| 192 | |||
| 193 | static int __init nsproxy_cache_init(void) | ||
| 194 | { | ||
| 195 | nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), | ||
| 196 | 0, SLAB_PANIC, NULL); | ||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | module_init(nsproxy_cache_init); | ||
diff --git a/kernel/panic.c b/kernel/panic.c index 623d1828259a..f64f4c1ac11f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -159,14 +159,15 @@ const char *print_tainted(void) | |||
| 159 | { | 159 | { |
| 160 | static char buf[20]; | 160 | static char buf[20]; |
| 161 | if (tainted) { | 161 | if (tainted) { |
| 162 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", | 162 | snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c", |
| 163 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', | 163 | tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', |
| 164 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', | 164 | tainted & TAINT_FORCED_MODULE ? 'F' : ' ', |
| 165 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', | 165 | tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', |
| 166 | tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', | 166 | tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', |
| 167 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', | 167 | tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', |
| 168 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', | 168 | tainted & TAINT_BAD_PAGE ? 'B' : ' ', |
| 169 | tainted & TAINT_USER ? 'U' : ' '); | 169 | tainted & TAINT_USER ? 'U' : ' ', |
| 170 | tainted & TAINT_DIE ? 'D' : ' '); | ||
| 170 | } | 171 | } |
| 171 | else | 172 | else |
| 172 | snprintf(buf, sizeof(buf), "Not tainted"); | 173 | snprintf(buf, sizeof(buf), "Not tainted"); |
diff --git a/kernel/pid.c b/kernel/pid.c index eb66bd2953ab..c6e3f9ffff87 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr) | |||
| 365 | } | 365 | } |
| 366 | EXPORT_SYMBOL_GPL(find_get_pid); | 366 | EXPORT_SYMBOL_GPL(find_get_pid); |
| 367 | 367 | ||
| 368 | struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) | 368 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) |
| 369 | { | 369 | { |
| 370 | BUG_ON(!old_ns); | 370 | BUG_ON(!old_ns); |
| 371 | get_pid_ns(old_ns); | 371 | get_pid_ns(old_ns); |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 329ce0172074..55b3761edaa9 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
| @@ -241,7 +241,7 @@ static __init int init_posix_timers(void) | |||
| 241 | register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); | 241 | register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); |
| 242 | 242 | ||
| 243 | posix_timers_cache = kmem_cache_create("posix_timers_cache", | 243 | posix_timers_cache = kmem_cache_create("posix_timers_cache", |
| 244 | sizeof (struct k_itimer), 0, 0, NULL, NULL); | 244 | sizeof (struct k_itimer), 0, 0, NULL); |
| 245 | idr_init(&posix_timers_id); | 245 | idr_init(&posix_timers_id); |
| 246 | return 0; | 246 | return 0; |
| 247 | } | 247 | } |
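This one-liner tracks a slab allocator API change: kmem_cache_create() apparently no longer takes a trailing destructor argument, leaving five parameters. Annotated for reference (an assumption about the new signature, inferred from this hunk and the nsproxy hunk above):

	posix_timers_cache = kmem_cache_create("posix_timers_cache",
				sizeof(struct k_itimer),
				0,	/* align */
				0,	/* flags */
				NULL);	/* ctor; the old dtor argument is gone */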
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 495b7d4dd330..c1a106d87d90 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
| @@ -33,13 +33,20 @@ config PM_DEBUG | |||
| 33 | bool "Power Management Debug Support" | 33 | bool "Power Management Debug Support" |
| 34 | depends on PM | 34 | depends on PM |
| 35 | ---help--- | 35 | ---help--- |
| 36 | This option enables verbose debugging support in the Power Management | 36 | This option enables various debugging support in the Power Management |
| 37 | code. This is helpful when debugging and reporting various PM bugs, | 37 | code. This is helpful when debugging and reporting PM bugs, like |
| 38 | like suspend support. | 38 | suspend support. |
| 39 | |||
| 40 | config PM_VERBOSE | ||
| 41 | bool "Verbose Power Management debugging" | ||
| 42 | depends on PM_DEBUG | ||
| 43 | default n | ||
| 44 | ---help--- | ||
| 45 | This option enables verbose messages from the Power Management code. | ||
| 39 | 46 | ||
| 40 | config DISABLE_CONSOLE_SUSPEND | 47 | config DISABLE_CONSOLE_SUSPEND |
| 41 | bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" | 48 | bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" |
| 42 | depends on PM && PM_DEBUG | 49 | depends on PM_DEBUG |
| 43 | default n | 50 | default n |
| 44 | ---help--- | 51 | ---help--- |
| 45 | This option turns off the console suspend mechanism that prevents | 52 | This option turns off the console suspend mechanism that prevents |
| @@ -50,7 +57,7 @@ config DISABLE_CONSOLE_SUSPEND | |||
| 50 | 57 | ||
| 51 | config PM_TRACE | 58 | config PM_TRACE |
| 52 | bool "Suspend/resume event tracing" | 59 | bool "Suspend/resume event tracing" |
| 53 | depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL | 60 | depends on PM_DEBUG && X86 && EXPERIMENTAL |
| 54 | default n | 61 | default n |
| 55 | ---help--- | 62 | ---help--- |
| 56 | This enables some cheesy code to save the last PM event point in the | 63 | This enables some cheesy code to save the last PM event point in the |
| @@ -65,18 +72,6 @@ config PM_TRACE | |||
| 65 | CAUTION: this option will cause your machine's real-time clock to be | 72 | CAUTION: this option will cause your machine's real-time clock to be |
| 66 | set to an invalid time after a resume. | 73 | set to an invalid time after a resume. |
| 67 | 74 | ||
| 68 | config PM_SYSFS_DEPRECATED | ||
| 69 | bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" | ||
| 70 | depends on PM && SYSFS | ||
| 71 | default n | ||
| 72 | help | ||
| 73 | The driver model started out with a sysfs file intended to provide | ||
| 74 | a userspace hook for device power management. This feature has never | ||
| 75 | worked very well, except for limited testing purposes, and so it will | ||
| 76 | be removed. It's not clear that a generic mechanism could really | ||
| 77 | handle the wide variability of device power states; any replacements | ||
| 78 | are likely to be bus or driver specific. | ||
| 79 | |||
| 80 | config SOFTWARE_SUSPEND | 75 | config SOFTWARE_SUSPEND |
| 81 | bool "Software Suspend (Hibernation)" | 76 | bool "Software Suspend (Hibernation)" |
| 82 | depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) | 77 | depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f445b9cd60fb..324ac0188ce1 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
| @@ -45,7 +45,7 @@ enum { | |||
| 45 | 45 | ||
| 46 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | 46 | static int hibernation_mode = HIBERNATION_SHUTDOWN; |
| 47 | 47 | ||
| 48 | struct hibernation_ops *hibernation_ops; | 48 | static struct hibernation_ops *hibernation_ops; |
| 49 | 49 | ||
| 50 | /** | 50 | /** |
| 51 | * hibernation_set_ops - set the global hibernate operations | 51 | * hibernation_set_ops - set the global hibernate operations |
| @@ -54,7 +54,8 @@ struct hibernation_ops *hibernation_ops; | |||
| 54 | 54 | ||
| 55 | void hibernation_set_ops(struct hibernation_ops *ops) | 55 | void hibernation_set_ops(struct hibernation_ops *ops) |
| 56 | { | 56 | { |
| 57 | if (ops && !(ops->prepare && ops->enter && ops->finish)) { | 57 | if (ops && !(ops->prepare && ops->enter && ops->finish |
| 58 | && ops->pre_restore && ops->restore_cleanup)) { | ||
| 58 | WARN_ON(1); | 59 | WARN_ON(1); |
| 59 | return; | 60 | return; |
| 60 | } | 61 | } |
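hibernation_set_ops() now rejects an ops structure unless all five callbacks are present, so platform drivers must supply the two new restore hooks as well. A hypothetical registration sketch (the acme_* callbacks are stand-ins, not from this patch):

	static struct hibernation_ops acme_hibernation_ops = {
		.prepare	 = acme_prepare,
		.enter		 = acme_enter,
		.finish		 = acme_finish,
		.pre_restore	 = acme_pre_restore,		/* new, mandatory */
		.restore_cleanup = acme_restore_cleanup,	/* new, mandatory */
	};

	static int __init acme_pm_init(void)
	{
		hibernation_set_ops(&acme_hibernation_ops);
		return 0;
	}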
| @@ -74,9 +75,9 @@ void hibernation_set_ops(struct hibernation_ops *ops) | |||
| 74 | * platform driver if so configured and return an error code if it fails | 75 | * platform driver if so configured and return an error code if it fails |
| 75 | */ | 76 | */ |
| 76 | 77 | ||
| 77 | static int platform_prepare(void) | 78 | static int platform_prepare(int platform_mode) |
| 78 | { | 79 | { |
| 79 | return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? | 80 | return (platform_mode && hibernation_ops) ? |
| 80 | hibernation_ops->prepare() : 0; | 81 | hibernation_ops->prepare() : 0; |
| 81 | } | 82 | } |
| 82 | 83 | ||
| @@ -85,13 +86,145 @@ static int platform_prepare(void) | |||
| 85 | * using the platform driver (must be called after platform_prepare()) | 86 | * using the platform driver (must be called after platform_prepare()) |
| 86 | */ | 87 | */ |
| 87 | 88 | ||
| 88 | static void platform_finish(void) | 89 | static void platform_finish(int platform_mode) |
| 89 | { | 90 | { |
| 90 | if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) | 91 | if (platform_mode && hibernation_ops) |
| 91 | hibernation_ops->finish(); | 92 | hibernation_ops->finish(); |
| 92 | } | 93 | } |
| 93 | 94 | ||
| 94 | /** | 95 | /** |
| 96 | * platform_pre_restore - prepare the platform for the restoration from a | ||
| 97 | * hibernation image. If the restore fails after this function has been | ||
| 98 | * called, platform_restore_cleanup() must be called. | ||
| 99 | */ | ||
| 100 | |||
| 101 | static int platform_pre_restore(int platform_mode) | ||
| 102 | { | ||
| 103 | return (platform_mode && hibernation_ops) ? | ||
| 104 | hibernation_ops->pre_restore() : 0; | ||
| 105 | } | ||
| 106 | |||
| 107 | /** | ||
| 108 | * platform_restore_cleanup - switch the platform to the normal mode of | ||
| 109 | * operation after a failing restore. If platform_pre_restore() has been | ||
| 110 | * called before the failing restore, this function must be called too, | ||
| 111 | * regardless of the result of platform_pre_restore(). | ||
| 112 | */ | ||
| 113 | |||
| 114 | static void platform_restore_cleanup(int platform_mode) | ||
| 115 | { | ||
| 116 | if (platform_mode && hibernation_ops) | ||
| 117 | hibernation_ops->restore_cleanup(); | ||
| 118 | } | ||
| 119 | |||
| 120 | /** | ||
| 121 | * hibernation_snapshot - quiesce devices and create the hibernation | ||
| 122 | * snapshot image. | ||
| 123 | * @platform_mode - if set, use the platform driver, if available, to | ||
| 124 | * prepare the platform firmware for the power transition. | ||
| 125 | * | ||
| 126 | * Must be called with pm_mutex held | ||
| 127 | */ | ||
| 128 | |||
| 129 | int hibernation_snapshot(int platform_mode) | ||
| 130 | { | ||
| 131 | int error; | ||
| 132 | |||
| 133 | /* Free memory before shutting down devices. */ | ||
| 134 | error = swsusp_shrink_memory(); | ||
| 135 | if (error) | ||
| 136 | return error; | ||
| 137 | |||
| 138 | suspend_console(); | ||
| 139 | error = device_suspend(PMSG_FREEZE); | ||
| 140 | if (error) | ||
| 141 | goto Resume_console; | ||
| 142 | |||
| 143 | error = platform_prepare(platform_mode); | ||
| 144 | if (error) | ||
| 145 | goto Resume_devices; | ||
| 146 | |||
| 147 | error = disable_nonboot_cpus(); | ||
| 148 | if (!error) { | ||
| 149 | if (hibernation_mode != HIBERNATION_TEST) { | ||
| 150 | in_suspend = 1; | ||
| 151 | error = swsusp_suspend(); | ||
| 152 | /* Control returns here after successful restore */ | ||
| 153 | } else { | ||
| 154 | printk("swsusp debug: Waiting for 5 seconds.\n"); | ||
| 155 | mdelay(5000); | ||
| 156 | } | ||
| 157 | } | ||
| 158 | enable_nonboot_cpus(); | ||
| 159 | Resume_devices: | ||
| 160 | platform_finish(platform_mode); | ||
| 161 | device_resume(); | ||
| 162 | Resume_console: | ||
| 163 | resume_console(); | ||
| 164 | return error; | ||
| 165 | } | ||
| 166 | |||
| 167 | /** | ||
| 168 | * hibernation_restore - quiesce devices and restore the hibernation | ||
| 169 | * snapshot image. If successful, control returns in hibernation_snapshot() | ||
| 170 | * @platform_mode - if set, use the platform driver, if available, to | ||
| 171 | * prepare the platform firmware for the transition. | ||
| 172 | * | ||
| 173 | * Must be called with pm_mutex held | ||
| 174 | */ | ||
| 175 | |||
| 176 | int hibernation_restore(int platform_mode) | ||
| 177 | { | ||
| 178 | int error; | ||
| 179 | |||
| 180 | pm_prepare_console(); | ||
| 181 | suspend_console(); | ||
| 182 | error = device_suspend(PMSG_PRETHAW); | ||
| 183 | if (error) | ||
| 184 | goto Finish; | ||
| 185 | |||
| 186 | error = platform_pre_restore(platform_mode); | ||
| 187 | if (!error) { | ||
| 188 | error = disable_nonboot_cpus(); | ||
| 189 | if (!error) | ||
| 190 | error = swsusp_resume(); | ||
| 191 | enable_nonboot_cpus(); | ||
| 192 | } | ||
| 193 | platform_restore_cleanup(platform_mode); | ||
| 194 | device_resume(); | ||
| 195 | Finish: | ||
| 196 | resume_console(); | ||
| 197 | pm_restore_console(); | ||
| 198 | return error; | ||
| 199 | } | ||
| 200 | |||
| 201 | /** | ||
| 202 | * hibernation_platform_enter - enter the hibernation state using the | ||
| 203 | * platform driver (if available) | ||
| 204 | */ | ||
| 205 | |||
| 206 | int hibernation_platform_enter(void) | ||
| 207 | { | ||
| 208 | int error; | ||
| 209 | |||
| 210 | if (hibernation_ops) { | ||
| 211 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | ||
| 212 | /* | ||
| 213 | * We have cancelled the power transition by running | ||
| 214 | * hibernation_ops->finish() before saving the image, so we | ||
| 215 | * should let the firmware know that we're going to enter the | ||
| 216 | * sleep state after all | ||
| 217 | */ | ||
| 218 | error = hibernation_ops->prepare(); | ||
| 219 | if (!error) | ||
| 220 | error = hibernation_ops->enter(); | ||
| 221 | } else { | ||
| 222 | error = -ENOSYS; | ||
| 223 | } | ||
| 224 | return error; | ||
| 225 | } | ||
| 226 | |||
| 227 | /** | ||
| 95 | * power_down - Shut the machine down for hibernation. | 228 | * power_down - Shut the machine down for hibernation. |
| 96 | * | 229 | * |
| 97 | * Use the platform driver, if configured so; otherwise try | 230 | * Use the platform driver, if configured so; otherwise try |
| @@ -111,11 +244,7 @@ static void power_down(void) | |||
| 111 | kernel_restart(NULL); | 244 | kernel_restart(NULL); |
| 112 | break; | 245 | break; |
| 113 | case HIBERNATION_PLATFORM: | 246 | case HIBERNATION_PLATFORM: |
| 114 | if (hibernation_ops) { | 247 | hibernation_platform_enter(); |
| 115 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | ||
| 116 | hibernation_ops->enter(); | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | } | 248 | } |
| 120 | kernel_halt(); | 249 | kernel_halt(); |
| 121 | /* | 250 | /* |
| @@ -152,9 +281,16 @@ int hibernate(void) | |||
| 152 | { | 281 | { |
| 153 | int error; | 282 | int error; |
| 154 | 283 | ||
| 284 | mutex_lock(&pm_mutex); | ||
| 155 | /* The snapshot device should not be opened while we're running */ | 285 | /* The snapshot device should not be opened while we're running */ |
| 156 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) | 286 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { |
| 157 | return -EBUSY; | 287 | error = -EBUSY; |
| 288 | goto Unlock; | ||
| 289 | } | ||
| 290 | |||
| 291 | error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); | ||
| 292 | if (error) | ||
| 293 | goto Exit; | ||
| 158 | 294 | ||
| 159 | /* Allocate memory management structures */ | 295 | /* Allocate memory management structures */ |
| 160 | error = create_basic_memory_bitmaps(); | 296 | error = create_basic_memory_bitmaps(); |
| @@ -165,75 +301,35 @@ int hibernate(void) | |||
| 165 | if (error) | 301 | if (error) |
| 166 | goto Finish; | 302 | goto Finish; |
| 167 | 303 | ||
| 168 | mutex_lock(&pm_mutex); | ||
| 169 | if (hibernation_mode == HIBERNATION_TESTPROC) { | 304 | if (hibernation_mode == HIBERNATION_TESTPROC) { |
| 170 | printk("swsusp debug: Waiting for 5 seconds.\n"); | 305 | printk("swsusp debug: Waiting for 5 seconds.\n"); |
| 171 | mdelay(5000); | 306 | mdelay(5000); |
| 172 | goto Thaw; | 307 | goto Thaw; |
| 173 | } | 308 | } |
| 309 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); | ||
| 310 | if (in_suspend && !error) { | ||
| 311 | unsigned int flags = 0; | ||
| 174 | 312 | ||
| 175 | /* Free memory before shutting down devices. */ | 313 | if (hibernation_mode == HIBERNATION_PLATFORM) |
| 176 | error = swsusp_shrink_memory(); | 314 | flags |= SF_PLATFORM_MODE; |
| 177 | if (error) | ||
| 178 | goto Thaw; | ||
| 179 | |||
| 180 | error = platform_prepare(); | ||
| 181 | if (error) | ||
| 182 | goto Thaw; | ||
| 183 | |||
| 184 | suspend_console(); | ||
| 185 | error = device_suspend(PMSG_FREEZE); | ||
| 186 | if (error) { | ||
| 187 | printk(KERN_ERR "PM: Some devices failed to suspend\n"); | ||
| 188 | goto Resume_devices; | ||
| 189 | } | ||
| 190 | error = disable_nonboot_cpus(); | ||
| 191 | if (error) | ||
| 192 | goto Enable_cpus; | ||
| 193 | |||
| 194 | if (hibernation_mode == HIBERNATION_TEST) { | ||
| 195 | printk("swsusp debug: Waiting for 5 seconds.\n"); | ||
| 196 | mdelay(5000); | ||
| 197 | goto Enable_cpus; | ||
| 198 | } | ||
| 199 | |||
| 200 | pr_debug("PM: snapshotting memory.\n"); | ||
| 201 | in_suspend = 1; | ||
| 202 | error = swsusp_suspend(); | ||
| 203 | if (error) | ||
| 204 | goto Enable_cpus; | ||
| 205 | |||
| 206 | if (in_suspend) { | ||
| 207 | enable_nonboot_cpus(); | ||
| 208 | platform_finish(); | ||
| 209 | device_resume(); | ||
| 210 | resume_console(); | ||
| 211 | pr_debug("PM: writing image.\n"); | 315 | pr_debug("PM: writing image.\n"); |
| 212 | error = swsusp_write(); | 316 | error = swsusp_write(flags); |
| 317 | swsusp_free(); | ||
| 213 | if (!error) | 318 | if (!error) |
| 214 | power_down(); | 319 | power_down(); |
| 215 | else { | ||
| 216 | swsusp_free(); | ||
| 217 | goto Thaw; | ||
| 218 | } | ||
| 219 | } else { | 320 | } else { |
| 220 | pr_debug("PM: Image restored successfully.\n"); | 321 | pr_debug("PM: Image restored successfully.\n"); |
| 322 | swsusp_free(); | ||
| 221 | } | 323 | } |
| 222 | |||
| 223 | swsusp_free(); | ||
| 224 | Enable_cpus: | ||
| 225 | enable_nonboot_cpus(); | ||
| 226 | Resume_devices: | ||
| 227 | platform_finish(); | ||
| 228 | device_resume(); | ||
| 229 | resume_console(); | ||
| 230 | Thaw: | 324 | Thaw: |
| 231 | mutex_unlock(&pm_mutex); | ||
| 232 | unprepare_processes(); | 325 | unprepare_processes(); |
| 233 | Finish: | 326 | Finish: |
| 234 | free_basic_memory_bitmaps(); | 327 | free_basic_memory_bitmaps(); |
| 235 | Exit: | 328 | Exit: |
| 329 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
| 236 | atomic_inc(&snapshot_device_available); | 330 | atomic_inc(&snapshot_device_available); |
| 331 | Unlock: | ||
| 332 | mutex_unlock(&pm_mutex); | ||
| 237 | return error; | 333 | return error; |
| 238 | } | 334 | } |
| 239 | 335 | ||
| @@ -253,6 +349,7 @@ int hibernate(void) | |||
| 253 | static int software_resume(void) | 349 | static int software_resume(void) |
| 254 | { | 350 | { |
| 255 | int error; | 351 | int error; |
| 352 | unsigned int flags; | ||
| 256 | 353 | ||
| 257 | mutex_lock(&pm_mutex); | 354 | mutex_lock(&pm_mutex); |
| 258 | if (!swsusp_resume_device) { | 355 | if (!swsusp_resume_device) { |
| @@ -300,30 +397,12 @@ static int software_resume(void) | |||
| 300 | 397 | ||
| 301 | pr_debug("PM: Reading swsusp image.\n"); | 398 | pr_debug("PM: Reading swsusp image.\n"); |
| 302 | 399 | ||
| 303 | error = swsusp_read(); | 400 | error = swsusp_read(&flags); |
| 304 | if (error) { | ||
| 305 | swsusp_free(); | ||
| 306 | goto Thaw; | ||
| 307 | } | ||
| 308 | |||
| 309 | pr_debug("PM: Preparing devices for restore.\n"); | ||
| 310 | |||
| 311 | suspend_console(); | ||
| 312 | error = device_suspend(PMSG_PRETHAW); | ||
| 313 | if (error) | ||
| 314 | goto Free; | ||
| 315 | |||
| 316 | error = disable_nonboot_cpus(); | ||
| 317 | if (!error) | 401 | if (!error) |
| 318 | swsusp_resume(); | 402 | hibernation_restore(flags & SF_PLATFORM_MODE); |
| 319 | 403 | ||
| 320 | enable_nonboot_cpus(); | ||
| 321 | Free: | ||
| 322 | swsusp_free(); | ||
| 323 | device_resume(); | ||
| 324 | resume_console(); | ||
| 325 | Thaw: | ||
| 326 | printk(KERN_ERR "PM: Restore failed, recovering.\n"); | 404 | printk(KERN_ERR "PM: Restore failed, recovering.\n"); |
| 405 | swsusp_free(); | ||
| 327 | unprepare_processes(); | 406 | unprepare_processes(); |
| 328 | Done: | 407 | Done: |
| 329 | free_basic_memory_bitmaps(); | 408 | free_basic_memory_bitmaps(); |
| @@ -333,7 +412,7 @@ static int software_resume(void) | |||
| 333 | Unlock: | 412 | Unlock: |
| 334 | mutex_unlock(&pm_mutex); | 413 | mutex_unlock(&pm_mutex); |
| 335 | pr_debug("PM: Resume from disk failed.\n"); | 414 | pr_debug("PM: Resume from disk failed.\n"); |
| 336 | return 0; | 415 | return error; |
| 337 | } | 416 | } |
| 338 | 417 | ||
| 339 | late_initcall(software_resume); | 418 | late_initcall(software_resume); |
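Taken together, the hibernate() and software_resume() hunks make the platform-mode decision travel inside the image header instead of being re-derived at resume time. Condensed from the code above (a paraphrase, not a literal quote):

	/* hibernating kernel, in hibernate(): */
	unsigned int flags = 0;
	if (hibernation_mode == HIBERNATION_PLATFORM)
		flags |= SF_PLATFORM_MODE;
	error = swsusp_write(flags);		/* flags stored in the image header */

	/* boot kernel, in software_resume(): */
	error = swsusp_read(&flags);		/* flags read back from the header */
	if (!error)
		hibernation_restore(flags & SF_PLATFORM_MODE);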
diff --git a/kernel/power/main.c b/kernel/power/main.c index fc45ed22620f..32147b57c3bf 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -23,6 +23,8 @@ | |||
| 23 | 23 | ||
| 24 | #include "power.h" | 24 | #include "power.h" |
| 25 | 25 | ||
| 26 | BLOCKING_NOTIFIER_HEAD(pm_chain_head); | ||
| 27 | |||
| 26 | /*This is just an arbitrary number */ | 28 | /*This is just an arbitrary number */ |
| 27 | #define FREE_PAGE_NUMBER (100) | 29 | #define FREE_PAGE_NUMBER (100) |
| 28 | 30 | ||
| @@ -63,14 +65,11 @@ static inline void pm_finish(suspend_state_t state) | |||
| 63 | 65 | ||
| 64 | /** | 66 | /** |
| 65 | * suspend_prepare - Do prep work before entering low-power state. | 67 | * suspend_prepare - Do prep work before entering low-power state. |
| 66 | * @state: State we're entering. | ||
| 67 | * | 68 | * |
| 68 | * This is common code that is called for each state that we're | 69 | * This is common code that is called for each state that we're entering. |
| 69 | * entering. Allocate a console, stop all processes, then make sure | 70 | * Run suspend notifiers, allocate a console and stop all processes. |
| 70 | * the platform can enter the requested state. | ||
| 71 | */ | 71 | */ |
| 72 | 72 | static int suspend_prepare(void) | |
| 73 | static int suspend_prepare(suspend_state_t state) | ||
| 74 | { | 73 | { |
| 75 | int error; | 74 | int error; |
| 76 | unsigned int free_pages; | 75 | unsigned int free_pages; |
| @@ -78,6 +77,10 @@ static int suspend_prepare(suspend_state_t state) | |||
| 78 | if (!pm_ops || !pm_ops->enter) | 77 | if (!pm_ops || !pm_ops->enter) |
| 79 | return -EPERM; | 78 | return -EPERM; |
| 80 | 79 | ||
| 80 | error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); | ||
| 81 | if (error) | ||
| 82 | goto Finish; | ||
| 83 | |||
| 81 | pm_prepare_console(); | 84 | pm_prepare_console(); |
| 82 | 85 | ||
| 83 | if (freeze_processes()) { | 86 | if (freeze_processes()) { |
| @@ -85,46 +88,23 @@ static int suspend_prepare(suspend_state_t state) | |||
| 85 | goto Thaw; | 88 | goto Thaw; |
| 86 | } | 89 | } |
| 87 | 90 | ||
| 88 | if ((free_pages = global_page_state(NR_FREE_PAGES)) | 91 | free_pages = global_page_state(NR_FREE_PAGES); |
| 89 | < FREE_PAGE_NUMBER) { | 92 | if (free_pages < FREE_PAGE_NUMBER) { |
| 90 | pr_debug("PM: free some memory\n"); | 93 | pr_debug("PM: free some memory\n"); |
| 91 | shrink_all_memory(FREE_PAGE_NUMBER - free_pages); | 94 | shrink_all_memory(FREE_PAGE_NUMBER - free_pages); |
| 92 | if (nr_free_pages() < FREE_PAGE_NUMBER) { | 95 | if (nr_free_pages() < FREE_PAGE_NUMBER) { |
| 93 | error = -ENOMEM; | 96 | error = -ENOMEM; |
| 94 | printk(KERN_ERR "PM: No enough memory\n"); | 97 | printk(KERN_ERR "PM: No enough memory\n"); |
| 95 | goto Thaw; | ||
| 96 | } | 98 | } |
| 97 | } | 99 | } |
| 98 | |||
| 99 | if (pm_ops->set_target) { | ||
| 100 | error = pm_ops->set_target(state); | ||
| 101 | if (error) | ||
| 102 | goto Thaw; | ||
| 103 | } | ||
| 104 | suspend_console(); | ||
| 105 | error = device_suspend(PMSG_SUSPEND); | ||
| 106 | if (error) { | ||
| 107 | printk(KERN_ERR "Some devices failed to suspend\n"); | ||
| 108 | goto Resume_console; | ||
| 109 | } | ||
| 110 | if (pm_ops->prepare) { | ||
| 111 | if ((error = pm_ops->prepare(state))) | ||
| 112 | goto Resume_devices; | ||
| 113 | } | ||
| 114 | |||
| 115 | error = disable_nonboot_cpus(); | ||
| 116 | if (!error) | 100 | if (!error) |
| 117 | return 0; | 101 | return 0; |
| 118 | 102 | ||
| 119 | enable_nonboot_cpus(); | ||
| 120 | pm_finish(state); | ||
| 121 | Resume_devices: | ||
| 122 | device_resume(); | ||
| 123 | Resume_console: | ||
| 124 | resume_console(); | ||
| 125 | Thaw: | 103 | Thaw: |
| 126 | thaw_processes(); | 104 | thaw_processes(); |
| 127 | pm_restore_console(); | 105 | pm_restore_console(); |
| 106 | Finish: | ||
| 107 | pm_notifier_call_chain(PM_POST_SUSPEND); | ||
| 128 | return error; | 108 | return error; |
| 129 | } | 109 | } |
| 130 | 110 | ||
| @@ -140,6 +120,12 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) | |||
| 140 | local_irq_enable(); | 120 | local_irq_enable(); |
| 141 | } | 121 | } |
| 142 | 122 | ||
| 123 | /** | ||
| 124 | * suspend_enter - enter the desired system sleep state. | ||
| 125 | * @state: state to enter | ||
| 126 | * | ||
| 127 | * This function should be called after devices have been suspended. | ||
| 128 | */ | ||
| 143 | int suspend_enter(suspend_state_t state) | 129 | int suspend_enter(suspend_state_t state) |
| 144 | { | 130 | { |
| 145 | int error = 0; | 131 | int error = 0; |
| @@ -159,23 +145,58 @@ int suspend_enter(suspend_state_t state) | |||
| 159 | return error; | 145 | return error; |
| 160 | } | 146 | } |
| 161 | 147 | ||
| 148 | /** | ||
| 149 | * suspend_devices_and_enter - suspend devices and enter the desired system sleep | ||
| 150 | * state. | ||
| 151 | * @state: state to enter | ||
| 152 | */ | ||
| 153 | int suspend_devices_and_enter(suspend_state_t state) | ||
| 154 | { | ||
| 155 | int error; | ||
| 156 | |||
| 157 | if (!pm_ops) | ||
| 158 | return -ENOSYS; | ||
| 159 | |||
| 160 | if (pm_ops->set_target) { | ||
| 161 | error = pm_ops->set_target(state); | ||
| 162 | if (error) | ||
| 163 | return error; | ||
| 164 | } | ||
| 165 | suspend_console(); | ||
| 166 | error = device_suspend(PMSG_SUSPEND); | ||
| 167 | if (error) { | ||
| 168 | printk(KERN_ERR "Some devices failed to suspend\n"); | ||
| 169 | goto Resume_console; | ||
| 170 | } | ||
| 171 | if (pm_ops->prepare) { | ||
| 172 | error = pm_ops->prepare(state); | ||
| 173 | if (error) | ||
| 174 | goto Resume_devices; | ||
| 175 | } | ||
| 176 | error = disable_nonboot_cpus(); | ||
| 177 | if (!error) | ||
| 178 | suspend_enter(state); | ||
| 179 | |||
| 180 | enable_nonboot_cpus(); | ||
| 181 | pm_finish(state); | ||
| 182 | Resume_devices: | ||
| 183 | device_resume(); | ||
| 184 | Resume_console: | ||
| 185 | resume_console(); | ||
| 186 | return error; | ||
| 187 | } | ||
| 162 | 188 | ||
| 163 | /** | 189 | /** |
| 164 | * suspend_finish - Do final work before exiting suspend sequence. | 190 | * suspend_finish - Do final work before exiting suspend sequence. |
| 165 | * @state: State we're coming out of. | ||
| 166 | * | 191 | * |
| 167 | * Call platform code to clean up, restart processes, and free the | 192 | * Call platform code to clean up, restart processes, and free the |
| 168 | * console that we've allocated. This is not called for suspend-to-disk. | 193 | * console that we've allocated. This is not called for suspend-to-disk. |
| 169 | */ | 194 | */ |
| 170 | 195 | static void suspend_finish(void) | |
| 171 | static void suspend_finish(suspend_state_t state) | ||
| 172 | { | 196 | { |
| 173 | enable_nonboot_cpus(); | ||
| 174 | pm_finish(state); | ||
| 175 | device_resume(); | ||
| 176 | resume_console(); | ||
| 177 | thaw_processes(); | 197 | thaw_processes(); |
| 178 | pm_restore_console(); | 198 | pm_restore_console(); |
| 199 | pm_notifier_call_chain(PM_POST_SUSPEND); | ||
| 179 | } | 200 | } |
| 180 | 201 | ||
| 181 | 202 | ||
| @@ -207,7 +228,6 @@ static inline int valid_state(suspend_state_t state) | |||
| 207 | * Then, do the setup for suspend, enter the state, and cleaup (after | 228 | * Then, do the setup for suspend, enter the state, and cleaup (after |
| 208 | * we've woken up). | 229 | * we've woken up). |
| 209 | */ | 230 | */ |
| 210 | |||
| 211 | static int enter_state(suspend_state_t state) | 231 | static int enter_state(suspend_state_t state) |
| 212 | { | 232 | { |
| 213 | int error; | 233 | int error; |
| @@ -218,14 +238,14 @@ static int enter_state(suspend_state_t state) | |||
| 218 | return -EBUSY; | 238 | return -EBUSY; |
| 219 | 239 | ||
| 220 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); | 240 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); |
| 221 | if ((error = suspend_prepare(state))) | 241 | if ((error = suspend_prepare())) |
| 222 | goto Unlock; | 242 | goto Unlock; |
| 223 | 243 | ||
| 224 | pr_debug("PM: Entering %s sleep\n", pm_states[state]); | 244 | pr_debug("PM: Entering %s sleep\n", pm_states[state]); |
| 225 | error = suspend_enter(state); | 245 | error = suspend_devices_and_enter(state); |
| 226 | 246 | ||
| 227 | pr_debug("PM: Finishing wakeup.\n"); | 247 | pr_debug("PM: Finishing wakeup.\n"); |
| 228 | suspend_finish(state); | 248 | suspend_finish(); |
| 229 | Unlock: | 249 | Unlock: |
| 230 | mutex_unlock(&pm_mutex); | 250 | mutex_unlock(&pm_mutex); |
| 231 | return error; | 251 | return error; |
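After this refactoring enter_state() is a clean three-stage sequence, with all device and platform work isolated in the new suspend_devices_and_enter(). The control flow, condensed from the hunks above:

	error = suspend_prepare();	/* PM_SUSPEND_PREPARE notifiers, console, freezer */
	if (!error) {
		error = suspend_devices_and_enter(state); /* devices, pm_ops, nonboot CPUs */
		suspend_finish();	/* thaw processes, PM_POST_SUSPEND notifiers */
	}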
diff --git a/kernel/power/power.h b/kernel/power/power.h index 51381487103f..5f24c786f8ec 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
| @@ -25,7 +25,10 @@ struct swsusp_info { | |||
| 25 | */ | 25 | */ |
| 26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) | 26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) |
| 27 | 27 | ||
| 28 | extern struct hibernation_ops *hibernation_ops; | 28 | /* kernel/power/disk.c */ |
| 29 | extern int hibernation_snapshot(int platform_mode); | ||
| 30 | extern int hibernation_restore(int platform_mode); | ||
| 31 | extern int hibernation_platform_enter(void); | ||
| 29 | #endif | 32 | #endif |
| 30 | 33 | ||
| 31 | extern int pfn_is_nosave(unsigned long); | 34 | extern int pfn_is_nosave(unsigned long); |
| @@ -152,16 +155,34 @@ extern sector_t alloc_swapdev_block(int swap); | |||
| 152 | extern void free_all_swap_pages(int swap); | 155 | extern void free_all_swap_pages(int swap); |
| 153 | extern int swsusp_swap_in_use(void); | 156 | extern int swsusp_swap_in_use(void); |
| 154 | 157 | ||
| 158 | /* | ||
| 159 | * Flags that can be passed from the hibernating kernel to the "boot" kernel in | ||
| 160 | * the image header. | ||
| 161 | */ | ||
| 162 | #define SF_PLATFORM_MODE 1 | ||
| 163 | |||
| 164 | /* kernel/power/disk.c */ | ||
| 155 | extern int swsusp_check(void); | 165 | extern int swsusp_check(void); |
| 156 | extern int swsusp_shrink_memory(void); | 166 | extern int swsusp_shrink_memory(void); |
| 157 | extern void swsusp_free(void); | 167 | extern void swsusp_free(void); |
| 158 | extern int swsusp_suspend(void); | 168 | extern int swsusp_suspend(void); |
| 159 | extern int swsusp_resume(void); | 169 | extern int swsusp_resume(void); |
| 160 | extern int swsusp_read(void); | 170 | extern int swsusp_read(unsigned int *flags_p); |
| 161 | extern int swsusp_write(void); | 171 | extern int swsusp_write(unsigned int flags); |
| 162 | extern void swsusp_close(void); | 172 | extern void swsusp_close(void); |
| 163 | extern int suspend_enter(suspend_state_t state); | ||
| 164 | 173 | ||
| 165 | struct timeval; | 174 | struct timeval; |
| 175 | /* kernel/power/swsusp.c */ | ||
| 166 | extern void swsusp_show_speed(struct timeval *, struct timeval *, | 176 | extern void swsusp_show_speed(struct timeval *, struct timeval *, |
| 167 | unsigned int, char *); | 177 | unsigned int, char *); |
| 178 | |||
| 179 | /* kernel/power/main.c */ | ||
| 180 | extern int suspend_enter(suspend_state_t state); | ||
| 181 | extern int suspend_devices_and_enter(suspend_state_t state); | ||
| 182 | extern struct blocking_notifier_head pm_chain_head; | ||
| 183 | |||
| 184 | static inline int pm_notifier_call_chain(unsigned long val) | ||
| 185 | { | ||
| 186 | return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) | ||
| 187 | == NOTIFY_BAD) ? -EINVAL : 0; | ||
| 188 | } | ||
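pm_chain_head plus the PM_* events gives other subsystems a veto point before a sleep transition: a callback returning NOTIFY_BAD makes pm_notifier_call_chain() return -EINVAL, aborting the transition. A sketch of a consumer (the callback name is a stand-in; registration presumably goes through the standard blocking-notifier API on pm_chain_head):

	static int acme_pm_callback(struct notifier_block *nb,
				    unsigned long event, void *unused)
	{
		switch (event) {
		case PM_SUSPEND_PREPARE:
		case PM_HIBERNATION_PREPARE:
			/* return NOTIFY_BAD here to veto the transition */
			return NOTIFY_DONE;
		case PM_POST_SUSPEND:
		case PM_POST_HIBERNATION:
			return NOTIFY_DONE;	/* cleanup notifications */
		}
		return NOTIFY_DONE;
	}

	static struct notifier_block acme_pm_nb = {
		.notifier_call = acme_pm_callback,
	};

	/* e.g.: blocking_notifier_chain_register(&pm_chain_head, &acme_pm_nb); */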
diff --git a/kernel/power/process.c b/kernel/power/process.c index e0233d8422b9..3434940a3df1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -40,7 +40,7 @@ static inline void frozen_process(void) | |||
| 40 | current->flags |= PF_FROZEN; | 40 | current->flags |= PF_FROZEN; |
| 41 | wmb(); | 41 | wmb(); |
| 42 | } | 42 | } |
| 43 | clear_tsk_thread_flag(current, TIF_FREEZE); | 43 | clear_freeze_flag(current); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | /* Refrigerator is place where frozen processes are stored :-). */ | 46 | /* Refrigerator is place where frozen processes are stored :-). */ |
| @@ -72,20 +72,19 @@ void refrigerator(void) | |||
| 72 | schedule(); | 72 | schedule(); |
| 73 | } | 73 | } |
| 74 | pr_debug("%s left refrigerator\n", current->comm); | 74 | pr_debug("%s left refrigerator\n", current->comm); |
| 75 | current->state = save; | 75 | __set_current_state(save); |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | static inline void freeze_process(struct task_struct *p) | 78 | static void freeze_task(struct task_struct *p) |
| 79 | { | 79 | { |
| 80 | unsigned long flags; | 80 | unsigned long flags; |
| 81 | 81 | ||
| 82 | if (!freezing(p)) { | 82 | if (!freezing(p)) { |
| 83 | rmb(); | 83 | rmb(); |
| 84 | if (!frozen(p)) { | 84 | if (!frozen(p)) { |
| 85 | set_freeze_flag(p); | ||
| 85 | if (p->state == TASK_STOPPED) | 86 | if (p->state == TASK_STOPPED) |
| 86 | force_sig_specific(SIGSTOP, p); | 87 | force_sig_specific(SIGSTOP, p); |
| 87 | |||
| 88 | freeze(p); | ||
| 89 | spin_lock_irqsave(&p->sighand->siglock, flags); | 88 | spin_lock_irqsave(&p->sighand->siglock, flags); |
| 90 | signal_wake_up(p, p->state == TASK_STOPPED); | 89 | signal_wake_up(p, p->state == TASK_STOPPED); |
| 91 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 90 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
| @@ -99,19 +98,14 @@ static void cancel_freezing(struct task_struct *p) | |||
| 99 | 98 | ||
| 100 | if (freezing(p)) { | 99 | if (freezing(p)) { |
| 101 | pr_debug(" clean up: %s\n", p->comm); | 100 | pr_debug(" clean up: %s\n", p->comm); |
| 102 | do_not_freeze(p); | 101 | clear_freeze_flag(p); |
| 103 | spin_lock_irqsave(&p->sighand->siglock, flags); | 102 | spin_lock_irqsave(&p->sighand->siglock, flags); |
| 104 | recalc_sigpending_and_wake(p); | 103 | recalc_sigpending_and_wake(p); |
| 105 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 104 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
| 106 | } | 105 | } |
| 107 | } | 106 | } |
| 108 | 107 | ||
| 109 | static inline int is_user_space(struct task_struct *p) | 108 | static int try_to_freeze_tasks(int freeze_user_space) |
| 110 | { | ||
| 111 | return p->mm && !(p->flags & PF_BORROWED_MM); | ||
| 112 | } | ||
| 113 | |||
| 114 | static unsigned int try_to_freeze_tasks(int freeze_user_space) | ||
| 115 | { | 109 | { |
| 116 | struct task_struct *g, *p; | 110 | struct task_struct *g, *p; |
| 117 | unsigned long end_time; | 111 | unsigned long end_time; |
| @@ -122,26 +116,40 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) | |||
| 122 | todo = 0; | 116 | todo = 0; |
| 123 | read_lock(&tasklist_lock); | 117 | read_lock(&tasklist_lock); |
| 124 | do_each_thread(g, p) { | 118 | do_each_thread(g, p) { |
| 125 | if (!freezeable(p)) | 119 | if (frozen(p) || !freezeable(p)) |
| 126 | continue; | 120 | continue; |
| 127 | 121 | ||
| 128 | if (frozen(p)) | 122 | if (freeze_user_space) { |
| 129 | continue; | 123 | if (p->state == TASK_TRACED && |
| 130 | 124 | frozen(p->parent)) { | |
| 131 | if (p->state == TASK_TRACED && frozen(p->parent)) { | 125 | cancel_freezing(p); |
| 132 | cancel_freezing(p); | 126 | continue; |
| 133 | continue; | 127 | } |
| 128 | /* | ||
| 129 | * Kernel threads should not have TIF_FREEZE set | ||
| 130 | * at this point, so we must ensure that either | ||
| 131 | * p->mm is not NULL *and* PF_BORROWED_MM is | ||
| 132 | * unset, or TIF_FREEZE is left unset. | ||
| 133 | * The task_lock() is necessary to prevent races | ||
| 134 | * with exit_mm() or use_mm()/unuse_mm() from | ||
| 135 | * occurring. | ||
| 136 | */ | ||
| 137 | task_lock(p); | ||
| 138 | if (!p->mm || (p->flags & PF_BORROWED_MM)) { | ||
| 139 | task_unlock(p); | ||
| 140 | continue; | ||
| 141 | } | ||
| 142 | freeze_task(p); | ||
| 143 | task_unlock(p); | ||
| 144 | } else { | ||
| 145 | freeze_task(p); | ||
| 134 | } | 146 | } |
| 135 | if (freeze_user_space && !is_user_space(p)) | ||
| 136 | continue; | ||
| 137 | |||
| 138 | freeze_process(p); | ||
| 139 | if (!freezer_should_skip(p)) | 147 | if (!freezer_should_skip(p)) |
| 140 | todo++; | 148 | todo++; |
| 141 | } while_each_thread(g, p); | 149 | } while_each_thread(g, p); |
| 142 | read_unlock(&tasklist_lock); | 150 | read_unlock(&tasklist_lock); |
| 143 | yield(); /* Yield is okay here */ | 151 | yield(); /* Yield is okay here */ |
| 144 | if (todo && time_after(jiffies, end_time)) | 152 | if (time_after(jiffies, end_time)) |
| 145 | break; | 153 | break; |
| 146 | } while (todo); | 154 | } while (todo); |
| 147 | 155 | ||
| @@ -152,49 +160,41 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) | |||
| 152 | * but it cleans up leftover PF_FREEZE requests. | 160 | * but it cleans up leftover PF_FREEZE requests. |
| 153 | */ | 161 | */ |
| 154 | printk("\n"); | 162 | printk("\n"); |
| 155 | printk(KERN_ERR "Stopping %s timed out after %d seconds " | 163 | printk(KERN_ERR "Freezing of %s timed out after %d seconds " |
| 156 | "(%d tasks refusing to freeze):\n", | 164 | "(%d tasks refusing to freeze):\n", |
| 157 | freeze_user_space ? "user space processes" : | 165 | freeze_user_space ? "user space " : "tasks ", |
| 158 | "kernel threads", | ||
| 159 | TIMEOUT / HZ, todo); | 166 | TIMEOUT / HZ, todo); |
| 167 | show_state(); | ||
| 160 | read_lock(&tasklist_lock); | 168 | read_lock(&tasklist_lock); |
| 161 | do_each_thread(g, p) { | 169 | do_each_thread(g, p) { |
| 162 | if (freeze_user_space && !is_user_space(p)) | ||
| 163 | continue; | ||
| 164 | |||
| 165 | task_lock(p); | 170 | task_lock(p); |
| 166 | if (freezeable(p) && !frozen(p) && | 171 | if (freezing(p) && !freezer_should_skip(p)) |
| 167 | !freezer_should_skip(p)) | ||
| 168 | printk(KERN_ERR " %s\n", p->comm); | 172 | printk(KERN_ERR " %s\n", p->comm); |
| 169 | |||
| 170 | cancel_freezing(p); | 173 | cancel_freezing(p); |
| 171 | task_unlock(p); | 174 | task_unlock(p); |
| 172 | } while_each_thread(g, p); | 175 | } while_each_thread(g, p); |
| 173 | read_unlock(&tasklist_lock); | 176 | read_unlock(&tasklist_lock); |
| 174 | } | 177 | } |
| 175 | 178 | ||
| 176 | return todo; | 179 | return todo ? -EBUSY : 0; |
| 177 | } | 180 | } |
| 178 | 181 | ||
| 179 | /** | 182 | /** |
| 180 | * freeze_processes - tell processes to enter the refrigerator | 183 | * freeze_processes - tell processes to enter the refrigerator |
| 181 | * | ||
| 182 | * Returns 0 on success, or the number of processes that didn't freeze, | ||
| 183 | * although they were told to. | ||
| 184 | */ | 184 | */ |
| 185 | int freeze_processes(void) | 185 | int freeze_processes(void) |
| 186 | { | 186 | { |
| 187 | unsigned int nr_unfrozen; | 187 | int error; |
| 188 | 188 | ||
| 189 | printk("Stopping tasks ... "); | 189 | printk("Stopping tasks ... "); |
| 190 | nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); | 190 | error = try_to_freeze_tasks(FREEZER_USER_SPACE); |
| 191 | if (nr_unfrozen) | 191 | if (error) |
| 192 | return nr_unfrozen; | 192 | return error; |
| 193 | 193 | ||
| 194 | sys_sync(); | 194 | sys_sync(); |
| 195 | nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); | 195 | error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); |
| 196 | if (nr_unfrozen) | 196 | if (error) |
| 197 | return nr_unfrozen; | 197 | return error; |
| 198 | 198 | ||
| 199 | printk("done.\n"); | 199 | printk("done.\n"); |
| 200 | BUG_ON(in_atomic()); | 200 | BUG_ON(in_atomic()); |
| @@ -210,7 +210,7 @@ static void thaw_tasks(int thaw_user_space) | |||
| 210 | if (!freezeable(p)) | 210 | if (!freezeable(p)) |
| 211 | continue; | 211 | continue; |
| 212 | 212 | ||
| 213 | if (is_user_space(p) == !thaw_user_space) | 213 | if (!p->mm == thaw_user_space) |
| 214 | continue; | 214 | continue; |
| 215 | 215 | ||
| 216 | thaw_process(p); | 216 | thaw_process(p); |
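
With freeze_task() now applied to kernel threads only on the FREEZER_KERNEL_THREADS pass, a kthread that wants to be frozen must opt in. A minimal sketch using the set_freezable()/try_to_freeze() pair seen elsewhere in this series:

	static int my_kthread(void *unused)
	{
		set_freezable();	/* kthreads are not freezable by default */
		while (!kthread_should_stop()) {
			try_to_freeze();	/* park in the refrigerator on demand */
			/* ... one unit of work ... */
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}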
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8b1a1b837145..917aba100575 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
| @@ -33,8 +33,9 @@ extern char resume_file[]; | |||
| 33 | #define SWSUSP_SIG "S1SUSPEND" | 33 | #define SWSUSP_SIG "S1SUSPEND" |
| 34 | 34 | ||
| 35 | struct swsusp_header { | 35 | struct swsusp_header { |
| 36 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; | 36 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; |
| 37 | sector_t image; | 37 | sector_t image; |
| 38 | unsigned int flags; /* Flags to pass to the "boot" kernel */ | ||
| 38 | char orig_sig[10]; | 39 | char orig_sig[10]; |
| 39 | char sig[10]; | 40 | char sig[10]; |
| 40 | } __attribute__((packed)); | 41 | } __attribute__((packed)); |
| @@ -138,7 +139,7 @@ static int wait_on_bio_chain(struct bio **bio_chain) | |||
| 138 | * Saving part | 139 | * Saving part |
| 139 | */ | 140 | */ |
| 140 | 141 | ||
| 141 | static int mark_swapfiles(sector_t start) | 142 | static int mark_swapfiles(sector_t start, unsigned int flags) |
| 142 | { | 143 | { |
| 143 | int error; | 144 | int error; |
| 144 | 145 | ||
| @@ -148,6 +149,7 @@ static int mark_swapfiles(sector_t start) | |||
| 148 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); | 149 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); |
| 149 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); | 150 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); |
| 150 | swsusp_header->image = start; | 151 | swsusp_header->image = start; |
| 152 | swsusp_header->flags = flags; | ||
| 151 | error = bio_write_page(swsusp_resume_block, | 153 | error = bio_write_page(swsusp_resume_block, |
| 152 | swsusp_header, NULL); | 154 | swsusp_header, NULL); |
| 153 | } else { | 155 | } else { |
| @@ -369,6 +371,7 @@ static int enough_swap(unsigned int nr_pages) | |||
| 369 | 371 | ||
| 370 | /** | 372 | /** |
| 371 | * swsusp_write - Write entire image and metadata. | 373 | * swsusp_write - Write entire image and metadata. |
| 374 | * @flags: flags to pass to the "boot" kernel in the image header | ||
| 372 | * | 375 | * |
| 373 | * It is important _NOT_ to umount filesystems at this point. We want | 376 | * It is important _NOT_ to umount filesystems at this point. We want |
| 374 | * them synced (in case something goes wrong) but we DO not want to mark | 377 | * them synced (in case something goes wrong) but we DO not want to mark |
| @@ -376,7 +379,7 @@ static int enough_swap(unsigned int nr_pages) | |||
| 376 | * correctly, we'll mark system clean, anyway.) | 379 | * correctly, we'll mark system clean, anyway.) |
| 377 | */ | 380 | */ |
| 378 | 381 | ||
| 379 | int swsusp_write(void) | 382 | int swsusp_write(unsigned int flags) |
| 380 | { | 383 | { |
| 381 | struct swap_map_handle handle; | 384 | struct swap_map_handle handle; |
| 382 | struct snapshot_handle snapshot; | 385 | struct snapshot_handle snapshot; |
| @@ -415,7 +418,7 @@ int swsusp_write(void) | |||
| 415 | if (!error) { | 418 | if (!error) { |
| 416 | flush_swap_writer(&handle); | 419 | flush_swap_writer(&handle); |
| 417 | printk("S"); | 420 | printk("S"); |
| 418 | error = mark_swapfiles(start); | 421 | error = mark_swapfiles(start, flags); |
| 419 | printk("|\n"); | 422 | printk("|\n"); |
| 420 | } | 423 | } |
| 421 | } | 424 | } |
| @@ -540,13 +543,20 @@ static int load_image(struct swap_map_handle *handle, | |||
| 540 | return error; | 543 | return error; |
| 541 | } | 544 | } |
| 542 | 545 | ||
| 543 | int swsusp_read(void) | 546 | /** |
| 547 | * swsusp_read - read the hibernation image. | ||
| 548 | * @flags_p: flags passed by the "frozen" kernel in the image header should | ||
| 549 | * be written into this memory location | ||
| 550 | */ | ||
| 551 | |||
| 552 | int swsusp_read(unsigned int *flags_p) | ||
| 544 | { | 553 | { |
| 545 | int error; | 554 | int error; |
| 546 | struct swap_map_handle handle; | 555 | struct swap_map_handle handle; |
| 547 | struct snapshot_handle snapshot; | 556 | struct snapshot_handle snapshot; |
| 548 | struct swsusp_info *header; | 557 | struct swsusp_info *header; |
| 549 | 558 | ||
| 559 | *flags_p = swsusp_header->flags; | ||
| 550 | if (IS_ERR(resume_bdev)) { | 560 | if (IS_ERR(resume_bdev)) { |
| 551 | pr_debug("swsusp: block device not initialised\n"); | 561 | pr_debug("swsusp: block device not initialised\n"); |
| 552 | return PTR_ERR(resume_bdev); | 562 | return PTR_ERR(resume_bdev); |
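
The header flags word is the one-way channel from the hibernating kernel to the "boot" kernel. A sketch of the intended round trip (caller shape illustrative, not a quote of disk.c):

	/* save side: record whether the platform (ACPI) path was used */
	error = swsusp_write(platform_mode ? SF_PLATFORM_MODE : 0);

	/* resume side: the boot kernel recovers the same word */
	unsigned int flags;

	error = swsusp_read(&flags);
	if (!error)
		platform_mode = !!(flags & SF_PLATFORM_MODE);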
diff --git a/kernel/power/user.c b/kernel/power/user.c index d65305b515b1..bd0723a7df3f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -128,92 +128,6 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, | |||
| 128 | return res; | 128 | return res; |
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | static inline int platform_prepare(void) | ||
| 132 | { | ||
| 133 | int error = 0; | ||
| 134 | |||
| 135 | if (hibernation_ops) | ||
| 136 | error = hibernation_ops->prepare(); | ||
| 137 | |||
| 138 | return error; | ||
| 139 | } | ||
| 140 | |||
| 141 | static inline void platform_finish(void) | ||
| 142 | { | ||
| 143 | if (hibernation_ops) | ||
| 144 | hibernation_ops->finish(); | ||
| 145 | } | ||
| 146 | |||
| 147 | static inline int snapshot_suspend(int platform_suspend) | ||
| 148 | { | ||
| 149 | int error; | ||
| 150 | |||
| 151 | mutex_lock(&pm_mutex); | ||
| 152 | /* Free memory before shutting down devices. */ | ||
| 153 | error = swsusp_shrink_memory(); | ||
| 154 | if (error) | ||
| 155 | goto Finish; | ||
| 156 | |||
| 157 | if (platform_suspend) { | ||
| 158 | error = platform_prepare(); | ||
| 159 | if (error) | ||
| 160 | goto Finish; | ||
| 161 | } | ||
| 162 | suspend_console(); | ||
| 163 | error = device_suspend(PMSG_FREEZE); | ||
| 164 | if (error) | ||
| 165 | goto Resume_devices; | ||
| 166 | |||
| 167 | error = disable_nonboot_cpus(); | ||
| 168 | if (!error) { | ||
| 169 | in_suspend = 1; | ||
| 170 | error = swsusp_suspend(); | ||
| 171 | } | ||
| 172 | enable_nonboot_cpus(); | ||
| 173 | Resume_devices: | ||
| 174 | if (platform_suspend) | ||
| 175 | platform_finish(); | ||
| 176 | |||
| 177 | device_resume(); | ||
| 178 | resume_console(); | ||
| 179 | Finish: | ||
| 180 | mutex_unlock(&pm_mutex); | ||
| 181 | return error; | ||
| 182 | } | ||
| 183 | |||
| 184 | static inline int snapshot_restore(int platform_suspend) | ||
| 185 | { | ||
| 186 | int error; | ||
| 187 | |||
| 188 | mutex_lock(&pm_mutex); | ||
| 189 | pm_prepare_console(); | ||
| 190 | if (platform_suspend) { | ||
| 191 | error = platform_prepare(); | ||
| 192 | if (error) | ||
| 193 | goto Finish; | ||
| 194 | } | ||
| 195 | suspend_console(); | ||
| 196 | error = device_suspend(PMSG_PRETHAW); | ||
| 197 | if (error) | ||
| 198 | goto Resume_devices; | ||
| 199 | |||
| 200 | error = disable_nonboot_cpus(); | ||
| 201 | if (!error) | ||
| 202 | error = swsusp_resume(); | ||
| 203 | |||
| 204 | enable_nonboot_cpus(); | ||
| 205 | Resume_devices: | ||
| 206 | if (platform_suspend) | ||
| 207 | platform_finish(); | ||
| 208 | |||
| 209 | device_resume(); | ||
| 210 | resume_console(); | ||
| 211 | Finish: | ||
| 212 | pm_restore_console(); | ||
| 213 | mutex_unlock(&pm_mutex); | ||
| 214 | return error; | ||
| 215 | } | ||
| 216 | |||
| 217 | static int snapshot_ioctl(struct inode *inode, struct file *filp, | 131 | static int snapshot_ioctl(struct inode *inode, struct file *filp, |
| 218 | unsigned int cmd, unsigned long arg) | 132 | unsigned int cmd, unsigned long arg) |
| 219 | { | 133 | { |
| @@ -237,10 +151,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
| 237 | if (data->frozen) | 151 | if (data->frozen) |
| 238 | break; | 152 | break; |
| 239 | mutex_lock(&pm_mutex); | 153 | mutex_lock(&pm_mutex); |
| 240 | if (freeze_processes()) { | 154 | error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); |
| 241 | thaw_processes(); | 155 | if (!error) { |
| 242 | error = -EBUSY; | 156 | error = freeze_processes(); |
| 157 | if (error) | ||
| 158 | thaw_processes(); | ||
| 243 | } | 159 | } |
| 160 | if (error) | ||
| 161 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
| 244 | mutex_unlock(&pm_mutex); | 162 | mutex_unlock(&pm_mutex); |
| 245 | if (!error) | 163 | if (!error) |
| 246 | data->frozen = 1; | 164 | data->frozen = 1; |
| @@ -251,6 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
| 251 | break; | 169 | break; |
| 252 | mutex_lock(&pm_mutex); | 170 | mutex_lock(&pm_mutex); |
| 253 | thaw_processes(); | 171 | thaw_processes(); |
| 172 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
| 254 | mutex_unlock(&pm_mutex); | 173 | mutex_unlock(&pm_mutex); |
| 255 | data->frozen = 0; | 174 | data->frozen = 0; |
| 256 | break; | 175 | break; |
| @@ -260,7 +179,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
| 260 | error = -EPERM; | 179 | error = -EPERM; |
| 261 | break; | 180 | break; |
| 262 | } | 181 | } |
| 263 | error = snapshot_suspend(data->platform_suspend); | 182 | error = hibernation_snapshot(data->platform_suspend); |
| 264 | if (!error) | 183 | if (!error) |
| 265 | error = put_user(in_suspend, (unsigned int __user *)arg); | 184 | error = put_user(in_suspend, (unsigned int __user *)arg); |
| 266 | if (!error) | 185 | if (!error) |
| @@ -274,7 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
| 274 | error = -EPERM; | 193 | error = -EPERM; |
| 275 | break; | 194 | break; |
| 276 | } | 195 | } |
| 277 | error = snapshot_restore(data->platform_suspend); | 196 | error = hibernation_restore(data->platform_suspend); |
| 278 | break; | 197 | break; |
| 279 | 198 | ||
| 280 | case SNAPSHOT_FREE: | 199 | case SNAPSHOT_FREE: |
| @@ -336,47 +255,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
| 336 | break; | 255 | break; |
| 337 | 256 | ||
| 338 | case SNAPSHOT_S2RAM: | 257 | case SNAPSHOT_S2RAM: |
| 339 | if (!pm_ops) { | ||
| 340 | error = -ENOSYS; | ||
| 341 | break; | ||
| 342 | } | ||
| 343 | |||
| 344 | if (!data->frozen) { | 258 | if (!data->frozen) { |
| 345 | error = -EPERM; | 259 | error = -EPERM; |
| 346 | break; | 260 | break; |
| 347 | } | 261 | } |
| 348 | |||
| 349 | if (!mutex_trylock(&pm_mutex)) { | 262 | if (!mutex_trylock(&pm_mutex)) { |
| 350 | error = -EBUSY; | 263 | error = -EBUSY; |
| 351 | break; | 264 | break; |
| 352 | } | 265 | } |
| 353 | 266 | /* | |
| 354 | if (pm_ops->prepare) { | 267 | * Tasks are frozen and the notifiers have been called with |
| 355 | error = pm_ops->prepare(PM_SUSPEND_MEM); | 268 | * PM_HIBERNATION_PREPARE |
| 356 | if (error) | 269 | */ |
| 357 | goto OutS3; | 270 | error = suspend_devices_and_enter(PM_SUSPEND_MEM); |
| 358 | } | ||
| 359 | |||
| 360 | /* Put devices to sleep */ | ||
| 361 | suspend_console(); | ||
| 362 | error = device_suspend(PMSG_SUSPEND); | ||
| 363 | if (error) { | ||
| 364 | printk(KERN_ERR "Failed to suspend some devices.\n"); | ||
| 365 | } else { | ||
| 366 | error = disable_nonboot_cpus(); | ||
| 367 | if (!error) { | ||
| 368 | /* Enter S3, system is already frozen */ | ||
| 369 | suspend_enter(PM_SUSPEND_MEM); | ||
| 370 | enable_nonboot_cpus(); | ||
| 371 | } | ||
| 372 | /* Wake up devices */ | ||
| 373 | device_resume(); | ||
| 374 | } | ||
| 375 | resume_console(); | ||
| 376 | if (pm_ops->finish) | ||
| 377 | pm_ops->finish(PM_SUSPEND_MEM); | ||
| 378 | |||
| 379 | OutS3: | ||
| 380 | mutex_unlock(&pm_mutex); | 271 | mutex_unlock(&pm_mutex); |
| 381 | break; | 272 | break; |
| 382 | 273 | ||
| @@ -386,19 +277,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
| 386 | switch (arg) { | 277 | switch (arg) { |
| 387 | 278 | ||
| 388 | case PMOPS_PREPARE: | 279 | case PMOPS_PREPARE: |
| 389 | if (hibernation_ops) { | 280 | data->platform_suspend = 1; |
| 390 | data->platform_suspend = 1; | 281 | error = 0; |
| 391 | error = 0; | ||
| 392 | } else { | ||
| 393 | error = -ENOSYS; | ||
| 394 | } | ||
| 395 | break; | 282 | break; |
| 396 | 283 | ||
| 397 | case PMOPS_ENTER: | 284 | case PMOPS_ENTER: |
| 398 | if (data->platform_suspend) { | 285 | if (data->platform_suspend) |
| 399 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | 286 | error = hibernation_platform_enter(); |
| 400 | error = hibernation_ops->enter(); | 287 | |
| 401 | } | ||
| 402 | break; | 288 | break; |
| 403 | 289 | ||
| 404 | case PMOPS_FINISH: | 290 | case PMOPS_FINISH: |
diff --git a/kernel/printk.c b/kernel/printk.c index 0bbdeac2810c..051d27e36a6c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -449,13 +449,16 @@ static int printk_time = 1; | |||
| 449 | #else | 449 | #else |
| 450 | static int printk_time = 0; | 450 | static int printk_time = 0; |
| 451 | #endif | 451 | #endif |
| 452 | module_param(printk_time, int, S_IRUGO | S_IWUSR); | 452 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
| 453 | 453 | ||
| 454 | static int __init printk_time_setup(char *str) | 454 | static int __init printk_time_setup(char *str) |
| 455 | { | 455 | { |
| 456 | if (*str) | 456 | if (*str) |
| 457 | return 0; | 457 | return 0; |
| 458 | printk_time = 1; | 458 | printk_time = 1; |
| 459 | printk(KERN_NOTICE "The 'time' option is deprecated and " | ||
| 460 | "is scheduled for removal in early 2008\n"); | ||
| 461 | printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n"); | ||
| 459 | return 1; | 462 | return 1; |
| 460 | } | 463 | } |
| 461 | 464 | ||
| @@ -483,6 +486,9 @@ static int have_callable_console(void) | |||
| 483 | * @fmt: format string | 486 | * @fmt: format string |
| 484 | * | 487 | * |
| 485 | * This is printk(). It can be called from any context. We want it to work. | 488 | * This is printk(). It can be called from any context. We want it to work. |
| 489 | * Be aware of the fact that if oops_in_progress is not set, we might try to | ||
| 490 | * wake klogd up which could deadlock on runqueue lock if printk() is called | ||
| 491 | * from scheduler code. | ||
| 486 | * | 492 | * |
| 487 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and | 493 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and |
| 488 | * call the console drivers. If we fail to get the semaphore we place the output | 494 | * call the console drivers. If we fail to get the semaphore we place the output |
| @@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end) | |||
| 654 | */ | 660 | */ |
| 655 | static int __init console_setup(char *str) | 661 | static int __init console_setup(char *str) |
| 656 | { | 662 | { |
| 657 | char name[sizeof(console_cmdline[0].name)]; | 663 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ |
| 658 | char *s, *options; | 664 | char *s, *options; |
| 659 | int idx; | 665 | int idx; |
| 660 | 666 | ||
| @@ -662,27 +668,27 @@ static int __init console_setup(char *str) | |||
| 662 | * Decode str into name, index, options. | 668 | * Decode str into name, index, options. |
| 663 | */ | 669 | */ |
| 664 | if (str[0] >= '0' && str[0] <= '9') { | 670 | if (str[0] >= '0' && str[0] <= '9') { |
| 665 | strcpy(name, "ttyS"); | 671 | strcpy(buf, "ttyS"); |
| 666 | strncpy(name + 4, str, sizeof(name) - 5); | 672 | strncpy(buf + 4, str, sizeof(buf) - 5); |
| 667 | } else { | 673 | } else { |
| 668 | strncpy(name, str, sizeof(name) - 1); | 674 | strncpy(buf, str, sizeof(buf) - 1); |
| 669 | } | 675 | } |
| 670 | name[sizeof(name) - 1] = 0; | 676 | buf[sizeof(buf) - 1] = 0; |
| 671 | if ((options = strchr(str, ',')) != NULL) | 677 | if ((options = strchr(str, ',')) != NULL) |
| 672 | *(options++) = 0; | 678 | *(options++) = 0; |
| 673 | #ifdef __sparc__ | 679 | #ifdef __sparc__ |
| 674 | if (!strcmp(str, "ttya")) | 680 | if (!strcmp(str, "ttya")) |
| 675 | strcpy(name, "ttyS0"); | 681 | strcpy(buf, "ttyS0"); |
| 676 | if (!strcmp(str, "ttyb")) | 682 | if (!strcmp(str, "ttyb")) |
| 677 | strcpy(name, "ttyS1"); | 683 | strcpy(buf, "ttyS1"); |
| 678 | #endif | 684 | #endif |
| 679 | for (s = name; *s; s++) | 685 | for (s = buf; *s; s++) |
| 680 | if ((*s >= '0' && *s <= '9') || *s == ',') | 686 | if ((*s >= '0' && *s <= '9') || *s == ',') |
| 681 | break; | 687 | break; |
| 682 | idx = simple_strtoul(s, NULL, 10); | 688 | idx = simple_strtoul(s, NULL, 10); |
| 683 | *s = 0; | 689 | *s = 0; |
| 684 | 690 | ||
| 685 | add_preferred_console(name, idx, options); | 691 | add_preferred_console(buf, idx, options); |
| 686 | return 1; | 692 | return 1; |
| 687 | } | 693 | } |
| 688 | __setup("console=", console_setup); | 694 | __setup("console=", console_setup); |
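
A worked decode of a typical option shows why buf needs the extra 4 bytes (values illustrative):

	/* console=ttyS0,115200:
	 *   options split:   str -> "ttyS0", options -> "115200"
	 *   buf              "ttyS0,115200" (copied before the ',' was cut)
	 *   s                first digit/comma in buf -> the '0'
	 *   idx              simple_strtoul("0,115200", ...) == 0
	 *   *s = 0           buf -> "ttyS"
	 *   result           add_preferred_console("ttyS", 0, "115200");
	 * with buf only sizeof(name) long, a maximum-length driver name
	 * would have had its index digits truncated before parsing.
	 */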
| @@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
| 709 | * See if this tty is not yet registered, and | 715 | * See if this tty is not yet registered, and |
| 710 | * if we have a slot free. | 716 | * if we have a slot free. |
| 711 | */ | 717 | */ |
| 712 | for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | 718 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) |
| 713 | if (strcmp(console_cmdline[i].name, name) == 0 && | 719 | if (strcmp(console_cmdline[i].name, name) == 0 && |
| 714 | console_cmdline[i].index == idx) { | 720 | console_cmdline[i].index == idx) { |
| 715 | selected_console = i; | 721 | selected_console = i; |
| @@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
| 726 | return 0; | 732 | return 0; |
| 727 | } | 733 | } |
| 728 | 734 | ||
| 735 | int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) | ||
| 736 | { | ||
| 737 | struct console_cmdline *c; | ||
| 738 | int i; | ||
| 739 | |||
| 740 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | ||
| 741 | if (strcmp(console_cmdline[i].name, name) == 0 && | ||
| 742 | console_cmdline[i].index == idx) { | ||
| 743 | c = &console_cmdline[i]; | ||
| 744 | memcpy(c->name, name_new, sizeof(c->name)); | ||
| 745 | c->name[sizeof(c->name) - 1] = 0; | ||
| 746 | c->options = options; | ||
| 747 | c->index = idx_new; | ||
| 748 | return i; | ||
| 749 | } | ||
| 750 | /* not found */ | ||
| 751 | return -1; | ||
| 752 | } | ||
| 753 | |||
| 729 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND | 754 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND |
| 730 | /** | 755 | /** |
| 731 | * suspend_console - suspend the console subsystem | 756 | * suspend_console - suspend the console subsystem |
| @@ -942,6 +967,9 @@ void register_console(struct console *console) | |||
| 942 | if (preferred_console < 0 || bootconsole || !console_drivers) | 967 | if (preferred_console < 0 || bootconsole || !console_drivers) |
| 943 | preferred_console = selected_console; | 968 | preferred_console = selected_console; |
| 944 | 969 | ||
| 970 | if (console->early_setup) | ||
| 971 | console->early_setup(); | ||
| 972 | |||
| 945 | /* | 973 | /* |
| 946 | * See if we want to use this console driver. If we | 974 | * See if we want to use this console driver. If we |
| 947 | * didn't select a console we take the first one | 975 | * didn't select a console we take the first one |
| @@ -985,12 +1013,15 @@ void register_console(struct console *console) | |||
| 985 | if (!(console->flags & CON_ENABLED)) | 1013 | if (!(console->flags & CON_ENABLED)) |
| 986 | return; | 1014 | return; |
| 987 | 1015 | ||
| 988 | if (bootconsole) { | 1016 | if (bootconsole && (console->flags & CON_CONSDEV)) { |
| 989 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", | 1017 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", |
| 990 | bootconsole->name, bootconsole->index, | 1018 | bootconsole->name, bootconsole->index, |
| 991 | console->name, console->index); | 1019 | console->name, console->index); |
| 992 | unregister_console(bootconsole); | 1020 | unregister_console(bootconsole); |
| 993 | console->flags &= ~CON_PRINTBUFFER; | 1021 | console->flags &= ~CON_PRINTBUFFER; |
| 1022 | } else { | ||
| 1023 | printk(KERN_INFO "console [%s%d] enabled\n", | ||
| 1024 | console->name, console->index); | ||
| 994 | } | 1025 | } |
| 995 | 1026 | ||
| 996 | /* | 1027 | /* |
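
update_console_cmdline() exists so a driver can rename an earlier console= selection in place; a hypothetical handover from a firmware alias to the real device name (the caller and names here are illustrative, not taken from this patch):

	char *new_options = NULL;	/* or the option string to attach */

	/* hypothetical: the UART driver claims a console= entry that the
	 * user specified under an early/firmware alias */
	if (update_console_cmdline("uart", 0, "ttyS", 0, new_options) >= 0)
		pr_debug("console selection now points at ttyS0\n");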
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ad7949a589dd..82a558b655da 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -142,7 +142,7 @@ static int may_attach(struct task_struct *task) | |||
| 142 | return -EPERM; | 142 | return -EPERM; |
| 143 | smp_rmb(); | 143 | smp_rmb(); |
| 144 | if (task->mm) | 144 | if (task->mm) |
| 145 | dumpable = task->mm->dumpable; | 145 | dumpable = get_dumpable(task->mm); |
| 146 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | 146 | if (!dumpable && !capable(CAP_SYS_PTRACE)) |
| 147 | return -EPERM; | 147 | return -EPERM; |
| 148 | 148 | ||
| @@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task) | |||
| 161 | int ptrace_attach(struct task_struct *task) | 161 | int ptrace_attach(struct task_struct *task) |
| 162 | { | 162 | { |
| 163 | int retval; | 163 | int retval; |
| 164 | unsigned long flags; | ||
| 164 | 165 | ||
| 165 | audit_ptrace(task); | 166 | audit_ptrace(task); |
| 166 | 167 | ||
| @@ -181,9 +182,7 @@ repeat: | |||
| 181 | * cpu's that may have task_lock). | 182 | * cpu's that may have task_lock). |
| 182 | */ | 183 | */ |
| 183 | task_lock(task); | 184 | task_lock(task); |
| 184 | local_irq_disable(); | 185 | if (!write_trylock_irqsave(&tasklist_lock, flags)) { |
| 185 | if (!write_trylock(&tasklist_lock)) { | ||
| 186 | local_irq_enable(); | ||
| 187 | task_unlock(task); | 186 | task_unlock(task); |
| 188 | do { | 187 | do { |
| 189 | cpu_relax(); | 188 | cpu_relax(); |
| @@ -211,7 +210,7 @@ repeat: | |||
| 211 | force_sig_specific(SIGSTOP, task); | 210 | force_sig_specific(SIGSTOP, task); |
| 212 | 211 | ||
| 213 | bad: | 212 | bad: |
| 214 | write_unlock_irq(&tasklist_lock); | 213 | write_unlock_irqrestore(&tasklist_lock, flags); |
| 215 | task_unlock(task); | 214 | task_unlock(task); |
| 216 | out: | 215 | out: |
| 217 | return retval; | 216 | return retval; |
| @@ -491,3 +490,22 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | |||
| 491 | return ret; | 490 | return ret; |
| 492 | } | 491 | } |
| 493 | #endif /* __ARCH_SYS_PTRACE */ | 492 | #endif /* __ARCH_SYS_PTRACE */ |
| 493 | |||
| 494 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | ||
| 495 | { | ||
| 496 | unsigned long tmp; | ||
| 497 | int copied; | ||
| 498 | |||
| 499 | copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0); | ||
| 500 | if (copied != sizeof(tmp)) | ||
| 501 | return -EIO; | ||
| 502 | return put_user(tmp, (unsigned long __user *)data); | ||
| 503 | } | ||
| 504 | |||
| 505 | int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | ||
| 506 | { | ||
| 507 | int copied; | ||
| 508 | |||
| 509 | copied = access_process_vm(tsk, addr, &data, sizeof(data), 1); | ||
| 510 | return (copied == sizeof(data)) ? 0 : -EIO; | ||
| 511 | } | ||
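
These two helpers factor out the PEEK/POKE boilerplate that nearly every architecture duplicated. A sketch of the expected delegation from an architecture's ptrace request switch:

	/* sketch: fragment of an arch_ptrace()-style switch */
	case PTRACE_PEEKTEXT:
	case PTRACE_PEEKDATA:
		ret = generic_ptrace_peekdata(child, addr, data);
		break;
	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
		ret = generic_ptrace_pokedata(child, addr, data);
		break;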
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 55ba82a85a66..ddff33247785 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/moduleparam.h> | 40 | #include <linux/moduleparam.h> |
| 41 | #include <linux/percpu.h> | 41 | #include <linux/percpu.h> |
| 42 | #include <linux/notifier.h> | 42 | #include <linux/notifier.h> |
| 43 | #include <linux/freezer.h> | ||
| 43 | #include <linux/cpu.h> | 44 | #include <linux/cpu.h> |
| 44 | #include <linux/random.h> | 45 | #include <linux/random.h> |
| 45 | #include <linux/delay.h> | 46 | #include <linux/delay.h> |
| @@ -518,7 +519,6 @@ rcu_torture_writer(void *arg) | |||
| 518 | 519 | ||
| 519 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); | 520 | VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); |
| 520 | set_user_nice(current, 19); | 521 | set_user_nice(current, 19); |
| 521 | current->flags |= PF_NOFREEZE; | ||
| 522 | 522 | ||
| 523 | do { | 523 | do { |
| 524 | schedule_timeout_uninterruptible(1); | 524 | schedule_timeout_uninterruptible(1); |
| @@ -558,7 +558,6 @@ rcu_torture_fakewriter(void *arg) | |||
| 558 | 558 | ||
| 559 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); | 559 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); |
| 560 | set_user_nice(current, 19); | 560 | set_user_nice(current, 19); |
| 561 | current->flags |= PF_NOFREEZE; | ||
| 562 | 561 | ||
| 563 | do { | 562 | do { |
| 564 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); | 563 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); |
| @@ -589,7 +588,6 @@ rcu_torture_reader(void *arg) | |||
| 589 | 588 | ||
| 590 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); | 589 | VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); |
| 591 | set_user_nice(current, 19); | 590 | set_user_nice(current, 19); |
| 592 | current->flags |= PF_NOFREEZE; | ||
| 593 | 591 | ||
| 594 | do { | 592 | do { |
| 595 | idx = cur_ops->readlock(); | 593 | idx = cur_ops->readlock(); |
diff --git a/kernel/relay.c b/kernel/relay.c index a615a8f513fc..510fbbd7b500 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
| @@ -80,7 +80,7 @@ static struct vm_operations_struct relay_file_mmap_ops = { | |||
| 80 | * | 80 | * |
| 81 | * Caller should already have grabbed mmap_sem. | 81 | * Caller should already have grabbed mmap_sem. |
| 82 | */ | 82 | */ |
| 83 | int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) | 83 | static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) |
| 84 | { | 84 | { |
| 85 | unsigned long length = vma->vm_end - vma->vm_start; | 85 | unsigned long length = vma->vm_end - vma->vm_start; |
| 86 | struct file *filp = vma->vm_file; | 86 | struct file *filp = vma->vm_file; |
| @@ -145,7 +145,7 @@ depopulate: | |||
| 145 | * | 145 | * |
| 146 | * Returns channel buffer if successful, %NULL otherwise. | 146 | * Returns channel buffer if successful, %NULL otherwise. |
| 147 | */ | 147 | */ |
| 148 | struct rchan_buf *relay_create_buf(struct rchan *chan) | 148 | static struct rchan_buf *relay_create_buf(struct rchan *chan) |
| 149 | { | 149 | { |
| 150 | struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); | 150 | struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); |
| 151 | if (!buf) | 151 | if (!buf) |
| @@ -175,7 +175,7 @@ free_buf: | |||
| 175 | * | 175 | * |
| 176 | * Should only be called from kref_put(). | 176 | * Should only be called from kref_put(). |
| 177 | */ | 177 | */ |
| 178 | void relay_destroy_channel(struct kref *kref) | 178 | static void relay_destroy_channel(struct kref *kref) |
| 179 | { | 179 | { |
| 180 | struct rchan *chan = container_of(kref, struct rchan, kref); | 180 | struct rchan *chan = container_of(kref, struct rchan, kref); |
| 181 | kfree(chan); | 181 | kfree(chan); |
| @@ -185,7 +185,7 @@ void relay_destroy_channel(struct kref *kref) | |||
| 185 | * relay_destroy_buf - destroy an rchan_buf struct and associated buffer | 185 | * relay_destroy_buf - destroy an rchan_buf struct and associated buffer |
| 186 | * @buf: the buffer struct | 186 | * @buf: the buffer struct |
| 187 | */ | 187 | */ |
| 188 | void relay_destroy_buf(struct rchan_buf *buf) | 188 | static void relay_destroy_buf(struct rchan_buf *buf) |
| 189 | { | 189 | { |
| 190 | struct rchan *chan = buf->chan; | 190 | struct rchan *chan = buf->chan; |
| 191 | unsigned int i; | 191 | unsigned int i; |
| @@ -210,7 +210,7 @@ void relay_destroy_buf(struct rchan_buf *buf) | |||
| 210 | * rchan_buf_struct and the channel buffer. Should only be called from | 210 | * rchan_buf_struct and the channel buffer. Should only be called from |
| 211 | * kref_put(). | 211 | * kref_put(). |
| 212 | */ | 212 | */ |
| 213 | void relay_remove_buf(struct kref *kref) | 213 | static void relay_remove_buf(struct kref *kref) |
| 214 | { | 214 | { |
| 215 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); | 215 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); |
| 216 | buf->chan->cb->remove_buf_file(buf->dentry); | 216 | buf->chan->cb->remove_buf_file(buf->dentry); |
| @@ -223,11 +223,10 @@ void relay_remove_buf(struct kref *kref) | |||
| 223 | * | 223 | * |
| 224 | * Returns 1 if the buffer is empty, 0 otherwise. | 224 | * Returns 1 if the buffer is empty, 0 otherwise. |
| 225 | */ | 225 | */ |
| 226 | int relay_buf_empty(struct rchan_buf *buf) | 226 | static int relay_buf_empty(struct rchan_buf *buf) |
| 227 | { | 227 | { |
| 228 | return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; | 228 | return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; |
| 229 | } | 229 | } |
| 230 | EXPORT_SYMBOL_GPL(relay_buf_empty); | ||
| 231 | 230 | ||
| 232 | /** | 231 | /** |
| 233 | * relay_buf_full - boolean, is the channel buffer full? | 232 | * relay_buf_full - boolean, is the channel buffer full? |
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index da8d6bf46457..5aedbee014df 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c | |||
| @@ -29,12 +29,6 @@ | |||
| 29 | 29 | ||
| 30 | #include "rtmutex_common.h" | 30 | #include "rtmutex_common.h" |
| 31 | 31 | ||
| 32 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
| 33 | # include "rtmutex-debug.h" | ||
| 34 | #else | ||
| 35 | # include "rtmutex.h" | ||
| 36 | #endif | ||
| 37 | |||
| 38 | # define TRACE_WARN_ON(x) WARN_ON(x) | 32 | # define TRACE_WARN_ON(x) WARN_ON(x) |
| 39 | # define TRACE_BUG_ON(x) BUG_ON(x) | 33 | # define TRACE_BUG_ON(x) BUG_ON(x) |
| 40 | 34 | ||
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 015fc633c96c..e3055ba69159 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
| @@ -260,6 +260,7 @@ static int test_func(void *data) | |||
| 260 | int ret; | 260 | int ret; |
| 261 | 261 | ||
| 262 | current->flags |= PF_MUTEX_TESTER; | 262 | current->flags |= PF_MUTEX_TESTER; |
| 263 | set_freezable(); | ||
| 263 | allow_signal(SIGHUP); | 264 | allow_signal(SIGHUP); |
| 264 | 265 | ||
| 265 | for(;;) { | 266 | for(;;) { |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 17d28ce20300..8cd9bd2cdb34 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -17,12 +17,6 @@ | |||
| 17 | 17 | ||
| 18 | #include "rtmutex_common.h" | 18 | #include "rtmutex_common.h" |
| 19 | 19 | ||
| 20 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
| 21 | # include "rtmutex-debug.h" | ||
| 22 | #else | ||
| 23 | # include "rtmutex.h" | ||
| 24 | #endif | ||
| 25 | |||
| 26 | /* | 20 | /* |
| 27 | * lock->owner state tracking: | 21 | * lock->owner state tracking: |
| 28 | * | 22 | * |
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 9c75856e791e..2d3b83593ca3 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h | |||
| @@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) | |||
| 103 | 103 | ||
| 104 | static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) | 104 | static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) |
| 105 | { | 105 | { |
| 106 | return (struct task_struct *) | 106 | return (struct task_struct *) |
| 107 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); | 107 | ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); |
| 108 | } | 108 | } |
| 109 | 109 | ||
| @@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |||
| 120 | struct task_struct *proxy_owner); | 120 | struct task_struct *proxy_owner); |
| 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
| 122 | struct task_struct *proxy_owner); | 122 | struct task_struct *proxy_owner); |
| 123 | |||
| 124 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
| 125 | # include "rtmutex-debug.h" | ||
| 126 | #else | ||
| 127 | # include "rtmutex.h" | ||
| 128 | #endif | ||
| 129 | |||
| 123 | #endif | 130 | #endif |
diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9a87886b022e..1ec620c03064 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c | |||
| @@ -20,7 +20,7 @@ void down_read(struct rw_semaphore *sem) | |||
| 20 | might_sleep(); | 20 | might_sleep(); |
| 21 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | 21 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); |
| 22 | 22 | ||
| 23 | __down_read(sem); | 23 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | EXPORT_SYMBOL(down_read); | 26 | EXPORT_SYMBOL(down_read); |
| @@ -47,7 +47,7 @@ void down_write(struct rw_semaphore *sem) | |||
| 47 | might_sleep(); | 47 | might_sleep(); |
| 48 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 48 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); |
| 49 | 49 | ||
| 50 | __down_write(sem); | 50 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | EXPORT_SYMBOL(down_write); | 53 | EXPORT_SYMBOL(down_write); |
| @@ -111,7 +111,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) | |||
| 111 | might_sleep(); | 111 | might_sleep(); |
| 112 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | 112 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); |
| 113 | 113 | ||
| 114 | __down_read(sem); | 114 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | EXPORT_SYMBOL(down_read_nested); | 117 | EXPORT_SYMBOL(down_read_nested); |
| @@ -130,7 +130,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) | |||
| 130 | might_sleep(); | 130 | might_sleep(); |
| 131 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 131 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
| 132 | 132 | ||
| 133 | __down_write_nested(sem, subclass); | 133 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | EXPORT_SYMBOL(down_write_nested); | 136 | EXPORT_SYMBOL(down_write_nested); |
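
LOCK_CONTENDED() wraps the slow-path acquire in a trylock so contention can be recorded; with lock statistics disabled it degenerates to the plain lock call. Roughly (simplified from include/linux/lockdep.h of this series):

	#define LOCK_CONTENDED(_lock, try, lock)			\
	do {								\
		if (!try(_lock)) {					\
			lock_contended(&(_lock)->dep_map, _RET_IP_);	\
			lock(_lock);					\
		}							\
		lock_acquired(&(_lock)->dep_map);			\
	} while (0)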
diff --git a/kernel/sched.c b/kernel/sched.c index 3332bbb5d5cf..93cf241cfbe9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -301,7 +301,7 @@ struct rq { | |||
| 301 | struct lock_class_key rq_lock_key; | 301 | struct lock_class_key rq_lock_key; |
| 302 | }; | 302 | }; |
| 303 | 303 | ||
| 304 | static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; | 304 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
| 305 | static DEFINE_MUTEX(sched_hotcpu_mutex); | 305 | static DEFINE_MUTEX(sched_hotcpu_mutex); |
| 306 | 306 | ||
| 307 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) | 307 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) |
| @@ -379,6 +379,23 @@ static inline unsigned long long rq_clock(struct rq *rq) | |||
| 379 | #define task_rq(p) cpu_rq(task_cpu(p)) | 379 | #define task_rq(p) cpu_rq(task_cpu(p)) |
| 380 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 380 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
| 381 | 381 | ||
| 382 | /* | ||
| 383 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | ||
| 384 | * clock constructed from sched_clock(): | ||
| 385 | */ | ||
| 386 | unsigned long long cpu_clock(int cpu) | ||
| 387 | { | ||
| 388 | struct rq *rq = cpu_rq(cpu); | ||
| 389 | unsigned long long now; | ||
| 390 | unsigned long flags; | ||
| 391 | |||
| 392 | spin_lock_irqsave(&rq->lock, flags); | ||
| 393 | now = rq_clock(rq); | ||
| 394 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 395 | |||
| 396 | return now; | ||
| 397 | } | ||
| 398 | |||
| 382 | #ifdef CONFIG_FAIR_GROUP_SCHED | 399 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 383 | /* Change a task's ->cfs_rq if it moves across CPUs */ | 400 | /* Change a task's ->cfs_rq if it moves across CPUs */ |
| 384 | static inline void set_task_cfs_rq(struct task_struct *p) | 401 | static inline void set_task_cfs_rq(struct task_struct *p) |
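
cpu_clock() trades strict cross-CPU correctness for a cheap, locally consistent timestamp. A sketch of the kind of debug instrumentation it is meant for:

	/* sketch: coarse latency measurement in kernel debug code;
	 * timestamps from different CPUs are only loosely comparable */
	unsigned long long t0, t1;

	t0 = cpu_clock(raw_smp_processor_id());
	/* ... section being timed ... */
	t1 = cpu_clock(raw_smp_processor_id());
	printk(KERN_DEBUG "section took %llu ns\n", t1 - t0);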
| @@ -736,7 +753,9 @@ static void update_curr_load(struct rq *rq, u64 now) | |||
| 736 | * | 753 | * |
| 737 | * The "10% effect" is relative and cumulative: from _any_ nice level, | 754 | * The "10% effect" is relative and cumulative: from _any_ nice level, |
| 738 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level | 755 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level |
| 739 | * it's +10% CPU usage. | 756 | * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. |
| 757 | * If a task goes up by ~10% and another task goes down by ~10% then | ||
| 758 | * the relative distance between them is ~25%.) | ||
| 740 | */ | 759 | */ |
| 741 | static const int prio_to_weight[40] = { | 760 | static const int prio_to_weight[40] = { |
| 742 | /* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, | 761 | /* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, |
| @@ -746,15 +765,22 @@ static const int prio_to_weight[40] = { | |||
| 746 | /* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, | 765 | /* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, |
| 747 | }; | 766 | }; |
| 748 | 767 | ||
| 768 | /* | ||
| 769 | * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated. | ||
| 770 | * | ||
| 771 | * In cases where the weight does not change often, we can use the | ||
| 772 | * precalculated inverse to speed up arithmetics by turning divisions | ||
| 773 | * into multiplications: | ||
| 774 | */ | ||
| 749 | static const u32 prio_to_wmult[40] = { | 775 | static const u32 prio_to_wmult[40] = { |
| 750 | 48356, 60446, 75558, 94446, 118058, 147573, | 776 | /* -20 */ 48356, 60446, 75558, 94446, 118058, |
| 751 | 184467, 230589, 288233, 360285, 450347, | 777 | /* -15 */ 147573, 184467, 230589, 288233, 360285, |
| 752 | 562979, 703746, 879575, 1099582, 1374389, | 778 | /* -10 */ 450347, 562979, 703746, 879575, 1099582, |
| 753 | 1717986, 2147483, 2684354, 3355443, 4194304, | 779 | /* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, |
| 754 | 5244160, 6557201, 8196502, 10250518, 12782640, | 780 | /* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, |
| 755 | 16025997, 19976592, 24970740, 31350126, 39045157, | 781 | /* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, |
| 756 | 49367440, 61356675, 76695844, 95443717, 119304647, | 782 | /* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, |
| 757 | 148102320, 186737708, 238609294, 286331153, | 783 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, |
| 758 | }; | 784 | }; |
| 759 | 785 | ||
| 760 | static inline void | 786 | static inline void |
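
As a worked instance of the inverse table: the nice-0 weight is 1024, and 2^32/1024 = 4194304 is the /* 0 */ entry above, so a division by the weight becomes a multiply and a shift (helper name illustrative):

	static inline u64 div_by_nice0_weight(u64 x)
	{
		/* (x * (2^32 / 1024)) >> 32 == x / 1024;
		 * exact as long as x * 4194304 fits in 64 bits (x < 2^42) */
		return (x * 4194304ULL) >> 32;
	}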
| @@ -2226,7 +2252,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2226 | 2252 | ||
| 2227 | rq = cpu_rq(i); | 2253 | rq = cpu_rq(i); |
| 2228 | 2254 | ||
| 2229 | if (*sd_idle && !idle_cpu(i)) | 2255 | if (*sd_idle && rq->nr_running) |
| 2230 | *sd_idle = 0; | 2256 | *sd_idle = 0; |
| 2231 | 2257 | ||
| 2232 | /* Bias balancing toward cpus of our domain */ | 2258 | /* Bias balancing toward cpus of our domain */ |
| @@ -2248,9 +2274,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2248 | /* | 2274 | /* |
| 2249 | * First idle cpu or the first cpu(busiest) in this sched group | 2275 | * First idle cpu or the first cpu(busiest) in this sched group |
| 2250 | * is eligible for doing load balancing at this and above | 2276 | * is eligible for doing load balancing at this and above |
| 2251 | * domains. | 2277 | * domains. In the newly idle case, we will allow all the cpus |
| 2278 | * to do the newly idle load balance. | ||
| 2252 | */ | 2279 | */ |
| 2253 | if (local_group && balance_cpu != this_cpu && balance) { | 2280 | if (idle != CPU_NEWLY_IDLE && local_group && |
| 2281 | balance_cpu != this_cpu && balance) { | ||
| 2254 | *balance = 0; | 2282 | *balance = 0; |
| 2255 | goto ret; | 2283 | goto ret; |
| 2256 | } | 2284 | } |
| @@ -2668,6 +2696,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
| 2668 | unsigned long imbalance; | 2696 | unsigned long imbalance; |
| 2669 | int nr_moved = 0; | 2697 | int nr_moved = 0; |
| 2670 | int sd_idle = 0; | 2698 | int sd_idle = 0; |
| 2699 | int all_pinned = 0; | ||
| 2671 | cpumask_t cpus = CPU_MASK_ALL; | 2700 | cpumask_t cpus = CPU_MASK_ALL; |
| 2672 | 2701 | ||
| 2673 | /* | 2702 | /* |
| @@ -2706,10 +2735,11 @@ redo: | |||
| 2706 | double_lock_balance(this_rq, busiest); | 2735 | double_lock_balance(this_rq, busiest); |
| 2707 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2736 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
| 2708 | minus_1_or_zero(busiest->nr_running), | 2737 | minus_1_or_zero(busiest->nr_running), |
| 2709 | imbalance, sd, CPU_NEWLY_IDLE, NULL); | 2738 | imbalance, sd, CPU_NEWLY_IDLE, |
| 2739 | &all_pinned); | ||
| 2710 | spin_unlock(&busiest->lock); | 2740 | spin_unlock(&busiest->lock); |
| 2711 | 2741 | ||
| 2712 | if (!nr_moved) { | 2742 | if (unlikely(all_pinned)) { |
| 2713 | cpu_clear(cpu_of(busiest), cpus); | 2743 | cpu_clear(cpu_of(busiest), cpus); |
| 2714 | if (!cpus_empty(cpus)) | 2744 | if (!cpus_empty(cpus)) |
| 2715 | goto redo; | 2745 | goto redo; |
| @@ -4903,8 +4933,6 @@ static int migration_thread(void *data) | |||
| 4903 | struct migration_req *req; | 4933 | struct migration_req *req; |
| 4904 | struct list_head *head; | 4934 | struct list_head *head; |
| 4905 | 4935 | ||
| 4906 | try_to_freeze(); | ||
| 4907 | |||
| 4908 | spin_lock_irq(&rq->lock); | 4936 | spin_lock_irq(&rq->lock); |
| 4909 | 4937 | ||
| 4910 | if (cpu_is_offline(cpu)) { | 4938 | if (cpu_is_offline(cpu)) { |
| @@ -5138,7 +5166,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5138 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); | 5166 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); |
| 5139 | if (IS_ERR(p)) | 5167 | if (IS_ERR(p)) |
| 5140 | return NOTIFY_BAD; | 5168 | return NOTIFY_BAD; |
| 5141 | p->flags |= PF_NOFREEZE; | ||
| 5142 | kthread_bind(p, cpu); | 5169 | kthread_bind(p, cpu); |
| 5143 | /* Must be high prio: stop_machine expects to yield to it. */ | 5170 | /* Must be high prio: stop_machine expects to yield to it. */ |
| 5144 | rq = task_rq_lock(p, &flags); | 5171 | rq = task_rq_lock(p, &flags); |
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index c3391b6020e8..ad64fcb731f2 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
| 11 | 11 | ||
| 12 | /* #define SECCOMP_DEBUG 1 */ | 12 | /* #define SECCOMP_DEBUG 1 */ |
| 13 | #define NR_SECCOMP_MODES 1 | ||
| 13 | 14 | ||
| 14 | /* | 15 | /* |
| 15 | * Secure computing mode 1 allows only read/write/exit/sigreturn. | 16 | * Secure computing mode 1 allows only read/write/exit/sigreturn. |
| @@ -54,3 +55,31 @@ void __secure_computing(int this_syscall) | |||
| 54 | #endif | 55 | #endif |
| 55 | do_exit(SIGKILL); | 56 | do_exit(SIGKILL); |
| 56 | } | 57 | } |
| 58 | |||
| 59 | long prctl_get_seccomp(void) | ||
| 60 | { | ||
| 61 | return current->seccomp.mode; | ||
| 62 | } | ||
| 63 | |||
| 64 | long prctl_set_seccomp(unsigned long seccomp_mode) | ||
| 65 | { | ||
| 66 | long ret; | ||
| 67 | |||
| 68 | /* can set it only once to be even more secure */ | ||
| 69 | ret = -EPERM; | ||
| 70 | if (unlikely(current->seccomp.mode)) | ||
| 71 | goto out; | ||
| 72 | |||
| 73 | ret = -EINVAL; | ||
| 74 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | ||
| 75 | current->seccomp.mode = seccomp_mode; | ||
| 76 | set_thread_flag(TIF_SECCOMP); | ||
| 77 | #ifdef TIF_NOTSC | ||
| 78 | disable_TSC(); | ||
| 79 | #endif | ||
| 80 | ret = 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | out: | ||
| 84 | return ret; | ||
| 85 | } | ||
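
From user space this surfaces through prctl(). A sketch assuming PR_SET_SECCOMP/PR_GET_SECCOMP are routed to the helpers above:

	#include <sys/prctl.h>

	/* userspace sketch: enter strict mode 1; from here on only
	 * read/write/exit/sigreturn are allowed -- any other syscall,
	 * including another prctl(), kills the task with SIGKILL */
	if (prctl(PR_SET_SECCOMP, 1) != 0)
		perror("PR_SET_SECCOMP");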
diff --git a/kernel/signal.c b/kernel/signal.c index f9405609774e..ef8156a6aad5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -255,6 +255,16 @@ flush_signal_handlers(struct task_struct *t, int force_default) | |||
| 255 | } | 255 | } |
| 256 | } | 256 | } |
| 257 | 257 | ||
| 258 | int unhandled_signal(struct task_struct *tsk, int sig) | ||
| 259 | { | ||
| 260 | if (is_init(tsk)) | ||
| 261 | return 1; | ||
| 262 | if (tsk->ptrace & PT_PTRACED) | ||
| 263 | return 0; | ||
| 264 | return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || | ||
| 265 | (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); | ||
| 266 | } | ||
| 267 | |||
| 258 | 268 | ||
| 259 | /* Notify the system that a driver wants to block all signals for this | 269 | /* Notify the system that a driver wants to block all signals for this |
| 260 | * process, and wants to be notified if any signals at all were to be | 270 | * process, and wants to be notified if any signals at all were to be |
| @@ -718,6 +728,37 @@ out_set: | |||
| 718 | #define LEGACY_QUEUE(sigptr, sig) \ | 728 | #define LEGACY_QUEUE(sigptr, sig) \ |
| 719 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) | 729 | (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) |
| 720 | 730 | ||
| 731 | int print_fatal_signals; | ||
| 732 | |||
| 733 | static void print_fatal_signal(struct pt_regs *regs, int signr) | ||
| 734 | { | ||
| 735 | printk("%s/%d: potentially unexpected fatal signal %d.\n", | ||
| 736 | current->comm, current->pid, signr); | ||
| 737 | |||
| 738 | #ifdef __i386__ | ||
| 739 | printk("code at %08lx: ", regs->eip); | ||
| 740 | { | ||
| 741 | int i; | ||
| 742 | for (i = 0; i < 16; i++) { | ||
| 743 | unsigned char insn; | ||
| 744 | |||
| 745 | __get_user(insn, (unsigned char *)(regs->eip + i)); | ||
| 746 | printk("%02x ", insn); | ||
| 747 | } | ||
| 748 | } | ||
| 749 | #endif | ||
| 750 | printk("\n"); | ||
| 751 | show_regs(regs); | ||
| 752 | } | ||
| 753 | |||
| 754 | static int __init setup_print_fatal_signals(char *str) | ||
| 755 | { | ||
| 756 | get_option (&str, &print_fatal_signals); | ||
| 757 | |||
| 758 | return 1; | ||
| 759 | } | ||
| 760 | |||
| 761 | __setup("print-fatal-signals=", setup_print_fatal_signals); | ||
| 721 | 762 | ||
| 722 | static int | 763 | static int |
| 723 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) | 764 | specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| @@ -1855,6 +1896,8 @@ relock: | |||
| 1855 | * Anything else is fatal, maybe with a core dump. | 1896 | * Anything else is fatal, maybe with a core dump. |
| 1856 | */ | 1897 | */ |
| 1857 | current->flags |= PF_SIGNALED; | 1898 | current->flags |= PF_SIGNALED; |
| 1899 | if ((signr != SIGKILL) && print_fatal_signals) | ||
| 1900 | print_fatal_signal(regs, signr); | ||
| 1858 | if (sig_kernel_coredump(signr)) { | 1901 | if (sig_kernel_coredump(signr)) { |
| 1859 | /* | 1902 | /* |
| 1860 | * If it was able to dump core, this kills all | 1903 | * If it was able to dump core, this kills all |
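
Usage note: booting with print-fatal-signals=1 arms the report above, so any non-SIGKILL fatal signal logs "<comm>/<pid>: potentially unexpected fatal signal <n>.", a show_regs() dump, and on i386 the 16 opcode bytes at the faulting %eip.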
diff --git a/kernel/softirq.c b/kernel/softirq.c index 73217a9e2875..0f546ddea43d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/notifier.h> | 14 | #include <linux/notifier.h> |
| 15 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
| 16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
| 17 | #include <linux/freezer.h> | ||
| 17 | #include <linux/kthread.h> | 18 | #include <linux/kthread.h> |
| 18 | #include <linux/rcupdate.h> | 19 | #include <linux/rcupdate.h> |
| 19 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
| @@ -488,8 +489,6 @@ void __init softirq_init(void) | |||
| 488 | 489 | ||
| 489 | static int ksoftirqd(void * __bind_cpu) | 490 | static int ksoftirqd(void * __bind_cpu) |
| 490 | { | 491 | { |
| 491 | current->flags |= PF_NOFREEZE; | ||
| 492 | |||
| 493 | set_current_state(TASK_INTERRUPTIBLE); | 492 | set_current_state(TASK_INTERRUPTIBLE); |
| 494 | 493 | ||
| 495 | while (!kthread_should_stop()) { | 494 | while (!kthread_should_stop()) { |
| @@ -614,12 +613,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
| 614 | kthread_bind(per_cpu(ksoftirqd, hotcpu), | 613 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
| 615 | any_online_cpu(cpu_online_map)); | 614 | any_online_cpu(cpu_online_map)); |
| 616 | case CPU_DEAD: | 615 | case CPU_DEAD: |
| 617 | case CPU_DEAD_FROZEN: | 616 | case CPU_DEAD_FROZEN: { |
| 617 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 618 | |||
| 618 | p = per_cpu(ksoftirqd, hotcpu); | 619 | p = per_cpu(ksoftirqd, hotcpu); |
| 619 | per_cpu(ksoftirqd, hotcpu) = NULL; | 620 | per_cpu(ksoftirqd, hotcpu) = NULL; |
| 621 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
| 620 | kthread_stop(p); | 622 | kthread_stop(p); |
| 621 | takeover_tasklets(hotcpu); | 623 | takeover_tasklets(hotcpu); |
| 622 | break; | 624 | break; |
| 625 | } | ||
| 623 | #endif /* CONFIG_HOTPLUG_CPU */ | 626 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 624 | } | 627 | } |
| 625 | return NOTIFY_OK; | 628 | return NOTIFY_OK; |
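Two separate changes land in softirq.c: ksoftirqd no longer marks itself PF_NOFREEZE (see the freezer note after the softlockup.c diff below), and the CPU_DEAD path now raises the dead CPU's ksoftirqd to SCHED_FIFO before stopping it, so kthread_stop() is not left waiting behind other runnable tasks. Reduced to a sketch, the idiom is:

    /* Sketch of the boost-then-stop idiom (kernel context). */
    static void stop_boosted(struct task_struct *p)
    {
            struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

            sched_setscheduler(p, SCHED_FIFO, &param);  /* run promptly */
            kthread_stop(p);            /* returns once p has exited */
    }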
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 0131e296ffb4..708d4882c0c3 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
| 11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
| 12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
| 13 | #include <linux/freezer.h> | ||
| 13 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
| 14 | #include <linux/notifier.h> | 15 | #include <linux/notifier.h> |
| 15 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| @@ -116,7 +117,6 @@ static int watchdog(void * __bind_cpu) | |||
| 116 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 117 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
| 117 | 118 | ||
| 118 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 119 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
| 119 | current->flags |= PF_NOFREEZE; | ||
| 120 | 120 | ||
| 121 | /* initialize timestamp */ | 121 | /* initialize timestamp */ |
| 122 | touch_softlockup_watchdog(); | 122 | touch_softlockup_watchdog(); |
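Dropping PF_NOFREEZE here, as in ksoftirqd above, follows from the reworked freezer model: kernel threads are now non-freezable by default, which makes the per-thread opt-out flag redundant. A kernel thread that should be frozen across suspend opts in explicitly instead, roughly as below (demo_thread is illustrative, not from this diff):

    /* Sketch of a freezer-aware kthread under the new default. */
    static int demo_thread(void *unused)
    {
            set_freezable();    /* opt in; kthreads default to unfreezable */
            while (!kthread_should_stop()) {
                    try_to_freeze();        /* parks here during suspend */
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule_timeout(HZ);
            }
            return 0;
    }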
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c6c2bf85514..cd72424c2662 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
| @@ -72,7 +72,7 @@ void __lockfunc _read_lock(rwlock_t *lock) | |||
| 72 | { | 72 | { |
| 73 | preempt_disable(); | 73 | preempt_disable(); |
| 74 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 74 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
| 75 | _raw_read_lock(lock); | 75 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
| 76 | } | 76 | } |
| 77 | EXPORT_SYMBOL(_read_lock); | 77 | EXPORT_SYMBOL(_read_lock); |
| 78 | 78 | ||
| @@ -88,8 +88,8 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) | |||
| 88 | * _raw_spin_lock_flags() code, because lockdep assumes | 88 | * _raw_spin_lock_flags() code, because lockdep assumes |
| 89 | * that interrupts are not re-enabled during lock-acquire: | 89 | * that interrupts are not re-enabled during lock-acquire: |
| 90 | */ | 90 | */ |
| 91 | #ifdef CONFIG_PROVE_LOCKING | 91 | #ifdef CONFIG_LOCKDEP |
| 92 | _raw_spin_lock(lock); | 92 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
| 93 | #else | 93 | #else |
| 94 | _raw_spin_lock_flags(lock, &flags); | 94 | _raw_spin_lock_flags(lock, &flags); |
| 95 | #endif | 95 | #endif |
| @@ -102,7 +102,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) | |||
| 102 | local_irq_disable(); | 102 | local_irq_disable(); |
| 103 | preempt_disable(); | 103 | preempt_disable(); |
| 104 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 104 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 105 | _raw_spin_lock(lock); | 105 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
| 106 | } | 106 | } |
| 107 | EXPORT_SYMBOL(_spin_lock_irq); | 107 | EXPORT_SYMBOL(_spin_lock_irq); |
| 108 | 108 | ||
| @@ -111,7 +111,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) | |||
| 111 | local_bh_disable(); | 111 | local_bh_disable(); |
| 112 | preempt_disable(); | 112 | preempt_disable(); |
| 113 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 113 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 114 | _raw_spin_lock(lock); | 114 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
| 115 | } | 115 | } |
| 116 | EXPORT_SYMBOL(_spin_lock_bh); | 116 | EXPORT_SYMBOL(_spin_lock_bh); |
| 117 | 117 | ||
| @@ -122,7 +122,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) | |||
| 122 | local_irq_save(flags); | 122 | local_irq_save(flags); |
| 123 | preempt_disable(); | 123 | preempt_disable(); |
| 124 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 124 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
| 125 | _raw_read_lock(lock); | 125 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
| 126 | return flags; | 126 | return flags; |
| 127 | } | 127 | } |
| 128 | EXPORT_SYMBOL(_read_lock_irqsave); | 128 | EXPORT_SYMBOL(_read_lock_irqsave); |
| @@ -132,7 +132,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) | |||
| 132 | local_irq_disable(); | 132 | local_irq_disable(); |
| 133 | preempt_disable(); | 133 | preempt_disable(); |
| 134 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 134 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
| 135 | _raw_read_lock(lock); | 135 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
| 136 | } | 136 | } |
| 137 | EXPORT_SYMBOL(_read_lock_irq); | 137 | EXPORT_SYMBOL(_read_lock_irq); |
| 138 | 138 | ||
| @@ -141,7 +141,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) | |||
| 141 | local_bh_disable(); | 141 | local_bh_disable(); |
| 142 | preempt_disable(); | 142 | preempt_disable(); |
| 143 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); | 143 | rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); |
| 144 | _raw_read_lock(lock); | 144 | LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); |
| 145 | } | 145 | } |
| 146 | EXPORT_SYMBOL(_read_lock_bh); | 146 | EXPORT_SYMBOL(_read_lock_bh); |
| 147 | 147 | ||
| @@ -152,7 +152,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) | |||
| 152 | local_irq_save(flags); | 152 | local_irq_save(flags); |
| 153 | preempt_disable(); | 153 | preempt_disable(); |
| 154 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 154 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 155 | _raw_write_lock(lock); | 155 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
| 156 | return flags; | 156 | return flags; |
| 157 | } | 157 | } |
| 158 | EXPORT_SYMBOL(_write_lock_irqsave); | 158 | EXPORT_SYMBOL(_write_lock_irqsave); |
| @@ -162,7 +162,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) | |||
| 162 | local_irq_disable(); | 162 | local_irq_disable(); |
| 163 | preempt_disable(); | 163 | preempt_disable(); |
| 164 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 164 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 165 | _raw_write_lock(lock); | 165 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
| 166 | } | 166 | } |
| 167 | EXPORT_SYMBOL(_write_lock_irq); | 167 | EXPORT_SYMBOL(_write_lock_irq); |
| 168 | 168 | ||
| @@ -171,7 +171,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) | |||
| 171 | local_bh_disable(); | 171 | local_bh_disable(); |
| 172 | preempt_disable(); | 172 | preempt_disable(); |
| 173 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 173 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 174 | _raw_write_lock(lock); | 174 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
| 175 | } | 175 | } |
| 176 | EXPORT_SYMBOL(_write_lock_bh); | 176 | EXPORT_SYMBOL(_write_lock_bh); |
| 177 | 177 | ||
| @@ -179,7 +179,7 @@ void __lockfunc _spin_lock(spinlock_t *lock) | |||
| 179 | { | 179 | { |
| 180 | preempt_disable(); | 180 | preempt_disable(); |
| 181 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 181 | spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 182 | _raw_spin_lock(lock); | 182 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | EXPORT_SYMBOL(_spin_lock); | 185 | EXPORT_SYMBOL(_spin_lock); |
| @@ -188,7 +188,7 @@ void __lockfunc _write_lock(rwlock_t *lock) | |||
| 188 | { | 188 | { |
| 189 | preempt_disable(); | 189 | preempt_disable(); |
| 190 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); | 190 | rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
| 191 | _raw_write_lock(lock); | 191 | LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); |
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | EXPORT_SYMBOL(_write_lock); | 194 | EXPORT_SYMBOL(_write_lock); |
| @@ -289,7 +289,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) | |||
| 289 | { | 289 | { |
| 290 | preempt_disable(); | 290 | preempt_disable(); |
| 291 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | 291 | spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); |
| 292 | _raw_spin_lock(lock); | 292 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
| 293 | } | 293 | } |
| 294 | 294 | ||
| 295 | EXPORT_SYMBOL(_spin_lock_nested); | 295 | EXPORT_SYMBOL(_spin_lock_nested); |
| @@ -305,8 +305,8 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas | |||
| 305 | * _raw_spin_lock_flags() code, because lockdep assumes | 305 | * _raw_spin_lock_flags() code, because lockdep assumes |
| 306 | * that interrupts are not re-enabled during lock-acquire: | 306 | * that interrupts are not re-enabled during lock-acquire: |
| 307 | */ | 307 | */ |
| 308 | #ifdef CONFIG_PROVE_SPIN_LOCKING | 308 | #ifdef CONFIG_LOCKDEP |
| 309 | _raw_spin_lock(lock); | 309 | LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); |
| 310 | #else | 310 | #else |
| 311 | _raw_spin_lock_flags(lock, &flags); | 311 | _raw_spin_lock_flags(lock, &flags); |
| 312 | #endif | 312 | #endif |
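Every slow-path _raw_*_lock() call in spinlock.c is wrapped in LOCK_CONTENDED so the lock-statistics machinery can tell contended from uncontended acquisitions: the trylock runs first, and only on failure is a contention event recorded before the blocking acquire. A sketch of the macro's shape (the authoritative definition lives in include/linux/lockdep.h):

    #define LOCK_CONTENDED(_lock, try, lock)                        \
    do {                                                            \
            if (!try(_lock)) {                                      \
                    lock_contended(&(_lock)->dep_map, _RET_IP_);    \
                    lock(_lock);                                    \
            }                                                       \
            lock_acquired(&(_lock)->dep_map);                       \
    } while (0)

With lock statistics disabled the macro degenerates to a plain lock(_lock), so the uninstrumented fast path is unchanged.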
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index fcee2a8e6da3..319821ef78af 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state) | |||
| 93 | static int stop_machine(void) | 93 | static int stop_machine(void) |
| 94 | { | 94 | { |
| 95 | int i, ret = 0; | 95 | int i, ret = 0; |
| 96 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 97 | |||
| 98 | /* One high-prio thread per cpu. We'll do this one. */ | ||
| 99 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
| 100 | 96 | ||
| 101 | atomic_set(&stopmachine_thread_ack, 0); | 97 | atomic_set(&stopmachine_thread_ack, 0); |
| 102 | stopmachine_num_threads = 0; | 98 | stopmachine_num_threads = 0; |
| @@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | |||
| 189 | 185 | ||
| 190 | p = kthread_create(do_stop, &smdata, "kstopmachine"); | 186 | p = kthread_create(do_stop, &smdata, "kstopmachine"); |
| 191 | if (!IS_ERR(p)) { | 187 | if (!IS_ERR(p)) { |
| 188 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
| 189 | |||
| 190 | /* One high-prio thread per cpu. We'll do this one. */ | ||
| 191 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
| 192 | kthread_bind(p, cpu); | 192 | kthread_bind(p, cpu); |
| 193 | wake_up_process(p); | 193 | wake_up_process(p); |
| 194 | wait_for_completion(&smdata.done); | 194 | wait_for_completion(&smdata.done); |
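The SCHED_FIFO boost moves out of stop_machine(), where the kstopmachine thread raised its own priority only after it had started running, into __stop_machine_run(), where the creator sets the policy before the first wakeup. That closes a starvation window: a runaway real-time task could previously keep kstopmachine off the CPU before it ever reached its self-boost, wedging stop_machine_run(). The resulting lifecycle idiom:

    /* Sketch: configure a kthread completely before waking it
     * (fn, data and cpu are placeholders). */
    struct task_struct *p = kthread_create(fn, data, "demo");

    if (!IS_ERR(p)) {
            struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

            sched_setscheduler(p, SCHED_FIFO, &param);  /* policy first */
            kthread_bind(p, cpu);                       /* then placement */
            wake_up_process(p);                         /* only now runnable */
    }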
diff --git a/kernel/sys.c b/kernel/sys.c index 872271ccc384..08562f419768 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -31,10 +31,12 @@ | |||
| 31 | #include <linux/cn_proc.h> | 31 | #include <linux/cn_proc.h> |
| 32 | #include <linux/getcpu.h> | 32 | #include <linux/getcpu.h> |
| 33 | #include <linux/task_io_accounting_ops.h> | 33 | #include <linux/task_io_accounting_ops.h> |
| 34 | #include <linux/seccomp.h> | ||
| 34 | 35 | ||
| 35 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
| 36 | #include <linux/syscalls.h> | 37 | #include <linux/syscalls.h> |
| 37 | #include <linux/kprobes.h> | 38 | #include <linux/kprobes.h> |
| 39 | #include <linux/user_namespace.h> | ||
| 38 | 40 | ||
| 39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
| 40 | #include <asm/io.h> | 42 | #include <asm/io.h> |
| @@ -98,6 +100,13 @@ struct pid *cad_pid; | |||
| 98 | EXPORT_SYMBOL(cad_pid); | 100 | EXPORT_SYMBOL(cad_pid); |
| 99 | 101 | ||
| 100 | /* | 102 | /* |
| 103 | * If set, this is used for preparing the system to power off. | ||
| 104 | */ | ||
| 105 | |||
| 106 | void (*pm_power_off_prepare)(void); | ||
| 107 | EXPORT_SYMBOL(pm_power_off_prepare); | ||
| 108 | |||
| 109 | /* | ||
| 101 | * Notifier list for kernel code which wants to be called | 110 | * Notifier list for kernel code which wants to be called |
| 102 | * at shutdown. This is used to stop any idling DMA operations | 111 | * at shutdown. This is used to stop any idling DMA operations |
| 103 | * and the like. | 112 | * and the like. |
| @@ -865,6 +874,8 @@ EXPORT_SYMBOL_GPL(kernel_halt); | |||
| 865 | void kernel_power_off(void) | 874 | void kernel_power_off(void) |
| 866 | { | 875 | { |
| 867 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | 876 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); |
| 877 | if (pm_power_off_prepare) | ||
| 878 | pm_power_off_prepare(); | ||
| 868 | printk(KERN_EMERG "Power down.\n"); | 879 | printk(KERN_EMERG "Power down.\n"); |
| 869 | machine_power_off(); | 880 | machine_power_off(); |
| 870 | } | 881 | } |
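The new pm_power_off_prepare hook runs after kernel_shutdown_prepare() but before machine_power_off(), i.e. while interrupts are still enabled, giving platform code a place for work that must precede the final power-off. A registration sketch in the ACPI style (the demo_* names are assumptions, not part of this diff):

    static void demo_power_off_prepare(void)
    {
            /* runs with IRQs still on, just before machine_power_off() */
            acpi_enter_sleep_state_prep(ACPI_STATE_S5);
    }

    static int __init demo_poweroff_init(void)
    {
            pm_power_off_prepare = demo_power_off_prepare;
            return 0;
    }
    late_initcall(demo_poweroff_init);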
| @@ -1025,7 +1036,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) | |||
| 1025 | return -EPERM; | 1036 | return -EPERM; |
| 1026 | } | 1037 | } |
| 1027 | if (new_egid != old_egid) { | 1038 | if (new_egid != old_egid) { |
| 1028 | current->mm->dumpable = suid_dumpable; | 1039 | set_dumpable(current->mm, suid_dumpable); |
| 1029 | smp_wmb(); | 1040 | smp_wmb(); |
| 1030 | } | 1041 | } |
| 1031 | if (rgid != (gid_t) -1 || | 1042 | if (rgid != (gid_t) -1 || |
| @@ -1055,13 +1066,13 @@ asmlinkage long sys_setgid(gid_t gid) | |||
| 1055 | 1066 | ||
| 1056 | if (capable(CAP_SETGID)) { | 1067 | if (capable(CAP_SETGID)) { |
| 1057 | if (old_egid != gid) { | 1068 | if (old_egid != gid) { |
| 1058 | current->mm->dumpable = suid_dumpable; | 1069 | set_dumpable(current->mm, suid_dumpable); |
| 1059 | smp_wmb(); | 1070 | smp_wmb(); |
| 1060 | } | 1071 | } |
| 1061 | current->gid = current->egid = current->sgid = current->fsgid = gid; | 1072 | current->gid = current->egid = current->sgid = current->fsgid = gid; |
| 1062 | } else if ((gid == current->gid) || (gid == current->sgid)) { | 1073 | } else if ((gid == current->gid) || (gid == current->sgid)) { |
| 1063 | if (old_egid != gid) { | 1074 | if (old_egid != gid) { |
| 1064 | current->mm->dumpable = suid_dumpable; | 1075 | set_dumpable(current->mm, suid_dumpable); |
| 1065 | smp_wmb(); | 1076 | smp_wmb(); |
| 1066 | } | 1077 | } |
| 1067 | current->egid = current->fsgid = gid; | 1078 | current->egid = current->fsgid = gid; |
| @@ -1078,13 +1089,13 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
| 1078 | { | 1089 | { |
| 1079 | struct user_struct *new_user; | 1090 | struct user_struct *new_user; |
| 1080 | 1091 | ||
| 1081 | new_user = alloc_uid(new_ruid); | 1092 | new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); |
| 1082 | if (!new_user) | 1093 | if (!new_user) |
| 1083 | return -EAGAIN; | 1094 | return -EAGAIN; |
| 1084 | 1095 | ||
| 1085 | if (atomic_read(&new_user->processes) >= | 1096 | if (atomic_read(&new_user->processes) >= |
| 1086 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && | 1097 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && |
| 1087 | new_user != &root_user) { | 1098 | new_user != current->nsproxy->user_ns->root_user) { |
| 1088 | free_uid(new_user); | 1099 | free_uid(new_user); |
| 1089 | return -EAGAIN; | 1100 | return -EAGAIN; |
| 1090 | } | 1101 | } |
| @@ -1092,7 +1103,7 @@ static int set_user(uid_t new_ruid, int dumpclear) | |||
| 1092 | switch_uid(new_user); | 1103 | switch_uid(new_user); |
| 1093 | 1104 | ||
| 1094 | if (dumpclear) { | 1105 | if (dumpclear) { |
| 1095 | current->mm->dumpable = suid_dumpable; | 1106 | set_dumpable(current->mm, suid_dumpable); |
| 1096 | smp_wmb(); | 1107 | smp_wmb(); |
| 1097 | } | 1108 | } |
| 1098 | current->uid = new_ruid; | 1109 | current->uid = new_ruid; |
| @@ -1148,7 +1159,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
| 1148 | return -EAGAIN; | 1159 | return -EAGAIN; |
| 1149 | 1160 | ||
| 1150 | if (new_euid != old_euid) { | 1161 | if (new_euid != old_euid) { |
| 1151 | current->mm->dumpable = suid_dumpable; | 1162 | set_dumpable(current->mm, suid_dumpable); |
| 1152 | smp_wmb(); | 1163 | smp_wmb(); |
| 1153 | } | 1164 | } |
| 1154 | current->fsuid = current->euid = new_euid; | 1165 | current->fsuid = current->euid = new_euid; |
| @@ -1198,7 +1209,7 @@ asmlinkage long sys_setuid(uid_t uid) | |||
| 1198 | return -EPERM; | 1209 | return -EPERM; |
| 1199 | 1210 | ||
| 1200 | if (old_euid != uid) { | 1211 | if (old_euid != uid) { |
| 1201 | current->mm->dumpable = suid_dumpable; | 1212 | set_dumpable(current->mm, suid_dumpable); |
| 1202 | smp_wmb(); | 1213 | smp_wmb(); |
| 1203 | } | 1214 | } |
| 1204 | current->fsuid = current->euid = uid; | 1215 | current->fsuid = current->euid = uid; |
| @@ -1243,7 +1254,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) | |||
| 1243 | } | 1254 | } |
| 1244 | if (euid != (uid_t) -1) { | 1255 | if (euid != (uid_t) -1) { |
| 1245 | if (euid != current->euid) { | 1256 | if (euid != current->euid) { |
| 1246 | current->mm->dumpable = suid_dumpable; | 1257 | set_dumpable(current->mm, suid_dumpable); |
| 1247 | smp_wmb(); | 1258 | smp_wmb(); |
| 1248 | } | 1259 | } |
| 1249 | current->euid = euid; | 1260 | current->euid = euid; |
| @@ -1293,7 +1304,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) | |||
| 1293 | } | 1304 | } |
| 1294 | if (egid != (gid_t) -1) { | 1305 | if (egid != (gid_t) -1) { |
| 1295 | if (egid != current->egid) { | 1306 | if (egid != current->egid) { |
| 1296 | current->mm->dumpable = suid_dumpable; | 1307 | set_dumpable(current->mm, suid_dumpable); |
| 1297 | smp_wmb(); | 1308 | smp_wmb(); |
| 1298 | } | 1309 | } |
| 1299 | current->egid = egid; | 1310 | current->egid = egid; |
| @@ -1339,7 +1350,7 @@ asmlinkage long sys_setfsuid(uid_t uid) | |||
| 1339 | uid == current->suid || uid == current->fsuid || | 1350 | uid == current->suid || uid == current->fsuid || |
| 1340 | capable(CAP_SETUID)) { | 1351 | capable(CAP_SETUID)) { |
| 1341 | if (uid != old_fsuid) { | 1352 | if (uid != old_fsuid) { |
| 1342 | current->mm->dumpable = suid_dumpable; | 1353 | set_dumpable(current->mm, suid_dumpable); |
| 1343 | smp_wmb(); | 1354 | smp_wmb(); |
| 1344 | } | 1355 | } |
| 1345 | current->fsuid = uid; | 1356 | current->fsuid = uid; |
| @@ -1368,7 +1379,7 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
| 1368 | gid == current->sgid || gid == current->fsgid || | 1379 | gid == current->sgid || gid == current->fsgid || |
| 1369 | capable(CAP_SETGID)) { | 1380 | capable(CAP_SETGID)) { |
| 1370 | if (gid != old_fsgid) { | 1381 | if (gid != old_fsgid) { |
| 1371 | current->mm->dumpable = suid_dumpable; | 1382 | set_dumpable(current->mm, suid_dumpable); |
| 1372 | smp_wmb(); | 1383 | smp_wmb(); |
| 1373 | } | 1384 | } |
| 1374 | current->fsgid = gid; | 1385 | current->fsgid = gid; |
| @@ -2165,14 +2176,14 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
| 2165 | error = put_user(current->pdeath_signal, (int __user *)arg2); | 2176 | error = put_user(current->pdeath_signal, (int __user *)arg2); |
| 2166 | break; | 2177 | break; |
| 2167 | case PR_GET_DUMPABLE: | 2178 | case PR_GET_DUMPABLE: |
| 2168 | error = current->mm->dumpable; | 2179 | error = get_dumpable(current->mm); |
| 2169 | break; | 2180 | break; |
| 2170 | case PR_SET_DUMPABLE: | 2181 | case PR_SET_DUMPABLE: |
| 2171 | if (arg2 < 0 || arg2 > 1) { | 2182 | if (arg2 < 0 || arg2 > 1) { |
| 2172 | error = -EINVAL; | 2183 | error = -EINVAL; |
| 2173 | break; | 2184 | break; |
| 2174 | } | 2185 | } |
| 2175 | current->mm->dumpable = arg2; | 2186 | set_dumpable(current->mm, arg2); |
| 2176 | break; | 2187 | break; |
| 2177 | 2188 | ||
| 2178 | case PR_SET_UNALIGN: | 2189 | case PR_SET_UNALIGN: |
| @@ -2241,6 +2252,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
| 2241 | error = SET_ENDIAN(current, arg2); | 2252 | error = SET_ENDIAN(current, arg2); |
| 2242 | break; | 2253 | break; |
| 2243 | 2254 | ||
| 2255 | case PR_GET_SECCOMP: | ||
| 2256 | error = prctl_get_seccomp(); | ||
| 2257 | break; | ||
| 2258 | case PR_SET_SECCOMP: | ||
| 2259 | error = prctl_set_seccomp(arg2); | ||
| 2260 | break; | ||
| 2261 | |||
| 2244 | default: | 2262 | default: |
| 2245 | error = -EINVAL; | 2263 | error = -EINVAL; |
| 2246 | break; | 2264 | break; |
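PR_GET_SECCOMP and PR_SET_SECCOMP expose seccomp through prctl() rather than a per-process proc file. From user space the interface looks roughly like the sketch below; the PR_* values match this series, and the fallback defines cover older sys/prctl.h headers:

    #include <stdio.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef PR_SET_SECCOMP
    # define PR_GET_SECCOMP 21
    # define PR_SET_SECCOMP 22
    #endif

    int main(void)
    {
            printf("seccomp mode: %d\n", prctl(PR_GET_SECCOMP));
            fflush(stdout);

            if (prctl(PR_SET_SECCOMP, 1) == 0) {
                    /* strict mode: only read/write/exit/sigreturn remain
                     * legal, so leave via the raw exit syscall -- even
                     * exit_group(), hence return-from-main, is fatal */
                    syscall(SYS_exit, 0);
            }
            return 1;
    }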
| @@ -2277,3 +2295,61 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, | |||
| 2277 | } | 2295 | } |
| 2278 | return err ? -EFAULT : 0; | 2296 | return err ? -EFAULT : 0; |
| 2279 | } | 2297 | } |
| 2298 | |||
| 2299 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | ||
| 2300 | |||
| 2301 | static void argv_cleanup(char **argv, char **envp) | ||
| 2302 | { | ||
| 2303 | argv_free(argv); | ||
| 2304 | } | ||
| 2305 | |||
| 2306 | /** | ||
| 2307 | * orderly_poweroff - Trigger an orderly system poweroff | ||
| 2308 | * @force: force poweroff if command execution fails | ||
| 2309 | * | ||
| 2310 | * This may be called from any context to trigger a system shutdown. | ||
| 2311 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
| 2312 | */ | ||
| 2313 | int orderly_poweroff(bool force) | ||
| 2314 | { | ||
| 2315 | int argc; | ||
| 2316 | char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); | ||
| 2317 | static char *envp[] = { | ||
| 2318 | "HOME=/", | ||
| 2319 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | ||
| 2320 | NULL | ||
| 2321 | }; | ||
| 2322 | int ret = -ENOMEM; | ||
| 2323 | struct subprocess_info *info; | ||
| 2324 | |||
| 2325 | if (argv == NULL) { | ||
| 2326 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | ||
| 2327 | __func__, poweroff_cmd); | ||
| 2328 | goto out; | ||
| 2329 | } | ||
| 2330 | |||
| 2331 | info = call_usermodehelper_setup(argv[0], argv, envp); | ||
| 2332 | if (info == NULL) { | ||
| 2333 | argv_free(argv); | ||
| 2334 | goto out; | ||
| 2335 | } | ||
| 2336 | |||
| 2337 | call_usermodehelper_setcleanup(info, argv_cleanup); | ||
| 2338 | |||
| 2339 | ret = call_usermodehelper_exec(info, UMH_NO_WAIT); | ||
| 2340 | |||
| 2341 | out: | ||
| 2342 | if (ret && force) { | ||
| 2343 | printk(KERN_WARNING "Failed to start orderly shutdown: " | ||
| 2344 | "forcing the issue\n"); | ||
| 2345 | |||
| 2346 | /* I guess this should try to kick off some daemon to | ||
| 2347 | sync and poweroff asap. Or not even bother syncing | ||
| 2348 | if we're doing an emergency shutdown? */ | ||
| 2349 | emergency_sync(); | ||
| 2350 | kernel_power_off(); | ||
| 2351 | } | ||
| 2352 | |||
| 2353 | return ret; | ||
| 2354 | } | ||
| 2355 | EXPORT_SYMBOL_GPL(orderly_poweroff); | ||
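Because the helper is set up with GFP_ATOMIC allocations and launched with UMH_NO_WAIT, orderly_poweroff() is usable from awkward contexts, which is the point: emergency paths such as thermal trips can ask user space for a clean shutdown and still fall back to emergency_sync() plus kernel_power_off(). A caller sketch (the trip handler is hypothetical):

    static void demo_critical_trip(void)
    {
            printk(KERN_EMERG "critical condition, shutting down\n");
            orderly_poweroff(true); /* run poweroff_cmd; force on failure */
    }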
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7e11e2c98bf9..b0ec498a18d9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
| @@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void) | |||
| 14 | 14 | ||
| 15 | cond_syscall(sys_nfsservctl); | 15 | cond_syscall(sys_nfsservctl); |
| 16 | cond_syscall(sys_quotactl); | 16 | cond_syscall(sys_quotactl); |
| 17 | cond_syscall(sys32_quotactl); | ||
| 17 | cond_syscall(sys_acct); | 18 | cond_syscall(sys_acct); |
| 18 | cond_syscall(sys_lookup_dcookie); | 19 | cond_syscall(sys_lookup_dcookie); |
| 19 | cond_syscall(sys_swapon); | 20 | cond_syscall(sys_swapon); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d93e13d93f24..ddebf3f2affe 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/utsname.h> | 29 | #include <linux/utsname.h> |
| 30 | #include <linux/capability.h> | 30 | #include <linux/capability.h> |
| 31 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
| 32 | #include <linux/fs.h> | ||
| 32 | #include <linux/init.h> | 33 | #include <linux/init.h> |
| 33 | #include <linux/kernel.h> | 34 | #include <linux/kernel.h> |
| 34 | #include <linux/kobject.h> | 35 | #include <linux/kobject.h> |
| @@ -45,13 +46,11 @@ | |||
| 45 | #include <linux/syscalls.h> | 46 | #include <linux/syscalls.h> |
| 46 | #include <linux/nfs_fs.h> | 47 | #include <linux/nfs_fs.h> |
| 47 | #include <linux/acpi.h> | 48 | #include <linux/acpi.h> |
| 49 | #include <linux/reboot.h> | ||
| 48 | 50 | ||
| 49 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
| 50 | #include <asm/processor.h> | 52 | #include <asm/processor.h> |
| 51 | 53 | ||
| 52 | extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | ||
| 53 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
| 54 | |||
| 55 | #ifdef CONFIG_X86 | 54 | #ifdef CONFIG_X86 |
| 56 | #include <asm/nmi.h> | 55 | #include <asm/nmi.h> |
| 57 | #include <asm/stacktrace.h> | 56 | #include <asm/stacktrace.h> |
| @@ -61,6 +60,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
| 61 | 60 | ||
| 62 | /* External variables not in a header file. */ | 61 | /* External variables not in a header file. */ |
| 63 | extern int C_A_D; | 62 | extern int C_A_D; |
| 63 | extern int print_fatal_signals; | ||
| 64 | extern int sysctl_overcommit_memory; | 64 | extern int sysctl_overcommit_memory; |
| 65 | extern int sysctl_overcommit_ratio; | 65 | extern int sysctl_overcommit_ratio; |
| 66 | extern int sysctl_panic_on_oom; | 66 | extern int sysctl_panic_on_oom; |
| @@ -78,6 +78,7 @@ extern int percpu_pagelist_fraction; | |||
| 78 | extern int compat_log; | 78 | extern int compat_log; |
| 79 | extern int maps_protect; | 79 | extern int maps_protect; |
| 80 | extern int sysctl_stat_interval; | 80 | extern int sysctl_stat_interval; |
| 81 | extern int audit_argv_kb; | ||
| 81 | 82 | ||
| 82 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 83 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
| 83 | static int maxolduid = 65535; | 84 | static int maxolduid = 65535; |
| @@ -160,6 +161,8 @@ extern ctl_table inotify_table[]; | |||
| 160 | int sysctl_legacy_va_layout; | 161 | int sysctl_legacy_va_layout; |
| 161 | #endif | 162 | #endif |
| 162 | 163 | ||
| 164 | extern int prove_locking; | ||
| 165 | extern int lock_stat; | ||
| 163 | 166 | ||
| 164 | /* The default sysctl tables: */ | 167 | /* The default sysctl tables: */ |
| 165 | 168 | ||
| @@ -202,7 +205,10 @@ static ctl_table root_table[] = { | |||
| 202 | .mode = 0555, | 205 | .mode = 0555, |
| 203 | .child = dev_table, | 206 | .child = dev_table, |
| 204 | }, | 207 | }, |
| 205 | 208 | /* | |
| 209 | * NOTE: do not add new entries to this table unless you have read | ||
| 210 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 211 | */ | ||
| 206 | { .ctl_name = 0 } | 212 | { .ctl_name = 0 } |
| 207 | }; | 213 | }; |
| 208 | 214 | ||
| @@ -278,6 +284,26 @@ static ctl_table kern_table[] = { | |||
| 278 | .mode = 0644, | 284 | .mode = 0644, |
| 279 | .proc_handler = &proc_dointvec, | 285 | .proc_handler = &proc_dointvec, |
| 280 | }, | 286 | }, |
| 287 | #ifdef CONFIG_PROVE_LOCKING | ||
| 288 | { | ||
| 289 | .ctl_name = CTL_UNNUMBERED, | ||
| 290 | .procname = "prove_locking", | ||
| 291 | .data = &prove_locking, | ||
| 292 | .maxlen = sizeof(int), | ||
| 293 | .mode = 0644, | ||
| 294 | .proc_handler = &proc_dointvec, | ||
| 295 | }, | ||
| 296 | #endif | ||
| 297 | #ifdef CONFIG_LOCK_STAT | ||
| 298 | { | ||
| 299 | .ctl_name = CTL_UNNUMBERED, | ||
| 300 | .procname = "lock_stat", | ||
| 301 | .data = &lock_stat, | ||
| 302 | .maxlen = sizeof(int), | ||
| 303 | .mode = 0644, | ||
| 304 | .proc_handler = &proc_dointvec, | ||
| 305 | }, | ||
| 306 | #endif | ||
| 281 | { | 307 | { |
| 282 | .ctl_name = CTL_UNNUMBERED, | 308 | .ctl_name = CTL_UNNUMBERED, |
| 283 | .procname = "sched_features", | 309 | .procname = "sched_features", |
| @@ -303,6 +329,16 @@ static ctl_table kern_table[] = { | |||
| 303 | .mode = 0644, | 329 | .mode = 0644, |
| 304 | .proc_handler = &proc_dointvec, | 330 | .proc_handler = &proc_dointvec, |
| 305 | }, | 331 | }, |
| 332 | #ifdef CONFIG_AUDITSYSCALL | ||
| 333 | { | ||
| 334 | .ctl_name = CTL_UNNUMBERED, | ||
| 335 | .procname = "audit_argv_kb", | ||
| 336 | .data = &audit_argv_kb, | ||
| 337 | .maxlen = sizeof(int), | ||
| 338 | .mode = 0644, | ||
| 339 | .proc_handler = &proc_dointvec, | ||
| 340 | }, | ||
| 341 | #endif | ||
| 306 | { | 342 | { |
| 307 | .ctl_name = KERN_CORE_PATTERN, | 343 | .ctl_name = KERN_CORE_PATTERN, |
| 308 | .procname = "core_pattern", | 344 | .procname = "core_pattern", |
| @@ -340,6 +376,14 @@ static ctl_table kern_table[] = { | |||
| 340 | .proc_handler = &proc_dointvec, | 376 | .proc_handler = &proc_dointvec, |
| 341 | }, | 377 | }, |
| 342 | #endif | 378 | #endif |
| 379 | { | ||
| 380 | .ctl_name = CTL_UNNUMBERED, | ||
| 381 | .procname = "print-fatal-signals", | ||
| 382 | .data = &print_fatal_signals, | ||
| 383 | .maxlen = sizeof(int), | ||
| 384 | .mode = 0644, | ||
| 385 | .proc_handler = &proc_dointvec, | ||
| 386 | }, | ||
| 343 | #ifdef __sparc__ | 387 | #ifdef __sparc__ |
| 344 | { | 388 | { |
| 345 | .ctl_name = KERN_SPARC_REBOOT, | 389 | .ctl_name = KERN_SPARC_REBOOT, |
| @@ -649,7 +693,7 @@ static ctl_table kern_table[] = { | |||
| 649 | { | 693 | { |
| 650 | .ctl_name = KERN_ACPI_VIDEO_FLAGS, | 694 | .ctl_name = KERN_ACPI_VIDEO_FLAGS, |
| 651 | .procname = "acpi_video_flags", | 695 | .procname = "acpi_video_flags", |
| 652 | .data = &acpi_video_flags, | 696 | .data = &acpi_realmode_flags, |
| 653 | .maxlen = sizeof (unsigned long), | 697 | .maxlen = sizeof (unsigned long), |
| 654 | .mode = 0644, | 698 | .mode = 0644, |
| 655 | .proc_handler = &proc_doulongvec_minmax, | 699 | .proc_handler = &proc_doulongvec_minmax, |
| @@ -695,13 +739,26 @@ static ctl_table kern_table[] = { | |||
| 695 | .proc_handler = &proc_dointvec, | 739 | .proc_handler = &proc_dointvec, |
| 696 | }, | 740 | }, |
| 697 | #endif | 741 | #endif |
| 698 | 742 | { | |
| 743 | .ctl_name = CTL_UNNUMBERED, | ||
| 744 | .procname = "poweroff_cmd", | ||
| 745 | .data = &poweroff_cmd, | ||
| 746 | .maxlen = POWEROFF_CMD_PATH_LEN, | ||
| 747 | .mode = 0644, | ||
| 748 | .proc_handler = &proc_dostring, | ||
| 749 | .strategy = &sysctl_string, | ||
| 750 | }, | ||
| 751 | /* | ||
| 752 | * NOTE: do not add new entries to this table unless you have read | ||
| 753 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 754 | */ | ||
| 699 | { .ctl_name = 0 } | 755 | { .ctl_name = 0 } |
| 700 | }; | 756 | }; |
| 701 | 757 | ||
| 702 | /* Constants for minimum and maximum testing in vm_table. | 758 | /* Constants for minimum and maximum testing in vm_table. |
| 703 | We use these as one-element integer vectors. */ | 759 | We use these as one-element integer vectors. */ |
| 704 | static int zero; | 760 | static int zero; |
| 761 | static int two = 2; | ||
| 705 | static int one_hundred = 100; | 762 | static int one_hundred = 100; |
| 706 | 763 | ||
| 707 | 764 | ||
| @@ -814,6 +871,14 @@ static ctl_table vm_table[] = { | |||
| 814 | .mode = 0644, | 871 | .mode = 0644, |
| 815 | .proc_handler = &proc_dointvec, | 872 | .proc_handler = &proc_dointvec, |
| 816 | }, | 873 | }, |
| 874 | { | ||
| 875 | .ctl_name = CTL_UNNUMBERED, | ||
| 876 | .procname = "hugepages_treat_as_movable", | ||
| 877 | .data = &hugepages_treat_as_movable, | ||
| 878 | .maxlen = sizeof(int), | ||
| 879 | .mode = 0644, | ||
| 880 | .proc_handler = &hugetlb_treat_movable_handler, | ||
| 881 | }, | ||
| 817 | #endif | 882 | #endif |
| 818 | { | 883 | { |
| 819 | .ctl_name = VM_LOWMEM_RESERVE_RATIO, | 884 | .ctl_name = VM_LOWMEM_RESERVE_RATIO, |
| @@ -958,6 +1023,17 @@ static ctl_table vm_table[] = { | |||
| 958 | .mode = 0644, | 1023 | .mode = 0644, |
| 959 | .proc_handler = &proc_doulongvec_minmax, | 1024 | .proc_handler = &proc_doulongvec_minmax, |
| 960 | }, | 1025 | }, |
| 1026 | #ifdef CONFIG_NUMA | ||
| 1027 | { | ||
| 1028 | .ctl_name = CTL_UNNUMBERED, | ||
| 1029 | .procname = "numa_zonelist_order", | ||
| 1030 | .data = &numa_zonelist_order, | ||
| 1031 | .maxlen = NUMA_ZONELIST_ORDER_LEN, | ||
| 1032 | .mode = 0644, | ||
| 1033 | .proc_handler = &numa_zonelist_order_handler, | ||
| 1034 | .strategy = &sysctl_string, | ||
| 1035 | }, | ||
| 1036 | #endif | ||
| 961 | #endif | 1037 | #endif |
| 962 | #if defined(CONFIG_X86_32) || \ | 1038 | #if defined(CONFIG_X86_32) || \ |
| 963 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) | 1039 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) |
| @@ -972,6 +1048,10 @@ static ctl_table vm_table[] = { | |||
| 972 | .extra1 = &zero, | 1048 | .extra1 = &zero, |
| 973 | }, | 1049 | }, |
| 974 | #endif | 1050 | #endif |
| 1051 | /* | ||
| 1052 | * NOTE: do not add new entries to this table unless you have read | ||
| 1053 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 1054 | */ | ||
| 975 | { .ctl_name = 0 } | 1055 | { .ctl_name = 0 } |
| 976 | }; | 1056 | }; |
| 977 | 1057 | ||
| @@ -1069,7 +1149,10 @@ static ctl_table fs_table[] = { | |||
| 1069 | .data = &lease_break_time, | 1149 | .data = &lease_break_time, |
| 1070 | .maxlen = sizeof(int), | 1150 | .maxlen = sizeof(int), |
| 1071 | .mode = 0644, | 1151 | .mode = 0644, |
| 1072 | .proc_handler = &proc_dointvec, | 1152 | .proc_handler = &proc_dointvec_minmax, |
| 1153 | .strategy = &sysctl_intvec, | ||
| 1154 | .extra1 = &zero, | ||
| 1155 | .extra2 = &two, | ||
| 1073 | }, | 1156 | }, |
| 1074 | { | 1157 | { |
| 1075 | .ctl_name = FS_AIO_NR, | 1158 | .ctl_name = FS_AIO_NR, |
| @@ -1112,10 +1195,24 @@ static ctl_table fs_table[] = { | |||
| 1112 | .child = binfmt_misc_table, | 1195 | .child = binfmt_misc_table, |
| 1113 | }, | 1196 | }, |
| 1114 | #endif | 1197 | #endif |
| 1198 | /* | ||
| 1199 | * NOTE: do not add new entries to this table unless you have read | ||
| 1200 | * Documentation/sysctl/ctl_unnumbered.txt | ||
| 1201 | */ | ||
| 1115 | { .ctl_name = 0 } | 1202 | { .ctl_name = 0 } |
| 1116 | }; | 1203 | }; |
| 1117 | 1204 | ||
| 1118 | static ctl_table debug_table[] = { | 1205 | static ctl_table debug_table[] = { |
| 1206 | #ifdef CONFIG_X86 | ||
| 1207 | { | ||
| 1208 | .ctl_name = CTL_UNNUMBERED, | ||
| 1209 | .procname = "exception-trace", | ||
| 1210 | .data = &show_unhandled_signals, | ||
| 1211 | .maxlen = sizeof(int), | ||
| 1212 | .mode = 0644, | ||
| 1213 | .proc_handler = proc_dointvec | ||
| 1214 | }, | ||
| 1215 | #endif | ||
| 1119 | { .ctl_name = 0 } | 1216 | { .ctl_name = 0 } |
| 1120 | }; | 1217 | }; |
| 1121 | 1218 | ||
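Most of the sysctl.c churn follows one pattern: new knobs are registered with .ctl_name = CTL_UNNUMBERED, reachable under /proc/sys but deliberately without a binary sys_sysctl number, and the repeated NOTE comments fence the tables against new numbered entries. The lease-break-time hunk additionally shows the bounded-integer idiom via proc_dointvec_minmax. Both combined into one illustrative entry (the knob name and variables are assumptions):

    static int demo_knob;
    static int demo_min;            /* statics are zero-initialised */
    static int demo_max = 2;

    static ctl_table demo_table[] = {
            {
                    .ctl_name       = CTL_UNNUMBERED,   /* /proc/sys only */
                    .procname       = "demo_knob",
                    .data           = &demo_knob,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = &proc_dointvec_minmax,
                    .strategy       = &sysctl_intvec,
                    .extra1         = &demo_min,        /* clamp to 0..2 */
                    .extra2         = &demo_max,
            },
            { .ctl_name = 0 }
    };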
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 906cae771585..059431ed67db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
| 196 | 196 | ||
| 197 | /* fill in basic acct fields */ | 197 | /* fill in basic acct fields */ |
| 198 | stats->version = TASKSTATS_VERSION; | 198 | stats->version = TASKSTATS_VERSION; |
| 199 | stats->nvcsw = tsk->nvcsw; | ||
| 200 | stats->nivcsw = tsk->nivcsw; | ||
| 199 | bacct_add_tsk(stats, tsk); | 201 | bacct_add_tsk(stats, tsk); |
| 200 | 202 | ||
| 201 | /* fill in extended acct fields */ | 203 | /* fill in extended acct fields */ |
| @@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
| 242 | */ | 244 | */ |
| 243 | delayacct_add_tsk(stats, tsk); | 245 | delayacct_add_tsk(stats, tsk); |
| 244 | 246 | ||
| 247 | stats->nvcsw += tsk->nvcsw; | ||
| 248 | stats->nivcsw += tsk->nivcsw; | ||
| 245 | } while_each_thread(first, tsk); | 249 | } while_each_thread(first, tsk); |
| 246 | 250 | ||
| 247 | unlock_task_sighand(first, &flags); | 251 | unlock_task_sighand(first, &flags); |
diff --git a/kernel/time.c b/kernel/time.c index f04791f69408..5b81da08bbdb 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -58,9 +58,9 @@ EXPORT_SYMBOL(sys_tz); | |||
| 58 | asmlinkage long sys_time(time_t __user * tloc) | 58 | asmlinkage long sys_time(time_t __user * tloc) |
| 59 | { | 59 | { |
| 60 | time_t i; | 60 | time_t i; |
| 61 | struct timeval tv; | 61 | struct timespec tv; |
| 62 | 62 | ||
| 63 | do_gettimeofday(&tv); | 63 | getnstimeofday(&tv); |
| 64 | i = tv.tv_sec; | 64 | i = tv.tv_sec; |
| 65 | 65 | ||
| 66 | if (tloc) { | 66 | if (tloc) { |
| @@ -133,7 +133,6 @@ static inline void warp_clock(void) | |||
| 133 | write_seqlock_irq(&xtime_lock); | 133 | write_seqlock_irq(&xtime_lock); |
| 134 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 134 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; |
| 135 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 135 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; |
| 136 | time_interpolator_reset(); | ||
| 137 | write_sequnlock_irq(&xtime_lock); | 136 | write_sequnlock_irq(&xtime_lock); |
| 138 | clock_was_set(); | 137 | clock_was_set(); |
| 139 | } | 138 | } |
| @@ -306,79 +305,6 @@ struct timespec timespec_trunc(struct timespec t, unsigned gran) | |||
| 306 | } | 305 | } |
| 307 | EXPORT_SYMBOL(timespec_trunc); | 306 | EXPORT_SYMBOL(timespec_trunc); |
| 308 | 307 | ||
| 309 | #ifdef CONFIG_TIME_INTERPOLATION | ||
| 310 | void getnstimeofday (struct timespec *tv) | ||
| 311 | { | ||
| 312 | unsigned long seq,sec,nsec; | ||
| 313 | |||
| 314 | do { | ||
| 315 | seq = read_seqbegin(&xtime_lock); | ||
| 316 | sec = xtime.tv_sec; | ||
| 317 | nsec = xtime.tv_nsec+time_interpolator_get_offset(); | ||
| 318 | } while (unlikely(read_seqretry(&xtime_lock, seq))); | ||
| 319 | |||
| 320 | while (unlikely(nsec >= NSEC_PER_SEC)) { | ||
| 321 | nsec -= NSEC_PER_SEC; | ||
| 322 | ++sec; | ||
| 323 | } | ||
| 324 | tv->tv_sec = sec; | ||
| 325 | tv->tv_nsec = nsec; | ||
| 326 | } | ||
| 327 | EXPORT_SYMBOL_GPL(getnstimeofday); | ||
| 328 | |||
| 329 | int do_settimeofday (struct timespec *tv) | ||
| 330 | { | ||
| 331 | time_t wtm_sec, sec = tv->tv_sec; | ||
| 332 | long wtm_nsec, nsec = tv->tv_nsec; | ||
| 333 | |||
| 334 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
| 335 | return -EINVAL; | ||
| 336 | |||
| 337 | write_seqlock_irq(&xtime_lock); | ||
| 338 | { | ||
| 339 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
| 340 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
| 341 | |||
| 342 | set_normalized_timespec(&xtime, sec, nsec); | ||
| 343 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
| 344 | |||
| 345 | time_adjust = 0; /* stop active adjtime() */ | ||
| 346 | time_status |= STA_UNSYNC; | ||
| 347 | time_maxerror = NTP_PHASE_LIMIT; | ||
| 348 | time_esterror = NTP_PHASE_LIMIT; | ||
| 349 | time_interpolator_reset(); | ||
| 350 | } | ||
| 351 | write_sequnlock_irq(&xtime_lock); | ||
| 352 | clock_was_set(); | ||
| 353 | return 0; | ||
| 354 | } | ||
| 355 | EXPORT_SYMBOL(do_settimeofday); | ||
| 356 | |||
| 357 | void do_gettimeofday (struct timeval *tv) | ||
| 358 | { | ||
| 359 | unsigned long seq, nsec, usec, sec, offset; | ||
| 360 | do { | ||
| 361 | seq = read_seqbegin(&xtime_lock); | ||
| 362 | offset = time_interpolator_get_offset(); | ||
| 363 | sec = xtime.tv_sec; | ||
| 364 | nsec = xtime.tv_nsec; | ||
| 365 | } while (unlikely(read_seqretry(&xtime_lock, seq))); | ||
| 366 | |||
| 367 | usec = (nsec + offset) / 1000; | ||
| 368 | |||
| 369 | while (unlikely(usec >= USEC_PER_SEC)) { | ||
| 370 | usec -= USEC_PER_SEC; | ||
| 371 | ++sec; | ||
| 372 | } | ||
| 373 | |||
| 374 | tv->tv_sec = sec; | ||
| 375 | tv->tv_usec = usec; | ||
| 376 | } | ||
| 377 | |||
| 378 | EXPORT_SYMBOL(do_gettimeofday); | ||
| 379 | |||
| 380 | |||
| 381 | #else | ||
| 382 | #ifndef CONFIG_GENERIC_TIME | 308 | #ifndef CONFIG_GENERIC_TIME |
| 383 | /* | 309 | /* |
| 384 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval | 310 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval |
| @@ -394,7 +320,6 @@ void getnstimeofday(struct timespec *tv) | |||
| 394 | } | 320 | } |
| 395 | EXPORT_SYMBOL_GPL(getnstimeofday); | 321 | EXPORT_SYMBOL_GPL(getnstimeofday); |
| 396 | #endif | 322 | #endif |
| 397 | #endif | ||
| 398 | 323 | ||
| 399 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 324 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. |
| 400 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 325 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 76212b2a99de..2ad1c37b8dfe 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | /** | 207 | /** |
| 208 | * clockevents_request_device | ||
| 209 | */ | ||
| 210 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
| 211 | cpumask_t cpumask) | ||
| 212 | { | ||
| 213 | struct clock_event_device *cur, *dev = NULL; | ||
| 214 | struct list_head *tmp; | ||
| 215 | |||
| 216 | spin_lock(&clockevents_lock); | ||
| 217 | |||
| 218 | list_for_each(tmp, &clockevent_devices) { | ||
| 219 | cur = list_entry(tmp, struct clock_event_device, list); | ||
| 220 | |||
| 221 | if ((cur->features & features) == features && | ||
| 222 | cpus_equal(cpumask, cur->cpumask)) { | ||
| 223 | if (!dev || dev->rating < cur->rating) | ||
| 224 | dev = cur; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 228 | clockevents_exchange_device(NULL, dev); | ||
| 229 | |||
| 230 | spin_unlock(&clockevents_lock); | ||
| 231 | |||
| 232 | return dev; | ||
| 233 | } | ||
| 234 | |||
| 235 | /** | ||
| 236 | * clockevents_release_device | ||
| 237 | */ | ||
| 238 | void clockevents_release_device(struct clock_event_device *dev) | ||
| 239 | { | ||
| 240 | spin_lock(&clockevents_lock); | ||
| 241 | |||
| 242 | clockevents_exchange_device(dev, NULL); | ||
| 243 | clockevents_notify_released(); | ||
| 244 | |||
| 245 | spin_unlock(&clockevents_lock); | ||
| 246 | } | ||
| 247 | |||
| 248 | /** | ||
| 249 | * clockevents_notify - notification about relevant events | 208 | * clockevents_notify - notification about relevant events |
| 250 | */ | 209 | */ |
| 251 | void clockevents_notify(unsigned long reason, void *arg) | 210 | void clockevents_notify(unsigned long reason, void *arg) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cf53bb5814cb..cd91237dbfe3 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
| 12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
| 13 | #include <linux/timer.h> | ||
| 13 | #include <linux/timex.h> | 14 | #include <linux/timex.h> |
| 14 | #include <linux/jiffies.h> | 15 | #include <linux/jiffies.h> |
| 15 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
| 16 | 17 | #include <linux/capability.h> | |
| 17 | #include <asm/div64.h> | 18 | #include <asm/div64.h> |
| 18 | #include <asm/timex.h> | 19 | #include <asm/timex.h> |
| 19 | 20 | ||
| @@ -116,11 +117,6 @@ void second_overflow(void) | |||
| 116 | if (xtime.tv_sec % 86400 == 0) { | 117 | if (xtime.tv_sec % 86400 == 0) { |
| 117 | xtime.tv_sec--; | 118 | xtime.tv_sec--; |
| 118 | wall_to_monotonic.tv_sec++; | 119 | wall_to_monotonic.tv_sec++; |
| 119 | /* | ||
| 120 | * The timer interpolator will make time change | ||
| 121 | * gradually instead of an immediate jump by one second | ||
| 122 | */ | ||
| 123 | time_interpolator_update(-NSEC_PER_SEC); | ||
| 124 | time_state = TIME_OOP; | 120 | time_state = TIME_OOP; |
| 125 | printk(KERN_NOTICE "Clock: inserting leap second " | 121 | printk(KERN_NOTICE "Clock: inserting leap second " |
| 126 | "23:59:60 UTC\n"); | 122 | "23:59:60 UTC\n"); |
| @@ -130,11 +126,6 @@ void second_overflow(void) | |||
| 130 | if ((xtime.tv_sec + 1) % 86400 == 0) { | 126 | if ((xtime.tv_sec + 1) % 86400 == 0) { |
| 131 | xtime.tv_sec++; | 127 | xtime.tv_sec++; |
| 132 | wall_to_monotonic.tv_sec--; | 128 | wall_to_monotonic.tv_sec--; |
| 133 | /* | ||
| 134 | * Use of time interpolator for a gradual change of | ||
| 135 | * time | ||
| 136 | */ | ||
| 137 | time_interpolator_update(NSEC_PER_SEC); | ||
| 138 | time_state = TIME_WAIT; | 129 | time_state = TIME_WAIT; |
| 139 | printk(KERN_NOTICE "Clock: deleting leap second " | 130 | printk(KERN_NOTICE "Clock: deleting leap second " |
| 140 | "23:59:59 UTC\n"); | 131 | "23:59:59 UTC\n"); |
| @@ -185,12 +176,64 @@ u64 current_tick_length(void) | |||
| 185 | return tick_length; | 176 | return tick_length; |
| 186 | } | 177 | } |
| 187 | 178 | ||
| 179 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | ||
| 180 | |||
| 181 | /* Disable the cmos update - used by virtualization and embedded */ | ||
| 182 | int no_sync_cmos_clock __read_mostly; | ||
| 183 | |||
| 184 | static void sync_cmos_clock(unsigned long dummy); | ||
| 185 | |||
| 186 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); | ||
| 188 | 187 | ||
| 189 | void __attribute__ ((weak)) notify_arch_cmos_timer(void) | 188 | static void sync_cmos_clock(unsigned long dummy) |
| 190 | { | 189 | { |
| 191 | return; | 190 | struct timespec now, next; |
| 191 | int fail = 1; | ||
| 192 | |||
| 193 | /* | ||
| 194 | * If we have an externally synchronized Linux clock, then update | ||
| 195 | * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be | ||
| 196 | * called as close as possible to 500 ms before the new second starts. | ||
| 197 | * This code is run on a timer. If the clock is set, that timer | ||
| 198 | * may not expire at the correct time. Thus, we adjust... | ||
| 199 | */ | ||
| 200 | if (!ntp_synced()) | ||
| 201 | /* | ||
| 202 | * Not synced, exit, do not restart a timer (if one is | ||
| 203 | * running, let it run out). | ||
| 204 | */ | ||
| 205 | return; | ||
| 206 | |||
| 207 | getnstimeofday(&now); | ||
| 208 | if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) | ||
| 209 | fail = update_persistent_clock(now); | ||
| 210 | |||
| 211 | next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec; | ||
| 212 | if (next.tv_nsec <= 0) | ||
| 213 | next.tv_nsec += NSEC_PER_SEC; | ||
| 214 | |||
| 215 | if (!fail) | ||
| 216 | next.tv_sec = 659; | ||
| 217 | else | ||
| 218 | next.tv_sec = 0; | ||
| 219 | |||
| 220 | if (next.tv_nsec >= NSEC_PER_SEC) { | ||
| 221 | next.tv_sec++; | ||
| 222 | next.tv_nsec -= NSEC_PER_SEC; | ||
| 223 | } | ||
| 224 | mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next)); | ||
| 192 | } | 225 | } |
| 193 | 226 | ||
| 227 | static void notify_cmos_timer(void) | ||
| 228 | { | ||
| 229 | if (!no_sync_cmos_clock) | ||
| 230 | mod_timer(&sync_cmos_timer, jiffies + 1); | ||
| 231 | } | ||
| 232 | |||
| 233 | #else | ||
| 234 | static inline void notify_cmos_timer(void) { } | ||
| 235 | #endif | ||
| 236 | |||
| 194 | /* adjtimex mainly allows reading (and writing, if superuser) of | 237 | /* adjtimex mainly allows reading (and writing, if superuser) of |
| 195 | * kernel time-keeping variables. used by xntpd. | 238 | * kernel time-keeping variables. used by xntpd. |
| 196 | */ | 239 | */ |
| @@ -355,6 +398,6 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | |||
| 355 | txc->stbcnt = 0; | 398 | txc->stbcnt = 0; |
| 356 | write_sequnlock_irq(&xtime_lock); | 399 | write_sequnlock_irq(&xtime_lock); |
| 357 | do_gettimeofday(&txc->time); | 400 | do_gettimeofday(&txc->time); |
| 358 | notify_arch_cmos_timer(); | 401 | notify_cmos_timer(); |
| 359 | return(result); | 402 | return(result); |
| 360 | } | 403 | } |
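The CMOS-update logic moves out of arch code into ntp.c behind CONFIG_GENERIC_CMOS_UPDATE: while the clock is NTP-synced, update_persistent_clock() is retried every 659 seconds (roughly the traditional 11 minutes), with the timer aimed so the RTC write lands about 500 ms before a second boundary, as the comment in sync_cmos_clock() requires. The new no_sync_cmos_clock flag lets virtualized or embedded systems keep the kernel's hands off the RTC entirely; an opt-out sketch (the initcall is illustrative):

    /* Sketch: a guest kernel that must not touch the host CMOS. */
    static int __init demo_no_cmos_sync(void)
    {
            no_sync_cmos_clock = 1;     /* flag introduced by this patch */
            return 0;
    }
    core_initcall(demo_no_cmos_sync);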
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 8001d37071f5..db8e0f3d409b 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
| @@ -31,6 +31,12 @@ struct tick_device tick_broadcast_device; | |||
| 31 | static cpumask_t tick_broadcast_mask; | 31 | static cpumask_t tick_broadcast_mask; |
| 32 | static DEFINE_SPINLOCK(tick_broadcast_lock); | 32 | static DEFINE_SPINLOCK(tick_broadcast_lock); |
| 33 | 33 | ||
| 34 | #ifdef CONFIG_TICK_ONESHOT | ||
| 35 | static void tick_broadcast_clear_oneshot(int cpu); | ||
| 36 | #else | ||
| 37 | static inline void tick_broadcast_clear_oneshot(int cpu) { } | ||
| 38 | #endif | ||
| 39 | |||
| 34 | /* | 40 | /* |
| 35 | * Debugging: see timer_list.c | 41 | * Debugging: see timer_list.c |
| 36 | */ | 42 | */ |
| @@ -49,7 +55,7 @@ cpumask_t *tick_get_broadcast_mask(void) | |||
| 49 | */ | 55 | */ |
| 50 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) | 56 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) |
| 51 | { | 57 | { |
| 52 | if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN) | 58 | if (bc) |
| 53 | tick_setup_periodic(bc, 1); | 59 | tick_setup_periodic(bc, 1); |
| 54 | } | 60 | } |
| 55 | 61 | ||
| @@ -99,8 +105,19 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | |||
| 99 | cpu_set(cpu, tick_broadcast_mask); | 105 | cpu_set(cpu, tick_broadcast_mask); |
| 100 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | 106 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); |
| 101 | ret = 1; | 107 | ret = 1; |
| 102 | } | 108 | } else { |
| 109 | /* | ||
| 110 | * When the new device is not affected by the stop | ||
| 111 | * feature and the cpu is marked in the broadcast mask | ||
| 112 | * then clear the broadcast bit. | ||
| 113 | */ | ||
| 114 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { | ||
| 115 | int cpu = smp_processor_id(); | ||
| 103 | 116 | ||
| 117 | cpu_clear(cpu, tick_broadcast_mask); | ||
| 118 | tick_broadcast_clear_oneshot(cpu); | ||
| 119 | } | ||
| 120 | } | ||
| 104 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 121 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| 105 | return ret; | 122 | return ret; |
| 106 | } | 123 | } |
| @@ -299,7 +316,7 @@ void tick_suspend_broadcast(void) | |||
| 299 | spin_lock_irqsave(&tick_broadcast_lock, flags); | 316 | spin_lock_irqsave(&tick_broadcast_lock, flags); |
| 300 | 317 | ||
| 301 | bc = tick_broadcast_device.evtdev; | 318 | bc = tick_broadcast_device.evtdev; |
| 302 | if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | 319 | if (bc) |
| 303 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | 320 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); |
| 304 | 321 | ||
| 305 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 322 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| @@ -316,6 +333,8 @@ int tick_resume_broadcast(void) | |||
| 316 | bc = tick_broadcast_device.evtdev; | 333 | bc = tick_broadcast_device.evtdev; |
| 317 | 334 | ||
| 318 | if (bc) { | 335 | if (bc) { |
| 336 | clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); | ||
| 337 | |||
| 319 | switch (tick_broadcast_device.mode) { | 338 | switch (tick_broadcast_device.mode) { |
| 320 | case TICKDEV_MODE_PERIODIC: | 339 | case TICKDEV_MODE_PERIODIC: |
| 321 | if(!cpus_empty(tick_broadcast_mask)) | 340 | if(!cpus_empty(tick_broadcast_mask)) |
| @@ -485,6 +504,16 @@ out: | |||
| 485 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 504 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| 486 | } | 505 | } |
| 487 | 506 | ||
| 507 | /* | ||
| 508 | * Reset the one shot broadcast for a cpu | ||
| 509 | * | ||
| 510 | * Called with tick_broadcast_lock held | ||
| 511 | */ | ||
| 512 | static void tick_broadcast_clear_oneshot(int cpu) | ||
| 513 | { | ||
| 514 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
| 515 | } | ||
| 516 | |||
| 488 | /** | 517 | /** |
| 489 | * tick_broadcast_setup_highres - setup the broadcast device for highres | 518 | * tick_broadcast_setup_highres - setup the broadcast device for highres |
| 490 | */ | 519 | */ |
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a96ec9ab3454..77a21abc8716 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
| @@ -318,12 +318,17 @@ static void tick_resume(void) | |||
| 318 | { | 318 | { |
| 319 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 319 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
| 320 | unsigned long flags; | 320 | unsigned long flags; |
| 321 | int broadcast = tick_resume_broadcast(); | ||
| 321 | 322 | ||
| 322 | spin_lock_irqsave(&tick_device_lock, flags); | 323 | spin_lock_irqsave(&tick_device_lock, flags); |
| 323 | if (td->mode == TICKDEV_MODE_PERIODIC) | 324 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); |
| 324 | tick_setup_periodic(td->evtdev, 0); | 325 | |
| 325 | else | 326 | if (!broadcast) { |
| 326 | tick_resume_oneshot(); | 327 | if (td->mode == TICKDEV_MODE_PERIODIC) |
| 328 | tick_setup_periodic(td->evtdev, 0); | ||
| 329 | else | ||
| 330 | tick_resume_oneshot(); | ||
| 331 | } | ||
| 327 | spin_unlock_irqrestore(&tick_device_lock, flags); | 332 | spin_unlock_irqrestore(&tick_device_lock, flags); |
| 328 | } | 333 | } |
| 329 | 334 | ||
| @@ -360,8 +365,7 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, | |||
| 360 | break; | 365 | break; |
| 361 | 366 | ||
| 362 | case CLOCK_EVT_NOTIFY_RESUME: | 367 | case CLOCK_EVT_NOTIFY_RESUME: |
| 363 | if (!tick_resume_broadcast()) | 368 | tick_resume(); |
| 364 | tick_resume(); | ||
| 365 | break; | 369 | break; |
| 366 | 370 | ||
| 367 | default: | 371 | default: |
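tick_resume() now always kicks the per-CPU device with CLOCK_EVT_MODE_RESUME and only reprograms periodic or one-shot operation when no broadcast device took over, with the broadcast check folded into tick_resume() itself. A clockevent driver is therefore expected to handle the RESUME transition in its set-mode hook, roughly as below (all demo_* helpers are placeholders):

    static void demo_set_mode(enum clock_event_mode mode,
                              struct clock_event_device *evt)
    {
            switch (mode) {
            case CLOCK_EVT_MODE_PERIODIC:
                    demo_program_periodic(evt);
                    break;
            case CLOCK_EVT_MODE_ONESHOT:
                    demo_program_oneshot(evt);
                    break;
            case CLOCK_EVT_MODE_UNUSED:
            case CLOCK_EVT_MODE_SHUTDOWN:
                    demo_stop_hw(evt);
                    break;
            case CLOCK_EVT_MODE_RESUME:
                    demo_reinit_hw(evt);    /* undo the suspend shutdown */
                    break;
            }
    }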
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index f6997ab0c3c9..0258d3115d54 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c | |||
| @@ -73,8 +73,21 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) | |||
| 73 | struct clock_event_device *dev = td->evtdev; | 73 | struct clock_event_device *dev = td->evtdev; |
| 74 | 74 | ||
| 75 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || | 75 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || |
| 76 | !tick_device_is_functional(dev)) | 76 | !tick_device_is_functional(dev)) { |
| 77 | |||
| 78 | printk(KERN_INFO "Clockevents: " | ||
| 79 | "could not switch to one-shot mode:"); | ||
| 80 | if (!dev) { | ||
| 81 | printk(" no tick device\n"); | ||
| 82 | } else { | ||
| 83 | if (!tick_device_is_functional(dev)) | ||
| 84 | printk(" %s is not functional.\n", dev->name); | ||
| 85 | else | ||
| 86 | printk(" %s does not support one-shot mode.\n", | ||
| 87 | dev->name); | ||
| 88 | } | ||
| 77 | return -EINVAL; | 89 | return -EINVAL; |
| 90 | } | ||
| 78 | 91 | ||
| 79 | td->mode = TICKDEV_MODE_ONESHOT; | 92 | td->mode = TICKDEV_MODE_ONESHOT; |
| 80 | dev->event_handler = handler; | 93 | dev->event_handler = handler; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 52db9e3c526e..b416995b9757 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -546,6 +546,7 @@ void tick_setup_sched_timer(void) | |||
| 546 | { | 546 | { |
| 547 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 547 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
| 548 | ktime_t now = ktime_get(); | 548 | ktime_t now = ktime_get(); |
| 549 | u64 offset; | ||
| 549 | 550 | ||
| 550 | /* | 551 | /* |
| 551 | * Emulate tick processing via per-CPU hrtimers: | 552 | * Emulate tick processing via per-CPU hrtimers: |
| @@ -554,8 +555,12 @@ void tick_setup_sched_timer(void) | |||
| 554 | ts->sched_timer.function = tick_sched_timer; | 555 | ts->sched_timer.function = tick_sched_timer; |
| 555 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 556 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; |
| 556 | 557 | ||
| 557 | /* Get the next period */ | 558 | /* Get the next period (per cpu) */ |
| 558 | ts->sched_timer.expires = tick_init_jiffy_update(); | 559 | ts->sched_timer.expires = tick_init_jiffy_update(); |
| 560 | offset = ktime_to_ns(tick_period) >> 1; | ||
| 561 | do_div(offset, NR_CPUS); | ||
| 562 | offset *= smp_processor_id(); | ||
| 563 | ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); | ||
| 559 | 564 | ||
| 560 | for (;;) { | 565 | for (;;) { |
| 561 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 566 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
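tick_setup_sched_timer() now staggers the per-CPU sched-tick hrtimers instead of letting every CPU expire on the same edge and pile up on xtime_lock. The spread window is half a tick, split evenly across NR_CPUS. Worked numbers, assuming HZ=250 and NR_CPUS=8:

    u64 offset = ktime_to_ns(tick_period) >> 1; /* 4 ms tick -> 2,000,000 ns */
    do_div(offset, NR_CPUS);                    /* -> 250,000 ns per slot */
    offset *= smp_processor_id();               /* cpu 3 fires 750,000 ns late */

Note that the divisor is the compile-time NR_CPUS, not the number of online CPUs, so the stagger stays stable across hotplug at the cost of a narrower spread on small machines.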
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3d1042f82a68..88c81026e003 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock); | |||
| 36 | * at zero at system boot time, so wall_to_monotonic will be negative, | 36 | * at zero at system boot time, so wall_to_monotonic will be negative, |
| 37 | * however, we will ALWAYS keep the tv_nsec part positive so we can use | 37 | * however, we will ALWAYS keep the tv_nsec part positive so we can use |
| 38 | * the usual normalization. | 38 | * the usual normalization. |
| 39 | * | ||
| 40 | * wall_to_monotonic is moved after resume from suspend for the monotonic | ||
| 41 | * time not to jump. We need to add total_sleep_time to wall_to_monotonic | ||
| 42 | * to get the real boot based time offset. | ||
| 43 | * | ||
| 44 | * - wall_to_monotonic is no longer the boot time, getboottime must be | ||
| 45 | * used instead. | ||
| 39 | */ | 46 | */ |
| 40 | struct timespec xtime __attribute__ ((aligned (16))); | 47 | struct timespec xtime __attribute__ ((aligned (16))); |
| 41 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); | 48 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); |
| 49 | static unsigned long total_sleep_time; /* seconds */ | ||
| 42 | 50 | ||
| 43 | EXPORT_SYMBOL(xtime); | 51 | EXPORT_SYMBOL(xtime); |
| 44 | 52 | ||
| @@ -251,6 +259,7 @@ void __init timekeeping_init(void) | |||
| 251 | xtime.tv_nsec = 0; | 259 | xtime.tv_nsec = 0; |
| 252 | set_normalized_timespec(&wall_to_monotonic, | 260 | set_normalized_timespec(&wall_to_monotonic, |
| 253 | -xtime.tv_sec, -xtime.tv_nsec); | 261 | -xtime.tv_sec, -xtime.tv_nsec); |
| 262 | total_sleep_time = 0; | ||
| 254 | 263 | ||
| 255 | write_sequnlock_irqrestore(&xtime_lock, flags); | 264 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 256 | } | 265 | } |
| @@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
| 282 | 291 | ||
| 283 | xtime.tv_sec += sleep_length; | 292 | xtime.tv_sec += sleep_length; |
| 284 | wall_to_monotonic.tv_sec -= sleep_length; | 293 | wall_to_monotonic.tv_sec -= sleep_length; |
| 294 | total_sleep_time += sleep_length; | ||
| 285 | } | 295 | } |
| 286 | /* re-base the last cycle value */ | 296 | /* re-base the last cycle value */ |
| 287 | clock->cycle_last = clocksource_read(clock); | 297 | clock->cycle_last = clocksource_read(clock); |
| @@ -391,7 +401,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, | |||
| 391 | * this is optimized for the most common adjustments of -1,0,1, | 401 | * this is optimized for the most common adjustments of -1,0,1, |
| 392 | * for other values we can do a bit more work. | 402 | * for other values we can do a bit more work. |
| 393 | */ | 403 | */ |
| 394 | static void clocksource_adjust(struct clocksource *clock, s64 offset) | 404 | static void clocksource_adjust(s64 offset) |
| 395 | { | 405 | { |
| 396 | s64 error, interval = clock->cycle_interval; | 406 | s64 error, interval = clock->cycle_interval; |
| 397 | int adj; | 407 | int adj; |
| @@ -456,17 +466,13 @@ void update_wall_time(void) | |||
| 456 | second_overflow(); | 466 | second_overflow(); |
| 457 | } | 467 | } |
| 458 | 468 | ||
| 459 | /* interpolator bits */ | ||
| 460 | time_interpolator_update(clock->xtime_interval | ||
| 461 | >> clock->shift); | ||
| 462 | |||
| 463 | /* accumulate error between NTP and clock interval */ | 469 | /* accumulate error between NTP and clock interval */ |
| 464 | clock->error += current_tick_length(); | 470 | clock->error += current_tick_length(); |
| 465 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); | 471 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); |
| 466 | } | 472 | } |
| 467 | 473 | ||
| 468 | /* correct the clock when NTP error is too big */ | 474 | /* correct the clock when NTP error is too big */ |
| 469 | clocksource_adjust(clock, offset); | 475 | clocksource_adjust(offset); |
| 470 | 476 | ||
| 471 | /* store full nanoseconds into xtime */ | 477 | /* store full nanoseconds into xtime */ |
| 472 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; | 478 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; |
| @@ -476,3 +482,30 @@ void update_wall_time(void) | |||
| 476 | change_clocksource(); | 482 | change_clocksource(); |
| 477 | update_vsyscall(&xtime, clock); | 483 | update_vsyscall(&xtime, clock); |
| 478 | } | 484 | } |
| 485 | |||
| 486 | /** | ||
| 487 | * getboottime - Return the real time of system boot. | ||
| 488 | * @ts: pointer to the timespec to be set | ||
| 489 | * | ||
| 490 | * Returns the time of system boot in a timespec. | ||
| 491 | * | ||
| 492 | * This is based on the wall_to_monotonic offset and the total suspend | ||
| 493 | * time. Calls to settimeofday will affect the value returned (which | ||
| 494 | * basically means that however wrong your real time clock is at boot time, | ||
| 495 | * you get the right time here). | ||
| 496 | */ | ||
| 497 | void getboottime(struct timespec *ts) | ||
| 498 | { | ||
| 499 | set_normalized_timespec(ts, | ||
| 500 | - (wall_to_monotonic.tv_sec + total_sleep_time), | ||
| 501 | - wall_to_monotonic.tv_nsec); | ||
| 502 | } | ||
| 503 | |||
| 504 | /** | ||
| 505 | * monotonic_to_bootbased - Convert the monotonic time to boot based. | ||
| 506 | * @ts: pointer to the timespec to be converted | ||
| 507 | */ | ||
| 508 | void monotonic_to_bootbased(struct timespec *ts) | ||
| 509 | { | ||
| 510 | ts->tv_sec += total_sleep_time; | ||
| 511 | } | ||
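getboottime() reconstructs the wall-clock time of boot from wall_to_monotonic plus the suspend time now accumulated in total_sleep_time, and monotonic_to_bootbased() shifts a monotonic timestamp by the same amount. A self-contained model of the arithmetic, reduced to whole seconds with illustrative values:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        /* illustrative values, not from a real system */
        long wall_to_monotonic_sec = -1184000000; /* set at timekeeping_init */
        long total_sleep_time_sec = 120;          /* two minutes suspended */

        /* getboottime(): ts = -(wall_to_monotonic + total_sleep_time) */
        time_t boot = -(wall_to_monotonic_sec + total_sleep_time_sec);
        printf("boot: %s", ctime(&boot));

        /* monotonic_to_bootbased(): ts->tv_sec += total_sleep_time */
        long monotonic_sec = 3600;
        printf("boot-based uptime: %ld s\n",
               monotonic_sec + total_sleep_time_sec);
        return 0;
    }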
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 8bbcfb77f7d2..e5edc3a22a08 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
| @@ -38,7 +38,7 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | |||
| 38 | 38 | ||
| 39 | static void print_name_offset(struct seq_file *m, void *sym) | 39 | static void print_name_offset(struct seq_file *m, void *sym) |
| 40 | { | 40 | { |
| 41 | char symname[KSYM_NAME_LEN+1]; | 41 | char symname[KSYM_NAME_LEN]; |
| 42 | 42 | ||
| 43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) | 43 | if (lookup_symbol_name((unsigned long)sym, symname) < 0) |
| 44 | SEQ_printf(m, "<%p>", sym); | 44 | SEQ_printf(m, "<%p>", sym); |
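The "+1" disappears here (and in timer_stats.c below) presumably because KSYM_NAME_LEN was redefined elsewhere in this series to already include the terminating NUL. A sketch of the sizing convention, with the constant's value assumed:

    #include <stdio.h>

    #define KSYM_NAME_LEN 128   /* assumed value; counts the trailing '\0' */

    int main(void)
    {
        char symname[KSYM_NAME_LEN];

        /* snprintf() bounds the write to the buffer, so no "+1" is needed
         * once the constant itself accounts for the terminator */
        snprintf(symname, sizeof(symname), "%s", "run_timer_softirq+0x0/0x210");
        printf("%s (%zu-byte buffer)\n", symname, sizeof(symname));
        return 0;
    }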
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 321693724ad7..8ed62fda16c6 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
| @@ -68,6 +68,7 @@ struct entry { | |||
| 68 | * Number of timeout events: | 68 | * Number of timeout events: |
| 69 | */ | 69 | */ |
| 70 | unsigned long count; | 70 | unsigned long count; |
| 71 | unsigned int timer_flag; | ||
| 71 | 72 | ||
| 72 | /* | 73 | /* |
| 73 | * We save the command-line string to preserve | 74 | * We save the command-line string to preserve |
| @@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
| 231 | * incremented. Otherwise the timer is registered in a free slot. | 232 | * incremented. Otherwise the timer is registered in a free slot. |
| 232 | */ | 233 | */ |
| 233 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | 234 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, |
| 234 | void *timerf, char * comm) | 235 | void *timerf, char *comm, |
| 236 | unsigned int timer_flag) | ||
| 235 | { | 237 | { |
| 236 | /* | 238 | /* |
| 237 | * It doesn't matter which lock we take: | 239 |
| @@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
| 249 | input.start_func = startf; | 251 | input.start_func = startf; |
| 250 | input.expire_func = timerf; | 252 | input.expire_func = timerf; |
| 251 | input.pid = pid; | 253 | input.pid = pid; |
| 254 | input.timer_flag = timer_flag; | ||
| 252 | 255 | ||
| 253 | spin_lock_irqsave(lock, flags); | 256 | spin_lock_irqsave(lock, flags); |
| 254 | if (!active) | 257 | if (!active) |
| @@ -266,7 +269,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
| 266 | 269 | ||
| 267 | static void print_name_offset(struct seq_file *m, unsigned long addr) | 270 | static void print_name_offset(struct seq_file *m, unsigned long addr) |
| 268 | { | 271 | { |
| 269 | char symname[KSYM_NAME_LEN+1]; | 272 | char symname[KSYM_NAME_LEN]; |
| 270 | 273 | ||
| 271 | if (lookup_symbol_name(addr, symname) < 0) | 274 | if (lookup_symbol_name(addr, symname) < 0) |
| 272 | seq_printf(m, "<%p>", (void *)addr); | 275 | seq_printf(m, "<%p>", (void *)addr); |
| @@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v) | |||
| 295 | period = ktime_to_timespec(time); | 298 | period = ktime_to_timespec(time); |
| 296 | ms = period.tv_nsec / 1000000; | 299 | ms = period.tv_nsec / 1000000; |
| 297 | 300 | ||
| 298 | seq_puts(m, "Timer Stats Version: v0.1\n"); | 301 | seq_puts(m, "Timer Stats Version: v0.2\n"); |
| 299 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | 302 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); |
| 300 | if (atomic_read(&overflow_count)) | 303 | if (atomic_read(&overflow_count)) |
| 301 | seq_printf(m, "Overflow: %d entries\n", | 304 | seq_printf(m, "Overflow: %d entries\n", |
| @@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v) | |||
| 303 | 306 | ||
| 304 | for (i = 0; i < nr_entries; i++) { | 307 | for (i = 0; i < nr_entries; i++) { |
| 305 | entry = entries + i; | 308 | entry = entries + i; |
| 306 | seq_printf(m, "%4lu, %5d %-16s ", | 309 | if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { |
| 310 | seq_printf(m, "%4luD, %5d %-16s ", | ||
| 307 | entry->count, entry->pid, entry->comm); | 311 | entry->count, entry->pid, entry->comm); |
| 312 | } else { | ||
| 313 | seq_printf(m, " %4lu, %5d %-16s ", | ||
| 314 | entry->count, entry->pid, entry->comm); | ||
| 315 | } | ||
| 308 | 316 | ||
| 309 | print_name_offset(m, (unsigned long)entry->start_func); | 317 | print_name_offset(m, (unsigned long)entry->start_func); |
| 310 | seq_puts(m, " ("); | 318 | seq_puts(m, " ("); |
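Version v0.2 of the /proc/timer_stats format marks deferrable timers with a "D" after the count, while non-deferrable entries gain a leading space so the columns still align. A model of the two format strings on sample entries (values invented):

    #include <stdio.h>

    #define TIMER_STATS_FLAG_DEFERRABLE 0x1   /* value assumed */

    struct entry_model {
        unsigned long count;
        int pid;
        const char *comm;
        unsigned int timer_flag;
    };

    int main(void)
    {
        struct entry_model e[] = {
            { 150,  1, "swapper",  TIMER_STATS_FLAG_DEFERRABLE },
            {  30, 42, "events/0", 0 },
        };

        for (int i = 0; i < 2; i++) {
            if (e[i].timer_flag & TIMER_STATS_FLAG_DEFERRABLE)
                printf("%4luD, %5d %-16s\n", e[i].count, e[i].pid, e[i].comm);
            else
                printf(" %4lu, %5d %-16s\n", e[i].count, e[i].pid, e[i].comm);
        }
        return 0;
    }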
diff --git a/kernel/timer.c b/kernel/timer.c index 1a69705c2fb9..6ce1952eea7d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -103,14 +103,14 @@ static inline tvec_base_t *tbase_get_base(tvec_base_t *base) | |||
| 103 | static inline void timer_set_deferrable(struct timer_list *timer) | 103 | static inline void timer_set_deferrable(struct timer_list *timer) |
| 104 | { | 104 | { |
| 105 | timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | | 105 | timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | |
| 106 | TBASE_DEFERRABLE_FLAG)); | 106 | TBASE_DEFERRABLE_FLAG)); |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | static inline void | 109 | static inline void |
| 110 | timer_set_base(struct timer_list *timer, tvec_base_t *new_base) | 110 | timer_set_base(struct timer_list *timer, tvec_base_t *new_base) |
| 111 | { | 111 | { |
| 112 | timer->base = (tvec_base_t *)((unsigned long)(new_base) | | 112 | timer->base = (tvec_base_t *)((unsigned long)(new_base) | |
| 113 | tbase_get_deferrable(timer->base)); | 113 | tbase_get_deferrable(timer->base)); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | /** | 116 | /** |
| @@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | |||
| 305 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | 305 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); |
| 306 | timer->start_pid = current->pid; | 306 | timer->start_pid = current->pid; |
| 307 | } | 307 | } |
| 308 | |||
| 309 | static void timer_stats_account_timer(struct timer_list *timer) | ||
| 310 | { | ||
| 311 | unsigned int flag = 0; | ||
| 312 | |||
| 313 | if (unlikely(tbase_get_deferrable(timer->base))) | ||
| 314 | flag |= TIMER_STATS_FLAG_DEFERRABLE; | ||
| 315 | |||
| 316 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | ||
| 317 | timer->function, timer->start_comm, flag); | ||
| 318 | } | ||
| 319 | |||
| 320 | #else | ||
| 321 | static void timer_stats_account_timer(struct timer_list *timer) {} | ||
| 308 | #endif | 322 | #endif |
| 309 | 323 | ||
| 310 | /** | 324 | /** |
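The #else branch gives timer_stats_account_timer() an empty stub when CONFIG_TIMER_STATS is off, so __run_timers() can call it without an #ifdef. A runnable model of the pattern, with a made-up feature switch:

    #include <stdio.h>

    #define FEATURE_STATS 1   /* stands in for CONFIG_TIMER_STATS */

    #if FEATURE_STATS
    static void account(const char *what) { printf("accounted: %s\n", what); }
    #else
    static void account(const char *what) { (void)what; }   /* empty stub */
    #endif

    int main(void)
    {
        account("timer");   /* the call site needs no #ifdef either way */
        return 0;
    }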
| @@ -431,10 +445,10 @@ EXPORT_SYMBOL(__mod_timer); | |||
| 431 | void add_timer_on(struct timer_list *timer, int cpu) | 445 | void add_timer_on(struct timer_list *timer, int cpu) |
| 432 | { | 446 | { |
| 433 | tvec_base_t *base = per_cpu(tvec_bases, cpu); | 447 | tvec_base_t *base = per_cpu(tvec_bases, cpu); |
| 434 | unsigned long flags; | 448 | unsigned long flags; |
| 435 | 449 | ||
| 436 | timer_stats_timer_set_start_info(timer); | 450 | timer_stats_timer_set_start_info(timer); |
| 437 | BUG_ON(timer_pending(timer) || !timer->function); | 451 | BUG_ON(timer_pending(timer) || !timer->function); |
| 438 | spin_lock_irqsave(&base->lock, flags); | 452 | spin_lock_irqsave(&base->lock, flags); |
| 439 | timer_set_base(timer, base); | 453 | timer_set_base(timer, base); |
| 440 | internal_add_timer(base, timer); | 454 | internal_add_timer(base, timer); |
| @@ -613,7 +627,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
| 613 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 627 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
| 614 | struct list_head work_list; | 628 | struct list_head work_list; |
| 615 | struct list_head *head = &work_list; | 629 | struct list_head *head = &work_list; |
| 616 | int index = base->timer_jiffies & TVR_MASK; | 630 | int index = base->timer_jiffies & TVR_MASK; |
| 617 | 631 | ||
| 618 | /* | 632 | /* |
| 619 | * Cascade timers: | 633 | * Cascade timers: |
| @@ -630,8 +644,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
| 630 | unsigned long data; | 644 | unsigned long data; |
| 631 | 645 | ||
| 632 | timer = list_first_entry(head, struct timer_list,entry); | 646 | timer = list_first_entry(head, struct timer_list,entry); |
| 633 | fn = timer->function; | 647 | fn = timer->function; |
| 634 | data = timer->data; | 648 | data = timer->data; |
| 635 | 649 | ||
| 636 | timer_stats_account_timer(timer); | 650 | timer_stats_account_timer(timer); |
| 637 | 651 | ||
| @@ -675,8 +689,8 @@ static unsigned long __next_timer_interrupt(tvec_base_t *base) | |||
| 675 | index = slot = timer_jiffies & TVR_MASK; | 689 | index = slot = timer_jiffies & TVR_MASK; |
| 676 | do { | 690 | do { |
| 677 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { | 691 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
| 678 | if (tbase_get_deferrable(nte->base)) | 692 | if (tbase_get_deferrable(nte->base)) |
| 679 | continue; | 693 | continue; |
| 680 | 694 | ||
| 681 | found = 1; | 695 | found = 1; |
| 682 | expires = nte->expires; | 696 | expires = nte->expires; |
| @@ -820,7 +834,7 @@ void update_process_times(int user_tick) | |||
| 820 | if (rcu_pending(cpu)) | 834 | if (rcu_pending(cpu)) |
| 821 | rcu_check_callbacks(cpu, user_tick); | 835 | rcu_check_callbacks(cpu, user_tick); |
| 822 | scheduler_tick(); | 836 | scheduler_tick(); |
| 823 | run_posix_cpu_timers(p); | 837 | run_posix_cpu_timers(p); |
| 824 | } | 838 | } |
| 825 | 839 | ||
| 826 | /* | 840 | /* |
| @@ -895,7 +909,7 @@ static inline void update_times(unsigned long ticks) | |||
| 895 | update_wall_time(); | 909 | update_wall_time(); |
| 896 | calc_load(ticks); | 910 | calc_load(ticks); |
| 897 | } | 911 | } |
| 898 | 912 | ||
| 899 | /* | 913 | /* |
| 900 | * The 64-bit jiffies value is not atomic - you MUST NOT read it | 914 | * The 64-bit jiffies value is not atomic - you MUST NOT read it |
| 901 | * without sampling the sequence number in xtime_lock. | 915 | * without sampling the sequence number in xtime_lock. |
| @@ -1091,7 +1105,7 @@ asmlinkage long sys_gettid(void) | |||
| 1091 | /** | 1105 | /** |
| 1092 | * do_sysinfo - fill in sysinfo struct | 1106 | * do_sysinfo - fill in sysinfo struct |
| 1093 | * @info: pointer to buffer to fill | 1107 | * @info: pointer to buffer to fill |
| 1094 | */ | 1108 | */ |
| 1095 | int do_sysinfo(struct sysinfo *info) | 1109 | int do_sysinfo(struct sysinfo *info) |
| 1096 | { | 1110 | { |
| 1097 | unsigned long mem_total, sav_total; | 1111 | unsigned long mem_total, sav_total; |
| @@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info) | |||
| 1114 | getnstimeofday(&tp); | 1128 | getnstimeofday(&tp); |
| 1115 | tp.tv_sec += wall_to_monotonic.tv_sec; | 1129 | tp.tv_sec += wall_to_monotonic.tv_sec; |
| 1116 | tp.tv_nsec += wall_to_monotonic.tv_nsec; | 1130 | tp.tv_nsec += wall_to_monotonic.tv_nsec; |
| 1131 | monotonic_to_bootbased(&tp); | ||
| 1117 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { | 1132 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { |
| 1118 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; | 1133 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; |
| 1119 | tp.tv_sec++; | 1134 | tp.tv_sec++; |
| @@ -1206,7 +1221,8 @@ static int __devinit init_timers_cpu(int cpu) | |||
| 1206 | /* | 1221 | /* |
| 1207 | * The APs use this path later in boot | 1222 | * The APs use this path later in boot |
| 1208 | */ | 1223 | */ |
| 1209 | base = kmalloc_node(sizeof(*base), GFP_KERNEL, | 1224 | base = kmalloc_node(sizeof(*base), |
| 1225 | GFP_KERNEL | __GFP_ZERO, | ||
| 1210 | cpu_to_node(cpu)); | 1226 | cpu_to_node(cpu)); |
| 1211 | if (!base) | 1227 | if (!base) |
| 1212 | return -ENOMEM; | 1228 | return -ENOMEM; |
| @@ -1217,7 +1233,6 @@ static int __devinit init_timers_cpu(int cpu) | |||
| 1217 | kfree(base); | 1233 | kfree(base); |
| 1218 | return -ENOMEM; | 1234 | return -ENOMEM; |
| 1219 | } | 1235 | } |
| 1220 | memset(base, 0, sizeof(*base)); | ||
| 1221 | per_cpu(tvec_bases, cpu) = base; | 1236 | per_cpu(tvec_bases, cpu) = base; |
| 1222 | } else { | 1237 | } else { |
| 1223 | /* | 1238 | /* |
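init_timers_cpu() now requests zeroed memory with __GFP_ZERO instead of following kmalloc_node() with a separate memset(), folding the clearing into the allocator. A userspace analogue of the same simplification, with calloc() standing in for __GFP_ZERO:

    #include <stdlib.h>
    #include <string.h>

    struct base_model { long vec[64]; };

    int main(void)
    {
        /* before: allocate, then clear by hand */
        struct base_model *a = malloc(sizeof(*a));
        if (!a)
            return 1;
        memset(a, 0, sizeof(*a));

        /* after: ask the allocator for zeroed memory in one call */
        struct base_model *b = calloc(1, sizeof(*b));
        if (!b) {
            free(a);
            return 1;
        }

        free(a);
        free(b);
        return 0;
    }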
| @@ -1334,194 +1349,6 @@ void __init init_timers(void) | |||
| 1334 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); | 1349 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); |
| 1335 | } | 1350 | } |
| 1336 | 1351 | ||
| 1337 | #ifdef CONFIG_TIME_INTERPOLATION | ||
| 1338 | |||
| 1339 | struct time_interpolator *time_interpolator __read_mostly; | ||
| 1340 | static struct time_interpolator *time_interpolator_list __read_mostly; | ||
| 1341 | static DEFINE_SPINLOCK(time_interpolator_lock); | ||
| 1342 | |||
| 1343 | static inline cycles_t time_interpolator_get_cycles(unsigned int src) | ||
| 1344 | { | ||
| 1345 | unsigned long (*x)(void); | ||
| 1346 | |||
| 1347 | switch (src) | ||
| 1348 | { | ||
| 1349 | case TIME_SOURCE_FUNCTION: | ||
| 1350 | x = time_interpolator->addr; | ||
| 1351 | return x(); | ||
| 1352 | |||
| 1353 | case TIME_SOURCE_MMIO64 : | ||
| 1354 | return readq_relaxed((void __iomem *)time_interpolator->addr); | ||
| 1355 | |||
| 1356 | case TIME_SOURCE_MMIO32 : | ||
| 1357 | return readl_relaxed((void __iomem *)time_interpolator->addr); | ||
| 1358 | |||
| 1359 | default: return get_cycles(); | ||
| 1360 | } | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | static inline u64 time_interpolator_get_counter(int writelock) | ||
| 1364 | { | ||
| 1365 | unsigned int src = time_interpolator->source; | ||
| 1366 | |||
| 1367 | if (time_interpolator->jitter) | ||
| 1368 | { | ||
| 1369 | cycles_t lcycle; | ||
| 1370 | cycles_t now; | ||
| 1371 | |||
| 1372 | do { | ||
| 1373 | lcycle = time_interpolator->last_cycle; | ||
| 1374 | now = time_interpolator_get_cycles(src); | ||
| 1375 | if (lcycle && time_after(lcycle, now)) | ||
| 1376 | return lcycle; | ||
| 1377 | |||
| 1378 | /* When holding the xtime write lock, there's no need | ||
| 1379 | * to add the overhead of the cmpxchg. Readers are | ||
| 1380 | * forced to retry until the write lock is released. | ||
| 1381 | */ | ||
| 1382 | if (writelock) { | ||
| 1383 | time_interpolator->last_cycle = now; | ||
| 1384 | return now; | ||
| 1385 | } | ||
| 1386 | /* Keep track of the last timer value returned. The use of cmpxchg here | ||
| 1387 | * will cause contention in an SMP environment. | ||
| 1388 | */ | ||
| 1389 | } while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle)); | ||
| 1390 | return now; | ||
| 1391 | } | ||
| 1392 | else | ||
| 1393 | return time_interpolator_get_cycles(src); | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | void time_interpolator_reset(void) | ||
| 1397 | { | ||
| 1398 | time_interpolator->offset = 0; | ||
| 1399 | time_interpolator->last_counter = time_interpolator_get_counter(1); | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | #define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) | ||
| 1403 | |||
| 1404 | unsigned long time_interpolator_get_offset(void) | ||
| 1405 | { | ||
| 1406 | /* If we do not have a time interpolator set up then just return zero */ | ||
| 1407 | if (!time_interpolator) | ||
| 1408 | return 0; | ||
| 1409 | |||
| 1410 | return time_interpolator->offset + | ||
| 1411 | GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator); | ||
| 1412 | } | ||
| 1413 | |||
| 1414 | #define INTERPOLATOR_ADJUST 65536 | ||
| 1415 | #define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST | ||
| 1416 | |||
| 1417 | void time_interpolator_update(long delta_nsec) | ||
| 1418 | { | ||
| 1419 | u64 counter; | ||
| 1420 | unsigned long offset; | ||
| 1421 | |||
| 1422 | /* If there is no time interpolator set up then do nothing */ | ||
| 1423 | if (!time_interpolator) | ||
| 1424 | return; | ||
| 1425 | |||
| 1426 | /* | ||
| 1427 | * The interpolator compensates for late ticks by accumulating the late | ||
| 1428 | * time in time_interpolator->offset. A tick earlier than expected will | ||
| 1429 | * lead to a reset of the offset and a corresponding jump of the clock | ||
| 1430 | * forward. Again this only works if the interpolator clock is running | ||
| 1431 | * slightly slower than the regular clock and the tuning logic ensures | ||
| 1432 | * that. | ||
| 1433 | */ | ||
| 1434 | |||
| 1435 | counter = time_interpolator_get_counter(1); | ||
| 1436 | offset = time_interpolator->offset + | ||
| 1437 | GET_TI_NSECS(counter, time_interpolator); | ||
| 1438 | |||
| 1439 | if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) | ||
| 1440 | time_interpolator->offset = offset - delta_nsec; | ||
| 1441 | else { | ||
| 1442 | time_interpolator->skips++; | ||
| 1443 | time_interpolator->ns_skipped += delta_nsec - offset; | ||
| 1444 | time_interpolator->offset = 0; | ||
| 1445 | } | ||
| 1446 | time_interpolator->last_counter = counter; | ||
| 1447 | |||
| 1448 | /* Tuning logic for time interpolator invoked every minute or so. | ||
| 1449 | * Decrease interpolator clock speed if no skips occurred and an offset is carried. | ||
| 1450 | * Increase interpolator clock speed if we skip too much time. | ||
| 1451 | */ | ||
| 1452 | if (jiffies % INTERPOLATOR_ADJUST == 0) | ||
| 1453 | { | ||
| 1454 | if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec) | ||
| 1455 | time_interpolator->nsec_per_cyc--; | ||
| 1456 | if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) | ||
| 1457 | time_interpolator->nsec_per_cyc++; | ||
| 1458 | time_interpolator->skips = 0; | ||
| 1459 | time_interpolator->ns_skipped = 0; | ||
| 1460 | } | ||
| 1461 | } | ||
| 1462 | |||
| 1463 | static inline int | ||
| 1464 | is_better_time_interpolator(struct time_interpolator *new) | ||
| 1465 | { | ||
| 1466 | if (!time_interpolator) | ||
| 1467 | return 1; | ||
| 1468 | return new->frequency > 2*time_interpolator->frequency || | ||
| 1469 | (unsigned long)new->drift < (unsigned long)time_interpolator->drift; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | void | ||
| 1473 | register_time_interpolator(struct time_interpolator *ti) | ||
| 1474 | { | ||
| 1475 | unsigned long flags; | ||
| 1476 | |||
| 1477 | /* Sanity check */ | ||
| 1478 | BUG_ON(ti->frequency == 0 || ti->mask == 0); | ||
| 1479 | |||
| 1480 | ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; | ||
| 1481 | spin_lock(&time_interpolator_lock); | ||
| 1482 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 1483 | if (is_better_time_interpolator(ti)) { | ||
| 1484 | time_interpolator = ti; | ||
| 1485 | time_interpolator_reset(); | ||
| 1486 | } | ||
| 1487 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 1488 | |||
| 1489 | ti->next = time_interpolator_list; | ||
| 1490 | time_interpolator_list = ti; | ||
| 1491 | spin_unlock(&time_interpolator_lock); | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | void | ||
| 1495 | unregister_time_interpolator(struct time_interpolator *ti) | ||
| 1496 | { | ||
| 1497 | struct time_interpolator *curr, **prev; | ||
| 1498 | unsigned long flags; | ||
| 1499 | |||
| 1500 | spin_lock(&time_interpolator_lock); | ||
| 1501 | prev = &time_interpolator_list; | ||
| 1502 | for (curr = *prev; curr; curr = curr->next) { | ||
| 1503 | if (curr == ti) { | ||
| 1504 | *prev = curr->next; | ||
| 1505 | break; | ||
| 1506 | } | ||
| 1507 | prev = &curr->next; | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 1511 | if (ti == time_interpolator) { | ||
| 1512 | /* we lost the best time-interpolator: */ | ||
| 1513 | time_interpolator = NULL; | ||
| 1514 | /* find the next-best interpolator */ | ||
| 1515 | for (curr = time_interpolator_list; curr; curr = curr->next) | ||
| 1516 | if (is_better_time_interpolator(curr)) | ||
| 1517 | time_interpolator = curr; | ||
| 1518 | time_interpolator_reset(); | ||
| 1519 | } | ||
| 1520 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 1521 | spin_unlock(&time_interpolator_lock); | ||
| 1522 | } | ||
| 1523 | #endif /* CONFIG_TIME_INTERPOLATION */ | ||
| 1524 | |||
| 1525 | /** | 1352 | /** |
| 1526 | * msleep - sleep safely even with waitqueue interruptions | 1353 | * msleep - sleep safely even with waitqueue interruptions |
| 1527 | * @msecs: Time in milliseconds to sleep for | 1354 | * @msecs: Time in milliseconds to sleep for |
diff --git a/kernel/user.c b/kernel/user.c index 4869563080e9..e7d11cef6998 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
| @@ -14,20 +14,19 @@ | |||
| 14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
| 15 | #include <linux/key.h> | 15 | #include <linux/key.h> |
| 16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/user_namespace.h> | ||
| 17 | 19 | ||
| 18 | /* | 20 | /* |
| 19 | * UID task count cache, to get fast user lookup in "alloc_uid" | 21 | * UID task count cache, to get fast user lookup in "alloc_uid" |
| 20 | * when changing user ID's (ie setuid() and friends). | 22 | * when changing user ID's (ie setuid() and friends). |
| 21 | */ | 23 | */ |
| 22 | 24 | ||
| 23 | #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) | ||
| 24 | #define UIDHASH_SZ (1 << UIDHASH_BITS) | ||
| 25 | #define UIDHASH_MASK (UIDHASH_SZ - 1) | 25 | #define UIDHASH_MASK (UIDHASH_SZ - 1) |
| 26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) | 26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) |
| 27 | #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) | 27 | #define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid))) |
| 28 | 28 | ||
| 29 | static struct kmem_cache *uid_cachep; | 29 | static struct kmem_cache *uid_cachep; |
| 30 | static struct list_head uidhash_table[UIDHASH_SZ]; | ||
| 31 | 30 | ||
| 32 | /* | 31 | /* |
| 33 | * The uidhash_lock is mostly taken from process context, but it is | 32 | * The uidhash_lock is mostly taken from process context, but it is |
| @@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid) | |||
| 94 | { | 93 | { |
| 95 | struct user_struct *ret; | 94 | struct user_struct *ret; |
| 96 | unsigned long flags; | 95 | unsigned long flags; |
| 96 | struct user_namespace *ns = current->nsproxy->user_ns; | ||
| 97 | 97 | ||
| 98 | spin_lock_irqsave(&uidhash_lock, flags); | 98 | spin_lock_irqsave(&uidhash_lock, flags); |
| 99 | ret = uid_hash_find(uid, uidhashentry(uid)); | 99 | ret = uid_hash_find(uid, uidhashentry(ns, uid)); |
| 100 | spin_unlock_irqrestore(&uidhash_lock, flags); | 100 | spin_unlock_irqrestore(&uidhash_lock, flags); |
| 101 | return ret; | 101 | return ret; |
| 102 | } | 102 | } |
| @@ -120,9 +120,9 @@ void free_uid(struct user_struct *up) | |||
| 120 | } | 120 | } |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | struct user_struct * alloc_uid(uid_t uid) | 123 | struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) |
| 124 | { | 124 | { |
| 125 | struct list_head *hashent = uidhashentry(uid); | 125 | struct list_head *hashent = uidhashentry(ns, uid); |
| 126 | struct user_struct *up; | 126 | struct user_struct *up; |
| 127 | 127 | ||
| 128 | spin_lock_irq(&uidhash_lock); | 128 | spin_lock_irq(&uidhash_lock); |
| @@ -208,14 +208,14 @@ static int __init uid_cache_init(void) | |||
| 208 | int n; | 208 | int n; |
| 209 | 209 | ||
| 210 | uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), | 210 | uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), |
| 211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
| 212 | 212 | ||
| 213 | for(n = 0; n < UIDHASH_SZ; ++n) | 213 | for(n = 0; n < UIDHASH_SZ; ++n) |
| 214 | INIT_LIST_HEAD(uidhash_table + n); | 214 | INIT_LIST_HEAD(init_user_ns.uidhash_table + n); |
| 215 | 215 | ||
| 216 | /* Insert the root user immediately (init already runs as root) */ | 216 | /* Insert the root user immediately (init already runs as root) */ |
| 217 | spin_lock_irq(&uidhash_lock); | 217 | spin_lock_irq(&uidhash_lock); |
| 218 | uid_hash_insert(&root_user, uidhashentry(0)); | 218 | uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0)); |
| 219 | spin_unlock_irq(&uidhash_lock); | 219 | spin_unlock_irq(&uidhash_lock); |
| 220 | 220 | ||
| 221 | return 0; | 221 | return 0; |
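The uid hash moves from a single static table into struct user_namespace, so each namespace gets its own buckets; the hash function itself is unchanged. A self-contained model of the bucket computation (UIDHASH_BITS is config-dependent in the kernel, 8 assumed here):

    #include <stdio.h>

    #define UIDHASH_BITS 8   /* kernel picks 3 or 8 via CONFIG_BASE_SMALL */
    #define UIDHASH_SZ   (1 << UIDHASH_BITS)
    #define UIDHASH_MASK (UIDHASH_SZ - 1)

    static unsigned int uidhashfn(unsigned int uid)
    {
        /* mirrors __uidhashfn(): fold the high bits in, then mask */
        return ((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK;
    }

    int main(void)
    {
        printf("uid 0    -> bucket %u of %d\n", uidhashfn(0), UIDHASH_SZ);
        printf("uid 1000 -> bucket %u of %d\n", uidhashfn(1000), UIDHASH_SZ);
        return 0;
    }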
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c new file mode 100644 index 000000000000..d055d987850c --- /dev/null +++ b/kernel/user_namespace.c | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or | ||
| 3 | * modify it under the terms of the GNU General Public License as | ||
| 4 | * published by the Free Software Foundation, version 2 of the | ||
| 5 | * License. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/version.h> | ||
| 10 | #include <linux/nsproxy.h> | ||
| 11 | #include <linux/user_namespace.h> | ||
| 12 | |||
| 13 | struct user_namespace init_user_ns = { | ||
| 14 | .kref = { | ||
| 15 | .refcount = ATOMIC_INIT(2), | ||
| 16 | }, | ||
| 17 | .root_user = &root_user, | ||
| 18 | }; | ||
| 19 | |||
| 20 | EXPORT_SYMBOL_GPL(init_user_ns); | ||
| 21 | |||
| 22 | #ifdef CONFIG_USER_NS | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Clone a new ns copying an original user ns, setting refcount to 1 | ||
| 26 | * @old_ns: namespace to clone | ||
| 27 | * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise | ||
| 28 | */ | ||
| 29 | static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) | ||
| 30 | { | ||
| 31 | struct user_namespace *ns; | ||
| 32 | struct user_struct *new_user; | ||
| 33 | int n; | ||
| 34 | |||
| 35 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); | ||
| 36 | if (!ns) | ||
| 37 | return ERR_PTR(-ENOMEM); | ||
| 38 | |||
| 39 | kref_init(&ns->kref); | ||
| 40 | |||
| 41 | for (n = 0; n < UIDHASH_SZ; ++n) | ||
| 42 | INIT_LIST_HEAD(ns->uidhash_table + n); | ||
| 43 | |||
| 44 | /* Insert new root user. */ | ||
| 45 | ns->root_user = alloc_uid(ns, 0); | ||
| 46 | if (!ns->root_user) { | ||
| 47 | kfree(ns); | ||
| 48 | return ERR_PTR(-ENOMEM); | ||
| 49 | } | ||
| 50 | |||
| 51 | /* Reset current->user with a new one */ | ||
| 52 | new_user = alloc_uid(ns, current->uid); | ||
| 53 | if (!new_user) { | ||
| 54 | free_uid(ns->root_user); | ||
| 55 | kfree(ns); | ||
| 56 | return ERR_PTR(-ENOMEM); | ||
| 57 | } | ||
| 58 | |||
| 59 | switch_uid(new_user); | ||
| 60 | return ns; | ||
| 61 | } | ||
| 62 | |||
| 63 | struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) | ||
| 64 | { | ||
| 65 | struct user_namespace *new_ns; | ||
| 66 | |||
| 67 | BUG_ON(!old_ns); | ||
| 68 | get_user_ns(old_ns); | ||
| 69 | |||
| 70 | if (!(flags & CLONE_NEWUSER)) | ||
| 71 | return old_ns; | ||
| 72 | |||
| 73 | new_ns = clone_user_ns(old_ns); | ||
| 74 | |||
| 75 | put_user_ns(old_ns); | ||
| 76 | return new_ns; | ||
| 77 | } | ||
| 78 | |||
| 79 | void free_user_ns(struct kref *kref) | ||
| 80 | { | ||
| 81 | struct user_namespace *ns; | ||
| 82 | |||
| 83 | ns = container_of(kref, struct user_namespace, kref); | ||
| 84 | kfree(ns); | ||
| 85 | } | ||
| 86 | |||
| 87 | #endif /* CONFIG_USER_NS */ | ||
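copy_user_ns() pins the parent namespace before deciding whether to share it, and drops that reference again only when CLONE_NEWUSER forces a clone. A userspace model of the refcounting discipline, with kref reduced to a plain counter:

    #include <stdio.h>

    #define CLONE_NEWUSER 0x10000000   /* mirrors the kernel flag value */

    struct ns_model { int refcount; };

    static void get_ns(struct ns_model *ns) { ns->refcount++; }
    static void put_ns(struct ns_model *ns) { ns->refcount--; }

    static struct ns_model *copy_ns(int flags, struct ns_model *old_ns)
    {
        static struct ns_model child = { 1 };  /* stands in for clone_user_ns() */

        get_ns(old_ns);                 /* pin the parent while we decide */
        if (!(flags & CLONE_NEWUSER))
            return old_ns;              /* share it, keep the reference */

        put_ns(old_ns);                 /* fresh ns: release the parent */
        return &child;
    }

    int main(void)
    {
        struct ns_model parent = { 1 };

        copy_ns(0, &parent);
        printf("shared: parent refcount %d\n", parent.refcount);   /* 2 */

        copy_ns(CLONE_NEWUSER, &parent);
        printf("cloned: parent refcount %d\n", parent.refcount);   /* still 2 */
        return 0;
    }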
diff --git a/kernel/utsname.c b/kernel/utsname.c index 160c8c5136bd..9d8180a0f0d8 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/uts.h> | 13 | #include <linux/uts.h> |
| 14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
| 15 | #include <linux/version.h> | 15 | #include <linux/version.h> |
| 16 | #include <linux/err.h> | ||
| 16 | 17 | ||
| 17 | /* | 18 | /* |
| 18 | * Clone a new ns copying an original utsname, setting refcount to 1 | 19 | * Clone a new ns copying an original utsname, setting refcount to 1 |
| @@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
| 24 | struct uts_namespace *ns; | 25 | struct uts_namespace *ns; |
| 25 | 26 | ||
| 26 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); | 27 | ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); |
| 27 | if (ns) { | 28 | if (!ns) |
| 28 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 29 | return ERR_PTR(-ENOMEM); |
| 29 | kref_init(&ns->kref); | 30 | |
| 30 | } | 31 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
| 32 | kref_init(&ns->kref); | ||
| 31 | return ns; | 33 | return ns; |
| 32 | } | 34 | } |
| 33 | 35 | ||
| @@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
| 37 | * utsname of this process won't be seen by parent, and vice | 39 | * utsname of this process won't be seen by parent, and vice |
| 38 | * versa. | 40 | * versa. |
| 39 | */ | 41 | */ |
| 40 | struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) | 42 | struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) |
| 41 | { | 43 | { |
| 42 | struct uts_namespace *new_ns; | 44 | struct uts_namespace *new_ns; |
| 43 | 45 | ||
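clone_uts_ns() now reports allocation failure with ERR_PTR(-ENOMEM) instead of NULL, matching the user-namespace code above. A userspace model of the kernel's error-pointer convention (threshold value mirrors MAX_ERRNO):

    #include <stdio.h>
    #include <errno.h>

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        /* the top 4095 addresses are reserved for error codes */
        return (unsigned long)ptr >= (unsigned long)-4095;
    }

    int main(void)
    {
        void *ns = ERR_PTR(-ENOMEM);   /* what a failed clone returns */
        if (IS_ERR(ns))
            printf("clone failed: %ld\n", PTR_ERR(ns));
        return 0;
    }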
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index f22b9dbd2a9c..c76c06466bfd 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c | |||
| @@ -18,10 +18,7 @@ | |||
| 18 | static void *get_uts(ctl_table *table, int write) | 18 | static void *get_uts(ctl_table *table, int write) |
| 19 | { | 19 | { |
| 20 | char *which = table->data; | 20 | char *which = table->data; |
| 21 | #ifdef CONFIG_UTS_NS | 21 | |
| 22 | struct uts_namespace *uts_ns = current->nsproxy->uts_ns; | ||
| 23 | which = (which - (char *)&init_uts_ns) + (char *)uts_ns; | ||
| 24 | #endif | ||
| 25 | if (!write) | 22 | if (!write) |
| 26 | down_read(&uts_sem); | 23 | down_read(&uts_sem); |
| 27 | else | 24 | else |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3bebf73be976..58e5c152a6bb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -282,8 +282,8 @@ static int worker_thread(void *__cwq) | |||
| 282 | struct cpu_workqueue_struct *cwq = __cwq; | 282 | struct cpu_workqueue_struct *cwq = __cwq; |
| 283 | DEFINE_WAIT(wait); | 283 | DEFINE_WAIT(wait); |
| 284 | 284 | ||
| 285 | if (!cwq->wq->freezeable) | 285 | if (cwq->wq->freezeable) |
| 286 | current->flags |= PF_NOFREEZE; | 286 | set_freezable(); |
| 287 | 287 | ||
| 288 | set_user_nice(current, -5); | 288 | set_user_nice(current, -5); |
| 289 | 289 | ||
| @@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) | |||
| 382 | EXPORT_SYMBOL_GPL(flush_workqueue); | 382 | EXPORT_SYMBOL_GPL(flush_workqueue); |
| 383 | 383 | ||
| 384 | /* | 384 | /* |
| 385 | * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, | 385 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, |
| 386 | * so this work can't be re-armed in any way. | 386 | * so this work can't be re-armed in any way. |
| 387 | */ | 387 | */ |
| 388 | static int try_to_grab_pending(struct work_struct *work) | 388 | static int try_to_grab_pending(struct work_struct *work) |
| 389 | { | 389 | { |
| 390 | struct cpu_workqueue_struct *cwq; | 390 | struct cpu_workqueue_struct *cwq; |
| 391 | int ret = 0; | 391 | int ret = -1; |
| 392 | 392 | ||
| 393 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) | 393 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) |
| 394 | return 1; | 394 | return 0; |
| 395 | 395 | ||
| 396 | /* | 396 | /* |
| 397 | * The queueing is in progress, or it is already queued. Try to | 397 | * The queueing is in progress, or it is already queued. Try to |
| @@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work) | |||
| 457 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); | 457 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); |
| 458 | } | 458 | } |
| 459 | 459 | ||
| 460 | static int __cancel_work_timer(struct work_struct *work, | ||
| 461 | struct timer_list* timer) | ||
| 462 | { | ||
| 463 | int ret; | ||
| 464 | |||
| 465 | do { | ||
| 466 | ret = (timer && likely(del_timer(timer))); | ||
| 467 | if (!ret) | ||
| 468 | ret = try_to_grab_pending(work); | ||
| 469 | wait_on_work(work); | ||
| 470 | } while (unlikely(ret < 0)); | ||
| 471 | |||
| 472 | work_clear_pending(work); | ||
| 473 | return ret; | ||
| 474 | } | ||
| 475 | |||
| 460 | /** | 476 | /** |
| 461 | * cancel_work_sync - block until a work_struct's callback has terminated | 477 | * cancel_work_sync - block until a work_struct's callback has terminated |
| 462 | * @work: the work which is to be flushed | 478 | * @work: the work which is to be flushed |
| 463 | * | 479 | * |
| 480 | * Returns true if @work was pending. | ||
| 481 | * | ||
| 464 | * cancel_work_sync() will cancel the work if it is queued. If the work's | 482 | * cancel_work_sync() will cancel the work if it is queued. If the work's |
| 465 | * callback appears to be running, cancel_work_sync() will block until it | 483 | * callback appears to be running, cancel_work_sync() will block until it |
| 466 | * has completed. | 484 | * has completed. |
| @@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work) | |||
| 476 | * The caller must ensure that workqueue_struct on which this work was last | 494 | * The caller must ensure that workqueue_struct on which this work was last |
| 477 | * queued can't be destroyed before this function returns. | 495 | * queued can't be destroyed before this function returns. |
| 478 | */ | 496 | */ |
| 479 | void cancel_work_sync(struct work_struct *work) | 497 | int cancel_work_sync(struct work_struct *work) |
| 480 | { | 498 | { |
| 481 | while (!try_to_grab_pending(work)) | 499 | return __cancel_work_timer(work, NULL); |
| 482 | cpu_relax(); | ||
| 483 | wait_on_work(work); | ||
| 484 | work_clear_pending(work); | ||
| 485 | } | 500 | } |
| 486 | EXPORT_SYMBOL_GPL(cancel_work_sync); | 501 | EXPORT_SYMBOL_GPL(cancel_work_sync); |
| 487 | 502 | ||
| 488 | /** | 503 | /** |
| 489 | * cancel_rearming_delayed_work - reliably kill off a delayed work. | 504 | * cancel_delayed_work_sync - reliably kill off a delayed work. |
| 490 | * @dwork: the delayed work struct | 505 | * @dwork: the delayed work struct |
| 491 | * | 506 | * |
| 507 | * Returns true if @dwork was pending. | ||
| 508 | * | ||
| 492 | * It is possible to use this function if @dwork rearms itself via queue_work() | 509 | * It is possible to use this function if @dwork rearms itself via queue_work() |
| 493 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). | 510 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). |
| 494 | */ | 511 | */ |
| 495 | void cancel_rearming_delayed_work(struct delayed_work *dwork) | 512 | int cancel_delayed_work_sync(struct delayed_work *dwork) |
| 496 | { | 513 | { |
| 497 | while (!del_timer(&dwork->timer) && | 514 | return __cancel_work_timer(&dwork->work, &dwork->timer); |
| 498 | !try_to_grab_pending(&dwork->work)) | ||
| 499 | cpu_relax(); | ||
| 500 | wait_on_work(&dwork->work); | ||
| 501 | work_clear_pending(&dwork->work); | ||
| 502 | } | 515 | } |
| 503 | EXPORT_SYMBOL(cancel_rearming_delayed_work); | 516 | EXPORT_SYMBOL(cancel_delayed_work_sync); |
| 504 | 517 | ||
| 505 | static struct workqueue_struct *keventd_wq __read_mostly; | 518 | static struct workqueue_struct *keventd_wq __read_mostly; |
| 506 | 519 | ||
| @@ -739,18 +752,17 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
| 739 | if (cwq->thread == NULL) | 752 | if (cwq->thread == NULL) |
| 740 | return; | 753 | return; |
| 741 | 754 | ||
| 755 | flush_cpu_workqueue(cwq); | ||
| 742 | /* | 756 | /* |
| 743 | * If the caller is CPU_DEAD the single flush_cpu_workqueue() | 757 | * If the caller is CPU_DEAD and cwq->worklist was not empty, |
| 744 | * is not enough, a concurrent flush_workqueue() can insert a | 758 | * a concurrent flush_workqueue() can insert a barrier after us. |
| 745 | * barrier after us. | 759 | * However, in that case run_workqueue() won't return and check |
| 760 | * kthread_should_stop() until it flushes all work_struct's. | ||
| 746 | * When ->worklist becomes empty it is safe to exit because no | 761 | * When ->worklist becomes empty it is safe to exit because no |
| 747 | * more work_structs can be queued on this cwq: flush_workqueue | 762 | * more work_structs can be queued on this cwq: flush_workqueue |
| 748 | * checks list_empty(), and a "normal" queue_work() can't use | 763 | * checks list_empty(), and a "normal" queue_work() can't use |
| 749 | * a dead CPU. | 764 | * a dead CPU. |
| 750 | */ | 765 | */ |
| 751 | while (flush_cpu_workqueue(cwq)) | ||
| 752 | ; | ||
| 753 | |||
| 754 | kthread_stop(cwq->thread); | 766 | kthread_stop(cwq->thread); |
| 755 | cwq->thread = NULL; | 767 | cwq->thread = NULL; |
| 756 | } | 768 | } |
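__cancel_work_timer() folds both cancel paths into one retry loop around the new three-way try_to_grab_pending() contract: 1 means the work was queued and has been unlinked, 0 means it was not pending (the caller set the pending bit itself), and -1 means a queueing was in flight and the caller must retry. A single-threaded model of the loop, with the race simulated by a counter:

    #include <stdio.h>

    static int attempt;

    /* models the new contract: -1 = queueing in flight, retry;
     * 0 = work was idle; 1 = work was queued and we unlinked it */
    static int try_to_grab_pending_model(void)
    {
        /* pretend the first attempt races with a concurrent queue_work() */
        return attempt++ == 0 ? -1 : 1;
    }

    int main(void)
    {
        int ret;

        do {
            ret = try_to_grab_pending_model();
            /* the kernel calls wait_on_work() here on every pass */
        } while (ret < 0);              /* mirrors __cancel_work_timer() */

        printf("cancelled; work was pending: %d\n", ret);
        return 0;
    }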
