Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c             | 444
-rw-r--r--  kernel/auditfilter.c       |  54
-rw-r--r--  kernel/auditsc.c           | 349
-rw-r--r--  kernel/exit.c              |  88
-rw-r--r--  kernel/fork.c              |   1
-rw-r--r--  kernel/futex.c             |  42
-rw-r--r--  kernel/futex_compat.c      |   3
-rw-r--r--  kernel/hrtimer.c           |   2
-rw-r--r--  kernel/mutex.c             |  36
-rw-r--r--  kernel/power/Kconfig       |  65
-rw-r--r--  kernel/power/disk.c        | 204
-rw-r--r--  kernel/power/main.c        | 171
-rw-r--r--  kernel/power/power.h       |  90
-rw-r--r--  kernel/power/process.c     |   6
-rw-r--r--  kernel/power/snapshot.c    |  31
-rw-r--r--  kernel/power/swap.c        |  33
-rw-r--r--  kernel/power/swsusp.c      |  48
-rw-r--r--  kernel/power/user.c        | 109
-rw-r--r--  kernel/printk.c            |   4
-rw-r--r--  kernel/ptrace.c            |   8
-rw-r--r--  kernel/sched.c             |  38
-rw-r--r--  kernel/sched_fair.c        |   8
-rw-r--r--  kernel/signal.c            |  20
-rw-r--r--  kernel/softlockup.c        |  30
-rw-r--r--  kernel/sys_ni.c            |   1
-rw-r--r--  kernel/sysctl.c            |  11
-rw-r--r--  kernel/time.c              |   1
-rw-r--r--  kernel/time/tick-sched.c   |   2
-rw-r--r--  kernel/time/timekeeping.c  |   6
-rw-r--r--  kernel/time/timer_list.c   |   2
-rw-r--r--  kernel/timer.c             |   7
-rw-r--r--  kernel/wait.c              |   2
32 files changed, 1178 insertions(+), 738 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index f93c2713017d..c8555b180213 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -66,10 +66,11 @@
  * (Initialization happens after skb_init is called.) */
 static int audit_initialized;
 
-/* 0 - no auditing
- * 1 - auditing enabled
- * 2 - auditing enabled and configuration is locked/unchangeable. */
+#define AUDIT_OFF	0
+#define AUDIT_ON	1
+#define AUDIT_LOCKED	2
 int audit_enabled;
+int audit_ever_enabled;
 
 /* Default state when kernel boots without any parameters. */
 static int audit_default;
@@ -152,8 +153,10 @@ struct audit_buffer {
 
 static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
 {
-	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-	nlh->nlmsg_pid = pid;
+	if (ab) {
+		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+		nlh->nlmsg_pid = pid;
+	}
 }
 
 void audit_panic(const char *message)
@@ -163,7 +166,8 @@ void audit_panic(const char *message)
 	case AUDIT_FAIL_SILENT:
 		break;
 	case AUDIT_FAIL_PRINTK:
-		printk(KERN_ERR "audit: %s\n", message);
+		if (printk_ratelimit())
+			printk(KERN_ERR "audit: %s\n", message);
 		break;
 	case AUDIT_FAIL_PANIC:
 		panic("audit: %s\n", message);
@@ -231,161 +235,107 @@ void audit_log_lost(const char *message)
 	}
 
 	if (print) {
-		printk(KERN_WARNING
-		       "audit: audit_lost=%d audit_rate_limit=%d audit_backlog_limit=%d\n",
-		       atomic_read(&audit_lost),
-		       audit_rate_limit,
-		       audit_backlog_limit);
+		if (printk_ratelimit())
+			printk(KERN_WARNING
+			       "audit: audit_lost=%d audit_rate_limit=%d "
+			       "audit_backlog_limit=%d\n",
+			       atomic_read(&audit_lost),
+			       audit_rate_limit,
+			       audit_backlog_limit);
 		audit_panic(message);
 	}
 }
 
-static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
+static int audit_log_config_change(char *function_name, int new, int old,
+				   uid_t loginuid, u32 sid, int allow_changes)
 {
-	int res, rc = 0, old = audit_rate_limit;
-
-	/* check if we are locked */
-	if (audit_enabled == 2)
-		res = 0;
-	else
-		res = 1;
+	struct audit_buffer *ab;
+	int rc = 0;
 
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+	audit_log_format(ab, "%s=%d old=%d by auid=%u", function_name, new,
+			 old, loginuid);
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
-			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_rate_limit=%d old=%d by auid=%u"
-				" subj=%s res=%d",
-				limit, old, loginuid, ctx, res);
+
+		rc = selinux_sid_to_string(sid, &ctx, &len);
+		if (rc) {
+			audit_log_format(ab, " sid=%u", sid);
+			allow_changes = 0; /* Something weird, deny request */
+		} else {
+			audit_log_format(ab, " subj=%s", ctx);
 			kfree(ctx);
-		} else
-			res = 0; /* Something weird, deny request */
+		}
 	}
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-		"audit_rate_limit=%d old=%d by auid=%u res=%d",
-		limit, old, loginuid, res);
-
-	/* If we are allowed, make the change */
-	if (res == 1)
-		audit_rate_limit = limit;
-	/* Not allowed, update reason */
-	else if (rc == 0)
-		rc = -EPERM;
+	audit_log_format(ab, " res=%d", allow_changes);
+	audit_log_end(ab);
 	return rc;
 }
 
-static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
+static int audit_do_config_change(char *function_name, int *to_change,
+				  int new, uid_t loginuid, u32 sid)
 {
-	int res, rc = 0, old = audit_backlog_limit;
+	int allow_changes, rc = 0, old = *to_change;
 
 	/* check if we are locked */
-	if (audit_enabled == 2)
-		res = 0;
+	if (audit_enabled == AUDIT_LOCKED)
+		allow_changes = 0;
 	else
-		res = 1;
+		allow_changes = 1;
 
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
-			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_backlog_limit=%d old=%d by auid=%u"
-				" subj=%s res=%d",
-				limit, old, loginuid, ctx, res);
-			kfree(ctx);
-		} else
-			res = 0; /* Something weird, deny request */
+	if (audit_enabled != AUDIT_OFF) {
+		rc = audit_log_config_change(function_name, new, old,
+					     loginuid, sid, allow_changes);
+		if (rc)
+			allow_changes = 0;
 	}
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-		"audit_backlog_limit=%d old=%d by auid=%u res=%d",
-		limit, old, loginuid, res);
 
 	/* If we are allowed, make the change */
-	if (res == 1)
-		audit_backlog_limit = limit;
+	if (allow_changes == 1)
+		*to_change = new;
 	/* Not allowed, update reason */
 	else if (rc == 0)
 		rc = -EPERM;
 	return rc;
 }
 
-static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
+static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int res, rc = 0, old = audit_enabled;
+	return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
+				      limit, loginuid, sid);
+}
+
+static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
+{
+	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit,
+				      limit, loginuid, sid);
+}
 
-	if (state < 0 || state > 2)
+static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
+{
+	int rc;
+	if (state < AUDIT_OFF || state > AUDIT_LOCKED)
 		return -EINVAL;
 
-	/* check if we are locked */
-	if (audit_enabled == 2)
-		res = 0;
-	else
-		res = 1;
+	rc = audit_do_config_change("audit_enabled", &audit_enabled, state,
+				    loginuid, sid);
 
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
-			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_enabled=%d old=%d by auid=%u"
-				" subj=%s res=%d",
-				state, old, loginuid, ctx, res);
-			kfree(ctx);
-		} else
-			res = 0; /* Something weird, deny request */
-	}
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-		"audit_enabled=%d old=%d by auid=%u res=%d",
-		state, old, loginuid, res);
+	if (!rc)
+		audit_ever_enabled |= !!state;
 
-	/* If we are allowed, make the change */
-	if (res == 1)
-		audit_enabled = state;
-	/* Not allowed, update reason */
-	else if (rc == 0)
-		rc = -EPERM;
 	return rc;
 }
 
 static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 {
-	int res, rc = 0, old = audit_failure;
-
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
 
-	/* check if we are locked */
-	if (audit_enabled == 2)
-		res = 0;
-	else
-		res = 1;
-
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
-			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_failure=%d old=%d by auid=%u"
-				" subj=%s res=%d",
-				state, old, loginuid, ctx, res);
-			kfree(ctx);
-		} else
-			res = 0; /* Something weird, deny request */
-	}
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-		"audit_failure=%d old=%d by auid=%u res=%d",
-		state, old, loginuid, res);
-
-	/* If we are allowed, make the change */
-	if (res == 1)
-		audit_failure = state;
-	/* Not allowed, update reason */
-	else if (rc == 0)
-		rc = -EPERM;
-	return rc;
+	return audit_do_config_change("audit_failure", &audit_failure, state,
+				      loginuid, sid);
 }
 
 static int kauditd_thread(void *dummy)
@@ -405,7 +355,11 @@ static int kauditd_thread(void *dummy)
 				audit_pid = 0;
 			}
 		} else {
-			printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0));
+			if (printk_ratelimit())
+				printk(KERN_NOTICE "%s\n", skb->data +
+					NLMSG_SPACE(0));
+			else
+				audit_log_lost("printk limit exceeded\n");
 			kfree_skb(skb);
 		}
 	} else {
@@ -573,6 +527,33 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	return err;
 }
 
+static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
+				     u32 pid, u32 uid, uid_t auid, u32 sid)
+{
+	int rc = 0;
+	char *ctx = NULL;
+	u32 len;
+
+	if (!audit_enabled) {
+		*ab = NULL;
+		return rc;
+	}
+
+	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+	audit_log_format(*ab, "user pid=%d uid=%u auid=%u",
+			 pid, uid, auid);
+	if (sid) {
+		rc = selinux_sid_to_string(sid, &ctx, &len);
+		if (rc)
+			audit_log_format(*ab, " ssid=%u", sid);
+		else
+			audit_log_format(*ab, " subj=%s", ctx);
+		kfree(ctx);
+	}
+
+	return rc;
+}
+
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	u32			uid, pid, seq, sid;
@@ -583,7 +564,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	u16			msg_type = nlh->nlmsg_type;
 	uid_t			loginuid; /* loginuid of sender */
 	struct audit_sig_info	*sig_data;
-	char			*ctx;
+	char			*ctx = NULL;
 	u32			len;
 
 	err = audit_netlink_ok(skb, msg_type);
@@ -634,23 +615,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_PID) {
-			int old   = audit_pid;
-			if (sid) {
-				if ((err = selinux_sid_to_string(
-						sid, &ctx, &len)))
-					return err;
-				else
-					audit_log(NULL, GFP_KERNEL,
-						AUDIT_CONFIG_CHANGE,
-						"audit_pid=%d old=%d by auid=%u subj=%s",
-						status_get->pid, old,
-						loginuid, ctx);
-				kfree(ctx);
-			} else
-				audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-					"audit_pid=%d old=%d by auid=%u",
-					  status_get->pid, old, loginuid);
-			audit_pid = status_get->pid;
+			int new_pid = status_get->pid;
+
+			if (audit_enabled != AUDIT_OFF)
+				audit_log_config_change("audit_pid", new_pid,
+							audit_pid, loginuid,
+							sid, 1);
+
+			audit_pid = new_pid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
 			err = audit_set_rate_limit(status_get->rate_limit,
@@ -673,64 +645,35 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				if (err)
 					break;
 			}
-			ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
-			if (ab) {
-				audit_log_format(ab,
-						 "user pid=%d uid=%u auid=%u",
-						 pid, uid, loginuid);
-				if (sid) {
-					if (selinux_sid_to_string(
-							sid, &ctx, &len)) {
-						audit_log_format(ab,
-							" ssid=%u", sid);
-						/* Maybe call audit_panic? */
-					} else
-						audit_log_format(ab,
-							" subj=%s", ctx);
-					kfree(ctx);
-				}
-				if (msg_type != AUDIT_USER_TTY)
-					audit_log_format(ab, " msg='%.1024s'",
-							 (char *)data);
-				else {
-					int size;
-
-					audit_log_format(ab, " msg=");
-					size = nlmsg_len(nlh);
-					audit_log_n_untrustedstring(ab, size,
-								    data);
-				}
-				audit_set_pid(ab, pid);
-				audit_log_end(ab);
-			}
+			audit_log_common_recv_msg(&ab, msg_type, pid, uid,
+						  loginuid, sid);
+
+			if (msg_type != AUDIT_USER_TTY)
+				audit_log_format(ab, " msg='%.1024s'",
+						 (char *)data);
+			else {
+				int size;
+
+				audit_log_format(ab, " msg=");
+				size = nlmsg_len(nlh);
+				audit_log_n_untrustedstring(ab, size, data);
+			}
+			audit_set_pid(ab, pid);
+			audit_log_end(ab);
 		}
 		break;
 	case AUDIT_ADD:
 	case AUDIT_DEL:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule))
 			return -EINVAL;
-		if (audit_enabled == 2) {
-			ab = audit_log_start(NULL, GFP_KERNEL,
-					     AUDIT_CONFIG_CHANGE);
-			if (ab) {
-				audit_log_format(ab,
-						 "pid=%d uid=%u auid=%u",
-						 pid, uid, loginuid);
-				if (sid) {
-					if (selinux_sid_to_string(
-							sid, &ctx, &len)) {
-						audit_log_format(ab,
-							" ssid=%u", sid);
-						/* Maybe call audit_panic? */
-					} else
-						audit_log_format(ab,
-							" subj=%s", ctx);
-					kfree(ctx);
-				}
-				audit_log_format(ab, " audit_enabled=%d res=0",
-						 audit_enabled);
-				audit_log_end(ab);
-			}
+		if (audit_enabled == AUDIT_LOCKED) {
+			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+						  uid, loginuid, sid);
+
+			audit_log_format(ab, " audit_enabled=%d res=0",
+					 audit_enabled);
+			audit_log_end(ab);
 			return -EPERM;
 		}
 		/* fallthrough */
@@ -743,28 +686,13 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_DEL_RULE:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
 			return -EINVAL;
-		if (audit_enabled == 2) {
-			ab = audit_log_start(NULL, GFP_KERNEL,
-					     AUDIT_CONFIG_CHANGE);
-			if (ab) {
-				audit_log_format(ab,
-						 "pid=%d uid=%u auid=%u",
-						 pid, uid, loginuid);
-				if (sid) {
-					if (selinux_sid_to_string(
-							sid, &ctx, &len)) {
-						audit_log_format(ab,
-							" ssid=%u", sid);
-						/* Maybe call audit_panic? */
-					} else
-						audit_log_format(ab,
-							" subj=%s", ctx);
-					kfree(ctx);
-				}
-				audit_log_format(ab, " audit_enabled=%d res=0",
-						 audit_enabled);
-				audit_log_end(ab);
-			}
+		if (audit_enabled == AUDIT_LOCKED) {
+			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+						  uid, loginuid, sid);
+
+			audit_log_format(ab, " audit_enabled=%d res=0",
+					 audit_enabled);
+			audit_log_end(ab);
 			return -EPERM;
 		}
 		/* fallthrough */
@@ -775,19 +703,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		break;
 	case AUDIT_TRIM:
 		audit_trim_trees();
-		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-		if (!ab)
-			break;
-		audit_log_format(ab, "auid=%u", loginuid);
-		if (sid) {
-			u32 len;
-			ctx = NULL;
-			if (selinux_sid_to_string(sid, &ctx, &len))
-				audit_log_format(ab, " ssid=%u", sid);
-			else
-				audit_log_format(ab, " subj=%s", ctx);
-			kfree(ctx);
-		}
+
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+					  uid, loginuid, sid);
+
 		audit_log_format(ab, " op=trim res=1");
 		audit_log_end(ab);
 		break;
@@ -817,22 +736,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		/* OK, here comes... */
 		err = audit_tag_tree(old, new);
 
-		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-		if (!ab) {
-			kfree(old);
-			kfree(new);
-			break;
-		}
-		audit_log_format(ab, "auid=%u", loginuid);
-		if (sid) {
-			u32 len;
-			ctx = NULL;
-			if (selinux_sid_to_string(sid, &ctx, &len))
-				audit_log_format(ab, " ssid=%u", sid);
-			else
-				audit_log_format(ab, " subj=%s", ctx);
-			kfree(ctx);
-		}
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+					  uid, loginuid, sid);
+
 		audit_log_format(ab, " op=make_equiv old=");
 		audit_log_untrustedstring(ab, old);
 		audit_log_format(ab, " new=");
@@ -965,6 +871,7 @@ static int __init audit_init(void)
 	skb_queue_head_init(&audit_skb_queue);
 	audit_initialized = 1;
 	audit_enabled = audit_default;
+	audit_ever_enabled |= !!audit_default;
 
 	/* Register the callback with selinux.  This callback will be invoked
 	 * when a new policy is loaded. */
@@ -992,8 +899,10 @@ static int __init audit_enable(char *str)
 	printk(KERN_INFO "audit: %s%s\n",
 	       audit_default ? "enabled" : "disabled",
 	       audit_initialized ? "" : " (after initialization)");
-	if (audit_initialized)
+	if (audit_initialized) {
 		audit_enabled = audit_default;
+		audit_ever_enabled |= !!audit_default;
+	}
 	return 1;
 }
 
@@ -1130,7 +1039,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 {
 	struct audit_buffer	*ab	= NULL;
 	struct timespec		t;
-	unsigned int		serial;
+	unsigned int		uninitialized_var(serial);
 	int reserve;
 	unsigned long timeout_start = jiffies;
 
@@ -1164,7 +1073,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 			remove_wait_queue(&audit_backlog_wait, &wait);
 			continue;
 		}
-		if (audit_rate_check())
+		if (audit_rate_check() && printk_ratelimit())
 			printk(KERN_WARNING
 			       "audit: audit_backlog=%d > "
 			       "audit_backlog_limit=%d\n",
@@ -1200,13 +1109,17 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 static inline int audit_expand(struct audit_buffer *ab, int extra)
 {
 	struct sk_buff *skb = ab->skb;
-	int ret = pskb_expand_head(skb, skb_headroom(skb), extra,
-				   ab->gfp_mask);
+	int oldtail = skb_tailroom(skb);
+	int ret = pskb_expand_head(skb, 0, extra, ab->gfp_mask);
+	int newtail = skb_tailroom(skb);
+
 	if (ret < 0) {
 		audit_log_lost("out of memory in audit_expand");
 		return 0;
 	}
-	return skb_tailroom(skb);
+
+	skb->truesize += newtail - oldtail;
+	return newtail;
 }
 
 /*
@@ -1245,6 +1158,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 			goto out;
 		len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
 	}
+	va_end(args2);
 	if (len > 0)
 		skb_put(skb, len);
 out:
@@ -1346,6 +1260,21 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 }
 
 /**
+ * audit_string_contains_control - does a string need to be logged in hex
+ * @string - string to be checked
+ * @len - max length of the string to check
+ */
+int audit_string_contains_control(const char *string, size_t len)
+{
+	const unsigned char *p;
+	for (p = string; p < (const unsigned char *)string + len && *p; p++) {
+		if (*p == '"' || *p < 0x21 || *p > 0x7f)
+			return 1;
+	}
+	return 0;
+}
+
+/**
  * audit_log_n_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
  * @len: length of string (not including trailing null)
@@ -1359,19 +1288,13 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
  * The caller specifies the number of characters in the string to log, which may
  * or may not be the entire string.
  */
-const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
-					const char *string)
+void audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
+				 const char *string)
 {
-	const unsigned char *p;
-
-	for (p = string; p < (const unsigned char *)string + len && *p; p++) {
-		if (*p == '"' || *p < 0x21 || *p > 0x7f) {
-			audit_log_hex(ab, string, len);
-			return string + len + 1;
-		}
-	}
-	audit_log_n_string(ab, len, string);
-	return p + 1;
+	if (audit_string_contains_control(string, len))
+		audit_log_hex(ab, string, len);
+	else
+		audit_log_n_string(ab, len, string);
 }
 
@@ -1382,9 +1305,9 @@ const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
  * Same as audit_log_n_untrustedstring(), except that strlen is used to
  * determine string length.
  */
-const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
 {
-	return audit_log_n_untrustedstring(ab, strlen(string), string);
+	audit_log_n_untrustedstring(ab, strlen(string), string);
 }
 
 /* This is a helper-function to print the escaped d_path */
@@ -1433,8 +1356,11 @@ void audit_log_end(struct audit_buffer *ab)
 			skb_queue_tail(&audit_skb_queue, ab->skb);
 			ab->skb = NULL;
 			wake_up_interruptible(&kauditd_wait);
+		} else if (printk_ratelimit()) {
+			struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+
+			printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));
 		} else {
-			printk(KERN_NOTICE "%s\n", ab->skb->data + NLMSG_SPACE(0));
+			audit_log_lost("printk limit exceeded\n");
 		}
 	}
 	audit_buffer_free(ab);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5d96f2cc7be8..6f19fd477aac 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -95,6 +95,8 @@ extern struct inotify_handle *audit_ih;
 /* Inotify events we care about. */
 #define AUDIT_IN_WATCH	IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
 
+extern int audit_enabled;
+
 void audit_free_parent(struct inotify_watch *i_watch)
 {
 	struct audit_parent *parent;
@@ -974,7 +976,6 @@ static void audit_update_watch(struct audit_parent *parent,
 	struct audit_watch *owatch, *nwatch, *nextw;
 	struct audit_krule *r, *nextr;
 	struct audit_entry *oentry, *nentry;
-	struct audit_buffer *ab;
 
 	mutex_lock(&audit_filter_mutex);
 	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
@@ -1014,13 +1015,18 @@ static void audit_update_watch(struct audit_parent *parent,
 			call_rcu(&oentry->rcu, audit_free_rule_rcu);
 		}
 
-		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-		audit_log_format(ab, "op=updated rules specifying path=");
-		audit_log_untrustedstring(ab, owatch->path);
-		audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
-		audit_log_format(ab, " list=%d res=1", r->listnr);
-		audit_log_end(ab);
-
+		if (audit_enabled) {
+			struct audit_buffer *ab;
+			ab = audit_log_start(NULL, GFP_KERNEL,
+				AUDIT_CONFIG_CHANGE);
+			audit_log_format(ab,
+				"op=updated rules specifying path=");
+			audit_log_untrustedstring(ab, owatch->path);
+			audit_log_format(ab, " with dev=%u ino=%lu\n",
+				 dev, ino);
+			audit_log_format(ab, " list=%d res=1", r->listnr);
+			audit_log_end(ab);
+		}
 		audit_remove_watch(owatch);
 		goto add_watch_to_parent; /* event applies to a single watch */
 	}
@@ -1039,25 +1045,28 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 	struct audit_watch *w, *nextw;
 	struct audit_krule *r, *nextr;
 	struct audit_entry *e;
-	struct audit_buffer *ab;
 
 	mutex_lock(&audit_filter_mutex);
 	parent->flags |= AUDIT_PARENT_INVALID;
 	list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
 		list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
 			e = container_of(r, struct audit_entry, rule);
-
-			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-			audit_log_format(ab, "op=remove rule path=");
-			audit_log_untrustedstring(ab, w->path);
-			if (r->filterkey) {
-				audit_log_format(ab, " key=");
-				audit_log_untrustedstring(ab, r->filterkey);
-			} else
-				audit_log_format(ab, " key=(null)");
-			audit_log_format(ab, " list=%d res=1", r->listnr);
-			audit_log_end(ab);
-
+			if (audit_enabled) {
+				struct audit_buffer *ab;
+				ab = audit_log_start(NULL, GFP_KERNEL,
+					AUDIT_CONFIG_CHANGE);
+				audit_log_format(ab, "op=remove rule path=");
+				audit_log_untrustedstring(ab, w->path);
+				if (r->filterkey) {
+					audit_log_format(ab, " key=");
+					audit_log_untrustedstring(ab,
+							r->filterkey);
+				} else
+					audit_log_format(ab, " key=(null)");
+				audit_log_format(ab, " list=%d res=1",
+						 r->listnr);
+				audit_log_end(ab);
+			}
 			list_del(&r->rlist);
 			list_del_rcu(&e->list);
 			call_rcu(&e->rcu, audit_free_rule_rcu);
@@ -1495,6 +1504,9 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
 {
 	struct audit_buffer *ab;
 
+	if (!audit_enabled)
+		return;
+
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (!ab)
 		return;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index bce9ecdb7712..1c06ecf38d7b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,6 +70,7 @@
 #include "audit.h"
 
 extern struct list_head audit_filter_list[];
+extern int audit_ever_enabled;
 
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname(). */
@@ -78,6 +79,9 @@ extern struct list_head audit_filter_list[];
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+/* no execve audit message should be longer than this (userspace limits) */
+#define MAX_EXECVE_AUDIT_LEN 7500
+
 /* number of audit rules */
 int audit_n_rules;
 
@@ -176,7 +180,11 @@ struct audit_aux_data_fd_pair {
 struct audit_aux_data_pids {
 	struct audit_aux_data	d;
 	pid_t			target_pid[AUDIT_AUX_PIDS];
+	uid_t			target_auid[AUDIT_AUX_PIDS];
+	uid_t			target_uid[AUDIT_AUX_PIDS];
+	unsigned int		target_sessionid[AUDIT_AUX_PIDS];
 	u32			target_sid[AUDIT_AUX_PIDS];
+	char			target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
 	int			pid_count;
 };
 
@@ -192,7 +200,6 @@ struct audit_context {
 	enum audit_state    state;
 	unsigned int	    serial;     /* serial number for record */
 	struct timespec	    ctime;      /* time of syscall entry */
-	uid_t		    loginuid;   /* login uid (identity) */
 	int		    major;      /* syscall number */
 	unsigned long	    argv[4];    /* syscall arguments */
 	int		    return_valid; /* return code is valid */
@@ -215,7 +222,11 @@ struct audit_context {
 	int		    arch;
 
 	pid_t		    target_pid;
+	uid_t		    target_auid;
+	uid_t		    target_uid;
+	unsigned int	    target_sessionid;
 	u32		    target_sid;
+	char		    target_comm[TASK_COMM_LEN];
 
 	struct audit_tree_refs *trees, *first_trees;
 	int tree_count;
@@ -506,7 +517,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_LOGINUID:
 			result = 0;
 			if (ctx)
-				result = audit_comparator(ctx->loginuid, f->op, f->val);
+				result = audit_comparator(tsk->loginuid, f->op, f->val);
 			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
@@ -702,7 +713,24 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	if (likely(!context))
 		return NULL;
 	context->return_valid = return_valid;
-	context->return_code  = return_code;
+
+	/*
+	 * we need to fix up the return code in the audit logs if the actual
+	 * return codes are later going to be fixed up by the arch specific
+	 * signal handlers
+	 *
+	 * This is actually a test for:
+	 * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) ||
+	 * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK)
+	 *
+	 * but is faster than a bunch of ||
+	 */
+	if (unlikely(return_code <= -ERESTARTSYS) &&
+	    (return_code >= -ERESTART_RESTARTBLOCK) &&
+	    (return_code != -ENOIOCTLCMD))
+		context->return_code = -EINTR;
+	else
+		context->return_code  = return_code;
 
 	if (context->in_syscall && !context->dummy && !context->auditable) {
 		enum audit_state state;
@@ -783,11 +811,8 @@ static inline void audit_free_aux(struct audit_context *context)
 static inline void audit_zero_context(struct audit_context *context,
 				      enum audit_state state)
 {
-	uid_t loginuid = context->loginuid;
-
 	memset(context, 0, sizeof(*context));
 	context->state      = state;
-	context->loginuid   = loginuid;
 }
 
 static inline struct audit_context *audit_alloc_context(enum audit_state state)
@@ -814,7 +839,7 @@ int audit_alloc(struct task_struct *tsk)
 	struct audit_context *context;
 	enum audit_state     state;
 
-	if (likely(!audit_enabled))
+	if (likely(!audit_ever_enabled))
 		return 0; /* Return if not auditing. */
 
 	state = audit_filter_task(tsk);
@@ -826,11 +851,6 @@ int audit_alloc(struct task_struct *tsk)
 		return -ENOMEM;
 	}
 
-	/* Preserve login uid */
-	context->loginuid = -1;
-	if (current->audit_context)
-		context->loginuid = current->audit_context->loginuid;
-
 	tsk->audit_context  = context;
 	set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
 	return 0;
@@ -922,7 +942,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
 }
 
 static int audit_log_pid_context(struct audit_context *context, pid_t pid,
-				 u32 sid)
+				 uid_t auid, uid_t uid, unsigned int sessionid,
+				 u32 sid, char *comm)
 {
 	struct audit_buffer *ab;
 	char *s = NULL;
@@ -931,68 +952,204 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
 
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_OBJ_PID);
 	if (!ab)
-		return 1;
+		return rc;
 
+	audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid,
+			 uid, sessionid);
 	if (selinux_sid_to_string(sid, &s, &len)) {
-		audit_log_format(ab, "opid=%d obj=(none)", pid);
+		audit_log_format(ab, " obj=(none)");
 		rc = 1;
 	} else
-		audit_log_format(ab, "opid=%d obj=%s", pid, s);
+		audit_log_format(ab, " obj=%s", s);
+	audit_log_format(ab, " ocomm=");
+	audit_log_untrustedstring(ab, comm);
 	audit_log_end(ab);
 	kfree(s);
 
 	return rc;
 }
 
-static void audit_log_execve_info(struct audit_buffer *ab,
-		struct audit_aux_data_execve *axi)
-{
-	int i;
-	long len, ret;
-	const char __user *p;
-	char *buf;
-
-	if (axi->mm != current->mm)
-		return; /* execve failed, no additional info */
-
-	p = (const char __user *)axi->mm->arg_start;
-
-	for (i = 0; i < axi->argc; i++, p += len) {
-		len = strnlen_user(p, MAX_ARG_STRLEN);
-		/*
-		 * We just created this mm, if we can't find the strings
-		 * we just copied into it something is _very_ wrong. Similar
-		 * for strings that are too long, we should not have created
-		 * any.
-		 */
-		if (!len || len > MAX_ARG_STRLEN) {
-			WARN_ON(1);
-			send_sig(SIGKILL, current, 0);
-		}
-
-		buf = kmalloc(len, GFP_KERNEL);
-		if (!buf) {
-			audit_panic("out of memory for argv string\n");
-			break;
-		}
-
-		ret = copy_from_user(buf, p, len);
-		/*
-		 * There is no reason for this copy to be short. We just
-		 * copied them here, and the mm hasn't been exposed to user-
-		 * space yet.
-		 */
-		if (ret) {
-			WARN_ON(1);
-			send_sig(SIGKILL, current, 0);
-		}
-
-		audit_log_format(ab, "a%d=", i);
-		audit_log_untrustedstring(ab, buf);
-		audit_log_format(ab, "\n");
-
-		kfree(buf);
-	}
-}
+/*
+ * to_send and len_sent accounting are very loose estimates.  We aren't
+ * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being
+ * within about 500 bytes (next page boundary)
+ *
+ * why snprintf?  an int is up to 12 digits long.  if we just assumed when
+ * logging that a[%d]= was going to be 16 characters long we would be wasting
+ * space in every audit message.  In one 7500 byte message we can log up to
+ * about 1000 min size arguments.  That comes down to about 50% waste of space
+ * if we didn't do the snprintf to find out how long arg_num_len was.
+ */
+static int audit_log_single_execve_arg(struct audit_context *context,
+					struct audit_buffer **ab,
+					int arg_num,
+					size_t *len_sent,
+					const char __user *p,
+					char *buf)
+{
+	char arg_num_len_buf[12];
+	const char __user *tmp_p = p;
+	/* how many digits are in arg_num? 3 is the length of a=\n */
+	size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 3;
+	size_t len, len_left, to_send;
+	size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
+	unsigned int i, has_cntl = 0, too_long = 0;
+	int ret;
+
+	/* strnlen_user includes the null we don't want to send */
+	len_left = len = strnlen_user(p, MAX_ARG_STRLEN) - 1;
+
+	/*
+	 * We just created this mm, if we can't find the strings
+	 * we just copied into it something is _very_ wrong. Similar
+	 * for strings that are too long, we should not have created
+	 * any.
+	 */
+	if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) {
+		WARN_ON(1);
+		send_sig(SIGKILL, current, 0);
+	}
+
+	/* walk the whole argument looking for non-ascii chars */
+	do {
+		if (len_left > MAX_EXECVE_AUDIT_LEN)
+			to_send = MAX_EXECVE_AUDIT_LEN;
+		else
+			to_send = len_left;
+		ret = copy_from_user(buf, tmp_p, to_send);
+		/*
+		 * There is no reason for this copy to be short. We just
+		 * copied them here, and the mm hasn't been exposed to user-
+		 * space yet.
+		 */
+		if (ret) {
+			WARN_ON(1);
+			send_sig(SIGKILL, current, 0);
+		}
+		buf[to_send] = '\0';
+		has_cntl = audit_string_contains_control(buf, to_send);
+		if (has_cntl) {
+			/*
+			 * hex messages get logged as 2 bytes, so we can only
+			 * send half as much in each message
+			 */
+			max_execve_audit_len = MAX_EXECVE_AUDIT_LEN / 2;
+			break;
+		}
+		len_left -= to_send;
+		tmp_p += to_send;
+	} while (len_left > 0);
+
+	len_left = len;
+
+	if (len > max_execve_audit_len)
+		too_long = 1;
+
+	/* rewalk the argument actually logging the message */
+	for (i = 0; len_left > 0; i++) {
+		int room_left;
+
+		if (len_left > max_execve_audit_len)
+			to_send = max_execve_audit_len;
+		else
+			to_send = len_left;
+
+		/* do we have space left to send this argument in this ab? */
+		room_left = MAX_EXECVE_AUDIT_LEN - arg_num_len - *len_sent;
+		if (has_cntl)
+			room_left -= (to_send * 2);
+		else
+			room_left -= to_send;
+		if (room_left < 0) {
+			*len_sent = 0;
+			audit_log_end(*ab);
+			*ab = audit_log_start(context, GFP_KERNEL, AUDIT_EXECVE);
+			if (!*ab)
+				return 0;
+		}
+
+		/*
+		 * first record needs to say how long the original string was
+		 * so we can be sure nothing was lost.
+		 */
+		if ((i == 0) && (too_long))
+			audit_log_format(*ab, "a%d_len=%ld ", arg_num,
+					 has_cntl ? 2*len : len);
+
+		/*
+		 * normally arguments are small enough to fit and we already
+		 * filled buf above when we checked for control characters
+		 * so don't bother with another copy_from_user
+		 */
+		if (len >= max_execve_audit_len)
+			ret = copy_from_user(buf, p, to_send);
+		else
+			ret = 0;
+		if (ret) {
+			WARN_ON(1);
+			send_sig(SIGKILL, current, 0);
+		}
+		buf[to_send] = '\0';
+
+		/* actually log it */
+		audit_log_format(*ab, "a%d", arg_num);
+		if (too_long)
+			audit_log_format(*ab, "[%d]", i);
+		audit_log_format(*ab, "=");
+		if (has_cntl)
+			audit_log_hex(*ab, buf, to_send);
+		else
+			audit_log_format(*ab, "\"%s\"", buf);
+		audit_log_format(*ab, "\n");
+
+		p += to_send;
+		len_left -= to_send;
+		*len_sent += arg_num_len;
+		if (has_cntl)
+			*len_sent += to_send * 2;
+		else
+			*len_sent += to_send;
+	}
+	/* include the null we didn't log */
+	return len + 1;
+}
+
+static void audit_log_execve_info(struct audit_context *context,
+				  struct audit_buffer **ab,
+				  struct audit_aux_data_execve *axi)
+{
+	int i;
+	size_t len, len_sent = 0;
+	const char __user *p;
+	char *buf;
+
+	if (axi->mm != current->mm)
+		return; /* execve failed, no additional info */
+
+	p = (const char __user *)axi->mm->arg_start;
+
+	audit_log_format(*ab, "argc=%d ", axi->argc);
+
+	/*
+	 * we need some kernel buffer to hold the userspace args.  Just
+	 * allocate one big one rather than allocating one of the right size
+	 * for every single argument inside audit_log_single_execve_arg()
+	 * should be <8k allocation so should be pretty safe.
+	 */
+	buf = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
+	if (!buf) {
+		audit_panic("out of memory for argv string\n");
+		return;
+	}
+
+	for (i = 0; i < axi->argc; i++) {
+		len = audit_log_single_execve_arg(context, ab, i,
+						  &len_sent, p, buf);
+		if (len <= 0)
+			break;
+		p += len;
+	}
+	kfree(buf);
+}
 
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
@@ -1039,7 +1196,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
 		  " ppid=%d pid=%d auid=%u uid=%u gid=%u"
 		  " euid=%u suid=%u fsuid=%u"
-		  " egid=%u sgid=%u fsgid=%u tty=%s",
+		  " egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
 		  context->argv[0],
 		  context->argv[1],
 		  context->argv[2],
@@ -1047,11 +1204,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 		  context->name_count,
 		  context->ppid,
 		  context->pid,
-		  context->loginuid,
+		  tsk->loginuid,
 		  context->uid,
 		  context->gid,
 		  context->euid, context->suid, context->fsuid,
-		  context->egid, context->sgid, context->fsgid, tty);
+		  context->egid, context->sgid, context->fsgid, tty,
+		  tsk->sessionid);
 
 	mutex_unlock(&tty_mutex);
 
@@ -1135,7 +1293,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 
 	case AUDIT_EXECVE: {
 		struct audit_aux_data_execve *axi = (void *)aux;
-		audit_log_execve_info(ab, axi);
+		audit_log_execve_info(context, &ab, axi);
 		break; }
 
 	case AUDIT_SOCKETCALL: {
@@ -1168,13 +1326,19 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 
 		for (i = 0; i < axs->pid_count; i++)
 			if (audit_log_pid_context(context, axs->target_pid[i],
-						  axs->target_sid[i]))
+						  axs->target_auid[i],
+						  axs->target_uid[i],
+						  axs->target_sessionid[i],
+						  axs->target_sid[i],
+						  axs->target_comm[i]))
 				call_panic = 1;
 	}
 
 	if (context->target_pid &&
 	    audit_log_pid_context(context, context->target_pid,
-				  context->target_sid))
+				  context->target_auid, context->target_uid,
+				  context->target_sessionid,
+				  context->target_sid, context->target_comm))
 			call_panic = 1;
 
 	if (context->pwd && context->pwdmnt) {
@@ -1242,6 +1406,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 
 		audit_log_end(ab);
 	}
+
+	/* Send end of event record to help user space know we are finished */
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
+	if (ab)
+		audit_log_end(ab);
 	if (call_panic)
 		audit_panic("error converting sid to string");
 }
@@ -1766,6 +1935,9 @@ void auditsc_get_stamp(struct audit_context *ctx,
 	ctx->auditable = 1;
 }
 
+/* global counter which is incremented every time something logs in */
+static atomic_t session_id = ATOMIC_INIT(0);
+
 /**
  * audit_set_loginuid - set a task's audit_context loginuid
  * @task: task whose audit context is being modified
@@ -1777,41 +1949,29 @@ void auditsc_get_stamp(struct audit_context *ctx,
  */
 int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
 {
+	unsigned int sessionid = atomic_inc_return(&session_id);
 	struct audit_context *context = task->audit_context;
 
-	if (context) {
-		/* Only log if audit is enabled */
-		if (context->in_syscall) {
-			struct audit_buffer *ab;
-
-			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-			if (ab) {
-				audit_log_format(ab, "login pid=%d uid=%u "
-					"old auid=%u new auid=%u",
-					task->pid, task->uid,
-					context->loginuid, loginuid);
-				audit_log_end(ab);
-			}
+	if (context && context->in_syscall) {
+		struct audit_buffer *ab;
+
+		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+		if (ab) {
+			audit_log_format(ab, "login pid=%d uid=%u "
+				"old auid=%u new auid=%u"
+				" old ses=%u new ses=%u",
+				task->pid, task->uid,
+				task->loginuid, loginuid,
+				task->sessionid, sessionid);
+			audit_log_end(ab);
 		}
-		context->loginuid = loginuid;
 	}
+	task->sessionid = sessionid;
+	task->loginuid = loginuid;
 	return 0;
 }
 
 /**
- * audit_get_loginuid - get the loginuid for an audit_context
- * @ctx: the audit_context
- *
- * Returns the context's loginuid or -1 if @ctx is NULL.
- */
-uid_t audit_get_loginuid(struct audit_context *ctx)
-{
-	return ctx ? ctx->loginuid : -1;
-}
-
-EXPORT_SYMBOL(audit_get_loginuid);
-
-/**
  * __audit_mq_open - record audit data for a POSIX MQ open
  * @oflag: open flag
  * @mode: mode bits
@@ -2070,8 +2230,6 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 	return 0;
 }
 
-int audit_argv_kb = 32;
-
 int audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
@@ -2080,14 +2238,6 @@ int audit_bprm(struct linux_binprm *bprm)
 	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
-	/*
-	 * Even though the stack code doesn't limit the arg+env size any more,
-	 * the audit code requires that _all_ arguments be logged in a single
-	 * netlink skb. Hence cap it :-(
-	 */
-	if (bprm->argv_len > (audit_argv_kb << 10))
-		return -E2BIG;
-
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;
@@ -2193,7 +2343,11 @@ void __audit_ptrace(struct task_struct *t)
 	struct audit_context *context = current->audit_context;
 
 	context->target_pid = t->pid;
+	context->target_auid = audit_get_loginuid(t);
+	context->target_uid = t->uid;
+	context->target_sessionid = audit_get_sessionid(t);
 	selinux_get_task_sid(t, &context->target_sid);
+	memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
 }
 
 /**
@@ -2216,8 +2370,8 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	if (audit_pid && t->tgid == audit_pid) {
 		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
 			audit_sig_pid = tsk->pid;
-			if (ctx)
-				audit_sig_uid = ctx->loginuid;
+			if (tsk->loginuid != -1)
+				audit_sig_uid = tsk->loginuid;
 			else
 				audit_sig_uid = tsk->uid;
 			selinux_get_task_sid(tsk, &audit_sig_sid);
@@ -2230,7 +2384,11 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	 * in audit_context */
 	if (!ctx->target_pid) {
 		ctx->target_pid = t->tgid;
+		ctx->target_auid = audit_get_loginuid(t);
+		ctx->target_uid = t->uid;
+		ctx->target_sessionid = audit_get_sessionid(t);
 		selinux_get_task_sid(t, &ctx->target_sid);
+		memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
 		return 0;
 	}
 
@@ -2247,7 +2405,11 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS);
 
 	axp->target_pid[axp->pid_count] = t->tgid;
+	axp->target_auid[axp->pid_count] = audit_get_loginuid(t);
+	axp->target_uid[axp->pid_count] = t->uid;
+	axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
 	selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
+	memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
 	axp->pid_count++;
 
 	return 0;
@@ -2264,6 +2426,8 @@ void audit_core_dumps(long signr)
 {
 	struct audit_buffer *ab;
 	u32 sid;
+	uid_t auid = audit_get_loginuid(current);
+	unsigned int sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -2272,9 +2436,8 @@ void audit_core_dumps(long signr)
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	audit_log_format(ab, "auid=%u uid=%u gid=%u",
-			audit_get_loginuid(current->audit_context),
-			current->uid, current->gid);
+	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
+			auid, current->uid, current->gid, sessionid);
 	selinux_get_task_sid(current, &sid);
 	if (sid) {
 		char *ctx = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 549c0558ba68..bfb1c0e940e8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -249,7 +249,7 @@ static int has_stopped_jobs(struct pid *pgrp)
 	struct task_struct *p;
 
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
-		if (p->state != TASK_STOPPED)
+		if (!task_is_stopped(p))
 			continue;
 		retval = 1;
 		break;
@@ -614,7 +614,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	p->parent = p->real_parent;
 	add_parent(p);
 
-	if (p->state == TASK_TRACED) {
+	if (task_is_traced(p)) {
 		/*
 		 * If it was at a trace stop, turn it into
 		 * a normal stop since it's no longer being
@@ -1563,60 +1563,51 @@ repeat:
 		}
 		allowed = 1;
 
-		switch (p->state) {
-		case TASK_TRACED:
-			/*
-			 * When we hit the race with PTRACE_ATTACH,
-			 * we will not report this child.  But the
-			 * race means it has not yet been moved to
-			 * our ptrace_children list, so we need to
-			 * set the flag here to avoid a spurious ECHILD
-			 * when the race happens with the only child.
-			 */
-			flag = 1;
-			if (!my_ptrace_child(p))
-				continue;
-			/*FALLTHROUGH*/
-		case TASK_STOPPED:
+		if (task_is_stopped_or_traced(p)) {
 			/*
 			 * It's stopped now, so it might later
 			 * continue, exit, or stop again.
+			 *
+			 * When we hit the race with PTRACE_ATTACH, we
+			 * will not report this child.  But the race
+			 * means it has not yet been moved to our
+			 * ptrace_children list, so we need to set the
+			 * flag here to avoid a spurious ECHILD when
+			 * the race happens with the only child.
 			 */
 			flag = 1;
-			if (!(options & WUNTRACED) &&
-			    !my_ptrace_child(p))
-				continue;
+
+			if (!my_ptrace_child(p)) {
+				if (task_is_traced(p))
+					continue;
+				if (!(options & WUNTRACED))
+					continue;
+			}
+
 			retval = wait_task_stopped(p, ret == 2,
-						   (options & WNOWAIT),
-						   infop,
-						   stat_addr, ru);
+					(options & WNOWAIT), infop,
+					stat_addr, ru);
 			if (retval == -EAGAIN)
 				goto repeat;
 			if (retval != 0) /* He released the lock.  */
 				goto end;
-			break;
-		default:
-		// case EXIT_DEAD:
-			if (p->exit_state == EXIT_DEAD)
+		} else if (p->exit_state == EXIT_DEAD) {
+			continue;
+		} else if (p->exit_state == EXIT_ZOMBIE) {
+			/*
+			 * Eligible but we cannot release it yet:
+			 */
+			if (ret == 2)
+				goto check_continued;
+			if (!likely(options & WEXITED))
 				continue;
-		// case EXIT_ZOMBIE:
-			if (p->exit_state == EXIT_ZOMBIE) {
-				/*
-				 * Eligible but we cannot release
-				 * it yet:
-				 */
-				if (ret == 2)
-					goto check_continued;
-				if (!likely(options & WEXITED))
-					continue;
-				retval = wait_task_zombie(
-					p, (options & WNOWAIT),
-					infop, stat_addr, ru);
-				/* He released the lock.  */
-				if (retval != 0)
-					goto end;
-				break;
-			}
+			retval = wait_task_zombie(p,
+					(options & WNOWAIT), infop,
+					stat_addr, ru);
+			/* He released the lock.  */
+			if (retval != 0)
+				goto end;
+		} else {
 check_continued:
 			/*
 			 * It's running now, so it might later
@@ -1625,12 +1616,11 @@ check_continued:
1625 flag = 1; 1616 flag = 1;
1626 if (!unlikely(options & WCONTINUED)) 1617 if (!unlikely(options & WCONTINUED))
1627 continue; 1618 continue;
1628 retval = wait_task_continued( 1619 retval = wait_task_continued(p,
1629 p, (options & WNOWAIT), 1620 (options & WNOWAIT), infop,
1630 infop, stat_addr, ru); 1621 stat_addr, ru);
1631 if (retval != 0) /* He released the lock. */ 1622 if (retval != 0) /* He released the lock. */
1632 goto end; 1623 goto end;
1633 break;
1634 } 1624 }
1635 } 1625 }
1636 if (!flag) { 1626 if (!flag) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 314f5101d2b0..05e0b6f4365b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -393,6 +393,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
393 destroy_context(mm); 393 destroy_context(mm);
394 free_mm(mm); 394 free_mm(mm);
395} 395}
396EXPORT_SYMBOL_GPL(__mmdrop);
396 397
397/* 398/*
398 * Decrement the use count and release all resources for an mm. 399 * Decrement the use count and release all resources for an mm.
diff --git a/kernel/futex.c b/kernel/futex.c
index db9824de8bf0..a6baaec44b8f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -109,6 +109,9 @@ struct futex_q {
109 /* Optional priority inheritance state: */ 109 /* Optional priority inheritance state: */
110 struct futex_pi_state *pi_state; 110 struct futex_pi_state *pi_state;
111 struct task_struct *task; 111 struct task_struct *task;
112
113 /* Bitset for the optional bitmasked wakeup */
114 u32 bitset;
112}; 115};
113 116
114/* 117/*
@@ -722,7 +725,7 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
722 * to this virtual address: 725 * to this virtual address:
723 */ 726 */
724static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, 727static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
725 int nr_wake) 728 int nr_wake, u32 bitset)
726{ 729{
727 struct futex_hash_bucket *hb; 730 struct futex_hash_bucket *hb;
728 struct futex_q *this, *next; 731 struct futex_q *this, *next;
@@ -730,6 +733,9 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
730 union futex_key key; 733 union futex_key key;
731 int ret; 734 int ret;
732 735
736 if (!bitset)
737 return -EINVAL;
738
733 futex_lock_mm(fshared); 739 futex_lock_mm(fshared);
734 740
735 ret = get_futex_key(uaddr, fshared, &key); 741 ret = get_futex_key(uaddr, fshared, &key);
@@ -746,6 +752,11 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
746 ret = -EINVAL; 752 ret = -EINVAL;
747 break; 753 break;
748 } 754 }
755
756 /* Check if one of the bits is set in both bitsets */
757 if (!(this->bitset & bitset))
758 continue;
759
749 wake_futex(this); 760 wake_futex(this);
750 if (++ret >= nr_wake) 761 if (++ret >= nr_wake)
751 break; 762 break;
@@ -1156,7 +1167,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1156static long futex_wait_restart(struct restart_block *restart); 1167static long futex_wait_restart(struct restart_block *restart);
1157 1168
1158static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1169static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1159 u32 val, ktime_t *abs_time) 1170 u32 val, ktime_t *abs_time, u32 bitset)
1160{ 1171{
1161 struct task_struct *curr = current; 1172 struct task_struct *curr = current;
1162 DECLARE_WAITQUEUE(wait, curr); 1173 DECLARE_WAITQUEUE(wait, curr);
@@ -1167,7 +1178,11 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1167 struct hrtimer_sleeper t; 1178 struct hrtimer_sleeper t;
1168 int rem = 0; 1179 int rem = 0;
1169 1180
1181 if (!bitset)
1182 return -EINVAL;
1183
1170 q.pi_state = NULL; 1184 q.pi_state = NULL;
1185 q.bitset = bitset;
1171 retry: 1186 retry:
1172 futex_lock_mm(fshared); 1187 futex_lock_mm(fshared);
1173 1188
@@ -1252,6 +1267,8 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1252 t.timer.expires = *abs_time; 1267 t.timer.expires = *abs_time;
1253 1268
1254 hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); 1269 hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
1270 if (!hrtimer_active(&t.timer))
1271 t.task = NULL;
1255 1272
1256 /* 1273 /*
1257 * the timer could have already expired, in which 1274 * the timer could have already expired, in which
@@ -1293,6 +1310,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1293 restart->futex.uaddr = (u32 *)uaddr; 1310 restart->futex.uaddr = (u32 *)uaddr;
1294 restart->futex.val = val; 1311 restart->futex.val = val;
1295 restart->futex.time = abs_time->tv64; 1312 restart->futex.time = abs_time->tv64;
1313 restart->futex.bitset = bitset;
1296 restart->futex.flags = 0; 1314 restart->futex.flags = 0;
1297 1315
1298 if (fshared) 1316 if (fshared)
@@ -1319,7 +1337,8 @@ static long futex_wait_restart(struct restart_block *restart)
1319 restart->fn = do_no_restart_syscall; 1337 restart->fn = do_no_restart_syscall;
1320 if (restart->futex.flags & FLAGS_SHARED) 1338 if (restart->futex.flags & FLAGS_SHARED)
1321 fshared = &current->mm->mmap_sem; 1339 fshared = &current->mm->mmap_sem;
1322 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t); 1340 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
1341 restart->futex.bitset);
1323} 1342}
1324 1343
1325 1344
@@ -1535,9 +1554,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1535 owner = rt_mutex_owner(&q.pi_state->pi_mutex); 1554 owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1536 res = fixup_pi_state_owner(uaddr, &q, owner); 1555 res = fixup_pi_state_owner(uaddr, &q, owner);
1537 1556
1538 WARN_ON(rt_mutex_owner(&q.pi_state->pi_mutex) !=
1539 owner);
1540
1541 /* propagate -EFAULT, if the fixup failed */ 1557 /* propagate -EFAULT, if the fixup failed */
1542 if (res) 1558 if (res)
1543 ret = res; 1559 ret = res;
@@ -1943,7 +1959,8 @@ retry:
1943 * PI futexes happens in exit_pi_state(): 1959 * PI futexes happens in exit_pi_state():
1944 */ 1960 */
1945 if (!pi && (uval & FUTEX_WAITERS)) 1961 if (!pi && (uval & FUTEX_WAITERS))
1946 futex_wake(uaddr, &curr->mm->mmap_sem, 1); 1962 futex_wake(uaddr, &curr->mm->mmap_sem, 1,
1963 FUTEX_BITSET_MATCH_ANY);
1947 } 1964 }
1948 return 0; 1965 return 0;
1949} 1966}
@@ -2043,10 +2060,14 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2043 2060
2044 switch (cmd) { 2061 switch (cmd) {
2045 case FUTEX_WAIT: 2062 case FUTEX_WAIT:
2046 ret = futex_wait(uaddr, fshared, val, timeout); 2063 val3 = FUTEX_BITSET_MATCH_ANY;
2064 case FUTEX_WAIT_BITSET:
2065 ret = futex_wait(uaddr, fshared, val, timeout, val3);
2047 break; 2066 break;
2048 case FUTEX_WAKE: 2067 case FUTEX_WAKE:
2049 ret = futex_wake(uaddr, fshared, val); 2068 val3 = FUTEX_BITSET_MATCH_ANY;
2069 case FUTEX_WAKE_BITSET:
2070 ret = futex_wake(uaddr, fshared, val, val3);
2050 break; 2071 break;
2051 case FUTEX_FD: 2072 case FUTEX_FD:
2052 /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ 2073 /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
@@ -2086,7 +2107,8 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
2086 u32 val2 = 0; 2107 u32 val2 = 0;
2087 int cmd = op & FUTEX_CMD_MASK; 2108 int cmd = op & FUTEX_CMD_MASK;
2088 2109
2089 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) { 2110 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2111 cmd == FUTEX_WAIT_BITSET)) {
2090 if (copy_from_user(&ts, utime, sizeof(ts)) != 0) 2112 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2091 return -EFAULT; 2113 return -EFAULT;
2092 if (!timespec_valid(&ts)) 2114 if (!timespec_valid(&ts))
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 0a43def6fee7..133d558db452 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -167,7 +167,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
167 int val2 = 0; 167 int val2 = 0;
168 int cmd = op & FUTEX_CMD_MASK; 168 int cmd = op & FUTEX_CMD_MASK;
169 169
170 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) { 170 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
171 cmd == FUTEX_WAIT_BITSET)) {
171 if (get_compat_timespec(&ts, utime)) 172 if (get_compat_timespec(&ts, utime))
172 return -EFAULT; 173 return -EFAULT;
173 if (!timespec_valid(&ts)) 174 if (!timespec_valid(&ts))
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index bd5d6b5060bc..1069998fe25f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1315,6 +1315,8 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
1315 1315
1316 } while (t->task && !signal_pending(current)); 1316 } while (t->task && !signal_pending(current));
1317 1317
1318 __set_current_state(TASK_RUNNING);
1319
1318 return t->task == NULL; 1320 return t->task == NULL;
1319} 1321}
1320 1322
diff --git a/kernel/mutex.c b/kernel/mutex.c
index d7fe50cc556f..d9ec9b666250 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -166,9 +166,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
166 * got a signal? (This code gets eliminated in the 166 * got a signal? (This code gets eliminated in the
167 * TASK_UNINTERRUPTIBLE case.) 167 * TASK_UNINTERRUPTIBLE case.)
168 */ 168 */
169 if (unlikely(state == TASK_INTERRUPTIBLE && 169 if (unlikely((state == TASK_INTERRUPTIBLE &&
170 signal_pending(task))) { 170 signal_pending(task)) ||
171 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 171 (state == TASK_KILLABLE &&
172 fatal_signal_pending(task)))) {
173 mutex_remove_waiter(lock, &waiter,
174 task_thread_info(task));
172 mutex_release(&lock->dep_map, 1, ip); 175 mutex_release(&lock->dep_map, 1, ip);
173 spin_unlock_mutex(&lock->wait_lock, flags); 176 spin_unlock_mutex(&lock->wait_lock, flags);
174 177
@@ -211,6 +214,14 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass)
211EXPORT_SYMBOL_GPL(mutex_lock_nested); 214EXPORT_SYMBOL_GPL(mutex_lock_nested);
212 215
213int __sched 216int __sched
217mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
218{
219 might_sleep();
220 return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_);
221}
222EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
223
224int __sched
214mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) 225mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
215{ 226{
216 might_sleep(); 227 might_sleep();
@@ -272,6 +283,9 @@ __mutex_unlock_slowpath(atomic_t *lock_count)
272 * mutex_lock_interruptible() and mutex_trylock(). 283 * mutex_lock_interruptible() and mutex_trylock().
273 */ 284 */
274static int fastcall noinline __sched 285static int fastcall noinline __sched
286__mutex_lock_killable_slowpath(atomic_t *lock_count);
287
288static noinline int fastcall __sched
275__mutex_lock_interruptible_slowpath(atomic_t *lock_count); 289__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
276 290
277/*** 291/***
@@ -294,6 +308,14 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
294 308
295EXPORT_SYMBOL(mutex_lock_interruptible); 309EXPORT_SYMBOL(mutex_lock_interruptible);
296 310
311int fastcall __sched mutex_lock_killable(struct mutex *lock)
312{
313 might_sleep();
314 return __mutex_fastpath_lock_retval
315 (&lock->count, __mutex_lock_killable_slowpath);
316}
317EXPORT_SYMBOL(mutex_lock_killable);
318
297static void fastcall noinline __sched 319static void fastcall noinline __sched
298__mutex_lock_slowpath(atomic_t *lock_count) 320__mutex_lock_slowpath(atomic_t *lock_count)
299{ 321{
@@ -303,6 +325,14 @@ __mutex_lock_slowpath(atomic_t *lock_count)
303} 325}
304 326
305static int fastcall noinline __sched 327static int fastcall noinline __sched
328__mutex_lock_killable_slowpath(atomic_t *lock_count)
329{
330 struct mutex *lock = container_of(lock_count, struct mutex, count);
331
332 return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_);
333}
334
335static noinline int fastcall __sched
306__mutex_lock_interruptible_slowpath(atomic_t *lock_count) 336__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
307{ 337{
308 struct mutex *lock = container_of(lock_count, struct mutex, count); 338 struct mutex *lock = container_of(lock_count, struct mutex, count);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 8e186c678149..ef9b802738a5 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -44,9 +44,30 @@ config PM_VERBOSE
44 ---help--- 44 ---help---
45 This option enables verbose messages from the Power Management code. 45 This option enables verbose messages from the Power Management code.
46 46
47config CAN_PM_TRACE
48 def_bool y
49 depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
50
47config PM_TRACE 51config PM_TRACE
52 bool
53 help
54 This enables code to save the last PM event point across
55 reboot. The architecture needs to support this, x86 for
56 example does by saving things in the RTC, see below.
57
58 The architecture specific code must provide the extern
59 functions from <linux/resume-trace.h> as well as the
60 <asm/resume-trace.h> header with a TRACE_RESUME() macro.
61
62 The way the information is presented is architecture-
63 dependent, x86 will print the information during a
64 late_initcall.
65
66config PM_TRACE_RTC
48 bool "Suspend/resume event tracing" 67 bool "Suspend/resume event tracing"
49 depends on PM_DEBUG && X86 && PM_SLEEP && EXPERIMENTAL 68 depends on CAN_PM_TRACE
69 depends on X86
70 select PM_TRACE
50 default n 71 default n
51 ---help--- 72 ---help---
52 This enables some cheesy code to save the last PM event point in the 73 This enables some cheesy code to save the last PM event point in the
@@ -63,7 +84,8 @@ config PM_TRACE
63 84
64config PM_SLEEP_SMP 85config PM_SLEEP_SMP
65 bool 86 bool
66 depends on SUSPEND_SMP_POSSIBLE || HIBERNATION_SMP_POSSIBLE 87 depends on SMP
88 depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
67 depends on PM_SLEEP 89 depends on PM_SLEEP
68 select HOTPLUG_CPU 90 select HOTPLUG_CPU
69 default y 91 default y
@@ -73,46 +95,29 @@ config PM_SLEEP
73 depends on SUSPEND || HIBERNATION 95 depends on SUSPEND || HIBERNATION
74 default y 96 default y
75 97
76config SUSPEND_UP_POSSIBLE
77 bool
78 depends on (X86 && !X86_VOYAGER) || PPC || ARM || BLACKFIN || MIPS \
79 || SUPERH || FRV
80 depends on !SMP
81 default y
82
83config SUSPEND_SMP_POSSIBLE
84 bool
85 depends on (X86 && !X86_VOYAGER) \
86 || (PPC && (PPC_PSERIES || PPC_PMAC)) || ARM
87 depends on SMP
88 default y
89
90config SUSPEND 98config SUSPEND
91 bool "Suspend to RAM and standby" 99 bool "Suspend to RAM and standby"
92 depends on PM 100 depends on PM && ARCH_SUSPEND_POSSIBLE
93 depends on SUSPEND_UP_POSSIBLE || SUSPEND_SMP_POSSIBLE
94 default y 101 default y
95 ---help--- 102 ---help---
96 Allow the system to enter sleep states in which main memory is 103 Allow the system to enter sleep states in which main memory is
97 powered and thus its contents are preserved, such as the 104 powered and thus its contents are preserved, such as the
98 suspend-to-RAM state (i.e. the ACPI S3 state). 105 suspend-to-RAM state (e.g. the ACPI S3 state).
99 106
100config HIBERNATION_UP_POSSIBLE 107config SUSPEND_FREEZER
101 bool 108 bool "Enable freezer for suspend to RAM/standby" \
102 depends on X86 || PPC64_SWSUSP || PPC32 109 if ARCH_WANTS_FREEZER_CONTROL || BROKEN
103 depends on !SMP 110 depends on SUSPEND
104 default y 111 default y
112 help
113 This allows you to turn off the freezer for suspend. If this is
114 done, no tasks are frozen for suspend to RAM/standby.
105 115
106config HIBERNATION_SMP_POSSIBLE 116 Turning OFF this setting is NOT recommended! If in doubt, say Y.
107 bool
108 depends on (X86 && !X86_VOYAGER) || PPC64_SWSUSP
109 depends on SMP
110 default y
111 117
112config HIBERNATION 118config HIBERNATION
113 bool "Hibernation (aka 'suspend to disk')" 119 bool "Hibernation (aka 'suspend to disk')"
114 depends on PM && SWAP 120 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
115 depends on HIBERNATION_UP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
116 ---help--- 121 ---help---
117 Enable the suspend to disk (STD) functionality, which is usually 122 Enable the suspend to disk (STD) functionality, which is usually
118 called "hibernation" in user interfaces. STD checkpoints the 123 called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b138b431e271..d09da0895174 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -54,8 +54,8 @@ static struct platform_hibernation_ops *hibernation_ops;
54 54
55void hibernation_set_ops(struct platform_hibernation_ops *ops) 55void hibernation_set_ops(struct platform_hibernation_ops *ops)
56{ 56{
57 if (ops && !(ops->start && ops->pre_snapshot && ops->finish 57 if (ops && !(ops->begin && ops->end && ops->pre_snapshot
58 && ops->prepare && ops->enter && ops->pre_restore 58 && ops->prepare && ops->finish && ops->enter && ops->pre_restore
59 && ops->restore_cleanup)) { 59 && ops->restore_cleanup)) {
60 WARN_ON(1); 60 WARN_ON(1);
61 return; 61 return;
@@ -70,15 +70,55 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops)
70 mutex_unlock(&pm_mutex); 70 mutex_unlock(&pm_mutex);
71} 71}
72 72
73#ifdef CONFIG_PM_DEBUG
74static void hibernation_debug_sleep(void)
75{
76 printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
77 mdelay(5000);
78}
79
80static int hibernation_testmode(int mode)
81{
82 if (hibernation_mode == mode) {
83 hibernation_debug_sleep();
84 return 1;
85 }
86 return 0;
87}
88
89static int hibernation_test(int level)
90{
91 if (pm_test_level == level) {
92 hibernation_debug_sleep();
93 return 1;
94 }
95 return 0;
96}
97#else /* !CONFIG_PM_DEBUG */
98static int hibernation_testmode(int mode) { return 0; }
99static int hibernation_test(int level) { return 0; }
100#endif /* !CONFIG_PM_DEBUG */
101
73/** 102/**
74 * platform_start - tell the platform driver that we're starting 103 * platform_begin - tell the platform driver that we're starting
75 * hibernation 104 * hibernation
76 */ 105 */
77 106
78static int platform_start(int platform_mode) 107static int platform_begin(int platform_mode)
79{ 108{
80 return (platform_mode && hibernation_ops) ? 109 return (platform_mode && hibernation_ops) ?
81 hibernation_ops->start() : 0; 110 hibernation_ops->begin() : 0;
111}
112
113/**
114 * platform_end - tell the platform driver that we've entered the
115 * working state
116 */
117
118static void platform_end(int platform_mode)
119{
120 if (platform_mode && hibernation_ops)
121 hibernation_ops->end();
82} 122}
83 123
84/** 124/**
@@ -162,19 +202,25 @@ int create_image(int platform_mode)
162 */ 202 */
163 error = device_power_down(PMSG_FREEZE); 203 error = device_power_down(PMSG_FREEZE);
164 if (error) { 204 if (error) {
165 printk(KERN_ERR "Some devices failed to power down, " 205 printk(KERN_ERR "PM: Some devices failed to power down, "
166 KERN_ERR "aborting suspend\n"); 206 "aborting hibernation\n");
167 goto Enable_irqs; 207 goto Enable_irqs;
168 } 208 }
169 209
210 if (hibernation_test(TEST_CORE))
211 goto Power_up;
212
213 in_suspend = 1;
170 save_processor_state(); 214 save_processor_state();
171 error = swsusp_arch_suspend(); 215 error = swsusp_arch_suspend();
172 if (error) 216 if (error)
173 printk(KERN_ERR "Error %d while creating the image\n", error); 217 printk(KERN_ERR "PM: Error %d creating hibernation image\n",
218 error);
174 /* Restore control flow magically appears here */ 219 /* Restore control flow magically appears here */
175 restore_processor_state(); 220 restore_processor_state();
176 if (!in_suspend) 221 if (!in_suspend)
177 platform_leave(platform_mode); 222 platform_leave(platform_mode);
223 Power_up:
178 /* NOTE: device_power_up() is just a resume() for devices 224 /* NOTE: device_power_up() is just a resume() for devices
179 * that suspended with irqs off ... no overall powerup. 225 * that suspended with irqs off ... no overall powerup.
180 */ 226 */
@@ -202,36 +248,90 @@ int hibernation_snapshot(int platform_mode)
202 if (error) 248 if (error)
203 return error; 249 return error;
204 250
205 error = platform_start(platform_mode); 251 error = platform_begin(platform_mode);
206 if (error) 252 if (error)
207 return error; 253 goto Close;
208 254
209 suspend_console(); 255 suspend_console();
210 error = device_suspend(PMSG_FREEZE); 256 error = device_suspend(PMSG_FREEZE);
211 if (error) 257 if (error)
212 goto Resume_console; 258 goto Resume_console;
213 259
214 error = platform_pre_snapshot(platform_mode); 260 if (hibernation_test(TEST_DEVICES))
215 if (error)
216 goto Resume_devices; 261 goto Resume_devices;
217 262
263 error = platform_pre_snapshot(platform_mode);
264 if (error || hibernation_test(TEST_PLATFORM))
265 goto Finish;
266
218 error = disable_nonboot_cpus(); 267 error = disable_nonboot_cpus();
219 if (!error) { 268 if (!error) {
220 if (hibernation_mode != HIBERNATION_TEST) { 269 if (hibernation_test(TEST_CPUS))
221 in_suspend = 1; 270 goto Enable_cpus;
222 error = create_image(platform_mode); 271
223 /* Control returns here after successful restore */ 272 if (hibernation_testmode(HIBERNATION_TEST))
224 } else { 273 goto Enable_cpus;
225 printk("swsusp debug: Waiting for 5 seconds.\n"); 274
226 mdelay(5000); 275 error = create_image(platform_mode);
227 } 276 /* Control returns here after successful restore */
228 } 277 }
278 Enable_cpus:
229 enable_nonboot_cpus(); 279 enable_nonboot_cpus();
230 Resume_devices: 280 Finish:
231 platform_finish(platform_mode); 281 platform_finish(platform_mode);
282 Resume_devices:
232 device_resume(); 283 device_resume();
233 Resume_console: 284 Resume_console:
234 resume_console(); 285 resume_console();
286 Close:
287 platform_end(platform_mode);
288 return error;
289}
290
291/**
292 * resume_target_kernel - prepare devices that need to be suspended with
293 * interrupts off, restore the contents of highmem that have not been
294 * restored yet from the image and run the low level code that will restore
295 * the remaining contents of memory and switch to the just restored target
296 * kernel.
297 */
298
299static int resume_target_kernel(void)
300{
301 int error;
302
303 local_irq_disable();
304 error = device_power_down(PMSG_PRETHAW);
305 if (error) {
306 printk(KERN_ERR "PM: Some devices failed to power down, "
307 "aborting resume\n");
308 goto Enable_irqs;
309 }
310 /* We'll ignore saved state, but this gets preempt count (etc) right */
311 save_processor_state();
312 error = restore_highmem();
313 if (!error) {
314 error = swsusp_arch_resume();
315 /*
316 * The code below is only ever reached in case of a failure.
317 * Otherwise execution continues at place where
318 * swsusp_arch_suspend() was called
319 */
320 BUG_ON(!error);
321 /* This call to restore_highmem() undos the previous one */
322 restore_highmem();
323 }
324 /*
325 * The only reason why swsusp_arch_resume() can fail is memory being
326 * very tight, so we have to free it as soon as we can to avoid
327 * subsequent failures
328 */
329 swsusp_free();
330 restore_processor_state();
331 touch_softlockup_watchdog();
332 device_power_up();
333 Enable_irqs:
334 local_irq_enable();
235 return error; 335 return error;
236} 336}
237 337
@@ -258,7 +358,7 @@ int hibernation_restore(int platform_mode)
258 if (!error) { 358 if (!error) {
259 error = disable_nonboot_cpus(); 359 error = disable_nonboot_cpus();
260 if (!error) 360 if (!error)
261 error = swsusp_resume(); 361 error = resume_target_kernel();
262 enable_nonboot_cpus(); 362 enable_nonboot_cpus();
263 } 363 }
264 platform_restore_cleanup(platform_mode); 364 platform_restore_cleanup(platform_mode);
@@ -286,9 +386,9 @@ int hibernation_platform_enter(void)
286 * hibernation_ops->finish() before saving the image, so we should let 386 * hibernation_ops->finish() before saving the image, so we should let
287 * the firmware know that we're going to enter the sleep state after all 387 * the firmware know that we're going to enter the sleep state after all
288 */ 388 */
289 error = hibernation_ops->start(); 389 error = hibernation_ops->begin();
290 if (error) 390 if (error)
291 return error; 391 goto Close;
292 392
293 suspend_console(); 393 suspend_console();
294 error = device_suspend(PMSG_SUSPEND); 394 error = device_suspend(PMSG_SUSPEND);
@@ -322,6 +422,8 @@ int hibernation_platform_enter(void)
322 device_resume(); 422 device_resume();
323 Resume_console: 423 Resume_console:
324 resume_console(); 424 resume_console();
425 Close:
426 hibernation_ops->end();
325 return error; 427 return error;
326} 428}
327 429
@@ -352,24 +454,17 @@ static void power_down(void)
352 * Valid image is on the disk, if we continue we risk serious data 454 * Valid image is on the disk, if we continue we risk serious data
353 * corruption after resume. 455 * corruption after resume.
354 */ 456 */
355 printk(KERN_CRIT "Please power me down manually\n"); 457 printk(KERN_CRIT "PM: Please power down manually\n");
356 while(1); 458 while(1);
357} 459}
358 460
359static void unprepare_processes(void)
360{
361 thaw_processes();
362 pm_restore_console();
363}
364
365static int prepare_processes(void) 461static int prepare_processes(void)
366{ 462{
367 int error = 0; 463 int error = 0;
368 464
369 pm_prepare_console();
370 if (freeze_processes()) { 465 if (freeze_processes()) {
371 error = -EBUSY; 466 error = -EBUSY;
372 unprepare_processes(); 467 thaw_processes();
373 } 468 }
374 return error; 469 return error;
375} 470}
@@ -389,6 +484,7 @@ int hibernate(void)
389 goto Unlock; 484 goto Unlock;
390 } 485 }
391 486
487 pm_prepare_console();
392 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); 488 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
393 if (error) 489 if (error)
394 goto Exit; 490 goto Exit;
@@ -398,7 +494,7 @@ int hibernate(void)
398 if (error) 494 if (error)
399 goto Exit; 495 goto Exit;
400 496
401 printk("Syncing filesystems ... "); 497 printk(KERN_INFO "PM: Syncing filesystems ... ");
402 sys_sync(); 498 sys_sync();
403 printk("done.\n"); 499 printk("done.\n");
404 500
@@ -406,11 +502,12 @@ int hibernate(void)
406 if (error) 502 if (error)
407 goto Finish; 503 goto Finish;
408 504
409 if (hibernation_mode == HIBERNATION_TESTPROC) { 505 if (hibernation_test(TEST_FREEZER))
410 printk("swsusp debug: Waiting for 5 seconds.\n");
411 mdelay(5000);
412 goto Thaw; 506 goto Thaw;
413 } 507
508 if (hibernation_testmode(HIBERNATION_TESTPROC))
509 goto Thaw;
510
414 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); 511 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
415 if (in_suspend && !error) { 512 if (in_suspend && !error) {
416 unsigned int flags = 0; 513 unsigned int flags = 0;
@@ -427,11 +524,12 @@ int hibernate(void)
427 swsusp_free(); 524 swsusp_free();
428 } 525 }
429 Thaw: 526 Thaw:
430 unprepare_processes(); 527 thaw_processes();
431 Finish: 528 Finish:
432 free_basic_memory_bitmaps(); 529 free_basic_memory_bitmaps();
433 Exit: 530 Exit:
434 pm_notifier_call_chain(PM_POST_HIBERNATION); 531 pm_notifier_call_chain(PM_POST_HIBERNATION);
532 pm_restore_console();
435 atomic_inc(&snapshot_device_available); 533 atomic_inc(&snapshot_device_available);
436 Unlock: 534 Unlock:
437 mutex_unlock(&pm_mutex); 535 mutex_unlock(&pm_mutex);
@@ -473,22 +571,23 @@ static int software_resume(void)
473 return -ENOENT; 571 return -ENOENT;
474 } 572 }
475 swsusp_resume_device = name_to_dev_t(resume_file); 573 swsusp_resume_device = name_to_dev_t(resume_file);
476 pr_debug("swsusp: Resume From Partition %s\n", resume_file); 574 pr_debug("PM: Resume from partition %s\n", resume_file);
477 } else { 575 } else {
478 pr_debug("swsusp: Resume From Partition %d:%d\n", 576 pr_debug("PM: Resume from partition %d:%d\n",
479 MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); 577 MAJOR(swsusp_resume_device),
578 MINOR(swsusp_resume_device));
480 } 579 }
481 580
482 if (noresume) { 581 if (noresume) {
483 /** 582 /**
484 * FIXME: If noresume is specified, we need to find the partition 583 * FIXME: If noresume is specified, we need to find the
485 * and reset it back to normal swap space. 584 * partition and reset it back to normal swap space.
486 */ 585 */
487 mutex_unlock(&pm_mutex); 586 mutex_unlock(&pm_mutex);
488 return 0; 587 return 0;
489 } 588 }
490 589
491 pr_debug("PM: Checking swsusp image.\n"); 590 pr_debug("PM: Checking hibernation image.\n");
492 error = swsusp_check(); 591 error = swsusp_check();
493 if (error) 592 if (error)
494 goto Unlock; 593 goto Unlock;
@@ -499,6 +598,11 @@ static int software_resume(void)
499 goto Unlock; 598 goto Unlock;
500 } 599 }
501 600
601 pm_prepare_console();
602 error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
603 if (error)
604 goto Finish;
605
502 error = create_basic_memory_bitmaps(); 606 error = create_basic_memory_bitmaps();
503 if (error) 607 if (error)
504 goto Finish; 608 goto Finish;
@@ -510,7 +614,7 @@ static int software_resume(void)
510 goto Done; 614 goto Done;
511 } 615 }
512 616
513 pr_debug("PM: Reading swsusp image.\n"); 617 pr_debug("PM: Reading hibernation image.\n");
514 618
515 error = swsusp_read(&flags); 619 error = swsusp_read(&flags);
516 if (!error) 620 if (!error)
@@ -518,10 +622,12 @@ static int software_resume(void)
518 622
519 printk(KERN_ERR "PM: Restore failed, recovering.\n"); 623 printk(KERN_ERR "PM: Restore failed, recovering.\n");
520 swsusp_free(); 624 swsusp_free();
521 unprepare_processes(); 625 thaw_processes();
522 Done: 626 Done:
523 free_basic_memory_bitmaps(); 627 free_basic_memory_bitmaps();
524 Finish: 628 Finish:
629 pm_notifier_call_chain(PM_POST_RESTORE);
630 pm_restore_console();
525 atomic_inc(&snapshot_device_available); 631 atomic_inc(&snapshot_device_available);
526 /* For success case, the suspend path will release the lock */ 632 /* For success case, the suspend path will release the lock */
527 Unlock: 633 Unlock:
@@ -636,7 +742,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
636 error = -EINVAL; 742 error = -EINVAL;
637 743
638 if (!error) 744 if (!error)
639 pr_debug("PM: suspend-to-disk mode set to '%s'\n", 745 pr_debug("PM: Hibernation mode set to '%s'\n",
640 hibernation_modes[mode]); 746 hibernation_modes[mode]);
641 mutex_unlock(&pm_mutex); 747 mutex_unlock(&pm_mutex);
642 return error ? error : n; 748 return error ? error : n;
@@ -668,7 +774,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
668 mutex_lock(&pm_mutex); 774 mutex_lock(&pm_mutex);
669 swsusp_resume_device = res; 775 swsusp_resume_device = res;
670 mutex_unlock(&pm_mutex); 776 mutex_unlock(&pm_mutex);
671 printk("Attempting manual resume\n"); 777 printk(KERN_INFO "PM: Starting manual resume from disk\n");
672 noresume = 0; 778 noresume = 0;
673 software_resume(); 779 software_resume();
674 ret = n; 780 ret = n;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index efc08360e627..6a6d5eb3524e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -24,13 +24,112 @@
24 24
25#include "power.h" 25#include "power.h"
26 26
27BLOCKING_NOTIFIER_HEAD(pm_chain_head);
28
29DEFINE_MUTEX(pm_mutex); 27DEFINE_MUTEX(pm_mutex);
30 28
31unsigned int pm_flags; 29unsigned int pm_flags;
32EXPORT_SYMBOL(pm_flags); 30EXPORT_SYMBOL(pm_flags);
33 31
32#ifdef CONFIG_PM_SLEEP
33
34/* Routines for PM-transition notifications */
35
36static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
37
38int register_pm_notifier(struct notifier_block *nb)
39{
40 return blocking_notifier_chain_register(&pm_chain_head, nb);
41}
42EXPORT_SYMBOL_GPL(register_pm_notifier);
43
44int unregister_pm_notifier(struct notifier_block *nb)
45{
46 return blocking_notifier_chain_unregister(&pm_chain_head, nb);
47}
48EXPORT_SYMBOL_GPL(unregister_pm_notifier);
49
50int pm_notifier_call_chain(unsigned long val)
51{
52 return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
53 == NOTIFY_BAD) ? -EINVAL : 0;
54}
55
56#ifdef CONFIG_PM_DEBUG
57int pm_test_level = TEST_NONE;
58
59static int suspend_test(int level)
60{
61 if (pm_test_level == level) {
62 printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
63 mdelay(5000);
64 return 1;
65 }
66 return 0;
67}
68
69static const char * const pm_tests[__TEST_AFTER_LAST] = {
70 [TEST_NONE] = "none",
71 [TEST_CORE] = "core",
72 [TEST_CPUS] = "processors",
73 [TEST_PLATFORM] = "platform",
74 [TEST_DEVICES] = "devices",
75 [TEST_FREEZER] = "freezer",
76};
77
78static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
79 char *buf)
80{
81 char *s = buf;
82 int level;
83
84 for (level = TEST_FIRST; level <= TEST_MAX; level++)
85 if (pm_tests[level]) {
86 if (level == pm_test_level)
87 s += sprintf(s, "[%s] ", pm_tests[level]);
88 else
89 s += sprintf(s, "%s ", pm_tests[level]);
90 }
91
92 if (s != buf)
93 /* convert the last space to a newline */
94 *(s-1) = '\n';
95
96 return (s - buf);
97}
98
99static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
100 const char *buf, size_t n)
101{
102 const char * const *s;
103 int level;
104 char *p;
105 int len;
106 int error = -EINVAL;
107
108 p = memchr(buf, '\n', n);
109 len = p ? p - buf : n;
110
111 mutex_lock(&pm_mutex);
112
113 level = TEST_FIRST;
114 for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
115 if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
116 pm_test_level = level;
117 error = 0;
118 break;
119 }
120
121 mutex_unlock(&pm_mutex);
122
123 return error ? error : n;
124}
125
126power_attr(pm_test);
127#else /* !CONFIG_PM_DEBUG */
128static inline int suspend_test(int level) { return 0; }
129#endif /* !CONFIG_PM_DEBUG */
130
131#endif /* CONFIG_PM_SLEEP */
132
34#ifdef CONFIG_SUSPEND 133#ifdef CONFIG_SUSPEND
35 134
36/* This is just an arbitrary number */ 135/* This is just an arbitrary number */
@@ -76,13 +175,13 @@ static int suspend_prepare(void)
76 if (!suspend_ops || !suspend_ops->enter) 175 if (!suspend_ops || !suspend_ops->enter)
77 return -EPERM; 176 return -EPERM;
78 177
178 pm_prepare_console();
179
79 error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); 180 error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
80 if (error) 181 if (error)
81 goto Finish; 182 goto Finish;
82 183
83 pm_prepare_console(); 184 if (suspend_freeze_processes()) {
84
85 if (freeze_processes()) {
86 error = -EAGAIN; 185 error = -EAGAIN;
87 goto Thaw; 186 goto Thaw;
88 } 187 }
@@ -100,10 +199,10 @@ static int suspend_prepare(void)
100 return 0; 199 return 0;
101 200
102 Thaw: 201 Thaw:
103 thaw_processes(); 202 suspend_thaw_processes();
104 pm_restore_console();
105 Finish: 203 Finish:
106 pm_notifier_call_chain(PM_POST_SUSPEND); 204 pm_notifier_call_chain(PM_POST_SUSPEND);
205 pm_restore_console();
107 return error; 206 return error;
108} 207}
109 208
@@ -133,10 +232,13 @@ static int suspend_enter(suspend_state_t state)
133 BUG_ON(!irqs_disabled()); 232 BUG_ON(!irqs_disabled());
134 233
135 if ((error = device_power_down(PMSG_SUSPEND))) { 234 if ((error = device_power_down(PMSG_SUSPEND))) {
136 printk(KERN_ERR "Some devices failed to power down\n"); 235 printk(KERN_ERR "PM: Some devices failed to power down\n");
137 goto Done; 236 goto Done;
138 } 237 }
139 error = suspend_ops->enter(state); 238
239 if (!suspend_test(TEST_CORE))
240 error = suspend_ops->enter(state);
241
140 device_power_up(); 242 device_power_up();
141 Done: 243 Done:
142 arch_suspend_enable_irqs(); 244 arch_suspend_enable_irqs();
@@ -145,8 +247,8 @@ static int suspend_enter(suspend_state_t state)
145} 247}
146 248
147/** 249/**
148 * suspend_devices_and_enter - suspend devices and enter the desired system sleep 250 * suspend_devices_and_enter - suspend devices and enter the desired system
149 * state. 251 * sleep state.
150 * @state: state to enter 252 * @state: state to enter
151 */ 253 */
152int suspend_devices_and_enter(suspend_state_t state) 254int suspend_devices_and_enter(suspend_state_t state)
@@ -156,33 +258,45 @@ int suspend_devices_and_enter(suspend_state_t state)
156 if (!suspend_ops) 258 if (!suspend_ops)
157 return -ENOSYS; 259 return -ENOSYS;
158 260
159 if (suspend_ops->set_target) { 261 if (suspend_ops->begin) {
160 error = suspend_ops->set_target(state); 262 error = suspend_ops->begin(state);
161 if (error) 263 if (error)
162 return error; 264 goto Close;
163 } 265 }
164 suspend_console(); 266 suspend_console();
165 error = device_suspend(PMSG_SUSPEND); 267 error = device_suspend(PMSG_SUSPEND);
166 if (error) { 268 if (error) {
167 printk(KERN_ERR "Some devices failed to suspend\n"); 269 printk(KERN_ERR "PM: Some devices failed to suspend\n");
168 goto Resume_console; 270 goto Resume_console;
169 } 271 }
272
273 if (suspend_test(TEST_DEVICES))
274 goto Resume_devices;
275
170 if (suspend_ops->prepare) { 276 if (suspend_ops->prepare) {
171 error = suspend_ops->prepare(); 277 error = suspend_ops->prepare();
172 if (error) 278 if (error)
173 goto Resume_devices; 279 goto Resume_devices;
174 } 280 }
281
282 if (suspend_test(TEST_PLATFORM))
283 goto Finish;
284
175 error = disable_nonboot_cpus(); 285 error = disable_nonboot_cpus();
176 if (!error) 286 if (!error && !suspend_test(TEST_CPUS))
177 suspend_enter(state); 287 suspend_enter(state);
178 288
179 enable_nonboot_cpus(); 289 enable_nonboot_cpus();
290 Finish:
180 if (suspend_ops->finish) 291 if (suspend_ops->finish)
181 suspend_ops->finish(); 292 suspend_ops->finish();
182 Resume_devices: 293 Resume_devices:
183 device_resume(); 294 device_resume();
184 Resume_console: 295 Resume_console:
185 resume_console(); 296 resume_console();
297 Close:
298 if (suspend_ops->end)
299 suspend_ops->end();
186 return error; 300 return error;
187} 301}
188 302
@@ -194,9 +308,9 @@ int suspend_devices_and_enter(suspend_state_t state)
194 */ 308 */
195static void suspend_finish(void) 309static void suspend_finish(void)
196{ 310{
197 thaw_processes(); 311 suspend_thaw_processes();
198 pm_restore_console();
199 pm_notifier_call_chain(PM_POST_SUSPEND); 312 pm_notifier_call_chain(PM_POST_SUSPEND);
313 pm_restore_console();
200} 314}
201 315
202 316
@@ -238,17 +352,22 @@ static int enter_state(suspend_state_t state)
238 if (!mutex_trylock(&pm_mutex)) 352 if (!mutex_trylock(&pm_mutex))
239 return -EBUSY; 353 return -EBUSY;
240 354
241 printk("Syncing filesystems ... "); 355 printk(KERN_INFO "PM: Syncing filesystems ... ");
242 sys_sync(); 356 sys_sync();
243 printk("done.\n"); 357 printk("done.\n");
244 358
245 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); 359 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
246 if ((error = suspend_prepare())) 360 error = suspend_prepare();
361 if (error)
247 goto Unlock; 362 goto Unlock;
248 363
364 if (suspend_test(TEST_FREEZER))
365 goto Finish;
366
249 pr_debug("PM: Entering %s sleep\n", pm_states[state]); 367 pr_debug("PM: Entering %s sleep\n", pm_states[state]);
250 error = suspend_devices_and_enter(state); 368 error = suspend_devices_and_enter(state);
251 369
370 Finish:
252 pr_debug("PM: Finishing wakeup.\n"); 371 pr_debug("PM: Finishing wakeup.\n");
253 suspend_finish(); 372 suspend_finish();
254 Unlock: 373 Unlock:
@@ -369,18 +488,18 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
369} 488}
370 489
371power_attr(pm_trace); 490power_attr(pm_trace);
491#endif /* CONFIG_PM_TRACE */
372 492
373static struct attribute * g[] = { 493static struct attribute * g[] = {
374 &state_attr.attr, 494 &state_attr.attr,
495#ifdef CONFIG_PM_TRACE
375 &pm_trace_attr.attr, 496 &pm_trace_attr.attr,
497#endif
498#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
499 &pm_test_attr.attr,
500#endif
376 NULL, 501 NULL,
377}; 502};
378#else
379static struct attribute * g[] = {
380 &state_attr.attr,
381 NULL,
382};
383#endif /* CONFIG_PM_TRACE */
384 503
385static struct attribute_group attr_group = { 504static struct attribute_group attr_group = {
386 .attrs = g, 505 .attrs = g,
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 2093c3a9a994..700f44ec8406 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,5 +1,7 @@
1#include <linux/suspend.h> 1#include <linux/suspend.h>
2#include <linux/suspend_ioctls.h>
2#include <linux/utsname.h> 3#include <linux/utsname.h>
4#include <linux/freezer.h>
3 5
4struct swsusp_info { 6struct swsusp_info {
5 struct new_utsname uts; 7 struct new_utsname uts;
@@ -128,42 +130,12 @@ struct snapshot_handle {
128#define data_of(handle) ((handle).buffer + (handle).buf_offset) 130#define data_of(handle) ((handle).buffer + (handle).buf_offset)
129 131
130extern unsigned int snapshot_additional_pages(struct zone *zone); 132extern unsigned int snapshot_additional_pages(struct zone *zone);
133extern unsigned long snapshot_get_image_size(void);
131extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); 134extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
132extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); 135extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
133extern void snapshot_write_finalize(struct snapshot_handle *handle); 136extern void snapshot_write_finalize(struct snapshot_handle *handle);
134extern int snapshot_image_loaded(struct snapshot_handle *handle); 137extern int snapshot_image_loaded(struct snapshot_handle *handle);
135 138
136/*
137 * This structure is used to pass the values needed for the identification
138 * of the resume swap area from a user space to the kernel via the
139 * SNAPSHOT_SET_SWAP_AREA ioctl
140 */
141struct resume_swap_area {
142 loff_t offset;
143 u_int32_t dev;
144} __attribute__((packed));
145
146#define SNAPSHOT_IOC_MAGIC '3'
147#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
148#define SNAPSHOT_UNFREEZE _IO(SNAPSHOT_IOC_MAGIC, 2)
149#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
150#define SNAPSHOT_ATOMIC_RESTORE _IO(SNAPSHOT_IOC_MAGIC, 4)
151#define SNAPSHOT_FREE _IO(SNAPSHOT_IOC_MAGIC, 5)
152#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
153#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
154#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
155#define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9)
156#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
157#define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11)
158#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
159#define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \
160 struct resume_swap_area)
161#define SNAPSHOT_IOC_MAXNR 13
162
163#define PMOPS_PREPARE 1
164#define PMOPS_ENTER 2
165#define PMOPS_FINISH 3
166
167/* If unset, the snapshot device cannot be open. */ 139/* If unset, the snapshot device cannot be open. */
168extern atomic_t snapshot_device_available; 140extern atomic_t snapshot_device_available;
169 141
@@ -181,7 +153,6 @@ extern int swsusp_swap_in_use(void);
181extern int swsusp_check(void); 153extern int swsusp_check(void);
182extern int swsusp_shrink_memory(void); 154extern int swsusp_shrink_memory(void);
183extern void swsusp_free(void); 155extern void swsusp_free(void);
184extern int swsusp_resume(void);
185extern int swsusp_read(unsigned int *flags_p); 156extern int swsusp_read(unsigned int *flags_p);
186extern int swsusp_write(unsigned int flags); 157extern int swsusp_write(unsigned int flags);
187extern void swsusp_close(void); 158extern void swsusp_close(void);
@@ -201,11 +172,56 @@ static inline int suspend_devices_and_enter(suspend_state_t state)
201} 172}
202#endif /* !CONFIG_SUSPEND */ 173#endif /* !CONFIG_SUSPEND */
203 174
204/* kernel/power/common.c */ 175#ifdef CONFIG_PM_SLEEP
205extern struct blocking_notifier_head pm_chain_head; 176/* kernel/power/main.c */
177extern int pm_notifier_call_chain(unsigned long val);
178#endif
179
180#ifdef CONFIG_HIGHMEM
181unsigned int count_highmem_pages(void);
182int restore_highmem(void);
183#else
184static inline unsigned int count_highmem_pages(void) { return 0; }
185static inline int restore_highmem(void) { return 0; }
186#endif
187
188/*
189 * Suspend test levels
190 */
191enum {
192 /* keep first */
193 TEST_NONE,
194 TEST_CORE,
195 TEST_CPUS,
196 TEST_PLATFORM,
197 TEST_DEVICES,
198 TEST_FREEZER,
199 /* keep last */
200 __TEST_AFTER_LAST
201};
202
203#define TEST_FIRST TEST_NONE
204#define TEST_MAX (__TEST_AFTER_LAST - 1)
205
206extern int pm_test_level;
207
208#ifdef CONFIG_SUSPEND_FREEZER
209static inline int suspend_freeze_processes(void)
210{
211 return freeze_processes();
212}
206 213
207static inline int pm_notifier_call_chain(unsigned long val) 214static inline void suspend_thaw_processes(void)
208{ 215{
209 return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) 216 thaw_processes();
210 == NOTIFY_BAD) ? -EINVAL : 0;
211} 217}
218#else
219static inline int suspend_freeze_processes(void)
220{
221 return 0;
222}
223
224static inline void suspend_thaw_processes(void)
225{
226}
227#endif
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 6533923e711b..7c2118f9597f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -86,9 +86,9 @@ static void fake_signal_wake_up(struct task_struct *p, int resume)
86 86
87static void send_fake_signal(struct task_struct *p) 87static void send_fake_signal(struct task_struct *p)
88{ 88{
89 if (p->state == TASK_STOPPED) 89 if (task_is_stopped(p))
90 force_sig_specific(SIGSTOP, p); 90 force_sig_specific(SIGSTOP, p);
91 fake_signal_wake_up(p, p->state == TASK_STOPPED); 91 fake_signal_wake_up(p, task_is_stopped(p));
92} 92}
93 93
94static int has_mm(struct task_struct *p) 94static int has_mm(struct task_struct *p)
@@ -182,7 +182,7 @@ static int try_to_freeze_tasks(int freeze_user_space)
182 if (frozen(p) || !freezeable(p)) 182 if (frozen(p) || !freezeable(p))
183 continue; 183 continue;
184 184
185 if (p->state == TASK_TRACED && frozen(p->parent)) { 185 if (task_is_traced(p) && frozen(p->parent)) {
186 cancel_freezing(p); 186 cancel_freezing(p);
187 continue; 187 continue;
188 } 188 }
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 78039b477d2b..f6a5df934f8d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -635,7 +635,7 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
635 region->end_pfn = end_pfn; 635 region->end_pfn = end_pfn;
636 list_add_tail(&region->list, &nosave_regions); 636 list_add_tail(&region->list, &nosave_regions);
637 Report: 637 Report:
638 printk("swsusp: Registered nosave memory region: %016lx - %016lx\n", 638 printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
639 start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); 639 start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
640} 640}
641 641
@@ -704,7 +704,7 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
704 list_for_each_entry(region, &nosave_regions, list) { 704 list_for_each_entry(region, &nosave_regions, list) {
705 unsigned long pfn; 705 unsigned long pfn;
706 706
707 printk("swsusp: Marking nosave pages: %016lx - %016lx\n", 707 pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
708 region->start_pfn << PAGE_SHIFT, 708 region->start_pfn << PAGE_SHIFT,
709 region->end_pfn << PAGE_SHIFT); 709 region->end_pfn << PAGE_SHIFT);
710 710
@@ -749,7 +749,7 @@ int create_basic_memory_bitmaps(void)
749 free_pages_map = bm2; 749 free_pages_map = bm2;
750 mark_nosave_pages(forbidden_pages_map); 750 mark_nosave_pages(forbidden_pages_map);
751 751
752 printk("swsusp: Basic memory bitmaps created\n"); 752 pr_debug("PM: Basic memory bitmaps created\n");
753 753
754 return 0; 754 return 0;
755 755
@@ -784,7 +784,7 @@ void free_basic_memory_bitmaps(void)
784 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 784 memory_bm_free(bm2, PG_UNSAFE_CLEAR);
785 kfree(bm2); 785 kfree(bm2);
786 786
787 printk("swsusp: Basic memory bitmaps freed\n"); 787 pr_debug("PM: Basic memory bitmaps freed\n");
788} 788}
789 789
790/** 790/**
@@ -872,7 +872,6 @@ unsigned int count_highmem_pages(void)
872} 872}
873#else 873#else
874static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } 874static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
875static inline unsigned int count_highmem_pages(void) { return 0; }
876#endif /* CONFIG_HIGHMEM */ 875#endif /* CONFIG_HIGHMEM */
877 876
878/** 877/**
@@ -1089,7 +1088,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1089 } 1088 }
1090 1089
1091 nr_pages += count_pages_for_highmem(nr_highmem); 1090 nr_pages += count_pages_for_highmem(nr_highmem);
1092 pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n", 1091 pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
1093 nr_pages, PAGES_FOR_IO, meta, free); 1092 nr_pages, PAGES_FOR_IO, meta, free);
1094 1093
1095 return free > nr_pages + PAGES_FOR_IO + meta; 1094 return free > nr_pages + PAGES_FOR_IO + meta;
@@ -1202,20 +1201,20 @@ asmlinkage int swsusp_save(void)
1202{ 1201{
1203 unsigned int nr_pages, nr_highmem; 1202 unsigned int nr_pages, nr_highmem;
1204 1203
1205 printk("swsusp: critical section: \n"); 1204 printk(KERN_INFO "PM: Creating hibernation image: \n");
1206 1205
1207 drain_local_pages(); 1206 drain_local_pages();
1208 nr_pages = count_data_pages(); 1207 nr_pages = count_data_pages();
1209 nr_highmem = count_highmem_pages(); 1208 nr_highmem = count_highmem_pages();
1210 printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem); 1209 printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1211 1210
1212 if (!enough_free_mem(nr_pages, nr_highmem)) { 1211 if (!enough_free_mem(nr_pages, nr_highmem)) {
1213 printk(KERN_ERR "swsusp: Not enough free memory\n"); 1212 printk(KERN_ERR "PM: Not enough free memory\n");
1214 return -ENOMEM; 1213 return -ENOMEM;
1215 } 1214 }
1216 1215
1217 if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) { 1216 if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1218 printk(KERN_ERR "swsusp: Memory allocation failed\n"); 1217 printk(KERN_ERR "PM: Memory allocation failed\n");
1219 return -ENOMEM; 1218 return -ENOMEM;
1220 } 1219 }
1221 1220
@@ -1235,7 +1234,8 @@ asmlinkage int swsusp_save(void)
1235 nr_copy_pages = nr_pages; 1234 nr_copy_pages = nr_pages;
1236 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 1235 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1237 1236
1238 printk("swsusp: critical section: done (%d pages copied)\n", nr_pages); 1237 printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1238 nr_pages);
1239 1239
1240 return 0; 1240 return 0;
1241} 1241}
@@ -1264,12 +1264,17 @@ static char *check_image_kernel(struct swsusp_info *info)
1264} 1264}
1265#endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 1265#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1266 1266
1267unsigned long snapshot_get_image_size(void)
1268{
1269 return nr_copy_pages + nr_meta_pages + 1;
1270}
1271
1267static int init_header(struct swsusp_info *info) 1272static int init_header(struct swsusp_info *info)
1268{ 1273{
1269 memset(info, 0, sizeof(struct swsusp_info)); 1274 memset(info, 0, sizeof(struct swsusp_info));
1270 info->num_physpages = num_physpages; 1275 info->num_physpages = num_physpages;
1271 info->image_pages = nr_copy_pages; 1276 info->image_pages = nr_copy_pages;
1272 info->pages = nr_copy_pages + nr_meta_pages + 1; 1277 info->pages = snapshot_get_image_size();
1273 info->size = info->pages; 1278 info->size = info->pages;
1274 info->size <<= PAGE_SHIFT; 1279 info->size <<= PAGE_SHIFT;
1275 return init_header_complete(info); 1280 return init_header_complete(info);
@@ -1429,7 +1434,7 @@ static int check_header(struct swsusp_info *info)
1429 if (!reason && info->num_physpages != num_physpages) 1434 if (!reason && info->num_physpages != num_physpages)
1430 reason = "memory size"; 1435 reason = "memory size";
1431 if (reason) { 1436 if (reason) {
1432 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason); 1437 printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1433 return -EPERM; 1438 return -EPERM;
1434 } 1439 }
1435 return 0; 1440 return 0;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 917aba100575..a0abf9a463f9 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -28,8 +28,6 @@
28 28
29#include "power.h" 29#include "power.h"
30 30
31extern char resume_file[];
32
33#define SWSUSP_SIG "S1SUSPEND" 31#define SWSUSP_SIG "S1SUSPEND"
34 32
35struct swsusp_header { 33struct swsusp_header {
@@ -73,7 +71,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
73 bio->bi_end_io = end_swap_bio_read; 71 bio->bi_end_io = end_swap_bio_read;
74 72
75 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 73 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
76 printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); 74 printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
75 page_off);
77 bio_put(bio); 76 bio_put(bio);
78 return -EFAULT; 77 return -EFAULT;
79 } 78 }
@@ -153,7 +152,7 @@ static int mark_swapfiles(sector_t start, unsigned int flags)
153 error = bio_write_page(swsusp_resume_block, 152 error = bio_write_page(swsusp_resume_block,
154 swsusp_header, NULL); 153 swsusp_header, NULL);
155 } else { 154 } else {
156 printk(KERN_ERR "swsusp: Swap header not found!\n"); 155 printk(KERN_ERR "PM: Swap header not found!\n");
157 error = -ENODEV; 156 error = -ENODEV;
158 } 157 }
159 return error; 158 return error;
@@ -325,7 +324,8 @@ static int save_image(struct swap_map_handle *handle,
325 struct timeval start; 324 struct timeval start;
326 struct timeval stop; 325 struct timeval stop;
327 326
328 printk("Saving image data pages (%u pages) ... ", nr_to_write); 327 printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ",
328 nr_to_write);
329 m = nr_to_write / 100; 329 m = nr_to_write / 100;
330 if (!m) 330 if (!m)
331 m = 1; 331 m = 1;
@@ -365,7 +365,7 @@ static int enough_swap(unsigned int nr_pages)
365{ 365{
366 unsigned int free_swap = count_swap_pages(root_swap, 1); 366 unsigned int free_swap = count_swap_pages(root_swap, 1);
367 367
368 pr_debug("swsusp: free swap pages: %u\n", free_swap); 368 pr_debug("PM: Free swap pages: %u\n", free_swap);
369 return free_swap > nr_pages + PAGES_FOR_IO; 369 return free_swap > nr_pages + PAGES_FOR_IO;
370} 370}
371 371
@@ -388,7 +388,7 @@ int swsusp_write(unsigned int flags)
388 388
389 error = swsusp_swap_check(); 389 error = swsusp_swap_check();
390 if (error) { 390 if (error) {
391 printk(KERN_ERR "swsusp: Cannot find swap device, try " 391 printk(KERN_ERR "PM: Cannot find swap device, try "
392 "swapon -a.\n"); 392 "swapon -a.\n");
393 return error; 393 return error;
394 } 394 }
@@ -402,7 +402,7 @@ int swsusp_write(unsigned int flags)
402 } 402 }
403 header = (struct swsusp_info *)data_of(snapshot); 403 header = (struct swsusp_info *)data_of(snapshot);
404 if (!enough_swap(header->pages)) { 404 if (!enough_swap(header->pages)) {
405 printk(KERN_ERR "swsusp: Not enough free swap\n"); 405 printk(KERN_ERR "PM: Not enough free swap\n");
406 error = -ENOSPC; 406 error = -ENOSPC;
407 goto out; 407 goto out;
408 } 408 }
@@ -417,7 +417,7 @@ int swsusp_write(unsigned int flags)
417 417
418 if (!error) { 418 if (!error) {
419 flush_swap_writer(&handle); 419 flush_swap_writer(&handle);
420 printk("S"); 420 printk(KERN_INFO "PM: S");
421 error = mark_swapfiles(start, flags); 421 error = mark_swapfiles(start, flags);
422 printk("|\n"); 422 printk("|\n");
423 } 423 }
@@ -507,7 +507,8 @@ static int load_image(struct swap_map_handle *handle,
507 int err2; 507 int err2;
508 unsigned nr_pages; 508 unsigned nr_pages;
509 509
510 printk("Loading image data pages (%u pages) ... ", nr_to_read); 510 printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ",
511 nr_to_read);
511 m = nr_to_read / 100; 512 m = nr_to_read / 100;
512 if (!m) 513 if (!m)
513 m = 1; 514 m = 1;
@@ -558,7 +559,7 @@ int swsusp_read(unsigned int *flags_p)
558 559
559 *flags_p = swsusp_header->flags; 560 *flags_p = swsusp_header->flags;
560 if (IS_ERR(resume_bdev)) { 561 if (IS_ERR(resume_bdev)) {
561 pr_debug("swsusp: block device not initialised\n"); 562 pr_debug("PM: Image device not initialised\n");
562 return PTR_ERR(resume_bdev); 563 return PTR_ERR(resume_bdev);
563 } 564 }
564 565
@@ -577,9 +578,9 @@ int swsusp_read(unsigned int *flags_p)
577 blkdev_put(resume_bdev); 578 blkdev_put(resume_bdev);
578 579
579 if (!error) 580 if (!error)
580 pr_debug("swsusp: Reading resume file was successful\n"); 581 pr_debug("PM: Image successfully loaded\n");
581 else 582 else
582 pr_debug("swsusp: Error %d resuming\n", error); 583 pr_debug("PM: Error %d resuming\n", error);
583 return error; 584 return error;
584} 585}
585 586
@@ -611,13 +612,13 @@ int swsusp_check(void)
611 if (error) 612 if (error)
612 blkdev_put(resume_bdev); 613 blkdev_put(resume_bdev);
613 else 614 else
614 pr_debug("swsusp: Signature found, resuming\n"); 615 pr_debug("PM: Signature found, resuming\n");
615 } else { 616 } else {
616 error = PTR_ERR(resume_bdev); 617 error = PTR_ERR(resume_bdev);
617 } 618 }
618 619
619 if (error) 620 if (error)
620 pr_debug("swsusp: Error %d check for resume file\n", error); 621 pr_debug("PM: Error %d checking image file\n", error);
621 622
622 return error; 623 return error;
623} 624}
@@ -629,7 +630,7 @@ int swsusp_check(void)
629void swsusp_close(void) 630void swsusp_close(void)
630{ 631{
631 if (IS_ERR(resume_bdev)) { 632 if (IS_ERR(resume_bdev)) {
632 pr_debug("swsusp: block device not initialised\n"); 633 pr_debug("PM: Image device not initialised\n");
633 return; 634 return;
634 } 635 }
635 636
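The kernel/power/swap.c hunks above make two orthogonal changes per message: the prefix moves from "swsusp:" to "PM:", and each message gains an explicit KERN_* level where it previously fell back to the default loglevel. The bare printk("|\n") after the KERN_INFO "PM: S" line is left without a level on purpose: a printk() with no level marker appends to the still-open line started by the previous printk(), so progress output stays on one console line. A minimal sketch of that convention (illustrative, not part of the patch):

    printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ", nr_to_write);
    /* ... write the pages ... */
    printk("done\n");	/* no KERN_* level: completes the KERN_INFO line above */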
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index e1722d3155f1..023ff2a31d89 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -64,14 +64,6 @@ unsigned long image_size = 500 * 1024 * 1024;
64 64
65int in_suspend __nosavedata = 0; 65int in_suspend __nosavedata = 0;
66 66
67#ifdef CONFIG_HIGHMEM
68unsigned int count_highmem_pages(void);
69int restore_highmem(void);
70#else
71static inline int restore_highmem(void) { return 0; }
72static inline unsigned int count_highmem_pages(void) { return 0; }
73#endif
74
75/** 67/**
76 * The following functions are used for tracing the allocated 68 * The following functions are used for tracing the allocated
77 * swap pages, so that they can be freed in case of an error. 69 * swap pages, so that they can be freed in case of an error.
@@ -196,7 +188,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
196 centisecs = 1; /* avoid div-by-zero */ 188 centisecs = 1; /* avoid div-by-zero */
197 k = nr_pages * (PAGE_SIZE / 1024); 189 k = nr_pages * (PAGE_SIZE / 1024);
198 kps = (k * 100) / centisecs; 190 kps = (k * 100) / centisecs;
199 printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, 191 printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
192 msg, k,
200 centisecs / 100, centisecs % 100, 193 centisecs / 100, centisecs % 100,
201 kps / 1000, (kps % 1000) / 10); 194 kps / 1000, (kps % 1000) / 10);
202} 195}
@@ -227,7 +220,7 @@ int swsusp_shrink_memory(void)
227 char *p = "-\\|/"; 220 char *p = "-\\|/";
228 struct timeval start, stop; 221 struct timeval start, stop;
229 222
230 printk("Shrinking memory... "); 223 printk(KERN_INFO "PM: Shrinking memory... ");
231 do_gettimeofday(&start); 224 do_gettimeofday(&start);
232 do { 225 do {
233 long size, highmem_size; 226 long size, highmem_size;
@@ -269,38 +262,3 @@ int swsusp_shrink_memory(void)
269 262
270 return 0; 263 return 0;
271} 264}
272
273int swsusp_resume(void)
274{
275 int error;
276
277 local_irq_disable();
278 /* NOTE: device_power_down() is just a suspend() with irqs off;
279 * it has no special "power things down" semantics
280 */
281 if (device_power_down(PMSG_PRETHAW))
282 printk(KERN_ERR "Some devices failed to power down, very bad\n");
283 /* We'll ignore saved state, but this gets preempt count (etc) right */
284 save_processor_state();
285 error = restore_highmem();
286 if (!error) {
287 error = swsusp_arch_resume();
288 /* The code below is only ever reached in case of a failure.
289 * Otherwise execution continues at place where
290 * swsusp_arch_suspend() was called
291 */
292 BUG_ON(!error);
293 /* This call to restore_highmem() undos the previous one */
294 restore_highmem();
295 }
296 /* The only reason why swsusp_arch_resume() can fail is memory being
297 * very tight, so we have to free it as soon as we can to avoid
298 * subsequent failures
299 */
300 swsusp_free();
301 restore_processor_state();
302 touch_softlockup_watchdog();
303 device_power_up();
304 local_irq_enable();
305 return error;
306}
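The swsusp.c hunk above drops both the CONFIG_HIGHMEM prototype block and swsusp_resume() from this file. The highmem declarations presumably move to a header shared by the rest of the hibernation code, such as kernel/power/power.h; that destination is an assumption, since the receiving hunk is not on this page. Their assumed shape after the move, reconstructed from the removed lines:

    /* kernel/power/power.h (assumed destination, not shown in this diff) */
    #ifdef CONFIG_HIGHMEM
    unsigned int count_highmem_pages(void);
    int restore_highmem(void);
    #else
    static inline unsigned int count_highmem_pages(void) { return 0; }
    static inline int restore_highmem(void) { return 0; }
    #endif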
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 5bd321bcbb75..f5512cb3aa86 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -28,6 +28,29 @@
28 28
29#include "power.h" 29#include "power.h"
30 30
31/*
32 * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and
33 * will be removed in the future. They are only preserved here for
34 * compatibility with existing userland utilities.
35 */
36#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
37#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
38
39#define PMOPS_PREPARE 1
40#define PMOPS_ENTER 2
41#define PMOPS_FINISH 3
42
43/*
44 * NOTE: The following ioctl definitions are wrong and have been replaced with
45 * correct ones. They are only preserved here for compatibility with existing
46 * userland utilities and will be removed in the future.
47 */
48#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
49#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
50#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
51#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
52
53
31#define SNAPSHOT_MINOR 231 54#define SNAPSHOT_MINOR 231
32 55
33static struct snapshot_data { 56static struct snapshot_data {
@@ -36,7 +59,7 @@ static struct snapshot_data {
36 int mode; 59 int mode;
37 char frozen; 60 char frozen;
38 char ready; 61 char ready;
39 char platform_suspend; 62 char platform_support;
40} snapshot_state; 63} snapshot_state;
41 64
42atomic_t snapshot_device_available = ATOMIC_INIT(1); 65atomic_t snapshot_device_available = ATOMIC_INIT(1);
@@ -44,6 +67,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1);
44static int snapshot_open(struct inode *inode, struct file *filp) 67static int snapshot_open(struct inode *inode, struct file *filp)
45{ 68{
46 struct snapshot_data *data; 69 struct snapshot_data *data;
70 int error;
47 71
48 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) 72 if (!atomic_add_unless(&snapshot_device_available, -1, 0))
49 return -EBUSY; 73 return -EBUSY;
@@ -64,13 +88,23 @@ static int snapshot_open(struct inode *inode, struct file *filp)
64 data->swap = swsusp_resume_device ? 88 data->swap = swsusp_resume_device ?
65 swap_type_of(swsusp_resume_device, 0, NULL) : -1; 89 swap_type_of(swsusp_resume_device, 0, NULL) : -1;
66 data->mode = O_RDONLY; 90 data->mode = O_RDONLY;
91 error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
92 if (error)
93 pm_notifier_call_chain(PM_POST_RESTORE);
67 } else { 94 } else {
68 data->swap = -1; 95 data->swap = -1;
69 data->mode = O_WRONLY; 96 data->mode = O_WRONLY;
97 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
98 if (error)
99 pm_notifier_call_chain(PM_POST_HIBERNATION);
100 }
101 if (error) {
102 atomic_inc(&snapshot_device_available);
103 return error;
70 } 104 }
71 data->frozen = 0; 105 data->frozen = 0;
72 data->ready = 0; 106 data->ready = 0;
73 data->platform_suspend = 0; 107 data->platform_support = 0;
74 108
75 return 0; 109 return 0;
76} 110}
@@ -88,6 +122,8 @@ static int snapshot_release(struct inode *inode, struct file *filp)
88 thaw_processes(); 122 thaw_processes();
89 mutex_unlock(&pm_mutex); 123 mutex_unlock(&pm_mutex);
90 } 124 }
125 pm_notifier_call_chain(data->mode == O_WRONLY ?
126 PM_POST_HIBERNATION : PM_POST_RESTORE);
91 atomic_inc(&snapshot_device_available); 127 atomic_inc(&snapshot_device_available);
92 return 0; 128 return 0;
93} 129}
@@ -133,7 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
133{ 169{
134 int error = 0; 170 int error = 0;
135 struct snapshot_data *data; 171 struct snapshot_data *data;
136 loff_t avail; 172 loff_t size;
137 sector_t offset; 173 sector_t offset;
138 174
139 if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) 175 if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
@@ -151,18 +187,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
151 if (data->frozen) 187 if (data->frozen)
152 break; 188 break;
153 mutex_lock(&pm_mutex); 189 mutex_lock(&pm_mutex);
154 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); 190 printk("Syncing filesystems ... ");
155 if (!error) { 191 sys_sync();
156 printk("Syncing filesystems ... "); 192 printk("done.\n");
157 sys_sync(); 193
158 printk("done.\n"); 194 error = freeze_processes();
159
160 error = freeze_processes();
161 if (error)
162 thaw_processes();
163 }
164 if (error) 195 if (error)
165 pm_notifier_call_chain(PM_POST_HIBERNATION); 196 thaw_processes();
166 mutex_unlock(&pm_mutex); 197 mutex_unlock(&pm_mutex);
167 if (!error) 198 if (!error)
168 data->frozen = 1; 199 data->frozen = 1;
@@ -173,19 +204,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
173 break; 204 break;
174 mutex_lock(&pm_mutex); 205 mutex_lock(&pm_mutex);
175 thaw_processes(); 206 thaw_processes();
176 pm_notifier_call_chain(PM_POST_HIBERNATION);
177 mutex_unlock(&pm_mutex); 207 mutex_unlock(&pm_mutex);
178 data->frozen = 0; 208 data->frozen = 0;
179 break; 209 break;
180 210
211 case SNAPSHOT_CREATE_IMAGE:
181 case SNAPSHOT_ATOMIC_SNAPSHOT: 212 case SNAPSHOT_ATOMIC_SNAPSHOT:
182 if (data->mode != O_RDONLY || !data->frozen || data->ready) { 213 if (data->mode != O_RDONLY || !data->frozen || data->ready) {
183 error = -EPERM; 214 error = -EPERM;
184 break; 215 break;
185 } 216 }
186 error = hibernation_snapshot(data->platform_suspend); 217 error = hibernation_snapshot(data->platform_support);
187 if (!error) 218 if (!error)
188 error = put_user(in_suspend, (unsigned int __user *)arg); 219 error = put_user(in_suspend, (int __user *)arg);
189 if (!error) 220 if (!error)
190 data->ready = 1; 221 data->ready = 1;
191 break; 222 break;
@@ -197,7 +228,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
197 error = -EPERM; 228 error = -EPERM;
198 break; 229 break;
199 } 230 }
200 error = hibernation_restore(data->platform_suspend); 231 error = hibernation_restore(data->platform_support);
201 break; 232 break;
202 233
203 case SNAPSHOT_FREE: 234 case SNAPSHOT_FREE:
@@ -206,16 +237,29 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
206 data->ready = 0; 237 data->ready = 0;
207 break; 238 break;
208 239
240 case SNAPSHOT_PREF_IMAGE_SIZE:
209 case SNAPSHOT_SET_IMAGE_SIZE: 241 case SNAPSHOT_SET_IMAGE_SIZE:
210 image_size = arg; 242 image_size = arg;
211 break; 243 break;
212 244
245 case SNAPSHOT_GET_IMAGE_SIZE:
246 if (!data->ready) {
247 error = -ENODATA;
248 break;
249 }
250 size = snapshot_get_image_size();
251 size <<= PAGE_SHIFT;
252 error = put_user(size, (loff_t __user *)arg);
253 break;
254
255 case SNAPSHOT_AVAIL_SWAP_SIZE:
213 case SNAPSHOT_AVAIL_SWAP: 256 case SNAPSHOT_AVAIL_SWAP:
214 avail = count_swap_pages(data->swap, 1); 257 size = count_swap_pages(data->swap, 1);
215 avail <<= PAGE_SHIFT; 258 size <<= PAGE_SHIFT;
216 error = put_user(avail, (loff_t __user *)arg); 259 error = put_user(size, (loff_t __user *)arg);
217 break; 260 break;
218 261
262 case SNAPSHOT_ALLOC_SWAP_PAGE:
219 case SNAPSHOT_GET_SWAP_PAGE: 263 case SNAPSHOT_GET_SWAP_PAGE:
220 if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { 264 if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
221 error = -ENODEV; 265 error = -ENODEV;
@@ -224,7 +268,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
224 offset = alloc_swapdev_block(data->swap); 268 offset = alloc_swapdev_block(data->swap);
225 if (offset) { 269 if (offset) {
226 offset <<= PAGE_SHIFT; 270 offset <<= PAGE_SHIFT;
227 error = put_user(offset, (sector_t __user *)arg); 271 error = put_user(offset, (loff_t __user *)arg);
228 } else { 272 } else {
229 error = -ENOSPC; 273 error = -ENOSPC;
230 } 274 }
@@ -238,7 +282,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
238 free_all_swap_pages(data->swap); 282 free_all_swap_pages(data->swap);
239 break; 283 break;
240 284
241 case SNAPSHOT_SET_SWAP_FILE: 285 case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
242 if (!swsusp_swap_in_use()) { 286 if (!swsusp_swap_in_use()) {
243 /* 287 /*
244 * User space encodes device types as two-byte values, 288 * User space encodes device types as two-byte values,
@@ -275,26 +319,33 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
275 mutex_unlock(&pm_mutex); 319 mutex_unlock(&pm_mutex);
276 break; 320 break;
277 321
278 case SNAPSHOT_PMOPS: 322 case SNAPSHOT_PLATFORM_SUPPORT:
323 data->platform_support = !!arg;
324 break;
325
326 case SNAPSHOT_POWER_OFF:
327 if (data->platform_support)
328 error = hibernation_platform_enter();
329 break;
330
331 case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
279 error = -EINVAL; 332 error = -EINVAL;
280 333
281 switch (arg) { 334 switch (arg) {
282 335
283 case PMOPS_PREPARE: 336 case PMOPS_PREPARE:
284 data->platform_suspend = 1; 337 data->platform_support = 1;
285 error = 0; 338 error = 0;
286 break; 339 break;
287 340
288 case PMOPS_ENTER: 341 case PMOPS_ENTER:
289 if (data->platform_suspend) 342 if (data->platform_support)
290 error = hibernation_platform_enter(); 343 error = hibernation_platform_enter();
291
292 break; 344 break;
293 345
294 case PMOPS_FINISH: 346 case PMOPS_FINISH:
295 if (data->platform_suspend) 347 if (data->platform_support)
296 error = 0; 348 error = 0;
297
298 break; 349 break;
299 350
300 default: 351 default:
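The kernel/power/user.c changes renumber several snapshot ioctls while keeping the deprecated numbers wired to the same handlers, so old and new userland keep working during the transition. A hypothetical userland probe built on that (only the ioctl names come from the hunks above; the fallback logic and variables are illustrative):

    /* userspace sketch: prefer the new command, fall back to the deprecated
     * number on kernels that only know the old one; error handling elided */
    int in_suspend = 0;
    int err = ioctl(fd, SNAPSHOT_CREATE_IMAGE, &in_suspend);
    if (err && errno == ENOTTY)
            err = ioctl(fd, SNAPSHOT_ATOMIC_SNAPSHOT, &in_suspend);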
diff --git a/kernel/printk.c b/kernel/printk.c
index 58bbec684119..29ae1e99cde0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -455,10 +455,10 @@ static int __init ignore_loglevel_setup(char *str)
455 ignore_loglevel = 1; 455 ignore_loglevel = 1;
456 printk(KERN_INFO "debug: ignoring loglevel setting.\n"); 456 printk(KERN_INFO "debug: ignoring loglevel setting.\n");
457 457
458 return 1; 458 return 0;
459} 459}
460 460
461__setup("ignore_loglevel", ignore_loglevel_setup); 461early_param("ignore_loglevel", ignore_loglevel_setup);
462 462
463/* 463/*
464 * Write out chars from start to end - 1 inclusive 464 * Write out chars from start to end - 1 inclusive
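The printk.c hunk is a matched pair: __setup() handlers return 1 to signal that the option was consumed, while early_param() handlers return 0 on success (non-zero is reported as an error), so moving ignore_loglevel to early_param() requires the return-value flip on the same line. The resulting pattern for an option that must take effect before consoles are fully initialised (the flag name is illustrative):

    static int __init my_early_flag_setup(char *str)
    {
            my_early_flag = 1;
            return 0;	/* early_param() convention: 0 == handled */
    }
    early_param("my_early_flag", my_early_flag_setup);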
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6e9b8be4b05..b0d4ab4dfd3d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -51,7 +51,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
51void ptrace_untrace(struct task_struct *child) 51void ptrace_untrace(struct task_struct *child)
52{ 52{
53 spin_lock(&child->sighand->siglock); 53 spin_lock(&child->sighand->siglock);
54 if (child->state == TASK_TRACED) { 54 if (task_is_traced(child)) {
55 if (child->signal->flags & SIGNAL_STOP_STOPPED) { 55 if (child->signal->flags & SIGNAL_STOP_STOPPED) {
56 child->state = TASK_STOPPED; 56 child->state = TASK_STOPPED;
57 } else { 57 } else {
@@ -79,7 +79,7 @@ void __ptrace_unlink(struct task_struct *child)
79 add_parent(child); 79 add_parent(child);
80 } 80 }
81 81
82 if (child->state == TASK_TRACED) 82 if (task_is_traced(child))
83 ptrace_untrace(child); 83 ptrace_untrace(child);
84} 84}
85 85
@@ -103,9 +103,9 @@ int ptrace_check_attach(struct task_struct *child, int kill)
103 && child->signal != NULL) { 103 && child->signal != NULL) {
104 ret = 0; 104 ret = 0;
105 spin_lock_irq(&child->sighand->siglock); 105 spin_lock_irq(&child->sighand->siglock);
106 if (child->state == TASK_STOPPED) { 106 if (task_is_stopped(child)) {
107 child->state = TASK_TRACED; 107 child->state = TASK_TRACED;
108 } else if (child->state != TASK_TRACED && !kill) { 108 } else if (!task_is_traced(child) && !kill) {
109 ret = -ESRCH; 109 ret = -ESRCH;
110 } 110 }
111 spin_unlock_irq(&child->sighand->siglock); 111 spin_unlock_irq(&child->sighand->siglock);
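The ptrace.c hunk replaces open-coded comparisons against TASK_STOPPED/TASK_TRACED with predicate helpers, which is what lets this series split the state bits (see the __TASK_STOPPED and TASK_WAKEKILL uses in the signal.c hunks below). The helpers are presumably simple mask tests along these lines (a reconstruction; the sched.h side of the series is not on this page):

    /* include/linux/sched.h (assumed shape of the new predicates) */
    #define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
    #define task_is_stopped(task)		((task->state & __TASK_STOPPED) != 0)
    #define task_is_stopped_or_traced(task)	\
    		((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
    #define task_contributes_to_load(task)	\
    		((task->state & TASK_UNINTERRUPTIBLE) != 0)

task_contributes_to_load() is the same idea applied in the sched.c hunk below: a killable sleeper's state is TASK_WAKEKILL | TASK_UNINTERRUPTIBLE, no longer equal to TASK_UNINTERRUPTIBLE, so a mask test is needed for it to keep counting toward nr_uninterruptible.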
diff --git a/kernel/sched.c b/kernel/sched.c
index ba4c88088f62..9474b23c28bf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1255,12 +1255,12 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
1255 1255
1256#define sched_class_highest (&rt_sched_class) 1256#define sched_class_highest (&rt_sched_class)
1257 1257
1258static void inc_nr_running(struct task_struct *p, struct rq *rq) 1258static void inc_nr_running(struct rq *rq)
1259{ 1259{
1260 rq->nr_running++; 1260 rq->nr_running++;
1261} 1261}
1262 1262
1263static void dec_nr_running(struct task_struct *p, struct rq *rq) 1263static void dec_nr_running(struct rq *rq)
1264{ 1264{
1265 rq->nr_running--; 1265 rq->nr_running--;
1266} 1266}
@@ -1350,11 +1350,11 @@ static int effective_prio(struct task_struct *p)
1350 */ 1350 */
1351static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) 1351static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
1352{ 1352{
1353 if (p->state == TASK_UNINTERRUPTIBLE) 1353 if (task_contributes_to_load(p))
1354 rq->nr_uninterruptible--; 1354 rq->nr_uninterruptible--;
1355 1355
1356 enqueue_task(rq, p, wakeup); 1356 enqueue_task(rq, p, wakeup);
1357 inc_nr_running(p, rq); 1357 inc_nr_running(rq);
1358} 1358}
1359 1359
1360/* 1360/*
@@ -1362,11 +1362,11 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
1362 */ 1362 */
1363static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) 1363static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
1364{ 1364{
1365 if (p->state == TASK_UNINTERRUPTIBLE) 1365 if (task_contributes_to_load(p))
1366 rq->nr_uninterruptible++; 1366 rq->nr_uninterruptible++;
1367 1367
1368 dequeue_task(rq, p, sleep); 1368 dequeue_task(rq, p, sleep);
1369 dec_nr_running(p, rq); 1369 dec_nr_running(rq);
1370} 1370}
1371 1371
1372/** 1372/**
@@ -1895,8 +1895,7 @@ out:
1895 1895
1896int fastcall wake_up_process(struct task_struct *p) 1896int fastcall wake_up_process(struct task_struct *p)
1897{ 1897{
1898 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1898 return try_to_wake_up(p, TASK_ALL, 0);
1899 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1900} 1899}
1901EXPORT_SYMBOL(wake_up_process); 1900EXPORT_SYMBOL(wake_up_process);
1902 1901
@@ -2006,7 +2005,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2006 * management (if any): 2005 * management (if any):
2007 */ 2006 */
2008 p->sched_class->task_new(rq, p); 2007 p->sched_class->task_new(rq, p);
2009 inc_nr_running(p, rq); 2008 inc_nr_running(rq);
2010 } 2009 }
2011 check_preempt_curr(rq, p); 2010 check_preempt_curr(rq, p);
2012#ifdef CONFIG_SMP 2011#ifdef CONFIG_SMP
@@ -4124,8 +4123,7 @@ void complete(struct completion *x)
4124 4123
4125 spin_lock_irqsave(&x->wait.lock, flags); 4124 spin_lock_irqsave(&x->wait.lock, flags);
4126 x->done++; 4125 x->done++;
4127 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 4126 __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
4128 1, 0, NULL);
4129 spin_unlock_irqrestore(&x->wait.lock, flags); 4127 spin_unlock_irqrestore(&x->wait.lock, flags);
4130} 4128}
4131EXPORT_SYMBOL(complete); 4129EXPORT_SYMBOL(complete);
@@ -4136,8 +4134,7 @@ void complete_all(struct completion *x)
4136 4134
4137 spin_lock_irqsave(&x->wait.lock, flags); 4135 spin_lock_irqsave(&x->wait.lock, flags);
4138 x->done += UINT_MAX/2; 4136 x->done += UINT_MAX/2;
4139 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 4137 __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
4140 0, 0, NULL);
4141 spin_unlock_irqrestore(&x->wait.lock, flags); 4138 spin_unlock_irqrestore(&x->wait.lock, flags);
4142} 4139}
4143EXPORT_SYMBOL(complete_all); 4140EXPORT_SYMBOL(complete_all);
@@ -4151,8 +4148,10 @@ do_wait_for_common(struct completion *x, long timeout, int state)
4151 wait.flags |= WQ_FLAG_EXCLUSIVE; 4148 wait.flags |= WQ_FLAG_EXCLUSIVE;
4152 __add_wait_queue_tail(&x->wait, &wait); 4149 __add_wait_queue_tail(&x->wait, &wait);
4153 do { 4150 do {
4154 if (state == TASK_INTERRUPTIBLE && 4151 if ((state == TASK_INTERRUPTIBLE &&
4155 signal_pending(current)) { 4152 signal_pending(current)) ||
4153 (state == TASK_KILLABLE &&
4154 fatal_signal_pending(current))) {
4156 __remove_wait_queue(&x->wait, &wait); 4155 __remove_wait_queue(&x->wait, &wait);
4157 return -ERESTARTSYS; 4156 return -ERESTARTSYS;
4158 } 4157 }
@@ -4212,6 +4211,15 @@ wait_for_completion_interruptible_timeout(struct completion *x,
4212} 4211}
4213EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); 4212EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
4214 4213
4214int __sched wait_for_completion_killable(struct completion *x)
4215{
4216 long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
4217 if (t == -ERESTARTSYS)
4218 return t;
4219 return 0;
4220}
4221EXPORT_SYMBOL(wait_for_completion_killable);
4222
4215static long __sched 4223static long __sched
4216sleep_on_common(wait_queue_head_t *q, int state, long timeout) 4224sleep_on_common(wait_queue_head_t *q, int state, long timeout)
4217{ 4225{
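The sched.c hunks teach the completion API about TASK_KILLABLE: do_wait_for_common() now also bails out when a task sleeping in TASK_KILLABLE has a fatal signal pending, and wait_for_completion_killable() exposes that as a first-class wait primitive. A hypothetical caller (the completion, function, and scenario are invented for illustration):

    /* sketch: wait for an event, interruptible only by a fatal signal */
    static struct completion fw_done;	/* assumed initialised and completed elsewhere */

    static int my_wait_for_fw(void)
    {
            int ret = wait_for_completion_killable(&fw_done);
            if (ret == -ERESTARTSYS)
                    return ret;	/* SIGKILL arrived; unwind instead of hanging */
            return 0;
    }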
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72e25c7a3a18..6c091d6e159d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -520,7 +520,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
520 520
521 if (!initial) { 521 if (!initial) {
522 /* sleeps upto a single latency don't count. */ 522 /* sleeps upto a single latency don't count. */
523 if (sched_feat(NEW_FAIR_SLEEPERS) && entity_is_task(se)) 523 if (sched_feat(NEW_FAIR_SLEEPERS))
524 vruntime -= sysctl_sched_latency; 524 vruntime -= sysctl_sched_latency;
525 525
526 /* ensure we never gain time by being placed backwards. */ 526 /* ensure we never gain time by being placed backwards. */
@@ -1106,7 +1106,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
1106 } 1106 }
1107 1107
1108 gran = sysctl_sched_wakeup_granularity; 1108 gran = sysctl_sched_wakeup_granularity;
1109 if (unlikely(se->load.weight != NICE_0_LOAD)) 1109 /*
1110 * More easily preempt - nice tasks, while not making
1111 * it harder for + nice tasks.
1112 */
1113 if (unlikely(se->load.weight > NICE_0_LOAD))
1110 gran = calc_delta_fair(gran, &se->load); 1114 gran = calc_delta_fair(gran, &se->load);
1111 1115
1112 if (pse->vruntime + gran < se->vruntime) 1116 if (pse->vruntime + gran < se->vruntime)
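The sched_fair.c guard change is easiest to see numerically. With calc_delta_fair(gran, lw) scaling roughly as gran * NICE_0_LOAD / lw->weight, the old test (weight != NICE_0_LOAD) scaled the wakeup granularity in both directions; the new test scales it only downwards. The weights below come from the prio_to_weight table and the ratios are approximate:

    /* current (se) at nice -5: weight ~3121 > NICE_0_LOAD (1024)
     *   -> gran shrinks ~3x: "-nice" tasks become easier to preempt
     * current (se) at nice +5: weight ~335 < NICE_0_LOAD
     *   -> gran is now left alone; the old code grew it ~3x, which made
     *      "+nice" tasks harder to preempt than nice-0 ones */
    if (unlikely(se->load.weight > NICE_0_LOAD))
            gran = calc_delta_fair(gran, &se->load);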
diff --git a/kernel/signal.c b/kernel/signal.c
index bf49ce6f016b..4333b6dbb424 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -456,15 +456,15 @@ void signal_wake_up(struct task_struct *t, int resume)
456 set_tsk_thread_flag(t, TIF_SIGPENDING); 456 set_tsk_thread_flag(t, TIF_SIGPENDING);
457 457
458 /* 458 /*
459 * For SIGKILL, we want to wake it up in the stopped/traced case. 459 * For SIGKILL, we want to wake it up in the stopped/traced/killable
460 * We don't check t->state here because there is a race with it 460 * case. We don't check t->state here because there is a race with it
461 * executing another processor and just now entering stopped state. 461 * executing another processor and just now entering stopped state.
462 * By using wake_up_state, we ensure the process will wake up and 462 * By using wake_up_state, we ensure the process will wake up and
463 * handle its death signal. 463 * handle its death signal.
464 */ 464 */
465 mask = TASK_INTERRUPTIBLE; 465 mask = TASK_INTERRUPTIBLE;
466 if (resume) 466 if (resume)
467 mask |= TASK_STOPPED | TASK_TRACED; 467 mask |= TASK_WAKEKILL;
468 if (!wake_up_state(t, mask)) 468 if (!wake_up_state(t, mask))
469 kick_process(t); 469 kick_process(t);
470} 470}
@@ -620,7 +620,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
620 * Wake up the stopped thread _after_ setting 620 * Wake up the stopped thread _after_ setting
621 * TIF_SIGPENDING 621 * TIF_SIGPENDING
622 */ 622 */
623 state = TASK_STOPPED; 623 state = __TASK_STOPPED;
624 if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) { 624 if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
625 set_tsk_thread_flag(t, TIF_SIGPENDING); 625 set_tsk_thread_flag(t, TIF_SIGPENDING);
626 state |= TASK_INTERRUPTIBLE; 626 state |= TASK_INTERRUPTIBLE;
@@ -838,7 +838,7 @@ static inline int wants_signal(int sig, struct task_struct *p)
838 return 0; 838 return 0;
839 if (sig == SIGKILL) 839 if (sig == SIGKILL)
840 return 1; 840 return 1;
841 if (p->state & (TASK_STOPPED | TASK_TRACED)) 841 if (task_is_stopped_or_traced(p))
842 return 0; 842 return 0;
843 return task_curr(p) || !signal_pending(p); 843 return task_curr(p) || !signal_pending(p);
844} 844}
@@ -994,6 +994,12 @@ void zap_other_threads(struct task_struct *p)
994 } 994 }
995} 995}
996 996
997int fastcall __fatal_signal_pending(struct task_struct *tsk)
998{
999 return sigismember(&tsk->pending.signal, SIGKILL);
1000}
1001EXPORT_SYMBOL(__fatal_signal_pending);
1002
997/* 1003/*
998 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 1004 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
999 */ 1005 */
@@ -1441,7 +1447,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
1441 BUG_ON(sig == -1); 1447 BUG_ON(sig == -1);
1442 1448
1443 /* do_notify_parent_cldstop should have been called instead. */ 1449 /* do_notify_parent_cldstop should have been called instead. */
1444 BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED)); 1450 BUG_ON(task_is_stopped_or_traced(tsk));
1445 1451
1446 BUG_ON(!tsk->ptrace && 1452 BUG_ON(!tsk->ptrace &&
1447 (tsk->group_leader != tsk || !thread_group_empty(tsk))); 1453 (tsk->group_leader != tsk || !thread_group_empty(tsk)));
@@ -1729,7 +1735,7 @@ static int do_signal_stop(int signr)
1729 * so this check has no races. 1735 * so this check has no races.
1730 */ 1736 */
1731 if (!t->exit_state && 1737 if (!t->exit_state &&
1732 !(t->state & (TASK_STOPPED|TASK_TRACED))) { 1738 !task_is_stopped_or_traced(t)) {
1733 stop_count++; 1739 stop_count++;
1734 signal_wake_up(t, 0); 1740 signal_wake_up(t, 0);
1735 } 1741 }
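signal.c now exports __fatal_signal_pending(), the raw "is SIGKILL queued" test used by the killable sleep primitives elsewhere in this series. The companion wrapper presumably looks like this (a reconstruction, consistent with the fatal_signal_pending(current) call in the sched.c hunk above):

    /* include/linux/sched.h (assumed shape) */
    static inline int fatal_signal_pending(struct task_struct *p)
    {
            return signal_pending(p) && __fatal_signal_pending(p);
    }

Gating the sigset lookup behind the cheap TIF_SIGPENDING test keeps the common no-signal path to a single flag check.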
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c1d76552446e..7c2da88db4ed 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -101,6 +101,10 @@ void softlockup_tick(void)
101 101
102 now = get_timestamp(this_cpu); 102 now = get_timestamp(this_cpu);
103 103
104 /* Wake up the high-prio watchdog task every second: */
105 if (now > (touch_timestamp + 1))
106 wake_up_process(per_cpu(watchdog_task, this_cpu));
107
104 /* Warn about unreasonable delays: */ 108 /* Warn about unreasonable delays: */
105 if (now <= (touch_timestamp + softlockup_thresh)) 109 if (now <= (touch_timestamp + softlockup_thresh))
106 return; 110 return;
@@ -191,11 +195,11 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
191 read_lock(&tasklist_lock); 195 read_lock(&tasklist_lock);
192 do_each_thread(g, t) { 196 do_each_thread(g, t) {
193 if (!--max_count) 197 if (!--max_count)
194 break; 198 goto unlock;
195 if (t->state & TASK_UNINTERRUPTIBLE) 199 if (t->state & TASK_UNINTERRUPTIBLE)
196 check_hung_task(t, now); 200 check_hung_task(t, now);
197 } while_each_thread(g, t); 201 } while_each_thread(g, t);
198 202 unlock:
199 read_unlock(&tasklist_lock); 203 read_unlock(&tasklist_lock);
200} 204}
201 205
@@ -218,14 +222,19 @@ static int watchdog(void *__bind_cpu)
218 * debug-printout triggers in softlockup_tick(). 222 * debug-printout triggers in softlockup_tick().
219 */ 223 */
220 while (!kthread_should_stop()) { 224 while (!kthread_should_stop()) {
225 set_current_state(TASK_INTERRUPTIBLE);
221 touch_softlockup_watchdog(); 226 touch_softlockup_watchdog();
222 msleep_interruptible(10000); 227 schedule();
228
229 if (kthread_should_stop())
230 break;
223 231
224 if (this_cpu != check_cpu) 232 if (this_cpu != check_cpu)
225 continue; 233 continue;
226 234
227 if (sysctl_hung_task_timeout_secs) 235 if (sysctl_hung_task_timeout_secs)
228 check_hung_uninterruptible_tasks(this_cpu); 236 check_hung_uninterruptible_tasks(this_cpu);
237
229 } 238 }
230 239
231 return 0; 240 return 0;
@@ -259,13 +268,6 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
259 wake_up_process(per_cpu(watchdog_task, hotcpu)); 268 wake_up_process(per_cpu(watchdog_task, hotcpu));
260 break; 269 break;
261#ifdef CONFIG_HOTPLUG_CPU 270#ifdef CONFIG_HOTPLUG_CPU
262 case CPU_UP_CANCELED:
263 case CPU_UP_CANCELED_FROZEN:
264 if (!per_cpu(watchdog_task, hotcpu))
265 break;
266 /* Unbind so it can run. Fall thru. */
267 kthread_bind(per_cpu(watchdog_task, hotcpu),
268 any_online_cpu(cpu_online_map));
269 case CPU_DOWN_PREPARE: 271 case CPU_DOWN_PREPARE:
270 case CPU_DOWN_PREPARE_FROZEN: 272 case CPU_DOWN_PREPARE_FROZEN:
271 if (hotcpu == check_cpu) { 273 if (hotcpu == check_cpu) {
@@ -275,6 +277,14 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
275 check_cpu = any_online_cpu(temp_cpu_online_map); 277 check_cpu = any_online_cpu(temp_cpu_online_map);
276 } 278 }
277 break; 279 break;
280
281 case CPU_UP_CANCELED:
282 case CPU_UP_CANCELED_FROZEN:
283 if (!per_cpu(watchdog_task, hotcpu))
284 break;
285 /* Unbind so it can run. Fall thru. */
286 kthread_bind(per_cpu(watchdog_task, hotcpu),
287 any_online_cpu(cpu_online_map));
278 case CPU_DEAD: 288 case CPU_DEAD:
279 case CPU_DEAD_FROZEN: 289 case CPU_DEAD_FROZEN:
280 p = per_cpu(watchdog_task, hotcpu); 290 p = per_cpu(watchdog_task, hotcpu);
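The softlockup.c watchdog loop moves from msleep_interruptible(10000) to the canonical kthread sleep sequence: declare TASK_INTERRUPTIBLE first, then touch the timestamp, then schedule(), with softlockup_tick() now waking the thread about once per second. Ordering the state change before the wakeup condition is published is what closes the lost-wakeup window. The generic shape of the pattern (do_work() is a placeholder):

    /* generic kthread wait pattern the hunk above adopts */
    while (!kthread_should_stop()) {
            set_current_state(TASK_INTERRUPTIBLE);	/* 1: announce intent to sleep */
            do_work();				/* 2: update whatever wakers check */
            schedule();				/* 3: sleep until wake_up_process() */
    }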
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 56cb009a4b35..beee5b3b68a2 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -131,6 +131,7 @@ cond_syscall(sys32_sysctl);
131cond_syscall(ppc_rtas); 131cond_syscall(ppc_rtas);
132cond_syscall(sys_spu_run); 132cond_syscall(sys_spu_run);
133cond_syscall(sys_spu_create); 133cond_syscall(sys_spu_create);
134cond_syscall(sys_subpage_prot);
134 135
135/* mmu depending weak syscall entries */ 136/* mmu depending weak syscall entries */
136cond_syscall(sys_mprotect); 137cond_syscall(sys_mprotect);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 357b68ba23ec..7cb1ac3e6fff 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,7 +81,6 @@ extern int percpu_pagelist_fraction;
81extern int compat_log; 81extern int compat_log;
82extern int maps_protect; 82extern int maps_protect;
83extern int sysctl_stat_interval; 83extern int sysctl_stat_interval;
84extern int audit_argv_kb;
85extern int latencytop_enabled; 84extern int latencytop_enabled;
86 85
87/* Constants used for minimum and maximum */ 86/* Constants used for minimum and maximum */
@@ -390,16 +389,6 @@ static struct ctl_table kern_table[] = {
390 .mode = 0644, 389 .mode = 0644,
391 .proc_handler = &proc_dointvec, 390 .proc_handler = &proc_dointvec,
392 }, 391 },
393#ifdef CONFIG_AUDITSYSCALL
394 {
395 .ctl_name = CTL_UNNUMBERED,
396 .procname = "audit_argv_kb",
397 .data = &audit_argv_kb,
398 .maxlen = sizeof(int),
399 .mode = 0644,
400 .proc_handler = &proc_dointvec,
401 },
402#endif
403 { 392 {
404 .ctl_name = KERN_CORE_PATTERN, 393 .ctl_name = KERN_CORE_PATTERN,
405 .procname = "core_pattern", 394 .procname = "core_pattern",
diff --git a/kernel/time.c b/kernel/time.c
index 09d3c45c4da7..4064c0566e77 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -129,6 +129,7 @@ static inline void warp_clock(void)
129 write_seqlock_irq(&xtime_lock); 129 write_seqlock_irq(&xtime_lock);
130 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 130 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
131 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 131 xtime.tv_sec += sys_tz.tz_minuteswest * 60;
132 update_xtime_cache(0);
132 write_sequnlock_irq(&xtime_lock); 133 write_sequnlock_irq(&xtime_lock);
133 clock_was_set(); 134 clock_was_set();
134} 135}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 63f24b550695..88267f0a8471 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -137,6 +137,7 @@ void tick_nohz_update_jiffies(void)
137 137
138 cpu_clear(cpu, nohz_cpu_mask); 138 cpu_clear(cpu, nohz_cpu_mask);
139 now = ktime_get(); 139 now = ktime_get();
140 ts->idle_waketime = now;
140 141
141 local_irq_save(flags); 142 local_irq_save(flags);
142 tick_do_update_jiffies64(now); 143 tick_do_update_jiffies64(now);
@@ -400,6 +401,7 @@ void tick_nohz_restart_sched_tick(void)
400 * Cancel the scheduled timer and restore the tick 401 * Cancel the scheduled timer and restore the tick
401 */ 402 */
402 ts->tick_stopped = 0; 403 ts->tick_stopped = 0;
404 ts->idle_exittime = now;
403 hrtimer_cancel(&ts->sched_timer); 405 hrtimer_cancel(&ts->sched_timer);
404 ts->sched_timer.expires = ts->idle_tick; 406 ts->sched_timer.expires = ts->idle_tick;
405 407
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 092a2366b5a9..cd5dbc4579c9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -47,7 +47,7 @@ struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
47static unsigned long total_sleep_time; /* seconds */ 47static unsigned long total_sleep_time; /* seconds */
48 48
49static struct timespec xtime_cache __attribute__ ((aligned (16))); 49static struct timespec xtime_cache __attribute__ ((aligned (16)));
50static inline void update_xtime_cache(u64 nsec) 50void update_xtime_cache(u64 nsec)
51{ 51{
52 xtime_cache = xtime; 52 xtime_cache = xtime;
53 timespec_add_ns(&xtime_cache, nsec); 53 timespec_add_ns(&xtime_cache, nsec);
@@ -145,6 +145,7 @@ int do_settimeofday(struct timespec *tv)
145 145
146 set_normalized_timespec(&xtime, sec, nsec); 146 set_normalized_timespec(&xtime, sec, nsec);
147 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); 147 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
148 update_xtime_cache(0);
148 149
149 clock->error = 0; 150 clock->error = 0;
150 ntp_clear(); 151 ntp_clear();
@@ -252,8 +253,8 @@ void __init timekeeping_init(void)
252 xtime.tv_nsec = 0; 253 xtime.tv_nsec = 0;
253 set_normalized_timespec(&wall_to_monotonic, 254 set_normalized_timespec(&wall_to_monotonic,
254 -xtime.tv_sec, -xtime.tv_nsec); 255 -xtime.tv_sec, -xtime.tv_nsec);
256 update_xtime_cache(0);
255 total_sleep_time = 0; 257 total_sleep_time = 0;
256
257 write_sequnlock_irqrestore(&xtime_lock, flags); 258 write_sequnlock_irqrestore(&xtime_lock, flags);
258} 259}
259 260
@@ -290,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev)
290 } 291 }
291 /* Make sure that we have the correct xtime reference */ 292 /* Make sure that we have the correct xtime reference */
292 timespec_add_ns(&xtime, timekeeping_suspend_nsecs); 293 timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
294 update_xtime_cache(0);
293 /* re-base the last cycle value */ 295 /* re-base the last cycle value */
294 clock->cycle_last = clocksource_read(clock); 296 clock->cycle_last = clocksource_read(clock);
295 clock->error = 0; 297 clock->error = 0;
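update_xtime_cache() loses its static so that every writer of xtime on this page (warp_clock() in time.c, do_settimeofday(), timekeeping_init(), timekeeping_resume()) can refresh the cached value inside the same critical section; otherwise lock-free readers of xtime_cache would see stale time until the next tick. The invariant these hunks establish, sketched (the adjustment itself is hypothetical):

    /* invariant: any update of xtime under xtime_lock refreshes the cache */
    write_seqlock_irq(&xtime_lock);
    xtime.tv_sec += delta_sec;	/* hypothetical adjustment */
    update_xtime_cache(0);	/* keep xtime_cache coherent with xtime */
    write_sequnlock_irq(&xtime_lock);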
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 12c5f4cb6b8c..d3d94c1a0fd2 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -166,6 +166,8 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
166 P(idle_calls); 166 P(idle_calls);
167 P(idle_sleeps); 167 P(idle_sleeps);
168 P_ns(idle_entrytime); 168 P_ns(idle_entrytime);
169 P_ns(idle_waketime);
170 P_ns(idle_exittime);
169 P_ns(idle_sleeptime); 171 P_ns(idle_sleeptime);
170 P(last_jiffies); 172 P(last_jiffies);
171 P(next_jiffies); 173 P(next_jiffies);
diff --git a/kernel/timer.c b/kernel/timer.c
index 23f7ead78fae..9fbb472b8cf0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1099,6 +1099,13 @@ signed long __sched schedule_timeout_interruptible(signed long timeout)
1099} 1099}
1100EXPORT_SYMBOL(schedule_timeout_interruptible); 1100EXPORT_SYMBOL(schedule_timeout_interruptible);
1101 1101
1102signed long __sched schedule_timeout_killable(signed long timeout)
1103{
1104 __set_current_state(TASK_KILLABLE);
1105 return schedule_timeout(timeout);
1106}
1107EXPORT_SYMBOL(schedule_timeout_killable);
1108
1102signed long __sched schedule_timeout_uninterruptible(signed long timeout) 1109signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1103{ 1110{
1104 __set_current_state(TASK_UNINTERRUPTIBLE); 1111 __set_current_state(TASK_UNINTERRUPTIBLE);
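schedule_timeout_killable() rounds out the TASK_KILLABLE family introduced in the sched.c and signal.c hunks: it sleeps like the uninterruptible variant, but a fatal signal can still end the sleep early. A hypothetical polling user (resource_ready() and the return code are invented for illustration):

    /* sketch: poll a condition, giving up promptly on SIGKILL */
    while (!resource_ready()) {
            if (fatal_signal_pending(current))
                    return -EINTR;
            schedule_timeout_killable(HZ / 10);	/* ~100 ms per probe */
    }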
diff --git a/kernel/wait.c b/kernel/wait.c
index 444ddbfaefc4..f9876888a569 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -215,7 +215,7 @@ void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
215{ 215{
216 struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); 216 struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
217 if (waitqueue_active(wq)) 217 if (waitqueue_active(wq))
218 __wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key); 218 __wake_up(wq, TASK_NORMAL, 1, &key);
219} 219}
220EXPORT_SYMBOL(__wake_up_bit); 220EXPORT_SYMBOL(__wake_up_bit);
221 221
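Taken together, the state-mask rewrites on this page (TASK_ALL in wake_up_process(), TASK_NORMAL in complete() and __wake_up_bit(), TASK_WAKEKILL in signal_wake_up(), __TASK_STOPPED in handle_stop_signal()) imply the following composite definitions. This is a reconstruction from the call sites above, as the sched.h hunk itself is not part of this page; the numeric value of TASK_WAKEKILL in particular is an assumption:

    /* include/linux/sched.h (assumed shape of the reworked state bits) */
    #define TASK_WAKEKILL	128	/* wake on fatal signals only */
    #define TASK_KILLABLE	(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
    #define TASK_STOPPED	(TASK_WAKEKILL | __TASK_STOPPED)
    #define TASK_TRACED	(TASK_WAKEKILL | __TASK_TRACED)
    #define TASK_NORMAL	(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
    #define TASK_ALL	(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)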