author     Dmitry Torokhov <dtor@insightbb.com>	2007-05-01 00:24:54 -0400
committer  Dmitry Torokhov <dtor@insightbb.com>	2007-05-01 00:24:54 -0400
commit     bc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775 (patch)
tree       427fcf2a7287c16d4b5aa6cbf494d59579a6a8b1 /kernel
parent     3d29cdff999c37b3876082278a8134a0642a02cd (diff)
parent     dc87c3985e9b442c60994308a96f887579addc39 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
	drivers/usb/input/Makefile
	drivers/usb/input/gtco.c
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                  232
-rw-r--r--  kernel/auditfilter.c              9
-rw-r--r--  kernel/auditsc.c                 64
-rw-r--r--  kernel/cpu.c                     32
-rw-r--r--  kernel/exit.c                     2
-rw-r--r--  kernel/fork.c                     2
-rw-r--r--  kernel/futex.c                    2
-rw-r--r--  kernel/hrtimer.c                 57
-rw-r--r--  kernel/irq/devres.c               2
-rw-r--r--  kernel/irq/migration.c            9
-rw-r--r--  kernel/kallsyms.c                23
-rw-r--r--  kernel/kprobes.c                113
-rw-r--r--  kernel/lockdep.c                 14
-rw-r--r--  kernel/module.c                  43
-rw-r--r--  kernel/params.c                  32
-rw-r--r--  kernel/power/Kconfig             37
-rw-r--r--  kernel/power/console.c           10
-rw-r--r--  kernel/power/disk.c              94
-rw-r--r--  kernel/power/main.c              42
-rw-r--r--  kernel/power/swsusp.c             2
-rw-r--r--  kernel/power/user.c               9
-rw-r--r--  kernel/printk.c                   2
-rw-r--r--  kernel/rcutorture.c              14
-rw-r--r--  kernel/relay.c                    3
-rw-r--r--  kernel/resource.c                21
-rw-r--r--  kernel/sched.c                  195
-rw-r--r--  kernel/signal.c                   3
-rw-r--r--  kernel/sysctl.c                   8
-rw-r--r--  kernel/taskstats.c                4
-rw-r--r--  kernel/time.c                     4
-rw-r--r--  kernel/time/clockevents.c        69
-rw-r--r--  kernel/time/clocksource.c        11
-rw-r--r--  kernel/time/jiffies.c             2
-rw-r--r--  kernel/time/ntp.c                30
-rw-r--r--  kernel/time/tick-broadcast.c     53
-rw-r--r--  kernel/time/tick-common.c        38
-rw-r--r--  kernel/time/tick-internal.h      15
-rw-r--r--  kernel/time/tick-oneshot.c       12
-rw-r--r--  kernel/time/tick-sched.c         14
-rw-r--r--  kernel/time/timer_list.c          6
-rw-r--r--  kernel/timer.c                   42
41 files changed, 810 insertions(+), 566 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index d9b690ac684b..4e9d20829681 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2,7 +2,7 @@
  * Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
  * System-call specific features have moved to auditsc.c
  *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * Copyright 2003-2007 Red Hat Inc., Durham, North Carolina.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -65,7 +65,9 @@
  * (Initialization happens after skb_init is called.) */
 static int audit_initialized;
 
-/* No syscall auditing will take place unless audit_enabled != 0. */
+/* 0 - no auditing
+ * 1 - auditing enabled
+ * 2 - auditing enabled and configuration is locked/unchangeable. */
 int audit_enabled;
 
 /* Default state when kernel boots without any parameters. */
@@ -149,7 +151,7 @@ struct audit_buffer {
 
 static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 	nlh->nlmsg_pid = pid;
 }
 
@@ -239,102 +241,150 @@ void audit_log_lost(const char *message)
 
 static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int old = audit_rate_limit;
+	int res, rc = 0, old = audit_rate_limit;
+
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
 
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_rate_limit=%d old=%d by auid=%u subj=%s",
-				limit, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_rate_limit=%d old=%d by auid=%u",
-			limit, old, loginuid);
-	audit_rate_limit = limit;
-	return 0;
+				"audit_rate_limit=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				limit, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_rate_limit=%d old=%d by auid=%u res=%d",
+		limit, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_rate_limit = limit;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int old = audit_backlog_limit;
+	int res, rc = 0, old = audit_backlog_limit;
+
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
 
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_backlog_limit=%d old=%d by auid=%u subj=%s",
-				limit, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_backlog_limit=%d old=%d by auid=%u",
-			limit, old, loginuid);
-	audit_backlog_limit = limit;
-	return 0;
+				"audit_backlog_limit=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				limit, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_backlog_limit=%d old=%d by auid=%u res=%d",
+		limit, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_backlog_limit = limit;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 {
-	int old = audit_enabled;
+	int res, rc = 0, old = audit_enabled;
 
-	if (state != 0 && state != 1)
+	if (state < 0 || state > 2)
 		return -EINVAL;
 
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
+
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_enabled=%d old=%d by auid=%u subj=%s",
-				state, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_enabled=%d old=%d by auid=%u",
-			state, old, loginuid);
-	audit_enabled = state;
-	return 0;
+				"audit_enabled=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				state, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_enabled=%d old=%d by auid=%u res=%d",
+		state, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_enabled = state;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 {
-	int old = audit_failure;
+	int res, rc = 0, old = audit_failure;
 
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
 
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
+
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_failure=%d old=%d by auid=%u subj=%s",
-				state, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_failure=%d old=%d by auid=%u",
-			state, old, loginuid);
-	audit_failure = state;
-	return 0;
+				"audit_failure=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				state, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_failure=%d old=%d by auid=%u res=%d",
+		state, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_failure = state;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int kauditd_thread(void *dummy)
@@ -599,6 +649,30 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_DEL:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule))
 			return -EINVAL;
+		if (audit_enabled == 2) {
+			ab = audit_log_start(NULL, GFP_KERNEL,
+					AUDIT_CONFIG_CHANGE);
+			if (ab) {
+				audit_log_format(ab,
+						"pid=%d uid=%u auid=%u",
+						pid, uid, loginuid);
+				if (sid) {
+					if (selinux_sid_to_string(
+							sid, &ctx, &len)) {
+						audit_log_format(ab,
+							" ssid=%u", sid);
+						/* Maybe call audit_panic? */
+					} else
+						audit_log_format(ab,
+							" subj=%s", ctx);
+					kfree(ctx);
+				}
+				audit_log_format(ab, " audit_enabled=%d res=0",
+					audit_enabled);
+				audit_log_end(ab);
+			}
+			return -EPERM;
+		}
 		/* fallthrough */
 	case AUDIT_LIST:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
@@ -609,6 +683,30 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_DEL_RULE:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
 			return -EINVAL;
+		if (audit_enabled == 2) {
+			ab = audit_log_start(NULL, GFP_KERNEL,
+					AUDIT_CONFIG_CHANGE);
+			if (ab) {
+				audit_log_format(ab,
+						"pid=%d uid=%u auid=%u",
+						pid, uid, loginuid);
+				if (sid) {
+					if (selinux_sid_to_string(
+							sid, &ctx, &len)) {
+						audit_log_format(ab,
+							" ssid=%u", sid);
+						/* Maybe call audit_panic? */
+					} else
+						audit_log_format(ab,
+							" subj=%s", ctx);
+					kfree(ctx);
+				}
+				audit_log_format(ab, " audit_enabled=%d res=0",
+					audit_enabled);
+				audit_log_end(ab);
+			}
+			return -EPERM;
+		}
 		/* fallthrough */
 	case AUDIT_LIST_RULES:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
@@ -652,7 +750,7 @@ static void audit_receive_skb(struct sk_buff *skb)
 	u32 rlen;
 
 	while (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
+		nlh = nlmsg_hdr(skb);
 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 			return;
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
@@ -697,7 +795,7 @@ static int __init audit_init(void)
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
 	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
-					   THIS_MODULE);
+					   NULL, THIS_MODULE);
 	if (!audit_sock)
 		audit_panic("cannot initialize netlink socket");
 	else
@@ -975,7 +1073,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 		goto out;
 	}
 	va_copy(args2, args);
-	len = vsnprintf(skb->tail, avail, fmt, args);
+	len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args);
 	if (len >= avail) {
 		/* The printk buffer is 1024 bytes long, so if we get
 		 * here and AUDIT_BUFSIZ is at least 1024, then we can
@@ -984,7 +1082,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 			max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail));
 		if (!avail)
 			goto out;
-		len = vsnprintf(skb->tail, avail, fmt, args2);
+		len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
 	}
 	if (len > 0)
 		skb_put(skb, len);
@@ -1045,7 +1143,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 		return;
 	}
 
-	ptr = skb->tail;
+	ptr = skb_tail_pointer(skb);
 	for (i=0; i<len; i++) {
 		*ptr++ = hex[(buf[i] & 0xF0)>>4]; /* Upper nibble */
 		*ptr++ = hex[buf[i] & 0x0F];	  /* Lower nibble */
@@ -1077,7 +1175,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 		if (!avail)
 			return;
 	}
-	ptr = skb->tail;
+	ptr = skb_tail_pointer(skb);
 	*ptr++ = '"';
 	memcpy(ptr, string, slen);
 	ptr += slen;
@@ -1170,7 +1268,7 @@ void audit_log_end(struct audit_buffer *ab)
 		audit_log_lost("rate limit exceeded");
 	} else {
 		if (audit_pid) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data;
+			struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 			nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
 			skb_queue_tail(&audit_skb_queue, ab->skb);
 			ab->skb = NULL;
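
All four audit_set_*() helpers in the hunks above follow the same locked-configuration pattern introduced by audit_enabled == 2: decide permission first, log the attempt with a res= field unconditionally, and only apply the change when allowed. A minimal standalone sketch of that pattern, assuming a hypothetical log_config_change() in place of audit_log():

#include <errno.h>

#define AUDIT_LOCKED 2			/* stands in for the literal 2 above */

extern void log_config_change(int newval, int old, int res); /* hypothetical */

static int set_audited_param(int *param, int newval, int audit_enabled)
{
	int res = (audit_enabled == AUDIT_LOCKED) ? 0 : 1;
	int old = *param;

	log_config_change(newval, old, res);	/* always log the attempt */

	if (res == 1) {				/* allowed: make the change */
		*param = newval;
		return 0;
	}
	return -EPERM;				/* locked: deny and report */
}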
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 87865f8b4ce3..3749193aed8c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -937,9 +937,10 @@ static void audit_update_watch(struct audit_parent *parent,
 	}
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-	audit_log_format(ab, "audit updated rules specifying path=");
+	audit_log_format(ab, "op=updated rules specifying path=");
 	audit_log_untrustedstring(ab, owatch->path);
 	audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
+	audit_log_format(ab, " list=%d res=1", r->listnr);
 	audit_log_end(ab);
 
 	audit_remove_watch(owatch);
@@ -969,14 +970,14 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 		e = container_of(r, struct audit_entry, rule);
 
 		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-		audit_log_format(ab, "audit implicitly removed rule path=");
+		audit_log_format(ab, "op=remove rule path=");
 		audit_log_untrustedstring(ab, w->path);
 		if (r->filterkey) {
 			audit_log_format(ab, " key=");
 			audit_log_untrustedstring(ab, r->filterkey);
 		} else
 			audit_log_format(ab, " key=(null)");
-		audit_log_format(ab, " list=%d", r->listnr);
+		audit_log_format(ab, " list=%d res=1", r->listnr);
 		audit_log_end(ab);
 
 		list_del(&r->rlist);
@@ -1410,7 +1411,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
 		audit_log_format(ab, " subj=%s", ctx);
 		kfree(ctx);
 	}
-	audit_log_format(ab, " %s rule key=", action);
+	audit_log_format(ab, " op=%s rule key=", action);
 	if (rule->filterkey)
 		audit_log_untrustedstring(ab, rule->filterkey);
 	else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 298897559ca4..628c7ac590a0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -170,6 +170,11 @@ struct audit_aux_data_sockaddr {
 	char a[0];
 };
 
+struct audit_aux_data_fd_pair {
+	struct audit_aux_data d;
+	int fd[2];
+};
+
 struct audit_aux_data_path {
 	struct audit_aux_data d;
 	struct dentry *dentry;
@@ -734,28 +739,26 @@ static inline void audit_free_context(struct audit_context *context)
 void audit_log_task_context(struct audit_buffer *ab)
 {
 	char *ctx = NULL;
-	ssize_t len = 0;
+	unsigned len;
+	int error;
+	u32 sid;
+
+	selinux_get_task_sid(current, &sid);
+	if (!sid)
+		return;
 
-	len = security_getprocattr(current, "current", NULL, 0);
-	if (len < 0) {
-		if (len != -EINVAL)
+	error = selinux_sid_to_string(sid, &ctx, &len);
+	if (error) {
+		if (error != -EINVAL)
 			goto error_path;
 		return;
 	}
 
-	ctx = kmalloc(len, GFP_KERNEL);
-	if (!ctx)
-		goto error_path;
-
-	len = security_getprocattr(current, "current", ctx, len);
-	if (len < 0 )
-		goto error_path;
-
 	audit_log_format(ab, " subj=%s", ctx);
+	kfree(ctx);
 	return;
 
 error_path:
-	kfree(ctx);
 	audit_panic("error in audit_log_task_context");
 	return;
 }
@@ -961,6 +964,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_d_path(ab, "path=", axi->dentry, axi->mnt);
 		break; }
 
+	case AUDIT_FD_PAIR: {
+		struct audit_aux_data_fd_pair *axs = (void *)aux;
+		audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
+		break; }
+
 	}
 	audit_log_end(ab);
 	}
@@ -1815,6 +1823,36 @@ int audit_socketcall(int nargs, unsigned long *args)
 }
 
 /**
+ * __audit_fd_pair - record audit data for pipe and socketpair
+ * @fd1: the first file descriptor
+ * @fd2: the second file descriptor
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_fd_pair(int fd1, int fd2)
+{
+	struct audit_context *context = current->audit_context;
+	struct audit_aux_data_fd_pair *ax;
+
+	if (likely(!context)) {
+		return 0;
+	}
+
+	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+	if (!ax) {
+		return -ENOMEM;
+	}
+
+	ax->fd[0] = fd1;
+	ax->fd[1] = fd2;
+
+	ax->d.type = AUDIT_FD_PAIR;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
 * audit_sockaddr - record audit data for sys_bind, sys_connect, sys_sendto
 * @len: data length in user space
 * @a: data address in kernel space
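
A hedged sketch of how the new __audit_fd_pair() hook would be reached; the real call sites live in the pipe and socketpair syscall paths, which are not part of this diff, and create_fd_pair() below is a made-up stand-in for that work:

/* Illustrative only: the syscall records both descriptors so the
 * AUDIT_FD_PAIR aux record above can be emitted at syscall exit. */
asmlinkage long sys_pipe_sketch(int __user *fildes)
{
	int fd[2];
	int error = create_fd_pair(fd);		/* hypothetical helper */

	if (!error) {
		error = __audit_fd_pair(fd[0], fd[1]);
		if (!error && copy_to_user(fildes, fd, sizeof(fd)))
			error = -EFAULT;
	}
	return error;
}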
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3d4206ada5c9..36e70845cfc3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -254,6 +254,12 @@ int __cpuinit cpu_up(unsigned int cpu)
 }
 
 #ifdef CONFIG_SUSPEND_SMP
+/* Needed to prevent the microcode driver from requesting firmware in its CPU
+ * hotplug notifier during the suspend/resume.
+ */
+int suspend_cpu_hotplug;
+EXPORT_SYMBOL(suspend_cpu_hotplug);
+
 static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
@@ -261,16 +267,8 @@ int disable_nonboot_cpus(void)
 	int cpu, first_cpu, error = 0;
 
 	mutex_lock(&cpu_add_remove_lock);
-	first_cpu = first_cpu(cpu_present_map);
-	if (!cpu_online(first_cpu)) {
-		error = _cpu_up(first_cpu);
-		if (error) {
-			printk(KERN_ERR "Could not bring CPU%d up.\n",
-			       first_cpu);
-			goto out;
-		}
-	}
-
+	suspend_cpu_hotplug = 1;
+	first_cpu = first_cpu(cpu_online_map);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
@@ -296,7 +294,7 @@ int disable_nonboot_cpus(void)
 	} else {
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
-out:
+	suspend_cpu_hotplug = 0;
 	mutex_unlock(&cpu_add_remove_lock);
 	return error;
 }
@@ -308,20 +306,22 @@ void enable_nonboot_cpus(void)
 	/* Allow everyone to use the CPU hotplug again */
 	mutex_lock(&cpu_add_remove_lock);
 	cpu_hotplug_disabled = 0;
-	mutex_unlock(&cpu_add_remove_lock);
 	if (cpus_empty(frozen_cpus))
-		return;
+		goto out;
 
+	suspend_cpu_hotplug = 1;
 	printk("Enabling non-boot CPUs ...\n");
 	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
+		error = _cpu_up(cpu);
 		if (!error) {
 			printk("CPU%d is up\n", cpu);
 			continue;
 		}
-		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
-		       cpu, error);
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
 	}
 	cpus_clear(frozen_cpus);
+	suspend_cpu_hotplug = 0;
+out:
+	mutex_unlock(&cpu_add_remove_lock);
 }
 #endif
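
The new suspend_cpu_hotplug flag is exported precisely so that CPU-hotplug notifiers can behave differently during suspend/resume. A hedged sketch of a consumer, modeled on the microcode driver mentioned in the new comment (the callback name and body are illustrative, not part of this diff):

static int mc_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	/* Firmware is unavailable while devices are suspended, so skip
	 * request_firmware() when the resume path is replugging CPUs. */
	if (suspend_cpu_hotplug)
		return NOTIFY_OK;

	/* ... normal CPU_ONLINE/CPU_DEAD handling would go here ... */
	return NOTIFY_OK;
}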
diff --git a/kernel/exit.c b/kernel/exit.c
index f132349c0325..b55ed4cc9104 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -790,7 +790,7 @@ static void exit_notify(struct task_struct *tsk)
 
 	pgrp = task_pgrp(tsk);
 	if ((task_pgrp(t) != pgrp) &&
-	    (task_session(t) != task_session(tsk)) &&
+	    (task_session(t) == task_session(tsk)) &&
 	    will_become_orphaned_pgrp(pgrp, tsk) &&
 	    has_stopped_jobs(pgrp)) {
 		__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
diff --git a/kernel/fork.c b/kernel/fork.c
index d154cc786489..6af959c034d8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -933,8 +933,8 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)
 
 static inline void rt_mutex_init_task(struct task_struct *p)
 {
-#ifdef CONFIG_RT_MUTEXES
 	spin_lock_init(&p->pi_lock);
+#ifdef CONFIG_RT_MUTEXES
 	plist_head_init(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
diff --git a/kernel/futex.c b/kernel/futex.c
index e749e7df14b1..5a270b5e3f95 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -565,6 +565,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
+	spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -604,6 +605,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	pi_state->owner = new_owner;
 	spin_unlock_irq(&new_owner->pi_lock);
 
+	spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
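
The two added lines close a race in the PI-futex owner handoff: picking the next owner and publishing it must both happen under the rt_mutex wait_lock, otherwise a waiter that times out can dequeue itself in between. A condensed sketch of the now-serialized sequence (an illustration, not a drop-in replacement for wake_futex_pi()):

static void pi_handoff_sketch(struct futex_pi_state *pi_state)
{
	struct task_struct *new_owner;

	spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);	/* pick */
	/* ... update the user-space futex word with new_owner's TID ... */
	pi_state->owner = new_owner;				/* publish */
	spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);
}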
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 476cb0c0b4a4..1b3033105b40 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -59,6 +59,7 @@ ktime_t ktime_get(void)
 
 	return timespec_to_ktime(now);
 }
+EXPORT_SYMBOL_GPL(ktime_get);
 
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
@@ -135,7 +136,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
 static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
 	ktime_t xtim, tomono;
-	struct timespec xts;
+	struct timespec xts, tom;
 	unsigned long seq;
 
 	do {
@@ -145,10 +146,11 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 #else
 		xts = xtime;
 #endif
+		tom = wall_to_monotonic;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	xtim = timespec_to_ktime(xts);
-	tomono = timespec_to_ktime(wall_to_monotonic);
+	tomono = timespec_to_ktime(tom);
 	base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
 	base->clock_base[CLOCK_MONOTONIC].softirq_time =
 		ktime_add(xtim, tomono);
@@ -277,6 +279,8 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 
 	return ktime_add(kt, tmp);
 }
+
+EXPORT_SYMBOL_GPL(ktime_add_ns);
 # endif /* !CONFIG_KTIME_SCALAR */
 
 /*
@@ -458,6 +462,18 @@ void clock_was_set(void)
 }
 
 /*
+ * During resume we might have to reprogram the high resolution timer
+ * interrupt (on the local CPU):
+ */
+void hres_timers_resume(void)
+{
+	WARN_ON_ONCE(num_online_cpus() > 1);
+
+	/* Retrigger the CPU local events: */
+	retrigger_next_event(NULL);
+}
+
+/*
 * Check, whether the timer is on the callback pending list
 */
 static inline int hrtimer_cb_pending(const struct hrtimer *timer)
@@ -540,19 +556,19 @@
 /*
 * Switch to high resolution mode
 */
-static void hrtimer_switch_to_hres(void)
+static int hrtimer_switch_to_hres(void)
 {
 	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
 	unsigned long flags;
 
 	if (base->hres_active)
-		return;
+		return 1;
 
 	local_irq_save(flags);
 
 	if (tick_init_highres()) {
 		local_irq_restore(flags);
-		return;
+		return 0;
 	}
 	base->hres_active = 1;
 	base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
@@ -565,13 +581,14 @@ static void hrtimer_switch_to_hres(void)
 	local_irq_restore(flags);
 	printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
 	       smp_processor_id());
+	return 1;
 }
 
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
-static inline void hrtimer_switch_to_hres(void) { }
+static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 					    struct hrtimer_clock_base *base)
@@ -643,6 +660,12 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 		orun++;
 	}
 	timer->expires = ktime_add(timer->expires, interval);
+	/*
+	 * Make sure, that the result did not wrap with a very large
+	 * interval.
+	 */
+	if (timer->expires.tv64 < 0)
+		timer->expires = ktime_set(KTIME_SEC_MAX, 0);
 
 	return orun;
 }
@@ -806,7 +829,12 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	timer_stats_hrtimer_set_start_info(timer);
 
-	enqueue_hrtimer(timer, new_base, base == new_base);
+	/*
+	 * Only allow reprogramming if the new base is on this CPU.
+	 * (it might still be on another CPU if the timer was pending)
+	 */
+	enqueue_hrtimer(timer, new_base,
+			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
 	unlock_hrtimer_base(timer, &flags);
 
@@ -1130,6 +1158,9 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
 		if (base->softirq_time.tv64 <= timer->expires.tv64)
 			break;
 
+#ifdef CONFIG_HIGH_RES_TIMERS
+		WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
+#endif
 		timer_stats_account_hrtimer(timer);
 
 		fn = timer->function;
@@ -1173,7 +1204,8 @@ void hrtimer_run_queues(void)
 	 * deadlock vs. xtime_lock.
 	 */
 	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
-		hrtimer_switch_to_hres();
+		if (hrtimer_switch_to_hres())
+			return;
 
 	hrtimer_get_softirq_time(cpu_base);
 
@@ -1355,17 +1387,16 @@ static void migrate_hrtimers(int cpu)
 	tick_cancel_sched_timer(cpu);
 
 	local_irq_disable();
-
-	spin_lock(&new_base->lock);
-	spin_lock(&old_base->lock);
+	double_spin_lock(&new_base->lock, &old_base->lock,
+			 smp_processor_id() < cpu);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
 				     &new_base->clock_base[i]);
 	}
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
 
+	double_spin_unlock(&new_base->lock, &old_base->lock,
+			   smp_processor_id() < cpu);
 	local_irq_enable();
 	put_cpu_var(hrtimer_bases);
 }
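
double_spin_lock()/double_spin_unlock() (helpers in <linux/spinlock.h> in this tree) exist to give both CPUs one agreed lock order. A minimal sketch of the idea, assuming nothing beyond ordinary spinlocks:

static void double_lock_sketch(spinlock_t *l1, spinlock_t *l2, int l1_first)
{
	/* Taking the two locks in an order derived from a stable
	 * predicate (here: the CPU-number comparison in the hunk above)
	 * means two concurrent migrations cannot ABBA-deadlock. */
	if (l1_first) {
		spin_lock(l1);
		spin_lock(l2);
	} else {
		spin_lock(l2);
		spin_lock(l1);
	}
}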
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 85a430da0fb6..d8ee241115f5 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -54,7 +54,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 
 	rc = request_irq(irq, handler, irqflags, devname, dev_id);
 	if (rc) {
-		kfree(dr);
+		devres_free(dr);
 		return rc;
 	}
 
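
The one-line fix matters because devres_alloc() returns a pointer just past a hidden struct devres header, so handing that pointer to kfree() frees from the middle of the allocation. A sketch of the rule, reusing the struct irq_devres and devm_irq_release() that kernel/irq/devres.c defines (assumed context, not shown in this hunk):

static int devres_error_path_sketch(void)
{
	struct irq_devres *dr;

	dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
			  GFP_KERNEL);
	if (!dr)
		return -ENOMEM;

	/* ... request_irq() failed ... */
	devres_free(dr);	/* correct: releases header + payload */
	return -EBUSY;
}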
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 4baa3bbcd25a..77b7acc875c5 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -65,12 +65,11 @@ void move_native_irq(int irq)
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
 
-	if (likely(!(desc->status & IRQ_DISABLED)))
-		desc->chip->disable(irq);
+	if (unlikely(desc->status & IRQ_DISABLED))
+		return;
 
+	desc->chip->mask(irq);
 	move_masked_irq(irq);
-
-	if (likely(!(desc->status & IRQ_DISABLED)))
-		desc->chip->enable(irq);
+	desc->chip->unmask(irq);
 }
 
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6f294ff4f9ee..5a0de8409739 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -267,27 +267,33 @@ const char *kallsyms_lookup(unsigned long addr,
 	return NULL;
 }
 
-/* Replace "%s" in format with address, or returns -errno. */
-void __print_symbol(const char *fmt, unsigned long address)
+/* Look up a kernel symbol and return it in a text buffer. */
+int sprint_symbol(char *buffer, unsigned long address)
 {
 	char *modname;
 	const char *name;
 	unsigned long offset, size;
 	char namebuf[KSYM_NAME_LEN+1];
-	char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
-		    2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1];
 
 	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
-
 	if (!name)
-		sprintf(buffer, "0x%lx", address);
+		return sprintf(buffer, "0x%lx", address);
 	else {
 		if (modname)
-			sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+			return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
 				size, modname);
 		else
-			sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
+			return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
 	}
+}
+
+/* Look up a kernel symbol and print it to the kernel messages. */
+void __print_symbol(const char *fmt, unsigned long address)
+{
+	char buffer[KSYM_SYMBOL_LEN];
+
+	sprint_symbol(buffer, address);
+
 	printk(fmt, buffer);
 }
 
@@ -452,3 +458,4 @@ static int __init kallsyms_init(void)
 __initcall(kallsyms_init);
 
 EXPORT_SYMBOL(__print_symbol);
+EXPORT_SYMBOL_GPL(sprint_symbol);
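
Assumed usage sketch for the newly exported sprint_symbol(): it renders the same "name+offset/size [module]" text that __print_symbol() prints, but into a caller-owned buffer, which is what callers want when building a larger message:

#include <linux/kallsyms.h>
#include <linux/kernel.h>

static void log_return_address(unsigned long addr)
{
	char sym[KSYM_SYMBOL_LEN];	/* sized like __print_symbol()'s buffer */

	sprint_symbol(sym, addr);	/* e.g. "do_fork+0x12/0x340" */
	printk(KERN_DEBUG "called from %s\n", sym);
}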
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6fcf8dd148d0..d25a9ada3f8e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -39,6 +39,8 @@
 #include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
 #include <linux/freezer.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
@@ -778,6 +780,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	return -ENOSYS;
 }
 
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+					   struct pt_regs *regs)
+{
+	return 0;
+}
+
 #endif /* ARCH_SUPPORTS_KRETPROBES */
 
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
@@ -815,7 +823,109 @@ static int __init init_kprobes(void)
 	return err;
 }
 
-__initcall(init_kprobes);
+#ifdef CONFIG_DEBUG_FS
+static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
+		const char *sym, int offset, char *modname)
+{
+	char *kprobe_type;
+
+	if (p->pre_handler == pre_handler_kretprobe)
+		kprobe_type = "r";
+	else if (p->pre_handler == setjmp_pre_handler)
+		kprobe_type = "j";
+	else
+		kprobe_type = "k";
+	if (sym)
+		seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type,
+			sym, offset, (modname ? modname : " "));
+	else
+		seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr);
+}
+
+static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
+{
+	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
+}
+
+static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
+{
+	(*pos)++;
+	if (*pos >= KPROBE_TABLE_SIZE)
+		return NULL;
+	return pos;
+}
+
+static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
+{
+	/* Nothing to do */
+}
+
+static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p, *kp;
+	const char *sym = NULL;
+	unsigned int i = *(loff_t *) v;
+	unsigned long size, offset = 0;
+	char *modname, namebuf[128];
+
+	head = &kprobe_table[i];
+	preempt_disable();
+	hlist_for_each_entry_rcu(p, node, head, hlist) {
+		sym = kallsyms_lookup((unsigned long)p->addr, &size,
+					&offset, &modname, namebuf);
+		if (p->pre_handler == aggr_pre_handler) {
+			list_for_each_entry_rcu(kp, &p->list, list)
+				report_probe(pi, kp, sym, offset, modname);
+		} else
+			report_probe(pi, p, sym, offset, modname);
+	}
+	preempt_enable();
+	return 0;
+}
+
+static struct seq_operations kprobes_seq_ops = {
+	.start = kprobe_seq_start,
+	.next  = kprobe_seq_next,
+	.stop  = kprobe_seq_stop,
+	.show  = show_kprobe_addr
+};
+
+static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &kprobes_seq_ops);
+}
+
+static struct file_operations debugfs_kprobes_operations = {
+	.open		= kprobes_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __kprobes debugfs_kprobe_init(void)
+{
+	struct dentry *dir, *file;
+
+	dir = debugfs_create_dir("kprobes", NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	file = debugfs_create_file("list", 0444, dir, 0,
+				&debugfs_kprobes_operations);
+	if (!file) {
+		debugfs_remove(dir);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+late_initcall(debugfs_kprobe_init);
+#endif /* CONFIG_DEBUG_FS */
+
+module_init(init_kprobes);
 
 EXPORT_SYMBOL_GPL(register_kprobe);
 EXPORT_SYMBOL_GPL(unregister_kprobe);
@@ -824,4 +934,3 @@ EXPORT_SYMBOL_GPL(unregister_jprobe);
 EXPORT_SYMBOL_GPL(jprobe_return);
 EXPORT_SYMBOL_GPL(register_kretprobe);
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
-
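
The debugfs file above uses a seq_file idiom worth noting: the iterator position is a hash-bucket index, not an element pointer, so reads can restart cheaply at any bucket. A generic sketch of that idiom (TABLE_SIZE is a stand-in for KPROBE_TABLE_SIZE):

#include <linux/seq_file.h>

#define TABLE_SIZE 64			/* stand-in for KPROBE_TABLE_SIZE */

static void *bucket_seq_start(struct seq_file *f, loff_t *pos)
{
	return (*pos < TABLE_SIZE) ? pos : NULL;	/* *pos == bucket */
}

static void *bucket_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
	(*pos)++;
	return (*pos < TABLE_SIZE) ? pos : NULL;
}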
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 592c576d77a7..7065a687ac54 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2228,6 +2228,10 @@ out_calc_hash:
 
 	curr->lockdep_depth++;
 	check_chain_key(curr);
+#ifdef CONFIG_DEBUG_LOCKDEP
+	if (unlikely(!debug_locks))
+		return 0;
+#endif
 	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
 		debug_locks_off();
 		printk("BUG: MAX_LOCK_DEPTH too low!\n");
@@ -2598,7 +2602,7 @@ out_restore:
 	raw_local_irq_restore(flags);
 }
 
-void __init lockdep_init(void)
+void lockdep_init(void)
 {
 	int i;
 
@@ -2738,6 +2742,10 @@ void debug_show_all_locks(void)
 	int count = 10;
 	int unlock = 1;
 
+	if (unlikely(!debug_locks)) {
+		printk("INFO: lockdep is turned off.\n");
+		return;
+	}
 	printk("\nShowing all locks held in the system:\n");
 
 	/*
@@ -2781,6 +2789,10 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks);
 
 void debug_show_held_locks(struct task_struct *task)
 {
+	if (unlikely(!debug_locks)) {
+		printk("INFO: lockdep is turned off.\n");
+		return;
+	}
 	lockdep_print_held_locks(task);
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 8a94e054230c..9da5af668a20 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1068,7 +1068,8 @@ static inline void remove_sect_attrs(struct module *mod)
 }
 #endif /* CONFIG_KALLSYMS */
 
-static int module_add_modinfo_attrs(struct module *mod)
+#ifdef CONFIG_SYSFS
+int module_add_modinfo_attrs(struct module *mod)
 {
 	struct module_attribute *attr;
 	struct module_attribute *temp_attr;
@@ -1094,7 +1095,7 @@ static int module_add_modinfo_attrs(struct module *mod)
 	return error;
 }
 
-static void module_remove_modinfo_attrs(struct module *mod)
+void module_remove_modinfo_attrs(struct module *mod)
 {
 	struct module_attribute *attr;
 	int i;
@@ -1109,8 +1110,10 @@ static void module_remove_modinfo_attrs(struct module *mod)
 	}
 	kfree(mod->modinfo_attrs);
 }
+#endif
 
-static int mod_sysfs_init(struct module *mod)
+#ifdef CONFIG_SYSFS
+int mod_sysfs_init(struct module *mod)
 {
 	int err;
 
@@ -1133,7 +1136,7 @@ out:
 	return err;
 }
 
-static int mod_sysfs_setup(struct module *mod,
+int mod_sysfs_setup(struct module *mod,
 			   struct kernel_param *kparam,
 			   unsigned int num_params)
 {
@@ -1145,8 +1148,10 @@ static int mod_sysfs_setup(struct module *mod,
 		goto out;
 
 	mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders");
-	if (!mod->holders_dir)
+	if (!mod->holders_dir) {
+		err = -ENOMEM;
 		goto out_unreg;
+	}
 
 	err = module_param_sysfs_setup(mod, kparam, num_params);
 	if (err)
@@ -1169,16 +1174,14 @@ out_unreg:
 out:
 	return err;
 }
+#endif
 
 static void mod_kobject_remove(struct module *mod)
 {
 	module_remove_modinfo_attrs(mod);
 	module_param_sysfs_remove(mod);
-	if (mod->mkobj.drivers_dir)
-		kobject_unregister(mod->mkobj.drivers_dir);
-	if (mod->holders_dir)
-		kobject_unregister(mod->holders_dir);
-
+	kobject_unregister(mod->mkobj.drivers_dir);
+	kobject_unregister(mod->holders_dir);
 	kobject_unregister(&mod->mkobj.kobj);
 }
 
@@ -2345,6 +2348,7 @@ void print_modules(void)
 	printk("\n");
 }
 
+#ifdef CONFIG_SYSFS
 static char *make_driver_name(struct device_driver *drv)
 {
 	char *driver_name;
@@ -2382,8 +2386,13 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 
 		/* Lookup built-in module entry in /sys/modules */
 		mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name);
-		if (mkobj)
+		if (mkobj) {
 			mk = container_of(mkobj, struct module_kobject, kobj);
+			/* remember our module structure */
+			drv->mkobj = mk;
+			/* kset_find_obj took a reference */
+			kobject_put(mkobj);
+		}
 	}
 
 	if (!mk)
@@ -2403,22 +2412,28 @@ EXPORT_SYMBOL(module_add_driver);
 
 void module_remove_driver(struct device_driver *drv)
 {
+	struct module_kobject *mk = NULL;
 	char *driver_name;
 
 	if (!drv)
 		return;
 
 	sysfs_remove_link(&drv->kobj, "module");
-	if (drv->owner && drv->owner->mkobj.drivers_dir) {
+
+	if (drv->owner)
+		mk = &drv->owner->mkobj;
+	else if (drv->mkobj)
+		mk = drv->mkobj;
+	if (mk && mk->drivers_dir) {
 		driver_name = make_driver_name(drv);
 		if (driver_name) {
-			sysfs_remove_link(drv->owner->mkobj.drivers_dir,
-					  driver_name);
+			sysfs_remove_link(mk->drivers_dir, driver_name);
 			kfree(driver_name);
 		}
 	}
 }
 EXPORT_SYMBOL(module_remove_driver);
+#endif
 
 #ifdef CONFIG_MODVERSIONS
 /* Generate the signature for struct module here, too, for modversions. */
diff --git a/kernel/params.c b/kernel/params.c
index 553cf7d6a4be..1fc4ac746cd8 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -30,8 +30,6 @@
 #define DEBUGP(fmt, a...)
 #endif
 
-static struct kobj_type module_ktype;
-
 static inline char dash2underscore(char c)
 {
 	if (c == '-')
@@ -358,6 +356,10 @@ int param_set_copystring(const char *val, struct kernel_param *kp)
 {
 	struct kparam_string *kps = kp->arg;
 
+	if (!val) {
+		printk(KERN_ERR "%s: missing param set value\n", kp->name);
+		return -EINVAL;
+	}
 	if (strlen(val)+1 > kps->maxlen) {
 		printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
 		       kp->name, kps->maxlen-1);
@@ -391,6 +393,7 @@ struct module_param_attrs
 	struct param_attribute attrs[0];
 };
 
+#ifdef CONFIG_SYSFS
 #define to_param_attr(n) container_of(n, struct param_attribute, mattr);
 
 static ssize_t param_attr_show(struct module_attribute *mattr,
@@ -426,6 +429,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 		return len;
 	return err;
 }
+#endif
 
 #ifdef CONFIG_MODULES
 #define __modinit
@@ -433,6 +437,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 #define __modinit __init
 #endif
 
+#ifdef CONFIG_SYSFS
 /*
 * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
 * @mk: struct module_kobject (contains parent kobject)
@@ -500,9 +505,7 @@ param_sysfs_setup(struct module_kobject *mk,
 	return mp;
 }
 
-
 #ifdef CONFIG_MODULES
-
 /*
 * module_param_sysfs_setup - setup sysfs support for one module
 * @mod: module
@@ -625,7 +628,6 @@ static void __init param_sysfs_builtin(void)
 
 
 /* module-related sysfs stuff */
-#ifdef CONFIG_SYSFS
 
 #define to_module_attr(n) container_of(n, struct module_attribute, attr);
 #define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
@@ -673,6 +675,8 @@ static struct sysfs_ops module_sysfs_ops = {
 	.store = module_attr_store,
 };
 
+static struct kobj_type module_ktype;
+
 static int uevent_filter(struct kset *kset, struct kobject *kobj)
 {
 	struct kobj_type *ktype = get_ktype(kobj);
@@ -686,19 +690,12 @@ static struct kset_uevent_ops module_uevent_ops = {
 	.filter = uevent_filter,
 };
 
-#else
-static struct sysfs_ops module_sysfs_ops = {
-	.show = NULL,
-	.store = NULL,
-};
-#endif
+decl_subsys(module, &module_ktype, &module_uevent_ops);
 
 static struct kobj_type module_ktype = {
 	.sysfs_ops = &module_sysfs_ops,
 };
 
-decl_subsys(module, &module_ktype, &module_uevent_ops);
-
 /*
 * param_sysfs_init - wrapper for built-in params support
 */
@@ -719,6 +716,15 @@ static int __init param_sysfs_init(void)
 }
 subsys_initcall(param_sysfs_init);
 
+#else
+#if 0
+static struct sysfs_ops module_sysfs_ops = {
+	.show = NULL,
+	.store = NULL,
+};
+#endif
+#endif
+
 EXPORT_SYMBOL(param_set_byte);
 EXPORT_SYMBOL(param_get_byte);
 EXPORT_SYMBOL(param_set_short);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 95f6657fff73..51a4dd0f1b74 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -81,29 +81,34 @@ config SOFTWARE_SUSPEND
 	bool "Software Suspend"
 	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
 	---help---
-	  Enable the possibility of suspending the machine.
-	  It doesn't need ACPI or APM.
-	  You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
-	  (patch for sysvinit needed).
+	  Enable the suspend to disk (STD) functionality.
 
-	  It creates an image which is saved in your active swap. Upon next
+	  You can suspend your machine with 'echo disk > /sys/power/state'.
+	  Alternatively, you can use the additional userland tools available
+	  from <http://suspend.sf.net>.
+
+	  In principle it does not require ACPI or APM, although for example
+	  ACPI will be used if available.
+
+	  It creates an image which is saved in your active swap. Upon the next
 	  boot, pass the 'resume=/dev/swappartition' argument to the kernel to
 	  have it detect the saved image, restore memory state from it, and
 	  continue to run as before. If you do not want the previous state to
-	  be reloaded, then use the 'noresume' kernel argument. However, note
-	  that your partitions will be fsck'd and you must re-mkswap your swap
-	  partitions. It does not work with swap files.
+	  be reloaded, then use the 'noresume' kernel command line argument.
+	  Note, however, that fsck will be run on your filesystems and you will
+	  need to run mkswap against the swap partition used for the suspend.
 
-	  Right now you may boot without resuming and then later resume but
-	  in meantime you cannot use those swap partitions/files which were
-	  involved in suspending. Also in this case there is a risk that buffers
-	  on disk won't match with saved ones.
+	  It also works with swap files to a limited extent (for details see
+	  <file:Documentation/power/swsusp-and-swap-files.txt>).
 
-	  For more information take a look at <file:Documentation/power/swsusp.txt>.
+	  Right now you may boot without resuming and resume later but in the
+	  meantime you cannot use the swap partition(s)/file(s) involved in
+	  suspending. Also in this case you must not use the filesystems
+	  that were mounted before the suspend. In particular, you MUST NOT
+	  MOUNT any journaled filesystems mounted before the suspend or they
+	  will get corrupted in a nasty way.
 
-	  (For now, swsusp is incompatible with PAE aka HIGHMEM_64G on i386.
-	  we need identity mapping for resume to work, and that is trivial
-	  to get with 4MB pages, but less than trivial on PAE).
+	  For more information take a look at <file:Documentation/power/swsusp.txt>.
 
 config PM_STD_PARTITION
 	string "Default resume partition"
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 623786d44159..89bcf4973ee5 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -27,7 +27,15 @@ int pm_prepare_console(void)
 		return 1;
 	}
 
-	set_console(SUSPEND_CONSOLE);
+	if (set_console(SUSPEND_CONSOLE)) {
+		/*
+		 * We're unable to switch to the SUSPEND_CONSOLE.
+		 * Let the calling function know so it can decide
+		 * what to do.
+		 */
+		release_console_sem();
+		return 1;
+	}
 	release_console_sem();
 
 	if (vt_waitactive(SUSPEND_CONSOLE)) {
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 406b20adb27a..02e4fb69111a 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -39,7 +39,13 @@ static inline int platform_prepare(void)
39{ 39{
40 int error = 0; 40 int error = 0;
41 41
42 if (pm_disk_mode == PM_DISK_PLATFORM) { 42 switch (pm_disk_mode) {
43 case PM_DISK_TEST:
44 case PM_DISK_TESTPROC:
45 case PM_DISK_SHUTDOWN:
46 case PM_DISK_REBOOT:
47 break;
48 default:
43 if (pm_ops && pm_ops->prepare) 49 if (pm_ops && pm_ops->prepare)
44 error = pm_ops->prepare(PM_SUSPEND_DISK); 50 error = pm_ops->prepare(PM_SUSPEND_DISK);
45 } 51 }
@@ -48,40 +54,48 @@ static inline int platform_prepare(void)
48 54
49/** 55/**
50 * power_down - Shut machine down for hibernate. 56 * power_down - Shut machine down for hibernate.
51 * @mode: Suspend-to-disk mode
52 * 57 *
53 * Use the platform driver, if configured so, and return gracefully if it 58 * Use the platform driver, if so configured; otherwise try
54 * fails. 59 * to power off or reboot.
55 * Otherwise, try to power off and reboot. If they fail, halt the machine,
56 * there ain't no turning back.
57 */ 60 */
58 61
59static void power_down(suspend_disk_method_t mode) 62static void power_down(void)
60{ 63{
61 switch(mode) { 64 switch (pm_disk_mode) {
62 case PM_DISK_PLATFORM: 65 case PM_DISK_TEST:
63 if (pm_ops && pm_ops->enter) { 66 case PM_DISK_TESTPROC:
64 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); 67 break;
65 pm_ops->enter(PM_SUSPEND_DISK);
66 break;
67 }
68 case PM_DISK_SHUTDOWN: 68 case PM_DISK_SHUTDOWN:
69 kernel_power_off(); 69 kernel_power_off();
70 break; 70 break;
71 case PM_DISK_REBOOT: 71 case PM_DISK_REBOOT:
72 kernel_restart(NULL); 72 kernel_restart(NULL);
73 break; 73 break;
74 default:
75 if (pm_ops && pm_ops->enter) {
76 kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
77 pm_ops->enter(PM_SUSPEND_DISK);
78 break;
79 }
74 } 80 }
75 kernel_halt(); 81 kernel_halt();
76 /* Valid image is on the disk, if we continue we risk serious data corruption 82 /*
77 after resume. */ 83 * A valid image is on the disk; if we continue we risk serious
84 * data corruption after resume.
85 */
78 printk(KERN_CRIT "Please power me down manually\n"); 86 printk(KERN_CRIT "Please power me down manually\n");
79 while(1); 87 while(1);
80} 88}
81 89
82static inline void platform_finish(void) 90static inline void platform_finish(void)
83{ 91{
84 if (pm_disk_mode == PM_DISK_PLATFORM) { 92 switch (pm_disk_mode) {
93 case PM_DISK_TEST:
94 case PM_DISK_TESTPROC:
95 case PM_DISK_SHUTDOWN:
96 case PM_DISK_REBOOT:
97 break;
98 default:
85 if (pm_ops && pm_ops->finish) 99 if (pm_ops && pm_ops->finish)
86 pm_ops->finish(PM_SUSPEND_DISK); 100 pm_ops->finish(PM_SUSPEND_DISK);
87 } 101 }
@@ -108,8 +122,6 @@ static int prepare_processes(void)
108/** 122/**
109 * pm_suspend_disk - The granpappy of hibernation power management. 123 * pm_suspend_disk - The granpappy of hibernation power management.
110 * 124 *
111 * If we're going through the firmware, then get it over with quickly.
112 *
113 * If not, then call swsusp to do its thing, then figure out how 125 * If not, then call swsusp to do its thing, then figure out how
114 * to power down the system. 126 * to power down the system.
115 */ 127 */
@@ -166,7 +178,7 @@ int pm_suspend_disk(void)
166 pr_debug("PM: writing image.\n"); 178 pr_debug("PM: writing image.\n");
167 error = swsusp_write(); 179 error = swsusp_write();
168 if (!error) 180 if (!error)
169 power_down(pm_disk_mode); 181 power_down();
170 else { 182 else {
171 swsusp_free(); 183 swsusp_free();
172 goto Thaw; 184 goto Thaw;
@@ -240,12 +252,6 @@ static int software_resume(void)
240 goto Done; 252 goto Done;
241 } 253 }
242 254
243 error = platform_prepare();
244 if (error) {
245 swsusp_free();
246 goto Thaw;
247 }
248
249 pr_debug("PM: Reading swsusp image.\n"); 255 pr_debug("PM: Reading swsusp image.\n");
250 256
251 error = swsusp_read(); 257 error = swsusp_read();
@@ -268,7 +274,6 @@ static int software_resume(void)
268 enable_nonboot_cpus(); 274 enable_nonboot_cpus();
269 Free: 275 Free:
270 swsusp_free(); 276 swsusp_free();
271 platform_finish();
272 device_resume(); 277 device_resume();
273 resume_console(); 278 resume_console();
274 Thaw: 279 Thaw:
@@ -285,7 +290,6 @@ late_initcall(software_resume);
285 290
286 291
287static const char * const pm_disk_modes[] = { 292static const char * const pm_disk_modes[] = {
288 [PM_DISK_FIRMWARE] = "firmware",
289 [PM_DISK_PLATFORM] = "platform", 293 [PM_DISK_PLATFORM] = "platform",
290 [PM_DISK_SHUTDOWN] = "shutdown", 294 [PM_DISK_SHUTDOWN] = "shutdown",
291 [PM_DISK_REBOOT] = "reboot", 295 [PM_DISK_REBOOT] = "reboot",
@@ -296,27 +300,25 @@ static const char * const pm_disk_modes[] = {
296/** 300/**
297 * disk - Control suspend-to-disk mode 301 * disk - Control suspend-to-disk mode
298 * 302 *
299 * Suspend-to-disk can be handled in several ways. The greatest 303 * Suspend-to-disk can be handled in several ways. We have a few options
300 * distinction is who writes memory to disk - the firmware or the OS. 304 * for putting the system to sleep - using the platform driver (e.g. ACPI
301 * If the firmware does it, we assume that it also handles suspending 305 * or other pm_ops), powering off the system, or rebooting the system
302 * the system. 306 * (for testing), as well as the two test modes.
303 * If the OS does it, then we have three options for putting the system
304 * to sleep - using the platform driver (e.g. ACPI or other PM registers),
305 * powering off the system or rebooting the system (for testing).
306 * 307 *
307 * The system will support either 'firmware' or 'platform', and that is 308 * The system can support 'platform', and that is known a priori (and
308 * known a priori (and encoded in pm_ops). But, the user may choose 309 * encoded in pm_ops). However, the user may choose 'shutdown' or 'reboot'
309 * 'shutdown' or 'reboot' as alternatives. 310 * as alternatives, as well as the test modes 'test' and 'testproc'.
310 * 311 *
311 * show() will display what the mode is currently set to. 312 * show() will display what the mode is currently set to.
312 * store() will accept one of 313 * store() will accept one of
313 * 314 *
314 * 'firmware'
315 * 'platform' 315 * 'platform'
316 * 'shutdown' 316 * 'shutdown'
317 * 'reboot' 317 * 'reboot'
318 * 'test'
319 * 'testproc'
318 * 320 *
319 * It will only change to 'firmware' or 'platform' if the system 321 * It will only change to 'platform' if the system
320 * supports it (as determined from pm_ops->pm_disk_mode). 322 * supports it (as determined from pm_ops->pm_disk_mode).
321 */ 323 */
322 324
@@ -338,17 +340,21 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
338 len = p ? p - buf : n; 340 len = p ? p - buf : n;
339 341
340 mutex_lock(&pm_mutex); 342 mutex_lock(&pm_mutex);
341 for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { 343 for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) {
342 if (!strncmp(buf, pm_disk_modes[i], len)) { 344 if (!strncmp(buf, pm_disk_modes[i], len)) {
343 mode = i; 345 mode = i;
344 break; 346 break;
345 } 347 }
346 } 348 }
347 if (mode) { 349 if (mode) {
348 if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT || 350 switch (mode) {
349 mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) { 351 case PM_DISK_SHUTDOWN:
352 case PM_DISK_REBOOT:
353 case PM_DISK_TEST:
354 case PM_DISK_TESTPROC:
350 pm_disk_mode = mode; 355 pm_disk_mode = mode;
351 } else { 356 break;
357 default:
352 if (pm_ops && pm_ops->enter && 358 if (pm_ops && pm_ops->enter &&
353 (mode == pm_ops->pm_disk_mode)) 359 (mode == pm_ops->pm_disk_mode))
354 pm_disk_mode = mode; 360 pm_disk_mode = mode;
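
The loop now starting at PM_DISK_PLATFORM, and the 'if (mode)' test treating 0 as "no string matched", both lean on the value ordering of suspend_disk_method_t. A sketch of the values this code assumes once 'firmware' is dropped (illustrative only; the authoritative definition lives in include/linux/pm.h):

        typedef enum {
                PM_DISK_INVALID = 0,    /* "mode" stays 0 when nothing matched */
                PM_DISK_PLATFORM,       /* first valid mode; the loop starts here */
                PM_DISK_SHUTDOWN,
                PM_DISK_REBOOT,
                PM_DISK_TEST,
                PM_DISK_TESTPROC,
                PM_DISK_MAX
        } suspend_disk_method_t;
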
diff --git a/kernel/power/main.c b/kernel/power/main.c
index e1c413120469..72419a3b1beb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -30,7 +30,7 @@
30DEFINE_MUTEX(pm_mutex); 30DEFINE_MUTEX(pm_mutex);
31 31
32struct pm_ops *pm_ops; 32struct pm_ops *pm_ops;
33suspend_disk_method_t pm_disk_mode = PM_DISK_PLATFORM; 33suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
34 34
35/** 35/**
36 * pm_set_ops - Set the global power method table. 36 * pm_set_ops - Set the global power method table.
@@ -41,9 +41,26 @@ void pm_set_ops(struct pm_ops * ops)
41{ 41{
42 mutex_lock(&pm_mutex); 42 mutex_lock(&pm_mutex);
43 pm_ops = ops; 43 pm_ops = ops;
44 if (ops && ops->pm_disk_mode != PM_DISK_INVALID) {
45 pm_disk_mode = ops->pm_disk_mode;
46 } else
47 pm_disk_mode = PM_DISK_SHUTDOWN;
44 mutex_unlock(&pm_mutex); 48 mutex_unlock(&pm_mutex);
45} 49}
46 50
51/**
52 * pm_valid_only_mem - generic memory-only valid callback
53 *
54 * pm_ops drivers that implement mem suspend only and only need
55 * to check for that in their .valid callback can use this instead
56 * of rolling their own .valid callback.
57 */
58int pm_valid_only_mem(suspend_state_t state)
59{
60 return state == PM_SUSPEND_MEM;
61}
62
63
47static inline void pm_finish(suspend_state_t state) 64static inline void pm_finish(suspend_state_t state)
48{ 65{
49 if (pm_ops->finish) 66 if (pm_ops->finish)
@@ -111,13 +128,24 @@ static int suspend_prepare(suspend_state_t state)
111 return error; 128 return error;
112} 129}
113 130
131/* default implementation */
132void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
133{
134 local_irq_disable();
135}
136
137/* default implementation */
138void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
139{
140 local_irq_enable();
141}
114 142
115int suspend_enter(suspend_state_t state) 143int suspend_enter(suspend_state_t state)
116{ 144{
117 int error = 0; 145 int error = 0;
118 unsigned long flags;
119 146
120 local_irq_save(flags); 147 arch_suspend_disable_irqs();
148 BUG_ON(!irqs_disabled());
121 149
122 if ((error = device_power_down(PMSG_SUSPEND))) { 150 if ((error = device_power_down(PMSG_SUSPEND))) {
123 printk(KERN_ERR "Some devices failed to power down\n"); 151 printk(KERN_ERR "Some devices failed to power down\n");
@@ -126,7 +154,8 @@ int suspend_enter(suspend_state_t state)
126 error = pm_ops->enter(state); 154 error = pm_ops->enter(state);
127 device_power_up(); 155 device_power_up();
128 Done: 156 Done:
129 local_irq_restore(flags); 157 arch_suspend_enable_irqs();
158 BUG_ON(irqs_disabled());
130 return error; 159 return error;
131} 160}
132 161
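
The __attribute__((weak)) defaults above exist to be overridden at link time. A hedged sketch of how an architecture would supply its own pair, which suspend_enter() then calls instead of the local_irq_* defaults (the arch "foo" and its foo_* helpers are invented for illustration):

        /* arch/foo/kernel/pm.c -- hypothetical */
        void arch_suspend_disable_irqs(void)
        {
                foo_mask_wakeup_sources();      /* invented arch-specific step */
                local_irq_disable();            /* must leave IRQs hard-disabled:
                                                   suspend_enter() BUG_ONs otherwise */
        }

        void arch_suspend_enable_irqs(void)
        {
                local_irq_enable();
                foo_unmask_wakeup_sources();    /* invented */
        }
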
@@ -167,7 +196,10 @@ static inline int valid_state(suspend_state_t state)
167 if (state == PM_SUSPEND_DISK) 196 if (state == PM_SUSPEND_DISK)
168 return 1; 197 return 1;
169 198
170 if (pm_ops && pm_ops->valid && !pm_ops->valid(state)) 199 /* all other states need low-level support and must be
200 * valid to the low-level implementation; a missing valid()
201 * callback implies that none are valid. */
202 if (!pm_ops || !pm_ops->valid || !pm_ops->valid(state))
171 return 0; 203 return 0;
172 return 1; 204 return 1;
173} 205}
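
Taken together, the main.c changes define the contract a platform driver sees: pm_set_ops() now derives the default pm_disk_mode from the ops, and pm_valid_only_mem() covers the common .valid case. A minimal sketch of a hypothetical driver registering itself (foo_* names are invented; the fields and helpers are the ones visible in this diff):

        static int foo_prepare(suspend_state_t state) { /* quiesce firmware */ return 0; }
        static int foo_enter(suspend_state_t state)   { /* enter the state  */ return 0; }
        static int foo_finish(suspend_state_t state)  { /* undo prepare     */ return 0; }

        static struct pm_ops foo_pm_ops = {
                .valid        = pm_valid_only_mem,  /* new generic helper above */
                .prepare      = foo_prepare,
                .enter        = foo_enter,
                .finish       = foo_finish,
                .pm_disk_mode = PM_DISK_PLATFORM,   /* picked up by pm_set_ops() */
        };

        static int __init foo_pm_init(void)
        {
                pm_set_ops(&foo_pm_ops);    /* also updates pm_disk_mode now */
                return 0;
        }
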
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 7fb834397a0d..175370824f37 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -229,13 +229,13 @@ int swsusp_shrink_memory(void)
229 size += highmem_size; 229 size += highmem_size;
230 for_each_zone (zone) 230 for_each_zone (zone)
231 if (populated_zone(zone)) { 231 if (populated_zone(zone)) {
232 tmp += snapshot_additional_pages(zone);
232 if (is_highmem(zone)) { 233 if (is_highmem(zone)) {
233 highmem_size -= 234 highmem_size -=
234 zone_page_state(zone, NR_FREE_PAGES); 235 zone_page_state(zone, NR_FREE_PAGES);
235 } else { 236 } else {
236 tmp -= zone_page_state(zone, NR_FREE_PAGES); 237 tmp -= zone_page_state(zone, NR_FREE_PAGES);
237 tmp += zone->lowmem_reserve[ZONE_NORMAL]; 238 tmp += zone->lowmem_reserve[ZONE_NORMAL];
238 tmp += snapshot_additional_pages(zone);
239 } 239 }
240 } 240 }
241 241
diff --git a/kernel/power/user.c b/kernel/power/user.c
index dd09efe7df54..7cf6713b2325 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -368,9 +368,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
368 if (error) { 368 if (error) {
369 printk(KERN_ERR "Failed to suspend some devices.\n"); 369 printk(KERN_ERR "Failed to suspend some devices.\n");
370 } else { 370 } else {
371 /* Enter S3, system is already frozen */ 371 error = disable_nonboot_cpus();
372 suspend_enter(PM_SUSPEND_MEM); 372 if (!error) {
373 373 /* Enter S3, system is already frozen */
374 suspend_enter(PM_SUSPEND_MEM);
375 enable_nonboot_cpus();
376 }
374 /* Wake up devices */ 377 /* Wake up devices */
375 device_resume(); 378 device_resume();
376 } 379 }
diff --git a/kernel/printk.c b/kernel/printk.c
index 0c151877ff71..4b47e59248df 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -54,7 +54,7 @@ int console_printk[4] = {
54}; 54};
55 55
56/* 56/*
57 * Low lever drivers may need that to know if they can schedule in 57 * Low level drivers may need that to know if they can schedule in
58 * their unblank() callback or not. So let's export it. 58 * their unblank() callback or not. So let's export it.
59 */ 59 */
60int oops_in_progress; 60int oops_in_progress;
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 482b11ff65cb..bcd14e83ef39 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -60,19 +60,19 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
60static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ 60static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
61static char *torture_type = "rcu"; /* What RCU implementation to torture. */ 61static char *torture_type = "rcu"; /* What RCU implementation to torture. */
62 62
63module_param(nreaders, int, 0); 63module_param(nreaders, int, 0444);
64MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); 64MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
65module_param(nfakewriters, int, 0); 65module_param(nfakewriters, int, 0444);
66MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); 66MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
67module_param(stat_interval, int, 0); 67module_param(stat_interval, int, 0444);
68MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); 68MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
69module_param(verbose, bool, 0); 69module_param(verbose, bool, 0444);
70MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); 70MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
71module_param(test_no_idle_hz, bool, 0); 71module_param(test_no_idle_hz, bool, 0444);
72MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); 72MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
73module_param(shuffle_interval, int, 0); 73module_param(shuffle_interval, int, 0444);
74MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); 74MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
75module_param(torture_type, charp, 0); 75module_param(torture_type, charp, 0444);
76MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); 76MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
77 77
78#define TORTURE_FLAG "-torture:" 78#define TORTURE_FLAG "-torture:"
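
The only change here is the permissions argument to module_param(). A sketch of what 0444 buys, using one parameter as the example (standard /sys/module layout):

        #include <linux/module.h>
        #include <linux/moduleparam.h>

        static int nreaders = -1;
        module_param(nreaders, int, 0444);  /* was 0: no sysfs entry at all */
        MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");

        /* With 0444 the running value becomes world-readable (never writable) at
         *   /sys/module/rcutorture/parameters/nreaders
         * so the torture settings can be inspected while the test runs. */
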
diff --git a/kernel/relay.c b/kernel/relay.c
index ef8a935710a2..577f251c7e28 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -474,13 +474,12 @@ static void setup_callbacks(struct rchan *chan,
474} 474}
475 475
476/** 476/**
477 *
478 * relay_hotcpu_callback - CPU hotplug callback 477 * relay_hotcpu_callback - CPU hotplug callback
479 * @nb: notifier block 478 * @nb: notifier block
480 * @action: hotplug action to take 479 * @action: hotplug action to take
481 * @hcpu: CPU number 480 * @hcpu: CPU number
482 * 481 *
483 * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD) 482 * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
484 */ 483 */
485static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, 484static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
486 unsigned long action, 485 unsigned long action,
diff --git a/kernel/resource.c b/kernel/resource.c
index bdb55a33f969..9bd14fd3e6de 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -213,27 +213,6 @@ int request_resource(struct resource *root, struct resource *new)
213EXPORT_SYMBOL(request_resource); 213EXPORT_SYMBOL(request_resource);
214 214
215/** 215/**
216 * ____request_resource - reserve a resource, with resource conflict returned
217 * @root: root resource descriptor
218 * @new: resource descriptor desired by caller
219 *
220 * Returns:
221 * On success, NULL is returned.
222 * On error, a pointer to the conflicting resource is returned.
223 */
224struct resource *____request_resource(struct resource *root, struct resource *new)
225{
226 struct resource *conflict;
227
228 write_lock(&resource_lock);
229 conflict = __request_resource(root, new);
230 write_unlock(&resource_lock);
231 return conflict;
232}
233
234EXPORT_SYMBOL(____request_resource);
235
236/**
237 * release_resource - release a previously reserved resource 216 * release_resource - release a previously reserved resource
238 * @old: resource pointer 217 * @old: resource pointer
239 */ 218 */
diff --git a/kernel/sched.c b/kernel/sched.c
index 0dc757246d89..960d7c5fca39 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3006,23 +3006,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
3006} 3006}
3007#endif 3007#endif
3008 3008
3009static inline void wake_priority_sleeper(struct rq *rq)
3010{
3011#ifdef CONFIG_SCHED_SMT
3012 if (!rq->nr_running)
3013 return;
3014
3015 spin_lock(&rq->lock);
3016 /*
3017 * If an SMT sibling task has been put to sleep for priority
3018 * reasons reschedule the idle task to see if it can now run.
3019 */
3020 if (rq->nr_running)
3021 resched_task(rq->idle);
3022 spin_unlock(&rq->lock);
3023#endif
3024}
3025
3026DEFINE_PER_CPU(struct kernel_stat, kstat); 3009DEFINE_PER_CPU(struct kernel_stat, kstat);
3027 3010
3028EXPORT_PER_CPU_SYMBOL(kstat); 3011EXPORT_PER_CPU_SYMBOL(kstat);
@@ -3239,10 +3222,7 @@ void scheduler_tick(void)
3239 3222
3240 update_cpu_clock(p, rq, now); 3223 update_cpu_clock(p, rq, now);
3241 3224
3242 if (p == rq->idle) 3225 if (p != rq->idle)
3243 /* Task on the idle queue */
3244 wake_priority_sleeper(rq);
3245 else
3246 task_running_tick(rq, p); 3226 task_running_tick(rq, p);
3247#ifdef CONFIG_SMP 3227#ifdef CONFIG_SMP
3248 update_load(rq); 3228 update_load(rq);
@@ -3251,136 +3231,6 @@ void scheduler_tick(void)
3251#endif 3231#endif
3252} 3232}
3253 3233
3254#ifdef CONFIG_SCHED_SMT
3255static inline void wakeup_busy_runqueue(struct rq *rq)
3256{
3257 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
3258 if (rq->curr == rq->idle && rq->nr_running)
3259 resched_task(rq->idle);
3260}
3261
3262/*
3263 * Called with interrupt disabled and this_rq's runqueue locked.
3264 */
3265static void wake_sleeping_dependent(int this_cpu)
3266{
3267 struct sched_domain *tmp, *sd = NULL;
3268 int i;
3269
3270 for_each_domain(this_cpu, tmp) {
3271 if (tmp->flags & SD_SHARE_CPUPOWER) {
3272 sd = tmp;
3273 break;
3274 }
3275 }
3276
3277 if (!sd)
3278 return;
3279
3280 for_each_cpu_mask(i, sd->span) {
3281 struct rq *smt_rq = cpu_rq(i);
3282
3283 if (i == this_cpu)
3284 continue;
3285 if (unlikely(!spin_trylock(&smt_rq->lock)))
3286 continue;
3287
3288 wakeup_busy_runqueue(smt_rq);
3289 spin_unlock(&smt_rq->lock);
3290 }
3291}
3292
3293/*
3294 * number of 'lost' timeslices this task wont be able to fully
3295 * utilize, if another task runs on a sibling. This models the
3296 * slowdown effect of other tasks running on siblings:
3297 */
3298static inline unsigned long
3299smt_slice(struct task_struct *p, struct sched_domain *sd)
3300{
3301 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
3302}
3303
3304/*
3305 * To minimise lock contention and not have to drop this_rq's runlock we only
3306 * trylock the sibling runqueues and bypass those runqueues if we fail to
3307 * acquire their lock. As we only trylock the normal locking order does not
3308 * need to be obeyed.
3309 */
3310static int
3311dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3312{
3313 struct sched_domain *tmp, *sd = NULL;
3314 int ret = 0, i;
3315
3316 /* kernel/rt threads do not participate in dependent sleeping */
3317 if (!p->mm || rt_task(p))
3318 return 0;
3319
3320 for_each_domain(this_cpu, tmp) {
3321 if (tmp->flags & SD_SHARE_CPUPOWER) {
3322 sd = tmp;
3323 break;
3324 }
3325 }
3326
3327 if (!sd)
3328 return 0;
3329
3330 for_each_cpu_mask(i, sd->span) {
3331 struct task_struct *smt_curr;
3332 struct rq *smt_rq;
3333
3334 if (i == this_cpu)
3335 continue;
3336
3337 smt_rq = cpu_rq(i);
3338 if (unlikely(!spin_trylock(&smt_rq->lock)))
3339 continue;
3340
3341 smt_curr = smt_rq->curr;
3342
3343 if (!smt_curr->mm)
3344 goto unlock;
3345
3346 /*
3347 * If a user task with lower static priority than the
3348 * running task on the SMT sibling is trying to schedule,
3349 * delay it till there is proportionately less timeslice
3350 * left of the sibling task to prevent a lower priority
3351 * task from using an unfair proportion of the
3352 * physical cpu's resources. -ck
3353 */
3354 if (rt_task(smt_curr)) {
3355 /*
3356 * With real time tasks we run non-rt tasks only
3357 * per_cpu_gain% of the time.
3358 */
3359 if ((jiffies % DEF_TIMESLICE) >
3360 (sd->per_cpu_gain * DEF_TIMESLICE / 100))
3361 ret = 1;
3362 } else {
3363 if (smt_curr->static_prio < p->static_prio &&
3364 !TASK_PREEMPTS_CURR(p, smt_rq) &&
3365 smt_slice(smt_curr, sd) > task_timeslice(p))
3366 ret = 1;
3367 }
3368unlock:
3369 spin_unlock(&smt_rq->lock);
3370 }
3371 return ret;
3372}
3373#else
3374static inline void wake_sleeping_dependent(int this_cpu)
3375{
3376}
3377static inline int
3378dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3379{
3380 return 0;
3381}
3382#endif
3383
3384#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 3234#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
3385 3235
3386void fastcall add_preempt_count(int val) 3236void fastcall add_preempt_count(int val)
@@ -3507,7 +3357,6 @@ need_resched_nonpreemptible:
3507 if (!rq->nr_running) { 3357 if (!rq->nr_running) {
3508 next = rq->idle; 3358 next = rq->idle;
3509 rq->expired_timestamp = 0; 3359 rq->expired_timestamp = 0;
3510 wake_sleeping_dependent(cpu);
3511 goto switch_tasks; 3360 goto switch_tasks;
3512 } 3361 }
3513 } 3362 }
@@ -3547,8 +3396,6 @@ need_resched_nonpreemptible:
3547 } 3396 }
3548 } 3397 }
3549 next->sleep_type = SLEEP_NORMAL; 3398 next->sleep_type = SLEEP_NORMAL;
3550 if (dependent_sleeper(cpu, rq, next))
3551 next = rq->idle;
3552switch_tasks: 3399switch_tasks:
3553 if (next == rq->idle) 3400 if (next == rq->idle)
3554 schedstat_inc(rq, sched_goidle); 3401 schedstat_inc(rq, sched_goidle);
@@ -3566,7 +3413,7 @@ switch_tasks:
3566 3413
3567 sched_info_switch(prev, next); 3414 sched_info_switch(prev, next);
3568 if (likely(prev != next)) { 3415 if (likely(prev != next)) {
3569 next->timestamp = now; 3416 next->timestamp = next->last_ran = now;
3570 rq->nr_switches++; 3417 rq->nr_switches++;
3571 rq->curr = next; 3418 rq->curr = next;
3572 ++*switch_count; 3419 ++*switch_count;
@@ -4840,32 +4687,10 @@ out_unlock:
4840 return retval; 4687 return retval;
4841} 4688}
4842 4689
4843static inline struct task_struct *eldest_child(struct task_struct *p)
4844{
4845 if (list_empty(&p->children))
4846 return NULL;
4847 return list_entry(p->children.next,struct task_struct,sibling);
4848}
4849
4850static inline struct task_struct *older_sibling(struct task_struct *p)
4851{
4852 if (p->sibling.prev==&p->parent->children)
4853 return NULL;
4854 return list_entry(p->sibling.prev,struct task_struct,sibling);
4855}
4856
4857static inline struct task_struct *younger_sibling(struct task_struct *p)
4858{
4859 if (p->sibling.next==&p->parent->children)
4860 return NULL;
4861 return list_entry(p->sibling.next,struct task_struct,sibling);
4862}
4863
4864static const char stat_nam[] = "RSDTtZX"; 4690static const char stat_nam[] = "RSDTtZX";
4865 4691
4866static void show_task(struct task_struct *p) 4692static void show_task(struct task_struct *p)
4867{ 4693{
4868 struct task_struct *relative;
4869 unsigned long free = 0; 4694 unsigned long free = 0;
4870 unsigned state; 4695 unsigned state;
4871 4696
@@ -4891,19 +4716,7 @@ static void show_task(struct task_struct *p)
4891 free = (unsigned long)n - (unsigned long)end_of_stack(p); 4716 free = (unsigned long)n - (unsigned long)end_of_stack(p);
4892 } 4717 }
4893#endif 4718#endif
4894 printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); 4719 printk("%5lu %5d %6d", free, p->pid, p->parent->pid);
4895 if ((relative = eldest_child(p)))
4896 printk("%5d ", relative->pid);
4897 else
4898 printk(" ");
4899 if ((relative = younger_sibling(p)))
4900 printk("%7d", relative->pid);
4901 else
4902 printk(" ");
4903 if ((relative = older_sibling(p)))
4904 printk(" %5d", relative->pid);
4905 else
4906 printk(" ");
4907 if (!p->mm) 4720 if (!p->mm)
4908 printk(" (L-TLB)\n"); 4721 printk(" (L-TLB)\n");
4909 else 4722 else
@@ -4933,7 +4746,7 @@ void show_state_filter(unsigned long state_filter)
4933 * console might take alot of time: 4746 * console might take alot of time:
4934 */ 4747 */
4935 touch_nmi_watchdog(); 4748 touch_nmi_watchdog();
4936 if (p->state & state_filter) 4749 if (!state_filter || (p->state & state_filter))
4937 show_task(p); 4750 show_task(p);
4938 } while_each_thread(g, p); 4751 } while_each_thread(g, p);
4939 4752
diff --git a/kernel/signal.c b/kernel/signal.c
index e2a7d4bf7d57..3670225ecbc0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1140,7 +1140,8 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1140 return error; 1140 return error;
1141} 1141}
1142 1142
1143static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) 1143int
1144kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1144{ 1145{
1145 int error; 1146 int error;
1146 rcu_read_lock(); 1147 rcu_read_lock();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3ca1d5ff0319..c904748f2290 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -846,7 +846,8 @@ static ctl_table vm_table[] = {
846 .extra2 = &one_hundred, 846 .extra2 = &one_hundred,
847 }, 847 },
848#endif 848#endif
849#ifdef CONFIG_X86_32 849#if defined(CONFIG_X86_32) || \
850 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
850 { 851 {
851 .ctl_name = VM_VDSO_ENABLED, 852 .ctl_name = VM_VDSO_ENABLED,
852 .procname = "vdso_enabled", 853 .procname = "vdso_enabled",
@@ -1359,8 +1360,7 @@ void unregister_sysctl_table(struct ctl_table_header * header)
1359} 1360}
1360 1361
1361#else /* !CONFIG_SYSCTL */ 1362#else /* !CONFIG_SYSCTL */
1362struct ctl_table_header * register_sysctl_table(ctl_table * table, 1363struct ctl_table_header *register_sysctl_table(ctl_table * table)
1363 int insert_at_head)
1364{ 1364{
1365 return NULL; 1365 return NULL;
1366} 1366}
@@ -1676,7 +1676,7 @@ static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1676{ 1676{
1677 int op; 1677 int op;
1678 1678
1679 if (!capable(CAP_SYS_ADMIN)) 1679 if (write && !capable(CAP_SYS_ADMIN))
1680 return -EPERM; 1680 return -EPERM;
1681 1681
1682 op = OP_OR; 1682 op = OP_OR;
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476fa058d..ad7d2392cb0e 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -102,7 +102,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
102 */ 102 */
103static int send_reply(struct sk_buff *skb, pid_t pid) 103static int send_reply(struct sk_buff *skb, pid_t pid)
104{ 104{
105 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 105 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
106 void *reply = genlmsg_data(genlhdr); 106 void *reply = genlmsg_data(genlhdr);
107 int rc; 107 int rc;
108 108
@@ -121,7 +121,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
121static void send_cpu_listeners(struct sk_buff *skb, 121static void send_cpu_listeners(struct sk_buff *skb,
122 struct listener_list *listeners) 122 struct listener_list *listeners)
123{ 123{
124 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 124 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
125 struct listener *s, *tmp; 125 struct listener *s, *tmp;
126 struct sk_buff *skb_next, *skb_cur = skb; 126 struct sk_buff *skb_next, *skb_cur = skb;
127 void *reply = genlmsg_data(genlhdr); 127 void *reply = genlmsg_data(genlhdr);
diff --git a/kernel/time.c b/kernel/time.c
index c6c80ea5d0ea..ba18ec4899bd 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -452,6 +452,7 @@ struct timespec ns_to_timespec(const s64 nsec)
452 452
453 return ts; 453 return ts;
454} 454}
455EXPORT_SYMBOL(ns_to_timespec);
455 456
456/** 457/**
457 * ns_to_timeval - Convert nanoseconds to timeval 458 * ns_to_timeval - Convert nanoseconds to timeval
@@ -469,6 +470,7 @@ struct timeval ns_to_timeval(const s64 nsec)
469 470
470 return tv; 471 return tv;
471} 472}
473EXPORT_SYMBOL(ns_to_timeval);
472 474
473/* 475/*
474 * Convert jiffies to milliseconds and back. 476 * Convert jiffies to milliseconds and back.
@@ -635,6 +637,7 @@ timeval_to_jiffies(const struct timeval *value)
635 (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> 637 (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
636 (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; 638 (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
637} 639}
640EXPORT_SYMBOL(timeval_to_jiffies);
638 641
639void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) 642void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
640{ 643{
@@ -649,6 +652,7 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
649 tv_usec /= NSEC_PER_USEC; 652 tv_usec /= NSEC_PER_USEC;
650 value->tv_usec = tv_usec; 653 value->tv_usec = tv_usec;
651} 654}
655EXPORT_SYMBOL(jiffies_to_timeval);
652 656
653/* 657/*
654 * Convert jiffies/jiffies_64 to clock_t and back. 658 * Convert jiffies/jiffies_64 to clock_t and back.
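
These EXPORT_SYMBOL additions make the conversion helpers usable from modules. A minimal sketch of module code that links only because of them (foo_* names invented):

        #include <linux/time.h>
        #include <linux/jiffies.h>

        static unsigned long foo_timeout_jiffies(const struct timeval *tv)
        {
                return timeval_to_jiffies(tv);      /* exported above */
        }

        static void foo_report(unsigned long j, struct timeval *tv)
        {
                jiffies_to_timeval(j, tv);          /* exported above */
        }
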
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 67932ea78c17..76212b2a99de 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -274,72 +274,3 @@ void clockevents_notify(unsigned long reason, void *arg)
274} 274}
275EXPORT_SYMBOL_GPL(clockevents_notify); 275EXPORT_SYMBOL_GPL(clockevents_notify);
276 276
277#ifdef CONFIG_SYSFS
278
279/**
280 * clockevents_show_registered - sysfs interface for listing clockevents
281 * @dev: unused
282 * @buf: char buffer to be filled with clock events list
283 *
284 * Provides sysfs interface for listing registered clock event devices
285 */
286static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf)
287{
288 struct list_head *tmp;
289 char *p = buf;
290 int cpu;
291
292 spin_lock(&clockevents_lock);
293
294 list_for_each(tmp, &clockevent_devices) {
295 struct clock_event_device *ce;
296
297 ce = list_entry(tmp, struct clock_event_device, list);
298 p += sprintf(p, "%-20s F:%04x M:%d", ce->name,
299 ce->features, ce->mode);
300 p += sprintf(p, " C:");
301 if (!cpus_equal(ce->cpumask, cpu_possible_map)) {
302 for_each_cpu_mask(cpu, ce->cpumask)
303 p += sprintf(p, " %d", cpu);
304 } else {
305 /*
306 * FIXME: Add the cpu which is handling this sucker
307 */
308 }
309 p += sprintf(p, "\n");
310 }
311
312 spin_unlock(&clockevents_lock);
313
314 return p - buf;
315}
316
317/*
318 * Sysfs setup bits:
319 */
320static SYSDEV_ATTR(registered, 0600,
321 clockevents_show_registered, NULL);
322
323static struct sysdev_class clockevents_sysclass = {
324 set_kset_name("clockevents"),
325};
326
327static struct sys_device clockevents_sys_device = {
328 .id = 0,
329 .cls = &clockevents_sysclass,
330};
331
332static int __init clockevents_sysfs_init(void)
333{
334 int error = sysdev_class_register(&clockevents_sysclass);
335
336 if (!error)
337 error = sysdev_register(&clockevents_sys_device);
338 if (!error)
339 error = sysdev_create_file(
340 &clockevents_sys_device,
341 &attr_registered);
342 return error;
343}
344device_initcall(clockevents_sysfs_init);
345#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 193a0793af95..fe5c7db24247 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -55,16 +55,18 @@ static DEFINE_SPINLOCK(clocksource_lock);
55static char override_name[32]; 55static char override_name[32];
56static int finished_booting; 56static int finished_booting;
57 57
58/* clocksource_done_booting - Called near the end of bootup 58/* clocksource_done_booting - Called near the end of core bootup
59 * 59 *
60 * Hack to avoid lots of clocksource churn at boot time 60 * Hack to avoid lots of clocksource churn at boot time.
61 * We use fs_initcall because we want this to start before
62 * device_initcall but after subsys_initcall.
61 */ 63 */
62static int __init clocksource_done_booting(void) 64static int __init clocksource_done_booting(void)
63{ 65{
64 finished_booting = 1; 66 finished_booting = 1;
65 return 0; 67 return 0;
66} 68}
67late_initcall(clocksource_done_booting); 69fs_initcall(clocksource_done_booting);
68 70
69#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 71#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
70static LIST_HEAD(watchdog_list); 72static LIST_HEAD(watchdog_list);
@@ -149,7 +151,8 @@ static void clocksource_check_watchdog(struct clocksource *cs)
149 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; 151 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
150 add_timer(&watchdog_timer); 152 add_timer(&watchdog_timer);
151 } 153 }
152 } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { 154 } else {
155 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
153 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 156 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
154 157
155 if (!watchdog || cs->rating > watchdog->rating) { 158 if (!watchdog || cs->rating > watchdog->rating) {
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 3be8da8fed7e..4c256fdb8875 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -69,4 +69,4 @@ static int __init init_jiffies_clocksource(void)
69 return clocksource_register(&clocksource_jiffies); 69 return clocksource_register(&clocksource_jiffies);
70} 70}
71 71
72module_init(init_jiffies_clocksource); 72core_initcall(init_jiffies_clocksource);
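
Both of these initcall changes are ordering fixes rather than logic changes. For reference, a sketch of where the affected levels sit (levels per include/linux/init.h; annotations summarize the two hunks above):

        /*
         * core_initcall     -> init_jiffies_clocksource(); registered early
         *                      so the jiffies clocksource is always there as
         *                      a fallback (was module_init, i.e. device level)
         * subsys_initcall
         * fs_initcall       -> clocksource_done_booting(); after subsys_ but
         *                      before device_initcall, per the comment above
         *                      (was late_initcall)
         * device_initcall
         * late_initcall
         */
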
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index eb12509e00bd..cb25649c6f50 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -32,7 +32,7 @@ static u64 tick_length, tick_length_base;
32/* TIME_ERROR prevents overwriting the CMOS clock */ 32/* TIME_ERROR prevents overwriting the CMOS clock */
33static int time_state = TIME_OK; /* clock synchronization status */ 33static int time_state = TIME_OK; /* clock synchronization status */
34int time_status = STA_UNSYNC; /* clock status bits */ 34int time_status = STA_UNSYNC; /* clock status bits */
35static long time_offset; /* time adjustment (ns) */ 35static s64 time_offset; /* time adjustment (ns) */
36static long time_constant = 2; /* pll time constant */ 36static long time_constant = 2; /* pll time constant */
37long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ 37long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
38long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ 38long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
@@ -196,7 +196,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void)
196 */ 196 */
197int do_adjtimex(struct timex *txc) 197int do_adjtimex(struct timex *txc)
198{ 198{
199 long ltemp, mtemp, save_adjust; 199 long mtemp, save_adjust, rem;
200 s64 freq_adj, temp64; 200 s64 freq_adj, temp64;
201 int result; 201 int result;
202 202
@@ -277,14 +277,14 @@ int do_adjtimex(struct timex *txc)
277 time_adjust = txc->offset; 277 time_adjust = txc->offset;
278 } 278 }
279 else if (time_status & STA_PLL) { 279 else if (time_status & STA_PLL) {
280 ltemp = txc->offset * NSEC_PER_USEC; 280 time_offset = txc->offset * NSEC_PER_USEC;
281 281
282 /* 282 /*
283 * Scale the phase adjustment and 283 * Scale the phase adjustment and
284 * clamp to the operating range. 284 * clamp to the operating range.
285 */ 285 */
286 time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); 286 time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC);
287 time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); 287 time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC);
288 288
289 /* 289 /*
290 * Select whether the frequency is to be controlled 290 * Select whether the frequency is to be controlled
@@ -297,11 +297,11 @@ int do_adjtimex(struct timex *txc)
297 mtemp = xtime.tv_sec - time_reftime; 297 mtemp = xtime.tv_sec - time_reftime;
298 time_reftime = xtime.tv_sec; 298 time_reftime = xtime.tv_sec;
299 299
300 freq_adj = (s64)time_offset * mtemp; 300 freq_adj = time_offset * mtemp;
301 freq_adj = shift_right(freq_adj, time_constant * 2 + 301 freq_adj = shift_right(freq_adj, time_constant * 2 +
302 (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); 302 (SHIFT_PLL + 2) * 2 - SHIFT_NSEC);
303 if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { 303 if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
304 temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); 304 temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL);
305 if (time_offset < 0) { 305 if (time_offset < 0) {
306 temp64 = -temp64; 306 temp64 = -temp64;
307 do_div(temp64, mtemp); 307 do_div(temp64, mtemp);
@@ -314,8 +314,10 @@ int do_adjtimex(struct timex *txc)
314 freq_adj += time_freq; 314 freq_adj += time_freq;
315 freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); 315 freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
316 time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); 316 time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
317 time_offset = (time_offset / NTP_INTERVAL_FREQ) 317 time_offset = div_long_long_rem_signed(time_offset,
318 << SHIFT_UPDATE; 318 NTP_INTERVAL_FREQ,
319 &rem);
320 time_offset <<= SHIFT_UPDATE;
319 } /* STA_PLL */ 321 } /* STA_PLL */
320 } /* txc->modes & ADJ_OFFSET */ 322 } /* txc->modes & ADJ_OFFSET */
321 if (txc->modes & ADJ_TICK) 323 if (txc->modes & ADJ_TICK)
@@ -328,12 +330,12 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
328 result = TIME_ERROR; 330 result = TIME_ERROR;
329 331
330 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) 332 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
331 txc->offset = save_adjust; 333 txc->offset = save_adjust;
332 else 334 else
333 txc->offset = shift_right(time_offset, SHIFT_UPDATE) 335 txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) *
334 * NTP_INTERVAL_FREQ / 1000; 336 NTP_INTERVAL_FREQ / 1000;
335 txc->freq = (time_freq / NSEC_PER_USEC) 337 txc->freq = (time_freq / NSEC_PER_USEC) <<
336 << (SHIFT_USEC - SHIFT_NSEC); 338 (SHIFT_USEC - SHIFT_NSEC);
337 txc->maxerror = time_maxerror; 339 txc->maxerror = time_maxerror;
338 txc->esterror = time_esterror; 340 txc->esterror = time_esterror;
339 txc->status = time_status; 341 txc->status = time_status;
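
Why time_offset became s64, and why the plain division became div_long_long_rem_signed(): on 32-bit machines a long overflows once the scaled offset is shifted, and a 64-bit divide needs a helper there. An illustrative worked case (the SHIFT_UPDATE and NTP_INTERVAL_FREQ values below are only for the arithmetic, not quoted from the tree):

        /* txc->offset clamped to MAXPHASE        =  500,000 us
         * time_offset = 500,000 * NSEC_PER_USEC  =  5 * 10^8 ns  (still fits)
         * after / NTP_INTERVAL_FREQ (say 100)    =  5 * 10^6
         * after << SHIFT_UPDATE (say 12)         ~  2 * 10^10    > 2^31 - 1
         * -> must be held in an s64, as above */
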
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 12b3efeb9f6f..eadfce2fff74 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -284,6 +284,49 @@ void tick_shutdown_broadcast(unsigned int *cpup)
284 spin_unlock_irqrestore(&tick_broadcast_lock, flags); 284 spin_unlock_irqrestore(&tick_broadcast_lock, flags);
285} 285}
286 286
287void tick_suspend_broadcast(void)
288{
289 struct clock_event_device *bc;
290 unsigned long flags;
291
292 spin_lock_irqsave(&tick_broadcast_lock, flags);
293
294 bc = tick_broadcast_device.evtdev;
295 if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
296 clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
297
298 spin_unlock_irqrestore(&tick_broadcast_lock, flags);
299}
300
301int tick_resume_broadcast(void)
302{
303 struct clock_event_device *bc;
304 unsigned long flags;
305 int broadcast = 0;
306
307 spin_lock_irqsave(&tick_broadcast_lock, flags);
308
309 bc = tick_broadcast_device.evtdev;
310
311 if (bc) {
312 switch (tick_broadcast_device.mode) {
313 case TICKDEV_MODE_PERIODIC:
314 if (!cpus_empty(tick_broadcast_mask))
315 tick_broadcast_start_periodic(bc);
316 broadcast = cpu_isset(smp_processor_id(),
317 tick_broadcast_mask);
318 break;
319 case TICKDEV_MODE_ONESHOT:
320 broadcast = tick_resume_broadcast_oneshot(bc);
321 break;
322 }
323 }
324 spin_unlock_irqrestore(&tick_broadcast_lock, flags);
325
326 return broadcast;
327}
328
329
287#ifdef CONFIG_TICK_ONESHOT 330#ifdef CONFIG_TICK_ONESHOT
288 331
289static cpumask_t tick_broadcast_oneshot_mask; 332static cpumask_t tick_broadcast_oneshot_mask;
@@ -311,6 +354,16 @@ static int tick_broadcast_set_event(ktime_t expires, int force)
311 } 354 }
312} 355}
313 356
357int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
358{
359 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
360
361 if (!cpus_empty(tick_broadcast_oneshot_mask))
362 tick_broadcast_set_event(ktime_get(), 1);
363
364 return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask);
365}
366
314/* 367/*
315 * Reprogram the broadcast device: 368 * Reprogram the broadcast device:
316 * 369 *
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 4500e347f1bb..bfda3f7f0716 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -77,6 +77,7 @@ static void tick_periodic(int cpu)
77void tick_handle_periodic(struct clock_event_device *dev) 77void tick_handle_periodic(struct clock_event_device *dev)
78{ 78{
79 int cpu = smp_processor_id(); 79 int cpu = smp_processor_id();
80 ktime_t next;
80 81
81 tick_periodic(cpu); 82 tick_periodic(cpu);
82 83
@@ -86,12 +87,12 @@ void tick_handle_periodic(struct clock_event_device *dev)
86 * Setup the next period for devices, which do not have 87 * Setup the next period for devices, which do not have
87 * periodic mode: 88 * periodic mode:
88 */ 89 */
90 next = ktime_add(dev->next_event, tick_period);
89 for (;;) { 91 for (;;) {
90 ktime_t next = ktime_add(dev->next_event, tick_period);
91
92 if (!clockevents_program_event(dev, next, ktime_get())) 92 if (!clockevents_program_event(dev, next, ktime_get()))
93 return; 93 return;
94 tick_periodic(cpu); 94 tick_periodic(cpu);
95 next = ktime_add(next, tick_period);
95 } 96 }
96} 97}
97 98
@@ -297,6 +298,29 @@ static void tick_shutdown(unsigned int *cpup)
297 spin_unlock_irqrestore(&tick_device_lock, flags); 298 spin_unlock_irqrestore(&tick_device_lock, flags);
298} 299}
299 300
301static void tick_suspend(void)
302{
303 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
304 unsigned long flags;
305
306 spin_lock_irqsave(&tick_device_lock, flags);
307 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
308 spin_unlock_irqrestore(&tick_device_lock, flags);
309}
310
311static void tick_resume(void)
312{
313 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
314 unsigned long flags;
315
316 spin_lock_irqsave(&tick_device_lock, flags);
317 if (td->mode == TICKDEV_MODE_PERIODIC)
318 tick_setup_periodic(td->evtdev, 0);
319 else
320 tick_resume_oneshot();
321 spin_unlock_irqrestore(&tick_device_lock, flags);
322}
323
300/* 324/*
301 * Notification about clock event devices 325 * Notification about clock event devices
302 */ 326 */
@@ -324,6 +348,16 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
324 tick_shutdown(dev); 348 tick_shutdown(dev);
325 break; 349 break;
326 350
351 case CLOCK_EVT_NOTIFY_SUSPEND:
352 tick_suspend();
353 tick_suspend_broadcast();
354 break;
355
356 case CLOCK_EVT_NOTIFY_RESUME:
357 if (!tick_resume_broadcast())
358 tick_resume();
359 break;
360
327 default: 361 default:
328 break; 362 break;
329 } 363 }
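
The two new notifier cases complete a suspend/resume path that the timekeeping code drives (see the kernel/timer.c hunks below). Sketched end to end:

        /* timekeeping_suspend():
         *     clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
         *         -> tick_suspend()            shut down this CPU's tick device
         *         -> tick_suspend_broadcast()  shut down a periodic broadcast device
         *
         * timekeeping_resume():
         *     clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
         *         -> tick_resume_broadcast()   restart broadcast; returns nonzero
         *                                      if this CPU is served by broadcast
         *         -> tick_resume()             only when broadcast does not cover us
         */
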
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 54861a0f29ff..c9d203bde518 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -19,12 +19,13 @@ extern void tick_setup_oneshot(struct clock_event_device *newdev,
19extern int tick_program_event(ktime_t expires, int force); 19extern int tick_program_event(ktime_t expires, int force);
20extern void tick_oneshot_notify(void); 20extern void tick_oneshot_notify(void);
21extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); 21extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
22 22extern void tick_resume_oneshot(void);
23# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 23# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
24extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); 24extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
25extern void tick_broadcast_oneshot_control(unsigned long reason); 25extern void tick_broadcast_oneshot_control(unsigned long reason);
26extern void tick_broadcast_switch_to_oneshot(void); 26extern void tick_broadcast_switch_to_oneshot(void);
27extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); 27extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
28extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
28# else /* BROADCAST */ 29# else /* BROADCAST */
29static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) 30static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
30{ 31{
@@ -43,6 +44,10 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
43{ 44{
44 BUG(); 45 BUG();
45} 46}
47static inline void tick_resume_oneshot(void)
48{
49 BUG();
50}
46static inline int tick_program_event(ktime_t expires, int force) 51static inline int tick_program_event(ktime_t expires, int force)
47{ 52{
48 return 0; 53 return 0;
@@ -54,6 +59,10 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
54} 59}
55static inline void tick_broadcast_oneshot_control(unsigned long reason) { } 60static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
56static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } 61static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
62static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
63{
64 return 0;
65}
57#endif /* !TICK_ONESHOT */ 66#endif /* !TICK_ONESHOT */
58 67
59/* 68/*
@@ -67,6 +76,8 @@ extern int tick_check_broadcast_device(struct clock_event_device *dev);
67extern int tick_is_broadcast_device(struct clock_event_device *dev); 76extern int tick_is_broadcast_device(struct clock_event_device *dev);
68extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); 77extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
69extern void tick_shutdown_broadcast(unsigned int *cpup); 78extern void tick_shutdown_broadcast(unsigned int *cpup);
79extern void tick_suspend_broadcast(void);
80extern int tick_resume_broadcast(void);
70 81
71extern void 82extern void
72tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); 83tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
@@ -90,6 +101,8 @@ static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
90static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } 101static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
91static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } 102static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
92static inline void tick_shutdown_broadcast(unsigned int *cpup) { } 103static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
104static inline void tick_suspend_broadcast(void) { }
105static inline int tick_resume_broadcast(void) { return 0; }
93 106
94/* 107/*
95 * Set the periodic handler in non broadcast mode 108 * Set the periodic handler in non broadcast mode
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 2e8b7ff863cc..f6997ab0c3c9 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -41,6 +41,18 @@ int tick_program_event(ktime_t expires, int force)
41} 41}
42 42
43/** 43/**
44 * tick_resume_oneshot - resume oneshot mode
45 */
46void tick_resume_oneshot(void)
47{
48 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
49 struct clock_event_device *dev = td->evtdev;
50
51 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
52 tick_program_event(ktime_get(), 1);
53}
54
55/**
44 * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) 56 * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
45 */ 57 */
46void tick_setup_oneshot(struct clock_event_device *newdev, 58void tick_setup_oneshot(struct clock_event_device *newdev,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 95e41f7f850b..51556b95f60f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -21,6 +21,8 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/tick.h> 22#include <linux/tick.h>
23 23
24#include <asm/irq_regs.h>
25
24#include "tick-internal.h" 26#include "tick-internal.h"
25 27
26/* 28/*
@@ -165,7 +167,9 @@ void tick_nohz_stop_sched_tick(void)
165 goto end; 167 goto end;
166 168
167 cpu = smp_processor_id(); 169 cpu = smp_processor_id();
168 BUG_ON(local_softirq_pending()); 170 if (unlikely(local_softirq_pending()))
171 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
172 local_softirq_pending());
169 173
170 now = ktime_get(); 174 now = ktime_get();
171 /* 175 /*
@@ -191,19 +195,19 @@ void tick_nohz_stop_sched_tick(void)
191 next_jiffies = get_next_timer_interrupt(last_jiffies); 195 next_jiffies = get_next_timer_interrupt(last_jiffies);
192 delta_jiffies = next_jiffies - last_jiffies; 196 delta_jiffies = next_jiffies - last_jiffies;
193 197
198 if (rcu_needs_cpu(cpu))
199 delta_jiffies = 1;
194 /* 200 /*
195 * Do not stop the tick, if we are only one off 201 * Do not stop the tick, if we are only one off
196 * or if the cpu is required for rcu 202 * or if the cpu is required for rcu
197 */ 203 */
198 if (!ts->tick_stopped && (delta_jiffies == 1 || rcu_needs_cpu(cpu))) 204 if (!ts->tick_stopped && delta_jiffies == 1)
199 goto out; 205 goto out;
200 206
201 /* Schedule the tick, if we are at least one jiffie off */ 207 /* Schedule the tick, if we are at least one jiffie off */
202 if ((long)delta_jiffies >= 1) { 208 if ((long)delta_jiffies >= 1) {
203 209
204 if (rcu_needs_cpu(cpu)) 210 if (delta_jiffies > 1)
205 delta_jiffies = 1;
206 else
207 cpu_set(cpu, nohz_cpu_mask); 211 cpu_set(cpu, nohz_cpu_mask);
208 /* 212 /*
209 * nohz_stop_sched_tick can be called several times before 213 * nohz_stop_sched_tick can be called several times before
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f82c635c3d5c..59df5e8555a8 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -194,9 +194,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td)
194 return; 194 return;
195 } 195 }
196 SEQ_printf(m, "%s\n", dev->name); 196 SEQ_printf(m, "%s\n", dev->name);
197 SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); 197 SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns);
198 SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); 198 SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns);
199 SEQ_printf(m, " mult: %ld\n", dev->mult); 199 SEQ_printf(m, " mult: %lu\n", dev->mult);
200 SEQ_printf(m, " shift: %d\n", dev->shift); 200 SEQ_printf(m, " shift: %d\n", dev->shift);
201 SEQ_printf(m, " mode: %d\n", dev->mode); 201 SEQ_printf(m, " mode: %d\n", dev->mode);
202 SEQ_printf(m, " next_event: %Ld nsecs\n", 202 SEQ_printf(m, " next_event: %Ld nsecs\n",
diff --git a/kernel/timer.c b/kernel/timer.c
index cb1b86a9c52f..b22bd39740dd 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -505,6 +505,8 @@ out:
505 return ret; 505 return ret;
506} 506}
507 507
508EXPORT_SYMBOL(try_to_del_timer_sync);
509
508/** 510/**
509 * del_timer_sync - deactivate a timer and wait for the handler to finish. 511 * del_timer_sync - deactivate a timer and wait for the handler to finish.
510 * @timer: the timer to be deactivated 512 * @timer: the timer to be deactivated
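
Exporting try_to_del_timer_sync() matters because del_timer_sync() cannot be called while holding a lock that the timer handler itself takes. The canonical caller shape looks like this (the mydev structure and its lock/timer are hypothetical):

        spin_lock_bh(&mydev->lock);
        while (try_to_del_timer_sync(&mydev->timer) < 0) {
                /* < 0: the handler is running right now -- drop the lock so
                 * the handler can take it and complete, then retry */
                spin_unlock_bh(&mydev->lock);
                cpu_relax();
                spin_lock_bh(&mydev->lock);
        }
        /* timer now guaranteed inactive; safe to tear mydev down */
        spin_unlock_bh(&mydev->lock);
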
@@ -695,15 +697,28 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
695{ 697{
696 ktime_t hr_delta = hrtimer_get_next_event(); 698 ktime_t hr_delta = hrtimer_get_next_event();
697 struct timespec tsdelta; 699 struct timespec tsdelta;
700 unsigned long delta;
698 701
699 if (hr_delta.tv64 == KTIME_MAX) 702 if (hr_delta.tv64 == KTIME_MAX)
700 return expires; 703 return expires;
701 704
702 if (hr_delta.tv64 <= TICK_NSEC) 705 /*
703 return now; 706 * An expired timer is available; let it expire in the next tick
707 */
708 if (hr_delta.tv64 <= 0)
709 return now + 1;
704 710
705 tsdelta = ktime_to_timespec(hr_delta); 711 tsdelta = ktime_to_timespec(hr_delta);
706 now += timespec_to_jiffies(&tsdelta); 712 delta = timespec_to_jiffies(&tsdelta);
713 /*
714 * Take rounding errors into account and make sure that it
715 * expires in the next tick. Otherwise we go into an endless
716 * ping pong due to tick_nohz_stop_sched_tick() retriggering
717 * the timer softirq.
718 */
719 if (delta < 1)
720 delta = 1;
721 now += delta;
707 if (time_before(now, expires)) 722 if (time_before(now, expires))
708 return now; 723 return now;
709 return expires; 724 return expires;
@@ -711,6 +726,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
711 726
712/** 727/**
713 * next_timer_interrupt - return the jiffy of the next pending timer 728 * next_timer_interrupt - return the jiffy of the next pending timer
729 * @now: current time (in jiffies)
714 */ 730 */
715unsigned long get_next_timer_interrupt(unsigned long now) 731unsigned long get_next_timer_interrupt(unsigned long now)
716{ 732{
@@ -861,6 +877,8 @@ int do_settimeofday(struct timespec *tv)
861 clock->error = 0; 877 clock->error = 0;
862 ntp_clear(); 878 ntp_clear();
863 879
880 update_vsyscall(&xtime, clock);
881
864 write_sequnlock_irqrestore(&xtime_lock, flags); 882 write_sequnlock_irqrestore(&xtime_lock, flags);
865 883
866 /* signal hrtimers about time change */ 884 /* signal hrtimers about time change */
@@ -908,7 +926,7 @@ static inline void change_clocksource(void) { }
908#endif 926#endif
909 927
910/** 928/**
911 * timeofday_is_continuous - check to see if timekeeping is free running 929 * timekeeping_is_continuous - check to see if timekeeping is free running
912 */ 930 */
913int timekeeping_is_continuous(void) 931int timekeeping_is_continuous(void)
914{ 932{
@@ -996,8 +1014,11 @@ static int timekeeping_resume(struct sys_device *dev)
996 write_sequnlock_irqrestore(&xtime_lock, flags); 1014 write_sequnlock_irqrestore(&xtime_lock, flags);
997 1015
998 touch_softlockup_watchdog(); 1016 touch_softlockup_watchdog();
1017
1018 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
1019
999 /* Resume hrtimers */ 1020 /* Resume hrtimers */
1000 clock_was_set(); 1021 hres_timers_resume();
1001 1022
1002 return 0; 1023 return 0;
1003} 1024}
@@ -1010,6 +1031,9 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
1010 timekeeping_suspended = 1; 1031 timekeeping_suspended = 1;
1011 timekeeping_suspend_time = read_persistent_clock(); 1032 timekeeping_suspend_time = read_persistent_clock();
1012 write_sequnlock_irqrestore(&xtime_lock, flags); 1033 write_sequnlock_irqrestore(&xtime_lock, flags);
1034
1035 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
1036
1013 return 0; 1037 return 0;
1014} 1038}
1015 1039
@@ -1650,8 +1674,8 @@ static void __devinit migrate_timers(int cpu)
1650 new_base = get_cpu_var(tvec_bases); 1674 new_base = get_cpu_var(tvec_bases);
1651 1675
1652 local_irq_disable(); 1676 local_irq_disable();
1653 spin_lock(&new_base->lock); 1677 double_spin_lock(&new_base->lock, &old_base->lock,
1654 spin_lock(&old_base->lock); 1678 smp_processor_id() < cpu);
1655 1679
1656 BUG_ON(old_base->running_timer); 1680 BUG_ON(old_base->running_timer);
1657 1681
@@ -1664,8 +1688,8 @@ static void __devinit migrate_timers(int cpu)
1664 migrate_timer_list(new_base, old_base->tv5.vec + i); 1688 migrate_timer_list(new_base, old_base->tv5.vec + i);
1665 } 1689 }
1666 1690
1667 spin_unlock(&old_base->lock); 1691 double_spin_unlock(&new_base->lock, &old_base->lock,
1668 spin_unlock(&new_base->lock); 1692 smp_processor_id() < cpu);
1669 local_irq_enable(); 1693 local_irq_enable();
1670 put_cpu_var(tvec_bases); 1694 put_cpu_var(tvec_bases);
1671} 1695}
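
The old code took new_base->lock then old_base->lock unconditionally; double_spin_lock() makes the order explicit and identical on every CPU, which is what rules out ABBA ordering between two concurrent migrations. A sketch of the helper's contract as used here (see its definition for the authoritative version):

        /* double_spin_lock(&l1, &l2, l1_first):
         *     l1_first == true   ->  spin_lock(&l1); spin_lock(&l2);
         *     l1_first == false  ->  spin_lock(&l2); spin_lock(&l1);
         *
         * Here l1_first = (smp_processor_id() < cpu), so whichever CPUs
         * are involved, both sides take the lower-numbered CPU's base
         * lock first and the ordering stays consistent. */
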