aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2005-06-15 17:06:48 -0400
committerTony Luck <tony.luck@intel.com>2005-06-15 17:06:48 -0400
commitf2cbb4f01936a3e4225692e03b084b78c56d386d (patch)
treef89f3d8baa250589a38a4dd2df56f84cddae3c76 /kernel
parent325a479c4c110db278ef3361460a48c4093252cc (diff)
parent1016888fb69662936b32ab767c7419a3be9a69d3 (diff)
Auto merge with /home/aegl/GIT/linus
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/audit.c93
-rw-r--r--kernel/auditsc.c47
-rw-r--r--kernel/cpuset.c24
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/irq/handle.c3
-rw-r--r--kernel/itimer.c6
-rw-r--r--kernel/kallsyms.c13
-rw-r--r--kernel/kprobes.c142
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/power/main.c6
-rw-r--r--kernel/printk.c72
-rw-r--r--kernel/profile.c16
-rw-r--r--kernel/sched.c9
-rw-r--r--kernel/signal.c11
-rw-r--r--kernel/spinlock.c8
-rw-r--r--kernel/sys.c2
17 files changed, 338 insertions, 124 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index eb88b446c2..b01d26fe8d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_SYSFS) += ksysfs.o
29obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 29obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
30obj-$(CONFIG_SECCOMP) += seccomp.o 30obj-$(CONFIG_SECCOMP) += seccomp.o
31 31
32ifneq ($(CONFIG_IA64),y) 32ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
33# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 33# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
34# needed for x86 only. Why this used to be enabled for all architectures is beyond 34# needed for x86 only. Why this used to be enabled for all architectures is beyond
35# me. I suspect most platforms don't need this, but until we know that for sure 35# me. I suspect most platforms don't need this, but until we know that for sure
diff --git a/kernel/audit.c b/kernel/audit.c
index ac26d4d960..9c4f1af0c7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1,4 +1,4 @@
1/* audit.c -- Auditing support -*- linux-c -*- 1/* audit.c -- Auditing support
2 * Gateway between the kernel (e.g., selinux) and the user-space audit daemon. 2 * Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
3 * System-call specific features have moved to auditsc.c 3 * System-call specific features have moved to auditsc.c
4 * 4 *
@@ -38,7 +38,7 @@
38 * 6) Support low-overhead kernel-based filtering to minimize the 38 * 6) Support low-overhead kernel-based filtering to minimize the
39 * information that must be passed to user-space. 39 * information that must be passed to user-space.
40 * 40 *
41 * Example user-space utilities: http://people.redhat.com/faith/audit/ 41 * Example user-space utilities: http://people.redhat.com/sgrubb/audit/
42 */ 42 */
43 43
44#include <linux/init.h> 44#include <linux/init.h>
@@ -142,7 +142,6 @@ struct audit_buffer {
142 int total; 142 int total;
143 int type; 143 int type;
144 int pid; 144 int pid;
145 int count; /* Times requeued */
146}; 145};
147 146
148void audit_set_type(struct audit_buffer *ab, int type) 147void audit_set_type(struct audit_buffer *ab, int type)
@@ -239,36 +238,36 @@ void audit_log_lost(const char *message)
239 238
240} 239}
241 240
242static int audit_set_rate_limit(int limit) 241static int audit_set_rate_limit(int limit, uid_t loginuid)
243{ 242{
244 int old = audit_rate_limit; 243 int old = audit_rate_limit;
245 audit_rate_limit = limit; 244 audit_rate_limit = limit;
246 audit_log(current->audit_context, "audit_rate_limit=%d old=%d", 245 audit_log(NULL, "audit_rate_limit=%d old=%d by auid %u",
247 audit_rate_limit, old); 246 audit_rate_limit, old, loginuid);
248 return old; 247 return old;
249} 248}
250 249
251static int audit_set_backlog_limit(int limit) 250static int audit_set_backlog_limit(int limit, uid_t loginuid)
252{ 251{
253 int old = audit_backlog_limit; 252 int old = audit_backlog_limit;
254 audit_backlog_limit = limit; 253 audit_backlog_limit = limit;
255 audit_log(current->audit_context, "audit_backlog_limit=%d old=%d", 254 audit_log(NULL, "audit_backlog_limit=%d old=%d by auid %u",
256 audit_backlog_limit, old); 255 audit_backlog_limit, old, loginuid);
257 return old; 256 return old;
258} 257}
259 258
260static int audit_set_enabled(int state) 259static int audit_set_enabled(int state, uid_t loginuid)
261{ 260{
262 int old = audit_enabled; 261 int old = audit_enabled;
263 if (state != 0 && state != 1) 262 if (state != 0 && state != 1)
264 return -EINVAL; 263 return -EINVAL;
265 audit_enabled = state; 264 audit_enabled = state;
266 audit_log(current->audit_context, "audit_enabled=%d old=%d", 265 audit_log(NULL, "audit_enabled=%d old=%d by auid %u",
267 audit_enabled, old); 266 audit_enabled, old, loginuid);
268 return old; 267 return old;
269} 268}
270 269
271static int audit_set_failure(int state) 270static int audit_set_failure(int state, uid_t loginuid)
272{ 271{
273 int old = audit_failure; 272 int old = audit_failure;
274 if (state != AUDIT_FAIL_SILENT 273 if (state != AUDIT_FAIL_SILENT
@@ -276,8 +275,8 @@ static int audit_set_failure(int state)
276 && state != AUDIT_FAIL_PANIC) 275 && state != AUDIT_FAIL_PANIC)
277 return -EINVAL; 276 return -EINVAL;
278 audit_failure = state; 277 audit_failure = state;
279 audit_log(current->audit_context, "audit_failure=%d old=%d", 278 audit_log(NULL, "audit_failure=%d old=%d by auid %u",
280 audit_failure, old); 279 audit_failure, old, loginuid);
281 return old; 280 return old;
282} 281}
283 282
@@ -344,6 +343,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
344 int err; 343 int err;
345 struct audit_buffer *ab; 344 struct audit_buffer *ab;
346 u16 msg_type = nlh->nlmsg_type; 345 u16 msg_type = nlh->nlmsg_type;
346 uid_t loginuid; /* loginuid of sender */
347 347
348 err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); 348 err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
349 if (err) 349 if (err)
@@ -351,6 +351,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
351 351
352 pid = NETLINK_CREDS(skb)->pid; 352 pid = NETLINK_CREDS(skb)->pid;
353 uid = NETLINK_CREDS(skb)->uid; 353 uid = NETLINK_CREDS(skb)->uid;
354 loginuid = NETLINK_CB(skb).loginuid;
354 seq = nlh->nlmsg_seq; 355 seq = nlh->nlmsg_seq;
355 data = NLMSG_DATA(nlh); 356 data = NLMSG_DATA(nlh);
356 357
@@ -371,34 +372,36 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
371 return -EINVAL; 372 return -EINVAL;
372 status_get = (struct audit_status *)data; 373 status_get = (struct audit_status *)data;
373 if (status_get->mask & AUDIT_STATUS_ENABLED) { 374 if (status_get->mask & AUDIT_STATUS_ENABLED) {
374 err = audit_set_enabled(status_get->enabled); 375 err = audit_set_enabled(status_get->enabled, loginuid);
375 if (err < 0) return err; 376 if (err < 0) return err;
376 } 377 }
377 if (status_get->mask & AUDIT_STATUS_FAILURE) { 378 if (status_get->mask & AUDIT_STATUS_FAILURE) {
378 err = audit_set_failure(status_get->failure); 379 err = audit_set_failure(status_get->failure, loginuid);
379 if (err < 0) return err; 380 if (err < 0) return err;
380 } 381 }
381 if (status_get->mask & AUDIT_STATUS_PID) { 382 if (status_get->mask & AUDIT_STATUS_PID) {
382 int old = audit_pid; 383 int old = audit_pid;
383 audit_pid = status_get->pid; 384 audit_pid = status_get->pid;
384 audit_log(current->audit_context, 385 audit_log(NULL, "audit_pid=%d old=%d by auid %u",
385 "audit_pid=%d old=%d", audit_pid, old); 386 audit_pid, old, loginuid);
386 } 387 }
387 if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) 388 if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
388 audit_set_rate_limit(status_get->rate_limit); 389 audit_set_rate_limit(status_get->rate_limit, loginuid);
389 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) 390 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
390 audit_set_backlog_limit(status_get->backlog_limit); 391 audit_set_backlog_limit(status_get->backlog_limit,
392 loginuid);
391 break; 393 break;
392 case AUDIT_USER: 394 case AUDIT_USER:
393 ab = audit_log_start(NULL); 395 ab = audit_log_start(NULL);
394 if (!ab) 396 if (!ab)
395 break; /* audit_panic has been called */ 397 break; /* audit_panic has been called */
396 audit_log_format(ab, 398 audit_log_format(ab,
397 "user pid=%d uid=%d length=%d msg='%.1024s'", 399 "user pid=%d uid=%d length=%d loginuid=%u"
400 " msg='%.1024s'",
398 pid, uid, 401 pid, uid,
399 (int)(nlh->nlmsg_len 402 (int)(nlh->nlmsg_len
400 - ((char *)data - (char *)nlh)), 403 - ((char *)data - (char *)nlh)),
401 (char *)data); 404 loginuid, (char *)data);
402 ab->type = AUDIT_USER; 405 ab->type = AUDIT_USER;
403 ab->pid = pid; 406 ab->pid = pid;
404 audit_log_end(ab); 407 audit_log_end(ab);
@@ -411,7 +414,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
411 case AUDIT_LIST: 414 case AUDIT_LIST:
412#ifdef CONFIG_AUDITSYSCALL 415#ifdef CONFIG_AUDITSYSCALL
413 err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, 416 err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
414 uid, seq, data); 417 uid, seq, data, loginuid);
415#else 418#else
416 err = -EOPNOTSUPP; 419 err = -EOPNOTSUPP;
417#endif 420#endif
@@ -480,7 +483,7 @@ static void audit_log_move(struct audit_buffer *ab)
480 if (ab->len == 0) 483 if (ab->len == 0)
481 return; 484 return;
482 485
483 skb = skb_peek(&ab->sklist); 486 skb = skb_peek_tail(&ab->sklist);
484 if (!skb || skb_tailroom(skb) <= ab->len + extra) { 487 if (!skb || skb_tailroom(skb) <= ab->len + extra) {
485 skb = alloc_skb(2 * ab->len + extra, GFP_ATOMIC); 488 skb = alloc_skb(2 * ab->len + extra, GFP_ATOMIC);
486 if (!skb) { 489 if (!skb) {
@@ -519,9 +522,9 @@ static inline int audit_log_drain(struct audit_buffer *ab)
519 retval = netlink_unicast(audit_sock, skb, audit_pid, 522 retval = netlink_unicast(audit_sock, skb, audit_pid,
520 MSG_DONTWAIT); 523 MSG_DONTWAIT);
521 } 524 }
522 if (retval == -EAGAIN && ab->count < 5) { 525 if (retval == -EAGAIN &&
523 ++ab->count; 526 (atomic_read(&audit_backlog)) < audit_backlog_limit) {
524 skb_queue_tail(&ab->sklist, skb); 527 skb_queue_head(&ab->sklist, skb);
525 audit_log_end_irq(ab); 528 audit_log_end_irq(ab);
526 return 1; 529 return 1;
527 } 530 }
@@ -537,8 +540,8 @@ static inline int audit_log_drain(struct audit_buffer *ab)
537 if (!audit_pid) { /* No daemon */ 540 if (!audit_pid) { /* No daemon */
538 int offset = ab->nlh ? NLMSG_SPACE(0) : 0; 541 int offset = ab->nlh ? NLMSG_SPACE(0) : 0;
539 int len = skb->len - offset; 542 int len = skb->len - offset;
540 printk(KERN_ERR "%*.*s\n", 543 skb->data[offset + len] = '\0';
541 len, len, skb->data + offset); 544 printk(KERN_ERR "%s\n", skb->data + offset);
542 } 545 }
543 kfree_skb(skb); 546 kfree_skb(skb);
544 ab->nlh = NULL; 547 ab->nlh = NULL;
@@ -617,7 +620,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx)
617 struct audit_buffer *ab = NULL; 620 struct audit_buffer *ab = NULL;
618 unsigned long flags; 621 unsigned long flags;
619 struct timespec t; 622 struct timespec t;
620 int serial = 0; 623 unsigned int serial;
621 624
622 if (!audit_initialized) 625 if (!audit_initialized)
623 return NULL; 626 return NULL;
@@ -659,15 +662,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx)
659 ab->total = 0; 662 ab->total = 0;
660 ab->type = AUDIT_KERNEL; 663 ab->type = AUDIT_KERNEL;
661 ab->pid = 0; 664 ab->pid = 0;
662 ab->count = 0;
663 665
664#ifdef CONFIG_AUDITSYSCALL 666#ifdef CONFIG_AUDITSYSCALL
665 if (ab->ctx) 667 if (ab->ctx)
666 audit_get_stamp(ab->ctx, &t, &serial); 668 audit_get_stamp(ab->ctx, &t, &serial);
667 else 669 else
668#endif 670#endif
671 {
669 t = CURRENT_TIME; 672 t = CURRENT_TIME;
670 673 serial = 0;
674 }
671 audit_log_format(ab, "audit(%lu.%03lu:%u): ", 675 audit_log_format(ab, "audit(%lu.%03lu:%u): ",
672 t.tv_sec, t.tv_nsec/1000000, serial); 676 t.tv_sec, t.tv_nsec/1000000, serial);
673 return ab; 677 return ab;
@@ -717,6 +721,29 @@ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...)
717 va_end(args); 721 va_end(args);
718} 722}
719 723
724void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len)
725{
726 int i;
727
728 for (i=0; i<len; i++)
729 audit_log_format(ab, "%02x", buf[i]);
730}
731
732void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
733{
734 const unsigned char *p = string;
735
736 while (*p) {
737 if (*p == '"' || *p == ' ' || *p < 0x20 || *p > 0x7f) {
738 audit_log_hex(ab, string, strlen(string));
739 return;
740 }
741 p++;
742 }
743 audit_log_format(ab, "\"%s\"", string);
744}
745
746
720/* This is a helper-function to print the d_path without using a static 747/* This is a helper-function to print the d_path without using a static
721 * buffer or allocating another buffer in addition to the one in 748 * buffer or allocating another buffer in addition to the one in
722 * audit_buffer. */ 749 * audit_buffer. */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 6f1931381b..37b3ac94bc 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1,4 +1,4 @@
1/* auditsc.c -- System-call auditing support -*- linux-c -*- 1/* auditsc.c -- System-call auditing support
2 * Handles all system-call specific auditing features. 2 * Handles all system-call specific auditing features.
3 * 3 *
4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. 4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
@@ -123,7 +123,7 @@ struct audit_context {
123 int major; /* syscall number */ 123 int major; /* syscall number */
124 unsigned long argv[4]; /* syscall arguments */ 124 unsigned long argv[4]; /* syscall arguments */
125 int return_valid; /* return code is valid */ 125 int return_valid; /* return code is valid */
126 int return_code;/* syscall return code */ 126 long return_code;/* syscall return code */
127 int auditable; /* 1 if record should be written */ 127 int auditable; /* 1 if record should be written */
128 int name_count; 128 int name_count;
129 struct audit_names names[AUDIT_NAMES]; 129 struct audit_names names[AUDIT_NAMES];
@@ -135,6 +135,7 @@ struct audit_context {
135 uid_t uid, euid, suid, fsuid; 135 uid_t uid, euid, suid, fsuid;
136 gid_t gid, egid, sgid, fsgid; 136 gid_t gid, egid, sgid, fsgid;
137 unsigned long personality; 137 unsigned long personality;
138 int arch;
138 139
139#if AUDIT_DEBUG 140#if AUDIT_DEBUG
140 int put_count; 141 int put_count;
@@ -250,7 +251,8 @@ static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s)
250 return 0; 251 return 0;
251} 252}
252 253
253int audit_receive_filter(int type, int pid, int uid, int seq, void *data) 254int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
255 uid_t loginuid)
254{ 256{
255 u32 flags; 257 u32 flags;
256 struct audit_entry *entry; 258 struct audit_entry *entry;
@@ -285,6 +287,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data)
285 err = audit_add_rule(entry, &audit_entlist); 287 err = audit_add_rule(entry, &audit_entlist);
286 if (!err && (flags & AUDIT_AT_EXIT)) 288 if (!err && (flags & AUDIT_AT_EXIT))
287 err = audit_add_rule(entry, &audit_extlist); 289 err = audit_add_rule(entry, &audit_extlist);
290 audit_log(NULL, "auid %u added an audit rule\n", loginuid);
288 break; 291 break;
289 case AUDIT_DEL: 292 case AUDIT_DEL:
290 flags =((struct audit_rule *)data)->flags; 293 flags =((struct audit_rule *)data)->flags;
@@ -294,6 +297,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data)
294 err = audit_del_rule(data, &audit_entlist); 297 err = audit_del_rule(data, &audit_entlist);
295 if (!err && (flags & AUDIT_AT_EXIT)) 298 if (!err && (flags & AUDIT_AT_EXIT))
296 err = audit_del_rule(data, &audit_extlist); 299 err = audit_del_rule(data, &audit_extlist);
300 audit_log(NULL, "auid %u removed an audit rule\n", loginuid);
297 break; 301 break;
298 default: 302 default:
299 return -EINVAL; 303 return -EINVAL;
@@ -348,6 +352,10 @@ static int audit_filter_rules(struct task_struct *tsk,
348 case AUDIT_PERS: 352 case AUDIT_PERS:
349 result = (tsk->personality == value); 353 result = (tsk->personality == value);
350 break; 354 break;
355 case AUDIT_ARCH:
356 if (ctx)
357 result = (ctx->arch == value);
358 break;
351 359
352 case AUDIT_EXIT: 360 case AUDIT_EXIT:
353 if (ctx && ctx->return_valid) 361 if (ctx && ctx->return_valid)
@@ -355,7 +363,7 @@ static int audit_filter_rules(struct task_struct *tsk,
355 break; 363 break;
356 case AUDIT_SUCCESS: 364 case AUDIT_SUCCESS:
357 if (ctx && ctx->return_valid) 365 if (ctx && ctx->return_valid)
358 result = (ctx->return_code >= 0); 366 result = (ctx->return_valid == AUDITSC_SUCCESS);
359 break; 367 break;
360 case AUDIT_DEVMAJOR: 368 case AUDIT_DEVMAJOR:
361 if (ctx) { 369 if (ctx) {
@@ -648,8 +656,11 @@ static void audit_log_exit(struct audit_context *context)
648 audit_log_format(ab, "syscall=%d", context->major); 656 audit_log_format(ab, "syscall=%d", context->major);
649 if (context->personality != PER_LINUX) 657 if (context->personality != PER_LINUX)
650 audit_log_format(ab, " per=%lx", context->personality); 658 audit_log_format(ab, " per=%lx", context->personality);
659 audit_log_format(ab, " arch=%x", context->arch);
651 if (context->return_valid) 660 if (context->return_valid)
652 audit_log_format(ab, " exit=%d", context->return_code); 661 audit_log_format(ab, " success=%s exit=%ld",
662 (context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
663 context->return_code);
653 audit_log_format(ab, 664 audit_log_format(ab,
654 " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" 665 " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
655 " pid=%d loginuid=%d uid=%d gid=%d" 666 " pid=%d loginuid=%d uid=%d gid=%d"
@@ -696,9 +707,10 @@ static void audit_log_exit(struct audit_context *context)
696 if (!ab) 707 if (!ab)
697 continue; /* audit_panic has been called */ 708 continue; /* audit_panic has been called */
698 audit_log_format(ab, "item=%d", i); 709 audit_log_format(ab, "item=%d", i);
699 if (context->names[i].name) 710 if (context->names[i].name) {
700 audit_log_format(ab, " name=%s", 711 audit_log_format(ab, " name=");
701 context->names[i].name); 712 audit_log_untrustedstring(ab, context->names[i].name);
713 }
702 if (context->names[i].ino != (unsigned long)-1) 714 if (context->names[i].ino != (unsigned long)-1)
703 audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o" 715 audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o"
704 " uid=%d gid=%d rdev=%02x:%02x", 716 " uid=%d gid=%d rdev=%02x:%02x",
@@ -772,7 +784,7 @@ static inline unsigned int audit_serial(void)
772 * then the record will be written at syscall exit time (otherwise, it 784 * then the record will be written at syscall exit time (otherwise, it
773 * will only be written if another part of the kernel requests that it 785 * will only be written if another part of the kernel requests that it
774 * be written). */ 786 * be written). */
775void audit_syscall_entry(struct task_struct *tsk, int major, 787void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
776 unsigned long a1, unsigned long a2, 788 unsigned long a1, unsigned long a2,
777 unsigned long a3, unsigned long a4) 789 unsigned long a3, unsigned long a4)
778{ 790{
@@ -826,6 +838,7 @@ void audit_syscall_entry(struct task_struct *tsk, int major,
826 if (!audit_enabled) 838 if (!audit_enabled)
827 return; 839 return;
828 840
841 context->arch = arch;
829 context->major = major; 842 context->major = major;
830 context->argv[0] = a1; 843 context->argv[0] = a1;
831 context->argv[1] = a2; 844 context->argv[1] = a2;
@@ -849,13 +862,13 @@ void audit_syscall_entry(struct task_struct *tsk, int major,
849 * filtering, or because some other part of the kernel write an audit 862 * filtering, or because some other part of the kernel write an audit
850 * message), then write out the syscall information. In call cases, 863 * message), then write out the syscall information. In call cases,
851 * free the names stored from getname(). */ 864 * free the names stored from getname(). */
852void audit_syscall_exit(struct task_struct *tsk, int return_code) 865void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
853{ 866{
854 struct audit_context *context; 867 struct audit_context *context;
855 868
856 get_task_struct(tsk); 869 get_task_struct(tsk);
857 task_lock(tsk); 870 task_lock(tsk);
858 context = audit_get_context(tsk, 1, return_code); 871 context = audit_get_context(tsk, valid, return_code);
859 task_unlock(tsk); 872 task_unlock(tsk);
860 873
861 /* Not having a context here is ok, since the parent may have 874 /* Not having a context here is ok, since the parent may have
@@ -868,6 +881,7 @@ void audit_syscall_exit(struct task_struct *tsk, int return_code)
868 881
869 context->in_syscall = 0; 882 context->in_syscall = 0;
870 context->auditable = 0; 883 context->auditable = 0;
884
871 if (context->previous) { 885 if (context->previous) {
872 struct audit_context *new_context = context->previous; 886 struct audit_context *new_context = context->previous;
873 context->previous = NULL; 887 context->previous = NULL;
@@ -981,7 +995,7 @@ void audit_inode(const char *name, const struct inode *inode)
981} 995}
982 996
983void audit_get_stamp(struct audit_context *ctx, 997void audit_get_stamp(struct audit_context *ctx,
984 struct timespec *t, int *serial) 998 struct timespec *t, unsigned int *serial)
985{ 999{
986 if (ctx) { 1000 if (ctx) {
987 t->tv_sec = ctx->ctime.tv_sec; 1001 t->tv_sec = ctx->ctime.tv_sec;
@@ -996,20 +1010,21 @@ void audit_get_stamp(struct audit_context *ctx,
996 1010
997extern int audit_set_type(struct audit_buffer *ab, int type); 1011extern int audit_set_type(struct audit_buffer *ab, int type);
998 1012
999int audit_set_loginuid(struct audit_context *ctx, uid_t loginuid) 1013int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
1000{ 1014{
1001 if (ctx) { 1015 if (task->audit_context) {
1002 struct audit_buffer *ab; 1016 struct audit_buffer *ab;
1003 1017
1004 ab = audit_log_start(NULL); 1018 ab = audit_log_start(NULL);
1005 if (ab) { 1019 if (ab) {
1006 audit_log_format(ab, "login pid=%d uid=%u " 1020 audit_log_format(ab, "login pid=%d uid=%u "
1007 "old loginuid=%u new loginuid=%u", 1021 "old loginuid=%u new loginuid=%u",
1008 ctx->pid, ctx->uid, ctx->loginuid, loginuid); 1022 task->pid, task->uid,
1023 task->audit_context->loginuid, loginuid);
1009 audit_set_type(ab, AUDIT_LOGIN); 1024 audit_set_type(ab, AUDIT_LOGIN);
1010 audit_log_end(ab); 1025 audit_log_end(ab);
1011 } 1026 }
1012 ctx->loginuid = loginuid; 1027 task->audit_context->loginuid = loginuid;
1013 } 1028 }
1014 return 0; 1029 return 0;
1015} 1030}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 961d74044d..00e8f25755 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -166,9 +166,8 @@ static struct super_block *cpuset_sb = NULL;
166 * The hooks from fork and exit, cpuset_fork() and cpuset_exit(), don't 166 * The hooks from fork and exit, cpuset_fork() and cpuset_exit(), don't
167 * (usually) grab cpuset_sem. These are the two most performance 167 * (usually) grab cpuset_sem. These are the two most performance
168 * critical pieces of code here. The exception occurs on exit(), 168 * critical pieces of code here. The exception occurs on exit(),
169 * if the last task using a cpuset exits, and the cpuset was marked 169 * when a task in a notify_on_release cpuset exits. Then cpuset_sem
170 * notify_on_release. In that case, the cpuset_sem is taken, the 170 * is taken, and if the cpuset count is zero, a usermode call made
171 * path to the released cpuset calculated, and a usermode call made
172 * to /sbin/cpuset_release_agent with the name of the cpuset (path 171 * to /sbin/cpuset_release_agent with the name of the cpuset (path
173 * relative to the root of cpuset file system) as the argument. 172 * relative to the root of cpuset file system) as the argument.
174 * 173 *
@@ -1404,6 +1403,18 @@ void cpuset_fork(struct task_struct *tsk)
1404 * 1403 *
1405 * Description: Detach cpuset from @tsk and release it. 1404 * Description: Detach cpuset from @tsk and release it.
1406 * 1405 *
1406 * Note that cpusets marked notify_on_release force every task
1407 * in them to take the global cpuset_sem semaphore when exiting.
1408 * This could impact scaling on very large systems. Be reluctant
1409 * to use notify_on_release cpusets where very high task exit
1410 * scaling is required on large systems.
1411 *
1412 * Don't even think about derefencing 'cs' after the cpuset use
1413 * count goes to zero, except inside a critical section guarded
1414 * by the cpuset_sem semaphore. If you don't hold cpuset_sem,
1415 * then a zero cpuset use count is a license to any other task to
1416 * nuke the cpuset immediately.
1417 *
1407 **/ 1418 **/
1408 1419
1409void cpuset_exit(struct task_struct *tsk) 1420void cpuset_exit(struct task_struct *tsk)
@@ -1415,10 +1426,13 @@ void cpuset_exit(struct task_struct *tsk)
1415 tsk->cpuset = NULL; 1426 tsk->cpuset = NULL;
1416 task_unlock(tsk); 1427 task_unlock(tsk);
1417 1428
1418 if (atomic_dec_and_test(&cs->count)) { 1429 if (notify_on_release(cs)) {
1419 down(&cpuset_sem); 1430 down(&cpuset_sem);
1420 check_for_release(cs); 1431 if (atomic_dec_and_test(&cs->count))
1432 check_for_release(cs);
1421 up(&cpuset_sem); 1433 up(&cpuset_sem);
1434 } else {
1435 atomic_dec(&cs->count);
1422 } 1436 }
1423} 1437}
1424 1438
diff --git a/kernel/exit.c b/kernel/exit.c
index 7be283d989..edaa50b5bb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -846,6 +846,8 @@ fastcall NORET_TYPE void do_exit(long code)
846 for (;;) ; 846 for (;;) ;
847} 847}
848 848
849EXPORT_SYMBOL_GPL(do_exit);
850
849NORET_TYPE void complete_and_exit(struct completion *comp, long code) 851NORET_TYPE void complete_and_exit(struct completion *comp, long code)
850{ 852{
851 if (comp) 853 if (comp)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 2fb0e46e11..436c7d93c0 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -30,6 +30,7 @@
30 */ 30 */
31irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { 31irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
32 [0 ... NR_IRQS-1] = { 32 [0 ... NR_IRQS-1] = {
33 .status = IRQ_DISABLED,
33 .handler = &no_irq_type, 34 .handler = &no_irq_type,
34 .lock = SPIN_LOCK_UNLOCKED 35 .lock = SPIN_LOCK_UNLOCKED
35 } 36 }
@@ -118,8 +119,6 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
118 */ 119 */
119 desc->handler->ack(irq); 120 desc->handler->ack(irq);
120 action_ret = handle_IRQ_event(irq, regs, desc->action); 121 action_ret = handle_IRQ_event(irq, regs, desc->action);
121 if (!noirqdebug)
122 note_interrupt(irq, desc, action_ret);
123 desc->handler->end(irq); 122 desc->handler->end(irq);
124 return 1; 123 return 1;
125 } 124 }
diff --git a/kernel/itimer.c b/kernel/itimer.c
index e9a40e947e..1dc988e0d2 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -123,7 +123,11 @@ static inline void it_real_arm(struct task_struct *p, unsigned long interval)
123 return; 123 return;
124 if (interval > (unsigned long) LONG_MAX) 124 if (interval > (unsigned long) LONG_MAX)
125 interval = LONG_MAX; 125 interval = LONG_MAX;
126 p->signal->real_timer.expires = jiffies + interval; 126 /* the "+ 1" below makes sure that the timer doesn't go off before
127 * the interval requested. This could happen if
128 * time requested % (usecs per jiffy) is more than the usecs left
129 * in the current jiffy */
130 p->signal->real_timer.expires = jiffies + interval + 1;
127 add_timer(&p->signal->real_timer); 131 add_timer(&p->signal->real_timer);
128} 132}
129 133
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 1627f8d6e0..13bcec151b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -46,6 +46,14 @@ static inline int is_kernel_inittext(unsigned long addr)
46 return 0; 46 return 0;
47} 47}
48 48
49static inline int is_kernel_extratext(unsigned long addr)
50{
51 if (addr >= (unsigned long)_sextratext
52 && addr <= (unsigned long)_eextratext)
53 return 1;
54 return 0;
55}
56
49static inline int is_kernel_text(unsigned long addr) 57static inline int is_kernel_text(unsigned long addr)
50{ 58{
51 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) 59 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
@@ -169,8 +177,9 @@ const char *kallsyms_lookup(unsigned long addr,
169 namebuf[0] = 0; 177 namebuf[0] = 0;
170 178
171 if ((all_var && is_kernel(addr)) || 179 if ((all_var && is_kernel(addr)) ||
172 (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr)))) { 180 (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) ||
173 unsigned long symbol_end=0; 181 is_kernel_extratext(addr)))) {
182 unsigned long symbol_end = 0;
174 183
175 /* do a binary search on the sorted kallsyms_addresses array */ 184 /* do a binary search on the sorted kallsyms_addresses array */
176 low = 0; 185 low = 0;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1d5dd1337b..037142b72a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
44 44
45unsigned int kprobe_cpu = NR_CPUS; 45unsigned int kprobe_cpu = NR_CPUS;
46static DEFINE_SPINLOCK(kprobe_lock); 46static DEFINE_SPINLOCK(kprobe_lock);
47static struct kprobe *curr_kprobe;
47 48
48/* Locks kprobe: irqs must be disabled */ 49/* Locks kprobe: irqs must be disabled */
49void lock_kprobes(void) 50void lock_kprobes(void)
@@ -73,22 +74,139 @@ struct kprobe *get_kprobe(void *addr)
73 return NULL; 74 return NULL;
74} 75}
75 76
77/*
78 * Aggregate handlers for multiple kprobes support - these handlers
79 * take care of invoking the individual kprobe handlers on p->list
80 */
81int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
82{
83 struct kprobe *kp;
84
85 list_for_each_entry(kp, &p->list, list) {
86 if (kp->pre_handler) {
87 curr_kprobe = kp;
88 kp->pre_handler(kp, regs);
89 curr_kprobe = NULL;
90 }
91 }
92 return 0;
93}
94
95void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
96 unsigned long flags)
97{
98 struct kprobe *kp;
99
100 list_for_each_entry(kp, &p->list, list) {
101 if (kp->post_handler) {
102 curr_kprobe = kp;
103 kp->post_handler(kp, regs, flags);
104 curr_kprobe = NULL;
105 }
106 }
107 return;
108}
109
110int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
111{
112 /*
113 * if we faulted "during" the execution of a user specified
114 * probe handler, invoke just that probe's fault handler
115 */
116 if (curr_kprobe && curr_kprobe->fault_handler) {
117 if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr))
118 return 1;
119 }
120 return 0;
121}
122
123/*
124 * Fill in the required fields of the "manager kprobe". Replace the
125 * earlier kprobe in the hlist with the manager kprobe
126 */
127static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
128{
129 ap->addr = p->addr;
130 ap->opcode = p->opcode;
131 memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn));
132
133 ap->pre_handler = aggr_pre_handler;
134 ap->post_handler = aggr_post_handler;
135 ap->fault_handler = aggr_fault_handler;
136
137 INIT_LIST_HEAD(&ap->list);
138 list_add(&p->list, &ap->list);
139
140 INIT_HLIST_NODE(&ap->hlist);
141 hlist_del(&p->hlist);
142 hlist_add_head(&ap->hlist,
143 &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]);
144}
145
146/*
147 * This is the second or subsequent kprobe at the address - handle
148 * the intricacies
149 * TODO: Move kcalloc outside the spinlock
150 */
151static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
152{
153 int ret = 0;
154 struct kprobe *ap;
155
156 if (old_p->break_handler || p->break_handler) {
157 ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */
158 } else if (old_p->pre_handler == aggr_pre_handler) {
159 list_add(&p->list, &old_p->list);
160 } else {
161 ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC);
162 if (!ap)
163 return -ENOMEM;
164 add_aggr_kprobe(ap, old_p);
165 list_add(&p->list, &ap->list);
166 }
167 return ret;
168}
169
170/* kprobe removal house-keeping routines */
171static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
172{
173 *p->addr = p->opcode;
174 hlist_del(&p->hlist);
175 flush_icache_range((unsigned long) p->addr,
176 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
177 spin_unlock_irqrestore(&kprobe_lock, flags);
178 arch_remove_kprobe(p);
179}
180
181static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
182 struct kprobe *p, unsigned long flags)
183{
184 list_del(&p->list);
185 if (list_empty(&old_p->list)) {
186 cleanup_kprobe(old_p, flags);
187 kfree(old_p);
188 } else
189 spin_unlock_irqrestore(&kprobe_lock, flags);
190}
191
76int register_kprobe(struct kprobe *p) 192int register_kprobe(struct kprobe *p)
77{ 193{
78 int ret = 0; 194 int ret = 0;
79 unsigned long flags = 0; 195 unsigned long flags = 0;
196 struct kprobe *old_p;
80 197
81 if ((ret = arch_prepare_kprobe(p)) != 0) { 198 if ((ret = arch_prepare_kprobe(p)) != 0) {
82 goto rm_kprobe; 199 goto rm_kprobe;
83 } 200 }
84 spin_lock_irqsave(&kprobe_lock, flags); 201 spin_lock_irqsave(&kprobe_lock, flags);
85 INIT_HLIST_NODE(&p->hlist); 202 old_p = get_kprobe(p->addr);
86 if (get_kprobe(p->addr)) { 203 if (old_p) {
87 ret = -EEXIST; 204 ret = register_aggr_kprobe(old_p, p);
88 goto out; 205 goto out;
89 } 206 }
90 arch_copy_kprobe(p);
91 207
208 arch_copy_kprobe(p);
209 INIT_HLIST_NODE(&p->hlist);
92 hlist_add_head(&p->hlist, 210 hlist_add_head(&p->hlist,
93 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 211 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
94 212
@@ -107,13 +225,17 @@ rm_kprobe:
107void unregister_kprobe(struct kprobe *p) 225void unregister_kprobe(struct kprobe *p)
108{ 226{
109 unsigned long flags; 227 unsigned long flags;
110 arch_remove_kprobe(p); 228 struct kprobe *old_p;
229
111 spin_lock_irqsave(&kprobe_lock, flags); 230 spin_lock_irqsave(&kprobe_lock, flags);
112 *p->addr = p->opcode; 231 old_p = get_kprobe(p->addr);
113 hlist_del(&p->hlist); 232 if (old_p) {
114 flush_icache_range((unsigned long) p->addr, 233 if (old_p->pre_handler == aggr_pre_handler)
115 (unsigned long) p->addr + sizeof(kprobe_opcode_t)); 234 cleanup_aggr_kprobe(old_p, p, flags);
116 spin_unlock_irqrestore(&kprobe_lock, flags); 235 else
236 cleanup_kprobe(p, flags);
237 } else
238 spin_unlock_irqrestore(&kprobe_lock, flags);
117} 239}
118 240
119static struct notifier_block kprobe_exceptions_nb = { 241static struct notifier_block kprobe_exceptions_nb = {
diff --git a/kernel/module.c b/kernel/module.c
index 5734ab09d3..83b3d37670 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1758,6 +1758,7 @@ sys_init_module(void __user *umod,
1758 const char __user *uargs) 1758 const char __user *uargs)
1759{ 1759{
1760 struct module *mod; 1760 struct module *mod;
1761 mm_segment_t old_fs = get_fs();
1761 int ret = 0; 1762 int ret = 0;
1762 1763
1763 /* Must have permission */ 1764 /* Must have permission */
@@ -1775,6 +1776,9 @@ sys_init_module(void __user *umod,
1775 return PTR_ERR(mod); 1776 return PTR_ERR(mod);
1776 } 1777 }
1777 1778
1779 /* flush the icache in correct context */
1780 set_fs(KERNEL_DS);
1781
1778 /* Flush the instruction cache, since we've played with text */ 1782 /* Flush the instruction cache, since we've played with text */
1779 if (mod->module_init) 1783 if (mod->module_init)
1780 flush_icache_range((unsigned long)mod->module_init, 1784 flush_icache_range((unsigned long)mod->module_init,
@@ -1783,6 +1787,8 @@ sys_init_module(void __user *umod,
1783 flush_icache_range((unsigned long)mod->module_core, 1787 flush_icache_range((unsigned long)mod->module_core,
1784 (unsigned long)mod->module_core + mod->core_size); 1788 (unsigned long)mod->module_core + mod->core_size);
1785 1789
1790 set_fs(old_fs);
1791
1786 /* Now sew it into the lists. They won't access us, since 1792 /* Now sew it into the lists. They won't access us, since
1787 strong_try_module_get() will fail. */ 1793 strong_try_module_get() will fail. */
1788 stop_machine_run(__link_module, mod, NR_CPUS); 1794 stop_machine_run(__link_module, mod, NR_CPUS);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7960ddf04a..4cdebc972f 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -156,14 +156,14 @@ static int enter_state(suspend_state_t state)
156 goto Unlock; 156 goto Unlock;
157 } 157 }
158 158
159 pr_debug("PM: Preparing system for suspend\n"); 159 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
160 if ((error = suspend_prepare(state))) 160 if ((error = suspend_prepare(state)))
161 goto Unlock; 161 goto Unlock;
162 162
163 pr_debug("PM: Entering state.\n"); 163 pr_debug("PM: Entering %s sleep\n", pm_states[state]);
164 error = suspend_enter(state); 164 error = suspend_enter(state);
165 165
166 pr_debug("PM: Finishing up.\n"); 166 pr_debug("PM: Finishing wakeup.\n");
167 suspend_finish(state); 167 suspend_finish(state);
168 Unlock: 168 Unlock:
169 up(&pm_sem); 169 up(&pm_sem);
diff --git a/kernel/printk.c b/kernel/printk.c
index 290a07ce2c..01b58d7d17 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -160,42 +160,6 @@ static int __init console_setup(char *str)
160 160
161__setup("console=", console_setup); 161__setup("console=", console_setup);
162 162
163/**
164 * add_preferred_console - add a device to the list of preferred consoles.
165 *
166 * The last preferred console added will be used for kernel messages
167 * and stdin/out/err for init. Normally this is used by console_setup
168 * above to handle user-supplied console arguments; however it can also
169 * be used by arch-specific code either to override the user or more
170 * commonly to provide a default console (ie from PROM variables) when
171 * the user has not supplied one.
172 */
173int __init add_preferred_console(char *name, int idx, char *options)
174{
175 struct console_cmdline *c;
176 int i;
177
178 /*
179 * See if this tty is not yet registered, and
180 * if we have a slot free.
181 */
182 for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
183 if (strcmp(console_cmdline[i].name, name) == 0 &&
184 console_cmdline[i].index == idx) {
185 selected_console = i;
186 return 0;
187 }
188 if (i == MAX_CMDLINECONSOLES)
189 return -E2BIG;
190 selected_console = i;
191 c = &console_cmdline[i];
192 memcpy(c->name, name, sizeof(c->name));
193 c->name[sizeof(c->name) - 1] = 0;
194 c->options = options;
195 c->index = idx;
196 return 0;
197}
198
199static int __init log_buf_len_setup(char *str) 163static int __init log_buf_len_setup(char *str)
200{ 164{
201 unsigned long size = memparse(str, &str); 165 unsigned long size = memparse(str, &str);
@@ -671,6 +635,42 @@ static void call_console_drivers(unsigned long start, unsigned long end) {}
671#endif 635#endif
672 636
673/** 637/**
638 * add_preferred_console - add a device to the list of preferred consoles.
639 *
640 * The last preferred console added will be used for kernel messages
641 * and stdin/out/err for init. Normally this is used by console_setup
642 * above to handle user-supplied console arguments; however it can also
643 * be used by arch-specific code either to override the user or more
644 * commonly to provide a default console (ie from PROM variables) when
645 * the user has not supplied one.
646 */
647int __init add_preferred_console(char *name, int idx, char *options)
648{
649 struct console_cmdline *c;
650 int i;
651
652 /*
653 * See if this tty is not yet registered, and
654 * if we have a slot free.
655 */
656 for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
657 if (strcmp(console_cmdline[i].name, name) == 0 &&
658 console_cmdline[i].index == idx) {
659 selected_console = i;
660 return 0;
661 }
662 if (i == MAX_CMDLINECONSOLES)
663 return -E2BIG;
664 selected_console = i;
665 c = &console_cmdline[i];
666 memcpy(c->name, name, sizeof(c->name));
667 c->name[sizeof(c->name) - 1] = 0;
668 c->options = options;
669 c->index = idx;
670 return 0;
671}
672
673/**
674 * acquire_console_sem - lock the console system for exclusive use. 674 * acquire_console_sem - lock the console system for exclusive use.
675 * 675 *
676 * Acquires a semaphore which guarantees that the caller has 676 * Acquires a semaphore which guarantees that the caller has
diff --git a/kernel/profile.c b/kernel/profile.c
index 0221a50ca8..ad8cbb75ff 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -49,15 +49,19 @@ static DECLARE_MUTEX(profile_flip_mutex);
49 49
50static int __init profile_setup(char * str) 50static int __init profile_setup(char * str)
51{ 51{
52 static char __initdata schedstr[] = "schedule";
52 int par; 53 int par;
53 54
54 if (!strncmp(str, "schedule", 8)) { 55 if (!strncmp(str, schedstr, strlen(schedstr))) {
55 prof_on = SCHED_PROFILING; 56 prof_on = SCHED_PROFILING;
56 printk(KERN_INFO "kernel schedule profiling enabled\n"); 57 if (str[strlen(schedstr)] == ',')
57 if (str[7] == ',') 58 str += strlen(schedstr) + 1;
58 str += 8; 59 if (get_option(&str, &par))
59 } 60 prof_shift = par;
60 if (get_option(&str,&par)) { 61 printk(KERN_INFO
62 "kernel schedule profiling enabled (shift: %ld)\n",
63 prof_shift);
64 } else if (get_option(&str, &par)) {
61 prof_shift = par; 65 prof_shift = par;
62 prof_on = CPU_PROFILING; 66 prof_on = CPU_PROFILING;
63 printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n", 67 printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
diff --git a/kernel/sched.c b/kernel/sched.c
index 0dc3158667..f12a0c8a7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3755,19 +3755,22 @@ EXPORT_SYMBOL(cond_resched);
3755 */ 3755 */
3756int cond_resched_lock(spinlock_t * lock) 3756int cond_resched_lock(spinlock_t * lock)
3757{ 3757{
3758 int ret = 0;
3759
3758 if (need_lockbreak(lock)) { 3760 if (need_lockbreak(lock)) {
3759 spin_unlock(lock); 3761 spin_unlock(lock);
3760 cpu_relax(); 3762 cpu_relax();
3763 ret = 1;
3761 spin_lock(lock); 3764 spin_lock(lock);
3762 } 3765 }
3763 if (need_resched()) { 3766 if (need_resched()) {
3764 _raw_spin_unlock(lock); 3767 _raw_spin_unlock(lock);
3765 preempt_enable_no_resched(); 3768 preempt_enable_no_resched();
3766 __cond_resched(); 3769 __cond_resched();
3770 ret = 1;
3767 spin_lock(lock); 3771 spin_lock(lock);
3768 return 1;
3769 } 3772 }
3770 return 0; 3773 return ret;
3771} 3774}
3772 3775
3773EXPORT_SYMBOL(cond_resched_lock); 3776EXPORT_SYMBOL(cond_resched_lock);
@@ -4243,7 +4246,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
4243 4246
4244 /* No more Mr. Nice Guy. */ 4247 /* No more Mr. Nice Guy. */
4245 if (dest_cpu == NR_CPUS) { 4248 if (dest_cpu == NR_CPUS) {
4246 tsk->cpus_allowed = cpuset_cpus_allowed(tsk); 4249 cpus_setall(tsk->cpus_allowed);
4247 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4250 dest_cpu = any_online_cpu(tsk->cpus_allowed);
4248 4251
4249 /* 4252 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f3debc77c..b3c24c732c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -522,7 +522,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
522{ 522{
523 int sig = 0; 523 int sig = 0;
524 524
525 sig = next_signal(pending, mask); 525 /* SIGKILL must have priority, otherwise it is quite easy
526 * to create an unkillable process, sending sig < SIGKILL
527 * to self */
528 if (unlikely(sigismember(&pending->signal, SIGKILL))) {
529 if (!sigismember(mask, SIGKILL))
530 sig = SIGKILL;
531 }
532
533 if (likely(!sig))
534 sig = next_signal(pending, mask);
526 if (sig) { 535 if (sig) {
527 if (current->notifier) { 536 if (current->notifier) {
528 if (sigismember(current->notifier_mask, sig)) { 537 if (sigismember(current->notifier_mask, sig)) {
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index e15ed17863..0c3f9d8bbe 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -294,7 +294,7 @@ EXPORT_SYMBOL(_spin_unlock_irq);
294void __lockfunc _spin_unlock_bh(spinlock_t *lock) 294void __lockfunc _spin_unlock_bh(spinlock_t *lock)
295{ 295{
296 _raw_spin_unlock(lock); 296 _raw_spin_unlock(lock);
297 preempt_enable(); 297 preempt_enable_no_resched();
298 local_bh_enable(); 298 local_bh_enable();
299} 299}
300EXPORT_SYMBOL(_spin_unlock_bh); 300EXPORT_SYMBOL(_spin_unlock_bh);
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(_read_unlock_irq);
318void __lockfunc _read_unlock_bh(rwlock_t *lock) 318void __lockfunc _read_unlock_bh(rwlock_t *lock)
319{ 319{
320 _raw_read_unlock(lock); 320 _raw_read_unlock(lock);
321 preempt_enable(); 321 preempt_enable_no_resched();
322 local_bh_enable(); 322 local_bh_enable();
323} 323}
324EXPORT_SYMBOL(_read_unlock_bh); 324EXPORT_SYMBOL(_read_unlock_bh);
@@ -342,7 +342,7 @@ EXPORT_SYMBOL(_write_unlock_irq);
342void __lockfunc _write_unlock_bh(rwlock_t *lock) 342void __lockfunc _write_unlock_bh(rwlock_t *lock)
343{ 343{
344 _raw_write_unlock(lock); 344 _raw_write_unlock(lock);
345 preempt_enable(); 345 preempt_enable_no_resched();
346 local_bh_enable(); 346 local_bh_enable();
347} 347}
348EXPORT_SYMBOL(_write_unlock_bh); 348EXPORT_SYMBOL(_write_unlock_bh);
@@ -354,7 +354,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
354 if (_raw_spin_trylock(lock)) 354 if (_raw_spin_trylock(lock))
355 return 1; 355 return 1;
356 356
357 preempt_enable(); 357 preempt_enable_no_resched();
358 local_bh_enable(); 358 local_bh_enable();
359 return 0; 359 return 0;
360} 360}
diff --git a/kernel/sys.c b/kernel/sys.c
index f64e97cabe..f006632c2b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1195,7 +1195,7 @@ static int groups_from_user(struct group_info *group_info,
1195 return 0; 1195 return 0;
1196} 1196}
1197 1197
1198/* a simple shell-metzner sort */ 1198/* a simple Shell sort */
1199static void groups_sort(struct group_info *group_info) 1199static void groups_sort(struct group_info *group_info)
1200{ 1200{
1201 int base, max, stride; 1201 int base, max, stride;