aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/futex.c53
-rw-r--r--kernel/irq/handle.c7
-rw-r--r--kernel/irq/manage.c182
-rw-r--r--kernel/kexec.c7
-rw-r--r--kernel/posix-cpu-timers.c60
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/console.c6
-rw-r--r--kernel/power/disk.c22
-rw-r--r--kernel/power/main.c8
-rw-r--r--kernel/power/swap.c5
-rw-r--r--kernel/power/user.c8
-rw-r--r--kernel/printk.c15
-rw-r--r--kernel/rcuclassic.c4
-rw-r--r--kernel/rcupdate.c12
-rw-r--r--kernel/rcupreempt.c3
-rw-r--r--kernel/rcutree.c4
-rw-r--r--kernel/sched.c30
-rw-r--r--kernel/seccomp.c7
-rw-r--r--kernel/softirq.c1
-rw-r--r--kernel/sys.c31
-rw-r--r--kernel/trace/Kconfig25
-rw-r--r--kernel/trace/ftrace.c6
-rw-r--r--kernel/trace/trace_mmiotrace.c14
-rw-r--r--kernel/trace/trace_selftest.c19
-rw-r--r--kernel/tsacct.c6
-rw-r--r--kernel/user.c35
-rw-r--r--kernel/user_namespace.c21
30 files changed, 409 insertions, 198 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 170a9213c1b6..e4791b3ba55d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o
51obj-$(CONFIG_MODULES) += module.o 51obj-$(CONFIG_MODULES) += module.o
52obj-$(CONFIG_KALLSYMS) += kallsyms.o 52obj-$(CONFIG_KALLSYMS) += kallsyms.o
53obj-$(CONFIG_PM) += power/ 53obj-$(CONFIG_PM) += power/
54obj-$(CONFIG_FREEZER) += power/
54obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o 55obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
55obj-$(CONFIG_KEXEC) += kexec.o 56obj-$(CONFIG_KEXEC) += kexec.o
56obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o 57obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e14db9c089b9..9edb5c4b79b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
1122 1122
1123 mutex_unlock(&cgroup_mutex); 1123 mutex_unlock(&cgroup_mutex);
1124 1124
1125 kfree(root);
1126 kill_litter_super(sb); 1125 kill_litter_super(sb);
1126 kfree(root);
1127} 1127}
1128 1128
1129static struct file_system_type cgroup_fs_type = { 1129static struct file_system_type cgroup_fs_type = {
diff --git a/kernel/fork.c b/kernel/fork.c
index a66fbde20715..4854c2c4a82e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1179,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1179#endif 1179#endif
1180 clear_all_latency_tracing(p); 1180 clear_all_latency_tracing(p);
1181 1181
1182 /* Our parent execution domain becomes current domain
1183 These must match for thread signalling to apply */
1184 p->parent_exec_id = p->self_exec_id;
1185
1186 /* ok, now we should be set up.. */ 1182 /* ok, now we should be set up.. */
1187 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); 1183 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
1188 p->pdeath_signal = 0; 1184 p->pdeath_signal = 0;
@@ -1220,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1220 set_task_cpu(p, smp_processor_id()); 1216 set_task_cpu(p, smp_processor_id());
1221 1217
1222 /* CLONE_PARENT re-uses the old parent */ 1218 /* CLONE_PARENT re-uses the old parent */
1223 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) 1219 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1224 p->real_parent = current->real_parent; 1220 p->real_parent = current->real_parent;
1225 else 1221 p->parent_exec_id = current->parent_exec_id;
1222 } else {
1226 p->real_parent = current; 1223 p->real_parent = current;
1224 p->parent_exec_id = current->self_exec_id;
1225 }
1227 1226
1228 spin_lock(&current->sighand->siglock); 1227 spin_lock(&current->sighand->siglock);
1229 1228
diff --git a/kernel/futex.c b/kernel/futex.c
index f89d373a9c6d..438701adce23 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
1165 u32 val, ktime_t *abs_time, u32 bitset, int clockrt) 1165 u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
1166{ 1166{
1167 struct task_struct *curr = current; 1167 struct task_struct *curr = current;
1168 struct restart_block *restart;
1168 DECLARE_WAITQUEUE(wait, curr); 1169 DECLARE_WAITQUEUE(wait, curr);
1169 struct futex_hash_bucket *hb; 1170 struct futex_hash_bucket *hb;
1170 struct futex_q q; 1171 struct futex_q q;
@@ -1216,11 +1217,13 @@ retry:
1216 1217
1217 if (!ret) 1218 if (!ret)
1218 goto retry; 1219 goto retry;
1219 return ret; 1220 goto out;
1220 } 1221 }
1221 ret = -EWOULDBLOCK; 1222 ret = -EWOULDBLOCK;
1222 if (uval != val) 1223 if (unlikely(uval != val)) {
1223 goto out_unlock_put_key; 1224 queue_unlock(&q, hb);
1225 goto out_put_key;
1226 }
1224 1227
1225 /* Only actually queue if *uaddr contained val. */ 1228 /* Only actually queue if *uaddr contained val. */
1226 queue_me(&q, hb); 1229 queue_me(&q, hb);
@@ -1284,38 +1287,38 @@ retry:
1284 */ 1287 */
1285 1288
1286 /* If we were woken (and unqueued), we succeeded, whatever. */ 1289 /* If we were woken (and unqueued), we succeeded, whatever. */
1290 ret = 0;
1287 if (!unqueue_me(&q)) 1291 if (!unqueue_me(&q))
1288 return 0; 1292 goto out_put_key;
1293 ret = -ETIMEDOUT;
1289 if (rem) 1294 if (rem)
1290 return -ETIMEDOUT; 1295 goto out_put_key;
1291 1296
1292 /* 1297 /*
1293 * We expect signal_pending(current), but another thread may 1298 * We expect signal_pending(current), but another thread may
1294 * have handled it for us already. 1299 * have handled it for us already.
1295 */ 1300 */
1301 ret = -ERESTARTSYS;
1296 if (!abs_time) 1302 if (!abs_time)
1297 return -ERESTARTSYS; 1303 goto out_put_key;
1298 else {
1299 struct restart_block *restart;
1300 restart = &current_thread_info()->restart_block;
1301 restart->fn = futex_wait_restart;
1302 restart->futex.uaddr = (u32 *)uaddr;
1303 restart->futex.val = val;
1304 restart->futex.time = abs_time->tv64;
1305 restart->futex.bitset = bitset;
1306 restart->futex.flags = 0;
1307
1308 if (fshared)
1309 restart->futex.flags |= FLAGS_SHARED;
1310 if (clockrt)
1311 restart->futex.flags |= FLAGS_CLOCKRT;
1312 return -ERESTART_RESTARTBLOCK;
1313 }
1314 1304
1315out_unlock_put_key: 1305 restart = &current_thread_info()->restart_block;
1316 queue_unlock(&q, hb); 1306 restart->fn = futex_wait_restart;
1317 put_futex_key(fshared, &q.key); 1307 restart->futex.uaddr = (u32 *)uaddr;
1308 restart->futex.val = val;
1309 restart->futex.time = abs_time->tv64;
1310 restart->futex.bitset = bitset;
1311 restart->futex.flags = 0;
1312
1313 if (fshared)
1314 restart->futex.flags |= FLAGS_SHARED;
1315 if (clockrt)
1316 restart->futex.flags |= FLAGS_CLOCKRT;
1318 1317
1318 ret = -ERESTART_RESTARTBLOCK;
1319
1320out_put_key:
1321 put_futex_key(fshared, &q.key);
1319out: 1322out:
1320 return ret; 1323 return ret;
1321} 1324}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 759b8b04d294..f6cdda68e5c6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -338,6 +338,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
338 irqreturn_t ret, retval = IRQ_NONE; 338 irqreturn_t ret, retval = IRQ_NONE;
339 unsigned int status = 0; 339 unsigned int status = 0;
340 340
341 WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
342
341 if (!(action->flags & IRQF_DISABLED)) 343 if (!(action->flags & IRQF_DISABLED))
342 local_irq_enable_in_hardirq(); 344 local_irq_enable_in_hardirq();
343 345
@@ -357,6 +359,11 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
357} 359}
358 360
359#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ 361#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
362
363#ifdef CONFIG_ENABLE_WARN_DEPRECATED
364# warning __do_IRQ is deprecated. Please convert to proper flow handlers
365#endif
366
360/** 367/**
361 * __do_IRQ - original all in one highlevel IRQ handler 368 * __do_IRQ - original all in one highlevel IRQ handler
362 * @irq: the interrupt number 369 * @irq: the interrupt number
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index cbc3828faf5f..ea119effe096 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -389,9 +389,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
389 * allocate special interrupts that are part of the architecture. 389 * allocate special interrupts that are part of the architecture.
390 */ 390 */
391static int 391static int
392__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new) 392__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
393{ 393{
394 struct irqaction *old, **p; 394 struct irqaction *old, **old_ptr;
395 const char *old_name = NULL; 395 const char *old_name = NULL;
396 unsigned long flags; 396 unsigned long flags;
397 int shared = 0; 397 int shared = 0;
@@ -423,8 +423,8 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
423 * The following block of code has to be executed atomically 423 * The following block of code has to be executed atomically
424 */ 424 */
425 spin_lock_irqsave(&desc->lock, flags); 425 spin_lock_irqsave(&desc->lock, flags);
426 p = &desc->action; 426 old_ptr = &desc->action;
427 old = *p; 427 old = *old_ptr;
428 if (old) { 428 if (old) {
429 /* 429 /*
430 * Can't share interrupts unless both agree to and are 430 * Can't share interrupts unless both agree to and are
@@ -447,8 +447,8 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
447 447
448 /* add new interrupt at end of irq queue */ 448 /* add new interrupt at end of irq queue */
449 do { 449 do {
450 p = &old->next; 450 old_ptr = &old->next;
451 old = *p; 451 old = *old_ptr;
452 } while (old); 452 } while (old);
453 shared = 1; 453 shared = 1;
454 } 454 }
@@ -499,7 +499,7 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
499 (int)(new->flags & IRQF_TRIGGER_MASK)); 499 (int)(new->flags & IRQF_TRIGGER_MASK));
500 } 500 }
501 501
502 *p = new; 502 *old_ptr = new;
503 503
504 /* Reset broken irq detection when installing new handler */ 504 /* Reset broken irq detection when installing new handler */
505 desc->irq_count = 0; 505 desc->irq_count = 0;
@@ -549,90 +549,117 @@ int setup_irq(unsigned int irq, struct irqaction *act)
549 549
550 return __setup_irq(irq, desc, act); 550 return __setup_irq(irq, desc, act);
551} 551}
552EXPORT_SYMBOL_GPL(setup_irq);
552 553
553/** 554 /*
554 * free_irq - free an interrupt 555 * Internal function to unregister an irqaction - used to free
555 * @irq: Interrupt line to free 556 * regular and special interrupts that are part of the architecture.
556 * @dev_id: Device identity to free
557 *
558 * Remove an interrupt handler. The handler is removed and if the
559 * interrupt line is no longer in use by any driver it is disabled.
560 * On a shared IRQ the caller must ensure the interrupt is disabled
561 * on the card it drives before calling this function. The function
562 * does not return until any executing interrupts for this IRQ
563 * have completed.
564 *
565 * This function must not be called from interrupt context.
566 */ 557 */
567void free_irq(unsigned int irq, void *dev_id) 558static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
568{ 559{
569 struct irq_desc *desc = irq_to_desc(irq); 560 struct irq_desc *desc = irq_to_desc(irq);
570 struct irqaction **p; 561 struct irqaction *action, **action_ptr;
571 unsigned long flags; 562 unsigned long flags;
572 563
573 WARN_ON(in_interrupt()); 564 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
574 565
575 if (!desc) 566 if (!desc)
576 return; 567 return NULL;
577 568
578 spin_lock_irqsave(&desc->lock, flags); 569 spin_lock_irqsave(&desc->lock, flags);
579 p = &desc->action; 570
571 /*
572 * There can be multiple actions per IRQ descriptor, find the right
573 * one based on the dev_id:
574 */
575 action_ptr = &desc->action;
580 for (;;) { 576 for (;;) {
581 struct irqaction *action = *p; 577 action = *action_ptr;
582 578
583 if (action) { 579 if (!action) {
584 struct irqaction **pp = p; 580 WARN(1, "Trying to free already-free IRQ %d\n", irq);
581 spin_unlock_irqrestore(&desc->lock, flags);
585 582
586 p = &action->next; 583 return NULL;
587 if (action->dev_id != dev_id) 584 }
588 continue;
589 585
590 /* Found it - now remove it from the list of entries */ 586 if (action->dev_id == dev_id)
591 *pp = action->next; 587 break;
588 action_ptr = &action->next;
589 }
592 590
593 /* Currently used only by UML, might disappear one day.*/ 591 /* Found it - now remove it from the list of entries: */
592 *action_ptr = action->next;
593
594 /* Currently used only by UML, might disappear one day: */
594#ifdef CONFIG_IRQ_RELEASE_METHOD 595#ifdef CONFIG_IRQ_RELEASE_METHOD
595 if (desc->chip->release) 596 if (desc->chip->release)
596 desc->chip->release(irq, dev_id); 597 desc->chip->release(irq, dev_id);
597#endif 598#endif
598 599
599 if (!desc->action) { 600 /* If this was the last handler, shut down the IRQ line: */
600 desc->status |= IRQ_DISABLED; 601 if (!desc->action) {
601 if (desc->chip->shutdown) 602 desc->status |= IRQ_DISABLED;
602 desc->chip->shutdown(irq); 603 if (desc->chip->shutdown)
603 else 604 desc->chip->shutdown(irq);
604 desc->chip->disable(irq); 605 else
605 } 606 desc->chip->disable(irq);
606 spin_unlock_irqrestore(&desc->lock, flags); 607 }
607 unregister_handler_proc(irq, action); 608 spin_unlock_irqrestore(&desc->lock, flags);
609
610 unregister_handler_proc(irq, action);
611
612 /* Make sure it's not being used on another CPU: */
613 synchronize_irq(irq);
608 614
609 /* Make sure it's not being used on another CPU */
610 synchronize_irq(irq);
611#ifdef CONFIG_DEBUG_SHIRQ
612 /*
613 * It's a shared IRQ -- the driver ought to be
614 * prepared for it to happen even now it's
615 * being freed, so let's make sure.... We do
616 * this after actually deregistering it, to
617 * make sure that a 'real' IRQ doesn't run in
618 * parallel with our fake
619 */
620 if (action->flags & IRQF_SHARED) {
621 local_irq_save(flags);
622 action->handler(irq, dev_id);
623 local_irq_restore(flags);
624 }
625#endif
626 kfree(action);
627 return;
628 }
629 printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
630#ifdef CONFIG_DEBUG_SHIRQ 615#ifdef CONFIG_DEBUG_SHIRQ
631 dump_stack(); 616 /*
632#endif 617 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
633 spin_unlock_irqrestore(&desc->lock, flags); 618 * event to happen even now it's being freed, so let's make sure that
634 return; 619 * is so by doing an extra call to the handler ....
620 *
621 * ( We do this after actually deregistering it, to make sure that a
622 * 'real' IRQ doesn't run in * parallel with our fake. )
623 */
624 if (action->flags & IRQF_SHARED) {
625 local_irq_save(flags);
626 action->handler(irq, dev_id);
627 local_irq_restore(flags);
635 } 628 }
629#endif
630 return action;
631}
632
633/**
634 * remove_irq - free an interrupt
635 * @irq: Interrupt line to free
636 * @act: irqaction for the interrupt
637 *
638 * Used to remove interrupts statically setup by the early boot process.
639 */
640void remove_irq(unsigned int irq, struct irqaction *act)
641{
642 __free_irq(irq, act->dev_id);
643}
644EXPORT_SYMBOL_GPL(remove_irq);
645
646/**
647 * free_irq - free an interrupt allocated with request_irq
648 * @irq: Interrupt line to free
649 * @dev_id: Device identity to free
650 *
651 * Remove an interrupt handler. The handler is removed and if the
652 * interrupt line is no longer in use by any driver it is disabled.
653 * On a shared IRQ the caller must ensure the interrupt is disabled
654 * on the card it drives before calling this function. The function
655 * does not return until any executing interrupts for this IRQ
656 * have completed.
657 *
658 * This function must not be called from interrupt context.
659 */
660void free_irq(unsigned int irq, void *dev_id)
661{
662 kfree(__free_irq(irq, dev_id));
636} 663}
637EXPORT_SYMBOL(free_irq); 664EXPORT_SYMBOL(free_irq);
638 665
@@ -679,11 +706,12 @@ int request_irq(unsigned int irq, irq_handler_t handler,
679 * the behavior is classified as "will not fix" so we need to 706 * the behavior is classified as "will not fix" so we need to
680 * start nudging drivers away from using that idiom. 707 * start nudging drivers away from using that idiom.
681 */ 708 */
682 if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) 709 if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
683 == (IRQF_SHARED|IRQF_DISABLED)) 710 (IRQF_SHARED|IRQF_DISABLED)) {
684 pr_warning("IRQ %d/%s: IRQF_DISABLED is not " 711 pr_warning(
685 "guaranteed on shared IRQs\n", 712 "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
686 irq, devname); 713 irq, devname);
714 }
687 715
688#ifdef CONFIG_LOCKDEP 716#ifdef CONFIG_LOCKDEP
689 /* 717 /*
@@ -709,15 +737,13 @@ int request_irq(unsigned int irq, irq_handler_t handler,
709 if (!handler) 737 if (!handler)
710 return -EINVAL; 738 return -EINVAL;
711 739
712 action = kmalloc(sizeof(struct irqaction), GFP_KERNEL); 740 action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
713 if (!action) 741 if (!action)
714 return -ENOMEM; 742 return -ENOMEM;
715 743
716 action->handler = handler; 744 action->handler = handler;
717 action->flags = irqflags; 745 action->flags = irqflags;
718 cpus_clear(action->mask);
719 action->name = devname; 746 action->name = devname;
720 action->next = NULL;
721 action->dev_id = dev_id; 747 action->dev_id = dev_id;
722 748
723 retval = __setup_irq(irq, desc, action); 749 retval = __setup_irq(irq, desc, action);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 8a6d7b08864e..483899578259 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1465,6 +1465,11 @@ int kernel_kexec(void)
1465 error = device_power_down(PMSG_FREEZE); 1465 error = device_power_down(PMSG_FREEZE);
1466 if (error) 1466 if (error)
1467 goto Enable_irqs; 1467 goto Enable_irqs;
1468
1469 /* Suspend system devices */
1470 error = sysdev_suspend(PMSG_FREEZE);
1471 if (error)
1472 goto Power_up_devices;
1468 } else 1473 } else
1469#endif 1474#endif
1470 { 1475 {
@@ -1477,6 +1482,8 @@ int kernel_kexec(void)
1477 1482
1478#ifdef CONFIG_KEXEC_JUMP 1483#ifdef CONFIG_KEXEC_JUMP
1479 if (kexec_image->preserve_context) { 1484 if (kexec_image->preserve_context) {
1485 sysdev_resume();
1486 Power_up_devices:
1480 device_power_up(PMSG_RESTORE); 1487 device_power_up(PMSG_RESTORE);
1481 Enable_irqs: 1488 Enable_irqs:
1482 local_irq_enable(); 1489 local_irq_enable();
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 2313a4cc14ea..e976e505648d 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -681,6 +681,33 @@ static void cpu_timer_fire(struct k_itimer *timer)
681} 681}
682 682
683/* 683/*
684 * Sample a process (thread group) timer for the given group_leader task.
685 * Must be called with tasklist_lock held for reading.
686 */
687static int cpu_timer_sample_group(const clockid_t which_clock,
688 struct task_struct *p,
689 union cpu_time_count *cpu)
690{
691 struct task_cputime cputime;
692
693 thread_group_cputimer(p, &cputime);
694 switch (CPUCLOCK_WHICH(which_clock)) {
695 default:
696 return -EINVAL;
697 case CPUCLOCK_PROF:
698 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
699 break;
700 case CPUCLOCK_VIRT:
701 cpu->cpu = cputime.utime;
702 break;
703 case CPUCLOCK_SCHED:
704 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
705 break;
706 }
707 return 0;
708}
709
710/*
684 * Guts of sys_timer_settime for CPU timers. 711 * Guts of sys_timer_settime for CPU timers.
685 * This is called with the timer locked and interrupts disabled. 712 * This is called with the timer locked and interrupts disabled.
686 * If we return TIMER_RETRY, it's necessary to release the timer's lock 713 * If we return TIMER_RETRY, it's necessary to release the timer's lock
@@ -741,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
741 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 768 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
742 cpu_clock_sample(timer->it_clock, p, &val); 769 cpu_clock_sample(timer->it_clock, p, &val);
743 } else { 770 } else {
744 cpu_clock_sample_group(timer->it_clock, p, &val); 771 cpu_timer_sample_group(timer->it_clock, p, &val);
745 } 772 }
746 773
747 if (old) { 774 if (old) {
@@ -889,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
889 read_unlock(&tasklist_lock); 916 read_unlock(&tasklist_lock);
890 goto dead; 917 goto dead;
891 } else { 918 } else {
892 cpu_clock_sample_group(timer->it_clock, p, &now); 919 cpu_timer_sample_group(timer->it_clock, p, &now);
893 clear_dead = (unlikely(p->exit_state) && 920 clear_dead = (unlikely(p->exit_state) &&
894 thread_group_empty(p)); 921 thread_group_empty(p));
895 } 922 }
@@ -1244,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1244 clear_dead_task(timer, now); 1271 clear_dead_task(timer, now);
1245 goto out_unlock; 1272 goto out_unlock;
1246 } 1273 }
1247 cpu_clock_sample_group(timer->it_clock, p, &now); 1274 cpu_timer_sample_group(timer->it_clock, p, &now);
1248 bump_cpu_timer(timer, now); 1275 bump_cpu_timer(timer, now);
1249 /* Leave the tasklist_lock locked for the call below. */ 1276 /* Leave the tasklist_lock locked for the call below. */
1250 } 1277 }
@@ -1409,33 +1436,6 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1409} 1436}
1410 1437
1411/* 1438/*
1412 * Sample a process (thread group) timer for the given group_leader task.
1413 * Must be called with tasklist_lock held for reading.
1414 */
1415static int cpu_timer_sample_group(const clockid_t which_clock,
1416 struct task_struct *p,
1417 union cpu_time_count *cpu)
1418{
1419 struct task_cputime cputime;
1420
1421 thread_group_cputimer(p, &cputime);
1422 switch (CPUCLOCK_WHICH(which_clock)) {
1423 default:
1424 return -EINVAL;
1425 case CPUCLOCK_PROF:
1426 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
1427 break;
1428 case CPUCLOCK_VIRT:
1429 cpu->cpu = cputime.utime;
1430 break;
1431 case CPUCLOCK_SCHED:
1432 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
1433 break;
1434 }
1435 return 0;
1436}
1437
1438/*
1439 * Set one of the process-wide special case CPU timers. 1439 * Set one of the process-wide special case CPU timers.
1440 * The tsk->sighand->siglock must be held by the caller. 1440 * The tsk->sighand->siglock must be held by the caller.
1441 * The *newval argument is relative and we update it to be absolute, *oldval 1441 * The *newval argument is relative and we update it to be absolute, *oldval
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index d7a10167a25b..720ea4f781bd 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y)
3EXTRA_CFLAGS += -DDEBUG 3EXTRA_CFLAGS += -DDEBUG
4endif 4endif
5 5
6obj-y := main.o 6obj-$(CONFIG_PM) += main.o
7obj-$(CONFIG_PM_SLEEP) += console.o 7obj-$(CONFIG_PM_SLEEP) += console.o
8obj-$(CONFIG_FREEZER) += process.o 8obj-$(CONFIG_FREEZER) += process.o
9obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o 9obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index b8628be2a465..a3961b205de7 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -78,6 +78,12 @@ void pm_restore_console(void)
78 } 78 }
79 set_console(orig_fgconsole); 79 set_console(orig_fgconsole);
80 release_console_sem(); 80 release_console_sem();
81
82 if (vt_waitactive(orig_fgconsole)) {
83 pr_debug("Resume: Can't switch VCs.");
84 return;
85 }
86
81 kmsg_redirect = orig_kmsg; 87 kmsg_redirect = orig_kmsg;
82} 88}
83#endif 89#endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 432ee575c9ee..4a4a206b1979 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -227,6 +227,12 @@ static int create_image(int platform_mode)
227 "aborting hibernation\n"); 227 "aborting hibernation\n");
228 goto Enable_irqs; 228 goto Enable_irqs;
229 } 229 }
230 sysdev_suspend(PMSG_FREEZE);
231 if (error) {
232 printk(KERN_ERR "PM: Some devices failed to power down, "
233 "aborting hibernation\n");
234 goto Power_up_devices;
235 }
230 236
231 if (hibernation_test(TEST_CORE)) 237 if (hibernation_test(TEST_CORE))
232 goto Power_up; 238 goto Power_up;
@@ -242,9 +248,11 @@ static int create_image(int platform_mode)
242 if (!in_suspend) 248 if (!in_suspend)
243 platform_leave(platform_mode); 249 platform_leave(platform_mode);
244 Power_up: 250 Power_up:
251 sysdev_resume();
245 /* NOTE: device_power_up() is just a resume() for devices 252 /* NOTE: device_power_up() is just a resume() for devices
246 * that suspended with irqs off ... no overall powerup. 253 * that suspended with irqs off ... no overall powerup.
247 */ 254 */
255 Power_up_devices:
248 device_power_up(in_suspend ? 256 device_power_up(in_suspend ?
249 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 257 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
250 Enable_irqs: 258 Enable_irqs:
@@ -335,6 +343,7 @@ static int resume_target_kernel(void)
335 "aborting resume\n"); 343 "aborting resume\n");
336 goto Enable_irqs; 344 goto Enable_irqs;
337 } 345 }
346 sysdev_suspend(PMSG_QUIESCE);
338 /* We'll ignore saved state, but this gets preempt count (etc) right */ 347 /* We'll ignore saved state, but this gets preempt count (etc) right */
339 save_processor_state(); 348 save_processor_state();
340 error = restore_highmem(); 349 error = restore_highmem();
@@ -357,6 +366,7 @@ static int resume_target_kernel(void)
357 swsusp_free(); 366 swsusp_free();
358 restore_processor_state(); 367 restore_processor_state();
359 touch_softlockup_watchdog(); 368 touch_softlockup_watchdog();
369 sysdev_resume();
360 device_power_up(PMSG_RECOVER); 370 device_power_up(PMSG_RECOVER);
361 Enable_irqs: 371 Enable_irqs:
362 local_irq_enable(); 372 local_irq_enable();
@@ -440,6 +450,7 @@ int hibernation_platform_enter(void)
440 local_irq_disable(); 450 local_irq_disable();
441 error = device_power_down(PMSG_HIBERNATE); 451 error = device_power_down(PMSG_HIBERNATE);
442 if (!error) { 452 if (!error) {
453 sysdev_suspend(PMSG_HIBERNATE);
443 hibernation_ops->enter(); 454 hibernation_ops->enter();
444 /* We should never get here */ 455 /* We should never get here */
445 while (1); 456 while (1);
@@ -595,6 +606,12 @@ static int software_resume(void)
595 unsigned int flags; 606 unsigned int flags;
596 607
597 /* 608 /*
609 * If the user said "noresume".. bail out early.
610 */
611 if (noresume)
612 return 0;
613
614 /*
598 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs 615 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
599 * is configured into the kernel. Since the regular hibernate 616 * is configured into the kernel. Since the regular hibernate
600 * trigger path is via sysfs which takes a buffer mutex before 617 * trigger path is via sysfs which takes a buffer mutex before
@@ -610,6 +627,11 @@ static int software_resume(void)
610 mutex_unlock(&pm_mutex); 627 mutex_unlock(&pm_mutex);
611 return -ENOENT; 628 return -ENOENT;
612 } 629 }
630 /*
631 * Some device discovery might still be in progress; we need
632 * to wait for this to finish.
633 */
634 wait_for_device_probe();
613 swsusp_resume_device = name_to_dev_t(resume_file); 635 swsusp_resume_device = name_to_dev_t(resume_file);
614 pr_debug("PM: Resume from partition %s\n", resume_file); 636 pr_debug("PM: Resume from partition %s\n", resume_file);
615 } else { 637 } else {
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b4d219016b6c..c9632f841f64 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state)
298 goto Done; 298 goto Done;
299 } 299 }
300 300
301 if (!suspend_test(TEST_CORE)) 301 error = sysdev_suspend(PMSG_SUSPEND);
302 error = suspend_ops->enter(state); 302 if (!error) {
303 if (!suspend_test(TEST_CORE))
304 error = suspend_ops->enter(state);
305 sysdev_resume();
306 }
303 307
304 device_power_up(PMSG_RESUME); 308 device_power_up(PMSG_RESUME);
305 Done: 309 Done:
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 6da14358537c..505f319e489c 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -60,6 +60,7 @@ static struct block_device *resume_bdev;
60static int submit(int rw, pgoff_t page_off, struct page *page, 60static int submit(int rw, pgoff_t page_off, struct page *page,
61 struct bio **bio_chain) 61 struct bio **bio_chain)
62{ 62{
63 const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
63 struct bio *bio; 64 struct bio *bio;
64 65
65 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 66 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
@@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
80 bio_get(bio); 81 bio_get(bio);
81 82
82 if (bio_chain == NULL) { 83 if (bio_chain == NULL) {
83 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 84 submit_bio(bio_rw, bio);
84 wait_on_page_locked(page); 85 wait_on_page_locked(page);
85 if (rw == READ) 86 if (rw == READ)
86 bio_set_pages_dirty(bio); 87 bio_set_pages_dirty(bio);
@@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
90 get_page(page); /* These pages are freed later */ 91 get_page(page); /* These pages are freed later */
91 bio->bi_private = *bio_chain; 92 bio->bi_private = *bio_chain;
92 *bio_chain = bio; 93 *bio_chain = bio;
93 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 94 submit_bio(bio_rw, bio);
94 } 95 }
95 return 0; 96 return 0;
96} 97}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 005b93d839ba..6c85359364f2 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
95 data->swap = swsusp_resume_device ? 95 data->swap = swsusp_resume_device ?
96 swap_type_of(swsusp_resume_device, 0, NULL) : -1; 96 swap_type_of(swsusp_resume_device, 0, NULL) : -1;
97 data->mode = O_RDONLY; 97 data->mode = O_RDONLY;
98 error = pm_notifier_call_chain(PM_RESTORE_PREPARE); 98 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
99 if (error) 99 if (error)
100 pm_notifier_call_chain(PM_POST_RESTORE); 100 pm_notifier_call_chain(PM_POST_HIBERNATION);
101 } else { 101 } else {
102 data->swap = -1; 102 data->swap = -1;
103 data->mode = O_WRONLY; 103 data->mode = O_WRONLY;
104 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); 104 error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
105 if (error) 105 if (error)
106 pm_notifier_call_chain(PM_POST_HIBERNATION); 106 pm_notifier_call_chain(PM_POST_RESTORE);
107 } 107 }
108 if (error) 108 if (error)
109 atomic_inc(&snapshot_device_available); 109 atomic_inc(&snapshot_device_available);
diff --git a/kernel/printk.c b/kernel/printk.c
index 69188f226a93..e3602d0755b0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress);
73 * driver system. 73 * driver system.
74 */ 74 */
75static DECLARE_MUTEX(console_sem); 75static DECLARE_MUTEX(console_sem);
76static DECLARE_MUTEX(secondary_console_sem);
77struct console *console_drivers; 76struct console *console_drivers;
78EXPORT_SYMBOL_GPL(console_drivers); 77EXPORT_SYMBOL_GPL(console_drivers);
79 78
@@ -891,12 +890,14 @@ void suspend_console(void)
891 printk("Suspending console(s) (use no_console_suspend to debug)\n"); 890 printk("Suspending console(s) (use no_console_suspend to debug)\n");
892 acquire_console_sem(); 891 acquire_console_sem();
893 console_suspended = 1; 892 console_suspended = 1;
893 up(&console_sem);
894} 894}
895 895
896void resume_console(void) 896void resume_console(void)
897{ 897{
898 if (!console_suspend_enabled) 898 if (!console_suspend_enabled)
899 return; 899 return;
900 down(&console_sem);
900 console_suspended = 0; 901 console_suspended = 0;
901 release_console_sem(); 902 release_console_sem();
902} 903}
@@ -912,11 +913,9 @@ void resume_console(void)
912void acquire_console_sem(void) 913void acquire_console_sem(void)
913{ 914{
914 BUG_ON(in_interrupt()); 915 BUG_ON(in_interrupt());
915 if (console_suspended) {
916 down(&secondary_console_sem);
917 return;
918 }
919 down(&console_sem); 916 down(&console_sem);
917 if (console_suspended)
918 return;
920 console_locked = 1; 919 console_locked = 1;
921 console_may_schedule = 1; 920 console_may_schedule = 1;
922} 921}
@@ -926,6 +925,10 @@ int try_acquire_console_sem(void)
926{ 925{
927 if (down_trylock(&console_sem)) 926 if (down_trylock(&console_sem))
928 return -1; 927 return -1;
928 if (console_suspended) {
929 up(&console_sem);
930 return -1;
931 }
929 console_locked = 1; 932 console_locked = 1;
930 console_may_schedule = 0; 933 console_may_schedule = 0;
931 return 0; 934 return 0;
@@ -979,7 +982,7 @@ void release_console_sem(void)
979 unsigned wake_klogd = 0; 982 unsigned wake_klogd = 0;
980 983
981 if (console_suspended) { 984 if (console_suspended) {
982 up(&secondary_console_sem); 985 up(&console_sem);
983 return; 986 return;
984 } 987 }
985 988
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index bd5a9003497c..654c640a6b9c 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu)
679void rcu_check_callbacks(int cpu, int user) 679void rcu_check_callbacks(int cpu, int user)
680{ 680{
681 if (user || 681 if (user ||
682 (idle_cpu(cpu) && !in_softirq() && 682 (idle_cpu(cpu) && rcu_scheduler_active &&
683 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 683 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
684 684
685 /* 685 /*
686 * Get here if this CPU took its interrupt from user 686 * Get here if this CPU took its interrupt from user
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index d92a76a881aa..cae8a059cf47 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,6 +44,7 @@
44#include <linux/cpu.h> 44#include <linux/cpu.h>
45#include <linux/mutex.h> 45#include <linux/mutex.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/kernel_stat.h>
47 48
48enum rcu_barrier { 49enum rcu_barrier {
49 RCU_BARRIER_STD, 50 RCU_BARRIER_STD,
@@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
55static atomic_t rcu_barrier_cpu_count; 56static atomic_t rcu_barrier_cpu_count;
56static DEFINE_MUTEX(rcu_barrier_mutex); 57static DEFINE_MUTEX(rcu_barrier_mutex);
57static struct completion rcu_barrier_completion; 58static struct completion rcu_barrier_completion;
59int rcu_scheduler_active __read_mostly;
58 60
59/* 61/*
60 * Awaken the corresponding synchronize_rcu() instance now that a 62 * Awaken the corresponding synchronize_rcu() instance now that a
@@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head)
80void synchronize_rcu(void) 82void synchronize_rcu(void)
81{ 83{
82 struct rcu_synchronize rcu; 84 struct rcu_synchronize rcu;
85
86 if (rcu_blocking_is_gp())
87 return;
88
83 init_completion(&rcu.completion); 89 init_completion(&rcu.completion);
84 /* Will wake me after RCU finished. */ 90 /* Will wake me after RCU finished. */
85 call_rcu(&rcu.head, wakeme_after_rcu); 91 call_rcu(&rcu.head, wakeme_after_rcu);
@@ -175,3 +181,9 @@ void __init rcu_init(void)
175 __rcu_init(); 181 __rcu_init();
176} 182}
177 183
184void rcu_scheduler_starting(void)
185{
186 WARN_ON(num_online_cpus() != 1);
187 WARN_ON(nr_context_switches() > 0);
188 rcu_scheduler_active = 1;
189}
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 33cfc50781f9..5d59e850fb71 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1181,6 +1181,9 @@ void __synchronize_sched(void)
1181{ 1181{
1182 struct rcu_synchronize rcu; 1182 struct rcu_synchronize rcu;
1183 1183
1184 if (num_online_cpus() == 1)
1185 return; /* blocking is gp if only one CPU! */
1186
1184 init_completion(&rcu.completion); 1187 init_completion(&rcu.completion);
1185 /* Will wake me after RCU finished. */ 1188 /* Will wake me after RCU finished. */
1186 call_rcu_sched(&rcu.head, wakeme_after_rcu); 1189 call_rcu_sched(&rcu.head, wakeme_after_rcu);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b2fd602a6f6f..97ce31579ec0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
948void rcu_check_callbacks(int cpu, int user) 948void rcu_check_callbacks(int cpu, int user)
949{ 949{
950 if (user || 950 if (user ||
951 (idle_cpu(cpu) && !in_softirq() && 951 (idle_cpu(cpu) && rcu_scheduler_active &&
952 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 952 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
953 953
954 /* 954 /*
955 * Get here if this CPU took its interrupt from user 955 * Get here if this CPU took its interrupt from user
diff --git a/kernel/sched.c b/kernel/sched.c
index c1d0ed360088..8e2558c2ba67 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
223{ 223{
224 ktime_t now; 224 ktime_t now;
225 225
226 if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) 226 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
227 return; 227 return;
228 228
229 if (hrtimer_active(&rt_b->rt_period_timer)) 229 if (hrtimer_active(&rt_b->rt_period_timer))
@@ -6944,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd)
6944 6944
6945static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6945static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6946{ 6946{
6947 struct root_domain *old_rd = NULL;
6947 unsigned long flags; 6948 unsigned long flags;
6948 6949
6949 spin_lock_irqsave(&rq->lock, flags); 6950 spin_lock_irqsave(&rq->lock, flags);
6950 6951
6951 if (rq->rd) { 6952 if (rq->rd) {
6952 struct root_domain *old_rd = rq->rd; 6953 old_rd = rq->rd;
6953 6954
6954 if (cpumask_test_cpu(rq->cpu, old_rd->online)) 6955 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6955 set_rq_offline(rq); 6956 set_rq_offline(rq);
6956 6957
6957 cpumask_clear_cpu(rq->cpu, old_rd->span); 6958 cpumask_clear_cpu(rq->cpu, old_rd->span);
6958 6959
6959 if (atomic_dec_and_test(&old_rd->refcount)) 6960 /*
6960 free_rootdomain(old_rd); 6961 * If we dont want to free the old_rt yet then
6962 * set old_rd to NULL to skip the freeing later
6963 * in this function:
6964 */
6965 if (!atomic_dec_and_test(&old_rd->refcount))
6966 old_rd = NULL;
6961 } 6967 }
6962 6968
6963 atomic_inc(&rd->refcount); 6969 atomic_inc(&rd->refcount);
@@ -6968,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6968 set_rq_online(rq); 6974 set_rq_online(rq);
6969 6975
6970 spin_unlock_irqrestore(&rq->lock, flags); 6976 spin_unlock_irqrestore(&rq->lock, flags);
6977
6978 if (old_rd)
6979 free_rootdomain(old_rd);
6971} 6980}
6972 6981
6973static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) 6982static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
@@ -9215,6 +9224,16 @@ static int sched_rt_global_constraints(void)
9215 9224
9216 return ret; 9225 return ret;
9217} 9226}
9227
9228int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
9229{
9230 /* Don't accept realtime tasks when there is no way for them to run */
9231 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
9232 return 0;
9233
9234 return 1;
9235}
9236
9218#else /* !CONFIG_RT_GROUP_SCHED */ 9237#else /* !CONFIG_RT_GROUP_SCHED */
9219static int sched_rt_global_constraints(void) 9238static int sched_rt_global_constraints(void)
9220{ 9239{
@@ -9308,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
9308 struct task_struct *tsk) 9327 struct task_struct *tsk)
9309{ 9328{
9310#ifdef CONFIG_RT_GROUP_SCHED 9329#ifdef CONFIG_RT_GROUP_SCHED
9311 /* Don't accept realtime tasks when there is no way for them to run */ 9330 if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
9312 if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
9313 return -EINVAL; 9331 return -EINVAL;
9314#else 9332#else
9315 /* We don't support RT-tasks being in separate groups */ 9333 /* We don't support RT-tasks being in separate groups */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ad64fcb731f2..57d4b13b631d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/seccomp.h> 9#include <linux/seccomp.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/compat.h>
11 12
12/* #define SECCOMP_DEBUG 1 */ 13/* #define SECCOMP_DEBUG 1 */
13#define NR_SECCOMP_MODES 1 14#define NR_SECCOMP_MODES 1
@@ -22,7 +23,7 @@ static int mode1_syscalls[] = {
22 0, /* null terminated */ 23 0, /* null terminated */
23}; 24};
24 25
25#ifdef TIF_32BIT 26#ifdef CONFIG_COMPAT
26static int mode1_syscalls_32[] = { 27static int mode1_syscalls_32[] = {
27 __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 28 __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
28 0, /* null terminated */ 29 0, /* null terminated */
@@ -37,8 +38,8 @@ void __secure_computing(int this_syscall)
37 switch (mode) { 38 switch (mode) {
38 case 1: 39 case 1:
39 syscall = mode1_syscalls; 40 syscall = mode1_syscalls;
40#ifdef TIF_32BIT 41#ifdef CONFIG_COMPAT
41 if (test_thread_flag(TIF_32BIT)) 42 if (is_compat_task())
42 syscall = mode1_syscalls_32; 43 syscall = mode1_syscalls_32;
43#endif 44#endif
44 do { 45 do {
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..9041ea7948fe 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -626,6 +626,7 @@ static int ksoftirqd(void * __bind_cpu)
626 preempt_enable_no_resched(); 626 preempt_enable_no_resched();
627 cond_resched(); 627 cond_resched();
628 preempt_disable(); 628 preempt_disable();
629 rcu_qsctr_inc((long)__bind_cpu);
629 } 630 }
630 preempt_enable(); 631 preempt_enable();
631 set_current_state(TASK_INTERRUPTIBLE); 632 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/sys.c b/kernel/sys.c
index f145c415bc16..37f458e6882a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -559,7 +559,7 @@ error:
559 abort_creds(new); 559 abort_creds(new);
560 return retval; 560 return retval;
561} 561}
562 562
563/* 563/*
564 * change the user struct in a credentials set to match the new UID 564 * change the user struct in a credentials set to match the new UID
565 */ 565 */
@@ -571,6 +571,11 @@ static int set_user(struct cred *new)
571 if (!new_user) 571 if (!new_user)
572 return -EAGAIN; 572 return -EAGAIN;
573 573
574 if (!task_can_switch_user(new_user, current)) {
575 free_uid(new_user);
576 return -EINVAL;
577 }
578
574 if (atomic_read(&new_user->processes) >= 579 if (atomic_read(&new_user->processes) >=
575 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 580 current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
576 new_user != INIT_USER) { 581 new_user != INIT_USER) {
@@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
631 goto error; 636 goto error;
632 } 637 }
633 638
634 retval = -EAGAIN; 639 if (new->uid != old->uid) {
635 if (new->uid != old->uid && set_user(new) < 0) 640 retval = set_user(new);
636 goto error; 641 if (retval < 0)
637 642 goto error;
643 }
638 if (ruid != (uid_t) -1 || 644 if (ruid != (uid_t) -1 ||
639 (euid != (uid_t) -1 && euid != old->uid)) 645 (euid != (uid_t) -1 && euid != old->uid))
640 new->suid = new->euid; 646 new->suid = new->euid;
@@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
680 retval = -EPERM; 686 retval = -EPERM;
681 if (capable(CAP_SETUID)) { 687 if (capable(CAP_SETUID)) {
682 new->suid = new->uid = uid; 688 new->suid = new->uid = uid;
683 if (uid != old->uid && set_user(new) < 0) { 689 if (uid != old->uid) {
684 retval = -EAGAIN; 690 retval = set_user(new);
685 goto error; 691 if (retval < 0)
692 goto error;
686 } 693 }
687 } else if (uid != old->uid && uid != new->suid) { 694 } else if (uid != old->uid && uid != new->suid) {
688 goto error; 695 goto error;
@@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
734 goto error; 741 goto error;
735 } 742 }
736 743
737 retval = -EAGAIN;
738 if (ruid != (uid_t) -1) { 744 if (ruid != (uid_t) -1) {
739 new->uid = ruid; 745 new->uid = ruid;
740 if (ruid != old->uid && set_user(new) < 0) 746 if (ruid != old->uid) {
741 goto error; 747 retval = set_user(new);
748 if (retval < 0)
749 goto error;
750 }
742 } 751 }
743 if (euid != (uid_t) -1) 752 if (euid != (uid_t) -1)
744 new->euid = euid; 753 new->euid = euid;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6fc3a6..34e707e5ab87 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,7 @@ config FUNCTION_TRACER
52 depends on HAVE_FUNCTION_TRACER 52 depends on HAVE_FUNCTION_TRACER
53 depends on DEBUG_KERNEL 53 depends on DEBUG_KERNEL
54 select FRAME_POINTER 54 select FRAME_POINTER
55 select KALLSYMS
55 select TRACING 56 select TRACING
56 select CONTEXT_SWITCH_TRACER 57 select CONTEXT_SWITCH_TRACER
57 help 58 help
@@ -238,6 +239,7 @@ config STACK_TRACER
238 depends on DEBUG_KERNEL 239 depends on DEBUG_KERNEL
239 select FUNCTION_TRACER 240 select FUNCTION_TRACER
240 select STACKTRACE 241 select STACKTRACE
242 select KALLSYMS
241 help 243 help
242 This special tracer records the maximum stack footprint of the 244 This special tracer records the maximum stack footprint of the
243 kernel and displays it in debugfs/tracing/stack_trace. 245 kernel and displays it in debugfs/tracing/stack_trace.
@@ -302,4 +304,27 @@ config FTRACE_STARTUP_TEST
302 functioning properly. It will do tests on all the configured 304 functioning properly. It will do tests on all the configured
303 tracers of ftrace. 305 tracers of ftrace.
304 306
307config MMIOTRACE
308 bool "Memory mapped IO tracing"
309 depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
310 select TRACING
311 help
312 Mmiotrace traces Memory Mapped I/O access and is meant for
313 debugging and reverse engineering. It is called from the ioremap
314 implementation and works via page faults. Tracing is disabled by
315 default and can be enabled at run-time.
316
317 See Documentation/tracers/mmiotrace.txt.
318 If you are not helping to develop drivers, say N.
319
320config MMIOTRACE_TEST
321 tristate "Test module for mmiotrace"
322 depends on MMIOTRACE && m
323 help
324 This is a dumb module for testing mmiotrace. It is very dangerous
325 as it will write garbage to IO memory starting at a given address.
326 However, it should be safe to use on e.g. unused portion of VRAM.
327
328 Say N, unless you absolutely know what you are doing.
329
305endmenu 330endmenu
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9a236ffe2aa4..fdf913dfc7e8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2033,7 +2033,7 @@ free:
2033static int start_graph_tracing(void) 2033static int start_graph_tracing(void)
2034{ 2034{
2035 struct ftrace_ret_stack **ret_stack_list; 2035 struct ftrace_ret_stack **ret_stack_list;
2036 int ret; 2036 int ret, cpu;
2037 2037
2038 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * 2038 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
2039 sizeof(struct ftrace_ret_stack *), 2039 sizeof(struct ftrace_ret_stack *),
@@ -2042,6 +2042,10 @@ static int start_graph_tracing(void)
2042 if (!ret_stack_list) 2042 if (!ret_stack_list)
2043 return -ENOMEM; 2043 return -ENOMEM;
2044 2044
2045 /* The cpu_boot init_task->ret_stack will never be freed */
2046 for_each_online_cpu(cpu)
2047 ftrace_graph_init_task(idle_task(cpu));
2048
2045 do { 2049 do {
2046 ret = alloc_retstack_tasklist(ret_stack_list); 2050 ret = alloc_retstack_tasklist(ret_stack_list);
2047 } while (ret == -EAGAIN); 2051 } while (ret == -EAGAIN);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index fffcb069f1dc..80e503ef6136 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <asm/atomic.h>
12 13
13#include "trace.h" 14#include "trace.h"
14 15
@@ -19,6 +20,7 @@ struct header_iter {
19static struct trace_array *mmio_trace_array; 20static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 21static bool overrun_detected;
21static unsigned long prev_overruns; 22static unsigned long prev_overruns;
23static atomic_t dropped_count;
22 24
23static void mmio_reset_data(struct trace_array *tr) 25static void mmio_reset_data(struct trace_array *tr)
24{ 26{
@@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter)
121 123
122static unsigned long count_overruns(struct trace_iterator *iter) 124static unsigned long count_overruns(struct trace_iterator *iter)
123{ 125{
124 unsigned long cnt = 0; 126 unsigned long cnt = atomic_xchg(&dropped_count, 0);
125 unsigned long over = ring_buffer_overruns(iter->tr->buffer); 127 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
126 128
127 if (over > prev_overruns) 129 if (over > prev_overruns)
128 cnt = over - prev_overruns; 130 cnt += over - prev_overruns;
129 prev_overruns = over; 131 prev_overruns = over;
130 return cnt; 132 return cnt;
131} 133}
@@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
310 312
311 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
312 &irq_flags); 314 &irq_flags);
313 if (!event) 315 if (!event) {
316 atomic_inc(&dropped_count);
314 return; 317 return;
318 }
315 entry = ring_buffer_event_data(event); 319 entry = ring_buffer_event_data(event);
316 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
317 entry->ent.type = TRACE_MMIO_RW; 321 entry->ent.type = TRACE_MMIO_RW;
@@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338 342
339 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
340 &irq_flags); 344 &irq_flags);
341 if (!event) 345 if (!event) {
346 atomic_inc(&dropped_count);
342 return; 347 return;
348 }
343 entry = ring_buffer_event_data(event); 349 entry = ring_buffer_event_data(event);
344 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
345 entry->ent.type = TRACE_MMIO_MAP; 351 entry->ent.type = TRACE_MMIO_MAP;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb70f54a..bc8e80a86bca 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
23{ 23{
24 struct ring_buffer_event *event; 24 struct ring_buffer_event *event;
25 struct trace_entry *entry; 25 struct trace_entry *entry;
26 unsigned int loops = 0;
26 27
27 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 28 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 entry = ring_buffer_event_data(event); 29 entry = ring_buffer_event_data(event);
29 30
31 /*
32 * The ring buffer is a size of trace_buf_size, if
33 * we loop more than the size, there's something wrong
34 * with the ring buffer.
35 */
36 if (loops++ > trace_buf_size) {
37 printk(KERN_CONT ".. bad ring buffer ");
38 goto failed;
39 }
30 if (!trace_valid_entry(entry)) { 40 if (!trace_valid_entry(entry)) {
31 printk(KERN_CONT ".. invalid entry %d ", 41 printk(KERN_CONT ".. invalid entry %d ",
32 entry->type); 42 entry->type);
@@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
57 67
58 cnt = ring_buffer_entries(tr->buffer); 68 cnt = ring_buffer_entries(tr->buffer);
59 69
70 /*
71 * The trace_test_buffer_cpu runs a while loop to consume all data.
72 * If the calling tracer is broken, and is constantly filling
73 * the buffer, this will run forever, and hard lock the box.
74 * We disable the ring buffer while we do this test to prevent
75 * a hard lock up.
76 */
77 tracing_off();
60 for_each_possible_cpu(cpu) { 78 for_each_possible_cpu(cpu) {
61 ret = trace_test_buffer_cpu(tr, cpu); 79 ret = trace_test_buffer_cpu(tr, cpu);
62 if (ret) 80 if (ret)
63 break; 81 break;
64 } 82 }
83 tracing_on();
65 __raw_spin_unlock(&ftrace_max_lock); 84 __raw_spin_unlock(&ftrace_max_lock);
66 local_irq_restore(flags); 85 local_irq_restore(flags);
67 86
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 43f891b05a4b..00d59d048edf 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk)
122 if (likely(tsk->mm)) { 122 if (likely(tsk->mm)) {
123 cputime_t time, dtime; 123 cputime_t time, dtime;
124 struct timeval value; 124 struct timeval value;
125 unsigned long flags;
125 u64 delta; 126 u64 delta;
126 127
128 local_irq_save(flags);
127 time = tsk->stime + tsk->utime; 129 time = tsk->stime + tsk->utime;
128 dtime = cputime_sub(time, tsk->acct_timexpd); 130 dtime = cputime_sub(time, tsk->acct_timexpd);
129 jiffies_to_timeval(cputime_to_jiffies(dtime), &value); 131 jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
@@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk)
131 delta = delta * USEC_PER_SEC + value.tv_usec; 133 delta = delta * USEC_PER_SEC + value.tv_usec;
132 134
133 if (delta == 0) 135 if (delta == 0)
134 return; 136 goto out;
135 tsk->acct_timexpd = time; 137 tsk->acct_timexpd = time;
136 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); 138 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
137 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; 139 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
140 out:
141 local_irq_restore(flags);
138 } 142 }
139} 143}
140 144
diff --git a/kernel/user.c b/kernel/user.c
index 477b6660f447..fbb300e6191f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -72,6 +72,7 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
72static void uid_hash_remove(struct user_struct *up) 72static void uid_hash_remove(struct user_struct *up)
73{ 73{
74 hlist_del_init(&up->uidhash_node); 74 hlist_del_init(&up->uidhash_node);
75 put_user_ns(up->user_ns);
75} 76}
76 77
77static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) 78static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
@@ -285,14 +286,12 @@ int __init uids_sysfs_init(void)
285/* work function to remove sysfs directory for a user and free up 286/* work function to remove sysfs directory for a user and free up
286 * corresponding structures. 287 * corresponding structures.
287 */ 288 */
288static void remove_user_sysfs_dir(struct work_struct *w) 289static void cleanup_user_struct(struct work_struct *w)
289{ 290{
290 struct user_struct *up = container_of(w, struct user_struct, work); 291 struct user_struct *up = container_of(w, struct user_struct, work);
291 unsigned long flags; 292 unsigned long flags;
292 int remove_user = 0; 293 int remove_user = 0;
293 294
294 if (up->user_ns != &init_user_ns)
295 return;
296 /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() 295 /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
297 * atomic. 296 * atomic.
298 */ 297 */
@@ -311,9 +310,11 @@ static void remove_user_sysfs_dir(struct work_struct *w)
311 if (!remove_user) 310 if (!remove_user)
312 goto done; 311 goto done;
313 312
314 kobject_uevent(&up->kobj, KOBJ_REMOVE); 313 if (up->user_ns == &init_user_ns) {
315 kobject_del(&up->kobj); 314 kobject_uevent(&up->kobj, KOBJ_REMOVE);
316 kobject_put(&up->kobj); 315 kobject_del(&up->kobj);
316 kobject_put(&up->kobj);
317 }
317 318
318 sched_destroy_user(up); 319 sched_destroy_user(up);
319 key_put(up->uid_keyring); 320 key_put(up->uid_keyring);
@@ -334,8 +335,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
334 atomic_inc(&up->__count); 335 atomic_inc(&up->__count);
335 spin_unlock_irqrestore(&uidhash_lock, flags); 336 spin_unlock_irqrestore(&uidhash_lock, flags);
336 337
337 put_user_ns(up->user_ns); 338 INIT_WORK(&up->work, cleanup_user_struct);
338 INIT_WORK(&up->work, remove_user_sysfs_dir);
339 schedule_work(&up->work); 339 schedule_work(&up->work);
340} 340}
341 341
@@ -357,12 +357,29 @@ static void free_user(struct user_struct *up, unsigned long flags)
357 sched_destroy_user(up); 357 sched_destroy_user(up);
358 key_put(up->uid_keyring); 358 key_put(up->uid_keyring);
359 key_put(up->session_keyring); 359 key_put(up->session_keyring);
360 put_user_ns(up->user_ns);
361 kmem_cache_free(uid_cachep, up); 360 kmem_cache_free(uid_cachep, up);
362} 361}
363 362
364#endif 363#endif
365 364
365#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
366/*
367 * We need to check if a setuid can take place. This function should be called
368 * before successfully completing the setuid.
369 */
370int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
371{
372
373 return sched_rt_can_attach(up->tg, tsk);
374
375}
376#else
377int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
378{
379 return 1;
380}
381#endif
382
366/* 383/*
367 * Locate the user_struct for the passed UID. If found, take a ref on it. The 384 * Locate the user_struct for the passed UID. If found, take a ref on it. The
368 * caller must undo that ref with free_uid(). 385 * caller must undo that ref with free_uid().
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 79084311ee57..076c7c8215b0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -60,12 +60,25 @@ int create_user_ns(struct cred *new)
60 return 0; 60 return 0;
61} 61}
62 62
63void free_user_ns(struct kref *kref) 63/*
64 * Deferred destructor for a user namespace. This is required because
65 * free_user_ns() may be called with uidhash_lock held, but we need to call
66 * back to free_uid() which will want to take the lock again.
67 */
68static void free_user_ns_work(struct work_struct *work)
64{ 69{
65 struct user_namespace *ns; 70 struct user_namespace *ns =
66 71 container_of(work, struct user_namespace, destroyer);
67 ns = container_of(kref, struct user_namespace, kref);
68 free_uid(ns->creator); 72 free_uid(ns->creator);
69 kfree(ns); 73 kfree(ns);
70} 74}
75
76void free_user_ns(struct kref *kref)
77{
78 struct user_namespace *ns =
79 container_of(kref, struct user_namespace, kref);
80
81 INIT_WORK(&ns->destroyer, free_user_ns_work);
82 schedule_work(&ns->destroyer);
83}
71EXPORT_SYMBOL(free_user_ns); 84EXPORT_SYMBOL(free_user_ns);