aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c24
-rw-r--r--kernel/irq/handle.c3
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/power/main.c6
-rw-r--r--kernel/printk.c72
-rw-r--r--kernel/profile.c16
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/signal.c11
-rw-r--r--kernel/spinlock.c8
9 files changed, 90 insertions, 58 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 961d74044deb..00e8f2575512 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -166,9 +166,8 @@ static struct super_block *cpuset_sb = NULL;
166 * The hooks from fork and exit, cpuset_fork() and cpuset_exit(), don't 166 * The hooks from fork and exit, cpuset_fork() and cpuset_exit(), don't
167 * (usually) grab cpuset_sem. These are the two most performance 167 * (usually) grab cpuset_sem. These are the two most performance
168 * critical pieces of code here. The exception occurs on exit(), 168 * critical pieces of code here. The exception occurs on exit(),
169 * if the last task using a cpuset exits, and the cpuset was marked 169 * when a task in a notify_on_release cpuset exits. Then cpuset_sem
170 * notify_on_release. In that case, the cpuset_sem is taken, the 170 * is taken, and if the cpuset count is zero, a usermode call made
171 * path to the released cpuset calculated, and a usermode call made
172 * to /sbin/cpuset_release_agent with the name of the cpuset (path 171 * to /sbin/cpuset_release_agent with the name of the cpuset (path
173 * relative to the root of cpuset file system) as the argument. 172 * relative to the root of cpuset file system) as the argument.
174 * 173 *
@@ -1404,6 +1403,18 @@ void cpuset_fork(struct task_struct *tsk)
1404 * 1403 *
1405 * Description: Detach cpuset from @tsk and release it. 1404 * Description: Detach cpuset from @tsk and release it.
1406 * 1405 *
1406 * Note that cpusets marked notify_on_release force every task
1407 * in them to take the global cpuset_sem semaphore when exiting.
1408 * This could impact scaling on very large systems. Be reluctant
1409 * to use notify_on_release cpusets where very high task exit
1410 * scaling is required on large systems.
1411 *
1412 * Don't even think about derefencing 'cs' after the cpuset use
1413 * count goes to zero, except inside a critical section guarded
1414 * by the cpuset_sem semaphore. If you don't hold cpuset_sem,
1415 * then a zero cpuset use count is a license to any other task to
1416 * nuke the cpuset immediately.
1417 *
1407 **/ 1418 **/
1408 1419
1409void cpuset_exit(struct task_struct *tsk) 1420void cpuset_exit(struct task_struct *tsk)
@@ -1415,10 +1426,13 @@ void cpuset_exit(struct task_struct *tsk)
1415 tsk->cpuset = NULL; 1426 tsk->cpuset = NULL;
1416 task_unlock(tsk); 1427 task_unlock(tsk);
1417 1428
1418 if (atomic_dec_and_test(&cs->count)) { 1429 if (notify_on_release(cs)) {
1419 down(&cpuset_sem); 1430 down(&cpuset_sem);
1420 check_for_release(cs); 1431 if (atomic_dec_and_test(&cs->count))
1432 check_for_release(cs);
1421 up(&cpuset_sem); 1433 up(&cpuset_sem);
1434 } else {
1435 atomic_dec(&cs->count);
1422 } 1436 }
1423} 1437}
1424 1438
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 2fb0e46e11f3..436c7d93c00a 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -30,6 +30,7 @@
30 */ 30 */
31irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { 31irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
32 [0 ... NR_IRQS-1] = { 32 [0 ... NR_IRQS-1] = {
33 .status = IRQ_DISABLED,
33 .handler = &no_irq_type, 34 .handler = &no_irq_type,
34 .lock = SPIN_LOCK_UNLOCKED 35 .lock = SPIN_LOCK_UNLOCKED
35 } 36 }
@@ -118,8 +119,6 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
118 */ 119 */
119 desc->handler->ack(irq); 120 desc->handler->ack(irq);
120 action_ret = handle_IRQ_event(irq, regs, desc->action); 121 action_ret = handle_IRQ_event(irq, regs, desc->action);
121 if (!noirqdebug)
122 note_interrupt(irq, desc, action_ret);
123 desc->handler->end(irq); 122 desc->handler->end(irq);
124 return 1; 123 return 1;
125 } 124 }
diff --git a/kernel/module.c b/kernel/module.c
index 5734ab09d3f9..83b3d376708c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1758,6 +1758,7 @@ sys_init_module(void __user *umod,
1758 const char __user *uargs) 1758 const char __user *uargs)
1759{ 1759{
1760 struct module *mod; 1760 struct module *mod;
1761 mm_segment_t old_fs = get_fs();
1761 int ret = 0; 1762 int ret = 0;
1762 1763
1763 /* Must have permission */ 1764 /* Must have permission */
@@ -1775,6 +1776,9 @@ sys_init_module(void __user *umod,
1775 return PTR_ERR(mod); 1776 return PTR_ERR(mod);
1776 } 1777 }
1777 1778
1779 /* flush the icache in correct context */
1780 set_fs(KERNEL_DS);
1781
1778 /* Flush the instruction cache, since we've played with text */ 1782 /* Flush the instruction cache, since we've played with text */
1779 if (mod->module_init) 1783 if (mod->module_init)
1780 flush_icache_range((unsigned long)mod->module_init, 1784 flush_icache_range((unsigned long)mod->module_init,
@@ -1783,6 +1787,8 @@ sys_init_module(void __user *umod,
1783 flush_icache_range((unsigned long)mod->module_core, 1787 flush_icache_range((unsigned long)mod->module_core,
1784 (unsigned long)mod->module_core + mod->core_size); 1788 (unsigned long)mod->module_core + mod->core_size);
1785 1789
1790 set_fs(old_fs);
1791
1786 /* Now sew it into the lists. They won't access us, since 1792 /* Now sew it into the lists. They won't access us, since
1787 strong_try_module_get() will fail. */ 1793 strong_try_module_get() will fail. */
1788 stop_machine_run(__link_module, mod, NR_CPUS); 1794 stop_machine_run(__link_module, mod, NR_CPUS);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7960ddf04a57..4cdebc972ff2 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -156,14 +156,14 @@ static int enter_state(suspend_state_t state)
156 goto Unlock; 156 goto Unlock;
157 } 157 }
158 158
159 pr_debug("PM: Preparing system for suspend\n"); 159 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
160 if ((error = suspend_prepare(state))) 160 if ((error = suspend_prepare(state)))
161 goto Unlock; 161 goto Unlock;
162 162
163 pr_debug("PM: Entering state.\n"); 163 pr_debug("PM: Entering %s sleep\n", pm_states[state]);
164 error = suspend_enter(state); 164 error = suspend_enter(state);
165 165
166 pr_debug("PM: Finishing up.\n"); 166 pr_debug("PM: Finishing wakeup.\n");
167 suspend_finish(state); 167 suspend_finish(state);
168 Unlock: 168 Unlock:
169 up(&pm_sem); 169 up(&pm_sem);
diff --git a/kernel/printk.c b/kernel/printk.c
index 290a07ce2c8a..01b58d7d17ff 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -160,42 +160,6 @@ static int __init console_setup(char *str)
160 160
161__setup("console=", console_setup); 161__setup("console=", console_setup);
162 162
163/**
164 * add_preferred_console - add a device to the list of preferred consoles.
165 *
166 * The last preferred console added will be used for kernel messages
167 * and stdin/out/err for init. Normally this is used by console_setup
168 * above to handle user-supplied console arguments; however it can also
169 * be used by arch-specific code either to override the user or more
170 * commonly to provide a default console (ie from PROM variables) when
171 * the user has not supplied one.
172 */
173int __init add_preferred_console(char *name, int idx, char *options)
174{
175 struct console_cmdline *c;
176 int i;
177
178 /*
179 * See if this tty is not yet registered, and
180 * if we have a slot free.
181 */
182 for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
183 if (strcmp(console_cmdline[i].name, name) == 0 &&
184 console_cmdline[i].index == idx) {
185 selected_console = i;
186 return 0;
187 }
188 if (i == MAX_CMDLINECONSOLES)
189 return -E2BIG;
190 selected_console = i;
191 c = &console_cmdline[i];
192 memcpy(c->name, name, sizeof(c->name));
193 c->name[sizeof(c->name) - 1] = 0;
194 c->options = options;
195 c->index = idx;
196 return 0;
197}
198
199static int __init log_buf_len_setup(char *str) 163static int __init log_buf_len_setup(char *str)
200{ 164{
201 unsigned long size = memparse(str, &str); 165 unsigned long size = memparse(str, &str);
@@ -671,6 +635,42 @@ static void call_console_drivers(unsigned long start, unsigned long end) {}
671#endif 635#endif
672 636
673/** 637/**
638 * add_preferred_console - add a device to the list of preferred consoles.
639 *
640 * The last preferred console added will be used for kernel messages
641 * and stdin/out/err for init. Normally this is used by console_setup
642 * above to handle user-supplied console arguments; however it can also
643 * be used by arch-specific code either to override the user or more
644 * commonly to provide a default console (ie from PROM variables) when
645 * the user has not supplied one.
646 */
647int __init add_preferred_console(char *name, int idx, char *options)
648{
649 struct console_cmdline *c;
650 int i;
651
652 /*
653 * See if this tty is not yet registered, and
654 * if we have a slot free.
655 */
656 for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
657 if (strcmp(console_cmdline[i].name, name) == 0 &&
658 console_cmdline[i].index == idx) {
659 selected_console = i;
660 return 0;
661 }
662 if (i == MAX_CMDLINECONSOLES)
663 return -E2BIG;
664 selected_console = i;
665 c = &console_cmdline[i];
666 memcpy(c->name, name, sizeof(c->name));
667 c->name[sizeof(c->name) - 1] = 0;
668 c->options = options;
669 c->index = idx;
670 return 0;
671}
672
673/**
674 * acquire_console_sem - lock the console system for exclusive use. 674 * acquire_console_sem - lock the console system for exclusive use.
675 * 675 *
676 * Acquires a semaphore which guarantees that the caller has 676 * Acquires a semaphore which guarantees that the caller has
diff --git a/kernel/profile.c b/kernel/profile.c
index 0221a50ca867..ad8cbb75ffa2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -49,15 +49,19 @@ static DECLARE_MUTEX(profile_flip_mutex);
49 49
50static int __init profile_setup(char * str) 50static int __init profile_setup(char * str)
51{ 51{
52 static char __initdata schedstr[] = "schedule";
52 int par; 53 int par;
53 54
54 if (!strncmp(str, "schedule", 8)) { 55 if (!strncmp(str, schedstr, strlen(schedstr))) {
55 prof_on = SCHED_PROFILING; 56 prof_on = SCHED_PROFILING;
56 printk(KERN_INFO "kernel schedule profiling enabled\n"); 57 if (str[strlen(schedstr)] == ',')
57 if (str[7] == ',') 58 str += strlen(schedstr) + 1;
58 str += 8; 59 if (get_option(&str, &par))
59 } 60 prof_shift = par;
60 if (get_option(&str,&par)) { 61 printk(KERN_INFO
62 "kernel schedule profiling enabled (shift: %ld)\n",
63 prof_shift);
64 } else if (get_option(&str, &par)) {
61 prof_shift = par; 65 prof_shift = par;
62 prof_on = CPU_PROFILING; 66 prof_on = CPU_PROFILING;
63 printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n", 67 printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
diff --git a/kernel/sched.c b/kernel/sched.c
index 0dc3158667a2..66b2ed784822 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4243,7 +4243,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
4243 4243
4244 /* No more Mr. Nice Guy. */ 4244 /* No more Mr. Nice Guy. */
4245 if (dest_cpu == NR_CPUS) { 4245 if (dest_cpu == NR_CPUS) {
4246 tsk->cpus_allowed = cpuset_cpus_allowed(tsk); 4246 cpus_setall(tsk->cpus_allowed);
4247 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4247 dest_cpu = any_online_cpu(tsk->cpus_allowed);
4248 4248
4249 /* 4249 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f3debc77c5b..b3c24c732c5a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -522,7 +522,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
522{ 522{
523 int sig = 0; 523 int sig = 0;
524 524
525 sig = next_signal(pending, mask); 525 /* SIGKILL must have priority, otherwise it is quite easy
526 * to create an unkillable process, sending sig < SIGKILL
527 * to self */
528 if (unlikely(sigismember(&pending->signal, SIGKILL))) {
529 if (!sigismember(mask, SIGKILL))
530 sig = SIGKILL;
531 }
532
533 if (likely(!sig))
534 sig = next_signal(pending, mask);
526 if (sig) { 535 if (sig) {
527 if (current->notifier) { 536 if (current->notifier) {
528 if (sigismember(current->notifier_mask, sig)) { 537 if (sigismember(current->notifier_mask, sig)) {
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index e15ed17863f1..0c3f9d8bbe17 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -294,7 +294,7 @@ EXPORT_SYMBOL(_spin_unlock_irq);
294void __lockfunc _spin_unlock_bh(spinlock_t *lock) 294void __lockfunc _spin_unlock_bh(spinlock_t *lock)
295{ 295{
296 _raw_spin_unlock(lock); 296 _raw_spin_unlock(lock);
297 preempt_enable(); 297 preempt_enable_no_resched();
298 local_bh_enable(); 298 local_bh_enable();
299} 299}
300EXPORT_SYMBOL(_spin_unlock_bh); 300EXPORT_SYMBOL(_spin_unlock_bh);
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(_read_unlock_irq);
318void __lockfunc _read_unlock_bh(rwlock_t *lock) 318void __lockfunc _read_unlock_bh(rwlock_t *lock)
319{ 319{
320 _raw_read_unlock(lock); 320 _raw_read_unlock(lock);
321 preempt_enable(); 321 preempt_enable_no_resched();
322 local_bh_enable(); 322 local_bh_enable();
323} 323}
324EXPORT_SYMBOL(_read_unlock_bh); 324EXPORT_SYMBOL(_read_unlock_bh);
@@ -342,7 +342,7 @@ EXPORT_SYMBOL(_write_unlock_irq);
342void __lockfunc _write_unlock_bh(rwlock_t *lock) 342void __lockfunc _write_unlock_bh(rwlock_t *lock)
343{ 343{
344 _raw_write_unlock(lock); 344 _raw_write_unlock(lock);
345 preempt_enable(); 345 preempt_enable_no_resched();
346 local_bh_enable(); 346 local_bh_enable();
347} 347}
348EXPORT_SYMBOL(_write_unlock_bh); 348EXPORT_SYMBOL(_write_unlock_bh);
@@ -354,7 +354,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
354 if (_raw_spin_trylock(lock)) 354 if (_raw_spin_trylock(lock))
355 return 1; 355 return 1;
356 356
357 preempt_enable(); 357 preempt_enable_no_resched();
358 local_bh_enable(); 358 local_bh_enable();
359 return 0; 359 return 0;
360} 360}