aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2007-05-15 16:11:17 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2007-05-15 16:11:17 -0400
commit6684e323a236d40461f27d36b38c6b70aabc9e71 (patch)
tree7eb6e0a22b7083c5e08f4f2cf3f15be7d938a572
parent7531d692d4174789d583eb50fcb83cefa121b790 (diff)
parent0560551dca0c02a4b23f95a9c339882ff291e1c7 (diff)
Merge branch 'origin'
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c36
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.c2
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.h4
-rw-r--r--arch/i386/kernel/smp.c65
-rw-r--r--arch/i386/kernel/smpboot.c22
-rw-r--r--arch/i386/kernel/smpcommon.c79
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c106
-rw-r--r--arch/m68k/lib/uaccess.c4
-rw-r--r--block/ll_rw_blk.c1
-rw-r--r--drivers/char/agp/via-agp.c6
-rw-r--r--drivers/infiniband/core/cma.c106
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c94
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c17
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c13
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c12
-rw-r--r--drivers/infiniband/hw/mlx4/main.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c11
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/mlx4/main.c2
-rw-r--r--drivers/net/mlx4/mlx4.h1
-rw-r--r--fs/eventpoll.c561
-rw-r--r--include/asm-alpha/mmu_context.h2
-rw-r--r--include/asm-h8300/atomic.h1
-rw-r--r--include/asm-i386/processor.h4
-rw-r--r--include/asm-m68k/uaccess.h4
-rw-r--r--include/linux/init.h7
-rw-r--r--include/linux/io.h8
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/slub_def.h6
-rw-r--r--kernel/time/clocksource.c10
39 files changed, 588 insertions, 630 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 21f3fff5432f..bbeb5b6b5b05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3267,6 +3267,7 @@ W: http://tpmdd.sourceforge.net
3267P: Marcel Selhorst 3267P: Marcel Selhorst
3268M: tpm@selhorst.net 3268M: tpm@selhorst.net
3269W: http://www.prosec.rub.de/tpm/ 3269W: http://www.prosec.rub.de/tpm/
3270L: tpmdd-devel@lists.sourceforge.net
3270S: Maintained 3271S: Maintained
3271 3272
3272Telecom Clock Driver for MCPL0010 3273Telecom Clock Driver for MCPL0010
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 91cff8dc9e1a..06da59f6f837 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o
19obj-$(CONFIG_MICROCODE) += microcode.o 19obj-$(CONFIG_MICROCODE) += microcode.o
20obj-$(CONFIG_APM) += apm.o 20obj-$(CONFIG_APM) += apm.o
21obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o 21obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o
22obj-$(CONFIG_SMP) += smpcommon.o
22obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 23obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
23obj-$(CONFIG_X86_MPPARSE) += mpparse.o 24obj-$(CONFIG_X86_MPPARSE) += mpparse.o
24obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 25obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 837b04166a47..ca3e1d341889 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -341,15 +341,17 @@ static int powernow_acpi_init(void)
341 pc.val = (unsigned long) acpi_processor_perf->states[0].control; 341 pc.val = (unsigned long) acpi_processor_perf->states[0].control;
342 for (i = 0; i < number_scales; i++) { 342 for (i = 0; i < number_scales; i++) {
343 u8 fid, vid; 343 u8 fid, vid;
344 unsigned int speed; 344 struct acpi_processor_px *state =
345 &acpi_processor_perf->states[i];
346 unsigned int speed, speed_mhz;
345 347
346 pc.val = (unsigned long) acpi_processor_perf->states[i].control; 348 pc.val = (unsigned long) state->control;
347 dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", 349 dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
348 i, 350 i,
349 (u32) acpi_processor_perf->states[i].core_frequency, 351 (u32) state->core_frequency,
350 (u32) acpi_processor_perf->states[i].power, 352 (u32) state->power,
351 (u32) acpi_processor_perf->states[i].transition_latency, 353 (u32) state->transition_latency,
352 (u32) acpi_processor_perf->states[i].control, 354 (u32) state->control,
353 pc.bits.sgtc); 355 pc.bits.sgtc);
354 356
355 vid = pc.bits.vid; 357 vid = pc.bits.vid;
@@ -360,6 +362,18 @@ static int powernow_acpi_init(void)
360 powernow_table[i].index |= (vid << 8); /* upper 8 bits */ 362 powernow_table[i].index |= (vid << 8); /* upper 8 bits */
361 363
362 speed = powernow_table[i].frequency; 364 speed = powernow_table[i].frequency;
365 speed_mhz = speed / 1000;
366
367 /* processor_perflib will multiply the MHz value by 1000 to
368 * get a KHz value (e.g. 1266000). However, powernow-k7 works
369 * with true KHz values (e.g. 1266768). To ensure that all
370 * powernow frequencies are available, we must ensure that
371 * ACPI doesn't restrict them, so we round up the MHz value
372 * to ensure that perflib's computed KHz value is greater than
373 * or equal to powernow's KHz value.
374 */
375 if (speed % 1000 > 0)
376 speed_mhz++;
363 377
364 if ((fid_codes[fid] % 10)==5) { 378 if ((fid_codes[fid] % 10)==5) {
365 if (have_a0 == 1) 379 if (have_a0 == 1)
@@ -368,10 +382,16 @@ static int powernow_acpi_init(void)
368 382
369 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " 383 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
370 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 384 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
371 fid_codes[fid] % 10, speed/1000, vid, 385 fid_codes[fid] % 10, speed_mhz, vid,
372 mobile_vid_table[vid]/1000, 386 mobile_vid_table[vid]/1000,
373 mobile_vid_table[vid]%1000); 387 mobile_vid_table[vid]%1000);
374 388
389 if (state->core_frequency != speed_mhz) {
390 state->core_frequency = speed_mhz;
391 dprintk(" Corrected ACPI frequency to %d\n",
392 speed_mhz);
393 }
394
375 if (latency < pc.bits.sgtc) 395 if (latency < pc.bits.sgtc)
376 latency = pc.bits.sgtc; 396 latency = pc.bits.sgtc;
377 397
@@ -602,7 +622,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
602 result = powernow_acpi_init(); 622 result = powernow_acpi_init();
603 if (result) { 623 if (result) {
604 printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); 624 printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
605 printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n"); 625 printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n");
606 } 626 }
607 } else { 627 } else {
608 /* SGTC use the bus clock as timer */ 628 /* SGTC use the bus clock as timer */
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 7cf3d207b6b3..4ade55c5f333 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -521,7 +521,7 @@ static int check_supported_cpu(unsigned int cpu)
521 521
522 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { 522 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
523 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || 523 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
524 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { 524 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
525 printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); 525 printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
526 goto out; 526 goto out;
527 } 527 }
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 95be5013c984..b06c812208ca 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -46,8 +46,8 @@ struct powernow_k8_data {
46#define CPUID_XFAM 0x0ff00000 /* extended family */ 46#define CPUID_XFAM 0x0ff00000 /* extended family */
47#define CPUID_XFAM_K8 0 47#define CPUID_XFAM_K8 0
48#define CPUID_XMOD 0x000f0000 /* extended model */ 48#define CPUID_XMOD 0x000f0000 /* extended model */
49#define CPUID_XMOD_REV_G 0x00060000 49#define CPUID_XMOD_REV_MASK 0x00080000
50#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ 50#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
51#define CPUID_USE_XFAM_XMOD 0x00000f00 51#define CPUID_USE_XFAM_XMOD 0x00000f00
52#define CPUID_GET_MAX_CAPABILITIES 0x80000000 52#define CPUID_GET_MAX_CAPABILITIES 0x80000000
53#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 53#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 706bda72dc60..c9a7c9835aba 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -467,7 +467,7 @@ void flush_tlb_all(void)
467 * it goes straight through and wastes no time serializing 467 * it goes straight through and wastes no time serializing
468 * anything. Worst case is that we lose a reschedule ... 468 * anything. Worst case is that we lose a reschedule ...
469 */ 469 */
470void native_smp_send_reschedule(int cpu) 470static void native_smp_send_reschedule(int cpu)
471{ 471{
472 WARN_ON(cpu_is_offline(cpu)); 472 WARN_ON(cpu_is_offline(cpu));
473 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 473 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
@@ -546,9 +546,10 @@ static void __smp_call_function(void (*func) (void *info), void *info,
546 * You must not call this function with disabled interrupts or from a 546 * You must not call this function with disabled interrupts or from a
547 * hardware interrupt handler or from a bottom half handler. 547 * hardware interrupt handler or from a bottom half handler.
548 */ 548 */
549int native_smp_call_function_mask(cpumask_t mask, 549static int
550 void (*func)(void *), void *info, 550native_smp_call_function_mask(cpumask_t mask,
551 int wait) 551 void (*func)(void *), void *info,
552 int wait)
552{ 553{
553 struct call_data_struct data; 554 struct call_data_struct data;
554 cpumask_t allbutself; 555 cpumask_t allbutself;
@@ -599,60 +600,6 @@ int native_smp_call_function_mask(cpumask_t mask,
599 return 0; 600 return 0;
600} 601}
601 602
602/**
603 * smp_call_function(): Run a function on all other CPUs.
604 * @func: The function to run. This must be fast and non-blocking.
605 * @info: An arbitrary pointer to pass to the function.
606 * @nonatomic: Unused.
607 * @wait: If true, wait (atomically) until function has completed on other CPUs.
608 *
609 * Returns 0 on success, else a negative status code.
610 *
611 * If @wait is true, then returns once @func has returned; otherwise
612 * it returns just before the target cpu calls @func.
613 *
614 * You must not call this function with disabled interrupts or from a
615 * hardware interrupt handler or from a bottom half handler.
616 */
617int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
618 int wait)
619{
620 return smp_call_function_mask(cpu_online_map, func, info, wait);
621}
622EXPORT_SYMBOL(smp_call_function);
623
624/**
625 * smp_call_function_single - Run a function on another CPU
626 * @cpu: The target CPU. Cannot be the calling CPU.
627 * @func: The function to run. This must be fast and non-blocking.
628 * @info: An arbitrary pointer to pass to the function.
629 * @nonatomic: Unused.
630 * @wait: If true, wait until function has completed on other CPUs.
631 *
632 * Returns 0 on success, else a negative status code.
633 *
634 * If @wait is true, then returns once @func has returned; otherwise
635 * it returns just before the target cpu calls @func.
636 */
637int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
638 int nonatomic, int wait)
639{
640 /* prevent preemption and reschedule on another processor */
641 int ret;
642 int me = get_cpu();
643 if (cpu == me) {
644 WARN_ON(1);
645 put_cpu();
646 return -EBUSY;
647 }
648
649 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
650
651 put_cpu();
652 return ret;
653}
654EXPORT_SYMBOL(smp_call_function_single);
655
656static void stop_this_cpu (void * dummy) 603static void stop_this_cpu (void * dummy)
657{ 604{
658 local_irq_disable(); 605 local_irq_disable();
@@ -670,7 +617,7 @@ static void stop_this_cpu (void * dummy)
670 * this function calls the 'stop' function on all other CPUs in the system. 617 * this function calls the 'stop' function on all other CPUs in the system.
671 */ 618 */
672 619
673void native_smp_send_stop(void) 620static void native_smp_send_stop(void)
674{ 621{
675 /* Don't deadlock on the call lock in panic */ 622 /* Don't deadlock on the call lock in panic */
676 int nolock = !spin_trylock(&call_lock); 623 int nolock = !spin_trylock(&call_lock);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index b92cc4e8b3bb..08f07a74a9d3 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -98,9 +98,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
98 98
99u8 apicid_2_node[MAX_APICID]; 99u8 apicid_2_node[MAX_APICID];
100 100
101DEFINE_PER_CPU(unsigned long, this_cpu_off);
102EXPORT_PER_CPU_SYMBOL(this_cpu_off);
103
104/* 101/*
105 * Trampoline 80x86 program as an array. 102 * Trampoline 80x86 program as an array.
106 */ 103 */
@@ -763,25 +760,6 @@ static inline struct task_struct * alloc_idle_task(int cpu)
763#define alloc_idle_task(cpu) fork_idle(cpu) 760#define alloc_idle_task(cpu) fork_idle(cpu)
764#endif 761#endif
765 762
766/* Initialize the CPU's GDT. This is either the boot CPU doing itself
767 (still using the master per-cpu area), or a CPU doing it for a
768 secondary which will soon come up. */
769static __cpuinit void init_gdt(int cpu)
770{
771 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
772
773 pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
774 (u32 *)&gdt[GDT_ENTRY_PERCPU].b,
775 __per_cpu_offset[cpu], 0xFFFFF,
776 0x80 | DESCTYPE_S | 0x2, 0x8);
777
778 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
779 per_cpu(cpu_number, cpu) = cpu;
780}
781
782/* Defined in head.S */
783extern struct Xgt_desc_struct early_gdt_descr;
784
785static int __cpuinit do_boot_cpu(int apicid, int cpu) 763static int __cpuinit do_boot_cpu(int apicid, int cpu)
786/* 764/*
787 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 765 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c
new file mode 100644
index 000000000000..1868ae18eb4d
--- /dev/null
+++ b/arch/i386/kernel/smpcommon.c
@@ -0,0 +1,79 @@
1/*
2 * SMP stuff which is common to all sub-architectures.
3 */
4#include <linux/module.h>
5#include <asm/smp.h>
6
7DEFINE_PER_CPU(unsigned long, this_cpu_off);
8EXPORT_PER_CPU_SYMBOL(this_cpu_off);
9
10/* Initialize the CPU's GDT. This is either the boot CPU doing itself
11 (still using the master per-cpu area), or a CPU doing it for a
12 secondary which will soon come up. */
13__cpuinit void init_gdt(int cpu)
14{
15 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
16
17 pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
18 (u32 *)&gdt[GDT_ENTRY_PERCPU].b,
19 __per_cpu_offset[cpu], 0xFFFFF,
20 0x80 | DESCTYPE_S | 0x2, 0x8);
21
22 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
23 per_cpu(cpu_number, cpu) = cpu;
24}
25
26
27/**
28 * smp_call_function(): Run a function on all other CPUs.
29 * @func: The function to run. This must be fast and non-blocking.
30 * @info: An arbitrary pointer to pass to the function.
31 * @nonatomic: Unused.
32 * @wait: If true, wait (atomically) until function has completed on other CPUs.
33 *
34 * Returns 0 on success, else a negative status code.
35 *
36 * If @wait is true, then returns once @func has returned; otherwise
37 * it returns just before the target cpu calls @func.
38 *
39 * You must not call this function with disabled interrupts or from a
40 * hardware interrupt handler or from a bottom half handler.
41 */
42int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
43 int wait)
44{
45 return smp_call_function_mask(cpu_online_map, func, info, wait);
46}
47EXPORT_SYMBOL(smp_call_function);
48
49/**
50 * smp_call_function_single - Run a function on another CPU
51 * @cpu: The target CPU. Cannot be the calling CPU.
52 * @func: The function to run. This must be fast and non-blocking.
53 * @info: An arbitrary pointer to pass to the function.
54 * @nonatomic: Unused.
55 * @wait: If true, wait until function has completed on other CPUs.
56 *
57 * Returns 0 on success, else a negative status code.
58 *
59 * If @wait is true, then returns once @func has returned; otherwise
60 * it returns just before the target cpu calls @func.
61 */
62int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
63 int nonatomic, int wait)
64{
65 /* prevent preemption and reschedule on another processor */
66 int ret;
67 int me = get_cpu();
68 if (cpu == me) {
69 WARN_ON(1);
70 put_cpu();
71 return -EBUSY;
72 }
73
74 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
75
76 put_cpu();
77 return ret;
78}
79EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 50d9c52070b1..b87f8548e75a 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -27,7 +27,6 @@
27#include <asm/pgalloc.h> 27#include <asm/pgalloc.h>
28#include <asm/tlbflush.h> 28#include <asm/tlbflush.h>
29#include <asm/arch_hooks.h> 29#include <asm/arch_hooks.h>
30#include <asm/pda.h>
31 30
32/* TLB state -- visible externally, indexed physically */ 31/* TLB state -- visible externally, indexed physically */
33DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; 32DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 };
@@ -422,7 +421,7 @@ find_smp_config(void)
422 VOYAGER_SUS_IN_CONTROL_PORT); 421 VOYAGER_SUS_IN_CONTROL_PORT);
423 422
424 current_thread_info()->cpu = boot_cpu_id; 423 current_thread_info()->cpu = boot_cpu_id;
425 write_pda(cpu_number, boot_cpu_id); 424 x86_write_percpu(cpu_number, boot_cpu_id);
426} 425}
427 426
428/* 427/*
@@ -435,7 +434,7 @@ smp_store_cpu_info(int id)
435 434
436 *c = boot_cpu_data; 435 *c = boot_cpu_data;
437 436
438 identify_cpu(c); 437 identify_secondary_cpu(c);
439} 438}
440 439
441/* set up the trampoline and return the physical address of the code */ 440/* set up the trampoline and return the physical address of the code */
@@ -459,7 +458,7 @@ start_secondary(void *unused)
459 /* external functions not defined in the headers */ 458 /* external functions not defined in the headers */
460 extern void calibrate_delay(void); 459 extern void calibrate_delay(void);
461 460
462 secondary_cpu_init(); 461 cpu_init();
463 462
464 /* OK, we're in the routine */ 463 /* OK, we're in the routine */
465 ack_CPI(VIC_CPU_BOOT_CPI); 464 ack_CPI(VIC_CPU_BOOT_CPI);
@@ -572,7 +571,9 @@ do_boot_cpu(__u8 cpu)
572 /* init_tasks (in sched.c) is indexed logically */ 571 /* init_tasks (in sched.c) is indexed logically */
573 stack_start.esp = (void *) idle->thread.esp; 572 stack_start.esp = (void *) idle->thread.esp;
574 573
575 init_gdt(cpu, idle); 574 init_gdt(cpu);
575 per_cpu(current_task, cpu) = idle;
576 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
576 irq_ctx_init(cpu); 577 irq_ctx_init(cpu);
577 578
578 /* Note: Don't modify initial ss override */ 579 /* Note: Don't modify initial ss override */
@@ -859,8 +860,8 @@ smp_invalidate_interrupt(void)
859 860
860/* This routine is called with a physical cpu mask */ 861/* This routine is called with a physical cpu mask */
861static void 862static void
862flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, 863voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
863 unsigned long va) 864 unsigned long va)
864{ 865{
865 int stuck = 50000; 866 int stuck = 50000;
866 867
@@ -912,7 +913,7 @@ flush_tlb_current_task(void)
912 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); 913 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
913 local_flush_tlb(); 914 local_flush_tlb();
914 if (cpu_mask) 915 if (cpu_mask)
915 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 916 voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
916 917
917 preempt_enable(); 918 preempt_enable();
918} 919}
@@ -934,7 +935,7 @@ flush_tlb_mm (struct mm_struct * mm)
934 leave_mm(smp_processor_id()); 935 leave_mm(smp_processor_id());
935 } 936 }
936 if (cpu_mask) 937 if (cpu_mask)
937 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 938 voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
938 939
939 preempt_enable(); 940 preempt_enable();
940} 941}
@@ -955,7 +956,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
955 } 956 }
956 957
957 if (cpu_mask) 958 if (cpu_mask)
958 flush_tlb_others(cpu_mask, mm, va); 959 voyager_flush_tlb_others(cpu_mask, mm, va);
959 960
960 preempt_enable(); 961 preempt_enable();
961} 962}
@@ -1044,10 +1045,12 @@ smp_call_function_interrupt(void)
1044} 1045}
1045 1046
1046static int 1047static int
1047__smp_call_function_mask (void (*func) (void *info), void *info, int retry, 1048voyager_smp_call_function_mask (cpumask_t cpumask,
1048 int wait, __u32 mask) 1049 void (*func) (void *info), void *info,
1050 int wait)
1049{ 1051{
1050 struct call_data_struct data; 1052 struct call_data_struct data;
1053 u32 mask = cpus_addr(cpumask)[0];
1051 1054
1052 mask &= ~(1<<smp_processor_id()); 1055 mask &= ~(1<<smp_processor_id());
1053 1056
@@ -1083,47 +1086,6 @@ __smp_call_function_mask (void (*func) (void *info), void *info, int retry,
1083 return 0; 1086 return 0;
1084} 1087}
1085 1088
1086/* Call this function on all CPUs using the function_interrupt above
1087 <func> The function to run. This must be fast and non-blocking.
1088 <info> An arbitrary pointer to pass to the function.
1089 <retry> If true, keep retrying until ready.
1090 <wait> If true, wait until function has completed on other CPUs.
1091 [RETURNS] 0 on success, else a negative status code. Does not return until
1092 remote CPUs are nearly ready to execute <<func>> or are or have executed.
1093*/
1094int
1095smp_call_function(void (*func) (void *info), void *info, int retry,
1096 int wait)
1097{
1098 __u32 mask = cpus_addr(cpu_online_map)[0];
1099
1100 return __smp_call_function_mask(func, info, retry, wait, mask);
1101}
1102EXPORT_SYMBOL(smp_call_function);
1103
1104/*
1105 * smp_call_function_single - Run a function on another CPU
1106 * @func: The function to run. This must be fast and non-blocking.
1107 * @info: An arbitrary pointer to pass to the function.
1108 * @nonatomic: Currently unused.
1109 * @wait: If true, wait until function has completed on other CPUs.
1110 *
1111 * Retrurns 0 on success, else a negative status code.
1112 *
1113 * Does not return until the remote CPU is nearly ready to execute <func>
1114 * or is or has executed.
1115 */
1116
1117int
1118smp_call_function_single(int cpu, void (*func) (void *info), void *info,
1119 int nonatomic, int wait)
1120{
1121 __u32 mask = 1 << cpu;
1122
1123 return __smp_call_function_mask(func, info, nonatomic, wait, mask);
1124}
1125EXPORT_SYMBOL(smp_call_function_single);
1126
1127/* Sorry about the name. In an APIC based system, the APICs 1089/* Sorry about the name. In an APIC based system, the APICs
1128 * themselves are programmed to send a timer interrupt. This is used 1090 * themselves are programmed to send a timer interrupt. This is used
1129 * by linux to reschedule the processor. Voyager doesn't have this, 1091 * by linux to reschedule the processor. Voyager doesn't have this,
@@ -1237,8 +1199,8 @@ smp_alloc_memory(void)
1237} 1199}
1238 1200
1239/* send a reschedule CPI to one CPU by physical CPU number*/ 1201/* send a reschedule CPI to one CPU by physical CPU number*/
1240void 1202static void
1241smp_send_reschedule(int cpu) 1203voyager_smp_send_reschedule(int cpu)
1242{ 1204{
1243 send_one_CPI(cpu, VIC_RESCHEDULE_CPI); 1205 send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
1244} 1206}
@@ -1267,8 +1229,8 @@ safe_smp_processor_id(void)
1267} 1229}
1268 1230
1269/* broadcast a halt to all other CPUs */ 1231/* broadcast a halt to all other CPUs */
1270void 1232static void
1271smp_send_stop(void) 1233voyager_smp_send_stop(void)
1272{ 1234{
1273 smp_call_function(smp_stop_cpu_function, NULL, 1, 1); 1235 smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
1274} 1236}
@@ -1930,23 +1892,26 @@ smp_voyager_power_off(void *dummy)
1930 smp_stop_cpu_function(NULL); 1892 smp_stop_cpu_function(NULL);
1931} 1893}
1932 1894
1933void __init 1895static void __init
1934smp_prepare_cpus(unsigned int max_cpus) 1896voyager_smp_prepare_cpus(unsigned int max_cpus)
1935{ 1897{
1936 /* FIXME: ignore max_cpus for now */ 1898 /* FIXME: ignore max_cpus for now */
1937 smp_boot_cpus(); 1899 smp_boot_cpus();
1938} 1900}
1939 1901
1940void __devinit smp_prepare_boot_cpu(void) 1902static void __devinit voyager_smp_prepare_boot_cpu(void)
1941{ 1903{
1904 init_gdt(smp_processor_id());
1905 switch_to_new_gdt();
1906
1942 cpu_set(smp_processor_id(), cpu_online_map); 1907 cpu_set(smp_processor_id(), cpu_online_map);
1943 cpu_set(smp_processor_id(), cpu_callout_map); 1908 cpu_set(smp_processor_id(), cpu_callout_map);
1944 cpu_set(smp_processor_id(), cpu_possible_map); 1909 cpu_set(smp_processor_id(), cpu_possible_map);
1945 cpu_set(smp_processor_id(), cpu_present_map); 1910 cpu_set(smp_processor_id(), cpu_present_map);
1946} 1911}
1947 1912
1948int __devinit 1913static int __devinit
1949__cpu_up(unsigned int cpu) 1914voyager_cpu_up(unsigned int cpu)
1950{ 1915{
1951 /* This only works at boot for x86. See "rewrite" above. */ 1916 /* This only works at boot for x86. See "rewrite" above. */
1952 if (cpu_isset(cpu, smp_commenced_mask)) 1917 if (cpu_isset(cpu, smp_commenced_mask))
@@ -1962,8 +1927,8 @@ __cpu_up(unsigned int cpu)
1962 return 0; 1927 return 0;
1963} 1928}
1964 1929
1965void __init 1930static void __init
1966smp_cpus_done(unsigned int max_cpus) 1931voyager_smp_cpus_done(unsigned int max_cpus)
1967{ 1932{
1968 zap_low_mappings(); 1933 zap_low_mappings();
1969} 1934}
@@ -1972,5 +1937,16 @@ void __init
1972smp_setup_processor_id(void) 1937smp_setup_processor_id(void)
1973{ 1938{
1974 current_thread_info()->cpu = hard_smp_processor_id(); 1939 current_thread_info()->cpu = hard_smp_processor_id();
1975 write_pda(cpu_number, hard_smp_processor_id()); 1940 x86_write_percpu(cpu_number, hard_smp_processor_id());
1976} 1941}
1942
1943struct smp_ops smp_ops = {
1944 .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
1945 .smp_prepare_cpus = voyager_smp_prepare_cpus,
1946 .cpu_up = voyager_cpu_up,
1947 .smp_cpus_done = voyager_smp_cpus_done,
1948
1949 .smp_send_stop = voyager_smp_send_stop,
1950 .smp_send_reschedule = voyager_smp_send_reschedule,
1951 .smp_call_function_mask = voyager_smp_call_function_mask,
1952};
diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c
index 865f9fb9e686..13854ed8cd9a 100644
--- a/arch/m68k/lib/uaccess.c
+++ b/arch/m68k/lib/uaccess.c
@@ -181,7 +181,7 @@ EXPORT_SYMBOL(strnlen_user);
181 * Zero Userspace 181 * Zero Userspace
182 */ 182 */
183 183
184unsigned long clear_user(void __user *to, unsigned long n) 184unsigned long __clear_user(void __user *to, unsigned long n)
185{ 185{
186 unsigned long res; 186 unsigned long res;
187 187
@@ -219,4 +219,4 @@ unsigned long clear_user(void __user *to, unsigned long n)
219 219
220 return res; 220 return res;
221} 221}
222EXPORT_SYMBOL(clear_user); 222EXPORT_SYMBOL(__clear_user);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 74a567afb830..6b5173ac8131 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3802,7 +3802,6 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node)
3802 3802
3803 return ret; 3803 return ret;
3804} 3804}
3805EXPORT_SYMBOL(current_io_context);
3806 3805
3807/* 3806/*
3808 * If the current task has no IO context then create one and initialise it. 3807 * If the current task has no IO context then create one and initialise it.
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index a2bb4eccaab4..9aaf401a8975 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -384,9 +384,9 @@ static struct agp_device_ids via_agp_device_ids[] __devinitdata =
384 .device_id = PCI_DEVICE_ID_VIA_P4M800CE, 384 .device_id = PCI_DEVICE_ID_VIA_P4M800CE,
385 .chipset_name = "VT3314", 385 .chipset_name = "VT3314",
386 }, 386 },
387 /* CX700 */ 387 /* VT3324 / CX700 */
388 { 388 {
389 .device_id = PCI_DEVICE_ID_VIA_CX700, 389 .device_id = PCI_DEVICE_ID_VIA_VT3324,
390 .chipset_name = "CX700", 390 .chipset_name = "CX700",
391 }, 391 },
392 /* VT3336 */ 392 /* VT3336 */
@@ -540,7 +540,7 @@ static const struct pci_device_id agp_via_pci_table[] = {
540 ID(PCI_DEVICE_ID_VIA_83_87XX_1), 540 ID(PCI_DEVICE_ID_VIA_83_87XX_1),
541 ID(PCI_DEVICE_ID_VIA_3296_0), 541 ID(PCI_DEVICE_ID_VIA_3296_0),
542 ID(PCI_DEVICE_ID_VIA_P4M800CE), 542 ID(PCI_DEVICE_ID_VIA_P4M800CE),
543 ID(PCI_DEVICE_ID_VIA_CX700), 543 ID(PCI_DEVICE_ID_VIA_VT3324),
544 ID(PCI_DEVICE_ID_VIA_VT3336), 544 ID(PCI_DEVICE_ID_VIA_VT3336),
545 ID(PCI_DEVICE_ID_VIA_P4M890), 545 ID(PCI_DEVICE_ID_VIA_P4M890),
546 { } 546 { }
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fde92ce45153..2eb52b7a71da 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -346,12 +346,33 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
346 complete(&id_priv->comp); 346 complete(&id_priv->comp);
347} 347}
348 348
349static void cma_release_remove(struct rdma_id_private *id_priv) 349static int cma_disable_remove(struct rdma_id_private *id_priv,
350 enum cma_state state)
351{
352 unsigned long flags;
353 int ret;
354
355 spin_lock_irqsave(&id_priv->lock, flags);
356 if (id_priv->state == state) {
357 atomic_inc(&id_priv->dev_remove);
358 ret = 0;
359 } else
360 ret = -EINVAL;
361 spin_unlock_irqrestore(&id_priv->lock, flags);
362 return ret;
363}
364
365static void cma_enable_remove(struct rdma_id_private *id_priv)
350{ 366{
351 if (atomic_dec_and_test(&id_priv->dev_remove)) 367 if (atomic_dec_and_test(&id_priv->dev_remove))
352 wake_up(&id_priv->wait_remove); 368 wake_up(&id_priv->wait_remove);
353} 369}
354 370
371static int cma_has_cm_dev(struct rdma_id_private *id_priv)
372{
373 return (id_priv->id.device && id_priv->cm_id.ib);
374}
375
355struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, 376struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
356 void *context, enum rdma_port_space ps) 377 void *context, enum rdma_port_space ps)
357{ 378{
@@ -884,9 +905,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
884 struct rdma_cm_event event; 905 struct rdma_cm_event event;
885 int ret = 0; 906 int ret = 0;
886 907
887 atomic_inc(&id_priv->dev_remove); 908 if (cma_disable_remove(id_priv, CMA_CONNECT))
888 if (!cma_comp(id_priv, CMA_CONNECT)) 909 return 0;
889 goto out;
890 910
891 memset(&event, 0, sizeof event); 911 memset(&event, 0, sizeof event);
892 switch (ib_event->event) { 912 switch (ib_event->event) {
@@ -942,12 +962,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
942 /* Destroy the CM ID by returning a non-zero value. */ 962 /* Destroy the CM ID by returning a non-zero value. */
943 id_priv->cm_id.ib = NULL; 963 id_priv->cm_id.ib = NULL;
944 cma_exch(id_priv, CMA_DESTROYING); 964 cma_exch(id_priv, CMA_DESTROYING);
945 cma_release_remove(id_priv); 965 cma_enable_remove(id_priv);
946 rdma_destroy_id(&id_priv->id); 966 rdma_destroy_id(&id_priv->id);
947 return ret; 967 return ret;
948 } 968 }
949out: 969out:
950 cma_release_remove(id_priv); 970 cma_enable_remove(id_priv);
951 return ret; 971 return ret;
952} 972}
953 973
@@ -1057,11 +1077,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1057 int offset, ret; 1077 int offset, ret;
1058 1078
1059 listen_id = cm_id->context; 1079 listen_id = cm_id->context;
1060 atomic_inc(&listen_id->dev_remove); 1080 if (cma_disable_remove(listen_id, CMA_LISTEN))
1061 if (!cma_comp(listen_id, CMA_LISTEN)) { 1081 return -ECONNABORTED;
1062 ret = -ECONNABORTED;
1063 goto out;
1064 }
1065 1082
1066 memset(&event, 0, sizeof event); 1083 memset(&event, 0, sizeof event);
1067 offset = cma_user_data_offset(listen_id->id.ps); 1084 offset = cma_user_data_offset(listen_id->id.ps);
@@ -1101,11 +1118,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1101 1118
1102release_conn_id: 1119release_conn_id:
1103 cma_exch(conn_id, CMA_DESTROYING); 1120 cma_exch(conn_id, CMA_DESTROYING);
1104 cma_release_remove(conn_id); 1121 cma_enable_remove(conn_id);
1105 rdma_destroy_id(&conn_id->id); 1122 rdma_destroy_id(&conn_id->id);
1106 1123
1107out: 1124out:
1108 cma_release_remove(listen_id); 1125 cma_enable_remove(listen_id);
1109 return ret; 1126 return ret;
1110} 1127}
1111 1128
@@ -1171,9 +1188,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1171 struct sockaddr_in *sin; 1188 struct sockaddr_in *sin;
1172 int ret = 0; 1189 int ret = 0;
1173 1190
1174 memset(&event, 0, sizeof event); 1191 if (cma_disable_remove(id_priv, CMA_CONNECT))
1175 atomic_inc(&id_priv->dev_remove); 1192 return 0;
1176 1193
1194 memset(&event, 0, sizeof event);
1177 switch (iw_event->event) { 1195 switch (iw_event->event) {
1178 case IW_CM_EVENT_CLOSE: 1196 case IW_CM_EVENT_CLOSE:
1179 event.event = RDMA_CM_EVENT_DISCONNECTED; 1197 event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -1214,12 +1232,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1214 /* Destroy the CM ID by returning a non-zero value. */ 1232 /* Destroy the CM ID by returning a non-zero value. */
1215 id_priv->cm_id.iw = NULL; 1233 id_priv->cm_id.iw = NULL;
1216 cma_exch(id_priv, CMA_DESTROYING); 1234 cma_exch(id_priv, CMA_DESTROYING);
1217 cma_release_remove(id_priv); 1235 cma_enable_remove(id_priv);
1218 rdma_destroy_id(&id_priv->id); 1236 rdma_destroy_id(&id_priv->id);
1219 return ret; 1237 return ret;
1220 } 1238 }
1221 1239
1222 cma_release_remove(id_priv); 1240 cma_enable_remove(id_priv);
1223 return ret; 1241 return ret;
1224} 1242}
1225 1243
@@ -1234,11 +1252,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1234 int ret; 1252 int ret;
1235 1253
1236 listen_id = cm_id->context; 1254 listen_id = cm_id->context;
1237 atomic_inc(&listen_id->dev_remove); 1255 if (cma_disable_remove(listen_id, CMA_LISTEN))
1238 if (!cma_comp(listen_id, CMA_LISTEN)) { 1256 return -ECONNABORTED;
1239 ret = -ECONNABORTED;
1240 goto out;
1241 }
1242 1257
1243 /* Create a new RDMA id for the new IW CM ID */ 1258 /* Create a new RDMA id for the new IW CM ID */
1244 new_cm_id = rdma_create_id(listen_id->id.event_handler, 1259 new_cm_id = rdma_create_id(listen_id->id.event_handler,
@@ -1255,13 +1270,13 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1255 dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); 1270 dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
1256 if (!dev) { 1271 if (!dev) {
1257 ret = -EADDRNOTAVAIL; 1272 ret = -EADDRNOTAVAIL;
1258 cma_release_remove(conn_id); 1273 cma_enable_remove(conn_id);
1259 rdma_destroy_id(new_cm_id); 1274 rdma_destroy_id(new_cm_id);
1260 goto out; 1275 goto out;
1261 } 1276 }
1262 ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); 1277 ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1263 if (ret) { 1278 if (ret) {
1264 cma_release_remove(conn_id); 1279 cma_enable_remove(conn_id);
1265 rdma_destroy_id(new_cm_id); 1280 rdma_destroy_id(new_cm_id);
1266 goto out; 1281 goto out;
1267 } 1282 }
@@ -1270,7 +1285,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1270 ret = cma_acquire_dev(conn_id); 1285 ret = cma_acquire_dev(conn_id);
1271 mutex_unlock(&lock); 1286 mutex_unlock(&lock);
1272 if (ret) { 1287 if (ret) {
1273 cma_release_remove(conn_id); 1288 cma_enable_remove(conn_id);
1274 rdma_destroy_id(new_cm_id); 1289 rdma_destroy_id(new_cm_id);
1275 goto out; 1290 goto out;
1276 } 1291 }
@@ -1293,14 +1308,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1293 /* User wants to destroy the CM ID */ 1308 /* User wants to destroy the CM ID */
1294 conn_id->cm_id.iw = NULL; 1309 conn_id->cm_id.iw = NULL;
1295 cma_exch(conn_id, CMA_DESTROYING); 1310 cma_exch(conn_id, CMA_DESTROYING);
1296 cma_release_remove(conn_id); 1311 cma_enable_remove(conn_id);
1297 rdma_destroy_id(&conn_id->id); 1312 rdma_destroy_id(&conn_id->id);
1298 } 1313 }
1299 1314
1300out: 1315out:
1301 if (dev) 1316 if (dev)
1302 dev_put(dev); 1317 dev_put(dev);
1303 cma_release_remove(listen_id); 1318 cma_enable_remove(listen_id);
1304 return ret; 1319 return ret;
1305} 1320}
1306 1321
@@ -1519,7 +1534,7 @@ static void cma_work_handler(struct work_struct *_work)
1519 destroy = 1; 1534 destroy = 1;
1520 } 1535 }
1521out: 1536out:
1522 cma_release_remove(id_priv); 1537 cma_enable_remove(id_priv);
1523 cma_deref_id(id_priv); 1538 cma_deref_id(id_priv);
1524 if (destroy) 1539 if (destroy)
1525 rdma_destroy_id(&id_priv->id); 1540 rdma_destroy_id(&id_priv->id);
@@ -1711,13 +1726,13 @@ static void addr_handler(int status, struct sockaddr *src_addr,
1711 1726
1712 if (id_priv->id.event_handler(&id_priv->id, &event)) { 1727 if (id_priv->id.event_handler(&id_priv->id, &event)) {
1713 cma_exch(id_priv, CMA_DESTROYING); 1728 cma_exch(id_priv, CMA_DESTROYING);
1714 cma_release_remove(id_priv); 1729 cma_enable_remove(id_priv);
1715 cma_deref_id(id_priv); 1730 cma_deref_id(id_priv);
1716 rdma_destroy_id(&id_priv->id); 1731 rdma_destroy_id(&id_priv->id);
1717 return; 1732 return;
1718 } 1733 }
1719out: 1734out:
1720 cma_release_remove(id_priv); 1735 cma_enable_remove(id_priv);
1721 cma_deref_id(id_priv); 1736 cma_deref_id(id_priv);
1722} 1737}
1723 1738
@@ -2042,11 +2057,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2042 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 2057 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2043 int ret = 0; 2058 int ret = 0;
2044 2059
2045 memset(&event, 0, sizeof event); 2060 if (cma_disable_remove(id_priv, CMA_CONNECT))
2046 atomic_inc(&id_priv->dev_remove); 2061 return 0;
2047 if (!cma_comp(id_priv, CMA_CONNECT))
2048 goto out;
2049 2062
2063 memset(&event, 0, sizeof event);
2050 switch (ib_event->event) { 2064 switch (ib_event->event) {
2051 case IB_CM_SIDR_REQ_ERROR: 2065 case IB_CM_SIDR_REQ_ERROR:
2052 event.event = RDMA_CM_EVENT_UNREACHABLE; 2066 event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -2084,12 +2098,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2084 /* Destroy the CM ID by returning a non-zero value. */ 2098 /* Destroy the CM ID by returning a non-zero value. */
2085 id_priv->cm_id.ib = NULL; 2099 id_priv->cm_id.ib = NULL;
2086 cma_exch(id_priv, CMA_DESTROYING); 2100 cma_exch(id_priv, CMA_DESTROYING);
2087 cma_release_remove(id_priv); 2101 cma_enable_remove(id_priv);
2088 rdma_destroy_id(&id_priv->id); 2102 rdma_destroy_id(&id_priv->id);
2089 return ret; 2103 return ret;
2090 } 2104 }
2091out: 2105out:
2092 cma_release_remove(id_priv); 2106 cma_enable_remove(id_priv);
2093 return ret; 2107 return ret;
2094} 2108}
2095 2109
@@ -2413,7 +2427,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2413 int ret; 2427 int ret;
2414 2428
2415 id_priv = container_of(id, struct rdma_id_private, id); 2429 id_priv = container_of(id, struct rdma_id_private, id);
2416 if (!cma_comp(id_priv, CMA_CONNECT)) 2430 if (!cma_has_cm_dev(id_priv))
2417 return -EINVAL; 2431 return -EINVAL;
2418 2432
2419 switch (id->device->node_type) { 2433 switch (id->device->node_type) {
@@ -2435,7 +2449,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2435 int ret; 2449 int ret;
2436 2450
2437 id_priv = container_of(id, struct rdma_id_private, id); 2451 id_priv = container_of(id, struct rdma_id_private, id);
2438 if (!cma_comp(id_priv, CMA_CONNECT)) 2452 if (!cma_has_cm_dev(id_priv))
2439 return -EINVAL; 2453 return -EINVAL;
2440 2454
2441 switch (rdma_node_get_transport(id->device->node_type)) { 2455 switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2466,8 +2480,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
2466 int ret; 2480 int ret;
2467 2481
2468 id_priv = container_of(id, struct rdma_id_private, id); 2482 id_priv = container_of(id, struct rdma_id_private, id);
2469 if (!cma_comp(id_priv, CMA_CONNECT) && 2483 if (!cma_has_cm_dev(id_priv))
2470 !cma_comp(id_priv, CMA_DISCONNECT))
2471 return -EINVAL; 2484 return -EINVAL;
2472 2485
2473 switch (rdma_node_get_transport(id->device->node_type)) { 2486 switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2499,10 +2512,9 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2499 int ret; 2512 int ret;
2500 2513
2501 id_priv = mc->id_priv; 2514 id_priv = mc->id_priv;
2502 atomic_inc(&id_priv->dev_remove); 2515 if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
2503 if (!cma_comp(id_priv, CMA_ADDR_BOUND) && 2516 cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
2504 !cma_comp(id_priv, CMA_ADDR_RESOLVED)) 2517 return 0;
2505 goto out;
2506 2518
2507 if (!status && id_priv->id.qp) 2519 if (!status && id_priv->id.qp)
2508 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 2520 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -2524,12 +2536,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2524 ret = id_priv->id.event_handler(&id_priv->id, &event); 2536 ret = id_priv->id.event_handler(&id_priv->id, &event);
2525 if (ret) { 2537 if (ret) {
2526 cma_exch(id_priv, CMA_DESTROYING); 2538 cma_exch(id_priv, CMA_DESTROYING);
2527 cma_release_remove(id_priv); 2539 cma_enable_remove(id_priv);
2528 rdma_destroy_id(&id_priv->id); 2540 rdma_destroy_id(&id_priv->id);
2529 return 0; 2541 return 0;
2530 } 2542 }
2531out: 2543
2532 cma_release_remove(id_priv); 2544 cma_enable_remove(id_priv);
2533 return 0; 2545 return 0;
2534} 2546}
2535 2547
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index f64d42b08674..1d286d3cc2d5 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -277,6 +277,7 @@ void ehca_cleanup_mrmw_cache(void);
277 277
278extern spinlock_t ehca_qp_idr_lock; 278extern spinlock_t ehca_qp_idr_lock;
279extern spinlock_t ehca_cq_idr_lock; 279extern spinlock_t ehca_cq_idr_lock;
280extern spinlock_t hcall_lock;
280extern struct idr ehca_qp_idr; 281extern struct idr ehca_qp_idr;
281extern struct idr ehca_cq_idr; 282extern struct idr ehca_cq_idr;
282 283
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 82dda2faf4d0..100329ba3343 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -517,12 +517,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
517 else { 517 else {
518 struct ehca_cq *cq = eq->eqe_cache[i].cq; 518 struct ehca_cq *cq = eq->eqe_cache[i].cq;
519 comp_event_callback(cq); 519 comp_event_callback(cq);
520 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 520 spin_lock(&ehca_cq_idr_lock);
521 cq->nr_events--; 521 cq->nr_events--;
522 if (!cq->nr_events) 522 if (!cq->nr_events)
523 wake_up(&cq->wait_completion); 523 wake_up(&cq->wait_completion);
524 spin_unlock_irqrestore(&ehca_cq_idr_lock, 524 spin_unlock(&ehca_cq_idr_lock);
525 flags);
526 } 525 }
527 } else { 526 } else {
528 ehca_dbg(&shca->ib_device, "Got non completion event"); 527 ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -711,6 +710,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
711 kthread_stop(task); 710 kthread_stop(task);
712} 711}
713 712
713#ifdef CONFIG_HOTPLUG_CPU
714static void take_over_work(struct ehca_comp_pool *pool, 714static void take_over_work(struct ehca_comp_pool *pool,
715 int cpu) 715 int cpu)
716{ 716{
@@ -735,7 +735,6 @@ static void take_over_work(struct ehca_comp_pool *pool,
735 735
736} 736}
737 737
738#ifdef CONFIG_HOTPLUG_CPU
739static int comp_pool_callback(struct notifier_block *nfb, 738static int comp_pool_callback(struct notifier_block *nfb,
740 unsigned long action, 739 unsigned long action,
741 void *hcpu) 740 void *hcpu)
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fe90e7454560..c3f99f33b49c 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
52MODULE_LICENSE("Dual BSD/GPL"); 52MODULE_LICENSE("Dual BSD/GPL");
53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); 53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); 54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
55MODULE_VERSION("SVNEHCA_0022"); 55MODULE_VERSION("SVNEHCA_0023");
56 56
57int ehca_open_aqp1 = 0; 57int ehca_open_aqp1 = 0;
58int ehca_debug_level = 0; 58int ehca_debug_level = 0;
@@ -62,7 +62,7 @@ int ehca_use_hp_mr = 0;
62int ehca_port_act_time = 30; 62int ehca_port_act_time = 30;
63int ehca_poll_all_eqs = 1; 63int ehca_poll_all_eqs = 1;
64int ehca_static_rate = -1; 64int ehca_static_rate = -1;
65int ehca_scaling_code = 1; 65int ehca_scaling_code = 0;
66 66
67module_param_named(open_aqp1, ehca_open_aqp1, int, 0); 67module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
68module_param_named(debug_level, ehca_debug_level, int, 0); 68module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -98,6 +98,7 @@ MODULE_PARM_DESC(scaling_code,
98 98
99spinlock_t ehca_qp_idr_lock; 99spinlock_t ehca_qp_idr_lock;
100spinlock_t ehca_cq_idr_lock; 100spinlock_t ehca_cq_idr_lock;
101spinlock_t hcall_lock;
101DEFINE_IDR(ehca_qp_idr); 102DEFINE_IDR(ehca_qp_idr);
102DEFINE_IDR(ehca_cq_idr); 103DEFINE_IDR(ehca_cq_idr);
103 104
@@ -453,15 +454,14 @@ static ssize_t ehca_store_debug_level(struct device_driver *ddp,
453DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, 454DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
454 ehca_show_debug_level, ehca_store_debug_level); 455 ehca_show_debug_level, ehca_store_debug_level);
455 456
456void ehca_create_driver_sysfs(struct ibmebus_driver *drv) 457static struct attribute *ehca_drv_attrs[] = {
457{ 458 &driver_attr_debug_level.attr,
458 driver_create_file(&drv->driver, &driver_attr_debug_level); 459 NULL
459} 460};
460 461
461void ehca_remove_driver_sysfs(struct ibmebus_driver *drv) 462static struct attribute_group ehca_drv_attr_grp = {
462{ 463 .attrs = ehca_drv_attrs
463 driver_remove_file(&drv->driver, &driver_attr_debug_level); 464};
464}
465 465
466#define EHCA_RESOURCE_ATTR(name) \ 466#define EHCA_RESOURCE_ATTR(name) \
467static ssize_t ehca_show_##name(struct device *dev, \ 467static ssize_t ehca_show_##name(struct device *dev, \
@@ -523,44 +523,28 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
523} 523}
524static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); 524static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
525 525
526static struct attribute *ehca_dev_attrs[] = {
527 &dev_attr_adapter_handle.attr,
528 &dev_attr_num_ports.attr,
529 &dev_attr_hw_ver.attr,
530 &dev_attr_max_eq.attr,
531 &dev_attr_cur_eq.attr,
532 &dev_attr_max_cq.attr,
533 &dev_attr_cur_cq.attr,
534 &dev_attr_max_qp.attr,
535 &dev_attr_cur_qp.attr,
536 &dev_attr_max_mr.attr,
537 &dev_attr_cur_mr.attr,
538 &dev_attr_max_mw.attr,
539 &dev_attr_cur_mw.attr,
540 &dev_attr_max_pd.attr,
541 &dev_attr_max_ah.attr,
542 NULL
543};
526 544
527void ehca_create_device_sysfs(struct ibmebus_dev *dev) 545static struct attribute_group ehca_dev_attr_grp = {
528{ 546 .attrs = ehca_dev_attrs
529 device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle); 547};
530 device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
531 device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
532 device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
533 device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
534 device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
535 device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
536 device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
537 device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
538 device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
539 device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
540 device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
541 device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
542 device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
543 device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
544}
545
546void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
547{
548 device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
549 device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
550 device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
551 device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
552 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
553 device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
554 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
555 device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
556 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
557 device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
558 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
559 device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
560 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
561 device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
562 device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
563}
564 548
565static int __devinit ehca_probe(struct ibmebus_dev *dev, 549static int __devinit ehca_probe(struct ibmebus_dev *dev,
566 const struct of_device_id *id) 550 const struct of_device_id *id)
@@ -668,7 +652,10 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
668 } 652 }
669 } 653 }
670 654
671 ehca_create_device_sysfs(dev); 655 ret = sysfs_create_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp);
656 if (ret) /* only complain; we can live without attributes */
657 ehca_err(&shca->ib_device,
658 "Cannot create device attributes ret=%d", ret);
672 659
673 spin_lock(&shca_list_lock); 660 spin_lock(&shca_list_lock);
674 list_add(&shca->shca_list, &shca_list); 661 list_add(&shca->shca_list, &shca_list);
@@ -720,7 +707,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
720 struct ehca_shca *shca = dev->ofdev.dev.driver_data; 707 struct ehca_shca *shca = dev->ofdev.dev.driver_data;
721 int ret; 708 int ret;
722 709
723 ehca_remove_device_sysfs(dev); 710 sysfs_remove_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp);
724 711
725 if (ehca_open_aqp1 == 1) { 712 if (ehca_open_aqp1 == 1) {
726 int i; 713 int i;
@@ -812,11 +799,12 @@ int __init ehca_module_init(void)
812 int ret; 799 int ret;
813 800
814 printk(KERN_INFO "eHCA Infiniband Device Driver " 801 printk(KERN_INFO "eHCA Infiniband Device Driver "
815 "(Rel.: SVNEHCA_0022)\n"); 802 "(Rel.: SVNEHCA_0023)\n");
816 idr_init(&ehca_qp_idr); 803 idr_init(&ehca_qp_idr);
817 idr_init(&ehca_cq_idr); 804 idr_init(&ehca_cq_idr);
818 spin_lock_init(&ehca_qp_idr_lock); 805 spin_lock_init(&ehca_qp_idr_lock);
819 spin_lock_init(&ehca_cq_idr_lock); 806 spin_lock_init(&ehca_cq_idr_lock);
807 spin_lock_init(&hcall_lock);
820 808
821 INIT_LIST_HEAD(&shca_list); 809 INIT_LIST_HEAD(&shca_list);
822 spin_lock_init(&shca_list_lock); 810 spin_lock_init(&shca_list_lock);
@@ -838,7 +826,9 @@ int __init ehca_module_init(void)
838 goto module_init2; 826 goto module_init2;
839 } 827 }
840 828
841 ehca_create_driver_sysfs(&ehca_driver); 829 ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp);
830 if (ret) /* only complain; we can live without attributes */
831 ehca_gen_err("Cannot create driver attributes ret=%d", ret);
842 832
843 if (ehca_poll_all_eqs != 1) { 833 if (ehca_poll_all_eqs != 1) {
844 ehca_gen_err("WARNING!!!"); 834 ehca_gen_err("WARNING!!!");
@@ -865,7 +855,7 @@ void __exit ehca_module_exit(void)
865 if (ehca_poll_all_eqs == 1) 855 if (ehca_poll_all_eqs == 1)
866 del_timer_sync(&poll_eqs_timer); 856 del_timer_sync(&poll_eqs_timer);
867 857
868 ehca_remove_driver_sysfs(&ehca_driver); 858 sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp);
869 ibmebus_unregister_driver(&ehca_driver); 859 ibmebus_unregister_driver(&ehca_driver);
870 860
871 ehca_destroy_slab_caches(); 861 ehca_destroy_slab_caches();
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index df0516f24379..b5bc787c77b6 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -523,6 +523,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
523 goto create_qp_exit1; 523 goto create_qp_exit1;
524 } 524 }
525 525
526 my_qp->ib_qp.qp_num = my_qp->real_qp_num;
527
526 switch (init_attr->qp_type) { 528 switch (init_attr->qp_type) {
527 case IB_QPT_RC: 529 case IB_QPT_RC:
528 if (isdaqp == 0) { 530 if (isdaqp == 0) {
@@ -568,7 +570,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
568 parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; 570 parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
569 parms.act_nr_send_sges = init_attr->cap.max_send_sge; 571 parms.act_nr_send_sges = init_attr->cap.max_send_sge;
570 parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; 572 parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
571 my_qp->real_qp_num = 573 my_qp->ib_qp.qp_num =
572 (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; 574 (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
573 } 575 }
574 576
@@ -595,7 +597,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
595 my_qp->ib_qp.recv_cq = init_attr->recv_cq; 597 my_qp->ib_qp.recv_cq = init_attr->recv_cq;
596 my_qp->ib_qp.send_cq = init_attr->send_cq; 598 my_qp->ib_qp.send_cq = init_attr->send_cq;
597 599
598 my_qp->ib_qp.qp_num = my_qp->real_qp_num;
599 my_qp->ib_qp.qp_type = init_attr->qp_type; 600 my_qp->ib_qp.qp_type = init_attr->qp_type;
600 601
601 my_qp->qp_type = init_attr->qp_type; 602 my_qp->qp_type = init_attr->qp_type;
@@ -968,17 +969,21 @@ static int internal_modify_qp(struct ib_qp *ibqp,
968 ((ehca_mult - 1) / ah_mult) : 0; 969 ((ehca_mult - 1) / ah_mult) : 0;
969 else 970 else
970 mqpcb->max_static_rate = 0; 971 mqpcb->max_static_rate = 0;
971
972 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); 972 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
973 973
974 /* 974 /*
975 * Always supply the GRH flag, even if it's zero, to give the
976 * hypervisor a clear "yes" or "no" instead of a "perhaps"
977 */
978 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
979
980 /*
975 * only if GRH is TRUE we might consider SOURCE_GID_IDX 981 * only if GRH is TRUE we might consider SOURCE_GID_IDX
976 * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 982 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
977 */ 983 */
978 if (attr->ah_attr.ah_flags == IB_AH_GRH) { 984 if (attr->ah_attr.ah_flags == IB_AH_GRH) {
979 mqpcb->send_grh_flag = 1 << 31; 985 mqpcb->send_grh_flag = 1;
980 update_mask |= 986
981 EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
982 mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; 987 mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
983 update_mask |= 988 update_mask |=
984 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); 989 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index b564fcd3b282..7f0beec74f70 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -154,7 +154,8 @@ static long ehca_plpar_hcall9(unsigned long opcode,
154 unsigned long arg9) 154 unsigned long arg9)
155{ 155{
156 long ret; 156 long ret;
157 int i, sleep_msecs; 157 int i, sleep_msecs, lock_is_set = 0;
158 unsigned long flags;
158 159
159 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " 160 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
160 "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", 161 "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
@@ -162,10 +163,18 @@ static long ehca_plpar_hcall9(unsigned long opcode,
162 arg8, arg9); 163 arg8, arg9);
163 164
164 for (i = 0; i < 5; i++) { 165 for (i = 0; i < 5; i++) {
166 if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) {
167 spin_lock_irqsave(&hcall_lock, flags);
168 lock_is_set = 1;
169 }
170
165 ret = plpar_hcall9(opcode, outs, 171 ret = plpar_hcall9(opcode, outs,
166 arg1, arg2, arg3, arg4, arg5, 172 arg1, arg2, arg3, arg4, arg5,
167 arg6, arg7, arg8, arg9); 173 arg6, arg7, arg8, arg9);
168 174
175 if (lock_is_set)
176 spin_unlock_irqrestore(&hcall_lock, flags);
177
169 if (H_IS_LONG_BUSY(ret)) { 178 if (H_IS_LONG_BUSY(ret)) {
170 sleep_msecs = get_longbusy_msecs(ret); 179 sleep_msecs = get_longbusy_msecs(ret);
171 msleep_interruptible(sleep_msecs); 180 msleep_interruptible(sleep_msecs);
@@ -193,11 +202,11 @@ static long ehca_plpar_hcall9(unsigned long opcode,
193 opcode, ret, outs[0], outs[1], outs[2], outs[3], 202 opcode, ret, outs[0], outs[1], outs[2], outs[3],
194 outs[4], outs[5], outs[6], outs[7], outs[8]); 203 outs[4], outs[5], outs[6], outs[7], outs[8]);
195 return ret; 204 return ret;
196
197 } 205 }
198 206
199 return H_BUSY; 207 return H_BUSY;
200} 208}
209
201u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, 210u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
202 struct ehca_pfeq *pfeq, 211 struct ehca_pfeq *pfeq,
203 const u32 neq_control, 212 const u32 neq_control,
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 1b9c30857754..4e2e3dfeb2c8 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -747,7 +747,6 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
747 747
748static int ipath_pe_intconfig(struct ipath_devdata *dd) 748static int ipath_pe_intconfig(struct ipath_devdata *dd)
749{ 749{
750 u64 val;
751 u32 chiprev; 750 u32 chiprev;
752 751
753 /* 752 /*
@@ -760,9 +759,9 @@ static int ipath_pe_intconfig(struct ipath_devdata *dd)
760 if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { 759 if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
761 /* Rev2+ reports extra errors via internal GPIO pins */ 760 /* Rev2+ reports extra errors via internal GPIO pins */
762 dd->ipath_flags |= IPATH_GPIO_ERRINTRS; 761 dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
763 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); 762 dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
764 val |= IPATH_GPIO_ERRINTR_MASK; 763 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
765 ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); 764 dd->ipath_gpio_mask);
766 } 765 }
767 return 0; 766 return 0;
768} 767}
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 45d033169c6e..a90d3b5699c4 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1056,7 +1056,7 @@ irqreturn_t ipath_intr(int irq, void *data)
1056 gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); 1056 gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
1057 chk0rcv = 1; 1057 chk0rcv = 1;
1058 } 1058 }
1059 if (unlikely(gpiostatus)) { 1059 if (gpiostatus) {
1060 /* 1060 /*
1061 * Some unexpected bits remain. If they could have 1061 * Some unexpected bits remain. If they could have
1062 * caused the interrupt, complain and clear. 1062 * caused the interrupt, complain and clear.
@@ -1065,9 +1065,8 @@ irqreturn_t ipath_intr(int irq, void *data)
1065 * GPIO interrupts, possibly on a "three strikes" 1065 * GPIO interrupts, possibly on a "three strikes"
1066 * basis. 1066 * basis.
1067 */ 1067 */
1068 u32 mask; 1068 const u32 mask = (u32) dd->ipath_gpio_mask;
1069 mask = ipath_read_kreg32( 1069
1070 dd, dd->ipath_kregs->kr_gpio_mask);
1071 if (mask & gpiostatus) { 1070 if (mask & gpiostatus) {
1072 ipath_dbg("Unexpected GPIO IRQ bits %x\n", 1071 ipath_dbg("Unexpected GPIO IRQ bits %x\n",
1073 gpiostatus & mask); 1072 gpiostatus & mask);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e900c2593f44..12194f3dd8cc 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -397,6 +397,8 @@ struct ipath_devdata {
397 unsigned long ipath_pioavailshadow[8]; 397 unsigned long ipath_pioavailshadow[8];
398 /* shadow of kr_gpio_out, for rmw ops */ 398 /* shadow of kr_gpio_out, for rmw ops */
399 u64 ipath_gpio_out; 399 u64 ipath_gpio_out;
400 /* shadow the gpio mask register */
401 u64 ipath_gpio_mask;
400 /* kr_revision shadow */ 402 /* kr_revision shadow */
401 u64 ipath_revision; 403 u64 ipath_revision;
402 /* 404 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 12933e77c7e9..bb70845279b8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1387,13 +1387,12 @@ static int enable_timer(struct ipath_devdata *dd)
1387 * processing. 1387 * processing.
1388 */ 1388 */
1389 if (dd->ipath_flags & IPATH_GPIO_INTR) { 1389 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1390 u64 val;
1391 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, 1390 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1392 0x2074076542310ULL); 1391 0x2074076542310ULL);
1393 /* Enable GPIO bit 2 interrupt */ 1392 /* Enable GPIO bit 2 interrupt */
1394 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); 1393 dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1395 val |= (u64) (1 << IPATH_GPIO_PORT0_BIT); 1394 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1396 ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); 1395 dd->ipath_gpio_mask);
1397 } 1396 }
1398 1397
1399 init_timer(&dd->verbs_timer); 1398 init_timer(&dd->verbs_timer);
@@ -1412,8 +1411,9 @@ static int disable_timer(struct ipath_devdata *dd)
1412 u64 val; 1411 u64 val;
1413 /* Disable GPIO bit 2 interrupt */ 1412 /* Disable GPIO bit 2 interrupt */
1414 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); 1413 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
1415 val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); 1414 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1416 ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); 1415 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1416 dd->ipath_gpio_mask);
1417 /* 1417 /*
1418 * We might want to undo changes to debugportselect, 1418 * We might want to undo changes to debugportselect,
1419 * but how? 1419 * but how?
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 688ecb4c39f3..402f3a20ec0a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -489,6 +489,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
489 ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 489 ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
490 if (!ibdev->uar_map) 490 if (!ibdev->uar_map)
491 goto err_uar; 491 goto err_uar;
492 MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
492 493
493 INIT_LIST_HEAD(&ibdev->pgdir_list); 494 INIT_LIST_HEAD(&ibdev->pgdir_list);
494 mutex_init(&ibdev->pgdir_mutex); 495 mutex_init(&ibdev->pgdir_mutex);
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index cf0868f6e965..ca224d018af2 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -284,7 +284,7 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
284{ 284{
285 struct mthca_cqe *cqe; 285 struct mthca_cqe *cqe;
286 u32 prod_index; 286 u32 prod_index;
287 int nfreed = 0; 287 int i, nfreed = 0;
288 288
289 spin_lock_irq(&cq->lock); 289 spin_lock_irq(&cq->lock);
290 290
@@ -321,6 +321,8 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
321 } 321 }
322 322
323 if (nfreed) { 323 if (nfreed) {
324 for (i = 0; i < nfreed; ++i)
325 set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
324 wmb(); 326 wmb();
325 cq->cons_index += nfreed; 327 cq->cons_index += nfreed;
326 update_cons_index(dev, cq, nfreed); 328 update_cons_index(dev, cq, nfreed);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index fee60c852d14..72fabb822f1c 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1862,6 +1862,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1862 dev->kar + MTHCA_RECEIVE_DOORBELL, 1862 dev->kar + MTHCA_RECEIVE_DOORBELL,
1863 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1863 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1864 1864
1865 qp->rq.next_ind = ind;
1865 qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; 1866 qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
1866 size0 = 0; 1867 size0 = 0;
1867 } 1868 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 785bc8505f2a..eec833b81e9b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -257,10 +257,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
257 cm_id->context = p; 257 cm_id->context = p;
258 p->jiffies = jiffies; 258 p->jiffies = jiffies;
259 spin_lock_irq(&priv->lock); 259 spin_lock_irq(&priv->lock);
260 if (list_empty(&priv->cm.passive_ids))
261 queue_delayed_work(ipoib_workqueue,
262 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
260 list_add(&p->list, &priv->cm.passive_ids); 263 list_add(&p->list, &priv->cm.passive_ids);
261 spin_unlock_irq(&priv->lock); 264 spin_unlock_irq(&priv->lock);
262 queue_delayed_work(ipoib_workqueue,
263 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
264 return 0; 265 return 0;
265 266
266err_rep: 267err_rep:
@@ -378,8 +379,6 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
378 if (!list_empty(&p->list)) 379 if (!list_empty(&p->list))
379 list_move(&p->list, &priv->cm.passive_ids); 380 list_move(&p->list, &priv->cm.passive_ids);
380 spin_unlock_irqrestore(&priv->lock, flags); 381 spin_unlock_irqrestore(&priv->lock, flags);
381 queue_delayed_work(ipoib_workqueue,
382 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
383 } 382 }
384 } 383 }
385 384
@@ -1100,6 +1099,10 @@ static void ipoib_cm_stale_task(struct work_struct *work)
1100 kfree(p); 1099 kfree(p);
1101 spin_lock_irq(&priv->lock); 1100 spin_lock_irq(&priv->lock);
1102 } 1101 }
1102
1103 if (!list_empty(&priv->cm.passive_ids))
1104 queue_delayed_work(ipoib_workqueue,
1105 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
1103 spin_unlock_irq(&priv->lock); 1106 spin_unlock_irq(&priv->lock);
1104} 1107}
1105 1108
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index fb99cd445504..c5baa197bc08 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2508,6 +2508,7 @@ config MLX4_CORE
2508 2508
2509config MLX4_DEBUG 2509config MLX4_DEBUG
2510 bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED) 2510 bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED)
2511 depends on MLX4_CORE
2511 default y 2512 default y
2512 ---help--- 2513 ---help---
2513 This option causes debugging code to be compiled into the 2514 This option causes debugging code to be compiled into the
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 4debb024eaf9..20b8c0d3ced4 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -542,8 +542,6 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
542 struct mlx4_priv *priv = mlx4_priv(dev); 542 struct mlx4_priv *priv = mlx4_priv(dev);
543 int err; 543 int err;
544 544
545 MLX4_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
546
547 err = mlx4_init_uar_table(dev); 545 err = mlx4_init_uar_table(dev);
548 if (err) { 546 if (err) {
549 mlx4_err(dev, "Failed to initialize " 547 mlx4_err(dev, "Failed to initialize "
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 9befbae3d196..3d3b6d24d8d3 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -275,7 +275,6 @@ struct mlx4_priv {
275 275
276 struct mlx4_uar driver_uar; 276 struct mlx4_uar driver_uar;
277 void __iomem *kar; 277 void __iomem *kar;
278 MLX4_DECLARE_DOORBELL_LOCK(doorbell_lock)
279 278
280 u32 rev_id; 279 u32 rev_id;
281 char board_id[MLX4_BOARD_ID_LEN]; 280 char board_id[MLX4_BOARD_ID_LEN];
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1aad34ea61a4..0b73cd45a06d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * fs/eventpoll.c ( Efficent event polling implementation ) 2 * fs/eventpoll.c (Efficent event polling implementation)
3 * Copyright (C) 2001,...,2006 Davide Libenzi 3 * Copyright (C) 2001,...,2007 Davide Libenzi
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -26,7 +26,6 @@
26#include <linux/hash.h> 26#include <linux/hash.h>
27#include <linux/spinlock.h> 27#include <linux/spinlock.h>
28#include <linux/syscalls.h> 28#include <linux/syscalls.h>
29#include <linux/rwsem.h>
30#include <linux/rbtree.h> 29#include <linux/rbtree.h>
31#include <linux/wait.h> 30#include <linux/wait.h>
32#include <linux/eventpoll.h> 31#include <linux/eventpoll.h>
@@ -39,15 +38,14 @@
39#include <asm/io.h> 38#include <asm/io.h>
40#include <asm/mman.h> 39#include <asm/mman.h>
41#include <asm/atomic.h> 40#include <asm/atomic.h>
42#include <asm/semaphore.h>
43 41
44/* 42/*
45 * LOCKING: 43 * LOCKING:
46 * There are three level of locking required by epoll : 44 * There are three level of locking required by epoll :
47 * 45 *
48 * 1) epmutex (mutex) 46 * 1) epmutex (mutex)
49 * 2) ep->sem (rw_semaphore) 47 * 2) ep->mtx (mutex)
50 * 3) ep->lock (rw_lock) 48 * 3) ep->lock (spinlock)
51 * 49 *
52 * The acquire order is the one listed above, from 1 to 3. 50 * The acquire order is the one listed above, from 1 to 3.
53 * We need a spinlock (ep->lock) because we manipulate objects 51 * We need a spinlock (ep->lock) because we manipulate objects
@@ -57,20 +55,20 @@
57 * a spinlock. During the event transfer loop (from kernel to 55 * a spinlock. During the event transfer loop (from kernel to
58 * user space) we could end up sleeping due a copy_to_user(), so 56 * user space) we could end up sleeping due a copy_to_user(), so
59 * we need a lock that will allow us to sleep. This lock is a 57 * we need a lock that will allow us to sleep. This lock is a
60 * read-write semaphore (ep->sem). It is acquired on read during 58 * mutex (ep->mtx). It is acquired during the event transfer loop,
61 * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) 59 * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file().
62 * and during eventpoll_release_file(). Then we also need a global 60 * Then we also need a global mutex to serialize eventpoll_release_file()
63 * semaphore to serialize eventpoll_release_file() and ep_free(). 61 * and ep_free().
64 * This semaphore is acquired by ep_free() during the epoll file 62 * This mutex is acquired by ep_free() during the epoll file
65 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
66 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
67 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
68 * It is possible to drop the "ep->sem" and to use the global 66 * It is possible to drop the "ep->mtx" and to use the global
69 * semaphore "epmutex" (together with "ep->lock") to have it working, 67 * mutex "epmutex" (together with "ep->lock") to have it working,
70 * but having "ep->sem" will make the interface more scalable. 68 * but having "ep->mtx" will make the interface more scalable.
71 * Events that require holding "epmutex" are very rare, while for 69 * Events that require holding "epmutex" are very rare, while for
72 * normal operations the epoll private "ep->sem" will guarantee 70 * normal operations the epoll private "ep->mtx" will guarantee
73 * a greater scalability. 71 * a better scalability.
74 */ 72 */
75 73
76#define DEBUG_EPOLL 0 74#define DEBUG_EPOLL 0
@@ -102,6 +100,8 @@
102 100
103#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 101#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
104 102
103#define EP_UNACTIVE_PTR ((void *) -1L)
104
105struct epoll_filefd { 105struct epoll_filefd {
106 struct file *file; 106 struct file *file;
107 int fd; 107 int fd;
@@ -111,7 +111,7 @@ struct epoll_filefd {
111 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". 111 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
112 * It is used to keep track on all tasks that are currently inside the wake_up() code 112 * It is used to keep track on all tasks that are currently inside the wake_up() code
113 * to 1) short-circuit the one coming from the same task and same wait queue head 113 * to 1) short-circuit the one coming from the same task and same wait queue head
114 * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting 114 * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting
115 * 3) let go the ones coming from other tasks. 115 * 3) let go the ones coming from other tasks.
116 */ 116 */
117struct wake_task_node { 117struct wake_task_node {
@@ -130,21 +130,57 @@ struct poll_safewake {
130}; 130};
131 131
132/* 132/*
133 * Each file descriptor added to the eventpoll interface will
134 * have an entry of this type linked to the "rbr" RB tree.
135 */
136struct epitem {
137 /* RB tree node used to link this structure to the eventpoll RB tree */
138 struct rb_node rbn;
139
140 /* List header used to link this structure to the eventpoll ready list */
141 struct list_head rdllink;
142
143 /*
144 * Works together "struct eventpoll"->ovflist in keeping the
145 * single linked chain of items.
146 */
147 struct epitem *next;
148
149 /* The file descriptor information this item refers to */
150 struct epoll_filefd ffd;
151
152 /* Number of active wait queue attached to poll operations */
153 int nwait;
154
155 /* List containing poll wait queues */
156 struct list_head pwqlist;
157
158 /* The "container" of this item */
159 struct eventpoll *ep;
160
161 /* List header used to link this item to the "struct file" items list */
162 struct list_head fllink;
163
164 /* The structure that describe the interested events and the source fd */
165 struct epoll_event event;
166};
167
168/*
133 * This structure is stored inside the "private_data" member of the file 169 * This structure is stored inside the "private_data" member of the file
134 * structure and rapresent the main data sructure for the eventpoll 170 * structure and rapresent the main data sructure for the eventpoll
135 * interface. 171 * interface.
136 */ 172 */
137struct eventpoll { 173struct eventpoll {
138 /* Protect the this structure access */ 174 /* Protect the this structure access */
139 rwlock_t lock; 175 spinlock_t lock;
140 176
141 /* 177 /*
142 * This semaphore is used to ensure that files are not removed 178 * This mutex is used to ensure that files are not removed
143 * while epoll is using them. This is read-held during the event 179 * while epoll is using them. This is held during the event
144 * collection loop and it is write-held during the file cleanup 180 * collection loop, the file cleanup path, the epoll file exit
145 * path, the epoll file exit code and the ctl operations. 181 * code and the ctl operations.
146 */ 182 */
147 struct rw_semaphore sem; 183 struct mutex mtx;
148 184
149 /* Wait queue used by sys_epoll_wait() */ 185 /* Wait queue used by sys_epoll_wait() */
150 wait_queue_head_t wq; 186 wait_queue_head_t wq;
@@ -155,8 +191,15 @@ struct eventpoll {
155 /* List of ready file descriptors */ 191 /* List of ready file descriptors */
156 struct list_head rdllist; 192 struct list_head rdllist;
157 193
158 /* RB-Tree root used to store monitored fd structs */ 194 /* RB tree root used to store monitored fd structs */
159 struct rb_root rbr; 195 struct rb_root rbr;
196
197 /*
198 * This is a single linked list that chains all the "struct epitem" that
199 * happened while transfering ready events to userspace w/out
200 * holding ->lock.
201 */
202 struct epitem *ovflist;
160}; 203};
161 204
162/* Wait structure used by the poll hooks */ 205/* Wait structure used by the poll hooks */
@@ -177,42 +220,6 @@ struct eppoll_entry {
177 wait_queue_head_t *whead; 220 wait_queue_head_t *whead;
178}; 221};
179 222
180/*
181 * Each file descriptor added to the eventpoll interface will
182 * have an entry of this type linked to the "rbr" RB tree.
183 */
184struct epitem {
185 /* RB-Tree node used to link this structure to the eventpoll rb-tree */
186 struct rb_node rbn;
187
188 /* List header used to link this structure to the eventpoll ready list */
189 struct list_head rdllink;
190
191 /* The file descriptor information this item refers to */
192 struct epoll_filefd ffd;
193
194 /* Number of active wait queue attached to poll operations */
195 int nwait;
196
197 /* List containing poll wait queues */
198 struct list_head pwqlist;
199
200 /* The "container" of this item */
201 struct eventpoll *ep;
202
203 /* The structure that describe the interested events and the source fd */
204 struct epoll_event event;
205
206 /*
207 * Used to keep track of the usage count of the structure. This avoids
208 * that the structure will desappear from underneath our processing.
209 */
210 atomic_t usecnt;
211
212 /* List header used to link this item to the "struct file" items list */
213 struct list_head fllink;
214};
215
216/* Wrapper struct used by poll queueing */ 223/* Wrapper struct used by poll queueing */
217struct ep_pqueue { 224struct ep_pqueue {
218 poll_table pt; 225 poll_table pt;
@@ -220,7 +227,7 @@ struct ep_pqueue {
220}; 227};
221 228
222/* 229/*
223 * This semaphore is used to serialize ep_free() and eventpoll_release_file(). 230 * This mutex is used to serialize ep_free() and eventpoll_release_file().
224 */ 231 */
225static struct mutex epmutex; 232static struct mutex epmutex;
226 233
@@ -234,7 +241,7 @@ static struct kmem_cache *epi_cache __read_mostly;
234static struct kmem_cache *pwq_cache __read_mostly; 241static struct kmem_cache *pwq_cache __read_mostly;
235 242
236 243
237/* Setup the structure that is used as key for the rb-tree */ 244/* Setup the structure that is used as key for the RB tree */
238static inline void ep_set_ffd(struct epoll_filefd *ffd, 245static inline void ep_set_ffd(struct epoll_filefd *ffd,
239 struct file *file, int fd) 246 struct file *file, int fd)
240{ 247{
@@ -242,7 +249,7 @@ static inline void ep_set_ffd(struct epoll_filefd *ffd,
242 ffd->fd = fd; 249 ffd->fd = fd;
243} 250}
244 251
245/* Compare rb-tree keys */ 252/* Compare RB tree keys */
246static inline int ep_cmp_ffd(struct epoll_filefd *p1, 253static inline int ep_cmp_ffd(struct epoll_filefd *p1,
247 struct epoll_filefd *p2) 254 struct epoll_filefd *p2)
248{ 255{
@@ -250,20 +257,20 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
250 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
251} 258}
252 259
253/* Special initialization for the rb-tree node to detect linkage */ 260/* Special initialization for the RB tree node to detect linkage */
254static inline void ep_rb_initnode(struct rb_node *n) 261static inline void ep_rb_initnode(struct rb_node *n)
255{ 262{
256 rb_set_parent(n, n); 263 rb_set_parent(n, n);
257} 264}
258 265
259/* Removes a node from the rb-tree and marks it for a fast is-linked check */ 266/* Removes a node from the RB tree and marks it for a fast is-linked check */
260static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) 267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
261{ 268{
262 rb_erase(n, r); 269 rb_erase(n, r);
263 rb_set_parent(n, n); 270 rb_set_parent(n, n);
264} 271}
265 272
266/* Fast check to verify that the item is linked to the main rb-tree */ 273/* Fast check to verify that the item is linked to the main RB tree */
267static inline int ep_rb_linked(struct rb_node *n) 274static inline int ep_rb_linked(struct rb_node *n)
268{ 275{
269 return rb_parent(n) != n; 276 return rb_parent(n) != n;
@@ -381,78 +388,11 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
381} 388}
382 389
383/* 390/*
384 * Unlink the "struct epitem" from all places it might have been hooked up.
385 * This function must be called with write IRQ lock on "ep->lock".
386 */
387static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
388{
389 int error;
390
391 /*
392 * It can happen that this one is called for an item already unlinked.
393 * The check protect us from doing a double unlink ( crash ).
394 */
395 error = -ENOENT;
396 if (!ep_rb_linked(&epi->rbn))
397 goto error_return;
398
399 /*
400 * Clear the event mask for the unlinked item. This will avoid item
401 * notifications to be sent after the unlink operation from inside
402 * the kernel->userspace event transfer loop.
403 */
404 epi->event.events = 0;
405
406 /*
407 * At this point is safe to do the job, unlink the item from our rb-tree.
408 * This operation togheter with the above check closes the door to
409 * double unlinks.
410 */
411 ep_rb_erase(&epi->rbn, &ep->rbr);
412
413 /*
414 * If the item we are going to remove is inside the ready file descriptors
415 * we want to remove it from this list to avoid stale events.
416 */
417 if (ep_is_linked(&epi->rdllink))
418 list_del_init(&epi->rdllink);
419
420 error = 0;
421error_return:
422
423 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
424 current, ep, epi->ffd.file, error));
425
426 return error;
427}
428
429/*
430 * Increment the usage count of the "struct epitem" making it sure
431 * that the user will have a valid pointer to reference.
432 */
433static void ep_use_epitem(struct epitem *epi)
434{
435 atomic_inc(&epi->usecnt);
436}
437
438/*
439 * Decrement ( release ) the usage count by signaling that the user
440 * has finished using the structure. It might lead to freeing the
441 * structure itself if the count goes to zero.
442 */
443static void ep_release_epitem(struct epitem *epi)
444{
445 if (atomic_dec_and_test(&epi->usecnt))
446 kmem_cache_free(epi_cache, epi);
447}
448
449/*
450 * Removes a "struct epitem" from the eventpoll RB tree and deallocates 391 * Removes a "struct epitem" from the eventpoll RB tree and deallocates
451 * all the associated resources. 392 * all the associated resources. Must be called with "mtx" held.
452 */ 393 */
453static int ep_remove(struct eventpoll *ep, struct epitem *epi) 394static int ep_remove(struct eventpoll *ep, struct epitem *epi)
454{ 395{
455 int error;
456 unsigned long flags; 396 unsigned long flags;
457 struct file *file = epi->ffd.file; 397 struct file *file = epi->ffd.file;
458 398
@@ -472,26 +412,21 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
472 list_del_init(&epi->fllink); 412 list_del_init(&epi->fllink);
473 spin_unlock(&file->f_ep_lock); 413 spin_unlock(&file->f_ep_lock);
474 414
475 /* We need to acquire the write IRQ lock before calling ep_unlink() */ 415 if (ep_rb_linked(&epi->rbn))
476 write_lock_irqsave(&ep->lock, flags); 416 ep_rb_erase(&epi->rbn, &ep->rbr);
477
478 /* Really unlink the item from the RB tree */
479 error = ep_unlink(ep, epi);
480
481 write_unlock_irqrestore(&ep->lock, flags);
482 417
483 if (error) 418 spin_lock_irqsave(&ep->lock, flags);
484 goto error_return; 419 if (ep_is_linked(&epi->rdllink))
420 list_del_init(&epi->rdllink);
421 spin_unlock_irqrestore(&ep->lock, flags);
485 422
486 /* At this point it is safe to free the eventpoll item */ 423 /* At this point it is safe to free the eventpoll item */
487 ep_release_epitem(epi); 424 kmem_cache_free(epi_cache, epi);
488 425
489 error = 0; 426 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
490error_return: 427 current, ep, file));
491 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n",
492 current, ep, file, error));
493 428
494 return error; 429 return 0;
495} 430}
496 431
497static void ep_free(struct eventpoll *ep) 432static void ep_free(struct eventpoll *ep)
@@ -506,7 +441,7 @@ static void ep_free(struct eventpoll *ep)
506 /* 441 /*
507 * We need to lock this because we could be hit by 442 * We need to lock this because we could be hit by
508 * eventpoll_release_file() while we're freeing the "struct eventpoll". 443 * eventpoll_release_file() while we're freeing the "struct eventpoll".
509 * We do not need to hold "ep->sem" here because the epoll file 444 * We do not need to hold "ep->mtx" here because the epoll file
510 * is on the way to be removed and no one has references to it 445 * is on the way to be removed and no one has references to it
511 * anymore. The only hit might come from eventpoll_release_file() but 446 * anymore. The only hit might come from eventpoll_release_file() but
512 * holding "epmutex" is sufficent here. 447 * holding "epmutex" is sufficent here.
@@ -525,7 +460,7 @@ static void ep_free(struct eventpoll *ep)
525 /* 460 /*
526 * Walks through the whole tree by freeing each "struct epitem". At this 461 * Walks through the whole tree by freeing each "struct epitem". At this
527 * point we are sure no poll callbacks will be lingering around, and also by 462 * point we are sure no poll callbacks will be lingering around, and also by
528 * write-holding "sem" we can be sure that no file cleanup code will hit 463 * holding "epmutex" we can be sure that no file cleanup code will hit
529 * us during this operation. So we can avoid the lock on "ep->lock". 464 * us during this operation. So we can avoid the lock on "ep->lock".
530 */ 465 */
531 while ((rbp = rb_first(&ep->rbr)) != 0) { 466 while ((rbp = rb_first(&ep->rbr)) != 0) {
@@ -534,16 +469,16 @@ static void ep_free(struct eventpoll *ep)
534 } 469 }
535 470
536 mutex_unlock(&epmutex); 471 mutex_unlock(&epmutex);
472 mutex_destroy(&ep->mtx);
473 kfree(ep);
537} 474}
538 475
539static int ep_eventpoll_release(struct inode *inode, struct file *file) 476static int ep_eventpoll_release(struct inode *inode, struct file *file)
540{ 477{
541 struct eventpoll *ep = file->private_data; 478 struct eventpoll *ep = file->private_data;
542 479
543 if (ep) { 480 if (ep)
544 ep_free(ep); 481 ep_free(ep);
545 kfree(ep);
546 }
547 482
548 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); 483 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
549 return 0; 484 return 0;
@@ -559,10 +494,10 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
559 poll_wait(file, &ep->poll_wait, wait); 494 poll_wait(file, &ep->poll_wait, wait);
560 495
561 /* Check our condition */ 496 /* Check our condition */
562 read_lock_irqsave(&ep->lock, flags); 497 spin_lock_irqsave(&ep->lock, flags);
563 if (!list_empty(&ep->rdllist)) 498 if (!list_empty(&ep->rdllist))
564 pollflags = POLLIN | POLLRDNORM; 499 pollflags = POLLIN | POLLRDNORM;
565 read_unlock_irqrestore(&ep->lock, flags); 500 spin_unlock_irqrestore(&ep->lock, flags);
566 501
567 return pollflags; 502 return pollflags;
568} 503}
@@ -594,9 +529,11 @@ void eventpoll_release_file(struct file *file)
594 * We don't want to get "file->f_ep_lock" because it is not 529 * We don't want to get "file->f_ep_lock" because it is not
595 * necessary. It is not necessary because we're in the "struct file" 530 * necessary. It is not necessary because we're in the "struct file"
596 * cleanup path, and this means that noone is using this file anymore. 531 * cleanup path, and this means that noone is using this file anymore.
597 * The only hit might come from ep_free() but by holding the semaphore 532 * So, for example, epoll_ctl() cannot hit here sicne if we reach this
533 * point, the file counter already went to zero and fget() would fail.
534 * The only hit might come from ep_free() but by holding the mutex
598 * will correctly serialize the operation. We do need to acquire 535 * will correctly serialize the operation. We do need to acquire
599 * "ep->sem" after "epmutex" because ep_remove() requires it when called 536 * "ep->mtx" after "epmutex" because ep_remove() requires it when called
600 * from anywhere but ep_free(). 537 * from anywhere but ep_free().
601 */ 538 */
602 mutex_lock(&epmutex); 539 mutex_lock(&epmutex);
@@ -606,9 +543,9 @@ void eventpoll_release_file(struct file *file)
606 543
607 ep = epi->ep; 544 ep = epi->ep;
608 list_del_init(&epi->fllink); 545 list_del_init(&epi->fllink);
609 down_write(&ep->sem); 546 mutex_lock(&ep->mtx);
610 ep_remove(ep, epi); 547 ep_remove(ep, epi);
611 up_write(&ep->sem); 548 mutex_unlock(&ep->mtx);
612 } 549 }
613 550
614 mutex_unlock(&epmutex); 551 mutex_unlock(&epmutex);
@@ -621,12 +558,13 @@ static int ep_alloc(struct eventpoll **pep)
621 if (!ep) 558 if (!ep)
622 return -ENOMEM; 559 return -ENOMEM;
623 560
624 rwlock_init(&ep->lock); 561 spin_lock_init(&ep->lock);
625 init_rwsem(&ep->sem); 562 mutex_init(&ep->mtx);
626 init_waitqueue_head(&ep->wq); 563 init_waitqueue_head(&ep->wq);
627 init_waitqueue_head(&ep->poll_wait); 564 init_waitqueue_head(&ep->poll_wait);
628 INIT_LIST_HEAD(&ep->rdllist); 565 INIT_LIST_HEAD(&ep->rdllist);
629 ep->rbr = RB_ROOT; 566 ep->rbr = RB_ROOT;
567 ep->ovflist = EP_UNACTIVE_PTR;
630 568
631 *pep = ep; 569 *pep = ep;
632 570
@@ -636,20 +574,18 @@ static int ep_alloc(struct eventpoll **pep)
636} 574}
637 575
638/* 576/*
639 * Search the file inside the eventpoll tree. It add usage count to 577 * Search the file inside the eventpoll tree. The RB tree operations
640 * the returned item, so the caller must call ep_release_epitem() 578 * are protected by the "mtx" mutex, and ep_find() must be called with
641 * after finished using the "struct epitem". 579 * "mtx" held.
642 */ 580 */
643static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) 581static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
644{ 582{
645 int kcmp; 583 int kcmp;
646 unsigned long flags;
647 struct rb_node *rbp; 584 struct rb_node *rbp;
648 struct epitem *epi, *epir = NULL; 585 struct epitem *epi, *epir = NULL;
649 struct epoll_filefd ffd; 586 struct epoll_filefd ffd;
650 587
651 ep_set_ffd(&ffd, file, fd); 588 ep_set_ffd(&ffd, file, fd);
652 read_lock_irqsave(&ep->lock, flags);
653 for (rbp = ep->rbr.rb_node; rbp; ) { 589 for (rbp = ep->rbr.rb_node; rbp; ) {
654 epi = rb_entry(rbp, struct epitem, rbn); 590 epi = rb_entry(rbp, struct epitem, rbn);
655 kcmp = ep_cmp_ffd(&ffd, &epi->ffd); 591 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
@@ -658,12 +594,10 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
658 else if (kcmp < 0) 594 else if (kcmp < 0)
659 rbp = rbp->rb_left; 595 rbp = rbp->rb_left;
660 else { 596 else {
661 ep_use_epitem(epi);
662 epir = epi; 597 epir = epi;
663 break; 598 break;
664 } 599 }
665 } 600 }
666 read_unlock_irqrestore(&ep->lock, flags);
667 601
668 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", 602 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
669 current, file, epir)); 603 current, file, epir));
@@ -686,7 +620,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
686 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", 620 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
687 current, epi->ffd.file, epi, ep)); 621 current, epi->ffd.file, epi, ep));
688 622
689 write_lock_irqsave(&ep->lock, flags); 623 spin_lock_irqsave(&ep->lock, flags);
690 624
691 /* 625 /*
692 * If the event mask does not contain any poll(2) event, we consider the 626 * If the event mask does not contain any poll(2) event, we consider the
@@ -695,7 +629,21 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
695 * until the next EPOLL_CTL_MOD will be issued. 629 * until the next EPOLL_CTL_MOD will be issued.
696 */ 630 */
697 if (!(epi->event.events & ~EP_PRIVATE_BITS)) 631 if (!(epi->event.events & ~EP_PRIVATE_BITS))
698 goto is_disabled; 632 goto out_unlock;
633
634 /*
635 * If we are trasfering events to userspace, we can hold no locks
636 * (because we're accessing user memory, and because of linux f_op->poll()
637 * semantics). All the events that happens during that period of time are
638 * chained in ep->ovflist and requeued later on.
639 */
640 if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
641 if (epi->next == EP_UNACTIVE_PTR) {
642 epi->next = ep->ovflist;
643 ep->ovflist = epi;
644 }
645 goto out_unlock;
646 }
699 647
700 /* If this file is already in the ready list we exit soon */ 648 /* If this file is already in the ready list we exit soon */
701 if (ep_is_linked(&epi->rdllink)) 649 if (ep_is_linked(&epi->rdllink))
@@ -714,8 +662,8 @@ is_linked:
714 if (waitqueue_active(&ep->poll_wait)) 662 if (waitqueue_active(&ep->poll_wait))
715 pwake++; 663 pwake++;
716 664
717is_disabled: 665out_unlock:
718 write_unlock_irqrestore(&ep->lock, flags); 666 spin_unlock_irqrestore(&ep->lock, flags);
719 667
720 /* We have to call this outside the lock */ 668 /* We have to call this outside the lock */
721 if (pwake) 669 if (pwake)
@@ -766,6 +714,9 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
766 rb_insert_color(&epi->rbn, &ep->rbr); 714 rb_insert_color(&epi->rbn, &ep->rbr);
767} 715}
768 716
717/*
718 * Must be called with "mtx" held.
719 */
769static int ep_insert(struct eventpoll *ep, struct epoll_event *event, 720static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
770 struct file *tfile, int fd) 721 struct file *tfile, int fd)
771{ 722{
@@ -786,8 +737,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
786 epi->ep = ep; 737 epi->ep = ep;
787 ep_set_ffd(&epi->ffd, tfile, fd); 738 ep_set_ffd(&epi->ffd, tfile, fd);
788 epi->event = *event; 739 epi->event = *event;
789 atomic_set(&epi->usecnt, 1);
790 epi->nwait = 0; 740 epi->nwait = 0;
741 epi->next = EP_UNACTIVE_PTR;
791 742
792 /* Initialize the poll table using the queue callback */ 743 /* Initialize the poll table using the queue callback */
793 epq.epi = epi; 744 epq.epi = epi;
@@ -796,7 +747,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
796 /* 747 /*
797 * Attach the item to the poll hooks and get current event bits. 748 * Attach the item to the poll hooks and get current event bits.
798 * We can safely use the file* here because its usage count has 749 * We can safely use the file* here because its usage count has
799 * been increased by the caller of this function. 750 * been increased by the caller of this function. Note that after
751 * this operation completes, the poll callback can start hitting
752 * the new item.
800 */ 753 */
801 revents = tfile->f_op->poll(tfile, &epq.pt); 754 revents = tfile->f_op->poll(tfile, &epq.pt);
802 755
@@ -813,12 +766,15 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
813 list_add_tail(&epi->fllink, &tfile->f_ep_links); 766 list_add_tail(&epi->fllink, &tfile->f_ep_links);
814 spin_unlock(&tfile->f_ep_lock); 767 spin_unlock(&tfile->f_ep_lock);
815 768
816 /* We have to drop the new item inside our item list to keep track of it */ 769 /*
817 write_lock_irqsave(&ep->lock, flags); 770 * Add the current item to the RB tree. All RB tree operations are
818 771 * protected by "mtx", and ep_insert() is called with "mtx" held.
819 /* Add the current item to the rb-tree */ 772 */
820 ep_rbtree_insert(ep, epi); 773 ep_rbtree_insert(ep, epi);
821 774
775 /* We have to drop the new item inside our item list to keep track of it */
776 spin_lock_irqsave(&ep->lock, flags);
777
822 /* If the file is already "ready" we drop it inside the ready list */ 778 /* If the file is already "ready" we drop it inside the ready list */
823 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { 779 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
824 list_add_tail(&epi->rdllink, &ep->rdllist); 780 list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -830,7 +786,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
830 pwake++; 786 pwake++;
831 } 787 }
832 788
833 write_unlock_irqrestore(&ep->lock, flags); 789 spin_unlock_irqrestore(&ep->lock, flags);
834 790
835 /* We have to call this outside the lock */ 791 /* We have to call this outside the lock */
836 if (pwake) 792 if (pwake)
@@ -846,12 +802,14 @@ error_unregister:
846 802
847 /* 803 /*
848 * We need to do this because an event could have been arrived on some 804 * We need to do this because an event could have been arrived on some
849 * allocated wait queue. 805 * allocated wait queue. Note that we don't care about the ep->ovflist
806 * list, since that is used/cleaned only inside a section bound by "mtx".
807 * And ep_insert() is called with "mtx" held.
850 */ 808 */
851 write_lock_irqsave(&ep->lock, flags); 809 spin_lock_irqsave(&ep->lock, flags);
852 if (ep_is_linked(&epi->rdllink)) 810 if (ep_is_linked(&epi->rdllink))
853 list_del_init(&epi->rdllink); 811 list_del_init(&epi->rdllink);
854 write_unlock_irqrestore(&ep->lock, flags); 812 spin_unlock_irqrestore(&ep->lock, flags);
855 813
856 kmem_cache_free(epi_cache, epi); 814 kmem_cache_free(epi_cache, epi);
857error_return: 815error_return:
@@ -860,7 +818,7 @@ error_return:
860 818
861/* 819/*
862 * Modify the interest event mask by dropping an event if the new mask 820 * Modify the interest event mask by dropping an event if the new mask
863 * has a match in the current file status. 821 * has a match in the current file status. Must be called with "mtx" held.
864 */ 822 */
865static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) 823static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)
866{ 824{
@@ -882,36 +840,28 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
882 */ 840 */
883 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 841 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
884 842
885 write_lock_irqsave(&ep->lock, flags); 843 spin_lock_irqsave(&ep->lock, flags);
886 844
887 /* Copy the data member from inside the lock */ 845 /* Copy the data member from inside the lock */
888 epi->event.data = event->data; 846 epi->event.data = event->data;
889 847
890 /* 848 /*
891 * If the item is not linked to the RB tree it means that it's on its 849 * If the item is "hot" and it is not registered inside the ready
892 * way toward the removal. Do nothing in this case. 850 * list, push it inside.
893 */ 851 */
894 if (ep_rb_linked(&epi->rbn)) { 852 if (revents & event->events) {
895 /* 853 if (!ep_is_linked(&epi->rdllink)) {
896 * If the item is "hot" and it is not registered inside the ready 854 list_add_tail(&epi->rdllink, &ep->rdllist);
897 * list, push it inside. If the item is not "hot" and it is currently 855
898 * registered inside the ready list, unlink it. 856 /* Notify waiting tasks that events are available */
899 */ 857 if (waitqueue_active(&ep->wq))
900 if (revents & event->events) { 858 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
901 if (!ep_is_linked(&epi->rdllink)) { 859 TASK_INTERRUPTIBLE);
902 list_add_tail(&epi->rdllink, &ep->rdllist); 860 if (waitqueue_active(&ep->poll_wait))
903 861 pwake++;
904 /* Notify waiting tasks that events are available */
905 if (waitqueue_active(&ep->wq))
906 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
907 TASK_INTERRUPTIBLE);
908 if (waitqueue_active(&ep->poll_wait))
909 pwake++;
910 }
911 } 862 }
912 } 863 }
913 864 spin_unlock_irqrestore(&ep->lock, flags);
914 write_unlock_irqrestore(&ep->lock, flags);
915 865
916 /* We have to call this outside the lock */ 866 /* We have to call this outside the lock */
917 if (pwake) 867 if (pwake)
@@ -920,36 +870,50 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
920 return 0; 870 return 0;
921} 871}
922 872
923/* 873static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events,
924 * This function is called without holding the "ep->lock" since the call to 874 int maxevents)
925 * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ
926 * because of the way poll() is traditionally implemented in Linux.
927 */
928static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
929 struct epoll_event __user *events, int maxevents)
930{ 875{
931 int eventcnt, error = -EFAULT, pwake = 0; 876 int eventcnt, error = -EFAULT, pwake = 0;
932 unsigned int revents; 877 unsigned int revents;
933 unsigned long flags; 878 unsigned long flags;
934 struct epitem *epi; 879 struct epitem *epi, *nepi;
935 struct list_head injlist; 880 struct list_head txlist;
881
882 INIT_LIST_HEAD(&txlist);
883
884 /*
885 * We need to lock this because we could be hit by
886 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
887 */
888 mutex_lock(&ep->mtx);
936 889
937 INIT_LIST_HEAD(&injlist); 890 /*
891 * Steal the ready list, and re-init the original one to the
892 * empty list. Also, set ep->ovflist to NULL so that events
893 * happening while looping w/out locks, are not lost. We cannot
894 * have the poll callback to queue directly on ep->rdllist,
895 * because we are doing it in the loop below, in a lockless way.
896 */
897 spin_lock_irqsave(&ep->lock, flags);
898 list_splice(&ep->rdllist, &txlist);
899 INIT_LIST_HEAD(&ep->rdllist);
900 ep->ovflist = NULL;
901 spin_unlock_irqrestore(&ep->lock, flags);
938 902
939 /* 903 /*
940 * We can loop without lock because this is a task private list. 904 * We can loop without lock because this is a task private list.
941 * We just splice'd out the ep->rdllist in ep_collect_ready_items(). 905 * We just splice'd out the ep->rdllist in ep_collect_ready_items().
942 * Items cannot vanish during the loop because we are holding "sem" in 906 * Items cannot vanish during the loop because we are holding "mtx".
943 * read.
944 */ 907 */
945 for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) { 908 for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) {
946 epi = list_first_entry(txlist, struct epitem, rdllink); 909 epi = list_first_entry(&txlist, struct epitem, rdllink);
947 prefetch(epi->rdllink.next); 910
911 list_del_init(&epi->rdllink);
948 912
949 /* 913 /*
950 * Get the ready file event set. We can safely use the file 914 * Get the ready file event set. We can safely use the file
951 * because we are holding the "sem" in read and this will 915 * because we are holding the "mtx" and this will guarantee
952 * guarantee that both the file and the item will not vanish. 916 * that both the file and the item will not vanish.
953 */ 917 */
954 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 918 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
955 revents &= epi->event.events; 919 revents &= epi->event.events;
@@ -957,8 +921,8 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
957 /* 921 /*
958 * Is the event mask intersect the caller-requested one, 922 * Is the event mask intersect the caller-requested one,
959 * deliver the event to userspace. Again, we are holding 923 * deliver the event to userspace. Again, we are holding
960 * "sem" in read, so no operations coming from userspace 924 * "mtx", so no operations coming from userspace can change
961 * can change the item. 925 * the item.
962 */ 926 */
963 if (revents) { 927 if (revents) {
964 if (__put_user(revents, 928 if (__put_user(revents,
@@ -970,59 +934,59 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
970 epi->event.events &= EP_PRIVATE_BITS; 934 epi->event.events &= EP_PRIVATE_BITS;
971 eventcnt++; 935 eventcnt++;
972 } 936 }
973
974 /* 937 /*
975 * This is tricky. We are holding the "sem" in read, and this 938 * At this point, noone can insert into ep->rdllist besides
976 * means that the operations that can change the "linked" status 939 * us. The epoll_ctl() callers are locked out by us holding
977 * of the epoll item (epi->rbn and epi->rdllink), cannot touch 940 * "mtx" and the poll callback will queue them in ep->ovflist.
978 * them. Also, since we are "linked" from a epi->rdllink POV
979 * (the item is linked to our transmission list we just
980 * spliced), the ep_poll_callback() cannot touch us either,
981 * because of the check present in there. Another parallel
982 * epoll_wait() will not get the same result set, since we
983 * spliced the ready list before. Note that list_del() still
984 * shows the item as linked to the test in ep_poll_callback().
985 */ 941 */
986 list_del(&epi->rdllink);
987 if (!(epi->event.events & EPOLLET) && 942 if (!(epi->event.events & EPOLLET) &&
988 (revents & epi->event.events)) 943 (revents & epi->event.events))
989 list_add_tail(&epi->rdllink, &injlist); 944 list_add_tail(&epi->rdllink, &ep->rdllist);
990 else {
991 /*
992 * Be sure the item is totally detached before re-init
993 * the list_head. After INIT_LIST_HEAD() is committed,
994 * the ep_poll_callback() can requeue the item again,
995 * but we don't care since we are already past it.
996 */
997 smp_mb();
998 INIT_LIST_HEAD(&epi->rdllink);
999 }
1000 } 945 }
1001 error = 0; 946 error = 0;
1002 947
1003 errxit: 948errxit:
1004 949
950 spin_lock_irqsave(&ep->lock, flags);
1005 /* 951 /*
1006 * If the re-injection list or the txlist are not empty, re-splice 952 * During the time we spent in the loop above, some other events
1007 * them to the ready list and do proper wakeups. 953 * might have been queued by the poll callback. We re-insert them
954 * here (in case they are not already queued, or they're one-shot).
1008 */ 955 */
1009 if (!list_empty(&injlist) || !list_empty(txlist)) { 956 for (nepi = ep->ovflist; (epi = nepi) != NULL;
1010 write_lock_irqsave(&ep->lock, flags); 957 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
958 if (!ep_is_linked(&epi->rdllink) &&
959 (epi->event.events & ~EP_PRIVATE_BITS))
960 list_add_tail(&epi->rdllink, &ep->rdllist);
961 }
962 /*
963 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
964 * releasing the lock, events will be queued in the normal way inside
965 * ep->rdllist.
966 */
967 ep->ovflist = EP_UNACTIVE_PTR;
968
969 /*
970 * In case of error in the event-send loop, or in case the number of
971 * ready events exceeds the userspace limit, we need to splice the
972 * "txlist" back inside ep->rdllist.
973 */
974 list_splice(&txlist, &ep->rdllist);
1011 975
1012 list_splice(txlist, &ep->rdllist); 976 if (!list_empty(&ep->rdllist)) {
1013 list_splice(&injlist, &ep->rdllist);
1014 /* 977 /*
1015 * Wake up ( if active ) both the eventpoll wait list and the ->poll() 978 * Wake up (if active) both the eventpoll wait list and the ->poll()
1016 * wait list. 979 * wait list (delayed after we release the lock).
1017 */ 980 */
1018 if (waitqueue_active(&ep->wq)) 981 if (waitqueue_active(&ep->wq))
1019 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | 982 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1020 TASK_INTERRUPTIBLE); 983 TASK_INTERRUPTIBLE);
1021 if (waitqueue_active(&ep->poll_wait)) 984 if (waitqueue_active(&ep->poll_wait))
1022 pwake++; 985 pwake++;
1023
1024 write_unlock_irqrestore(&ep->lock, flags);
1025 } 986 }
987 spin_unlock_irqrestore(&ep->lock, flags);
988
989 mutex_unlock(&ep->mtx);
1026 990
1027 /* We have to call this outside the lock */ 991 /* We have to call this outside the lock */
1028 if (pwake) 992 if (pwake)
@@ -1031,41 +995,6 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
1031 return eventcnt == 0 ? error: eventcnt; 995 return eventcnt == 0 ? error: eventcnt;
1032} 996}
1033 997
1034/*
1035 * Perform the transfer of events to user space.
1036 */
1037static int ep_events_transfer(struct eventpoll *ep,
1038 struct epoll_event __user *events, int maxevents)
1039{
1040 int eventcnt;
1041 unsigned long flags;
1042 struct list_head txlist;
1043
1044 INIT_LIST_HEAD(&txlist);
1045
1046 /*
1047 * We need to lock this because we could be hit by
1048 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
1049 */
1050 down_read(&ep->sem);
1051
1052 /*
1053 * Steal the ready list, and re-init the original one to the
1054 * empty list.
1055 */
1056 write_lock_irqsave(&ep->lock, flags);
1057 list_splice(&ep->rdllist, &txlist);
1058 INIT_LIST_HEAD(&ep->rdllist);
1059 write_unlock_irqrestore(&ep->lock, flags);
1060
1061 /* Build result set in userspace */
1062 eventcnt = ep_send_events(ep, &txlist, events, maxevents);
1063
1064 up_read(&ep->sem);
1065
1066 return eventcnt;
1067}
1068
1069static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 998static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1070 int maxevents, long timeout) 999 int maxevents, long timeout)
1071{ 1000{
@@ -1083,7 +1012,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1083 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; 1012 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
1084 1013
1085retry: 1014retry:
1086 write_lock_irqsave(&ep->lock, flags); 1015 spin_lock_irqsave(&ep->lock, flags);
1087 1016
1088 res = 0; 1017 res = 0;
1089 if (list_empty(&ep->rdllist)) { 1018 if (list_empty(&ep->rdllist)) {
@@ -1093,6 +1022,7 @@ retry:
1093 * ep_poll_callback() when events will become available. 1022 * ep_poll_callback() when events will become available.
1094 */ 1023 */
1095 init_waitqueue_entry(&wait, current); 1024 init_waitqueue_entry(&wait, current);
1025 wait.flags |= WQ_FLAG_EXCLUSIVE;
1096 __add_wait_queue(&ep->wq, &wait); 1026 __add_wait_queue(&ep->wq, &wait);
1097 1027
1098 for (;;) { 1028 for (;;) {
@@ -1109,9 +1039,9 @@ retry:
1109 break; 1039 break;
1110 } 1040 }
1111 1041
1112 write_unlock_irqrestore(&ep->lock, flags); 1042 spin_unlock_irqrestore(&ep->lock, flags);
1113 jtimeout = schedule_timeout(jtimeout); 1043 jtimeout = schedule_timeout(jtimeout);
1114 write_lock_irqsave(&ep->lock, flags); 1044 spin_lock_irqsave(&ep->lock, flags);
1115 } 1045 }
1116 __remove_wait_queue(&ep->wq, &wait); 1046 __remove_wait_queue(&ep->wq, &wait);
1117 1047
@@ -1121,7 +1051,7 @@ retry:
1121 /* Is it worth to try to dig for events ? */ 1051 /* Is it worth to try to dig for events ? */
1122 eavail = !list_empty(&ep->rdllist); 1052 eavail = !list_empty(&ep->rdllist);
1123 1053
1124 write_unlock_irqrestore(&ep->lock, flags); 1054 spin_unlock_irqrestore(&ep->lock, flags);
1125 1055
1126 /* 1056 /*
1127 * Try to transfer events to user space. In case we get 0 events and 1057 * Try to transfer events to user space. In case we get 0 events and
@@ -1129,18 +1059,17 @@ retry:
1129 * more luck. 1059 * more luck.
1130 */ 1060 */
1131 if (!res && eavail && 1061 if (!res && eavail &&
1132 !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) 1062 !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
1133 goto retry; 1063 goto retry;
1134 1064
1135 return res; 1065 return res;
1136} 1066}
1137 1067
1138/* 1068/*
1139 * It opens an eventpoll file descriptor by suggesting a storage of "size" 1069 * It opens an eventpoll file descriptor. The "size" parameter is there
1140 * file descriptors. The size parameter is just an hint about how to size 1070 * for historical reasons, when epoll was using an hash instead of an
1141 * data structures. It won't prevent the user to store more than "size" 1071 * RB tree. With the current implementation, the "size" parameter is ignored
1142 * file descriptors inside the epoll interface. It is the kernel part of 1072 * (besides sanity checks).
1143 * the userspace epoll_create(2).
1144 */ 1073 */
1145asmlinkage long sys_epoll_create(int size) 1074asmlinkage long sys_epoll_create(int size)
1146{ 1075{
@@ -1176,7 +1105,6 @@ asmlinkage long sys_epoll_create(int size)
1176 1105
1177error_free: 1106error_free:
1178 ep_free(ep); 1107 ep_free(ep);
1179 kfree(ep);
1180error_return: 1108error_return:
1181 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1109 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1182 current, size, error)); 1110 current, size, error));
@@ -1186,8 +1114,7 @@ error_return:
1186/* 1114/*
1187 * The following function implements the controller interface for 1115 * The following function implements the controller interface for
1188 * the eventpoll file that enables the insertion/removal/change of 1116 * the eventpoll file that enables the insertion/removal/change of
1189 * file descriptors inside the interest set. It represents 1117 * file descriptors inside the interest set.
1190 * the kernel part of the user space epoll_ctl(2).
1191 */ 1118 */
1192asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, 1119asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1193 struct epoll_event __user *event) 1120 struct epoll_event __user *event)
@@ -1237,9 +1164,13 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1237 */ 1164 */
1238 ep = file->private_data; 1165 ep = file->private_data;
1239 1166
1240 down_write(&ep->sem); 1167 mutex_lock(&ep->mtx);
1241 1168
1242 /* Try to lookup the file inside our RB tree */ 1169 /*
1170 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
1171 * above, we can be sure to be able to use the item looked up by
1172 * ep_find() till we release the mutex.
1173 */
1243 epi = ep_find(ep, tfile, fd); 1174 epi = ep_find(ep, tfile, fd);
1244 1175
1245 error = -EINVAL; 1176 error = -EINVAL;
@@ -1266,13 +1197,7 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1266 error = -ENOENT; 1197 error = -ENOENT;
1267 break; 1198 break;
1268 } 1199 }
1269 /* 1200 mutex_unlock(&ep->mtx);
1270 * The function ep_find() increments the usage count of the structure
1271 * so, if this is not NULL, we need to release it.
1272 */
1273 if (epi)
1274 ep_release_epitem(epi);
1275 up_write(&ep->sem);
1276 1201
1277error_tgt_fput: 1202error_tgt_fput:
1278 fput(tfile); 1203 fput(tfile);
@@ -1378,7 +1303,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1378 if (sigmask) { 1303 if (sigmask) {
1379 if (error == -EINTR) { 1304 if (error == -EINTR) {
1380 memcpy(&current->saved_sigmask, &sigsaved, 1305 memcpy(&current->saved_sigmask, &sigsaved,
1381 sizeof(sigsaved)); 1306 sizeof(sigsaved));
1382 set_thread_flag(TIF_RESTORE_SIGMASK); 1307 set_thread_flag(TIF_RESTORE_SIGMASK);
1383 } else 1308 } else
1384 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1309 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h
index 0bd7bd2ccb90..6a5be1f7debf 100644
--- a/include/asm-alpha/mmu_context.h
+++ b/include/asm-alpha/mmu_context.h
@@ -85,8 +85,8 @@ __reload_thread(struct pcb_struct *pcb)
85 * +-------------+----------------+--------------+ 85 * +-------------+----------------+--------------+
86 */ 86 */
87 87
88#ifdef CONFIG_SMP
89#include <asm/smp.h> 88#include <asm/smp.h>
89#ifdef CONFIG_SMP
90#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) 90#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn)
91#else 91#else
92extern unsigned long last_asn; 92extern unsigned long last_asn;
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h
index 21f54428c86b..b4cf0ea97ede 100644
--- a/include/asm-h8300/atomic.h
+++ b/include/asm-h8300/atomic.h
@@ -37,6 +37,7 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
37} 37}
38 38
39#define atomic_sub(i, v) atomic_sub_return(i, v) 39#define atomic_sub(i, v) atomic_sub_return(i, v)
40#define atomic_sub_and_test(i,v) (atomic_sub_return(i, v) == 0)
40 41
41static __inline__ int atomic_inc_return(atomic_t *v) 42static __inline__ int atomic_inc_return(atomic_t *v)
42{ 43{
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 70f3515c3db0..338668bfb0a2 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -749,9 +749,13 @@ extern unsigned long boot_option_idle_override;
749extern void enable_sep_cpu(void); 749extern void enable_sep_cpu(void);
750extern int sysenter_setup(void); 750extern int sysenter_setup(void);
751 751
752/* Defined in head.S */
753extern struct Xgt_desc_struct early_gdt_descr;
754
752extern void cpu_set_gdt(int); 755extern void cpu_set_gdt(int);
753extern void switch_to_new_gdt(void); 756extern void switch_to_new_gdt(void);
754extern void cpu_init(void); 757extern void cpu_init(void);
758extern void init_gdt(int cpu);
755 759
756extern int force_mwait; 760extern int force_mwait;
757 761
diff --git a/include/asm-m68k/uaccess.h b/include/asm-m68k/uaccess.h
index 6a4cf2081512..5c1264cf0c65 100644
--- a/include/asm-m68k/uaccess.h
+++ b/include/asm-m68k/uaccess.h
@@ -361,7 +361,9 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n)
361 361
362long strncpy_from_user(char *dst, const char __user *src, long count); 362long strncpy_from_user(char *dst, const char __user *src, long count);
363long strnlen_user(const char __user *src, long n); 363long strnlen_user(const char __user *src, long n);
364unsigned long clear_user(void __user *to, unsigned long n); 364unsigned long __clear_user(void __user *to, unsigned long n);
365
366#define clear_user __clear_user
365 367
366#define strlen_user(str) strnlen_user(str, 32767) 368#define strlen_user(str) strnlen_user(str, 32767)
367 369
diff --git a/include/linux/init.h b/include/linux/init.h
index 8bc32bb2fce2..e007ae4dc41e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -52,14 +52,9 @@
52#endif 52#endif
53 53
54/* For assembly routines */ 54/* For assembly routines */
55#ifdef CONFIG_HOTPLUG_CPU
56#define __INIT .section ".text","ax"
57#define __INITDATA .section ".data","aw"
58#else
59#define __INIT .section ".init.text","ax" 55#define __INIT .section ".init.text","ax"
60#define __INITDATA .section ".init.data","aw"
61#endif
62#define __FINIT .previous 56#define __FINIT .previous
57#define __INITDATA .section ".init.data","aw"
63 58
64#ifndef __ASSEMBLY__ 59#ifndef __ASSEMBLY__
65/* 60/*
diff --git a/include/linux/io.h b/include/linux/io.h
index 09d351236379..8423dd376514 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -27,8 +27,16 @@ struct device;
27void __iowrite32_copy(void __iomem *to, const void *from, size_t count); 27void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
28void __iowrite64_copy(void __iomem *to, const void *from, size_t count); 28void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
29 29
30#ifdef CONFIG_MMU
30int ioremap_page_range(unsigned long addr, unsigned long end, 31int ioremap_page_range(unsigned long addr, unsigned long end,
31 unsigned long phys_addr, pgprot_t prot); 32 unsigned long phys_addr, pgprot_t prot);
33#else
34static inline int ioremap_page_range(unsigned long addr, unsigned long end,
35 unsigned long phys_addr, pgprot_t prot)
36{
37 return 0;
38}
39#endif
32 40
33/* 41/*
34 * Managed iomap interface 42 * Managed iomap interface
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ccd85e4d3b8f..3b1fbf49fa7d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1288,6 +1288,7 @@
1288#define PCI_DEVICE_ID_VIA_8363_0 0x0305 1288#define PCI_DEVICE_ID_VIA_8363_0 0x0305
1289#define PCI_DEVICE_ID_VIA_P4M800CE 0x0314 1289#define PCI_DEVICE_ID_VIA_P4M800CE 0x0314
1290#define PCI_DEVICE_ID_VIA_P4M890 0x0327 1290#define PCI_DEVICE_ID_VIA_P4M890 0x0327
1291#define PCI_DEVICE_ID_VIA_VT3324 0x0324
1291#define PCI_DEVICE_ID_VIA_VT3336 0x0336 1292#define PCI_DEVICE_ID_VIA_VT3336 0x0336
1292#define PCI_DEVICE_ID_VIA_8371_0 0x0391 1293#define PCI_DEVICE_ID_VIA_8371_0 0x0391
1293#define PCI_DEVICE_ID_VIA_8501_0 0x0501 1294#define PCI_DEVICE_ID_VIA_8501_0 0x0501
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ea27065e80e6..fd6627e2d115 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -60,7 +60,8 @@ struct kmem_cache {
60#define KMALLOC_SHIFT_LOW 3 60#define KMALLOC_SHIFT_LOW 3
61 61
62#ifdef CONFIG_LARGE_ALLOCS 62#ifdef CONFIG_LARGE_ALLOCS
63#define KMALLOC_SHIFT_HIGH 25 63#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT) =< 25 ? \
64 (MAX_ORDER + PAGE_SHIFT - 1) : 25)
64#else 65#else
65#if !defined(CONFIG_MMU) || NR_CPUS > 512 || MAX_NUMNODES > 256 66#if !defined(CONFIG_MMU) || NR_CPUS > 512 || MAX_NUMNODES > 256
66#define KMALLOC_SHIFT_HIGH 20 67#define KMALLOC_SHIFT_HIGH 20
@@ -87,6 +88,9 @@ static inline int kmalloc_index(int size)
87 */ 88 */
88 WARN_ON_ONCE(size == 0); 89 WARN_ON_ONCE(size == 0);
89 90
91 if (size >= (1 << KMALLOC_SHIFT_HIGH))
92 return -1;
93
90 if (size > 64 && size <= 96) 94 if (size > 64 && size <= 96)
91 return 1; 95 return 1;
92 if (size > 128 && size <= 192) 96 if (size > 128 && size <= 192)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3db5c3c460d7..51b6a6a6158c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -74,7 +74,7 @@ static struct clocksource *watchdog;
74static struct timer_list watchdog_timer; 74static struct timer_list watchdog_timer;
75static DEFINE_SPINLOCK(watchdog_lock); 75static DEFINE_SPINLOCK(watchdog_lock);
76static cycle_t watchdog_last; 76static cycle_t watchdog_last;
77static int watchdog_resumed; 77static unsigned long watchdog_resumed;
78 78
79/* 79/*
80 * Interval: 0.5sec Threshold: 0.0625s 80 * Interval: 0.5sec Threshold: 0.0625s
@@ -104,9 +104,7 @@ static void clocksource_watchdog(unsigned long data)
104 104
105 spin_lock(&watchdog_lock); 105 spin_lock(&watchdog_lock);
106 106
107 resumed = watchdog_resumed; 107 resumed = test_and_clear_bit(0, &watchdog_resumed);
108 if (unlikely(resumed))
109 watchdog_resumed = 0;
110 108
111 wdnow = watchdog->read(); 109 wdnow = watchdog->read();
112 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); 110 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
@@ -151,9 +149,7 @@ static void clocksource_watchdog(unsigned long data)
151} 149}
152static void clocksource_resume_watchdog(void) 150static void clocksource_resume_watchdog(void)
153{ 151{
154 spin_lock(&watchdog_lock); 152 set_bit(0, &watchdog_resumed);
155 watchdog_resumed = 1;
156 spin_unlock(&watchdog_lock);
157} 153}
158 154
159static void clocksource_check_watchdog(struct clocksource *cs) 155static void clocksource_check_watchdog(struct clocksource *cs)