39 files changed, 588 insertions(+), 630 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 21f3fff5432f..bbeb5b6b5b05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3267,6 +3267,7 @@ W: http://tpmdd.sourceforge.net
 P:	Marcel Selhorst
 M:	tpm@selhorst.net
 W:	http://www.prosec.rub.de/tpm/
+L:	tpmdd-devel@lists.sourceforge.net
 S:	Maintained
 
 Telecom Clock Driver for MCPL0010
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 91cff8dc9e1a..06da59f6f837 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o
 obj-$(CONFIG_MICROCODE) += microcode.o
 obj-$(CONFIG_APM) += apm.o
 obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 837b04166a47..ca3e1d341889 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -341,15 +341,17 @@ static int powernow_acpi_init(void)
 	pc.val = (unsigned long) acpi_processor_perf->states[0].control;
 	for (i = 0; i < number_scales; i++) {
 		u8 fid, vid;
-		unsigned int speed;
+		struct acpi_processor_px *state =
+			&acpi_processor_perf->states[i];
+		unsigned int speed, speed_mhz;
 
-		pc.val = (unsigned long) acpi_processor_perf->states[i].control;
+		pc.val = (unsigned long) state->control;
 		dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
 			 i,
-			 (u32) acpi_processor_perf->states[i].core_frequency,
-			 (u32) acpi_processor_perf->states[i].power,
-			 (u32) acpi_processor_perf->states[i].transition_latency,
-			 (u32) acpi_processor_perf->states[i].control,
+			 (u32) state->core_frequency,
+			 (u32) state->power,
+			 (u32) state->transition_latency,
+			 (u32) state->control,
 			 pc.bits.sgtc);
 
 		vid = pc.bits.vid;
@@ -360,6 +362,18 @@ static int powernow_acpi_init(void)
 		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
 
 		speed = powernow_table[i].frequency;
+		speed_mhz = speed / 1000;
+
+		/* processor_perflib will multiply the MHz value by 1000 to
+		 * get a KHz value (e.g. 1266000). However, powernow-k7 works
+		 * with true KHz values (e.g. 1266768). To ensure that all
+		 * powernow frequencies are available, we must ensure that
+		 * ACPI doesn't restrict them, so we round up the MHz value
+		 * to ensure that perflib's computed KHz value is greater than
+		 * or equal to powernow's KHz value.
+		 */
+		if (speed % 1000 > 0)
+			speed_mhz++;
 
 		if ((fid_codes[fid] % 10)==5) {
 			if (have_a0 == 1)
@@ -368,10 +382,16 @@ static int powernow_acpi_init(void)
 
 		dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
-			 fid_codes[fid] % 10, speed/1000, vid,
+			 fid_codes[fid] % 10, speed_mhz, vid,
 			 mobile_vid_table[vid]/1000,
 			 mobile_vid_table[vid]%1000);
 
+		if (state->core_frequency != speed_mhz) {
+			state->core_frequency = speed_mhz;
+			dprintk(" Corrected ACPI frequency to %d\n",
+				speed_mhz);
+		}
+
 		if (latency < pc.bits.sgtc)
 			latency = pc.bits.sgtc;
 
@@ -602,7 +622,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 		result = powernow_acpi_init();
 		if (result) {
 			printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
-			printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n");
+			printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n");
 		}
 	} else {
 		/* SGTC use the bus clock as timer */
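To make the rounding in the hunk above concrete, here is a small worked sketch (illustrative only, reusing the 1266768 kHz example from the patch's own comment; it is not part of the patch):

	/* Illustrative sketch of the round-up, not code from the patch. */
	unsigned int speed = 1266768;		/* true powernow frequency in kHz */
	unsigned int speed_mhz = speed / 1000;	/* 1266 MHz after truncation */

	if (speed % 1000 > 0)
		speed_mhz++;			/* round up to 1267 MHz */

	/*
	 * processor_perflib later recomputes 1267 * 1000 = 1267000 kHz, which
	 * is >= 1266768 kHz, so the ACPI limit no longer hides this powernow
	 * entry.  Without the round-up it would compute 1266000 kHz < 1266768
	 * kHz and mask the state.
	 */
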
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 7cf3d207b6b3..4ade55c5f333 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -521,7 +521,7 @@ static int check_supported_cpu(unsigned int cpu)
 
 	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
 		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
-		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
+		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
 			printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
 			goto out;
 		}
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 95be5013c984..b06c812208ca 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -46,8 +46,8 @@ struct powernow_k8_data {
 #define CPUID_XFAM 0x0ff00000 /* extended family */
 #define CPUID_XFAM_K8 0
 #define CPUID_XMOD 0x000f0000 /* extended model */
-#define CPUID_XMOD_REV_G 0x00060000
+#define CPUID_XMOD_REV_MASK 0x00080000
 #define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
 #define CPUID_USE_XFAM_XMOD 0x00000f00
 #define CPUID_GET_MAX_CAPABILITIES 0x80000000
 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 706bda72dc60..c9a7c9835aba 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -467,7 +467,7 @@ void flush_tlb_all(void)
  * it goes straight through and wastes no time serializing
  * anything. Worst case is that we lose a reschedule ...
  */
-void native_smp_send_reschedule(int cpu)
+static void native_smp_send_reschedule(int cpu)
 {
 	WARN_ON(cpu_is_offline(cpu));
 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
@@ -546,9 +546,10 @@ static void __smp_call_function(void (*func) (void *info), void *info,
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
-int native_smp_call_function_mask(cpumask_t mask,
-				  void (*func)(void *), void *info,
-				  int wait)
+static int
+native_smp_call_function_mask(cpumask_t mask,
+			      void (*func)(void *), void *info,
+			      int wait)
 {
 	struct call_data_struct data;
 	cpumask_t allbutself;
@@ -599,60 +600,6 @@ int native_smp_call_function_mask(cpumask_t mask,
 	return 0;
 }
 
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
-		      int wait)
-{
-	return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on another CPU
- * @cpu: The target CPU. Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			     int nonatomic, int wait)
-{
-	/* prevent preemption and reschedule on another processor */
-	int ret;
-	int me = get_cpu();
-	if (cpu == me) {
-		WARN_ON(1);
-		put_cpu();
-		return -EBUSY;
-	}
-
-	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
-	put_cpu();
-	return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
 static void stop_this_cpu (void * dummy)
 {
 	local_irq_disable();
@@ -670,7 +617,7 @@ static void stop_this_cpu (void * dummy)
  * this function calls the 'stop' function on all other CPUs in the system.
  */
 
-void native_smp_send_stop(void)
+static void native_smp_send_stop(void)
 {
 	/* Don't deadlock on the call lock in panic */
 	int nolock = !spin_trylock(&call_lock);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index b92cc4e8b3bb..08f07a74a9d3 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -98,9 +98,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 
 u8 apicid_2_node[MAX_APICID];
 
-DEFINE_PER_CPU(unsigned long, this_cpu_off);
-EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -763,25 +760,6 @@ static inline struct task_struct * alloc_idle_task(int cpu)
 #define alloc_idle_task(cpu) fork_idle(cpu)
 #endif
 
-/* Initialize the CPU's GDT. This is either the boot CPU doing itself
-   (still using the master per-cpu area), or a CPU doing it for a
-   secondary which will soon come up. */
-static __cpuinit void init_gdt(int cpu)
-{
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-
-	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
-			(u32 *)&gdt[GDT_ENTRY_PERCPU].b,
-			__per_cpu_offset[cpu], 0xFFFFF,
-			0x80 | DESCTYPE_S | 0x2, 0x8);
-
-	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
-	per_cpu(cpu_number, cpu) = cpu;
-}
-
-/* Defined in head.S */
-extern struct Xgt_desc_struct early_gdt_descr;
-
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c
new file mode 100644
index 000000000000..1868ae18eb4d
--- /dev/null
+++ b/arch/i386/kernel/smpcommon.c
@@ -0,0 +1,79 @@
+/*
+ * SMP stuff which is common to all sub-architectures.
+ */
+#include <linux/module.h>
+#include <asm/smp.h>
+
+DEFINE_PER_CPU(unsigned long, this_cpu_off);
+EXPORT_PER_CPU_SYMBOL(this_cpu_off);
+
+/* Initialize the CPU's GDT. This is either the boot CPU doing itself
+   (still using the master per-cpu area), or a CPU doing it for a
+   secondary which will soon come up. */
+__cpuinit void init_gdt(int cpu)
+{
+	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+
+	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
+			(u32 *)&gdt[GDT_ENTRY_PERCPU].b,
+			__per_cpu_offset[cpu], 0xFFFFF,
+			0x80 | DESCTYPE_S | 0x2, 0x8);
+
+	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
+	per_cpu(cpu_number, cpu) = cpu;
+}
+
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
+		      int wait)
+{
+	return smp_call_function_mask(cpu_online_map, func, info, wait);
+}
+EXPORT_SYMBOL(smp_call_function);
+
+/**
+ * smp_call_function_single - Run a function on another CPU
+ * @cpu: The target CPU. Cannot be the calling CPU.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ */
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+			     int nonatomic, int wait)
+{
+	/* prevent preemption and reschedule on another processor */
+	int ret;
+	int me = get_cpu();
+	if (cpu == me) {
+		WARN_ON(1);
+		put_cpu();
+		return -EBUSY;
+	}
+
+	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+
+	put_cpu();
+	return ret;
+}
+EXPORT_SYMBOL(smp_call_function_single);
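Since smp_call_function_single() now lives in this common file, a minimal call site looks roughly like the sketch below (the drain_local_stats() callback and example_caller() are hypothetical names for illustration, not part of this patch):

	#include <linux/kernel.h>
	#include <linux/smp.h>

	/* Hypothetical callback: it runs from IPI context on the target CPU,
	 * so it must be fast and non-blocking. */
	static void drain_local_stats(void *info)
	{
		/* touch per-CPU state on the remote CPU */
	}

	static void example_caller(void)
	{
		/* Run drain_local_stats() on CPU 1 and wait for it to finish
		 * (nonatomic is unused, wait = 1). */
		int err = smp_call_function_single(1, drain_local_stats, NULL, 0, 1);

		if (err)
			printk(KERN_WARNING "cross-CPU call failed: %d\n", err);
	}
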
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 50d9c52070b1..b87f8548e75a 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -27,7 +27,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/arch_hooks.h>
-#include <asm/pda.h>
 
 /* TLB state -- visible externally, indexed physically */
 DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 };
@@ -422,7 +421,7 @@ find_smp_config(void)
 		VOYAGER_SUS_IN_CONTROL_PORT);
 
 	current_thread_info()->cpu = boot_cpu_id;
-	write_pda(cpu_number, boot_cpu_id);
+	x86_write_percpu(cpu_number, boot_cpu_id);
 }
 
 /*
@@ -435,7 +434,7 @@ smp_store_cpu_info(int id)
 
 	*c = boot_cpu_data;
 
-	identify_cpu(c);
+	identify_secondary_cpu(c);
 }
 
 /* set up the trampoline and return the physical address of the code */
@@ -459,7 +458,7 @@ start_secondary(void *unused)
 	/* external functions not defined in the headers */
 	extern void calibrate_delay(void);
 
-	secondary_cpu_init();
+	cpu_init();
 
 	/* OK, we're in the routine */
 	ack_CPI(VIC_CPU_BOOT_CPI);
@@ -572,7 +571,9 @@ do_boot_cpu(__u8 cpu)
 	/* init_tasks (in sched.c) is indexed logically */
 	stack_start.esp = (void *) idle->thread.esp;
 
-	init_gdt(cpu, idle);
+	init_gdt(cpu);
+	per_cpu(current_task, cpu) = idle;
+	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	irq_ctx_init(cpu);
 
 	/* Note: Don't modify initial ss override */
@@ -859,8 +860,8 @@ smp_invalidate_interrupt(void)
 
 /* This routine is called with a physical cpu mask */
 static void
-flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
-		  unsigned long va)
+voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
+			  unsigned long va)
 {
 	int stuck = 50000;
 
@@ -912,7 +913,7 @@ flush_tlb_current_task(void)
 	cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
 	local_flush_tlb();
 	if (cpu_mask)
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -934,7 +935,7 @@ flush_tlb_mm (struct mm_struct * mm)
 		leave_mm(smp_processor_id());
 	}
 	if (cpu_mask)
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -955,7 +956,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 	}
 
 	if (cpu_mask)
-		flush_tlb_others(cpu_mask, mm, va);
+		voyager_flush_tlb_others(cpu_mask, mm, va);
 
 	preempt_enable();
 }
@@ -1044,10 +1045,12 @@ smp_call_function_interrupt(void)
 }
 
 static int
-__smp_call_function_mask (void (*func) (void *info), void *info, int retry,
-			  int wait, __u32 mask)
+voyager_smp_call_function_mask (cpumask_t cpumask,
+				void (*func) (void *info), void *info,
+				int wait)
 {
 	struct call_data_struct data;
+	u32 mask = cpus_addr(cpumask)[0];
 
 	mask &= ~(1<<smp_processor_id());
 
@@ -1083,47 +1086,6 @@ __smp_call_function_mask (void (*func) (void *info), void *info, int retry,
 	return 0;
 }
 
-/* Call this function on all CPUs using the function_interrupt above
-    <func> The function to run. This must be fast and non-blocking.
-    <info> An arbitrary pointer to pass to the function.
-    <retry> If true, keep retrying until ready.
-    <wait> If true, wait until function has completed on other CPUs.
-    [RETURNS] 0 on success, else a negative status code. Does not return until
-    remote CPUs are nearly ready to execute <<func>> or are or have executed.
-*/
-int
-smp_call_function(void (*func) (void *info), void *info, int retry,
-		  int wait)
-{
-	__u32 mask = cpus_addr(cpu_online_map)[0];
-
-	return __smp_call_function_mask(func, info, retry, wait, mask);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/*
- * smp_call_function_single - Run a function on another CPU
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Currently unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Retrurns 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int
-smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			 int nonatomic, int wait)
-{
-	__u32 mask = 1 << cpu;
-
-	return __smp_call_function_mask(func, info, nonatomic, wait, mask);
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
 /* Sorry about the name. In an APIC based system, the APICs
  * themselves are programmed to send a timer interrupt. This is used
  * by linux to reschedule the processor. Voyager doesn't have this,
@@ -1237,8 +1199,8 @@ smp_alloc_memory(void)
 }
 
 /* send a reschedule CPI to one CPU by physical CPU number*/
-void
-smp_send_reschedule(int cpu)
+static void
+voyager_smp_send_reschedule(int cpu)
 {
 	send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
 }
@@ -1267,8 +1229,8 @@ safe_smp_processor_id(void)
 }
 
 /* broadcast a halt to all other CPUs */
-void
-smp_send_stop(void)
+static void
+voyager_smp_send_stop(void)
 {
 	smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
 }
@@ -1930,23 +1892,26 @@ smp_voyager_power_off(void *dummy)
 	smp_stop_cpu_function(NULL);
 }
 
-void __init
-smp_prepare_cpus(unsigned int max_cpus)
+static void __init
+voyager_smp_prepare_cpus(unsigned int max_cpus)
 {
 	/* FIXME: ignore max_cpus for now */
 	smp_boot_cpus();
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+static void __devinit voyager_smp_prepare_boot_cpu(void)
 {
+	init_gdt(smp_processor_id());
+	switch_to_new_gdt();
+
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_callout_map);
 	cpu_set(smp_processor_id(), cpu_possible_map);
 	cpu_set(smp_processor_id(), cpu_present_map);
 }
 
-int __devinit
-__cpu_up(unsigned int cpu)
+static int __devinit
+voyager_cpu_up(unsigned int cpu)
 {
 	/* This only works at boot for x86. See "rewrite" above. */
 	if (cpu_isset(cpu, smp_commenced_mask))
@@ -1962,8 +1927,8 @@ __cpu_up(unsigned int cpu)
 	return 0;
 }
 
-void __init
-smp_cpus_done(unsigned int max_cpus)
+static void __init
+voyager_smp_cpus_done(unsigned int max_cpus)
 {
 	zap_low_mappings();
 }
@@ -1972,5 +1937,16 @@ void __init
 smp_setup_processor_id(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
-	write_pda(cpu_number, hard_smp_processor_id());
+	x86_write_percpu(cpu_number, hard_smp_processor_id());
 }
+
+struct smp_ops smp_ops = {
+	.smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
+	.smp_prepare_cpus = voyager_smp_prepare_cpus,
+	.cpu_up = voyager_cpu_up,
+	.smp_cpus_done = voyager_smp_cpus_done,
+
+	.smp_send_stop = voyager_smp_send_stop,
+	.smp_send_reschedule = voyager_smp_send_reschedule,
+	.smp_call_function_mask = voyager_smp_call_function_mask,
+};
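The smp_ops table registered at the end of the file is what lets generic i386 SMP code reach the Voyager implementations without compile-time knowledge of the sub-architecture. A simplified sketch of that indirection is shown below; the field names come from the struct initializer in this patch, but the wrapper functions are only an approximation of the generic header, not its literal contents:

	/* Simplified sketch of the smp_ops indirection (not literal
	 * <asm/smp.h> contents). */
	struct smp_ops {
		void (*smp_prepare_boot_cpu)(void);
		void (*smp_prepare_cpus)(unsigned int max_cpus);
		int  (*cpu_up)(unsigned int cpu);
		void (*smp_cpus_done)(unsigned int max_cpus);

		void (*smp_send_stop)(void);
		void (*smp_send_reschedule)(int cpu);
		int  (*smp_call_function_mask)(cpumask_t mask,
					       void (*func)(void *info),
					       void *info, int wait);
	};

	extern struct smp_ops smp_ops;

	/* Generic code calls the hooks instead of sub-arch functions: */
	static inline void smp_send_reschedule(int cpu)
	{
		smp_ops.smp_send_reschedule(cpu);
	}

	static inline int smp_call_function_mask(cpumask_t mask,
						 void (*func)(void *info),
						 void *info, int wait)
	{
		return smp_ops.smp_call_function_mask(mask, func, info, wait);
	}
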
diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c
index 865f9fb9e686..13854ed8cd9a 100644
--- a/arch/m68k/lib/uaccess.c
+++ b/arch/m68k/lib/uaccess.c
@@ -181,7 +181,7 @@ EXPORT_SYMBOL(strnlen_user);
  * Zero Userspace
  */
 
-unsigned long clear_user(void __user *to, unsigned long n)
+unsigned long __clear_user(void __user *to, unsigned long n)
 {
 	unsigned long res;
 
@@ -219,4 +219,4 @@ unsigned long clear_user(void __user *to, unsigned long n)
 
 	return res;
 }
-EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 74a567afb830..6b5173ac8131 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3802,7 +3802,6 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 
 	return ret;
 }
-EXPORT_SYMBOL(current_io_context);
 
 /*
  * If the current task has no IO context then create one and initialise it.
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index a2bb4eccaab4..9aaf401a8975 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -384,9 +384,9 @@ static struct agp_device_ids via_agp_device_ids[] __devinitdata =
 		.device_id = PCI_DEVICE_ID_VIA_P4M800CE,
 		.chipset_name = "VT3314",
 	},
-	/* CX700 */
+	/* VT3324 / CX700 */
 	{
-		.device_id = PCI_DEVICE_ID_VIA_CX700,
+		.device_id = PCI_DEVICE_ID_VIA_VT3324,
 		.chipset_name = "CX700",
 	},
 	/* VT3336 */
@@ -540,7 +540,7 @@ static const struct pci_device_id agp_via_pci_table[] = {
 	ID(PCI_DEVICE_ID_VIA_83_87XX_1),
 	ID(PCI_DEVICE_ID_VIA_3296_0),
 	ID(PCI_DEVICE_ID_VIA_P4M800CE),
-	ID(PCI_DEVICE_ID_VIA_CX700),
+	ID(PCI_DEVICE_ID_VIA_VT3324),
 	ID(PCI_DEVICE_ID_VIA_VT3336),
 	ID(PCI_DEVICE_ID_VIA_P4M890),
 	{ }
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fde92ce45153..2eb52b7a71da 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -346,12 +346,33 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-static void cma_release_remove(struct rdma_id_private *id_priv)
+static int cma_disable_remove(struct rdma_id_private *id_priv,
+			      enum cma_state state)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&id_priv->lock, flags);
+	if (id_priv->state == state) {
+		atomic_inc(&id_priv->dev_remove);
+		ret = 0;
+	} else
+		ret = -EINVAL;
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+	return ret;
+}
+
+static void cma_enable_remove(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->dev_remove))
 		wake_up(&id_priv->wait_remove);
 }
 
+static int cma_has_cm_dev(struct rdma_id_private *id_priv)
+{
+	return (id_priv->id.device && id_priv->cm_id.ib);
+}
+
 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps)
 {
@@ -884,9 +905,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_cm_event event;
 	int ret = 0;
 
-	atomic_inc(&id_priv->dev_remove);
-	if (!cma_comp(id_priv, CMA_CONNECT))
-		goto out;
+	if (cma_disable_remove(id_priv, CMA_CONNECT))
+		return 0;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -942,12 +962,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
 		cma_exch(id_priv, CMA_DESTROYING);
-		cma_release_remove(id_priv);
+		cma_enable_remove(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
 	}
 out:
-	cma_release_remove(id_priv);
+	cma_enable_remove(id_priv);
 	return ret;
 }
 
@@ -1057,11 +1077,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	int offset, ret;
 
 	listen_id = cm_id->context;
-	atomic_inc(&listen_id->dev_remove);
-	if (!cma_comp(listen_id, CMA_LISTEN)) {
-		ret = -ECONNABORTED;
-		goto out;
-	}
+	if (cma_disable_remove(listen_id, CMA_LISTEN))
+		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
 	offset = cma_user_data_offset(listen_id->id.ps);
@@ -1101,11 +1118,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 
 release_conn_id:
 	cma_exch(conn_id, CMA_DESTROYING);
-	cma_release_remove(conn_id);
+	cma_enable_remove(conn_id);
 	rdma_destroy_id(&conn_id->id);
 
 out:
-	cma_release_remove(listen_id);
+	cma_enable_remove(listen_id);
 	return ret;
 }
 
@@ -1171,9 +1188,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr_in *sin;
 	int ret = 0;
 
-	memset(&event, 0, sizeof event);
-	atomic_inc(&id_priv->dev_remove);
+	if (cma_disable_remove(id_priv, CMA_CONNECT))
+		return 0;
 
+	memset(&event, 0, sizeof event);
 	switch (iw_event->event) {
 	case IW_CM_EVENT_CLOSE:
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -1214,12 +1232,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.iw = NULL;
 		cma_exch(id_priv, CMA_DESTROYING);
-		cma_release_remove(id_priv);
+		cma_enable_remove(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
 	}
 
-	cma_release_remove(id_priv);
+	cma_enable_remove(id_priv);
 	return ret;
 }
 
@@ -1234,11 +1252,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	int ret;
 
 	listen_id = cm_id->context;
-	atomic_inc(&listen_id->dev_remove);
-	if (!cma_comp(listen_id, CMA_LISTEN)) {
-		ret = -ECONNABORTED;
-		goto out;
-	}
+	if (cma_disable_remove(listen_id, CMA_LISTEN))
+		return -ECONNABORTED;
 
 	/* Create a new RDMA id for the new IW CM ID */
 	new_cm_id = rdma_create_id(listen_id->id.event_handler,
@@ -1255,13 +1270,13 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
 	if (!dev) {
 		ret = -EADDRNOTAVAIL;
-		cma_release_remove(conn_id);
+		cma_enable_remove(conn_id);
 		rdma_destroy_id(new_cm_id);
 		goto out;
 	}
 	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
 	if (ret) {
-		cma_release_remove(conn_id);
+		cma_enable_remove(conn_id);
 		rdma_destroy_id(new_cm_id);
 		goto out;
 	}
@@ -1270,7 +1285,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	ret = cma_acquire_dev(conn_id);
 	mutex_unlock(&lock);
 	if (ret) {
-		cma_release_remove(conn_id);
+		cma_enable_remove(conn_id);
 		rdma_destroy_id(new_cm_id);
 		goto out;
 	}
@@ -1293,14 +1308,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		/* User wants to destroy the CM ID */
 		conn_id->cm_id.iw = NULL;
 		cma_exch(conn_id, CMA_DESTROYING);
-		cma_release_remove(conn_id);
+		cma_enable_remove(conn_id);
 		rdma_destroy_id(&conn_id->id);
 	}
 
 out:
 	if (dev)
 		dev_put(dev);
-	cma_release_remove(listen_id);
+	cma_enable_remove(listen_id);
 	return ret;
 }
 
@@ -1519,7 +1534,7 @@ static void cma_work_handler(struct work_struct *_work)
 		destroy = 1;
 	}
 out:
-	cma_release_remove(id_priv);
+	cma_enable_remove(id_priv);
 	cma_deref_id(id_priv);
 	if (destroy)
 		rdma_destroy_id(&id_priv->id);
@@ -1711,13 +1726,13 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 
 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
 		cma_exch(id_priv, CMA_DESTROYING);
-		cma_release_remove(id_priv);
+		cma_enable_remove(id_priv);
 		cma_deref_id(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return;
 	}
 out:
-	cma_release_remove(id_priv);
+	cma_enable_remove(id_priv);
 	cma_deref_id(id_priv);
 }
 
@@ -2042,11 +2057,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	memset(&event, 0, sizeof event);
-	atomic_inc(&id_priv->dev_remove);
-	if (!cma_comp(id_priv, CMA_CONNECT))
-		goto out;
+	if (cma_disable_remove(id_priv, CMA_CONNECT))
+		return 0;
 
+	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
 	case IB_CM_SIDR_REQ_ERROR:
 		event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -2084,12 +2098,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
 		cma_exch(id_priv, CMA_DESTROYING);
-		cma_release_remove(id_priv);
+		cma_enable_remove(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
 	}
-out:
-	cma_release_remove(id_priv);
+
+	cma_enable_remove(id_priv);
 	return ret;
 }
 
@@ -2413,7 +2427,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_CONNECT))
+	if (!cma_has_cm_dev(id_priv))
 		return -EINVAL;
 
 	switch (id->device->node_type) {
@@ -2435,7 +2449,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_CONNECT))
+	if (!cma_has_cm_dev(id_priv))
 		return -EINVAL;
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2466,8 +2480,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_CONNECT) &&
-	    !cma_comp(id_priv, CMA_DISCONNECT))
+	if (!cma_has_cm_dev(id_priv))
 		return -EINVAL;
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2499,10 +2512,9 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	int ret;
 
 	id_priv = mc->id_priv;
-	atomic_inc(&id_priv->dev_remove);
-	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
-	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
-		goto out;
+	if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
+	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
+		return 0;
 
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -2524,12 +2536,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	ret = id_priv->id.event_handler(&id_priv->id, &event);
 	if (ret) {
 		cma_exch(id_priv, CMA_DESTROYING);
-		cma_release_remove(id_priv);
+		cma_enable_remove(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return 0;
 	}
-out:
-	cma_release_remove(id_priv);
+
+	cma_enable_remove(id_priv);
 	return 0;
 }
 
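The net effect of the cma_disable_remove()/cma_enable_remove() conversion is that every event handler now brackets its work the same way: the state check and the dev_remove reference are taken atomically under the id's lock, so a handler either owns a reference in the expected state or drops the event immediately. A condensed, illustrative handler skeleton (not a function from the patch) looks like:

	/* Illustrative skeleton only. */
	static int example_cma_handler(struct rdma_id_private *id_priv)
	{
		int ret = 0;

		if (cma_disable_remove(id_priv, CMA_CONNECT))
			return 0;	/* id not in the expected state: ignore event */

		/* ... report the event to the rdma_cm consumer ... */

		cma_enable_remove(id_priv);	/* pairs with the successful disable */
		return ret;
	}
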
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index f64d42b08674..1d286d3cc2d5 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -277,6 +277,7 @@ void ehca_cleanup_mrmw_cache(void);
 
 extern spinlock_t ehca_qp_idr_lock;
 extern spinlock_t ehca_cq_idr_lock;
+extern spinlock_t hcall_lock;
 extern struct idr ehca_qp_idr;
 extern struct idr ehca_cq_idr;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 82dda2faf4d0..100329ba3343 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -517,12 +517,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
 			else {
 				struct ehca_cq *cq = eq->eqe_cache[i].cq;
 				comp_event_callback(cq);
-				spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+				spin_lock(&ehca_cq_idr_lock);
 				cq->nr_events--;
 				if (!cq->nr_events)
 					wake_up(&cq->wait_completion);
-				spin_unlock_irqrestore(&ehca_cq_idr_lock,
-						       flags);
+				spin_unlock(&ehca_cq_idr_lock);
 			}
 		} else {
 			ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -711,6 +710,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
 		kthread_stop(task);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static void take_over_work(struct ehca_comp_pool *pool,
 			   int cpu)
 {
@@ -735,7 +735,6 @@ static void take_over_work(struct ehca_comp_pool *pool,
 
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int comp_pool_callback(struct notifier_block *nfb,
 			      unsigned long action,
 			      void *hcpu)
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fe90e7454560..c3f99f33b49c 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0022");
+MODULE_VERSION("SVNEHCA_0023");
 
 int ehca_open_aqp1 = 0;
 int ehca_debug_level = 0;
@@ -62,7 +62,7 @@ int ehca_use_hp_mr = 0;
 int ehca_port_act_time = 30;
 int ehca_poll_all_eqs = 1;
 int ehca_static_rate = -1;
-int ehca_scaling_code = 1;
+int ehca_scaling_code = 0;
 
 module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
 module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -98,6 +98,7 @@ MODULE_PARM_DESC(scaling_code,
 
 spinlock_t ehca_qp_idr_lock;
 spinlock_t ehca_cq_idr_lock;
+spinlock_t hcall_lock;
 DEFINE_IDR(ehca_qp_idr);
 DEFINE_IDR(ehca_cq_idr);
 
@@ -453,15 +454,14 @@ static ssize_t ehca_store_debug_level(struct device_driver *ddp,
 DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
 	    ehca_show_debug_level, ehca_store_debug_level);
 
-void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
-{
-	driver_create_file(&drv->driver, &driver_attr_debug_level);
-}
+static struct attribute *ehca_drv_attrs[] = {
+	&driver_attr_debug_level.attr,
+	NULL
+};
 
-void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
-{
-	driver_remove_file(&drv->driver, &driver_attr_debug_level);
-}
+static struct attribute_group ehca_drv_attr_grp = {
+	.attrs = ehca_drv_attrs
+};
 
 #define EHCA_RESOURCE_ATTR(name) \
 static ssize_t ehca_show_##name(struct device *dev, \
@@ -523,44 +523,28 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
 }
 static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
 
+static struct attribute *ehca_dev_attrs[] = {
+	&dev_attr_adapter_handle.attr,
+	&dev_attr_num_ports.attr,
+	&dev_attr_hw_ver.attr,
+	&dev_attr_max_eq.attr,
+	&dev_attr_cur_eq.attr,
+	&dev_attr_max_cq.attr,
+	&dev_attr_cur_cq.attr,
+	&dev_attr_max_qp.attr,
+	&dev_attr_cur_qp.attr,
+	&dev_attr_max_mr.attr,
+	&dev_attr_cur_mr.attr,
+	&dev_attr_max_mw.attr,
+	&dev_attr_cur_mw.attr,
+	&dev_attr_max_pd.attr,
+	&dev_attr_max_ah.attr,
+	NULL
+};
 
-void ehca_create_device_sysfs(struct ibmebus_dev *dev)
-{
-	device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
-	device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
-	device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
-	device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
-	device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
-	device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
-	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
-	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
-	device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
-}
-
-void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
-{
-	device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
-	device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
-}
+static struct attribute_group ehca_dev_attr_grp = {
+	.attrs = ehca_dev_attrs
+};
 
 static int __devinit ehca_probe(struct ibmebus_dev *dev,
 				const struct of_device_id *id)
@@ -668,7 +652,10 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
 		}
 	}
 
-	ehca_create_device_sysfs(dev);
+	ret = sysfs_create_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp);
+	if (ret) /* only complain; we can live without attributes */
+		ehca_err(&shca->ib_device,
+			 "Cannot create device attributes ret=%d", ret);
 
 	spin_lock(&shca_list_lock);
 	list_add(&shca->shca_list, &shca_list);
@@ -720,7 +707,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
 	struct ehca_shca *shca = dev->ofdev.dev.driver_data;
 	int ret;
 
-	ehca_remove_device_sysfs(dev);
+	sysfs_remove_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp);
 
 	if (ehca_open_aqp1 == 1) {
 		int i;
@@ -812,11 +799,12 @@ int __init ehca_module_init(void)
 	int ret;
 
 	printk(KERN_INFO "eHCA Infiniband Device Driver "
-	       "(Rel.: SVNEHCA_0022)\n");
+	       "(Rel.: SVNEHCA_0023)\n");
 	idr_init(&ehca_qp_idr);
 	idr_init(&ehca_cq_idr);
 	spin_lock_init(&ehca_qp_idr_lock);
 	spin_lock_init(&ehca_cq_idr_lock);
+	spin_lock_init(&hcall_lock);
 
 	INIT_LIST_HEAD(&shca_list);
 	spin_lock_init(&shca_list_lock);
@@ -838,7 +826,9 @@ int __init ehca_module_init(void)
 		goto module_init2;
 	}
 
-	ehca_create_driver_sysfs(&ehca_driver);
+	ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp);
+	if (ret) /* only complain; we can live without attributes */
+		ehca_gen_err("Cannot create driver attributes ret=%d", ret);
 
 	if (ehca_poll_all_eqs != 1) {
 		ehca_gen_err("WARNING!!!");
@@ -865,7 +855,7 @@ void __exit ehca_module_exit(void)
 	if (ehca_poll_all_eqs == 1)
 		del_timer_sync(&poll_eqs_timer);
 
-	ehca_remove_driver_sysfs(&ehca_driver);
+	sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp);
 	ibmebus_unregister_driver(&ehca_driver);
| 870 | 860 | ||
| 871 | ehca_destroy_slab_caches(); | 861 | ehca_destroy_slab_caches(); |
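
The sysfs changes above replace the open-coded driver_create_file()/device_create_file() calls and their matching remove calls with attribute arrays wrapped in an attribute_group, registered and torn down with a single sysfs_create_group()/sysfs_remove_group() pair. A minimal sketch of that pattern follows; the demo_* names are illustrative only (not part of the eHCA driver), and the error handling mirrors the patch's "complain but keep going" approach.

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t demo_show_debug_level(struct device_driver *ddp, char *buf)
{
        return snprintf(buf, PAGE_SIZE, "%d\n", 0);     /* placeholder value */
}
static DRIVER_ATTR(debug_level, S_IRUGO, demo_show_debug_level, NULL);

static struct attribute *demo_drv_attrs[] = {
        &driver_attr_debug_level.attr,
        NULL                            /* array must be NULL-terminated */
};

static struct attribute_group demo_drv_attr_grp = {
        .attrs = demo_drv_attrs
};

static void demo_register_attrs(struct device_driver *drv)
{
        /* one call creates every attribute in the group */
        int ret = sysfs_create_group(&drv->kobj, &demo_drv_attr_grp);

        if (ret)        /* as in the patch: warn, but live without attributes */
                printk(KERN_WARNING "cannot create driver attributes, ret=%d\n", ret);
}

static void demo_unregister_attrs(struct device_driver *drv)
{
        /* teardown mirrors creation with a single call */
        sysfs_remove_group(&drv->kobj, &demo_drv_attr_grp);
}

Keeping the attributes in one NULL-terminated array also guarantees that creation and removal can never drift apart, which was easy to get wrong with fifteen paired create/remove calls per device.
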
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index df0516f24379..b5bc787c77b6 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c | |||
| @@ -523,6 +523,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, | |||
| 523 | goto create_qp_exit1; | 523 | goto create_qp_exit1; |
| 524 | } | 524 | } |
| 525 | 525 | ||
| 526 | my_qp->ib_qp.qp_num = my_qp->real_qp_num; | ||
| 527 | |||
| 526 | switch (init_attr->qp_type) { | 528 | switch (init_attr->qp_type) { |
| 527 | case IB_QPT_RC: | 529 | case IB_QPT_RC: |
| 528 | if (isdaqp == 0) { | 530 | if (isdaqp == 0) { |
| @@ -568,7 +570,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, | |||
| 568 | parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; | 570 | parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; |
| 569 | parms.act_nr_send_sges = init_attr->cap.max_send_sge; | 571 | parms.act_nr_send_sges = init_attr->cap.max_send_sge; |
| 570 | parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; | 572 | parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; |
| 571 | my_qp->real_qp_num = | 573 | my_qp->ib_qp.qp_num = |
| 572 | (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; | 574 | (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; |
| 573 | } | 575 | } |
| 574 | 576 | ||
| @@ -595,7 +597,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, | |||
| 595 | my_qp->ib_qp.recv_cq = init_attr->recv_cq; | 597 | my_qp->ib_qp.recv_cq = init_attr->recv_cq; |
| 596 | my_qp->ib_qp.send_cq = init_attr->send_cq; | 598 | my_qp->ib_qp.send_cq = init_attr->send_cq; |
| 597 | 599 | ||
| 598 | my_qp->ib_qp.qp_num = my_qp->real_qp_num; | ||
| 599 | my_qp->ib_qp.qp_type = init_attr->qp_type; | 600 | my_qp->ib_qp.qp_type = init_attr->qp_type; |
| 600 | 601 | ||
| 601 | my_qp->qp_type = init_attr->qp_type; | 602 | my_qp->qp_type = init_attr->qp_type; |
| @@ -968,17 +969,21 @@ static int internal_modify_qp(struct ib_qp *ibqp, | |||
| 968 | ((ehca_mult - 1) / ah_mult) : 0; | 969 | ((ehca_mult - 1) / ah_mult) : 0; |
| 969 | else | 970 | else |
| 970 | mqpcb->max_static_rate = 0; | 971 | mqpcb->max_static_rate = 0; |
| 971 | |||
| 972 | update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); | 972 | update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); |
| 973 | 973 | ||
| 974 | /* | 974 | /* |
| 975 | * Always supply the GRH flag, even if it's zero, to give the | ||
| 976 | * hypervisor a clear "yes" or "no" instead of a "perhaps" | ||
| 977 | */ | ||
| 978 | update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); | ||
| 979 | |||
| 980 | /* | ||
| 975 | * only if GRH is TRUE we might consider SOURCE_GID_IDX | 981 | * only if GRH is TRUE we might consider SOURCE_GID_IDX |
| 976 | * and DEST_GID otherwise phype will return H_ATTR_PARM!!! | 982 | * and DEST_GID otherwise phype will return H_ATTR_PARM!!! |
| 977 | */ | 983 | */ |
| 978 | if (attr->ah_attr.ah_flags == IB_AH_GRH) { | 984 | if (attr->ah_attr.ah_flags == IB_AH_GRH) { |
| 979 | mqpcb->send_grh_flag = 1 << 31; | 985 | mqpcb->send_grh_flag = 1; |
| 980 | update_mask |= | 986 | |
| 981 | EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); | ||
| 982 | mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; | 987 | mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; |
| 983 | update_mask |= | 988 | update_mask |= |
| 984 | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); | 989 | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); |
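
Two separate fixes are folded into the ehca_qp.c hunks above: ib_qp.qp_num is now assigned from real_qp_num before the QP-type switch, so the SMI/GSI special case can simply overwrite it instead of having its override clobbered later, and the SEND_GRH_FLAG bit is now always present in the update mask so the hypervisor receives an explicit yes/no rather than an undefined value. A rough sketch of the resulting GRH handling (ehca names as in the diff; the else branch is only an illustration of the "explicit no" intent, not a literal copy of the driver code):

update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);     /* always valid */

if (attr->ah_attr.ah_flags == IB_AH_GRH) {
        mqpcb->send_grh_flag = 1;       /* plain 0/1 flag, no longer shifted to bit 31 */
        mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
        update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
        /* SOURCE_GID_IDX / DEST_GID are still supplied only when GRH is in
         * use, otherwise the hypervisor rejects them with H_ATTR_PARM */
} else {
        mqpcb->send_grh_flag = 0;       /* explicit "no GRH" for the hypervisor */
}
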
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index b564fcd3b282..7f0beec74f70 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c | |||
| @@ -154,7 +154,8 @@ static long ehca_plpar_hcall9(unsigned long opcode, | |||
| 154 | unsigned long arg9) | 154 | unsigned long arg9) |
| 155 | { | 155 | { |
| 156 | long ret; | 156 | long ret; |
| 157 | int i, sleep_msecs; | 157 | int i, sleep_msecs, lock_is_set = 0; |
| 158 | unsigned long flags; | ||
| 158 | 159 | ||
| 159 | ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " | 160 | ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " |
| 160 | "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", | 161 | "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", |
| @@ -162,10 +163,18 @@ static long ehca_plpar_hcall9(unsigned long opcode, | |||
| 162 | arg8, arg9); | 163 | arg8, arg9); |
| 163 | 164 | ||
| 164 | for (i = 0; i < 5; i++) { | 165 | for (i = 0; i < 5; i++) { |
| 166 | if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) { | ||
| 167 | spin_lock_irqsave(&hcall_lock, flags); | ||
| 168 | lock_is_set = 1; | ||
| 169 | } | ||
| 170 | |||
| 165 | ret = plpar_hcall9(opcode, outs, | 171 | ret = plpar_hcall9(opcode, outs, |
| 166 | arg1, arg2, arg3, arg4, arg5, | 172 | arg1, arg2, arg3, arg4, arg5, |
| 167 | arg6, arg7, arg8, arg9); | 173 | arg6, arg7, arg8, arg9); |
| 168 | 174 | ||
| 175 | if (lock_is_set) | ||
| 176 | spin_unlock_irqrestore(&hcall_lock, flags); | ||
| 177 | |||
| 169 | if (H_IS_LONG_BUSY(ret)) { | 178 | if (H_IS_LONG_BUSY(ret)) { |
| 170 | sleep_msecs = get_longbusy_msecs(ret); | 179 | sleep_msecs = get_longbusy_msecs(ret); |
| 171 | msleep_interruptible(sleep_msecs); | 180 | msleep_interruptible(sleep_msecs); |
| @@ -193,11 +202,11 @@ static long ehca_plpar_hcall9(unsigned long opcode, | |||
| 193 | opcode, ret, outs[0], outs[1], outs[2], outs[3], | 202 | opcode, ret, outs[0], outs[1], outs[2], outs[3], |
| 194 | outs[4], outs[5], outs[6], outs[7], outs[8]); | 203 | outs[4], outs[5], outs[6], outs[7], outs[8]); |
| 195 | return ret; | 204 | return ret; |
| 196 | |||
| 197 | } | 205 | } |
| 198 | 206 | ||
| 199 | return H_BUSY; | 207 | return H_BUSY; |
| 200 | } | 208 | } |
| 209 | |||
| 201 | u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, | 210 | u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, |
| 202 | struct ehca_pfeq *pfeq, | 211 | struct ehca_pfeq *pfeq, |
| 203 | const u32 neq_control, | 212 | const u32 neq_control, |
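
The hcp_if.c change serializes one specific hypervisor call, H_ALLOC_RESOURCE with the literal resource type 5 singled out in the diff, by taking the new global hcall_lock (initialized in ehca_module_init()) around plpar_hcall9(); every other opcode keeps running unserialized. Stripped of the surrounding eHCA details, the retry loop looks roughly like this hypothetical wrapper (the real function forwards all nine arguments):

#include <linux/spinlock.h>
#include <linux/delay.h>
#include <asm/hvcall.h>

static DEFINE_SPINLOCK(hcall_lock);

static long demo_hcall9(unsigned long opcode, unsigned long *outs,
                        unsigned long arg1, unsigned long arg2)
{
        unsigned long flags;
        int i, lock_is_set;
        long ret;

        for (i = 0; i < 5; i++) {
                lock_is_set = 0;
                /* only this one opcode/resource combination is serialized */
                if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) {
                        spin_lock_irqsave(&hcall_lock, flags);
                        lock_is_set = 1;
                }

                ret = plpar_hcall9(opcode, outs, arg1, arg2,
                                   0, 0, 0, 0, 0, 0, 0);

                if (lock_is_set)
                        spin_unlock_irqrestore(&hcall_lock, flags);

                if (H_IS_LONG_BUSY(ret)) {
                        msleep_interruptible(get_longbusy_msecs(ret));
                        continue;       /* retry; the lock decision is re-made */
                }
                return ret;
        }
        return H_BUSY;
}

Taking and releasing the lock inside each iteration keeps the long-busy sleep outside the critical section, so a retried allocation never sleeps with the spinlock held.
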
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 1b9c30857754..4e2e3dfeb2c8 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c | |||
| @@ -747,7 +747,6 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) | |||
| 747 | 747 | ||
| 748 | static int ipath_pe_intconfig(struct ipath_devdata *dd) | 748 | static int ipath_pe_intconfig(struct ipath_devdata *dd) |
| 749 | { | 749 | { |
| 750 | u64 val; | ||
| 751 | u32 chiprev; | 750 | u32 chiprev; |
| 752 | 751 | ||
| 753 | /* | 752 | /* |
| @@ -760,9 +759,9 @@ static int ipath_pe_intconfig(struct ipath_devdata *dd) | |||
| 760 | if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { | 759 | if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { |
| 761 | /* Rev2+ reports extra errors via internal GPIO pins */ | 760 | /* Rev2+ reports extra errors via internal GPIO pins */ |
| 762 | dd->ipath_flags |= IPATH_GPIO_ERRINTRS; | 761 | dd->ipath_flags |= IPATH_GPIO_ERRINTRS; |
| 763 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); | 762 | dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK; |
| 764 | val |= IPATH_GPIO_ERRINTR_MASK; | 763 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, |
| 765 | ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); | 764 | dd->ipath_gpio_mask); |
| 766 | } | 765 | } |
| 767 | return 0; | 766 | return 0; |
| 768 | } | 767 | } |
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 45d033169c6e..a90d3b5699c4 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c | |||
| @@ -1056,7 +1056,7 @@ irqreturn_t ipath_intr(int irq, void *data) | |||
| 1056 | gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); | 1056 | gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); |
| 1057 | chk0rcv = 1; | 1057 | chk0rcv = 1; |
| 1058 | } | 1058 | } |
| 1059 | if (unlikely(gpiostatus)) { | 1059 | if (gpiostatus) { |
| 1060 | /* | 1060 | /* |
| 1061 | * Some unexpected bits remain. If they could have | 1061 | * Some unexpected bits remain. If they could have |
| 1062 | * caused the interrupt, complain and clear. | 1062 | * caused the interrupt, complain and clear. |
| @@ -1065,9 +1065,8 @@ irqreturn_t ipath_intr(int irq, void *data) | |||
| 1065 | * GPIO interrupts, possibly on a "three strikes" | 1065 | * GPIO interrupts, possibly on a "three strikes" |
| 1066 | * basis. | 1066 | * basis. |
| 1067 | */ | 1067 | */ |
| 1068 | u32 mask; | 1068 | const u32 mask = (u32) dd->ipath_gpio_mask; |
| 1069 | mask = ipath_read_kreg32( | 1069 | |
| 1070 | dd, dd->ipath_kregs->kr_gpio_mask); | ||
| 1071 | if (mask & gpiostatus) { | 1070 | if (mask & gpiostatus) { |
| 1072 | ipath_dbg("Unexpected GPIO IRQ bits %x\n", | 1071 | ipath_dbg("Unexpected GPIO IRQ bits %x\n", |
| 1073 | gpiostatus & mask); | 1072 | gpiostatus & mask); |
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e900c2593f44..12194f3dd8cc 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h | |||
| @@ -397,6 +397,8 @@ struct ipath_devdata { | |||
| 397 | unsigned long ipath_pioavailshadow[8]; | 397 | unsigned long ipath_pioavailshadow[8]; |
| 398 | /* shadow of kr_gpio_out, for rmw ops */ | 398 | /* shadow of kr_gpio_out, for rmw ops */ |
| 399 | u64 ipath_gpio_out; | 399 | u64 ipath_gpio_out; |
| 400 | /* shadow the gpio mask register */ | ||
| 401 | u64 ipath_gpio_mask; | ||
| 400 | /* kr_revision shadow */ | 402 | /* kr_revision shadow */ |
| 401 | u64 ipath_revision; | 403 | u64 ipath_revision; |
| 402 | /* | 404 | /* |
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 12933e77c7e9..bb70845279b8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c | |||
| @@ -1387,13 +1387,12 @@ static int enable_timer(struct ipath_devdata *dd) | |||
| 1387 | * processing. | 1387 | * processing. |
| 1388 | */ | 1388 | */ |
| 1389 | if (dd->ipath_flags & IPATH_GPIO_INTR) { | 1389 | if (dd->ipath_flags & IPATH_GPIO_INTR) { |
| 1390 | u64 val; | ||
| 1391 | ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, | 1390 | ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, |
| 1392 | 0x2074076542310ULL); | 1391 | 0x2074076542310ULL); |
| 1393 | /* Enable GPIO bit 2 interrupt */ | 1392 | /* Enable GPIO bit 2 interrupt */ |
| 1394 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); | 1393 | dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); |
| 1395 | val |= (u64) (1 << IPATH_GPIO_PORT0_BIT); | 1394 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, |
| 1396 | ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); | 1395 | dd->ipath_gpio_mask); |
| 1397 | } | 1396 | } |
| 1398 | 1397 | ||
| 1399 | init_timer(&dd->verbs_timer); | 1398 | init_timer(&dd->verbs_timer); |
| @@ -1412,8 +1411,9 @@ static int disable_timer(struct ipath_devdata *dd) | |||
| 1412 | u64 val; | 1411 | u64 val; |
| 1413 | /* Disable GPIO bit 2 interrupt */ | 1412 | /* Disable GPIO bit 2 interrupt */ |
| 1414 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); | 1413 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); |
| 1415 | val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); | 1414 | dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); |
| 1416 | ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); | 1415 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, |
| 1416 | dd->ipath_gpio_mask); | ||
| 1417 | /* | 1417 | /* |
| 1418 | * We might want to undo changes to debugportselect, | 1418 | * We might want to undo changes to debugportselect, |
| 1419 | * but how? | 1419 | * but how? |
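
The four ipath hunks above replace read-modify-write accesses to kr_gpio_mask with a software shadow, dd->ipath_gpio_mask, so the interrupt handler can consult the mask without a register read. Reduced to its essentials, the shadow pattern looks like the following sketch (the demo_* helpers are hypothetical; the field and register names are those from the diff):

static void demo_gpio_irq_enable(struct ipath_devdata *dd, unsigned bit)
{
        dd->ipath_gpio_mask |= (u64) (1 << bit);        /* update the shadow */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
                         dd->ipath_gpio_mask);          /* then write it out */
}

static void demo_gpio_irq_disable(struct ipath_devdata *dd, unsigned bit)
{
        dd->ipath_gpio_mask &= ~((u64) (1 << bit));
        ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
                         dd->ipath_gpio_mask);
}

static u32 demo_gpio_unexpected(struct ipath_devdata *dd, u32 gpiostatus)
{
        /* interrupt path: the shadow replaces ipath_read_kreg32() entirely */
        return gpiostatus & (u32) dd->ipath_gpio_mask;
}

The one caveat with a shadow is that every writer must go through it, which is why the same commit converts all of the places that touched kr_gpio_mask (iba6120 init, the interrupt handler, and the verbs timer enable/disable paths) together.
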
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 688ecb4c39f3..402f3a20ec0a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c | |||
| @@ -489,6 +489,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) | |||
| 489 | ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); | 489 | ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); |
| 490 | if (!ibdev->uar_map) | 490 | if (!ibdev->uar_map) |
| 491 | goto err_uar; | 491 | goto err_uar; |
| 492 | MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); | ||
| 492 | 493 | ||
| 493 | INIT_LIST_HEAD(&ibdev->pgdir_list); | 494 | INIT_LIST_HEAD(&ibdev->pgdir_list); |
| 494 | mutex_init(&ibdev->pgdir_mutex); | 495 | mutex_init(&ibdev->pgdir_mutex); |
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index cf0868f6e965..ca224d018af2 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c | |||
| @@ -284,7 +284,7 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, | |||
| 284 | { | 284 | { |
| 285 | struct mthca_cqe *cqe; | 285 | struct mthca_cqe *cqe; |
| 286 | u32 prod_index; | 286 | u32 prod_index; |
| 287 | int nfreed = 0; | 287 | int i, nfreed = 0; |
| 288 | 288 | ||
| 289 | spin_lock_irq(&cq->lock); | 289 | spin_lock_irq(&cq->lock); |
| 290 | 290 | ||
| @@ -321,6 +321,8 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, | |||
| 321 | } | 321 | } |
| 322 | 322 | ||
| 323 | if (nfreed) { | 323 | if (nfreed) { |
| 324 | for (i = 0; i < nfreed; ++i) | ||
| 325 | set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe)); | ||
| 324 | wmb(); | 326 | wmb(); |
| 325 | cq->cons_index += nfreed; | 327 | cq->cons_index += nfreed; |
| 326 | update_cons_index(dev, cq, nfreed); | 328 | update_cons_index(dev, cq, nfreed); |
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index fee60c852d14..72fabb822f1c 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c | |||
| @@ -1862,6 +1862,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, | |||
| 1862 | dev->kar + MTHCA_RECEIVE_DOORBELL, | 1862 | dev->kar + MTHCA_RECEIVE_DOORBELL, |
| 1863 | MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); | 1863 | MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); |
| 1864 | 1864 | ||
| 1865 | qp->rq.next_ind = ind; | ||
| 1865 | qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; | 1866 | qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; |
| 1866 | size0 = 0; | 1867 | size0 = 0; |
| 1867 | } | 1868 | } |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 785bc8505f2a..eec833b81e9b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c | |||
| @@ -257,10 +257,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even | |||
| 257 | cm_id->context = p; | 257 | cm_id->context = p; |
| 258 | p->jiffies = jiffies; | 258 | p->jiffies = jiffies; |
| 259 | spin_lock_irq(&priv->lock); | 259 | spin_lock_irq(&priv->lock); |
| 260 | if (list_empty(&priv->cm.passive_ids)) | ||
| 261 | queue_delayed_work(ipoib_workqueue, | ||
| 262 | &priv->cm.stale_task, IPOIB_CM_RX_DELAY); | ||
| 260 | list_add(&p->list, &priv->cm.passive_ids); | 263 | list_add(&p->list, &priv->cm.passive_ids); |
| 261 | spin_unlock_irq(&priv->lock); | 264 | spin_unlock_irq(&priv->lock); |
| 262 | queue_delayed_work(ipoib_workqueue, | ||
| 263 | &priv->cm.stale_task, IPOIB_CM_RX_DELAY); | ||
| 264 | return 0; | 265 | return 0; |
| 265 | 266 | ||
| 266 | err_rep: | 267 | err_rep: |
| @@ -378,8 +379,6 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) | |||
| 378 | if (!list_empty(&p->list)) | 379 | if (!list_empty(&p->list)) |
| 379 | list_move(&p->list, &priv->cm.passive_ids); | 380 | list_move(&p->list, &priv->cm.passive_ids); |
| 380 | spin_unlock_irqrestore(&priv->lock, flags); | 381 | spin_unlock_irqrestore(&priv->lock, flags); |
| 381 | queue_delayed_work(ipoib_workqueue, | ||
| 382 | &priv->cm.stale_task, IPOIB_CM_RX_DELAY); | ||
| 383 | } | 382 | } |
| 384 | } | 383 | } |
| 385 | 384 | ||
| @@ -1100,6 +1099,10 @@ static void ipoib_cm_stale_task(struct work_struct *work) | |||
| 1100 | kfree(p); | 1099 | kfree(p); |
| 1101 | spin_lock_irq(&priv->lock); | 1100 | spin_lock_irq(&priv->lock); |
| 1102 | } | 1101 | } |
| 1102 | |||
| 1103 | if (!list_empty(&priv->cm.passive_ids)) | ||
| 1104 | queue_delayed_work(ipoib_workqueue, | ||
| 1105 | &priv->cm.stale_task, IPOIB_CM_RX_DELAY); | ||
| 1103 | spin_unlock_irq(&priv->lock); | 1106 | spin_unlock_irq(&priv->lock); |
| 1104 | } | 1107 | } |
| 1105 | 1108 | ||
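
The ipoib_cm change turns the stale-connection reaper into self-rearming work: the delayed work is queued only when the first passive connection is added (under priv->lock), and the work function re-queues itself at the end while the list is still non-empty, instead of every RX completion poking the work queue. A reduced sketch of that pattern with generic names (not the IPoIB code):

#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static void demo_reaper(struct work_struct *work);
static DECLARE_DELAYED_WORK(demo_reap_work, demo_reaper);
static LIST_HEAD(demo_list);
static DEFINE_SPINLOCK(demo_lock);

static void demo_add_entry(struct list_head *entry)
{
        spin_lock_irq(&demo_lock);
        if (list_empty(&demo_list))             /* first entry arms the work */
                schedule_delayed_work(&demo_reap_work, HZ);
        list_add(entry, &demo_list);
        spin_unlock_irq(&demo_lock);
}

static void demo_reaper(struct work_struct *work)
{
        spin_lock_irq(&demo_lock);
        /* ... drop stale entries from demo_list here ... */
        if (!list_empty(&demo_list))            /* re-arm only while needed */
                schedule_delayed_work(&demo_reap_work, HZ);
        spin_unlock_irq(&demo_lock);
}

This avoids queueing work on every received completion and guarantees the work stops rescheduling itself once the list drains.
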
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index fb99cd445504..c5baa197bc08 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig | |||
| @@ -2508,6 +2508,7 @@ config MLX4_CORE | |||
| 2508 | 2508 | ||
| 2509 | config MLX4_DEBUG | 2509 | config MLX4_DEBUG |
| 2510 | bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED) | 2510 | bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED) |
| 2511 | depends on MLX4_CORE | ||
| 2511 | default y | 2512 | default y |
| 2512 | ---help--- | 2513 | ---help--- |
| 2513 | This option causes debugging code to be compiled into the | 2514 | This option causes debugging code to be compiled into the |
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 4debb024eaf9..20b8c0d3ced4 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c | |||
| @@ -542,8 +542,6 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev) | |||
| 542 | struct mlx4_priv *priv = mlx4_priv(dev); | 542 | struct mlx4_priv *priv = mlx4_priv(dev); |
| 543 | int err; | 543 | int err; |
| 544 | 544 | ||
| 545 | MLX4_INIT_DOORBELL_LOCK(&priv->doorbell_lock); | ||
| 546 | |||
| 547 | err = mlx4_init_uar_table(dev); | 545 | err = mlx4_init_uar_table(dev); |
| 548 | if (err) { | 546 | if (err) { |
| 549 | mlx4_err(dev, "Failed to initialize " | 547 | mlx4_err(dev, "Failed to initialize " |
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 9befbae3d196..3d3b6d24d8d3 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h | |||
| @@ -275,7 +275,6 @@ struct mlx4_priv { | |||
| 275 | 275 | ||
| 276 | struct mlx4_uar driver_uar; | 276 | struct mlx4_uar driver_uar; |
| 277 | void __iomem *kar; | 277 | void __iomem *kar; |
| 278 | MLX4_DECLARE_DOORBELL_LOCK(doorbell_lock) | ||
| 279 | 278 | ||
| 280 | u32 rev_id; | 279 | u32 rev_id; |
| 281 | char board_id[MLX4_BOARD_ID_LEN]; | 280 | char board_id[MLX4_BOARD_ID_LEN]; |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1aad34ea61a4..0b73cd45a06d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * fs/eventpoll.c ( Efficent event polling implementation ) | 2 | * fs/eventpoll.c (Efficent event polling implementation) |
| 3 | * Copyright (C) 2001,...,2006 Davide Libenzi | 3 | * Copyright (C) 2001,...,2007 Davide Libenzi |
| 4 | * | 4 | * |
| 5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
| 6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
| @@ -26,7 +26,6 @@ | |||
| 26 | #include <linux/hash.h> | 26 | #include <linux/hash.h> |
| 27 | #include <linux/spinlock.h> | 27 | #include <linux/spinlock.h> |
| 28 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
| 29 | #include <linux/rwsem.h> | ||
| 30 | #include <linux/rbtree.h> | 29 | #include <linux/rbtree.h> |
| 31 | #include <linux/wait.h> | 30 | #include <linux/wait.h> |
| 32 | #include <linux/eventpoll.h> | 31 | #include <linux/eventpoll.h> |
| @@ -39,15 +38,14 @@ | |||
| 39 | #include <asm/io.h> | 38 | #include <asm/io.h> |
| 40 | #include <asm/mman.h> | 39 | #include <asm/mman.h> |
| 41 | #include <asm/atomic.h> | 40 | #include <asm/atomic.h> |
| 42 | #include <asm/semaphore.h> | ||
| 43 | 41 | ||
| 44 | /* | 42 | /* |
| 45 | * LOCKING: | 43 | * LOCKING: |
| 46 | * There are three level of locking required by epoll : | 44 | * There are three level of locking required by epoll : |
| 47 | * | 45 | * |
| 48 | * 1) epmutex (mutex) | 46 | * 1) epmutex (mutex) |
| 49 | * 2) ep->sem (rw_semaphore) | 47 | * 2) ep->mtx (mutex) |
| 50 | * 3) ep->lock (rw_lock) | 48 | * 3) ep->lock (spinlock) |
| 51 | * | 49 | * |
| 52 | * The acquire order is the one listed above, from 1 to 3. | 50 | * The acquire order is the one listed above, from 1 to 3. |
| 53 | * We need a spinlock (ep->lock) because we manipulate objects | 51 | * We need a spinlock (ep->lock) because we manipulate objects |
| @@ -57,20 +55,20 @@ | |||
| 57 | * a spinlock. During the event transfer loop (from kernel to | 55 | * a spinlock. During the event transfer loop (from kernel to |
| 58 | * user space) we could end up sleeping due a copy_to_user(), so | 56 | * user space) we could end up sleeping due a copy_to_user(), so |
| 59 | * we need a lock that will allow us to sleep. This lock is a | 57 | * we need a lock that will allow us to sleep. This lock is a |
| 60 | * read-write semaphore (ep->sem). It is acquired on read during | 58 | * mutex (ep->mtx). It is acquired during the event transfer loop, |
| 61 | * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) | 59 | * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). |
| 62 | * and during eventpoll_release_file(). Then we also need a global | 60 | * Then we also need a global mutex to serialize eventpoll_release_file() |
| 63 | * semaphore to serialize eventpoll_release_file() and ep_free(). | 61 | * and ep_free(). |
| 64 | * This semaphore is acquired by ep_free() during the epoll file | 62 | * This mutex is acquired by ep_free() during the epoll file |
| 65 | * cleanup path and it is also acquired by eventpoll_release_file() | 63 | * cleanup path and it is also acquired by eventpoll_release_file() |
| 66 | * if a file has been pushed inside an epoll set and it is then | 64 | * if a file has been pushed inside an epoll set and it is then |
| 67 | * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). | 65 | * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). |
| 68 | * It is possible to drop the "ep->sem" and to use the global | 66 | * It is possible to drop the "ep->mtx" and to use the global |
| 69 | * semaphore "epmutex" (together with "ep->lock") to have it working, | 67 | * mutex "epmutex" (together with "ep->lock") to have it working, |
| 70 | * but having "ep->sem" will make the interface more scalable. | 68 | * but having "ep->mtx" will make the interface more scalable. |
| 71 | * Events that require holding "epmutex" are very rare, while for | 69 | * Events that require holding "epmutex" are very rare, while for |
| 72 | * normal operations the epoll private "ep->sem" will guarantee | 70 | * normal operations the epoll private "ep->mtx" will guarantee |
| 73 | * a greater scalability. | 71 | * a better scalability. |
| 74 | */ | 72 | */ |
| 75 | 73 | ||
| 76 | #define DEBUG_EPOLL 0 | 74 | #define DEBUG_EPOLL 0 |
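
The rewritten LOCKING comment describes the new three-level hierarchy: the global epmutex, the per-instance ep->mtx (a sleepable mutex replacing the old rw_semaphore), and the per-instance ep->lock (a plain spinlock replacing the rwlock). In code terms a caller nests them in exactly that order, and only ep->lock is ever taken from the poll-callback path; the following fragment is just a sketch of the documented acquire order (ep and flags are assumed to be in scope), not a real epoll code path:

mutex_lock(&epmutex);                   /* 1) global, rare: ep_free() vs. eventpoll_release_file() */
mutex_lock(&ep->mtx);                   /* 2) per-epoll, may sleep: ctl ops and the transfer loop */
spin_lock_irqsave(&ep->lock, flags);    /* 3) per-epoll, also taken from wakeup callbacks */
/* ... manipulate ep->rdllist / ep->ovflist here ... */
spin_unlock_irqrestore(&ep->lock, flags);
mutex_unlock(&ep->mtx);
mutex_unlock(&epmutex);
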
| @@ -102,6 +100,8 @@ | |||
| 102 | 100 | ||
| 103 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) | 101 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) |
| 104 | 102 | ||
| 103 | #define EP_UNACTIVE_PTR ((void *) -1L) | ||
| 104 | |||
| 105 | struct epoll_filefd { | 105 | struct epoll_filefd { |
| 106 | struct file *file; | 106 | struct file *file; |
| 107 | int fd; | 107 | int fd; |
| @@ -111,7 +111,7 @@ struct epoll_filefd { | |||
| 111 | * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". | 111 | * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". |
| 112 | * It is used to keep track on all tasks that are currently inside the wake_up() code | 112 | * It is used to keep track on all tasks that are currently inside the wake_up() code |
| 113 | * to 1) short-circuit the one coming from the same task and same wait queue head | 113 | * to 1) short-circuit the one coming from the same task and same wait queue head |
| 114 | * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting | 114 | * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting |
| 115 | * 3) let go the ones coming from other tasks. | 115 | * 3) let go the ones coming from other tasks. |
| 116 | */ | 116 | */ |
| 117 | struct wake_task_node { | 117 | struct wake_task_node { |
| @@ -130,21 +130,57 @@ struct poll_safewake { | |||
| 130 | }; | 130 | }; |
| 131 | 131 | ||
| 132 | /* | 132 | /* |
| 133 | * Each file descriptor added to the eventpoll interface will | ||
| 134 | * have an entry of this type linked to the "rbr" RB tree. | ||
| 135 | */ | ||
| 136 | struct epitem { | ||
| 137 | /* RB tree node used to link this structure to the eventpoll RB tree */ | ||
| 138 | struct rb_node rbn; | ||
| 139 | |||
| 140 | /* List header used to link this structure to the eventpoll ready list */ | ||
| 141 | struct list_head rdllink; | ||
| 142 | |||
| 143 | /* | ||
| 144 | * Works together "struct eventpoll"->ovflist in keeping the | ||
| 145 | * single linked chain of items. | ||
| 146 | */ | ||
| 147 | struct epitem *next; | ||
| 148 | |||
| 149 | /* The file descriptor information this item refers to */ | ||
| 150 | struct epoll_filefd ffd; | ||
| 151 | |||
| 152 | /* Number of active wait queue attached to poll operations */ | ||
| 153 | int nwait; | ||
| 154 | |||
| 155 | /* List containing poll wait queues */ | ||
| 156 | struct list_head pwqlist; | ||
| 157 | |||
| 158 | /* The "container" of this item */ | ||
| 159 | struct eventpoll *ep; | ||
| 160 | |||
| 161 | /* List header used to link this item to the "struct file" items list */ | ||
| 162 | struct list_head fllink; | ||
| 163 | |||
| 164 | /* The structure that describe the interested events and the source fd */ | ||
| 165 | struct epoll_event event; | ||
| 166 | }; | ||
| 167 | |||
| 168 | /* | ||
| 133 | * This structure is stored inside the "private_data" member of the file | 169 | * This structure is stored inside the "private_data" member of the file |
| 134 | * structure and rapresent the main data sructure for the eventpoll | 170 | * structure and rapresent the main data sructure for the eventpoll |
| 135 | * interface. | 171 | * interface. |
| 136 | */ | 172 | */ |
| 137 | struct eventpoll { | 173 | struct eventpoll { |
| 138 | /* Protect the this structure access */ | 174 | /* Protect the this structure access */ |
| 139 | rwlock_t lock; | 175 | spinlock_t lock; |
| 140 | 176 | ||
| 141 | /* | 177 | /* |
| 142 | * This semaphore is used to ensure that files are not removed | 178 | * This mutex is used to ensure that files are not removed |
| 143 | * while epoll is using them. This is read-held during the event | 179 | * while epoll is using them. This is held during the event |
| 144 | * collection loop and it is write-held during the file cleanup | 180 | * collection loop, the file cleanup path, the epoll file exit |
| 145 | * path, the epoll file exit code and the ctl operations. | 181 | * code and the ctl operations. |
| 146 | */ | 182 | */ |
| 147 | struct rw_semaphore sem; | 183 | struct mutex mtx; |
| 148 | 184 | ||
| 149 | /* Wait queue used by sys_epoll_wait() */ | 185 | /* Wait queue used by sys_epoll_wait() */ |
| 150 | wait_queue_head_t wq; | 186 | wait_queue_head_t wq; |
| @@ -155,8 +191,15 @@ struct eventpoll { | |||
| 155 | /* List of ready file descriptors */ | 191 | /* List of ready file descriptors */ |
| 156 | struct list_head rdllist; | 192 | struct list_head rdllist; |
| 157 | 193 | ||
| 158 | /* RB-Tree root used to store monitored fd structs */ | 194 | /* RB tree root used to store monitored fd structs */ |
| 159 | struct rb_root rbr; | 195 | struct rb_root rbr; |
| 196 | |||
| 197 | /* | ||
| 198 | * This is a single linked list that chains all the "struct epitem" that | ||
| 199 | * happened while transfering ready events to userspace w/out | ||
| 200 | * holding ->lock. | ||
| 201 | */ | ||
| 202 | struct epitem *ovflist; | ||
| 160 | }; | 203 | }; |
| 161 | 204 | ||
| 162 | /* Wait structure used by the poll hooks */ | 205 | /* Wait structure used by the poll hooks */ |
| @@ -177,42 +220,6 @@ struct eppoll_entry { | |||
| 177 | wait_queue_head_t *whead; | 220 | wait_queue_head_t *whead; |
| 178 | }; | 221 | }; |
| 179 | 222 | ||
| 180 | /* | ||
| 181 | * Each file descriptor added to the eventpoll interface will | ||
| 182 | * have an entry of this type linked to the "rbr" RB tree. | ||
| 183 | */ | ||
| 184 | struct epitem { | ||
| 185 | /* RB-Tree node used to link this structure to the eventpoll rb-tree */ | ||
| 186 | struct rb_node rbn; | ||
| 187 | |||
| 188 | /* List header used to link this structure to the eventpoll ready list */ | ||
| 189 | struct list_head rdllink; | ||
| 190 | |||
| 191 | /* The file descriptor information this item refers to */ | ||
| 192 | struct epoll_filefd ffd; | ||
| 193 | |||
| 194 | /* Number of active wait queue attached to poll operations */ | ||
| 195 | int nwait; | ||
| 196 | |||
| 197 | /* List containing poll wait queues */ | ||
| 198 | struct list_head pwqlist; | ||
| 199 | |||
| 200 | /* The "container" of this item */ | ||
| 201 | struct eventpoll *ep; | ||
| 202 | |||
| 203 | /* The structure that describe the interested events and the source fd */ | ||
| 204 | struct epoll_event event; | ||
| 205 | |||
| 206 | /* | ||
| 207 | * Used to keep track of the usage count of the structure. This avoids | ||
| 208 | * that the structure will desappear from underneath our processing. | ||
| 209 | */ | ||
| 210 | atomic_t usecnt; | ||
| 211 | |||
| 212 | /* List header used to link this item to the "struct file" items list */ | ||
| 213 | struct list_head fllink; | ||
| 214 | }; | ||
| 215 | |||
| 216 | /* Wrapper struct used by poll queueing */ | 223 | /* Wrapper struct used by poll queueing */ |
| 217 | struct ep_pqueue { | 224 | struct ep_pqueue { |
| 218 | poll_table pt; | 225 | poll_table pt; |
| @@ -220,7 +227,7 @@ struct ep_pqueue { | |||
| 220 | }; | 227 | }; |
| 221 | 228 | ||
| 222 | /* | 229 | /* |
| 223 | * This semaphore is used to serialize ep_free() and eventpoll_release_file(). | 230 | * This mutex is used to serialize ep_free() and eventpoll_release_file(). |
| 224 | */ | 231 | */ |
| 225 | static struct mutex epmutex; | 232 | static struct mutex epmutex; |
| 226 | 233 | ||
| @@ -234,7 +241,7 @@ static struct kmem_cache *epi_cache __read_mostly; | |||
| 234 | static struct kmem_cache *pwq_cache __read_mostly; | 241 | static struct kmem_cache *pwq_cache __read_mostly; |
| 235 | 242 | ||
| 236 | 243 | ||
| 237 | /* Setup the structure that is used as key for the rb-tree */ | 244 | /* Setup the structure that is used as key for the RB tree */ |
| 238 | static inline void ep_set_ffd(struct epoll_filefd *ffd, | 245 | static inline void ep_set_ffd(struct epoll_filefd *ffd, |
| 239 | struct file *file, int fd) | 246 | struct file *file, int fd) |
| 240 | { | 247 | { |
| @@ -242,7 +249,7 @@ static inline void ep_set_ffd(struct epoll_filefd *ffd, | |||
| 242 | ffd->fd = fd; | 249 | ffd->fd = fd; |
| 243 | } | 250 | } |
| 244 | 251 | ||
| 245 | /* Compare rb-tree keys */ | 252 | /* Compare RB tree keys */ |
| 246 | static inline int ep_cmp_ffd(struct epoll_filefd *p1, | 253 | static inline int ep_cmp_ffd(struct epoll_filefd *p1, |
| 247 | struct epoll_filefd *p2) | 254 | struct epoll_filefd *p2) |
| 248 | { | 255 | { |
| @@ -250,20 +257,20 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, | |||
| 250 | (p1->file < p2->file ? -1 : p1->fd - p2->fd)); | 257 | (p1->file < p2->file ? -1 : p1->fd - p2->fd)); |
| 251 | } | 258 | } |
| 252 | 259 | ||
| 253 | /* Special initialization for the rb-tree node to detect linkage */ | 260 | /* Special initialization for the RB tree node to detect linkage */ |
| 254 | static inline void ep_rb_initnode(struct rb_node *n) | 261 | static inline void ep_rb_initnode(struct rb_node *n) |
| 255 | { | 262 | { |
| 256 | rb_set_parent(n, n); | 263 | rb_set_parent(n, n); |
| 257 | } | 264 | } |
| 258 | 265 | ||
| 259 | /* Removes a node from the rb-tree and marks it for a fast is-linked check */ | 266 | /* Removes a node from the RB tree and marks it for a fast is-linked check */ |
| 260 | static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) | 267 | static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) |
| 261 | { | 268 | { |
| 262 | rb_erase(n, r); | 269 | rb_erase(n, r); |
| 263 | rb_set_parent(n, n); | 270 | rb_set_parent(n, n); |
| 264 | } | 271 | } |
| 265 | 272 | ||
| 266 | /* Fast check to verify that the item is linked to the main rb-tree */ | 273 | /* Fast check to verify that the item is linked to the main RB tree */ |
| 267 | static inline int ep_rb_linked(struct rb_node *n) | 274 | static inline int ep_rb_linked(struct rb_node *n) |
| 268 | { | 275 | { |
| 269 | return rb_parent(n) != n; | 276 | return rb_parent(n) != n; |
| @@ -381,78 +388,11 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) | |||
| 381 | } | 388 | } |
| 382 | 389 | ||
| 383 | /* | 390 | /* |
| 384 | * Unlink the "struct epitem" from all places it might have been hooked up. | ||
| 385 | * This function must be called with write IRQ lock on "ep->lock". | ||
| 386 | */ | ||
| 387 | static int ep_unlink(struct eventpoll *ep, struct epitem *epi) | ||
| 388 | { | ||
| 389 | int error; | ||
| 390 | |||
| 391 | /* | ||
| 392 | * It can happen that this one is called for an item already unlinked. | ||
| 393 | * The check protect us from doing a double unlink ( crash ). | ||
| 394 | */ | ||
| 395 | error = -ENOENT; | ||
| 396 | if (!ep_rb_linked(&epi->rbn)) | ||
| 397 | goto error_return; | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Clear the event mask for the unlinked item. This will avoid item | ||
| 401 | * notifications to be sent after the unlink operation from inside | ||
| 402 | * the kernel->userspace event transfer loop. | ||
| 403 | */ | ||
| 404 | epi->event.events = 0; | ||
| 405 | |||
| 406 | /* | ||
| 407 | * At this point is safe to do the job, unlink the item from our rb-tree. | ||
| 408 | * This operation togheter with the above check closes the door to | ||
| 409 | * double unlinks. | ||
| 410 | */ | ||
| 411 | ep_rb_erase(&epi->rbn, &ep->rbr); | ||
| 412 | |||
| 413 | /* | ||
| 414 | * If the item we are going to remove is inside the ready file descriptors | ||
| 415 | * we want to remove it from this list to avoid stale events. | ||
| 416 | */ | ||
| 417 | if (ep_is_linked(&epi->rdllink)) | ||
| 418 | list_del_init(&epi->rdllink); | ||
| 419 | |||
| 420 | error = 0; | ||
| 421 | error_return: | ||
| 422 | |||
| 423 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", | ||
| 424 | current, ep, epi->ffd.file, error)); | ||
| 425 | |||
| 426 | return error; | ||
| 427 | } | ||
| 428 | |||
| 429 | /* | ||
| 430 | * Increment the usage count of the "struct epitem" making it sure | ||
| 431 | * that the user will have a valid pointer to reference. | ||
| 432 | */ | ||
| 433 | static void ep_use_epitem(struct epitem *epi) | ||
| 434 | { | ||
| 435 | atomic_inc(&epi->usecnt); | ||
| 436 | } | ||
| 437 | |||
| 438 | /* | ||
| 439 | * Decrement ( release ) the usage count by signaling that the user | ||
| 440 | * has finished using the structure. It might lead to freeing the | ||
| 441 | * structure itself if the count goes to zero. | ||
| 442 | */ | ||
| 443 | static void ep_release_epitem(struct epitem *epi) | ||
| 444 | { | ||
| 445 | if (atomic_dec_and_test(&epi->usecnt)) | ||
| 446 | kmem_cache_free(epi_cache, epi); | ||
| 447 | } | ||
| 448 | |||
| 449 | /* | ||
| 450 | * Removes a "struct epitem" from the eventpoll RB tree and deallocates | 391 | * Removes a "struct epitem" from the eventpoll RB tree and deallocates |
| 451 | * all the associated resources. | 392 | * all the associated resources. Must be called with "mtx" held. |
| 452 | */ | 393 | */ |
| 453 | static int ep_remove(struct eventpoll *ep, struct epitem *epi) | 394 | static int ep_remove(struct eventpoll *ep, struct epitem *epi) |
| 454 | { | 395 | { |
| 455 | int error; | ||
| 456 | unsigned long flags; | 396 | unsigned long flags; |
| 457 | struct file *file = epi->ffd.file; | 397 | struct file *file = epi->ffd.file; |
| 458 | 398 | ||
| @@ -472,26 +412,21 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) | |||
| 472 | list_del_init(&epi->fllink); | 412 | list_del_init(&epi->fllink); |
| 473 | spin_unlock(&file->f_ep_lock); | 413 | spin_unlock(&file->f_ep_lock); |
| 474 | 414 | ||
| 475 | /* We need to acquire the write IRQ lock before calling ep_unlink() */ | 415 | if (ep_rb_linked(&epi->rbn)) |
| 476 | write_lock_irqsave(&ep->lock, flags); | 416 | ep_rb_erase(&epi->rbn, &ep->rbr); |
| 477 | |||
| 478 | /* Really unlink the item from the RB tree */ | ||
| 479 | error = ep_unlink(ep, epi); | ||
| 480 | |||
| 481 | write_unlock_irqrestore(&ep->lock, flags); | ||
| 482 | 417 | ||
| 483 | if (error) | 418 | spin_lock_irqsave(&ep->lock, flags); |
| 484 | goto error_return; | 419 | if (ep_is_linked(&epi->rdllink)) |
| 420 | list_del_init(&epi->rdllink); | ||
| 421 | spin_unlock_irqrestore(&ep->lock, flags); | ||
| 485 | 422 | ||
| 486 | /* At this point it is safe to free the eventpoll item */ | 423 | /* At this point it is safe to free the eventpoll item */ |
| 487 | ep_release_epitem(epi); | 424 | kmem_cache_free(epi_cache, epi); |
| 488 | 425 | ||
| 489 | error = 0; | 426 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", |
| 490 | error_return: | 427 | current, ep, file)); |
| 491 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", | ||
| 492 | current, ep, file, error)); | ||
| 493 | 428 | ||
| 494 | return error; | 429 | return 0; |
| 495 | } | 430 | } |
| 496 | 431 | ||
| 497 | static void ep_free(struct eventpoll *ep) | 432 | static void ep_free(struct eventpoll *ep) |
| @@ -506,7 +441,7 @@ static void ep_free(struct eventpoll *ep) | |||
| 506 | /* | 441 | /* |
| 507 | * We need to lock this because we could be hit by | 442 | * We need to lock this because we could be hit by |
| 508 | * eventpoll_release_file() while we're freeing the "struct eventpoll". | 443 | * eventpoll_release_file() while we're freeing the "struct eventpoll". |
| 509 | * We do not need to hold "ep->sem" here because the epoll file | 444 | * We do not need to hold "ep->mtx" here because the epoll file |
| 510 | * is on the way to be removed and no one has references to it | 445 | * is on the way to be removed and no one has references to it |
| 511 | * anymore. The only hit might come from eventpoll_release_file() but | 446 | * anymore. The only hit might come from eventpoll_release_file() but |
| 512 | * holding "epmutex" is sufficent here. | 447 | * holding "epmutex" is sufficent here. |
| @@ -525,7 +460,7 @@ static void ep_free(struct eventpoll *ep) | |||
| 525 | /* | 460 | /* |
| 526 | * Walks through the whole tree by freeing each "struct epitem". At this | 461 | * Walks through the whole tree by freeing each "struct epitem". At this |
| 527 | * point we are sure no poll callbacks will be lingering around, and also by | 462 | * point we are sure no poll callbacks will be lingering around, and also by |
| 528 | * write-holding "sem" we can be sure that no file cleanup code will hit | 463 | * holding "epmutex" we can be sure that no file cleanup code will hit |
| 529 | * us during this operation. So we can avoid the lock on "ep->lock". | 464 | * us during this operation. So we can avoid the lock on "ep->lock". |
| 530 | */ | 465 | */ |
| 531 | while ((rbp = rb_first(&ep->rbr)) != 0) { | 466 | while ((rbp = rb_first(&ep->rbr)) != 0) { |
| @@ -534,16 +469,16 @@ static void ep_free(struct eventpoll *ep) | |||
| 534 | } | 469 | } |
| 535 | 470 | ||
| 536 | mutex_unlock(&epmutex); | 471 | mutex_unlock(&epmutex); |
| 472 | mutex_destroy(&ep->mtx); | ||
| 473 | kfree(ep); | ||
| 537 | } | 474 | } |
| 538 | 475 | ||
| 539 | static int ep_eventpoll_release(struct inode *inode, struct file *file) | 476 | static int ep_eventpoll_release(struct inode *inode, struct file *file) |
| 540 | { | 477 | { |
| 541 | struct eventpoll *ep = file->private_data; | 478 | struct eventpoll *ep = file->private_data; |
| 542 | 479 | ||
| 543 | if (ep) { | 480 | if (ep) |
| 544 | ep_free(ep); | 481 | ep_free(ep); |
| 545 | kfree(ep); | ||
| 546 | } | ||
| 547 | 482 | ||
| 548 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); | 483 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); |
| 549 | return 0; | 484 | return 0; |
| @@ -559,10 +494,10 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | |||
| 559 | poll_wait(file, &ep->poll_wait, wait); | 494 | poll_wait(file, &ep->poll_wait, wait); |
| 560 | 495 | ||
| 561 | /* Check our condition */ | 496 | /* Check our condition */ |
| 562 | read_lock_irqsave(&ep->lock, flags); | 497 | spin_lock_irqsave(&ep->lock, flags); |
| 563 | if (!list_empty(&ep->rdllist)) | 498 | if (!list_empty(&ep->rdllist)) |
| 564 | pollflags = POLLIN | POLLRDNORM; | 499 | pollflags = POLLIN | POLLRDNORM; |
| 565 | read_unlock_irqrestore(&ep->lock, flags); | 500 | spin_unlock_irqrestore(&ep->lock, flags); |
| 566 | 501 | ||
| 567 | return pollflags; | 502 | return pollflags; |
| 568 | } | 503 | } |
| @@ -594,9 +529,11 @@ void eventpoll_release_file(struct file *file) | |||
| 594 | * We don't want to get "file->f_ep_lock" because it is not | 529 | * We don't want to get "file->f_ep_lock" because it is not |
| 595 | * necessary. It is not necessary because we're in the "struct file" | 530 | * necessary. It is not necessary because we're in the "struct file" |
| 596 | * cleanup path, and this means that noone is using this file anymore. | 531 | * cleanup path, and this means that noone is using this file anymore. |
| 597 | * The only hit might come from ep_free() but by holding the semaphore | 532 | * So, for example, epoll_ctl() cannot hit here sicne if we reach this |
| 533 | * point, the file counter already went to zero and fget() would fail. | ||
| 534 | * The only hit might come from ep_free() but by holding the mutex | ||
| 598 | * will correctly serialize the operation. We do need to acquire | 535 | * will correctly serialize the operation. We do need to acquire |
| 599 | * "ep->sem" after "epmutex" because ep_remove() requires it when called | 536 | * "ep->mtx" after "epmutex" because ep_remove() requires it when called |
| 600 | * from anywhere but ep_free(). | 537 | * from anywhere but ep_free(). |
| 601 | */ | 538 | */ |
| 602 | mutex_lock(&epmutex); | 539 | mutex_lock(&epmutex); |
| @@ -606,9 +543,9 @@ void eventpoll_release_file(struct file *file) | |||
| 606 | 543 | ||
| 607 | ep = epi->ep; | 544 | ep = epi->ep; |
| 608 | list_del_init(&epi->fllink); | 545 | list_del_init(&epi->fllink); |
| 609 | down_write(&ep->sem); | 546 | mutex_lock(&ep->mtx); |
| 610 | ep_remove(ep, epi); | 547 | ep_remove(ep, epi); |
| 611 | up_write(&ep->sem); | 548 | mutex_unlock(&ep->mtx); |
| 612 | } | 549 | } |
| 613 | 550 | ||
| 614 | mutex_unlock(&epmutex); | 551 | mutex_unlock(&epmutex); |
| @@ -621,12 +558,13 @@ static int ep_alloc(struct eventpoll **pep) | |||
| 621 | if (!ep) | 558 | if (!ep) |
| 622 | return -ENOMEM; | 559 | return -ENOMEM; |
| 623 | 560 | ||
| 624 | rwlock_init(&ep->lock); | 561 | spin_lock_init(&ep->lock); |
| 625 | init_rwsem(&ep->sem); | 562 | mutex_init(&ep->mtx); |
| 626 | init_waitqueue_head(&ep->wq); | 563 | init_waitqueue_head(&ep->wq); |
| 627 | init_waitqueue_head(&ep->poll_wait); | 564 | init_waitqueue_head(&ep->poll_wait); |
| 628 | INIT_LIST_HEAD(&ep->rdllist); | 565 | INIT_LIST_HEAD(&ep->rdllist); |
| 629 | ep->rbr = RB_ROOT; | 566 | ep->rbr = RB_ROOT; |
| 567 | ep->ovflist = EP_UNACTIVE_PTR; | ||
| 630 | 568 | ||
| 631 | *pep = ep; | 569 | *pep = ep; |
| 632 | 570 | ||
| @@ -636,20 +574,18 @@ static int ep_alloc(struct eventpoll **pep) | |||
| 636 | } | 574 | } |
| 637 | 575 | ||
| 638 | /* | 576 | /* |
| 639 | * Search the file inside the eventpoll tree. It add usage count to | 577 | * Search the file inside the eventpoll tree. The RB tree operations |
| 640 | * the returned item, so the caller must call ep_release_epitem() | 578 | * are protected by the "mtx" mutex, and ep_find() must be called with |
| 641 | * after finished using the "struct epitem". | 579 | * "mtx" held. |
| 642 | */ | 580 | */ |
| 643 | static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) | 581 | static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) |
| 644 | { | 582 | { |
| 645 | int kcmp; | 583 | int kcmp; |
| 646 | unsigned long flags; | ||
| 647 | struct rb_node *rbp; | 584 | struct rb_node *rbp; |
| 648 | struct epitem *epi, *epir = NULL; | 585 | struct epitem *epi, *epir = NULL; |
| 649 | struct epoll_filefd ffd; | 586 | struct epoll_filefd ffd; |
| 650 | 587 | ||
| 651 | ep_set_ffd(&ffd, file, fd); | 588 | ep_set_ffd(&ffd, file, fd); |
| 652 | read_lock_irqsave(&ep->lock, flags); | ||
| 653 | for (rbp = ep->rbr.rb_node; rbp; ) { | 589 | for (rbp = ep->rbr.rb_node; rbp; ) { |
| 654 | epi = rb_entry(rbp, struct epitem, rbn); | 590 | epi = rb_entry(rbp, struct epitem, rbn); |
| 655 | kcmp = ep_cmp_ffd(&ffd, &epi->ffd); | 591 | kcmp = ep_cmp_ffd(&ffd, &epi->ffd); |
| @@ -658,12 +594,10 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) | |||
| 658 | else if (kcmp < 0) | 594 | else if (kcmp < 0) |
| 659 | rbp = rbp->rb_left; | 595 | rbp = rbp->rb_left; |
| 660 | else { | 596 | else { |
| 661 | ep_use_epitem(epi); | ||
| 662 | epir = epi; | 597 | epir = epi; |
| 663 | break; | 598 | break; |
| 664 | } | 599 | } |
| 665 | } | 600 | } |
| 666 | read_unlock_irqrestore(&ep->lock, flags); | ||
| 667 | 601 | ||
| 668 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", | 602 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", |
| 669 | current, file, epir)); | 603 | current, file, epir)); |
| @@ -686,7 +620,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
| 686 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", | 620 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", |
| 687 | current, epi->ffd.file, epi, ep)); | 621 | current, epi->ffd.file, epi, ep)); |
| 688 | 622 | ||
| 689 | write_lock_irqsave(&ep->lock, flags); | 623 | spin_lock_irqsave(&ep->lock, flags); |
| 690 | 624 | ||
| 691 | /* | 625 | /* |
| 692 | * If the event mask does not contain any poll(2) event, we consider the | 626 | * If the event mask does not contain any poll(2) event, we consider the |
| @@ -695,7 +629,21 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
| 695 | * until the next EPOLL_CTL_MOD will be issued. | 629 | * until the next EPOLL_CTL_MOD will be issued. |
| 696 | */ | 630 | */ |
| 697 | if (!(epi->event.events & ~EP_PRIVATE_BITS)) | 631 | if (!(epi->event.events & ~EP_PRIVATE_BITS)) |
| 698 | goto is_disabled; | 632 | goto out_unlock; |
| 633 | |||
| 634 | /* | ||
| 635 | * If we are trasfering events to userspace, we can hold no locks | ||
| 636 | * (because we're accessing user memory, and because of linux f_op->poll() | ||
| 637 | * semantics). All the events that happens during that period of time are | ||
| 638 | * chained in ep->ovflist and requeued later on. | ||
| 639 | */ | ||
| 640 | if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { | ||
| 641 | if (epi->next == EP_UNACTIVE_PTR) { | ||
| 642 | epi->next = ep->ovflist; | ||
| 643 | ep->ovflist = epi; | ||
| 644 | } | ||
| 645 | goto out_unlock; | ||
| 646 | } | ||
| 699 | 647 | ||
| 700 | /* If this file is already in the ready list we exit soon */ | 648 | /* If this file is already in the ready list we exit soon */ |
| 701 | if (ep_is_linked(&epi->rdllink)) | 649 | if (ep_is_linked(&epi->rdllink)) |
| @@ -714,8 +662,8 @@ is_linked: | |||
| 714 | if (waitqueue_active(&ep->poll_wait)) | 662 | if (waitqueue_active(&ep->poll_wait)) |
| 715 | pwake++; | 663 | pwake++; |
| 716 | 664 | ||
| 717 | is_disabled: | 665 | out_unlock: |
| 718 | write_unlock_irqrestore(&ep->lock, flags); | 666 | spin_unlock_irqrestore(&ep->lock, flags); |
| 719 | 667 | ||
| 720 | /* We have to call this outside the lock */ | 668 | /* We have to call this outside the lock */ |
| 721 | if (pwake) | 669 | if (pwake) |
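
The ovflist chain introduced in the hunk above closes a specific window: while events are being copied to user space the code cannot hold ep->lock (it touches user memory and relies on f_op->poll() semantics), so any event that fires during that window is parked on a lock-free single-linked list and spliced back onto rdllist afterwards. The callback side is shown above; the transfer side does roughly the following (a simplified sketch, not the literal fs/eventpoll.c code; txlist, epi, nepi and flags are assumed locals):

spin_lock_irqsave(&ep->lock, flags);
list_splice_init(&ep->rdllist, &txlist);        /* steal the ready list */
ep->ovflist = NULL;                             /* marks "transfer in progress" */
spin_unlock_irqrestore(&ep->lock, flags);

/* ... walk txlist and copy events to user space, with no ep->lock held ... */

spin_lock_irqsave(&ep->lock, flags);
/* requeue everything that fired while we were copying */
for (epi = ep->ovflist; epi != NULL; epi = nepi) {
        nepi = epi->next;
        epi->next = EP_UNACTIVE_PTR;
        if (!ep_is_linked(&epi->rdllink))
                list_add_tail(&epi->rdllink, &ep->rdllist);
}
ep->ovflist = EP_UNACTIVE_PTR;                  /* back to normal operation */
spin_unlock_irqrestore(&ep->lock, flags);
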
| @@ -766,6 +714,9 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | |||
| 766 | rb_insert_color(&epi->rbn, &ep->rbr); | 714 | rb_insert_color(&epi->rbn, &ep->rbr); |
| 767 | } | 715 | } |
| 768 | 716 | ||
| 717 | /* | ||
| 718 | * Must be called with "mtx" held. | ||
| 719 | */ | ||
| 769 | static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | 720 | static int ep_insert(struct eventpoll *ep, struct epoll_event *event, |
| 770 | struct file *tfile, int fd) | 721 | struct file *tfile, int fd) |
| 771 | { | 722 | { |
| @@ -786,8 +737,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
| 786 | epi->ep = ep; | 737 | epi->ep = ep; |
| 787 | ep_set_ffd(&epi->ffd, tfile, fd); | 738 | ep_set_ffd(&epi->ffd, tfile, fd); |
| 788 | epi->event = *event; | 739 | epi->event = *event; |
| 789 | atomic_set(&epi->usecnt, 1); | ||
| 790 | epi->nwait = 0; | 740 | epi->nwait = 0; |
| 741 | epi->next = EP_UNACTIVE_PTR; | ||
| 791 | 742 | ||
| 792 | /* Initialize the poll table using the queue callback */ | 743 | /* Initialize the poll table using the queue callback */ |
| 793 | epq.epi = epi; | 744 | epq.epi = epi; |
| @@ -796,7 +747,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
| 796 | /* | 747 | /* |
| 797 | * Attach the item to the poll hooks and get current event bits. | 748 | * Attach the item to the poll hooks and get current event bits. |
| 798 | * We can safely use the file* here because its usage count has | 749 | * We can safely use the file* here because its usage count has |
| 799 | * been increased by the caller of this function. | 750 | * been increased by the caller of this function. Note that after |
| 751 | * this operation completes, the poll callback can start hitting | ||
| 752 | * the new item. | ||
| 800 | */ | 753 | */ |
| 801 | revents = tfile->f_op->poll(tfile, &epq.pt); | 754 | revents = tfile->f_op->poll(tfile, &epq.pt); |
| 802 | 755 | ||
| @@ -813,12 +766,15 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
| 813 | list_add_tail(&epi->fllink, &tfile->f_ep_links); | 766 | list_add_tail(&epi->fllink, &tfile->f_ep_links); |
| 814 | spin_unlock(&tfile->f_ep_lock); | 767 | spin_unlock(&tfile->f_ep_lock); |
| 815 | 768 | ||
| 816 | /* We have to drop the new item inside our item list to keep track of it */ | 769 | /* |
| 817 | write_lock_irqsave(&ep->lock, flags); | 770 | * Add the current item to the RB tree. All RB tree operations are |
| 818 | 771 | * protected by "mtx", and ep_insert() is called with "mtx" held. | |
| 819 | /* Add the current item to the rb-tree */ | 772 | */ |
| 820 | ep_rbtree_insert(ep, epi); | 773 | ep_rbtree_insert(ep, epi); |
| 821 | 774 | ||
| 775 | /* We have to drop the new item inside our item list to keep track of it */ | ||
| 776 | spin_lock_irqsave(&ep->lock, flags); | ||
| 777 | |||
| 822 | /* If the file is already "ready" we drop it inside the ready list */ | 778 | /* If the file is already "ready" we drop it inside the ready list */ |
| 823 | if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { | 779 | if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { |
| 824 | list_add_tail(&epi->rdllink, &ep->rdllist); | 780 | list_add_tail(&epi->rdllink, &ep->rdllist); |
| @@ -830,7 +786,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
| 830 | pwake++; | 786 | pwake++; |
| 831 | } | 787 | } |
| 832 | 788 | ||
| 833 | write_unlock_irqrestore(&ep->lock, flags); | 789 | spin_unlock_irqrestore(&ep->lock, flags); |
| 834 | 790 | ||
| 835 | /* We have to call this outside the lock */ | 791 | /* We have to call this outside the lock */ |
| 836 | if (pwake) | 792 | if (pwake) |
| @@ -846,12 +802,14 @@ error_unregister: | |||
| 846 | 802 | ||
| 847 | /* | 803 | /* |
| 848 | * We need to do this because an event could have been arrived on some | 804 | * We need to do this because an event could have been arrived on some |
| 849 | * allocated wait queue. | 805 | * allocated wait queue. Note that we don't care about the ep->ovflist |
| 806 | * list, since that is used/cleaned only inside a section bound by "mtx". | ||
| 807 | * And ep_insert() is called with "mtx" held. | ||
| 850 | */ | 808 | */ |
| 851 | write_lock_irqsave(&ep->lock, flags); | 809 | spin_lock_irqsave(&ep->lock, flags); |
| 852 | if (ep_is_linked(&epi->rdllink)) | 810 | if (ep_is_linked(&epi->rdllink)) |
| 853 | list_del_init(&epi->rdllink); | 811 | list_del_init(&epi->rdllink); |
| 854 | write_unlock_irqrestore(&ep->lock, flags); | 812 | spin_unlock_irqrestore(&ep->lock, flags); |
| 855 | 813 | ||
| 856 | kmem_cache_free(epi_cache, epi); | 814 | kmem_cache_free(epi_cache, epi); |
| 857 | error_return: | 815 | error_return: |
| @@ -860,7 +818,7 @@ error_return: | |||
| 860 | 818 | ||
| 861 | /* | 819 | /* |
| 862 | * Modify the interest event mask by dropping an event if the new mask | 820 | * Modify the interest event mask by dropping an event if the new mask |
| 863 | * has a match in the current file status. | 821 | * has a match in the current file status. Must be called with "mtx" held. |
| 864 | */ | 822 | */ |
| 865 | static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) | 823 | static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) |
| 866 | { | 824 | { |
| @@ -882,36 +840,28 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
| 882 | */ | 840 | */ |
| 883 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); | 841 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); |
| 884 | 842 | ||
| 885 | write_lock_irqsave(&ep->lock, flags); | 843 | spin_lock_irqsave(&ep->lock, flags); |
| 886 | 844 | ||
| 887 | /* Copy the data member from inside the lock */ | 845 | /* Copy the data member from inside the lock */ |
| 888 | epi->event.data = event->data; | 846 | epi->event.data = event->data; |
| 889 | 847 | ||
| 890 | /* | 848 | /* |
| 891 | * If the item is not linked to the RB tree it means that it's on its | 849 | * If the item is "hot" and it is not registered inside the ready |
| 892 | * way toward the removal. Do nothing in this case. | 850 | * list, push it inside. |
| 893 | */ | 851 | */ |
| 894 | if (ep_rb_linked(&epi->rbn)) { | 852 | if (revents & event->events) { |
| 895 | /* | 853 | if (!ep_is_linked(&epi->rdllink)) { |
| 896 | * If the item is "hot" and it is not registered inside the ready | 854 | list_add_tail(&epi->rdllink, &ep->rdllist); |
| 897 | * list, push it inside. If the item is not "hot" and it is currently | 855 | |
| 898 | * registered inside the ready list, unlink it. | 856 | /* Notify waiting tasks that events are available */ |
| 899 | */ | 857 | if (waitqueue_active(&ep->wq)) |
| 900 | if (revents & event->events) { | 858 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | |
| 901 | if (!ep_is_linked(&epi->rdllink)) { | 859 | TASK_INTERRUPTIBLE); |
| 902 | list_add_tail(&epi->rdllink, &ep->rdllist); | 860 | if (waitqueue_active(&ep->poll_wait)) |
| 903 | 861 | pwake++; | |
| 904 | /* Notify waiting tasks that events are available */ | ||
| 905 | if (waitqueue_active(&ep->wq)) | ||
| 906 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | | ||
| 907 | TASK_INTERRUPTIBLE); | ||
| 908 | if (waitqueue_active(&ep->poll_wait)) | ||
| 909 | pwake++; | ||
| 910 | } | ||
| 911 | } | 862 | } |
| 912 | } | 863 | } |
| 913 | 864 | spin_unlock_irqrestore(&ep->lock, flags); | |
| 914 | write_unlock_irqrestore(&ep->lock, flags); | ||
| 915 | 865 | ||
| 916 | /* We have to call this outside the lock */ | 866 | /* We have to call this outside the lock */ |
| 917 | if (pwake) | 867 | if (pwake) |
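
The ep_modify() rework above relies on "mtx" to keep the looked-up item alive, so the old "is it still linked to the RB tree" check could go away. Seen from userspace this is simply the EPOLL_CTL_MOD path; the following is a minimal illustrative program (a pipe stands in for a real event source, and nothing here is kernel code):

#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
	int pfd[2];
	struct epoll_event ev = { .events = EPOLLIN };
	struct epoll_event out;
	int epfd = epoll_create(1);

	if (epfd < 0 || pipe(pfd) < 0)
		return 1;

	ev.data.fd = pfd[0];
	epoll_ctl(epfd, EPOLL_CTL_ADD, pfd[0], &ev);   /* handled by ep_insert() */

	ev.events = EPOLLIN | EPOLLET;                 /* switch to edge-triggered */
	epoll_ctl(epfd, EPOLL_CTL_MOD, pfd[0], &ev);   /* handled by ep_modify() */

	write(pfd[1], "x", 1);
	if (epoll_wait(epfd, &out, 1, 1000) > 0)
		printf("fd %d ready (events 0x%x)\n", out.data.fd, (unsigned)out.events);
	close(epfd);
	return 0;
}
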
| @@ -920,36 +870,50 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
| 920 | return 0; | 870 | return 0; |
| 921 | } | 871 | } |
| 922 | 872 | ||
| 923 | /* | 873 | static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, |
| 924 | * This function is called without holding the "ep->lock" since the call to | 874 | int maxevents) |
| 925 | * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ | ||
| 926 | * because of the way poll() is traditionally implemented in Linux. | ||
| 927 | */ | ||
| 928 | static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, | ||
| 929 | struct epoll_event __user *events, int maxevents) | ||
| 930 | { | 875 | { |
| 931 | int eventcnt, error = -EFAULT, pwake = 0; | 876 | int eventcnt, error = -EFAULT, pwake = 0; |
| 932 | unsigned int revents; | 877 | unsigned int revents; |
| 933 | unsigned long flags; | 878 | unsigned long flags; |
| 934 | struct epitem *epi; | 879 | struct epitem *epi, *nepi; |
| 935 | struct list_head injlist; | 880 | struct list_head txlist; |
| 881 | |||
| 882 | INIT_LIST_HEAD(&txlist); | ||
| 883 | |||
| 884 | /* | ||
| 885 | * We need to lock this because we could be hit by | ||
| 886 | * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). | ||
| 887 | */ | ||
| 888 | mutex_lock(&ep->mtx); | ||
| 936 | 889 | ||
| 937 | INIT_LIST_HEAD(&injlist); | 890 | /* |
| 891 | * Steal the ready list, and re-init the original one to the | ||
| 892 | * empty list. Also, set ep->ovflist to NULL so that events | ||
| 893 | * happening while looping w/out locks are not lost. We cannot | ||
| 894 | * have the poll callback queue directly on ep->rdllist, | ||
| 895 | * because we are doing it in the loop below, in a lockless way. | ||
| 896 | */ | ||
| 897 | spin_lock_irqsave(&ep->lock, flags); | ||
| 898 | list_splice(&ep->rdllist, &txlist); | ||
| 899 | INIT_LIST_HEAD(&ep->rdllist); | ||
| 900 | ep->ovflist = NULL; | ||
| 901 | spin_unlock_irqrestore(&ep->lock, flags); | ||
| 938 | 902 | ||
| 939 | /* | 903 | /* |
| 940 | * We can loop without lock because this is a task private list. | 904 | * We can loop without lock because this is a task private list. |
| 941 | * We just splice'd out the ep->rdllist in ep_collect_ready_items(). | 905 | * We just splice'd out the ep->rdllist in ep_collect_ready_items(). |
| 942 | * Items cannot vanish during the loop because we are holding "sem" in | 906 | * Items cannot vanish during the loop because we are holding "mtx". |
| 943 | * read. | ||
| 944 | */ | 907 | */ |
| 945 | for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) { | 908 | for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { |
| 946 | epi = list_first_entry(txlist, struct epitem, rdllink); | 909 | epi = list_first_entry(&txlist, struct epitem, rdllink); |
| 947 | prefetch(epi->rdllink.next); | 910 | |
| 911 | list_del_init(&epi->rdllink); | ||
| 948 | 912 | ||
| 949 | /* | 913 | /* |
| 950 | * Get the ready file event set. We can safely use the file | 914 | * Get the ready file event set. We can safely use the file |
| 951 | * because we are holding the "sem" in read and this will | 915 | * because we are holding the "mtx" and this will guarantee |
| 952 | * guarantee that both the file and the item will not vanish. | 916 | * that both the file and the item will not vanish. |
| 953 | */ | 917 | */ |
| 954 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); | 918 | revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); |
| 955 | revents &= epi->event.events; | 919 | revents &= epi->event.events; |
| @@ -957,8 +921,8 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, | |||
| 957 | /* | 921 | /* |
| 958 | * If the event mask intersects the caller-requested one, | 922 | * If the event mask intersects the caller-requested one, |
| 959 | * deliver the event to userspace. Again, we are holding | 923 | * deliver the event to userspace. Again, we are holding |
| 960 | * "sem" in read, so no operations coming from userspace | 924 | * "mtx", so no operations coming from userspace can change |
| 961 | * can change the item. | 925 | * the item. |
| 962 | */ | 926 | */ |
| 963 | if (revents) { | 927 | if (revents) { |
| 964 | if (__put_user(revents, | 928 | if (__put_user(revents, |
| @@ -970,59 +934,59 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, | |||
| 970 | epi->event.events &= EP_PRIVATE_BITS; | 934 | epi->event.events &= EP_PRIVATE_BITS; |
| 971 | eventcnt++; | 935 | eventcnt++; |
| 972 | } | 936 | } |
| 973 | |||
| 974 | /* | 937 | /* |
| 975 | * This is tricky. We are holding the "sem" in read, and this | 939 | * At this point, no one can insert into ep->rdllist besides |
| 976 | * means that the operations that can change the "linked" status | 939 | * us. The epoll_ctl() callers are locked out by us holding |
| 977 | * of the epoll item (epi->rbn and epi->rdllink), cannot touch | 940 | * "mtx" and the poll callback will queue them in ep->ovflist. |
| 978 | * them. Also, since we are "linked" from a epi->rdllink POV | ||
| 979 | * (the item is linked to our transmission list we just | ||
| 980 | * spliced), the ep_poll_callback() cannot touch us either, | ||
| 981 | * because of the check present in there. Another parallel | ||
| 982 | * epoll_wait() will not get the same result set, since we | ||
| 983 | * spliced the ready list before. Note that list_del() still | ||
| 984 | * shows the item as linked to the test in ep_poll_callback(). | ||
| 985 | */ | 941 | */ |
| 986 | list_del(&epi->rdllink); | ||
| 987 | if (!(epi->event.events & EPOLLET) && | 942 | if (!(epi->event.events & EPOLLET) && |
| 988 | (revents & epi->event.events)) | 943 | (revents & epi->event.events)) |
| 989 | list_add_tail(&epi->rdllink, &injlist); | 944 | list_add_tail(&epi->rdllink, &ep->rdllist); |
| 990 | else { | ||
| 991 | /* | ||
| 992 | * Be sure the item is totally detached before re-init | ||
| 993 | * the list_head. After INIT_LIST_HEAD() is committed, | ||
| 994 | * the ep_poll_callback() can requeue the item again, | ||
| 995 | * but we don't care since we are already past it. | ||
| 996 | */ | ||
| 997 | smp_mb(); | ||
| 998 | INIT_LIST_HEAD(&epi->rdllink); | ||
| 999 | } | ||
| 1000 | } | 945 | } |
| 1001 | error = 0; | 946 | error = 0; |
| 1002 | 947 | ||
| 1003 | errxit: | 948 | errxit: |
| 1004 | 949 | ||
| 950 | spin_lock_irqsave(&ep->lock, flags); | ||
| 1005 | /* | 951 | /* |
| 1006 | * If the re-injection list or the txlist are not empty, re-splice | 952 | * During the time we spent in the loop above, some other events |
| 1007 | * them to the ready list and do proper wakeups. | 953 | * might have been queued by the poll callback. We re-insert them |
| 954 | * here (unless they are already queued, or they're one-shot). | ||
| 1008 | */ | 955 | */ |
| 1009 | if (!list_empty(&injlist) || !list_empty(txlist)) { | 956 | for (nepi = ep->ovflist; (epi = nepi) != NULL; |
| 1010 | write_lock_irqsave(&ep->lock, flags); | 957 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { |
| 958 | if (!ep_is_linked(&epi->rdllink) && | ||
| 959 | (epi->event.events & ~EP_PRIVATE_BITS)) | ||
| 960 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
| 961 | } | ||
| 962 | /* | ||
| 963 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after | ||
| 964 | * releasing the lock, events will be queued in the normal way inside | ||
| 965 | * ep->rdllist. | ||
| 966 | */ | ||
| 967 | ep->ovflist = EP_UNACTIVE_PTR; | ||
| 968 | |||
| 969 | /* | ||
| 970 | * In case of error in the event-send loop, or in case the number of | ||
| 971 | * ready events exceeds the userspace limit, we need to splice the | ||
| 972 | * "txlist" back inside ep->rdllist. | ||
| 973 | */ | ||
| 974 | list_splice(&txlist, &ep->rdllist); | ||
| 1011 | 975 | ||
| 1012 | list_splice(txlist, &ep->rdllist); | 976 | if (!list_empty(&ep->rdllist)) { |
| 1013 | list_splice(&injlist, &ep->rdllist); | ||
| 1014 | /* | 977 | /* |
| 1015 | * Wake up ( if active ) both the eventpoll wait list and the ->poll() | 978 | * Wake up (if active) both the eventpoll wait list and the ->poll() |
| 1016 | * wait list. | 979 | * wait list (delayed after we release the lock). |
| 1017 | */ | 980 | */ |
| 1018 | if (waitqueue_active(&ep->wq)) | 981 | if (waitqueue_active(&ep->wq)) |
| 1019 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | | 982 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | |
| 1020 | TASK_INTERRUPTIBLE); | 983 | TASK_INTERRUPTIBLE); |
| 1021 | if (waitqueue_active(&ep->poll_wait)) | 984 | if (waitqueue_active(&ep->poll_wait)) |
| 1022 | pwake++; | 985 | pwake++; |
| 1023 | |||
| 1024 | write_unlock_irqrestore(&ep->lock, flags); | ||
| 1025 | } | 986 | } |
| 987 | spin_unlock_irqrestore(&ep->lock, flags); | ||
| 988 | |||
| 989 | mutex_unlock(&ep->mtx); | ||
| 1026 | 990 | ||
| 1027 | /* We have to call this outside the lock */ | 991 | /* We have to call this outside the lock */ |
| 1028 | if (pwake) | 992 | if (pwake) |
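
The rewritten ep_send_events() above works in three steps: steal ep->rdllist under the spinlock, walk the stolen list and copy events to userspace with no lock held (relying on "mtx" to keep items and files alive), and finally merge whatever the poll callback queued on ep->ovflist in the meantime. Below is a loosely analogous userspace sketch of that "steal the list, divert concurrent producers to an overflow list" idea; the names (produce, consume, draining) are ours and nothing here is kernel code.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	int value;
	struct item *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *ready;     /* plays the role of ep->rdllist */
static struct item *overflow;  /* plays the role of ep->ovflist */
static int draining;           /* stands in for "ovflist != EP_UNACTIVE_PTR" */

static void produce(int value)                 /* loose analogue of ep_poll_callback() */
{
	struct item *it = malloc(sizeof(*it));

	it->value = value;
	pthread_mutex_lock(&lock);
	if (draining) {                        /* consumer is busy: divert */
		it->next = overflow;
		overflow = it;
	} else {
		it->next = ready;
		ready = it;
	}
	pthread_mutex_unlock(&lock);
}

static void consume(void)                      /* loose analogue of ep_send_events() */
{
	struct item *batch, *it;

	pthread_mutex_lock(&lock);
	batch = ready;                         /* steal the ready list */
	ready = NULL;
	draining = 1;
	pthread_mutex_unlock(&lock);

	while (batch) {                        /* "copy to userspace" without the lock */
		it = batch;
		batch = it->next;
		printf("event %d\n", it->value);
		free(it);
	}

	pthread_mutex_lock(&lock);
	while (overflow) {                     /* re-queue what arrived meanwhile */
		it = overflow;
		overflow = it->next;
		it->next = ready;
		ready = it;
	}
	draining = 0;
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	produce(1);
	produce(2);
	consume();
	return 0;
}

In the kernel version the merge step additionally skips items that are already linked or that have been disabled by EPOLLONESHOT; the sketch omits that detail.
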
| @@ -1031,41 +995,6 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, | |||
| 1031 | return eventcnt == 0 ? error: eventcnt; | 995 | return eventcnt == 0 ? error: eventcnt; |
| 1032 | } | 996 | } |
| 1033 | 997 | ||
| 1034 | /* | ||
| 1035 | * Perform the transfer of events to user space. | ||
| 1036 | */ | ||
| 1037 | static int ep_events_transfer(struct eventpoll *ep, | ||
| 1038 | struct epoll_event __user *events, int maxevents) | ||
| 1039 | { | ||
| 1040 | int eventcnt; | ||
| 1041 | unsigned long flags; | ||
| 1042 | struct list_head txlist; | ||
| 1043 | |||
| 1044 | INIT_LIST_HEAD(&txlist); | ||
| 1045 | |||
| 1046 | /* | ||
| 1047 | * We need to lock this because we could be hit by | ||
| 1048 | * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). | ||
| 1049 | */ | ||
| 1050 | down_read(&ep->sem); | ||
| 1051 | |||
| 1052 | /* | ||
| 1053 | * Steal the ready list, and re-init the original one to the | ||
| 1054 | * empty list. | ||
| 1055 | */ | ||
| 1056 | write_lock_irqsave(&ep->lock, flags); | ||
| 1057 | list_splice(&ep->rdllist, &txlist); | ||
| 1058 | INIT_LIST_HEAD(&ep->rdllist); | ||
| 1059 | write_unlock_irqrestore(&ep->lock, flags); | ||
| 1060 | |||
| 1061 | /* Build result set in userspace */ | ||
| 1062 | eventcnt = ep_send_events(ep, &txlist, events, maxevents); | ||
| 1063 | |||
| 1064 | up_read(&ep->sem); | ||
| 1065 | |||
| 1066 | return eventcnt; | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | 998 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, |
| 1070 | int maxevents, long timeout) | 999 | int maxevents, long timeout) |
| 1071 | { | 1000 | { |
| @@ -1083,7 +1012,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | |||
| 1083 | MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; | 1012 | MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; |
| 1084 | 1013 | ||
| 1085 | retry: | 1014 | retry: |
| 1086 | write_lock_irqsave(&ep->lock, flags); | 1015 | spin_lock_irqsave(&ep->lock, flags); |
| 1087 | 1016 | ||
| 1088 | res = 0; | 1017 | res = 0; |
| 1089 | if (list_empty(&ep->rdllist)) { | 1018 | if (list_empty(&ep->rdllist)) { |
| @@ -1093,6 +1022,7 @@ retry: | |||
| 1093 | * ep_poll_callback() when events will become available. | 1022 | * ep_poll_callback() when events will become available. |
| 1094 | */ | 1023 | */ |
| 1095 | init_waitqueue_entry(&wait, current); | 1024 | init_waitqueue_entry(&wait, current); |
| 1025 | wait.flags |= WQ_FLAG_EXCLUSIVE; | ||
| 1096 | __add_wait_queue(&ep->wq, &wait); | 1026 | __add_wait_queue(&ep->wq, &wait); |
| 1097 | 1027 | ||
| 1098 | for (;;) { | 1028 | for (;;) { |
| @@ -1109,9 +1039,9 @@ retry: | |||
| 1109 | break; | 1039 | break; |
| 1110 | } | 1040 | } |
| 1111 | 1041 | ||
| 1112 | write_unlock_irqrestore(&ep->lock, flags); | 1042 | spin_unlock_irqrestore(&ep->lock, flags); |
| 1113 | jtimeout = schedule_timeout(jtimeout); | 1043 | jtimeout = schedule_timeout(jtimeout); |
| 1114 | write_lock_irqsave(&ep->lock, flags); | 1044 | spin_lock_irqsave(&ep->lock, flags); |
| 1115 | } | 1045 | } |
| 1116 | __remove_wait_queue(&ep->wq, &wait); | 1046 | __remove_wait_queue(&ep->wq, &wait); |
| 1117 | 1047 | ||
| @@ -1121,7 +1051,7 @@ retry: | |||
| 1121 | /* Is it worth to try to dig for events ? */ | 1051 | /* Is it worth to try to dig for events ? */ |
| 1122 | eavail = !list_empty(&ep->rdllist); | 1052 | eavail = !list_empty(&ep->rdllist); |
| 1123 | 1053 | ||
| 1124 | write_unlock_irqrestore(&ep->lock, flags); | 1054 | spin_unlock_irqrestore(&ep->lock, flags); |
| 1125 | 1055 | ||
| 1126 | /* | 1056 | /* |
| 1127 | * Try to transfer events to user space. In case we get 0 events and | 1057 | * Try to transfer events to user space. In case we get 0 events and |
| @@ -1129,18 +1059,17 @@ retry: | |||
| 1129 | * more luck. | 1059 | * more luck. |
| 1130 | */ | 1060 | */ |
| 1131 | if (!res && eavail && | 1061 | if (!res && eavail && |
| 1132 | !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) | 1062 | !(res = ep_send_events(ep, events, maxevents)) && jtimeout) |
| 1133 | goto retry; | 1063 | goto retry; |
| 1134 | 1064 | ||
| 1135 | return res; | 1065 | return res; |
| 1136 | } | 1066 | } |
| 1137 | 1067 | ||
| 1138 | /* | 1068 | /* |
| 1139 | * It opens an eventpoll file descriptor by suggesting a storage of "size" | 1069 | * It opens an eventpoll file descriptor. The "size" parameter is there |
| 1140 | * file descriptors. The size parameter is just a hint about how to size | 1070 | * for historical reasons, from when epoll used a hash instead of an |
| 1141 | * data structures. It won't prevent the user from storing more than "size" | 1071 | * RB tree. With the current implementation, the "size" parameter is ignored |
| 1142 | * file descriptors inside the epoll interface. It is the kernel part of | 1072 | * (besides sanity checks). |
| 1143 | * the userspace epoll_create(2). | ||
| 1144 | */ | 1073 | */ |
| 1145 | asmlinkage long sys_epoll_create(int size) | 1074 | asmlinkage long sys_epoll_create(int size) |
| 1146 | { | 1075 | { |
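
Given the comment above, the only requirement on the epoll_create() hint is that it be positive; large values reserve nothing extra. A tiny illustrative userspace program:

#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
	int a = epoll_create(1);       /* small hint */
	int b = epoll_create(10000);   /* large hint: behaves identically */

	printf("epfd a=%d b=%d\n", a, b);
	close(a);
	close(b);
	return 0;
}
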
| @@ -1176,7 +1105,6 @@ asmlinkage long sys_epoll_create(int size) | |||
| 1176 | 1105 | ||
| 1177 | error_free: | 1106 | error_free: |
| 1178 | ep_free(ep); | 1107 | ep_free(ep); |
| 1179 | kfree(ep); | ||
| 1180 | error_return: | 1108 | error_return: |
| 1181 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", | 1109 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", |
| 1182 | current, size, error)); | 1110 | current, size, error)); |
| @@ -1186,8 +1114,7 @@ error_return: | |||
| 1186 | /* | 1114 | /* |
| 1187 | * The following function implements the controller interface for | 1115 | * The following function implements the controller interface for |
| 1188 | * the eventpoll file that enables the insertion/removal/change of | 1116 | * the eventpoll file that enables the insertion/removal/change of |
| 1189 | * file descriptors inside the interest set. It represents | 1117 | * file descriptors inside the interest set. |
| 1190 | * the kernel part of the user space epoll_ctl(2). | ||
| 1191 | */ | 1118 | */ |
| 1192 | asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, | 1119 | asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, |
| 1193 | struct epoll_event __user *event) | 1120 | struct epoll_event __user *event) |
| @@ -1237,9 +1164,13 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, | |||
| 1237 | */ | 1164 | */ |
| 1238 | ep = file->private_data; | 1165 | ep = file->private_data; |
| 1239 | 1166 | ||
| 1240 | down_write(&ep->sem); | 1167 | mutex_lock(&ep->mtx); |
| 1241 | 1168 | ||
| 1242 | /* Try to lookup the file inside our RB tree */ | 1169 | /* |
| 1170 | * Try to lookup the file inside our RB tree, Since we grabbed "mtx" | ||
| 1171 | * above, we can be sure to be able to use the item looked up by | ||
| 1172 | * ep_find() till we release the mutex. | ||
| 1173 | */ | ||
| 1243 | epi = ep_find(ep, tfile, fd); | 1174 | epi = ep_find(ep, tfile, fd); |
| 1244 | 1175 | ||
| 1245 | error = -EINVAL; | 1176 | error = -EINVAL; |
| @@ -1266,13 +1197,7 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, | |||
| 1266 | error = -ENOENT; | 1197 | error = -ENOENT; |
| 1267 | break; | 1198 | break; |
| 1268 | } | 1199 | } |
| 1269 | /* | 1200 | mutex_unlock(&ep->mtx); |
| 1270 | * The function ep_find() increments the usage count of the structure | ||
| 1271 | * so, if this is not NULL, we need to release it. | ||
| 1272 | */ | ||
| 1273 | if (epi) | ||
| 1274 | ep_release_epitem(epi); | ||
| 1275 | up_write(&ep->sem); | ||
| 1276 | 1201 | ||
| 1277 | error_tgt_fput: | 1202 | error_tgt_fput: |
| 1278 | fput(tfile); | 1203 | fput(tfile); |
| @@ -1378,7 +1303,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, | |||
| 1378 | if (sigmask) { | 1303 | if (sigmask) { |
| 1379 | if (error == -EINTR) { | 1304 | if (error == -EINTR) { |
| 1380 | memcpy(¤t->saved_sigmask, &sigsaved, | 1305 | memcpy(¤t->saved_sigmask, &sigsaved, |
| 1381 | sizeof(sigsaved)); | 1306 | sizeof(sigsaved)); |
| 1382 | set_thread_flag(TIF_RESTORE_SIGMASK); | 1307 | set_thread_flag(TIF_RESTORE_SIGMASK); |
| 1383 | } else | 1308 | } else |
| 1384 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1309 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
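
The hunk above is the tail of sys_epoll_pwait(): the caller's signal mask is swapped in only for the duration of the wait and restored afterwards (via TIF_RESTORE_SIGMASK when the call returns -EINTR). The canonical userspace use is to keep a signal blocked except while waiting; an illustrative sketch, not kernel code:

#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

static volatile sig_atomic_t got_sigint;

static void on_sigint(int sig) { (void)sig; got_sigint = 1; }

int main(void)
{
	sigset_t blocked, orig;
	struct epoll_event ev = { .events = EPOLLIN }, out;
	int epfd = epoll_create(1);

	signal(SIGINT, on_sigint);
	sigemptyset(&blocked);
	sigaddset(&blocked, SIGINT);
	sigprocmask(SIG_BLOCK, &blocked, &orig);   /* SIGINT blocked normally */

	ev.data.fd = STDIN_FILENO;
	epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev);

	/* the original (unblocking) mask is installed only while waiting */
	if (epoll_pwait(epfd, &out, 1, 5000, &orig) < 0 && got_sigint)
		printf("interrupted by SIGINT during the wait\n");

	close(epfd);
	return 0;
}
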
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index 0bd7bd2ccb90..6a5be1f7debf 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h | |||
| @@ -85,8 +85,8 @@ __reload_thread(struct pcb_struct *pcb) | |||
| 85 | * +-------------+----------------+--------------+ | 85 | * +-------------+----------------+--------------+ |
| 86 | */ | 86 | */ |
| 87 | 87 | ||
| 88 | #ifdef CONFIG_SMP | ||
| 89 | #include <asm/smp.h> | 88 | #include <asm/smp.h> |
| 89 | #ifdef CONFIG_SMP | ||
| 90 | #define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) | 90 | #define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) |
| 91 | #else | 91 | #else |
| 92 | extern unsigned long last_asn; | 92 | extern unsigned long last_asn; |
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h index 21f54428c86b..b4cf0ea97ede 100644 --- a/include/asm-h8300/atomic.h +++ b/include/asm-h8300/atomic.h | |||
| @@ -37,6 +37,7 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v) | |||
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | #define atomic_sub(i, v) atomic_sub_return(i, v) | 39 | #define atomic_sub(i, v) atomic_sub_return(i, v) |
| 40 | #define atomic_sub_and_test(i,v) (atomic_sub_return(i, v) == 0) | ||
| 40 | 41 | ||
| 41 | static __inline__ int atomic_inc_return(atomic_t *v) | 42 | static __inline__ int atomic_inc_return(atomic_t *v) |
| 42 | { | 43 | { |
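
The new h8300 atomic_sub_and_test() mirrors the generic semantics: subtract and report whether the counter reached zero, which is what reference-count style "last user cleans up" logic needs. A userspace sketch of the same semantics, using C11 atomics purely for illustration (the kernel macro is the real API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool sub_and_test(atomic_int *v, int i)
{
	/* fetch_sub returns the old value, so the new value is old - i */
	return atomic_fetch_sub(v, i) - i == 0;
}

int main(void)
{
	atomic_int refs = ATOMIC_VAR_INIT(2);

	printf("%d\n", sub_and_test(&refs, 1));   /* 0: still one reference left */
	printf("%d\n", sub_and_test(&refs, 1));   /* 1: last reference dropped */
	return 0;
}
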
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 70f3515c3db0..338668bfb0a2 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h | |||
| @@ -749,9 +749,13 @@ extern unsigned long boot_option_idle_override; | |||
| 749 | extern void enable_sep_cpu(void); | 749 | extern void enable_sep_cpu(void); |
| 750 | extern int sysenter_setup(void); | 750 | extern int sysenter_setup(void); |
| 751 | 751 | ||
| 752 | /* Defined in head.S */ | ||
| 753 | extern struct Xgt_desc_struct early_gdt_descr; | ||
| 754 | |||
| 752 | extern void cpu_set_gdt(int); | 755 | extern void cpu_set_gdt(int); |
| 753 | extern void switch_to_new_gdt(void); | 756 | extern void switch_to_new_gdt(void); |
| 754 | extern void cpu_init(void); | 757 | extern void cpu_init(void); |
| 758 | extern void init_gdt(int cpu); | ||
| 755 | 759 | ||
| 756 | extern int force_mwait; | 760 | extern int force_mwait; |
| 757 | 761 | ||
diff --git a/include/asm-m68k/uaccess.h b/include/asm-m68k/uaccess.h index 6a4cf2081512..5c1264cf0c65 100644 --- a/include/asm-m68k/uaccess.h +++ b/include/asm-m68k/uaccess.h | |||
| @@ -361,7 +361,9 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) | |||
| 361 | 361 | ||
| 362 | long strncpy_from_user(char *dst, const char __user *src, long count); | 362 | long strncpy_from_user(char *dst, const char __user *src, long count); |
| 363 | long strnlen_user(const char __user *src, long n); | 363 | long strnlen_user(const char __user *src, long n); |
| 364 | unsigned long clear_user(void __user *to, unsigned long n); | 364 | unsigned long __clear_user(void __user *to, unsigned long n); |
| 365 | |||
| 366 | #define clear_user __clear_user | ||
| 365 | 367 | ||
| 366 | #define strlen_user(str) strnlen_user(str, 32767) | 368 | #define strlen_user(str) strnlen_user(str, 32767) |
| 367 | 369 | ||
diff --git a/include/linux/init.h b/include/linux/init.h index 8bc32bb2fce2..e007ae4dc41e 100644 --- a/include/linux/init.h +++ b/include/linux/init.h | |||
| @@ -52,14 +52,9 @@ | |||
| 52 | #endif | 52 | #endif |
| 53 | 53 | ||
| 54 | /* For assembly routines */ | 54 | /* For assembly routines */ |
| 55 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 56 | #define __INIT .section ".text","ax" | ||
| 57 | #define __INITDATA .section ".data","aw" | ||
| 58 | #else | ||
| 59 | #define __INIT .section ".init.text","ax" | 55 | #define __INIT .section ".init.text","ax" |
| 60 | #define __INITDATA .section ".init.data","aw" | ||
| 61 | #endif | ||
| 62 | #define __FINIT .previous | 56 | #define __FINIT .previous |
| 57 | #define __INITDATA .section ".init.data","aw" | ||
| 63 | 58 | ||
| 64 | #ifndef __ASSEMBLY__ | 59 | #ifndef __ASSEMBLY__ |
| 65 | /* | 60 | /* |
diff --git a/include/linux/io.h b/include/linux/io.h index 09d351236379..8423dd376514 100644 --- a/include/linux/io.h +++ b/include/linux/io.h | |||
| @@ -27,8 +27,16 @@ struct device; | |||
| 27 | void __iowrite32_copy(void __iomem *to, const void *from, size_t count); | 27 | void __iowrite32_copy(void __iomem *to, const void *from, size_t count); |
| 28 | void __iowrite64_copy(void __iomem *to, const void *from, size_t count); | 28 | void __iowrite64_copy(void __iomem *to, const void *from, size_t count); |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_MMU | ||
| 30 | int ioremap_page_range(unsigned long addr, unsigned long end, | 31 | int ioremap_page_range(unsigned long addr, unsigned long end, |
| 31 | unsigned long phys_addr, pgprot_t prot); | 32 | unsigned long phys_addr, pgprot_t prot); |
| 33 | #else | ||
| 34 | static inline int ioremap_page_range(unsigned long addr, unsigned long end, | ||
| 35 | unsigned long phys_addr, pgprot_t prot) | ||
| 36 | { | ||
| 37 | return 0; | ||
| 38 | } | ||
| 39 | #endif | ||
| 32 | 40 | ||
| 33 | /* | 41 | /* |
| 34 | * Managed iomap interface | 42 | * Managed iomap interface |
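
The io.h change above follows a common header pattern: declare the real function under the config symbol and provide a static inline stub otherwise, so callers never need their own #ifdef CONFIG_MMU guards. A generic, self-contained illustration of that pattern follows; FEATURE_FOO and foo_setup() are made-up names, not kernel interfaces.

#include <stdio.h>

#ifdef FEATURE_FOO
int foo_setup(unsigned long start, unsigned long end);   /* real version elsewhere */
#else
static inline int foo_setup(unsigned long start, unsigned long end)
{
	(void)start;
	(void)end;
	return 0;                                         /* nothing to do without the feature */
}
#endif

int main(void)
{
	/* compiles the same way whether or not FEATURE_FOO is defined */
	printf("foo_setup() = %d\n", foo_setup(0x1000, 0x2000));
	return 0;
}
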
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ccd85e4d3b8f..3b1fbf49fa7d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h | |||
| @@ -1288,6 +1288,7 @@ | |||
| 1288 | #define PCI_DEVICE_ID_VIA_8363_0 0x0305 | 1288 | #define PCI_DEVICE_ID_VIA_8363_0 0x0305 |
| 1289 | #define PCI_DEVICE_ID_VIA_P4M800CE 0x0314 | 1289 | #define PCI_DEVICE_ID_VIA_P4M800CE 0x0314 |
| 1290 | #define PCI_DEVICE_ID_VIA_P4M890 0x0327 | 1290 | #define PCI_DEVICE_ID_VIA_P4M890 0x0327 |
| 1291 | #define PCI_DEVICE_ID_VIA_VT3324 0x0324 | ||
| 1291 | #define PCI_DEVICE_ID_VIA_VT3336 0x0336 | 1292 | #define PCI_DEVICE_ID_VIA_VT3336 0x0336 |
| 1292 | #define PCI_DEVICE_ID_VIA_8371_0 0x0391 | 1293 | #define PCI_DEVICE_ID_VIA_8371_0 0x0391 |
| 1293 | #define PCI_DEVICE_ID_VIA_8501_0 0x0501 | 1294 | #define PCI_DEVICE_ID_VIA_8501_0 0x0501 |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index ea27065e80e6..fd6627e2d115 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
| @@ -60,7 +60,8 @@ struct kmem_cache { | |||
| 60 | #define KMALLOC_SHIFT_LOW 3 | 60 | #define KMALLOC_SHIFT_LOW 3 |
| 61 | 61 | ||
| 62 | #ifdef CONFIG_LARGE_ALLOCS | 62 | #ifdef CONFIG_LARGE_ALLOCS |
| 63 | #define KMALLOC_SHIFT_HIGH 25 | 63 | #define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT) <= 25 ? \ |
| 64 | (MAX_ORDER + PAGE_SHIFT - 1) : 25) | ||
| 64 | #else | 65 | #else |
| 65 | #if !defined(CONFIG_MMU) || NR_CPUS > 512 || MAX_NUMNODES > 256 | 66 | #if !defined(CONFIG_MMU) || NR_CPUS > 512 || MAX_NUMNODES > 256 |
| 66 | #define KMALLOC_SHIFT_HIGH 20 | 67 | #define KMALLOC_SHIFT_HIGH 20 |
| @@ -87,6 +88,9 @@ static inline int kmalloc_index(int size) | |||
| 87 | */ | 88 | */ |
| 88 | WARN_ON_ONCE(size == 0); | 89 | WARN_ON_ONCE(size == 0); |
| 89 | 90 | ||
| 91 | if (size >= (1 << KMALLOC_SHIFT_HIGH)) | ||
| 92 | return -1; | ||
| 93 | |||
| 90 | if (size > 64 && size <= 96) | 94 | if (size > 64 && size <= 96) |
| 91 | return 1; | 95 | return 1; |
| 92 | if (size > 128 && size <= 192) | 96 | if (size > 128 && size <= 192) |
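
The slub_def.h hunks above cap KMALLOC_SHIFT_HIGH by MAX_ORDER and make kmalloc_index() return -1 for sizes no kmalloc cache can serve, instead of walking past the cache array. A userspace sketch of that size-to-index mapping, using arbitrary illustrative shift values rather than the kernel's derived ones:

#include <stdio.h>

#define SHIFT_LOW  3
#define SHIFT_HIGH 20

static int size_to_index(unsigned long size)
{
	int i;

	if (size >= (1UL << SHIFT_HIGH))
		return -1;                       /* too big for any kmalloc cache */
	if (size > 64 && size <= 96)
		return 1;                        /* special 96-byte cache */
	if (size > 128 && size <= 192)
		return 2;                        /* special 192-byte cache */
	for (i = SHIFT_LOW; i < SHIFT_HIGH; i++)
		if (size <= (1UL << i))
			return i;
	return -1;
}

int main(void)
{
	printf("%d %d %d\n", size_to_index(100), size_to_index(4096),
	       size_to_index(1UL << 21));      /* -> 7, 12, -1 */
	return 0;
}
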
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 3db5c3c460d7..51b6a6a6158c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -74,7 +74,7 @@ static struct clocksource *watchdog; | |||
| 74 | static struct timer_list watchdog_timer; | 74 | static struct timer_list watchdog_timer; |
| 75 | static DEFINE_SPINLOCK(watchdog_lock); | 75 | static DEFINE_SPINLOCK(watchdog_lock); |
| 76 | static cycle_t watchdog_last; | 76 | static cycle_t watchdog_last; |
| 77 | static int watchdog_resumed; | 77 | static unsigned long watchdog_resumed; |
| 78 | 78 | ||
| 79 | /* | 79 | /* |
| 80 | * Interval: 0.5sec Threshold: 0.0625s | 80 | * Interval: 0.5sec Threshold: 0.0625s |
| @@ -104,9 +104,7 @@ static void clocksource_watchdog(unsigned long data) | |||
| 104 | 104 | ||
| 105 | spin_lock(&watchdog_lock); | 105 | spin_lock(&watchdog_lock); |
| 106 | 106 | ||
| 107 | resumed = watchdog_resumed; | 107 | resumed = test_and_clear_bit(0, &watchdog_resumed); |
| 108 | if (unlikely(resumed)) | ||
| 109 | watchdog_resumed = 0; | ||
| 110 | 108 | ||
| 111 | wdnow = watchdog->read(); | 109 | wdnow = watchdog->read(); |
| 112 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | 110 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); |
| @@ -151,9 +149,7 @@ static void clocksource_watchdog(unsigned long data) | |||
| 151 | } | 149 | } |
| 152 | static void clocksource_resume_watchdog(void) | 150 | static void clocksource_resume_watchdog(void) |
| 153 | { | 151 | { |
| 154 | spin_lock(&watchdog_lock); | 152 | set_bit(0, &watchdog_resumed); |
| 155 | watchdog_resumed = 1; | ||
| 156 | spin_unlock(&watchdog_lock); | ||
| 157 | } | 153 | } |
| 158 | 154 | ||
| 159 | static void clocksource_check_watchdog(struct clocksource *cs) | 155 | static void clocksource_check_watchdog(struct clocksource *cs) |
