-rw-r--r--  arch/x86/include/asm/entry_arch.h  |   9
-rw-r--r--  arch/x86/include/asm/irq_vectors.h |  11
-rw-r--r--  arch/x86/kernel/entry_64.S         |  18
-rw-r--r--  arch/x86/kernel/irqinit.c          |  73
-rw-r--r--  arch/x86/mm/tlb.c                  | 242
5 files changed, 47 insertions(+), 306 deletions(-)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 0baa628e330c..40afa0005c69 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -15,15 +15,6 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
-	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-BUILD_INTERRUPT3(invalidate_interrupt\idx,
-		 (INVALIDATE_TLB_VECTOR_START)+\idx,
-		 smp_invalidate_interrupt)
-.endif
-.endr
 #endif
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4b4448761e88..1508e518c7e3 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -119,17 +119,6 @@
  */
 #define LOCAL_TIMER_VECTOR		0xef
 
-/* up to 32 vectors used for spreading out TLB flushes: */
-#if NR_CPUS <= 32
-# define NUM_INVALIDATE_TLB_VECTORS	(NR_CPUS)
-#else
-# define NUM_INVALIDATE_TLB_VECTORS	(32)
-#endif
-
-#define INVALIDATE_TLB_VECTOR_END	(0xee)
-#define INVALIDATE_TLB_VECTOR_START	\
-	(INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
-
 #define NR_VECTORS			256
 
 #define FPU_IRQ				13
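For reference, the removed definitions carve a range directly below LOCAL_TIMER_VECTOR (0xef). A quick standalone check of that arithmetic, assuming an NR_CPUS > 32 build so all 32 vectors are used (ordinary userspace C, not part of the patch):

/*
 * Sketch of the removed vector-range arithmetic, assuming
 * NUM_INVALIDATE_TLB_VECTORS ends up as 32 (NR_CPUS > 32).
 */
#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS	32
#define INVALIDATE_TLB_VECTOR_END	0xee
#define INVALIDATE_TLB_VECTOR_START \
	(INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1)

int main(void)
{
	/* Prints start=0xcf end=0xee: 32 vectors just below LOCAL_TIMER_VECTOR (0xef). */
	printf("start=0x%x end=0x%x\n",
	       INVALIDATE_TLB_VECTOR_START, INVALIDATE_TLB_VECTOR_END);
	return 0;
}

With the patch applied, that whole block of vectors is no longer reserved for TLB flushes.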
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d65133b51be..bcf28e1ce1a7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1048,24 +1048,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
-#ifdef CONFIG_SMP
-	ALIGN
-	INTR_FRAME
-.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
-	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-ENTRY(invalidate_interrupt\idx)
-	pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
-	jmp .Lcommon_invalidate_interrupt0
-	CFI_ADJUST_CFA_OFFSET -8
-END(invalidate_interrupt\idx)
-.endif
-.endr
-	CFI_ENDPROC
-apicinterrupt INVALIDATE_TLB_VECTOR_START, \
-	invalidate_interrupt0, smp_invalidate_interrupt
-#endif
-
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 252981afd6c4..6e03b0d69138 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPIs for invalidation */
-#define ALLOC_INVTLB_VEC(NR) \
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
-			invalidate_interrupt##NR)
-
-	switch (NUM_INVALIDATE_TLB_VECTORS) {
-	default:
-		ALLOC_INVTLB_VEC(31);
-	case 31:
-		ALLOC_INVTLB_VEC(30);
-	case 30:
-		ALLOC_INVTLB_VEC(29);
-	case 29:
-		ALLOC_INVTLB_VEC(28);
-	case 28:
-		ALLOC_INVTLB_VEC(27);
-	case 27:
-		ALLOC_INVTLB_VEC(26);
-	case 26:
-		ALLOC_INVTLB_VEC(25);
-	case 25:
-		ALLOC_INVTLB_VEC(24);
-	case 24:
-		ALLOC_INVTLB_VEC(23);
-	case 23:
-		ALLOC_INVTLB_VEC(22);
-	case 22:
-		ALLOC_INVTLB_VEC(21);
-	case 21:
-		ALLOC_INVTLB_VEC(20);
-	case 20:
-		ALLOC_INVTLB_VEC(19);
-	case 19:
-		ALLOC_INVTLB_VEC(18);
-	case 18:
-		ALLOC_INVTLB_VEC(17);
-	case 17:
-		ALLOC_INVTLB_VEC(16);
-	case 16:
-		ALLOC_INVTLB_VEC(15);
-	case 15:
-		ALLOC_INVTLB_VEC(14);
-	case 14:
-		ALLOC_INVTLB_VEC(13);
-	case 13:
-		ALLOC_INVTLB_VEC(12);
-	case 12:
-		ALLOC_INVTLB_VEC(11);
-	case 11:
-		ALLOC_INVTLB_VEC(10);
-	case 10:
-		ALLOC_INVTLB_VEC(9);
-	case 9:
-		ALLOC_INVTLB_VEC(8);
-	case 8:
-		ALLOC_INVTLB_VEC(7);
-	case 7:
-		ALLOC_INVTLB_VEC(6);
-	case 6:
-		ALLOC_INVTLB_VEC(5);
-	case 5:
-		ALLOC_INVTLB_VEC(4);
-	case 4:
-		ALLOC_INVTLB_VEC(3);
-	case 3:
-		ALLOC_INVTLB_VEC(2);
-	case 2:
-		ALLOC_INVTLB_VEC(1);
-	case 1:
-		ALLOC_INVTLB_VEC(0);
-		break;
-	}
-
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
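The removed switch is an unrolled "set up vectors N-1 down to 0" loop, unrolled presumably because each vector needs its own token-pasted stub symbol (invalidate_interrupt0 .. invalidate_interrupt31). A hypothetical userspace sketch of the same fall-through behaviour, not kernel code; the 8-vector value is just an example:

#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS	8	/* e.g. an NR_CPUS = 8 build */
#define INVALIDATE_TLB_VECTOR_END	0xee
#define INVALIDATE_TLB_VECTOR_START \
	(INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1)

int main(void)
{
	int nr;

	/* Falling through from "case 8" down to "case 1" allocates vectors 7..0. */
	for (nr = NUM_INVALIDATE_TLB_VECTORS - 1; nr >= 0; nr--)
		printf("alloc_intr_gate(0x%x, invalidate_interrupt%d)\n",
		       INVALIDATE_TLB_VECTOR_START + nr, nr);
	return 0;
}

After the patch none of these gates are needed: the flush rides on CALL_FUNCTION_VECTOR, which smp_intr_init() already sets up a few lines below.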
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 481737def84a..2b5f506a7655 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,34 +28,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
  *
  *	More scalable flush, from Andi Kleen
  *
- *	To avoid global state use 8 different call vectors.
- *	Each CPU uses a specific vector to trigger flushes on other
- *	CPUs. Depending on the received vector the target CPUs look into
- *	the right array slot for the flush data.
- *
- *	With more than 8 CPUs they are hashed to the 8 available
- *	vectors. The limited global vector space forces us to this right now.
- *	In future when interrupts are split into per CPU domains this could be
- *	fixed, at the cost of triggering multiple IPIs in some cases.
+ *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
-union smp_flush_state {
-	struct {
-		struct mm_struct *flush_mm;
-		unsigned long flush_start;
-		unsigned long flush_end;
-		raw_spinlock_t tlbstate_lock;
-		DECLARE_BITMAP(flush_cpumask, NR_CPUS);
-	};
-	char pad[INTERNODE_CACHE_BYTES];
-} ____cacheline_internodealigned_in_smp;
-
-/* State is put into the per CPU data section, but padded
-   to a full cache line because other CPUs can access it and we don't
-   want false sharing in the per cpu data segment. */
-static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
-
-static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+struct flush_tlb_info {
+	struct mm_struct *flush_mm;
+	unsigned long flush_start;
+	unsigned long flush_end;
+};
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
@@ -74,28 +54,25 @@ void leave_mm(int cpu)
 EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
- *
  * The flush IPI assumes that a thread switch happens in this order:
  * [cpu0: the cpu that switches]
  * 1) switch_mm() either 1a) or 1b)
  * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but smp_invalidate_interrupt ignore flush ipis
- *	for the wrong mm, and in the worst case we perform a superfluous
- *	tlb flush.
- * 1a2) set cpu mmu_state to TLBSTATE_OK
- *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *	was in lazy tlb mode.
- * 1a3) update cpu active_mm
+ * 1a1) set cpu_tlbstate to TLBSTATE_OK
+ *	Now the tlb flush NMI handler flush_tlb_func won't call leave_mm
+ *	if cpu0 was in lazy tlb mode.
+ * 1a2) update cpu active_mm
  *	Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
  *	Now the other cpus will send tlb flush ipis.
  * 1a4) change cr3.
+ * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ *	Stop ipi delivery for the old mm. This is not synchronized with
+ *	the other cpus, but flush_tlb_func ignore flush ipis for the wrong
+ *	mm, and in the worst case we perform a superfluous tlb flush.
  * 1b) thread switch without mm change
- *	cpu active_mm is correct, cpu0 already handles
- *	flush ipis.
- * 1b1) set cpu mmu_state to TLBSTATE_OK
+ *	cpu active_mm is correct, cpu0 already handles flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
  * 1b2) test_and_set the cpu bit in cpu_vm_mask.
  *	Atomically set the bit [other cpus will start sending flush ipis],
  *	and test the bit.
@@ -108,186 +85,61 @@ EXPORT_SYMBOL_GPL(leave_mm)
  * runs in kernel space, the cpu could load tlb entries for user space
  * pages.
  *
- * The good news is that cpu mmu_state is local to each cpu, no
+ * The good news is that cpu_tlbstate is local to each cpu, no
  * write/read ordering problems.
  */
 
 /*
- * TLB flush IPI:
- *
+ * TLB flush funcation:
  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
  * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-/*
- * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
- * but still used for documentation purpose but the usage is slightly
- * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
- * entry calls in with the first parameter in %eax. Maybe define
- * intrlinkage?
  */
-#ifdef CONFIG_X86_64
-asmlinkage
-#endif
-void smp_invalidate_interrupt(struct pt_regs *regs)
+static void flush_tlb_func(void *info)
 {
-	unsigned int cpu;
-	unsigned int sender;
-	union smp_flush_state *f;
-
-	cpu = smp_processor_id();
-	/*
-	 * orig_rax contains the negated interrupt vector.
-	 * Use that to determine where the sender put the data.
-	 */
-	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
-	f = &flush_state[sender];
-
-	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
-		goto out;
-	/*
-	 * This was a BUG() but until someone can quote me the
-	 * line from the intel manual that guarantees an IPI to
-	 * multiple CPUs is retried _only_ on the erroring CPUs
-	 * its staying as a return
-	 *
-	 * BUG();
-	 */
-
-	if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
-		if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-			if (f->flush_end == TLB_FLUSH_ALL
-					|| !cpu_has_invlpg)
-				local_flush_tlb();
-			else if (!f->flush_end)
-				__flush_tlb_single(f->flush_start);
-			else {
-				unsigned long addr;
-				addr = f->flush_start;
-				while (addr < f->flush_end) {
-					__flush_tlb_single(addr);
-					addr += PAGE_SIZE;
-				}
-			}
-		} else
-			leave_mm(cpu);
-	}
-out:
-	ack_APIC_irq();
-	smp_mb__before_clear_bit();
-	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
-	smp_mb__after_clear_bit();
-	inc_irq_stat(irq_tlb_count);
-}
+	struct flush_tlb_info *f = info;
 
-static void flush_tlb_others_ipi(const struct cpumask *cpumask,
-				 struct mm_struct *mm, unsigned long start,
-				 unsigned long end)
-{
-	unsigned int sender;
-	union smp_flush_state *f;
-
-	/* Caller has disabled preemption */
-	sender = this_cpu_read(tlb_vector_offset);
-	f = &flush_state[sender];
-
-	if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
-		raw_spin_lock(&f->tlbstate_lock);
-
-	f->flush_mm = mm;
-	f->flush_start = start;
-	f->flush_end = end;
-	if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
-		/*
-		 * We have to send the IPI only to
-		 * CPUs affected.
-		 */
-		apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
-				INVALIDATE_TLB_VECTOR_START + sender);
-
-		while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
-			cpu_relax();
-	}
+	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
+		return;
+
+	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
+		if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg)
+			local_flush_tlb();
+		else if (!f->flush_end)
+			__flush_tlb_single(f->flush_start);
+		else {
+			unsigned long addr;
+			addr = f->flush_start;
+			while (addr < f->flush_end) {
+				__flush_tlb_single(addr);
+				addr += PAGE_SIZE;
+			}
+		}
+	} else
+		leave_mm(smp_processor_id());
 
-	f->flush_mm = NULL;
-	f->flush_start = 0;
-	f->flush_end = 0;
-	if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
-		raw_spin_unlock(&f->tlbstate_lock);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     struct mm_struct *mm, unsigned long start,
 			     unsigned long end)
 {
+	struct flush_tlb_info info;
+	info.flush_mm = mm;
+	info.flush_start = start;
+	info.flush_end = end;
+
 	if (is_uv_system()) {
 		unsigned int cpu;
 
 		cpu = smp_processor_id();
 		cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
 		if (cpumask)
-			flush_tlb_others_ipi(cpumask, mm, start, end);
+			smp_call_function_many(cpumask, flush_tlb_func,
+								&info, 1);
 		return;
 	}
-	flush_tlb_others_ipi(cpumask, mm, start, end);
-}
-
-static void __cpuinit calculate_tlb_offset(void)
-{
-	int cpu, node, nr_node_vecs, idx = 0;
-	/*
-	 * we are changing tlb_vector_offset for each CPU in runtime, but this
-	 * will not cause inconsistency, as the write is atomic under X86. we
-	 * might see more lock contentions in a short time, but after all CPU's
-	 * tlb_vector_offset are changed, everything should go normal
-	 *
-	 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
-	 * waste some vectors.
-	 **/
-	if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
-		nr_node_vecs = 1;
-	else
-		nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
-
-	for_each_online_node(node) {
-		int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
-			nr_node_vecs;
-		int cpu_offset = 0;
-		for_each_cpu(cpu, cpumask_of_node(node)) {
-			per_cpu(tlb_vector_offset, cpu) = node_offset +
-				cpu_offset;
-			cpu_offset++;
-			cpu_offset = cpu_offset % nr_node_vecs;
-		}
-		idx++;
-	}
-}
-
-static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
-			unsigned long action, void *hcpu)
-{
-	switch (action & 0xf) {
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		calculate_tlb_offset();
-	}
-	return NOTIFY_OK;
-}
-
-static int __cpuinit init_smp_flush(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
-		raw_spin_lock_init(&flush_state[i].tlbstate_lock);
-
-	calculate_tlb_offset();
-	hotcpu_notifier(tlb_cpuhp_notify, 0);
-	return 0;
+	smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
 }
-core_initcall(init_smp_flush);
 
 void flush_tlb_current_task(void)
 {
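The replacement path builds a flush_tlb_info on the sender's stack and hands the same pointer to every target CPU through smp_call_function_many(), passing wait = 1 so the stack slot stays valid until all targets have run flush_tlb_func(). A rough userspace analogy with pthreads (illustrative only; struct and field names are borrowed from the patch, the mm pointer is dropped):

#include <pthread.h>
#include <stdio.h>

/* Trimmed-down stand-in for the kernel's flush_tlb_info. */
struct flush_tlb_info {
	unsigned long flush_start;
	unsigned long flush_end;
};

/* Stand-in for flush_tlb_func(): every "CPU" reads the shared info block. */
static void *flush_worker(void *info)
{
	struct flush_tlb_info *f = info;

	printf("flushing 0x%lx..0x%lx\n", f->flush_start, f->flush_end);
	return NULL;
}

int main(void)
{
	pthread_t cpus[4];
	struct flush_tlb_info info = { .flush_start = 0x1000, .flush_end = 0x5000 };
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&cpus[i], NULL, flush_worker, &info);

	/* The "wait = 1" part: info lives in this stack frame, so we must not
	 * return until every worker is done reading it. */
	for (i = 0; i < 4; i++)
		pthread_join(cpus[i], NULL);
	return 0;
}

In exchange for sharing CALL_FUNCTION_VECTOR with other cross-CPU calls, the 32 dedicated invalidate vectors, the per-vector flush_state array and its spinlocks, and the tlb_vector_offset balancing code all go away.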
