39 files changed, 501 insertions, 377 deletions
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 42aa078a5e4d..5a621c6d22ab 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs)
 
         switch (which) {
         case IPI_RESCHEDULE:
-                /* Reschedule callback.  Everything to be done
-                   is done by the interrupt return path.  */
+                scheduler_ipi();
                 break;
 
         case IPI_CALL_FUNC:
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8fe05ad932e4..7a561eb731ea 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
                 break;
 
         case IPI_RESCHEDULE:
-                /*
-                 * nothing more to do - eveything is
-                 * done on the interrupt return path
-                 */
+                scheduler_ipi();
                 break;
 
         case IPI_CALL_FUNC:
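Every architecture below makes the same substitution: the reschedule-IPI handler, which used to be an empty stub relying on the interrupt-return path to notice need_resched, now calls scheduler_ipi(). As a rough sketch of the shape these hooks share (the IPI bit names and the demux function are hypothetical, not taken from any of the ports in this patch):

#include <linux/sched.h>        /* scheduler_ipi() */
#include <linux/smp.h>          /* generic_smp_call_function_interrupt() */

/* Hypothetical mailbox-style IPI demultiplexer; bit names are made up. */
#define EXAMPLE_IPI_RESCHEDULE  0x1
#define EXAMPLE_IPI_CALL_FUNC   0x2

static void example_handle_ipi(unsigned long pending)
{
        if (pending & EXAMPLE_IPI_RESCHEDULE)
                scheduler_ipi();        /* previously: "nothing to do here" */

        if (pending & EXAMPLE_IPI_CALL_FUNC)
                generic_smp_call_function_interrupt();
}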
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 8bce5ed031e4..1fbd94c44457 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -177,6 +177,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
         while (msg_queue->count) {
                 msg = &msg_queue->ipi_message[msg_queue->head];
                 switch (msg->type) {
+                case BFIN_IPI_RESCHEDULE:
+                        scheduler_ipi();
+                        break;
                 case BFIN_IPI_CALL_FUNC:
                         spin_unlock_irqrestore(&msg_queue->lock, flags);
                         ipi_call_function(cpu, msg);
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 4c9e3e1ba5d1..66cc75657e2f 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
 
         ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
 
+        if (ipi.vector & IPI_SCHEDULE) {
+                scheduler_ipi();
+        }
         if (ipi.vector & IPI_CALL) {
                 func(info);
         }
         if (ipi.vector & IPI_FLUSH_TLB) {
                 if (flush_mm == FLUSH_ALL)
                         __flush_tlb_all();
                 else if (flush_vma == FLUSH_ALL)
                         __flush_tlb_mm(flush_mm);
                 else
                         __flush_tlb_page(flush_vma, flush_addr);
         }
 
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 5b704740f160..782c3a357f24 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -31,6 +31,7 @@
 #include <linux/irq.h>
 #include <linux/ratelimit.h>
 #include <linux/acpi.h>
+#include <linux/sched.h>
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
@@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
                         smp_local_flush_tlb();
                         kstat_incr_irqs_this_cpu(irq, desc);
                 } else if (unlikely(IS_RESCHEDULE(vector))) {
+                        scheduler_ipi();
                         kstat_incr_irqs_this_cpu(irq, desc);
                 } else {
                         ia64_setreg(_IA64_REG_CR_TPR, vector);
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
index 108bb858acf2..b279e142c633 100644
--- a/arch/ia64/xen/irq_xen.c
+++ b/arch/ia64/xen/irq_xen.c
@@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt;
 static int xen_slab_ready;
 
 #ifdef CONFIG_SMP
+#include <linux/sched.h>
+
 /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
  * it ends up to issue several memory accesses upon percpu data and
  * thus adds unnecessary traffic to other paths.
@@ -99,7 +101,13 @@ static int xen_slab_ready;
 static irqreturn_t
 xen_dummy_handler(int irq, void *dev_id)
 {
+        return IRQ_HANDLED;
+}
 
+static irqreturn_t
+xen_resched_handler(int irq, void *dev_id)
+{
+        scheduler_ipi();
         return IRQ_HANDLED;
 }
 
@@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = {
 };
 
 static struct irqaction xen_resched_irqaction = {
-        .handler =      xen_dummy_handler,
+        .handler =      xen_resched_handler,
         .flags =        IRQF_DISABLED,
         .name =         "resched"
 };
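The ia64/Xen port is slightly different: it registers a per-vector irqaction rather than open-coding the dispatch, so the patch splits the shared dummy handler and points the "resched" action at a handler that calls scheduler_ipi(). A hedged sketch of that pattern using request_irq() (the irq number and names below are placeholders, not from this file):

#include <linux/interrupt.h>
#include <linux/sched.h>

static irqreturn_t example_resched_handler(int irq, void *dev_id)
{
        scheduler_ipi();
        return IRQ_HANDLED;
}

/* resched_irq stands in for whatever vector the platform binds to the IPI. */
static int example_bind_resched_ipi(unsigned int resched_irq)
{
        return request_irq(resched_irq, example_resched_handler,
                           IRQF_DISABLED, "resched", NULL);
}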
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 31cef20b2996..fc10b39893d4 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id)
  *
  * Description:  This routine executes on CPU which received
  *               'RESCHEDULE_IPI'.
- *               Rescheduling is processed at the exit of interrupt
- *               operation.
  *
  * Born on Date: 2002.02.05
  *
@@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id)
  *==========================================================================*/
 void smp_reschedule_interrupt(void)
 {
-        /* nothing to do */
+        scheduler_ipi();
 }
 
 /*==========================================================================*
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index ba78b21cc8d0..76923eeb58b9 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -44,6 +44,8 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 
         if (action & SMP_CALL_FUNCTION)
                 smp_call_function_interrupt();
+        if (action & SMP_RESCHEDULE_YOURSELF)
+                scheduler_ipi();
 
         /* Check if we've been told to flush the icache */
         if (action & SMP_ICACHE_FLUSH)
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 5a88cc4ccd5a..cedac4633741 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
 
 static void ipi_resched_interrupt(void)
 {
-        /* Return from interrupt should be enough to cause scheduler check */
+        scheduler_ipi();
 }
 
 static void ipi_call_interrupt(void)
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 9027061f0ead..7d93e6fbfa5a 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -309,6 +309,8 @@ static void ipi_call_dispatch(void)
 
 static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 {
+        scheduler_ipi();
+
         return IRQ_HANDLED;
 }
 
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index efc9e889b349..2608752898c0 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -55,6 +55,8 @@ void titan_mailbox_irq(void)
 
                 if (status & 0x2)
                         smp_call_function_interrupt();
+                if (status & 0x4)
+                        scheduler_ipi();
                 break;
 
         case 1:
@@ -63,6 +65,8 @@ void titan_mailbox_irq(void)
 
                 if (status & 0x2)
                         smp_call_function_interrupt();
+                if (status & 0x4)
+                        scheduler_ipi();
                 break;
         }
 }
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 0a04603d577c..b18b04e48577 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void)
 #ifdef CONFIG_SMP
         if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
                 LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
+                scheduler_ipi();
         } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
                 LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
+                scheduler_ipi();
         } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
                 LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
                 smp_call_function_interrupt();
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 47b347c992ea..d667875be564 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void)
         /* Clear the mailbox to clear the interrupt */
         __raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]);
 
-        /*
-         * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-         * interrupt will do the reschedule for us
-         */
+        if (action & SMP_RESCHEDULE_YOURSELF)
+                scheduler_ipi();
 
         if (action & SMP_CALL_FUNCTION)
                 smp_call_function_interrupt();
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c00a5cb1128d..38e7f6bd7922 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void)
         /* Clear the mailbox to clear the interrupt */
         ____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]);
 
-        /*
-         * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-         * interrupt will do the reschedule for us
-         */
+        if (action & SMP_RESCHEDULE_YOURSELF)
+                scheduler_ipi();
 
         if (action & SMP_CALL_FUNCTION)
                 smp_call_function_interrupt();
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 226c826a2194..83fb27912231 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -494,14 +494,11 @@ void smp_send_stop(void)
  * @irq: The interrupt number.
  * @dev_id: The device ID.
  *
- * We need do nothing here, since the scheduling will be effected on our way
- * back through entry.S.
- *
  * Returns IRQ_HANDLED to indicate we handled the interrupt successfully.
  */
 static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
 {
-        /* do nothing */
+        scheduler_ipi();
         return IRQ_HANDLED;
 }
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 69d63d354ef0..828305f19cff 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
 
                 case IPI_RESCHEDULE:
                         smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
-                        /*
-                         * Reschedule callback.  Everything to be
-                         * done is done by the interrupt return path.
-                         */
+                        scheduler_ipi();
                         break;
 
                 case IPI_CALL_FUNC:
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cbdbb14be4b0..9f9c204bef69 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -116,7 +116,7 @@ void smp_message_recv(int msg)
                 generic_smp_call_function_interrupt();
                 break;
         case PPC_MSG_RESCHEDULE:
-                /* we notice need_resched on exit */
+                scheduler_ipi();
                 break;
         case PPC_MSG_CALL_FUNC_SINGLE:
                 generic_smp_call_function_single_interrupt();
@@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data)
 
 static irqreturn_t reschedule_action(int irq, void *data)
 {
-        /* we just need the return path side effect of checking need_resched */
+        scheduler_ipi();
         return IRQ_HANDLED;
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 63a97db83f96..63c7d9ff220d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
         kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
         /*
          * handle bit signal external calls
-         *
-         * For the ec_schedule signal we have to do nothing. All the work
-         * is done automatically when we return from the interrupt.
          */
         bits = xchg(&S390_lowcore.ext_call_fast, 0);
 
+        if (test_bit(ec_schedule, &bits))
+                scheduler_ipi();
+
         if (test_bit(ec_call_function, &bits))
                 generic_smp_call_function_interrupt();
 
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 509b36b45115..6207561ea34a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg)
                 generic_smp_call_function_interrupt();
                 break;
         case SMP_MSG_RESCHEDULE:
+                scheduler_ipi();
                 break;
         case SMP_MSG_FUNCTION_SINGLE:
                 generic_smp_call_function_single_interrupt();
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 41102c5a6702..d5b3958be0b4 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -156,11 +156,11 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void smp_resched_interrupt(void)
 {
+        irq_enter();
+        scheduler_ipi();
         local_cpu_data().irq_resched_count++;
-        /*
-         * do nothing, since it all was about calling re-schedule
-         * routine called by interrupt return code.
-         */
+        irq_exit();
+        /* re-schedule routine called by interrupt return code. */
 }
 
 void smp_call_function_single_interrupt(void)
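sparc32 additionally brackets the call with irq_enter()/irq_exit(), presumably because its low-level IPI entry does not otherwise mark interrupt context; that matters now that scheduler_ipi() can do real work (activating remotely queued wakeups) rather than nothing. A sketch of that shape, with a hypothetical entry point:

#include <linux/hardirq.h>      /* irq_enter(), irq_exit() */
#include <linux/sched.h>

static void example_resched_ipi_entry(void)
{
        irq_enter();            /* scheduler_ipi() may wake tasks / touch the rq */
        scheduler_ipi();
        irq_exit();
}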
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index c274a30c3cbf..99cb17251bb5 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu)
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
         clear_softint(1 << irq);
+        scheduler_ipi();
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index a4293102ef81..c52224d5ed45 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end)
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
-        /*
-         * Nothing to do here; when we return from interrupt, the
-         * rescheduling will occur there. But do bump the interrupt
-         * profiler count in the meantime.
-         */
         __get_cpu_var(irq_stat).irq_resched_count++;
+        scheduler_ipi();
 
         return IRQ_HANDLED;
 }
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 106bf27e2a9a..eefb107d2d73 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -173,7 +173,7 @@ void IPI_handler(int cpu)
                 break;
 
         case 'R':
-                set_tsk_need_resched(current);
+                scheduler_ipi();
                 break;
 
         case 'S':
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 513deac7228d..013e7eba83bb 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
 }
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
         ack_APIC_irq();
         inc_irq_stat(irq_resched_count);
+        scheduler_ipi();
         /*
          * KVM uses this interrupt to force a cpu out of guest mode
          */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 30612441ed99..762b46ab14d5 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -46,13 +46,12 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
         inc_irq_stat(irq_resched_count);
+        scheduler_ipi();
 
         return IRQ_HANDLED;
 }
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 94b48bd40dd7..c75471db576e 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -51,7 +51,7 @@ struct mutex {
         spinlock_t              wait_lock;
         struct list_head        wait_list;
 #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
-        struct thread_info      *owner;
+        struct task_struct      *owner;
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
         const char              *name;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 18d63cea2848..94107a2c2840 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
-extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
+extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
 struct user_namespace;
@@ -1048,8 +1048,12 @@ struct sched_domain;
 #define WF_FORK         0x02            /* child wakeup after fork */
 
 #define ENQUEUE_WAKEUP          1
-#define ENQUEUE_WAKING          2
-#define ENQUEUE_HEAD            4
+#define ENQUEUE_HEAD            2
+#ifdef CONFIG_SMP
+#define ENQUEUE_WAKING          4       /* sched_class::task_waking was called */
+#else
+#define ENQUEUE_WAKING          0
+#endif
 
 #define DEQUEUE_SLEEP           1
 
@@ -1067,12 +1071,11 @@ struct sched_class {
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-        int  (*select_task_rq)(struct rq *rq, struct task_struct *p,
-                               int sd_flag, int flags);
+        int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
 
         void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
         void (*post_schedule) (struct rq *this_rq);
-        void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+        void (*task_waking) (struct task_struct *task);
         void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
         void (*set_cpus_allowed)(struct task_struct *p,
@@ -1200,10 +1203,10 @@ struct task_struct {
         int lock_depth;         /* BKL lock depth */
 
 #ifdef CONFIG_SMP
-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
-        int oncpu;
-#endif
+        struct task_struct *wake_entry;
+        int on_cpu;
 #endif
+        int on_rq;
 
         int prio, static_prio, normal_prio;
         unsigned int rt_priority;
@@ -1274,6 +1277,7 @@ struct task_struct {
 
         /* Revert to default priority/policy when forking */
         unsigned sched_reset_on_fork:1;
+        unsigned sched_contributes_to_load:1;
 
         pid_t pid;
         pid_t tgid;
@@ -2192,8 +2196,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+void scheduler_ipi(void);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void scheduler_ipi(void) { }
 static inline unsigned long wait_task_inactive(struct task_struct *p,
                                                long match_state)
 {
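The header pairs the real prototype under CONFIG_SMP with an empty static inline for UP builds, so callers never need an #ifdef of their own. A minimal sketch of a caller relying on that (the function name is illustrative):

#include <linux/sched.h>

/* Compiles unchanged on UP, where scheduler_ipi() is the empty inline stub. */
static void example_ipi_demux(bool resched_pending)
{
        if (resched_pending)
                scheduler_ipi();
}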
diff --git a/init/Kconfig b/init/Kconfig
index 56240e724d9a..32745bfe059e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -827,6 +827,11 @@ config SCHED_AUTOGROUP
           desktop applications.  Task group autogeneration is currently based
           upon task session.
 
+config SCHED_TTWU_QUEUE
+        bool
+        depends on !SPARC32
+        default y
+
 config MM_OWNER
         bool
 
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index ec815a960b5d..73da83aff418 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock)
                 return;
 
         DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-        DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
+        DEBUG_LOCKS_WARN_ON(lock->owner != current);
         DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
         mutex_clear_owner(lock);
 }
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 57d527a16f9d..0799fd3e4cfa 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 
 static inline void mutex_set_owner(struct mutex *lock)
 {
-        lock->owner = current_thread_info();
+        lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index c4195fa98900..fe4706cb0c5b 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
          */
 
         for (;;) {
-                struct thread_info *owner;
+                struct task_struct *owner;
 
                 /*
                  * If we own the BKL, then don't spin. The owner of
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 67578ca48f94..4115fbf83b12 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -19,7 +19,7 @@
 #ifdef CONFIG_SMP
 static inline void mutex_set_owner(struct mutex *lock)
 {
-        lock->owner = current_thread_info();
+        lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
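Storing the mutex owner as a task_struct rather than a thread_info lines up with the new p->on_cpu field: adaptive spinning can now ask "is the owner still running?" directly from the task. A simplified model of that idea (not the kernel's actual mutex_spin_on_owner(), which also deals with RCU and with the lock being released underneath us):

#include <linux/mutex.h>
#include <linux/sched.h>

static int example_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
        while (lock->owner == owner) {
                if (!owner->on_cpu)     /* owner was preempted: stop spinning */
                        return 0;
                cpu_relax();
        }
        return 1;                       /* ownership changed: retry the lock */
}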
diff --git a/kernel/sched.c b/kernel/sched.c
index 312f8b95c2d4..8c9d804dc07d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,6 +312,9 @@ struct cfs_rq {
 
         u64 exec_clock;
         u64 min_vruntime;
+#ifndef CONFIG_64BIT
+        u64 min_vruntime_copy;
+#endif
 
         struct rb_root tasks_timeline;
         struct rb_node *rb_leftmost;
@@ -553,6 +556,10 @@ struct rq {
         unsigned int ttwu_count;
         unsigned int ttwu_local;
 #endif
+
+#ifdef CONFIG_SMP
+        struct task_struct *wake_list;
+#endif
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -596,7 +603,7 @@ static inline int cpu_of(struct rq *rq)
  * Return the group to which this tasks belongs.
  *
  * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
  * holds that lock for each task it moves into the cgroup. Therefore
  * by holding that lock, we pin the task to the current cgroup.
  */
@@ -606,7 +613,7 @@ static inline struct task_group *task_group(struct task_struct *p)
         struct cgroup_subsys_state *css;
 
         css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-                        lockdep_is_held(&task_rq(p)->lock));
+                        lockdep_is_held(&p->pi_lock));
         tg = container_of(css, struct task_group, css);
 
         return autogroup_task_group(p, tg);
@@ -838,18 +845,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
         return rq->curr == p;
 }
 
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(struct rq *rq, struct task_struct *p)
 {
+#ifdef CONFIG_SMP
+        return p->on_cpu;
+#else
         return task_current(rq, p);
+#endif
 }
 
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
+#ifdef CONFIG_SMP
+        /*
+         * We can optimise this out completely for !SMP, because the
+         * SMP rebalancing from interrupt is the only thing that cares
+         * here.
+         */
+        next->on_cpu = 1;
+#endif
 }
 
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
+#ifdef CONFIG_SMP
+        /*
+         * After ->on_cpu is cleared, the task can be moved to a different CPU.
+         * We must ensure this doesn't happen until the switch is completely
+         * finished.
+         */
+        smp_wmb();
+        prev->on_cpu = 0;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
         /* this is a valid case when another task releases the spinlock */
         rq->lock.owner = current;
@@ -865,15 +893,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline int task_running(struct rq *rq, struct task_struct *p)
-{
-#ifdef CONFIG_SMP
-        return p->oncpu;
-#else
-        return task_current(rq, p);
-#endif
-}
-
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 #ifdef CONFIG_SMP
@@ -882,7 +901,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
          * SMP rebalancing from interrupt is the only thing that cares
          * here.
          */
-        next->oncpu = 1;
+        next->on_cpu = 1;
 #endif
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
         raw_spin_unlock_irq(&rq->lock);
@@ -895,12 +914,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
         /*
-         * After ->oncpu is cleared, the task can be moved to a different CPU.
+         * After ->on_cpu is cleared, the task can be moved to a different CPU.
          * We must ensure this doesn't happen until the switch is completely
          * finished.
          */
         smp_wmb();
-        prev->oncpu = 0;
+        prev->on_cpu = 0;
 #endif
 #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
         local_irq_enable();
@@ -909,23 +928,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
- * against ttwu().
- */
-static inline int task_is_waking(struct task_struct *p)
-{
-        return unlikely(p->state == TASK_WAKING);
-}
-
-/*
- * __task_rq_lock - lock the runqueue a given task resides on.
- * Must be called interrupts disabled.
+ * __task_rq_lock - lock the rq @p resides on.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
         __acquires(rq->lock)
 {
         struct rq *rq;
 
+        lockdep_assert_held(&p->pi_lock);
+
         for (;;) {
                 rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
@@ -936,22 +947,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 }
 
 /*
- * task_rq_lock - lock the runqueue a given task resides on and disable
- * interrupts. Note the ordering: we can safely lookup the task_rq without
- * explicitly disabling preemption.
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  */
 static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+        __acquires(p->pi_lock)
         __acquires(rq->lock)
 {
         struct rq *rq;
 
         for (;;) {
-                local_irq_save(*flags);
+                raw_spin_lock_irqsave(&p->pi_lock, *flags);
                 rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
                 if (likely(rq == task_rq(p)))
                         return rq;
-                raw_spin_unlock_irqrestore(&rq->lock, *flags);
+                raw_spin_unlock(&rq->lock);
+                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
         }
 }
 
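After these hunks the locking order for per-task scheduler state is fixed: p->pi_lock first, then the task's rq->lock, and task_rq_unlock() now takes the task so it can drop both in reverse order. A caller inside sched.c ends up looking roughly like this (sketch; the body comment is a placeholder):

static void example_update_task(struct task_struct *p)
{
        unsigned long flags;
        struct rq *rq;

        rq = task_rq_lock(p, &flags);   /* p->pi_lock, then rq->lock */
        /* ... read or modify p's scheduling state here ... */
        task_rq_unlock(rq, p, &flags);  /* rq->lock, then p->pi_lock */
}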
@@ -961,10 +972,13 @@ static void __task_rq_unlock(struct rq *rq)
         raw_spin_unlock(&rq->lock);
 }
 
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
         __releases(rq->lock)
+        __releases(p->pi_lock)
 {
-        raw_spin_unlock_irqrestore(&rq->lock, *flags);
+        raw_spin_unlock(&rq->lock);
+        raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 }
 
 /*
@@ -1773,7 +1787,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
         update_rq_clock(rq);
         sched_info_queued(p);
         p->sched_class->enqueue_task(rq, p, flags);
-        p->se.on_rq = 1;
 }
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1781,7 +1794,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
         update_rq_clock(rq);
         sched_info_dequeued(p);
         p->sched_class->dequeue_task(rq, p, flags);
-        p->se.on_rq = 0;
 }
 
 /*
@@ -2116,7 +2128,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
          * A queue event has occurred, and we're going to schedule.  In
          * this case, we can save a useless back to back clock update.
          */
-        if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
+        if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
                 rq->skip_clock_update = 1;
 }
 
@@ -2162,6 +2174,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
          */
         WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
                         !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+
+#ifdef CONFIG_LOCKDEP
+        WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+                                      lockdep_is_held(&task_rq(p)->lock)));
+#endif
 #endif
 
         trace_sched_migrate_task(p, new_cpu);
@@ -2185,13 +2202,15 @@ static int migration_cpu_stop(void *data);
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static bool migrate_task(struct task_struct *p, struct rq *rq)
+static bool need_migrate_task(struct task_struct *p)
 {
         /*
          * If the task is not on a runqueue (and not running), then
          * the next wake-up will properly place the task.
          */
-        return p->se.on_rq || task_running(rq, p);
+        bool running = p->on_rq || p->on_cpu;
+        smp_rmb(); /* finish_lock_switch() */
+        return running;
 }
 
 /*
@@ -2251,11 +2270,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 rq = task_rq_lock(p, &flags);
                 trace_sched_wait_task(p);
                 running = task_running(rq, p);
-                on_rq = p->se.on_rq;
+                on_rq = p->on_rq;
                 ncsw = 0;
                 if (!match_state || p->state == match_state)
                         ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-                task_rq_unlock(rq, &flags);
+                task_rq_unlock(rq, p, &flags);
 
                 /*
                  * If it changed from the expected state, bail out now.
@@ -2330,7 +2349,7 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 #ifdef CONFIG_SMP
 /*
- * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
+ * ->cpus_allowed is protected by both rq->lock and p->pi_lock
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -2363,12 +2382,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 }
 
 /*
- * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 {
-        int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+        int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
 
         /*
          * In order not to call set_task_cpu() on a blocking task we need
@@ -2394,27 +2413,60 @@ static void update_avg(u64 *avg, u64 sample)
 }
 #endif
 
-static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
-                                 bool is_sync, bool is_migrate, bool is_local,
-                                 unsigned long en_flags)
+static void
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
+#ifdef CONFIG_SCHEDSTATS
+        struct rq *rq = this_rq();
+
+#ifdef CONFIG_SMP
+        int this_cpu = smp_processor_id();
+
+        if (cpu == this_cpu) {
+                schedstat_inc(rq, ttwu_local);
+                schedstat_inc(p, se.statistics.nr_wakeups_local);
+        } else {
+                struct sched_domain *sd;
+
+                schedstat_inc(p, se.statistics.nr_wakeups_remote);
+                for_each_domain(this_cpu, sd) {
+                        if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
+                                schedstat_inc(sd, ttwu_wake_remote);
+                                break;
+                        }
+                }
+        }
+#endif /* CONFIG_SMP */
+
+        schedstat_inc(rq, ttwu_count);
         schedstat_inc(p, se.statistics.nr_wakeups);
-        if (is_sync)
+
+        if (wake_flags & WF_SYNC)
                 schedstat_inc(p, se.statistics.nr_wakeups_sync);
-        if (is_migrate)
+
+        if (cpu != task_cpu(p))
                 schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-        if (is_local)
-                schedstat_inc(p, se.statistics.nr_wakeups_local);
-        else
-                schedstat_inc(p, se.statistics.nr_wakeups_remote);
 
+#endif /* CONFIG_SCHEDSTATS */
+}
+
+static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
+{
         activate_task(rq, p, en_flags);
+        p->on_rq = 1;
+
+        /* if a worker is waking up, notify workqueue */
+        if (p->flags & PF_WQ_WORKER)
+                wq_worker_waking_up(p, cpu_of(rq));
 }
 
-static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
-                                        int wake_flags, bool success)
+/*
+ * Mark the task runnable and perform wakeup-preemption.
+ */
+static void
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
-        trace_sched_wakeup(p, success);
+        trace_sched_wakeup(p, true);
         check_preempt_curr(rq, p, wake_flags);
 
         p->state = TASK_RUNNING;
@@ -2433,9 +2485,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
                 rq->idle_stamp = 0;
         }
 #endif
-        /* if a worker is waking up, notify workqueue */
-        if ((p->flags & PF_WQ_WORKER) && success)
-                wq_worker_waking_up(p, cpu_of(rq));
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+#ifdef CONFIG_SMP
+        if (p->sched_contributes_to_load)
+                rq->nr_uninterruptible--;
+#endif
+
+        ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
+        ttwu_do_wakeup(rq, p, wake_flags);
+}
+
+/*
+ * Called in case the task @p isn't fully descheduled from its runqueue,
+ * in this case we must do a remote wakeup. Its a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING, since
+ * the task is still ->on_rq.
+ */
+static int ttwu_remote(struct task_struct *p, int wake_flags)
+{
+        struct rq *rq;
+        int ret = 0;
+
+        rq = __task_rq_lock(p);
+        if (p->on_rq) {
+                ttwu_do_wakeup(rq, p, wake_flags);
+                ret = 1;
+        }
+        __task_rq_unlock(rq);
+
+        return ret;
+}
+
+#ifdef CONFIG_SMP
+static void sched_ttwu_pending(void)
+{
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+        if (!list)
+                return;
+
+        raw_spin_lock(&rq->lock);
+
+        while (list) {
+                struct task_struct *p = list;
+                list = list->wake_entry;
+                ttwu_do_activate(rq, p, 0);
+        }
+
+        raw_spin_unlock(&rq->lock);
+}
+
+void scheduler_ipi(void)
+{
+        sched_ttwu_pending();
+}
+
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
+{
+        struct rq *rq = cpu_rq(cpu);
+        struct task_struct *next = rq->wake_list;
+
+        for (;;) {
+                struct task_struct *old = next;
+
+                p->wake_entry = next;
+                next = cmpxchg(&rq->wake_list, old, p);
+                if (next == old)
+                        break;
+        }
+
+        if (!next)
+                smp_send_reschedule(cpu);
+}
+#endif
+
+static void ttwu_queue(struct task_struct *p, int cpu)
+{
+        struct rq *rq = cpu_rq(cpu);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+        if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+                ttwu_queue_remote(p, cpu);
+                return;
+        }
+#endif
+
+        raw_spin_lock(&rq->lock);
+        ttwu_do_activate(rq, p, 0);
+        raw_spin_unlock(&rq->lock);
 }
 
 /**
@@ -2453,92 +2595,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2453 | * Returns %true if @p was woken up, %false if it was already running | 2595 | * Returns %true if @p was woken up, %false if it was already running |
2454 | * or @state didn't match @p's state. | 2596 | * or @state didn't match @p's state. |
2455 | */ | 2597 | */ |
2456 | static int try_to_wake_up(struct task_struct *p, unsigned int state, | 2598 | static int |
2457 | int wake_flags) | 2599 | try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
2458 | { | 2600 | { |
2459 | int cpu, orig_cpu, this_cpu, success = 0; | ||
2460 | unsigned long flags; | 2601 | unsigned long flags; |
2461 | unsigned long en_flags = ENQUEUE_WAKEUP; | 2602 | int cpu, success = 0; |
2462 | struct rq *rq; | ||
2463 | |||
2464 | this_cpu = get_cpu(); | ||
2465 | 2603 | ||
2466 | smp_wmb(); | 2604 | smp_wmb(); |
2467 | rq = task_rq_lock(p, &flags); | 2605 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2468 | if (!(p->state & state)) | 2606 | if (!(p->state & state)) |
2469 | goto out; | 2607 | goto out; |
2470 | 2608 | ||
2471 | if (p->se.on_rq) | 2609 | success = 1; /* we're going to change ->state */ |
2472 | goto out_running; | ||
2473 | |||
2474 | cpu = task_cpu(p); | 2610 | cpu = task_cpu(p); |
2475 | orig_cpu = cpu; | ||
2476 | 2611 | ||
2477 | #ifdef CONFIG_SMP | 2612 | if (p->on_rq && ttwu_remote(p, wake_flags)) |
2478 | if (unlikely(task_running(rq, p))) | 2613 | goto stat; |
2479 | goto out_activate; | ||
2480 | 2614 | ||
2615 | #ifdef CONFIG_SMP | ||
2481 | /* | 2616 | /* |
2482 | * In order to handle concurrent wakeups and release the rq->lock | 2617 | * If the owning (remote) cpu is still in the middle of schedule() with |
2483 | * we put the task in TASK_WAKING state. | 2618 | * this task as prev, wait until its done referencing the task. |
2484 | * | ||
2485 | * First fix up the nr_uninterruptible count: | ||
2486 | */ | 2619 | */ |
2487 | if (task_contributes_to_load(p)) { | 2620 | while (p->on_cpu) { |
2488 | if (likely(cpu_online(orig_cpu))) | 2621 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2489 | rq->nr_uninterruptible--; | 2622 | /* |
2490 | else | 2623 | * If called from interrupt context we could have landed in the |
2491 | this_rq()->nr_uninterruptible--; | 2624 | * middle of schedule(), in this case we should take care not |
2492 | } | 2625 | * to spin on ->on_cpu if p is current, since that would |
2493 | p->state = TASK_WAKING; | 2626 | * deadlock. |
2494 | 2627 | */ | |
2495 | if (p->sched_class->task_waking) { | 2628 | if (p == current) { |
2496 | p->sched_class->task_waking(rq, p); | 2629 | ttwu_queue(p, cpu); |
2497 | en_flags |= ENQUEUE_WAKING; | 2630 | goto stat; |
2631 | } | ||
2632 | #endif | ||
2633 | cpu_relax(); | ||
2498 | } | 2634 | } |
2499 | |||
2500 | cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); | ||
2501 | if (cpu != orig_cpu) | ||
2502 | set_task_cpu(p, cpu); | ||
2503 | __task_rq_unlock(rq); | ||
2504 | |||
2505 | rq = cpu_rq(cpu); | ||
2506 | raw_spin_lock(&rq->lock); | ||
2507 | |||
2508 | /* | 2635 | /* |
2509 | * We migrated the task without holding either rq->lock, however | 2636 | * Pairs with the smp_wmb() in finish_lock_switch(). |
2510 | * since the task is not on the task list itself, nobody else | ||
2511 | * will try and migrate the task, hence the rq should match the | ||
2512 | * cpu we just moved it to. | ||
2513 | */ | 2637 | */ |
2514 | WARN_ON(task_cpu(p) != cpu); | 2638 | smp_rmb(); |
2515 | WARN_ON(p->state != TASK_WAKING); | ||
2516 | 2639 | ||
2517 | #ifdef CONFIG_SCHEDSTATS | 2640 | p->sched_contributes_to_load = !!task_contributes_to_load(p); |
2518 | schedstat_inc(rq, ttwu_count); | 2641 | p->state = TASK_WAKING; |
2519 | if (cpu == this_cpu) | 2642 | |
2520 | schedstat_inc(rq, ttwu_local); | 2643 | if (p->sched_class->task_waking) |
2521 | else { | 2644 | p->sched_class->task_waking(p); |
2522 | struct sched_domain *sd; | ||
2523 | for_each_domain(this_cpu, sd) { | ||
2524 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2525 | schedstat_inc(sd, ttwu_wake_remote); | ||
2526 | break; | ||
2527 | } | ||
2528 | } | ||
2529 | } | ||
2530 | #endif /* CONFIG_SCHEDSTATS */ | ||
2531 | 2645 | ||
2532 | out_activate: | 2646 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2647 | if (task_cpu(p) != cpu) | ||
2648 | set_task_cpu(p, cpu); | ||
2533 | #endif /* CONFIG_SMP */ | 2649 | #endif /* CONFIG_SMP */ |
2534 | ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, | 2650 | |
2535 | cpu == this_cpu, en_flags); | 2651 | ttwu_queue(p, cpu); |
2536 | success = 1; | 2652 | stat: |
2537 | out_running: | 2653 | ttwu_stat(p, cpu, wake_flags); |
2538 | ttwu_post_activation(p, rq, wake_flags, success); | ||
2539 | out: | 2654 | out: |
2540 | task_rq_unlock(rq, &flags); | 2655 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2541 | put_cpu(); | ||
2542 | 2656 | ||
2543 | return success; | 2657 | return success; |
2544 | } | 2658 | } |
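The while (p->on_cpu) loop in the rewritten try_to_wake_up() waits for the previous CPU to stop referencing the task, and the smp_rmb() pairs with the smp_wmb() in finish_lock_switch() so that everything the old CPU wrote is visible once on_cpu reads as zero. An illustrative userspace sketch of that handshake with C11 release/acquire (invented type and function names, not kernel code):

    #include <stdatomic.h>
    #include <sched.h>      /* sched_yield() stands in for cpu_relax() */

    struct fake_task {
            atomic_int on_cpu;      /* 1 while some CPU still references it */
            int saved_state;        /* whatever the old CPU was still writing */
    };

    /* Context-switch side: finish touching the task, then publish that fact. */
    static void finish_switch(struct fake_task *p, int state)
    {
            p->saved_state = state;                      /* plain stores ...      */
            atomic_store_explicit(&p->on_cpu, 0,
                                  memory_order_release); /* ... published here    */
    }

    /* Wake-up side: spin until the previous CPU is done referencing the task. */
    static int wait_for_descheduled(struct fake_task *p)
    {
            while (atomic_load_explicit(&p->on_cpu, memory_order_acquire))
                    sched_yield();                       /* cpu_relax() analogue  */
            return p->saved_state;                       /* now safe to read      */
    }

The __ARCH_WANT_INTERRUPTS_ON_CTXSW special case in the kernel code exists because spinning like this on the current task from interrupt context would never terminate; that corner is handled by falling back to the queued wakeup instead.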
@@ -2547,31 +2661,34 @@ out: | |||
2547 | * try_to_wake_up_local - try to wake up a local task with rq lock held | 2661 | * try_to_wake_up_local - try to wake up a local task with rq lock held |
2548 | * @p: the thread to be awakened | 2662 | * @p: the thread to be awakened |
2549 | * | 2663 | * |
2550 | * Put @p on the run-queue if it's not already there. The caller must | 2664 | * Put @p on the run-queue if it's not already there. The caller must |
2551 | * ensure that this_rq() is locked, @p is bound to this_rq() and not | 2665 | * ensure that this_rq() is locked, @p is bound to this_rq() and not |
2552 | * the current task. this_rq() stays locked over invocation. | 2666 | * the current task. |
2553 | */ | 2667 | */ |
2554 | static void try_to_wake_up_local(struct task_struct *p) | 2668 | static void try_to_wake_up_local(struct task_struct *p) |
2555 | { | 2669 | { |
2556 | struct rq *rq = task_rq(p); | 2670 | struct rq *rq = task_rq(p); |
2557 | bool success = false; | ||
2558 | 2671 | ||
2559 | BUG_ON(rq != this_rq()); | 2672 | BUG_ON(rq != this_rq()); |
2560 | BUG_ON(p == current); | 2673 | BUG_ON(p == current); |
2561 | lockdep_assert_held(&rq->lock); | 2674 | lockdep_assert_held(&rq->lock); |
2562 | 2675 | ||
2676 | if (!raw_spin_trylock(&p->pi_lock)) { | ||
2677 | raw_spin_unlock(&rq->lock); | ||
2678 | raw_spin_lock(&p->pi_lock); | ||
2679 | raw_spin_lock(&rq->lock); | ||
2680 | } | ||
2681 | |||
2563 | if (!(p->state & TASK_NORMAL)) | 2682 | if (!(p->state & TASK_NORMAL)) |
2564 | return; | 2683 | goto out; |
2565 | 2684 | ||
2566 | if (!p->se.on_rq) { | 2685 | if (!p->on_rq) |
2567 | if (likely(!task_running(rq, p))) { | 2686 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); |
2568 | schedstat_inc(rq, ttwu_count); | 2687 | |
2569 | schedstat_inc(rq, ttwu_local); | 2688 | ttwu_do_wakeup(rq, p, 0); |
2570 | } | 2689 | ttwu_stat(p, smp_processor_id(), 0); |
2571 | ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); | 2690 | out: |
2572 | success = true; | 2691 | raw_spin_unlock(&p->pi_lock); |
2573 | } | ||
2574 | ttwu_post_activation(p, rq, 0, success); | ||
2575 | } | 2692 | } |
2576 | 2693 | ||
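The trylock dance at the top of try_to_wake_up_local() exists because the documented lock order is p->pi_lock before rq->lock, yet this caller already holds rq->lock. A small illustrative sketch of that back-off pattern with pthread mutexes (hypothetical names, not the kernel's types):

    #include <pthread.h>

    static void lock_pi_then_rq(pthread_mutex_t *pi_lock,
                                pthread_mutex_t *rq_lock /* already held */)
    {
            if (pthread_mutex_trylock(pi_lock) != 0) {
                    pthread_mutex_unlock(rq_lock);  /* give up the inner lock */
                    pthread_mutex_lock(pi_lock);    /* retake both in the     */
                    pthread_mutex_lock(rq_lock);    /* documented order       */
            }
            /* both locks held, ordering respected, no ABBA deadlock */
    }

Because the runqueue lock may have been dropped and retaken, anything observed before the back-off can be stale; that is why the kernel code re-checks p->state under the locks before activating the task.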
2577 | /** | 2694 | /** |
@@ -2604,19 +2721,21 @@ int wake_up_state(struct task_struct *p, unsigned int state) | |||
2604 | */ | 2721 | */ |
2605 | static void __sched_fork(struct task_struct *p) | 2722 | static void __sched_fork(struct task_struct *p) |
2606 | { | 2723 | { |
2724 | p->on_rq = 0; | ||
2725 | |||
2726 | p->se.on_rq = 0; | ||
2607 | p->se.exec_start = 0; | 2727 | p->se.exec_start = 0; |
2608 | p->se.sum_exec_runtime = 0; | 2728 | p->se.sum_exec_runtime = 0; |
2609 | p->se.prev_sum_exec_runtime = 0; | 2729 | p->se.prev_sum_exec_runtime = 0; |
2610 | p->se.nr_migrations = 0; | 2730 | p->se.nr_migrations = 0; |
2611 | p->se.vruntime = 0; | 2731 | p->se.vruntime = 0; |
2732 | INIT_LIST_HEAD(&p->se.group_node); | ||
2612 | 2733 | ||
2613 | #ifdef CONFIG_SCHEDSTATS | 2734 | #ifdef CONFIG_SCHEDSTATS |
2614 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2735 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
2615 | #endif | 2736 | #endif |
2616 | 2737 | ||
2617 | INIT_LIST_HEAD(&p->rt.run_list); | 2738 | INIT_LIST_HEAD(&p->rt.run_list); |
2618 | p->se.on_rq = 0; | ||
2619 | INIT_LIST_HEAD(&p->se.group_node); | ||
2620 | 2739 | ||
2621 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2740 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2622 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2741 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
@@ -2628,6 +2747,7 @@ static void __sched_fork(struct task_struct *p) | |||
2628 | */ | 2747 | */ |
2629 | void sched_fork(struct task_struct *p, int clone_flags) | 2748 | void sched_fork(struct task_struct *p, int clone_flags) |
2630 | { | 2749 | { |
2750 | unsigned long flags; | ||
2631 | int cpu = get_cpu(); | 2751 | int cpu = get_cpu(); |
2632 | 2752 | ||
2633 | __sched_fork(p); | 2753 | __sched_fork(p); |
@@ -2678,16 +2798,16 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2678 | * | 2798 | * |
2679 | * Silence PROVE_RCU. | 2799 | * Silence PROVE_RCU. |
2680 | */ | 2800 | */ |
2681 | rcu_read_lock(); | 2801 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2682 | set_task_cpu(p, cpu); | 2802 | set_task_cpu(p, cpu); |
2683 | rcu_read_unlock(); | 2803 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2684 | 2804 | ||
2685 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2805 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2686 | if (likely(sched_info_on())) | 2806 | if (likely(sched_info_on())) |
2687 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2807 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
2688 | #endif | 2808 | #endif |
2689 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 2809 | #if defined(CONFIG_SMP) |
2690 | p->oncpu = 0; | 2810 | p->on_cpu = 0; |
2691 | #endif | 2811 | #endif |
2692 | #ifdef CONFIG_PREEMPT | 2812 | #ifdef CONFIG_PREEMPT |
2693 | /* Want to start with kernel preemption disabled. */ | 2813 | /* Want to start with kernel preemption disabled. */ |
@@ -2711,37 +2831,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2711 | { | 2831 | { |
2712 | unsigned long flags; | 2832 | unsigned long flags; |
2713 | struct rq *rq; | 2833 | struct rq *rq; |
2714 | int cpu __maybe_unused = get_cpu(); | ||
2715 | 2834 | ||
2835 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
2716 | #ifdef CONFIG_SMP | 2836 | #ifdef CONFIG_SMP |
2717 | rq = task_rq_lock(p, &flags); | ||
2718 | p->state = TASK_WAKING; | ||
2719 | |||
2720 | /* | 2837 | /* |
2721 | * Fork balancing, do it here and not earlier because: | 2838 | * Fork balancing, do it here and not earlier because: |
2722 | * - cpus_allowed can change in the fork path | 2839 | * - cpus_allowed can change in the fork path |
2723 | * - any previously selected cpu might disappear through hotplug | 2840 | * - any previously selected cpu might disappear through hotplug |
2724 | * | ||
2725 | * We set TASK_WAKING so that select_task_rq() can drop rq->lock | ||
2726 | * without people poking at ->cpus_allowed. | ||
2727 | */ | 2841 | */ |
2728 | cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); | 2842 | set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); |
2729 | set_task_cpu(p, cpu); | ||
2730 | |||
2731 | p->state = TASK_RUNNING; | ||
2732 | task_rq_unlock(rq, &flags); | ||
2733 | #endif | 2843 | #endif |
2734 | 2844 | ||
2735 | rq = task_rq_lock(p, &flags); | 2845 | rq = __task_rq_lock(p); |
2736 | activate_task(rq, p, 0); | 2846 | activate_task(rq, p, 0); |
2737 | trace_sched_wakeup_new(p, 1); | 2847 | p->on_rq = 1; |
2848 | trace_sched_wakeup_new(p, true); | ||
2738 | check_preempt_curr(rq, p, WF_FORK); | 2849 | check_preempt_curr(rq, p, WF_FORK); |
2739 | #ifdef CONFIG_SMP | 2850 | #ifdef CONFIG_SMP |
2740 | if (p->sched_class->task_woken) | 2851 | if (p->sched_class->task_woken) |
2741 | p->sched_class->task_woken(rq, p); | 2852 | p->sched_class->task_woken(rq, p); |
2742 | #endif | 2853 | #endif |
2743 | task_rq_unlock(rq, &flags); | 2854 | task_rq_unlock(rq, p, &flags); |
2744 | put_cpu(); | ||
2745 | } | 2855 | } |
2746 | 2856 | ||
2747 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2857 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -3450,27 +3560,22 @@ void sched_exec(void) | |||
3450 | { | 3560 | { |
3451 | struct task_struct *p = current; | 3561 | struct task_struct *p = current; |
3452 | unsigned long flags; | 3562 | unsigned long flags; |
3453 | struct rq *rq; | ||
3454 | int dest_cpu; | 3563 | int dest_cpu; |
3455 | 3564 | ||
3456 | rq = task_rq_lock(p, &flags); | 3565 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
3457 | dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); | 3566 | dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); |
3458 | if (dest_cpu == smp_processor_id()) | 3567 | if (dest_cpu == smp_processor_id()) |
3459 | goto unlock; | 3568 | goto unlock; |
3460 | 3569 | ||
3461 | /* | 3570 | if (likely(cpu_active(dest_cpu))) { |
3462 | * select_task_rq() can race against ->cpus_allowed | ||
3463 | */ | ||
3464 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | ||
3465 | likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { | ||
3466 | struct migration_arg arg = { p, dest_cpu }; | 3571 | struct migration_arg arg = { p, dest_cpu }; |
3467 | 3572 | ||
3468 | task_rq_unlock(rq, &flags); | 3573 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3469 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 3574 | stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); |
3470 | return; | 3575 | return; |
3471 | } | 3576 | } |
3472 | unlock: | 3577 | unlock: |
3473 | task_rq_unlock(rq, &flags); | 3578 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3474 | } | 3579 | } |
3475 | 3580 | ||
3476 | #endif | 3581 | #endif |
@@ -3507,7 +3612,7 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
3507 | 3612 | ||
3508 | rq = task_rq_lock(p, &flags); | 3613 | rq = task_rq_lock(p, &flags); |
3509 | ns = do_task_delta_exec(p, rq); | 3614 | ns = do_task_delta_exec(p, rq); |
3510 | task_rq_unlock(rq, &flags); | 3615 | task_rq_unlock(rq, p, &flags); |
3511 | 3616 | ||
3512 | return ns; | 3617 | return ns; |
3513 | } | 3618 | } |
@@ -3525,7 +3630,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
3525 | 3630 | ||
3526 | rq = task_rq_lock(p, &flags); | 3631 | rq = task_rq_lock(p, &flags); |
3527 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | 3632 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); |
3528 | task_rq_unlock(rq, &flags); | 3633 | task_rq_unlock(rq, p, &flags); |
3529 | 3634 | ||
3530 | return ns; | 3635 | return ns; |
3531 | } | 3636 | } |
@@ -3549,7 +3654,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p) | |||
3549 | rq = task_rq_lock(p, &flags); | 3654 | rq = task_rq_lock(p, &flags); |
3550 | thread_group_cputime(p, &totals); | 3655 | thread_group_cputime(p, &totals); |
3551 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | 3656 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); |
3552 | task_rq_unlock(rq, &flags); | 3657 | task_rq_unlock(rq, p, &flags); |
3553 | 3658 | ||
3554 | return ns; | 3659 | return ns; |
3555 | } | 3660 | } |
@@ -4035,7 +4140,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4035 | 4140 | ||
4036 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 4141 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
4037 | { | 4142 | { |
4038 | if (prev->se.on_rq) | 4143 | if (prev->on_rq) |
4039 | update_rq_clock(rq); | 4144 | update_rq_clock(rq); |
4040 | prev->sched_class->put_prev_task(rq, prev); | 4145 | prev->sched_class->put_prev_task(rq, prev); |
4041 | } | 4146 | } |
@@ -4097,11 +4202,13 @@ need_resched: | |||
4097 | if (unlikely(signal_pending_state(prev->state, prev))) { | 4202 | if (unlikely(signal_pending_state(prev->state, prev))) { |
4098 | prev->state = TASK_RUNNING; | 4203 | prev->state = TASK_RUNNING; |
4099 | } else { | 4204 | } else { |
4205 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4206 | prev->on_rq = 0; | ||
4207 | |||
4100 | /* | 4208 | /* |
4101 | * If a worker is going to sleep, notify and | 4209 | * If a worker went to sleep, notify and ask workqueue |
4102 | * ask workqueue whether it wants to wake up a | 4210 | * whether it wants to wake up a task to maintain |
4103 | * task to maintain concurrency. If so, wake | 4211 | * concurrency. |
4104 | * up the task. | ||
4105 | */ | 4212 | */ |
4106 | if (prev->flags & PF_WQ_WORKER) { | 4213 | if (prev->flags & PF_WQ_WORKER) { |
4107 | struct task_struct *to_wakeup; | 4214 | struct task_struct *to_wakeup; |
@@ -4110,11 +4217,10 @@ need_resched: | |||
4110 | if (to_wakeup) | 4217 | if (to_wakeup) |
4111 | try_to_wake_up_local(to_wakeup); | 4218 | try_to_wake_up_local(to_wakeup); |
4112 | } | 4219 | } |
4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4114 | 4220 | ||
4115 | /* | 4221 | /* |
4116 | * If we are going to sleep and we have plugged IO queued, make | 4222 | * If we are going to sleep and we have plugged IO |
4117 | * sure to submit it to avoid deadlocks. | 4223 | * queued, make sure to submit it to avoid deadlocks. |
4118 | */ | 4224 | */ |
4119 | if (blk_needs_flush_plug(prev)) { | 4225 | if (blk_needs_flush_plug(prev)) { |
4120 | raw_spin_unlock(&rq->lock); | 4226 | raw_spin_unlock(&rq->lock); |
@@ -4161,70 +4267,53 @@ need_resched: | |||
4161 | EXPORT_SYMBOL(schedule); | 4267 | EXPORT_SYMBOL(schedule); |
4162 | 4268 | ||
4163 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4269 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
4164 | /* | ||
4165 | * Look out! "owner" is an entirely speculative pointer | ||
4166 | * access and not reliable. | ||
4167 | */ | ||
4168 | int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) | ||
4169 | { | ||
4170 | unsigned int cpu; | ||
4171 | struct rq *rq; | ||
4172 | 4270 | ||
4173 | if (!sched_feat(OWNER_SPIN)) | 4271 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
4174 | return 0; | 4272 | { |
4273 | bool ret = false; | ||
4175 | 4274 | ||
4176 | #ifdef CONFIG_DEBUG_PAGEALLOC | 4275 | rcu_read_lock(); |
4177 | /* | 4276 | if (lock->owner != owner) |
4178 | * Need to access the cpu field knowing that | 4277 | goto fail; |
4179 | * DEBUG_PAGEALLOC could have unmapped it if | ||
4180 | * the mutex owner just released it and exited. | ||
4181 | */ | ||
4182 | if (probe_kernel_address(&owner->cpu, cpu)) | ||
4183 | return 0; | ||
4184 | #else | ||
4185 | cpu = owner->cpu; | ||
4186 | #endif | ||
4187 | 4278 | ||
4188 | /* | 4279 | /* |
4189 | * Even if the access succeeded (likely case), | 4280 | * Ensure we emit the owner->on_cpu dereference _after_ checking |
4190 | * the cpu field may no longer be valid. | 4281 | * lock->owner still matches owner; if that fails, owner might |
4282 | * point to free()d memory; if it still matches, the rcu_read_lock() | ||
4283 | * ensures the memory stays valid. | ||
4191 | */ | 4284 | */ |
4192 | if (cpu >= nr_cpumask_bits) | 4285 | barrier(); |
4193 | return 0; | ||
4194 | 4286 | ||
4195 | /* | 4287 | ret = owner->on_cpu; |
4196 | * We need to validate that we can do a | 4288 | fail: |
4197 | * get_cpu() and that we have the percpu area. | 4289 | rcu_read_unlock(); |
4198 | */ | ||
4199 | if (!cpu_online(cpu)) | ||
4200 | return 0; | ||
4201 | 4290 | ||
4202 | rq = cpu_rq(cpu); | 4291 | return ret; |
4292 | } | ||
4203 | 4293 | ||
4204 | for (;;) { | 4294 | /* |
4205 | /* | 4295 | * Look out! "owner" is an entirely speculative pointer |
4206 | * Owner changed, break to re-assess state. | 4296 | * access and not reliable. |
4207 | */ | 4297 | */ |
4208 | if (lock->owner != owner) { | 4298 | int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) |
4209 | /* | 4299 | { |
4210 | * If the lock has switched to a different owner, | 4300 | if (!sched_feat(OWNER_SPIN)) |
4211 | * we likely have heavy contention. Return 0 to quit | 4301 | return 0; |
4212 | * optimistic spinning and not contend further: | ||
4213 | */ | ||
4214 | if (lock->owner) | ||
4215 | return 0; | ||
4216 | break; | ||
4217 | } | ||
4218 | 4302 | ||
4219 | /* | 4303 | while (owner_running(lock, owner)) { |
4220 | * Is that owner really running on that cpu? | 4304 | if (need_resched()) |
4221 | */ | ||
4222 | if (task_thread_info(rq->curr) != owner || need_resched()) | ||
4223 | return 0; | 4305 | return 0; |
4224 | 4306 | ||
4225 | arch_mutex_cpu_relax(); | 4307 | arch_mutex_cpu_relax(); |
4226 | } | 4308 | } |
4227 | 4309 | ||
4310 | /* | ||
4311 | * If the owner changed to another task there is likely | ||
4312 | * heavy contention, stop spinning. | ||
4313 | */ | ||
4314 | if (lock->owner) | ||
4315 | return 0; | ||
4316 | |||
4228 | return 1; | 4317 | return 1; |
4229 | } | 4318 | } |
4230 | #endif | 4319 | #endif |
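The rewritten mutex_spin_on_owner() boils down to: keep spinning only while the same owner still holds the lock and is actually executing on a CPU, stop if this CPU needs to reschedule, and give up entirely if ownership has moved to another task (a sign of heavy contention). An illustrative userspace sketch of that policy with C11 atomics; the types and the need_resched callback are invented for the example, and owner lifetime is assumed to be managed by the caller (the kernel relies on RCU for that):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct spin_owner {
            atomic_bool on_cpu;
    };

    struct spin_lock_s {
            _Atomic(struct spin_owner *) owner;     /* NULL when unlocked */
    };

    static bool owner_still_running(struct spin_lock_s *lock,
                                    struct spin_owner *owner)
    {
            /* re-check the owner before looking at on_cpu, as the kernel does */
            if (atomic_load_explicit(&lock->owner, memory_order_acquire) != owner)
                    return false;
            return atomic_load_explicit(&owner->on_cpu, memory_order_acquire);
    }

    /* Returns true if it is still worth retrying the lock, false if we
     * should stop spinning and block. */
    static bool try_optimistic_spin(struct spin_lock_s *lock,
                                    struct spin_owner *owner,
                                    bool (*need_resched)(void))
    {
            while (owner_still_running(lock, owner)) {
                    if (need_resched())
                            return false;
            }
            /* owner changed: if someone else holds it now, contention is heavy */
            return atomic_load_explicit(&lock->owner, memory_order_acquire) == NULL;
    }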
@@ -4684,19 +4773,18 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
4684 | */ | 4773 | */ |
4685 | void rt_mutex_setprio(struct task_struct *p, int prio) | 4774 | void rt_mutex_setprio(struct task_struct *p, int prio) |
4686 | { | 4775 | { |
4687 | unsigned long flags; | ||
4688 | int oldprio, on_rq, running; | 4776 | int oldprio, on_rq, running; |
4689 | struct rq *rq; | 4777 | struct rq *rq; |
4690 | const struct sched_class *prev_class; | 4778 | const struct sched_class *prev_class; |
4691 | 4779 | ||
4692 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 4780 | BUG_ON(prio < 0 || prio > MAX_PRIO); |
4693 | 4781 | ||
4694 | rq = task_rq_lock(p, &flags); | 4782 | rq = __task_rq_lock(p); |
4695 | 4783 | ||
4696 | trace_sched_pi_setprio(p, prio); | 4784 | trace_sched_pi_setprio(p, prio); |
4697 | oldprio = p->prio; | 4785 | oldprio = p->prio; |
4698 | prev_class = p->sched_class; | 4786 | prev_class = p->sched_class; |
4699 | on_rq = p->se.on_rq; | 4787 | on_rq = p->on_rq; |
4700 | running = task_current(rq, p); | 4788 | running = task_current(rq, p); |
4701 | if (on_rq) | 4789 | if (on_rq) |
4702 | dequeue_task(rq, p, 0); | 4790 | dequeue_task(rq, p, 0); |
@@ -4716,7 +4804,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4716 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4804 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
4717 | 4805 | ||
4718 | check_class_changed(rq, p, prev_class, oldprio); | 4806 | check_class_changed(rq, p, prev_class, oldprio); |
4719 | task_rq_unlock(rq, &flags); | 4807 | __task_rq_unlock(rq); |
4720 | } | 4808 | } |
4721 | 4809 | ||
4722 | #endif | 4810 | #endif |
@@ -4744,7 +4832,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4744 | p->static_prio = NICE_TO_PRIO(nice); | 4832 | p->static_prio = NICE_TO_PRIO(nice); |
4745 | goto out_unlock; | 4833 | goto out_unlock; |
4746 | } | 4834 | } |
4747 | on_rq = p->se.on_rq; | 4835 | on_rq = p->on_rq; |
4748 | if (on_rq) | 4836 | if (on_rq) |
4749 | dequeue_task(rq, p, 0); | 4837 | dequeue_task(rq, p, 0); |
4750 | 4838 | ||
@@ -4764,7 +4852,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4764 | resched_task(rq->curr); | 4852 | resched_task(rq->curr); |
4765 | } | 4853 | } |
4766 | out_unlock: | 4854 | out_unlock: |
4767 | task_rq_unlock(rq, &flags); | 4855 | task_rq_unlock(rq, p, &flags); |
4768 | } | 4856 | } |
4769 | EXPORT_SYMBOL(set_user_nice); | 4857 | EXPORT_SYMBOL(set_user_nice); |
4770 | 4858 | ||
@@ -4878,8 +4966,6 @@ static struct task_struct *find_process_by_pid(pid_t pid) | |||
4878 | static void | 4966 | static void |
4879 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | 4967 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) |
4880 | { | 4968 | { |
4881 | BUG_ON(p->se.on_rq); | ||
4882 | |||
4883 | p->policy = policy; | 4969 | p->policy = policy; |
4884 | p->rt_priority = prio; | 4970 | p->rt_priority = prio; |
4885 | p->normal_prio = normal_prio(p); | 4971 | p->normal_prio = normal_prio(p); |
@@ -4994,20 +5080,17 @@ recheck: | |||
4994 | /* | 5080 | /* |
4995 | * make sure no PI-waiters arrive (or leave) while we are | 5081 | * make sure no PI-waiters arrive (or leave) while we are |
4996 | * changing the priority of the task: | 5082 | * changing the priority of the task: |
4997 | */ | 5083 | * |
4998 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
4999 | /* | ||
5000 | * To be able to change p->policy safely, the appropriate | 5084 | * To be able to change p->policy safely, the appropriate |
5001 | * runqueue lock must be held. | 5085 | * runqueue lock must be held. |
5002 | */ | 5086 | */ |
5003 | rq = __task_rq_lock(p); | 5087 | rq = task_rq_lock(p, &flags); |
5004 | 5088 | ||
5005 | /* | 5089 | /* |
5006 | * Changing the policy of the stop threads is a very bad idea | 5090 | * Changing the policy of the stop threads is a very bad idea |
5007 | */ | 5091 | */ |
5008 | if (p == rq->stop) { | 5092 | if (p == rq->stop) { |
5009 | __task_rq_unlock(rq); | 5093 | task_rq_unlock(rq, p, &flags); |
5010 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5011 | return -EINVAL; | 5094 | return -EINVAL; |
5012 | } | 5095 | } |
5013 | 5096 | ||
@@ -5031,8 +5114,7 @@ recheck: | |||
5031 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 5114 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
5032 | task_group(p)->rt_bandwidth.rt_runtime == 0 && | 5115 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
5033 | !task_group_is_autogroup(task_group(p))) { | 5116 | !task_group_is_autogroup(task_group(p))) { |
5034 | __task_rq_unlock(rq); | 5117 | task_rq_unlock(rq, p, &flags); |
5035 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5036 | return -EPERM; | 5118 | return -EPERM; |
5037 | } | 5119 | } |
5038 | } | 5120 | } |
@@ -5041,11 +5123,10 @@ recheck: | |||
5041 | /* recheck policy now with rq lock held */ | 5123 | /* recheck policy now with rq lock held */ |
5042 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 5124 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
5043 | policy = oldpolicy = -1; | 5125 | policy = oldpolicy = -1; |
5044 | __task_rq_unlock(rq); | 5126 | task_rq_unlock(rq, p, &flags); |
5045 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5046 | goto recheck; | 5127 | goto recheck; |
5047 | } | 5128 | } |
5048 | on_rq = p->se.on_rq; | 5129 | on_rq = p->on_rq; |
5049 | running = task_current(rq, p); | 5130 | running = task_current(rq, p); |
5050 | if (on_rq) | 5131 | if (on_rq) |
5051 | deactivate_task(rq, p, 0); | 5132 | deactivate_task(rq, p, 0); |
@@ -5064,8 +5145,7 @@ recheck: | |||
5064 | activate_task(rq, p, 0); | 5145 | activate_task(rq, p, 0); |
5065 | 5146 | ||
5066 | check_class_changed(rq, p, prev_class, oldprio); | 5147 | check_class_changed(rq, p, prev_class, oldprio); |
5067 | __task_rq_unlock(rq); | 5148 | task_rq_unlock(rq, p, &flags); |
5068 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5069 | 5149 | ||
5070 | rt_mutex_adjust_pi(p); | 5150 | rt_mutex_adjust_pi(p); |
5071 | 5151 | ||
@@ -5316,7 +5396,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5316 | { | 5396 | { |
5317 | struct task_struct *p; | 5397 | struct task_struct *p; |
5318 | unsigned long flags; | 5398 | unsigned long flags; |
5319 | struct rq *rq; | ||
5320 | int retval; | 5399 | int retval; |
5321 | 5400 | ||
5322 | get_online_cpus(); | 5401 | get_online_cpus(); |
@@ -5331,9 +5410,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5331 | if (retval) | 5410 | if (retval) |
5332 | goto out_unlock; | 5411 | goto out_unlock; |
5333 | 5412 | ||
5334 | rq = task_rq_lock(p, &flags); | 5413 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
5335 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 5414 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
5336 | task_rq_unlock(rq, &flags); | 5415 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
5337 | 5416 | ||
5338 | out_unlock: | 5417 | out_unlock: |
5339 | rcu_read_unlock(); | 5418 | rcu_read_unlock(); |
@@ -5658,7 +5737,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
5658 | 5737 | ||
5659 | rq = task_rq_lock(p, &flags); | 5738 | rq = task_rq_lock(p, &flags); |
5660 | time_slice = p->sched_class->get_rr_interval(rq, p); | 5739 | time_slice = p->sched_class->get_rr_interval(rq, p); |
5661 | task_rq_unlock(rq, &flags); | 5740 | task_rq_unlock(rq, p, &flags); |
5662 | 5741 | ||
5663 | rcu_read_unlock(); | 5742 | rcu_read_unlock(); |
5664 | jiffies_to_timespec(time_slice, &t); | 5743 | jiffies_to_timespec(time_slice, &t); |
@@ -5776,8 +5855,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5776 | rcu_read_unlock(); | 5855 | rcu_read_unlock(); |
5777 | 5856 | ||
5778 | rq->curr = rq->idle = idle; | 5857 | rq->curr = rq->idle = idle; |
5779 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5858 | #if defined(CONFIG_SMP) |
5780 | idle->oncpu = 1; | 5859 | idle->on_cpu = 1; |
5781 | #endif | 5860 | #endif |
5782 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5861 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
5783 | 5862 | ||
@@ -5881,18 +5960,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
5881 | unsigned int dest_cpu; | 5960 | unsigned int dest_cpu; |
5882 | int ret = 0; | 5961 | int ret = 0; |
5883 | 5962 | ||
5884 | /* | ||
5885 | * Serialize against TASK_WAKING so that ttwu() and wunt() can | ||
5886 | * drop the rq->lock and still rely on ->cpus_allowed. | ||
5887 | */ | ||
5888 | again: | ||
5889 | while (task_is_waking(p)) | ||
5890 | cpu_relax(); | ||
5891 | rq = task_rq_lock(p, &flags); | 5963 | rq = task_rq_lock(p, &flags); |
5892 | if (task_is_waking(p)) { | ||
5893 | task_rq_unlock(rq, &flags); | ||
5894 | goto again; | ||
5895 | } | ||
5896 | 5964 | ||
5897 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 5965 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
5898 | ret = -EINVAL; | 5966 | ret = -EINVAL; |
@@ -5917,16 +5985,16 @@ again: | |||
5917 | goto out; | 5985 | goto out; |
5918 | 5986 | ||
5919 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 5987 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5920 | if (migrate_task(p, rq)) { | 5988 | if (need_migrate_task(p)) { |
5921 | struct migration_arg arg = { p, dest_cpu }; | 5989 | struct migration_arg arg = { p, dest_cpu }; |
5922 | /* Need help from migration thread: drop lock and wait. */ | 5990 | /* Need help from migration thread: drop lock and wait. */ |
5923 | task_rq_unlock(rq, &flags); | 5991 | task_rq_unlock(rq, p, &flags); |
5924 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 5992 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
5925 | tlb_migrate_finish(p->mm); | 5993 | tlb_migrate_finish(p->mm); |
5926 | return 0; | 5994 | return 0; |
5927 | } | 5995 | } |
5928 | out: | 5996 | out: |
5929 | task_rq_unlock(rq, &flags); | 5997 | task_rq_unlock(rq, p, &flags); |
5930 | 5998 | ||
5931 | return ret; | 5999 | return ret; |
5932 | } | 6000 | } |
@@ -5954,6 +6022,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5954 | rq_src = cpu_rq(src_cpu); | 6022 | rq_src = cpu_rq(src_cpu); |
5955 | rq_dest = cpu_rq(dest_cpu); | 6023 | rq_dest = cpu_rq(dest_cpu); |
5956 | 6024 | ||
6025 | raw_spin_lock(&p->pi_lock); | ||
5957 | double_rq_lock(rq_src, rq_dest); | 6026 | double_rq_lock(rq_src, rq_dest); |
5958 | /* Already moved. */ | 6027 | /* Already moved. */ |
5959 | if (task_cpu(p) != src_cpu) | 6028 | if (task_cpu(p) != src_cpu) |
@@ -5966,7 +6035,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5966 | * If we're not on a rq, the next wake-up will ensure we're | 6035 | * If we're not on a rq, the next wake-up will ensure we're |
5967 | * placed properly. | 6036 | * placed properly. |
5968 | */ | 6037 | */ |
5969 | if (p->se.on_rq) { | 6038 | if (p->on_rq) { |
5970 | deactivate_task(rq_src, p, 0); | 6039 | deactivate_task(rq_src, p, 0); |
5971 | set_task_cpu(p, dest_cpu); | 6040 | set_task_cpu(p, dest_cpu); |
5972 | activate_task(rq_dest, p, 0); | 6041 | activate_task(rq_dest, p, 0); |
@@ -5976,6 +6045,7 @@ done: | |||
5976 | ret = 1; | 6045 | ret = 1; |
5977 | fail: | 6046 | fail: |
5978 | double_rq_unlock(rq_src, rq_dest); | 6047 | double_rq_unlock(rq_src, rq_dest); |
6048 | raw_spin_unlock(&p->pi_lock); | ||
5979 | return ret; | 6049 | return ret; |
5980 | } | 6050 | } |
5981 | 6051 | ||
@@ -6316,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6316 | 6386 | ||
6317 | #ifdef CONFIG_HOTPLUG_CPU | 6387 | #ifdef CONFIG_HOTPLUG_CPU |
6318 | case CPU_DYING: | 6388 | case CPU_DYING: |
6389 | sched_ttwu_pending(); | ||
6319 | /* Update our root-domain */ | 6390 | /* Update our root-domain */ |
6320 | raw_spin_lock_irqsave(&rq->lock, flags); | 6391 | raw_spin_lock_irqsave(&rq->lock, flags); |
6321 | if (rq->rd) { | 6392 | if (rq->rd) { |
@@ -8340,7 +8411,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
8340 | int old_prio = p->prio; | 8411 | int old_prio = p->prio; |
8341 | int on_rq; | 8412 | int on_rq; |
8342 | 8413 | ||
8343 | on_rq = p->se.on_rq; | 8414 | on_rq = p->on_rq; |
8344 | if (on_rq) | 8415 | if (on_rq) |
8345 | deactivate_task(rq, p, 0); | 8416 | deactivate_task(rq, p, 0); |
8346 | __setscheduler(rq, p, SCHED_NORMAL, 0); | 8417 | __setscheduler(rq, p, SCHED_NORMAL, 0); |
@@ -8683,7 +8754,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8683 | rq = task_rq_lock(tsk, &flags); | 8754 | rq = task_rq_lock(tsk, &flags); |
8684 | 8755 | ||
8685 | running = task_current(rq, tsk); | 8756 | running = task_current(rq, tsk); |
8686 | on_rq = tsk->se.on_rq; | 8757 | on_rq = tsk->on_rq; |
8687 | 8758 | ||
8688 | if (on_rq) | 8759 | if (on_rq) |
8689 | dequeue_task(rq, tsk, 0); | 8760 | dequeue_task(rq, tsk, 0); |
@@ -8702,7 +8773,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8702 | if (on_rq) | 8773 | if (on_rq) |
8703 | enqueue_task(rq, tsk, 0); | 8774 | enqueue_task(rq, tsk, 0); |
8704 | 8775 | ||
8705 | task_rq_unlock(rq, &flags); | 8776 | task_rq_unlock(rq, tsk, &flags); |
8706 | } | 8777 | } |
8707 | #endif /* CONFIG_CGROUP_SCHED */ | 8778 | #endif /* CONFIG_CGROUP_SCHED */ |
8708 | 8779 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7bacd83a4158..3669bec6e130 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) | |||
152 | read_lock_irqsave(&tasklist_lock, flags); | 152 | read_lock_irqsave(&tasklist_lock, flags); |
153 | 153 | ||
154 | do_each_thread(g, p) { | 154 | do_each_thread(g, p) { |
155 | if (!p->se.on_rq || task_cpu(p) != rq_cpu) | 155 | if (!p->on_rq || task_cpu(p) != rq_cpu) |
156 | continue; | 156 | continue; |
157 | 157 | ||
158 | print_task(m, rq, p); | 158 | print_task(m, rq, p); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6fa833ab2cb8..054cebb81f7b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) | |||
358 | } | 358 | } |
359 | 359 | ||
360 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); | 360 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); |
361 | #ifndef CONFIG_64BIT | ||
362 | smp_wmb(); | ||
363 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
364 | #endif | ||
361 | } | 365 | } |
362 | 366 | ||
363 | /* | 367 | /* |
@@ -1372,12 +1376,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1372 | 1376 | ||
1373 | #ifdef CONFIG_SMP | 1377 | #ifdef CONFIG_SMP |
1374 | 1378 | ||
1375 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | 1379 | static void task_waking_fair(struct task_struct *p) |
1376 | { | 1380 | { |
1377 | struct sched_entity *se = &p->se; | 1381 | struct sched_entity *se = &p->se; |
1378 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1382 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1383 | u64 min_vruntime; | ||
1379 | 1384 | ||
1380 | se->vruntime -= cfs_rq->min_vruntime; | 1385 | #ifndef CONFIG_64BIT |
1386 | u64 min_vruntime_copy; | ||
1387 | |||
1388 | do { | ||
1389 | min_vruntime_copy = cfs_rq->min_vruntime_copy; | ||
1390 | smp_rmb(); | ||
1391 | min_vruntime = cfs_rq->min_vruntime; | ||
1392 | } while (min_vruntime != min_vruntime_copy); | ||
1393 | #else | ||
1394 | min_vruntime = cfs_rq->min_vruntime; | ||
1395 | #endif | ||
1396 | |||
1397 | se->vruntime -= min_vruntime; | ||
1381 | } | 1398 | } |
1382 | 1399 | ||
1383 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1400 | #ifdef CONFIG_FAIR_GROUP_SCHED |
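The min_vruntime_copy pair added above lets task_waking_fair() read a 64-bit value without the rq->lock on 32-bit kernels, where a plain 64-bit access could tear: the writer stores the value, issues a write barrier, then stores the copy; the reader loops until copy and value agree. A minimal illustrative userspace sketch of that protocol (invented names, C11 fences standing in for smp_wmb()/smp_rmb()):

    #include <stdatomic.h>
    #include <stdint.h>

    struct wide_val {
            _Atomic uint64_t val;
            _Atomic uint64_t copy;
    };

    /* Writer: value first, barrier, then the copy. */
    static void wide_val_write(struct wide_val *w, uint64_t v)
    {
            atomic_store_explicit(&w->val, v, memory_order_relaxed);
            atomic_thread_fence(memory_order_release);      /* smp_wmb() analogue */
            atomic_store_explicit(&w->copy, v, memory_order_relaxed);
    }

    /* Reader: copy first, barrier, then the value; retry until they match,
     * so a concurrent update can never be observed half-done. */
    static uint64_t wide_val_read(struct wide_val *w)
    {
            uint64_t c, v;

            do {
                    c = atomic_load_explicit(&w->copy, memory_order_relaxed);
                    atomic_thread_fence(memory_order_acquire);  /* smp_rmb() analogue */
                    v = atomic_load_explicit(&w->val, memory_order_relaxed);
            } while (v != c);

            return v;
    }

On 64-bit kernels the extra copy is compiled out, which is exactly what the #ifndef CONFIG_64BIT guards express.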
@@ -1657,7 +1674,7 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1657 | * preempt must be disabled. | 1674 | * preempt must be disabled. |
1658 | */ | 1675 | */ |
1659 | static int | 1676 | static int |
1660 | select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) | 1677 | select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) |
1661 | { | 1678 | { |
1662 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | 1679 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; |
1663 | int cpu = smp_processor_id(); | 1680 | int cpu = smp_processor_id(); |
@@ -1789,10 +1806,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se) | |||
1789 | * This is especially important for buddies when the leftmost | 1806 | * This is especially important for buddies when the leftmost |
1790 | * task is higher priority than the buddy. | 1807 | * task is higher priority than the buddy. |
1791 | */ | 1808 | */ |
1792 | if (unlikely(se->load.weight != NICE_0_LOAD)) | 1809 | return calc_delta_fair(gran, se); |
1793 | gran = calc_delta_fair(gran, se); | ||
1794 | |||
1795 | return gran; | ||
1796 | } | 1810 | } |
1797 | 1811 | ||
1798 | /* | 1812 | /* |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 68e69acc29b9..be40f7371ee1 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1) | |||
64 | * Decrement CPU power based on irq activity | 64 | * Decrement CPU power based on irq activity |
65 | */ | 65 | */ |
66 | SCHED_FEAT(NONIRQ_POWER, 1) | 66 | SCHED_FEAT(NONIRQ_POWER, 1) |
67 | |||
68 | /* | ||
69 | * Queue remote wakeups on the target CPU and process them | ||
70 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | ||
71 | */ | ||
72 | SCHED_FEAT(TTWU_QUEUE, 1) | ||
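As with the other entries in sched_features.h, this flag can usually be flipped at runtime on kernels built with CONFIG_SCHED_DEBUG by writing TTWU_QUEUE or NO_TTWU_QUEUE to /sys/kernel/debug/sched_features, which is convenient when bisecting wakeup-latency or rq->lock contention changes back to this feature.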
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index a776a6396427..0a51882534ea 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | static int | 9 | static int |
10 | select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | 10 | select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) |
11 | { | 11 | { |
12 | return task_cpu(p); /* IDLE tasks are never migrated */ | 12 | return task_cpu(p); /* IDLE tasks are never migrated */ |
13 | } | 13 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index e7cebdc65f82..19ecb3127379 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq) | |||
977 | static int find_lowest_rq(struct task_struct *task); | 977 | static int find_lowest_rq(struct task_struct *task); |
978 | 978 | ||
979 | static int | 979 | static int |
980 | select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | 980 | select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) |
981 | { | 981 | { |
982 | struct task_struct *curr; | ||
983 | struct rq *rq; | ||
984 | int cpu; | ||
985 | |||
982 | if (sd_flag != SD_BALANCE_WAKE) | 986 | if (sd_flag != SD_BALANCE_WAKE) |
983 | return smp_processor_id(); | 987 | return smp_processor_id(); |
984 | 988 | ||
989 | cpu = task_cpu(p); | ||
990 | rq = cpu_rq(cpu); | ||
991 | |||
992 | rcu_read_lock(); | ||
993 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | ||
994 | |||
985 | /* | 995 | /* |
986 | * If the current task is an RT task, then | 996 | * If the current task on @p's runqueue is an RT task, then |
987 | * try to see if we can wake this RT task up on another | 997 | * try to see if we can wake this RT task up on another |
988 | * runqueue. Otherwise simply start this RT task | 998 | * runqueue. Otherwise simply start this RT task |
989 | * on its current runqueue. | 999 | * on its current runqueue. |
@@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | |||
997 | * lock? | 1007 | * lock? |
998 | * | 1008 | * |
999 | * For equal prio tasks, we just let the scheduler sort it out. | 1009 | * For equal prio tasks, we just let the scheduler sort it out. |
1010 | * | ||
1011 | * Otherwise, just let it ride on the affined RQ and the | ||
1012 | * post-schedule router will push the preempted task away | ||
1013 | * | ||
1014 | * This test is optimistic, if we get it wrong the load-balancer | ||
1015 | * will have to sort it out. | ||
1000 | */ | 1016 | */ |
1001 | if (unlikely(rt_task(rq->curr)) && | 1017 | if (curr && unlikely(rt_task(curr)) && |
1002 | (rq->curr->rt.nr_cpus_allowed < 2 || | 1018 | (curr->rt.nr_cpus_allowed < 2 || |
1003 | rq->curr->prio < p->prio) && | 1019 | curr->prio < p->prio) && |
1004 | (p->rt.nr_cpus_allowed > 1)) { | 1020 | (p->rt.nr_cpus_allowed > 1)) { |
1005 | int cpu = find_lowest_rq(p); | 1021 | int target = find_lowest_rq(p); |
1006 | 1022 | ||
1007 | return (cpu == -1) ? task_cpu(p) : cpu; | 1023 | if (target != -1) |
1024 | cpu = target; | ||
1008 | } | 1025 | } |
1026 | rcu_read_unlock(); | ||
1009 | 1027 | ||
1010 | /* | 1028 | return cpu; |
1011 | * Otherwise, just let it ride on the affined RQ and the | ||
1012 | * post-schedule router will push the preempted task away | ||
1013 | */ | ||
1014 | return task_cpu(p); | ||
1015 | } | 1029 | } |
1016 | 1030 | ||
1017 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 1031 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
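The ACCESS_ONCE(rq->curr) read above is deliberately unlocked: under rcu_read_lock() it is a single snapshot used only as a placement hint, and the comment spells out that a wrong guess is corrected later by the push/pull balancer under the real locks. An illustrative sketch of that "unlocked peek" pattern (hypothetical names, not the kernel's data structures):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct cpu_state {
            _Atomic bool curr_is_rt;        /* is an RT task running there now? */
    };

    /* One relaxed load, like ACCESS_ONCE(rq->curr): a snapshot that may be
     * stale by the time we act on it. A wrong answer only costs a less ideal
     * CPU choice, which later, properly locked balancing corrects. */
    static bool remote_cpu_busy_with_rt(struct cpu_state *target)
    {
            return atomic_load_explicit(&target->curr_is_rt, memory_order_relaxed);
    }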
@@ -1136,7 +1150,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
1136 | * The previous task needs to be made eligible for pushing | 1150 | * The previous task needs to be made eligible for pushing |
1137 | * if it is still active | 1151 | * if it is still active |
1138 | */ | 1152 | */ |
1139 | if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) | 1153 | if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) |
1140 | enqueue_pushable_task(rq, p); | 1154 | enqueue_pushable_task(rq, p); |
1141 | } | 1155 | } |
1142 | 1156 | ||
@@ -1287,7 +1301,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
1287 | !cpumask_test_cpu(lowest_rq->cpu, | 1301 | !cpumask_test_cpu(lowest_rq->cpu, |
1288 | &task->cpus_allowed) || | 1302 | &task->cpus_allowed) || |
1289 | task_running(rq, task) || | 1303 | task_running(rq, task) || |
1290 | !task->se.on_rq)) { | 1304 | !task->on_rq)) { |
1291 | 1305 | ||
1292 | raw_spin_unlock(&lowest_rq->lock); | 1306 | raw_spin_unlock(&lowest_rq->lock); |
1293 | lowest_rq = NULL; | 1307 | lowest_rq = NULL; |
@@ -1321,7 +1335,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) | |||
1321 | BUG_ON(task_current(rq, p)); | 1335 | BUG_ON(task_current(rq, p)); |
1322 | BUG_ON(p->rt.nr_cpus_allowed <= 1); | 1336 | BUG_ON(p->rt.nr_cpus_allowed <= 1); |
1323 | 1337 | ||
1324 | BUG_ON(!p->se.on_rq); | 1338 | BUG_ON(!p->on_rq); |
1325 | BUG_ON(!rt_task(p)); | 1339 | BUG_ON(!rt_task(p)); |
1326 | 1340 | ||
1327 | return p; | 1341 | return p; |
@@ -1467,7 +1481,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1467 | */ | 1481 | */ |
1468 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { | 1482 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { |
1469 | WARN_ON(p == src_rq->curr); | 1483 | WARN_ON(p == src_rq->curr); |
1470 | WARN_ON(!p->se.on_rq); | 1484 | WARN_ON(!p->on_rq); |
1471 | 1485 | ||
1472 | /* | 1486 | /* |
1473 | * There's a chance that p is higher in priority | 1487 | * There's a chance that p is higher in priority |
@@ -1538,7 +1552,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1538 | * Update the migration status of the RQ if we have an RT task | 1552 | * Update the migration status of the RQ if we have an RT task |
1539 | * which is running AND changing its weight value. | 1553 | * which is running AND changing its weight value. |
1540 | */ | 1554 | */ |
1541 | if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { | 1555 | if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { |
1542 | struct rq *rq = task_rq(p); | 1556 | struct rq *rq = task_rq(p); |
1543 | 1557 | ||
1544 | if (!task_current(rq, p)) { | 1558 | if (!task_current(rq, p)) { |
@@ -1608,7 +1622,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
1608 | * we may need to handle the pulling of RT tasks | 1622 | * we may need to handle the pulling of RT tasks |
1609 | * now. | 1623 | * now. |
1610 | */ | 1624 | */ |
1611 | if (p->se.on_rq && !rq->rt.rt_nr_running) | 1625 | if (p->on_rq && !rq->rt.rt_nr_running) |
1612 | pull_rt_task(rq); | 1626 | pull_rt_task(rq); |
1613 | } | 1627 | } |
1614 | 1628 | ||
@@ -1638,7 +1652,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1638 | * If that current running task is also an RT task | 1652 | * If that current running task is also an RT task |
1639 | * then see if we can move to another run queue. | 1653 | * then see if we can move to another run queue. |
1640 | */ | 1654 | */ |
1641 | if (p->se.on_rq && rq->curr != p) { | 1655 | if (p->on_rq && rq->curr != p) { |
1642 | #ifdef CONFIG_SMP | 1656 | #ifdef CONFIG_SMP |
1643 | if (rq->rt.overloaded && push_rt_task(rq) && | 1657 | if (rq->rt.overloaded && push_rt_task(rq) && |
1644 | /* Don't resched if we changed runqueues */ | 1658 | /* Don't resched if we changed runqueues */ |
@@ -1657,7 +1671,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1657 | static void | 1671 | static void |
1658 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) | 1672 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) |
1659 | { | 1673 | { |
1660 | if (!p->se.on_rq) | 1674 | if (!p->on_rq) |
1661 | return; | 1675 | return; |
1662 | 1676 | ||
1663 | if (rq->curr == p) { | 1677 | if (rq->curr == p) { |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 1ba2bd40fdac..6f437632afab 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -9,8 +9,7 @@ | |||
9 | 9 | ||
10 | #ifdef CONFIG_SMP | 10 | #ifdef CONFIG_SMP |
11 | static int | 11 | static int |
12 | select_task_rq_stop(struct rq *rq, struct task_struct *p, | 12 | select_task_rq_stop(struct task_struct *p, int sd_flag, int flags) |
13 | int sd_flag, int flags) | ||
14 | { | 13 | { |
15 | return task_cpu(p); /* stop tasks never migrate */ | 14 | return task_cpu(p); /* stop tasks never migrate */ |
16 | } | 15 | } |
@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) | |||
26 | { | 25 | { |
27 | struct task_struct *stop = rq->stop; | 26 | struct task_struct *stop = rq->stop; |
28 | 27 | ||
29 | if (stop && stop->se.on_rq) | 28 | if (stop && stop->on_rq) |
30 | return stop; | 29 | return stop; |
31 | 30 | ||
32 | return NULL; | 31 | return NULL; |