aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/xen/enlighten.c5
-rw-r--r--arch/x86/xen/mmu.c56
-rw-r--r--arch/x86/xen/xen-head.S2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c6
-rw-r--r--drivers/lguest/x86/core.c15
-rw-r--r--drivers/xen/events.c2
-rw-r--r--kernel/futex.c93
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sched_rt.c3
-rw-r--r--sound/isa/sb/sb_mixer.c4
-rw-r--r--sound/pci/aw2/aw2-alsa.c4
11 files changed, 136 insertions, 68 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d61..c048de34d6a1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1228,6 +1228,11 @@ asmlinkage void __init xen_start_kernel(void)
1228 if (xen_feature(XENFEAT_supervisor_mode_kernel)) 1228 if (xen_feature(XENFEAT_supervisor_mode_kernel))
1229 pv_info.kernel_rpl = 0; 1229 pv_info.kernel_rpl = 0;
1230 1230
1231 /* Prevent unwanted bits from being set in PTEs. */
1232 __supported_pte_mask &= ~_PAGE_GLOBAL;
1233 if (!is_initial_xendomain())
1234 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1235
1231 /* set the limit of our address space */ 1236 /* set the limit of our address space */
1232 xen_reserve_top(); 1237 xen_reserve_top();
1233 1238
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3525ef523a74..265601d5a6ae 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -179,48 +179,54 @@ out:
179 preempt_enable(); 179 preempt_enable();
180} 180}
181 181
182pteval_t xen_pte_val(pte_t pte) 182/* Assume pteval_t is equivalent to all the other *val_t types. */
183static pteval_t pte_mfn_to_pfn(pteval_t val)
183{ 184{
184 pteval_t ret = pte.pte; 185 if (val & _PAGE_PRESENT) {
186 unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
187 pteval_t flags = val & ~PTE_MASK;
188 val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
189 }
190
191 return val;
192}
193
194static pteval_t pte_pfn_to_mfn(pteval_t val)
195{
196 if (val & _PAGE_PRESENT) {
197 unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
198 pteval_t flags = val & ~PTE_MASK;
199 val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
200 }
185 201
186 if (ret & _PAGE_PRESENT) 202 return val;
187 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; 203}
188 204
189 return ret; 205pteval_t xen_pte_val(pte_t pte)
206{
207 return pte_mfn_to_pfn(pte.pte);
190} 208}
191 209
192pgdval_t xen_pgd_val(pgd_t pgd) 210pgdval_t xen_pgd_val(pgd_t pgd)
193{ 211{
194 pgdval_t ret = pgd.pgd; 212 return pte_mfn_to_pfn(pgd.pgd);
195 if (ret & _PAGE_PRESENT)
196 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
197 return ret;
198} 213}
199 214
200pte_t xen_make_pte(pteval_t pte) 215pte_t xen_make_pte(pteval_t pte)
201{ 216{
202 if (pte & _PAGE_PRESENT) { 217 pte = pte_pfn_to_mfn(pte);
203 pte = phys_to_machine(XPADDR(pte)).maddr; 218 return native_make_pte(pte);
204 pte &= ~(_PAGE_PCD | _PAGE_PWT);
205 }
206
207 return (pte_t){ .pte = pte };
208} 219}
209 220
210pgd_t xen_make_pgd(pgdval_t pgd) 221pgd_t xen_make_pgd(pgdval_t pgd)
211{ 222{
212 if (pgd & _PAGE_PRESENT) 223 pgd = pte_pfn_to_mfn(pgd);
213 pgd = phys_to_machine(XPADDR(pgd)).maddr; 224 return native_make_pgd(pgd);
214
215 return (pgd_t){ pgd };
216} 225}
217 226
218pmdval_t xen_pmd_val(pmd_t pmd) 227pmdval_t xen_pmd_val(pmd_t pmd)
219{ 228{
220 pmdval_t ret = native_pmd_val(pmd); 229 return pte_mfn_to_pfn(pmd.pmd);
221 if (ret & _PAGE_PRESENT)
222 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
223 return ret;
224} 230}
225#ifdef CONFIG_X86_PAE 231#ifdef CONFIG_X86_PAE
226void xen_set_pud(pud_t *ptr, pud_t val) 232void xen_set_pud(pud_t *ptr, pud_t val)
@@ -267,9 +273,7 @@ void xen_pmd_clear(pmd_t *pmdp)
267 273
268pmd_t xen_make_pmd(pmdval_t pmd) 274pmd_t xen_make_pmd(pmdval_t pmd)
269{ 275{
270 if (pmd & _PAGE_PRESENT) 276 pmd = pte_pfn_to_mfn(pmd);
271 pmd = phys_to_machine(XPADDR(pmd)).maddr;
272
273 return native_make_pmd(pmd); 277 return native_make_pmd(pmd);
274} 278}
275#else /* !PAE */ 279#else /* !PAE */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587ce73c..3175e973fd0d 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
17 17
18 __FINIT 18 __FINIT
19 19
20.pushsection .bss.page_aligned 20.pushsection .text
21 .align PAGE_SIZE_asm 21 .align PAGE_SIZE_asm
22ENTRY(hypercall_page) 22ENTRY(hypercall_page)
23 .skip 0x1000 23 .skip 0x1000
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index b224079d4e1f..d5862e5d99a0 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
109{ 109{
110 struct page *page; 110 struct page *page;
111 111
112 page = alloc_pages(gfp_mask, order); 112 /*
113 * Use __GFP_ZERO because buggy firmware assumes ICM pages are
114 * cleared, and subtle failures are seen if they aren't.
115 */
116 page = alloc_pages(gfp_mask | __GFP_ZERO, order);
113 if (!page) 117 if (!page)
114 return -ENOMEM; 118 return -ENOMEM;
115 119
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 5126d5d9ea0e..2e554a4ab337 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
176 * we set it now, so we can trap and pass that trap to the Guest if it 176 * we set it now, so we can trap and pass that trap to the Guest if it
177 * uses the FPU. */ 177 * uses the FPU. */
178 if (cpu->ts) 178 if (cpu->ts)
179 lguest_set_ts(); 179 unlazy_fpu(current);
180 180
181 /* SYSENTER is an optimized way of doing system calls. We can't allow 181 /* SYSENTER is an optimized way of doing system calls. We can't allow
182 * it because it always jumps to privilege level 0. A normal Guest 182 * it because it always jumps to privilege level 0. A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
196 * trap made the switcher code come back, and an error code which some 196 * trap made the switcher code come back, and an error code which some
197 * traps set. */ 197 * traps set. */
198 198
199 /* Restore SYSENTER if it's supposed to be on. */
200 if (boot_cpu_has(X86_FEATURE_SEP))
201 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
202
199 /* If the Guest page faulted, then the cr2 register will tell us the 203 /* If the Guest page faulted, then the cr2 register will tell us the
200 * bad virtual address. We have to grab this now, because once we 204 * bad virtual address. We have to grab this now, because once we
201 * re-enable interrupts an interrupt could fault and thus overwrite 205 * re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
203 if (cpu->regs->trapnum == 14) 207 if (cpu->regs->trapnum == 14)
204 cpu->arch.last_pagefault = read_cr2(); 208 cpu->arch.last_pagefault = read_cr2();
205 /* Similarly, if we took a trap because the Guest used the FPU, 209 /* Similarly, if we took a trap because the Guest used the FPU,
206 * we have to restore the FPU it expects to see. */ 210 * we have to restore the FPU it expects to see.
211 * math_state_restore() may sleep and we may even move off to
212 * a different CPU. So all the critical stuff should be done
213 * before this. */
207 else if (cpu->regs->trapnum == 7) 214 else if (cpu->regs->trapnum == 7)
208 math_state_restore(); 215 math_state_restore();
209
210 /* Restore SYSENTER if it's supposed to be on. */
211 if (boot_cpu_has(X86_FEATURE_SEP))
212 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
213} 216}
214 217
215/*H:130 Now we've examined the hypercall code; our Guest can make requests. 218/*H:130 Now we've examined the hypercall code; our Guest can make requests.
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4f0f22b020ea..76e5b7386af9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
529 529
530#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ 530#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
531 /* Clear master flag /before/ clearing selector flag. */ 531 /* Clear master flag /before/ clearing selector flag. */
532 rmb(); 532 wmb();
533#endif 533#endif
534 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); 534 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
535 while (pending_words != 0) { 535 while (pending_words != 0) {
diff --git a/kernel/futex.c b/kernel/futex.c
index 449def8074fe..7d1136e97c14 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q)
1096 * private futexes. 1096 * private futexes.
1097 */ 1097 */
1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1099 struct task_struct *newowner) 1099 struct task_struct *newowner,
1100 struct rw_semaphore *fshared)
1100{ 1101{
1101 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1102 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1102 struct futex_pi_state *pi_state = q->pi_state; 1103 struct futex_pi_state *pi_state = q->pi_state;
1104 struct task_struct *oldowner = pi_state->owner;
1103 u32 uval, curval, newval; 1105 u32 uval, curval, newval;
1104 int ret; 1106 int ret, attempt = 0;
1105 1107
1106 /* Owner died? */ 1108 /* Owner died? */
1109 if (!pi_state->owner)
1110 newtid |= FUTEX_OWNER_DIED;
1111
1112 /*
1113 * We are here either because we stole the rtmutex from the
1114 * pending owner or we are the pending owner which failed to
1115 * get the rtmutex. We have to replace the pending owner TID
1116 * in the user space variable. This must be atomic as we have
1117 * to preserve the owner died bit here.
1118 *
1119 * Note: We write the user space value _before_ changing the
1120 * pi_state because we can fault here. Imagine swapped out
1121 * pages or a fork, which was running right before we acquired
1122 * mmap_sem, that marked all the anonymous memory readonly for
1123 * cow.
1124 *
1125 * Modifying pi_state _before_ the user space value would
1126 * leave the pi_state in an inconsistent state when we fault
1127 * here, because we need to drop the hash bucket lock to
1128 * handle the fault. This might be observed in the PID check
1129 * in lookup_pi_state.
1130 */
1131retry:
1132 if (get_futex_value_locked(&uval, uaddr))
1133 goto handle_fault;
1134
1135 while (1) {
1136 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1137
1138 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1139
1140 if (curval == -EFAULT)
1141 goto handle_fault;
1142 if (curval == uval)
1143 break;
1144 uval = curval;
1145 }
1146
1147 /*
1148 * We fixed up user space. Now we need to fix the pi_state
1149 * itself.
1150 */
1107 if (pi_state->owner != NULL) { 1151 if (pi_state->owner != NULL) {
1108 spin_lock_irq(&pi_state->owner->pi_lock); 1152 spin_lock_irq(&pi_state->owner->pi_lock);
1109 WARN_ON(list_empty(&pi_state->list)); 1153 WARN_ON(list_empty(&pi_state->list));
1110 list_del_init(&pi_state->list); 1154 list_del_init(&pi_state->list);
1111 spin_unlock_irq(&pi_state->owner->pi_lock); 1155 spin_unlock_irq(&pi_state->owner->pi_lock);
1112 } else 1156 }
1113 newtid |= FUTEX_OWNER_DIED;
1114 1157
1115 pi_state->owner = newowner; 1158 pi_state->owner = newowner;
1116 1159
@@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1118 WARN_ON(!list_empty(&pi_state->list)); 1161 WARN_ON(!list_empty(&pi_state->list));
1119 list_add(&pi_state->list, &newowner->pi_state_list); 1162 list_add(&pi_state->list, &newowner->pi_state_list);
1120 spin_unlock_irq(&newowner->pi_lock); 1163 spin_unlock_irq(&newowner->pi_lock);
1164 return 0;
1121 1165
1122 /* 1166 /*
1123 * We own it, so we have to replace the pending owner 1167 * To handle the page fault we need to drop the hash bucket
1124 * TID. This must be atomic as we have preserve the 1168 * lock here. That gives the other task (either the pending
1125 * owner died bit here. 1169 * owner itself or the task which stole the rtmutex) the
1170 * chance to try the fixup of the pi_state. So once we are
1171 * back from handling the fault we need to check the pi_state
1172 * after reacquiring the hash bucket lock and before trying to
1173 * do another fixup. When the fixup has been done already we
1174 * simply return.
1126 */ 1175 */
1127 ret = get_futex_value_locked(&uval, uaddr); 1176handle_fault:
1177 spin_unlock(q->lock_ptr);
1128 1178
1129 while (!ret) { 1179 ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
1130 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1131 1180
1132 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 1181 spin_lock(q->lock_ptr);
1133 1182
1134 if (curval == -EFAULT) 1183 /*
1135 ret = -EFAULT; 1184 * Check if someone else fixed it for us:
1136 if (curval == uval) 1185 */
1137 break; 1186 if (pi_state->owner != oldowner)
1138 uval = curval; 1187 return 0;
1139 } 1188
1140 return ret; 1189 if (ret)
1190 return ret;
1191
1192 goto retry;
1141} 1193}
1142 1194
1143/* 1195/*
@@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1507 * that case: 1559 * that case:
1508 */ 1560 */
1509 if (q.pi_state->owner != curr) 1561 if (q.pi_state->owner != curr)
1510 ret = fixup_pi_state_owner(uaddr, &q, curr); 1562 ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
1511 } else { 1563 } else {
1512 /* 1564 /*
1513 * Catch the rare case, where the lock was released 1565 * Catch the rare case, where the lock was released
@@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1539 int res; 1591 int res;
1540 1592
1541 owner = rt_mutex_owner(&q.pi_state->pi_mutex); 1593 owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1542 res = fixup_pi_state_owner(uaddr, &q, owner); 1594 res = fixup_pi_state_owner(uaddr, &q, owner,
1595 fshared);
1543 1596
1544 /* propagate -EFAULT, if the fixup failed */ 1597 /* propagate -EFAULT, if the fixup failed */
1545 if (res) 1598 if (res)
diff --git a/kernel/sched.c b/kernel/sched.c
index b048ad8a11af..3aaa5c8cb421 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
4398 signal_pending(current)) || 4398 signal_pending(current)) ||
4399 (state == TASK_KILLABLE && 4399 (state == TASK_KILLABLE &&
4400 fatal_signal_pending(current))) { 4400 fatal_signal_pending(current))) {
4401 __remove_wait_queue(&x->wait, &wait); 4401 timeout = -ERESTARTSYS;
4402 return -ERESTARTSYS; 4402 break;
4403 } 4403 }
4404 __set_current_state(state); 4404 __set_current_state(state);
4405 spin_unlock_irq(&x->wait.lock); 4405 spin_unlock_irq(&x->wait.lock);
4406 timeout = schedule_timeout(timeout); 4406 timeout = schedule_timeout(timeout);
4407 spin_lock_irq(&x->wait.lock); 4407 spin_lock_irq(&x->wait.lock);
4408 if (!timeout) { 4408 } while (!x->done && timeout);
4409 __remove_wait_queue(&x->wait, &wait);
4410 return timeout;
4411 }
4412 } while (!x->done);
4413 __remove_wait_queue(&x->wait, &wait); 4409 __remove_wait_queue(&x->wait, &wait);
4410 if (!x->done)
4411 return timeout;
4414 } 4412 }
4415 x->done--; 4413 x->done--;
4416 return timeout; 4414 return timeout ?: 1;
4417} 4415}
4418 4416
4419static long __sched 4417static long __sched
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..0f3c19197fa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
250 if (rt_rq->rt_time || rt_rq->rt_nr_running) 250 if (rt_rq->rt_time || rt_rq->rt_nr_running)
251 idle = 0; 251 idle = 0;
252 spin_unlock(&rt_rq->rt_runtime_lock); 252 spin_unlock(&rt_rq->rt_runtime_lock);
253 } 253 } else if (rt_rq->rt_nr_running)
254 idle = 0;
254 255
255 if (enqueue) 256 if (enqueue)
256 sched_rt_rq_enqueue(rt_rq); 257 sched_rt_rq_enqueue(rt_rq);
diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c
index 91d14224f6b3..73d4572d136b 100644
--- a/sound/isa/sb/sb_mixer.c
+++ b/sound/isa/sb/sb_mixer.c
@@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = {
925static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) 925static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
926{ 926{
927 unsigned char *val = chip->saved_regs; 927 unsigned char *val = chip->saved_regs;
928 snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return); 928 snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
929 for (; num_regs; num_regs--) 929 for (; num_regs; num_regs--)
930 *val++ = snd_sbmixer_read(chip, *regs++); 930 *val++ = snd_sbmixer_read(chip, *regs++);
931} 931}
@@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
933static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) 933static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
934{ 934{
935 unsigned char *val = chip->saved_regs; 935 unsigned char *val = chip->saved_regs;
936 snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return); 936 snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
937 for (; num_regs; num_regs--) 937 for (; num_regs; num_regs--)
938 snd_sbmixer_write(chip, *regs++, *val++); 938 snd_sbmixer_write(chip, *regs++, *val++);
939} 939}
diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
index 56f87cd33c19..3f00ddf450f8 100644
--- a/sound/pci/aw2/aw2-alsa.c
+++ b/sound/pci/aw2/aw2-alsa.c
@@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card,
316 return -ENOMEM; 316 return -ENOMEM;
317 } 317 }
318 318
319 /* (2) initialization of the chip hardware */
320 snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
319 321
320 if (request_irq(pci->irq, snd_aw2_saa7146_interrupt, 322 if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
321 IRQF_SHARED, "Audiowerk2", chip)) { 323 IRQF_SHARED, "Audiowerk2", chip)) {
@@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card,
329 } 331 }
330 chip->irq = pci->irq; 332 chip->irq = pci->irq;
331 333
332 /* (2) initialization of the chip hardware */
333 snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
334 err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); 334 err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
335 if (err < 0) { 335 if (err < 0) {
336 free_irq(chip->irq, (void *)chip); 336 free_irq(chip->irq, (void *)chip);