author     Linus Torvalds <torvalds@linux-foundation.org>    2010-01-25 22:02:31 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2010-01-25 22:02:31 -0500
commit     486d35e2220acfe45d85131c557d94fe889184a2
tree       6be42a8a0d82e7e09bb4ac05edcbdb96adf650dc
parent     a8d0b6666ecfe14226f1e46d693d5e2cde072337
parent     443c39bc9ef7d8f648408d74c97e943f3bb3f48a
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Fix leak of free lapic date in kvm_arch_vcpu_init()
  KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks
  KVM: S390: fix potential array overrun in intercept handling
  KVM: fix spurious interrupt with irqfd
  eventfd - allow atomic read and waitqueue remove
  KVM: MMU: bail out pagewalk on kvm_read_guest error
  KVM: properly check max PIC pin in irq route setup
  KVM: only allow one gsi per fd
  KVM: x86: Fix host_mapping_level()
  KVM: powerpc: Show timing option only on embedded
  KVM: Fix race between APIC TMR and IRR
-rw-r--r--  arch/powerpc/kvm/Kconfig      2
-rw-r--r--  arch/s390/kvm/intercept.c     4
-rw-r--r--  arch/x86/kvm/lapic.c         11
-rw-r--r--  arch/x86/kvm/mmu.c            6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h    4
-rw-r--r--  arch/x86/kvm/x86.c            6
-rw-r--r--  fs/eventfd.c                 89
-rw-r--r--  include/linux/eventfd.h      16
-rw-r--r--  virt/kvm/eventfd.c           18
-rw-r--r--  virt/kvm/irq_comm.c           6
10 files changed, 128 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 07703f72330..6fb6e8aa389 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -53,7 +53,7 @@ config KVM_440
 
 config KVM_EXIT_TIMING
         bool "Detailed exit timing"
-        depends on KVM
+        depends on KVM_440 || KVM_E500
         ---help---
           Calculate elapsed time for every exit/enter cycle. A per-vcpu
           report is available in debugfs kvm/vm#_vcpu#_timing.
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index ba9d8a7bc1a..b40096494e4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -213,7 +213,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
         return rc2;
 }
 
-static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+static const intercept_handler_t intercept_funcs[] = {
         [0x00 >> 2] = handle_noop,
         [0x04 >> 2] = handle_instruction,
         [0x08 >> 2] = handle_prog,
@@ -230,7 +230,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
         intercept_handler_t func;
         u8 code = vcpu->arch.sie_block->icptcode;
 
-        if (code & 3 || code > 0x48)
+        if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
                 return -ENOTSUPP;
         func = intercept_funcs[code >> 2];
         if (func)
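
The s390 fix above swaps a hard-coded bound (0x48) for ARRAY_SIZE() so the range check can never drift out of sync with the handler table. A minimal userspace sketch of the same pattern; the handler names and codes (handle_a, handle_b, dispatch) are illustrative, not the s390 ones:

/* Sparse handler table indexed by code >> 2, bounds-checked with ARRAY_SIZE(). */
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

typedef int (*handler_t)(unsigned code);

static int handle_a(unsigned code) { printf("A %#x\n", code); return 0; }
static int handle_b(unsigned code) { printf("B %#x\n", code); return 0; }

static const handler_t handlers[] = {
        [0x00 >> 2] = handle_a,
        [0x08 >> 2] = handle_b,
};

static int dispatch(unsigned code)
{
        handler_t func;

        /* Reject misaligned codes and anything past the end of the table.
         * Comparing against ARRAY_SIZE() keeps the check in sync with the
         * table, unlike a hard-coded upper bound such as 0x48. */
        if (code & 3 || (code >> 2) >= ARRAY_SIZE(handlers))
                return -1;
        func = handlers[code >> 2];
        return func ? func(code) : -1;
}

int main(void)
{
        dispatch(0x08); /* handled */
        dispatch(0x4c); /* out of range: rejected instead of overrunning */
        return 0;
}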
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3063a0c4858..ba8c045da78 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                 if (unlikely(!apic_enabled(apic)))
                         break;
 
+                if (trig_mode) {
+                        apic_debug("level trig mode for vector %d", vector);
+                        apic_set_vector(vector, apic->regs + APIC_TMR);
+                } else
+                        apic_clear_vector(vector, apic->regs + APIC_TMR);
+
                 result = !apic_test_and_set_irr(vector, apic);
                 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
                                           trig_mode, vector, !result);
@@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                         break;
                 }
 
-                if (trig_mode) {
-                        apic_debug("level trig mode for vector %d", vector);
-                        apic_set_vector(vector, apic->regs + APIC_TMR);
-                } else
-                        apic_clear_vector(vector, apic->regs + APIC_TMR);
                 kvm_vcpu_kick(vcpu);
                 break;
 
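
The lapic.c hunks move the APIC_TMR update ahead of the IRR test-and-set, so a CPU that observes the pending bit also observes the matching trigger-mode state. A rough userspace analogue of that publish-after-init ordering, using C11 atomics and pthreads rather than the kernel's bitops; tmr and irr here are just labels, not the KVM registers:

/* Write the payload (tmr) before publishing the flag (irr); the consumer
 * that sees the flag with acquire ordering also sees the payload. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int tmr;  /* payload: level-triggered? */
static atomic_int irr;  /* publication flag: interrupt pending */

static void *producer(void *arg)
{
        (void)arg;
        atomic_store_explicit(&tmr, 1, memory_order_relaxed); /* set TMR first */
        atomic_store_explicit(&irr, 1, memory_order_release); /* then publish IRR */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, producer, NULL);
        while (!atomic_load_explicit(&irr, memory_order_acquire))
                ;       /* spin until the interrupt is published */
        printf("trig_mode seen with the pending bit: %d\n",
               atomic_load_explicit(&tmr, memory_order_relaxed)); /* always 1 */
        pthread_join(t, NULL);
        return 0;
}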
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4c3e5b2314c..89a49fb46a2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 
         addr = gfn_to_hva(kvm, gfn);
         if (kvm_is_error_hva(addr))
-                return page_size;
+                return PT_PAGE_TABLE_LEVEL;
 
         down_read(&current->mm->mmap_sem);
         vma = find_vma(current->mm, addr);
@@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
         if (host_level == PT_PAGE_TABLE_LEVEL)
                 return host_level;
 
-        for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
-
+        for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
                 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
                         break;
-        }
 
         return level - 1;
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 58a0f1e8859..ede2131a922 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -150,7 +150,9 @@ walk:
                 walker->table_gfn[walker->level - 1] = table_gfn;
                 walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-                kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+                if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
+                        goto not_present;
+
                 trace_kvm_mmu_paging_element(pte, walker->level);
 
                 if (!is_present_gpte(pte))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6651dbf5867..1ddcad452ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                        GFP_KERNEL);
         if (!vcpu->arch.mce_banks) {
                 r = -ENOMEM;
-                goto fail_mmu_destroy;
+                goto fail_free_lapic;
         }
         vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
         return 0;
-
+fail_free_lapic:
+        kvm_free_lapic(vcpu);
 fail_mmu_destroy:
         kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
@@ -5088,6 +5089,7 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+        kfree(vcpu->arch.mce_banks);
         kvm_free_lapic(vcpu);
         down_read(&vcpu->kvm->slots_lock);
         kvm_mmu_destroy(vcpu);
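
The x86.c change adds a fail_free_lapic label so the allocation made just before the failing step is unwound too, and frees mce_banks on the uninit path. A self-contained userspace sketch of that goto-unwind convention; the names (vcpu_like, vcpu_init_like) are invented stand-ins for the kernel structures:

/* Each setup step gets a cleanup label; failure jumps to the label that
 * undoes everything allocated so far, in reverse order of setup. */
#include <stdlib.h>

struct vcpu_like {
        void *pio_data;
        void *lapic;
        void *mce_banks;
};

static int vcpu_init_like(struct vcpu_like *v)
{
        v->pio_data = malloc(64);
        if (!v->pio_data)
                goto fail;

        v->lapic = malloc(64);
        if (!v->lapic)
                goto fail_free_pio_data;

        v->mce_banks = calloc(32, sizeof(long));
        if (!v->mce_banks)
                goto fail_free_lapic;   /* not fail_free_pio_data: lapic must be freed too */

        return 0;

fail_free_lapic:
        free(v->lapic);
fail_free_pio_data:
        free(v->pio_data);
fail:
        return -1;
}

int main(void)
{
        struct vcpu_like v = {0};

        if (vcpu_init_like(&v))
                return 1;
        free(v.mce_banks);
        free(v.lapic);
        free(v.pio_data);
        return 0;
}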
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06e..7758cc382ef 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
         return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-                            loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+        *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+        ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+                                  __u64 *cnt)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&ctx->wqh.lock, flags);
+        eventfd_ctx_do_read(ctx, cnt);
+        __remove_wait_queue(&ctx->wqh, wait);
+        if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+                wake_up_locked_poll(&ctx->wqh, POLLOUT);
+        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+        return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-        struct eventfd_ctx *ctx = file->private_data;
         ssize_t res;
-        __u64 ucnt = 0;
         DECLARE_WAITQUEUE(wait, current);
 
-        if (count < sizeof(ucnt))
-                return -EINVAL;
         spin_lock_irq(&ctx->wqh.lock);
+        *cnt = 0;
         res = -EAGAIN;
         if (ctx->count > 0)
-                res = sizeof(ucnt);
-        else if (!(file->f_flags & O_NONBLOCK)) {
+                res = 0;
+        else if (!no_wait) {
                 __add_wait_queue(&ctx->wqh, &wait);
-                for (res = 0;;) {
+                for (;;) {
                         set_current_state(TASK_INTERRUPTIBLE);
                         if (ctx->count > 0) {
-                                res = sizeof(ucnt);
+                                res = 0;
                                 break;
                         }
                         if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
                 __remove_wait_queue(&ctx->wqh, &wait);
                 __set_current_state(TASK_RUNNING);
         }
-        if (likely(res > 0)) {
-                ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-                ctx->count -= ucnt;
+        if (likely(res == 0)) {
+                eventfd_ctx_do_read(ctx, cnt);
                 if (waitqueue_active(&ctx->wqh))
                         wake_up_locked_poll(&ctx->wqh, POLLOUT);
         }
         spin_unlock_irq(&ctx->wqh.lock);
-        if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-                return -EFAULT;
 
         return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+                            loff_t *ppos)
+{
+        struct eventfd_ctx *ctx = file->private_data;
+        ssize_t res;
+        __u64 cnt;
+
+        if (count < sizeof(cnt))
+                return -EINVAL;
+        res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+        if (res < 0)
+                return res;
+
+        return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
                              loff_t *ppos)
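
The new eventfd_ctx_read()/eventfd_ctx_remove_wait_queue() helpers keep the existing read semantics: a non-blocking read of a zero counter fails with -EAGAIN, and in EFD_SEMAPHORE mode each read consumes exactly one count. The same behaviour is visible from userspace through the eventfd(2) syscall; a small demo of those semantics only (it cannot, of course, call the in-kernel helpers):

/* Non-blocking semaphore-mode eventfd: EAGAIN on zero, then one count per read. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
        uint64_t cnt;
        int fd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE);

        if (fd < 0)
                return 1;

        if (read(fd, &cnt, sizeof(cnt)) < 0 && errno == EAGAIN)
                printf("counter is zero: read would block\n");

        cnt = 3;
        if (write(fd, &cnt, sizeof(cnt)) != sizeof(cnt))        /* counter += 3 */
                return 1;

        while (read(fd, &cnt, sizeof(cnt)) == sizeof(cnt))
                printf("got %llu\n", (unsigned long long)cnt);  /* 1, three times */

        close(fd);
        return 0;
}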
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 94dd10366a7..91bb4f27238 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/file.h>
+#include <linux/wait.h>
 
 /*
  * CAREFUL: Check include/asm-generic/fcntl.h when defining
@@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+                                  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+                                       __u64 *cnt)
+{
+        return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+                                                wait_queue_t *wait, __u64 *cnt)
+{
+        return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 30f70fd511c..a9d3fc6c681 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -72,12 +72,13 @@ static void
 irqfd_shutdown(struct work_struct *work)
 {
         struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+        u64 cnt;
 
         /*
          * Synchronize with the wait-queue and unhook ourselves to prevent
          * further events.
          */
-        remove_wait_queue(irqfd->wqh, &irqfd->wait);
+        eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
 
         /*
          * We know no new events will be scheduled at this point, so block
@@ -166,7 +167,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
-        struct _irqfd *irqfd;
+        struct _irqfd *irqfd, *tmp;
         struct file *file = NULL;
         struct eventfd_ctx *eventfd = NULL;
         int ret;
@@ -203,9 +204,20 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
         init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
         init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
 
+        spin_lock_irq(&kvm->irqfds.lock);
+
+        ret = 0;
+        list_for_each_entry(tmp, &kvm->irqfds.items, list) {
+                if (irqfd->eventfd != tmp->eventfd)
+                        continue;
+                /* This fd is used for another irq already. */
+                ret = -EBUSY;
+                spin_unlock_irq(&kvm->irqfds.lock);
+                goto fail;
+        }
+
         events = file->f_op->poll(file, &irqfd->pt);
 
-        spin_lock_irq(&kvm->irqfds.lock);
         list_add_tail(&irqfd->list, &kvm->irqfds.items);
         spin_unlock_irq(&kvm->irqfds.lock);
 
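
kvm_irqfd_assign() now walks kvm->irqfds.items under irqfds.lock before inserting, so the same eventfd cannot be bound to two GSIs. A compact userspace sketch of that check-then-insert-under-one-lock idea, using a pthread mutex and a plain array instead of the kernel list; register_fd and MAX_ITEMS are illustrative names:

/* Lookup and insert happen under the same lock, so two racing
 * registrations of the same fd cannot both succeed. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define MAX_ITEMS 16

static pthread_mutex_t items_lock = PTHREAD_MUTEX_INITIALIZER;
static int items[MAX_ITEMS];    /* registered "eventfd" identifiers */
static int nr_items;

static int register_fd(int fd)
{
        int i, ret = 0;

        pthread_mutex_lock(&items_lock);
        for (i = 0; i < nr_items; i++) {
                if (items[i] != fd)
                        continue;
                /* This fd is used for another irq already. */
                ret = -EBUSY;
                goto out;
        }
        if (nr_items < MAX_ITEMS)
                items[nr_items++] = fd;
        else
                ret = -ENOSPC;
out:
        pthread_mutex_unlock(&items_lock);
        return ret;
}

int main(void)
{
        printf("%d\n", register_fd(5)); /* 0 */
        printf("%d\n", register_fd(5)); /* -EBUSY */
        return 0;
}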
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9b077342ab5..9fd5b3ebc51 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -302,6 +302,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 {
         int r = -EINVAL;
         int delta;
+        unsigned max_pin;
         struct kvm_kernel_irq_routing_entry *ei;
         struct hlist_node *n;
 
@@ -322,12 +323,15 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
         switch (ue->u.irqchip.irqchip) {
         case KVM_IRQCHIP_PIC_MASTER:
                 e->set = kvm_set_pic_irq;
+                max_pin = 16;
                 break;
         case KVM_IRQCHIP_PIC_SLAVE:
                 e->set = kvm_set_pic_irq;
+                max_pin = 16;
                 delta = 8;
                 break;
         case KVM_IRQCHIP_IOAPIC:
+                max_pin = KVM_IOAPIC_NUM_PINS;
                 e->set = kvm_set_ioapic_irq;
                 break;
         default:
@@ -335,7 +339,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
         }
         e->irqchip.irqchip = ue->u.irqchip.irqchip;
         e->irqchip.pin = ue->u.irqchip.pin + delta;
-        if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+        if (e->irqchip.pin >= max_pin)
                 goto out;
         rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
         break;
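
setup_routing_entry() now picks a per-chip pin limit (16 for either PIC, KVM_IOAPIC_NUM_PINS for the IOAPIC) instead of checking every chip against the IOAPIC limit. A small standalone sketch of that per-chip bound, with locally defined enum values and a 24-pin constant standing in for the KVM ones:

/* Validate a routed pin against the limit of the chip it targets. */
#include <stdio.h>

#define IOAPIC_NUM_PINS 24

enum irqchip { PIC_MASTER, PIC_SLAVE, IOAPIC };

static int pin_valid(enum irqchip chip, unsigned pin)
{
        unsigned max_pin, delta = 0;

        switch (chip) {
        case PIC_MASTER:
                max_pin = 16;
                break;
        case PIC_SLAVE:
                max_pin = 16;
                delta = 8;      /* slave pins live at 8..15 of the combined space */
                break;
        case IOAPIC:
                max_pin = IOAPIC_NUM_PINS;
                break;
        default:
                return 0;
        }
        return pin + delta < max_pin;
}

int main(void)
{
        printf("PIC pin 9:     %d\n", pin_valid(PIC_MASTER, 9));        /* ok */
        printf("PIC pin 20:    %d\n", pin_valid(PIC_MASTER, 20));       /* rejected with the per-chip limit */
        printf("IOAPIC pin 20: %d\n", pin_valid(IOAPIC, 20));           /* still ok */
        return 0;
}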