diff options
author | Paul Mackerras <paulus@samba.org> | 2009-03-23 13:22:08 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-06 03:30:26 -0400 |
commit | 37d81828385f8ff823caaaf1a83e72d065b6cfa1 (patch) | |
tree | 972900a193a6a5ab1bdc14adcd7ab72bf0a51c13 /kernel/perf_counter.c | |
parent | 96f6d4444302bb2ea2cf409529eef816462f6ce0 (diff) |
perf_counter: add an mmap method to allow userspace to read hardware counters
Impact: new feature giving performance improvement
This adds the ability for userspace to do an mmap on a hardware counter
fd and get access to a read-only page that contains the information
needed to translate a hardware counter value to the full 64-bit
counter value that would be returned by a read on the fd. This is
useful on architectures that allow user programs to read the hardware
counters, such as PowerPC.
The mmap will only succeed if the counter is a hardware counter
monitoring the current process.
On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter
and translate it to the full 64-bit value in about 30ns using the
mmapped page, compared to about 830ns for the read syscall on the
counter, so this does give a significant performance improvement.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.297057964@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r-- | kernel/perf_counter.c | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ce34bff07bda..d9cfd902140e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1177 | mutex_unlock(&counter->mutex); | 1177 | mutex_unlock(&counter->mutex); |
1178 | mutex_unlock(&ctx->mutex); | 1178 | mutex_unlock(&ctx->mutex); |
1179 | 1179 | ||
1180 | free_page(counter->user_page); | ||
1180 | free_counter(counter); | 1181 | free_counter(counter); |
1181 | put_context(ctx); | 1182 | put_context(ctx); |
1182 | 1183 | ||
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1346 | return err; | 1347 | return err; |
1347 | } | 1348 | } |
1348 | 1349 | ||
1350 | void perf_counter_update_userpage(struct perf_counter *counter) | ||
1351 | { | ||
1352 | struct perf_counter_mmap_page *userpg; | ||
1353 | |||
1354 | if (!counter->user_page) | ||
1355 | return; | ||
1356 | userpg = (struct perf_counter_mmap_page *) counter->user_page; | ||
1357 | |||
1358 | ++userpg->lock; | ||
1359 | smp_wmb(); | ||
1360 | userpg->index = counter->hw.idx; | ||
1361 | userpg->offset = atomic64_read(&counter->count); | ||
1362 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
1363 | userpg->offset -= atomic64_read(&counter->hw.prev_count); | ||
1364 | smp_wmb(); | ||
1365 | ++userpg->lock; | ||
1366 | } | ||
1367 | |||
1368 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
1369 | { | ||
1370 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1371 | |||
1372 | if (!counter->user_page) | ||
1373 | return VM_FAULT_SIGBUS; | ||
1374 | |||
1375 | vmf->page = virt_to_page(counter->user_page); | ||
1376 | get_page(vmf->page); | ||
1377 | return 0; | ||
1378 | } | ||
1379 | |||
1380 | static struct vm_operations_struct perf_mmap_vmops = { | ||
1381 | .fault = perf_mmap_fault, | ||
1382 | }; | ||
1383 | |||
1384 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) | ||
1385 | { | ||
1386 | struct perf_counter *counter = file->private_data; | ||
1387 | unsigned long userpg; | ||
1388 | |||
1389 | if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) | ||
1390 | return -EINVAL; | ||
1391 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
1392 | return -EINVAL; | ||
1393 | |||
1394 | /* | ||
1395 | * For now, restrict to the case of a hardware counter | ||
1396 | * on the current task. | ||
1397 | */ | ||
1398 | if (is_software_counter(counter) || counter->task != current) | ||
1399 | return -EINVAL; | ||
1400 | |||
1401 | userpg = counter->user_page; | ||
1402 | if (!userpg) { | ||
1403 | userpg = get_zeroed_page(GFP_KERNEL); | ||
1404 | mutex_lock(&counter->mutex); | ||
1405 | if (counter->user_page) { | ||
1406 | free_page(userpg); | ||
1407 | userpg = counter->user_page; | ||
1408 | } else { | ||
1409 | counter->user_page = userpg; | ||
1410 | } | ||
1411 | mutex_unlock(&counter->mutex); | ||
1412 | if (!userpg) | ||
1413 | return -ENOMEM; | ||
1414 | } | ||
1415 | |||
1416 | perf_counter_update_userpage(counter); | ||
1417 | |||
1418 | vma->vm_flags &= ~VM_MAYWRITE; | ||
1419 | vma->vm_flags |= VM_RESERVED; | ||
1420 | vma->vm_ops = &perf_mmap_vmops; | ||
1421 | return 0; | ||
1422 | } | ||
1423 | |||
1349 | static const struct file_operations perf_fops = { | 1424 | static const struct file_operations perf_fops = { |
1350 | .release = perf_release, | 1425 | .release = perf_release, |
1351 | .read = perf_read, | 1426 | .read = perf_read, |
1352 | .poll = perf_poll, | 1427 | .poll = perf_poll, |
1353 | .unlocked_ioctl = perf_ioctl, | 1428 | .unlocked_ioctl = perf_ioctl, |
1354 | .compat_ioctl = perf_ioctl, | 1429 | .compat_ioctl = perf_ioctl, |
1430 | .mmap = perf_mmap, | ||
1355 | }; | 1431 | }; |
1356 | 1432 | ||
1357 | /* | 1433 | /* |