author		Eric Biggers <ebiggers@google.com>	2017-03-31 13:31:48 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2017-04-03 01:05:57 -0400
commit		64bd72048a2ac07efed70debe606a1c6e5e03554 (patch)
tree		1666d9e34b9a4a47ed54ab9eceabf4f56d6dc39a
parent		b15fb70b82299f92bb8d591c9d1731cb23fa8290 (diff)
statx: optimize copy of struct statx to userspace
I found that statx() was significantly slower than stat().  As a
microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
file to the same with statx() passed a NULL path:

	$ time ./stat_benchmark

	real	0m1.464s
	user	0m0.275s
	sys	0m1.187s

	$ time ./statx_benchmark

	real	0m5.530s
	user	0m0.281s
	sys	0m5.247s

statx is expected to be a little slower than stat because struct statx
is larger than struct stat, but not by *that* much.  It turns out that
most of the overhead was in copying struct statx to userspace, mostly
in all the stac/clac instructions that got generated for each
__put_user() call.  (This was on x86_64, but some other architectures,
e.g. arm64, have something similar now too.)

stat() instead initializes its struct on the stack and copies it to
userspace with a single call to copy_to_user().  This turns out to be
much faster, and changing statx to do this makes it almost as fast as
stat:

	$ time ./statx_benchmark

	real	0m1.624s
	user	0m0.270s
	sys	0m1.354s

For zeroing the reserved fields, start by zeroing the full struct with
memset.  This makes it clear that every byte copied to userspace is
initialized, even implicit padding bytes (though there are none
currently).  In the scenarios I tested, it also performed the same as a
designated initializer.  Manually initializing each field was still
slightly faster, but would have been more error-prone and less
verifiable.

Also rename statx_set_result() to cp_statx() for consistency with
cp_old_stat() et al., and make it noinline so that struct statx doesn't
add to the stack usage during the main portion of the syscall
execution.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
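[Editor's note] The stat_benchmark and statx_benchmark programs referred to
above are not included in the commit.  A minimal userspace sketch along those
lines might look like the following; the test file path, the error handling,
and the use of a raw syscall() for statx() (glibc had no wrapper at the time)
are assumptions, and it relies on the statx() syscall at this point accepting
a NULL path to mean an fstat()-style lookup on dfd.  Build the fstat() variant
plainly and the statx() variant with -DUSE_STATX, then run each under time(1)
against a file on tmpfs.

/*
 * Hypothetical reconstruction of the microbenchmark described in the
 * commit message -- not the author's original stat_benchmark /
 * statx_benchmark sources.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/stat.h>		/* struct statx, STATX_* */

#define ITERATIONS 10000000L

int main(void)
{
	/* /tmp is assumed to be a tmpfs mount here. */
	int fd = open("/tmp/statx_test_file", O_RDWR | O_CREAT, 0644);
	long i;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (i = 0; i < ITERATIONS; i++) {
#ifdef USE_STATX
		struct statx stx;

		/* NULL path + fd: the fstat()-like form accepted by the
		 * statx() syscall at the time of this commit. */
		if (syscall(__NR_statx, fd, NULL, 0, STATX_ALL, &stx)) {
			perror("statx");
			return 1;
		}
#else
		struct stat st;

		if (fstat(fd, &st)) {
			perror("fstat");
			return 1;
		}
#endif
	}
	close(fd);
	return 0;
}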
-rw-r--r--	fs/stat.c	74
1 file changed, 32 insertions(+), 42 deletions(-)
diff --git a/fs/stat.c b/fs/stat.c
index b792dd201c31..ab27f2868588 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -510,46 +510,37 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
-static inline int __put_timestamp(struct timespec *kts,
-				  struct statx_timestamp __user *uts)
+static noinline_for_stack int
+cp_statx(const struct kstat *stat, struct statx __user *buffer)
 {
-	return (__put_user(kts->tv_sec,		&uts->tv_sec	) ||
-		__put_user(kts->tv_nsec,	&uts->tv_nsec	) ||
-		__put_user(0,			&uts->__reserved	));
-}
-
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
-{
-	uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-	gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
-
-	if (__put_user(stat->result_mask,	&buffer->stx_mask	) ||
-	    __put_user(stat->mode,		&buffer->stx_mode	) ||
-	    __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) ||
-	    __put_user(stat->nlink,		&buffer->stx_nlink	) ||
-	    __put_user(uid,			&buffer->stx_uid	) ||
-	    __put_user(gid,			&buffer->stx_gid	) ||
-	    __put_user(stat->attributes,	&buffer->stx_attributes	) ||
-	    __put_user(stat->blksize,		&buffer->stx_blksize	) ||
-	    __put_user(MAJOR(stat->rdev),	&buffer->stx_rdev_major	) ||
-	    __put_user(MINOR(stat->rdev),	&buffer->stx_rdev_minor	) ||
-	    __put_user(MAJOR(stat->dev),	&buffer->stx_dev_major	) ||
-	    __put_user(MINOR(stat->dev),	&buffer->stx_dev_minor	) ||
-	    __put_timestamp(&stat->atime,	&buffer->stx_atime	) ||
-	    __put_timestamp(&stat->btime,	&buffer->stx_btime	) ||
-	    __put_timestamp(&stat->ctime,	&buffer->stx_ctime	) ||
-	    __put_timestamp(&stat->mtime,	&buffer->stx_mtime	) ||
-	    __put_user(stat->ino,		&buffer->stx_ino	) ||
-	    __put_user(stat->size,		&buffer->stx_size	) ||
-	    __put_user(stat->blocks,		&buffer->stx_blocks	) ||
-	    __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) ||
-	    __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-		return -EFAULT;
-
-	return 0;
+	struct statx tmp;
+
+	memset(&tmp, 0, sizeof(tmp));
+
+	tmp.stx_mask = stat->result_mask;
+	tmp.stx_blksize = stat->blksize;
+	tmp.stx_attributes = stat->attributes;
+	tmp.stx_nlink = stat->nlink;
+	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.stx_mode = stat->mode;
+	tmp.stx_ino = stat->ino;
+	tmp.stx_size = stat->size;
+	tmp.stx_blocks = stat->blocks;
+	tmp.stx_atime.tv_sec = stat->atime.tv_sec;
+	tmp.stx_atime.tv_nsec = stat->atime.tv_nsec;
+	tmp.stx_btime.tv_sec = stat->btime.tv_sec;
+	tmp.stx_btime.tv_nsec = stat->btime.tv_nsec;
+	tmp.stx_ctime.tv_sec = stat->ctime.tv_sec;
+	tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec;
+	tmp.stx_mtime.tv_sec = stat->mtime.tv_sec;
+	tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec;
+	tmp.stx_rdev_major = MAJOR(stat->rdev);
+	tmp.stx_rdev_minor = MINOR(stat->rdev);
+	tmp.stx_dev_major = MAJOR(stat->dev);
+	tmp.stx_dev_minor = MINOR(stat->dev);
+
+	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 /**
@@ -573,8 +564,6 @@ SYSCALL_DEFINE5(statx,
 
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-		return -EFAULT;
 
 	if (filename)
 		error = vfs_statx(dfd, filename, flags, &stat, mask);
@@ -582,7 +571,8 @@ SYSCALL_DEFINE5(statx,
 		error = vfs_statx_fd(dfd, &stat, mask, flags);
 	if (error)
 		return error;
-	return statx_set_result(&stat, buffer);
+
+	return cp_statx(&stat, buffer);
 }
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */