diff options
author | Andi Kleen <ak@linux.intel.com> | 2011-08-02 00:38:08 -0400 |
---|---|---|
committer | root <root@serles.lst.de> | 2011-10-28 08:58:58 -0400 |
commit | ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d (patch) | |
tree | 08bfd191e714b53ed44ec185f890cc5113e6dafd /fs/direct-io.c | |
parent | 18772641dbe2c89c6122c603f81f6a9574aee556 (diff) |
direct-io: inline the complete submission path
Add inline to all the submission path functions. While this increases
code size, it also gives gcc a lot of optimization opportunities
in this critical hot path.
In particular -- together with some other changes -- this
allows gcc to get rid of the unnecessary clearing of
sdio at the beginning and optimize the messy parameter passing.
Failing to inline any function which takes an sdio parameter
would break this optimization, because it cannot be done once the
address of the structure is taken.
Note that benefits are only seen with CONFIG_OPTIMIZE_INLINING
and CONFIG_CC_OPTIMIZE_FOR_SIZE both set to off.
This gives about 2.2% improvement on a large database benchmark
with a high IOPS rate.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- | fs/direct-io.c | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index edf3174afd6a..6d425821be66 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) | |||
199 | /* | 199 | /* |
200 | * Go grab and pin some userspace pages. Typically we'll get 64 at a time. | 200 | * Go grab and pin some userspace pages. Typically we'll get 64 at a time. |
201 | */ | 201 | */ |
202 | static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) | 202 | static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) |
203 | { | 203 | { |
204 | int ret; | 204 | int ret; |
205 | int nr_pages; | 205 | int nr_pages; |
@@ -245,7 +245,8 @@ out: | |||
245 | * decent number of pages, less frequently. To provide nicer use of the | 245 | * decent number of pages, less frequently. To provide nicer use of the |
246 | * L1 cache. | 246 | * L1 cache. |
247 | */ | 247 | */ |
248 | static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio) | 248 | static inline struct page *dio_get_page(struct dio *dio, |
249 | struct dio_submit *sdio) | ||
249 | { | 250 | { |
250 | if (dio_pages_present(sdio) == 0) { | 251 | if (dio_pages_present(sdio) == 0) { |
251 | int ret; | 252 | int ret; |
@@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error) | |||
376 | } | 377 | } |
377 | EXPORT_SYMBOL_GPL(dio_end_io); | 378 | EXPORT_SYMBOL_GPL(dio_end_io); |
378 | 379 | ||
379 | static void | 380 | static inline void |
380 | dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, | 381 | dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, |
381 | struct block_device *bdev, | 382 | struct block_device *bdev, |
382 | sector_t first_sector, int nr_vecs) | 383 | sector_t first_sector, int nr_vecs) |
@@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, | |||
407 | * | 408 | * |
408 | * bios hold a dio reference between submit_bio and ->end_io. | 409 | * bios hold a dio reference between submit_bio and ->end_io. |
409 | */ | 410 | */ |
410 | static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) | 411 | static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) |
411 | { | 412 | { |
412 | struct bio *bio = sdio->bio; | 413 | struct bio *bio = sdio->bio; |
413 | unsigned long flags; | 414 | unsigned long flags; |
@@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) | |||
435 | /* | 436 | /* |
436 | * Release any resources in case of a failure | 437 | * Release any resources in case of a failure |
437 | */ | 438 | */ |
438 | static void dio_cleanup(struct dio *dio, struct dio_submit *sdio) | 439 | static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) |
439 | { | 440 | { |
440 | while (dio_pages_present(sdio)) | 441 | while (dio_pages_present(sdio)) |
441 | page_cache_release(dio_get_page(dio, sdio)); | 442 | page_cache_release(dio_get_page(dio, sdio)); |
@@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio) | |||
528 | * | 529 | * |
529 | * This also helps to limit the peak amount of pinned userspace memory. | 530 | * This also helps to limit the peak amount of pinned userspace memory. |
530 | */ | 531 | */ |
531 | static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) | 532 | static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) |
532 | { | 533 | { |
533 | int ret = 0; | 534 | int ret = 0; |
534 | 535 | ||
@@ -631,8 +632,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, | |||
631 | /* | 632 | /* |
632 | * There is no bio. Make one now. | 633 | * There is no bio. Make one now. |
633 | */ | 634 | */ |
634 | static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, | 635 | static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, |
635 | sector_t start_sector, struct buffer_head *map_bh) | 636 | sector_t start_sector, struct buffer_head *map_bh) |
636 | { | 637 | { |
637 | sector_t sector; | 638 | sector_t sector; |
638 | int ret, nr_pages; | 639 | int ret, nr_pages; |
@@ -657,7 +658,7 @@ out: | |||
657 | * | 658 | * |
658 | * Return zero on success. Non-zero means the caller needs to start a new BIO. | 659 | * Return zero on success. Non-zero means the caller needs to start a new BIO. |
659 | */ | 660 | */ |
660 | static int dio_bio_add_page(struct dio_submit *sdio) | 661 | static inline int dio_bio_add_page(struct dio_submit *sdio) |
661 | { | 662 | { |
662 | int ret; | 663 | int ret; |
663 | 664 | ||
@@ -689,8 +690,8 @@ static int dio_bio_add_page(struct dio_submit *sdio) | |||
689 | * The caller of this function is responsible for removing cur_page from the | 690 | * The caller of this function is responsible for removing cur_page from the |
690 | * dio, and for dropping the refcount which came from that presence. | 691 | * dio, and for dropping the refcount which came from that presence. |
691 | */ | 692 | */ |
692 | static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, | 693 | static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, |
693 | struct buffer_head *map_bh) | 694 | struct buffer_head *map_bh) |
694 | { | 695 | { |
695 | int ret = 0; | 696 | int ret = 0; |
696 | 697 | ||
@@ -759,7 +760,7 @@ out: | |||
759 | * If that doesn't work out then we put the old page into the bio and add this | 760 | * If that doesn't work out then we put the old page into the bio and add this |
760 | * page to the dio instead. | 761 | * page to the dio instead. |
761 | */ | 762 | */ |
762 | static int | 763 | static inline int |
763 | submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, | 764 | submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, |
764 | unsigned offset, unsigned len, sector_t blocknr, | 765 | unsigned offset, unsigned len, sector_t blocknr, |
765 | struct buffer_head *map_bh) | 766 | struct buffer_head *map_bh) |
@@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh) | |||
842 | * `end' is zero if we're doing the start of the IO, 1 at the end of the | 843 | * `end' is zero if we're doing the start of the IO, 1 at the end of the |
843 | * IO. | 844 | * IO. |
844 | */ | 845 | */ |
845 | static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end, | 846 | static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio, |
846 | struct buffer_head *map_bh) | 847 | int end, struct buffer_head *map_bh) |
847 | { | 848 | { |
848 | unsigned dio_blocks_per_fs_block; | 849 | unsigned dio_blocks_per_fs_block; |
849 | unsigned this_chunk_blocks; /* In dio_blocks */ | 850 | unsigned this_chunk_blocks; /* In dio_blocks */ |
@@ -1042,7 +1043,7 @@ out: | |||
1042 | return ret; | 1043 | return ret; |
1043 | } | 1044 | } |
1044 | 1045 | ||
1045 | static ssize_t | 1046 | static inline ssize_t |
1046 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 1047 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
1047 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 1048 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
1048 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 1049 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
@@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1216 | * expected that filesystem provide exclusion between new direct I/O | 1217 | * expected that filesystem provide exclusion between new direct I/O |
1217 | * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, | 1218 | * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, |
1218 | * but other filesystems need to take care of this on their own. | 1219 | * but other filesystems need to take care of this on their own. |
1220 | * | ||
1221 | * NOTE: if you pass "sdio" to anything by pointer make sure that function | ||
1222 | * is always inlined. Otherwise gcc is unable to split the structure into | ||
1223 | * individual fields and will generate much worse code. This is important | ||
1224 | * for the whole file. | ||
1219 | */ | 1225 | */ |
1220 | ssize_t | 1226 | ssize_t |
1221 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1227 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |