aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
author: Andi Kleen <ak@linux.intel.com> 2011-08-02 00:38:08 -0400
committer: root <root@serles.lst.de> 2011-10-28 08:58:58 -0400
commit: ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d (patch)
tree: 08bfd191e714b53ed44ec185f890cc5113e6dafd /fs/direct-io.c
parent: 18772641dbe2c89c6122c603f81f6a9574aee556 (diff)
direct-io: inline the complete submission path
Add inline to all the submission path functions. While this increases code size, it also gives gcc a lot of optimization opportunities in this critical hot path. In particular -- together with some other changes -- this allows gcc to get rid of the unnecessary clearing of sdio at the beginning and to optimize the messy parameter passing. Leaving any function that takes an sdio parameter non-inlined would break these optimizations, because they cannot be done once the address of the structure is taken. Note that the benefits are only seen with CONFIG_OPTIMIZE_INLINING and CONFIG_CC_OPTIMIZE_FOR_SIZE both set to off. This gives about a 2.2% improvement on a large database benchmark with a high IOPS rate. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- fs/direct-io.c 36
1 files changed, 21 insertions, 15 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index edf3174afd6a..6d425821be66 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
199/* 199/*
200 * Go grab and pin some userspace pages. Typically we'll get 64 at a time. 200 * Go grab and pin some userspace pages. Typically we'll get 64 at a time.
201 */ 201 */
202static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) 202static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
203{ 203{
204 int ret; 204 int ret;
205 int nr_pages; 205 int nr_pages;
@@ -245,7 +245,8 @@ out:
245 * decent number of pages, less frequently. To provide nicer use of the 245 * decent number of pages, less frequently. To provide nicer use of the
246 * L1 cache. 246 * L1 cache.
247 */ 247 */
248static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio) 248static inline struct page *dio_get_page(struct dio *dio,
249 struct dio_submit *sdio)
249{ 250{
250 if (dio_pages_present(sdio) == 0) { 251 if (dio_pages_present(sdio) == 0) {
251 int ret; 252 int ret;
@@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error)
376} 377}
377EXPORT_SYMBOL_GPL(dio_end_io); 378EXPORT_SYMBOL_GPL(dio_end_io);
378 379
379static void 380static inline void
380dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, 381dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
381 struct block_device *bdev, 382 struct block_device *bdev,
382 sector_t first_sector, int nr_vecs) 383 sector_t first_sector, int nr_vecs)
@@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
407 * 408 *
408 * bios hold a dio reference between submit_bio and ->end_io. 409 * bios hold a dio reference between submit_bio and ->end_io.
409 */ 410 */
410static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) 411static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
411{ 412{
412 struct bio *bio = sdio->bio; 413 struct bio *bio = sdio->bio;
413 unsigned long flags; 414 unsigned long flags;
@@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
435/* 436/*
436 * Release any resources in case of a failure 437 * Release any resources in case of a failure
437 */ 438 */
438static void dio_cleanup(struct dio *dio, struct dio_submit *sdio) 439static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
439{ 440{
440 while (dio_pages_present(sdio)) 441 while (dio_pages_present(sdio))
441 page_cache_release(dio_get_page(dio, sdio)); 442 page_cache_release(dio_get_page(dio, sdio));
@@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio)
528 * 529 *
529 * This also helps to limit the peak amount of pinned userspace memory. 530 * This also helps to limit the peak amount of pinned userspace memory.
530 */ 531 */
531static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) 532static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
532{ 533{
533 int ret = 0; 534 int ret = 0;
534 535
@@ -631,8 +632,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
631/* 632/*
632 * There is no bio. Make one now. 633 * There is no bio. Make one now.
633 */ 634 */
634static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, 635static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
635 sector_t start_sector, struct buffer_head *map_bh) 636 sector_t start_sector, struct buffer_head *map_bh)
636{ 637{
637 sector_t sector; 638 sector_t sector;
638 int ret, nr_pages; 639 int ret, nr_pages;
@@ -657,7 +658,7 @@ out:
657 * 658 *
658 * Return zero on success. Non-zero means the caller needs to start a new BIO. 659 * Return zero on success. Non-zero means the caller needs to start a new BIO.
659 */ 660 */
660static int dio_bio_add_page(struct dio_submit *sdio) 661static inline int dio_bio_add_page(struct dio_submit *sdio)
661{ 662{
662 int ret; 663 int ret;
663 664
@@ -689,8 +690,8 @@ static int dio_bio_add_page(struct dio_submit *sdio)
689 * The caller of this function is responsible for removing cur_page from the 690 * The caller of this function is responsible for removing cur_page from the
690 * dio, and for dropping the refcount which came from that presence. 691 * dio, and for dropping the refcount which came from that presence.
691 */ 692 */
692static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, 693static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
693 struct buffer_head *map_bh) 694 struct buffer_head *map_bh)
694{ 695{
695 int ret = 0; 696 int ret = 0;
696 697
@@ -759,7 +760,7 @@ out:
759 * If that doesn't work out then we put the old page into the bio and add this 760 * If that doesn't work out then we put the old page into the bio and add this
760 * page to the dio instead. 761 * page to the dio instead.
761 */ 762 */
762static int 763static inline int
763submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, 764submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
764 unsigned offset, unsigned len, sector_t blocknr, 765 unsigned offset, unsigned len, sector_t blocknr,
765 struct buffer_head *map_bh) 766 struct buffer_head *map_bh)
@@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
842 * `end' is zero if we're doing the start of the IO, 1 at the end of the 843 * `end' is zero if we're doing the start of the IO, 1 at the end of the
843 * IO. 844 * IO.
844 */ 845 */
845static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end, 846static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
846 struct buffer_head *map_bh) 847 int end, struct buffer_head *map_bh)
847{ 848{
848 unsigned dio_blocks_per_fs_block; 849 unsigned dio_blocks_per_fs_block;
849 unsigned this_chunk_blocks; /* In dio_blocks */ 850 unsigned this_chunk_blocks; /* In dio_blocks */
@@ -1042,7 +1043,7 @@ out:
1042 return ret; 1043 return ret;
1043} 1044}
1044 1045
1045static ssize_t 1046static inline ssize_t
1046direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 1047direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1047 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 1048 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
1048 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, 1049 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
@@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1216 * expected that filesystem provide exclusion between new direct I/O 1217 * expected that filesystem provide exclusion between new direct I/O
1217 * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, 1218 * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
1218 * but other filesystems need to take care of this on their own. 1219 * but other filesystems need to take care of this on their own.
1220 *
1221 * NOTE: if you pass "sdio" to anything by pointer make sure that function
1222 * is always inlined. Otherwise gcc is unable to split the structure into
1223 * individual fields and will generate much worse code. This is important
1224 * for the whole file.
1219 */ 1225 */
1220ssize_t 1226ssize_t
1221__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1227__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,