diff options
author | Jeff Moyer <jmoyer@redhat.com> | 2009-12-15 19:47:49 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-16 10:20:13 -0500 |
commit | 23aee091d804efa8cc732a31c1ae5d625e1ec886 (patch) | |
tree | 2ca3456dba4771600ba62822c81f85b61074088a | |
parent | fac046ad0b1ee2c4244ebf43a26433ef0ea29ae4 (diff) |
dio: don't zero out the pages array inside struct dio
Intel reported a performance regression caused by the following commit:
commit 848c4dd5153c7a0de55470ce99a8e13a63b4703f
Author: Zach Brown <zach.brown@oracle.com>
Date: Mon Aug 20 17:12:01 2007 -0700
dio: zero struct dio with kzalloc instead of manually
This patch uses kzalloc to zero all of struct dio rather than
manually trying to track which fields we rely on being zero. It
passed aio+dio stress testing and some bug regression testing on
ext3.
This patch was introduced by Linus in the conversation that lead up
to Badari's minimal fix to manually zero .map_bh.b_state in commit:
6a648fa72161d1f6468dabd96c5d3c0db04f598a
It makes the code a bit smaller. Maybe a couple fewer cachelines to
load, if we're lucky:
text data bss dec hex filename
3285925 568506 1304616 5159047 4eb887 vmlinux
3285797 568506 1304616 5158919 4eb807 vmlinux.patched
I was unable to measure a stable difference in the number of cpu
cycles spent in blockdev_direct_IO() when pushing aio+dio 256K reads
at ~340MB/s.
So the resulting intent of the patch isn't a performance gain but to
avoid exposing ourselves to the risk of finding another field like
.map_bh.b_state where we rely on zeroing but don't enforce it in the
code.
Zach surmised that zeroing out the page array was what caused most of
the problem, and suggested the approach taken in the attached patch for
resolving the issue. Intel re-tested with this patch and saw a 0.6%
performance gain (the original regression was 0.5%).
[akpm@linux-foundation.org: add comment]
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/direct-io.c | 38 |
1 files changed, 25 insertions, 13 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index b912270942fa..9f34bb9b1ecb 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -104,6 +104,18 @@ struct dio { | |||
104 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 104 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
105 | sector_t cur_page_block; /* Where it starts */ | 105 | sector_t cur_page_block; /* Where it starts */ |
106 | 106 | ||
107 | /* BIO completion state */ | ||
108 | spinlock_t bio_lock; /* protects BIO fields below */ | ||
109 | unsigned long refcount; /* direct_io_worker() and bios */ | ||
110 | struct bio *bio_list; /* singly linked via bi_private */ | ||
111 | struct task_struct *waiter; /* waiting task (NULL if none) */ | ||
112 | |||
113 | /* AIO related stuff */ | ||
114 | struct kiocb *iocb; /* kiocb */ | ||
115 | int is_async; /* is IO async ? */ | ||
116 | int io_error; /* IO error in completion path */ | ||
117 | ssize_t result; /* IO result */ | ||
118 | |||
107 | /* | 119 | /* |
108 | * Page fetching state. These variables belong to dio_refill_pages(). | 120 | * Page fetching state. These variables belong to dio_refill_pages(). |
109 | */ | 121 | */ |
@@ -115,22 +127,16 @@ struct dio { | |||
115 | * Page queue. These variables belong to dio_refill_pages() and | 127 | * Page queue. These variables belong to dio_refill_pages() and |
116 | * dio_get_page(). | 128 | * dio_get_page(). |
117 | */ | 129 | */ |
118 | struct page *pages[DIO_PAGES]; /* page buffer */ | ||
119 | unsigned head; /* next page to process */ | 130 | unsigned head; /* next page to process */ |
120 | unsigned tail; /* last valid page + 1 */ | 131 | unsigned tail; /* last valid page + 1 */ |
121 | int page_errors; /* errno from get_user_pages() */ | 132 | int page_errors; /* errno from get_user_pages() */ |
122 | 133 | ||
123 | /* BIO completion state */ | 134 | /* |
124 | spinlock_t bio_lock; /* protects BIO fields below */ | 135 | * pages[] (and any fields placed after it) are not zeroed out at |
125 | unsigned long refcount; /* direct_io_worker() and bios */ | 136 | * allocation time. Don't add new fields after pages[] unless you |
126 | struct bio *bio_list; /* singly linked via bi_private */ | 137 | * wish that they not be zeroed. |
127 | struct task_struct *waiter; /* waiting task (NULL if none) */ | 138 | */ |
128 | 139 | struct page *pages[DIO_PAGES]; /* page buffer */ | |
129 | /* AIO related stuff */ | ||
130 | struct kiocb *iocb; /* kiocb */ | ||
131 | int is_async; /* is IO async ? */ | ||
132 | int io_error; /* IO error in completion path */ | ||
133 | ssize_t result; /* IO result */ | ||
134 | }; | 140 | }; |
135 | 141 | ||
136 | /* | 142 | /* |
@@ -1151,10 +1157,16 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1151 | } | 1157 | } |
1152 | } | 1158 | } |
1153 | 1159 | ||
1154 | dio = kzalloc(sizeof(*dio), GFP_KERNEL); | 1160 | dio = kmalloc(sizeof(*dio), GFP_KERNEL); |
1155 | retval = -ENOMEM; | 1161 | retval = -ENOMEM; |
1156 | if (!dio) | 1162 | if (!dio) |
1157 | goto out; | 1163 | goto out; |
1164 | /* | ||
1165 | * Believe it or not, zeroing out the page array caused a .5% | ||
1166 | * performance regression in a database benchmark. So, we take | ||
1167 | * care to only zero out what's needed. | ||
1168 | */ | ||
1169 | memset(dio, 0, offsetof(struct dio, pages)); | ||
1158 | 1170 | ||
1159 | /* | 1171 | /* |
1160 | * For block device access DIO_NO_LOCKING is used, | 1172 | * For block device access DIO_NO_LOCKING is used, |