aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJeff Moyer <jmoyer@redhat.com>2009-12-15 19:47:49 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-16 10:20:13 -0500
commit23aee091d804efa8cc732a31c1ae5d625e1ec886 (patch)
tree2ca3456dba4771600ba62822c81f85b61074088a /fs
parentfac046ad0b1ee2c4244ebf43a26433ef0ea29ae4 (diff)
dio: don't zero out the pages array inside struct dio
Intel reported a performance regression caused by the following commit: commit 848c4dd5153c7a0de55470ce99a8e13a63b4703f Author: Zach Brown <zach.brown@oracle.com> Date: Mon Aug 20 17:12:01 2007 -0700 dio: zero struct dio with kzalloc instead of manually This patch uses kzalloc to zero all of struct dio rather than manually trying to track which fields we rely on being zero. It passed aio+dio stress testing and some bug regression testing on ext3. This patch was introduced by Linus in the conversation that lead up to Badari's minimal fix to manually zero .map_bh.b_state in commit: 6a648fa72161d1f6468dabd96c5d3c0db04f598a It makes the code a bit smaller. Maybe a couple fewer cachelines to load, if we're lucky: text data bss dec hex filename 3285925 568506 1304616 5159047 4eb887 vmlinux 3285797 568506 1304616 5158919 4eb807 vmlinux.patched I was unable to measure a stable difference in the number of cpu cycles spent in blockdev_direct_IO() when pushing aio+dio 256K reads at ~340MB/s. So the resulting intent of the patch isn't a performance gain but to avoid exposing ourselves to the risk of finding another field like .map_bh.b_state where we rely on zeroing but don't enforce it in the code. Zach surmised that zeroing out the page array was what caused most of the problem, and suggested the approach taken in the attached patch for resolving the issue. Intel re-tested with this patch and saw a 0.6% performance gain (the original regression was 0.5%). [akpm@linux-foundation.org: add comment] Signed-off-by: Jeff Moyer <jmoyer@redhat.com> Acked-by: Zach Brown <zach.brown@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/direct-io.c38
1 files changed, 25 insertions, 13 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b912270942fa..9f34bb9b1ecb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -104,6 +104,18 @@ struct dio {
104 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ 104 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
105 sector_t cur_page_block; /* Where it starts */ 105 sector_t cur_page_block; /* Where it starts */
106 106
107 /* BIO completion state */
108 spinlock_t bio_lock; /* protects BIO fields below */
109 unsigned long refcount; /* direct_io_worker() and bios */
110 struct bio *bio_list; /* singly linked via bi_private */
111 struct task_struct *waiter; /* waiting task (NULL if none) */
112
113 /* AIO related stuff */
114 struct kiocb *iocb; /* kiocb */
115 int is_async; /* is IO async ? */
116 int io_error; /* IO error in completion path */
117 ssize_t result; /* IO result */
118
107 /* 119 /*
108 * Page fetching state. These variables belong to dio_refill_pages(). 120 * Page fetching state. These variables belong to dio_refill_pages().
109 */ 121 */
@@ -115,22 +127,16 @@ struct dio {
115 * Page queue. These variables belong to dio_refill_pages() and 127 * Page queue. These variables belong to dio_refill_pages() and
116 * dio_get_page(). 128 * dio_get_page().
117 */ 129 */
118 struct page *pages[DIO_PAGES]; /* page buffer */
119 unsigned head; /* next page to process */ 130 unsigned head; /* next page to process */
120 unsigned tail; /* last valid page + 1 */ 131 unsigned tail; /* last valid page + 1 */
121 int page_errors; /* errno from get_user_pages() */ 132 int page_errors; /* errno from get_user_pages() */
122 133
123 /* BIO completion state */ 134 /*
124 spinlock_t bio_lock; /* protects BIO fields below */ 135 * pages[] (and any fields placed after it) are not zeroed out at
125 unsigned long refcount; /* direct_io_worker() and bios */ 136 * allocation time. Don't add new fields after pages[] unless you
126 struct bio *bio_list; /* singly linked via bi_private */ 137 * wish that they not be zeroed.
127 struct task_struct *waiter; /* waiting task (NULL if none) */ 138 */
128 139 struct page *pages[DIO_PAGES]; /* page buffer */
129 /* AIO related stuff */
130 struct kiocb *iocb; /* kiocb */
131 int is_async; /* is IO async ? */
132 int io_error; /* IO error in completion path */
133 ssize_t result; /* IO result */
134}; 140};
135 141
136/* 142/*
@@ -1151,10 +1157,16 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1151 } 1157 }
1152 } 1158 }
1153 1159
1154 dio = kzalloc(sizeof(*dio), GFP_KERNEL); 1160 dio = kmalloc(sizeof(*dio), GFP_KERNEL);
1155 retval = -ENOMEM; 1161 retval = -ENOMEM;
1156 if (!dio) 1162 if (!dio)
1157 goto out; 1163 goto out;
1164 /*
1165 * Believe it or not, zeroing out the page array caused a .5%
1166 * performance regression in a database benchmark. So, we take
1167 * care to only zero out what's needed.
1168 */
1169 memset(dio, 0, offsetof(struct dio, pages));
1158 1170
1159 /* 1171 /*
1160 * For block device access DIO_NO_LOCKING is used, 1172 * For block device access DIO_NO_LOCKING is used,