aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2009-06-09 15:39:08 -0400
committer: Chris Mason <chris.mason@oracle.com> 2009-06-10 11:29:49 -0400
commit: d84275c938e1a5e2dc5b89eb9b878e0ddb2c55e0 (patch)
tree: 7d8f49716140738c1bdcb97978b06328dd148a7a
parent: 585ad2c3797dcaa643aeba75b9f072778adf3490 (diff)
Btrfs: don't allow WRITE_SYNC bios to starve out regular writes
Btrfs uses dedicated threads to submit bios when checksumming is on, which allows us to make sure the threads dedicated to checksumming don't get stuck waiting for requests. For each btrfs device, there are two lists of bios. One list is for WRITE_SYNC bios and the other is for regular priority bios. The IO submission threads used to process all of the WRITE_SYNC bios first and then switch to the regular bios. This commit makes sure we don't completely starve the regular bios by rotating between the two lists. WRITE_SYNC bios are still favored 2:1 over the regular bios, and this tries to run in batches to avoid seeking. Benchmarking shows that this eliminates stalls during streaming buffered writes on both multi-device and single-device filesystems. If the regular bios starve, the system can end up with a large amount of RAM pinned down in writeback pages. If we are a little fairer between the two classes, we're able to keep throughput up and make progress on the bulk of our dirty RAM. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/volumes.c 22
1 files changed, 15 insertions, 7 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bc6a8807482..9d3618192009 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -163,6 +163,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
163 unsigned long num_sync_run; 163 unsigned long num_sync_run;
164 unsigned long limit; 164 unsigned long limit;
165 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
166 int force_reg = 0;
166 167
167 bdi = blk_get_backing_dev_info(device->bdev); 168 bdi = blk_get_backing_dev_info(device->bdev);
168 fs_info = device->dev_root->fs_info; 169 fs_info = device->dev_root->fs_info;
@@ -176,19 +177,22 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
176 177
177loop: 178loop:
178 spin_lock(&device->io_lock); 179 spin_lock(&device->io_lock);
179 num_run = 0;
180 180
181loop_lock: 181loop_lock:
182 num_run = 0;
182 183
183 /* take all the bios off the list at once and process them 184 /* take all the bios off the list at once and process them
184 * later on (without the lock held). But, remember the 185 * later on (without the lock held). But, remember the
185 * tail and other pointers so the bios can be properly reinserted 186 * tail and other pointers so the bios can be properly reinserted
186 * into the list if we hit congestion 187 * into the list if we hit congestion
187 */ 188 */
188 if (device->pending_sync_bios.head) 189 if (!force_reg && device->pending_sync_bios.head) {
189 pending_bios = &device->pending_sync_bios; 190 pending_bios = &device->pending_sync_bios;
190 else 191 force_reg = 1;
192 } else {
191 pending_bios = &device->pending_bios; 193 pending_bios = &device->pending_bios;
194 force_reg = 0;
195 }
192 196
193 pending = pending_bios->head; 197 pending = pending_bios->head;
194 tail = pending_bios->tail; 198 tail = pending_bios->tail;
@@ -228,10 +232,14 @@ loop_lock:
228 while (pending) { 232 while (pending) {
229 233
230 rmb(); 234 rmb();
231 if (pending_bios != &device->pending_sync_bios && 235 /* we want to work on both lists, but do more bios on the
232 device->pending_sync_bios.head && 236 * sync list than the regular list
233 num_run > 16) { 237 */
234 cond_resched(); 238 if ((num_run > 32 &&
239 pending_bios != &device->pending_sync_bios &&
240 device->pending_sync_bios.head) ||
241 (num_run > 64 && pending_bios == &device->pending_sync_bios &&
242 device->pending_bios.head)) {
235 spin_lock(&device->io_lock); 243 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail); 244 requeue_list(pending_bios, pending, tail);
237 goto loop_lock; 245 goto loop_lock;