Diffstat (limited to 'fs/btrfs/volumes.c'):
 -rw-r--r--  fs/btrfs/volumes.c | 162
 1 file changed, 157 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 722eb4550154..c57458ce6339 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -27,6 +27,7 @@
 #include "transaction.h"
 #include "print-tree.h"
 #include "volumes.h"
+#include "async-thread.h"
 
 struct map_lookup {
 	u64 type;
@@ -110,6 +111,101 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid)
 	return NULL;
 }
 
+/*
+ * we try to collect pending bios for a device so we don't get a large
+ * number of procs sending bios down to the same device.  This greatly
+ * improves the scheduler's ability to collect and merge the bios.
+ *
+ * But, it also turns into a long list of bios to process and that is sure
+ * to eventually make the worker thread block.  The solution here is to
+ * make some progress and then put this work struct back at the end of
+ * the list if the block device is congested.  This way, multiple devices
+ * can make progress from a single worker thread.
+ */
+int run_scheduled_bios(struct btrfs_device *device)
+{
+	struct bio *pending;
+	struct backing_dev_info *bdi;
+	struct bio *tail;
+	struct bio *cur;
+	int again = 0;
+	unsigned long num_run = 0;
+
+	bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+loop:
+	spin_lock(&device->io_lock);
+
+	/* take all the bios off the list at once and process them
+	 * later on (without the lock held).  But, remember the
+	 * tail and other pointers so the bios can be properly reinserted
+	 * into the list if we hit congestion
+	 */
+	pending = device->pending_bios;
+	tail = device->pending_bio_tail;
+	WARN_ON(pending && !tail);
+	device->pending_bios = NULL;
+	device->pending_bio_tail = NULL;
+
+	/*
+	 * if pending was null this time around, no bios need processing
+	 * at all and we can stop.  Otherwise it'll loop back up again
+	 * and do an additional check so no bios are missed.
+	 *
+	 * device->running_pending is used to synchronize with the
+	 * schedule_bio code.
+	 */
+	if (pending) {
+		again = 1;
+		device->running_pending = 1;
+	} else {
+		again = 0;
+		device->running_pending = 0;
+	}
+	spin_unlock(&device->io_lock);
+
+	while (pending) {
+		cur = pending;
+		pending = pending->bi_next;
+		cur->bi_next = NULL;
+		atomic_dec(&device->dev_root->fs_info->nr_async_submits);
+		submit_bio(cur->bi_rw, cur);
+		num_run++;
+
+		/*
+		 * we made progress, there is more work to do and the bdi
+		 * is now congested.  Back off and let other work structs
+		 * run instead
+		 */
+		if (pending && num_run && bdi_write_congested(bdi)) {
+			struct bio *old_head;
+
+			spin_lock(&device->io_lock);
+			old_head = device->pending_bios;
+			device->pending_bios = pending;
+			if (device->pending_bio_tail)
+				tail->bi_next = old_head;
+			else
+				device->pending_bio_tail = tail;
+
+			spin_unlock(&device->io_lock);
+			btrfs_requeue_work(&device->work);
+			goto done;
+		}
+	}
+	if (again)
+		goto loop;
+done:
+	return 0;
+}
+
+void pending_bios_fn(struct btrfs_work *work)
+{
+	struct btrfs_device *device;
+
+	device = container_of(work, struct btrfs_device, work);
+	run_scheduled_bios(device);
+}
+
 static int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
@@ -141,6 +237,7 @@ static int device_list_add(const char *path,
 		return -ENOMEM;
 	}
 	device->devid = devid;
+	device->work.func = pending_bios_fn;
 	memcpy(device->uuid, disk_super->dev_item.uuid,
 	       BTRFS_UUID_SIZE);
 	device->barriers = 1;
@@ -925,6 +1022,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	}
 
 	device->barriers = 1;
+	device->work.func = pending_bios_fn;
 	generate_random_uuid(device->uuid);
 	spin_lock_init(&device->io_lock);
 	device->name = kstrdup(device_path, GFP_NOFS);
@@ -1965,8 +2063,61 @@ static int end_bio_multi_stripe(struct bio *bio,
 #endif
 }
 
+struct async_sched {
+	struct bio *bio;
+	int rw;
+	struct btrfs_fs_info *info;
+	struct btrfs_work work;
+};
+
+/*
+ * see run_scheduled_bios for a description of why bios are collected for
+ * async submit.
+ *
+ * This will add one bio to the pending list for a device and make sure
+ * the work struct is scheduled.
+ */
+int schedule_bio(struct btrfs_root *root, struct btrfs_device *device,
+		 int rw, struct bio *bio)
+{
+	int should_queue = 1;
+
+	/* don't bother with additional async steps for reads, right now */
+	if (!(rw & (1 << BIO_RW))) {
+		submit_bio(rw, bio);
+		return 0;
+	}
+
+	/*
+	 * nr_async_submits allows us to reliably return congestion to the
+	 * higher layers.  Otherwise, the async bio makes it appear we have
+	 * made progress against dirty pages when we've really just put it
+	 * on a queue for later
+	 */
+	atomic_inc(&root->fs_info->nr_async_submits);
+	bio->bi_next = NULL;
+	bio->bi_rw |= rw;
+
+	spin_lock(&device->io_lock);
+
+	if (device->pending_bio_tail)
+		device->pending_bio_tail->bi_next = bio;
+
+	device->pending_bio_tail = bio;
+	if (!device->pending_bios)
+		device->pending_bios = bio;
+	if (device->running_pending)
+		should_queue = 0;
+
+	spin_unlock(&device->io_lock);
+
+	if (should_queue)
+		btrfs_queue_worker(&root->fs_info->workers, &device->work);
+	return 0;
+}
+
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
-		  int mirror_num)
+		  int mirror_num, int async_submit)
 {
 	struct btrfs_mapping_tree *map_tree;
 	struct btrfs_device *dev;
@@ -2012,10 +2163,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 		dev = multi->stripes[dev_nr].dev;
 		if (dev && dev->bdev) {
 			bio->bi_bdev = dev->bdev;
-			spin_lock(&dev->io_lock);
-			dev->total_ios++;
-			spin_unlock(&dev->io_lock);
-			submit_bio(rw, bio);
+			if (async_submit)
+				schedule_bio(root, dev, rw, bio);
+			else
+				submit_bio(rw, bio);
 		} else {
 			bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
 			bio->bi_sector = logical >> 9;
@@ -2054,6 +2205,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
 	device->barriers = 1;
 	device->dev_root = root->fs_info->dev_root;
 	device->devid = devid;
+	device->work.func = pending_bios_fn;
 	fs_devices->num_devices++;
 	spin_lock_init(&device->io_lock);
 	memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
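
Note: the schedule_bio()/run_scheduled_bios() pair added by this patch is a per-device producer/consumer queue. Producers tail-append bios under device->io_lock and only kick a worker when one is not already running; the worker detaches the whole list at once and submits it without the lock held. The following is a minimal user-space sketch of that collect-and-drain pattern only (it omits the congestion back-off and requeue loop). It is not part of the patch: a pthread mutex stands in for the spinlock, a plain struct stands in for struct bio, and names such as struct item, queue_item() and drain_queue() are invented for illustration.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	int id;
	struct item *next;            /* plays the role of bio->bi_next */
};

struct item_queue {
	pthread_mutex_t lock;         /* stands in for device->io_lock */
	struct item *head;            /* device->pending_bios */
	struct item *tail;            /* device->pending_bio_tail */
	int running;                  /* device->running_pending */
};

/* tail-append one item; returns 1 if the caller should queue a worker */
static int queue_item(struct item_queue *q, struct item *it)
{
	int should_queue;

	it->next = NULL;
	pthread_mutex_lock(&q->lock);
	if (q->tail)
		q->tail->next = it;
	q->tail = it;
	if (!q->head)
		q->head = it;
	should_queue = !q->running;
	pthread_mutex_unlock(&q->lock);
	return should_queue;
}

/* detach the whole list under the lock, then process it with the lock dropped */
static void drain_queue(struct item_queue *q)
{
	struct item *pending, *cur;

	pthread_mutex_lock(&q->lock);
	pending = q->head;
	q->head = NULL;
	q->tail = NULL;
	q->running = 0;
	pthread_mutex_unlock(&q->lock);

	while (pending) {
		cur = pending;
		pending = pending->next;
		cur->next = NULL;
		printf("processing item %d\n", cur->id);
		free(cur);
	}
}

int main(void)
{
	struct item_queue q = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL, 0 };

	for (int i = 0; i < 3; i++) {
		struct item *it = malloc(sizeof(*it));

		it->id = i;
		if (queue_item(&q, it))
			printf("item %d: a worker would be queued here\n", i);
	}
	drain_queue(&q);
	return 0;
}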