Diffstat (limited to 'fs/btrfs/volumes.c')
 fs/btrfs/volumes.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 157 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 722eb4550154..c57458ce6339 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -27,6 +27,7 @@
 #include "transaction.h"
 #include "print-tree.h"
 #include "volumes.h"
+#include "async-thread.h"
 
 struct map_lookup {
         u64 type;
@@ -110,6 +111,101 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid)
         return NULL;
 }
 
+/*
+ * we try to collect pending bios for a device so we don't get a large
+ * number of procs sending bios down to the same device. This greatly
+ * improves the scheduler's ability to collect and merge the bios.
+ *
+ * But, it also turns into a long list of bios to process and that is sure
+ * to eventually make the worker thread block. The solution here is to
+ * make some progress and then put this work struct back at the end of
+ * the list if the block device is congested. This way, multiple devices
+ * can make progress from a single worker thread.
+ */
+int run_scheduled_bios(struct btrfs_device *device)
+{
+        struct bio *pending;
+        struct backing_dev_info *bdi;
+        struct bio *tail;
+        struct bio *cur;
+        int again = 0;
+        unsigned long num_run = 0;
+
+        bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+loop:
+        spin_lock(&device->io_lock);
+
+        /* take all the bios off the list at once and process them
+         * later on (without the lock held). But, remember the
+         * tail and other pointers so the bios can be properly reinserted
+         * into the list if we hit congestion
+         */
+        pending = device->pending_bios;
+        tail = device->pending_bio_tail;
+        WARN_ON(pending && !tail);
+        device->pending_bios = NULL;
+        device->pending_bio_tail = NULL;
+
+        /*
+         * if pending was NULL this time around, no bios need processing
+         * at all and we can stop. Otherwise it'll loop back up again
+         * and do an additional check so no bios are missed.
+         *
+         * device->running_pending is used to synchronize with the
+         * schedule_bio code.
+         */
+        if (pending) {
+                again = 1;
+                device->running_pending = 1;
+        } else {
+                again = 0;
+                device->running_pending = 0;
+        }
+        spin_unlock(&device->io_lock);
+
+        while (pending) {
+                cur = pending;
+                pending = pending->bi_next;
+                cur->bi_next = NULL;
+                atomic_dec(&device->dev_root->fs_info->nr_async_submits);
+                submit_bio(cur->bi_rw, cur);
+                num_run++;
+
+                /*
+                 * we made progress, there is more work to do and the bdi
+                 * is now congested. Back off and let other work structs
+                 * run instead
+                 */
+                if (pending && num_run && bdi_write_congested(bdi)) {
+                        struct bio *old_head;
+
+                        spin_lock(&device->io_lock);
+                        old_head = device->pending_bios;
+                        device->pending_bios = pending;
+                        if (device->pending_bio_tail)
+                                tail->bi_next = old_head;
+                        else
+                                device->pending_bio_tail = tail;
+
+                        spin_unlock(&device->io_lock);
+                        btrfs_requeue_work(&device->work);
+                        goto done;
+                }
+        }
+        if (again)
+                goto loop;
+done:
+        return 0;
+}
+
+void pending_bios_fn(struct btrfs_work *work)
+{
+        struct btrfs_device *device;
+
+        device = container_of(work, struct btrfs_device, work);
+        run_scheduled_bios(device);
+}
+
 static int device_list_add(const char *path,
                            struct btrfs_super_block *disk_super,
                            u64 devid, struct btrfs_fs_devices **fs_devices_ret)
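
The requeue trick above is the heart of the patch, so it is worth seeing in isolation. Below is a minimal, compilable sketch of the same drain-and-requeue shape in plain C, with pthreads standing in for the kernel spinlock; work_item, work_queue, process(), is_congested(), requeue_self() and drain() are hypothetical stand-ins for struct bio, the btrfs_device list fields, submit_bio(), bdi_write_congested(), btrfs_requeue_work() and run_scheduled_bios(), not kernel APIs.

    /* Illustrative only, not part of the patch. */
    #include <pthread.h>
    #include <stddef.h>

    struct work_item {
            struct work_item *next;
    };

    struct work_queue {
            pthread_mutex_t lock;
            struct work_item *head;
            struct work_item *tail;
            int running;            /* mirrors device->running_pending */
    };

    extern void process(struct work_item *item);    /* submit_bio()           */
    extern int  is_congested(void);                 /* bdi_write_congested()  */
    extern void requeue_self(struct work_queue *q); /* btrfs_requeue_work()   */

    void drain(struct work_queue *q)
    {
            struct work_item *pending, *tail, *cur;
            int again;

    loop:
            pthread_mutex_lock(&q->lock);
            /* detach the whole list so items are processed unlocked */
            pending = q->head;
            tail = q->tail;
            q->head = q->tail = NULL;
            again = q->running = (pending != NULL);
            pthread_mutex_unlock(&q->lock);

            while (pending) {
                    cur = pending;
                    pending = cur->next;
                    cur->next = NULL;
                    process(cur);

                    if (pending && is_congested()) {
                            /*
                             * splice the unprocessed remainder back at the
                             * head, ahead of anything queued while we ran,
                             * then hand the worker back to other devices
                             */
                            pthread_mutex_lock(&q->lock);
                            tail->next = q->head;   /* may be NULL */
                            if (!q->tail)
                                    q->tail = tail;
                            q->head = pending;
                            pthread_mutex_unlock(&q->lock);
                            requeue_self(q);
                            return;
                    }
            }
            if (again)
                    goto loop;
    }

The key invariant is that the detached chain goes back on the front of the list, so requeued bios keep their original submission order ahead of anything queued while the worker was running.
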
@@ -141,6 +237,7 @@ static int device_list_add(const char *path,
                 return -ENOMEM;
         }
         device->devid = devid;
+        device->work.func = pending_bios_fn;
         memcpy(device->uuid, disk_super->dev_item.uuid,
                BTRFS_UUID_SIZE);
         device->barriers = 1;
@@ -925,6 +1022,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
         }
 
         device->barriers = 1;
+        device->work.func = pending_bios_fn;
         generate_random_uuid(device->uuid);
         spin_lock_init(&device->io_lock);
         device->name = kstrdup(device_path, GFP_NOFS);
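
Each device setup path in this patch points device->work.func at pending_bios_fn, which later recovers the btrfs_device from the embedded work struct via container_of(). A self-contained illustration of that recovery pattern, using a simplified version of the kernel macro and hypothetical struct names:

    /* Illustrative only: container_of-style recovery of an enclosing
     * struct from a pointer to one of its embedded members.
     */
    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct work {
            void (*func)(struct work *);
    };

    struct device {
            int id;
            struct work work;       /* embedded, like btrfs_device->work */
    };

    static void pending_fn(struct work *w)
    {
            /* recover the enclosing device from the embedded work */
            struct device *dev = container_of(w, struct device, work);
            printf("running work for device %d\n", dev->id);
    }

    int main(void)
    {
            struct device d = { .id = 42 };
            d.work.func = pending_fn;
            d.work.func(&d.work);   /* a worker thread would do this */
            return 0;
    }
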
@@ -1965,8 +2063,61 @@ static int end_bio_multi_stripe(struct bio *bio,
 #endif
 }
 
+struct async_sched {
+        struct bio *bio;
+        int rw;
+        struct btrfs_fs_info *info;
+        struct btrfs_work work;
+};
+
+/*
+ * see run_scheduled_bios for a description of why bios are collected for
+ * async submit.
+ *
+ * This will add one bio to the pending list for a device and make sure
+ * the work struct is scheduled.
+ */
+int schedule_bio(struct btrfs_root *root, struct btrfs_device *device,
+                 int rw, struct bio *bio)
+{
+        int should_queue = 1;
+
+        /* don't bother with additional async steps for reads, right now */
+        if (!(rw & (1 << BIO_RW))) {
+                submit_bio(rw, bio);
+                return 0;
+        }
+
+        /*
+         * nr_async_submits allows us to reliably return congestion to the
+         * higher layers. Otherwise, the async bio makes it appear we have
+         * made progress against dirty pages when we've really just put it
+         * on a queue for later
+         */
+        atomic_inc(&root->fs_info->nr_async_submits);
+        bio->bi_next = NULL;
+        bio->bi_rw |= rw;
+
+        spin_lock(&device->io_lock);
+
+        if (device->pending_bio_tail)
+                device->pending_bio_tail->bi_next = bio;
+
+        device->pending_bio_tail = bio;
+        if (!device->pending_bios)
+                device->pending_bios = bio;
+        if (device->running_pending)
+                should_queue = 0;
+
+        spin_unlock(&device->io_lock);
+
+        if (should_queue)
+                btrfs_queue_worker(&root->fs_info->workers, &device->work);
+        return 0;
+}
+
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
-                  int mirror_num)
+                  int mirror_num, int async_submit)
 {
         struct btrfs_mapping_tree *map_tree;
         struct btrfs_device *dev;
@@ -2012,10 +2163,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
         dev = multi->stripes[dev_nr].dev;
         if (dev && dev->bdev) {
                 bio->bi_bdev = dev->bdev;
-                spin_lock(&dev->io_lock);
-                dev->total_ios++;
-                spin_unlock(&dev->io_lock);
-                submit_bio(rw, bio);
+                if (async_submit)
+                        schedule_bio(root, dev, rw, bio);
+                else
+                        submit_bio(rw, bio);
         } else {
                 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
                 bio->bi_sector = logical >> 9;
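
schedule_bio() above is the producer half of the scheme: append under the lock, and only queue the work struct when no drain pass is already running, since a running pass re-checks the list (the "if (again) goto loop" in run_scheduled_bios) before exiting. Continuing the illustrative pthread sketch from the earlier hunk, with enqueue() and wake_worker() as hypothetical stand-ins for schedule_bio() and btrfs_queue_worker():

    /* Illustrative companion to the drain() sketch above. */
    extern void wake_worker(struct work_queue *q);  /* btrfs_queue_worker() */

    void enqueue(struct work_queue *q, struct work_item *item)
    {
            int should_wake = 1;

            item->next = NULL;

            pthread_mutex_lock(&q->lock);
            /* append at the tail of the singly-linked list */
            if (q->tail)
                    q->tail->next = item;
            q->tail = item;
            if (!q->head)
                    q->head = item;
            if (q->running)
                    should_wake = 0; /* an active drain pass will see it */
            pthread_mutex_unlock(&q->lock);

            if (should_wake)
                    wake_worker(q);
    }

The should_wake/should_queue test is what keeps many submitters from flooding the worker pool: at most one work struct per device is ever in flight, and everything else just piles onto that device's list.
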
@@ -2054,6 +2205,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
         device->barriers = 1;
         device->dev_root = root->fs_info->dev_root;
         device->devid = devid;
+        device->work.func = pending_bios_fn;
         fs_devices->num_devices++;
         spin_lock_init(&device->io_lock);
         memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);