author    Linus Torvalds <torvalds@linux-foundation.org>  2013-05-08 13:13:35 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-05-08 13:13:35 -0400
commit    4de13d7aa8f4d02f4dc99d4609575659f92b3c5a
tree      3bc9729eabe79c6164cd29a5d605000bc82bf837
parent    5af43c24ca59a448c9312dd4a4a51d27ec3b9a73
parent    b8d4a5bf6a049303a29a3275f463f09a490b50ea
Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block
Pull block core updates from Jens Axboe:
- Major bit is Kent's prep work for immutable bio vecs.
- Stable candidate fix for a scheduling-while-atomic bug in the queue
  bypass operation.
- Fix for the hang when the 32-bit unsigned rq->datalen is exceeded while
  merging discard bios.
- Tejun's changes to convert the writeback thread pool to the generic
  workqueue mechanism.
- Runtime PM framework; SCSI patches exist on top of these in James'
  tree.
- A few random fixes.
* 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits)
relay: move remove_buf_file inside relay_close_buf
partitions/efi.c: replace useless kzalloc's by kmalloc's
fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read()
block: fix max discard sectors limit
blkcg: fix "scheduling while atomic" in blk_queue_bypass_start
Documentation: cfq-iosched: update documentation help for cfq tunables
writeback: expose the bdi_wq workqueue
writeback: replace custom worker pool implementation with unbound workqueue
writeback: remove unused bdi_pending_list
aoe: Fix unitialized var usage
bio-integrity: Add explicit field for owner of bip_buf
block: Add an explicit bio flag for bios that own their bvec
block: Add bio_alloc_pages()
block: Convert some code to bio_for_each_segment_all()
block: Add bio_for_each_segment_all()
bounce: Refactor __blk_queue_bounce to not use bi_io_vec
raid1: use bio_copy_data()
pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage
pktcdvd: use bio_copy_data()
block: Add bio_copy_data()
...
50 files changed, 1000 insertions, 956 deletions
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
index a5eb7d19a65d..9887f0414c16 100644
--- a/Documentation/block/cfq-iosched.txt
+++ b/Documentation/block/cfq-iosched.txt
@@ -5,7 +5,7 @@ The main aim of CFQ scheduler is to provide a fair allocation of the disk
 I/O bandwidth for all the processes which requests an I/O operation.
 
 CFQ maintains the per process queue for the processes which request I/O
-operation(syncronous requests). In case of asynchronous requests, all the
+operation(synchronous requests). In case of asynchronous requests, all the
 requests from all the processes are batched together according to their
 process's I/O priority.
 
@@ -66,6 +66,47 @@ This parameter is used to set the timeout of synchronous requests. Default
 value of this is 124ms. In case to favor synchronous requests over asynchronous
 one, this value should be decreased relative to fifo_expire_async.
 
+group_idle
+-----------
+This parameter forces idling at the CFQ group level instead of CFQ
+queue level. This was introduced after after a bottleneck was observed
+in higher end storage due to idle on sequential queue and allow dispatch
+from a single queue. The idea with this parameter is that it can be run with
+slice_idle=0 and group_idle=8, so that idling does not happen on individual
+queues in the group but happens overall on the group and thus still keeps the
+IO controller working.
+Not idling on individual queues in the group will dispatch requests from
+multiple queues in the group at the same time and achieve higher throughput
+on higher end storage.
+
+Default value for this parameter is 8ms.
+
+latency
+-------
+This parameter is used to enable/disable the latency mode of the CFQ
+scheduler. If latency mode (called low_latency) is enabled, CFQ tries
+to recompute the slice time for each process based on the target_latency set
+for the system. This favors fairness over throughput. Disabling low
+latency (setting it to 0) ignores target latency, allowing each process in the
+system to get a full time slice.
+
+By default low latency mode is enabled.
+
+target_latency
+--------------
+This parameter is used to calculate the time slice for a process if cfq's
+latency mode is enabled. It will ensure that sync requests have an estimated
+latency. But if sequential workload is higher(e.g. sequential read),
+then to meet the latency constraints, throughput may decrease because of less
+time for each process to issue I/O request before the cfq queue is switched.
+
+Though this can be overcome by disabling the latency_mode, it may increase
+the read latency for some applications. This parameter allows for changing
+target_latency through the sysfs interface which can provide the balanced
+throughput and read latency.
+
+Default value for target_latency is 300ms.
+
 slice_async
 -----------
 This parameter is same as of slice_sync but for asynchronous queue. The
@@ -98,8 +139,8 @@ in the device exceeds this parameter. This parameter is used for synchronous
 request.
 
 In case of storage with several disk, this setting can limit the parallel
-processing of request. Therefore, increasing the value can imporve the
-performace although this can cause the latency of some I/O to increase due
+processing of request. Therefore, increasing the value can improve the
+performance although this can cause the latency of some I/O to increase due
 to more number of requests.
 
 CFQ Group scheduling
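
The tunables documented above are exposed per device under
/sys/block/<dev>/queue/iosched/. As a quick illustration (not part of the
patch; the device name sda and the value 8 are arbitrary examples, and the
path assumes that device is currently using the cfq scheduler), a tunable
such as group_idle can be inspected and changed from user space like this:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/iosched/group_idle";
	FILE *f = fopen(path, "r+");
	char cur[16];

	if (!f)
		return 1;
	if (fgets(cur, sizeof(cur), f))
		printf("group_idle was %s", cur);	/* milliseconds, per the text above */
	rewind(f);
	fprintf(f, "8\n");			/* write the new value */
	fclose(f);
	return 0;
}
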
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b2b9837f9dd3..e8918ffaf96d 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -972,10 +972,10 @@ int blkcg_activate_policy(struct request_queue *q,
 	if (!new_blkg)
 		return -ENOMEM;
 
-	preloaded = !radix_tree_preload(GFP_KERNEL);
-
 	blk_queue_bypass_start(q);
 
+	preloaded = !radix_tree_preload(GFP_KERNEL);
+
 	/*
 	 * Make sure the root blkg exists and count the existing blkgs. As
 	 * @q is bypassing at this point, blkg_lookup_create() can't be
diff --git a/block/blk-core.c b/block/blk-core.c
index 7c288358a745..33c33bc99ddd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -30,6 +30,7 @@
 #include <linux/list_sort.h>
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
+#include <linux/pm_runtime.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -159,20 +160,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
 		error = -EIO;
 
-	if (unlikely(nbytes > bio->bi_size)) {
-		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-		       __func__, nbytes, bio->bi_size);
-		nbytes = bio->bi_size;
-	}
-
 	if (unlikely(rq->cmd_flags & REQ_QUIET))
 		set_bit(BIO_QUIET, &bio->bi_flags);
 
-	bio->bi_size -= nbytes;
-	bio->bi_sector += (nbytes >> 9);
-
-	if (bio_integrity(bio))
-		bio_integrity_advance(bio, nbytes);
+	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
 	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
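
The helper that replaces the open-coded completion accounting above advances
a bio by a number of finished bytes. A rough sketch of what such a helper
does, reconstructed only from the lines removed here (the real bio_advance()
added by this series also walks the bvec index/offsets):

/* Simplified sketch: account @nbytes of completed data in @bio. */
static void bio_advance_sketch(struct bio *bio, unsigned int nbytes)
{
	if (bio_integrity(bio))
		bio_integrity_advance(bio, nbytes);	/* keep integrity data in step */

	bio->bi_sector += nbytes >> 9;			/* 512-byte sectors */
	bio->bi_size -= nbytes;
}
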
@@ -1264,6 +1255,16 @@ void part_round_stats(int cpu, struct hd_struct *part)
 }
 EXPORT_SYMBOL_GPL(part_round_stats);
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_put_request(struct request *rq)
+{
+	if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
+		pm_runtime_mark_last_busy(rq->q->dev);
+}
+#else
+static inline void blk_pm_put_request(struct request *rq) {}
+#endif
+
 /*
  * queue lock must be held
  */
@@ -1274,6 +1275,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	blk_pm_put_request(req);
+
 	elv_completed_request(q, req);
 
 	/* this is a bio leak */
@@ -1597,7 +1600,7 @@ static void handle_bad_sector(struct bio *bio)
 	printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
 			bdevname(bio->bi_bdev, b),
 			bio->bi_rw,
-			(unsigned long long)bio->bi_sector + bio_sectors(bio),
+			(unsigned long long)bio_end_sector(bio),
 			(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
 
 	set_bit(BIO_EOF, &bio->bi_flags);
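
Many hunks in this series replace the open-coded pattern
bio->bi_sector + bio_sectors(bio) with bio_end_sector(). In essence the two
accessors used by these conversions amount to the following (a sketch, not
the literal header definitions):

/* Sketch of the accessors these conversions rely on. */
#define bio_sectors_sketch(bio)		((bio)->bi_size >> 9)
#define bio_end_sector_sketch(bio)	((bio)->bi_sector + bio_sectors_sketch(bio))
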
@@ -2053,6 +2056,28 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
+#ifdef CONFIG_PM_RUNTIME
+/*
+ * Don't process normal requests when queue is suspended
+ * or in the process of suspending/resuming
+ */
+static struct request *blk_pm_peek_request(struct request_queue *q,
+					   struct request *rq)
+{
+	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
+	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
+		return NULL;
+	else
+		return rq;
+}
+#else
+static inline struct request *blk_pm_peek_request(struct request_queue *q,
+						  struct request *rq)
+{
+	return rq;
+}
+#endif
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2075,6 +2100,11 @@ struct request *blk_peek_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
+
+		rq = blk_pm_peek_request(q, rq);
+		if (!rq)
+			break;
+
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
@@ -2253,8 +2283,7 @@ EXPORT_SYMBOL(blk_fetch_request);
  **/
 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
-	int total_bytes, bio_nbytes, next_idx = 0;
-	struct bio *bio;
+	int total_bytes;
 
 	if (!req->bio)
 		return false;
@@ -2300,56 +2329,21 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 
 	blk_account_io_completion(req, nr_bytes);
 
-	total_bytes = bio_nbytes = 0;
-	while ((bio = req->bio) != NULL) {
-		int nbytes;
+	total_bytes = 0;
+	while (req->bio) {
+		struct bio *bio = req->bio;
+		unsigned bio_bytes = min(bio->bi_size, nr_bytes);
 
-		if (nr_bytes >= bio->bi_size) {
+		if (bio_bytes == bio->bi_size)
 			req->bio = bio->bi_next;
-			nbytes = bio->bi_size;
-			req_bio_endio(req, bio, nbytes, error);
-			next_idx = 0;
-			bio_nbytes = 0;
-		} else {
-			int idx = bio->bi_idx + next_idx;
 
-			if (unlikely(idx >= bio->bi_vcnt)) {
-				blk_dump_rq_flags(req, "__end_that");
-				printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
-				       __func__, idx, bio->bi_vcnt);
-				break;
-			}
+		req_bio_endio(req, bio, bio_bytes, error);
 
-			nbytes = bio_iovec_idx(bio, idx)->bv_len;
-			BIO_BUG_ON(nbytes > bio->bi_size);
+		total_bytes += bio_bytes;
+		nr_bytes -= bio_bytes;
 
-			/*
-			 * not a complete bvec done
-			 */
-			if (unlikely(nbytes > nr_bytes)) {
-				bio_nbytes += nr_bytes;
-				total_bytes += nr_bytes;
-				break;
-			}
-
-			/*
-			 * advance to the next vector
-			 */
-			next_idx++;
-			bio_nbytes += nbytes;
-		}
-
-		total_bytes += nbytes;
-		nr_bytes -= nbytes;
-
-		bio = req->bio;
-		if (bio) {
-			/*
-			 * end more in this run, or just return 'not-done'
-			 */
-			if (unlikely(nr_bytes <= 0))
-				break;
-		}
+		if (!nr_bytes)
+			break;
 	}
 
 	/*
@@ -2365,16 +2359,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		return false;
 	}
 
-	/*
-	 * if the request wasn't completed, update state
-	 */
-	if (bio_nbytes) {
-		req_bio_endio(req, bio, bio_nbytes, error);
-		bio->bi_idx += next_idx;
-		bio_iovec(bio)->bv_offset += nr_bytes;
-		bio_iovec(bio)->bv_len -= nr_bytes;
-	}
-
 	req->__data_len -= total_bytes;
 	req->buffer = bio_data(req->bio);
 
@@ -3046,6 +3030,149 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
+#ifdef CONFIG_PM_RUNTIME
+/**
+ * blk_pm_runtime_init - Block layer runtime PM initialization routine
+ * @q: the queue of the device
+ * @dev: the device the queue belongs to
+ *
+ * Description:
+ *    Initialize runtime-PM-related fields for @q and start auto suspend for
+ *    @dev. Drivers that want to take advantage of request-based runtime PM
+ *    should call this function after @dev has been initialized, and its
+ *    request queue @q has been allocated, and runtime PM for it can not happen
+ *    yet(either due to disabled/forbidden or its usage_count > 0). In most
+ *    cases, driver should call this function before any I/O has taken place.
+ *
+ *    This function takes care of setting up using auto suspend for the device,
+ *    the autosuspend delay is set to -1 to make runtime suspend impossible
+ *    until an updated value is either set by user or by driver. Drivers do
+ *    not need to touch other autosuspend settings.
+ *
+ *    The block layer runtime PM is request based, so only works for drivers
+ *    that use request as their IO unit instead of those directly use bio's.
+ */
+void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
+{
+	q->dev = dev;
+	q->rpm_status = RPM_ACTIVE;
+	pm_runtime_set_autosuspend_delay(q->dev, -1);
+	pm_runtime_use_autosuspend(q->dev);
+}
+EXPORT_SYMBOL(blk_pm_runtime_init);
+
+/**
+ * blk_pre_runtime_suspend - Pre runtime suspend check
+ * @q: the queue of the device
+ *
+ * Description:
+ *    This function will check if runtime suspend is allowed for the device
+ *    by examining if there are any requests pending in the queue. If there
+ *    are requests pending, the device can not be runtime suspended; otherwise,
+ *    the queue's status will be updated to SUSPENDING and the driver can
+ *    proceed to suspend the device.
+ *
+ *    For the not allowed case, we mark last busy for the device so that
+ *    runtime PM core will try to autosuspend it some time later.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_suspend callback.
+ *
+ * Return:
+ *    0		- OK to runtime suspend the device
+ *    -EBUSY	- Device should not be runtime suspended
+ */
+int blk_pre_runtime_suspend(struct request_queue *q)
+{
+	int ret = 0;
+
+	spin_lock_irq(q->queue_lock);
+	if (q->nr_pending) {
+		ret = -EBUSY;
+		pm_runtime_mark_last_busy(q->dev);
+	} else {
+		q->rpm_status = RPM_SUSPENDING;
+	}
+	spin_unlock_irq(q->queue_lock);
+	return ret;
+}
+EXPORT_SYMBOL(blk_pre_runtime_suspend);
+
+/**
+ * blk_post_runtime_suspend - Post runtime suspend processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_suspend function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime suspend function and mark last busy for the device so
+ *    that PM core will try to auto suspend the device at a later time.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_suspend callback.
+ */
+void blk_post_runtime_suspend(struct request_queue *q, int err)
+{
+	spin_lock_irq(q->queue_lock);
+	if (!err) {
+		q->rpm_status = RPM_SUSPENDED;
+	} else {
+		q->rpm_status = RPM_ACTIVE;
+		pm_runtime_mark_last_busy(q->dev);
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_suspend);
+
+/**
+ * blk_pre_runtime_resume - Pre runtime resume processing
+ * @q: the queue of the device
+ *
+ * Description:
+ *    Update the queue's runtime status to RESUMING in preparation for the
+ *    runtime resume of the device.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_resume callback.
+ */
+void blk_pre_runtime_resume(struct request_queue *q)
+{
+	spin_lock_irq(q->queue_lock);
+	q->rpm_status = RPM_RESUMING;
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_pre_runtime_resume);
+
+/**
+ * blk_post_runtime_resume - Post runtime resume processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_resume function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime_resume function. If it is successfully resumed, process
+ *    the requests that are queued into the device's queue when it is resuming
+ *    and then mark last busy and initiate autosuspend for it.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_resume callback.
+ */
+void blk_post_runtime_resume(struct request_queue *q, int err)
+{
+	spin_lock_irq(q->queue_lock);
+	if (!err) {
+		q->rpm_status = RPM_ACTIVE;
+		__blk_run_queue(q);
+		pm_runtime_mark_last_busy(q->dev);
+		pm_runtime_autosuspend(q->dev);
+	} else {
+		q->rpm_status = RPM_SUSPENDED;
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_resume);
+#endif
+
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
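
The five exported helpers above are meant to bracket a driver's runtime PM
callbacks. A minimal sketch of how a request-based driver might wire them up
(the driver type mydrv, its queue field, and the mydrv_quiesce_hw()/
mydrv_wake_hw() hooks are hypothetical; only the call ordering follows the
kernel-doc above):

/* Hypothetical request-based driver glue for block-layer runtime PM. */
static int mydrv_runtime_suspend(struct device *dev)
{
	struct mydrv *drv = dev_get_drvdata(dev);
	int err;

	err = blk_pre_runtime_suspend(drv->queue);
	if (err)			/* requests still pending: stay active */
		return err;
	err = mydrv_quiesce_hw(drv);	/* assumed driver-specific hook */
	blk_post_runtime_suspend(drv->queue, err);
	return err;
}

static int mydrv_runtime_resume(struct device *dev)
{
	struct mydrv *drv = dev_get_drvdata(dev);
	int err;

	blk_pre_runtime_resume(drv->queue);
	err = mydrv_wake_hw(drv);	/* assumed driver-specific hook */
	blk_post_runtime_resume(drv->queue, err);
	return err;
}

At probe time, once the queue exists, such a driver would call
blk_pm_runtime_init(drv->queue, dev) and then pick a real autosuspend delay
with pm_runtime_set_autosuspend_delay(), since the init helper deliberately
leaves it at -1.
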
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4f0ade74cfd0..d5cd3131c57a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 		return NULL;
 
 	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
-	if (cfqq) {
-		sector_t sector = bio->bi_sector + bio_sectors(bio);
-
-		return elv_rb_find(&cfqq->sort_list, sector);
-	}
+	if (cfqq)
+		return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
 
 	return NULL;
 }
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 90037b5eb17f..ba19a3afab79 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	 * check for front merge
 	 */
 	if (dd->front_merges) {
-		sector_t sector = bio->bi_sector + bio_sectors(bio);
+		sector_t sector = bio_end_sector(bio);
 
 		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
 		if (__rq) {
diff --git a/block/elevator.c b/block/elevator.c
index a0ffdd943c98..eba5b04c29b1 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -34,6 +34,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 
 #include <trace/events/block.h>
 
@@ -536,6 +537,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
 		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
 }
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_requeue_request(struct request *rq)
+{
+	if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
+		rq->q->nr_pending--;
+}
+
+static void blk_pm_add_request(struct request_queue *q, struct request *rq)
+{
+	if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
+	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
+		pm_request_resume(q->dev);
+}
+#else
+static inline void blk_pm_requeue_request(struct request *rq) {}
+static inline void blk_pm_add_request(struct request_queue *q,
+				      struct request *rq)
+{
+}
+#endif
+
 void elv_requeue_request(struct request_queue *q, struct request *rq)
 {
 	/*
@@ -550,6 +572,8 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 
 	rq->cmd_flags &= ~REQ_STARTED;
 
+	blk_pm_requeue_request(rq);
+
 	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
 
@@ -572,6 +596,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
 	trace_block_rq_insert(q, rq);
 
+	blk_pm_add_request(q, rq);
+
 	rq->q = q;
 
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index ff5804e2f1d2..c85fc895ecdb 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -238,7 +238,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
 		le32_to_cpu(gpt->sizeof_partition_entry);
 	if (!count)
 		return NULL;
-	pte = kzalloc(count, GFP_KERNEL);
+	pte = kmalloc(count, GFP_KERNEL);
 	if (!pte)
 		return NULL;
 
@@ -267,7 +267,7 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
 	gpt_header *gpt;
 	unsigned ssz = bdev_logical_block_size(state->bdev);
 
-	gpt = kzalloc(ssz, GFP_KERNEL);
+	gpt = kmalloc(ssz, GFP_KERNEL);
 	if (!gpt)
 		return NULL;
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 92b6d7c51e39..5efed089a702 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -928,7 +928,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
 	buf->resid = bio->bi_size;
 	buf->sector = bio->bi_sector;
 	bio_pageinc(bio);
-	buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
+	buf->bv = bv = bio_iovec(bio);
 	buf->bv_resid = bv->bv_len;
 	WARN_ON(buf->bv_resid == 0);
 }
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 531ceb31d0ff..f1a29f8e9d33 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -334,8 +334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
 	int err = -EIO;
 
 	sector = bio->bi_sector;
-	if (sector + (bio->bi_size >> SECTOR_SHIFT) >
-						get_capacity(bdev->bd_disk))
+	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
 		goto out;
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index c49e85608101..04ceb7e2fadd 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3775,7 +3775,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
 	bio_vec.bv_len = size;
 	bio_vec.bv_offset = 0;
 	bio.bi_vcnt = 1;
-	bio.bi_idx = 0;
 	bio.bi_size = size;
 	bio.bi_bdev = bdev;
 	bio.bi_sector = 0;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 9f2d348f7115..3c08983e600a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 			pd->iosched.successive_reads += bio->bi_size >> 10;
 		else {
 			pd->iosched.successive_reads = 0;
-			pd->iosched.last_write = bio->bi_sector + bio_sectors(bio);
+			pd->iosched.last_write = bio_end_sector(bio);
 		}
 		if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
 			if (pd->read_speed == pd->write_speed) {
@@ -948,31 +948,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
 }
 
 /*
- * Copy CD_FRAMESIZE bytes from src_bio into a destination page
- */
-static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs)
-{
-	unsigned int copy_size = CD_FRAMESIZE;
-
-	while (copy_size > 0) {
-		struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
-		void *vfrom = kmap_atomic(src_bvl->bv_page) +
-			src_bvl->bv_offset + offs;
-		void *vto = page_address(dst_page) + dst_offs;
-		int len = min_t(int, copy_size, src_bvl->bv_len - offs);
-
-		BUG_ON(len < 0);
-		memcpy(vto, vfrom, len);
-		kunmap_atomic(vfrom);
-
-		seg++;
-		offs = 0;
-		dst_offs += len;
-		copy_size -= len;
-	}
-}
-
-/*
  * Copy all data for this packet to pkt->pages[], so that
  * a) The number of required segments for the write bio is minimized, which
  *    is necessary for some scsi controllers.
@@ -1181,16 +1156,15 @@ static int pkt_start_recovery(struct packet_data *pkt)
 	new_sector = new_block * (CD_FRAMESIZE >> 9);
 	pkt->sector = new_sector;
 
+	bio_reset(pkt->bio);
+	pkt->bio->bi_bdev = pd->bdev;
+	pkt->bio->bi_rw = REQ_WRITE;
 	pkt->bio->bi_sector = new_sector;
-	pkt->bio->bi_next = NULL;
-	pkt->bio->bi_flags = 1 << BIO_UPTODATE;
-	pkt->bio->bi_idx = 0;
+	pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
+	pkt->bio->bi_vcnt = pkt->frames;
 
-	BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
-	BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
-	BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
-	BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
-	BUG_ON(pkt->bio->bi_private != pkt);
+	pkt->bio->bi_end_io = pkt_end_io_packet_write;
+	pkt->bio->bi_private = pkt;
 
 	drop_super(sb);
 	return 1;
@@ -1325,55 +1299,35 @@ try_next_bio:
  */
 static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 {
-	struct bio *bio;
 	int f;
-	int frames_write;
 	struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
 
+	bio_reset(pkt->w_bio);
+	pkt->w_bio->bi_sector = pkt->sector;
+	pkt->w_bio->bi_bdev = pd->bdev;
+	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+	pkt->w_bio->bi_private = pkt;
+
+	/* XXX: locking? */
 	for (f = 0; f < pkt->frames; f++) {
 		bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
 		bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+		if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
+			BUG();
 	}
+	VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
 
 	/*
 	 * Fill-in bvec with data from orig_bios.
 	 */
-	frames_write = 0;
 	spin_lock(&pkt->lock);
-	bio_list_for_each(bio, &pkt->orig_bios) {
-		int segment = bio->bi_idx;
-		int src_offs = 0;
-		int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
-		int num_frames = bio->bi_size / CD_FRAMESIZE;
-		BUG_ON(first_frame < 0);
-		BUG_ON(first_frame + num_frames > pkt->frames);
-		for (f = first_frame; f < first_frame + num_frames; f++) {
-			struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
-
-			while (src_offs >= src_bvl->bv_len) {
-				src_offs -= src_bvl->bv_len;
-				segment++;
-				BUG_ON(segment >= bio->bi_vcnt);
-				src_bvl = bio_iovec_idx(bio, segment);
-			}
+	bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
 
-			if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
-				bvec[f].bv_page = src_bvl->bv_page;
-				bvec[f].bv_offset = src_bvl->bv_offset + src_offs;
-			} else {
-				pkt_copy_bio_data(bio, segment, src_offs,
-						  bvec[f].bv_page, bvec[f].bv_offset);
-			}
-			src_offs += CD_FRAMESIZE;
-			frames_write++;
-		}
-	}
 	pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
 	spin_unlock(&pkt->lock);
 
 	VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
-		frames_write, (unsigned long long)pkt->sector);
-	BUG_ON(frames_write != pkt->write_size);
+		pkt->write_size, (unsigned long long)pkt->sector);
 
 	if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
 		pkt_make_local_copy(pkt, bvec);
@@ -1383,16 +1337,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	}
 
 	/* Start the write request */
-	bio_reset(pkt->w_bio);
-	pkt->w_bio->bi_sector = pkt->sector;
-	pkt->w_bio->bi_bdev = pd->bdev;
-	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
-	pkt->w_bio->bi_private = pkt;
-	for (f = 0; f < pkt->frames; f++)
-		if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
-			BUG();
-	VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
-
 	atomic_set(&pkt->io_wait, 1);
 	pkt->w_bio->bi_rw = WRITE;
 	pkt_queue_bio(pd, pkt->w_bio);
@@ -2431,7 +2375,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 		cloned_bio->bi_bdev = pd->bdev;
 		cloned_bio->bi_private = psd;
 		cloned_bio->bi_end_io = pkt_end_io_read_cloned;
-		pd->stats.secs_r += bio->bi_size >> 9;
+		pd->stats.secs_r += bio_sectors(bio);
 		pkt_queue_bio(pd, cloned_bio);
 		return;
 	}
@@ -2452,7 +2396,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 	zone = ZONE(bio->bi_sector, pd);
 	VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
 		(unsigned long long)bio->bi_sector,
-		(unsigned long long)(bio->bi_sector + bio_sectors(bio)));
+		(unsigned long long)bio_end_sector(bio));
 
 	/* Check if we have to split the bio */
 	{
@@ -2460,7 +2404,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 		sector_t last_zone;
 		int first_sectors;
 
-		last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
+		last_zone = ZONE(bio_end_sector(bio) - 1, pd);
 		if (last_zone != zone) {
 			BUG_ON(last_zone != zone + pd->settings.size);
 			first_sectors = last_zone - bio->bi_sector;
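
pkt_start_write() above now hands the frame copy off to the new
bio_copy_data() helper instead of the removed pkt_copy_bio_data() loop. A
rough sketch of the kind of work such a bio-to-bio copy involves (simplified
to the case where source and destination have the same segment layout; the
real helper added by this series copes with differing segment boundaries):

/* Sketch only: copy the payload of @src into @dst, segment by segment. */
static void bio_copy_data_sketch(struct bio *dst, struct bio *src)
{
	int i;

	for (i = 0; i < src->bi_vcnt; i++) {
		void *from = kmap_atomic(src->bi_io_vec[i].bv_page);
		void *to = kmap_atomic(dst->bi_io_vec[i].bv_page);

		memcpy(to + dst->bi_io_vec[i].bv_offset,
		       from + src->bi_io_vec[i].bv_offset,
		       src->bi_io_vec[i].bv_len);

		kunmap_atomic(to);
		kunmap_atomic(from);
	}
}
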
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 22ffd5dcb168..ca63104136e0 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1143,7 +1143,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
 	/* Find first affected segment... */
 
 	resid = offset;
-	__bio_for_each_segment(bv, bio_src, idx, 0) {
+	bio_for_each_segment(bv, bio_src, idx) {
 		if (resid < bv->bv_len)
 			break;
 		resid -= bv->bv_len;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 13c15480d940..6d2d41ae9e32 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -858,8 +858,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
 	unsigned int i;
 	struct bio_vec *bv;
 
-	for (i = 0; i < clone->bi_vcnt; i++) {
-		bv = bio_iovec_idx(clone, i);
+	bio_for_each_segment_all(bv, clone, i) {
 		BUG_ON(!bv->bv_page);
 		mempool_free(bv->bv_page, cc->page_pool);
 		bv->bv_page = NULL;
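
Several conversions in this series (dm-crypt above, raid1 below) switch from
manual 0..bi_vcnt indexing to the new bio_for_each_segment_all() iterator,
which walks every bvec a bio owns regardless of its current position. A
sketch of the usage pattern, mirroring the dm-crypt hunk (free_one_page() is
a hypothetical per-page release hook):

/* Sketch: release every page attached to @bio using the owner-side iterator. */
static void release_bio_pages(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i) {
		free_one_page(bv->bv_page);	/* hypothetical helper */
		bv->bv_page = NULL;
	}
}
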
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d053098c6a91..699b5be68d31 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -458,7 +458,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
 {
 	io->bdev = m->dev->bdev;
 	io->sector = map_sector(m, bio);
-	io->count = bio->bi_size >> 9;
+	io->count = bio_sectors(bio);
 }
 
 static void hold_bio(struct mirror_set *ms, struct bio *bio)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index d8837d313f54..ea5e878a30b9 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
 	sector_t begin, end;
 
 	stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
-	stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
+	stripe_map_range_sector(sc, bio_end_sector(bio),
 				target_stripe, &end);
 	if (begin < end) {
 		bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index a746f1d21c66..b948fd864d45 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -501,7 +501,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 		return -EIO;
 	}
 
-	if ((bio->bi_sector + bio_sectors(bio)) >>
+	if (bio_end_sector(bio) >>
 	    (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
 		DMERR_LIMIT("io out of range");
 		return -EIO;
@@ -519,7 +519,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 
 	bio->bi_end_io = verity_end_io;
 	bio->bi_private = io;
-	io->io_vec_size = bio->bi_vcnt - bio->bi_idx;
+	io->io_vec_size = bio_segments(bio);
 	if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
 		io->io_vec = io->io_vec_inline;
 	else
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 5e7dc772f5de..3193aefe982b 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
 			return;
 		}
 
-		if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
-				 WRITE))
+		if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE))
 			failit = 1;
 		if (check_mode(conf, WritePersistent)) {
 			add_sector(conf, bio->bi_sector, WritePersistent);
@@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
 			failit = 1;
 	} else {
 		/* read request */
-		if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9),
-				 READ))
+		if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ))
 			failit = 1;
 		if (check_mode(conf, ReadTransient))
 			failit = 1;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 21014836bdbf..f03fabd2b37b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
 		bio_io_error(bio);
 		return;
 	}
-	if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
-		     tmp_dev->end_sector)) {
+	if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
 		/* This bio crosses a device boundary, so we have to
 		 * split it.
 		 */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6330c727396c..681d1099a2d5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -197,21 +197,12 @@ void md_trim_bio(struct bio *bio, int offset, int size)
 	if (offset == 0 && size == bio->bi_size)
 		return;
 
-	bio->bi_sector += offset;
-	bio->bi_size = size;
-	offset <<= 9;
 	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
 
-	while (bio->bi_idx < bio->bi_vcnt &&
-	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
-		/* remove this whole bio_vec */
-		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
-		bio->bi_idx++;
-	}
-	if (bio->bi_idx < bio->bi_vcnt) {
-		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
-		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
-	}
+	bio_advance(bio, offset << 9);
+
+	bio->bi_size = size;
+
 	/* avoid any complications with bi_idx being non-zero*/
 	if (bio->bi_idx) {
 		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 0505452de8d6..fcf65e512cf5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -502,11 +502,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
 {
 	if (likely(is_power_of_2(chunk_sects))) {
 		return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
-					+ (bio->bi_size >> 9));
+					+ bio_sectors(bio));
 	} else{
 		sector_t sector = bio->bi_sector;
 		return chunk_sects >= (sector_div(sector, chunk_sects)
-				+ (bio->bi_size >> 9));
+				+ bio_sectors(bio));
 	}
 }
 
@@ -527,8 +527,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 		sector_t sector = bio->bi_sector;
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
-		    bio->bi_idx != 0)
+		if (bio_segments(bio) > 1)
 			goto bad_map;
 		/* This is a one page bio that upper layers
 		 * refuse to split for us, so we need to split it.
@@ -567,7 +566,7 @@ bad_map:
 	printk("md/raid0:%s: make_request bug: can't convert block across chunks"
 	       " or bigger than %dk %llu %d\n",
 	       mdname(mddev), chunk_sects / 2,
-	       (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
+	       (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
 
 	bio_io_error(bio);
 	return;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 851023e2ba5d..55951182af73 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	struct pool_info *pi = data;
-	struct page *page;
 	struct r1bio *r1_bio;
 	struct bio *bio;
 	int i, j;
@@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 		j = 1;
 	while(j--) {
 		bio = r1_bio->bios[j];
-		for (i = 0; i < RESYNC_PAGES; i++) {
-			page = alloc_page(gfp_flags);
-			if (unlikely(!page))
-				goto out_free_pages;
+		bio->bi_vcnt = RESYNC_PAGES;
 
-			bio->bi_io_vec[i].bv_page = page;
-			bio->bi_vcnt = i+1;
-		}
+		if (bio_alloc_pages(bio, gfp_flags))
+			goto out_free_bio;
 	}
 	/* If not user-requests, copy the page pointers to all bios */
 	if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
@@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 	return r1_bio;
 
-out_free_pages:
-	for (j=0 ; j < pi->raid_disks; j++)
-		for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
-			put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
-	j = -1;
 out_free_bio:
 	while (++j < pi->raid_disks)
 		bio_put(r1_bio->bios[j]);
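
The r1buf_pool_alloc() conversion above relies on the new bio_alloc_pages()
helper (added earlier in this series) to back every bvec of an already-sized
bio with a page and to unwind on failure. A sketch of what such a helper
amounts to (simplified; it assumes bi_vcnt has already been set, as in the
raid1 hunk above):

/* Sketch: give each of @bio's bi_vcnt segments a freshly allocated page. */
static int bio_alloc_pages_sketch(struct bio *bio, gfp_t gfp)
{
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct bio_vec *bv = &bio->bi_io_vec[i];

		bv->bv_page = alloc_page(gfp);
		if (!bv->bv_page) {
			while (--i >= 0)
				__free_page(bio->bi_io_vec[i].bv_page);
			return -ENOMEM;
		}
		bv->bv_len = PAGE_SIZE;
		bv->bv_offset = 0;
	}
	return 0;
}
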
@@ -267,7 +257,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio) | |||
267 | (bio_data_dir(bio) == WRITE) ? "write" : "read", | 257 | (bio_data_dir(bio) == WRITE) ? "write" : "read", |
268 | (unsigned long long) bio->bi_sector, | 258 | (unsigned long long) bio->bi_sector, |
269 | (unsigned long long) bio->bi_sector + | 259 | (unsigned long long) bio->bi_sector + |
270 | (bio->bi_size >> 9) - 1); | 260 | bio_sectors(bio) - 1); |
271 | 261 | ||
272 | call_bio_endio(r1_bio); | 262 | call_bio_endio(r1_bio); |
273 | } | 263 | } |
@@ -458,7 +448,7 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
458 | " %llu-%llu\n", | 448 | " %llu-%llu\n", |
459 | (unsigned long long) mbio->bi_sector, | 449 | (unsigned long long) mbio->bi_sector, |
460 | (unsigned long long) mbio->bi_sector + | 450 | (unsigned long long) mbio->bi_sector + |
461 | (mbio->bi_size >> 9) - 1); | 451 | bio_sectors(mbio) - 1); |
462 | call_bio_endio(r1_bio); | 452 | call_bio_endio(r1_bio); |
463 | } | 453 | } |
464 | } | 454 | } |
@@ -925,7 +915,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio) | |||
925 | if (unlikely(!bvecs)) | 915 | if (unlikely(!bvecs)) |
926 | return; | 916 | return; |
927 | 917 | ||
928 | bio_for_each_segment(bvec, bio, i) { | 918 | bio_for_each_segment_all(bvec, bio, i) { |
929 | bvecs[i] = *bvec; | 919 | bvecs[i] = *bvec; |
930 | bvecs[i].bv_page = alloc_page(GFP_NOIO); | 920 | bvecs[i].bv_page = alloc_page(GFP_NOIO); |
931 | if (unlikely(!bvecs[i].bv_page)) | 921 | if (unlikely(!bvecs[i].bv_page)) |
@@ -1023,7 +1013,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1023 | md_write_start(mddev, bio); /* wait on superblock update early */ | 1013 | md_write_start(mddev, bio); /* wait on superblock update early */ |
1024 | 1014 | ||
1025 | if (bio_data_dir(bio) == WRITE && | 1015 | if (bio_data_dir(bio) == WRITE && |
1026 | bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo && | 1016 | bio_end_sector(bio) > mddev->suspend_lo && |
1027 | bio->bi_sector < mddev->suspend_hi) { | 1017 | bio->bi_sector < mddev->suspend_hi) { |
1028 | /* As the suspend_* range is controlled by | 1018 | /* As the suspend_* range is controlled by |
1029 | * userspace, we want an interruptible | 1019 | * userspace, we want an interruptible |
@@ -1034,7 +1024,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1034 | flush_signals(current); | 1024 | flush_signals(current); |
1035 | prepare_to_wait(&conf->wait_barrier, | 1025 | prepare_to_wait(&conf->wait_barrier, |
1036 | &w, TASK_INTERRUPTIBLE); | 1026 | &w, TASK_INTERRUPTIBLE); |
1037 | if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo || | 1027 | if (bio_end_sector(bio) <= mddev->suspend_lo || |
1038 | bio->bi_sector >= mddev->suspend_hi) | 1028 | bio->bi_sector >= mddev->suspend_hi) |
1039 | break; | 1029 | break; |
1040 | schedule(); | 1030 | schedule(); |
@@ -1054,7 +1044,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1054 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); | 1044 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); |
1055 | 1045 | ||
1056 | r1_bio->master_bio = bio; | 1046 | r1_bio->master_bio = bio; |
1057 | r1_bio->sectors = bio->bi_size >> 9; | 1047 | r1_bio->sectors = bio_sectors(bio); |
1058 | r1_bio->state = 0; | 1048 | r1_bio->state = 0; |
1059 | r1_bio->mddev = mddev; | 1049 | r1_bio->mddev = mddev; |
1060 | r1_bio->sector = bio->bi_sector; | 1050 | r1_bio->sector = bio->bi_sector; |
@@ -1132,7 +1122,7 @@ read_again: | |||
1132 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); | 1122 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); |
1133 | 1123 | ||
1134 | r1_bio->master_bio = bio; | 1124 | r1_bio->master_bio = bio; |
1135 | r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; | 1125 | r1_bio->sectors = bio_sectors(bio) - sectors_handled; |
1136 | r1_bio->state = 0; | 1126 | r1_bio->state = 0; |
1137 | r1_bio->mddev = mddev; | 1127 | r1_bio->mddev = mddev; |
1138 | r1_bio->sector = bio->bi_sector + sectors_handled; | 1128 | r1_bio->sector = bio->bi_sector + sectors_handled; |
@@ -1289,14 +1279,10 @@ read_again: | |||
1289 | struct bio_vec *bvec; | 1279 | struct bio_vec *bvec; |
1290 | int j; | 1280 | int j; |
1291 | 1281 | ||
1292 | /* Yes, I really want the '__' version so that | 1282 | /* |
1293 | * we clear any unused pointer in the io_vec, rather | 1283 | * We trimmed the bio, so _all is legit |
1294 | * than leave them unchanged. This is important | ||
1295 | * because when we come to free the pages, we won't | ||
1296 | * know the original bi_idx, so we just free | ||
1297 | * them all | ||
1298 | */ | 1284 | */ |
1299 | __bio_for_each_segment(bvec, mbio, j, 0) | 1285 | bio_for_each_segment_all(bvec, mbio, j) |
1300 | bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; | 1286 | bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; |
1301 | if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) | 1287 | if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) |
1302 | atomic_inc(&r1_bio->behind_remaining); | 1288 | atomic_inc(&r1_bio->behind_remaining); |
@@ -1334,14 +1320,14 @@ read_again: | |||
1334 | /* Mustn't call r1_bio_write_done before this next test, | 1320 | /* Mustn't call r1_bio_write_done before this next test, |
1335 | * as it could result in the bio being freed. | 1321 | * as it could result in the bio being freed. |
1336 | */ | 1322 | */ |
1337 | if (sectors_handled < (bio->bi_size >> 9)) { | 1323 | if (sectors_handled < bio_sectors(bio)) { |
1338 | r1_bio_write_done(r1_bio); | 1324 | r1_bio_write_done(r1_bio); |
1339 | /* We need another r1_bio. It has already been counted | 1325 | /* We need another r1_bio. It has already been counted |
1340 | * in bio->bi_phys_segments | 1326 | * in bio->bi_phys_segments |
1341 | */ | 1327 | */ |
1342 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); | 1328 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); |
1343 | r1_bio->master_bio = bio; | 1329 | r1_bio->master_bio = bio; |
1344 | r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; | 1330 | r1_bio->sectors = bio_sectors(bio) - sectors_handled; |
1345 | r1_bio->state = 0; | 1331 | r1_bio->state = 0; |
1346 | r1_bio->mddev = mddev; | 1332 | r1_bio->mddev = mddev; |
1347 | r1_bio->sector = bio->bi_sector + sectors_handled; | 1333 | r1_bio->sector = bio->bi_sector + sectors_handled; |
@@ -1867,7 +1853,7 @@ static int process_checks(struct r1bio *r1_bio) | |||
1867 | struct bio *sbio = r1_bio->bios[i]; | 1853 | struct bio *sbio = r1_bio->bios[i]; |
1868 | int size; | 1854 | int size; |
1869 | 1855 | ||
1870 | if (r1_bio->bios[i]->bi_end_io != end_sync_read) | 1856 | if (sbio->bi_end_io != end_sync_read) |
1871 | continue; | 1857 | continue; |
1872 | 1858 | ||
1873 | if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { | 1859 | if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { |
@@ -1892,16 +1878,15 @@ static int process_checks(struct r1bio *r1_bio) | |||
1892 | continue; | 1878 | continue; |
1893 | } | 1879 | } |
1894 | /* fixup the bio for reuse */ | 1880 | /* fixup the bio for reuse */ |
1881 | bio_reset(sbio); | ||
1895 | sbio->bi_vcnt = vcnt; | 1882 | sbio->bi_vcnt = vcnt; |
1896 | sbio->bi_size = r1_bio->sectors << 9; | 1883 | sbio->bi_size = r1_bio->sectors << 9; |
1897 | sbio->bi_idx = 0; | ||
1898 | sbio->bi_phys_segments = 0; | ||
1899 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
1900 | sbio->bi_flags |= 1 << BIO_UPTODATE; | ||
1901 | sbio->bi_next = NULL; | ||
1902 | sbio->bi_sector = r1_bio->sector + | 1884 | sbio->bi_sector = r1_bio->sector + |
1903 | conf->mirrors[i].rdev->data_offset; | 1885 | conf->mirrors[i].rdev->data_offset; |
1904 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1886 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
1887 | sbio->bi_end_io = end_sync_read; | ||
1888 | sbio->bi_private = r1_bio; | ||
1889 | |||
1905 | size = sbio->bi_size; | 1890 | size = sbio->bi_size; |
1906 | for (j = 0; j < vcnt ; j++) { | 1891 | for (j = 0; j < vcnt ; j++) { |
1907 | struct bio_vec *bi; | 1892 | struct bio_vec *bi; |
@@ -1912,10 +1897,9 @@ static int process_checks(struct r1bio *r1_bio) | |||
1912 | else | 1897 | else |
1913 | bi->bv_len = size; | 1898 | bi->bv_len = size; |
1914 | size -= PAGE_SIZE; | 1899 | size -= PAGE_SIZE; |
1915 | memcpy(page_address(bi->bv_page), | ||
1916 | page_address(pbio->bi_io_vec[j].bv_page), | ||
1917 | PAGE_SIZE); | ||
1918 | } | 1900 | } |
1901 | |||
1902 | bio_copy_data(sbio, pbio); | ||
1919 | } | 1903 | } |
1920 | return 0; | 1904 | return 0; |
1921 | } | 1905 | } |
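process_checks() now repairs a mismatching read by resetting the scratch bio and letting bio_copy_data() move the page contents, rather than re-initialising a dozen fields by hand and memcpy()ing page by page. A minimal sketch of that reset-and-reuse shape, with a hypothetical helper name and simplified arguments:

#include <linux/bio.h>

/*
 * bio_reset() clears everything up to bi_max_vecs but keeps the bvec
 * array, bi_max_vecs and the pool linkage, and re-marks the bio up to
 * date - so the caller refills only the fields it still needs.
 */
static void sketch_reuse_sync_bio(struct bio *sbio, struct bio *pbio,
                                  struct block_device *bdev, sector_t sector,
                                  unsigned short vcnt, int sectors,
                                  bio_end_io_t *end_io, void *private)
{
        bio_reset(sbio);
        sbio->bi_vcnt    = vcnt;
        sbio->bi_size    = sectors << 9;
        sbio->bi_sector  = sector;
        sbio->bi_bdev    = bdev;
        sbio->bi_end_io  = end_io;
        sbio->bi_private = private;

        bio_copy_data(sbio, pbio);      /* destination first: copy pbio's pages into sbio */
}

bio_copy_data() walks both bios' vectors, so the bv_len fixup loop kept above still matters; only the open-coded per-page memcpy() goes away.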
@@ -1952,7 +1936,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) | |||
1952 | wbio->bi_rw = WRITE; | 1936 | wbio->bi_rw = WRITE; |
1953 | wbio->bi_end_io = end_sync_write; | 1937 | wbio->bi_end_io = end_sync_write; |
1954 | atomic_inc(&r1_bio->remaining); | 1938 | atomic_inc(&r1_bio->remaining); |
1955 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); | 1939 | md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio)); |
1956 | 1940 | ||
1957 | generic_make_request(wbio); | 1941 | generic_make_request(wbio); |
1958 | } | 1942 | } |
@@ -2064,32 +2048,11 @@ static void fix_read_error(struct r1conf *conf, int read_disk, | |||
2064 | } | 2048 | } |
2065 | } | 2049 | } |
2066 | 2050 | ||
2067 | static void bi_complete(struct bio *bio, int error) | ||
2068 | { | ||
2069 | complete((struct completion *)bio->bi_private); | ||
2070 | } | ||
2071 | |||
2072 | static int submit_bio_wait(int rw, struct bio *bio) | ||
2073 | { | ||
2074 | struct completion event; | ||
2075 | rw |= REQ_SYNC; | ||
2076 | |||
2077 | init_completion(&event); | ||
2078 | bio->bi_private = &event; | ||
2079 | bio->bi_end_io = bi_complete; | ||
2080 | submit_bio(rw, bio); | ||
2081 | wait_for_completion(&event); | ||
2082 | |||
2083 | return test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
2084 | } | ||
2085 | |||
2086 | static int narrow_write_error(struct r1bio *r1_bio, int i) | 2051 | static int narrow_write_error(struct r1bio *r1_bio, int i) |
2087 | { | 2052 | { |
2088 | struct mddev *mddev = r1_bio->mddev; | 2053 | struct mddev *mddev = r1_bio->mddev; |
2089 | struct r1conf *conf = mddev->private; | 2054 | struct r1conf *conf = mddev->private; |
2090 | struct md_rdev *rdev = conf->mirrors[i].rdev; | 2055 | struct md_rdev *rdev = conf->mirrors[i].rdev; |
2091 | int vcnt, idx; | ||
2092 | struct bio_vec *vec; | ||
2093 | 2056 | ||
2094 | /* bio has the data to be written to device 'i' where | 2057 | /* bio has the data to be written to device 'i' where |
2095 | * we just recently had a write error. | 2058 | * we just recently had a write error. |
@@ -2117,30 +2080,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) | |||
2117 | & ~(sector_t)(block_sectors - 1)) | 2080 | & ~(sector_t)(block_sectors - 1)) |
2118 | - sector; | 2081 | - sector; |
2119 | 2082 | ||
2120 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | ||
2121 | vcnt = r1_bio->behind_page_count; | ||
2122 | vec = r1_bio->behind_bvecs; | ||
2123 | idx = 0; | ||
2124 | while (vec[idx].bv_page == NULL) | ||
2125 | idx++; | ||
2126 | } else { | ||
2127 | vcnt = r1_bio->master_bio->bi_vcnt; | ||
2128 | vec = r1_bio->master_bio->bi_io_vec; | ||
2129 | idx = r1_bio->master_bio->bi_idx; | ||
2130 | } | ||
2131 | while (sect_to_write) { | 2083 | while (sect_to_write) { |
2132 | struct bio *wbio; | 2084 | struct bio *wbio; |
2133 | if (sectors > sect_to_write) | 2085 | if (sectors > sect_to_write) |
2134 | sectors = sect_to_write; | 2086 | sectors = sect_to_write; |
2135 | /* Write at 'sector' for 'sectors'*/ | 2087 | /* Write at 'sector' for 'sectors'*/ |
2136 | 2088 | ||
2137 | wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); | 2089 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { |
2138 | memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); | 2090 | unsigned vcnt = r1_bio->behind_page_count; |
2139 | wbio->bi_sector = r1_bio->sector; | 2091 | struct bio_vec *vec = r1_bio->behind_bvecs; |
2092 | |||
2093 | while (!vec->bv_page) { | ||
2094 | vec++; | ||
2095 | vcnt--; | ||
2096 | } | ||
2097 | |||
2098 | wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); | ||
2099 | memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); | ||
2100 | |||
2101 | wbio->bi_vcnt = vcnt; | ||
2102 | } else { | ||
2103 | wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); | ||
2104 | } | ||
2105 | |||
2140 | wbio->bi_rw = WRITE; | 2106 | wbio->bi_rw = WRITE; |
2141 | wbio->bi_vcnt = vcnt; | 2107 | wbio->bi_sector = r1_bio->sector; |
2142 | wbio->bi_size = r1_bio->sectors << 9; | 2108 | wbio->bi_size = r1_bio->sectors << 9; |
2143 | wbio->bi_idx = idx; | ||
2144 | 2109 | ||
2145 | md_trim_bio(wbio, sector - r1_bio->sector, sectors); | 2110 | md_trim_bio(wbio, sector - r1_bio->sector, sectors); |
2146 | wbio->bi_sector += rdev->data_offset; | 2111 | wbio->bi_sector += rdev->data_offset; |
@@ -2289,8 +2254,7 @@ read_more: | |||
2289 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); | 2254 | r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); |
2290 | 2255 | ||
2291 | r1_bio->master_bio = mbio; | 2256 | r1_bio->master_bio = mbio; |
2292 | r1_bio->sectors = (mbio->bi_size >> 9) | 2257 | r1_bio->sectors = bio_sectors(mbio) - sectors_handled; |
2293 | - sectors_handled; | ||
2294 | r1_bio->state = 0; | 2258 | r1_bio->state = 0; |
2295 | set_bit(R1BIO_ReadError, &r1_bio->state); | 2259 | set_bit(R1BIO_ReadError, &r1_bio->state); |
2296 | r1_bio->mddev = mddev; | 2260 | r1_bio->mddev = mddev; |
@@ -2464,18 +2428,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp | |||
2464 | for (i = 0; i < conf->raid_disks * 2; i++) { | 2428 | for (i = 0; i < conf->raid_disks * 2; i++) { |
2465 | struct md_rdev *rdev; | 2429 | struct md_rdev *rdev; |
2466 | bio = r1_bio->bios[i]; | 2430 | bio = r1_bio->bios[i]; |
2467 | 2431 | bio_reset(bio); | |
2468 | /* take from bio_init */ | ||
2469 | bio->bi_next = NULL; | ||
2470 | bio->bi_flags &= ~(BIO_POOL_MASK-1); | ||
2471 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
2472 | bio->bi_rw = READ; | ||
2473 | bio->bi_vcnt = 0; | ||
2474 | bio->bi_idx = 0; | ||
2475 | bio->bi_phys_segments = 0; | ||
2476 | bio->bi_size = 0; | ||
2477 | bio->bi_end_io = NULL; | ||
2478 | bio->bi_private = NULL; | ||
2479 | 2432 | ||
2480 | rdev = rcu_dereference(conf->mirrors[i].rdev); | 2433 | rdev = rcu_dereference(conf->mirrors[i].rdev); |
2481 | if (rdev == NULL || | 2434 | if (rdev == NULL || |
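The driver-local bi_complete()/submit_bio_wait() pair is deleted here because an equivalent helper moves into the core block code (the start of that generic version, struct submit_bio_ret and its endio, is visible at the very end of this diff). A hedged usage sketch, assuming the generic helper returns 0 on success and a negative errno on failure; sketch_sync_read_page and its error handling are illustrative only:

#include <linux/bio.h>
#include <linux/fs.h>

/* Read one page synchronously; no retry, badblock or barrier handling. */
static int sketch_sync_read_page(struct block_device *bdev, sector_t sector,
                                 struct page *page)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        int ret;

        bio->bi_bdev   = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, PAGE_SIZE, 0);

        ret = submit_bio_wait(READ, bio);       /* sleeps until the bio completes */
        bio_put(bio);
        return ret;
}

Note that the removed md-local copy returned the BIO_UPTODATE bit (1 on success), so callers' success tests are worth re-reading against whichever convention the generic helper uses.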
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 018741ba9310..59d4daa5f4c7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1174,14 +1174,13 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1174 | /* If this request crosses a chunk boundary, we need to | 1174 | /* If this request crosses a chunk boundary, we need to |
1175 | * split it. This will only happen for 1 PAGE (or less) requests. | 1175 | * split it. This will only happen for 1 PAGE (or less) requests. |
1176 | */ | 1176 | */ |
1177 | if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) | 1177 | if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio) |
1178 | > chunk_sects | 1178 | > chunk_sects |
1179 | && (conf->geo.near_copies < conf->geo.raid_disks | 1179 | && (conf->geo.near_copies < conf->geo.raid_disks |
1180 | || conf->prev.near_copies < conf->prev.raid_disks))) { | 1180 | || conf->prev.near_copies < conf->prev.raid_disks))) { |
1181 | struct bio_pair *bp; | 1181 | struct bio_pair *bp; |
1182 | /* Sanity check -- queue functions should prevent this happening */ | 1182 | /* Sanity check -- queue functions should prevent this happening */ |
1183 | if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) || | 1183 | if (bio_segments(bio) > 1) |
1184 | bio->bi_idx != 0) | ||
1185 | goto bad_map; | 1184 | goto bad_map; |
1186 | /* This is a one page bio that upper layers | 1185 | /* This is a one page bio that upper layers |
1187 | * refuse to split for us, so we need to split it. | 1186 | * refuse to split for us, so we need to split it. |
@@ -1214,7 +1213,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1214 | bad_map: | 1213 | bad_map: |
1215 | printk("md/raid10:%s: make_request bug: can't convert block across chunks" | 1214 | printk("md/raid10:%s: make_request bug: can't convert block across chunks" |
1216 | " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, | 1215 | " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, |
1217 | (unsigned long long)bio->bi_sector, bio->bi_size >> 10); | 1216 | (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2); |
1218 | 1217 | ||
1219 | bio_io_error(bio); | 1218 | bio_io_error(bio); |
1220 | return; | 1219 | return; |
@@ -1229,7 +1228,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1229 | */ | 1228 | */ |
1230 | wait_barrier(conf); | 1229 | wait_barrier(conf); |
1231 | 1230 | ||
1232 | sectors = bio->bi_size >> 9; | 1231 | sectors = bio_sectors(bio); |
1233 | while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && | 1232 | while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && |
1234 | bio->bi_sector < conf->reshape_progress && | 1233 | bio->bi_sector < conf->reshape_progress && |
1235 | bio->bi_sector + sectors > conf->reshape_progress) { | 1234 | bio->bi_sector + sectors > conf->reshape_progress) { |
@@ -1331,8 +1330,7 @@ read_again: | |||
1331 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); | 1330 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); |
1332 | 1331 | ||
1333 | r10_bio->master_bio = bio; | 1332 | r10_bio->master_bio = bio; |
1334 | r10_bio->sectors = ((bio->bi_size >> 9) | 1333 | r10_bio->sectors = bio_sectors(bio) - sectors_handled; |
1335 | - sectors_handled); | ||
1336 | r10_bio->state = 0; | 1334 | r10_bio->state = 0; |
1337 | r10_bio->mddev = mddev; | 1335 | r10_bio->mddev = mddev; |
1338 | r10_bio->sector = bio->bi_sector + sectors_handled; | 1336 | r10_bio->sector = bio->bi_sector + sectors_handled; |
@@ -1574,7 +1572,7 @@ retry_write: | |||
1574 | * after checking if we need to go around again. | 1572 | * after checking if we need to go around again. |
1575 | */ | 1573 | */ |
1576 | 1574 | ||
1577 | if (sectors_handled < (bio->bi_size >> 9)) { | 1575 | if (sectors_handled < bio_sectors(bio)) { |
1578 | one_write_done(r10_bio); | 1576 | one_write_done(r10_bio); |
1579 | /* We need another r10_bio. It has already been counted | 1577 | /* We need another r10_bio. It has already been counted |
1580 | * in bio->bi_phys_segments. | 1578 | * in bio->bi_phys_segments. |
@@ -1582,7 +1580,7 @@ retry_write: | |||
1582 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); | 1580 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); |
1583 | 1581 | ||
1584 | r10_bio->master_bio = bio; | 1582 | r10_bio->master_bio = bio; |
1585 | r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled; | 1583 | r10_bio->sectors = bio_sectors(bio) - sectors_handled; |
1586 | 1584 | ||
1587 | r10_bio->mddev = mddev; | 1585 | r10_bio->mddev = mddev; |
1588 | r10_bio->sector = bio->bi_sector + sectors_handled; | 1586 | r10_bio->sector = bio->bi_sector + sectors_handled; |
@@ -2084,13 +2082,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) | |||
2084 | * First we need to fixup bv_offset, bv_len and | 2082 | * First we need to fixup bv_offset, bv_len and |
2085 | * bi_vecs, as the read request might have corrupted these | 2083 | * bi_vecs, as the read request might have corrupted these |
2086 | */ | 2084 | */ |
2085 | bio_reset(tbio); | ||
2086 | |||
2087 | tbio->bi_vcnt = vcnt; | 2087 | tbio->bi_vcnt = vcnt; |
2088 | tbio->bi_size = r10_bio->sectors << 9; | 2088 | tbio->bi_size = r10_bio->sectors << 9; |
2089 | tbio->bi_idx = 0; | ||
2090 | tbio->bi_phys_segments = 0; | ||
2091 | tbio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
2092 | tbio->bi_flags |= 1 << BIO_UPTODATE; | ||
2093 | tbio->bi_next = NULL; | ||
2094 | tbio->bi_rw = WRITE; | 2089 | tbio->bi_rw = WRITE; |
2095 | tbio->bi_private = r10_bio; | 2090 | tbio->bi_private = r10_bio; |
2096 | tbio->bi_sector = r10_bio->devs[i].addr; | 2091 | tbio->bi_sector = r10_bio->devs[i].addr; |
@@ -2108,7 +2103,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) | |||
2108 | d = r10_bio->devs[i].devnum; | 2103 | d = r10_bio->devs[i].devnum; |
2109 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); | 2104 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); |
2110 | atomic_inc(&r10_bio->remaining); | 2105 | atomic_inc(&r10_bio->remaining); |
2111 | md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9); | 2106 | md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio)); |
2112 | 2107 | ||
2113 | tbio->bi_sector += conf->mirrors[d].rdev->data_offset; | 2108 | tbio->bi_sector += conf->mirrors[d].rdev->data_offset; |
2114 | tbio->bi_bdev = conf->mirrors[d].rdev->bdev; | 2109 | tbio->bi_bdev = conf->mirrors[d].rdev->bdev; |
@@ -2133,7 +2128,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) | |||
2133 | d = r10_bio->devs[i].devnum; | 2128 | d = r10_bio->devs[i].devnum; |
2134 | atomic_inc(&r10_bio->remaining); | 2129 | atomic_inc(&r10_bio->remaining); |
2135 | md_sync_acct(conf->mirrors[d].replacement->bdev, | 2130 | md_sync_acct(conf->mirrors[d].replacement->bdev, |
2136 | tbio->bi_size >> 9); | 2131 | bio_sectors(tbio)); |
2137 | generic_make_request(tbio); | 2132 | generic_make_request(tbio); |
2138 | } | 2133 | } |
2139 | 2134 | ||
@@ -2259,13 +2254,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) | |||
2259 | wbio2 = r10_bio->devs[1].repl_bio; | 2254 | wbio2 = r10_bio->devs[1].repl_bio; |
2260 | if (wbio->bi_end_io) { | 2255 | if (wbio->bi_end_io) { |
2261 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); | 2256 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); |
2262 | md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); | 2257 | md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); |
2263 | generic_make_request(wbio); | 2258 | generic_make_request(wbio); |
2264 | } | 2259 | } |
2265 | if (wbio2 && wbio2->bi_end_io) { | 2260 | if (wbio2 && wbio2->bi_end_io) { |
2266 | atomic_inc(&conf->mirrors[d].replacement->nr_pending); | 2261 | atomic_inc(&conf->mirrors[d].replacement->nr_pending); |
2267 | md_sync_acct(conf->mirrors[d].replacement->bdev, | 2262 | md_sync_acct(conf->mirrors[d].replacement->bdev, |
2268 | wbio2->bi_size >> 9); | 2263 | bio_sectors(wbio2)); |
2269 | generic_make_request(wbio2); | 2264 | generic_make_request(wbio2); |
2270 | } | 2265 | } |
2271 | } | 2266 | } |
@@ -2536,25 +2531,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2536 | } | 2531 | } |
2537 | } | 2532 | } |
2538 | 2533 | ||
2539 | static void bi_complete(struct bio *bio, int error) | ||
2540 | { | ||
2541 | complete((struct completion *)bio->bi_private); | ||
2542 | } | ||
2543 | |||
2544 | static int submit_bio_wait(int rw, struct bio *bio) | ||
2545 | { | ||
2546 | struct completion event; | ||
2547 | rw |= REQ_SYNC; | ||
2548 | |||
2549 | init_completion(&event); | ||
2550 | bio->bi_private = &event; | ||
2551 | bio->bi_end_io = bi_complete; | ||
2552 | submit_bio(rw, bio); | ||
2553 | wait_for_completion(&event); | ||
2554 | |||
2555 | return test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
2556 | } | ||
2557 | |||
2558 | static int narrow_write_error(struct r10bio *r10_bio, int i) | 2534 | static int narrow_write_error(struct r10bio *r10_bio, int i) |
2559 | { | 2535 | { |
2560 | struct bio *bio = r10_bio->master_bio; | 2536 | struct bio *bio = r10_bio->master_bio; |
@@ -2695,8 +2671,7 @@ read_more: | |||
2695 | r10_bio = mempool_alloc(conf->r10bio_pool, | 2671 | r10_bio = mempool_alloc(conf->r10bio_pool, |
2696 | GFP_NOIO); | 2672 | GFP_NOIO); |
2697 | r10_bio->master_bio = mbio; | 2673 | r10_bio->master_bio = mbio; |
2698 | r10_bio->sectors = (mbio->bi_size >> 9) | 2674 | r10_bio->sectors = bio_sectors(mbio) - sectors_handled; |
2699 | - sectors_handled; | ||
2700 | r10_bio->state = 0; | 2675 | r10_bio->state = 0; |
2701 | set_bit(R10BIO_ReadError, | 2676 | set_bit(R10BIO_ReadError, |
2702 | &r10_bio->state); | 2677 | &r10_bio->state); |
@@ -3133,6 +3108,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3133 | } | 3108 | } |
3134 | } | 3109 | } |
3135 | bio = r10_bio->devs[0].bio; | 3110 | bio = r10_bio->devs[0].bio; |
3111 | bio_reset(bio); | ||
3136 | bio->bi_next = biolist; | 3112 | bio->bi_next = biolist; |
3137 | biolist = bio; | 3113 | biolist = bio; |
3138 | bio->bi_private = r10_bio; | 3114 | bio->bi_private = r10_bio; |
@@ -3157,6 +3133,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3157 | rdev = mirror->rdev; | 3133 | rdev = mirror->rdev; |
3158 | if (!test_bit(In_sync, &rdev->flags)) { | 3134 | if (!test_bit(In_sync, &rdev->flags)) { |
3159 | bio = r10_bio->devs[1].bio; | 3135 | bio = r10_bio->devs[1].bio; |
3136 | bio_reset(bio); | ||
3160 | bio->bi_next = biolist; | 3137 | bio->bi_next = biolist; |
3161 | biolist = bio; | 3138 | biolist = bio; |
3162 | bio->bi_private = r10_bio; | 3139 | bio->bi_private = r10_bio; |
@@ -3185,6 +3162,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3185 | if (rdev == NULL || bio == NULL || | 3162 | if (rdev == NULL || bio == NULL || |
3186 | test_bit(Faulty, &rdev->flags)) | 3163 | test_bit(Faulty, &rdev->flags)) |
3187 | break; | 3164 | break; |
3165 | bio_reset(bio); | ||
3188 | bio->bi_next = biolist; | 3166 | bio->bi_next = biolist; |
3189 | biolist = bio; | 3167 | biolist = bio; |
3190 | bio->bi_private = r10_bio; | 3168 | bio->bi_private = r10_bio; |
@@ -3283,7 +3261,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3283 | r10_bio->devs[i].repl_bio->bi_end_io = NULL; | 3261 | r10_bio->devs[i].repl_bio->bi_end_io = NULL; |
3284 | 3262 | ||
3285 | bio = r10_bio->devs[i].bio; | 3263 | bio = r10_bio->devs[i].bio; |
3286 | bio->bi_end_io = NULL; | 3264 | bio_reset(bio); |
3287 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 3265 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
3288 | if (conf->mirrors[d].rdev == NULL || | 3266 | if (conf->mirrors[d].rdev == NULL || |
3289 | test_bit(Faulty, &conf->mirrors[d].rdev->flags)) | 3267 | test_bit(Faulty, &conf->mirrors[d].rdev->flags)) |
@@ -3320,6 +3298,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3320 | 3298 | ||
3321 | /* Need to set up for writing to the replacement */ | 3299 | /* Need to set up for writing to the replacement */ |
3322 | bio = r10_bio->devs[i].repl_bio; | 3300 | bio = r10_bio->devs[i].repl_bio; |
3301 | bio_reset(bio); | ||
3323 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 3302 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
3324 | 3303 | ||
3325 | sector = r10_bio->devs[i].addr; | 3304 | sector = r10_bio->devs[i].addr; |
@@ -3353,17 +3332,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3353 | } | 3332 | } |
3354 | } | 3333 | } |
3355 | 3334 | ||
3356 | for (bio = biolist; bio ; bio=bio->bi_next) { | ||
3357 | |||
3358 | bio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
3359 | if (bio->bi_end_io) | ||
3360 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
3361 | bio->bi_vcnt = 0; | ||
3362 | bio->bi_idx = 0; | ||
3363 | bio->bi_phys_segments = 0; | ||
3364 | bio->bi_size = 0; | ||
3365 | } | ||
3366 | |||
3367 | nr_sectors = 0; | 3335 | nr_sectors = 0; |
3368 | if (sector_nr + max_sync < max_sector) | 3336 | if (sector_nr + max_sync < max_sector) |
3369 | max_sector = sector_nr + max_sync; | 3337 | max_sector = sector_nr + max_sync; |
@@ -4411,7 +4379,6 @@ read_more: | |||
4411 | read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); | 4379 | read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); |
4412 | read_bio->bi_flags |= 1 << BIO_UPTODATE; | 4380 | read_bio->bi_flags |= 1 << BIO_UPTODATE; |
4413 | read_bio->bi_vcnt = 0; | 4381 | read_bio->bi_vcnt = 0; |
4414 | read_bio->bi_idx = 0; | ||
4415 | read_bio->bi_size = 0; | 4382 | read_bio->bi_size = 0; |
4416 | r10_bio->master_bio = read_bio; | 4383 | r10_bio->master_bio = read_bio; |
4417 | r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; | 4384 | r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; |
@@ -4435,17 +4402,14 @@ read_more: | |||
4435 | } | 4402 | } |
4436 | if (!rdev2 || test_bit(Faulty, &rdev2->flags)) | 4403 | if (!rdev2 || test_bit(Faulty, &rdev2->flags)) |
4437 | continue; | 4404 | continue; |
4405 | |||
4406 | bio_reset(b); | ||
4438 | b->bi_bdev = rdev2->bdev; | 4407 | b->bi_bdev = rdev2->bdev; |
4439 | b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; | 4408 | b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; |
4440 | b->bi_private = r10_bio; | 4409 | b->bi_private = r10_bio; |
4441 | b->bi_end_io = end_reshape_write; | 4410 | b->bi_end_io = end_reshape_write; |
4442 | b->bi_rw = WRITE; | 4411 | b->bi_rw = WRITE; |
4443 | b->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
4444 | b->bi_flags |= 1 << BIO_UPTODATE; | ||
4445 | b->bi_next = blist; | 4412 | b->bi_next = blist; |
4446 | b->bi_vcnt = 0; | ||
4447 | b->bi_idx = 0; | ||
4448 | b->bi_size = 0; | ||
4449 | blist = b; | 4413 | blist = b; |
4450 | } | 4414 | } |
4451 | 4415 | ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4a7be455d6d8..9359828ffe26 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -90,7 +90,7 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) | |||
90 | */ | 90 | */ |
91 | static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) | 91 | static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) |
92 | { | 92 | { |
93 | int sectors = bio->bi_size >> 9; | 93 | int sectors = bio_sectors(bio); |
94 | if (bio->bi_sector + sectors < sector + STRIPE_SECTORS) | 94 | if (bio->bi_sector + sectors < sector + STRIPE_SECTORS) |
95 | return bio->bi_next; | 95 | return bio->bi_next; |
96 | else | 96 | else |
@@ -569,14 +569,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
569 | bi = &sh->dev[i].req; | 569 | bi = &sh->dev[i].req; |
570 | rbi = &sh->dev[i].rreq; /* For writing to replacement */ | 570 | rbi = &sh->dev[i].rreq; /* For writing to replacement */ |
571 | 571 | ||
572 | bi->bi_rw = rw; | ||
573 | rbi->bi_rw = rw; | ||
574 | if (rw & WRITE) { | ||
575 | bi->bi_end_io = raid5_end_write_request; | ||
576 | rbi->bi_end_io = raid5_end_write_request; | ||
577 | } else | ||
578 | bi->bi_end_io = raid5_end_read_request; | ||
579 | |||
580 | rcu_read_lock(); | 572 | rcu_read_lock(); |
581 | rrdev = rcu_dereference(conf->disks[i].replacement); | 573 | rrdev = rcu_dereference(conf->disks[i].replacement); |
582 | smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ | 574 | smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ |
@@ -651,7 +643,14 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
651 | 643 | ||
652 | set_bit(STRIPE_IO_STARTED, &sh->state); | 644 | set_bit(STRIPE_IO_STARTED, &sh->state); |
653 | 645 | ||
646 | bio_reset(bi); | ||
654 | bi->bi_bdev = rdev->bdev; | 647 | bi->bi_bdev = rdev->bdev; |
648 | bi->bi_rw = rw; | ||
649 | bi->bi_end_io = (rw & WRITE) | ||
650 | ? raid5_end_write_request | ||
651 | : raid5_end_read_request; | ||
652 | bi->bi_private = sh; | ||
653 | |||
655 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", | 654 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", |
656 | __func__, (unsigned long long)sh->sector, | 655 | __func__, (unsigned long long)sh->sector, |
657 | bi->bi_rw, i); | 656 | bi->bi_rw, i); |
@@ -665,12 +664,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
665 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) | 664 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) |
666 | bi->bi_rw |= REQ_FLUSH; | 665 | bi->bi_rw |= REQ_FLUSH; |
667 | 666 | ||
668 | bi->bi_flags = 1 << BIO_UPTODATE; | ||
669 | bi->bi_idx = 0; | ||
670 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 667 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
671 | bi->bi_io_vec[0].bv_offset = 0; | 668 | bi->bi_io_vec[0].bv_offset = 0; |
672 | bi->bi_size = STRIPE_SIZE; | 669 | bi->bi_size = STRIPE_SIZE; |
673 | bi->bi_next = NULL; | ||
674 | if (rrdev) | 670 | if (rrdev) |
675 | set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); | 671 | set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); |
676 | 672 | ||
@@ -687,7 +683,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
687 | 683 | ||
688 | set_bit(STRIPE_IO_STARTED, &sh->state); | 684 | set_bit(STRIPE_IO_STARTED, &sh->state); |
689 | 685 | ||
686 | bio_reset(rbi); | ||
690 | rbi->bi_bdev = rrdev->bdev; | 687 | rbi->bi_bdev = rrdev->bdev; |
688 | rbi->bi_rw = rw; | ||
689 | BUG_ON(!(rw & WRITE)); | ||
690 | rbi->bi_end_io = raid5_end_write_request; | ||
691 | rbi->bi_private = sh; | ||
692 | |||
691 | pr_debug("%s: for %llu schedule op %ld on " | 693 | pr_debug("%s: for %llu schedule op %ld on " |
692 | "replacement disc %d\n", | 694 | "replacement disc %d\n", |
693 | __func__, (unsigned long long)sh->sector, | 695 | __func__, (unsigned long long)sh->sector, |
@@ -699,12 +701,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
699 | else | 701 | else |
700 | rbi->bi_sector = (sh->sector | 702 | rbi->bi_sector = (sh->sector |
701 | + rrdev->data_offset); | 703 | + rrdev->data_offset); |
702 | rbi->bi_flags = 1 << BIO_UPTODATE; | ||
703 | rbi->bi_idx = 0; | ||
704 | rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 704 | rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
705 | rbi->bi_io_vec[0].bv_offset = 0; | 705 | rbi->bi_io_vec[0].bv_offset = 0; |
706 | rbi->bi_size = STRIPE_SIZE; | 706 | rbi->bi_size = STRIPE_SIZE; |
707 | rbi->bi_next = NULL; | ||
708 | if (conf->mddev->gendisk) | 707 | if (conf->mddev->gendisk) |
709 | trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), | 708 | trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), |
710 | rbi, disk_devt(conf->mddev->gendisk), | 709 | rbi, disk_devt(conf->mddev->gendisk), |
@@ -2402,11 +2401,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2402 | } else | 2401 | } else |
2403 | bip = &sh->dev[dd_idx].toread; | 2402 | bip = &sh->dev[dd_idx].toread; |
2404 | while (*bip && (*bip)->bi_sector < bi->bi_sector) { | 2403 | while (*bip && (*bip)->bi_sector < bi->bi_sector) { |
2405 | if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) | 2404 | if (bio_end_sector(*bip) > bi->bi_sector) |
2406 | goto overlap; | 2405 | goto overlap; |
2407 | bip = & (*bip)->bi_next; | 2406 | bip = & (*bip)->bi_next; |
2408 | } | 2407 | } |
2409 | if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9)) | 2408 | if (*bip && (*bip)->bi_sector < bio_end_sector(bi)) |
2410 | goto overlap; | 2409 | goto overlap; |
2411 | 2410 | ||
2412 | BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); | 2411 | BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); |
@@ -2422,8 +2421,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2422 | sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && | 2421 | sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && |
2423 | bi && bi->bi_sector <= sector; | 2422 | bi && bi->bi_sector <= sector; |
2424 | bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { | 2423 | bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { |
2425 | if (bi->bi_sector + (bi->bi_size>>9) >= sector) | 2424 | if (bio_end_sector(bi) >= sector) |
2426 | sector = bi->bi_sector + (bi->bi_size>>9); | 2425 | sector = bio_end_sector(bi); |
2427 | } | 2426 | } |
2428 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) | 2427 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) |
2429 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); | 2428 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); |
@@ -3849,7 +3848,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) | |||
3849 | { | 3848 | { |
3850 | sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); | 3849 | sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); |
3851 | unsigned int chunk_sectors = mddev->chunk_sectors; | 3850 | unsigned int chunk_sectors = mddev->chunk_sectors; |
3852 | unsigned int bio_sectors = bio->bi_size >> 9; | 3851 | unsigned int bio_sectors = bio_sectors(bio); |
3853 | 3852 | ||
3854 | if (mddev->new_chunk_sectors < mddev->chunk_sectors) | 3853 | if (mddev->new_chunk_sectors < mddev->chunk_sectors) |
3855 | chunk_sectors = mddev->new_chunk_sectors; | 3854 | chunk_sectors = mddev->new_chunk_sectors; |
@@ -3941,7 +3940,7 @@ static int bio_fits_rdev(struct bio *bi) | |||
3941 | { | 3940 | { |
3942 | struct request_queue *q = bdev_get_queue(bi->bi_bdev); | 3941 | struct request_queue *q = bdev_get_queue(bi->bi_bdev); |
3943 | 3942 | ||
3944 | if ((bi->bi_size>>9) > queue_max_sectors(q)) | 3943 | if (bio_sectors(bi) > queue_max_sectors(q)) |
3945 | return 0; | 3944 | return 0; |
3946 | blk_recount_segments(q, bi); | 3945 | blk_recount_segments(q, bi); |
3947 | if (bi->bi_phys_segments > queue_max_segments(q)) | 3946 | if (bi->bi_phys_segments > queue_max_segments(q)) |
@@ -3988,7 +3987,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
3988 | 0, | 3987 | 0, |
3989 | &dd_idx, NULL); | 3988 | &dd_idx, NULL); |
3990 | 3989 | ||
3991 | end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9); | 3990 | end_sector = bio_end_sector(align_bi); |
3992 | rcu_read_lock(); | 3991 | rcu_read_lock(); |
3993 | rdev = rcu_dereference(conf->disks[dd_idx].replacement); | 3992 | rdev = rcu_dereference(conf->disks[dd_idx].replacement); |
3994 | if (!rdev || test_bit(Faulty, &rdev->flags) || | 3993 | if (!rdev || test_bit(Faulty, &rdev->flags) || |
@@ -4011,7 +4010,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
4011 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); | 4010 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); |
4012 | 4011 | ||
4013 | if (!bio_fits_rdev(align_bi) || | 4012 | if (!bio_fits_rdev(align_bi) || |
4014 | is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, | 4013 | is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi), |
4015 | &first_bad, &bad_sectors)) { | 4014 | &first_bad, &bad_sectors)) { |
4016 | /* too big in some way, or has a known bad block */ | 4015 | /* too big in some way, or has a known bad block */ |
4017 | bio_put(align_bi); | 4016 | bio_put(align_bi); |
@@ -4273,7 +4272,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4273 | } | 4272 | } |
4274 | 4273 | ||
4275 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | 4274 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); |
4276 | last_sector = bi->bi_sector + (bi->bi_size>>9); | 4275 | last_sector = bio_end_sector(bi); |
4277 | bi->bi_next = NULL; | 4276 | bi->bi_next = NULL; |
4278 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 4277 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
4279 | 4278 | ||
@@ -4739,7 +4738,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4739 | logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | 4738 | logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); |
4740 | sector = raid5_compute_sector(conf, logical_sector, | 4739 | sector = raid5_compute_sector(conf, logical_sector, |
4741 | 0, &dd_idx, NULL); | 4740 | 0, &dd_idx, NULL); |
4742 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); | 4741 | last_sector = bio_end_sector(raid_bio); |
4743 | 4742 | ||
4744 | for (; logical_sector < last_sector; | 4743 | for (; logical_sector < last_sector; |
4745 | logical_sector += STRIPE_SECTORS, | 4744 | logical_sector += STRIPE_SECTORS, |
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index ffee6f781e30..dd239bdbfcb4 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c | |||
@@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
2235 | } | 2235 | } |
2236 | 2236 | ||
2237 | /* do we need to support multiple segments? */ | 2237 | /* do we need to support multiple segments? */ |
2238 | if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) { | 2238 | if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { |
2239 | printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n", | 2239 | printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n", |
2240 | ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req), | 2240 | ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req), |
2241 | rsp->bio->bi_vcnt, blk_rq_bytes(rsp)); | 2241 | bio_segments(rsp->bio), blk_rq_bytes(rsp)); |
2242 | return -EINVAL; | 2242 | return -EINVAL; |
2243 | } | 2243 | } |
2244 | 2244 | ||
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 07ba32b07fb0..6eca019bcf30 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c | |||
@@ -822,8 +822,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) | |||
822 | if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) | 822 | if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) |
823 | /* Request is not page-aligned. */ | 823 | /* Request is not page-aligned. */ |
824 | goto fail; | 824 | goto fail; |
825 | if (((bio->bi_size >> 9) + bio->bi_sector) | 825 | if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) { |
826 | > get_capacity(bio->bi_bdev->bd_disk)) { | ||
827 | /* Request beyond end of DCSS segment. */ | 826 | /* Request beyond end of DCSS segment. */ |
828 | goto fail; | 827 | goto fail; |
829 | } | 828 | } |
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 55cbd0180159..f42b0e15410f 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c | |||
@@ -2163,10 +2163,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
2163 | } | 2163 | } |
2164 | 2164 | ||
2165 | /* do we need to support multiple segments? */ | 2165 | /* do we need to support multiple segments? */ |
2166 | if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) { | 2166 | if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { |
2167 | printk("%s: multiple segments req %u %u, rsp %u %u\n", | 2167 | printk("%s: multiple segments req %u %u, rsp %u %u\n", |
2168 | __func__, req->bio->bi_vcnt, blk_rq_bytes(req), | 2168 | __func__, bio_segments(req->bio), blk_rq_bytes(req), |
2169 | rsp->bio->bi_vcnt, blk_rq_bytes(rsp)); | 2169 | bio_segments(rsp->bio), blk_rq_bytes(rsp)); |
2170 | return -EINVAL; | 2170 | return -EINVAL; |
2171 | } | 2171 | } |
2172 | 2172 | ||
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index 8c2ffbe6af0f..193e7ae90c3b 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c | |||
@@ -1939,7 +1939,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
1939 | ioc->transport_cmds.status = MPT2_CMD_PENDING; | 1939 | ioc->transport_cmds.status = MPT2_CMD_PENDING; |
1940 | 1940 | ||
1941 | /* Check if the request is split across multiple segments */ | 1941 | /* Check if the request is split across multiple segments */ |
1942 | if (req->bio->bi_vcnt > 1) { | 1942 | if (bio_segments(req->bio) > 1) { |
1943 | u32 offset = 0; | 1943 | u32 offset = 0; |
1944 | 1944 | ||
1945 | /* Allocate memory and copy the request */ | 1945 | /* Allocate memory and copy the request */ |
@@ -1971,7 +1971,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
1971 | 1971 | ||
1972 | /* Check if the response needs to be populated across | 1972 | /* Check if the response needs to be populated across |
1973 | * multiple segments */ | 1973 | * multiple segments */ |
1974 | if (rsp->bio->bi_vcnt > 1) { | 1974 | if (bio_segments(rsp->bio) > 1) { |
1975 | pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), | 1975 | pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), |
1976 | &pci_dma_in); | 1976 | &pci_dma_in); |
1977 | if (!pci_addr_in) { | 1977 | if (!pci_addr_in) { |
@@ -2038,7 +2038,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
2038 | sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT | | 2038 | sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT | |
2039 | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC); | 2039 | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC); |
2040 | sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; | 2040 | sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; |
2041 | if (req->bio->bi_vcnt > 1) { | 2041 | if (bio_segments(req->bio) > 1) { |
2042 | ioc->base_add_sg_single(psge, sgl_flags | | 2042 | ioc->base_add_sg_single(psge, sgl_flags | |
2043 | (blk_rq_bytes(req) - 4), pci_dma_out); | 2043 | (blk_rq_bytes(req) - 4), pci_dma_out); |
2044 | } else { | 2044 | } else { |
@@ -2054,7 +2054,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
2054 | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | | 2054 | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | |
2055 | MPI2_SGE_FLAGS_END_OF_LIST); | 2055 | MPI2_SGE_FLAGS_END_OF_LIST); |
2056 | sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; | 2056 | sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; |
2057 | if (rsp->bio->bi_vcnt > 1) { | 2057 | if (bio_segments(rsp->bio) > 1) { |
2058 | ioc->base_add_sg_single(psge, sgl_flags | | 2058 | ioc->base_add_sg_single(psge, sgl_flags | |
2059 | (blk_rq_bytes(rsp) + 4), pci_dma_in); | 2059 | (blk_rq_bytes(rsp) + 4), pci_dma_in); |
2060 | } else { | 2060 | } else { |
@@ -2099,7 +2099,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, | |||
2099 | le16_to_cpu(mpi_reply->ResponseDataLength); | 2099 | le16_to_cpu(mpi_reply->ResponseDataLength); |
2100 | /* check if the resp needs to be copied from the allocated | 2100 | /* check if the resp needs to be copied from the allocated |
2101 | * pci mem */ | 2101 | * pci mem */ |
2102 | if (rsp->bio->bi_vcnt > 1) { | 2102 | if (bio_segments(rsp->bio) > 1) { |
2103 | u32 offset = 0; | 2103 | u32 offset = 0; |
2104 | u32 bytes_to_copy = | 2104 | u32 bytes_to_copy = |
2105 | le16_to_cpu(mpi_reply->ResponseDataLength); | 2105 | le16_to_cpu(mpi_reply->ResponseDataLength); |
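mptsas, libsas and mpt2sas (like the raid10 split check earlier) stop reading bi_vcnt directly and ask bio_segments() instead. As a rough stand-in for the 3.10-era macro, illustrative rather than verbatim:

/*
 * Segments still to be processed: the total vector count minus the
 * entries the current index has already walked past.
 */
#define sketch_bio_segments(bio)        ((bio)->bi_vcnt - (bio)->bi_idx)

A "single SG element only" restriction then reads sketch_bio_segments(bio) > 1 -> reject, which tracks the segments that actually remain rather than the vector's total size, and no longer relies on bi_idx being zero.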
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index a3f28f331b2b..8fb42916d8a2 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -27,48 +27,11 @@ | |||
27 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | 29 | ||
30 | struct integrity_slab { | 30 | #define BIP_INLINE_VECS 4 |
31 | struct kmem_cache *slab; | ||
32 | unsigned short nr_vecs; | ||
33 | char name[8]; | ||
34 | }; | ||
35 | |||
36 | #define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } | ||
37 | struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { | ||
38 | IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), | ||
39 | }; | ||
40 | #undef IS | ||
41 | 31 | ||
32 | static struct kmem_cache *bip_slab; | ||
42 | static struct workqueue_struct *kintegrityd_wq; | 33 | static struct workqueue_struct *kintegrityd_wq; |
43 | 34 | ||
44 | static inline unsigned int vecs_to_idx(unsigned int nr) | ||
45 | { | ||
46 | switch (nr) { | ||
47 | case 1: | ||
48 | return 0; | ||
49 | case 2 ... 4: | ||
50 | return 1; | ||
51 | case 5 ... 16: | ||
52 | return 2; | ||
53 | case 17 ... 64: | ||
54 | return 3; | ||
55 | case 65 ... 128: | ||
56 | return 4; | ||
57 | case 129 ... BIO_MAX_PAGES: | ||
58 | return 5; | ||
59 | default: | ||
60 | BUG(); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static inline int use_bip_pool(unsigned int idx) | ||
65 | { | ||
66 | if (idx == BIOVEC_MAX_IDX) | ||
67 | return 1; | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | /** | 35 | /** |
73 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio | 36 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio |
74 | * @bio: bio to attach integrity metadata to | 37 | * @bio: bio to attach integrity metadata to |
@@ -84,37 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, | |||
84 | unsigned int nr_vecs) | 47 | unsigned int nr_vecs) |
85 | { | 48 | { |
86 | struct bio_integrity_payload *bip; | 49 | struct bio_integrity_payload *bip; |
87 | unsigned int idx = vecs_to_idx(nr_vecs); | ||
88 | struct bio_set *bs = bio->bi_pool; | 50 | struct bio_set *bs = bio->bi_pool; |
89 | 51 | unsigned long idx = BIO_POOL_NONE; | |
90 | if (!bs) | 52 | unsigned inline_vecs; |
91 | bs = fs_bio_set; | 53 | |
92 | 54 | if (!bs) { | |
93 | BUG_ON(bio == NULL); | 55 | bip = kmalloc(sizeof(struct bio_integrity_payload) + |
94 | bip = NULL; | 56 | sizeof(struct bio_vec) * nr_vecs, gfp_mask); |
95 | 57 | inline_vecs = nr_vecs; | |
96 | /* Lower order allocations come straight from slab */ | 58 | } else { |
97 | if (!use_bip_pool(idx)) | ||
98 | bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); | ||
99 | |||
100 | /* Use mempool if lower order alloc failed or max vecs were requested */ | ||
101 | if (bip == NULL) { | ||
102 | idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */ | ||
103 | bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); | 59 | bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); |
104 | 60 | inline_vecs = BIP_INLINE_VECS; | |
105 | if (unlikely(bip == NULL)) { | ||
106 | printk(KERN_ERR "%s: could not alloc bip\n", __func__); | ||
107 | return NULL; | ||
108 | } | ||
109 | } | 61 | } |
110 | 62 | ||
63 | if (unlikely(!bip)) | ||
64 | return NULL; | ||
65 | |||
111 | memset(bip, 0, sizeof(*bip)); | 66 | memset(bip, 0, sizeof(*bip)); |
112 | 67 | ||
68 | if (nr_vecs > inline_vecs) { | ||
69 | bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, | ||
70 | bs->bvec_integrity_pool); | ||
71 | if (!bip->bip_vec) | ||
72 | goto err; | ||
73 | } else { | ||
74 | bip->bip_vec = bip->bip_inline_vecs; | ||
75 | } | ||
76 | |||
113 | bip->bip_slab = idx; | 77 | bip->bip_slab = idx; |
114 | bip->bip_bio = bio; | 78 | bip->bip_bio = bio; |
115 | bio->bi_integrity = bip; | 79 | bio->bi_integrity = bip; |
116 | 80 | ||
117 | return bip; | 81 | return bip; |
82 | err: | ||
83 | mempool_free(bip, bs->bio_integrity_pool); | ||
84 | return NULL; | ||
118 | } | 85 | } |
119 | EXPORT_SYMBOL(bio_integrity_alloc); | 86 | EXPORT_SYMBOL(bio_integrity_alloc); |
120 | 87 | ||
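bio_integrity_alloc() now follows the same shape as bio allocation proper: a handful of inline vecs (BIP_INLINE_VECS) live directly behind the payload struct, and only larger requests go to a separate biovec allocation - through bvec_alloc() and a per-bio_set bvec_integrity_pool when a bio_set is attached, or a single kmalloc() covering payload plus vecs when not. A generic sketch of the inline-array-with-fallback pattern, using illustrative names rather than the kernel's:

#include <linux/slab.h>

#define SKETCH_INLINE_VECS 4

struct sketch_vec {
        void            *page;
        unsigned int     len;
        unsigned int     offset;
};

struct sketch_payload {
        struct sketch_vec *vec;         /* points at inline_vecs or a big array */
        unsigned short     nr_vecs;
        struct sketch_vec  inline_vecs[SKETCH_INLINE_VECS];
};

static struct sketch_payload *sketch_alloc(unsigned int nr_vecs, gfp_t gfp)
{
        struct sketch_payload *p = kzalloc(sizeof(*p), gfp);

        if (!p)
                return NULL;

        if (nr_vecs > SKETCH_INLINE_VECS) {
                p->vec = kcalloc(nr_vecs, sizeof(*p->vec), gfp);
                if (!p->vec) {
                        kfree(p);
                        return NULL;
                }
        } else {
                p->vec = p->inline_vecs;        /* common case: no second allocation */
        }
        p->nr_vecs = nr_vecs;
        return p;
}

The matching free side is what the new bip_owns_buf flag and the BIO_POOL_NONE check in bio_integrity_free() sort out: release the separately allocated pieces only if they exist, and only back to wherever they came from.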
@@ -130,20 +97,18 @@ void bio_integrity_free(struct bio *bio) | |||
130 | struct bio_integrity_payload *bip = bio->bi_integrity; | 97 | struct bio_integrity_payload *bip = bio->bi_integrity; |
131 | struct bio_set *bs = bio->bi_pool; | 98 | struct bio_set *bs = bio->bi_pool; |
132 | 99 | ||
133 | if (!bs) | 100 | if (bip->bip_owns_buf) |
134 | bs = fs_bio_set; | ||
135 | |||
136 | BUG_ON(bip == NULL); | ||
137 | |||
138 | /* A cloned bio doesn't own the integrity metadata */ | ||
139 | if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) | ||
140 | && bip->bip_buf != NULL) | ||
141 | kfree(bip->bip_buf); | 101 | kfree(bip->bip_buf); |
142 | 102 | ||
143 | if (use_bip_pool(bip->bip_slab)) | 103 | if (bs) { |
104 | if (bip->bip_slab != BIO_POOL_NONE) | ||
105 | bvec_free(bs->bvec_integrity_pool, bip->bip_vec, | ||
106 | bip->bip_slab); | ||
107 | |||
144 | mempool_free(bip, bs->bio_integrity_pool); | 108 | mempool_free(bip, bs->bio_integrity_pool); |
145 | else | 109 | } else { |
146 | kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); | 110 | kfree(bip); |
111 | } | ||
147 | 112 | ||
148 | bio->bi_integrity = NULL; | 113 | bio->bi_integrity = NULL; |
149 | } | 114 | } |
@@ -419,6 +384,7 @@ int bio_integrity_prep(struct bio *bio) | |||
419 | return -EIO; | 384 | return -EIO; |
420 | } | 385 | } |
421 | 386 | ||
387 | bip->bip_owns_buf = 1; | ||
422 | bip->bip_buf = buf; | 388 | bip->bip_buf = buf; |
423 | bip->bip_size = len; | 389 | bip->bip_size = len; |
424 | bip->bip_sector = bio->bi_sector; | 390 | bip->bip_sector = bio->bi_sector; |
@@ -694,11 +660,11 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) | |||
694 | bp->bio1.bi_integrity = &bp->bip1; | 660 | bp->bio1.bi_integrity = &bp->bip1; |
695 | bp->bio2.bi_integrity = &bp->bip2; | 661 | bp->bio2.bi_integrity = &bp->bip2; |
696 | 662 | ||
697 | bp->iv1 = bip->bip_vec[0]; | 663 | bp->iv1 = bip->bip_vec[bip->bip_idx]; |
698 | bp->iv2 = bip->bip_vec[0]; | 664 | bp->iv2 = bip->bip_vec[bip->bip_idx]; |
699 | 665 | ||
700 | bp->bip1.bip_vec[0] = bp->iv1; | 666 | bp->bip1.bip_vec = &bp->iv1; |
701 | bp->bip2.bip_vec[0] = bp->iv2; | 667 | bp->bip2.bip_vec = &bp->iv2; |
702 | 668 | ||
703 | bp->iv1.bv_len = sectors * bi->tuple_size; | 669 | bp->iv1.bv_len = sectors * bi->tuple_size; |
704 | bp->iv2.bv_offset += sectors * bi->tuple_size; | 670 | bp->iv2.bv_offset += sectors * bi->tuple_size; |
@@ -746,13 +712,14 @@ EXPORT_SYMBOL(bio_integrity_clone); | |||
746 | 712 | ||
747 | int bioset_integrity_create(struct bio_set *bs, int pool_size) | 713 | int bioset_integrity_create(struct bio_set *bs, int pool_size) |
748 | { | 714 | { |
749 | unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); | ||
750 | |||
751 | if (bs->bio_integrity_pool) | 715 | if (bs->bio_integrity_pool) |
752 | return 0; | 716 | return 0; |
753 | 717 | ||
754 | bs->bio_integrity_pool = | 718 | bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); |
755 | mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); | 719 | |
720 | bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); | ||
721 | if (!bs->bvec_integrity_pool) | ||
722 | return -1; | ||
756 | 723 | ||
757 | if (!bs->bio_integrity_pool) | 724 | if (!bs->bio_integrity_pool) |
758 | return -1; | 725 | return -1; |
@@ -765,13 +732,14 @@ void bioset_integrity_free(struct bio_set *bs) | |||
765 | { | 732 | { |
766 | if (bs->bio_integrity_pool) | 733 | if (bs->bio_integrity_pool) |
767 | mempool_destroy(bs->bio_integrity_pool); | 734 | mempool_destroy(bs->bio_integrity_pool); |
735 | |||
736 | if (bs->bvec_integrity_pool) | ||
737 | mempool_destroy(bs->bio_integrity_pool); | ||
768 | } | 738 | } |
769 | EXPORT_SYMBOL(bioset_integrity_free); | 739 | EXPORT_SYMBOL(bioset_integrity_free); |
770 | 740 | ||
771 | void __init bio_integrity_init(void) | 741 | void __init bio_integrity_init(void) |
772 | { | 742 | { |
773 | unsigned int i; | ||
774 | |||
775 | /* | 743 | /* |
776 | * kintegrityd won't block much but may burn a lot of CPU cycles. | 744 | * kintegrityd won't block much but may burn a lot of CPU cycles. |
777 | * Make it highpri CPU intensive wq with max concurrency of 1. | 745 | * Make it highpri CPU intensive wq with max concurrency of 1. |
@@ -781,14 +749,10 @@ void __init bio_integrity_init(void) | |||
781 | if (!kintegrityd_wq) | 749 | if (!kintegrityd_wq) |
782 | panic("Failed to create kintegrityd\n"); | 750 | panic("Failed to create kintegrityd\n"); |
783 | 751 | ||
784 | for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { | 752 | bip_slab = kmem_cache_create("bio_integrity_payload", |
785 | unsigned int size; | 753 | sizeof(struct bio_integrity_payload) + |
786 | 754 | sizeof(struct bio_vec) * BIP_INLINE_VECS, | |
787 | size = sizeof(struct bio_integrity_payload) | 755 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
788 | + bip_slab[i].nr_vecs * sizeof(struct bio_vec); | 756 | if (!bip_slab) |
789 | 757 | panic("Failed to create slab\n"); | |
790 | bip_slab[i].slab = | ||
791 | kmem_cache_create(bip_slab[i].name, size, 0, | ||
792 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | ||
793 | } | ||
794 | } | 758 | } |
diff --git a/fs/bio.c b/fs/bio.c --- a/fs/bio.c +++ b/fs/bio.c | |||
@@ -161,12 +161,12 @@ unsigned int bvec_nr_vecs(unsigned short idx) | |||
161 | return bvec_slabs[idx].nr_vecs; | 161 | return bvec_slabs[idx].nr_vecs; |
162 | } | 162 | } |
163 | 163 | ||
164 | void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) | 164 | void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) |
165 | { | 165 | { |
166 | BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); | 166 | BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); |
167 | 167 | ||
168 | if (idx == BIOVEC_MAX_IDX) | 168 | if (idx == BIOVEC_MAX_IDX) |
169 | mempool_free(bv, bs->bvec_pool); | 169 | mempool_free(bv, pool); |
170 | else { | 170 | else { |
171 | struct biovec_slab *bvs = bvec_slabs + idx; | 171 | struct biovec_slab *bvs = bvec_slabs + idx; |
172 | 172 | ||
@@ -174,8 +174,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) | |||
174 | } | 174 | } |
175 | } | 175 | } |
176 | 176 | ||
177 | struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, | 177 | struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, |
178 | struct bio_set *bs) | 178 | mempool_t *pool) |
179 | { | 179 | { |
180 | struct bio_vec *bvl; | 180 | struct bio_vec *bvl; |
181 | 181 | ||
@@ -211,7 +211,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, | |||
211 | */ | 211 | */ |
212 | if (*idx == BIOVEC_MAX_IDX) { | 212 | if (*idx == BIOVEC_MAX_IDX) { |
213 | fallback: | 213 | fallback: |
214 | bvl = mempool_alloc(bs->bvec_pool, gfp_mask); | 214 | bvl = mempool_alloc(pool, gfp_mask); |
215 | } else { | 215 | } else { |
216 | struct biovec_slab *bvs = bvec_slabs + *idx; | 216 | struct biovec_slab *bvs = bvec_slabs + *idx; |
217 | gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); | 217 | gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); |
@@ -253,8 +253,8 @@ static void bio_free(struct bio *bio) | |||
253 | __bio_free(bio); | 253 | __bio_free(bio); |
254 | 254 | ||
255 | if (bs) { | 255 | if (bs) { |
256 | if (bio_has_allocated_vec(bio)) | 256 | if (bio_flagged(bio, BIO_OWNS_VEC)) |
257 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | 257 | bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); |
258 | 258 | ||
259 | /* | 259 | /* |
260 | * If we have front padding, adjust the bio pointer before freeing | 260 | * If we have front padding, adjust the bio pointer before freeing |
@@ -298,6 +298,54 @@ void bio_reset(struct bio *bio) | |||
298 | } | 298 | } |
299 | EXPORT_SYMBOL(bio_reset); | 299 | EXPORT_SYMBOL(bio_reset); |
300 | 300 | ||
301 | static void bio_alloc_rescue(struct work_struct *work) | ||
302 | { | ||
303 | struct bio_set *bs = container_of(work, struct bio_set, rescue_work); | ||
304 | struct bio *bio; | ||
305 | |||
306 | while (1) { | ||
307 | spin_lock(&bs->rescue_lock); | ||
308 | bio = bio_list_pop(&bs->rescue_list); | ||
309 | spin_unlock(&bs->rescue_lock); | ||
310 | |||
311 | if (!bio) | ||
312 | break; | ||
313 | |||
314 | generic_make_request(bio); | ||
315 | } | ||
316 | } | ||
317 | |||
318 | static void punt_bios_to_rescuer(struct bio_set *bs) | ||
319 | { | ||
320 | struct bio_list punt, nopunt; | ||
321 | struct bio *bio; | ||
322 | |||
323 | /* | ||
324 | * In order to guarantee forward progress we must punt only bios that | ||
325 | * were allocated from this bio_set; otherwise, if there was a bio on | ||
326 | * there for a stacking driver higher up in the stack, processing it | ||
327 | * could require allocating bios from this bio_set, and doing that from | ||
328 | * our own rescuer would be bad. | ||
329 | * | ||
330 | * Since bio lists are singly linked, pop them all instead of trying to | ||
331 | * remove from the middle of the list: | ||
332 | */ | ||
333 | |||
334 | bio_list_init(&punt); | ||
335 | bio_list_init(&nopunt); | ||
336 | |||
337 | while ((bio = bio_list_pop(current->bio_list))) | ||
338 | bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); | ||
339 | |||
340 | *current->bio_list = nopunt; | ||
341 | |||
342 | spin_lock(&bs->rescue_lock); | ||
343 | bio_list_merge(&bs->rescue_list, &punt); | ||
344 | spin_unlock(&bs->rescue_lock); | ||
345 | |||
346 | queue_work(bs->rescue_workqueue, &bs->rescue_work); | ||
347 | } | ||
348 | |||
301 | /** | 349 | /** |
302 | * bio_alloc_bioset - allocate a bio for I/O | 350 | * bio_alloc_bioset - allocate a bio for I/O |
303 | * @gfp_mask: the GFP_ mask given to the slab allocator | 351 | * @gfp_mask: the GFP_ mask given to the slab allocator |
@@ -315,11 +363,27 @@ EXPORT_SYMBOL(bio_reset); | |||
315 | * previously allocated bio for IO before attempting to allocate a new one. | 363 | * previously allocated bio for IO before attempting to allocate a new one. |
316 | * Failure to do so can cause deadlocks under memory pressure. | 364 | * Failure to do so can cause deadlocks under memory pressure. |
317 | * | 365 | * |
366 | * Note that when running under generic_make_request() (i.e. any block | ||
367 | * driver), bios are not submitted until after you return - see the code in | ||
368 | * generic_make_request() that converts recursion into iteration, to prevent | ||
369 | * stack overflows. | ||
370 | * | ||
371 | * This would normally mean allocating multiple bios under | ||
372 | * generic_make_request() would be susceptible to deadlocks, but we have | ||
373 | * deadlock avoidance code that resubmits any blocked bios from a rescuer | ||
374 | * thread. | ||
375 | * | ||
376 | * However, we do not guarantee forward progress for allocations from other | ||
377 | * mempools. Doing multiple allocations from the same mempool under | ||
378 | * generic_make_request() should be avoided - instead, use bio_set's front_pad | ||
379 | * for per bio allocations. | ||
380 | * | ||
318 | * RETURNS: | 381 | * RETURNS: |
319 | * Pointer to new bio on success, NULL on failure. | 382 | * Pointer to new bio on success, NULL on failure. |
320 | */ | 383 | */ |
321 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 384 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
322 | { | 385 | { |
386 | gfp_t saved_gfp = gfp_mask; | ||
323 | unsigned front_pad; | 387 | unsigned front_pad; |
324 | unsigned inline_vecs; | 388 | unsigned inline_vecs; |
325 | unsigned long idx = BIO_POOL_NONE; | 389 | unsigned long idx = BIO_POOL_NONE; |
@@ -337,7 +401,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
337 | front_pad = 0; | 401 | front_pad = 0; |
338 | inline_vecs = nr_iovecs; | 402 | inline_vecs = nr_iovecs; |
339 | } else { | 403 | } else { |
404 | /* | ||
405 | * generic_make_request() converts recursion to iteration; this | ||
406 | * means if we're running beneath it, any bios we allocate and | ||
407 | * submit will not be submitted (and thus freed) until after we | ||
408 | * return. | ||
409 | * | ||
410 | * This exposes us to a potential deadlock if we allocate | ||
411 | * multiple bios from the same bio_set() while running | ||
412 | * underneath generic_make_request(). If we were to allocate | ||
413 | * multiple bios (say a stacking block driver that was splitting | ||
414 | * bios), we would deadlock if we exhausted the mempool's | ||
415 | * reserve. | ||
416 | * | ||
417 | * We solve this, and guarantee forward progress, with a rescuer | ||
418 | * workqueue per bio_set. If we go to allocate and there are | ||
419 | * bios on current->bio_list, we first try the allocation | ||
420 | * without __GFP_WAIT; if that fails, we punt those bios we | ||
421 | * would be blocking to the rescuer workqueue before we retry | ||
422 | * with the original gfp_flags. | ||
423 | */ | ||
424 | |||
425 | if (current->bio_list && !bio_list_empty(current->bio_list)) | ||
426 | gfp_mask &= ~__GFP_WAIT; | ||
427 | |||
340 | p = mempool_alloc(bs->bio_pool, gfp_mask); | 428 | p = mempool_alloc(bs->bio_pool, gfp_mask); |
429 | if (!p && gfp_mask != saved_gfp) { | ||
430 | punt_bios_to_rescuer(bs); | ||
431 | gfp_mask = saved_gfp; | ||
432 | p = mempool_alloc(bs->bio_pool, gfp_mask); | ||
433 | } | ||
434 | |||
341 | front_pad = bs->front_pad; | 435 | front_pad = bs->front_pad; |
342 | inline_vecs = BIO_INLINE_VECS; | 436 | inline_vecs = BIO_INLINE_VECS; |
343 | } | 437 | } |
@@ -349,9 +443,17 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
349 | bio_init(bio); | 443 | bio_init(bio); |
350 | 444 | ||
351 | if (nr_iovecs > inline_vecs) { | 445 | if (nr_iovecs > inline_vecs) { |
352 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 446 | bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); |
447 | if (!bvl && gfp_mask != saved_gfp) { | ||
448 | punt_bios_to_rescuer(bs); | ||
449 | gfp_mask = saved_gfp; | ||
450 | bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); | ||
451 | } | ||
452 | |||
353 | if (unlikely(!bvl)) | 453 | if (unlikely(!bvl)) |
354 | goto err_free; | 454 | goto err_free; |
455 | |||
456 | bio->bi_flags |= 1 << BIO_OWNS_VEC; | ||
355 | } else if (nr_iovecs) { | 457 | } else if (nr_iovecs) { |
356 | bvl = bio->bi_inline_vecs; | 458 | bvl = bio->bi_inline_vecs; |
357 | } | 459 | } |
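Tying the two halves together: the retry dance above only guarantees progress for this bio_set's own mempools, which is why the kernel-doc above steers per-bio bookkeeping toward front_pad rather than a second mempool allocation. A minimal sketch of the front_pad pattern, using hypothetical my_* names; the bio is embedded as the last member so container_of() recovers the context:

#include <linux/kernel.h>
#include <linux/bio.h>

/* Hypothetical per-bio context; the embedded bio must come last. */
struct my_per_bio_ctx {
	void		*private;
	struct bio	clone;
};

static struct bio_set *my_bioset;

static int my_bioset_init(void)
{
	/* front_pad reserves room for the context in front of every bio. */
	my_bioset = bioset_create(BIO_POOL_SIZE,
				  offsetof(struct my_per_bio_ctx, clone));
	return my_bioset ? 0 : -ENOMEM;
}

static struct my_per_bio_ctx *my_alloc_ctx(gfp_t gfp, unsigned nr_vecs)
{
	struct bio *bio = bio_alloc_bioset(gfp, nr_vecs, my_bioset);

	if (!bio)
		return NULL;
	return container_of(bio, struct my_per_bio_ctx, clone);
}
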
@@ -653,6 +755,181 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
653 | } | 755 | } |
654 | EXPORT_SYMBOL(bio_add_page); | 756 | EXPORT_SYMBOL(bio_add_page); |
655 | 757 | ||
758 | struct submit_bio_ret { | ||
759 | struct completion event; | ||
760 | int error; | ||
761 | }; | ||
762 | |||
763 | static void submit_bio_wait_endio(struct bio *bio, int error) | ||
764 | { | ||
765 | struct submit_bio_ret *ret = bio->bi_private; | ||
766 | |||
767 | ret->error = error; | ||
768 | complete(&ret->event); | ||
769 | } | ||
770 | |||
771 | /** | ||
772 | * submit_bio_wait - submit a bio, and wait until it completes | ||
773 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | ||
774 | * @bio: The &struct bio which describes the I/O | ||
775 | * | ||
776 | * Simple wrapper around submit_bio(). Returns 0 on success, or the error from | ||
777 | * bio_endio() on failure. | ||
778 | */ | ||
779 | int submit_bio_wait(int rw, struct bio *bio) | ||
780 | { | ||
781 | struct submit_bio_ret ret; | ||
782 | |||
783 | rw |= REQ_SYNC; | ||
784 | init_completion(&ret.event); | ||
785 | bio->bi_private = &ret; | ||
786 | bio->bi_end_io = submit_bio_wait_endio; | ||
787 | submit_bio(rw, bio); | ||
788 | wait_for_completion(&ret.event); | ||
789 | |||
790 | return ret.error; | ||
791 | } | ||
792 | EXPORT_SYMBOL(submit_bio_wait); | ||
793 | |||
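A minimal usage sketch for the new helper, reading one page synchronously from a block device; field names follow the current (pre-immutable-biovec) struct bio, and read_page_sync() is a hypothetical caller:

#include <linux/fs.h>
#include <linux/bio.h>

static int read_page_sync(struct block_device *bdev, sector_t sector,
			  struct page *page)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(GFP_KERNEL, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
		bio_put(bio);
		return -EIO;
	}

	/* Sleeps until the bio completes; never call this from atomic context. */
	ret = submit_bio_wait(READ, bio);
	bio_put(bio);
	return ret;
}
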
794 | /** | ||
795 | * bio_advance - increment/complete a bio by some number of bytes | ||
796 | * @bio: bio to advance | ||
797 | * @bytes: number of bytes to complete | ||
798 | * | ||
799 | * This updates bi_sector, bi_size and bi_idx; if the number of bytes to | ||
800 | * complete doesn't align with a bvec boundary, then bv_len and bv_offset will | ||
801 | * be updated on the last bvec as well. | ||
802 | * | ||
803 | * @bio will then represent the remaining, uncompleted portion of the io. | ||
804 | */ | ||
805 | void bio_advance(struct bio *bio, unsigned bytes) | ||
806 | { | ||
807 | if (bio_integrity(bio)) | ||
808 | bio_integrity_advance(bio, bytes); | ||
809 | |||
810 | bio->bi_sector += bytes >> 9; | ||
811 | bio->bi_size -= bytes; | ||
812 | |||
813 | if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) | ||
814 | return; | ||
815 | |||
816 | while (bytes) { | ||
817 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { | ||
818 | WARN_ONCE(1, "bio idx %d >= vcnt %d\n", | ||
819 | bio->bi_idx, bio->bi_vcnt); | ||
820 | break; | ||
821 | } | ||
822 | |||
823 | if (bytes >= bio_iovec(bio)->bv_len) { | ||
824 | bytes -= bio_iovec(bio)->bv_len; | ||
825 | bio->bi_idx++; | ||
826 | } else { | ||
827 | bio_iovec(bio)->bv_len -= bytes; | ||
828 | bio_iovec(bio)->bv_offset += bytes; | ||
829 | bytes = 0; | ||
830 | } | ||
831 | } | ||
832 | } | ||
833 | EXPORT_SYMBOL(bio_advance); | ||
834 | |||
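A short sketch of the intended use, trimming the completed prefix of a bio before deciding whether to finish or resubmit it; the numbers in the comment walk through the bookkeeping described above, and complete_partial() is a hypothetical caller:

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Worked example: a bio with two 4K segments (bi_size 8192, bi_idx 0)
 * advanced by 5120 bytes ends up with bi_sector += 10, bi_size = 3072,
 * bi_idx = 1, and the second bvec trimmed to bv_offset += 1024, bv_len = 3072.
 */
static void complete_partial(struct bio *bio, unsigned done_bytes)
{
	bio_advance(bio, done_bytes);	/* bio now describes only the remainder */

	if (!bio->bi_size)
		bio_endio(bio, 0);
	else
		generic_make_request(bio);	/* resubmit the rest */
}
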
835 | /** | ||
836 | * bio_alloc_pages - allocates a single page for each bvec in a bio | ||
837 | * @bio: bio to allocate pages for | ||
838 | * @gfp_mask: flags for allocation | ||
839 | * | ||
840 | * Allocates pages up to @bio->bi_vcnt. | ||
841 | * | ||
842 | * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are | ||
843 | * freed. | ||
844 | */ | ||
845 | int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) | ||
846 | { | ||
847 | int i; | ||
848 | struct bio_vec *bv; | ||
849 | |||
850 | bio_for_each_segment_all(bv, bio, i) { | ||
851 | bv->bv_page = alloc_page(gfp_mask); | ||
852 | if (!bv->bv_page) { | ||
853 | while (--bv >= bio->bi_io_vec) | ||
854 | __free_page(bv->bv_page); | ||
855 | return -ENOMEM; | ||
856 | } | ||
857 | } | ||
858 | |||
859 | return 0; | ||
860 | } | ||
861 | EXPORT_SYMBOL(bio_alloc_pages); | ||
862 | |||
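bio_alloc_pages() only fills in bv_page for bvecs the caller has already described, so the usual sequence is: allocate the bio, set bi_vcnt and the per-segment lengths, then call it. A minimal sketch with a hypothetical helper, full pages only:

#include <linux/bio.h>

static struct bio *alloc_scratch_bio(unsigned nr_pages, gfp_t gfp)
{
	struct bio *bio;
	struct bio_vec *bv;
	int i;

	bio = bio_alloc(gfp, nr_pages);
	if (!bio)
		return NULL;

	bio->bi_vcnt = nr_pages;
	bio->bi_size = nr_pages * PAGE_SIZE;

	bio_for_each_segment_all(bv, bio, i) {
		bv->bv_len = PAGE_SIZE;
		bv->bv_offset = 0;
	}

	/* On failure bio_alloc_pages() has already freed what it allocated. */
	if (bio_alloc_pages(bio, gfp)) {
		bio_put(bio);
		return NULL;
	}
	return bio;
}
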
863 | /** | ||
864 | * bio_copy_data - copy contents of data buffers from one chain of bios to | ||
865 | * another | ||
866 | * @src: source bio list | ||
867 | * @dst: destination bio list | ||
868 | * | ||
869 | * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats | ||
870 | * @src and @dst as linked lists of bios. | ||
871 | * | ||
872 | * Stops when it reaches the end of either @src or @dst - that is, copies | ||
873 | * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). | ||
874 | */ | ||
875 | void bio_copy_data(struct bio *dst, struct bio *src) | ||
876 | { | ||
877 | struct bio_vec *src_bv, *dst_bv; | ||
878 | unsigned src_offset, dst_offset, bytes; | ||
879 | void *src_p, *dst_p; | ||
880 | |||
881 | src_bv = bio_iovec(src); | ||
882 | dst_bv = bio_iovec(dst); | ||
883 | |||
884 | src_offset = src_bv->bv_offset; | ||
885 | dst_offset = dst_bv->bv_offset; | ||
886 | |||
887 | while (1) { | ||
888 | if (src_offset == src_bv->bv_offset + src_bv->bv_len) { | ||
889 | src_bv++; | ||
890 | if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) { | ||
891 | src = src->bi_next; | ||
892 | if (!src) | ||
893 | break; | ||
894 | |||
895 | src_bv = bio_iovec(src); | ||
896 | } | ||
897 | |||
898 | src_offset = src_bv->bv_offset; | ||
899 | } | ||
900 | |||
901 | if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) { | ||
902 | dst_bv++; | ||
903 | if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) { | ||
904 | dst = dst->bi_next; | ||
905 | if (!dst) | ||
906 | break; | ||
907 | |||
908 | dst_bv = bio_iovec(dst); | ||
909 | } | ||
910 | |||
911 | dst_offset = dst_bv->bv_offset; | ||
912 | } | ||
913 | |||
914 | bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset, | ||
915 | src_bv->bv_offset + src_bv->bv_len - src_offset); | ||
916 | |||
917 | src_p = kmap_atomic(src_bv->bv_page); | ||
918 | dst_p = kmap_atomic(dst_bv->bv_page); | ||
919 | |||
920 | memcpy(dst_p + dst_offset, | ||
921 | src_p + src_offset, | ||
922 | bytes); | ||
923 | |||
924 | kunmap_atomic(dst_p); | ||
925 | kunmap_atomic(src_p); | ||
926 | |||
927 | src_offset += bytes; | ||
928 | dst_offset += bytes; | ||
929 | } | ||
930 | } | ||
931 | EXPORT_SYMBOL(bio_copy_data); | ||
932 | |||
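A sketch of the typical consumer: building a private copy of a bio's payload before handing the original back, assuming the source has not been partially advanced (bi_idx is 0); all names are hypothetical:

#include <linux/bio.h>

static struct bio *clone_bio_payload(struct bio *src, gfp_t gfp)
{
	struct bio *dst;
	struct bio_vec *bv;
	int i;

	dst = bio_alloc(gfp, src->bi_vcnt);
	if (!dst)
		return NULL;

	dst->bi_vcnt = src->bi_vcnt;
	dst->bi_size = src->bi_size;

	/* Mirror the source segment lengths so the copy covers everything. */
	bio_for_each_segment_all(bv, dst, i) {
		bv->bv_len = src->bi_io_vec[i].bv_len;
		bv->bv_offset = 0;
	}

	if (bio_alloc_pages(dst, gfp)) {
		bio_put(dst);
		return NULL;
	}

	bio_copy_data(dst, src);	/* copies min(src, dst) bytes */
	return dst;
}
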
656 | struct bio_map_data { | 933 | struct bio_map_data { |
657 | struct bio_vec *iovecs; | 934 | struct bio_vec *iovecs; |
658 | struct sg_iovec *sgvecs; | 935 | struct sg_iovec *sgvecs; |
@@ -715,7 +992,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
715 | int iov_idx = 0; | 992 | int iov_idx = 0; |
716 | unsigned int iov_off = 0; | 993 | unsigned int iov_off = 0; |
717 | 994 | ||
718 | __bio_for_each_segment(bvec, bio, i, 0) { | 995 | bio_for_each_segment_all(bvec, bio, i) { |
719 | char *bv_addr = page_address(bvec->bv_page); | 996 | char *bv_addr = page_address(bvec->bv_page); |
720 | unsigned int bv_len = iovecs[i].bv_len; | 997 | unsigned int bv_len = iovecs[i].bv_len; |
721 | 998 | ||
@@ -897,7 +1174,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, | |||
897 | return bio; | 1174 | return bio; |
898 | cleanup: | 1175 | cleanup: |
899 | if (!map_data) | 1176 | if (!map_data) |
900 | bio_for_each_segment(bvec, bio, i) | 1177 | bio_for_each_segment_all(bvec, bio, i) |
901 | __free_page(bvec->bv_page); | 1178 | __free_page(bvec->bv_page); |
902 | 1179 | ||
903 | bio_put(bio); | 1180 | bio_put(bio); |
@@ -1111,7 +1388,7 @@ static void __bio_unmap_user(struct bio *bio) | |||
1111 | /* | 1388 | /* |
1112 | * make sure we dirty pages we wrote to | 1389 | * make sure we dirty pages we wrote to |
1113 | */ | 1390 | */ |
1114 | __bio_for_each_segment(bvec, bio, i, 0) { | 1391 | bio_for_each_segment_all(bvec, bio, i) { |
1115 | if (bio_data_dir(bio) == READ) | 1392 | if (bio_data_dir(bio) == READ) |
1116 | set_page_dirty_lock(bvec->bv_page); | 1393 | set_page_dirty_lock(bvec->bv_page); |
1117 | 1394 | ||
@@ -1217,7 +1494,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
1217 | int i; | 1494 | int i; |
1218 | char *p = bmd->sgvecs[0].iov_base; | 1495 | char *p = bmd->sgvecs[0].iov_base; |
1219 | 1496 | ||
1220 | __bio_for_each_segment(bvec, bio, i, 0) { | 1497 | bio_for_each_segment_all(bvec, bio, i) { |
1221 | char *addr = page_address(bvec->bv_page); | 1498 | char *addr = page_address(bvec->bv_page); |
1222 | int len = bmd->iovecs[i].bv_len; | 1499 | int len = bmd->iovecs[i].bv_len; |
1223 | 1500 | ||
@@ -1257,7 +1534,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
1257 | if (!reading) { | 1534 | if (!reading) { |
1258 | void *p = data; | 1535 | void *p = data; |
1259 | 1536 | ||
1260 | bio_for_each_segment(bvec, bio, i) { | 1537 | bio_for_each_segment_all(bvec, bio, i) { |
1261 | char *addr = page_address(bvec->bv_page); | 1538 | char *addr = page_address(bvec->bv_page); |
1262 | 1539 | ||
1263 | memcpy(addr, p, bvec->bv_len); | 1540 | memcpy(addr, p, bvec->bv_len); |
@@ -1302,11 +1579,11 @@ EXPORT_SYMBOL(bio_copy_kern); | |||
1302 | */ | 1579 | */ |
1303 | void bio_set_pages_dirty(struct bio *bio) | 1580 | void bio_set_pages_dirty(struct bio *bio) |
1304 | { | 1581 | { |
1305 | struct bio_vec *bvec = bio->bi_io_vec; | 1582 | struct bio_vec *bvec; |
1306 | int i; | 1583 | int i; |
1307 | 1584 | ||
1308 | for (i = 0; i < bio->bi_vcnt; i++) { | 1585 | bio_for_each_segment_all(bvec, bio, i) { |
1309 | struct page *page = bvec[i].bv_page; | 1586 | struct page *page = bvec->bv_page; |
1310 | 1587 | ||
1311 | if (page && !PageCompound(page)) | 1588 | if (page && !PageCompound(page)) |
1312 | set_page_dirty_lock(page); | 1589 | set_page_dirty_lock(page); |
@@ -1315,11 +1592,11 @@ void bio_set_pages_dirty(struct bio *bio) | |||
1315 | 1592 | ||
1316 | static void bio_release_pages(struct bio *bio) | 1593 | static void bio_release_pages(struct bio *bio) |
1317 | { | 1594 | { |
1318 | struct bio_vec *bvec = bio->bi_io_vec; | 1595 | struct bio_vec *bvec; |
1319 | int i; | 1596 | int i; |
1320 | 1597 | ||
1321 | for (i = 0; i < bio->bi_vcnt; i++) { | 1598 | bio_for_each_segment_all(bvec, bio, i) { |
1322 | struct page *page = bvec[i].bv_page; | 1599 | struct page *page = bvec->bv_page; |
1323 | 1600 | ||
1324 | if (page) | 1601 | if (page) |
1325 | put_page(page); | 1602 | put_page(page); |
@@ -1368,16 +1645,16 @@ static void bio_dirty_fn(struct work_struct *work) | |||
1368 | 1645 | ||
1369 | void bio_check_pages_dirty(struct bio *bio) | 1646 | void bio_check_pages_dirty(struct bio *bio) |
1370 | { | 1647 | { |
1371 | struct bio_vec *bvec = bio->bi_io_vec; | 1648 | struct bio_vec *bvec; |
1372 | int nr_clean_pages = 0; | 1649 | int nr_clean_pages = 0; |
1373 | int i; | 1650 | int i; |
1374 | 1651 | ||
1375 | for (i = 0; i < bio->bi_vcnt; i++) { | 1652 | bio_for_each_segment_all(bvec, bio, i) { |
1376 | struct page *page = bvec[i].bv_page; | 1653 | struct page *page = bvec->bv_page; |
1377 | 1654 | ||
1378 | if (PageDirty(page) || PageCompound(page)) { | 1655 | if (PageDirty(page) || PageCompound(page)) { |
1379 | page_cache_release(page); | 1656 | page_cache_release(page); |
1380 | bvec[i].bv_page = NULL; | 1657 | bvec->bv_page = NULL; |
1381 | } else { | 1658 | } else { |
1382 | nr_clean_pages++; | 1659 | nr_clean_pages++; |
1383 | } | 1660 | } |
@@ -1478,8 +1755,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
1478 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, | 1755 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, |
1479 | bi->bi_sector + first_sectors); | 1756 | bi->bi_sector + first_sectors); |
1480 | 1757 | ||
1481 | BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); | 1758 | BUG_ON(bio_segments(bi) > 1); |
1482 | BUG_ON(bi->bi_idx != 0); | ||
1483 | atomic_set(&bp->cnt, 3); | 1759 | atomic_set(&bp->cnt, 3); |
1484 | bp->error = 0; | 1760 | bp->error = 0; |
1485 | bp->bio1 = *bi; | 1761 | bp->bio1 = *bi; |
@@ -1489,8 +1765,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
1489 | bp->bio1.bi_size = first_sectors << 9; | 1765 | bp->bio1.bi_size = first_sectors << 9; |
1490 | 1766 | ||
1491 | if (bi->bi_vcnt != 0) { | 1767 | if (bi->bi_vcnt != 0) { |
1492 | bp->bv1 = bi->bi_io_vec[0]; | 1768 | bp->bv1 = *bio_iovec(bi); |
1493 | bp->bv2 = bi->bi_io_vec[0]; | 1769 | bp->bv2 = *bio_iovec(bi); |
1494 | 1770 | ||
1495 | if (bio_is_rw(bi)) { | 1771 | if (bio_is_rw(bi)) { |
1496 | bp->bv2.bv_offset += first_sectors << 9; | 1772 | bp->bv2.bv_offset += first_sectors << 9; |
@@ -1542,7 +1818,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index, | |||
1542 | if (index >= bio->bi_idx) | 1818 | if (index >= bio->bi_idx) |
1543 | index = bio->bi_vcnt - 1; | 1819 | index = bio->bi_vcnt - 1; |
1544 | 1820 | ||
1545 | __bio_for_each_segment(bv, bio, i, 0) { | 1821 | bio_for_each_segment_all(bv, bio, i) { |
1546 | if (i == index) { | 1822 | if (i == index) { |
1547 | if (offset > bv->bv_offset) | 1823 | if (offset > bv->bv_offset) |
1548 | sectors += (offset - bv->bv_offset) / sector_sz; | 1824 | sectors += (offset - bv->bv_offset) / sector_sz; |
@@ -1560,29 +1836,25 @@ EXPORT_SYMBOL(bio_sector_offset); | |||
1560 | * create memory pools for biovec's in a bio_set. | 1836 | * create memory pools for biovec's in a bio_set. |
1561 | * use the global biovec slabs created for general use. | 1837 | * use the global biovec slabs created for general use. |
1562 | */ | 1838 | */ |
1563 | static int biovec_create_pools(struct bio_set *bs, int pool_entries) | 1839 | mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) |
1564 | { | 1840 | { |
1565 | struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; | 1841 | struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; |
1566 | 1842 | ||
1567 | bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); | 1843 | return mempool_create_slab_pool(pool_entries, bp->slab); |
1568 | if (!bs->bvec_pool) | ||
1569 | return -ENOMEM; | ||
1570 | |||
1571 | return 0; | ||
1572 | } | ||
1573 | |||
1574 | static void biovec_free_pools(struct bio_set *bs) | ||
1575 | { | ||
1576 | mempool_destroy(bs->bvec_pool); | ||
1577 | } | 1844 | } |
1578 | 1845 | ||
1579 | void bioset_free(struct bio_set *bs) | 1846 | void bioset_free(struct bio_set *bs) |
1580 | { | 1847 | { |
1848 | if (bs->rescue_workqueue) | ||
1849 | destroy_workqueue(bs->rescue_workqueue); | ||
1850 | |||
1581 | if (bs->bio_pool) | 1851 | if (bs->bio_pool) |
1582 | mempool_destroy(bs->bio_pool); | 1852 | mempool_destroy(bs->bio_pool); |
1583 | 1853 | ||
1854 | if (bs->bvec_pool) | ||
1855 | mempool_destroy(bs->bvec_pool); | ||
1856 | |||
1584 | bioset_integrity_free(bs); | 1857 | bioset_integrity_free(bs); |
1585 | biovec_free_pools(bs); | ||
1586 | bio_put_slab(bs); | 1858 | bio_put_slab(bs); |
1587 | 1859 | ||
1588 | kfree(bs); | 1860 | kfree(bs); |
@@ -1613,6 +1885,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) | |||
1613 | 1885 | ||
1614 | bs->front_pad = front_pad; | 1886 | bs->front_pad = front_pad; |
1615 | 1887 | ||
1888 | spin_lock_init(&bs->rescue_lock); | ||
1889 | bio_list_init(&bs->rescue_list); | ||
1890 | INIT_WORK(&bs->rescue_work, bio_alloc_rescue); | ||
1891 | |||
1616 | bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); | 1892 | bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); |
1617 | if (!bs->bio_slab) { | 1893 | if (!bs->bio_slab) { |
1618 | kfree(bs); | 1894 | kfree(bs); |
@@ -1623,9 +1899,15 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) | |||
1623 | if (!bs->bio_pool) | 1899 | if (!bs->bio_pool) |
1624 | goto bad; | 1900 | goto bad; |
1625 | 1901 | ||
1626 | if (!biovec_create_pools(bs, pool_size)) | 1902 | bs->bvec_pool = biovec_create_pool(bs, pool_size); |
1627 | return bs; | 1903 | if (!bs->bvec_pool) |
1904 | goto bad; | ||
1905 | |||
1906 | bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); | ||
1907 | if (!bs->rescue_workqueue) | ||
1908 | goto bad; | ||
1628 | 1909 | ||
1910 | return bs; | ||
1629 | bad: | 1911 | bad: |
1630 | bioset_free(bs); | 1912 | bioset_free(bs); |
1631 | return NULL; | 1913 | return NULL; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index d9871c1f0894..2091db8cdd78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1556,7 +1556,7 @@ static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1556 | return 0; | 1556 | return 0; |
1557 | 1557 | ||
1558 | size -= pos; | 1558 | size -= pos; |
1559 | if (size < INT_MAX) | 1559 | if (size < iocb->ki_left) |
1560 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); | 1560 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); |
1561 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | 1561 | return generic_file_aio_read(iocb, iov, nr_segs, pos); |
1562 | } | 1562 | } |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cdee391fc7bf..73f2bfe3ac93 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2560,8 +2560,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
2560 | if (old_compressed) | 2560 | if (old_compressed) |
2561 | contig = bio->bi_sector == sector; | 2561 | contig = bio->bi_sector == sector; |
2562 | else | 2562 | else |
2563 | contig = bio->bi_sector + (bio->bi_size >> 9) == | 2563 | contig = bio_end_sector(bio) == sector; |
2564 | sector; | ||
2565 | 2564 | ||
2566 | if (prev_bio_flags != bio_flags || !contig || | 2565 | if (prev_bio_flags != bio_flags || !contig || |
2567 | merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || | 2566 | merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2854c824ab64..678977226570 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -5177,7 +5177,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, | |||
5177 | } | 5177 | } |
5178 | 5178 | ||
5179 | prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; | 5179 | prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
5180 | if ((bio->bi_size >> 9) > max_sectors) | 5180 | if (bio_sectors(bio) > max_sectors) |
5181 | return 0; | 5181 | return 0; |
5182 | 5182 | ||
5183 | if (!q->merge_bvec_fn) | 5183 | if (!q->merge_bvec_fn) |
diff --git a/fs/buffer.c b/fs/buffer.c index bc1fe14aaa3e..d2a4d1bb2d57 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2977,7 +2977,6 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) | |||
2977 | bio->bi_io_vec[0].bv_offset = bh_offset(bh); | 2977 | bio->bi_io_vec[0].bv_offset = bh_offset(bh); |
2978 | 2978 | ||
2979 | bio->bi_vcnt = 1; | 2979 | bio->bi_vcnt = 1; |
2980 | bio->bi_idx = 0; | ||
2981 | bio->bi_size = bh->b_size; | 2980 | bio->bi_size = bh->b_size; |
2982 | 2981 | ||
2983 | bio->bi_end_io = end_bio_bh_io_sync; | 2982 | bio->bi_end_io = end_bio_bh_io_sync; |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 51d16e067d68..7ab90f5081ee 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -442,8 +442,8 @@ static struct bio *dio_await_one(struct dio *dio) | |||
442 | static int dio_bio_complete(struct dio *dio, struct bio *bio) | 442 | static int dio_bio_complete(struct dio *dio, struct bio *bio) |
443 | { | 443 | { |
444 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 444 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
445 | struct bio_vec *bvec = bio->bi_io_vec; | 445 | struct bio_vec *bvec; |
446 | int page_no; | 446 | unsigned i; |
447 | 447 | ||
448 | if (!uptodate) | 448 | if (!uptodate) |
449 | dio->io_error = -EIO; | 449 | dio->io_error = -EIO; |
@@ -451,8 +451,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) | |||
451 | if (dio->is_async && dio->rw == READ) { | 451 | if (dio->is_async && dio->rw == READ) { |
452 | bio_check_pages_dirty(bio); /* transfers ownership */ | 452 | bio_check_pages_dirty(bio); /* transfers ownership */ |
453 | } else { | 453 | } else { |
454 | for (page_no = 0; page_no < bio->bi_vcnt; page_no++) { | 454 | bio_for_each_segment_all(bvec, bio, i) { |
455 | struct page *page = bvec[page_no].bv_page; | 455 | struct page *page = bvec->bv_page; |
456 | 456 | ||
457 | if (dio->rw == READ && !PageCompound(page)) | 457 | if (dio->rw == READ && !PageCompound(page)) |
458 | set_page_dirty_lock(page); | 458 | set_page_dirty_lock(page); |
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index f936cb50dc0d..b74422888604 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio) | |||
401 | struct bio_vec *bv; | 401 | struct bio_vec *bv; |
402 | unsigned i; | 402 | unsigned i; |
403 | 403 | ||
404 | __bio_for_each_segment(bv, bio, i, 0) { | 404 | bio_for_each_segment_all(bv, bio, i) { |
405 | unsigned this_count = bv->bv_len; | 405 | unsigned this_count = bv->bv_len; |
406 | 406 | ||
407 | if (likely(PAGE_SIZE == this_count)) | 407 | if (likely(PAGE_SIZE == this_count)) |
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index b963f38ac298..7682b970d0f1 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c | |||
@@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) | |||
432 | if (!bio) | 432 | if (!bio) |
433 | continue; | 433 | continue; |
434 | 434 | ||
435 | __bio_for_each_segment(bv, bio, i, 0) { | 435 | bio_for_each_segment_all(bv, bio, i) { |
436 | struct page *page = bv->bv_page; | 436 | struct page *page = bv->bv_page; |
437 | 437 | ||
438 | SetPageUptodate(page); | 438 | SetPageUptodate(page); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 798d4458a4d3..3be57189efd5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/freezer.h> | ||
26 | #include <linux/writeback.h> | 25 | #include <linux/writeback.h> |
27 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
28 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
@@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head) | |||
88 | #define CREATE_TRACE_POINTS | 87 | #define CREATE_TRACE_POINTS |
89 | #include <trace/events/writeback.h> | 88 | #include <trace/events/writeback.h> |
90 | 89 | ||
91 | /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ | ||
92 | static void bdi_wakeup_flusher(struct backing_dev_info *bdi) | ||
93 | { | ||
94 | if (bdi->wb.task) { | ||
95 | wake_up_process(bdi->wb.task); | ||
96 | } else { | ||
97 | /* | ||
98 | * The bdi thread isn't there, wake up the forker thread which | ||
99 | * will create and run it. | ||
100 | */ | ||
101 | wake_up_process(default_backing_dev_info.wb.task); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | static void bdi_queue_work(struct backing_dev_info *bdi, | 90 | static void bdi_queue_work(struct backing_dev_info *bdi, |
106 | struct wb_writeback_work *work) | 91 | struct wb_writeback_work *work) |
107 | { | 92 | { |
@@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, | |||
109 | 94 | ||
110 | spin_lock_bh(&bdi->wb_lock); | 95 | spin_lock_bh(&bdi->wb_lock); |
111 | list_add_tail(&work->list, &bdi->work_list); | 96 | list_add_tail(&work->list, &bdi->work_list); |
112 | if (!bdi->wb.task) | ||
113 | trace_writeback_nothread(bdi, work); | ||
114 | bdi_wakeup_flusher(bdi); | ||
115 | spin_unlock_bh(&bdi->wb_lock); | 97 | spin_unlock_bh(&bdi->wb_lock); |
98 | |||
99 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | ||
116 | } | 100 | } |
117 | 101 | ||
118 | static void | 102 | static void |
@@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
127 | */ | 111 | */ |
128 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 112 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
129 | if (!work) { | 113 | if (!work) { |
130 | if (bdi->wb.task) { | 114 | trace_writeback_nowork(bdi); |
131 | trace_writeback_nowork(bdi); | 115 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); |
132 | wake_up_process(bdi->wb.task); | ||
133 | } | ||
134 | return; | 116 | return; |
135 | } | 117 | } |
136 | 118 | ||
@@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
177 | * writeback as soon as there is no other work to do. | 159 | * writeback as soon as there is no other work to do. |
178 | */ | 160 | */ |
179 | trace_writeback_wake_background(bdi); | 161 | trace_writeback_wake_background(bdi); |
180 | spin_lock_bh(&bdi->wb_lock); | 162 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); |
181 | bdi_wakeup_flusher(bdi); | ||
182 | spin_unlock_bh(&bdi->wb_lock); | ||
183 | } | 163 | } |
184 | 164 | ||
185 | /* | 165 | /* |
@@ -1020,67 +1000,49 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
1020 | 1000 | ||
1021 | /* | 1001 | /* |
1022 | * Handle writeback of dirty data for the device backed by this bdi. Also | 1002 | * Handle writeback of dirty data for the device backed by this bdi. Also |
1023 | * wakes up periodically and does kupdated style flushing. | 1003 | * reschedules periodically and does kupdated style flushing. |
1024 | */ | 1004 | */ |
1025 | int bdi_writeback_thread(void *data) | 1005 | void bdi_writeback_workfn(struct work_struct *work) |
1026 | { | 1006 | { |
1027 | struct bdi_writeback *wb = data; | 1007 | struct bdi_writeback *wb = container_of(to_delayed_work(work), |
1008 | struct bdi_writeback, dwork); | ||
1028 | struct backing_dev_info *bdi = wb->bdi; | 1009 | struct backing_dev_info *bdi = wb->bdi; |
1029 | long pages_written; | 1010 | long pages_written; |
1030 | 1011 | ||
1031 | set_worker_desc("flush-%s", dev_name(bdi->dev)); | 1012 | set_worker_desc("flush-%s", dev_name(bdi->dev)); |
1032 | current->flags |= PF_SWAPWRITE; | 1013 | current->flags |= PF_SWAPWRITE; |
1033 | set_freezable(); | ||
1034 | wb->last_active = jiffies; | ||
1035 | |||
1036 | /* | ||
1037 | * Our parent may run at a different priority, just set us to normal | ||
1038 | */ | ||
1039 | set_user_nice(current, 0); | ||
1040 | |||
1041 | trace_writeback_thread_start(bdi); | ||
1042 | 1014 | ||
1043 | while (!kthread_freezable_should_stop(NULL)) { | 1015 | if (likely(!current_is_workqueue_rescuer() || |
1016 | list_empty(&bdi->bdi_list))) { | ||
1044 | /* | 1017 | /* |
1045 | * Remove own delayed wake-up timer, since we are already awake | 1018 | * The normal path. Keep writing back @bdi until its |
1046 | * and we'll take care of the periodic write-back. | 1019 | * work_list is empty. Note that this path is also taken |
1020 | * if @bdi is shutting down even when we're running off the | ||
1021 | * rescuer as work_list needs to be drained. | ||
1047 | */ | 1022 | */ |
1048 | del_timer(&wb->wakeup_timer); | 1023 | do { |
1049 | 1024 | pages_written = wb_do_writeback(wb, 0); | |
1050 | pages_written = wb_do_writeback(wb, 0); | 1025 | trace_writeback_pages_written(pages_written); |
1051 | 1026 | } while (!list_empty(&bdi->work_list)); | |
1027 | } else { | ||
1028 | /* | ||
1029 | * bdi_wq can't get enough workers and we're running off | ||
1030 | * the emergency worker. Don't hog it. Hopefully, 1024 is | ||
1031 | * enough for efficient IO. | ||
1032 | */ | ||
1033 | pages_written = writeback_inodes_wb(&bdi->wb, 1024, | ||
1034 | WB_REASON_FORKER_THREAD); | ||
1052 | trace_writeback_pages_written(pages_written); | 1035 | trace_writeback_pages_written(pages_written); |
1053 | |||
1054 | if (pages_written) | ||
1055 | wb->last_active = jiffies; | ||
1056 | |||
1057 | set_current_state(TASK_INTERRUPTIBLE); | ||
1058 | if (!list_empty(&bdi->work_list) || kthread_should_stop()) { | ||
1059 | __set_current_state(TASK_RUNNING); | ||
1060 | continue; | ||
1061 | } | ||
1062 | |||
1063 | if (wb_has_dirty_io(wb) && dirty_writeback_interval) | ||
1064 | schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); | ||
1065 | else { | ||
1066 | /* | ||
1067 | * We have nothing to do, so can go sleep without any | ||
1068 | * timeout and save power. When a work is queued or | ||
1069 | * something is made dirty - we will be woken up. | ||
1070 | */ | ||
1071 | schedule(); | ||
1072 | } | ||
1073 | } | 1036 | } |
1074 | 1037 | ||
1075 | /* Flush any work that raced with us exiting */ | 1038 | if (!list_empty(&bdi->work_list) || |
1076 | if (!list_empty(&bdi->work_list)) | 1039 | (wb_has_dirty_io(wb) && dirty_writeback_interval)) |
1077 | wb_do_writeback(wb, 1); | 1040 | queue_delayed_work(bdi_wq, &wb->dwork, |
1041 | msecs_to_jiffies(dirty_writeback_interval * 10)); | ||
1078 | 1042 | ||
1079 | trace_writeback_thread_stop(bdi); | 1043 | current->flags &= ~PF_SWAPWRITE; |
1080 | return 0; | ||
1081 | } | 1044 | } |
1082 | 1045 | ||
1083 | |||
1084 | /* | 1046 | /* |
1085 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | 1047 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
1086 | * the whole world. | 1048 | * the whole world. |
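The shape of the conversion above, a handler that does one pass of work and re-arms its own delayed_work instead of sleeping in a dedicated kthread, is reusable on its own. A toy, self-contained sketch of that pattern (module boilerplate omitted); all toy_* names are hypothetical:

#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static struct workqueue_struct *toy_wq;
static struct delayed_work toy_dwork;
static int toy_rounds = 3;

static void toy_workfn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	pr_info("toy flush pass, %d rounds left\n", toy_rounds);

	/* Re-arm while there is still work to do, as bdi_writeback_workfn() does. */
	if (--toy_rounds > 0)
		queue_delayed_work(toy_wq, dwork, msecs_to_jiffies(5000));
}

static int toy_start(void)
{
	toy_wq = alloc_workqueue("toy", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	if (!toy_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&toy_dwork, toy_workfn);
	mod_delayed_work(toy_wq, &toy_dwork, 0);	/* immediate first pass */
	return 0;
}
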
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 7318abf9d0fb..c5fa758fd844 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno) | |||
300 | u64 nblk; | 300 | u64 nblk; |
301 | 301 | ||
302 | if (bio) { | 302 | if (bio) { |
303 | nblk = bio->bi_sector + bio_sectors(bio); | 303 | nblk = bio_end_sector(bio); |
304 | nblk >>= sdp->sd_fsb2bb_shift; | 304 | nblk >>= sdp->sd_fsb2bb_shift; |
305 | if (blkno == nblk) | 305 | if (blkno == nblk) |
306 | return bio; | 306 | return bio; |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index cbe48ea9318e..c57499dca89c 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -2005,7 +2005,6 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) | |||
2005 | bio->bi_io_vec[0].bv_offset = bp->l_offset; | 2005 | bio->bi_io_vec[0].bv_offset = bp->l_offset; |
2006 | 2006 | ||
2007 | bio->bi_vcnt = 1; | 2007 | bio->bi_vcnt = 1; |
2008 | bio->bi_idx = 0; | ||
2009 | bio->bi_size = LOGPSIZE; | 2008 | bio->bi_size = LOGPSIZE; |
2010 | 2009 | ||
2011 | bio->bi_end_io = lbmIODone; | 2010 | bio->bi_end_io = lbmIODone; |
@@ -2146,7 +2145,6 @@ static void lbmStartIO(struct lbuf * bp) | |||
2146 | bio->bi_io_vec[0].bv_offset = bp->l_offset; | 2145 | bio->bi_io_vec[0].bv_offset = bp->l_offset; |
2147 | 2146 | ||
2148 | bio->bi_vcnt = 1; | 2147 | bio->bi_vcnt = 1; |
2149 | bio->bi_idx = 0; | ||
2150 | bio->bi_size = LOGPSIZE; | 2148 | bio->bi_size = LOGPSIZE; |
2151 | 2149 | ||
2152 | bio->bi_end_io = lbmIODone; | 2150 | bio->bi_end_io = lbmIODone; |
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index e784a217b500..550475ca6a0e 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c | |||
@@ -32,7 +32,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) | |||
32 | bio_vec.bv_len = PAGE_SIZE; | 32 | bio_vec.bv_len = PAGE_SIZE; |
33 | bio_vec.bv_offset = 0; | 33 | bio_vec.bv_offset = 0; |
34 | bio.bi_vcnt = 1; | 34 | bio.bi_vcnt = 1; |
35 | bio.bi_idx = 0; | ||
36 | bio.bi_size = PAGE_SIZE; | 35 | bio.bi_size = PAGE_SIZE; |
37 | bio.bi_bdev = bdev; | 36 | bio.bi_bdev = bdev; |
38 | bio.bi_sector = page->index * (PAGE_SIZE >> 9); | 37 | bio.bi_sector = page->index * (PAGE_SIZE >> 9); |
@@ -108,7 +107,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
108 | if (i >= max_pages) { | 107 | if (i >= max_pages) { |
109 | /* Block layer cannot split bios :( */ | 108 | /* Block layer cannot split bios :( */ |
110 | bio->bi_vcnt = i; | 109 | bio->bi_vcnt = i; |
111 | bio->bi_idx = 0; | ||
112 | bio->bi_size = i * PAGE_SIZE; | 110 | bio->bi_size = i * PAGE_SIZE; |
113 | bio->bi_bdev = super->s_bdev; | 111 | bio->bi_bdev = super->s_bdev; |
114 | bio->bi_sector = ofs >> 9; | 112 | bio->bi_sector = ofs >> 9; |
@@ -136,7 +134,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
136 | unlock_page(page); | 134 | unlock_page(page); |
137 | } | 135 | } |
138 | bio->bi_vcnt = nr_pages; | 136 | bio->bi_vcnt = nr_pages; |
139 | bio->bi_idx = 0; | ||
140 | bio->bi_size = nr_pages * PAGE_SIZE; | 137 | bio->bi_size = nr_pages * PAGE_SIZE; |
141 | bio->bi_bdev = super->s_bdev; | 138 | bio->bi_bdev = super->s_bdev; |
142 | bio->bi_sector = ofs >> 9; | 139 | bio->bi_sector = ofs >> 9; |
@@ -202,7 +199,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, | |||
202 | if (i >= max_pages) { | 199 | if (i >= max_pages) { |
203 | /* Block layer cannot split bios :( */ | 200 | /* Block layer cannot split bios :( */ |
204 | bio->bi_vcnt = i; | 201 | bio->bi_vcnt = i; |
205 | bio->bi_idx = 0; | ||
206 | bio->bi_size = i * PAGE_SIZE; | 202 | bio->bi_size = i * PAGE_SIZE; |
207 | bio->bi_bdev = super->s_bdev; | 203 | bio->bi_bdev = super->s_bdev; |
208 | bio->bi_sector = ofs >> 9; | 204 | bio->bi_sector = ofs >> 9; |
@@ -224,7 +220,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, | |||
224 | bio->bi_io_vec[i].bv_offset = 0; | 220 | bio->bi_io_vec[i].bv_offset = 0; |
225 | } | 221 | } |
226 | bio->bi_vcnt = nr_pages; | 222 | bio->bi_vcnt = nr_pages; |
227 | bio->bi_idx = 0; | ||
228 | bio->bi_size = nr_pages * PAGE_SIZE; | 223 | bio->bi_size = nr_pages * PAGE_SIZE; |
229 | bio->bi_bdev = super->s_bdev; | 224 | bio->bi_bdev = super->s_bdev; |
230 | bio->bi_sector = ofs >> 9; | 225 | bio->bi_sector = ofs >> 9; |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 350459910fe1..c3881553f7d1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/writeback.h> | 18 | #include <linux/writeback.h> |
19 | #include <linux/atomic.h> | 19 | #include <linux/atomic.h> |
20 | #include <linux/sysctl.h> | 20 | #include <linux/sysctl.h> |
21 | #include <linux/workqueue.h> | ||
21 | 22 | ||
22 | struct page; | 23 | struct page; |
23 | struct device; | 24 | struct device; |
@@ -27,7 +28,6 @@ struct dentry; | |||
27 | * Bits in backing_dev_info.state | 28 | * Bits in backing_dev_info.state |
28 | */ | 29 | */ |
29 | enum bdi_state { | 30 | enum bdi_state { |
30 | BDI_pending, /* On its way to being activated */ | ||
31 | BDI_wb_alloc, /* Default embedded wb allocated */ | 31 | BDI_wb_alloc, /* Default embedded wb allocated */ |
32 | BDI_async_congested, /* The async (write) queue is getting full */ | 32 | BDI_async_congested, /* The async (write) queue is getting full */ |
33 | BDI_sync_congested, /* The sync queue is getting full */ | 33 | BDI_sync_congested, /* The sync queue is getting full */ |
@@ -53,10 +53,8 @@ struct bdi_writeback { | |||
53 | unsigned int nr; | 53 | unsigned int nr; |
54 | 54 | ||
55 | unsigned long last_old_flush; /* last old data flush */ | 55 | unsigned long last_old_flush; /* last old data flush */ |
56 | unsigned long last_active; /* last time bdi thread was active */ | ||
57 | 56 | ||
58 | struct task_struct *task; /* writeback thread */ | 57 | struct delayed_work dwork; /* work item used for writeback */ |
59 | struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ | ||
60 | struct list_head b_dirty; /* dirty inodes */ | 58 | struct list_head b_dirty; /* dirty inodes */ |
61 | struct list_head b_io; /* parked for writeback */ | 59 | struct list_head b_io; /* parked for writeback */ |
62 | struct list_head b_more_io; /* parked for more writeback */ | 60 | struct list_head b_more_io; /* parked for more writeback */ |
@@ -123,14 +121,15 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); | |||
123 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | 121 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
124 | enum wb_reason reason); | 122 | enum wb_reason reason); |
125 | void bdi_start_background_writeback(struct backing_dev_info *bdi); | 123 | void bdi_start_background_writeback(struct backing_dev_info *bdi); |
126 | int bdi_writeback_thread(void *data); | 124 | void bdi_writeback_workfn(struct work_struct *work); |
127 | int bdi_has_dirty_io(struct backing_dev_info *bdi); | 125 | int bdi_has_dirty_io(struct backing_dev_info *bdi); |
128 | void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); | 126 | void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); |
129 | void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); | 127 | void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); |
130 | 128 | ||
131 | extern spinlock_t bdi_lock; | 129 | extern spinlock_t bdi_lock; |
132 | extern struct list_head bdi_list; | 130 | extern struct list_head bdi_list; |
133 | extern struct list_head bdi_pending_list; | 131 | |
132 | extern struct workqueue_struct *bdi_wq; | ||
134 | 133 | ||
135 | static inline int wb_has_dirty_io(struct bdi_writeback *wb) | 134 | static inline int wb_has_dirty_io(struct bdi_writeback *wb) |
136 | { | 135 | { |
@@ -336,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) | |||
336 | return bdi->capabilities & BDI_CAP_SWAP_BACKED; | 335 | return bdi->capabilities & BDI_CAP_SWAP_BACKED; |
337 | } | 336 | } |
338 | 337 | ||
339 | static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi) | ||
340 | { | ||
341 | return bdi == &default_backing_dev_info; | ||
342 | } | ||
343 | |||
344 | static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) | 338 | static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) |
345 | { | 339 | { |
346 | return bdi_cap_writeback_dirty(mapping->backing_dev_info); | 340 | return bdi_cap_writeback_dirty(mapping->backing_dev_info); |
diff --git a/include/linux/bio.h b/include/linux/bio.h index 820e7aaad4fd..ef24466d8f82 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -67,6 +67,7 @@ | |||
67 | #define bio_offset(bio) bio_iovec((bio))->bv_offset | 67 | #define bio_offset(bio) bio_iovec((bio))->bv_offset |
68 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) | 68 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) |
69 | #define bio_sectors(bio) ((bio)->bi_size >> 9) | 69 | #define bio_sectors(bio) ((bio)->bi_size >> 9) |
70 | #define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) | ||
70 | 71 | ||
71 | static inline unsigned int bio_cur_bytes(struct bio *bio) | 72 | static inline unsigned int bio_cur_bytes(struct bio *bio) |
72 | { | 73 | { |
@@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio) | |||
84 | return NULL; | 85 | return NULL; |
85 | } | 86 | } |
86 | 87 | ||
87 | static inline int bio_has_allocated_vec(struct bio *bio) | ||
88 | { | ||
89 | return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs; | ||
90 | } | ||
91 | |||
92 | /* | 88 | /* |
93 | * will die | 89 | * will die |
94 | */ | 90 | */ |
@@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio) | |||
136 | #define bio_io_error(bio) bio_endio((bio), -EIO) | 132 | #define bio_io_error(bio) bio_endio((bio), -EIO) |
137 | 133 | ||
138 | /* | 134 | /* |
139 | * drivers should not use the __ version unless they _really_ want to | 135 | * drivers should not use the __ version unless they _really_ know what |
140 | * run through the entire bio and not just pending pieces | 136 | * they're doing |
141 | */ | 137 | */ |
142 | #define __bio_for_each_segment(bvl, bio, i, start_idx) \ | 138 | #define __bio_for_each_segment(bvl, bio, i, start_idx) \ |
143 | for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ | 139 | for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ |
144 | i < (bio)->bi_vcnt; \ | 140 | i < (bio)->bi_vcnt; \ |
145 | bvl++, i++) | 141 | bvl++, i++) |
146 | 142 | ||
143 | /* | ||
144 | * drivers should _never_ use the all version - the bio may have been split | ||
145 | * before it got to the driver and the driver won't own all of it | ||
146 | */ | ||
147 | #define bio_for_each_segment_all(bvl, bio, i) \ | ||
148 | for (i = 0; \ | ||
149 | bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ | ||
150 | i++) | ||
151 | |||
147 | #define bio_for_each_segment(bvl, bio, i) \ | 152 | #define bio_for_each_segment(bvl, bio, i) \ |
148 | __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) | 153 | for (i = (bio)->bi_idx; \ |
154 | bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ | ||
155 | i++) | ||
149 | 156 | ||
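The practical difference: bio_for_each_segment() starts at bi_idx and so walks only the not-yet-completed tail, while the _all variant walks every bvec regardless and is therefore only safe in code that created the bio. A sketch of the owner-side use in a completion handler (my_endio() is hypothetical):

#include <linux/mm.h>
#include <linux/bio.h>

static void my_endio(struct bio *bio, int error)
{
	struct bio_vec *bv;
	int i;

	/* We own this bio, so dropping every page reference is safe. */
	bio_for_each_segment_all(bv, bio, i)
		put_page(bv->bv_page);

	bio_put(bio);
}
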
150 | /* | 157 | /* |
151 | * get a reference to a bio, so it won't disappear. the intended use is | 158 | * get a reference to a bio, so it won't disappear. the intended use is |
@@ -180,9 +187,12 @@ struct bio_integrity_payload { | |||
180 | unsigned short bip_slab; /* slab the bip came from */ | 187 | unsigned short bip_slab; /* slab the bip came from */ |
181 | unsigned short bip_vcnt; /* # of integrity bio_vecs */ | 188 | unsigned short bip_vcnt; /* # of integrity bio_vecs */ |
182 | unsigned short bip_idx; /* current bip_vec index */ | 189 | unsigned short bip_idx; /* current bip_vec index */ |
190 | unsigned bip_owns_buf:1; /* should free bip_buf */ | ||
183 | 191 | ||
184 | struct work_struct bip_work; /* I/O completion */ | 192 | struct work_struct bip_work; /* I/O completion */ |
185 | struct bio_vec bip_vec[0]; /* embedded bvec array */ | 193 | |
194 | struct bio_vec *bip_vec; | ||
195 | struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ | ||
186 | }; | 196 | }; |
187 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 197 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
188 | 198 | ||
@@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio); | |||
211 | 221 | ||
212 | extern struct bio_set *bioset_create(unsigned int, unsigned int); | 222 | extern struct bio_set *bioset_create(unsigned int, unsigned int); |
213 | extern void bioset_free(struct bio_set *); | 223 | extern void bioset_free(struct bio_set *); |
224 | extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); | ||
214 | 225 | ||
215 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); | 226 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); |
216 | extern void bio_put(struct bio *); | 227 | extern void bio_put(struct bio *); |
@@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int); | |||
245 | struct request_queue; | 256 | struct request_queue; |
246 | extern int bio_phys_segments(struct request_queue *, struct bio *); | 257 | extern int bio_phys_segments(struct request_queue *, struct bio *); |
247 | 258 | ||
259 | extern int submit_bio_wait(int rw, struct bio *bio); | ||
260 | extern void bio_advance(struct bio *, unsigned); | ||
261 | |||
248 | extern void bio_init(struct bio *); | 262 | extern void bio_init(struct bio *); |
249 | extern void bio_reset(struct bio *); | 263 | extern void bio_reset(struct bio *); |
250 | 264 | ||
@@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi) | |||
279 | } | 293 | } |
280 | #endif | 294 | #endif |
281 | 295 | ||
296 | extern void bio_copy_data(struct bio *dst, struct bio *src); | ||
297 | extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); | ||
298 | |||
282 | extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, | 299 | extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, |
283 | unsigned long, unsigned int, int, gfp_t); | 300 | unsigned long, unsigned int, int, gfp_t); |
284 | extern struct bio *bio_copy_user_iov(struct request_queue *, | 301 | extern struct bio *bio_copy_user_iov(struct request_queue *, |
@@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, | |||
286 | int, int, gfp_t); | 303 | int, int, gfp_t); |
287 | extern int bio_uncopy_user(struct bio *); | 304 | extern int bio_uncopy_user(struct bio *); |
288 | void zero_fill_bio(struct bio *bio); | 305 | void zero_fill_bio(struct bio *bio); |
289 | extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); | 306 | extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); |
290 | extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); | 307 | extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); |
291 | extern unsigned int bvec_nr_vecs(unsigned short idx); | 308 | extern unsigned int bvec_nr_vecs(unsigned short idx); |
292 | 309 | ||
293 | #ifdef CONFIG_BLK_CGROUP | 310 | #ifdef CONFIG_BLK_CGROUP |
@@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } | |||
298 | static inline void bio_disassociate_task(struct bio *bio) { } | 315 | static inline void bio_disassociate_task(struct bio *bio) { } |
299 | #endif /* CONFIG_BLK_CGROUP */ | 316 | #endif /* CONFIG_BLK_CGROUP */ |
300 | 317 | ||
301 | /* | ||
302 | * bio_set is used to allow other portions of the IO system to | ||
303 | * allocate their own private memory pools for bio and iovec structures. | ||
304 | * These memory pools in turn all allocate from the bio_slab | ||
305 | * and the bvec_slabs[]. | ||
306 | */ | ||
307 | #define BIO_POOL_SIZE 2 | ||
308 | #define BIOVEC_NR_POOLS 6 | ||
309 | #define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) | ||
310 | |||
311 | struct bio_set { | ||
312 | struct kmem_cache *bio_slab; | ||
313 | unsigned int front_pad; | ||
314 | |||
315 | mempool_t *bio_pool; | ||
316 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | ||
317 | mempool_t *bio_integrity_pool; | ||
318 | #endif | ||
319 | mempool_t *bvec_pool; | ||
320 | }; | ||
321 | |||
322 | struct biovec_slab { | ||
323 | int nr_vecs; | ||
324 | char *name; | ||
325 | struct kmem_cache *slab; | ||
326 | }; | ||
327 | |||
328 | /* | ||
329 | * a small number of entries is fine, not going to be performance critical. | ||
330 | * basically we just need to survive | ||
331 | */ | ||
332 | #define BIO_SPLIT_ENTRIES 2 | ||
333 | |||
334 | #ifdef CONFIG_HIGHMEM | 318 | #ifdef CONFIG_HIGHMEM |
335 | /* | 319 | /* |
336 | * remember never ever reenable interrupts between a bvec_kmap_irq and | 320 | * remember never ever reenable interrupts between a bvec_kmap_irq and |
@@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl) | |||
527 | return bio; | 511 | return bio; |
528 | } | 512 | } |
529 | 513 | ||
514 | /* | ||
515 | * bio_set is used to allow other portions of the IO system to | ||
516 | * allocate their own private memory pools for bio and iovec structures. | ||
517 | * These memory pools in turn all allocate from the bio_slab | ||
518 | * and the bvec_slabs[]. | ||
519 | */ | ||
520 | #define BIO_POOL_SIZE 2 | ||
521 | #define BIOVEC_NR_POOLS 6 | ||
522 | #define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) | ||
523 | |||
524 | struct bio_set { | ||
525 | struct kmem_cache *bio_slab; | ||
526 | unsigned int front_pad; | ||
527 | |||
528 | mempool_t *bio_pool; | ||
529 | mempool_t *bvec_pool; | ||
530 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | ||
531 | mempool_t *bio_integrity_pool; | ||
532 | mempool_t *bvec_integrity_pool; | ||
533 | #endif | ||
534 | |||
535 | /* | ||
536 | * Deadlock avoidance for stacking block drivers: see comments in | ||
537 | * bio_alloc_bioset() for details | ||
538 | */ | ||
539 | spinlock_t rescue_lock; | ||
540 | struct bio_list rescue_list; | ||
541 | struct work_struct rescue_work; | ||
542 | struct workqueue_struct *rescue_workqueue; | ||
543 | }; | ||
544 | |||
545 | struct biovec_slab { | ||
546 | int nr_vecs; | ||
547 | char *name; | ||
548 | struct kmem_cache *slab; | ||
549 | }; | ||
550 | |||
551 | /* | ||
552 | * a small number of entries is fine, not going to be performance critical. | ||
553 | * basically we just need to survive | ||
554 | */ | ||
555 | #define BIO_SPLIT_ENTRIES 2 | ||
556 | |||
530 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 557 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
531 | 558 | ||
532 | #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) | 559 | #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 22990cf4439d..fa1abeb45b76 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -118,6 +118,7 @@ struct bio { | |||
118 | * BIO_POOL_IDX() | 118 | * BIO_POOL_IDX() |
119 | */ | 119 | */ |
120 | #define BIO_RESET_BITS 13 | 120 | #define BIO_RESET_BITS 13 |
121 | #define BIO_OWNS_VEC 13 /* bio_free() should free bvec */ | ||
121 | 122 | ||
122 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) | 123 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) |
123 | 124 | ||
@@ -176,6 +177,7 @@ enum rq_flag_bits { | |||
176 | __REQ_IO_STAT, /* account I/O stat */ | 177 | __REQ_IO_STAT, /* account I/O stat */ |
177 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ | 178 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ |
178 | __REQ_KERNEL, /* direct IO to kernel pages */ | 179 | __REQ_KERNEL, /* direct IO to kernel pages */ |
180 | __REQ_PM, /* runtime pm request */ | ||
179 | __REQ_NR_BITS, /* stops here */ | 181 | __REQ_NR_BITS, /* stops here */ |
180 | }; | 182 | }; |
181 | 183 | ||
@@ -198,6 +200,8 @@ enum rq_flag_bits { | |||
198 | REQ_SECURE) | 200 | REQ_SECURE) |
199 | #define REQ_CLONE_MASK REQ_COMMON_MASK | 201 | #define REQ_CLONE_MASK REQ_COMMON_MASK |
200 | 202 | ||
203 | #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) | ||
204 | |||
201 | /* This mask is used for both bio and request merge checking */ | 205 | /* This mask is used for both bio and request merge checking */ |
202 | #define REQ_NOMERGE_FLAGS \ | 206 | #define REQ_NOMERGE_FLAGS \ |
203 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) | 207 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) |
@@ -224,5 +228,6 @@ enum rq_flag_bits { | |||
224 | #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) | 228 | #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) |
225 | #define REQ_SECURE (1 << __REQ_SECURE) | 229 | #define REQ_SECURE (1 << __REQ_SECURE) |
226 | #define REQ_KERNEL (1 << __REQ_KERNEL) | 230 | #define REQ_KERNEL (1 << __REQ_KERNEL) |
231 | #define REQ_PM (1 << __REQ_PM) | ||
227 | 232 | ||
228 | #endif /* __LINUX_BLK_TYPES_H */ | 233 | #endif /* __LINUX_BLK_TYPES_H */ |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e38cfe77f7f0..2fdb4a451b49 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -361,6 +361,12 @@ struct request_queue { | |||
361 | */ | 361 | */ |
362 | struct kobject kobj; | 362 | struct kobject kobj; |
363 | 363 | ||
364 | #ifdef CONFIG_PM_RUNTIME | ||
365 | struct device *dev; | ||
366 | int rpm_status; | ||
367 | unsigned int nr_pending; | ||
368 | #endif | ||
369 | |||
364 | /* | 370 | /* |
365 | * queue settings | 371 | * queue settings |
366 | */ | 372 | */ |
@@ -838,7 +844,7 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, | |||
838 | unsigned int cmd_flags) | 844 | unsigned int cmd_flags) |
839 | { | 845 | { |
840 | if (unlikely(cmd_flags & REQ_DISCARD)) | 846 | if (unlikely(cmd_flags & REQ_DISCARD)) |
841 | return q->limits.max_discard_sectors; | 847 | return min(q->limits.max_discard_sectors, UINT_MAX >> 9); |
842 | 848 | ||
843 | if (unlikely(cmd_flags & REQ_WRITE_SAME)) | 849 | if (unlikely(cmd_flags & REQ_WRITE_SAME)) |
844 | return q->limits.max_write_same_sectors; | 850 | return q->limits.max_write_same_sectors; |
@@ -961,6 +967,27 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int); | |||
961 | extern void blk_put_queue(struct request_queue *); | 967 | extern void blk_put_queue(struct request_queue *); |
962 | 968 | ||
963 | /* | 969 | /* |
970 | * block layer runtime pm functions | ||
971 | */ | ||
972 | #ifdef CONFIG_PM_RUNTIME | ||
973 | extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); | ||
974 | extern int blk_pre_runtime_suspend(struct request_queue *q); | ||
975 | extern void blk_post_runtime_suspend(struct request_queue *q, int err); | ||
976 | extern void blk_pre_runtime_resume(struct request_queue *q); | ||
977 | extern void blk_post_runtime_resume(struct request_queue *q, int err); | ||
978 | #else | ||
979 | static inline void blk_pm_runtime_init(struct request_queue *q, | ||
980 | struct device *dev) {} | ||
981 | static inline int blk_pre_runtime_suspend(struct request_queue *q) | ||
982 | { | ||
983 | return -ENOSYS; | ||
984 | } | ||
985 | static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} | ||
986 | static inline void blk_pre_runtime_resume(struct request_queue *q) {} | ||
987 | static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} | ||
988 | #endif | ||
989 | |||
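A sketch of how a low-level driver might wire these hooks into its runtime PM callbacks; the mydrv_* functions are hypothetical stand-ins, and the sketch assumes probe() stored the request_queue in the device's drvdata and called blk_pm_runtime_init(q, dev) once the queue was set up:

#include <linux/device.h>
#include <linux/pm.h>
#include <linux/blkdev.h>

/* Hypothetical hardware hooks; a real driver does its power sequencing here. */
static int mydrv_quiesce_hw(struct device *dev) { return 0; }
static int mydrv_wake_hw(struct device *dev) { return 0; }

static int mydrv_runtime_suspend(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);
	int err;

	err = blk_pre_runtime_suspend(q);
	if (err)
		return err;		/* queue still busy, stay active */

	err = mydrv_quiesce_hw(dev);
	blk_post_runtime_suspend(q, err);
	return err;
}

static int mydrv_runtime_resume(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);
	int err;

	blk_pre_runtime_resume(q);
	err = mydrv_wake_hw(dev);
	blk_post_runtime_resume(q, err);
	return err;
}

static const struct dev_pm_ops mydrv_pm_ops = {
	SET_RUNTIME_PM_OPS(mydrv_runtime_suspend, mydrv_runtime_resume, NULL)
};
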
990 | /* | ||
964 | * blk_plug permits building a queue of related requests by holding the I/O | 991 | * blk_plug permits building a queue of related requests by holding the I/O |
965 | * fragments for a short period. This allows merging of sequential requests | 992 | * fragments for a short period. This allows merging of sequential requests |
966 | * into single larger request. As the requests are moved from a per-task list to | 993 | * into single larger request. As the requests are moved from a per-task list to |
diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9c1467357b03..60ae7c3db912 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h | |||
@@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce, | |||
244 | __entry->dev = bio->bi_bdev ? | 244 | __entry->dev = bio->bi_bdev ? |
245 | bio->bi_bdev->bd_dev : 0; | 245 | bio->bi_bdev->bd_dev : 0; |
246 | __entry->sector = bio->bi_sector; | 246 | __entry->sector = bio->bi_sector; |
247 | __entry->nr_sector = bio->bi_size >> 9; | 247 | __entry->nr_sector = bio_sectors(bio); |
248 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 248 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
249 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 249 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
250 | ), | 250 | ), |
@@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete, | |||
281 | TP_fast_assign( | 281 | TP_fast_assign( |
282 | __entry->dev = bio->bi_bdev->bd_dev; | 282 | __entry->dev = bio->bi_bdev->bd_dev; |
283 | __entry->sector = bio->bi_sector; | 283 | __entry->sector = bio->bi_sector; |
284 | __entry->nr_sector = bio->bi_size >> 9; | 284 | __entry->nr_sector = bio_sectors(bio); |
285 | __entry->error = error; | 285 | __entry->error = error; |
286 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 286 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
287 | ), | 287 | ), |
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge, | |||
309 | TP_fast_assign( | 309 | TP_fast_assign( |
310 | __entry->dev = bio->bi_bdev->bd_dev; | 310 | __entry->dev = bio->bi_bdev->bd_dev; |
311 | __entry->sector = bio->bi_sector; | 311 | __entry->sector = bio->bi_sector; |
312 | __entry->nr_sector = bio->bi_size >> 9; | 312 | __entry->nr_sector = bio_sectors(bio); |
313 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 313 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
314 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 314 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
315 | ), | 315 | ), |
@@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue, | |||
376 | TP_fast_assign( | 376 | TP_fast_assign( |
377 | __entry->dev = bio->bi_bdev->bd_dev; | 377 | __entry->dev = bio->bi_bdev->bd_dev; |
378 | __entry->sector = bio->bi_sector; | 378 | __entry->sector = bio->bi_sector; |
379 | __entry->nr_sector = bio->bi_size >> 9; | 379 | __entry->nr_sector = bio_sectors(bio); |
380 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 380 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
381 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 381 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
382 | ), | 382 | ), |
@@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq, | |||
404 | TP_fast_assign( | 404 | TP_fast_assign( |
405 | __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; | 405 | __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; |
406 | __entry->sector = bio ? bio->bi_sector : 0; | 406 | __entry->sector = bio ? bio->bi_sector : 0; |
407 | __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; | 407 | __entry->nr_sector = bio ? bio_sectors(bio) : 0; |
408 | blk_fill_rwbs(__entry->rwbs, | 408 | blk_fill_rwbs(__entry->rwbs, |
409 | bio ? bio->bi_rw : 0, __entry->nr_sector); | 409 | bio ? bio->bi_rw : 0, __entry->nr_sector); |
410 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 410 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
@@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap, | |||
580 | TP_fast_assign( | 580 | TP_fast_assign( |
581 | __entry->dev = bio->bi_bdev->bd_dev; | 581 | __entry->dev = bio->bi_bdev->bd_dev; |
582 | __entry->sector = bio->bi_sector; | 582 | __entry->sector = bio->bi_sector; |
583 | __entry->nr_sector = bio->bi_size >> 9; | 583 | __entry->nr_sector = bio_sectors(bio); |
584 | __entry->old_dev = dev; | 584 | __entry->old_dev = dev; |
585 | __entry->old_sector = from; | 585 | __entry->old_sector = from; |
586 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 586 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
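All six block.h changes are the same mechanical substitution: the open-coded shift is replaced with the existing bio_sectors() helper so the tracepoints read like the rest of the block layer. For reference, this is the pre-immutable-biovec definition from include/linux/bio.h, quoted from memory rather than from this patch:

    #define bio_sectors(bio)        ((bio)->bi_size >> 9)   /* 512-byte sectors */

The generated trace output is unchanged; only the source becomes more uniform.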
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 6a16fd2e70ed..464ea82e10db 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h | |||
@@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class, | |||
183 | DEFINE_EVENT(writeback_work_class, name, \ | 183 | DEFINE_EVENT(writeback_work_class, name, \ |
184 | TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ | 184 | TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ |
185 | TP_ARGS(bdi, work)) | 185 | TP_ARGS(bdi, work)) |
186 | DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); | ||
187 | DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); | 186 | DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); |
188 | DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); | 187 | DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); |
189 | DEFINE_WRITEBACK_WORK_EVENT(writeback_start); | 188 | DEFINE_WRITEBACK_WORK_EVENT(writeback_start); |
@@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \ | |||
222 | 221 | ||
223 | DEFINE_WRITEBACK_EVENT(writeback_nowork); | 222 | DEFINE_WRITEBACK_EVENT(writeback_nowork); |
224 | DEFINE_WRITEBACK_EVENT(writeback_wake_background); | 223 | DEFINE_WRITEBACK_EVENT(writeback_wake_background); |
225 | DEFINE_WRITEBACK_EVENT(writeback_wake_thread); | ||
226 | DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); | ||
227 | DEFINE_WRITEBACK_EVENT(writeback_bdi_register); | 224 | DEFINE_WRITEBACK_EVENT(writeback_bdi_register); |
228 | DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); | 225 | DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); |
229 | DEFINE_WRITEBACK_EVENT(writeback_thread_start); | ||
230 | DEFINE_WRITEBACK_EVENT(writeback_thread_stop); | ||
231 | 226 | ||
232 | DECLARE_EVENT_CLASS(wbc_class, | 227 | DECLARE_EVENT_CLASS(wbc_class, |
233 | TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), | 228 | TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), |
diff --git a/kernel/relay.c b/kernel/relay.c index eef0d113b79e..b91488ba2e5a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -234,7 +234,6 @@ static void relay_destroy_buf(struct rchan_buf *buf) | |||
234 | static void relay_remove_buf(struct kref *kref) | 234 | static void relay_remove_buf(struct kref *kref) |
235 | { | 235 | { |
236 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); | 236 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); |
237 | buf->chan->cb->remove_buf_file(buf->dentry); | ||
238 | relay_destroy_buf(buf); | 237 | relay_destroy_buf(buf); |
239 | } | 238 | } |
240 | 239 | ||
@@ -484,6 +483,7 @@ static void relay_close_buf(struct rchan_buf *buf) | |||
484 | { | 483 | { |
485 | buf->finalized = 1; | 484 | buf->finalized = 1; |
486 | del_timer_sync(&buf->timer); | 485 | del_timer_sync(&buf->timer); |
486 | buf->chan->cb->remove_buf_file(buf->dentry); | ||
487 | kref_put(&buf->kref, relay_remove_buf); | 487 | kref_put(&buf->kref, relay_remove_buf); |
488 | } | 488 | } |
489 | 489 | ||
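The relay change moves the remove_buf_file() callback from the kref release function into relay_close_buf(), so a client's callback now runs at close time, while the buffer and its dentry are guaranteed to still be valid, instead of at whatever later moment the final reference is dropped. The callback interface itself is untouched; a typical debugfs-backed implementation (the stock example from the relay documentation, shown only for context) is:

    static int remove_buf_file_handler(struct dentry *dentry)
    {
            debugfs_remove(dentry);         /* tear down the per-buffer file */
            return 0;
    }

    static struct rchan_callbacks relay_callbacks = {
            /* .create_buf_file, .subbuf_start, ... omitted */
            .remove_buf_file = remove_buf_file_handler,
    };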
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 41733c5dc820..502517492258 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -31,13 +31,14 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info); | |||
31 | static struct class *bdi_class; | 31 | static struct class *bdi_class; |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as | 34 | * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side |
35 | * reader side protection for bdi_pending_list. bdi_list has RCU reader side | ||
36 | * locking. | 35 | * locking. |
37 | */ | 36 | */ |
38 | DEFINE_SPINLOCK(bdi_lock); | 37 | DEFINE_SPINLOCK(bdi_lock); |
39 | LIST_HEAD(bdi_list); | 38 | LIST_HEAD(bdi_list); |
40 | LIST_HEAD(bdi_pending_list); | 39 | |
40 | /* bdi_wq serves all asynchronous writeback tasks */ | ||
41 | struct workqueue_struct *bdi_wq; | ||
41 | 42 | ||
42 | void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) | 43 | void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) |
43 | { | 44 | { |
@@ -257,6 +258,11 @@ static int __init default_bdi_init(void) | |||
257 | { | 258 | { |
258 | int err; | 259 | int err; |
259 | 260 | ||
261 | bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | | ||
262 | WQ_UNBOUND | WQ_SYSFS, 0); | ||
263 | if (!bdi_wq) | ||
264 | return -ENOMEM; | ||
265 | |||
260 | err = bdi_init(&default_backing_dev_info); | 266 | err = bdi_init(&default_backing_dev_info); |
261 | if (!err) | 267 | if (!err) |
262 | bdi_register(&default_backing_dev_info, NULL, "default"); | 268 | bdi_register(&default_backing_dev_info, NULL, "default"); |
@@ -271,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) | |||
271 | return wb_has_dirty_io(&bdi->wb); | 277 | return wb_has_dirty_io(&bdi->wb); |
272 | } | 278 | } |
273 | 279 | ||
274 | static void wakeup_timer_fn(unsigned long data) | ||
275 | { | ||
276 | struct backing_dev_info *bdi = (struct backing_dev_info *)data; | ||
277 | |||
278 | spin_lock_bh(&bdi->wb_lock); | ||
279 | if (bdi->wb.task) { | ||
280 | trace_writeback_wake_thread(bdi); | ||
281 | wake_up_process(bdi->wb.task); | ||
282 | } else if (bdi->dev) { | ||
283 | /* | ||
284 | * When bdi tasks are inactive for long time, they are killed. | ||
285 | * In this case we have to wake-up the forker thread which | ||
286 | * should create and run the bdi thread. | ||
287 | */ | ||
288 | trace_writeback_wake_forker_thread(bdi); | ||
289 | wake_up_process(default_backing_dev_info.wb.task); | ||
290 | } | ||
291 | spin_unlock_bh(&bdi->wb_lock); | ||
292 | } | ||
293 | |||
294 | /* | 280 | /* |
295 | * This function is used when the first inode for this bdi is marked dirty. It | 281 | * This function is used when the first inode for this bdi is marked dirty. It |
296 | * wakes-up the corresponding bdi thread which should then take care of the | 282 | * wakes-up the corresponding bdi thread which should then take care of the |
@@ -307,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) | |||
307 | unsigned long timeout; | 293 | unsigned long timeout; |
308 | 294 | ||
309 | timeout = msecs_to_jiffies(dirty_writeback_interval * 10); | 295 | timeout = msecs_to_jiffies(dirty_writeback_interval * 10); |
310 | mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); | 296 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); |
311 | } | ||
312 | |||
313 | /* | ||
314 | * Calculate the longest interval (jiffies) bdi threads are allowed to be | ||
315 | * inactive. | ||
316 | */ | ||
317 | static unsigned long bdi_longest_inactive(void) | ||
318 | { | ||
319 | unsigned long interval; | ||
320 | |||
321 | interval = msecs_to_jiffies(dirty_writeback_interval * 10); | ||
322 | return max(5UL * 60 * HZ, interval); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Clear pending bit and wakeup anybody waiting for flusher thread creation or | ||
327 | * shutdown | ||
328 | */ | ||
329 | static void bdi_clear_pending(struct backing_dev_info *bdi) | ||
330 | { | ||
331 | clear_bit(BDI_pending, &bdi->state); | ||
332 | smp_mb__after_clear_bit(); | ||
333 | wake_up_bit(&bdi->state, BDI_pending); | ||
334 | } | ||
335 | |||
336 | static int bdi_forker_thread(void *ptr) | ||
337 | { | ||
338 | struct bdi_writeback *me = ptr; | ||
339 | |||
340 | current->flags |= PF_SWAPWRITE; | ||
341 | set_freezable(); | ||
342 | |||
343 | /* | ||
344 | * Our parent may run at a different priority, just set us to normal | ||
345 | */ | ||
346 | set_user_nice(current, 0); | ||
347 | |||
348 | for (;;) { | ||
349 | struct task_struct *task = NULL; | ||
350 | struct backing_dev_info *bdi; | ||
351 | enum { | ||
352 | NO_ACTION, /* Nothing to do */ | ||
353 | FORK_THREAD, /* Fork bdi thread */ | ||
354 | KILL_THREAD, /* Kill inactive bdi thread */ | ||
355 | } action = NO_ACTION; | ||
356 | |||
357 | /* | ||
358 | * Temporary measure, we want to make sure we don't see | ||
359 | * dirty data on the default backing_dev_info | ||
360 | */ | ||
361 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { | ||
362 | del_timer(&me->wakeup_timer); | ||
363 | wb_do_writeback(me, 0); | ||
364 | } | ||
365 | |||
366 | spin_lock_bh(&bdi_lock); | ||
367 | /* | ||
368 | * In the following loop we are going to check whether we have | ||
369 | * some work to do without any synchronization with tasks | ||
370 | * waking us up to do work for them. Set the task state here | ||
371 | * so that we don't miss wakeups after verifying conditions. | ||
372 | */ | ||
373 | set_current_state(TASK_INTERRUPTIBLE); | ||
374 | |||
375 | list_for_each_entry(bdi, &bdi_list, bdi_list) { | ||
376 | bool have_dirty_io; | ||
377 | |||
378 | if (!bdi_cap_writeback_dirty(bdi) || | ||
379 | bdi_cap_flush_forker(bdi)) | ||
380 | continue; | ||
381 | |||
382 | WARN(!test_bit(BDI_registered, &bdi->state), | ||
383 | "bdi %p/%s is not registered!\n", bdi, bdi->name); | ||
384 | |||
385 | have_dirty_io = !list_empty(&bdi->work_list) || | ||
386 | wb_has_dirty_io(&bdi->wb); | ||
387 | |||
388 | /* | ||
389 | * If the bdi has work to do, but the thread does not | ||
390 | * exist - create it. | ||
391 | */ | ||
392 | if (!bdi->wb.task && have_dirty_io) { | ||
393 | /* | ||
394 | * Set the pending bit - if someone will try to | ||
395 | * unregister this bdi - it'll wait on this bit. | ||
396 | */ | ||
397 | set_bit(BDI_pending, &bdi->state); | ||
398 | action = FORK_THREAD; | ||
399 | break; | ||
400 | } | ||
401 | |||
402 | spin_lock(&bdi->wb_lock); | ||
403 | |||
404 | /* | ||
405 | * If there is no work to do and the bdi thread was | ||
406 | * inactive long enough - kill it. The wb_lock is taken | ||
407 | * to make sure no-one adds more work to this bdi and | ||
408 | * wakes the bdi thread up. | ||
409 | */ | ||
410 | if (bdi->wb.task && !have_dirty_io && | ||
411 | time_after(jiffies, bdi->wb.last_active + | ||
412 | bdi_longest_inactive())) { | ||
413 | task = bdi->wb.task; | ||
414 | bdi->wb.task = NULL; | ||
415 | spin_unlock(&bdi->wb_lock); | ||
416 | set_bit(BDI_pending, &bdi->state); | ||
417 | action = KILL_THREAD; | ||
418 | break; | ||
419 | } | ||
420 | spin_unlock(&bdi->wb_lock); | ||
421 | } | ||
422 | spin_unlock_bh(&bdi_lock); | ||
423 | |||
424 | /* Keep working if default bdi still has things to do */ | ||
425 | if (!list_empty(&me->bdi->work_list)) | ||
426 | __set_current_state(TASK_RUNNING); | ||
427 | |||
428 | switch (action) { | ||
429 | case FORK_THREAD: | ||
430 | __set_current_state(TASK_RUNNING); | ||
431 | task = kthread_create(bdi_writeback_thread, &bdi->wb, | ||
432 | "flush-%s", dev_name(bdi->dev)); | ||
433 | if (IS_ERR(task)) { | ||
434 | /* | ||
435 | * If thread creation fails, force writeout of | ||
436 | * the bdi from the thread. Hopefully 1024 is | ||
437 | * large enough for efficient IO. | ||
438 | */ | ||
439 | writeback_inodes_wb(&bdi->wb, 1024, | ||
440 | WB_REASON_FORKER_THREAD); | ||
441 | } else { | ||
442 | /* | ||
443 | * The spinlock makes sure we do not lose | ||
444 | * wake-ups when racing with 'bdi_queue_work()'. | ||
445 | * And as soon as the bdi thread is visible, we | ||
446 | * can start it. | ||
447 | */ | ||
448 | spin_lock_bh(&bdi->wb_lock); | ||
449 | bdi->wb.task = task; | ||
450 | spin_unlock_bh(&bdi->wb_lock); | ||
451 | wake_up_process(task); | ||
452 | } | ||
453 | bdi_clear_pending(bdi); | ||
454 | break; | ||
455 | |||
456 | case KILL_THREAD: | ||
457 | __set_current_state(TASK_RUNNING); | ||
458 | kthread_stop(task); | ||
459 | bdi_clear_pending(bdi); | ||
460 | break; | ||
461 | |||
462 | case NO_ACTION: | ||
463 | if (!wb_has_dirty_io(me) || !dirty_writeback_interval) | ||
464 | /* | ||
465 | * There are no dirty data. The only thing we | ||
466 | * should now care about is checking for | ||
467 | * inactive bdi threads and killing them. Thus, | ||
468 | * let's sleep for longer time, save energy and | ||
469 | * be friendly for battery-driven devices. | ||
470 | */ | ||
471 | schedule_timeout(bdi_longest_inactive()); | ||
472 | else | ||
473 | schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); | ||
474 | try_to_freeze(); | ||
475 | break; | ||
476 | } | ||
477 | } | ||
478 | |||
479 | return 0; | ||
480 | } | 297 | } |
481 | 298 | ||
482 | /* | 299 | /* |
@@ -489,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) | |||
489 | spin_unlock_bh(&bdi_lock); | 306 | spin_unlock_bh(&bdi_lock); |
490 | 307 | ||
491 | synchronize_rcu_expedited(); | 308 | synchronize_rcu_expedited(); |
309 | |||
310 | /* bdi_list is now unused, clear it to mark @bdi dying */ | ||
311 | INIT_LIST_HEAD(&bdi->bdi_list); | ||
492 | } | 312 | } |
493 | 313 | ||
494 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | 314 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, |
@@ -508,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
508 | 328 | ||
509 | bdi->dev = dev; | 329 | bdi->dev = dev; |
510 | 330 | ||
511 | /* | ||
512 | * Just start the forker thread for our default backing_dev_info, | ||
513 | * and add other bdi's to the list. They will get a thread created | ||
514 | * on-demand when they need it. | ||
515 | */ | ||
516 | if (bdi_cap_flush_forker(bdi)) { | ||
517 | struct bdi_writeback *wb = &bdi->wb; | ||
518 | |||
519 | wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s", | ||
520 | dev_name(dev)); | ||
521 | if (IS_ERR(wb->task)) | ||
522 | return PTR_ERR(wb->task); | ||
523 | } | ||
524 | |||
525 | bdi_debug_register(bdi, dev_name(dev)); | 331 | bdi_debug_register(bdi, dev_name(dev)); |
526 | set_bit(BDI_registered, &bdi->state); | 332 | set_bit(BDI_registered, &bdi->state); |
527 | 333 | ||
@@ -545,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev); | |||
545 | */ | 351 | */ |
546 | static void bdi_wb_shutdown(struct backing_dev_info *bdi) | 352 | static void bdi_wb_shutdown(struct backing_dev_info *bdi) |
547 | { | 353 | { |
548 | struct task_struct *task; | ||
549 | |||
550 | if (!bdi_cap_writeback_dirty(bdi)) | 354 | if (!bdi_cap_writeback_dirty(bdi)) |
551 | return; | 355 | return; |
552 | 356 | ||
@@ -556,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) | |||
556 | bdi_remove_from_list(bdi); | 360 | bdi_remove_from_list(bdi); |
557 | 361 | ||
558 | /* | 362 | /* |
559 | * If setup is pending, wait for that to complete first | 363 | * Drain work list and shutdown the delayed_work. At this point, |
364 | * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi | ||
365 | * is dying and its work_list needs to be drained no matter what. | ||
560 | */ | 366 | */ |
561 | wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait, | 367 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); |
562 | TASK_UNINTERRUPTIBLE); | 368 | flush_delayed_work(&bdi->wb.dwork); |
369 | WARN_ON(!list_empty(&bdi->work_list)); | ||
563 | 370 | ||
564 | /* | 371 | /* |
565 | * Finally, kill the kernel thread. We don't need to be RCU | 372 | * This shouldn't be necessary unless @bdi for some reason has |
566 | * safe anymore, since the bdi is gone from visibility. | 373 | * unflushed dirty IO after work_list is drained. Do it anyway |
374 | * just in case. | ||
567 | */ | 375 | */ |
568 | spin_lock_bh(&bdi->wb_lock); | 376 | cancel_delayed_work_sync(&bdi->wb.dwork); |
569 | task = bdi->wb.task; | ||
570 | bdi->wb.task = NULL; | ||
571 | spin_unlock_bh(&bdi->wb_lock); | ||
572 | |||
573 | if (task) | ||
574 | kthread_stop(task); | ||
575 | } | 377 | } |
576 | 378 | ||
577 | /* | 379 | /* |
@@ -597,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi) | |||
597 | bdi_set_min_ratio(bdi, 0); | 399 | bdi_set_min_ratio(bdi, 0); |
598 | trace_writeback_bdi_unregister(bdi); | 400 | trace_writeback_bdi_unregister(bdi); |
599 | bdi_prune_sb(bdi); | 401 | bdi_prune_sb(bdi); |
600 | del_timer_sync(&bdi->wb.wakeup_timer); | ||
601 | 402 | ||
602 | if (!bdi_cap_flush_forker(bdi)) | 403 | bdi_wb_shutdown(bdi); |
603 | bdi_wb_shutdown(bdi); | ||
604 | bdi_debug_unregister(bdi); | 404 | bdi_debug_unregister(bdi); |
605 | 405 | ||
606 | spin_lock_bh(&bdi->wb_lock); | 406 | spin_lock_bh(&bdi->wb_lock); |
@@ -622,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) | |||
622 | INIT_LIST_HEAD(&wb->b_io); | 422 | INIT_LIST_HEAD(&wb->b_io); |
623 | INIT_LIST_HEAD(&wb->b_more_io); | 423 | INIT_LIST_HEAD(&wb->b_more_io); |
624 | spin_lock_init(&wb->list_lock); | 424 | spin_lock_init(&wb->list_lock); |
625 | setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); | 425 | INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); |
626 | } | 426 | } |
627 | 427 | ||
628 | /* | 428 | /* |
@@ -695,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
695 | bdi_unregister(bdi); | 495 | bdi_unregister(bdi); |
696 | 496 | ||
697 | /* | 497 | /* |
698 | * If bdi_unregister() had already been called earlier, the | 498 | * If bdi_unregister() had already been called earlier, the dwork |
699 | * wakeup_timer could still be armed because bdi_prune_sb() | 499 | * could still be pending because bdi_prune_sb() can race with the |
700 | * can race with the bdi_wakeup_thread_delayed() calls from | 500 | * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty(). |
701 | * __mark_inode_dirty(). | ||
702 | */ | 501 | */ |
703 | del_timer_sync(&bdi->wb.wakeup_timer); | 502 | cancel_delayed_work_sync(&bdi->wb.dwork); |
704 | 503 | ||
705 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) | 504 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) |
706 | percpu_counter_destroy(&bdi->bdi_stat[i]); | 505 | percpu_counter_destroy(&bdi->bdi_stat[i]); |
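Everything the deleted forker thread, wakeup timer, and BDI_pending bit used to coordinate is replaced by the stock delayed_work API: each bdi_writeback now embeds a delayed_work (wb.dwork) bound to bdi_writeback_workfn() in fs/fs-writeback.c and queued on the unbound bdi_wq workqueue. The lifecycle as used in this file condenses to a few familiar calls (paraphrased from the hunks above, not a literal excerpt):

    /* bdi_wb_init(): bind the work item to the writeback worker */
    INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);

    /* bdi_wakeup_thread_delayed(): (re)arm writeback after the dirty interval */
    mod_delayed_work(bdi_wq, &wb->dwork, timeout);

    /* bdi_wb_shutdown() / bdi_destroy(): run any pending work now and wait */
    mod_delayed_work(bdi_wq, &wb->dwork, 0);
    flush_delayed_work(&wb->dwork);
    cancel_delayed_work_sync(&wb->dwork);

WQ_MEM_RECLAIM on bdi_wq keeps a rescuer available so writeback can still make forward progress under memory pressure, which the dedicated kthreads previously guaranteed.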
diff --git a/mm/bounce.c b/mm/bounce.c index a5c2ec3589cb..c9f0a4339a7d 100644 --- a/mm/bounce.c +++ b/mm/bounce.c | |||
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from) | |||
101 | struct bio_vec *tovec, *fromvec; | 101 | struct bio_vec *tovec, *fromvec; |
102 | int i; | 102 | int i; |
103 | 103 | ||
104 | __bio_for_each_segment(tovec, to, i, 0) { | 104 | bio_for_each_segment(tovec, to, i) { |
105 | fromvec = from->bi_io_vec + i; | 105 | fromvec = from->bi_io_vec + i; |
106 | 106 | ||
107 | /* | 107 | /* |
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) | |||
134 | /* | 134 | /* |
135 | * free up bounce indirect pages used | 135 | * free up bounce indirect pages used |
136 | */ | 136 | */ |
137 | __bio_for_each_segment(bvec, bio, i, 0) { | 137 | bio_for_each_segment_all(bvec, bio, i) { |
138 | org_vec = bio_orig->bi_io_vec + i; | 138 | org_vec = bio_orig->bi_io_vec + i; |
139 | if (bvec->bv_page == org_vec->bv_page) | 139 | if (bvec->bv_page == org_vec->bv_page) |
140 | continue; | 140 | continue; |
@@ -199,78 +199,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) | |||
199 | static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, | 199 | static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, |
200 | mempool_t *pool, int force) | 200 | mempool_t *pool, int force) |
201 | { | 201 | { |
202 | struct page *page; | 202 | struct bio *bio; |
203 | struct bio *bio = NULL; | 203 | int rw = bio_data_dir(*bio_orig); |
204 | int i, rw = bio_data_dir(*bio_orig); | ||
205 | struct bio_vec *to, *from; | 204 | struct bio_vec *to, *from; |
205 | unsigned i; | ||
206 | 206 | ||
207 | bio_for_each_segment(from, *bio_orig, i) { | 207 | bio_for_each_segment(from, *bio_orig, i) |
208 | page = from->bv_page; | 208 | if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) |
209 | goto bounce; | ||
209 | 210 | ||
210 | /* | 211 | return; |
211 | * is destination page below bounce pfn? | 212 | bounce: |
212 | */ | 213 | bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set); |
213 | if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) | ||
214 | continue; | ||
215 | |||
216 | /* | ||
217 | * irk, bounce it | ||
218 | */ | ||
219 | if (!bio) { | ||
220 | unsigned int cnt = (*bio_orig)->bi_vcnt; | ||
221 | 214 | ||
222 | bio = bio_alloc(GFP_NOIO, cnt); | 215 | bio_for_each_segment_all(to, bio, i) { |
223 | memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec)); | 216 | struct page *page = to->bv_page; |
224 | } | ||
225 | |||
226 | 217 | ||
227 | to = bio->bi_io_vec + i; | 218 | if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) |
219 | continue; | ||
228 | 220 | ||
229 | to->bv_page = mempool_alloc(pool, q->bounce_gfp); | ||
230 | to->bv_len = from->bv_len; | ||
231 | to->bv_offset = from->bv_offset; | ||
232 | inc_zone_page_state(to->bv_page, NR_BOUNCE); | 221 | inc_zone_page_state(to->bv_page, NR_BOUNCE); |
222 | to->bv_page = mempool_alloc(pool, q->bounce_gfp); | ||
233 | 223 | ||
234 | if (rw == WRITE) { | 224 | if (rw == WRITE) { |
235 | char *vto, *vfrom; | 225 | char *vto, *vfrom; |
236 | 226 | ||
237 | flush_dcache_page(from->bv_page); | 227 | flush_dcache_page(page); |
228 | |||
238 | vto = page_address(to->bv_page) + to->bv_offset; | 229 | vto = page_address(to->bv_page) + to->bv_offset; |
239 | vfrom = kmap(from->bv_page) + from->bv_offset; | 230 | vfrom = kmap_atomic(page) + to->bv_offset; |
240 | memcpy(vto, vfrom, to->bv_len); | 231 | memcpy(vto, vfrom, to->bv_len); |
241 | kunmap(from->bv_page); | 232 | kunmap_atomic(vfrom); |
242 | } | 233 | } |
243 | } | 234 | } |
244 | 235 | ||
245 | /* | ||
246 | * no pages bounced | ||
247 | */ | ||
248 | if (!bio) | ||
249 | return; | ||
250 | |||
251 | trace_block_bio_bounce(q, *bio_orig); | 236 | trace_block_bio_bounce(q, *bio_orig); |
252 | 237 | ||
253 | /* | ||
254 | * at least one page was bounced, fill in possible non-highmem | ||
255 | * pages | ||
256 | */ | ||
257 | __bio_for_each_segment(from, *bio_orig, i, 0) { | ||
258 | to = bio_iovec_idx(bio, i); | ||
259 | if (!to->bv_page) { | ||
260 | to->bv_page = from->bv_page; | ||
261 | to->bv_len = from->bv_len; | ||
262 | to->bv_offset = from->bv_offset; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | bio->bi_bdev = (*bio_orig)->bi_bdev; | ||
267 | bio->bi_flags |= (1 << BIO_BOUNCED); | 238 | bio->bi_flags |= (1 << BIO_BOUNCED); |
268 | bio->bi_sector = (*bio_orig)->bi_sector; | ||
269 | bio->bi_rw = (*bio_orig)->bi_rw; | ||
270 | |||
271 | bio->bi_vcnt = (*bio_orig)->bi_vcnt; | ||
272 | bio->bi_idx = (*bio_orig)->bi_idx; | ||
273 | bio->bi_size = (*bio_orig)->bi_size; | ||
274 | 239 | ||
275 | if (pool == page_pool) { | 240 | if (pool == page_pool) { |
276 | bio->bi_end_io = bounce_end_io_write; | 241 | bio->bi_end_io = bounce_end_io_write; |
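The rewritten __blk_queue_bounce() leans on two helpers added elsewhere in this series: bio_clone_bioset() duplicates the original bio (including its biovec) so the bounce path no longer assembles a replacement bio field by field, and bio_for_each_segment_all() walks a bio's entire biovec regardless of bi_idx, which is only legitimate for bios that own their vector, such as the clone made here. An informal comparison of the two iterators under the pre-immutable-biovec definitions (process() is a placeholder):

    struct bio_vec *bvec;
    int i;

    /* Iterates from bi_idx to bi_vcnt: the not-yet-completed segments. */
    bio_for_each_segment(bvec, bio, i)
            process(bvec);

    /* Iterates bi_io_vec[0 .. bi_vcnt): every segment, for owner bios only. */
    bio_for_each_segment_all(bvec, bio, i)
            process(bvec);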
diff --git a/mm/page_io.c b/mm/page_io.c index 06a8842a6ec6..a8a3ef45fed7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c | |||
@@ -36,7 +36,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, | |||
36 | bio->bi_io_vec[0].bv_len = PAGE_SIZE; | 36 | bio->bi_io_vec[0].bv_len = PAGE_SIZE; |
37 | bio->bi_io_vec[0].bv_offset = 0; | 37 | bio->bi_io_vec[0].bv_offset = 0; |
38 | bio->bi_vcnt = 1; | 38 | bio->bi_vcnt = 1; |
39 | bio->bi_idx = 0; | ||
40 | bio->bi_size = PAGE_SIZE; | 39 | bio->bi_size = PAGE_SIZE; |
41 | bio->bi_end_io = end_io; | 40 | bio->bi_end_io = end_io; |
42 | } | 41 | } |