Diffstat
-rw-r--r--  fs/bio.c             |   2 +-
-rw-r--r--  fs/block_dev.c       | 202 ++++++++++++++++++++++++++++++++-----
-rw-r--r--  include/linux/aio.h  |   1 +
-rw-r--r--  include/linux/bio.h  |   1 +
4 files changed, 178 insertions(+), 28 deletions(-)
diff --git a/fs/bio.c b/fs/bio.c
index 7ec737eda72b..7618bcb18368 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -916,7 +916,7 @@ void bio_set_pages_dirty(struct bio *bio)
 	}
 }
 
-static void bio_release_pages(struct bio *bio)
+void bio_release_pages(struct bio *bio)
 {
 	struct bio_vec *bvec = bio->bi_io_vec;
 	int i;
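
The only change to fs/bio.c is dropping the static qualifier, so that the new raw-bio submission path in fs/block_dev.c can release the page references taken by get_user_pages(), both on write completion and on the error backout path. For reference, the function body is untouched by this patch; in kernels of this vintage it is roughly the following (a sketch, not part of this diff):

void bio_release_pages(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int i;

	/* drop the reference get_user_pages() took on each page */
	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (page)
			put_page(page);
	}
}
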
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 197f93921847..1715d6b5f411 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -129,43 +129,191 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh, int create)
+static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 {
-	sector_t end_block = max_block(I_BDEV(inode));
-	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
+	struct kiocb *iocb = bio->bi_private;
+	atomic_t *bio_count = &iocb->ki_bio_count;
 
-	if ((iblock + max_blocks) > end_block) {
-		max_blocks = end_block - iblock;
-		if ((long)max_blocks <= 0) {
-			if (create)
-				return -EIO;	/* write fully beyond EOF */
-			/*
-			 * It is a read which is fully beyond EOF. We return
-			 * a !buffer_mapped buffer
-			 */
-			max_blocks = 0;
-		}
+	if (bio_data_dir(bio) == READ)
+		bio_check_pages_dirty(bio);
+	else {
+		bio_release_pages(bio);
+		bio_put(bio);
+	}
+
+	/* iocb->ki_nbytes stores error code from LLDD */
+	if (error)
+		iocb->ki_nbytes = -EIO;
+
+	if (atomic_dec_and_test(bio_count)) {
+		if (iocb->ki_nbytes < 0)
+			aio_complete(iocb, iocb->ki_nbytes, 0);
+		else
+			aio_complete(iocb, iocb->ki_left, 0);
 	}
 
-	bh->b_bdev = I_BDEV(inode);
-	bh->b_blocknr = iblock;
-	bh->b_size = max_blocks << inode->i_blkbits;
-	if (max_blocks)
-		set_buffer_mapped(bh);
 	return 0;
 }
 
+#define VEC_SIZE	16
+struct pvec {
+	unsigned short nr;
+	unsigned short idx;
+	struct page *page[VEC_SIZE];
+};
+
+#define PAGES_SPANNED(addr, len)	\
+	(DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE);
+
+/*
+ * get page pointer for user addr, we internally cache struct page array for
+ * (addr, count) range in pvec to avoid frequent call to get_user_pages. If
+ * internal page list is exhausted, a batch count of up to VEC_SIZE is used
+ * to get next set of page struct.
+ */
+static struct page *blk_get_page(unsigned long addr, size_t count, int rw,
+				struct pvec *pvec)
+{
+	int ret, nr_pages;
+	if (pvec->idx == pvec->nr) {
+		nr_pages = PAGES_SPANNED(addr, count);
+		nr_pages = min(nr_pages, VEC_SIZE);
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(current, current->mm, addr, nr_pages,
+				     rw == READ, 0, pvec->page, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		pvec->nr = ret;
+		pvec->idx = 0;
+	}
+	return pvec->page[pvec->idx++];
+}
+
 static ssize_t
 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+			loff_t pos, unsigned long nr_segs)
 {
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	unsigned blkbits = blksize_bits(bdev_hardsect_size(I_BDEV(inode)));
+	unsigned blocksize_mask = (1 << blkbits) - 1;
+	unsigned long seg = 0;	/* iov segment iterator */
+	unsigned long nvec;	/* number of bio vec needed */
+	unsigned long cur_off;	/* offset into current page */
+	unsigned long cur_len;	/* I/O len of current page, up to PAGE_SIZE */
+
+	unsigned long addr;	/* user iovec address */
+	size_t count;		/* user iovec len */
+	size_t nbytes = iocb->ki_nbytes = iocb->ki_left;	/* total xfer size */
+	loff_t size;		/* size of block device */
+	struct bio *bio;
+	atomic_t *bio_count = &iocb->ki_bio_count;
+	struct page *page;
+	struct pvec pvec;
+
+	pvec.nr = 0;
+	pvec.idx = 0;
+
+	if (pos & blocksize_mask)
+		return -EINVAL;
+
+	size = i_size_read(inode);
+	if (pos + nbytes > size) {
+		nbytes = size - pos;
+		iocb->ki_left = nbytes;
+	}
+
+	/*
+	 * check first non-zero iov alignment, the remaining
+	 * iov alignment is checked inside bio loop below.
+	 */
+	do {
+		addr = (unsigned long) iov[seg].iov_base;
+		count = min(iov[seg].iov_len, nbytes);
+		if (addr & blocksize_mask || count & blocksize_mask)
+			return -EINVAL;
+	} while (!count && ++seg < nr_segs);
+	atomic_set(bio_count, 1);
+
+	while (nbytes) {
+		/* roughly estimate number of bio vec needed */
+		nvec = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
+		nvec = max(nvec, nr_segs - seg);
+		nvec = min(nvec, (unsigned long) BIO_MAX_PAGES);
+
+		/* bio_alloc should not fail with GFP_KERNEL flag */
+		bio = bio_alloc(GFP_KERNEL, nvec);
+		bio->bi_bdev = I_BDEV(inode);
+		bio->bi_end_io = blk_end_aio;
+		bio->bi_private = iocb;
+		bio->bi_sector = pos >> blkbits;
+same_bio:
+		cur_off = addr & ~PAGE_MASK;
+		cur_len = PAGE_SIZE - cur_off;
+		if (count < cur_len)
+			cur_len = count;
+
+		page = blk_get_page(addr, count, rw, &pvec);
+		if (unlikely(IS_ERR(page)))
+			goto backout;
+
+		if (bio_add_page(bio, page, cur_len, cur_off)) {
+			pos += cur_len;
+			addr += cur_len;
+			count -= cur_len;
+			nbytes -= cur_len;
+
+			if (count)
+				goto same_bio;
+			while (++seg < nr_segs) {
+				addr = (unsigned long) iov[seg].iov_base;
+				count = iov[seg].iov_len;
+				if (!count)
+					continue;
+				if (unlikely(addr & blocksize_mask ||
+					     count & blocksize_mask)) {
+					page = ERR_PTR(-EINVAL);
+					goto backout;
+				}
+				count = min(count, nbytes);
+				goto same_bio;
+			}
+		}
 
-	return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
-				iov, offset, nr_segs, blkdev_get_blocks, NULL);
+		/* bio is ready, submit it */
+		if (rw == READ)
+			bio_set_pages_dirty(bio);
+		atomic_inc(bio_count);
+		submit_bio(rw, bio);
+	}
+
+completion:
+	iocb->ki_left -= nbytes;
+	nbytes = iocb->ki_left;
+	iocb->ki_pos += nbytes;
+
+	blk_run_address_space(inode->i_mapping);
+	if (atomic_dec_and_test(bio_count))
+		aio_complete(iocb, nbytes, 0);
+
+	return -EIOCBQUEUED;
+
+backout:
+	/*
+	 * back out nbytes count constructed so far for this bio,
+	 * we will throw away current bio.
+	 */
+	nbytes += bio->bi_size;
+	bio_release_pages(bio);
+	bio_put(bio);
+
+	/*
+	 * if no bio was submitted, return the error code.
+	 * otherwise, proceed with pending I/O completion.
+	 */
+	if (atomic_read(bio_count) == 1)
+		return PTR_ERR(page);
+	goto completion;
 }
 
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
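
The heart of the rewrite is the pvec cache consumed by blk_get_page(): user pages are pinned in batches of up to VEC_SIZE and handed out one at a time, so get_user_pages() runs once per batch instead of once per page. PAGES_SPANNED() counts how many page frames an (addr, len) range touches; e.g. with 4 KiB pages, addr = 0x1234 and len = 0x2000 span DIV_ROUND_UP(0x3234, 0x1000) - 0x1234/0x1000 = 4 - 1 = 3 pages. (Note the macro as posted carries a stray trailing semicolon; it is harmless in its single statement-position use.) Below is a minimal user-space model of the same refill pattern, a sketch only: fetch_pages() is a hypothetical stand-in for get_user_pages(), and unsigned long stands in for struct page *.

#include <stdio.h>

#define VEC_SIZE   16
#define PAGE_SIZE  4096UL
#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
#define PAGES_SPANNED(addr, len) \
	(DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE)

struct pvec {
	unsigned short nr;              /* pages currently cached */
	unsigned short idx;             /* next page to hand out */
	unsigned long page[VEC_SIZE];   /* stand-in for struct page * */
};

/* hypothetical stand-in for get_user_pages(): "pins" the frames backing
 * [addr, addr + n*PAGE_SIZE) and reports how many it actually got */
static int fetch_pages(unsigned long addr, int n, unsigned long *out)
{
	for (int i = 0; i < n; i++)
		out[i] = addr / PAGE_SIZE + i;
	return n;
}

/* same shape as blk_get_page(): refill only when the batch is drained */
static unsigned long get_page_cached(unsigned long addr, size_t count,
				     struct pvec *pvec)
{
	if (pvec->idx == pvec->nr) {
		int n = PAGES_SPANNED(addr, count);
		if (n > VEC_SIZE)
			n = VEC_SIZE;
		pvec->nr = fetch_pages(addr, n, pvec->page);
		pvec->idx = 0;
	}
	return pvec->page[pvec->idx++];
}

int main(void)
{
	struct pvec pvec = { 0, 0, {0} };
	unsigned long addr = 0x1234;
	size_t count = 0x2000;

	/* walks 3 pages; fetch_pages() is called once, not three times */
	while (count) {
		unsigned long off = addr & (PAGE_SIZE - 1);
		unsigned long len = PAGE_SIZE - off;
		if (len > count)
			len = count;
		printf("page %lu: off=%lu len=%lu\n",
		       get_page_cached(addr, count, &pvec), off, len);
		addr += len;
		count -= len;
	}
	return 0;
}

Running it prints three page/offset/length triples while fetch_pages() is invoked just once, which is exactly the amortization the pvec comment in the patch describes.
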
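Alignment is enforced against the device's hardware sector size: blkbits comes from bdev_hardsect_size(), so on a 512-byte-sector device blocksize_mask = 0x1ff, and the starting position plus every non-empty iovec's address and length must be multiples of 512 or the request fails with -EINVAL before or during bio construction. A toy check of the mask arithmetic (plain C, 512-byte sectors assumed for illustration):

#include <assert.h>

#define SECTOR_BITS 9				/* 512-byte hardware sectors */
#define SECTOR_MASK ((1u << SECTOR_BITS) - 1)	/* 0x1ff */

int main(void)
{
	assert((0x601200u & SECTOR_MASK) == 0);	/* aligned address: accepted */
	assert((1024u & SECTOR_MASK) == 0);	/* aligned length: accepted */
	assert((0x601204u & SECTOR_MASK) != 0);	/* misaligned: -EINVAL */
	return 0;
}

When a failure happens mid-bio, the backout label credits the partially built bio back into nbytes (nbytes += bio->bi_size), releases the pinned pages, and returns the error only if ki_bio_count still holds nothing but the submitter's initial reference, i.e. no bio was ever submitted; otherwise it falls through to report the partial transfer.
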
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 3372ec6bf53a..a30ef13c9e62 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -105,6 +105,7 @@ struct kiocb {
 	wait_queue_t		ki_wait;
 	loff_t			ki_pos;
 
+	atomic_t		ki_bio_count;	/* num bio used for this iocb */
 	void			*private;
 	/* State that we remember to be able to restart/retry */
 	unsigned short		ki_opcode;
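
The new ki_bio_count field is a biased reference count tying bio completions to the iocb: blkdev_direct_IO() starts it at 1 (the submitter's own reference), every submitted bio adds one, and whichever of blk_end_aio() or the submitter drops the count to zero fires aio_complete(). The initial bias is what stops a fast-completing bio from completing the iocb while later bios are still being built. A user-space model of the protocol with a mock complete() (C11 atomics, single-threaded for clarity; the ordering argument is the same):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int bio_count;

static void complete(const char *who)
{
	printf("aio_complete() fired by %s\n", who);
}

/* per-bio completion: mirrors blk_end_aio() dropping its reference */
static void end_io(void)
{
	if (atomic_fetch_sub(&bio_count, 1) == 1)
		complete("last bio");
}

int main(void)
{
	int nr_bios = 3;

	atomic_store(&bio_count, 1);		/* submitter's bias reference */

	for (int i = 0; i < nr_bios; i++) {
		atomic_fetch_add(&bio_count, 1);	/* one ref per bio */
		end_io();	/* bio may even complete immediately... */
	}
	/* ...yet the bias keeps the count >= 1 until submission is done */

	if (atomic_fetch_sub(&bio_count, 1) == 1)
		complete("submitter");	/* drop the bias: fires exactly once */
	return 0;
}

Even though every end_io() in the loop runs before submission finishes, the count never reaches zero until the submitter drops its bias, so the completion fires exactly once.
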
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 092dbd0e7658..08daf3272c02 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -309,6 +309,7 @@ extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				gfp_t);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
+extern void bio_release_pages(struct bio *bio);
 extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);