path: root/fs/block_dev.c
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--	fs/block_dev.c	201
1 file changed, 3 insertions(+), 198 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 67fe72ce6ac7..7d822fae7765 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -31,6 +31,8 @@ struct bdev_inode {
 	struct inode vfs_inode;
 };
 
+static const struct address_space_operations def_blk_aops;
+
 static inline struct bdev_inode *BDEV_I(struct inode *inode)
 {
 	return container_of(inode, struct bdev_inode, vfs_inode);
@@ -171,203 +173,6 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
-#if 0
-static void blk_end_aio(struct bio *bio, int error)
-{
-	struct kiocb *iocb = bio->bi_private;
-	atomic_t *bio_count = &iocb->ki_bio_count;
-
-	if (bio_data_dir(bio) == READ)
-		bio_check_pages_dirty(bio);
-	else {
-		bio_release_pages(bio);
-		bio_put(bio);
-	}
-
-	/* iocb->ki_nbytes stores error code from LLDD */
-	if (error)
-		iocb->ki_nbytes = -EIO;
-
-	if (atomic_dec_and_test(bio_count)) {
-		if ((long)iocb->ki_nbytes < 0)
-			aio_complete(iocb, iocb->ki_nbytes, 0);
-		else
-			aio_complete(iocb, iocb->ki_left, 0);
-	}
-
-	return 0;
-}
-
-#define VEC_SIZE	16
-struct pvec {
-	unsigned short nr;
-	unsigned short idx;
-	struct page *page[VEC_SIZE];
-};
-
-#define PAGES_SPANNED(addr, len)	\
-	(DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE);
-
-/*
- * get page pointer for user addr, we internally cache struct page array for
- * (addr, count) range in pvec to avoid frequent call to get_user_pages. If
- * internal page list is exhausted, a batch count of up to VEC_SIZE is used
- * to get next set of page struct.
- */
-static struct page *blk_get_page(unsigned long addr, size_t count, int rw,
-				 struct pvec *pvec)
-{
-	int ret, nr_pages;
-	if (pvec->idx == pvec->nr) {
-		nr_pages = PAGES_SPANNED(addr, count);
-		nr_pages = min(nr_pages, VEC_SIZE);
-		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(current, current->mm, addr, nr_pages,
-				     rw == READ, 0, pvec->page, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (ret < 0)
-			return ERR_PTR(ret);
-		pvec->nr = ret;
-		pvec->idx = 0;
-	}
-	return pvec->page[pvec->idx++];
-}
-
-/* return a page back to pvec array */
-static void blk_unget_page(struct page *page, struct pvec *pvec)
-{
-	pvec->page[--pvec->idx] = page;
-}
-
-static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		 loff_t pos, unsigned long nr_segs)
-{
-	struct inode *inode = iocb->ki_filp->f_mapping->host;
-	unsigned blkbits = blksize_bits(bdev_hardsect_size(I_BDEV(inode)));
-	unsigned blocksize_mask = (1 << blkbits) - 1;
-	unsigned long seg = 0;	/* iov segment iterator */
-	unsigned long nvec;	/* number of bio vec needed */
-	unsigned long cur_off;	/* offset into current page */
-	unsigned long cur_len;	/* I/O len of current page, up to PAGE_SIZE */
-
-	unsigned long addr;	/* user iovec address */
-	size_t count;		/* user iovec len */
-	size_t nbytes = iocb->ki_nbytes = iocb->ki_left;	/* total xfer size */
-	loff_t size;		/* size of block device */
-	struct bio *bio;
-	atomic_t *bio_count = &iocb->ki_bio_count;
-	struct page *page;
-	struct pvec pvec;
-
-	pvec.nr = 0;
-	pvec.idx = 0;
-
-	if (pos & blocksize_mask)
-		return -EINVAL;
-
-	size = i_size_read(inode);
-	if (pos + nbytes > size) {
-		nbytes = size - pos;
-		iocb->ki_left = nbytes;
-	}
-
-	/*
-	 * check first non-zero iov alignment, the remaining
-	 * iov alignment is checked inside bio loop below.
-	 */
-	do {
-		addr = (unsigned long) iov[seg].iov_base;
-		count = min(iov[seg].iov_len, nbytes);
-		if (addr & blocksize_mask || count & blocksize_mask)
-			return -EINVAL;
-	} while (!count && ++seg < nr_segs);
-	atomic_set(bio_count, 1);
-
-	while (nbytes) {
-		/* roughly estimate number of bio vec needed */
-		nvec = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
-		nvec = max(nvec, nr_segs - seg);
-		nvec = min(nvec, (unsigned long) BIO_MAX_PAGES);
-
-		/* bio_alloc should not fail with GFP_KERNEL flag */
-		bio = bio_alloc(GFP_KERNEL, nvec);
-		bio->bi_bdev = I_BDEV(inode);
-		bio->bi_end_io = blk_end_aio;
-		bio->bi_private = iocb;
-		bio->bi_sector = pos >> blkbits;
-same_bio:
-		cur_off = addr & ~PAGE_MASK;
-		cur_len = PAGE_SIZE - cur_off;
-		if (count < cur_len)
-			cur_len = count;
-
-		page = blk_get_page(addr, count, rw, &pvec);
-		if (unlikely(IS_ERR(page)))
-			goto backout;
-
-		if (bio_add_page(bio, page, cur_len, cur_off)) {
-			pos += cur_len;
-			addr += cur_len;
-			count -= cur_len;
-			nbytes -= cur_len;
-
-			if (count)
-				goto same_bio;
-			while (++seg < nr_segs) {
-				addr = (unsigned long) iov[seg].iov_base;
-				count = iov[seg].iov_len;
-				if (!count)
-					continue;
-				if (unlikely(addr & blocksize_mask ||
-					     count & blocksize_mask)) {
-					page = ERR_PTR(-EINVAL);
-					goto backout;
-				}
-				count = min(count, nbytes);
-				goto same_bio;
-			}
-		} else {
-			blk_unget_page(page, &pvec);
-		}
-
-		/* bio is ready, submit it */
-		if (rw == READ)
-			bio_set_pages_dirty(bio);
-		atomic_inc(bio_count);
-		submit_bio(rw, bio);
-	}
-
-completion:
-	iocb->ki_left -= nbytes;
-	nbytes = iocb->ki_left;
-	iocb->ki_pos += nbytes;
-
-	blk_run_address_space(inode->i_mapping);
-	if (atomic_dec_and_test(bio_count))
-		aio_complete(iocb, nbytes, 0);
-
-	return -EIOCBQUEUED;
-
-backout:
-	/*
-	 * back out nbytes count constructed so far for this bio,
-	 * we will throw away current bio.
-	 */
-	nbytes += bio->bi_size;
-	bio_release_pages(bio);
-	bio_put(bio);
-
-	/*
-	 * if no bio was submmitted, return the error code.
-	 * otherwise, proceed with pending I/O completion.
-	 */
-	if (atomic_read(bio_count) == 1)
-		return PTR_ERR(page);
-	goto completion;
-}
-#endif
-
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, blkdev_get_block, wbc);
@@ -1334,7 +1139,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
 
-const struct address_space_operations def_blk_aops = {
+static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
 	.sync_page	= block_sync_page,