diff options
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r-- | fs/btrfs/compression.c | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c new file mode 100644 index 000000000000..c5470367ca5c --- /dev/null +++ b/fs/btrfs/compression.c | |||
@@ -0,0 +1,454 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/bio.h> | ||
21 | #include <linux/buffer_head.h> | ||
22 | #include <linux/file.h> | ||
23 | #include <linux/fs.h> | ||
24 | #include <linux/pagemap.h> | ||
25 | #include <linux/highmem.h> | ||
26 | #include <linux/time.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/smp_lock.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | #include <linux/mpage.h> | ||
32 | #include <linux/swap.h> | ||
33 | #include <linux/writeback.h> | ||
34 | #include <linux/bit_spinlock.h> | ||
35 | #include <linux/version.h> | ||
36 | #include "ctree.h" | ||
37 | #include "disk-io.h" | ||
38 | #include "transaction.h" | ||
39 | #include "btrfs_inode.h" | ||
40 | #include "volumes.h" | ||
41 | #include "ordered-data.h" | ||
42 | #include "compat.h" | ||
43 | #include "compression.h" | ||
44 | #include "extent_io.h" | ||
45 | #include "extent_map.h" | ||
46 | |||
47 | struct compressed_bio { | ||
48 | /* number of bios pending for this compressed extent */ | ||
49 | atomic_t pending_bios; | ||
50 | |||
51 | /* the pages with the compressed data on them */ | ||
52 | struct page **compressed_pages; | ||
53 | |||
54 | /* inode that owns this data */ | ||
55 | struct inode *inode; | ||
56 | |||
57 | /* starting offset in the inode for our pages */ | ||
58 | u64 start; | ||
59 | |||
60 | /* number of bytes in the inode we're working on */ | ||
61 | unsigned long len; | ||
62 | |||
63 | /* number of bytes on disk */ | ||
64 | unsigned long compressed_len; | ||
65 | |||
66 | /* number of compressed pages in the array */ | ||
67 | unsigned long nr_pages; | ||
68 | |||
69 | /* IO errors */ | ||
70 | int errors; | ||
71 | |||
72 | /* for reads, this is the bio we are copying the data into */ | ||
73 | struct bio *orig_bio; | ||
74 | }; | ||
75 | |||
76 | static struct bio *compressed_bio_alloc(struct block_device *bdev, | ||
77 | u64 first_byte, gfp_t gfp_flags) | ||
78 | { | ||
79 | struct bio *bio; | ||
80 | int nr_vecs; | ||
81 | |||
82 | nr_vecs = bio_get_nr_vecs(bdev); | ||
83 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
84 | |||
85 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | ||
86 | while (!bio && (nr_vecs /= 2)) | ||
87 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
88 | } | ||
89 | |||
90 | if (bio) { | ||
91 | bio->bi_size = 0; | ||
92 | bio->bi_bdev = bdev; | ||
93 | bio->bi_sector = first_byte >> 9; | ||
94 | } | ||
95 | return bio; | ||
96 | } | ||
97 | |||
98 | /* when we finish reading compressed pages from the disk, we | ||
99 | * decompress them and then run the bio end_io routines on the | ||
100 | * decompressed pages (in the inode address space). | ||
101 | * | ||
102 | * This allows the checksumming and other IO error handling routines | ||
103 | * to work normally | ||
104 | * | ||
105 | * The compressed pages are freed here, and it must be run | ||
106 | * in process context | ||
107 | */ | ||
108 | static void end_compressed_bio_read(struct bio *bio, int err) | ||
109 | { | ||
110 | struct extent_io_tree *tree; | ||
111 | struct compressed_bio *cb = bio->bi_private; | ||
112 | struct inode *inode; | ||
113 | struct page *page; | ||
114 | unsigned long index; | ||
115 | int ret; | ||
116 | |||
117 | if (err) | ||
118 | cb->errors = 1; | ||
119 | |||
120 | /* if there are more bios still pending for this compressed | ||
121 | * extent, just exit | ||
122 | */ | ||
123 | if (!atomic_dec_and_test(&cb->pending_bios)) | ||
124 | goto out; | ||
125 | |||
126 | /* ok, we're the last bio for this extent, lets start | ||
127 | * the decompression. | ||
128 | */ | ||
129 | inode = cb->inode; | ||
130 | tree = &BTRFS_I(inode)->io_tree; | ||
131 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | ||
132 | cb->start, | ||
133 | cb->orig_bio->bi_io_vec, | ||
134 | cb->orig_bio->bi_vcnt, | ||
135 | cb->compressed_len); | ||
136 | if (ret) | ||
137 | cb->errors = 1; | ||
138 | |||
139 | /* release the compressed pages */ | ||
140 | index = 0; | ||
141 | for (index = 0; index < cb->nr_pages; index++) { | ||
142 | page = cb->compressed_pages[index]; | ||
143 | page->mapping = NULL; | ||
144 | page_cache_release(page); | ||
145 | } | ||
146 | |||
147 | /* do io completion on the original bio */ | ||
148 | if (cb->errors) | ||
149 | bio_io_error(cb->orig_bio); | ||
150 | else | ||
151 | bio_endio(cb->orig_bio, 0); | ||
152 | |||
153 | /* finally free the cb struct */ | ||
154 | kfree(cb->compressed_pages); | ||
155 | kfree(cb); | ||
156 | out: | ||
157 | bio_put(bio); | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Clear the writeback bits on all of the file | ||
162 | * pages for a compressed write | ||
163 | */ | ||
164 | static noinline int end_compressed_writeback(struct inode *inode, u64 start, | ||
165 | unsigned long ram_size) | ||
166 | { | ||
167 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
168 | unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT; | ||
169 | struct page *pages[16]; | ||
170 | unsigned long nr_pages = end_index - index + 1; | ||
171 | int i; | ||
172 | int ret; | ||
173 | |||
174 | while(nr_pages > 0) { | ||
175 | ret = find_get_pages_contig(inode->i_mapping, index, | ||
176 | min(nr_pages, ARRAY_SIZE(pages)), pages); | ||
177 | if (ret == 0) { | ||
178 | nr_pages -= 1; | ||
179 | index += 1; | ||
180 | continue; | ||
181 | } | ||
182 | for (i = 0; i < ret; i++) { | ||
183 | end_page_writeback(pages[i]); | ||
184 | page_cache_release(pages[i]); | ||
185 | } | ||
186 | nr_pages -= ret; | ||
187 | index += ret; | ||
188 | } | ||
189 | /* the inode may be gone now */ | ||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * do the cleanup once all the compressed pages hit the disk. | ||
195 | * This will clear writeback on the file pages and free the compressed | ||
196 | * pages. | ||
197 | * | ||
198 | * This also calls the writeback end hooks for the file pages so that | ||
199 | * metadata and checksums can be updated in the file. | ||
200 | */ | ||
201 | static void end_compressed_bio_write(struct bio *bio, int err) | ||
202 | { | ||
203 | struct extent_io_tree *tree; | ||
204 | struct compressed_bio *cb = bio->bi_private; | ||
205 | struct inode *inode; | ||
206 | struct page *page; | ||
207 | unsigned long index; | ||
208 | |||
209 | if (err) | ||
210 | cb->errors = 1; | ||
211 | |||
212 | /* if there are more bios still pending for this compressed | ||
213 | * extent, just exit | ||
214 | */ | ||
215 | if (!atomic_dec_and_test(&cb->pending_bios)) | ||
216 | goto out; | ||
217 | |||
218 | /* ok, we're the last bio for this extent, step one is to | ||
219 | * call back into the FS and do all the end_io operations | ||
220 | */ | ||
221 | inode = cb->inode; | ||
222 | tree = &BTRFS_I(inode)->io_tree; | ||
223 | tree->ops->writepage_end_io_hook(cb->compressed_pages[0], | ||
224 | cb->start, | ||
225 | cb->start + cb->len - 1, | ||
226 | NULL, 1); | ||
227 | |||
228 | end_compressed_writeback(inode, cb->start, cb->len); | ||
229 | /* note, our inode could be gone now */ | ||
230 | |||
231 | /* | ||
232 | * release the compressed pages, these came from alloc_page and | ||
233 | * are not attached to the inode at all | ||
234 | */ | ||
235 | index = 0; | ||
236 | for (index = 0; index < cb->nr_pages; index++) { | ||
237 | page = cb->compressed_pages[index]; | ||
238 | page->mapping = NULL; | ||
239 | page_cache_release(page); | ||
240 | } | ||
241 | |||
242 | /* finally free the cb struct */ | ||
243 | kfree(cb->compressed_pages); | ||
244 | kfree(cb); | ||
245 | out: | ||
246 | bio_put(bio); | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * worker function to build and submit bios for previously compressed pages. | ||
251 | * The corresponding pages in the inode should be marked for writeback | ||
252 | * and the compressed pages should have a reference on them for dropping | ||
253 | * when the IO is complete. | ||
254 | * | ||
255 | * This also checksums the file bytes and gets things ready for | ||
256 | * the end io hooks. | ||
257 | */ | ||
258 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | ||
259 | unsigned long len, u64 disk_start, | ||
260 | unsigned long compressed_len, | ||
261 | struct page **compressed_pages, | ||
262 | unsigned long nr_pages) | ||
263 | { | ||
264 | struct bio *bio = NULL; | ||
265 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
266 | struct compressed_bio *cb; | ||
267 | unsigned long bytes_left; | ||
268 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
269 | int page_index = 0; | ||
270 | struct page *page; | ||
271 | u64 first_byte = disk_start; | ||
272 | struct block_device *bdev; | ||
273 | int ret; | ||
274 | |||
275 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); | ||
276 | cb = kmalloc(sizeof(*cb), GFP_NOFS); | ||
277 | atomic_set(&cb->pending_bios, 0); | ||
278 | cb->errors = 0; | ||
279 | cb->inode = inode; | ||
280 | cb->start = start; | ||
281 | cb->len = len; | ||
282 | cb->compressed_pages = compressed_pages; | ||
283 | cb->compressed_len = compressed_len; | ||
284 | cb->orig_bio = NULL; | ||
285 | cb->nr_pages = nr_pages; | ||
286 | |||
287 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
288 | |||
289 | ret = btrfs_csum_file_bytes(root, inode, start, len); | ||
290 | BUG_ON(ret); | ||
291 | |||
292 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); | ||
293 | bio->bi_private = cb; | ||
294 | bio->bi_end_io = end_compressed_bio_write; | ||
295 | atomic_inc(&cb->pending_bios); | ||
296 | |||
297 | /* create and submit bios for the compressed pages */ | ||
298 | bytes_left = compressed_len; | ||
299 | while(bytes_left > 0) { | ||
300 | page = compressed_pages[page_index]; | ||
301 | page->mapping = inode->i_mapping; | ||
302 | if (bio->bi_size) | ||
303 | ret = io_tree->ops->merge_bio_hook(page, 0, | ||
304 | PAGE_CACHE_SIZE, | ||
305 | bio, 0); | ||
306 | else | ||
307 | ret = 0; | ||
308 | |||
309 | if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < | ||
310 | PAGE_CACHE_SIZE) { | ||
311 | bio_get(bio); | ||
312 | |||
313 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
314 | BUG_ON(ret); | ||
315 | |||
316 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); | ||
317 | BUG_ON(ret); | ||
318 | |||
319 | bio_put(bio); | ||
320 | |||
321 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); | ||
322 | atomic_inc(&cb->pending_bios); | ||
323 | bio->bi_private = cb; | ||
324 | bio->bi_end_io = end_compressed_bio_write; | ||
325 | bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
326 | } | ||
327 | page_index++; | ||
328 | bytes_left -= PAGE_CACHE_SIZE; | ||
329 | first_byte += PAGE_CACHE_SIZE; | ||
330 | } | ||
331 | bio_get(bio); | ||
332 | |||
333 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
334 | BUG_ON(ret); | ||
335 | |||
336 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); | ||
337 | BUG_ON(ret); | ||
338 | |||
339 | bio_put(bio); | ||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * for a compressed read, the bio we get passed has all the inode pages | ||
345 | * in it. We don't actually do IO on those pages but allocate new ones | ||
346 | * to hold the compressed pages on disk. | ||
347 | * | ||
348 | * bio->bi_sector points to the compressed extent on disk | ||
349 | * bio->bi_io_vec points to all of the inode pages | ||
350 | * bio->bi_vcnt is a count of pages | ||
351 | * | ||
352 | * After the compressed pages are read, we copy the bytes into the | ||
353 | * bio we were passed and then call the bio end_io calls | ||
354 | */ | ||
355 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | ||
356 | int mirror_num, unsigned long bio_flags) | ||
357 | { | ||
358 | struct extent_io_tree *tree; | ||
359 | struct extent_map_tree *em_tree; | ||
360 | struct compressed_bio *cb; | ||
361 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
362 | unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; | ||
363 | unsigned long compressed_len; | ||
364 | unsigned long nr_pages; | ||
365 | unsigned long page_index; | ||
366 | struct page *page; | ||
367 | struct block_device *bdev; | ||
368 | struct bio *comp_bio; | ||
369 | u64 cur_disk_byte = (u64)bio->bi_sector << 9; | ||
370 | struct extent_map *em; | ||
371 | int ret; | ||
372 | |||
373 | tree = &BTRFS_I(inode)->io_tree; | ||
374 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
375 | |||
376 | /* we need the actual starting offset of this extent in the file */ | ||
377 | spin_lock(&em_tree->lock); | ||
378 | em = lookup_extent_mapping(em_tree, | ||
379 | page_offset(bio->bi_io_vec->bv_page), | ||
380 | PAGE_CACHE_SIZE); | ||
381 | spin_unlock(&em_tree->lock); | ||
382 | |||
383 | cb = kmalloc(sizeof(*cb), GFP_NOFS); | ||
384 | atomic_set(&cb->pending_bios, 0); | ||
385 | cb->errors = 0; | ||
386 | cb->inode = inode; | ||
387 | |||
388 | cb->start = em->start; | ||
389 | compressed_len = em->block_len; | ||
390 | free_extent_map(em); | ||
391 | |||
392 | cb->len = uncompressed_len; | ||
393 | cb->compressed_len = compressed_len; | ||
394 | cb->orig_bio = bio; | ||
395 | |||
396 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | ||
397 | PAGE_CACHE_SIZE; | ||
398 | cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, | ||
399 | GFP_NOFS); | ||
400 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
401 | |||
402 | for (page_index = 0; page_index < nr_pages; page_index++) { | ||
403 | cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | | ||
404 | __GFP_HIGHMEM); | ||
405 | } | ||
406 | cb->nr_pages = nr_pages; | ||
407 | |||
408 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); | ||
409 | comp_bio->bi_private = cb; | ||
410 | comp_bio->bi_end_io = end_compressed_bio_read; | ||
411 | atomic_inc(&cb->pending_bios); | ||
412 | |||
413 | for (page_index = 0; page_index < nr_pages; page_index++) { | ||
414 | page = cb->compressed_pages[page_index]; | ||
415 | page->mapping = inode->i_mapping; | ||
416 | if (comp_bio->bi_size) | ||
417 | ret = tree->ops->merge_bio_hook(page, 0, | ||
418 | PAGE_CACHE_SIZE, | ||
419 | comp_bio, 0); | ||
420 | else | ||
421 | ret = 0; | ||
422 | |||
423 | if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) < | ||
424 | PAGE_CACHE_SIZE) { | ||
425 | bio_get(comp_bio); | ||
426 | |||
427 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | ||
428 | BUG_ON(ret); | ||
429 | |||
430 | ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); | ||
431 | BUG_ON(ret); | ||
432 | |||
433 | bio_put(comp_bio); | ||
434 | |||
435 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, | ||
436 | GFP_NOFS); | ||
437 | atomic_inc(&cb->pending_bios); | ||
438 | bio->bi_private = cb; | ||
439 | bio->bi_end_io = end_compressed_bio_write; | ||
440 | bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
441 | } | ||
442 | cur_disk_byte += PAGE_CACHE_SIZE; | ||
443 | } | ||
444 | bio_get(comp_bio); | ||
445 | |||
446 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | ||
447 | BUG_ON(ret); | ||
448 | |||
449 | ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); | ||
450 | BUG_ON(ret); | ||
451 | |||
452 | bio_put(comp_bio); | ||
453 | return 0; | ||
454 | } | ||