 fs/Kconfig              |   2
 fs/btrfs/Makefile       |   3
 fs/btrfs/compression.c  | 454
 fs/btrfs/compression.h  |  47
 fs/btrfs/ctree.h        |  99
 fs/btrfs/disk-io.c      |  18
 fs/btrfs/disk-io.h      |   1
 fs/btrfs/extent-tree.c  |  27
 fs/btrfs/extent_io.c    | 411
 fs/btrfs/extent_io.h    |  17
 fs/btrfs/extent_map.c   |   9
 fs/btrfs/extent_map.h   |   6
 fs/btrfs/file-item.c    |  75
 fs/btrfs/file.c         | 263
 fs/btrfs/inode.c        | 584
 fs/btrfs/ordered-data.c |   9
 fs/btrfs/ordered-data.h |  10
 fs/btrfs/print-tree.c   |   7
 fs/btrfs/super.c        |  10
 fs/btrfs/tree-log.c     |   3
 fs/btrfs/volumes.c      |   2
 fs/btrfs/zlib.c         | 637
 22 files changed, 2315 insertions(+), 379 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 18f5a85b47c6..31cce5d88b1a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -501,6 +501,8 @@ config BTRFS_FS
 	tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
 	depends on EXPERIMENTAL
 	select LIBCRC32C
+	select ZLIB_INFLATE
+	select ZLIB_DEFLATE
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 7125716e142b..d2cf5a54a4b8 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,7 +7,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o
+	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
+	   compression.o
 else

 # Normal Makefile
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
new file mode 100644
index 000000000000..c5470367ca5c
--- /dev/null
+++ b/fs/btrfs/compression.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/mpage.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
+#include <linux/version.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "volumes.h"
+#include "ordered-data.h"
+#include "compat.h"
+#include "compression.h"
+#include "extent_io.h"
+#include "extent_map.h"
+
+struct compressed_bio {
+	/* number of bios pending for this compressed extent */
+	atomic_t pending_bios;
+
+	/* the pages with the compressed data on them */
+	struct page **compressed_pages;
+
+	/* inode that owns this data */
+	struct inode *inode;
+
+	/* starting offset in the inode for our pages */
+	u64 start;
+
+	/* number of bytes in the inode we're working on */
+	unsigned long len;
+
+	/* number of bytes on disk */
+	unsigned long compressed_len;
+
+	/* number of compressed pages in the array */
+	unsigned long nr_pages;
+
+	/* IO errors */
+	int errors;
+
+	/* for reads, this is the bio we are copying the data into */
+	struct bio *orig_bio;
+};
+
+static struct bio *compressed_bio_alloc(struct block_device *bdev,
+					u64 first_byte, gfp_t gfp_flags)
+{
+	struct bio *bio;
+	int nr_vecs;
+
+	nr_vecs = bio_get_nr_vecs(bdev);
+	bio = bio_alloc(gfp_flags, nr_vecs);
+
+	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+		while (!bio && (nr_vecs /= 2))
+			bio = bio_alloc(gfp_flags, nr_vecs);
+	}
+
+	if (bio) {
+		bio->bi_size = 0;
+		bio->bi_bdev = bdev;
+		bio->bi_sector = first_byte >> 9;
+	}
+	return bio;
+}
+
+/* when we finish reading compressed pages from the disk, we
+ * decompress them and then run the bio end_io routines on the
+ * decompressed pages (in the inode address space).
+ *
+ * This allows the checksumming and other IO error handling routines
+ * to work normally
+ *
+ * The compressed pages are freed here, and it must be run
+ * in process context
+ */
+static void end_compressed_bio_read(struct bio *bio, int err)
+{
+	struct extent_io_tree *tree;
+	struct compressed_bio *cb = bio->bi_private;
+	struct inode *inode;
+	struct page *page;
+	unsigned long index;
+	int ret;
+
+	if (err)
+		cb->errors = 1;
+
+	/* if there are more bios still pending for this compressed
+	 * extent, just exit
+	 */
+	if (!atomic_dec_and_test(&cb->pending_bios))
+		goto out;
+
+	/* ok, we're the last bio for this extent, let's start
+	 * the decompression.
+	 */
+	inode = cb->inode;
+	tree = &BTRFS_I(inode)->io_tree;
+	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
+					cb->start,
+					cb->orig_bio->bi_io_vec,
+					cb->orig_bio->bi_vcnt,
+					cb->compressed_len);
+	if (ret)
+		cb->errors = 1;
+
+	/* release the compressed pages */
+	index = 0;
+	for (index = 0; index < cb->nr_pages; index++) {
+		page = cb->compressed_pages[index];
+		page->mapping = NULL;
+		page_cache_release(page);
+	}
+
+	/* do io completion on the original bio */
+	if (cb->errors)
+		bio_io_error(cb->orig_bio);
+	else
+		bio_endio(cb->orig_bio, 0);
+
+	/* finally free the cb struct */
+	kfree(cb->compressed_pages);
+	kfree(cb);
+out:
+	bio_put(bio);
+}
+
+/*
+ * Clear the writeback bits on all of the file
+ * pages for a compressed write
+ */
+static noinline int end_compressed_writeback(struct inode *inode, u64 start,
+					     unsigned long ram_size)
+{
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
+	struct page *pages[16];
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+	int ret;
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		if (ret == 0) {
+			nr_pages -= 1;
+			index += 1;
+			continue;
+		}
+		for (i = 0; i < ret; i++) {
+			end_page_writeback(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+	}
+	/* the inode may be gone now */
+	return 0;
+}
+
+/*
+ * do the cleanup once all the compressed pages hit the disk.
+ * This will clear writeback on the file pages and free the compressed
+ * pages.
+ *
+ * This also calls the writeback end hooks for the file pages so that
+ * metadata and checksums can be updated in the file.
+ */
+static void end_compressed_bio_write(struct bio *bio, int err)
+{
+	struct extent_io_tree *tree;
+	struct compressed_bio *cb = bio->bi_private;
+	struct inode *inode;
+	struct page *page;
+	unsigned long index;
+
+	if (err)
+		cb->errors = 1;
+
+	/* if there are more bios still pending for this compressed
+	 * extent, just exit
+	 */
+	if (!atomic_dec_and_test(&cb->pending_bios))
+		goto out;
+
+	/* ok, we're the last bio for this extent, step one is to
+	 * call back into the FS and do all the end_io operations
+	 */
+	inode = cb->inode;
+	tree = &BTRFS_I(inode)->io_tree;
+	tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
+					 cb->start,
+					 cb->start + cb->len - 1,
+					 NULL, 1);
+
+	end_compressed_writeback(inode, cb->start, cb->len);
+	/* note, our inode could be gone now */
+
+	/*
+	 * release the compressed pages, these came from alloc_page and
+	 * are not attached to the inode at all
+	 */
+	index = 0;
+	for (index = 0; index < cb->nr_pages; index++) {
+		page = cb->compressed_pages[index];
+		page->mapping = NULL;
+		page_cache_release(page);
+	}
+
+	/* finally free the cb struct */
+	kfree(cb->compressed_pages);
+	kfree(cb);
+out:
+	bio_put(bio);
+}
+
+/*
+ * worker function to build and submit bios for previously compressed pages.
+ * The corresponding pages in the inode should be marked for writeback
+ * and the compressed pages should have a reference on them for dropping
+ * when the IO is complete.
+ *
+ * This also checksums the file bytes and gets things ready for
+ * the end io hooks.
+ */
+int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+				 unsigned long len, u64 disk_start,
+				 unsigned long compressed_len,
+				 struct page **compressed_pages,
+				 unsigned long nr_pages)
+{
+	struct bio *bio = NULL;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct compressed_bio *cb;
+	unsigned long bytes_left;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	int page_index = 0;
+	struct page *page;
+	u64 first_byte = disk_start;
+	struct block_device *bdev;
+	int ret;
+
+	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
+	cb = kmalloc(sizeof(*cb), GFP_NOFS);
+	atomic_set(&cb->pending_bios, 0);
+	cb->errors = 0;
+	cb->inode = inode;
+	cb->start = start;
+	cb->len = len;
+	cb->compressed_pages = compressed_pages;
+	cb->compressed_len = compressed_len;
+	cb->orig_bio = NULL;
+	cb->nr_pages = nr_pages;
+
+	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+
+	ret = btrfs_csum_file_bytes(root, inode, start, len);
+	BUG_ON(ret);
+
+	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+	bio->bi_private = cb;
+	bio->bi_end_io = end_compressed_bio_write;
+	atomic_inc(&cb->pending_bios);
+
+	/* create and submit bios for the compressed pages */
+	bytes_left = compressed_len;
+	while(bytes_left > 0) {
+		page = compressed_pages[page_index];
+		page->mapping = inode->i_mapping;
+		if (bio->bi_size)
+			ret = io_tree->ops->merge_bio_hook(page, 0,
+							   PAGE_CACHE_SIZE,
+							   bio, 0);
+		else
+			ret = 0;
+
+		if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
+		    PAGE_CACHE_SIZE) {
+			bio_get(bio);
+
+			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+			BUG_ON(ret);
+
+			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
+			BUG_ON(ret);
+
+			bio_put(bio);
+
+			bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+			atomic_inc(&cb->pending_bios);
+			bio->bi_private = cb;
+			bio->bi_end_io = end_compressed_bio_write;
+			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+		}
+		page_index++;
+		bytes_left -= PAGE_CACHE_SIZE;
+		first_byte += PAGE_CACHE_SIZE;
+	}
+	bio_get(bio);
+
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	BUG_ON(ret);
+
+	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
+	BUG_ON(ret);
+
+	bio_put(bio);
+	return 0;
+}
+
+/*
+ * for a compressed read, the bio we get passed has all the inode pages
+ * in it.  We don't actually do IO on those pages but allocate new ones
+ * to hold the compressed pages on disk.
+ *
+ * bio->bi_sector points to the compressed extent on disk
+ * bio->bi_io_vec points to all of the inode pages
+ * bio->bi_vcnt is a count of pages
+ *
+ * After the compressed pages are read, we copy the bytes into the
+ * bio we were passed and then call the bio end_io calls
+ */
+int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+				 int mirror_num, unsigned long bio_flags)
+{
+	struct extent_io_tree *tree;
+	struct extent_map_tree *em_tree;
+	struct compressed_bio *cb;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	unsigned long compressed_len;
+	unsigned long nr_pages;
+	unsigned long page_index;
+	struct page *page;
+	struct block_device *bdev;
+	struct bio *comp_bio;
+	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+	struct extent_map *em;
+	int ret;
+
+	tree = &BTRFS_I(inode)->io_tree;
+	em_tree = &BTRFS_I(inode)->extent_tree;
+
+	/* we need the actual starting offset of this extent in the file */
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree,
+				   page_offset(bio->bi_io_vec->bv_page),
+				   PAGE_CACHE_SIZE);
+	spin_unlock(&em_tree->lock);
+
+	cb = kmalloc(sizeof(*cb), GFP_NOFS);
+	atomic_set(&cb->pending_bios, 0);
+	cb->errors = 0;
+	cb->inode = inode;
+
+	cb->start = em->start;
+	compressed_len = em->block_len;
+	free_extent_map(em);
+
+	cb->len = uncompressed_len;
+	cb->compressed_len = compressed_len;
+	cb->orig_bio = bio;
+
+	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
+				 PAGE_CACHE_SIZE;
+	cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+				       GFP_NOFS);
+	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+
+	for (page_index = 0; page_index < nr_pages; page_index++) {
+		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
+							      __GFP_HIGHMEM);
+	}
+	cb->nr_pages = nr_pages;
+
+	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+	comp_bio->bi_private = cb;
+	comp_bio->bi_end_io = end_compressed_bio_read;
+	atomic_inc(&cb->pending_bios);
+
+	for (page_index = 0; page_index < nr_pages; page_index++) {
+		page = cb->compressed_pages[page_index];
+		page->mapping = inode->i_mapping;
+		if (comp_bio->bi_size)
+			ret = tree->ops->merge_bio_hook(page, 0,
+							PAGE_CACHE_SIZE,
+							comp_bio, 0);
+		else
+			ret = 0;
+
+		if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
+		    PAGE_CACHE_SIZE) {
+			bio_get(comp_bio);
+
+			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
+			BUG_ON(ret);
+
+			ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+			BUG_ON(ret);
+
+			bio_put(comp_bio);
+
+			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
+							GFP_NOFS);
+			atomic_inc(&cb->pending_bios);
+			comp_bio->bi_private = cb;
+			comp_bio->bi_end_io = end_compressed_bio_read;
+			bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
+		}
+		cur_disk_byte += PAGE_CACHE_SIZE;
+	}
+	bio_get(comp_bio);
+
+	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
+	BUG_ON(ret);
+
+	ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+	BUG_ON(ret);
+
+	bio_put(comp_bio);
+	return 0;
+}
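
Both completion handlers above rely on the same pending_bios convention: the submitter holds one reference per bio it sends, and whichever completion drops the count to zero owns the final cleanup. A minimal standalone sketch of that pattern, with illustrative names only (not kernel API):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct compressed_io {
	atomic_int pending;	/* one reference per in-flight sub-IO */
	int errors;
};

static void complete_sub_io(struct compressed_io *cio, int err)
{
	if (err)
		cio->errors = 1;
	/* only the completion that drops the count to zero finishes up */
	if (atomic_fetch_sub(&cio->pending, 1) != 1)
		return;
	printf("last bio done, errors=%d: decompress or end_io here\n",
	       cio->errors);
	free(cio);
}

int main(void)
{
	struct compressed_io *cio = malloc(sizeof(*cio));
	int i;

	atomic_init(&cio->pending, 0);
	cio->errors = 0;
	for (i = 0; i < 3; i++)		/* submit three sub-IOs */
		atomic_fetch_add(&cio->pending, 1);
	for (i = 0; i < 3; i++)		/* complete them in any order */
		complete_sub_io(cio, 0);
	return 0;
}
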
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
new file mode 100644
index 000000000000..421f5b4aa715
--- /dev/null
+++ b/fs/btrfs/compression.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_COMPRESSION_
+#define __BTRFS_COMPRESSION_
+
+int btrfs_zlib_decompress(unsigned char *data_in,
+			  struct page *dest_page,
+			  unsigned long start_byte,
+			  size_t srclen, size_t destlen);
+int btrfs_zlib_compress_pages(struct address_space *mapping,
+			      u64 start, unsigned long len,
+			      struct page **pages,
+			      unsigned long nr_dest_pages,
+			      unsigned long *out_pages,
+			      unsigned long *total_in,
+			      unsigned long *total_out,
+			      unsigned long max_out);
+int btrfs_zlib_decompress_biovec(struct page **pages_in,
+			      u64 disk_start,
+			      struct bio_vec *bvec,
+			      int vcnt,
+			      size_t srclen);
+void btrfs_zlib_exit(void);
+int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+				  unsigned long len, u64 disk_start,
+				  unsigned long compressed_len,
+				  struct page **compressed_pages,
+				  unsigned long nr_pages);
+int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+				 int mirror_num, unsigned long bio_flags);
+#endif
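
For orientation, a rough sketch of how the write side of this API fits together: a caller compresses a chunk of file data into a bounded set of destination pages, and hands the compressed pages to the bio builder declared above. The locals here (pages, disk_start, the 16-page cap) are illustrative assumptions, not code from this patch, and error handling is omitted:

	struct page *pages[16];		/* destination pages for compressed bytes */
	unsigned long nr_pages_out;
	unsigned long total_in;		/* file bytes consumed by the compressor */
	unsigned long total_compressed;	/* compressed bytes produced */
	int ret;

	ret = btrfs_zlib_compress_pages(inode->i_mapping, start, len,
					pages, 16, &nr_pages_out,
					&total_in, &total_compressed, len);
	if (ret == 0 && total_compressed < total_in) {
		/*
		 * compression won: reserve a disk extent of total_compressed
		 * bytes at disk_start, then build and submit the bios for it
		 */
		ret = btrfs_submit_compressed_write(inode, start, total_in,
						    disk_start, total_compressed,
						    pages, nr_pages_out);
	}
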
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8559f39fd47f..793d8fdda244 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -400,10 +400,18 @@ struct btrfs_timespec {
 	__le32 nsec;
 } __attribute__ ((__packed__));

-/*
- * there is no padding here on purpose.  If you want to extent the inode,
- * make a new item type
- */
+typedef enum {
+	BTRFS_COMPRESS_NONE = 0,
+	BTRFS_COMPRESS_ZLIB = 1,
+	BTRFS_COMPRESS_LAST = 2,
+} btrfs_compression_type;
+
+/* we don't understand any encryption methods right now */
+typedef enum {
+	BTRFS_ENCRYPTION_NONE = 0,
+	BTRFS_ENCRYPTION_LAST = 1,
+} btrfs_encryption_type;
+
 struct btrfs_inode_item {
 	/* nfs style generation number */
 	__le64 generation;
@@ -419,6 +427,7 @@ struct btrfs_inode_item {
 	__le64 rdev;
 	__le16 flags;
 	__le16 compat_flags;
+
 	struct btrfs_timespec atime;
 	struct btrfs_timespec ctime;
 	struct btrfs_timespec mtime;
@@ -454,8 +463,33 @@ struct btrfs_root_item {
 #define BTRFS_FILE_EXTENT_INLINE 1

 struct btrfs_file_extent_item {
+	/*
+	 * transaction id that created this extent
+	 */
 	__le64 generation;
+	/*
+	 * max number of bytes to hold this extent in ram
+	 * when we split a compressed extent we can't know how big
+	 * each of the resulting pieces will be.  So, this is
+	 * an upper limit on the size of the extent in ram instead of
+	 * an exact limit.
+	 */
+	__le64 ram_bytes;
+
+	/*
+	 * 32 bits for the various ways we might encode the data,
+	 * including compression and encryption.  If any of these
+	 * are set to something a given disk format doesn't understand
+	 * it is treated like an incompat flag for reading and writing,
+	 * but not for stat.
+	 */
+	u8 compression;
+	u8 encryption;
+	__le16 other_encoding; /* spare for later use */
+
+	/* are we inline data or a real extent? */
 	u8 type;
+
 	/*
 	 * disk space consumed by the extent, checksum blocks are included
 	 * in these numbers
@@ -471,9 +505,11 @@ struct btrfs_file_extent_item {
 	 */
 	__le64 offset;
 	/*
-	 * the logical number of file blocks (no csums included)
+	 * the logical number of file blocks (no csums included).  This
+	 * always reflects the size uncompressed and without encoding.
 	 */
 	__le64 num_bytes;
+
 } __attribute__ ((__packed__));

 struct btrfs_csum_item {
@@ -814,6 +850,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_NOBARRIER		(1 << 2)
 #define BTRFS_MOUNT_SSD			(1 << 3)
 #define BTRFS_MOUNT_DEGRADED		(1 << 4)
+#define BTRFS_MOUNT_COMPRESS		(1 << 5)

 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -825,6 +862,7 @@ struct btrfs_root {
 #define BTRFS_INODE_NODATASUM		(1 << 0)
 #define BTRFS_INODE_NODATACOW		(1 << 1)
 #define BTRFS_INODE_READONLY		(1 << 2)
+#define BTRFS_INODE_NOCOMPRESS		(1 << 3)
 #define btrfs_clear_flag(inode, flag)	(BTRFS_I(inode)->flags &= \
 					 ~BTRFS_INODE_##flag)
 #define btrfs_set_flag(inode, flag)	(BTRFS_I(inode)->flags |= \
@@ -1424,14 +1462,6 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
 	return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
 }

-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
-					       struct btrfs_item *e)
-{
-	unsigned long offset;
-	offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
-	return btrfs_item_size(eb, e) - offset;
-}
-
 BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
 		   disk_bytenr, 64);
 BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
@@ -1442,6 +1472,36 @@ BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
 		   offset, 64);
 BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
 		   num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
+		   ram_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
+		   compression, 8);
+BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
+		   encryption, 8);
+BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
+		   other_encoding, 16);
+
+/* this returns the number of file bytes represented by the inline item.
+ * If an item is compressed, this is the uncompressed size
+ */
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+					       struct btrfs_file_extent_item *e)
+{
+	return btrfs_file_extent_ram_bytes(eb, e);
+}
+
+/*
+ * this returns the number of bytes used by the item on disk, minus the
+ * size of any extent headers.  If a file is compressed on disk, this is
+ * the compressed size
+ */
+static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
+						    struct btrfs_item *e)
+{
+	unsigned long offset;
+	offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	return btrfs_item_size(eb, e) - offset;
+}

 static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
 {
@@ -1745,10 +1805,11 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 			  struct bio *bio);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
-			     u64 objectid, u64 pos, u64 disk_offset,
-			     u64 disk_num_bytes,
-			     u64 num_bytes, u64 offset);
+			     u64 objectid, u64 pos,
+			     u64 disk_offset, u64 disk_num_bytes,
+			     u64 num_bytes, u64 offset, u64 ram_bytes,
+			     u8 compression, u8 encryption, u16 other_encoding);
 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path, u64 objectid,
@@ -1758,6 +1819,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 		       struct bio *bio);
+int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
+			  u64 start, unsigned long len);
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
 					  struct btrfs_path *path,
@@ -1799,7 +1862,7 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
 			      int namelen);

 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
-			 size_t size, struct bio *bio);
+			 size_t size, struct bio *bio, unsigned long bio_flags);

 unsigned long btrfs_force_ra(struct address_space *mapping,
 			      struct file_ra_state *ra, struct file *file,
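
BTRFS_SETGET_FUNCS is defined earlier in ctree.h; each invocation above generates an endian-safe getter/setter pair for one on-disk field. Schematically it behaves like the pair below (the real macro reads and writes through the extent_buffer page cache rather than a direct struct pointer, so this is a simplification, not the actual expansion):

	/* roughly what BTRFS_SETGET_FUNCS(file_extent_ram_bytes, ...) yields */
	static inline u64 btrfs_file_extent_ram_bytes(struct extent_buffer *eb,
						      struct btrfs_file_extent_item *s)
	{
		return le64_to_cpu(s->ram_bytes);	/* conceptually */
	}

	static inline void btrfs_set_file_extent_ram_bytes(struct extent_buffer *eb,
							   struct btrfs_file_extent_item *s,
							   u64 val)
	{
		s->ram_bytes = cpu_to_le64(val);	/* conceptually */
	}
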
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0be044bb6194..dc95f636a11b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -83,6 +83,7 @@ struct async_submit_bio {
 	extent_submit_bio_hook_t *submit_bio_hook;
 	int rw;
 	int mirror_num;
+	unsigned long bio_flags;
 	struct btrfs_work work;
 };

@@ -115,6 +116,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
 	}
 	em->start = 0;
 	em->len = (u64)-1;
+	em->block_len = (u64)-1;
 	em->block_start = 0;
 	em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;

@@ -469,12 +471,13 @@ static void run_one_async_submit(struct btrfs_work *work)
 	wake_up(&fs_info->async_submit_wait);

 	async->submit_bio_hook(async->inode, async->rw, async->bio,
-			       async->mirror_num);
+			       async->mirror_num, async->bio_flags);
 	kfree(async);
 }

 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 			int rw, struct bio *bio, int mirror_num,
+			unsigned long bio_flags,
 			extent_submit_bio_hook_t *submit_bio_hook)
 {
 	struct async_submit_bio *async;
@@ -491,6 +494,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->submit_bio_hook = submit_bio_hook;
 	async->work.func = run_one_async_submit;
 	async->work.flags = 0;
+	async->bio_flags = bio_flags;

 	while(atomic_read(&fs_info->async_submit_draining) &&
 	      atomic_read(&fs_info->nr_async_submits)) {
@@ -530,7 +534,7 @@ static int btree_csum_one_bio(struct bio *bio)
 }

 static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-				   int mirror_num)
+				   int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
@@ -556,17 +560,17 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 }

 static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-				 int mirror_num)
+				 int mirror_num, unsigned long bio_flags)
 {
 	/*
 	 * kthread helpers are used to submit writes so that checksumming
 	 * can happen in parallel across all CPUs
 	 */
 	if (!(rw & (1 << BIO_RW))) {
-		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
+		return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0);
 	}
 	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-				   inode, rw, bio, mirror_num,
+				   inode, rw, bio, mirror_num, 0,
 				   __btree_submit_bio_hook);
 }

@@ -1407,6 +1411,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->btree_inode = new_inode(sb);
 	fs_info->btree_inode->i_ino = 1;
 	fs_info->btree_inode->i_nlink = 1;
+
 	fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);

 	INIT_LIST_HEAD(&fs_info->ordered_extents);
@@ -1508,6 +1513,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	 */
 	btrfs_init_workers(&fs_info->workers, "worker",
 			   fs_info->thread_pool_size);
+
 	btrfs_init_workers(&fs_info->submit_workers, "submit",
 			   min_t(u64, fs_devices->num_devices,
 			   fs_info->thread_pool_size));
@@ -1559,6 +1565,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	}

 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
+	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
+				    4 * 1024 * 1024 / PAGE_CACHE_SIZE);

 	nodesize = btrfs_super_nodesize(disk_super);
 	leafsize = btrfs_super_leafsize(disk_super);
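
The readahead floor added at the end of open_ctree() is easy to check by hand. A standalone arithmetic sketch, assuming 4k pages and the common 128k default readahead window (both values are assumptions about a typical config, not taken from this patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_cache_size = 4096;	/* PAGE_CACHE_SIZE on x86 */
		unsigned long floor = 4 * 1024 * 1024 / page_cache_size; /* 1024 */
		unsigned long ra_pages = 32;	/* 128k window, single device */

		if (ra_pages < floor)
			ra_pages = floor;
		printf("ra_pages = %lu (%lu bytes)\n", ra_pages,
		       ra_pages * page_cache_size);	/* 1024 pages = 4MB */
		return 0;
	}
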
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index f84f5058dbbb..4eb1f1408d21 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -71,6 +71,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			int metadata);
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 			int rw, struct bio *bio, int mirror_num,
+			unsigned long bio_flags,
 			extent_submit_bio_hook_t *submit_bio_hook);
 int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 280ac1aa9b6d..bbf04e80a1a3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3278,6 +3278,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,

 	em->start = extent_key->objectid - offset;
 	em->len = extent_key->offset;
+	em->block_len = extent_key->offset;
 	em->block_start = extent_key->objectid;
 	em->bdev = root->fs_info->fs_devices->latest_bdev;
 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
@@ -3314,10 +3315,14 @@ struct btrfs_ref_path {
 };

 struct disk_extent {
+	u64 ram_bytes;
 	u64 disk_bytenr;
 	u64 disk_num_bytes;
 	u64 offset;
 	u64 num_bytes;
+	u8 compression;
+	u8 encryption;
+	u16 other_encoding;
 };

 static int is_cowonly_root(u64 root_objectid)
@@ -3631,6 +3636,11 @@ static int noinline get_new_locations(struct inode *reloc_inode,
 			btrfs_file_extent_disk_num_bytes(leaf, fi);
 		exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
 		exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+		exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+		exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
+		exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
+		exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
+									   fi);
 		WARN_ON(exts[nr].offset > 0);
 		WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);

@@ -3846,6 +3856,8 @@ next:
 					new_extents[0].disk_bytenr);
 				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 					new_extents[0].disk_num_bytes);
+				btrfs_set_file_extent_ram_bytes(leaf, fi,
+					new_extents[0].ram_bytes);
 				ext_offset += new_extents[0].offset;
 				btrfs_set_file_extent_offset(leaf, fi, ext_offset);
 				btrfs_mark_buffer_dirty(leaf);
@@ -3911,6 +3923,16 @@ next:
 					new_extents[i].disk_bytenr);
 			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 					new_extents[i].disk_num_bytes);
+			btrfs_set_file_extent_ram_bytes(leaf, fi,
+					new_extents[i].ram_bytes);
+
+			btrfs_set_file_extent_compression(leaf, fi,
+					new_extents[i].compression);
+			btrfs_set_file_extent_encryption(leaf, fi,
+					new_extents[i].encryption);
+			btrfs_set_file_extent_other_encoding(leaf, fi,
+					new_extents[i].other_encoding);
+
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 					extent_len);
 			ext_offset += new_extents[i].offset;
@@ -4169,6 +4191,8 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
 		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

 		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+		btrfs_set_file_extent_ram_bytes(leaf, fi,
+						new_extent->ram_bytes);
 		btrfs_set_file_extent_disk_bytenr(leaf, fi,
 						  new_extent->disk_bytenr);
 		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
@@ -4847,7 +4871,8 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
 	BUG_ON(err);

 	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
-				       group->key.offset, 0);
+				       group->key.offset, 0, group->key.offset,
+				       0, 0, 0);
 	BUG_ON(err);

 	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
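
All of the relocation paths above copy the new encoding fields through unchanged, because relocation moves an extent's bytes without re-encoding them. The invariant, as a compact sketch (the helper name is illustrative, not from this patch):

	static void copy_extent_encoding(struct disk_extent *dst,
					 const struct disk_extent *src)
	{
		dst->ram_bytes      = src->ram_bytes;
		dst->compression    = src->compression;
		dst->encryption     = src->encryption;
		dst->other_encoding = src->other_encoding;
		/* disk_bytenr/disk_num_bytes may change; the fields above must not */
	}
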
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 563b2d12f4f2..314041fdfa43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -30,6 +30,7 @@ static struct kmem_cache *extent_buffer_cache;
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);

+#define LEAK_DEBUG 1
 #ifdef LEAK_DEBUG
 static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
 #endif
@@ -1067,8 +1068,8 @@ EXPORT_SYMBOL(find_first_extent_bit_state);
  *
  * 1 is returned if we find something, 0 if nothing was in the tree
  */
-static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
-					     u64 *start, u64 *end, u64 max_bytes)
+static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
+					u64 *start, u64 *end, u64 max_bytes)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1077,11 +1078,11 @@ static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
 	u64 total_bytes = 0;

 	spin_lock_irq(&tree->lock);
+
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
 	 */
-search_again:
 	node = tree_search(tree, cur_start);
 	if (!node) {
 		if (!found)
@@ -1100,40 +1101,6 @@ search_again:
 			*end = state->end;
 			goto out;
 		}
-		if (!found && !(state->state & EXTENT_BOUNDARY)) {
-			struct extent_state *prev_state;
-			struct rb_node *prev_node = node;
-			while(1) {
-				prev_node = rb_prev(prev_node);
-				if (!prev_node)
-					break;
-				prev_state = rb_entry(prev_node,
-						      struct extent_state,
-						      rb_node);
-				if ((prev_state->end + 1 != state->start) ||
-				    !(prev_state->state & EXTENT_DELALLOC))
-					break;
-				if ((cur_start - prev_state->start) * 2 >
-				     max_bytes)
-					break;
-				state = prev_state;
-				node = prev_node;
-			}
-		}
-		if (state->state & EXTENT_LOCKED) {
-			DEFINE_WAIT(wait);
-			atomic_inc(&state->refs);
-			prepare_to_wait(&state->wq, &wait,
-					TASK_UNINTERRUPTIBLE);
-			spin_unlock_irq(&tree->lock);
-			schedule();
-			spin_lock_irq(&tree->lock);
-			finish_wait(&state->wq, &wait);
-			free_extent_state(state);
-			goto search_again;
-		}
-		set_state_cb(tree, state, EXTENT_LOCKED);
-		state->state |= EXTENT_LOCKED;
 		if (!found)
 			*start = state->start;
 		found++;
@@ -1151,6 +1118,208 @@ out:
 	return found;
 }

+static noinline int __unlock_for_delalloc(struct inode *inode,
+					  struct page *locked_page,
+					  u64 start, u64 end)
+{
+	int ret;
+	struct page *pages[16];
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+
+	if (index == locked_page->index && end_index == index)
+		return 0;
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		for (i = 0; i < ret; i++) {
+			if (pages[i] != locked_page)
+				unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	return 0;
+}
+
+static noinline int lock_delalloc_pages(struct inode *inode,
+					struct page *locked_page,
+					u64 delalloc_start,
+					u64 delalloc_end)
+{
+	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+	unsigned long start_index = index;
+	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+	unsigned long pages_locked = 0;
+	struct page *pages[16];
+	unsigned long nrpages;
+	int ret;
+	int i;
+
+	/* the caller is responsible for locking the start index */
+	if (index == locked_page->index && index == end_index)
+		return 0;
+
+	/* skip the page at the start index */
+	nrpages = end_index - index + 1;
+	while(nrpages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nrpages, ARRAY_SIZE(pages)), pages);
+		if (ret == 0) {
+			ret = -EAGAIN;
+			goto done;
+		}
+		/* now we have an array of pages, lock them all */
+		for (i = 0; i < ret; i++) {
+			/*
+			 * the caller is taking responsibility for
+			 * locked_page
+			 */
+			if (pages[i] != locked_page)
+				lock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		pages_locked += ret;
+		nrpages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	ret = 0;
+done:
+	if (ret && pages_locked) {
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start,
+			      ((u64)(start_index + pages_locked - 1)) <<
+			      PAGE_CACHE_SHIFT);
+	}
+	return ret;
+}
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'.  start and end are used to return the range.
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+					     struct extent_io_tree *tree,
+					     struct page *locked_page,
+					     u64 *start, u64 *end,
+					     u64 max_bytes)
+{
+	u64 delalloc_start;
+	u64 delalloc_end;
+	u64 found;
+	int ret;
+	int loops = 0;
+
+again:
+	/* step one, find a bunch of delalloc bytes starting at start */
+	delalloc_start = *start;
+	delalloc_end = 0;
+	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
+				    max_bytes);
+	if (!found) {
+		*start = delalloc_start;
+		*end = delalloc_end;
+		return found;
+	}
+
+	/*
+	 * make sure to limit the number of pages we try to lock down
+	 * if we're looping.
+	 */
+	if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
+		delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) &
+			~((u64)PAGE_CACHE_SIZE - 1);
+	}
+	/* step two, lock all the pages after the page that has start */
+	ret = lock_delalloc_pages(inode, locked_page,
+				  delalloc_start, delalloc_end);
+	if (ret == -EAGAIN) {
+		/* some of the pages are gone, let's avoid looping by
+		 * shortening the size of the delalloc range we're searching
+		 */
+		if (!loops) {
+			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
+			max_bytes = PAGE_CACHE_SIZE - offset;
+			loops = 1;
+			goto again;
+		} else {
+			found = 0;
+			goto out_failed;
+		}
+	}
+	BUG_ON(ret);
+
+	/* step three, lock the state bits for the whole range */
+	lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+
+	/* then test to make sure it is all still delalloc */
+	ret = test_range_bit(tree, delalloc_start, delalloc_end,
+			     EXTENT_DELALLOC, 1);
+	if (!ret) {
+		unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start, delalloc_end);
+		cond_resched();
+		goto again;
+	}
+	*start = delalloc_start;
+	*end = delalloc_end;
+out_failed:
+	return found;
+}
+
+int extent_clear_unlock_delalloc(struct inode *inode,
+				struct extent_io_tree *tree,
+				u64 start, u64 end, struct page *locked_page,
+				int clear_dirty, int set_writeback,
+				int end_writeback)
+{
+	int ret;
+	struct page *pages[16];
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = end_index - index + 1;
+	int i;
+	int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+
+	if (clear_dirty)
+		clear_bits |= EXTENT_DIRTY;
+
+	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
+
+	while(nr_pages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+				     min(nr_pages, ARRAY_SIZE(pages)), pages);
+		for (i = 0; i < ret; i++) {
+			if (pages[i] == locked_page) {
+				page_cache_release(pages[i]);
+				continue;
+			}
+			if (clear_dirty)
+				clear_page_dirty_for_io(pages[i]);
+			if (set_writeback)
+				set_page_writeback(pages[i]);
+			if (end_writeback)
+				end_page_writeback(pages[i]);
+			unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		nr_pages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(extent_clear_unlock_delalloc);
+
 /*
  * count the number of bytes in the tree that have a given bit(s)
  * set.  This can be fairly slow, except for EXTENT_DIRTY which is
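
The three helpers added above all walk byte ranges in page-sized batches, and they share the same index arithmetic. A standalone illustration, assuming 4k pages:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long start = 12288; /* inclusive start, on page 3 */
		unsigned long long end = 40959;   /* inclusive end, last byte of page 9 */
		unsigned int shift = 12;          /* PAGE_CACHE_SHIFT for 4k pages */

		unsigned long index = start >> shift;           /* 3 */
		unsigned long end_index = end >> shift;         /* 9 */
		unsigned long nr_pages = end_index - index + 1; /* 7 pages to visit */

		printf("index=%lu end_index=%lu nr_pages=%lu\n",
		       index, end_index, nr_pages);
		return 0;
	}
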
@@ -1631,38 +1800,26 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 	return bio;
 }

-static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
+			  unsigned long bio_flags)
 {
 	int ret = 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
-	struct rb_node *node;
-	struct extent_state *state;
 	u64 start;
 	u64 end;

 	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
 	end = start + bvec->bv_len - 1;

-	spin_lock_irq(&tree->lock);
-	node = __etree_search(tree, start, NULL, NULL);
-	BUG_ON(!node);
-	state = rb_entry(node, struct extent_state, rb_node);
-	while(state->end < end) {
-		node = rb_next(node);
-		state = rb_entry(node, struct extent_state, rb_node);
-	}
-	BUG_ON(state->end != end);
-	spin_unlock_irq(&tree->lock);
-
 	bio->bi_private = NULL;

 	bio_get(bio);

 	if (tree->ops && tree->ops->submit_bio_hook)
 		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
-					   mirror_num);
+					   mirror_num, bio_flags);
 	else
 		submit_bio(rw, bio);
 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
@@ -1678,39 +1835,56 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1678 | struct bio **bio_ret, | 1835 | struct bio **bio_ret, |
1679 | unsigned long max_pages, | 1836 | unsigned long max_pages, |
1680 | bio_end_io_t end_io_func, | 1837 | bio_end_io_t end_io_func, |
1681 | int mirror_num) | 1838 | int mirror_num, |
1839 | unsigned long prev_bio_flags, | ||
1840 | unsigned long bio_flags) | ||
1682 | { | 1841 | { |
1683 | int ret = 0; | 1842 | int ret = 0; |
1684 | struct bio *bio; | 1843 | struct bio *bio; |
1685 | int nr; | 1844 | int nr; |
1845 | int contig = 0; | ||
1846 | int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED; | ||
1847 | int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; | ||
1848 | size_t page_size = min(size, PAGE_CACHE_SIZE); | ||
1686 | 1849 | ||
1687 | if (bio_ret && *bio_ret) { | 1850 | if (bio_ret && *bio_ret) { |
1688 | bio = *bio_ret; | 1851 | bio = *bio_ret; |
1689 | if (bio->bi_sector + (bio->bi_size >> 9) != sector || | 1852 | if (old_compressed) |
1853 | contig = bio->bi_sector == sector; | ||
1854 | else | ||
1855 | contig = bio->bi_sector + (bio->bi_size >> 9) == | ||
1856 | sector; | ||
1857 | |||
1858 | if (prev_bio_flags != bio_flags || !contig || | ||
1690 | (tree->ops && tree->ops->merge_bio_hook && | 1859 | (tree->ops && tree->ops->merge_bio_hook && |
1691 | tree->ops->merge_bio_hook(page, offset, size, bio)) || | 1860 | tree->ops->merge_bio_hook(page, offset, page_size, bio, |
1692 | bio_add_page(bio, page, size, offset) < size) { | 1861 | bio_flags)) || |
1693 | ret = submit_one_bio(rw, bio, mirror_num); | 1862 | bio_add_page(bio, page, page_size, offset) < page_size) { |
1863 | ret = submit_one_bio(rw, bio, mirror_num, | ||
1864 | prev_bio_flags); | ||
1694 | bio = NULL; | 1865 | bio = NULL; |
1695 | } else { | 1866 | } else { |
1696 | return 0; | 1867 | return 0; |
1697 | } | 1868 | } |
1698 | } | 1869 | } |
1699 | nr = bio_get_nr_vecs(bdev); | 1870 | if (this_compressed) |
1871 | nr = BIO_MAX_PAGES; | ||
1872 | else | ||
1873 | nr = bio_get_nr_vecs(bdev); | ||
1874 | |||
1700 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1875 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1701 | if (!bio) { | 1876 | if (!bio) { |
1702 | printk("failed to allocate bio nr %d\n", nr); | 1877 | printk("failed to allocate bio nr %d\n", nr); |
1703 | } | 1878 | } |
1704 | 1879 | ||
1705 | 1880 | bio_add_page(bio, page, page_size, offset); | |
1706 | bio_add_page(bio, page, size, offset); | ||
1707 | bio->bi_end_io = end_io_func; | 1881 | bio->bi_end_io = end_io_func; |
1708 | bio->bi_private = tree; | 1882 | bio->bi_private = tree; |
1709 | 1883 | ||
1710 | if (bio_ret) { | 1884 | if (bio_ret) { |
1711 | *bio_ret = bio; | 1885 | *bio_ret = bio; |
1712 | } else { | 1886 | } else { |
1713 | ret = submit_one_bio(rw, bio, mirror_num); | 1887 | ret = submit_one_bio(rw, bio, mirror_num, bio_flags); |
1714 | } | 1888 | } |
1715 | 1889 | ||
1716 | return ret; | 1890 | return ret; |
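The changed merge test is the interesting part of this hunk: every page of a compressed extent is carved out of the same on-disk blob, so a page only joins an existing compressed bio if that bio starts at the extent's first sector, whereas a regular page appends to the bio's current tail. The check in isolation (a sketch that mirrors the logic above, not an exported helper):

	static int bio_is_contig(struct bio *bio, sector_t sector,
				 unsigned long prev_bio_flags)
	{
		if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
			/* all pages share the blob's starting sector */
			return bio->bi_sector == sector;
		/* regular IO: the new page must follow the bio's tail */
		return bio->bi_sector + (bio->bi_size >> 9) == sector;
	}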
@@ -1738,7 +1912,8 @@ void set_page_extent_head(struct page *page, unsigned long len) | |||
1738 | static int __extent_read_full_page(struct extent_io_tree *tree, | 1912 | static int __extent_read_full_page(struct extent_io_tree *tree, |
1739 | struct page *page, | 1913 | struct page *page, |
1740 | get_extent_t *get_extent, | 1914 | get_extent_t *get_extent, |
1741 | struct bio **bio, int mirror_num) | 1915 | struct bio **bio, int mirror_num, |
1916 | unsigned long *bio_flags) | ||
1742 | { | 1917 | { |
1743 | struct inode *inode = page->mapping->host; | 1918 | struct inode *inode = page->mapping->host; |
1744 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1919 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
@@ -1756,13 +1931,27 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
1756 | int nr = 0; | 1931 | int nr = 0; |
1757 | size_t page_offset = 0; | 1932 | size_t page_offset = 0; |
1758 | size_t iosize; | 1933 | size_t iosize; |
1934 | size_t disk_io_size; | ||
1759 | size_t blocksize = inode->i_sb->s_blocksize; | 1935 | size_t blocksize = inode->i_sb->s_blocksize; |
1936 | unsigned long this_bio_flag = 0; | ||
1760 | 1937 | ||
1761 | set_page_extent_mapped(page); | 1938 | set_page_extent_mapped(page); |
1762 | 1939 | ||
1763 | end = page_end; | 1940 | end = page_end; |
1764 | lock_extent(tree, start, end, GFP_NOFS); | 1941 | lock_extent(tree, start, end, GFP_NOFS); |
1765 | 1942 | ||
1943 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | ||
1944 | char *userpage; | ||
1945 | size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); | ||
1946 | |||
1947 | if (zero_offset) { | ||
1948 | iosize = PAGE_CACHE_SIZE - zero_offset; | ||
1949 | userpage = kmap_atomic(page, KM_USER0); | ||
1950 | memset(userpage + zero_offset, 0, iosize); | ||
1951 | flush_dcache_page(page); | ||
1952 | kunmap_atomic(userpage, KM_USER0); | ||
1953 | } | ||
1954 | } | ||
1766 | while (cur <= end) { | 1955 | while (cur <= end) { |
1767 | if (cur >= last_byte) { | 1956 | if (cur >= last_byte) { |
1768 | char *userpage; | 1957 | char *userpage; |
@@ -1793,10 +1982,19 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); | |||
1793 | } | 1982 | } |
1794 | BUG_ON(end < cur); | 1983 | BUG_ON(end < cur); |
1795 | 1984 | ||
1985 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | ||
1986 | this_bio_flag = EXTENT_BIO_COMPRESSED; | ||
1987 | |||
1796 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 1988 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
1797 | cur_end = min(extent_map_end(em) - 1, end); | 1989 | cur_end = min(extent_map_end(em) - 1, end); |
1798 | iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); | 1990 | iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); |
1799 | sector = (em->block_start + extent_offset) >> 9; | 1991 | if (this_bio_flag & EXTENT_BIO_COMPRESSED) { |
1992 | disk_io_size = em->block_len; | ||
1993 | sector = em->block_start >> 9; | ||
1994 | } else { | ||
1995 | sector = (em->block_start + extent_offset) >> 9; | ||
1996 | disk_io_size = iosize; | ||
1997 | } | ||
1800 | bdev = em->bdev; | 1998 | bdev = em->bdev; |
1801 | block_start = em->block_start; | 1999 | block_start = em->block_start; |
1802 | free_extent_map(em); | 2000 | free_extent_map(em); |
@@ -1845,10 +2043,13 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); | |||
1845 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | 2043 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; |
1846 | pnr -= page->index; | 2044 | pnr -= page->index; |
1847 | ret = submit_extent_page(READ, tree, page, | 2045 | ret = submit_extent_page(READ, tree, page, |
1848 | sector, iosize, page_offset, | 2046 | sector, disk_io_size, page_offset, |
1849 | bdev, bio, pnr, | 2047 | bdev, bio, pnr, |
1850 | end_bio_extent_readpage, mirror_num); | 2048 | end_bio_extent_readpage, mirror_num, |
2049 | *bio_flags, | ||
2050 | this_bio_flag); | ||
1851 | nr++; | 2051 | nr++; |
2052 | *bio_flags = this_bio_flag; | ||
1852 | } | 2053 | } |
1853 | if (ret) | 2054 | if (ret) |
1854 | SetPageError(page); | 2055 | SetPageError(page); |
@@ -1867,11 +2068,13 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
1867 | get_extent_t *get_extent) | 2068 | get_extent_t *get_extent) |
1868 | { | 2069 | { |
1869 | struct bio *bio = NULL; | 2070 | struct bio *bio = NULL; |
2071 | unsigned long bio_flags = 0; | ||
1870 | int ret; | 2072 | int ret; |
1871 | 2073 | ||
1872 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0); | 2074 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, |
2075 | &bio_flags); | ||
1873 | if (bio) | 2076 | if (bio) |
1874 | submit_one_bio(READ, bio, 0); | 2077 | submit_one_bio(READ, bio, 0, bio_flags); |
1875 | return ret; | 2078 | return ret; |
1876 | } | 2079 | } |
1877 | EXPORT_SYMBOL(extent_read_full_page); | 2080 | EXPORT_SYMBOL(extent_read_full_page); |
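Callers now thread one bio pointer and one flags word through every page they read, so contiguous pages with matching flags coalesce into a single submission, and whatever is still queued at the end goes out with the flags of its last page. The multi-page shape of the pattern, as extent_readpages below uses it (pages and nr_pages are assumed locals):

	struct bio *bio = NULL;
	unsigned long bio_flags = 0;
	int i;

	for (i = 0; i < nr_pages; i++)
		__extent_read_full_page(tree, pages[i], get_extent,
					&bio, 0, &bio_flags);
	if (bio)	/* flush whatever is still batched up */
		submit_one_bio(READ, bio, 0, bio_flags);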
@@ -1909,6 +2112,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
1909 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | 2112 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; |
1910 | u64 nr_delalloc; | 2113 | u64 nr_delalloc; |
1911 | u64 delalloc_end; | 2114 | u64 delalloc_end; |
2115 | int page_started; | ||
2116 | int compressed; | ||
1912 | 2117 | ||
1913 | WARN_ON(!PageLocked(page)); | 2118 | WARN_ON(!PageLocked(page)); |
1914 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2119 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
@@ -1934,27 +2139,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
1934 | 2139 | ||
1935 | delalloc_start = start; | 2140 | delalloc_start = start; |
1936 | delalloc_end = 0; | 2141 | delalloc_end = 0; |
2142 | page_started = 0; | ||
1937 | while(delalloc_end < page_end) { | 2143 | while(delalloc_end < page_end) { |
1938 | nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, | 2144 | nr_delalloc = find_lock_delalloc_range(inode, tree, |
2145 | page, | ||
2146 | &delalloc_start, | ||
1939 | &delalloc_end, | 2147 | &delalloc_end, |
1940 | 128 * 1024 * 1024); | 2148 | 128 * 1024 * 1024); |
1941 | if (nr_delalloc == 0) { | 2149 | if (nr_delalloc == 0) { |
1942 | delalloc_start = delalloc_end + 1; | 2150 | delalloc_start = delalloc_end + 1; |
1943 | continue; | 2151 | continue; |
1944 | } | 2152 | } |
1945 | tree->ops->fill_delalloc(inode, delalloc_start, | 2153 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
1946 | delalloc_end); | 2154 | delalloc_end, &page_started); |
1947 | clear_extent_bit(tree, delalloc_start, | ||
1948 | delalloc_end, | ||
1949 | EXTENT_LOCKED | EXTENT_DELALLOC, | ||
1950 | 1, 0, GFP_NOFS); | ||
1951 | delalloc_start = delalloc_end + 1; | 2155 | delalloc_start = delalloc_end + 1; |
1952 | } | 2156 | } |
2157 | |||
2158 | /* did the fill delalloc function already unlock and start the IO? */ | ||
2159 | if (page_started) { | ||
2160 | return 0; | ||
2161 | } | ||
2162 | |||
1953 | lock_extent(tree, start, page_end, GFP_NOFS); | 2163 | lock_extent(tree, start, page_end, GFP_NOFS); |
1954 | unlock_start = start; | 2164 | unlock_start = start; |
1955 | 2165 | ||
1956 | if (tree->ops && tree->ops->writepage_start_hook) { | 2166 | if (tree->ops && tree->ops->writepage_start_hook) { |
1957 | ret = tree->ops->writepage_start_hook(page, start, page_end); | 2167 | ret = tree->ops->writepage_start_hook(page, start, |
2168 | page_end); | ||
1958 | if (ret == -EAGAIN) { | 2169 | if (ret == -EAGAIN) { |
1959 | unlock_extent(tree, start, page_end, GFP_NOFS); | 2170 | unlock_extent(tree, start, page_end, GFP_NOFS); |
1960 | redirty_page_for_writepage(wbc, page); | 2171 | redirty_page_for_writepage(wbc, page); |
@@ -2006,10 +2217,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2006 | sector = (em->block_start + extent_offset) >> 9; | 2217 | sector = (em->block_start + extent_offset) >> 9; |
2007 | bdev = em->bdev; | 2218 | bdev = em->bdev; |
2008 | block_start = em->block_start; | 2219 | block_start = em->block_start; |
2220 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
2009 | free_extent_map(em); | 2221 | free_extent_map(em); |
2010 | em = NULL; | 2222 | em = NULL; |
2011 | 2223 | ||
2012 | if (block_start == EXTENT_MAP_HOLE || | 2224 | /* |
2225 | * compressed and inline extents are written through other | ||
2226 | * paths in the FS | ||
2227 | */ | ||
2228 | if (compressed || block_start == EXTENT_MAP_HOLE || | ||
2013 | block_start == EXTENT_MAP_INLINE) { | 2229 | block_start == EXTENT_MAP_INLINE) { |
2014 | clear_extent_dirty(tree, cur, | 2230 | clear_extent_dirty(tree, cur, |
2015 | cur + iosize - 1, GFP_NOFS); | 2231 | cur + iosize - 1, GFP_NOFS); |
@@ -2017,16 +2233,28 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2017 | unlock_extent(tree, unlock_start, cur + iosize -1, | 2233 | unlock_extent(tree, unlock_start, cur + iosize -1, |
2018 | GFP_NOFS); | 2234 | GFP_NOFS); |
2019 | 2235 | ||
2020 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2236 | /* |
2237 | * end_io notification does not happen here for | ||
2238 | * compressed extents | ||
2239 | */ | ||
2240 | if (!compressed && tree->ops && | ||
2241 | tree->ops->writepage_end_io_hook) | ||
2021 | tree->ops->writepage_end_io_hook(page, cur, | 2242 | tree->ops->writepage_end_io_hook(page, cur, |
2022 | cur + iosize - 1, | 2243 | cur + iosize - 1, |
2023 | NULL, 1); | 2244 | NULL, 1); |
2024 | cur = cur + iosize; | 2245 | else if (compressed) { |
2246 | /* we don't want to end_page_writeback on | ||
2247 | * a compressed extent; that happens | ||
2248 | * elsewhere | ||
2249 | */ | ||
2250 | nr++; | ||
2251 | } | ||
2252 | |||
2253 | cur += iosize; | ||
2025 | pg_offset += iosize; | 2254 | pg_offset += iosize; |
2026 | unlock_start = cur; | 2255 | unlock_start = cur; |
2027 | continue; | 2256 | continue; |
2028 | } | 2257 | } |
2029 | |||
2030 | /* leave this out until we have a page_mkwrite call */ | 2258 | /* leave this out until we have a page_mkwrite call */ |
2031 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2259 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
2032 | EXTENT_DIRTY, 0)) { | 2260 | EXTENT_DIRTY, 0)) { |
@@ -2034,6 +2262,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2034 | pg_offset += iosize; | 2262 | pg_offset += iosize; |
2035 | continue; | 2263 | continue; |
2036 | } | 2264 | } |
2265 | |||
2037 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | 2266 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); |
2038 | if (tree->ops && tree->ops->writepage_io_hook) { | 2267 | if (tree->ops && tree->ops->writepage_io_hook) { |
2039 | ret = tree->ops->writepage_io_hook(page, cur, | 2268 | ret = tree->ops->writepage_io_hook(page, cur, |
@@ -2057,7 +2286,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2057 | ret = submit_extent_page(WRITE, tree, page, sector, | 2286 | ret = submit_extent_page(WRITE, tree, page, sector, |
2058 | iosize, pg_offset, bdev, | 2287 | iosize, pg_offset, bdev, |
2059 | &epd->bio, max_nr, | 2288 | &epd->bio, max_nr, |
2060 | end_bio_extent_writepage, 0); | 2289 | end_bio_extent_writepage, |
2290 | 0, 0, 0); | ||
2061 | if (ret) | 2291 | if (ret) |
2062 | SetPageError(page); | 2292 | SetPageError(page); |
2063 | } | 2293 | } |
@@ -2226,7 +2456,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2226 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | 2456 | extent_write_cache_pages(tree, mapping, &wbc_writepages, |
2227 | __extent_writepage, &epd); | 2457 | __extent_writepage, &epd); |
2228 | if (epd.bio) { | 2458 | if (epd.bio) { |
2229 | submit_one_bio(WRITE, epd.bio, 0); | 2459 | submit_one_bio(WRITE, epd.bio, 0, 0); |
2230 | } | 2460 | } |
2231 | return ret; | 2461 | return ret; |
2232 | } | 2462 | } |
@@ -2248,7 +2478,7 @@ int extent_writepages(struct extent_io_tree *tree, | |||
2248 | ret = extent_write_cache_pages(tree, mapping, wbc, | 2478 | ret = extent_write_cache_pages(tree, mapping, wbc, |
2249 | __extent_writepage, &epd); | 2479 | __extent_writepage, &epd); |
2250 | if (epd.bio) { | 2480 | if (epd.bio) { |
2251 | submit_one_bio(WRITE, epd.bio, 0); | 2481 | submit_one_bio(WRITE, epd.bio, 0, 0); |
2252 | } | 2482 | } |
2253 | return ret; | 2483 | return ret; |
2254 | } | 2484 | } |
@@ -2262,6 +2492,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
2262 | struct bio *bio = NULL; | 2492 | struct bio *bio = NULL; |
2263 | unsigned page_idx; | 2493 | unsigned page_idx; |
2264 | struct pagevec pvec; | 2494 | struct pagevec pvec; |
2495 | unsigned long bio_flags = 0; | ||
2265 | 2496 | ||
2266 | pagevec_init(&pvec, 0); | 2497 | pagevec_init(&pvec, 0); |
2267 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 2498 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
@@ -2281,7 +2512,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
2281 | if (!pagevec_add(&pvec, page)) | 2512 | if (!pagevec_add(&pvec, page)) |
2282 | __pagevec_lru_add(&pvec); | 2513 | __pagevec_lru_add(&pvec); |
2283 | __extent_read_full_page(tree, page, get_extent, | 2514 | __extent_read_full_page(tree, page, get_extent, |
2284 | &bio, 0); | 2515 | &bio, 0, &bio_flags); |
2285 | } | 2516 | } |
2286 | page_cache_release(page); | 2517 | page_cache_release(page); |
2287 | } | 2518 | } |
@@ -2289,7 +2520,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
2289 | __pagevec_lru_add(&pvec); | 2520 | __pagevec_lru_add(&pvec); |
2290 | BUG_ON(!list_empty(pages)); | 2521 | BUG_ON(!list_empty(pages)); |
2291 | if (bio) | 2522 | if (bio) |
2292 | submit_one_bio(READ, bio, 0); | 2523 | submit_one_bio(READ, bio, 0, bio_flags); |
2293 | return 0; | 2524 | return 0; |
2294 | } | 2525 | } |
2295 | EXPORT_SYMBOL(extent_readpages); | 2526 | EXPORT_SYMBOL(extent_readpages); |
@@ -2414,7 +2645,8 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2414 | ret = submit_extent_page(READ, tree, page, | 2645 | ret = submit_extent_page(READ, tree, page, |
2415 | sector, iosize, page_offset, em->bdev, | 2646 | sector, iosize, page_offset, em->bdev, |
2416 | NULL, 1, | 2647 | NULL, 1, |
2417 | end_bio_extent_preparewrite, 0); | 2648 | end_bio_extent_preparewrite, 0, |
2649 | 0, 0); | ||
2418 | iocount++; | 2650 | iocount++; |
2419 | block_start = block_start + iosize; | 2651 | block_start = block_start + iosize; |
2420 | } else { | 2652 | } else { |
@@ -2495,7 +2727,9 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
2495 | } | 2727 | } |
2496 | if (!test_range_bit(tree, em->start, | 2728 | if (!test_range_bit(tree, em->start, |
2497 | extent_map_end(em) - 1, | 2729 | extent_map_end(em) - 1, |
2498 | EXTENT_LOCKED, 0)) { | 2730 | EXTENT_LOCKED | EXTENT_WRITEBACK | |
2731 | EXTENT_ORDERED, | ||
2732 | 0)) { | ||
2499 | remove_extent_mapping(map, em); | 2733 | remove_extent_mapping(map, em); |
2500 | /* once for the rb tree */ | 2734 | /* once for the rb tree */ |
2501 | free_extent_map(em); | 2735 | free_extent_map(em); |
@@ -2923,6 +3157,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
2923 | int inc_all_pages = 0; | 3157 | int inc_all_pages = 0; |
2924 | unsigned long num_pages; | 3158 | unsigned long num_pages; |
2925 | struct bio *bio = NULL; | 3159 | struct bio *bio = NULL; |
3160 | unsigned long bio_flags = 0; | ||
2926 | 3161 | ||
2927 | if (eb->flags & EXTENT_UPTODATE) | 3162 | if (eb->flags & EXTENT_UPTODATE) |
2928 | return 0; | 3163 | return 0; |
@@ -2973,7 +3208,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
2973 | ClearPageError(page); | 3208 | ClearPageError(page); |
2974 | err = __extent_read_full_page(tree, page, | 3209 | err = __extent_read_full_page(tree, page, |
2975 | get_extent, &bio, | 3210 | get_extent, &bio, |
2976 | mirror_num); | 3211 | mirror_num, &bio_flags); |
2977 | if (err) { | 3212 | if (err) { |
2978 | ret = err; | 3213 | ret = err; |
2979 | printk("err %d from __extent_read_full_page\n", ret); | 3214 | printk("err %d from __extent_read_full_page\n", ret); |
@@ -2984,7 +3219,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
2984 | } | 3219 | } |
2985 | 3220 | ||
2986 | if (bio) | 3221 | if (bio) |
2987 | submit_one_bio(READ, bio, mirror_num); | 3222 | submit_one_bio(READ, bio, mirror_num, bio_flags); |
2988 | 3223 | ||
2989 | if (ret || !wait) { | 3224 | if (ret || !wait) { |
2990 | if (ret) | 3225 | if (ret) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c9d1908a1ae3..86f859b87a6e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -18,6 +18,9 @@ | |||
18 | #define EXTENT_BOUNDARY (1 << 11) | 18 | #define EXTENT_BOUNDARY (1 << 11) |
19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
20 | 20 | ||
21 | /* flags for bio submission */ | ||
22 | #define EXTENT_BIO_COMPRESSED 1 | ||
23 | |||
21 | /* | 24 | /* |
22 | * page->private values. Every page that is controlled by the extent | 25 | * page->private values. Every page that is controlled by the extent |
23 | * map has page->private set to one. | 26 | * map has page->private set to one. |
@@ -28,14 +31,17 @@ | |||
28 | struct extent_state; | 31 | struct extent_state; |
29 | 32 | ||
30 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 33 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
31 | struct bio *bio, int mirror_num); | 34 | struct bio *bio, int mirror_num, |
35 | unsigned long bio_flags); | ||
32 | struct extent_io_ops { | 36 | struct extent_io_ops { |
33 | int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); | 37 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
38 | u64 start, u64 end, int *page_started); | ||
34 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); | 39 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); |
35 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); | 40 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); |
36 | extent_submit_bio_hook_t *submit_bio_hook; | 41 | extent_submit_bio_hook_t *submit_bio_hook; |
37 | int (*merge_bio_hook)(struct page *page, unsigned long offset, | 42 | int (*merge_bio_hook)(struct page *page, unsigned long offset, |
38 | size_t size, struct bio *bio); | 43 | size_t size, struct bio *bio, |
44 | unsigned long bio_flags); | ||
39 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 45 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
40 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, | 46 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, |
41 | u64 start, u64 end, | 47 | u64 start, u64 end, |
@@ -245,4 +251,9 @@ void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | |||
245 | int release_extent_buffer_tail_pages(struct extent_buffer *eb); | 251 | int release_extent_buffer_tail_pages(struct extent_buffer *eb); |
246 | int extent_range_uptodate(struct extent_io_tree *tree, | 252 | int extent_range_uptodate(struct extent_io_tree *tree, |
247 | u64 start, u64 end); | 253 | u64 start, u64 end); |
254 | int extent_clear_unlock_delalloc(struct inode *inode, | ||
255 | struct extent_io_tree *tree, | ||
256 | u64 start, u64 end, struct page *locked_page, | ||
257 | int clear_dirty, int set_writeback, | ||
258 | int clear_writeback); | ||
248 | #endif | 259 | #endif |
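Taken together, a filesystem wiring up these hooks now receives the bio flags in its submit and merge callbacks, and reports back through *page_started whether fill_delalloc already unlocked locked_page and started its IO. A minimal stub wiring under those assumptions (the bodies are placeholders; only the signatures come from the header above):

	static int my_fill_delalloc(struct inode *inode, struct page *locked_page,
				    u64 start, u64 end, int *page_started)
	{
		*page_started = 0;	/* set to 1 if we start IO on locked_page */
		return 0;
	}

	static int my_merge_bio(struct page *page, unsigned long offset,
				size_t size, struct bio *bio,
				unsigned long bio_flags)
	{
		return 0;		/* 0 means the page may join this bio */
	}

	static int my_submit_bio(struct inode *inode, int rw, struct bio *bio,
				 int mirror_num, unsigned long bio_flags)
	{
		submit_bio(rw, bio);	/* a real hook would csum/remap first */
		return 0;
	}

	static struct extent_io_ops my_extent_ops = {
		.fill_delalloc	 = my_fill_delalloc,
		.merge_bio_hook	 = my_merge_bio,
		.submit_bio_hook = my_submit_bio,
	};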
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 74b2a29880d3..fd3ebfb8c3c5 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -184,6 +184,13 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
184 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) | 184 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) |
185 | return 0; | 185 | return 0; |
186 | 186 | ||
187 | /* | ||
188 | * don't merge compressed extents, we need to know their | ||
189 | * actual size | ||
190 | */ | ||
191 | if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) | ||
192 | return 0; | ||
193 | |||
187 | if (extent_map_end(prev) == next->start && | 194 | if (extent_map_end(prev) == next->start && |
188 | prev->flags == next->flags && | 195 | prev->flags == next->flags && |
189 | prev->bdev == next->bdev && | 196 | prev->bdev == next->bdev && |
@@ -239,6 +246,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
239 | if (rb && mergable_maps(merge, em)) { | 246 | if (rb && mergable_maps(merge, em)) { |
240 | em->start = merge->start; | 247 | em->start = merge->start; |
241 | em->len += merge->len; | 248 | em->len += merge->len; |
249 | em->block_len += merge->block_len; | ||
242 | em->block_start = merge->block_start; | 250 | em->block_start = merge->block_start; |
243 | merge->in_tree = 0; | 251 | merge->in_tree = 0; |
244 | rb_erase(&merge->rb_node, &tree->map); | 252 | rb_erase(&merge->rb_node, &tree->map); |
@@ -250,6 +258,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
250 | merge = rb_entry(rb, struct extent_map, rb_node); | 258 | merge = rb_entry(rb, struct extent_map, rb_node); |
251 | if (rb && mergable_maps(em, merge)) { | 259 | if (rb && mergable_maps(em, merge)) { |
252 | em->len += merge->len; | 260 | em->len += merge->len; |
261 | em->block_len += merge->len; | ||
253 | rb_erase(&merge->rb_node, &tree->map); | 262 | rb_erase(&merge->rb_node, &tree->map); |
254 | merge->in_tree = 0; | 263 | merge->in_tree = 0; |
255 | free_extent_map(merge); | 264 | free_extent_map(merge); |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 26ac6fe0b268..abbcbeb28c79 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | /* bits for the flags field */ | 11 | /* bits for the flags field */ |
12 | #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ | 12 | #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ |
13 | #define EXTENT_FLAG_COMPRESSED 1 | ||
13 | 14 | ||
14 | struct extent_map { | 15 | struct extent_map { |
15 | struct rb_node rb_node; | 16 | struct rb_node rb_node; |
@@ -18,6 +19,7 @@ struct extent_map { | |||
18 | u64 start; | 19 | u64 start; |
19 | u64 len; | 20 | u64 len; |
20 | u64 block_start; | 21 | u64 block_start; |
22 | u64 block_len; | ||
21 | unsigned long flags; | 23 | unsigned long flags; |
22 | struct block_device *bdev; | 24 | struct block_device *bdev; |
23 | atomic_t refs; | 25 | atomic_t refs; |
@@ -38,9 +40,9 @@ static inline u64 extent_map_end(struct extent_map *em) | |||
38 | 40 | ||
39 | static inline u64 extent_map_block_end(struct extent_map *em) | 41 | static inline u64 extent_map_block_end(struct extent_map *em) |
40 | { | 42 | { |
41 | if (em->block_start + em->len < em->block_start) | 43 | if (em->block_start + em->block_len < em->block_start) |
42 | return (u64)-1; | 44 | return (u64)-1; |
43 | return em->block_start + em->len; | 45 | return em->block_start + em->block_len; |
44 | } | 46 | } |
45 | 47 | ||
46 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); | 48 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); |
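With block_len in place, a mapping's logical size and its on-disk size can differ, and extent_map_block_end follows the disk size. Illustrative values for a 128K extent that compressed down to 8K (the numbers are made up; a real extent_map is allocated and refcounted, this fragment only shows the fields):

	struct extent_map em = {
		.start       = 0,		/* file offset                  */
		.len         = 128 * 1024,	/* logical, uncompressed length */
		.block_start = 1024 * 1024,	/* arbitrary disk byte          */
		.block_len   = 8 * 1024,	/* compressed bytes on disk     */
	};
	set_bit(EXTENT_FLAG_COMPRESSED, &em.flags);
	/* extent_map_end(&em)       == 128K       (file range covered) */
	/* extent_map_block_end(&em) == 1M + 8K    (disk range to read) */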
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 6dbe88b9d7d4..f4d3fa71bc41 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -31,7 +31,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
31 | struct btrfs_root *root, | 31 | struct btrfs_root *root, |
32 | u64 objectid, u64 pos, | 32 | u64 objectid, u64 pos, |
33 | u64 disk_offset, u64 disk_num_bytes, | 33 | u64 disk_offset, u64 disk_num_bytes, |
34 | u64 num_bytes, u64 offset) | 34 | u64 num_bytes, u64 offset, u64 ram_bytes, |
35 | u8 compression, u8 encryption, u16 other_encoding) | ||
35 | { | 36 | { |
36 | int ret = 0; | 37 | int ret = 0; |
37 | struct btrfs_file_extent_item *item; | 38 | struct btrfs_file_extent_item *item; |
@@ -57,8 +58,13 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
57 | btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); | 58 | btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); |
58 | btrfs_set_file_extent_offset(leaf, item, offset); | 59 | btrfs_set_file_extent_offset(leaf, item, offset); |
59 | btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); | 60 | btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); |
61 | btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes); | ||
60 | btrfs_set_file_extent_generation(leaf, item, trans->transid); | 62 | btrfs_set_file_extent_generation(leaf, item, trans->transid); |
61 | btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); | 63 | btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); |
64 | btrfs_set_file_extent_compression(leaf, item, compression); | ||
65 | btrfs_set_file_extent_encryption(leaf, item, encryption); | ||
66 | btrfs_set_file_extent_other_encoding(leaf, item, other_encoding); | ||
67 | |||
62 | btrfs_mark_buffer_dirty(leaf); | 68 | btrfs_mark_buffer_dirty(leaf); |
63 | out: | 69 | out: |
64 | btrfs_free_path(path); | 70 | btrfs_free_path(path); |
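Existing callers grow four arguments. For a plain uncompressed extent the new fields are trivial: ram_bytes equals num_bytes and the encoding fields are zero, exactly as the hole insertion in file.c below passes them. A compressed extent instead passes the uncompressed length as ram_bytes, the compressed length as disk_num_bytes, and BTRFS_COMPRESS_ZLIB. A sketch of the uncompressed call (the local names are illustrative):

	ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
				       file_pos,	/* pos in the file        */
				       disk_bytenr,	/* disk_offset            */
				       num_bytes,	/* disk_num_bytes         */
				       num_bytes,	/* logical num_bytes      */
				       0,		/* offset into the extent */
				       num_bytes,	/* ram_bytes              */
				       0, 0, 0);	/* no compression,        */
							/* encryption or encoding */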
@@ -213,6 +219,73 @@ found: | |||
213 | return 0; | 219 | return 0; |
214 | } | 220 | } |
215 | 221 | ||
222 | int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, | ||
223 | u64 start, unsigned long len) | ||
224 | { | ||
225 | struct btrfs_ordered_sum *sums; | ||
226 | struct btrfs_sector_sum *sector_sum; | ||
227 | struct btrfs_ordered_extent *ordered; | ||
228 | char *data; | ||
229 | struct page *page; | ||
230 | unsigned long total_bytes = 0; | ||
231 | unsigned long this_sum_bytes = 0; | ||
232 | |||
233 | sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS); | ||
234 | if (!sums) | ||
235 | return -ENOMEM; | ||
236 | |||
237 | sector_sum = sums->sums; | ||
238 | sums->file_offset = start; | ||
239 | sums->len = len; | ||
240 | INIT_LIST_HEAD(&sums->list); | ||
241 | ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset); | ||
242 | BUG_ON(!ordered); | ||
243 | |||
244 | while(len > 0) { | ||
245 | if (start >= ordered->file_offset + ordered->len || | ||
246 | start < ordered->file_offset) { | ||
247 | sums->len = this_sum_bytes; | ||
248 | this_sum_bytes = 0; | ||
249 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
250 | btrfs_put_ordered_extent(ordered); | ||
251 | |||
252 | sums = kzalloc(btrfs_ordered_sum_size(root, len), | ||
253 | GFP_NOFS); | ||
254 | BUG_ON(!sums); | ||
255 | sector_sum = sums->sums; | ||
256 | sums->len = len; | ||
257 | sums->file_offset = start; | ||
258 | ordered = btrfs_lookup_ordered_extent(inode, | ||
259 | sums->file_offset); | ||
260 | BUG_ON(!ordered); | ||
261 | } | ||
262 | |||
263 | page = find_get_page(inode->i_mapping, | ||
264 | start >> PAGE_CACHE_SHIFT); | ||
265 | |||
266 | data = kmap_atomic(page, KM_USER0); | ||
267 | sector_sum->sum = ~(u32)0; | ||
268 | sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum, | ||
269 | PAGE_CACHE_SIZE); | ||
270 | kunmap_atomic(data, KM_USER0); | ||
271 | btrfs_csum_final(sector_sum->sum, | ||
272 | (char *)§or_sum->sum); | ||
273 | sector_sum->offset = page_offset(page); | ||
274 | page_cache_release(page); | ||
275 | |||
276 | sector_sum++; | ||
277 | total_bytes += PAGE_CACHE_SIZE; | ||
278 | this_sum_bytes += PAGE_CACHE_SIZE; | ||
279 | start += PAGE_CACHE_SIZE; | ||
280 | |||
281 | WARN_ON(len < PAGE_CACHE_SIZE); | ||
282 | len -= PAGE_CACHE_SIZE; | ||
283 | } | ||
284 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
285 | btrfs_put_ordered_extent(ordered); | ||
286 | return 0; | ||
287 | } | ||
288 | |||
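Each pass of the loop above reduces to one crc32c over a page, seeded with ~0 and folded into its on-disk form by btrfs_csum_final. The per-page step in isolation (a sketch; assumes the page is present and fully written):

	static u32 csum_one_page(struct btrfs_root *root, struct page *page)
	{
		char *data;
		u32 sum = ~(u32)0;			/* crc32c seed */

		data = kmap_atomic(page, KM_USER0);
		sum = btrfs_csum_data(root, data, sum, PAGE_CACHE_SIZE);
		kunmap_atomic(data, KM_USER0);
		btrfs_csum_final(sum, (char *)&sum);	/* fold to disk format */
		return sum;
	}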
216 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 289 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
217 | struct bio *bio) | 290 | struct bio *bio) |
218 | { | 291 | { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 69abbe19add2..0aa15436590e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -95,153 +95,6 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
95 | } | 95 | } |
96 | } | 96 | } |
97 | 97 | ||
98 | /* this does all the hard work for inserting an inline extent into | ||
99 | * the btree. Any existing inline extent is extended as required to make room, | ||
100 | * otherwise things are inserted as required into the btree | ||
101 | */ | ||
102 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, | ||
103 | struct btrfs_root *root, struct inode *inode, | ||
104 | u64 offset, size_t size, | ||
105 | struct page **pages, size_t page_offset, | ||
106 | int num_pages) | ||
107 | { | ||
108 | struct btrfs_key key; | ||
109 | struct btrfs_path *path; | ||
110 | struct extent_buffer *leaf; | ||
111 | char *kaddr; | ||
112 | unsigned long ptr; | ||
113 | struct btrfs_file_extent_item *ei; | ||
114 | struct page *page; | ||
115 | u32 datasize; | ||
116 | int err = 0; | ||
117 | int ret; | ||
118 | int i; | ||
119 | ssize_t cur_size; | ||
120 | |||
121 | path = btrfs_alloc_path(); | ||
122 | if (!path) | ||
123 | return -ENOMEM; | ||
124 | |||
125 | btrfs_set_trans_block_group(trans, inode); | ||
126 | |||
127 | key.objectid = inode->i_ino; | ||
128 | key.offset = offset; | ||
129 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | ||
130 | |||
131 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
132 | if (ret < 0) { | ||
133 | err = ret; | ||
134 | goto fail; | ||
135 | } | ||
136 | if (ret == 1) { | ||
137 | struct btrfs_key found_key; | ||
138 | |||
139 | if (path->slots[0] == 0) | ||
140 | goto insert; | ||
141 | |||
142 | path->slots[0]--; | ||
143 | leaf = path->nodes[0]; | ||
144 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
145 | |||
146 | if (found_key.objectid != inode->i_ino) | ||
147 | goto insert; | ||
148 | |||
149 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
150 | goto insert; | ||
151 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
152 | struct btrfs_file_extent_item); | ||
153 | |||
154 | if (btrfs_file_extent_type(leaf, ei) != | ||
155 | BTRFS_FILE_EXTENT_INLINE) { | ||
156 | goto insert; | ||
157 | } | ||
158 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
159 | ret = 0; | ||
160 | } | ||
161 | if (ret == 0) { | ||
162 | u32 found_size; | ||
163 | u64 found_end; | ||
164 | |||
165 | leaf = path->nodes[0]; | ||
166 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
167 | struct btrfs_file_extent_item); | ||
168 | |||
169 | if (btrfs_file_extent_type(leaf, ei) != | ||
170 | BTRFS_FILE_EXTENT_INLINE) { | ||
171 | err = ret; | ||
172 | btrfs_print_leaf(root, leaf); | ||
173 | printk("found wasn't inline offset %Lu inode %lu\n", | ||
174 | offset, inode->i_ino); | ||
175 | goto fail; | ||
176 | } | ||
177 | found_size = btrfs_file_extent_inline_len(leaf, | ||
178 | btrfs_item_nr(leaf, path->slots[0])); | ||
179 | found_end = key.offset + found_size; | ||
180 | |||
181 | if (found_end < offset + size) { | ||
182 | btrfs_release_path(root, path); | ||
183 | ret = btrfs_search_slot(trans, root, &key, path, | ||
184 | offset + size - found_end, 1); | ||
185 | BUG_ON(ret != 0); | ||
186 | |||
187 | ret = btrfs_extend_item(trans, root, path, | ||
188 | offset + size - found_end); | ||
189 | if (ret) { | ||
190 | err = ret; | ||
191 | goto fail; | ||
192 | } | ||
193 | leaf = path->nodes[0]; | ||
194 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
195 | struct btrfs_file_extent_item); | ||
196 | inode_add_bytes(inode, offset + size - found_end); | ||
197 | } | ||
198 | if (found_end < offset) { | ||
199 | ptr = btrfs_file_extent_inline_start(ei) + found_size; | ||
200 | memset_extent_buffer(leaf, 0, ptr, offset - found_end); | ||
201 | } | ||
202 | } else { | ||
203 | insert: | ||
204 | btrfs_release_path(root, path); | ||
205 | datasize = offset + size - key.offset; | ||
206 | inode_add_bytes(inode, datasize); | ||
207 | datasize = btrfs_file_extent_calc_inline_size(datasize); | ||
208 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
209 | datasize); | ||
210 | if (ret) { | ||
211 | err = ret; | ||
212 | printk("got bad ret %d\n", ret); | ||
213 | goto fail; | ||
214 | } | ||
215 | leaf = path->nodes[0]; | ||
216 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
217 | struct btrfs_file_extent_item); | ||
218 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | ||
219 | btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); | ||
220 | } | ||
221 | ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; | ||
222 | |||
223 | cur_size = size; | ||
224 | i = 0; | ||
225 | while (size > 0) { | ||
226 | page = pages[i]; | ||
227 | kaddr = kmap_atomic(page, KM_USER0); | ||
228 | cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); | ||
229 | write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); | ||
230 | kunmap_atomic(kaddr, KM_USER0); | ||
231 | page_offset = 0; | ||
232 | ptr += cur_size; | ||
233 | size -= cur_size; | ||
234 | if (i >= num_pages) { | ||
235 | printk("i %d num_pages %d\n", i, num_pages); | ||
236 | } | ||
237 | i++; | ||
238 | } | ||
239 | btrfs_mark_buffer_dirty(leaf); | ||
240 | fail: | ||
241 | btrfs_free_path(path); | ||
242 | return err; | ||
243 | } | ||
244 | |||
245 | /* | 98 | /* |
246 | * after copy_from_user, pages need to be dirtied and we need to make | 99 | * after copy_from_user, pages need to be dirtied and we need to make |
247 | * sure holes are created between the current EOF and the start of | 100 | * sure holes are created between the current EOF and the start of |
@@ -267,8 +120,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
267 | u64 start_pos; | 120 | u64 start_pos; |
268 | u64 end_of_last_block; | 121 | u64 end_of_last_block; |
269 | u64 end_pos = pos + write_bytes; | 122 | u64 end_pos = pos + write_bytes; |
270 | u64 inline_size; | ||
271 | int did_inline = 0; | ||
272 | loff_t isize = i_size_read(inode); | 123 | loff_t isize = i_size_read(inode); |
273 | 124 | ||
274 | start_pos = pos & ~((u64)root->sectorsize - 1); | 125 | start_pos = pos & ~((u64)root->sectorsize - 1); |
@@ -314,7 +165,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
314 | err = btrfs_insert_file_extent(trans, root, | 165 | err = btrfs_insert_file_extent(trans, root, |
315 | inode->i_ino, | 166 | inode->i_ino, |
316 | last_pos_in_file, | 167 | last_pos_in_file, |
317 | 0, 0, hole_size, 0); | 168 | 0, 0, hole_size, 0, |
169 | hole_size, 0, 0, 0); | ||
318 | btrfs_drop_extent_cache(inode, last_pos_in_file, | 170 | btrfs_drop_extent_cache(inode, last_pos_in_file, |
319 | last_pos_in_file + hole_size - 1, 0); | 171 | last_pos_in_file + hole_size - 1, 0); |
320 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 172 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
@@ -324,57 +176,19 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
324 | goto failed; | 176 | goto failed; |
325 | } | 177 | } |
326 | 178 | ||
327 | /* | 179 | /* check for reserved extents on each page, we don't want |
328 | * either allocate an extent for the new bytes or setup the key | 180 | * to reset the delalloc bit on things that already have |
329 | * to show we are doing inline data in the extent | 181 | * extents reserved. |
330 | */ | 182 | */ |
331 | inline_size = end_pos; | 183 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); |
332 | if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | 184 | for (i = 0; i < num_pages; i++) { |
333 | inline_size > root->fs_info->max_inline || | 185 | struct page *p = pages[i]; |
334 | (inline_size & (root->sectorsize -1)) == 0 || | 186 | SetPageUptodate(p); |
335 | inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { | 187 | ClearPageChecked(p); |
336 | /* check for reserved extents on each page, we don't want | 188 | set_page_dirty(p); |
337 | * to reset the delalloc bit on things that already have | ||
338 | * extents reserved. | ||
339 | */ | ||
340 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
341 | for (i = 0; i < num_pages; i++) { | ||
342 | struct page *p = pages[i]; | ||
343 | SetPageUptodate(p); | ||
344 | ClearPageChecked(p); | ||
345 | set_page_dirty(p); | ||
346 | } | ||
347 | } else { | ||
348 | u64 aligned_end; | ||
349 | /* step one, delete the existing extents in this range */ | ||
350 | aligned_end = (pos + write_bytes + root->sectorsize - 1) & | ||
351 | ~((u64)root->sectorsize - 1); | ||
352 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
353 | err = btrfs_drop_extents(trans, root, inode, start_pos, | ||
354 | aligned_end, aligned_end, &hint_byte); | ||
355 | if (err) | ||
356 | goto failed; | ||
357 | if (isize > inline_size) | ||
358 | inline_size = min_t(u64, isize, aligned_end); | ||
359 | inline_size -= start_pos; | ||
360 | err = insert_inline_extent(trans, root, inode, start_pos, | ||
361 | inline_size, pages, 0, num_pages); | ||
362 | btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); | ||
363 | BUG_ON(err); | ||
364 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
365 | |||
366 | /* | ||
367 | * an ugly way to do all the prop accounting around | ||
368 | * the page bits and mapping tags | ||
369 | */ | ||
370 | set_page_writeback(pages[0]); | ||
371 | end_page_writeback(pages[0]); | ||
372 | did_inline = 1; | ||
373 | } | 189 | } |
374 | if (end_pos > isize) { | 190 | if (end_pos > isize) { |
375 | i_size_write(inode, end_pos); | 191 | i_size_write(inode, end_pos); |
376 | if (did_inline) | ||
377 | BTRFS_I(inode)->disk_i_size = end_pos; | ||
378 | btrfs_update_inode(trans, root, inode); | 192 | btrfs_update_inode(trans, root, inode); |
379 | } | 193 | } |
380 | failed: | 194 | failed: |
@@ -399,6 +213,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
399 | int ret; | 213 | int ret; |
400 | int testend = 1; | 214 | int testend = 1; |
401 | unsigned long flags; | 215 | unsigned long flags; |
216 | int compressed = 0; | ||
402 | 217 | ||
403 | WARN_ON(end < start); | 218 | WARN_ON(end < start); |
404 | if (end == (u64)-1) { | 219 | if (end == (u64)-1) { |
@@ -434,6 +249,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
434 | free_extent_map(em); | 249 | free_extent_map(em); |
435 | continue; | 250 | continue; |
436 | } | 251 | } |
252 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
437 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 253 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
438 | remove_extent_mapping(em_tree, em); | 254 | remove_extent_mapping(em_tree, em); |
439 | 255 | ||
@@ -442,6 +258,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
442 | split->start = em->start; | 258 | split->start = em->start; |
443 | split->len = start - em->start; | 259 | split->len = start - em->start; |
444 | split->block_start = em->block_start; | 260 | split->block_start = em->block_start; |
261 | |||
262 | if (compressed) | ||
263 | split->block_len = em->block_len; | ||
264 | else | ||
265 | split->block_len = split->len; | ||
266 | |||
445 | split->bdev = em->bdev; | 267 | split->bdev = em->bdev; |
446 | split->flags = flags; | 268 | split->flags = flags; |
447 | ret = add_extent_mapping(em_tree, split); | 269 | ret = add_extent_mapping(em_tree, split); |
@@ -459,7 +281,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
459 | split->bdev = em->bdev; | 281 | split->bdev = em->bdev; |
460 | split->flags = flags; | 282 | split->flags = flags; |
461 | 283 | ||
462 | split->block_start = em->block_start + diff; | 284 | if (compressed) { |
285 | split->block_len = em->block_len; | ||
286 | split->block_start = em->block_start; | ||
287 | } else { | ||
288 | split->block_len = split->len; | ||
289 | split->block_start = em->block_start + diff; | ||
290 | } | ||
463 | 291 | ||
464 | ret = add_extent_mapping(em_tree, split); | 292 | ret = add_extent_mapping(em_tree, split); |
465 | BUG_ON(ret); | 293 | BUG_ON(ret); |
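The asymmetry matters because a compressed blob only decompresses as a whole: each surviving half of a split mapping must keep the complete on-disk range. Illustrative numbers (D is a placeholder disk byte): dropping the back half of a 128K extent stored as one 8K blob leaves

	split->start       = 0;			/* surviving logical half */
	split->len         = 64 * 1024;
	split->block_start = D;			/* still the whole 8K blob */
	split->block_len   = 8 * 1024;
	/* uncompressed splits instead take block_len = split->len and
	 * block_start = em->block_start + diff, as the code above shows */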
@@ -533,7 +361,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) | |||
533 | struct btrfs_item *item; | 361 | struct btrfs_item *item; |
534 | item = btrfs_item_nr(leaf, slot); | 362 | item = btrfs_item_nr(leaf, slot); |
535 | extent_end = found_key.offset + | 363 | extent_end = found_key.offset + |
536 | btrfs_file_extent_inline_len(leaf, item); | 364 | btrfs_file_extent_inline_len(leaf, extent); |
537 | extent_end = (extent_end + root->sectorsize - 1) & | 365 | extent_end = (extent_end + root->sectorsize - 1) & |
538 | ~((u64)root->sectorsize -1 ); | 366 | ~((u64)root->sectorsize -1 ); |
539 | } | 367 | } |
@@ -573,6 +401,10 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
573 | u64 extent_end = 0; | 401 | u64 extent_end = 0; |
574 | u64 search_start = start; | 402 | u64 search_start = start; |
575 | u64 leaf_start; | 403 | u64 leaf_start; |
404 | u64 ram_bytes = 0; | ||
405 | u8 compression = 0; | ||
406 | u8 encryption = 0; | ||
407 | u16 other_encoding = 0; | ||
576 | u64 root_gen; | 408 | u64 root_gen; |
577 | u64 root_owner; | 409 | u64 root_owner; |
578 | struct extent_buffer *leaf; | 410 | struct extent_buffer *leaf; |
@@ -589,6 +421,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
589 | int recow; | 421 | int recow; |
590 | int ret; | 422 | int ret; |
591 | 423 | ||
424 | inline_limit = 0; | ||
592 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 425 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
593 | 426 | ||
594 | path = btrfs_alloc_path(); | 427 | path = btrfs_alloc_path(); |
@@ -637,6 +470,12 @@ next_slot: | |||
637 | extent = btrfs_item_ptr(leaf, slot, | 470 | extent = btrfs_item_ptr(leaf, slot, |
638 | struct btrfs_file_extent_item); | 471 | struct btrfs_file_extent_item); |
639 | found_type = btrfs_file_extent_type(leaf, extent); | 472 | found_type = btrfs_file_extent_type(leaf, extent); |
473 | compression = btrfs_file_extent_compression(leaf, | ||
474 | extent); | ||
475 | encryption = btrfs_file_extent_encryption(leaf, | ||
476 | extent); | ||
477 | other_encoding = btrfs_file_extent_other_encoding(leaf, | ||
478 | extent); | ||
640 | if (found_type == BTRFS_FILE_EXTENT_REG) { | 479 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
641 | extent_end = | 480 | extent_end = |
642 | btrfs_file_extent_disk_bytenr(leaf, | 481 | btrfs_file_extent_disk_bytenr(leaf, |
@@ -646,13 +485,13 @@ next_slot: | |||
646 | 485 | ||
647 | extent_end = key.offset + | 486 | extent_end = key.offset + |
648 | btrfs_file_extent_num_bytes(leaf, extent); | 487 | btrfs_file_extent_num_bytes(leaf, extent); |
488 | ram_bytes = btrfs_file_extent_ram_bytes(leaf, | ||
489 | extent); | ||
649 | found_extent = 1; | 490 | found_extent = 1; |
650 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 491 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
651 | struct btrfs_item *item; | ||
652 | item = btrfs_item_nr(leaf, slot); | ||
653 | found_inline = 1; | 492 | found_inline = 1; |
654 | extent_end = key.offset + | 493 | extent_end = key.offset + |
655 | btrfs_file_extent_inline_len(leaf, item); | 494 | btrfs_file_extent_inline_len(leaf, extent); |
656 | } | 495 | } |
657 | } else { | 496 | } else { |
658 | extent_end = search_start; | 497 | extent_end = search_start; |
@@ -680,10 +519,9 @@ next_slot: | |||
680 | search_start = (extent_end + mask) & ~mask; | 519 | search_start = (extent_end + mask) & ~mask; |
681 | } else | 520 | } else |
682 | search_start = extent_end; | 521 | search_start = extent_end; |
683 | if (end <= extent_end && start >= key.offset && found_inline) { | 522 | |
523 | if (end <= extent_end && start >= key.offset && found_inline) | ||
684 | *hint_byte = EXTENT_MAP_INLINE; | 524 | *hint_byte = EXTENT_MAP_INLINE; |
685 | goto out; | ||
686 | } | ||
687 | 525 | ||
688 | if (found_extent) { | 526 | if (found_extent) { |
689 | read_extent_buffer(leaf, &old, (unsigned long)extent, | 527 | read_extent_buffer(leaf, &old, (unsigned long)extent, |
@@ -770,12 +608,27 @@ next_slot: | |||
770 | write_extent_buffer(leaf, &old, | 608 | write_extent_buffer(leaf, &old, |
771 | (unsigned long)extent, sizeof(old)); | 609 | (unsigned long)extent, sizeof(old)); |
772 | 610 | ||
611 | btrfs_set_file_extent_compression(leaf, extent, | ||
612 | compression); | ||
613 | btrfs_set_file_extent_encryption(leaf, extent, | ||
614 | encryption); | ||
615 | btrfs_set_file_extent_other_encoding(leaf, extent, | ||
616 | other_encoding); | ||
773 | btrfs_set_file_extent_offset(leaf, extent, | 617 | btrfs_set_file_extent_offset(leaf, extent, |
774 | le64_to_cpu(old.offset) + end - key.offset); | 618 | le64_to_cpu(old.offset) + end - key.offset); |
775 | WARN_ON(le64_to_cpu(old.num_bytes) < | 619 | WARN_ON(le64_to_cpu(old.num_bytes) < |
776 | (extent_end - end)); | 620 | (extent_end - end)); |
777 | btrfs_set_file_extent_num_bytes(leaf, extent, | 621 | btrfs_set_file_extent_num_bytes(leaf, extent, |
778 | extent_end - end); | 622 | extent_end - end); |
623 | |||
624 | /* | ||
625 | * set the ram bytes to the size of the full extent | ||
626 | * before splitting. This is a worst case flag, | ||
627 | * but it's the best we can do because we don't know | ||
628 | * how splitting affects compression | ||
629 | */ | ||
630 | btrfs_set_file_extent_ram_bytes(leaf, extent, | ||
631 | ram_bytes); | ||
779 | btrfs_set_file_extent_type(leaf, extent, | 632 | btrfs_set_file_extent_type(leaf, extent, |
780 | BTRFS_FILE_EXTENT_REG); | 633 | BTRFS_FILE_EXTENT_REG); |
781 | 634 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bf4bed6ca4d6..9797592dc86b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include "compat.h" | 49 | #include "compat.h" |
50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "ref-cache.h" | 51 | #include "ref-cache.h" |
52 | #include "compression.h" | ||
52 | 53 | ||
53 | struct btrfs_iget_args { | 54 | struct btrfs_iget_args { |
54 | u64 ino; | 55 | u64 ino; |
@@ -83,6 +84,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
83 | }; | 84 | }; |
84 | 85 | ||
85 | static void btrfs_truncate(struct inode *inode); | 86 | static void btrfs_truncate(struct inode *inode); |
87 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | ||
86 | 88 | ||
87 | /* | 89 | /* |
88 | * a very lame attempt at stopping writes when the FS is 85% full. There | 90 | * a very lame attempt at stopping writes when the FS is 85% full. There |
@@ -114,57 +116,374 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | |||
114 | } | 116 | } |
115 | 117 | ||
116 | /* | 118 | /* |
119 | * this does all the hard work for inserting an inline extent into | ||
120 | * the btree. The caller should have done a btrfs_drop_extents so that | ||
121 | * no overlapping inline items exist in the btree | ||
122 | */ | ||
123 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, | ||
124 | struct btrfs_root *root, struct inode *inode, | ||
125 | u64 start, size_t size, size_t compressed_size, | ||
126 | struct page **compressed_pages) | ||
127 | { | ||
128 | struct btrfs_key key; | ||
129 | struct btrfs_path *path; | ||
130 | struct extent_buffer *leaf; | ||
131 | struct page *page = NULL; | ||
132 | char *kaddr; | ||
133 | unsigned long ptr; | ||
134 | struct btrfs_file_extent_item *ei; | ||
135 | int err = 0; | ||
136 | int ret; | ||
137 | size_t cur_size = size; | ||
138 | size_t datasize; | ||
139 | unsigned long offset; | ||
140 | int use_compress = 0; | ||
141 | |||
142 | if (compressed_size && compressed_pages) { | ||
143 | use_compress = 1; | ||
144 | cur_size = compressed_size; | ||
145 | } | ||
146 | |||
147 | path = btrfs_alloc_path(); | ||
148 | if (!path) | ||
149 | return -ENOMEM; | ||
150 | btrfs_set_trans_block_group(trans, inode); | ||
151 | |||
152 | key.objectid = inode->i_ino; | ||
153 | key.offset = start; | ||
154 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | ||
155 | inode_add_bytes(inode, size); | ||
156 | datasize = btrfs_file_extent_calc_inline_size(cur_size); | ||
157 | |||
158 | inode_add_bytes(inode, size); | ||
159 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
160 | datasize); | ||
161 | BUG_ON(ret); | ||
162 | if (ret) { | ||
163 | err = ret; | ||
164 | printk("got bad ret %d\n", ret); | ||
165 | goto fail; | ||
166 | } | ||
167 | leaf = path->nodes[0]; | ||
168 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
169 | struct btrfs_file_extent_item); | ||
170 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | ||
171 | btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); | ||
172 | btrfs_set_file_extent_encryption(leaf, ei, 0); | ||
173 | btrfs_set_file_extent_other_encoding(leaf, ei, 0); | ||
174 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | ||
175 | ptr = btrfs_file_extent_inline_start(ei); | ||
176 | |||
177 | if (use_compress) { | ||
178 | struct page *cpage; | ||
179 | int i = 0; | ||
180 | while(compressed_size > 0) { | ||
181 | cpage = compressed_pages[i]; | ||
182 | cur_size = min(compressed_size, | ||
183 | PAGE_CACHE_SIZE); | ||
184 | |||
185 | kaddr = kmap(cpage); | ||
186 | write_extent_buffer(leaf, kaddr, ptr, cur_size); | ||
187 | kunmap(cpage); | ||
188 | |||
189 | i++; | ||
190 | ptr += cur_size; | ||
191 | compressed_size -= cur_size; | ||
192 | } | ||
193 | btrfs_set_file_extent_compression(leaf, ei, | ||
194 | BTRFS_COMPRESS_ZLIB); | ||
195 | } else { | ||
196 | page = find_get_page(inode->i_mapping, | ||
197 | start >> PAGE_CACHE_SHIFT); | ||
198 | btrfs_set_file_extent_compression(leaf, ei, 0); | ||
199 | kaddr = kmap_atomic(page, KM_USER0); | ||
200 | offset = start & (PAGE_CACHE_SIZE - 1); | ||
201 | write_extent_buffer(leaf, kaddr + offset, ptr, size); | ||
202 | kunmap_atomic(kaddr, KM_USER0); | ||
203 | page_cache_release(page); | ||
204 | } | ||
205 | btrfs_mark_buffer_dirty(leaf); | ||
206 | btrfs_free_path(path); | ||
207 | |||
208 | BTRFS_I(inode)->disk_i_size = inode->i_size; | ||
209 | btrfs_update_inode(trans, root, inode); | ||
210 | return 0; | ||
211 | fail: | ||
212 | btrfs_free_path(path); | ||
213 | return err; | ||
214 | } | ||
215 | |||
216 | |||
217 | /* | ||
218 | * conditionally insert an inline extent into the file. This | ||
219 | * does the checks required to make sure the data is small enough | ||
220 | * to fit as an inline extent. | ||
221 | */ | ||
222 | static int cow_file_range_inline(struct btrfs_trans_handle *trans, | ||
223 | struct btrfs_root *root, | ||
224 | struct inode *inode, u64 start, u64 end, | ||
225 | size_t compressed_size, | ||
226 | struct page **compressed_pages) | ||
227 | { | ||
228 | u64 isize = i_size_read(inode); | ||
229 | u64 actual_end = min(end + 1, isize); | ||
230 | u64 inline_len = actual_end - start; | ||
231 | u64 aligned_end = (end + root->sectorsize - 1) & | ||
232 | ~((u64)root->sectorsize - 1); | ||
233 | u64 hint_byte; | ||
234 | u64 data_len = inline_len; | ||
235 | int ret; | ||
236 | |||
237 | if (compressed_size) | ||
238 | data_len = compressed_size; | ||
239 | |||
240 | if (start > 0 || | ||
241 | data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | ||
242 | (!compressed_size && | ||
243 | (actual_end & (root->sectorsize - 1)) == 0) || | ||
244 | end + 1 < isize || | ||
245 | data_len > root->fs_info->max_inline) { | ||
246 | return 1; | ||
247 | } | ||
248 | |||
249 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
250 | ret = btrfs_drop_extents(trans, root, inode, start, | ||
251 | aligned_end, aligned_end, &hint_byte); | ||
252 | BUG_ON(ret); | ||
253 | |||
254 | if (isize > actual_end) | ||
255 | inline_len = min_t(u64, isize, actual_end); | ||
256 | ret = insert_inline_extent(trans, root, inode, start, | ||
257 | inline_len, compressed_size, | ||
258 | compressed_pages); | ||
259 | BUG_ON(ret); | ||
260 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | ||
261 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
262 | return 0; | ||
263 | } | ||
264 | |||
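cow_file_range_inline returns 1 (no inline extent) unless the data starts at file offset zero, reaches end of file, and fits both the leaf-size and mount-option limits; uncompressed data that happens to end on a sector boundary is also skipped, since a full block gains nothing from inlining. The test restated as a predicate (a sketch mirroring the checks above):

	static int can_inline(struct btrfs_root *root, u64 start, u64 end,
			      u64 isize, u64 data_len, int compressed)
	{
		u64 actual_end = min(end + 1, isize);

		if (start > 0)			/* must begin the file       */
			return 0;
		if (end + 1 < isize)		/* must reach EOF            */
			return 0;
		if (data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root))
			return 0;		/* must fit in one leaf      */
		if (data_len > root->fs_info->max_inline)
			return 0;		/* mount -o max_inline limit */
		if (!compressed && (actual_end & (root->sectorsize - 1)) == 0)
			return 0;		/* whole blocks gain nothing */
		return 1;
	}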
265 | /* | ||
117 | * when extent_io.c finds a delayed allocation range in the file, | 266 | * when extent_io.c finds a delayed allocation range in the file, |
118 | * the call backs end up in this code. The basic idea is to | 267 | * the call backs end up in this code. The basic idea is to |
119 | * allocate extents on disk for the range, and create ordered data structs | 268 | * allocate extents on disk for the range, and create ordered data structs |
120 | * in ram to track those extents. | 269 | * in ram to track those extents. |
270 | * | ||
271 | * locked_page is the page that writepage had locked already. We use | ||
272 | * it to make sure we don't do extra locks or unlocks. | ||
273 | * | ||
274 | * *page_started is set to one if we unlock locked_page and do everything | ||
275 | * required to start IO on it. It may be clean and already done with | ||
276 | * IO when we return. | ||
121 | */ | 277 | */ |
122 | static int cow_file_range(struct inode *inode, u64 start, u64 end) | 278 | static int cow_file_range(struct inode *inode, struct page *locked_page, |
279 | u64 start, u64 end, int *page_started) | ||
123 | { | 280 | { |
124 | struct btrfs_root *root = BTRFS_I(inode)->root; | 281 | struct btrfs_root *root = BTRFS_I(inode)->root; |
125 | struct btrfs_trans_handle *trans; | 282 | struct btrfs_trans_handle *trans; |
126 | u64 alloc_hint = 0; | 283 | u64 alloc_hint = 0; |
127 | u64 num_bytes; | 284 | u64 num_bytes; |
285 | unsigned long ram_size; | ||
286 | u64 orig_start; | ||
287 | u64 disk_num_bytes; | ||
128 | u64 cur_alloc_size; | 288 | u64 cur_alloc_size; |
129 | u64 blocksize = root->sectorsize; | 289 | u64 blocksize = root->sectorsize; |
130 | u64 orig_num_bytes; | 290 | u64 actual_end; |
131 | struct btrfs_key ins; | 291 | struct btrfs_key ins; |
132 | struct extent_map *em; | 292 | struct extent_map *em; |
133 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 293 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
134 | int ret = 0; | 294 | int ret = 0; |
295 | struct page **pages = NULL; | ||
296 | unsigned long nr_pages; | ||
297 | unsigned long nr_pages_ret = 0; | ||
298 | unsigned long total_compressed = 0; | ||
299 | unsigned long total_in = 0; | ||
300 | unsigned long max_compressed = 128 * 1024; | ||
301 | unsigned long max_uncompressed = 256 * 1024; | ||
302 | int i; | ||
303 | int will_compress; | ||
135 | 304 | ||
136 | trans = btrfs_join_transaction(root, 1); | 305 | trans = btrfs_join_transaction(root, 1); |
137 | BUG_ON(!trans); | 306 | BUG_ON(!trans); |
138 | btrfs_set_trans_block_group(trans, inode); | 307 | btrfs_set_trans_block_group(trans, inode); |
308 | orig_start = start; | ||
309 | |||
310 | /* | ||
311 | * compression made this loop a bit ugly, but the basic idea is to | ||
312 | * compress some pages but keep the total size of the compressed | ||
313 | * extent relatively small. If compression is off, this goto target | ||
314 | * is never used. | ||
315 | */ | ||
316 | again: | ||
317 | will_compress = 0; | ||
318 | nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; | ||
319 | nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); | ||
139 | 320 | ||
321 | actual_end = min_t(u64, i_size_read(inode), end + 1); | ||
322 | total_compressed = actual_end - start; | ||
323 | |||
324 | /* we want to make sure the amount of ram required to uncompress | ||
325 | * an extent is reasonable, so we limit the total size in ram | ||
326 | * of a compressed extent to 256k | ||
327 | */ | ||
328 | total_compressed = min(total_compressed, max_uncompressed); | ||
140 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 329 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
141 | num_bytes = max(blocksize, num_bytes); | 330 | num_bytes = max(blocksize, num_bytes); |
142 | orig_num_bytes = num_bytes; | 331 | disk_num_bytes = num_bytes; |
332 | total_in = 0; | ||
333 | ret = 0; | ||
143 | 334 | ||
144 | if (alloc_hint == EXTENT_MAP_INLINE) | 335 | /* we do compression for mount -o compress and when the |
145 | goto out; | 336 | * inode has not been flagged as nocompress |
337 | */ | ||
338 | if (!btrfs_test_flag(inode, NOCOMPRESS) && | ||
339 | btrfs_test_opt(root, COMPRESS)) { | ||
340 | WARN_ON(pages); | ||
341 | pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | ||
342 | |||
343 | /* we want to make sure the amount of IO required to satisfy | ||
344 | * a random read is reasonably small, so we limit the size | ||
345 | * of a compressed extent to 128k | ||
346 | */ | ||
347 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | ||
348 | total_compressed, pages, | ||
349 | nr_pages, &nr_pages_ret, | ||
350 | &total_in, | ||
351 | &total_compressed, | ||
352 | max_compressed); | ||
353 | |||
354 | if (!ret) { | ||
355 | unsigned long offset = total_compressed & | ||
356 | (PAGE_CACHE_SIZE - 1); | ||
357 | struct page *page = pages[nr_pages_ret - 1]; | ||
358 | char *kaddr; | ||
359 | |||
360 | /* zero the tail end of the last page; we might be | ||
361 | * sending it down to disk | ||
362 | */ | ||
363 | if (offset) { | ||
364 | kaddr = kmap_atomic(page, KM_USER0); | ||
365 | memset(kaddr + offset, 0, | ||
366 | PAGE_CACHE_SIZE - offset); | ||
367 | kunmap_atomic(kaddr, KM_USER0); | ||
368 | } | ||
369 | will_compress = 1; | ||
370 | } | ||
371 | } | ||
372 | if (start == 0) { | ||
373 | /* let's try to make an inline extent */ | ||
374 | if (ret || total_in < (end - start + 1)) { | ||
375 | /* we didn't compress the entire range; try | ||
376 | * to make an uncompressed inline extent. This | ||
377 | * is almost sure to fail, but maybe inline sizes | ||
378 | * will get bigger later | ||
379 | */ | ||
380 | ret = cow_file_range_inline(trans, root, inode, | ||
381 | start, end, 0, NULL); | ||
382 | } else { | ||
383 | ret = cow_file_range_inline(trans, root, inode, | ||
384 | start, end, | ||
385 | total_compressed, pages); | ||
386 | } | ||
387 | if (ret == 0) { | ||
388 | extent_clear_unlock_delalloc(inode, | ||
389 | &BTRFS_I(inode)->io_tree, | ||
390 | start, end, NULL, | ||
391 | 1, 1, 1); | ||
392 | *page_started = 1; | ||
393 | ret = 0; | ||
394 | goto free_pages_out; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | if (will_compress) { | ||
399 | /* | ||
400 | * we aren't doing an inline extent, so round the compressed size | ||
401 | * up to a block size boundary so the allocator does sane | ||
402 | * things | ||
403 | */ | ||
404 | total_compressed = (total_compressed + blocksize - 1) & | ||
405 | ~(blocksize - 1); | ||
406 | |||
407 | /* | ||
408 | * one last check to make sure the compression is really a | ||
409 | * win: compare the page count read with the blocks on disk | ||
410 | */ | ||
411 | total_in = (total_in + PAGE_CACHE_SIZE - 1) & | ||
412 | ~(PAGE_CACHE_SIZE - 1); | ||
413 | if (total_compressed >= total_in) { | ||
414 | will_compress = 0; | ||
415 | } else { | ||
416 | disk_num_bytes = total_compressed; | ||
417 | num_bytes = total_in; | ||
418 | } | ||
419 | } | ||
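A worked example of the win test, assuming 4K sectors and 4K pages (values illustrative): suppose 64K of file data deflates to 20000 bytes.

    /* illustrative numbers only */
    total_compressed = (20000 + 4096 - 1) & ~(4096 - 1);  /* -> 20480, block aligned */
    total_in         = (65536 + 4096 - 1) & ~(4096 - 1);  /* -> 65536, page aligned  */
    /* 20480 < 65536, so compression wins:
     *   disk_num_bytes = 20480   (5 blocks on disk)
     *   num_bytes      = 65536   (16 pages of file data)
     * had the data deflated to 63K instead, 64512 rounds up to 65536,
     * total_compressed >= total_in, and will_compress is cleared.
     */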
420 | if (!will_compress && pages) { | ||
421 | /* | ||
422 | * the compression code ran but failed to make things smaller; | ||
423 | * free any pages it allocated and our page pointer array | ||
424 | */ | ||
425 | for (i = 0; i < nr_pages_ret; i++) { | ||
426 | page_cache_release(pages[i]); | ||
427 | } | ||
428 | kfree(pages); | ||
429 | pages = NULL; | ||
430 | total_compressed = 0; | ||
431 | nr_pages_ret = 0; | ||
432 | |||
433 | /* flag the file so we don't compress in the future */ | ||
434 | btrfs_set_flag(inode, NOCOMPRESS); | ||
435 | } | ||
436 | |||
437 | BUG_ON(disk_num_bytes > | ||
438 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
146 | 439 | ||
147 | BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
148 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | 440 | mutex_lock(&BTRFS_I(inode)->extent_mutex); |
149 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 441 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
150 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 442 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
151 | 443 | ||
152 | while(num_bytes > 0) { | 444 | while(disk_num_bytes > 0) { |
153 | cur_alloc_size = min(num_bytes, root->fs_info->max_extent); | 445 | unsigned long min_bytes; |
446 | |||
447 | /* | ||
448 | * the max size of a compressed extent is pretty small, | ||
449 | * make the code a little less complex by forcing | ||
450 | * the allocator to find a whole compressed extent at once | ||
451 | */ | ||
452 | if (will_compress) | ||
453 | min_bytes = disk_num_bytes; | ||
454 | else | ||
455 | min_bytes = root->sectorsize; | ||
456 | |||
457 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | ||
154 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 458 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
155 | root->sectorsize, 0, alloc_hint, | 459 | min_bytes, 0, alloc_hint, |
156 | (u64)-1, &ins, 1); | 460 | (u64)-1, &ins, 1); |
157 | if (ret) { | 461 | if (ret) { |
158 | WARN_ON(1); | 462 | WARN_ON(1); |
159 | goto out; | 463 | goto free_pages_out_fail; |
160 | } | 464 | } |
161 | em = alloc_extent_map(GFP_NOFS); | 465 | em = alloc_extent_map(GFP_NOFS); |
162 | em->start = start; | 466 | em->start = start; |
163 | em->len = ins.offset; | 467 | |
468 | if (will_compress) { | ||
469 | ram_size = num_bytes; | ||
470 | em->len = num_bytes; | ||
471 | } else { | ||
472 | /* ram_size == disk size */ | ||
473 | ram_size = ins.offset; | ||
474 | em->len = ins.offset; | ||
475 | } | ||
476 | |||
164 | em->block_start = ins.objectid; | 477 | em->block_start = ins.objectid; |
478 | em->block_len = ins.offset; | ||
165 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 479 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
480 | |||
166 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | 481 | mutex_lock(&BTRFS_I(inode)->extent_mutex); |
167 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 482 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
483 | |||
484 | if (will_compress) | ||
485 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
486 | |||
168 | while(1) { | 487 | while(1) { |
169 | spin_lock(&em_tree->lock); | 488 | spin_lock(&em_tree->lock); |
170 | ret = add_extent_mapping(em_tree, em); | 489 | ret = add_extent_mapping(em_tree, em); |
@@ -174,26 +493,95 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) | |||
174 | break; | 493 | break; |
175 | } | 494 | } |
176 | btrfs_drop_extent_cache(inode, start, | 495 | btrfs_drop_extent_cache(inode, start, |
177 | start + ins.offset - 1, 0); | 496 | start + ram_size - 1, 0); |
178 | } | 497 | } |
179 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 498 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
180 | 499 | ||
181 | cur_alloc_size = ins.offset; | 500 | cur_alloc_size = ins.offset; |
182 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, | 501 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, |
183 | ins.offset, 0); | 502 | ram_size, cur_alloc_size, 0, |
503 | will_compress); | ||
184 | BUG_ON(ret); | 504 | BUG_ON(ret); |
185 | if (num_bytes < cur_alloc_size) { | 505 | |
186 | printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, | 506 | if (disk_num_bytes < cur_alloc_size) { |
507 | printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, | ||
187 | cur_alloc_size); | 508 | cur_alloc_size); |
188 | break; | 509 | break; |
189 | } | 510 | } |
511 | |||
512 | if (will_compress) { | ||
513 | /* | ||
514 | * we're doing compression, and we need to | ||
515 | * submit the compressed extents down to the device. | ||
516 | * | ||
517 | * We lock down all the file pages, clearing their | ||
518 | * dirty bits and setting them writeback. Everyone | ||
519 | * that wants to modify the page will wait on the | ||
520 | * ordered extent above. | ||
521 | * | ||
522 | * The writeback bits on the file pages are | ||
523 | * cleared when the compressed pages are on disk | ||
524 | */ | ||
525 | btrfs_end_transaction(trans, root); | ||
526 | |||
527 | if (start <= page_offset(locked_page) && | ||
528 | page_offset(locked_page) < start + ram_size) { | ||
529 | *page_started = 1; | ||
530 | } | ||
531 | |||
532 | extent_clear_unlock_delalloc(inode, | ||
533 | &BTRFS_I(inode)->io_tree, | ||
534 | start, | ||
535 | start + ram_size - 1, | ||
536 | NULL, 1, 1, 0); | ||
537 | |||
538 | ret = btrfs_submit_compressed_write(inode, start, | ||
539 | ram_size, ins.objectid, | ||
540 | cur_alloc_size, pages, | ||
541 | nr_pages_ret); | ||
542 | |||
543 | BUG_ON(ret); | ||
544 | trans = btrfs_join_transaction(root, 1); | ||
545 | if (start + ram_size < end) { | ||
546 | start += ram_size; | ||
547 | alloc_hint = ins.objectid + ins.offset; | ||
548 | /* pages will be freed at end_bio time */ | ||
549 | pages = NULL; | ||
550 | goto again; | ||
551 | } else { | ||
552 | /* we've written everything, time to go */ | ||
553 | break; | ||
554 | } | ||
555 | } | ||
556 | /* we're not doing compressed IO; don't unlock the first | ||
557 | * page (which the caller expects to stay locked), don't | ||
558 | * clear any dirty bits and don't set any writeback bits | ||
559 | */ | ||
560 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | ||
561 | start, start + ram_size - 1, | ||
562 | locked_page, 0, 0, 0); | ||
563 | disk_num_bytes -= cur_alloc_size; | ||
190 | num_bytes -= cur_alloc_size; | 564 | num_bytes -= cur_alloc_size; |
191 | alloc_hint = ins.objectid + ins.offset; | 565 | alloc_hint = ins.objectid + ins.offset; |
192 | start += cur_alloc_size; | 566 | start += cur_alloc_size; |
193 | } | 567 | } |
568 | |||
569 | ret = 0; | ||
194 | out: | 570 | out: |
195 | btrfs_end_transaction(trans, root); | 571 | btrfs_end_transaction(trans, root); |
572 | |||
196 | return ret; | 573 | return ret; |
574 | |||
575 | free_pages_out_fail: | ||
576 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | ||
577 | start, end, locked_page, 0, 0, 0); | ||
578 | free_pages_out: | ||
579 | for (i = 0; i < nr_pages_ret; i++) | ||
580 | page_cache_release(pages[i]); | ||
581 | if (pages) | ||
582 | kfree(pages); | ||
583 | |||
584 | goto out; | ||
197 | } | 585 | } |
198 | 586 | ||
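For reference, the compressed branch of the allocation loop reduces to the following sequence (condensed from the hunk above; transaction joins and error handling omitted):

    btrfs_add_ordered_extent(inode, start, ins.objectid,
                             ram_size, cur_alloc_size,
                             0 /* nocow */, 1 /* compressed */);
    extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                 start, start + ram_size - 1,
                                 NULL, 1, 1, 0);
    btrfs_submit_compressed_write(inode, start, ram_size,
                                  ins.objectid, cur_alloc_size,
                                  pages, nr_pages_ret);
    /* the compressed pages are released at end_bio time, so the
     * pages pointer is NULLed before looping for the next chunk */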
199 | /* | 587 | /* |
@@ -203,7 +591,8 @@ out: | |||
203 | * If no cow copies or snapshots exist, we write directly to the existing | 591 | * If no cow copies or snapshots exist, we write directly to the existing |
204 | * blocks on disk | 592 | * blocks on disk |
205 | */ | 593 | */ |
206 | static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) | 594 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, |
595 | u64 start, u64 end, int *page_started) | ||
207 | { | 596 | { |
208 | u64 extent_start; | 597 | u64 extent_start; |
209 | u64 extent_end; | 598 | u64 extent_end; |
@@ -260,6 +649,11 @@ again: | |||
260 | extent_end = extent_start + extent_num_bytes; | 649 | extent_end = extent_start + extent_num_bytes; |
261 | err = 0; | 650 | err = 0; |
262 | 651 | ||
652 | if (btrfs_file_extent_compression(leaf, item) || | ||
653 | btrfs_file_extent_encryption(leaf, item) || | ||
654 | btrfs_file_extent_other_encoding(leaf, item)) | ||
655 | goto not_found; | ||
656 | |||
263 | if (loops && start != extent_start) | 657 | if (loops && start != extent_start) |
264 | goto not_found; | 658 | goto not_found; |
265 | 659 | ||
@@ -284,7 +678,8 @@ again: | |||
284 | bytenr += btrfs_file_extent_offset(leaf, item); | 678 | bytenr += btrfs_file_extent_offset(leaf, item); |
285 | extent_num_bytes = min(end + 1, extent_end) - start; | 679 | extent_num_bytes = min(end + 1, extent_end) - start; |
286 | ret = btrfs_add_ordered_extent(inode, start, bytenr, | 680 | ret = btrfs_add_ordered_extent(inode, start, bytenr, |
287 | extent_num_bytes, 1); | 681 | extent_num_bytes, |
682 | extent_num_bytes, 1, 0); | ||
288 | if (ret) { | 683 | if (ret) { |
289 | err = ret; | 684 | err = ret; |
290 | goto out; | 685 | goto out; |
@@ -300,7 +695,8 @@ again: | |||
300 | not_found: | 695 | not_found: |
301 | btrfs_end_transaction(trans, root); | 696 | btrfs_end_transaction(trans, root); |
302 | btrfs_free_path(path); | 697 | btrfs_free_path(path); |
303 | return cow_file_range(inode, start, end); | 698 | return cow_file_range(inode, locked_page, start, end, |
699 | page_started); | ||
304 | } | 700 | } |
305 | out: | 701 | out: |
306 | WARN_ON(err); | 702 | WARN_ON(err); |
@@ -312,16 +708,19 @@ out: | |||
312 | /* | 708 | /* |
313 | * extent_io.c call back to do delayed allocation processing | 709 | * extent_io.c call back to do delayed allocation processing |
314 | */ | 710 | */ |
315 | static int run_delalloc_range(struct inode *inode, u64 start, u64 end) | 711 | static int run_delalloc_range(struct inode *inode, struct page *locked_page, |
712 | u64 start, u64 end, int *page_started) | ||
316 | { | 713 | { |
317 | struct btrfs_root *root = BTRFS_I(inode)->root; | 714 | struct btrfs_root *root = BTRFS_I(inode)->root; |
318 | int ret; | 715 | int ret; |
319 | 716 | ||
320 | if (btrfs_test_opt(root, NODATACOW) || | 717 | if (btrfs_test_opt(root, NODATACOW) || |
321 | btrfs_test_flag(inode, NODATACOW)) | 718 | btrfs_test_flag(inode, NODATACOW)) |
322 | ret = run_delalloc_nocow(inode, start, end); | 719 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
720 | page_started); | ||
323 | else | 721 | else |
324 | ret = cow_file_range(inode, start, end); | 722 | ret = cow_file_range(inode, locked_page, start, end, |
723 | page_started); | ||
325 | 724 | ||
326 | return ret; | 725 | return ret; |
327 | } | 726 | } |
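The page_started flag changes the contract with the writepage path in extent_io.c (not shown in this excerpt). A hypothetical caller shape, to make the contract concrete:

    int page_started = 0;

    ret = run_delalloc_range(inode, locked_page, delalloc_start,
                             delalloc_end, &page_started);
    if (page_started) {
            /* the callback already unlocked locked_page and started
             * (or finished) IO on it; the caller must not redirty,
             * unlock, or otherwise touch the page again */
            return 0;
    }
    /* otherwise locked_page is still locked; proceed as before */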
@@ -383,7 +782,8 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
383 | * we don't create bios that span stripes or chunks | 782 | * we don't create bios that span stripes or chunks |
384 | */ | 783 | */ |
385 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 784 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
386 | size_t size, struct bio *bio) | 785 | size_t size, struct bio *bio, |
786 | unsigned long bio_flags) | ||
387 | { | 787 | { |
388 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 788 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
389 | struct btrfs_mapping_tree *map_tree; | 789 | struct btrfs_mapping_tree *map_tree; |
@@ -413,7 +813,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
413 | * are inserted into the btree | 813 | * are inserted into the btree |
414 | */ | 814 | */ |
415 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 815 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
416 | int mirror_num) | 816 | int mirror_num, unsigned long bio_flags) |
417 | { | 817 | { |
418 | struct btrfs_root *root = BTRFS_I(inode)->root; | 818 | struct btrfs_root *root = BTRFS_I(inode)->root; |
419 | int ret = 0; | 819 | int ret = 0; |
@@ -429,7 +829,7 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
429 | * or reading the csums from the tree before a read | 829 | * or reading the csums from the tree before a read |
430 | */ | 830 | */ |
431 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 831 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
432 | int mirror_num) | 832 | int mirror_num, unsigned long bio_flags) |
433 | { | 833 | { |
434 | struct btrfs_root *root = BTRFS_I(inode)->root; | 834 | struct btrfs_root *root = BTRFS_I(inode)->root; |
435 | int ret = 0; | 835 | int ret = 0; |
@@ -444,11 +844,17 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
444 | 844 | ||
445 | if (!(rw & (1 << BIO_RW))) { | 845 | if (!(rw & (1 << BIO_RW))) { |
446 | btrfs_lookup_bio_sums(root, inode, bio); | 846 | btrfs_lookup_bio_sums(root, inode, bio); |
847 | |||
848 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | ||
849 | return btrfs_submit_compressed_read(inode, bio, | ||
850 | mirror_num, bio_flags); | ||
851 | } | ||
852 | |||
447 | goto mapit; | 853 | goto mapit; |
448 | } | 854 | } |
449 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 855 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
450 | inode, rw, bio, mirror_num, | 856 | inode, rw, bio, mirror_num, |
451 | __btrfs_submit_bio_hook); | 857 | bio_flags, __btrfs_submit_bio_hook); |
452 | mapit: | 858 | mapit: |
453 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | 859 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); |
454 | } | 860 | } |
@@ -539,7 +945,7 @@ out_page: | |||
539 | * good idea. This causes problems because we want to make sure COW | 945 | * good idea. This causes problems because we want to make sure COW |
540 | * properly happens and the data=ordered rules are followed. | 946 | * properly happens and the data=ordered rules are followed. |
541 | * | 947 | * |
542 | * In our case any range that doesn't have the EXTENT_ORDERED bit set | 948 | * In our case any range that doesn't have the ORDERED bit set |
543 | * hasn't been properly setup for IO. We kick off an async process | 949 | * hasn't been properly setup for IO. We kick off an async process |
544 | * to fix it up. The async helper will wait for ordered extents, set | 950 | * to fix it up. The async helper will wait for ordered extents, set |
545 | * the delalloc bit and make it safe to write the page. | 951 | * the delalloc bit and make it safe to write the page. |
@@ -632,10 +1038,21 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
632 | btrfs_set_file_extent_disk_bytenr(leaf, extent_item, | 1038 | btrfs_set_file_extent_disk_bytenr(leaf, extent_item, |
633 | ordered_extent->start); | 1039 | ordered_extent->start); |
634 | btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, | 1040 | btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, |
635 | ordered_extent->len); | 1041 | ordered_extent->disk_len); |
636 | btrfs_set_file_extent_offset(leaf, extent_item, 0); | 1042 | btrfs_set_file_extent_offset(leaf, extent_item, 0); |
1043 | |||
1044 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | ||
1045 | btrfs_set_file_extent_compression(leaf, extent_item, 1); | ||
1046 | else | ||
1047 | btrfs_set_file_extent_compression(leaf, extent_item, 0); | ||
1048 | btrfs_set_file_extent_encryption(leaf, extent_item, 0); | ||
1049 | btrfs_set_file_extent_other_encoding(leaf, extent_item, 0); | ||
1050 | |||
1051 | /* ram bytes = extent_num_bytes for now */ | ||
637 | btrfs_set_file_extent_num_bytes(leaf, extent_item, | 1052 | btrfs_set_file_extent_num_bytes(leaf, extent_item, |
638 | ordered_extent->len); | 1053 | ordered_extent->len); |
1054 | btrfs_set_file_extent_ram_bytes(leaf, extent_item, | ||
1055 | ordered_extent->len); | ||
639 | btrfs_mark_buffer_dirty(leaf); | 1056 | btrfs_mark_buffer_dirty(leaf); |
640 | 1057 | ||
641 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, | 1058 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, |
@@ -644,7 +1061,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
644 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 1061 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
645 | 1062 | ||
646 | ins.objectid = ordered_extent->start; | 1063 | ins.objectid = ordered_extent->start; |
647 | ins.offset = ordered_extent->len; | 1064 | ins.offset = ordered_extent->disk_len; |
648 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 1065 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
649 | ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, | 1066 | ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, |
650 | root->root_key.objectid, | 1067 | root->root_key.objectid, |
@@ -714,6 +1131,7 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
714 | int ret; | 1131 | int ret; |
715 | int rw; | 1132 | int rw; |
716 | u64 logical; | 1133 | u64 logical; |
1134 | unsigned long bio_flags = 0; | ||
717 | 1135 | ||
718 | ret = get_state_private(failure_tree, start, &private); | 1136 | ret = get_state_private(failure_tree, start, &private); |
719 | if (ret) { | 1137 | if (ret) { |
@@ -738,6 +1156,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
738 | } | 1156 | } |
739 | logical = start - em->start; | 1157 | logical = start - em->start; |
740 | logical = em->block_start + logical; | 1158 | logical = em->block_start + logical; |
1159 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | ||
1160 | bio_flags = EXTENT_BIO_COMPRESSED; | ||
741 | failrec->logical = logical; | 1161 | failrec->logical = logical; |
742 | free_extent_map(em); | 1162 | free_extent_map(em); |
743 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | | 1163 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | |
@@ -781,7 +1201,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
781 | rw = READ; | 1201 | rw = READ; |
782 | 1202 | ||
783 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1203 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
784 | failrec->last_mirror); | 1204 | failrec->last_mirror, |
1205 | bio_flags); | ||
785 | return 0; | 1206 | return 0; |
786 | } | 1207 | } |
787 | 1208 | ||
@@ -1644,10 +2065,8 @@ search_again: | |||
1644 | item_end += | 2065 | item_end += |
1645 | btrfs_file_extent_num_bytes(leaf, fi); | 2066 | btrfs_file_extent_num_bytes(leaf, fi); |
1646 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 2067 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
1647 | struct btrfs_item *item = btrfs_item_nr(leaf, | ||
1648 | path->slots[0]); | ||
1649 | item_end += btrfs_file_extent_inline_len(leaf, | 2068 | item_end += btrfs_file_extent_inline_len(leaf, |
1650 | item); | 2069 | fi); |
1651 | } | 2070 | } |
1652 | item_end--; | 2071 | item_end--; |
1653 | } | 2072 | } |
@@ -1715,7 +2134,14 @@ search_again: | |||
1715 | root_owner = btrfs_header_owner(leaf); | 2134 | root_owner = btrfs_header_owner(leaf); |
1716 | } | 2135 | } |
1717 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 2136 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
1718 | if (!del_item) { | 2137 | /* |
2138 | * we can't truncate inline items that have had | ||
2139 | * special encodings | ||
2140 | */ | ||
2141 | if (!del_item && | ||
2142 | btrfs_file_extent_compression(leaf, fi) == 0 && | ||
2143 | btrfs_file_extent_encryption(leaf, fi) == 0 && | ||
2144 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { | ||
1719 | u32 size = new_size - found_key.offset; | 2145 | u32 size = new_size - found_key.offset; |
1720 | 2146 | ||
1721 | if (root->ref_cows) { | 2147 | if (root->ref_cows) { |
@@ -1926,7 +2352,8 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
1926 | err = btrfs_insert_file_extent(trans, root, | 2352 | err = btrfs_insert_file_extent(trans, root, |
1927 | inode->i_ino, | 2353 | inode->i_ino, |
1928 | hole_start, 0, 0, | 2354 | hole_start, 0, 0, |
1929 | hole_size, 0); | 2355 | hole_size, 0, hole_size, |
2356 | 0, 0, 0); | ||
1930 | btrfs_drop_extent_cache(inode, hole_start, | 2357 | btrfs_drop_extent_cache(inode, hole_start, |
1931 | (u64)-1, 0); | 2358 | (u64)-1, 0); |
1932 | btrfs_check_file(root, inode); | 2359 | btrfs_check_file(root, inode); |
@@ -2894,11 +3321,50 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, | |||
2894 | start_diff = map_start - em->start; | 3321 | start_diff = map_start - em->start; |
2895 | em->start = map_start; | 3322 | em->start = map_start; |
2896 | em->len = map_len; | 3323 | em->len = map_len; |
2897 | if (em->block_start < EXTENT_MAP_LAST_BYTE) | 3324 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
3325 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | ||
2898 | em->block_start += start_diff; | 3326 | em->block_start += start_diff; |
3327 | em->block_len -= start_diff; | ||
3328 | } | ||
2899 | return add_extent_mapping(em_tree, em); | 3329 | return add_extent_mapping(em_tree, em); |
2900 | } | 3330 | } |
2901 | 3331 | ||
3332 | static noinline int uncompress_inline(struct btrfs_path *path, | ||
3333 | struct inode *inode, struct page *page, | ||
3334 | size_t pg_offset, u64 extent_offset, | ||
3335 | struct btrfs_file_extent_item *item) | ||
3336 | { | ||
3337 | int ret; | ||
3338 | struct extent_buffer *leaf = path->nodes[0]; | ||
3339 | char *tmp; | ||
3340 | size_t max_size; | ||
3341 | unsigned long inline_size; | ||
3342 | unsigned long ptr; | ||
3343 | |||
3344 | WARN_ON(pg_offset != 0); | ||
3345 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | ||
3346 | inline_size = btrfs_file_extent_inline_item_len(leaf, | ||
3347 | btrfs_item_nr(leaf, path->slots[0])); | ||
3348 | tmp = kmalloc(inline_size, GFP_NOFS); | ||
3349 | ptr = btrfs_file_extent_inline_start(item); | ||
3350 | |||
3351 | read_extent_buffer(leaf, tmp, ptr, inline_size); | ||
3352 | |||
3353 | max_size = min(PAGE_CACHE_SIZE, max_size); | ||
3354 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | ||
3355 | inline_size, max_size); | ||
3356 | if (ret) { | ||
3357 | char *kaddr = kmap_atomic(page, KM_USER0); | ||
3358 | unsigned long copy_size = min_t(u64, | ||
3359 | PAGE_CACHE_SIZE - pg_offset, | ||
3360 | max_size - extent_offset); | ||
3361 | memset(kaddr + pg_offset, 0, copy_size); | ||
3362 | kunmap_atomic(kaddr, KM_USER0); | ||
3363 | } | ||
3364 | kfree(tmp); | ||
3365 | return 0; | ||
3366 | } | ||
3367 | |||
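To make the size bookkeeping in uncompress_inline() concrete, an illustrative case (values hypothetical): a 3000-byte file stored inline after deflating to 800 bytes.

    /* illustrative values:
     *   ram_bytes   (extent item)     = 3000  -- uncompressed length
     *   inline_size (leaf item size)  = 800   -- compressed bytes read into tmp
     *   max_size = min(PAGE_CACHE_SIZE, 3000) = 3000 on 4K pages
     * so btrfs_zlib_decompress() fills bytes 0..2999 of the page; if it
     * fails, that same range is zeroed so the reader never sees stale data.
     */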
2902 | /* | 3368 | /* |
2903 | * a bit scary, this does extent mapping from logical file offset to the disk. | 3369 | * a bit scary, this does extent mapping from logical file offset to the disk. |
2904 | * the ugly parts come from merging extents from the disk with the | 3370 | * the ugly parts come from merging extents from the disk with the |
@@ -2927,6 +3393,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
2927 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 3393 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
2928 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3394 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2929 | struct btrfs_trans_handle *trans = NULL; | 3395 | struct btrfs_trans_handle *trans = NULL; |
3396 | int compressed; | ||
2930 | 3397 | ||
2931 | again: | 3398 | again: |
2932 | spin_lock(&em_tree->lock); | 3399 | spin_lock(&em_tree->lock); |
@@ -2951,6 +3418,7 @@ again: | |||
2951 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 3418 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
2952 | em->start = EXTENT_MAP_HOLE; | 3419 | em->start = EXTENT_MAP_HOLE; |
2953 | em->len = (u64)-1; | 3420 | em->len = (u64)-1; |
3421 | em->block_len = (u64)-1; | ||
2954 | 3422 | ||
2955 | if (!path) { | 3423 | if (!path) { |
2956 | path = btrfs_alloc_path(); | 3424 | path = btrfs_alloc_path(); |
@@ -2983,6 +3451,7 @@ again: | |||
2983 | 3451 | ||
2984 | found_type = btrfs_file_extent_type(leaf, item); | 3452 | found_type = btrfs_file_extent_type(leaf, item); |
2985 | extent_start = found_key.offset; | 3453 | extent_start = found_key.offset; |
3454 | compressed = btrfs_file_extent_compression(leaf, item); | ||
2986 | if (found_type == BTRFS_FILE_EXTENT_REG) { | 3455 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
2987 | extent_end = extent_start + | 3456 | extent_end = extent_start + |
2988 | btrfs_file_extent_num_bytes(leaf, item); | 3457 | btrfs_file_extent_num_bytes(leaf, item); |
@@ -3005,10 +3474,18 @@ again: | |||
3005 | em->block_start = EXTENT_MAP_HOLE; | 3474 | em->block_start = EXTENT_MAP_HOLE; |
3006 | goto insert; | 3475 | goto insert; |
3007 | } | 3476 | } |
3008 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
3009 | em->block_start = bytenr; | ||
3010 | em->start = extent_start; | 3477 | em->start = extent_start; |
3011 | em->len = extent_end - extent_start; | 3478 | em->len = extent_end - extent_start; |
3479 | if (compressed) { | ||
3480 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
3481 | em->block_start = bytenr; | ||
3482 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | ||
3483 | item); | ||
3484 | } else { | ||
3485 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
3486 | em->block_start = bytenr; | ||
3487 | em->block_len = em->len; | ||
3488 | } | ||
3012 | goto insert; | 3489 | goto insert; |
3013 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 3490 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
3014 | u64 page_start; | 3491 | u64 page_start; |
@@ -3018,8 +3495,7 @@ again: | |||
3018 | size_t extent_offset; | 3495 | size_t extent_offset; |
3019 | size_t copy_size; | 3496 | size_t copy_size; |
3020 | 3497 | ||
3021 | size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, | 3498 | size = btrfs_file_extent_inline_len(leaf, item); |
3022 | path->slots[0])); | ||
3023 | extent_end = (extent_start + size + root->sectorsize - 1) & | 3499 | extent_end = (extent_start + size + root->sectorsize - 1) & |
3024 | ~((u64)root->sectorsize - 1); | 3500 | ~((u64)root->sectorsize - 1); |
3025 | if (start < extent_start || start >= extent_end) { | 3501 | if (start < extent_start || start >= extent_end) { |
@@ -3035,9 +3511,10 @@ again: | |||
3035 | } | 3511 | } |
3036 | em->block_start = EXTENT_MAP_INLINE; | 3512 | em->block_start = EXTENT_MAP_INLINE; |
3037 | 3513 | ||
3038 | if (!page) { | 3514 | if (!page || create) { |
3039 | em->start = extent_start; | 3515 | em->start = extent_start; |
3040 | em->len = size; | 3516 | em->len = (size + root->sectorsize - 1) & |
3517 | ~((u64)root->sectorsize - 1); | ||
3041 | goto out; | 3518 | goto out; |
3042 | } | 3519 | } |
3043 | 3520 | ||
@@ -3048,11 +3525,22 @@ again: | |||
3048 | em->start = extent_start + extent_offset; | 3525 | em->start = extent_start + extent_offset; |
3049 | em->len = (copy_size + root->sectorsize - 1) & | 3526 | em->len = (copy_size + root->sectorsize - 1) & |
3050 | ~((u64)root->sectorsize - 1); | 3527 | ~((u64)root->sectorsize - 1); |
3051 | map = kmap(page); | 3528 | if (compressed) |
3529 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
3052 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 3530 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
3053 | if (create == 0 && !PageUptodate(page)) { | 3531 | if (create == 0 && !PageUptodate(page)) { |
3054 | read_extent_buffer(leaf, map + pg_offset, ptr, | 3532 | if (btrfs_file_extent_compression(leaf, item) == |
3055 | copy_size); | 3533 | BTRFS_COMPRESS_ZLIB) { |
3534 | ret = uncompress_inline(path, inode, page, | ||
3535 | pg_offset, | ||
3536 | extent_offset, item); | ||
3537 | BUG_ON(ret); | ||
3538 | } else { | ||
3539 | map = kmap(page); | ||
3540 | read_extent_buffer(leaf, map + pg_offset, ptr, | ||
3541 | copy_size); | ||
3542 | kunmap(page); | ||
3543 | } | ||
3056 | flush_dcache_page(page); | 3544 | flush_dcache_page(page); |
3057 | } else if (create && PageUptodate(page)) { | 3545 | } else if (create && PageUptodate(page)) { |
3058 | if (!trans) { | 3546 | if (!trans) { |
@@ -3063,11 +3551,12 @@ again: | |||
3063 | trans = btrfs_join_transaction(root, 1); | 3551 | trans = btrfs_join_transaction(root, 1); |
3064 | goto again; | 3552 | goto again; |
3065 | } | 3553 | } |
3554 | map = kmap(page); | ||
3066 | write_extent_buffer(leaf, map + pg_offset, ptr, | 3555 | write_extent_buffer(leaf, map + pg_offset, ptr, |
3067 | copy_size); | 3556 | copy_size); |
3557 | kunmap(page); | ||
3068 | btrfs_mark_buffer_dirty(leaf); | 3558 | btrfs_mark_buffer_dirty(leaf); |
3069 | } | 3559 | } |
3070 | kunmap(page); | ||
3071 | set_extent_uptodate(io_tree, em->start, | 3560 | set_extent_uptodate(io_tree, em->start, |
3072 | extent_map_end(em) - 1, GFP_NOFS); | 3561 | extent_map_end(em) - 1, GFP_NOFS); |
3073 | goto insert; | 3562 | goto insert; |
@@ -3779,6 +4268,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
3779 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | 4268 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); |
3780 | btrfs_set_file_extent_type(leaf, ei, | 4269 | btrfs_set_file_extent_type(leaf, ei, |
3781 | BTRFS_FILE_EXTENT_INLINE); | 4270 | BTRFS_FILE_EXTENT_INLINE); |
4271 | btrfs_set_file_extent_encryption(leaf, ei, 0); | ||
4272 | btrfs_set_file_extent_compression(leaf, ei, 0); | ||
4273 | btrfs_set_file_extent_other_encoding(leaf, ei, 0); | ||
4274 | btrfs_set_file_extent_ram_bytes(leaf, ei, name_len); | ||
4275 | |||
3782 | ptr = btrfs_file_extent_inline_start(ei); | 4276 | ptr = btrfs_file_extent_inline_start(ei); |
3783 | write_extent_buffer(leaf, symname, ptr, name_len); | 4277 | write_extent_buffer(leaf, symname, ptr, name_len); |
3784 | btrfs_mark_buffer_dirty(leaf); | 4278 | btrfs_mark_buffer_dirty(leaf); |
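The btrfs_get_extent() changes above encode a mapping rule worth stating in one place; a summary sketch restating the REG-extent hunk:

    /* how a regular extent maps, per the hunk above:
     *
     *   uncompressed: em->block_start = disk_bytenr + file_extent_offset;
     *                 em->block_len   = em->len;        -- byte for byte
     *
     *   compressed:   em->block_start = disk_bytenr;    -- no offset applied;
     *                 em->block_len   = disk_num_bytes; -- the whole blob must
     *                                                      be read and inflated
     *                                                      to get any byte
     *
     * merge_extent_mapping() likewise skips the block_start adjustment for
     * compressed extents, since a partial window into the blob is useless.
     */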
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2eb6caba57c2..b5745bb96d40 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -165,7 +165,8 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
165 | * inserted. | 165 | * inserted. |
166 | */ | 166 | */ |
167 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 167 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
168 | u64 start, u64 len, int nocow) | 168 | u64 start, u64 len, u64 disk_len, int nocow, |
169 | int compressed) | ||
169 | { | 170 | { |
170 | struct btrfs_ordered_inode_tree *tree; | 171 | struct btrfs_ordered_inode_tree *tree; |
171 | struct rb_node *node; | 172 | struct rb_node *node; |
@@ -180,9 +181,12 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
180 | entry->file_offset = file_offset; | 181 | entry->file_offset = file_offset; |
181 | entry->start = start; | 182 | entry->start = start; |
182 | entry->len = len; | 183 | entry->len = len; |
184 | entry->disk_len = disk_len; | ||
183 | entry->inode = inode; | 185 | entry->inode = inode; |
184 | if (nocow) | 186 | if (nocow) |
185 | set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); | 187 | set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); |
188 | if (compressed) | ||
189 | set_bit(BTRFS_ORDERED_COMPRESSED, &entry->flags); | ||
186 | 190 | ||
187 | /* one ref for the tree */ | 191 | /* one ref for the tree */ |
188 | atomic_set(&entry->refs, 1); | 192 | atomic_set(&entry->refs, 1); |
@@ -389,9 +393,10 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
389 | * for pdflush to find them | 393 | * for pdflush to find them |
390 | */ | 394 | */ |
391 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); | 395 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); |
392 | if (wait) | 396 | if (wait) { |
393 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 397 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
394 | &entry->flags)); | 398 | &entry->flags)); |
399 | } | ||
395 | } | 400 | } |
396 | 401 | ||
397 | /* | 402 | /* |
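With the widened signature, callers supply both lengths plus the compressed flag. The two call sites updated by this patch reduce to (condensed):

    /* compressed write: ram length and on-disk length differ */
    btrfs_add_ordered_extent(inode, start, ins.objectid,
                             ram_size /* len */, cur_alloc_size /* disk_len */,
                             0 /* nocow */, 1 /* compressed */);

    /* nocow rewrite: nothing is compressed, both lengths match */
    btrfs_add_ordered_extent(inode, start, bytenr,
                             extent_num_bytes, extent_num_bytes,
                             1 /* nocow */, 0 /* compressed */);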
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f50f8870a144..1ef464145d22 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -66,6 +66,8 @@ struct btrfs_ordered_sum { | |||
66 | 66 | ||
67 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 67 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
68 | 68 | ||
69 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | ||
70 | |||
69 | struct btrfs_ordered_extent { | 71 | struct btrfs_ordered_extent { |
70 | /* logical offset in the file */ | 72 | /* logical offset in the file */ |
71 | u64 file_offset; | 73 | u64 file_offset; |
@@ -73,9 +75,12 @@ struct btrfs_ordered_extent { | |||
73 | /* disk byte number */ | 75 | /* disk byte number */ |
74 | u64 start; | 76 | u64 start; |
75 | 77 | ||
76 | /* length of the extent in bytes */ | 78 | /* ram length of the extent in bytes */ |
77 | u64 len; | 79 | u64 len; |
78 | 80 | ||
81 | /* extent length on disk */ | ||
82 | u64 disk_len; | ||
83 | |||
79 | /* flags (described above) */ | 84 | /* flags (described above) */ |
80 | unsigned long flags; | 85 | unsigned long flags; |
81 | 86 | ||
@@ -127,7 +132,8 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
127 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 132 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
128 | u64 file_offset, u64 io_size); | 133 | u64 file_offset, u64 io_size); |
129 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 134 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
130 | u64 start, u64 len, int nocow); | 135 | u64 start, u64 len, u64 disk_len, int nocow, |
136 | int compressed); | ||
131 | int btrfs_add_ordered_sum(struct inode *inode, | 137 | int btrfs_add_ordered_sum(struct inode *inode, |
132 | struct btrfs_ordered_extent *entry, | 138 | struct btrfs_ordered_extent *entry, |
133 | struct btrfs_ordered_sum *sum); | 139 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bd9ab3e9a7f2..64725c13aa11 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -115,15 +115,16 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
115 | if (btrfs_file_extent_type(l, fi) == | 115 | if (btrfs_file_extent_type(l, fi) == |
116 | BTRFS_FILE_EXTENT_INLINE) { | 116 | BTRFS_FILE_EXTENT_INLINE) { |
117 | printk("\t\tinline extent data size %u\n", | 117 | printk("\t\tinline extent data size %u\n", |
118 | btrfs_file_extent_inline_len(l, item)); | 118 | btrfs_file_extent_inline_len(l, fi)); |
119 | break; | 119 | break; |
120 | } | 120 | } |
121 | printk("\t\textent data disk bytenr %llu nr %llu\n", | 121 | printk("\t\textent data disk bytenr %llu nr %llu\n", |
122 | (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), | 122 | (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), |
123 | (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); | 123 | (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); |
124 | printk("\t\textent data offset %llu nr %llu\n", | 124 | printk("\t\textent data offset %llu nr %llu ram %llu\n", |
125 | (unsigned long long)btrfs_file_extent_offset(l, fi), | 125 | (unsigned long long)btrfs_file_extent_offset(l, fi), |
126 | (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); | 126 | (unsigned long long)btrfs_file_extent_num_bytes(l, fi), |
127 | (unsigned long long)btrfs_file_extent_ram_bytes(l, fi)); | ||
127 | break; | 128 | break; |
128 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 129 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
129 | bi = btrfs_item_ptr(l, i, | 130 | bi = btrfs_item_ptr(l, i, |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e6039825b7b..431fdf144b58 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "volumes.h" | 47 | #include "volumes.h" |
48 | #include "version.h" | 48 | #include "version.h" |
49 | #include "export.h" | 49 | #include "export.h" |
50 | #include "compression.h" | ||
50 | 51 | ||
51 | #define BTRFS_SUPER_MAGIC 0x9123683E | 52 | #define BTRFS_SUPER_MAGIC 0x9123683E |
52 | 53 | ||
@@ -69,7 +70,7 @@ static void btrfs_put_super (struct super_block * sb) | |||
69 | enum { | 70 | enum { |
70 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 71 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
71 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 72 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
72 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err, | 73 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, |
73 | }; | 74 | }; |
74 | 75 | ||
75 | static match_table_t tokens = { | 76 | static match_table_t tokens = { |
@@ -83,6 +84,7 @@ static match_table_t tokens = { | |||
83 | {Opt_max_inline, "max_inline=%s"}, | 84 | {Opt_max_inline, "max_inline=%s"}, |
84 | {Opt_alloc_start, "alloc_start=%s"}, | 85 | {Opt_alloc_start, "alloc_start=%s"}, |
85 | {Opt_thread_pool, "thread_pool=%d"}, | 86 | {Opt_thread_pool, "thread_pool=%d"}, |
87 | {Opt_compress, "compress"}, | ||
86 | {Opt_ssd, "ssd"}, | 88 | {Opt_ssd, "ssd"}, |
87 | {Opt_noacl, "noacl"}, | 89 | {Opt_noacl, "noacl"}, |
88 | {Opt_err, NULL}, | 90 | {Opt_err, NULL}, |
@@ -163,6 +165,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
163 | btrfs_set_opt(info->mount_opt, NODATACOW); | 165 | btrfs_set_opt(info->mount_opt, NODATACOW); |
164 | btrfs_set_opt(info->mount_opt, NODATASUM); | 166 | btrfs_set_opt(info->mount_opt, NODATASUM); |
165 | break; | 167 | break; |
168 | case Opt_compress: | ||
169 | printk(KERN_INFO "btrfs: use compression\n"); | ||
170 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
171 | break; | ||
166 | case Opt_ssd: | 172 | case Opt_ssd: |
167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 173 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
168 | btrfs_set_opt(info->mount_opt, SSD); | 174 | btrfs_set_opt(info->mount_opt, SSD); |
@@ -622,6 +628,7 @@ static int __init init_btrfs_fs(void) | |||
622 | err = btrfs_interface_init(); | 628 | err = btrfs_interface_init(); |
623 | if (err) | 629 | if (err) |
624 | goto free_extent_map; | 630 | goto free_extent_map; |
631 | |||
625 | err = register_filesystem(&btrfs_fs_type); | 632 | err = register_filesystem(&btrfs_fs_type); |
626 | if (err) | 633 | if (err) |
627 | goto unregister_ioctl; | 634 | goto unregister_ioctl; |
@@ -651,6 +658,7 @@ static void __exit exit_btrfs_fs(void) | |||
651 | unregister_filesystem(&btrfs_fs_type); | 658 | unregister_filesystem(&btrfs_fs_type); |
652 | btrfs_exit_sysfs(); | 659 | btrfs_exit_sysfs(); |
653 | btrfs_cleanup_fs_uuids(); | 660 | btrfs_cleanup_fs_uuids(); |
661 | btrfs_zlib_exit(); | ||
654 | } | 662 | } |
655 | 663 | ||
656 | module_init(init_btrfs_fs) | 664 | module_init(init_btrfs_fs) |
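Usage note: compression is opt-in per mount, e.g. mount -o compress /dev/sdb /mnt. The write path then gates on the option together with the per-inode NOCOMPRESS flag, as in the inode.c hunk earlier:

    /* condensed from cow_file_range(): compress only when the mount asked
     * for it and this inode has not been marked incompressible */
    if (!btrfs_test_flag(inode, NOCOMPRESS) &&
        btrfs_test_opt(root, COMPRESS)) {
            /* ... try btrfs_zlib_compress_pages() ... */
    }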
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf618cc8b34a..e6d579053a47 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -540,8 +540,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
540 | if (found_type == BTRFS_FILE_EXTENT_REG) | 540 | if (found_type == BTRFS_FILE_EXTENT_REG) |
541 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); | 541 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); |
542 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 542 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
543 | size = btrfs_file_extent_inline_len(eb, | 543 | size = btrfs_file_extent_inline_len(eb, item); |
544 | btrfs_item_nr(eb, slot)); | ||
545 | extent_end = (start + size + mask) & ~mask; | 544 | extent_end = (start + size + mask) & ~mask; |
546 | } else { | 545 | } else { |
547 | ret = 0; | 546 | ret = 0; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2eed7f91f51a..7db4cfd03a98 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1816,6 +1816,7 @@ again: | |||
1816 | em->start = key.offset; | 1816 | em->start = key.offset; |
1817 | em->len = *num_bytes; | 1817 | em->len = *num_bytes; |
1818 | em->block_start = 0; | 1818 | em->block_start = 0; |
1819 | em->block_len = em->len; | ||
1819 | 1820 | ||
1820 | if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1821 | if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1821 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, | 1822 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, |
@@ -2323,6 +2324,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
2323 | em->start = logical; | 2324 | em->start = logical; |
2324 | em->len = length; | 2325 | em->len = length; |
2325 | em->block_start = 0; | 2326 | em->block_start = 0; |
2327 | em->block_len = em->len; | ||
2326 | 2328 | ||
2327 | map->num_stripes = num_stripes; | 2329 | map->num_stripes = num_stripes; |
2328 | map->io_width = btrfs_chunk_io_width(leaf, chunk); | 2330 | map->io_width = btrfs_chunk_io_width(leaf, chunk); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c new file mode 100644 index 000000000000..e99309180a11 --- /dev/null +++ b/fs/btrfs/zlib.c | |||
@@ -0,0 +1,637 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | * | ||
18 | * Based on jffs2 zlib code: | ||
19 | * Copyright © 2001-2007 Red Hat, Inc. | ||
20 | * Created by David Woodhouse <dwmw2@infradead.org> | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/zlib.h> | ||
26 | #include <linux/zutil.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/err.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/pagemap.h> | ||
32 | #include <linux/bio.h> | ||
33 | |||
34 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
35 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
36 | If it doesn't manage to finish, call it again with | ||
37 | avail_in == 0 and avail_out set to the remaining 12 | ||
38 | bytes for it to clean up. | ||
39 | Q: Is 12 bytes sufficient? | ||
40 | */ | ||
41 | #define STREAM_END_SPACE 12 | ||
42 | |||
43 | struct workspace { | ||
44 | z_stream inf_strm; | ||
45 | z_stream def_strm; | ||
46 | char *buf; | ||
47 | struct list_head list; | ||
48 | }; | ||
49 | |||
50 | static LIST_HEAD(idle_workspace); | ||
51 | static DEFINE_SPINLOCK(workspace_lock); | ||
52 | static unsigned long num_workspace; | ||
53 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
54 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
55 | |||
56 | /* | ||
57 | * this finds an available zlib workspace or allocates a new one | ||
58 | * an ERR_PTR is returned if things go bad. | ||
59 | */ | ||
60 | static struct workspace *find_zlib_workspace(void) | ||
61 | { | ||
62 | struct workspace *workspace; | ||
63 | int ret; | ||
64 | int cpus = num_online_cpus(); | ||
65 | |||
66 | again: | ||
67 | spin_lock(&workspace_lock); | ||
68 | if (!list_empty(&idle_workspace)) { | ||
69 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
70 | list); | ||
71 | list_del(&workspace->list); | ||
72 | num_workspace--; | ||
73 | spin_unlock(&workspace_lock); | ||
74 | return workspace; | ||
75 | |||
76 | } | ||
77 | spin_unlock(&workspace_lock); | ||
78 | if (atomic_read(&alloc_workspace) > cpus) { | ||
79 | DEFINE_WAIT(wait); | ||
80 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
81 | if (atomic_read(&alloc_workspace) > cpus) | ||
82 | schedule(); | ||
83 | finish_wait(&workspace_wait, &wait); | ||
84 | goto again; | ||
85 | } | ||
86 | atomic_inc(&alloc_workspace); | ||
87 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
88 | if (!workspace) { | ||
89 | ret = -ENOMEM; | ||
90 | goto fail; | ||
91 | } | ||
92 | |||
93 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | ||
94 | if (!workspace->def_strm.workspace) { | ||
95 | ret = -ENOMEM; | ||
96 | goto fail; | ||
97 | } | ||
98 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | ||
99 | if (!workspace->inf_strm.workspace) { | ||
100 | ret = -ENOMEM; | ||
101 | goto fail_inflate; | ||
102 | } | ||
103 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
104 | if (!workspace->buf) { | ||
105 | ret = -ENOMEM; | ||
106 | goto fail_kmalloc; | ||
107 | } | ||
108 | return workspace; | ||
109 | |||
110 | fail_kmalloc: | ||
111 | vfree(workspace->inf_strm.workspace); | ||
112 | fail_inflate: | ||
113 | vfree(workspace->def_strm.workspace); | ||
114 | fail: | ||
115 | kfree(workspace); | ||
116 | atomic_dec(&alloc_workspace); | ||
117 | wake_up(&workspace_wait); | ||
118 | return ERR_PTR(ret); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * put a workspace struct back on the list or free it if we have enough | ||
123 | * idle ones sitting around | ||
124 | */ | ||
125 | static int free_workspace(struct workspace *workspace) | ||
126 | { | ||
127 | spin_lock(&workspace_lock); | ||
128 | if (num_workspace < num_online_cpus()) { | ||
129 | list_add_tail(&workspace->list, &idle_workspace); | ||
130 | num_workspace++; | ||
131 | spin_unlock(&workspace_lock); | ||
132 | if (waitqueue_active(&workspace_wait)) | ||
133 | wake_up(&workspace_wait); | ||
134 | return 0; | ||
135 | } | ||
136 | spin_unlock(&workspace_lock); | ||
137 | vfree(workspace->def_strm.workspace); | ||
138 | vfree(workspace->inf_strm.workspace); | ||
139 | kfree(workspace->buf); | ||
140 | kfree(workspace); | ||
141 | |||
142 | atomic_dec(&alloc_workspace); | ||
143 | if (waitqueue_active(&workspace_wait)) | ||
144 | wake_up(&workspace_wait); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
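The two helpers above implement a bounded pool: at most one workspace per online CPU is ever allocated, and extra requesters sleep on workspace_wait. A sketch of the acquire/use/release cycle they support:

    struct workspace *ws = find_zlib_workspace();

    if (IS_ERR(ws))
            return PTR_ERR(ws);
    /* drive ws->def_strm with zlib_deflate(), or ws->inf_strm with
     * zlib_inflate(), staging bytes through ws->buf as needed */
    free_workspace(ws);     /* parked on idle_workspace, or freed if the
                             * idle list already holds one per CPU */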
148 | /* | ||
149 | * cleanup function for module exit | ||
150 | */ | ||
151 | static void free_workspaces(void) | ||
152 | { | ||
153 | struct workspace *workspace; | ||
154 | while(!list_empty(&idle_workspace)) { | ||
155 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
156 | list); | ||
157 | list_del(&workspace->list); | ||
158 | vfree(workspace->def_strm.workspace); | ||
159 | vfree(workspace->inf_strm.workspace); | ||
160 | kfree(workspace->buf); | ||
161 | kfree(workspace); | ||
162 | atomic_dec(&alloc_workspace); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * given an address space and start/len, compress the bytes. | ||
168 | * | ||
169 | * pages are allocated to hold the compressed result and stored | ||
170 | * in 'pages' | ||
171 | * | ||
172 | * out_pages is used to return the number of pages allocated. There | ||
173 | * may be pages allocated even if we return an error | ||
174 | * | ||
175 | * total_in is used to return the number of bytes actually read. It | ||
176 | * may be smaller than len if we had to exit early because we | ||
177 | * ran out of room in the pages array or because we cross the | ||
178 | * max_out threshold. | ||
179 | * | ||
180 | * total_out is used to return the total number of compressed bytes | ||
181 | * | ||
182 | * max_out tells us the max number of bytes that we're allowed to | ||
183 | * stuff into pages | ||
184 | */ | ||
185 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
186 | u64 start, unsigned long len, | ||
187 | struct page **pages, | ||
188 | unsigned long nr_dest_pages, | ||
189 | unsigned long *out_pages, | ||
190 | unsigned long *total_in, | ||
191 | unsigned long *total_out, | ||
192 | unsigned long max_out) | ||
193 | { | ||
194 | int ret; | ||
195 | struct workspace *workspace; | ||
196 | char *data_in; | ||
197 | char *cpage_out; | ||
198 | int nr_pages = 0; | ||
199 | struct page *in_page = NULL; | ||
200 | struct page *out_page = NULL; | ||
201 | int out_written = 0; | ||
202 | int in_read = 0; | ||
203 | unsigned long bytes_left; | ||
204 | |||
205 | *out_pages = 0; | ||
206 | *total_out = 0; | ||
207 | *total_in = 0; | ||
208 | |||
209 | workspace = find_zlib_workspace(); | ||
210 | if (IS_ERR(workspace)) | ||
211 | return -1; | ||
212 | |||
213 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | ||
214 | printk(KERN_WARNING "deflateInit failed\n"); | ||
215 | ret = -1; | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | workspace->def_strm.total_in = 0; | ||
220 | workspace->def_strm.total_out = 0; | ||
221 | |||
222 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
223 | data_in = kmap(in_page); | ||
224 | |||
225 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
226 | cpage_out = kmap(out_page); | ||
227 | pages[0] = out_page; | ||
228 | nr_pages = 1; | ||
229 | |||
230 | workspace->def_strm.next_in = data_in; | ||
231 | workspace->def_strm.next_out = cpage_out; | ||
232 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | ||
233 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); | ||
234 | |||
235 | out_written = 0; | ||
236 | in_read = 0; | ||
237 | |||
238 | while (workspace->def_strm.total_in < len) { | ||
239 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | ||
240 | if (ret != Z_OK) { | ||
241 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | ||
242 | ret); | ||
243 | zlib_deflateEnd(&workspace->def_strm); | ||
244 | ret = -1; | ||
245 | goto out; | ||
246 | } | ||
247 | |||
248 | /* we're making it bigger, give up */ | ||
249 | if (workspace->def_strm.total_in > 8192 && | ||
250 | workspace->def_strm.total_in < | ||
251 | workspace->def_strm.total_out) { | ||
252 | ret = -1; | ||
253 | goto out; | ||
254 | } | ||
255 | /* we need another page for writing out. Test this | ||
256 | * before the total_in so we will pull in a new page for | ||
257 | * the stream end if required | ||
258 | */ | ||
259 | if (workspace->def_strm.avail_out == 0) { | ||
260 | kunmap(out_page); | ||
261 | if (nr_pages == nr_dest_pages) { | ||
262 | out_page = NULL; | ||
263 | ret = -1; | ||
264 | goto out; | ||
265 | } | ||
266 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
267 | cpage_out = kmap(out_page); | ||
268 | pages[nr_pages] = out_page; | ||
269 | nr_pages++; | ||
270 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | ||
271 | workspace->def_strm.next_out = cpage_out; | ||
272 | } | ||
273 | /* we're all done */ | ||
274 | if (workspace->def_strm.total_in >= len) | ||
275 | break; | ||
276 | |||
277 | /* we've read in a full page, get a new one */ | ||
278 | if (workspace->def_strm.avail_in == 0) { | ||
279 | if (workspace->def_strm.total_out > max_out) | ||
280 | break; | ||
281 | |||
282 | bytes_left = len - workspace->def_strm.total_in; | ||
283 | kunmap(in_page); | ||
284 | page_cache_release(in_page); | ||
285 | |||
286 | start += PAGE_CACHE_SIZE; | ||
287 | in_page = find_get_page(mapping, | ||
288 | start >> PAGE_CACHE_SHIFT); | ||
289 | data_in = kmap(in_page); | ||
290 | workspace->def_strm.avail_in = min(bytes_left, | ||
291 | PAGE_CACHE_SIZE); | ||
292 | workspace->def_strm.next_in = data_in; | ||
293 | } | ||
294 | } | ||
295 | workspace->def_strm.avail_in = 0; | ||
296 | ret = zlib_deflate(&workspace->def_strm, Z_FINISH); | ||
297 | zlib_deflateEnd(&workspace->def_strm); | ||
298 | |||
299 | if (ret != Z_STREAM_END) { | ||
300 | ret = -1; | ||
301 | goto out; | ||
302 | } | ||
303 | |||
304 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { | ||
305 | ret = -1; | ||
306 | goto out; | ||
307 | } | ||
308 | |||
309 | ret = 0; | ||
310 | *total_out = workspace->def_strm.total_out; | ||
311 | *total_in = workspace->def_strm.total_in; | ||
312 | out: | ||
313 | *out_pages = nr_pages; | ||
314 | if (out_page) | ||
315 | kunmap(out_page); | ||
316 | |||
317 | if (in_page) { | ||
318 | kunmap(in_page); | ||
319 | page_cache_release(in_page); | ||
320 | } | ||
321 | free_workspace(workspace); | ||
322 | return ret; | ||
323 | } | ||
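
To make the contract documented above concrete, here is a hedged usage sketch, not part of the patch: a hypothetical caller (example_compress_chunk, with illustrative fixed sizes) compresses one 128K chunk of an inode's page cache and caps output at 64K, so a chunk that does not compress well fails fast. Note how the caller must release pages even on error, since *out_pages is filled in on every exit path. In this patch the real caller sits in the delayed allocation path in inode.c.

	/* Illustration only: example_compress_chunk() is a hypothetical
	 * caller, not part of this patch.  It assumes the pages for
	 * [start, start + 128K) are already present and uptodate in the
	 * page cache, as the writeback path guarantees before calling in.
	 */
	static int example_compress_chunk(struct address_space *mapping,
					  u64 start)
	{
		struct page *pages[16];	/* room for 64K of compressed output */
		unsigned long nr_pages = 0;
		unsigned long total_in = 0;
		unsigned long total_out = 0;
		unsigned long i;
		int ret;

		/* cap output at half the input so we only keep clear wins */
		ret = btrfs_zlib_compress_pages(mapping, start, 128 * 1024,
						pages, 16, &nr_pages,
						&total_in, &total_out,
						64 * 1024);

		/* pages may have been allocated even on failure; drop them */
		if (ret) {
			for (i = 0; i < nr_pages; i++)
				page_cache_release(pages[i]);
			return ret;
		}
		printk(KERN_INFO "btrfs example: %lu -> %lu bytes (%lu pages)\n",
		       total_in, total_out, nr_pages);
		/* ...hand pages[0..nr_pages) to the compressed write path... */
		for (i = 0; i < nr_pages; i++)
			page_cache_release(pages[i]);
		return 0;
	}
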
324 | |||
325 | /* | ||
326 | * pages_in is an array of pages with compressed data. | ||
327 | * | ||
328 | * disk_start is the starting logical offset of this array in the file | ||
329 | * | ||
330 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
331 | * | ||
332 | * vcnt is the count of pages in the biovec | ||
333 | * | ||
334 | * srclen is the number of bytes in pages_in | ||
335 | * | ||
336 | * The basic idea is that we have a bio that was created by readpages. | ||
337 | * The pages in the bio are for the uncompressed data, and they may not | ||
338 | * be contiguous. They all correspond to the range of bytes covered by | ||
339 | * the compressed extent. | ||
340 | */ | ||
341 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
342 | u64 disk_start, | ||
343 | struct bio_vec *bvec, | ||
344 | int vcnt, | ||
345 | size_t srclen) | ||
346 | { | ||
347 | int ret = 0; | ||
348 | int wbits = MAX_WBITS; | ||
349 | struct workspace *workspace; | ||
350 | char *data_in; | ||
351 | size_t total_out = 0; | ||
352 | unsigned long page_bytes_left; | ||
353 | unsigned long page_in_index = 0; | ||
354 | unsigned long page_out_index = 0; | ||
355 | struct page *page_out; | ||
356 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
357 | PAGE_CACHE_SIZE; | ||
358 | unsigned long buf_start; | ||
359 | unsigned long buf_offset; | ||
360 | unsigned long bytes; | ||
361 | unsigned long working_bytes; | ||
362 | unsigned long pg_offset; | ||
363 | unsigned long start_byte; | ||
364 | unsigned long current_buf_start; | ||
365 | char *kaddr; | ||
366 | |||
367 | workspace = find_zlib_workspace(); | ||
368 | if (!workspace) | ||
369 | return -ENOMEM; | ||
370 | |||
371 | data_in = kmap(pages_in[page_in_index]); | ||
372 | workspace->inf_strm.next_in = data_in; | ||
373 | workspace->inf_strm.avail_in = min(srclen, PAGE_CACHE_SIZE); | ||
374 | workspace->inf_strm.total_in = 0; | ||
375 | |||
376 | workspace->inf_strm.total_out = 0; | ||
377 | workspace->inf_strm.next_out = workspace->buf; | ||
378 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
379 | page_out = bvec[page_out_index].bv_page; | ||
380 | page_bytes_left = PAGE_CACHE_SIZE; | ||
381 | pg_offset = 0; | ||
382 | |||
383 | /* If it's deflate, and it's got no preset dictionary, then | ||
384 | we can tell zlib to skip the adler32 check. */ | ||
385 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && | ||
386 | ((data_in[0] & 0x0f) == Z_DEFLATED) && | ||
387 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | ||
388 | |||
389 | wbits = -((data_in[0] >> 4) + 8); | ||
390 | workspace->inf_strm.next_in += 2; | ||
391 | workspace->inf_strm.avail_in -= 2; | ||
392 | } | ||
393 | |||
394 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | ||
395 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); | ||
396 | ret = -1; | ||
397 | goto out; | ||
398 | } | ||
399 | while (workspace->inf_strm.total_in < srclen) { | ||
400 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | ||
401 | if (ret != Z_OK && ret != Z_STREAM_END) { | ||
402 | break; | ||
403 | } | ||
404 | |||
405 | /* | ||
406 | * buf_start is the byte offset, in uncompressed data, of | ||
407 | * the start of our workspace buffer | ||
408 | */ | ||
409 | buf_start = total_out; | ||
410 | |||
411 | /* total_out is one past the last uncompressed byte produced so far */ | ||
412 | total_out = workspace->inf_strm.total_out; | ||
413 | |||
414 | working_bytes = total_out - buf_start; | ||
415 | |||
416 | /* | ||
417 | * start_byte is the first byte of the page we're currently | ||
418 | * copying into, relative to the start of the uncompressed data. | ||
419 | */ | ||
420 | start_byte = page_offset(page_out) - disk_start; | ||
421 | |||
422 | if (working_bytes == 0) { | ||
423 | /* we didn't make progress in this inflate | ||
424 | * call, we're done | ||
425 | */ | ||
426 | if (ret != Z_STREAM_END) | ||
427 | ret = -1; | ||
428 | break; | ||
429 | } | ||
430 | |||
431 | /* we haven't yet hit data corresponding to this page */ | ||
432 | if (total_out <= start_byte) { | ||
433 | goto next; | ||
434 | } | ||
435 | |||
436 | /* | ||
437 | * the start of the data we care about is offset into | ||
438 | * the middle of our working buffer | ||
439 | */ | ||
440 | if (total_out > start_byte && buf_start < start_byte) { | ||
441 | buf_offset = start_byte - buf_start; | ||
442 | working_bytes -= buf_offset; | ||
443 | } else { | ||
444 | buf_offset = 0; | ||
445 | } | ||
446 | current_buf_start = buf_start; | ||
447 | |||
448 | /* copy bytes from the working buffer into the pages */ | ||
449 | while (working_bytes > 0) { | ||
450 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
451 | PAGE_CACHE_SIZE - buf_offset); | ||
452 | bytes = min(bytes, working_bytes); | ||
453 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
454 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
455 | bytes); | ||
456 | kunmap_atomic(kaddr, KM_USER0); | ||
457 | flush_dcache_page(page_out); | ||
458 | |||
459 | pg_offset += bytes; | ||
460 | page_bytes_left -= bytes; | ||
461 | buf_offset += bytes; | ||
462 | working_bytes -= bytes; | ||
463 | current_buf_start += bytes; | ||
464 | |||
465 | /* check if we need to pick another page */ | ||
466 | if (page_bytes_left == 0) { | ||
467 | page_out_index++; | ||
468 | if (page_out_index >= vcnt) { | ||
469 | ret = 0; | ||
470 | goto done; | ||
471 | } | ||
472 | page_out = bvec[page_out_index].bv_page; | ||
473 | pg_offset = 0; | ||
474 | page_bytes_left = PAGE_CACHE_SIZE; | ||
475 | start_byte = page_offset(page_out) - disk_start; | ||
476 | |||
477 | /* | ||
478 | * make sure our new page is covered by this | ||
479 | * working buffer | ||
480 | */ | ||
481 | if (total_out <= start_byte) { | ||
482 | goto next; | ||
483 | } | ||
484 | |||
485 | /* the next page in the biovec might not | ||
486 | * be adjacent to the last page, but it | ||
487 | * might still be found inside this working | ||
488 | * buffer. bump our offset pointer | ||
489 | */ | ||
490 | if (total_out > start_byte && | ||
491 | current_buf_start < start_byte) { | ||
492 | buf_offset = start_byte - buf_start; | ||
493 | working_bytes = total_out - start_byte; | ||
494 | current_buf_start = buf_start + | ||
495 | buf_offset; | ||
496 | } | ||
497 | } | ||
498 | } | ||
499 | next: | ||
500 | workspace->inf_strm.next_out = workspace->buf; | ||
501 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
502 | |||
503 | if (workspace->inf_strm.avail_in == 0) { | ||
504 | unsigned long tmp; | ||
505 | kunmap(pages_in[page_in_index]); | ||
506 | page_in_index++; | ||
507 | if (page_in_index >= total_pages_in) { | ||
508 | data_in = NULL; | ||
509 | break; | ||
510 | } | ||
511 | data_in = kmap(pages_in[page_in_index]); | ||
512 | workspace->inf_strm.next_in = data_in; | ||
513 | tmp = srclen - workspace->inf_strm.total_in; | ||
514 | workspace->inf_strm.avail_in = min(tmp, | ||
515 | PAGE_CACHE_SIZE); | ||
516 | } | ||
517 | } | ||
518 | if (ret != Z_STREAM_END) { | ||
519 | ret = -1; | ||
520 | } else { | ||
521 | ret = 0; | ||
522 | } | ||
523 | done: | ||
524 | zlib_inflateEnd(&workspace->inf_strm); | ||
525 | if (data_in) | ||
526 | kunmap(pages_in[page_in_index]); | ||
527 | out: | ||
528 | free_workspace(workspace); | ||
529 | return ret; | ||
530 | } | ||
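
The copy loop above is pure window arithmetic: workspace->buf only ever holds the uncompressed bytes [buf_start, total_out), and each destination page wants the bytes beginning at its own start_byte. A hedged stand-alone restatement of that computation follows; copy_window is a hypothetical name, not in the patch. For example, with buf_start = 4096, total_out = 8192 and start_byte = 6144, it returns 2048 copyable bytes beginning at buf_offset = 2048.

	/* Hypothetical helper, for illustration only: how many bytes of
	 * the workspace buffer (holding uncompressed bytes
	 * [buf_start, total_out)) belong to a page whose data begins at
	 * start_byte, and where in the buffer they start.  Mirrors the
	 * arithmetic in the loop above.
	 */
	static unsigned long copy_window(unsigned long buf_start,
					 unsigned long total_out,
					 unsigned long start_byte,
					 unsigned long *buf_offset)
	{
		/* the buffer ends before this page's data begins */
		if (total_out <= start_byte) {
			*buf_offset = 0;
			return 0;
		}
		/* skip the head of the buffer that precedes the page */
		if (buf_start < start_byte)
			*buf_offset = start_byte - buf_start;
		else
			*buf_offset = 0;
		return (total_out - buf_start) - *buf_offset;
	}
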
531 | |||
532 | /* | ||
533 | * a less complex decompression routine. Our compressed data fits in a | ||
534 | * single page, and we want to read a single page out of it. | ||
535 | * start_byte tells us the offset into the uncompressed data we're interested in | ||
536 | */ | ||
537 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
538 | struct page *dest_page, | ||
539 | unsigned long start_byte, | ||
540 | size_t srclen, size_t destlen) | ||
541 | { | ||
542 | int ret = 0; | ||
543 | int wbits = MAX_WBITS; | ||
544 | struct workspace *workspace; | ||
545 | unsigned long bytes_left = destlen; | ||
546 | unsigned long total_out = 0; | ||
547 | char *kaddr; | ||
548 | |||
549 | if (destlen > PAGE_CACHE_SIZE) | ||
550 | return -ENOMEM; | ||
551 | |||
552 | workspace = find_zlib_workspace(); | ||
553 | if (!workspace) | ||
554 | return -ENOMEM; | ||
555 | |||
556 | workspace->inf_strm.next_in = data_in; | ||
557 | workspace->inf_strm.avail_in = srclen; | ||
558 | workspace->inf_strm.total_in = 0; | ||
559 | |||
560 | workspace->inf_strm.next_out = workspace->buf; | ||
561 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
562 | workspace->inf_strm.total_out = 0; | ||
563 | /* If it's deflate, and it's got no preset dictionary, then | ||
564 | we can tell zlib to skip the adler32 check. */ | ||
565 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && | ||
566 | ((data_in[0] & 0x0f) == Z_DEFLATED) && | ||
567 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | ||
568 | |||
569 | wbits = -((data_in[0] >> 4) + 8); | ||
570 | workspace->inf_strm.next_in += 2; | ||
571 | workspace->inf_strm.avail_in -= 2; | ||
572 | } | ||
573 | |||
574 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | ||
575 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); | ||
576 | ret = -1; | ||
577 | goto out; | ||
578 | } | ||
579 | |||
580 | while (bytes_left > 0) { | ||
581 | unsigned long buf_start; | ||
582 | unsigned long buf_offset; | ||
583 | unsigned long bytes; | ||
584 | unsigned long pg_offset = 0; | ||
585 | |||
586 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | ||
587 | if (ret != Z_OK && ret != Z_STREAM_END) { | ||
588 | break; | ||
589 | } | ||
590 | |||
591 | buf_start = total_out; | ||
592 | total_out = workspace->inf_strm.total_out; | ||
593 | |||
594 | if (total_out == buf_start) { | ||
595 | ret = -1; | ||
596 | break; | ||
597 | } | ||
598 | |||
599 | if (total_out <= start_byte) { | ||
600 | goto next; | ||
601 | } | ||
602 | |||
603 | if (total_out > start_byte && buf_start < start_byte) { | ||
604 | buf_offset = start_byte - buf_start; | ||
605 | } else { | ||
606 | buf_offset = 0; | ||
607 | } | ||
608 | |||
609 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
610 | PAGE_CACHE_SIZE - buf_offset); | ||
611 | bytes = min(bytes, bytes_left); | ||
612 | |||
613 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
614 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); | ||
615 | kunmap_atomic(kaddr, KM_USER0); | ||
616 | |||
617 | pg_offset += bytes; | ||
618 | bytes_left -= bytes; | ||
619 | next: | ||
620 | workspace->inf_strm.next_out = workspace->buf; | ||
621 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
622 | } | ||
623 | if (ret != Z_STREAM_END && bytes_left != 0) { | ||
624 | ret = -1; | ||
625 | } else { | ||
626 | ret = 0; | ||
627 | } | ||
628 | zlib_inflateEnd(&workspace->inf_strm); | ||
629 | out: | ||
630 | free_workspace(workspace); | ||
631 | return ret; | ||
632 | } | ||
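
Both decompression routines share the two-byte header test seen above. A zlib stream opens with the CMF and FLG bytes: the low nibble of CMF is the method (8, Z_DEFLATED), FLG bit 0x20 (PRESET_DICT) flags a preset dictionary, and the pair read big-endian must be divisible by 31. When there is no dictionary, the code skips those two bytes and passes zlib a negative window-bits value, which selects raw deflate and skips the adler32 check that btrfs does not need, since it keeps its own checksums. A hedged stand-alone restatement follows (raw_deflate_wbits is an illustrative name): for the common header 0x78 0x9c, 0x789c = 30876 = 31 * 996, so the test passes and wbits becomes -((0x78 >> 4) + 8) = -15, meaning a raw stream with a 32K window.

	/* Illustration only, not part of the patch: decide the window-bits
	 * argument for zlib_inflateInit2() from the first two stream
	 * bytes.  A negative value tells zlib the stream is raw deflate
	 * (no header, no adler32 trailer).
	 */
	static int raw_deflate_wbits(const unsigned char *data, size_t len)
	{
		if (len > 2 && !(data[1] & PRESET_DICT) &&	/* no dictionary */
		    (data[0] & 0x0f) == Z_DEFLATED &&		/* deflate method */
		    !((((unsigned int)data[0] << 8) + data[1]) % 31))
			return -(int)((data[0] >> 4) + 8);	/* e.g. -15 */
		return MAX_WBITS;	/* fall back to a normal zlib stream */
	}

As in the functions above, a caller taking the raw path must also advance next_in past the two header bytes before inflating.
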
633 | |||
634 | void btrfs_zlib_exit(void) | ||
635 | { | ||
636 | free_workspaces(); | ||
637 | } | ||
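
Finally, btrfs_zlib_exit() only makes sense against the workspace pool used throughout this file: zlib work areas are hundreds of kilobytes, too costly to allocate per I/O, so find_zlib_workspace() and free_workspace() recycle them through an idle list that free_workspaces() tears down at module unload, with the alloc_workspace counter visible at the top of this hunk bounding how many exist at once. The sketch below is a minimal illustration of that pattern under the assumption of a single spinlock-protected list; the example_* names are hypothetical, and the patch's real struct workspace carries separate inflate and deflate streams.

	/* Illustration only, not the patch's code: the minimal shape of a
	 * workspace pool.  Allocation falls back to building a new
	 * workspace when the idle list is empty; freeing parks the
	 * workspace instead of releasing its large vmalloc'ed work area.
	 */
	struct example_workspace {
		z_stream strm;		/* one stream; the real one keeps two */
		char *buf;		/* one-page bounce buffer */
		struct list_head list;
	};

	static LIST_HEAD(example_idle_list);
	static DEFINE_SPINLOCK(example_lock);

	static struct example_workspace *example_find_workspace(void)
	{
		struct example_workspace *ws = NULL;

		spin_lock(&example_lock);
		if (!list_empty(&example_idle_list)) {
			ws = list_entry(example_idle_list.next,
					struct example_workspace, list);
			list_del(&ws->list);
		}
		spin_unlock(&example_lock);
		if (ws)
			return ws;

		ws = kzalloc(sizeof(*ws), GFP_NOFS);
		if (!ws)
			return NULL;
		ws->strm.workspace = vmalloc(zlib_deflate_workspacesize());
		ws->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		if (!ws->strm.workspace || !ws->buf) {
			vfree(ws->strm.workspace);
			kfree(ws->buf);
			kfree(ws);
			return NULL;
		}
		return ws;
	}

	static void example_free_workspace(struct example_workspace *ws)
	{
		spin_lock(&example_lock);
		list_add(&ws->list, &example_idle_list);	/* park for reuse */
		spin_unlock(&example_lock);
	}
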