diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 21:12:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 21:12:42 -0400 |
commit | 968f3e374faf41e5e6049399eb7302777a09a1e8 (patch) | |
tree | 613c5aa9a005cfbe3fada77fcb0ab24deda126d9 | |
parent | e531cdf50a8a0fb7a4d51c06e52097bd01e9bf7c (diff) | |
parent | 389f239c53420802ad5085e51e88c37e2df5e003 (diff) |
Merge branch 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"We have a good sized cleanup of our internal read ahead code, and the
first series of commits from Chandan to enable PAGE_SIZE > sectorsize.
Otherwise, it's a normal series of cleanups and fixes, with many
thanks to Dave Sterba for doing most of the patch wrangling this time"
* 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (82 commits)
btrfs: make sure we stay inside the bvec during __btrfs_lookup_bio_sums
btrfs: Fix misspellings in comments.
btrfs: Print Warning only if ENOSPC_DEBUG is enabled
btrfs: scrub: silence an uninitialized variable warning
btrfs: move btrfs_compression_type to compression.h
btrfs: rename btrfs_print_info to btrfs_print_mod_info
Btrfs: Show a warning message if one of objectid reaches its highest value
Documentation: btrfs: remove usage specific information
btrfs: use kbasename in btrfsic_mount
Btrfs: do not collect ordered extents when logging that inode exists
Btrfs: fix race when checking if we can skip fsync'ing an inode
Btrfs: fix listxattrs not listing all xattrs packed in the same item
Btrfs: fix deadlock between direct IO reads and buffered writes
Btrfs: fix extent_same allowing destination offset beyond i_size
Btrfs: fix file loss on log replay after renaming a file and fsync
Btrfs: fix unreplayable log after snapshot delete + parent dir fsync
Btrfs: fix lockdep deadlock warning due to dev_replace
btrfs: drop unused argument in btrfs_ioctl_get_supported_features
btrfs: add GET_SUPPORTED_FEATURES to the control device ioctls
btrfs: change max_inline default to 2048
...
36 files changed, 1102 insertions, 931 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index c772b47e7ef0..f9dad22d95ce 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt | |||
@@ -1,20 +1,10 @@ | |||
1 | |||
2 | BTRFS | 1 | BTRFS |
3 | ===== | 2 | ===== |
4 | 3 | ||
5 | Btrfs is a copy on write filesystem for Linux aimed at | 4 | Btrfs is a copy on write filesystem for Linux aimed at implementing advanced |
6 | implementing advanced features while focusing on fault tolerance, | 5 | features while focusing on fault tolerance, repair and easy administration. |
7 | repair and easy administration. Initially developed by Oracle, Btrfs | 6 | Jointly developed by several companies, licensed under the GPL and open for |
8 | is licensed under the GPL and open for contribution from anyone. | 7 | contribution from anyone. |
9 | |||
10 | Linux has a wealth of filesystems to choose from, but we are facing a | ||
11 | number of challenges with scaling to the large storage subsystems that | ||
12 | are becoming common in today's data centers. Filesystems need to scale | ||
13 | in their ability to address and manage large storage, and also in | ||
14 | their ability to detect, repair and tolerate errors in the data stored | ||
15 | on disk. Btrfs is under heavy development, and is not suitable for | ||
16 | any uses other than benchmarking and review. The Btrfs disk format is | ||
17 | not yet finalized. | ||
18 | 8 | ||
19 | The main Btrfs features include: | 9 | The main Btrfs features include: |
20 | 10 | ||
@@ -28,243 +18,14 @@ The main Btrfs features include: | |||
28 | * Checksums on data and metadata (multiple algorithms available) | 18 | * Checksums on data and metadata (multiple algorithms available) |
29 | * Compression | 19 | * Compression |
30 | * Integrated multiple device support, with several raid algorithms | 20 | * Integrated multiple device support, with several raid algorithms |
31 | * Online filesystem check (not yet implemented) | 21 | * Offline filesystem check |
32 | * Very fast offline filesystem check | 22 | * Efficient incremental backup and FS mirroring |
33 | * Efficient incremental backup and FS mirroring (not yet implemented) | ||
34 | * Online filesystem defragmentation | 23 | * Online filesystem defragmentation |
35 | 24 | ||
25 | For more information please refer to the wiki | ||
36 | 26 | ||
37 | Mount Options | 27 | https://btrfs.wiki.kernel.org |
38 | ============= | ||
39 | |||
40 | When mounting a btrfs filesystem, the following option are accepted. | ||
41 | Options with (*) are default options and will not show in the mount options. | ||
42 | |||
43 | alloc_start=<bytes> | ||
44 | Debugging option to force all block allocations above a certain | ||
45 | byte threshold on each block device. The value is specified in | ||
46 | bytes, optionally with a K, M, or G suffix, case insensitive. | ||
47 | Default is 1MB. | ||
48 | |||
49 | noautodefrag(*) | ||
50 | autodefrag | ||
51 | Disable/enable auto defragmentation. | ||
52 | Auto defragmentation detects small random writes into files and queue | ||
53 | them up for the defrag process. Works best for small files; | ||
54 | Not well suited for large database workloads. | ||
55 | |||
56 | check_int | ||
57 | check_int_data | ||
58 | check_int_print_mask=<value> | ||
59 | These debugging options control the behavior of the integrity checking | ||
60 | module (the BTRFS_FS_CHECK_INTEGRITY config option required). | ||
61 | |||
62 | check_int enables the integrity checker module, which examines all | ||
63 | block write requests to ensure on-disk consistency, at a large | ||
64 | memory and CPU cost. | ||
65 | |||
66 | check_int_data includes extent data in the integrity checks, and | ||
67 | implies the check_int option. | ||
68 | |||
69 | check_int_print_mask takes a bitmask of BTRFSIC_PRINT_MASK_* values | ||
70 | as defined in fs/btrfs/check-integrity.c, to control the integrity | ||
71 | checker module behavior. | ||
72 | |||
73 | See comments at the top of fs/btrfs/check-integrity.c for more info. | ||
74 | |||
75 | commit=<seconds> | ||
76 | Set the interval of periodic commit, 30 seconds by default. Higher | ||
77 | values defer data being synced to permanent storage with obvious | ||
78 | consequences when the system crashes. The upper bound is not forced, | ||
79 | but a warning is printed if it's more than 300 seconds (5 minutes). | ||
80 | |||
81 | compress | ||
82 | compress=<type> | ||
83 | compress-force | ||
84 | compress-force=<type> | ||
85 | Control BTRFS file data compression. Type may be specified as "zlib" | ||
86 | "lzo" or "no" (for no compression, used for remounting). If no type | ||
87 | is specified, zlib is used. If compress-force is specified, | ||
88 | all files will be compressed, whether or not they compress well. | ||
89 | If compression is enabled, nodatacow and nodatasum are disabled. | ||
90 | |||
91 | degraded | ||
92 | Allow mounts to continue with missing devices. A read-write mount may | ||
93 | fail with too many devices missing, for example if a stripe member | ||
94 | is completely missing. | ||
95 | |||
96 | device=<devicepath> | ||
97 | Specify a device during mount so that ioctls on the control device | ||
98 | can be avoided. Especially useful when trying to mount a multi-device | ||
99 | setup as root. May be specified multiple times for multiple devices. | ||
100 | |||
101 | nodiscard(*) | ||
102 | discard | ||
103 | Disable/enable discard mount option. | ||
104 | Discard issues frequent commands to let the block device reclaim space | ||
105 | freed by the filesystem. | ||
106 | This is useful for SSD devices, thinly provisioned | ||
107 | LUNs and virtual machine images, but may have a significant | ||
108 | performance impact. (The fstrim command is also available to | ||
109 | initiate batch trims from userspace). | ||
110 | |||
111 | noenospc_debug(*) | ||
112 | enospc_debug | ||
113 | Disable/enable debugging option to be more verbose in some ENOSPC conditions. | ||
114 | |||
115 | fatal_errors=<action> | ||
116 | Action to take when encountering a fatal error: | ||
117 | "bug" - BUG() on a fatal error. This is the default. | ||
118 | "panic" - panic() on a fatal error. | ||
119 | |||
120 | noflushoncommit(*) | ||
121 | flushoncommit | ||
122 | The 'flushoncommit' mount option forces any data dirtied by a write in a | ||
123 | prior transaction to commit as part of the current commit. This makes | ||
124 | the committed state a fully consistent view of the file system from the | ||
125 | application's perspective (i.e., it includes all completed file system | ||
126 | operations). This was previously the behavior only when a snapshot is | ||
127 | created. | ||
128 | |||
129 | inode_cache | ||
130 | Enable free inode number caching. Defaults to off due to an overflow | ||
131 | problem when the free space crcs don't fit inside a single page. | ||
132 | |||
133 | max_inline=<bytes> | ||
134 | Specify the maximum amount of space, in bytes, that can be inlined in | ||
135 | a metadata B-tree leaf. The value is specified in bytes, optionally | ||
136 | with a K, M, or G suffix, case insensitive. In practice, this value | ||
137 | is limited by the root sector size, with some space unavailable due | ||
138 | to leaf headers. For a 4k sector size, max inline data is ~3900 bytes. | ||
139 | |||
140 | metadata_ratio=<value> | ||
141 | Specify that 1 metadata chunk should be allocated after every <value> | ||
142 | data chunks. Off by default. | ||
143 | |||
144 | acl(*) | ||
145 | noacl | ||
146 | Enable/disable support for Posix Access Control Lists (ACLs). See the | ||
147 | acl(5) manual page for more information about ACLs. | ||
148 | |||
149 | barrier(*) | ||
150 | nobarrier | ||
151 | Enable/disable the use of block layer write barriers. Write barriers | ||
152 | ensure that certain IOs make it through the device cache and are on | ||
153 | persistent storage. If disabled on a device with a volatile | ||
154 | (non-battery-backed) write-back cache, nobarrier option will lead to | ||
155 | filesystem corruption on a system crash or power loss. | ||
156 | |||
157 | datacow(*) | ||
158 | nodatacow | ||
159 | Enable/disable data copy-on-write for newly created files. | ||
160 | Nodatacow implies nodatasum, and disables all compression. | ||
161 | |||
162 | datasum(*) | ||
163 | nodatasum | ||
164 | Enable/disable data checksumming for newly created files. | ||
165 | Datasum implies datacow. | ||
166 | |||
167 | treelog(*) | ||
168 | notreelog | ||
169 | Enable/disable the tree logging used for fsync and O_SYNC writes. | ||
170 | |||
171 | recovery | ||
172 | Enable autorecovery attempts if a bad tree root is found at mount time. | ||
173 | Currently this scans a list of several previous tree roots and tries to | ||
174 | use the first readable. | ||
175 | |||
176 | rescan_uuid_tree | ||
177 | Force check and rebuild procedure of the UUID tree. This should not | ||
178 | normally be needed. | ||
179 | |||
180 | skip_balance | ||
181 | Skip automatic resume of interrupted balance operation after mount. | ||
182 | May be resumed with "btrfs balance resume." | ||
183 | |||
184 | space_cache (*) | ||
185 | Enable the on-disk freespace cache. | ||
186 | nospace_cache | ||
187 | Disable freespace cache loading without clearing the cache. | ||
188 | clear_cache | ||
189 | Force clearing and rebuilding of the disk space cache if something | ||
190 | has gone wrong. | ||
191 | |||
192 | ssd | ||
193 | nossd | ||
194 | ssd_spread | ||
195 | Options to control ssd allocation schemes. By default, BTRFS will | ||
196 | enable or disable ssd allocation heuristics depending on whether a | ||
197 | rotational or non-rotational disk is in use. The ssd and nossd options | ||
198 | can override this autodetection. | ||
199 | |||
200 | The ssd_spread mount option attempts to allocate into big chunks | ||
201 | of unused space, and may perform better on low-end ssds. ssd_spread | ||
202 | implies ssd, enabling all other ssd heuristics as well. | ||
203 | |||
204 | subvol=<path> | ||
205 | Mount subvolume at <path> rather than the root subvolume. <path> is | ||
206 | relative to the top level subvolume. | ||
207 | |||
208 | subvolid=<ID> | ||
209 | Mount subvolume specified by an ID number rather than the root subvolume. | ||
210 | This allows mounting of subvolumes which are not in the root of the mounted | ||
211 | filesystem. | ||
212 | You can use "btrfs subvolume list" to see subvolume ID numbers. | ||
213 | |||
214 | subvolrootid=<objectid> (deprecated) | ||
215 | Mount subvolume specified by <objectid> rather than the root subvolume. | ||
216 | This allows mounting of subvolumes which are not in the root of the mounted | ||
217 | filesystem. | ||
218 | You can use "btrfs subvolume show " to see the object ID for a subvolume. | ||
219 | |||
220 | thread_pool=<number> | ||
221 | The number of worker threads to allocate. The default number is equal | ||
222 | to the number of CPUs + 2, or 8, whichever is smaller. | ||
223 | |||
224 | user_subvol_rm_allowed | ||
225 | Allow subvolumes to be deleted by a non-root user. Use with caution. | ||
226 | |||
227 | MAILING LIST | ||
228 | ============ | ||
229 | |||
230 | There is a Btrfs mailing list hosted on vger.kernel.org. You can | ||
231 | find details on how to subscribe here: | ||
232 | |||
233 | http://vger.kernel.org/vger-lists.html#linux-btrfs | ||
234 | |||
235 | Mailing list archives are available from gmane: | ||
236 | |||
237 | http://dir.gmane.org/gmane.comp.file-systems.btrfs | ||
238 | |||
239 | |||
240 | |||
241 | IRC | ||
242 | === | ||
243 | |||
244 | Discussion of Btrfs also occurs on the #btrfs channel of the Freenode | ||
245 | IRC network. | ||
246 | |||
247 | |||
248 | |||
249 | UTILITIES | ||
250 | ========= | ||
251 | |||
252 | Userspace tools for creating and manipulating Btrfs file systems are | ||
253 | available from the git repository at the following location: | ||
254 | |||
255 | http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs.git | ||
256 | git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git | ||
257 | |||
258 | These include the following tools: | ||
259 | |||
260 | * mkfs.btrfs: create a filesystem | ||
261 | |||
262 | * btrfs: a single tool to manage the filesystems, refer to the manpage for more details | ||
263 | |||
264 | * 'btrfsck' or 'btrfs check': do a consistency check of the filesystem | ||
265 | |||
266 | Other tools for specific tasks: | ||
267 | |||
268 | * btrfs-convert: in-place conversion from ext2/3/4 filesystems | ||
269 | 28 | ||
270 | * btrfs-image: dump filesystem metadata for debugging | 29 | that maintains information about administration tasks, frequently asked |
30 | questions, use cases, mount options, comprehensible changelogs, features, | ||
31 | manual pages, source code repositories, contacts etc. | ||
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f6dac40f87ff..80e8472d618b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void) | |||
148 | 148 | ||
149 | void btrfs_prelim_ref_exit(void) | 149 | void btrfs_prelim_ref_exit(void) |
150 | { | 150 | { |
151 | if (btrfs_prelim_ref_cache) | 151 | kmem_cache_destroy(btrfs_prelim_ref_cache); |
152 | kmem_cache_destroy(btrfs_prelim_ref_cache); | ||
153 | } | 152 | } |
154 | 153 | ||
155 | /* | 154 | /* |
@@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode) | |||
566 | struct __prelim_ref *pos2 = pos1, *tmp; | 565 | struct __prelim_ref *pos2 = pos1, *tmp; |
567 | 566 | ||
568 | list_for_each_entry_safe_continue(pos2, tmp, head, list) { | 567 | list_for_each_entry_safe_continue(pos2, tmp, head, list) { |
569 | struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2; | 568 | struct __prelim_ref *ref1 = pos1, *ref2 = pos2; |
570 | struct extent_inode_elem *eie; | 569 | struct extent_inode_elem *eie; |
571 | 570 | ||
572 | if (!ref_for_same_block(ref1, ref2)) | 571 | if (!ref_for_same_block(ref1, ref2)) |
573 | continue; | 572 | continue; |
574 | if (mode == 1) { | 573 | if (mode == 1) { |
575 | if (!ref1->parent && ref2->parent) { | 574 | if (!ref1->parent && ref2->parent) |
576 | xchg = ref1; | 575 | swap(ref1, ref2); |
577 | ref1 = ref2; | ||
578 | ref2 = xchg; | ||
579 | } | ||
580 | } else { | 576 | } else { |
581 | if (ref1->parent != ref2->parent) | 577 | if (ref1->parent != ref2->parent) |
582 | continue; | 578 | continue; |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 861d472564c1..e34a71b3e225 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -95,6 +95,7 @@ | |||
95 | #include <linux/genhd.h> | 95 | #include <linux/genhd.h> |
96 | #include <linux/blkdev.h> | 96 | #include <linux/blkdev.h> |
97 | #include <linux/vmalloc.h> | 97 | #include <linux/vmalloc.h> |
98 | #include <linux/string.h> | ||
98 | #include "ctree.h" | 99 | #include "ctree.h" |
99 | #include "disk-io.h" | 100 | #include "disk-io.h" |
100 | #include "hash.h" | 101 | #include "hash.h" |
@@ -105,6 +106,7 @@ | |||
105 | #include "locking.h" | 106 | #include "locking.h" |
106 | #include "check-integrity.h" | 107 | #include "check-integrity.h" |
107 | #include "rcu-string.h" | 108 | #include "rcu-string.h" |
109 | #include "compression.h" | ||
108 | 110 | ||
109 | #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 | 111 | #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 |
110 | #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 | 112 | #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 |
@@ -176,7 +178,7 @@ struct btrfsic_block { | |||
176 | * Elements of this type are allocated dynamically and required because | 178 | * Elements of this type are allocated dynamically and required because |
177 | * each block object can refer to and can be ref from multiple blocks. | 179 | * each block object can refer to and can be ref from multiple blocks. |
178 | * The key to lookup them in the hashtable is the dev_bytenr of | 180 | * The key to lookup them in the hashtable is the dev_bytenr of |
179 | * the block ref to plus the one from the block refered from. | 181 | * the block ref to plus the one from the block referred from. |
180 | * The fact that they are searchable via a hashtable and that a | 182 | * The fact that they are searchable via a hashtable and that a |
181 | * ref_cnt is maintained is not required for the btrfs integrity | 183 | * ref_cnt is maintained is not required for the btrfs integrity |
182 | * check algorithm itself, it is only used to make the output more | 184 | * check algorithm itself, it is only used to make the output more |
@@ -3076,7 +3078,7 @@ int btrfsic_mount(struct btrfs_root *root, | |||
3076 | 3078 | ||
3077 | list_for_each_entry(device, dev_head, dev_list) { | 3079 | list_for_each_entry(device, dev_head, dev_list) { |
3078 | struct btrfsic_dev_state *ds; | 3080 | struct btrfsic_dev_state *ds; |
3079 | char *p; | 3081 | const char *p; |
3080 | 3082 | ||
3081 | if (!device->bdev || !device->name) | 3083 | if (!device->bdev || !device->name) |
3082 | continue; | 3084 | continue; |
@@ -3092,11 +3094,7 @@ int btrfsic_mount(struct btrfs_root *root, | |||
3092 | ds->state = state; | 3094 | ds->state = state; |
3093 | bdevname(ds->bdev, ds->name); | 3095 | bdevname(ds->bdev, ds->name); |
3094 | ds->name[BDEVNAME_SIZE - 1] = '\0'; | 3096 | ds->name[BDEVNAME_SIZE - 1] = '\0'; |
3095 | for (p = ds->name; *p != '\0'; p++); | 3097 | p = kbasename(ds->name); |
3096 | while (p > ds->name && *p != '/') | ||
3097 | p--; | ||
3098 | if (*p == '/') | ||
3099 | p++; | ||
3100 | strlcpy(ds->name, p, sizeof(ds->name)); | 3098 | strlcpy(ds->name, p, sizeof(ds->name)); |
3101 | btrfsic_dev_state_hashtable_add(ds, | 3099 | btrfsic_dev_state_hashtable_add(ds, |
3102 | &btrfsic_dev_state_hashtable); | 3100 | &btrfsic_dev_state_hashtable); |
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 13a4dc0436c9..f49d8b8c0f00 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
@@ -48,6 +48,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
48 | void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, | 48 | void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, |
49 | unsigned long pg_index, | 49 | unsigned long pg_index, |
50 | unsigned long pg_offset); | 50 | unsigned long pg_offset); |
51 | |||
52 | enum btrfs_compression_type { | ||
53 | BTRFS_COMPRESS_NONE = 0, | ||
54 | BTRFS_COMPRESS_ZLIB = 1, | ||
55 | BTRFS_COMPRESS_LZO = 2, | ||
56 | BTRFS_COMPRESS_TYPES = 2, | ||
57 | BTRFS_COMPRESS_LAST = 3, | ||
58 | }; | ||
59 | |||
51 | struct btrfs_compress_op { | 60 | struct btrfs_compress_op { |
52 | struct list_head *(*alloc_workspace)(void); | 61 | struct list_head *(*alloc_workspace)(void); |
53 | 62 | ||
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 769e0ff1b4ce..77592931ab4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -311,7 +311,7 @@ struct tree_mod_root { | |||
311 | 311 | ||
312 | struct tree_mod_elem { | 312 | struct tree_mod_elem { |
313 | struct rb_node node; | 313 | struct rb_node node; |
314 | u64 index; /* shifted logical */ | 314 | u64 logical; |
315 | u64 seq; | 315 | u64 seq; |
316 | enum mod_log_op op; | 316 | enum mod_log_op op; |
317 | 317 | ||
@@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | |||
435 | 435 | ||
436 | /* | 436 | /* |
437 | * key order of the log: | 437 | * key order of the log: |
438 | * index -> sequence | 438 | * node/leaf start address -> sequence |
439 | * | 439 | * |
440 | * the index is the shifted logical of the *new* root node for root replace | 440 | * The 'start address' is the logical address of the *new* root node |
441 | * operations, or the shifted logical of the affected block for all other | 441 | * for root replace operations, or the logical address of the affected |
442 | * operations. | 442 | * block for all other operations. |
443 | * | 443 | * |
444 | * Note: must be called with write lock (tree_mod_log_write_lock). | 444 | * Note: must be called with write lock (tree_mod_log_write_lock). |
445 | */ | 445 | */ |
@@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | |||
460 | while (*new) { | 460 | while (*new) { |
461 | cur = container_of(*new, struct tree_mod_elem, node); | 461 | cur = container_of(*new, struct tree_mod_elem, node); |
462 | parent = *new; | 462 | parent = *new; |
463 | if (cur->index < tm->index) | 463 | if (cur->logical < tm->logical) |
464 | new = &((*new)->rb_left); | 464 | new = &((*new)->rb_left); |
465 | else if (cur->index > tm->index) | 465 | else if (cur->logical > tm->logical) |
466 | new = &((*new)->rb_right); | 466 | new = &((*new)->rb_right); |
467 | else if (cur->seq < tm->seq) | 467 | else if (cur->seq < tm->seq) |
468 | new = &((*new)->rb_left); | 468 | new = &((*new)->rb_left); |
@@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot, | |||
523 | if (!tm) | 523 | if (!tm) |
524 | return NULL; | 524 | return NULL; |
525 | 525 | ||
526 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | 526 | tm->logical = eb->start; |
527 | if (op != MOD_LOG_KEY_ADD) { | 527 | if (op != MOD_LOG_KEY_ADD) { |
528 | btrfs_node_key(eb, &tm->key, slot); | 528 | btrfs_node_key(eb, &tm->key, slot); |
529 | tm->blockptr = btrfs_node_blockptr(eb, slot); | 529 | tm->blockptr = btrfs_node_blockptr(eb, slot); |
@@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
588 | goto free_tms; | 588 | goto free_tms; |
589 | } | 589 | } |
590 | 590 | ||
591 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | 591 | tm->logical = eb->start; |
592 | tm->slot = src_slot; | 592 | tm->slot = src_slot; |
593 | tm->move.dst_slot = dst_slot; | 593 | tm->move.dst_slot = dst_slot; |
594 | tm->move.nr_items = nr_items; | 594 | tm->move.nr_items = nr_items; |
@@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
699 | goto free_tms; | 699 | goto free_tms; |
700 | } | 700 | } |
701 | 701 | ||
702 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | 702 | tm->logical = new_root->start; |
703 | tm->old_root.logical = old_root->start; | 703 | tm->old_root.logical = old_root->start; |
704 | tm->old_root.level = btrfs_header_level(old_root); | 704 | tm->old_root.level = btrfs_header_level(old_root); |
705 | tm->generation = btrfs_header_generation(old_root); | 705 | tm->generation = btrfs_header_generation(old_root); |
@@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
739 | struct rb_node *node; | 739 | struct rb_node *node; |
740 | struct tree_mod_elem *cur = NULL; | 740 | struct tree_mod_elem *cur = NULL; |
741 | struct tree_mod_elem *found = NULL; | 741 | struct tree_mod_elem *found = NULL; |
742 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
743 | 742 | ||
744 | tree_mod_log_read_lock(fs_info); | 743 | tree_mod_log_read_lock(fs_info); |
745 | tm_root = &fs_info->tree_mod_log; | 744 | tm_root = &fs_info->tree_mod_log; |
746 | node = tm_root->rb_node; | 745 | node = tm_root->rb_node; |
747 | while (node) { | 746 | while (node) { |
748 | cur = container_of(node, struct tree_mod_elem, node); | 747 | cur = container_of(node, struct tree_mod_elem, node); |
749 | if (cur->index < index) { | 748 | if (cur->logical < start) { |
750 | node = node->rb_left; | 749 | node = node->rb_left; |
751 | } else if (cur->index > index) { | 750 | } else if (cur->logical > start) { |
752 | node = node->rb_right; | 751 | node = node->rb_right; |
753 | } else if (cur->seq < min_seq) { | 752 | } else if (cur->seq < min_seq) { |
754 | node = node->rb_left; | 753 | node = node->rb_left; |
@@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | |||
1230 | return NULL; | 1229 | return NULL; |
1231 | 1230 | ||
1232 | /* | 1231 | /* |
1233 | * the very last operation that's logged for a root is the replacement | 1232 | * the very last operation that's logged for a root is the |
1234 | * operation (if it is replaced at all). this has the index of the *new* | 1233 | * replacement operation (if it is replaced at all). this has |
1235 | * root, making it the very first operation that's logged for this root. | 1234 | * the logical address of the *new* root, making it the very |
1235 | * first operation that's logged for this root. | ||
1236 | */ | 1236 | */ |
1237 | while (1) { | 1237 | while (1) { |
1238 | tm = tree_mod_log_search_oldest(fs_info, root_logical, | 1238 | tm = tree_mod_log_search_oldest(fs_info, root_logical, |
@@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
1336 | if (!next) | 1336 | if (!next) |
1337 | break; | 1337 | break; |
1338 | tm = container_of(next, struct tree_mod_elem, node); | 1338 | tm = container_of(next, struct tree_mod_elem, node); |
1339 | if (tm->index != first_tm->index) | 1339 | if (tm->logical != first_tm->logical) |
1340 | break; | 1340 | break; |
1341 | } | 1341 | } |
1342 | tree_mod_log_read_unlock(fs_info); | 1342 | tree_mod_log_read_unlock(fs_info); |
@@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5361 | goto out; | 5361 | goto out; |
5362 | } | 5362 | } |
5363 | 5363 | ||
5364 | tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); | 5364 | tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL); |
5365 | if (!tmp_buf) { | 5365 | if (!tmp_buf) { |
5366 | ret = -ENOMEM; | 5366 | ret = -ENOMEM; |
5367 | goto out; | 5367 | goto out; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bfe4a337fb4d..84a6a5b3384a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -100,6 +100,9 @@ struct btrfs_ordered_sum; | |||
100 | /* tracks free space in block groups. */ | 100 | /* tracks free space in block groups. */ |
101 | #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL | 101 | #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL |
102 | 102 | ||
103 | /* device stats in the device tree */ | ||
104 | #define BTRFS_DEV_STATS_OBJECTID 0ULL | ||
105 | |||
103 | /* for storing balance parameters in the root tree */ | 106 | /* for storing balance parameters in the root tree */ |
104 | #define BTRFS_BALANCE_OBJECTID -4ULL | 107 | #define BTRFS_BALANCE_OBJECTID -4ULL |
105 | 108 | ||
@@ -715,14 +718,6 @@ struct btrfs_timespec { | |||
715 | __le32 nsec; | 718 | __le32 nsec; |
716 | } __attribute__ ((__packed__)); | 719 | } __attribute__ ((__packed__)); |
717 | 720 | ||
718 | enum btrfs_compression_type { | ||
719 | BTRFS_COMPRESS_NONE = 0, | ||
720 | BTRFS_COMPRESS_ZLIB = 1, | ||
721 | BTRFS_COMPRESS_LZO = 2, | ||
722 | BTRFS_COMPRESS_TYPES = 2, | ||
723 | BTRFS_COMPRESS_LAST = 3, | ||
724 | }; | ||
725 | |||
726 | struct btrfs_inode_item { | 721 | struct btrfs_inode_item { |
727 | /* nfs style generation number */ | 722 | /* nfs style generation number */ |
728 | __le64 generation; | 723 | __le64 generation; |
@@ -793,7 +788,7 @@ struct btrfs_root_item { | |||
793 | 788 | ||
794 | /* | 789 | /* |
795 | * This generation number is used to test if the new fields are valid | 790 | * This generation number is used to test if the new fields are valid |
796 | * and up to date while reading the root item. Everytime the root item | 791 | * and up to date while reading the root item. Every time the root item |
797 | * is written out, the "generation" field is copied into this field. If | 792 | * is written out, the "generation" field is copied into this field. If |
798 | * anyone ever mounted the fs with an older kernel, we will have | 793 | * anyone ever mounted the fs with an older kernel, we will have |
799 | * mismatching generation values here and thus must invalidate the | 794 | * mismatching generation values here and thus must invalidate the |
@@ -1002,8 +997,10 @@ struct btrfs_dev_replace { | |||
1002 | pid_t lock_owner; | 997 | pid_t lock_owner; |
1003 | atomic_t nesting_level; | 998 | atomic_t nesting_level; |
1004 | struct mutex lock_finishing_cancel_unmount; | 999 | struct mutex lock_finishing_cancel_unmount; |
1005 | struct mutex lock_management_lock; | 1000 | rwlock_t lock; |
1006 | struct mutex lock; | 1001 | atomic_t read_locks; |
1002 | atomic_t blocking_readers; | ||
1003 | wait_queue_head_t read_lock_wq; | ||
1007 | 1004 | ||
1008 | struct btrfs_scrub_progress scrub_progress; | 1005 | struct btrfs_scrub_progress scrub_progress; |
1009 | }; | 1006 | }; |
@@ -1222,10 +1219,10 @@ struct btrfs_space_info { | |||
1222 | * we've called update_block_group and dropped the bytes_used counter | 1219 | * we've called update_block_group and dropped the bytes_used counter |
1223 | * and increased the bytes_pinned counter. However this means that | 1220 | * and increased the bytes_pinned counter. However this means that |
1224 | * bytes_pinned does not reflect the bytes that will be pinned once the | 1221 | * bytes_pinned does not reflect the bytes that will be pinned once the |
1225 | * delayed refs are flushed, so this counter is inc'ed everytime we call | 1222 | * delayed refs are flushed, so this counter is inc'ed every time we |
1226 | * btrfs_free_extent so it is a realtime count of what will be freed | 1223 | * call btrfs_free_extent so it is a realtime count of what will be |
1227 | * once the transaction is committed. It will be zero'ed everytime the | 1224 | * freed once the transaction is committed. It will be zero'ed every |
1228 | * transaction commits. | 1225 | * time the transaction commits. |
1229 | */ | 1226 | */ |
1230 | struct percpu_counter total_bytes_pinned; | 1227 | struct percpu_counter total_bytes_pinned; |
1231 | 1228 | ||
@@ -1822,6 +1819,9 @@ struct btrfs_fs_info { | |||
1822 | spinlock_t reada_lock; | 1819 | spinlock_t reada_lock; |
1823 | struct radix_tree_root reada_tree; | 1820 | struct radix_tree_root reada_tree; |
1824 | 1821 | ||
1822 | /* readahead works cnt */ | ||
1823 | atomic_t reada_works_cnt; | ||
1824 | |||
1825 | /* Extent buffer radix tree */ | 1825 | /* Extent buffer radix tree */ |
1826 | spinlock_t buffer_lock; | 1826 | spinlock_t buffer_lock; |
1827 | struct radix_tree_root buffer_radix; | 1827 | struct radix_tree_root buffer_radix; |
@@ -2185,13 +2185,43 @@ struct btrfs_ioctl_defrag_range_args { | |||
2185 | */ | 2185 | */ |
2186 | #define BTRFS_QGROUP_RELATION_KEY 246 | 2186 | #define BTRFS_QGROUP_RELATION_KEY 246 |
2187 | 2187 | ||
2188 | /* | ||
2189 | * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. | ||
2190 | */ | ||
2188 | #define BTRFS_BALANCE_ITEM_KEY 248 | 2191 | #define BTRFS_BALANCE_ITEM_KEY 248 |
2189 | 2192 | ||
2190 | /* | 2193 | /* |
2191 | * Persistantly stores the io stats in the device tree. | 2194 | * The key type for tree items that are stored persistently, but do not need to |
2192 | * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). | 2195 | * exist for extended period of time. The items can exist in any tree. |
2196 | * | ||
2197 | * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] | ||
2198 | * | ||
2199 | * Existing items: | ||
2200 | * | ||
2201 | * - balance status item | ||
2202 | * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) | ||
2193 | */ | 2203 | */ |
2194 | #define BTRFS_DEV_STATS_KEY 249 | 2204 | #define BTRFS_TEMPORARY_ITEM_KEY 248 |
2205 | |||
2206 | /* | ||
2207 | * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY | ||
2208 | */ | ||
2209 | #define BTRFS_DEV_STATS_KEY 249 | ||
2210 | |||
2211 | /* | ||
2212 | * The key type for tree items that are stored persistently and usually exist | ||
2213 | * for a long period, eg. filesystem lifetime. The item kinds can be status | ||
2214 | * information, stats or preference values. The item can exist in any tree. | ||
2215 | * | ||
2216 | * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] | ||
2217 | * | ||
2218 | * Existing items: | ||
2219 | * | ||
2220 | * - device statistics, store IO stats in the device tree, one key for all | ||
2221 | * stats | ||
2222 | * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) | ||
2223 | */ | ||
2224 | #define BTRFS_PERSISTENT_ITEM_KEY 249 | ||
2195 | 2225 | ||
2196 | /* | 2226 | /* |
2197 | * Persistently stores the device replace state in the device tree. | 2227 | * Persistently stores the device replace state in the device tree. |
@@ -2241,7 +2271,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
2241 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | 2271 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) |
2242 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | 2272 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) |
2243 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) | 2273 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) |
2244 | #define BTRFS_MOUNT_RECOVERY (1 << 18) | 2274 | #define BTRFS_MOUNT_USEBACKUPROOT (1 << 18) |
2245 | #define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) | 2275 | #define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) |
2246 | #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) | 2276 | #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) |
2247 | #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) | 2277 | #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) |
@@ -2250,9 +2280,10 @@ struct btrfs_ioctl_defrag_range_args { | |||
2250 | #define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24) | 2280 | #define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24) |
2251 | #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) | 2281 | #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) |
2252 | #define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) | 2282 | #define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) |
2283 | #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) | ||
2253 | 2284 | ||
2254 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) | 2285 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) |
2255 | #define BTRFS_DEFAULT_MAX_INLINE (8192) | 2286 | #define BTRFS_DEFAULT_MAX_INLINE (2048) |
2256 | 2287 | ||
2257 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 2288 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
2258 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 2289 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -2353,6 +2384,9 @@ struct btrfs_map_token { | |||
2353 | unsigned long offset; | 2384 | unsigned long offset; |
2354 | }; | 2385 | }; |
2355 | 2386 | ||
2387 | #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ | ||
2388 | ((bytes) >> (fs_info)->sb->s_blocksize_bits) | ||
2389 | |||
2356 | static inline void btrfs_init_map_token (struct btrfs_map_token *token) | 2390 | static inline void btrfs_init_map_token (struct btrfs_map_token *token) |
2357 | { | 2391 | { |
2358 | token->kaddr = NULL; | 2392 | token->kaddr = NULL; |
@@ -3448,8 +3482,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes); | |||
3448 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 3482 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
3449 | unsigned num_items) | 3483 | unsigned num_items) |
3450 | { | 3484 | { |
3451 | return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3485 | return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; |
3452 | 2 * num_items; | ||
3453 | } | 3486 | } |
3454 | 3487 | ||
3455 | /* | 3488 | /* |
@@ -4027,7 +4060,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
4027 | struct btrfs_root *root, | 4060 | struct btrfs_root *root, |
4028 | struct inode *dir, u64 objectid, | 4061 | struct inode *dir, u64 objectid, |
4029 | const char *name, int name_len); | 4062 | const char *name, int name_len); |
4030 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | 4063 | int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, |
4031 | int front); | 4064 | int front); |
4032 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 4065 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
4033 | struct btrfs_root *root, | 4066 | struct btrfs_root *root, |
@@ -4089,6 +4122,7 @@ void btrfs_test_inode_set_ops(struct inode *inode); | |||
4089 | 4122 | ||
4090 | /* ioctl.c */ | 4123 | /* ioctl.c */ |
4091 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 4124 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
4125 | int btrfs_ioctl_get_supported_features(void __user *arg); | ||
4092 | void btrfs_update_iflags(struct inode *inode); | 4126 | void btrfs_update_iflags(struct inode *inode); |
4093 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 4127 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
4094 | int btrfs_is_empty_uuid(u8 *uuid); | 4128 | int btrfs_is_empty_uuid(u8 *uuid); |
@@ -4151,7 +4185,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); | |||
4151 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | 4185 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); |
4152 | 4186 | ||
4153 | /* super.c */ | 4187 | /* super.c */ |
4154 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 4188 | int btrfs_parse_options(struct btrfs_root *root, char *options, |
4189 | unsigned long new_flags); | ||
4155 | int btrfs_sync_fs(struct super_block *sb, int wait); | 4190 | int btrfs_sync_fs(struct super_block *sb, int wait); |
4156 | 4191 | ||
4157 | #ifdef CONFIG_PRINTK | 4192 | #ifdef CONFIG_PRINTK |
@@ -4525,8 +4560,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
4525 | struct btrfs_key *start, struct btrfs_key *end); | 4560 | struct btrfs_key *start, struct btrfs_key *end); |
4526 | int btrfs_reada_wait(void *handle); | 4561 | int btrfs_reada_wait(void *handle); |
4527 | void btrfs_reada_detach(void *handle); | 4562 | void btrfs_reada_detach(void *handle); |
4528 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 4563 | int btree_readahead_hook(struct btrfs_fs_info *fs_info, |
4529 | u64 start, int err); | 4564 | struct extent_buffer *eb, u64 start, int err); |
4530 | 4565 | ||
4531 | static inline int is_fstree(u64 rootid) | 4566 | static inline int is_fstree(u64 rootid) |
4532 | { | 4567 | { |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b57daa895cea..6cef0062f929 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void) | |||
43 | 43 | ||
44 | void btrfs_delayed_inode_exit(void) | 44 | void btrfs_delayed_inode_exit(void) |
45 | { | 45 | { |
46 | if (delayed_node_cache) | 46 | kmem_cache_destroy(delayed_node_cache); |
47 | kmem_cache_destroy(delayed_node_cache); | ||
48 | } | 47 | } |
49 | 48 | ||
50 | static inline void btrfs_init_delayed_node( | 49 | static inline void btrfs_init_delayed_node( |
@@ -651,9 +650,14 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
651 | goto out; | 650 | goto out; |
652 | 651 | ||
653 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | 652 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); |
654 | if (!WARN_ON(ret)) | 653 | if (!ret) |
655 | goto out; | 654 | goto out; |
656 | 655 | ||
656 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) { | ||
657 | btrfs_debug(root->fs_info, | ||
658 | "block rsv migrate returned %d", ret); | ||
659 | WARN_ON(1); | ||
660 | } | ||
657 | /* | 661 | /* |
658 | * Ok this is a problem, let's just steal from the global rsv | 662 | * Ok this is a problem, let's just steal from the global rsv |
659 | * since this really shouldn't happen that often. | 663 | * since this really shouldn't happen that often. |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 914ac13bd92f..430b3689b112 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -929,14 +929,10 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
929 | 929 | ||
930 | void btrfs_delayed_ref_exit(void) | 930 | void btrfs_delayed_ref_exit(void) |
931 | { | 931 | { |
932 | if (btrfs_delayed_ref_head_cachep) | 932 | kmem_cache_destroy(btrfs_delayed_ref_head_cachep); |
933 | kmem_cache_destroy(btrfs_delayed_ref_head_cachep); | 933 | kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); |
934 | if (btrfs_delayed_tree_ref_cachep) | 934 | kmem_cache_destroy(btrfs_delayed_data_ref_cachep); |
935 | kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); | 935 | kmem_cache_destroy(btrfs_delayed_extent_op_cachep); |
936 | if (btrfs_delayed_data_ref_cachep) | ||
937 | kmem_cache_destroy(btrfs_delayed_data_ref_cachep); | ||
938 | if (btrfs_delayed_extent_op_cachep) | ||
939 | kmem_cache_destroy(btrfs_delayed_extent_op_cachep); | ||
940 | } | 936 | } |
941 | 937 | ||
942 | int btrfs_delayed_ref_init(void) | 938 | int btrfs_delayed_ref_init(void) |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index cbb7dbfb3fff..a1d6652e0c47 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | |||
202 | struct btrfs_dev_replace_item *ptr; | 202 | struct btrfs_dev_replace_item *ptr; |
203 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 203 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
204 | 204 | ||
205 | btrfs_dev_replace_lock(dev_replace); | 205 | btrfs_dev_replace_lock(dev_replace, 0); |
206 | if (!dev_replace->is_valid || | 206 | if (!dev_replace->is_valid || |
207 | !dev_replace->item_needs_writeback) { | 207 | !dev_replace->item_needs_writeback) { |
208 | btrfs_dev_replace_unlock(dev_replace); | 208 | btrfs_dev_replace_unlock(dev_replace, 0); |
209 | return 0; | 209 | return 0; |
210 | } | 210 | } |
211 | btrfs_dev_replace_unlock(dev_replace); | 211 | btrfs_dev_replace_unlock(dev_replace, 0); |
212 | 212 | ||
213 | key.objectid = 0; | 213 | key.objectid = 0; |
214 | key.type = BTRFS_DEV_REPLACE_KEY; | 214 | key.type = BTRFS_DEV_REPLACE_KEY; |
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | |||
264 | ptr = btrfs_item_ptr(eb, path->slots[0], | 264 | ptr = btrfs_item_ptr(eb, path->slots[0], |
265 | struct btrfs_dev_replace_item); | 265 | struct btrfs_dev_replace_item); |
266 | 266 | ||
267 | btrfs_dev_replace_lock(dev_replace); | 267 | btrfs_dev_replace_lock(dev_replace, 1); |
268 | if (dev_replace->srcdev) | 268 | if (dev_replace->srcdev) |
269 | btrfs_set_dev_replace_src_devid(eb, ptr, | 269 | btrfs_set_dev_replace_src_devid(eb, ptr, |
270 | dev_replace->srcdev->devid); | 270 | dev_replace->srcdev->devid); |
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | |||
287 | btrfs_set_dev_replace_cursor_right(eb, ptr, | 287 | btrfs_set_dev_replace_cursor_right(eb, ptr, |
288 | dev_replace->cursor_right); | 288 | dev_replace->cursor_right); |
289 | dev_replace->item_needs_writeback = 0; | 289 | dev_replace->item_needs_writeback = 0; |
290 | btrfs_dev_replace_unlock(dev_replace); | 290 | btrfs_dev_replace_unlock(dev_replace, 1); |
291 | 291 | ||
292 | btrfs_mark_buffer_dirty(eb); | 292 | btrfs_mark_buffer_dirty(eb); |
293 | 293 | ||
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
356 | return PTR_ERR(trans); | 356 | return PTR_ERR(trans); |
357 | } | 357 | } |
358 | 358 | ||
359 | btrfs_dev_replace_lock(dev_replace); | 359 | btrfs_dev_replace_lock(dev_replace, 1); |
360 | switch (dev_replace->replace_state) { | 360 | switch (dev_replace->replace_state) { |
361 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | 361 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: |
362 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | 362 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: |
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
395 | dev_replace->is_valid = 1; | 395 | dev_replace->is_valid = 1; |
396 | dev_replace->item_needs_writeback = 1; | 396 | dev_replace->item_needs_writeback = 1; |
397 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | 397 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; |
398 | btrfs_dev_replace_unlock(dev_replace); | 398 | btrfs_dev_replace_unlock(dev_replace, 1); |
399 | 399 | ||
400 | ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); | 400 | ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); |
401 | if (ret) | 401 | if (ret) |
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
407 | trans = btrfs_start_transaction(root, 0); | 407 | trans = btrfs_start_transaction(root, 0); |
408 | if (IS_ERR(trans)) { | 408 | if (IS_ERR(trans)) { |
409 | ret = PTR_ERR(trans); | 409 | ret = PTR_ERR(trans); |
410 | btrfs_dev_replace_lock(dev_replace); | 410 | btrfs_dev_replace_lock(dev_replace, 1); |
411 | goto leave; | 411 | goto leave; |
412 | } | 412 | } |
413 | 413 | ||
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
433 | leave: | 433 | leave: |
434 | dev_replace->srcdev = NULL; | 434 | dev_replace->srcdev = NULL; |
435 | dev_replace->tgtdev = NULL; | 435 | dev_replace->tgtdev = NULL; |
436 | btrfs_dev_replace_unlock(dev_replace); | 436 | btrfs_dev_replace_unlock(dev_replace, 1); |
437 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 437 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
438 | return ret; | 438 | return ret; |
439 | } | 439 | } |
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
471 | /* don't allow cancel or unmount to disturb the finishing procedure */ | 471 | /* don't allow cancel or unmount to disturb the finishing procedure */ |
472 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | 472 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); |
473 | 473 | ||
474 | btrfs_dev_replace_lock(dev_replace); | 474 | btrfs_dev_replace_lock(dev_replace, 0); |
475 | /* was the operation canceled, or is it finished? */ | 475 | /* was the operation canceled, or is it finished? */ |
476 | if (dev_replace->replace_state != | 476 | if (dev_replace->replace_state != |
477 | BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { | 477 | BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { |
478 | btrfs_dev_replace_unlock(dev_replace); | 478 | btrfs_dev_replace_unlock(dev_replace, 0); |
479 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 479 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
480 | return 0; | 480 | return 0; |
481 | } | 481 | } |
482 | 482 | ||
483 | tgt_device = dev_replace->tgtdev; | 483 | tgt_device = dev_replace->tgtdev; |
484 | src_device = dev_replace->srcdev; | 484 | src_device = dev_replace->srcdev; |
485 | btrfs_dev_replace_unlock(dev_replace); | 485 | btrfs_dev_replace_unlock(dev_replace, 0); |
486 | 486 | ||
487 | /* | 487 | /* |
488 | * flush all outstanding I/O and inode extent mappings before the | 488 | * flush all outstanding I/O and inode extent mappings before the |
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
507 | /* keep away write_all_supers() during the finishing procedure */ | 507 | /* keep away write_all_supers() during the finishing procedure */ |
508 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 508 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
509 | mutex_lock(&root->fs_info->chunk_mutex); | 509 | mutex_lock(&root->fs_info->chunk_mutex); |
510 | btrfs_dev_replace_lock(dev_replace); | 510 | btrfs_dev_replace_lock(dev_replace, 1); |
511 | dev_replace->replace_state = | 511 | dev_replace->replace_state = |
512 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED | 512 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED |
513 | : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; | 513 | : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; |
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
528 | rcu_str_deref(src_device->name), | 528 | rcu_str_deref(src_device->name), |
529 | src_device->devid, | 529 | src_device->devid, |
530 | rcu_str_deref(tgt_device->name), scrub_ret); | 530 | rcu_str_deref(tgt_device->name), scrub_ret); |
531 | btrfs_dev_replace_unlock(dev_replace); | 531 | btrfs_dev_replace_unlock(dev_replace, 1); |
532 | mutex_unlock(&root->fs_info->chunk_mutex); | 532 | mutex_unlock(&root->fs_info->chunk_mutex); |
533 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 533 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
534 | mutex_unlock(&uuid_mutex); | 534 | mutex_unlock(&uuid_mutex); |
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
565 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 565 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
566 | fs_info->fs_devices->rw_devices++; | 566 | fs_info->fs_devices->rw_devices++; |
567 | 567 | ||
568 | btrfs_dev_replace_unlock(dev_replace); | 568 | btrfs_dev_replace_unlock(dev_replace, 1); |
569 | 569 | ||
570 | btrfs_rm_dev_replace_blocked(fs_info); | 570 | btrfs_rm_dev_replace_blocked(fs_info); |
571 | 571 | ||
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | |||
649 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 649 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
650 | struct btrfs_device *srcdev; | 650 | struct btrfs_device *srcdev; |
651 | 651 | ||
652 | btrfs_dev_replace_lock(dev_replace); | 652 | btrfs_dev_replace_lock(dev_replace, 0); |
653 | /* even if !dev_replace_is_valid, the values are good enough for | 653 | /* even if !dev_replace_is_valid, the values are good enough for |
654 | * the replace_status ioctl */ | 654 | * the replace_status ioctl */ |
655 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | 655 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; |
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | |||
675 | div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); | 675 | div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); |
676 | break; | 676 | break; |
677 | } | 677 | } |
678 | btrfs_dev_replace_unlock(dev_replace); | 678 | btrfs_dev_replace_unlock(dev_replace, 0); |
679 | } | 679 | } |
680 | 680 | ||
681 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, | 681 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, |
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) | |||
698 | return -EROFS; | 698 | return -EROFS; |
699 | 699 | ||
700 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | 700 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); |
701 | btrfs_dev_replace_lock(dev_replace); | 701 | btrfs_dev_replace_lock(dev_replace, 1); |
702 | switch (dev_replace->replace_state) { | 702 | switch (dev_replace->replace_state) { |
703 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | 703 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: |
704 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | 704 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: |
705 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | 705 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: |
706 | result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; | 706 | result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; |
707 | btrfs_dev_replace_unlock(dev_replace); | 707 | btrfs_dev_replace_unlock(dev_replace, 1); |
708 | goto leave; | 708 | goto leave; |
709 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | 709 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: |
710 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | 710 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: |
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) | |||
717 | dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; | 717 | dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; |
718 | dev_replace->time_stopped = get_seconds(); | 718 | dev_replace->time_stopped = get_seconds(); |
719 | dev_replace->item_needs_writeback = 1; | 719 | dev_replace->item_needs_writeback = 1; |
720 | btrfs_dev_replace_unlock(dev_replace); | 720 | btrfs_dev_replace_unlock(dev_replace, 1); |
721 | btrfs_scrub_cancel(fs_info); | 721 | btrfs_scrub_cancel(fs_info); |
722 | 722 | ||
723 | trans = btrfs_start_transaction(root, 0); | 723 | trans = btrfs_start_transaction(root, 0); |
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) | |||
740 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 740 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
741 | 741 | ||
742 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | 742 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); |
743 | btrfs_dev_replace_lock(dev_replace); | 743 | btrfs_dev_replace_lock(dev_replace, 1); |
744 | switch (dev_replace->replace_state) { | 744 | switch (dev_replace->replace_state) { |
745 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | 745 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: |
746 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | 746 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: |
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) | |||
756 | break; | 756 | break; |
757 | } | 757 | } |
758 | 758 | ||
759 | btrfs_dev_replace_unlock(dev_replace); | 759 | btrfs_dev_replace_unlock(dev_replace, 1); |
760 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 760 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
761 | } | 761 | } |
762 | 762 | ||
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) | |||
766 | struct task_struct *task; | 766 | struct task_struct *task; |
767 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 767 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
768 | 768 | ||
769 | btrfs_dev_replace_lock(dev_replace); | 769 | btrfs_dev_replace_lock(dev_replace, 1); |
770 | switch (dev_replace->replace_state) { | 770 | switch (dev_replace->replace_state) { |
771 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | 771 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: |
772 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | 772 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: |
773 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | 773 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: |
774 | btrfs_dev_replace_unlock(dev_replace); | 774 | btrfs_dev_replace_unlock(dev_replace, 1); |
775 | return 0; | 775 | return 0; |
776 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | 776 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: |
777 | break; | 777 | break; |
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) | |||
784 | btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); | 784 | btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); |
785 | btrfs_info(fs_info, | 785 | btrfs_info(fs_info, |
786 | "you may cancel the operation after 'mount -o degraded'"); | 786 | "you may cancel the operation after 'mount -o degraded'"); |
787 | btrfs_dev_replace_unlock(dev_replace); | 787 | btrfs_dev_replace_unlock(dev_replace, 1); |
788 | return 0; | 788 | return 0; |
789 | } | 789 | } |
790 | btrfs_dev_replace_unlock(dev_replace); | 790 | btrfs_dev_replace_unlock(dev_replace, 1); |
791 | 791 | ||
792 | WARN_ON(atomic_xchg( | 792 | WARN_ON(atomic_xchg( |
793 | &fs_info->mutually_exclusive_operation_running, 1)); | 793 | &fs_info->mutually_exclusive_operation_running, 1)); |
@@ -802,7 +802,7 @@ static int btrfs_dev_replace_kthread(void *data) | |||
802 | struct btrfs_ioctl_dev_replace_args *status_args; | 802 | struct btrfs_ioctl_dev_replace_args *status_args; |
803 | u64 progress; | 803 | u64 progress; |
804 | 804 | ||
805 | status_args = kzalloc(sizeof(*status_args), GFP_NOFS); | 805 | status_args = kzalloc(sizeof(*status_args), GFP_KERNEL); |
806 | if (status_args) { | 806 | if (status_args) { |
807 | btrfs_dev_replace_status(fs_info, status_args); | 807 | btrfs_dev_replace_status(fs_info, status_args); |
808 | progress = status_args->status.progress_1000; | 808 | progress = status_args->status.progress_1000; |
@@ -858,55 +858,65 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace) | |||
858 | * not called and the filesystem is remounted | 858 | * not called and the filesystem is remounted |
859 | * in degraded state. This does not stop the | 859 | * in degraded state. This does not stop the |
860 | * dev_replace procedure. It needs to be canceled | 860 | * dev_replace procedure. It needs to be canceled |
861 | * manually if the cancelation is wanted. | 861 | * manually if the cancellation is wanted. |
862 | */ | 862 | */ |
863 | break; | 863 | break; |
864 | } | 864 | } |
865 | return 1; | 865 | return 1; |
866 | } | 866 | } |
867 | 867 | ||
868 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) | 868 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw) |
869 | { | 869 | { |
870 | /* the beginning is just an optimization for the typical case */ | 870 | if (rw == 1) { |
871 | if (atomic_read(&dev_replace->nesting_level) == 0) { | 871 | /* write */ |
872 | acquire_lock: | 872 | again: |
873 | /* this is not a nested case where the same thread | 873 | wait_event(dev_replace->read_lock_wq, |
874 | * is trying to acqurire the same lock twice */ | 874 | atomic_read(&dev_replace->blocking_readers) == 0); |
875 | mutex_lock(&dev_replace->lock); | 875 | write_lock(&dev_replace->lock); |
876 | mutex_lock(&dev_replace->lock_management_lock); | 876 | if (atomic_read(&dev_replace->blocking_readers)) { |
877 | dev_replace->lock_owner = current->pid; | 877 | write_unlock(&dev_replace->lock); |
878 | atomic_inc(&dev_replace->nesting_level); | 878 | goto again; |
879 | mutex_unlock(&dev_replace->lock_management_lock); | 879 | } |
880 | return; | 880 | } else { |
881 | read_lock(&dev_replace->lock); | ||
882 | atomic_inc(&dev_replace->read_locks); | ||
881 | } | 883 | } |
884 | } | ||
882 | 885 | ||
883 | mutex_lock(&dev_replace->lock_management_lock); | 886 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw) |
884 | if (atomic_read(&dev_replace->nesting_level) > 0 && | 887 | { |
885 | dev_replace->lock_owner == current->pid) { | 888 | if (rw == 1) { |
886 | WARN_ON(!mutex_is_locked(&dev_replace->lock)); | 889 | /* write */ |
887 | atomic_inc(&dev_replace->nesting_level); | 890 | ASSERT(atomic_read(&dev_replace->blocking_readers) == 0); |
888 | mutex_unlock(&dev_replace->lock_management_lock); | 891 | write_unlock(&dev_replace->lock); |
889 | return; | 892 | } else { |
893 | ASSERT(atomic_read(&dev_replace->read_locks) > 0); | ||
894 | atomic_dec(&dev_replace->read_locks); | ||
895 | read_unlock(&dev_replace->lock); | ||
890 | } | 896 | } |
897 | } | ||
891 | 898 | ||
892 | mutex_unlock(&dev_replace->lock_management_lock); | 899 | /* inc blocking cnt and release read lock */ |
893 | goto acquire_lock; | 900 | void btrfs_dev_replace_set_lock_blocking( |
901 | struct btrfs_dev_replace *dev_replace) | ||
902 | { | ||
903 | /* only set blocking for read lock */ | ||
904 | ASSERT(atomic_read(&dev_replace->read_locks) > 0); | ||
905 | atomic_inc(&dev_replace->blocking_readers); | ||
906 | read_unlock(&dev_replace->lock); | ||
894 | } | 907 | } |
895 | 908 | ||
896 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | 909 | /* acquire read lock and dec blocking cnt */ |
910 | void btrfs_dev_replace_clear_lock_blocking( | ||
911 | struct btrfs_dev_replace *dev_replace) | ||
897 | { | 912 | { |
898 | WARN_ON(!mutex_is_locked(&dev_replace->lock)); | 913 | /* only set blocking for read lock */ |
899 | mutex_lock(&dev_replace->lock_management_lock); | 914 | ASSERT(atomic_read(&dev_replace->read_locks) > 0); |
900 | WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); | 915 | ASSERT(atomic_read(&dev_replace->blocking_readers) > 0); |
901 | WARN_ON(dev_replace->lock_owner != current->pid); | 916 | read_lock(&dev_replace->lock); |
902 | atomic_dec(&dev_replace->nesting_level); | 917 | if (atomic_dec_and_test(&dev_replace->blocking_readers) && |
903 | if (atomic_read(&dev_replace->nesting_level) == 0) { | 918 | waitqueue_active(&dev_replace->read_lock_wq)) |
904 | dev_replace->lock_owner = 0; | 919 | wake_up(&dev_replace->read_lock_wq); |
905 | mutex_unlock(&dev_replace->lock_management_lock); | ||
906 | mutex_unlock(&dev_replace->lock); | ||
907 | } else { | ||
908 | mutex_unlock(&dev_replace->lock_management_lock); | ||
909 | } | ||
910 | } | 920 | } |
911 | 921 | ||
912 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | 922 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) |
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 20035cbbf021..29e3ef5f96bd 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h | |||
@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, | |||
34 | void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); | 34 | void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); |
35 | int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); | 35 | int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); |
36 | int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); | 36 | int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); |
37 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace); | 37 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw); |
38 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace); | 38 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw); |
39 | void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace); | ||
40 | void btrfs_dev_replace_clear_lock_blocking( | ||
41 | struct btrfs_dev_replace *dev_replace); | ||
39 | 42 | ||
40 | static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) | 43 | static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) |
41 | { | 44 | { |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5699bbc23feb..4b02591b0301 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "raid56.h" | 50 | #include "raid56.h" |
51 | #include "sysfs.h" | 51 | #include "sysfs.h" |
52 | #include "qgroup.h" | 52 | #include "qgroup.h" |
53 | #include "compression.h" | ||
53 | 54 | ||
54 | #ifdef CONFIG_X86 | 55 | #ifdef CONFIG_X86 |
55 | #include <asm/cpufeature.h> | 56 | #include <asm/cpufeature.h> |
@@ -110,8 +111,7 @@ int __init btrfs_end_io_wq_init(void) | |||
110 | 111 | ||
111 | void btrfs_end_io_wq_exit(void) | 112 | void btrfs_end_io_wq_exit(void) |
112 | { | 113 | { |
113 | if (btrfs_end_io_wq_cache) | 114 | kmem_cache_destroy(btrfs_end_io_wq_cache); |
114 | kmem_cache_destroy(btrfs_end_io_wq_cache); | ||
115 | } | 115 | } |
116 | 116 | ||
117 | /* | 117 | /* |
@@ -612,6 +612,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
612 | int found_level; | 612 | int found_level; |
613 | struct extent_buffer *eb; | 613 | struct extent_buffer *eb; |
614 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 614 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
615 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
615 | int ret = 0; | 616 | int ret = 0; |
616 | int reads_done; | 617 | int reads_done; |
617 | 618 | ||
@@ -637,21 +638,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
637 | 638 | ||
638 | found_start = btrfs_header_bytenr(eb); | 639 | found_start = btrfs_header_bytenr(eb); |
639 | if (found_start != eb->start) { | 640 | if (found_start != eb->start) { |
640 | btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", | 641 | btrfs_err_rl(fs_info, "bad tree block start %llu %llu", |
641 | found_start, eb->start); | 642 | found_start, eb->start); |
642 | ret = -EIO; | 643 | ret = -EIO; |
643 | goto err; | 644 | goto err; |
644 | } | 645 | } |
645 | if (check_tree_block_fsid(root->fs_info, eb)) { | 646 | if (check_tree_block_fsid(fs_info, eb)) { |
646 | btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", | 647 | btrfs_err_rl(fs_info, "bad fsid on block %llu", |
647 | eb->start); | 648 | eb->start); |
648 | ret = -EIO; | 649 | ret = -EIO; |
649 | goto err; | 650 | goto err; |
650 | } | 651 | } |
651 | found_level = btrfs_header_level(eb); | 652 | found_level = btrfs_header_level(eb); |
652 | if (found_level >= BTRFS_MAX_LEVEL) { | 653 | if (found_level >= BTRFS_MAX_LEVEL) { |
653 | btrfs_err(root->fs_info, "bad tree block level %d", | 654 | btrfs_err(fs_info, "bad tree block level %d", |
654 | (int)btrfs_header_level(eb)); | 655 | (int)btrfs_header_level(eb)); |
655 | ret = -EIO; | 656 | ret = -EIO; |
656 | goto err; | 657 | goto err; |
657 | } | 658 | } |
@@ -659,7 +660,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
659 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), | 660 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), |
660 | eb, found_level); | 661 | eb, found_level); |
661 | 662 | ||
662 | ret = csum_tree_block(root->fs_info, eb, 1); | 663 | ret = csum_tree_block(fs_info, eb, 1); |
663 | if (ret) { | 664 | if (ret) { |
664 | ret = -EIO; | 665 | ret = -EIO; |
665 | goto err; | 666 | goto err; |
@@ -680,7 +681,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
680 | err: | 681 | err: |
681 | if (reads_done && | 682 | if (reads_done && |
682 | test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) | 683 | test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) |
683 | btree_readahead_hook(root, eb, eb->start, ret); | 684 | btree_readahead_hook(fs_info, eb, eb->start, ret); |
684 | 685 | ||
685 | if (ret) { | 686 | if (ret) { |
686 | /* | 687 | /* |
@@ -699,14 +700,13 @@ out: | |||
699 | static int btree_io_failed_hook(struct page *page, int failed_mirror) | 700 | static int btree_io_failed_hook(struct page *page, int failed_mirror) |
700 | { | 701 | { |
701 | struct extent_buffer *eb; | 702 | struct extent_buffer *eb; |
702 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
703 | 703 | ||
704 | eb = (struct extent_buffer *)page->private; | 704 | eb = (struct extent_buffer *)page->private; |
705 | set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); | 705 | set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); |
706 | eb->read_mirror = failed_mirror; | 706 | eb->read_mirror = failed_mirror; |
707 | atomic_dec(&eb->io_pages); | 707 | atomic_dec(&eb->io_pages); |
708 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) | 708 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) |
709 | btree_readahead_hook(root, eb, eb->start, -EIO); | 709 | btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); |
710 | return -EIO; /* we fixed nothing */ | 710 | return -EIO; /* we fixed nothing */ |
711 | } | 711 | } |
712 | 712 | ||
@@ -816,7 +816,7 @@ static void run_one_async_done(struct btrfs_work *work) | |||
816 | waitqueue_active(&fs_info->async_submit_wait)) | 816 | waitqueue_active(&fs_info->async_submit_wait)) |
817 | wake_up(&fs_info->async_submit_wait); | 817 | wake_up(&fs_info->async_submit_wait); |
818 | 818 | ||
819 | /* If an error occured we just want to clean up the bio and move on */ | 819 | /* If an error occurred we just want to clean up the bio and move on */ |
820 | if (async->error) { | 820 | if (async->error) { |
821 | async->bio->bi_error = async->error; | 821 | async->bio->bi_error = async->error; |
822 | bio_endio(async->bio); | 822 | bio_endio(async->bio); |
@@ -1296,9 +1296,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, | |||
1296 | spin_lock_init(&root->root_item_lock); | 1296 | spin_lock_init(&root->root_item_lock); |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) | 1299 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, |
1300 | gfp_t flags) | ||
1300 | { | 1301 | { |
1301 | struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); | 1302 | struct btrfs_root *root = kzalloc(sizeof(*root), flags); |
1302 | if (root) | 1303 | if (root) |
1303 | root->fs_info = fs_info; | 1304 | root->fs_info = fs_info; |
1304 | return root; | 1305 | return root; |
@@ -1310,7 +1311,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) | |||
1310 | { | 1311 | { |
1311 | struct btrfs_root *root; | 1312 | struct btrfs_root *root; |
1312 | 1313 | ||
1313 | root = btrfs_alloc_root(NULL); | 1314 | root = btrfs_alloc_root(NULL, GFP_KERNEL); |
1314 | if (!root) | 1315 | if (!root) |
1315 | return ERR_PTR(-ENOMEM); | 1316 | return ERR_PTR(-ENOMEM); |
1316 | __setup_root(4096, 4096, 4096, root, NULL, 1); | 1317 | __setup_root(4096, 4096, 4096, root, NULL, 1); |
@@ -1332,7 +1333,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
1332 | int ret = 0; | 1333 | int ret = 0; |
1333 | uuid_le uuid; | 1334 | uuid_le uuid; |
1334 | 1335 | ||
1335 | root = btrfs_alloc_root(fs_info); | 1336 | root = btrfs_alloc_root(fs_info, GFP_KERNEL); |
1336 | if (!root) | 1337 | if (!root) |
1337 | return ERR_PTR(-ENOMEM); | 1338 | return ERR_PTR(-ENOMEM); |
1338 | 1339 | ||
@@ -1408,7 +1409,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1408 | struct btrfs_root *tree_root = fs_info->tree_root; | 1409 | struct btrfs_root *tree_root = fs_info->tree_root; |
1409 | struct extent_buffer *leaf; | 1410 | struct extent_buffer *leaf; |
1410 | 1411 | ||
1411 | root = btrfs_alloc_root(fs_info); | 1412 | root = btrfs_alloc_root(fs_info, GFP_NOFS); |
1412 | if (!root) | 1413 | if (!root) |
1413 | return ERR_PTR(-ENOMEM); | 1414 | return ERR_PTR(-ENOMEM); |
1414 | 1415 | ||
@@ -1506,7 +1507,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1506 | if (!path) | 1507 | if (!path) |
1507 | return ERR_PTR(-ENOMEM); | 1508 | return ERR_PTR(-ENOMEM); |
1508 | 1509 | ||
1509 | root = btrfs_alloc_root(fs_info); | 1510 | root = btrfs_alloc_root(fs_info, GFP_NOFS); |
1510 | if (!root) { | 1511 | if (!root) { |
1511 | ret = -ENOMEM; | 1512 | ret = -ENOMEM; |
1512 | goto alloc_fail; | 1513 | goto alloc_fail; |
@@ -2272,9 +2273,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) | |||
2272 | fs_info->dev_replace.lock_owner = 0; | 2273 | fs_info->dev_replace.lock_owner = 0; |
2273 | atomic_set(&fs_info->dev_replace.nesting_level, 0); | 2274 | atomic_set(&fs_info->dev_replace.nesting_level, 0); |
2274 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); | 2275 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); |
2275 | mutex_init(&fs_info->dev_replace.lock_management_lock); | 2276 | rwlock_init(&fs_info->dev_replace.lock); |
2276 | mutex_init(&fs_info->dev_replace.lock); | 2277 | atomic_set(&fs_info->dev_replace.read_locks, 0); |
2278 | atomic_set(&fs_info->dev_replace.blocking_readers, 0); | ||
2277 | init_waitqueue_head(&fs_info->replace_wait); | 2279 | init_waitqueue_head(&fs_info->replace_wait); |
2280 | init_waitqueue_head(&fs_info->dev_replace.read_lock_wq); | ||
2278 | } | 2281 | } |
2279 | 2282 | ||
2280 | static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) | 2283 | static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) |
@@ -2385,7 +2388,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, | |||
2385 | return -EIO; | 2388 | return -EIO; |
2386 | } | 2389 | } |
2387 | 2390 | ||
2388 | log_tree_root = btrfs_alloc_root(fs_info); | 2391 | log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); |
2389 | if (!log_tree_root) | 2392 | if (!log_tree_root) |
2390 | return -ENOMEM; | 2393 | return -ENOMEM; |
2391 | 2394 | ||
@@ -2510,8 +2513,8 @@ int open_ctree(struct super_block *sb, | |||
2510 | int backup_index = 0; | 2513 | int backup_index = 0; |
2511 | int max_active; | 2514 | int max_active; |
2512 | 2515 | ||
2513 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); | 2516 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); |
2514 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); | 2517 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); |
2515 | if (!tree_root || !chunk_root) { | 2518 | if (!tree_root || !chunk_root) { |
2516 | err = -ENOMEM; | 2519 | err = -ENOMEM; |
2517 | goto fail; | 2520 | goto fail; |
@@ -2603,6 +2606,7 @@ int open_ctree(struct super_block *sb, | |||
2603 | atomic_set(&fs_info->nr_async_bios, 0); | 2606 | atomic_set(&fs_info->nr_async_bios, 0); |
2604 | atomic_set(&fs_info->defrag_running, 0); | 2607 | atomic_set(&fs_info->defrag_running, 0); |
2605 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2608 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2609 | atomic_set(&fs_info->reada_works_cnt, 0); | ||
2606 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2610 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2607 | fs_info->sb = sb; | 2611 | fs_info->sb = sb; |
2608 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; | 2612 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; |
@@ -2622,7 +2626,7 @@ int open_ctree(struct super_block *sb, | |||
2622 | INIT_LIST_HEAD(&fs_info->ordered_roots); | 2626 | INIT_LIST_HEAD(&fs_info->ordered_roots); |
2623 | spin_lock_init(&fs_info->ordered_root_lock); | 2627 | spin_lock_init(&fs_info->ordered_root_lock); |
2624 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), | 2628 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), |
2625 | GFP_NOFS); | 2629 | GFP_KERNEL); |
2626 | if (!fs_info->delayed_root) { | 2630 | if (!fs_info->delayed_root) { |
2627 | err = -ENOMEM; | 2631 | err = -ENOMEM; |
2628 | goto fail_iput; | 2632 | goto fail_iput; |
@@ -2750,7 +2754,7 @@ int open_ctree(struct super_block *sb, | |||
2750 | */ | 2754 | */ |
2751 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; | 2755 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; |
2752 | 2756 | ||
2753 | ret = btrfs_parse_options(tree_root, options); | 2757 | ret = btrfs_parse_options(tree_root, options, sb->s_flags); |
2754 | if (ret) { | 2758 | if (ret) { |
2755 | err = ret; | 2759 | err = ret; |
2756 | goto fail_alloc; | 2760 | goto fail_alloc; |
@@ -3029,8 +3033,9 @@ retry_root_backup: | |||
3029 | if (ret) | 3033 | if (ret) |
3030 | goto fail_trans_kthread; | 3034 | goto fail_trans_kthread; |
3031 | 3035 | ||
3032 | /* do not make disk changes in broken FS */ | 3036 | /* do not make disk changes in broken FS or nologreplay is given */ |
3033 | if (btrfs_super_log_root(disk_super) != 0) { | 3037 | if (btrfs_super_log_root(disk_super) != 0 && |
3038 | !btrfs_test_opt(tree_root, NOLOGREPLAY)) { | ||
3034 | ret = btrfs_replay_log(fs_info, fs_devices); | 3039 | ret = btrfs_replay_log(fs_info, fs_devices); |
3035 | if (ret) { | 3040 | if (ret) { |
3036 | err = ret; | 3041 | err = ret; |
@@ -3146,6 +3151,12 @@ retry_root_backup: | |||
3146 | 3151 | ||
3147 | fs_info->open = 1; | 3152 | fs_info->open = 1; |
3148 | 3153 | ||
3154 | /* | ||
3155 | * backuproot only affect mount behavior, and if open_ctree succeeded, | ||
3156 | * no need to keep the flag | ||
3157 | */ | ||
3158 | btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT); | ||
3159 | |||
3149 | return 0; | 3160 | return 0; |
3150 | 3161 | ||
3151 | fail_qgroup: | 3162 | fail_qgroup: |
@@ -3200,7 +3211,7 @@ fail: | |||
3200 | return err; | 3211 | return err; |
3201 | 3212 | ||
3202 | recovery_tree_root: | 3213 | recovery_tree_root: |
3203 | if (!btrfs_test_opt(tree_root, RECOVERY)) | 3214 | if (!btrfs_test_opt(tree_root, USEBACKUPROOT)) |
3204 | goto fail_tree_roots; | 3215 | goto fail_tree_roots; |
3205 | 3216 | ||
3206 | free_root_pointers(fs_info, 0); | 3217 | free_root_pointers(fs_info, 0); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e2287c7c10be..53e12977bfd0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -4838,7 +4838,7 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | |||
4838 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); | 4838 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); |
4839 | 4839 | ||
4840 | /* If we're just plain full then async reclaim just slows us down. */ | 4840 | /* If we're just plain full then async reclaim just slows us down. */ |
4841 | if (space_info->bytes_used >= thresh) | 4841 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) |
4842 | return 0; | 4842 | return 0; |
4843 | 4843 | ||
4844 | return (used >= thresh && !btrfs_fs_closing(fs_info) && | 4844 | return (used >= thresh && !btrfs_fs_closing(fs_info) && |
@@ -5373,27 +5373,33 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
5373 | 5373 | ||
5374 | block_rsv->size = min_t(u64, num_bytes, SZ_512M); | 5374 | block_rsv->size = min_t(u64, num_bytes, SZ_512M); |
5375 | 5375 | ||
5376 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | 5376 | if (block_rsv->reserved < block_rsv->size) { |
5377 | sinfo->bytes_reserved + sinfo->bytes_readonly + | 5377 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + |
5378 | sinfo->bytes_may_use; | 5378 | sinfo->bytes_reserved + sinfo->bytes_readonly + |
5379 | 5379 | sinfo->bytes_may_use; | |
5380 | if (sinfo->total_bytes > num_bytes) { | 5380 | if (sinfo->total_bytes > num_bytes) { |
5381 | num_bytes = sinfo->total_bytes - num_bytes; | 5381 | num_bytes = sinfo->total_bytes - num_bytes; |
5382 | block_rsv->reserved += num_bytes; | 5382 | num_bytes = min(num_bytes, |
5383 | sinfo->bytes_may_use += num_bytes; | 5383 | block_rsv->size - block_rsv->reserved); |
5384 | trace_btrfs_space_reservation(fs_info, "space_info", | 5384 | block_rsv->reserved += num_bytes; |
5385 | sinfo->flags, num_bytes, 1); | 5385 | sinfo->bytes_may_use += num_bytes; |
5386 | } | 5386 | trace_btrfs_space_reservation(fs_info, "space_info", |
5387 | 5387 | sinfo->flags, num_bytes, | |
5388 | if (block_rsv->reserved >= block_rsv->size) { | 5388 | 1); |
5389 | } | ||
5390 | } else if (block_rsv->reserved > block_rsv->size) { | ||
5389 | num_bytes = block_rsv->reserved - block_rsv->size; | 5391 | num_bytes = block_rsv->reserved - block_rsv->size; |
5390 | sinfo->bytes_may_use -= num_bytes; | 5392 | sinfo->bytes_may_use -= num_bytes; |
5391 | trace_btrfs_space_reservation(fs_info, "space_info", | 5393 | trace_btrfs_space_reservation(fs_info, "space_info", |
5392 | sinfo->flags, num_bytes, 0); | 5394 | sinfo->flags, num_bytes, 0); |
5393 | block_rsv->reserved = block_rsv->size; | 5395 | block_rsv->reserved = block_rsv->size; |
5394 | block_rsv->full = 1; | ||
5395 | } | 5396 | } |
5396 | 5397 | ||
5398 | if (block_rsv->reserved == block_rsv->size) | ||
5399 | block_rsv->full = 1; | ||
5400 | else | ||
5401 | block_rsv->full = 0; | ||
5402 | |||
5397 | spin_unlock(&block_rsv->lock); | 5403 | spin_unlock(&block_rsv->lock); |
5398 | spin_unlock(&sinfo->lock); | 5404 | spin_unlock(&sinfo->lock); |
5399 | } | 5405 | } |
@@ -5752,7 +5758,7 @@ out_fail: | |||
5752 | 5758 | ||
5753 | /* | 5759 | /* |
5754 | * This is tricky, but first we need to figure out how much we | 5760 | * This is tricky, but first we need to figure out how much we |
5755 | * free'd from any free-ers that occured during this | 5761 | * free'd from any free-ers that occurred during this |
5756 | * reservation, so we reset ->csum_bytes to the csum_bytes | 5762 | * reservation, so we reset ->csum_bytes to the csum_bytes |
5757 | * before we dropped our lock, and then call the free for the | 5763 | * before we dropped our lock, and then call the free for the |
5758 | * number of bytes that were freed while we were trying our | 5764 | * number of bytes that were freed while we were trying our |
@@ -7018,7 +7024,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, | |||
7018 | struct btrfs_free_cluster *cluster, | 7024 | struct btrfs_free_cluster *cluster, |
7019 | int delalloc) | 7025 | int delalloc) |
7020 | { | 7026 | { |
7021 | struct btrfs_block_group_cache *used_bg; | 7027 | struct btrfs_block_group_cache *used_bg = NULL; |
7022 | bool locked = false; | 7028 | bool locked = false; |
7023 | again: | 7029 | again: |
7024 | spin_lock(&cluster->refill_lock); | 7030 | spin_lock(&cluster->refill_lock); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 392592dc7010..76a0c8597d98 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -206,10 +206,8 @@ void extent_io_exit(void) | |||
206 | * destroy caches. | 206 | * destroy caches. |
207 | */ | 207 | */ |
208 | rcu_barrier(); | 208 | rcu_barrier(); |
209 | if (extent_state_cache) | 209 | kmem_cache_destroy(extent_state_cache); |
210 | kmem_cache_destroy(extent_state_cache); | 210 | kmem_cache_destroy(extent_buffer_cache); |
211 | if (extent_buffer_cache) | ||
212 | kmem_cache_destroy(extent_buffer_cache); | ||
213 | if (btrfs_bioset) | 211 | if (btrfs_bioset) |
214 | bioset_free(btrfs_bioset); | 212 | bioset_free(btrfs_bioset); |
215 | } | 213 | } |
@@ -232,7 +230,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
232 | if (!state) | 230 | if (!state) |
233 | return state; | 231 | return state; |
234 | state->state = 0; | 232 | state->state = 0; |
235 | state->private = 0; | 233 | state->failrec = NULL; |
236 | RB_CLEAR_NODE(&state->rb_node); | 234 | RB_CLEAR_NODE(&state->rb_node); |
237 | btrfs_leak_debug_add(&state->leak_list, &states); | 235 | btrfs_leak_debug_add(&state->leak_list, &states); |
238 | atomic_set(&state->refs, 1); | 236 | atomic_set(&state->refs, 1); |
@@ -1844,7 +1842,8 @@ out: | |||
1844 | * set the private field for a given byte offset in the tree. If there isn't | 1842 | * set the private field for a given byte offset in the tree. If there isn't |
1845 | * an extent_state there already, this does nothing. | 1843 | * an extent_state there already, this does nothing. |
1846 | */ | 1844 | */ |
1847 | static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) | 1845 | static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start, |
1846 | struct io_failure_record *failrec) | ||
1848 | { | 1847 | { |
1849 | struct rb_node *node; | 1848 | struct rb_node *node; |
1850 | struct extent_state *state; | 1849 | struct extent_state *state; |
@@ -1865,13 +1864,14 @@ static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private | |||
1865 | ret = -ENOENT; | 1864 | ret = -ENOENT; |
1866 | goto out; | 1865 | goto out; |
1867 | } | 1866 | } |
1868 | state->private = private; | 1867 | state->failrec = failrec; |
1869 | out: | 1868 | out: |
1870 | spin_unlock(&tree->lock); | 1869 | spin_unlock(&tree->lock); |
1871 | return ret; | 1870 | return ret; |
1872 | } | 1871 | } |
1873 | 1872 | ||
1874 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) | 1873 | static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start, |
1874 | struct io_failure_record **failrec) | ||
1875 | { | 1875 | { |
1876 | struct rb_node *node; | 1876 | struct rb_node *node; |
1877 | struct extent_state *state; | 1877 | struct extent_state *state; |
@@ -1892,7 +1892,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) | |||
1892 | ret = -ENOENT; | 1892 | ret = -ENOENT; |
1893 | goto out; | 1893 | goto out; |
1894 | } | 1894 | } |
1895 | *private = state->private; | 1895 | *failrec = state->failrec; |
1896 | out: | 1896 | out: |
1897 | spin_unlock(&tree->lock); | 1897 | spin_unlock(&tree->lock); |
1898 | return ret; | 1898 | return ret; |
@@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec) | |||
1972 | int err = 0; | 1972 | int err = 0; |
1973 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | 1973 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
1974 | 1974 | ||
1975 | set_state_private(failure_tree, rec->start, 0); | 1975 | set_state_failrec(failure_tree, rec->start, NULL); |
1976 | ret = clear_extent_bits(failure_tree, rec->start, | 1976 | ret = clear_extent_bits(failure_tree, rec->start, |
1977 | rec->start + rec->len - 1, | 1977 | rec->start + rec->len - 1, |
1978 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | 1978 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); |
@@ -2089,7 +2089,6 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, | |||
2089 | unsigned int pg_offset) | 2089 | unsigned int pg_offset) |
2090 | { | 2090 | { |
2091 | u64 private; | 2091 | u64 private; |
2092 | u64 private_failure; | ||
2093 | struct io_failure_record *failrec; | 2092 | struct io_failure_record *failrec; |
2094 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | 2093 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
2095 | struct extent_state *state; | 2094 | struct extent_state *state; |
@@ -2102,12 +2101,11 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, | |||
2102 | if (!ret) | 2101 | if (!ret) |
2103 | return 0; | 2102 | return 0; |
2104 | 2103 | ||
2105 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, | 2104 | ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start, |
2106 | &private_failure); | 2105 | &failrec); |
2107 | if (ret) | 2106 | if (ret) |
2108 | return 0; | 2107 | return 0; |
2109 | 2108 | ||
2110 | failrec = (struct io_failure_record *)(unsigned long) private_failure; | ||
2111 | BUG_ON(!failrec->this_mirror); | 2109 | BUG_ON(!failrec->this_mirror); |
2112 | 2110 | ||
2113 | if (failrec->in_validation) { | 2111 | if (failrec->in_validation) { |
@@ -2167,7 +2165,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) | |||
2167 | 2165 | ||
2168 | next = next_state(state); | 2166 | next = next_state(state); |
2169 | 2167 | ||
2170 | failrec = (struct io_failure_record *)(unsigned long)state->private; | 2168 | failrec = state->failrec; |
2171 | free_extent_state(state); | 2169 | free_extent_state(state); |
2172 | kfree(failrec); | 2170 | kfree(failrec); |
2173 | 2171 | ||
@@ -2177,10 +2175,9 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) | |||
2177 | } | 2175 | } |
2178 | 2176 | ||
2179 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | 2177 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, |
2180 | struct io_failure_record **failrec_ret) | 2178 | struct io_failure_record **failrec_ret) |
2181 | { | 2179 | { |
2182 | struct io_failure_record *failrec; | 2180 | struct io_failure_record *failrec; |
2183 | u64 private; | ||
2184 | struct extent_map *em; | 2181 | struct extent_map *em; |
2185 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | 2182 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
2186 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 2183 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
@@ -2188,7 +2185,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | |||
2188 | int ret; | 2185 | int ret; |
2189 | u64 logical; | 2186 | u64 logical; |
2190 | 2187 | ||
2191 | ret = get_state_private(failure_tree, start, &private); | 2188 | ret = get_state_failrec(failure_tree, start, &failrec); |
2192 | if (ret) { | 2189 | if (ret) { |
2193 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); | 2190 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); |
2194 | if (!failrec) | 2191 | if (!failrec) |
@@ -2237,8 +2234,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | |||
2237 | ret = set_extent_bits(failure_tree, start, end, | 2234 | ret = set_extent_bits(failure_tree, start, end, |
2238 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | 2235 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); |
2239 | if (ret >= 0) | 2236 | if (ret >= 0) |
2240 | ret = set_state_private(failure_tree, start, | 2237 | ret = set_state_failrec(failure_tree, start, failrec); |
2241 | (u64)(unsigned long)failrec); | ||
2242 | /* set the bits in the inode's tree */ | 2238 | /* set the bits in the inode's tree */ |
2243 | if (ret >= 0) | 2239 | if (ret >= 0) |
2244 | ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, | 2240 | ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, |
@@ -2248,7 +2244,6 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | |||
2248 | return ret; | 2244 | return ret; |
2249 | } | 2245 | } |
2250 | } else { | 2246 | } else { |
2251 | failrec = (struct io_failure_record *)(unsigned long)private; | ||
2252 | pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", | 2247 | pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", |
2253 | failrec->logical, failrec->start, failrec->len, | 2248 | failrec->logical, failrec->start, failrec->len, |
2254 | failrec->in_validation); | 2249 | failrec->in_validation); |
@@ -3177,7 +3172,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
3177 | 3172 | ||
3178 | while (1) { | 3173 | while (1) { |
3179 | lock_extent(tree, start, end); | 3174 | lock_extent(tree, start, end); |
3180 | ordered = btrfs_lookup_ordered_extent(inode, start); | 3175 | ordered = btrfs_lookup_ordered_range(inode, start, |
3176 | PAGE_CACHE_SIZE); | ||
3181 | if (!ordered) | 3177 | if (!ordered) |
3182 | break; | 3178 | break; |
3183 | unlock_extent(tree, start, end); | 3179 | unlock_extent(tree, start, end); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 880d5292e972..5dbf92e68fbd 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -61,6 +61,7 @@ | |||
61 | struct extent_state; | 61 | struct extent_state; |
62 | struct btrfs_root; | 62 | struct btrfs_root; |
63 | struct btrfs_io_bio; | 63 | struct btrfs_io_bio; |
64 | struct io_failure_record; | ||
64 | 65 | ||
65 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 66 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
66 | struct bio *bio, int mirror_num, | 67 | struct bio *bio, int mirror_num, |
@@ -111,8 +112,7 @@ struct extent_state { | |||
111 | atomic_t refs; | 112 | atomic_t refs; |
112 | unsigned state; | 113 | unsigned state; |
113 | 114 | ||
114 | /* for use by the FS */ | 115 | struct io_failure_record *failrec; |
115 | u64 private; | ||
116 | 116 | ||
117 | #ifdef CONFIG_BTRFS_DEBUG | 117 | #ifdef CONFIG_BTRFS_DEBUG |
118 | struct list_head leak_list; | 118 | struct list_head leak_list; |
@@ -342,7 +342,6 @@ int extent_readpages(struct extent_io_tree *tree, | |||
342 | get_extent_t get_extent); | 342 | get_extent_t get_extent); |
343 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 343 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
344 | __u64 start, __u64 len, get_extent_t *get_extent); | 344 | __u64 start, __u64 len, get_extent_t *get_extent); |
345 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); | ||
346 | void set_page_extent_mapped(struct page *page); | 345 | void set_page_extent_mapped(struct page *page); |
347 | 346 | ||
348 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | 347 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 84fb56d5c018..318b048eb254 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/hardirq.h> | 4 | #include <linux/hardirq.h> |
5 | #include "ctree.h" | 5 | #include "ctree.h" |
6 | #include "extent_map.h" | 6 | #include "extent_map.h" |
7 | #include "compression.h" | ||
7 | 8 | ||
8 | 9 | ||
9 | static struct kmem_cache *extent_map_cache; | 10 | static struct kmem_cache *extent_map_cache; |
@@ -20,8 +21,7 @@ int __init extent_map_init(void) | |||
20 | 21 | ||
21 | void extent_map_exit(void) | 22 | void extent_map_exit(void) |
22 | { | 23 | { |
23 | if (extent_map_cache) | 24 | kmem_cache_destroy(extent_map_cache); |
24 | kmem_cache_destroy(extent_map_cache); | ||
25 | } | 25 | } |
26 | 26 | ||
27 | /** | 27 | /** |
@@ -62,7 +62,7 @@ struct extent_map *alloc_extent_map(void) | |||
62 | 62 | ||
63 | /** | 63 | /** |
64 | * free_extent_map - drop reference count of an extent_map | 64 | * free_extent_map - drop reference count of an extent_map |
65 | * @em: extent map beeing releasead | 65 | * @em: extent map being releasead |
66 | * | 66 | * |
67 | * Drops the reference out on @em by one and free the structure | 67 | * Drops the reference out on @em by one and free the structure |
68 | * if the reference count hits zero. | 68 | * if the reference count hits zero. |
@@ -422,7 +422,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | |||
422 | /** | 422 | /** |
423 | * remove_extent_mapping - removes an extent_map from the extent tree | 423 | * remove_extent_mapping - removes an extent_map from the extent tree |
424 | * @tree: extent tree to remove from | 424 | * @tree: extent tree to remove from |
425 | * @em: extent map beeing removed | 425 | * @em: extent map being removed |
426 | * | 426 | * |
427 | * Removes @em from @tree. No reference counts are dropped, and no checks | 427 | * Removes @em from @tree. No reference counts are dropped, and no checks |
428 | * are done to see if the range is in use | 428 | * are done to see if the range is in use |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a67e1c828d0f..b5baf5bdc8e1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "volumes.h" | 26 | #include "volumes.h" |
27 | #include "print-tree.h" | 27 | #include "print-tree.h" |
28 | #include "compression.h" | ||
28 | 29 | ||
29 | #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ | 30 | #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ |
30 | sizeof(struct btrfs_item) * 2) / \ | 31 | sizeof(struct btrfs_item) * 2) / \ |
@@ -172,6 +173,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
172 | u64 item_start_offset = 0; | 173 | u64 item_start_offset = 0; |
173 | u64 item_last_offset = 0; | 174 | u64 item_last_offset = 0; |
174 | u64 disk_bytenr; | 175 | u64 disk_bytenr; |
176 | u64 page_bytes_left; | ||
175 | u32 diff; | 177 | u32 diff; |
176 | int nblocks; | 178 | int nblocks; |
177 | int bio_index = 0; | 179 | int bio_index = 0; |
@@ -220,6 +222,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
220 | disk_bytenr = (u64)bio->bi_iter.bi_sector << 9; | 222 | disk_bytenr = (u64)bio->bi_iter.bi_sector << 9; |
221 | if (dio) | 223 | if (dio) |
222 | offset = logical_offset; | 224 | offset = logical_offset; |
225 | |||
226 | page_bytes_left = bvec->bv_len; | ||
223 | while (bio_index < bio->bi_vcnt) { | 227 | while (bio_index < bio->bi_vcnt) { |
224 | if (!dio) | 228 | if (!dio) |
225 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 229 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; |
@@ -243,7 +247,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
243 | if (BTRFS_I(inode)->root->root_key.objectid == | 247 | if (BTRFS_I(inode)->root->root_key.objectid == |
244 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | 248 | BTRFS_DATA_RELOC_TREE_OBJECTID) { |
245 | set_extent_bits(io_tree, offset, | 249 | set_extent_bits(io_tree, offset, |
246 | offset + bvec->bv_len - 1, | 250 | offset + root->sectorsize - 1, |
247 | EXTENT_NODATASUM, GFP_NOFS); | 251 | EXTENT_NODATASUM, GFP_NOFS); |
248 | } else { | 252 | } else { |
249 | btrfs_info(BTRFS_I(inode)->root->fs_info, | 253 | btrfs_info(BTRFS_I(inode)->root->fs_info, |
@@ -281,13 +285,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
281 | found: | 285 | found: |
282 | csum += count * csum_size; | 286 | csum += count * csum_size; |
283 | nblocks -= count; | 287 | nblocks -= count; |
284 | bio_index += count; | 288 | |
285 | while (count--) { | 289 | while (count--) { |
286 | disk_bytenr += bvec->bv_len; | 290 | disk_bytenr += root->sectorsize; |
287 | offset += bvec->bv_len; | 291 | offset += root->sectorsize; |
288 | bvec++; | 292 | page_bytes_left -= root->sectorsize; |
293 | if (!page_bytes_left) { | ||
294 | bio_index++; | ||
295 | /* | ||
296 | * make sure we're still inside the | ||
297 | * bio before we update page_bytes_left | ||
298 | */ | ||
299 | if (bio_index >= bio->bi_vcnt) { | ||
300 | WARN_ON_ONCE(count); | ||
301 | goto done; | ||
302 | } | ||
303 | bvec++; | ||
304 | page_bytes_left = bvec->bv_len; | ||
305 | } | ||
306 | |||
289 | } | 307 | } |
290 | } | 308 | } |
309 | |||
310 | done: | ||
291 | btrfs_free_path(path); | 311 | btrfs_free_path(path); |
292 | return 0; | 312 | return 0; |
293 | } | 313 | } |
@@ -432,6 +452,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
432 | struct bio_vec *bvec = bio->bi_io_vec; | 452 | struct bio_vec *bvec = bio->bi_io_vec; |
433 | int bio_index = 0; | 453 | int bio_index = 0; |
434 | int index; | 454 | int index; |
455 | int nr_sectors; | ||
456 | int i; | ||
435 | unsigned long total_bytes = 0; | 457 | unsigned long total_bytes = 0; |
436 | unsigned long this_sum_bytes = 0; | 458 | unsigned long this_sum_bytes = 0; |
437 | u64 offset; | 459 | u64 offset; |
@@ -459,41 +481,56 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
459 | if (!contig) | 481 | if (!contig) |
460 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 482 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; |
461 | 483 | ||
462 | if (offset >= ordered->file_offset + ordered->len || | 484 | data = kmap_atomic(bvec->bv_page); |
463 | offset < ordered->file_offset) { | ||
464 | unsigned long bytes_left; | ||
465 | sums->len = this_sum_bytes; | ||
466 | this_sum_bytes = 0; | ||
467 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
468 | btrfs_put_ordered_extent(ordered); | ||
469 | 485 | ||
470 | bytes_left = bio->bi_iter.bi_size - total_bytes; | 486 | nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, |
487 | bvec->bv_len + root->sectorsize | ||
488 | - 1); | ||
489 | |||
490 | for (i = 0; i < nr_sectors; i++) { | ||
491 | if (offset >= ordered->file_offset + ordered->len || | ||
492 | offset < ordered->file_offset) { | ||
493 | unsigned long bytes_left; | ||
494 | |||
495 | kunmap_atomic(data); | ||
496 | sums->len = this_sum_bytes; | ||
497 | this_sum_bytes = 0; | ||
498 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
499 | btrfs_put_ordered_extent(ordered); | ||
500 | |||
501 | bytes_left = bio->bi_iter.bi_size - total_bytes; | ||
502 | |||
503 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), | ||
504 | GFP_NOFS); | ||
505 | BUG_ON(!sums); /* -ENOMEM */ | ||
506 | sums->len = bytes_left; | ||
507 | ordered = btrfs_lookup_ordered_extent(inode, | ||
508 | offset); | ||
509 | ASSERT(ordered); /* Logic error */ | ||
510 | sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) | ||
511 | + total_bytes; | ||
512 | index = 0; | ||
513 | |||
514 | data = kmap_atomic(bvec->bv_page); | ||
515 | } | ||
471 | 516 | ||
472 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), | 517 | sums->sums[index] = ~(u32)0; |
473 | GFP_NOFS); | 518 | sums->sums[index] |
474 | BUG_ON(!sums); /* -ENOMEM */ | 519 | = btrfs_csum_data(data + bvec->bv_offset |
475 | sums->len = bytes_left; | 520 | + (i * root->sectorsize), |
476 | ordered = btrfs_lookup_ordered_extent(inode, offset); | 521 | sums->sums[index], |
477 | BUG_ON(!ordered); /* Logic error */ | 522 | root->sectorsize); |
478 | sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) + | 523 | btrfs_csum_final(sums->sums[index], |
479 | total_bytes; | 524 | (char *)(sums->sums + index)); |
480 | index = 0; | 525 | index++; |
526 | offset += root->sectorsize; | ||
527 | this_sum_bytes += root->sectorsize; | ||
528 | total_bytes += root->sectorsize; | ||
481 | } | 529 | } |
482 | 530 | ||
483 | data = kmap_atomic(bvec->bv_page); | ||
484 | sums->sums[index] = ~(u32)0; | ||
485 | sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset, | ||
486 | sums->sums[index], | ||
487 | bvec->bv_len); | ||
488 | kunmap_atomic(data); | 531 | kunmap_atomic(data); |
489 | btrfs_csum_final(sums->sums[index], | ||
490 | (char *)(sums->sums + index)); | ||
491 | 532 | ||
492 | bio_index++; | 533 | bio_index++; |
493 | index++; | ||
494 | total_bytes += bvec->bv_len; | ||
495 | this_sum_bytes += bvec->bv_len; | ||
496 | offset += bvec->bv_len; | ||
497 | bvec++; | 534 | bvec++; |
498 | } | 535 | } |
499 | this_sum_bytes = 0; | 536 | this_sum_bytes = 0; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 098bb8f690c9..15a09cb156ce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "locking.h" | 41 | #include "locking.h" |
42 | #include "volumes.h" | 42 | #include "volumes.h" |
43 | #include "qgroup.h" | 43 | #include "qgroup.h" |
44 | #include "compression.h" | ||
44 | 45 | ||
45 | static struct kmem_cache *btrfs_inode_defrag_cachep; | 46 | static struct kmem_cache *btrfs_inode_defrag_cachep; |
46 | /* | 47 | /* |
@@ -498,7 +499,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | |||
498 | loff_t isize = i_size_read(inode); | 499 | loff_t isize = i_size_read(inode); |
499 | 500 | ||
500 | start_pos = pos & ~((u64)root->sectorsize - 1); | 501 | start_pos = pos & ~((u64)root->sectorsize - 1); |
501 | num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); | 502 | num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize); |
502 | 503 | ||
503 | end_of_last_block = start_pos + num_bytes - 1; | 504 | end_of_last_block = start_pos + num_bytes - 1; |
504 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 505 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
@@ -1379,16 +1380,19 @@ fail: | |||
1379 | static noinline int | 1380 | static noinline int |
1380 | lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | 1381 | lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, |
1381 | size_t num_pages, loff_t pos, | 1382 | size_t num_pages, loff_t pos, |
1383 | size_t write_bytes, | ||
1382 | u64 *lockstart, u64 *lockend, | 1384 | u64 *lockstart, u64 *lockend, |
1383 | struct extent_state **cached_state) | 1385 | struct extent_state **cached_state) |
1384 | { | 1386 | { |
1387 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1385 | u64 start_pos; | 1388 | u64 start_pos; |
1386 | u64 last_pos; | 1389 | u64 last_pos; |
1387 | int i; | 1390 | int i; |
1388 | int ret = 0; | 1391 | int ret = 0; |
1389 | 1392 | ||
1390 | start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); | 1393 | start_pos = round_down(pos, root->sectorsize); |
1391 | last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1; | 1394 | last_pos = start_pos |
1395 | + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1; | ||
1392 | 1396 | ||
1393 | if (start_pos < inode->i_size) { | 1397 | if (start_pos < inode->i_size) { |
1394 | struct btrfs_ordered_extent *ordered; | 1398 | struct btrfs_ordered_extent *ordered; |
@@ -1503,6 +1507,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1503 | 1507 | ||
1504 | while (iov_iter_count(i) > 0) { | 1508 | while (iov_iter_count(i) > 0) { |
1505 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 1509 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
1510 | size_t sector_offset; | ||
1506 | size_t write_bytes = min(iov_iter_count(i), | 1511 | size_t write_bytes = min(iov_iter_count(i), |
1507 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 1512 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
1508 | offset); | 1513 | offset); |
@@ -1511,6 +1516,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1511 | size_t reserve_bytes; | 1516 | size_t reserve_bytes; |
1512 | size_t dirty_pages; | 1517 | size_t dirty_pages; |
1513 | size_t copied; | 1518 | size_t copied; |
1519 | size_t dirty_sectors; | ||
1520 | size_t num_sectors; | ||
1514 | 1521 | ||
1515 | WARN_ON(num_pages > nrptrs); | 1522 | WARN_ON(num_pages > nrptrs); |
1516 | 1523 | ||
@@ -1523,29 +1530,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1523 | break; | 1530 | break; |
1524 | } | 1531 | } |
1525 | 1532 | ||
1526 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | 1533 | sector_offset = pos & (root->sectorsize - 1); |
1534 | reserve_bytes = round_up(write_bytes + sector_offset, | ||
1535 | root->sectorsize); | ||
1527 | 1536 | ||
1528 | if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | | 1537 | if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | |
1529 | BTRFS_INODE_PREALLOC)) { | 1538 | BTRFS_INODE_PREALLOC)) && |
1530 | ret = check_can_nocow(inode, pos, &write_bytes); | 1539 | check_can_nocow(inode, pos, &write_bytes) > 0) { |
1531 | if (ret < 0) | 1540 | /* |
1532 | break; | 1541 | * For nodata cow case, no need to reserve |
1533 | if (ret > 0) { | 1542 | * data space. |
1534 | /* | 1543 | */ |
1535 | * For nodata cow case, no need to reserve | 1544 | only_release_metadata = true; |
1536 | * data space. | 1545 | /* |
1537 | */ | 1546 | * our prealloc extent may be smaller than |
1538 | only_release_metadata = true; | 1547 | * write_bytes, so scale down. |
1539 | /* | 1548 | */ |
1540 | * our prealloc extent may be smaller than | 1549 | num_pages = DIV_ROUND_UP(write_bytes + offset, |
1541 | * write_bytes, so scale down. | 1550 | PAGE_CACHE_SIZE); |
1542 | */ | 1551 | reserve_bytes = round_up(write_bytes + sector_offset, |
1543 | num_pages = DIV_ROUND_UP(write_bytes + offset, | 1552 | root->sectorsize); |
1544 | PAGE_CACHE_SIZE); | 1553 | goto reserve_metadata; |
1545 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | ||
1546 | goto reserve_metadata; | ||
1547 | } | ||
1548 | } | 1554 | } |
1555 | |||
1549 | ret = btrfs_check_data_free_space(inode, pos, write_bytes); | 1556 | ret = btrfs_check_data_free_space(inode, pos, write_bytes); |
1550 | if (ret < 0) | 1557 | if (ret < 0) |
1551 | break; | 1558 | break; |
@@ -1576,8 +1583,8 @@ again: | |||
1576 | break; | 1583 | break; |
1577 | 1584 | ||
1578 | ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, | 1585 | ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, |
1579 | pos, &lockstart, &lockend, | 1586 | pos, write_bytes, &lockstart, |
1580 | &cached_state); | 1587 | &lockend, &cached_state); |
1581 | if (ret < 0) { | 1588 | if (ret < 0) { |
1582 | if (ret == -EAGAIN) | 1589 | if (ret == -EAGAIN) |
1583 | goto again; | 1590 | goto again; |
@@ -1612,9 +1619,16 @@ again: | |||
1612 | * we still have an outstanding extent for the chunk we actually | 1619 | * we still have an outstanding extent for the chunk we actually |
1613 | * managed to copy. | 1620 | * managed to copy. |
1614 | */ | 1621 | */ |
1615 | if (num_pages > dirty_pages) { | 1622 | num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, |
1616 | release_bytes = (num_pages - dirty_pages) << | 1623 | reserve_bytes); |
1617 | PAGE_CACHE_SHIFT; | 1624 | dirty_sectors = round_up(copied + sector_offset, |
1625 | root->sectorsize); | ||
1626 | dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, | ||
1627 | dirty_sectors); | ||
1628 | |||
1629 | if (num_sectors > dirty_sectors) { | ||
1630 | release_bytes = (write_bytes - copied) | ||
1631 | & ~((u64)root->sectorsize - 1); | ||
1618 | if (copied > 0) { | 1632 | if (copied > 0) { |
1619 | spin_lock(&BTRFS_I(inode)->lock); | 1633 | spin_lock(&BTRFS_I(inode)->lock); |
1620 | BTRFS_I(inode)->outstanding_extents++; | 1634 | BTRFS_I(inode)->outstanding_extents++; |
@@ -1633,7 +1647,8 @@ again: | |||
1633 | } | 1647 | } |
1634 | } | 1648 | } |
1635 | 1649 | ||
1636 | release_bytes = dirty_pages << PAGE_CACHE_SHIFT; | 1650 | release_bytes = round_up(copied + sector_offset, |
1651 | root->sectorsize); | ||
1637 | 1652 | ||
1638 | if (copied > 0) | 1653 | if (copied > 0) |
1639 | ret = btrfs_dirty_pages(root, inode, pages, | 1654 | ret = btrfs_dirty_pages(root, inode, pages, |
@@ -1654,8 +1669,7 @@ again: | |||
1654 | 1669 | ||
1655 | if (only_release_metadata && copied > 0) { | 1670 | if (only_release_metadata && copied > 0) { |
1656 | lockstart = round_down(pos, root->sectorsize); | 1671 | lockstart = round_down(pos, root->sectorsize); |
1657 | lockend = lockstart + | 1672 | lockend = round_up(pos + copied, root->sectorsize) - 1; |
1658 | (dirty_pages << PAGE_CACHE_SHIFT) - 1; | ||
1659 | 1673 | ||
1660 | set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 1674 | set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
1661 | lockend, EXTENT_NORESERVE, NULL, | 1675 | lockend, EXTENT_NORESERVE, NULL, |
@@ -1761,6 +1775,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1761 | ssize_t err; | 1775 | ssize_t err; |
1762 | loff_t pos; | 1776 | loff_t pos; |
1763 | size_t count; | 1777 | size_t count; |
1778 | loff_t oldsize; | ||
1779 | int clean_page = 0; | ||
1764 | 1780 | ||
1765 | inode_lock(inode); | 1781 | inode_lock(inode); |
1766 | err = generic_write_checks(iocb, from); | 1782 | err = generic_write_checks(iocb, from); |
@@ -1799,14 +1815,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1799 | pos = iocb->ki_pos; | 1815 | pos = iocb->ki_pos; |
1800 | count = iov_iter_count(from); | 1816 | count = iov_iter_count(from); |
1801 | start_pos = round_down(pos, root->sectorsize); | 1817 | start_pos = round_down(pos, root->sectorsize); |
1802 | if (start_pos > i_size_read(inode)) { | 1818 | oldsize = i_size_read(inode); |
1819 | if (start_pos > oldsize) { | ||
1803 | /* Expand hole size to cover write data, preventing empty gap */ | 1820 | /* Expand hole size to cover write data, preventing empty gap */ |
1804 | end_pos = round_up(pos + count, root->sectorsize); | 1821 | end_pos = round_up(pos + count, root->sectorsize); |
1805 | err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); | 1822 | err = btrfs_cont_expand(inode, oldsize, end_pos); |
1806 | if (err) { | 1823 | if (err) { |
1807 | inode_unlock(inode); | 1824 | inode_unlock(inode); |
1808 | goto out; | 1825 | goto out; |
1809 | } | 1826 | } |
1827 | if (start_pos > round_up(oldsize, root->sectorsize)) | ||
1828 | clean_page = 1; | ||
1810 | } | 1829 | } |
1811 | 1830 | ||
1812 | if (sync) | 1831 | if (sync) |
@@ -1818,6 +1837,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1818 | num_written = __btrfs_buffered_write(file, from, pos); | 1837 | num_written = __btrfs_buffered_write(file, from, pos); |
1819 | if (num_written > 0) | 1838 | if (num_written > 0) |
1820 | iocb->ki_pos = pos + num_written; | 1839 | iocb->ki_pos = pos + num_written; |
1840 | if (clean_page) | ||
1841 | pagecache_isize_extended(inode, oldsize, | ||
1842 | i_size_read(inode)); | ||
1821 | } | 1843 | } |
1822 | 1844 | ||
1823 | inode_unlock(inode); | 1845 | inode_unlock(inode); |
@@ -1825,7 +1847,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1825 | /* | 1847 | /* |
1826 | * We also have to set last_sub_trans to the current log transid, | 1848 | * We also have to set last_sub_trans to the current log transid, |
1827 | * otherwise subsequent syncs to a file that's been synced in this | 1849 | * otherwise subsequent syncs to a file that's been synced in this |
1828 | * transaction will appear to have already occured. | 1850 | * transaction will appear to have already occurred. |
1829 | */ | 1851 | */ |
1830 | spin_lock(&BTRFS_I(inode)->lock); | 1852 | spin_lock(&BTRFS_I(inode)->lock); |
1831 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1853 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
@@ -1996,10 +2018,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1996 | */ | 2018 | */ |
1997 | smp_mb(); | 2019 | smp_mb(); |
1998 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || | 2020 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || |
1999 | (BTRFS_I(inode)->last_trans <= | 2021 | (full_sync && BTRFS_I(inode)->last_trans <= |
2000 | root->fs_info->last_trans_committed && | 2022 | root->fs_info->last_trans_committed) || |
2001 | (full_sync || | 2023 | (!btrfs_have_ordered_extents_in_range(inode, start, len) && |
2002 | !btrfs_have_ordered_extents_in_range(inode, start, len)))) { | 2024 | BTRFS_I(inode)->last_trans |
2025 | <= root->fs_info->last_trans_committed)) { | ||
2003 | /* | 2026 | /* |
2004 | * We'v had everything committed since the last time we were | 2027 | * We'v had everything committed since the last time we were |
2005 | * modified so clear this flag in case it was set for whatever | 2028 | * modified so clear this flag in case it was set for whatever |
@@ -2293,10 +2316,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2293 | int ret = 0; | 2316 | int ret = 0; |
2294 | int err = 0; | 2317 | int err = 0; |
2295 | unsigned int rsv_count; | 2318 | unsigned int rsv_count; |
2296 | bool same_page; | 2319 | bool same_block; |
2297 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2320 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2298 | u64 ino_size; | 2321 | u64 ino_size; |
2299 | bool truncated_page = false; | 2322 | bool truncated_block = false; |
2300 | bool updated_inode = false; | 2323 | bool updated_inode = false; |
2301 | 2324 | ||
2302 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2325 | ret = btrfs_wait_ordered_range(inode, offset, len); |
@@ -2304,7 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2304 | return ret; | 2327 | return ret; |
2305 | 2328 | ||
2306 | inode_lock(inode); | 2329 | inode_lock(inode); |
2307 | ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | 2330 | ino_size = round_up(inode->i_size, root->sectorsize); |
2308 | ret = find_first_non_hole(inode, &offset, &len); | 2331 | ret = find_first_non_hole(inode, &offset, &len); |
2309 | if (ret < 0) | 2332 | if (ret < 0) |
2310 | goto out_only_mutex; | 2333 | goto out_only_mutex; |
@@ -2317,31 +2340,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2317 | lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | 2340 | lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); |
2318 | lockend = round_down(offset + len, | 2341 | lockend = round_down(offset + len, |
2319 | BTRFS_I(inode)->root->sectorsize) - 1; | 2342 | BTRFS_I(inode)->root->sectorsize) - 1; |
2320 | same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2343 | same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset)) |
2321 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2344 | == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1)); |
2322 | |||
2323 | /* | 2345 | /* |
2324 | * We needn't truncate any page which is beyond the end of the file | 2346 | * We needn't truncate any block which is beyond the end of the file |
2325 | * because we are sure there is no data there. | 2347 | * because we are sure there is no data there. |
2326 | */ | 2348 | */ |
2327 | /* | 2349 | /* |
2328 | * Only do this if we are in the same page and we aren't doing the | 2350 | * Only do this if we are in the same block and we aren't doing the |
2329 | * entire page. | 2351 | * entire block. |
2330 | */ | 2352 | */ |
2331 | if (same_page && len < PAGE_CACHE_SIZE) { | 2353 | if (same_block && len < root->sectorsize) { |
2332 | if (offset < ino_size) { | 2354 | if (offset < ino_size) { |
2333 | truncated_page = true; | 2355 | truncated_block = true; |
2334 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2356 | ret = btrfs_truncate_block(inode, offset, len, 0); |
2335 | } else { | 2357 | } else { |
2336 | ret = 0; | 2358 | ret = 0; |
2337 | } | 2359 | } |
2338 | goto out_only_mutex; | 2360 | goto out_only_mutex; |
2339 | } | 2361 | } |
2340 | 2362 | ||
2341 | /* zero back part of the first page */ | 2363 | /* zero back part of the first block */ |
2342 | if (offset < ino_size) { | 2364 | if (offset < ino_size) { |
2343 | truncated_page = true; | 2365 | truncated_block = true; |
2344 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2366 | ret = btrfs_truncate_block(inode, offset, 0, 0); |
2345 | if (ret) { | 2367 | if (ret) { |
2346 | inode_unlock(inode); | 2368 | inode_unlock(inode); |
2347 | return ret; | 2369 | return ret; |
@@ -2376,9 +2398,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2376 | if (!ret) { | 2398 | if (!ret) { |
2377 | /* zero the front end of the last page */ | 2399 | /* zero the front end of the last page */ |
2378 | if (tail_start + tail_len < ino_size) { | 2400 | if (tail_start + tail_len < ino_size) { |
2379 | truncated_page = true; | 2401 | truncated_block = true; |
2380 | ret = btrfs_truncate_page(inode, | 2402 | ret = btrfs_truncate_block(inode, |
2381 | tail_start + tail_len, 0, 1); | 2403 | tail_start + tail_len, |
2404 | 0, 1); | ||
2382 | if (ret) | 2405 | if (ret) |
2383 | goto out_only_mutex; | 2406 | goto out_only_mutex; |
2384 | } | 2407 | } |
@@ -2544,7 +2567,7 @@ out_trans: | |||
2544 | goto out_free; | 2567 | goto out_free; |
2545 | 2568 | ||
2546 | inode_inc_iversion(inode); | 2569 | inode_inc_iversion(inode); |
2547 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2570 | inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); |
2548 | 2571 | ||
2549 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2572 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2550 | ret = btrfs_update_inode(trans, root, inode); | 2573 | ret = btrfs_update_inode(trans, root, inode); |
@@ -2558,7 +2581,7 @@ out: | |||
2558 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2581 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
2559 | &cached_state, GFP_NOFS); | 2582 | &cached_state, GFP_NOFS); |
2560 | out_only_mutex: | 2583 | out_only_mutex: |
2561 | if (!updated_inode && truncated_page && !ret && !err) { | 2584 | if (!updated_inode && truncated_block && !ret && !err) { |
2562 | /* | 2585 | /* |
2563 | * If we only end up zeroing part of a page, we still need to | 2586 | * If we only end up zeroing part of a page, we still need to |
2564 | * update the inode item, so that all the time fields are | 2587 | * update the inode item, so that all the time fields are |
@@ -2611,7 +2634,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len) | |||
2611 | return 0; | 2634 | return 0; |
2612 | } | 2635 | } |
2613 | insert: | 2636 | insert: |
2614 | range = kmalloc(sizeof(*range), GFP_NOFS); | 2637 | range = kmalloc(sizeof(*range), GFP_KERNEL); |
2615 | if (!range) | 2638 | if (!range) |
2616 | return -ENOMEM; | 2639 | return -ENOMEM; |
2617 | range->start = start; | 2640 | range->start = start; |
@@ -2678,10 +2701,10 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2678 | } else if (offset + len > inode->i_size) { | 2701 | } else if (offset + len > inode->i_size) { |
2679 | /* | 2702 | /* |
2680 | * If we are fallocating from the end of the file onward we | 2703 | * If we are fallocating from the end of the file onward we |
2681 | * need to zero out the end of the page if i_size lands in the | 2704 | * need to zero out the end of the block if i_size lands in the |
2682 | * middle of a page. | 2705 | * middle of a block. |
2683 | */ | 2706 | */ |
2684 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); | 2707 | ret = btrfs_truncate_block(inode, inode->i_size, 0, 0); |
2685 | if (ret) | 2708 | if (ret) |
2686 | goto out; | 2709 | goto out; |
2687 | } | 2710 | } |
@@ -2712,7 +2735,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2712 | btrfs_put_ordered_extent(ordered); | 2735 | btrfs_put_ordered_extent(ordered); |
2713 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 2736 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
2714 | alloc_start, locked_end, | 2737 | alloc_start, locked_end, |
2715 | &cached_state, GFP_NOFS); | 2738 | &cached_state, GFP_KERNEL); |
2716 | /* | 2739 | /* |
2717 | * we can't wait on the range with the transaction | 2740 | * we can't wait on the range with the transaction |
2718 | * running or with the extent lock held | 2741 | * running or with the extent lock held |
@@ -2794,7 +2817,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2794 | if (IS_ERR(trans)) { | 2817 | if (IS_ERR(trans)) { |
2795 | ret = PTR_ERR(trans); | 2818 | ret = PTR_ERR(trans); |
2796 | } else { | 2819 | } else { |
2797 | inode->i_ctime = CURRENT_TIME; | 2820 | inode->i_ctime = current_fs_time(inode->i_sb); |
2798 | i_size_write(inode, actual_end); | 2821 | i_size_write(inode, actual_end); |
2799 | btrfs_ordered_update_i_size(inode, actual_end, NULL); | 2822 | btrfs_ordered_update_i_size(inode, actual_end, NULL); |
2800 | ret = btrfs_update_inode(trans, root, inode); | 2823 | ret = btrfs_update_inode(trans, root, inode); |
@@ -2806,7 +2829,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2806 | } | 2829 | } |
2807 | out_unlock: | 2830 | out_unlock: |
2808 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 2831 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
2809 | &cached_state, GFP_NOFS); | 2832 | &cached_state, GFP_KERNEL); |
2810 | out: | 2833 | out: |
2811 | /* | 2834 | /* |
2812 | * As we waited the extent range, the data_rsv_map must be empty | 2835 | * As we waited the extent range, the data_rsv_map must be empty |
@@ -2939,8 +2962,7 @@ const struct file_operations btrfs_file_operations = { | |||
2939 | 2962 | ||
2940 | void btrfs_auto_defrag_exit(void) | 2963 | void btrfs_auto_defrag_exit(void) |
2941 | { | 2964 | { |
2942 | if (btrfs_inode_defrag_cachep) | 2965 | kmem_cache_destroy(btrfs_inode_defrag_cachep); |
2943 | kmem_cache_destroy(btrfs_inode_defrag_cachep); | ||
2944 | } | 2966 | } |
2945 | 2967 | ||
2946 | int btrfs_auto_defrag_init(void) | 2968 | int btrfs_auto_defrag_init(void) |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index e50316c4af15..1f0ec19b23f6 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -556,6 +556,9 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid) | |||
556 | mutex_lock(&root->objectid_mutex); | 556 | mutex_lock(&root->objectid_mutex); |
557 | 557 | ||
558 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { | 558 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { |
559 | btrfs_warn(root->fs_info, | ||
560 | "the objectid of root %llu reaches its highest value", | ||
561 | root->root_key.objectid); | ||
559 | ret = -ENOSPC; | 562 | ret = -ENOSPC; |
560 | goto out; | 563 | goto out; |
561 | } | 564 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d96f5cf38a2d..41a5688ffdfe 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -263,7 +263,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, | |||
263 | data_len = compressed_size; | 263 | data_len = compressed_size; |
264 | 264 | ||
265 | if (start > 0 || | 265 | if (start > 0 || |
266 | actual_end > PAGE_CACHE_SIZE || | 266 | actual_end > root->sectorsize || |
267 | data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || | 267 | data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || |
268 | (!compressed_size && | 268 | (!compressed_size && |
269 | (actual_end & (root->sectorsize - 1)) == 0) || | 269 | (actual_end & (root->sectorsize - 1)) == 0) || |
@@ -2002,7 +2002,8 @@ again: | |||
2002 | if (PagePrivate2(page)) | 2002 | if (PagePrivate2(page)) |
2003 | goto out; | 2003 | goto out; |
2004 | 2004 | ||
2005 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 2005 | ordered = btrfs_lookup_ordered_range(inode, page_start, |
2006 | PAGE_CACHE_SIZE); | ||
2006 | if (ordered) { | 2007 | if (ordered) { |
2007 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, | 2008 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, |
2008 | page_end, &cached_state, GFP_NOFS); | 2009 | page_end, &cached_state, GFP_NOFS); |
@@ -4013,7 +4014,8 @@ err: | |||
4013 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 4014 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
4014 | inode_inc_iversion(inode); | 4015 | inode_inc_iversion(inode); |
4015 | inode_inc_iversion(dir); | 4016 | inode_inc_iversion(dir); |
4016 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 4017 | inode->i_ctime = dir->i_mtime = |
4018 | dir->i_ctime = current_fs_time(inode->i_sb); | ||
4017 | ret = btrfs_update_inode(trans, root, dir); | 4019 | ret = btrfs_update_inode(trans, root, dir); |
4018 | out: | 4020 | out: |
4019 | return ret; | 4021 | return ret; |
@@ -4156,7 +4158,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
4156 | 4158 | ||
4157 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 4159 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
4158 | inode_inc_iversion(dir); | 4160 | inode_inc_iversion(dir); |
4159 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 4161 | dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); |
4160 | ret = btrfs_update_inode_fallback(trans, root, dir); | 4162 | ret = btrfs_update_inode_fallback(trans, root, dir); |
4161 | if (ret) | 4163 | if (ret) |
4162 | btrfs_abort_transaction(trans, root, ret); | 4164 | btrfs_abort_transaction(trans, root, ret); |
@@ -4211,11 +4213,20 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, | |||
4211 | { | 4213 | { |
4212 | int ret; | 4214 | int ret; |
4213 | 4215 | ||
4216 | /* | ||
4217 | * This is only used to apply pressure to the enospc system, we don't | ||
4218 | * intend to use this reservation at all. | ||
4219 | */ | ||
4214 | bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); | 4220 | bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); |
4221 | bytes_deleted *= root->nodesize; | ||
4215 | ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, | 4222 | ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, |
4216 | bytes_deleted, BTRFS_RESERVE_NO_FLUSH); | 4223 | bytes_deleted, BTRFS_RESERVE_NO_FLUSH); |
4217 | if (!ret) | 4224 | if (!ret) { |
4225 | trace_btrfs_space_reservation(root->fs_info, "transaction", | ||
4226 | trans->transid, | ||
4227 | bytes_deleted, 1); | ||
4218 | trans->bytes_reserved += bytes_deleted; | 4228 | trans->bytes_reserved += bytes_deleted; |
4229 | } | ||
4219 | return ret; | 4230 | return ret; |
4220 | 4231 | ||
4221 | } | 4232 | } |
@@ -4248,7 +4259,8 @@ static int truncate_inline_extent(struct inode *inode, | |||
4248 | * read the extent item from disk (data not in the page cache). | 4259 | * read the extent item from disk (data not in the page cache). |
4249 | */ | 4260 | */ |
4250 | btrfs_release_path(path); | 4261 | btrfs_release_path(path); |
4251 | return btrfs_truncate_page(inode, offset, page_end - offset, 0); | 4262 | return btrfs_truncate_block(inode, offset, page_end - offset, |
4263 | 0); | ||
4252 | } | 4264 | } |
4253 | 4265 | ||
4254 | btrfs_set_file_extent_ram_bytes(leaf, fi, size); | 4266 | btrfs_set_file_extent_ram_bytes(leaf, fi, size); |
@@ -4601,17 +4613,17 @@ error: | |||
4601 | } | 4613 | } |
4602 | 4614 | ||
4603 | /* | 4615 | /* |
4604 | * btrfs_truncate_page - read, zero a chunk and write a page | 4616 | * btrfs_truncate_block - read, zero a chunk and write a block |
4605 | * @inode - inode that we're zeroing | 4617 | * @inode - inode that we're zeroing |
4606 | * @from - the offset to start zeroing | 4618 | * @from - the offset to start zeroing |
4607 | * @len - the length to zero, 0 to zero the entire range respective to the | 4619 | * @len - the length to zero, 0 to zero the entire range respective to the |
4608 | * offset | 4620 | * offset |
4609 | * @front - zero up to the offset instead of from the offset on | 4621 | * @front - zero up to the offset instead of from the offset on |
4610 | * | 4622 | * |
4611 | * This will find the page for the "from" offset and cow the page and zero the | 4623 | * This will find the block for the "from" offset and cow the block and zero the |
4612 | * part we want to zero. This is used with truncate and hole punching. | 4624 | * part we want to zero. This is used with truncate and hole punching. |
4613 | */ | 4625 | */ |
4614 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | 4626 | int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, |
4615 | int front) | 4627 | int front) |
4616 | { | 4628 | { |
4617 | struct address_space *mapping = inode->i_mapping; | 4629 | struct address_space *mapping = inode->i_mapping; |
@@ -4622,18 +4634,19 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | |||
4622 | char *kaddr; | 4634 | char *kaddr; |
4623 | u32 blocksize = root->sectorsize; | 4635 | u32 blocksize = root->sectorsize; |
4624 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | 4636 | pgoff_t index = from >> PAGE_CACHE_SHIFT; |
4625 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 4637 | unsigned offset = from & (blocksize - 1); |
4626 | struct page *page; | 4638 | struct page *page; |
4627 | gfp_t mask = btrfs_alloc_write_mask(mapping); | 4639 | gfp_t mask = btrfs_alloc_write_mask(mapping); |
4628 | int ret = 0; | 4640 | int ret = 0; |
4629 | u64 page_start; | 4641 | u64 block_start; |
4630 | u64 page_end; | 4642 | u64 block_end; |
4631 | 4643 | ||
4632 | if ((offset & (blocksize - 1)) == 0 && | 4644 | if ((offset & (blocksize - 1)) == 0 && |
4633 | (!len || ((len & (blocksize - 1)) == 0))) | 4645 | (!len || ((len & (blocksize - 1)) == 0))) |
4634 | goto out; | 4646 | goto out; |
4647 | |||
4635 | ret = btrfs_delalloc_reserve_space(inode, | 4648 | ret = btrfs_delalloc_reserve_space(inode, |
4636 | round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE); | 4649 | round_down(from, blocksize), blocksize); |
4637 | if (ret) | 4650 | if (ret) |
4638 | goto out; | 4651 | goto out; |
4639 | 4652 | ||
@@ -4641,14 +4654,14 @@ again: | |||
4641 | page = find_or_create_page(mapping, index, mask); | 4654 | page = find_or_create_page(mapping, index, mask); |
4642 | if (!page) { | 4655 | if (!page) { |
4643 | btrfs_delalloc_release_space(inode, | 4656 | btrfs_delalloc_release_space(inode, |
4644 | round_down(from, PAGE_CACHE_SIZE), | 4657 | round_down(from, blocksize), |
4645 | PAGE_CACHE_SIZE); | 4658 | blocksize); |
4646 | ret = -ENOMEM; | 4659 | ret = -ENOMEM; |
4647 | goto out; | 4660 | goto out; |
4648 | } | 4661 | } |
4649 | 4662 | ||
4650 | page_start = page_offset(page); | 4663 | block_start = round_down(from, blocksize); |
4651 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 4664 | block_end = block_start + blocksize - 1; |
4652 | 4665 | ||
4653 | if (!PageUptodate(page)) { | 4666 | if (!PageUptodate(page)) { |
4654 | ret = btrfs_readpage(NULL, page); | 4667 | ret = btrfs_readpage(NULL, page); |
@@ -4665,12 +4678,12 @@ again: | |||
4665 | } | 4678 | } |
4666 | wait_on_page_writeback(page); | 4679 | wait_on_page_writeback(page); |
4667 | 4680 | ||
4668 | lock_extent_bits(io_tree, page_start, page_end, &cached_state); | 4681 | lock_extent_bits(io_tree, block_start, block_end, &cached_state); |
4669 | set_page_extent_mapped(page); | 4682 | set_page_extent_mapped(page); |
4670 | 4683 | ||
4671 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 4684 | ordered = btrfs_lookup_ordered_extent(inode, block_start); |
4672 | if (ordered) { | 4685 | if (ordered) { |
4673 | unlock_extent_cached(io_tree, page_start, page_end, | 4686 | unlock_extent_cached(io_tree, block_start, block_end, |
4674 | &cached_state, GFP_NOFS); | 4687 | &cached_state, GFP_NOFS); |
4675 | unlock_page(page); | 4688 | unlock_page(page); |
4676 | page_cache_release(page); | 4689 | page_cache_release(page); |
@@ -4679,39 +4692,41 @@ again: | |||
4679 | goto again; | 4692 | goto again; |
4680 | } | 4693 | } |
4681 | 4694 | ||
4682 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 4695 | clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end, |
4683 | EXTENT_DIRTY | EXTENT_DELALLOC | | 4696 | EXTENT_DIRTY | EXTENT_DELALLOC | |
4684 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | 4697 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
4685 | 0, 0, &cached_state, GFP_NOFS); | 4698 | 0, 0, &cached_state, GFP_NOFS); |
4686 | 4699 | ||
4687 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 4700 | ret = btrfs_set_extent_delalloc(inode, block_start, block_end, |
4688 | &cached_state); | 4701 | &cached_state); |
4689 | if (ret) { | 4702 | if (ret) { |
4690 | unlock_extent_cached(io_tree, page_start, page_end, | 4703 | unlock_extent_cached(io_tree, block_start, block_end, |
4691 | &cached_state, GFP_NOFS); | 4704 | &cached_state, GFP_NOFS); |
4692 | goto out_unlock; | 4705 | goto out_unlock; |
4693 | } | 4706 | } |
4694 | 4707 | ||
4695 | if (offset != PAGE_CACHE_SIZE) { | 4708 | if (offset != blocksize) { |
4696 | if (!len) | 4709 | if (!len) |
4697 | len = PAGE_CACHE_SIZE - offset; | 4710 | len = blocksize - offset; |
4698 | kaddr = kmap(page); | 4711 | kaddr = kmap(page); |
4699 | if (front) | 4712 | if (front) |
4700 | memset(kaddr, 0, offset); | 4713 | memset(kaddr + (block_start - page_offset(page)), |
4714 | 0, offset); | ||
4701 | else | 4715 | else |
4702 | memset(kaddr + offset, 0, len); | 4716 | memset(kaddr + (block_start - page_offset(page)) + offset, |
4717 | 0, len); | ||
4703 | flush_dcache_page(page); | 4718 | flush_dcache_page(page); |
4704 | kunmap(page); | 4719 | kunmap(page); |
4705 | } | 4720 | } |
4706 | ClearPageChecked(page); | 4721 | ClearPageChecked(page); |
4707 | set_page_dirty(page); | 4722 | set_page_dirty(page); |
4708 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, | 4723 | unlock_extent_cached(io_tree, block_start, block_end, &cached_state, |
4709 | GFP_NOFS); | 4724 | GFP_NOFS); |
4710 | 4725 | ||
4711 | out_unlock: | 4726 | out_unlock: |
4712 | if (ret) | 4727 | if (ret) |
4713 | btrfs_delalloc_release_space(inode, page_start, | 4728 | btrfs_delalloc_release_space(inode, block_start, |
4714 | PAGE_CACHE_SIZE); | 4729 | blocksize); |
4715 | unlock_page(page); | 4730 | unlock_page(page); |
4716 | page_cache_release(page); | 4731 | page_cache_release(page); |
4717 | out: | 4732 | out: |
@@ -4782,11 +4797,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
4782 | int err = 0; | 4797 | int err = 0; |
4783 | 4798 | ||
4784 | /* | 4799 | /* |
4785 | * If our size started in the middle of a page we need to zero out the | 4800 | * If our size started in the middle of a block we need to zero out the |
4786 | * rest of the page before we expand the i_size, otherwise we could | 4801 | * rest of the block before we expand the i_size, otherwise we could |
4787 | * expose stale data. | 4802 | * expose stale data. |
4788 | */ | 4803 | */ |
4789 | err = btrfs_truncate_page(inode, oldsize, 0, 0); | 4804 | err = btrfs_truncate_block(inode, oldsize, 0, 0); |
4790 | if (err) | 4805 | if (err) |
4791 | return err; | 4806 | return err; |
4792 | 4807 | ||
@@ -4895,7 +4910,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
4895 | } | 4910 | } |
4896 | 4911 | ||
4897 | if (newsize > oldsize) { | 4912 | if (newsize > oldsize) { |
4898 | truncate_pagecache(inode, newsize); | ||
4899 | /* | 4913 | /* |
4900 | * Don't do an expanding truncate while snapshoting is ongoing. | 4914 | * Don't do an expanding truncate while snapshoting is ongoing. |
4901 | * This is to ensure the snapshot captures a fully consistent | 4915 | * This is to ensure the snapshot captures a fully consistent |
@@ -4918,6 +4932,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
4918 | 4932 | ||
4919 | i_size_write(inode, newsize); | 4933 | i_size_write(inode, newsize); |
4920 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); | 4934 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
4935 | pagecache_isize_extended(inode, oldsize, newsize); | ||
4921 | ret = btrfs_update_inode(trans, root, inode); | 4936 | ret = btrfs_update_inode(trans, root, inode); |
4922 | btrfs_end_write_no_snapshoting(root); | 4937 | btrfs_end_write_no_snapshoting(root); |
4923 | btrfs_end_transaction(trans, root); | 4938 | btrfs_end_transaction(trans, root); |
@@ -5588,7 +5603,7 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
5588 | inode->i_op = &btrfs_dir_ro_inode_operations; | 5603 | inode->i_op = &btrfs_dir_ro_inode_operations; |
5589 | inode->i_fop = &simple_dir_operations; | 5604 | inode->i_fop = &simple_dir_operations; |
5590 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; | 5605 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; |
5591 | inode->i_mtime = CURRENT_TIME; | 5606 | inode->i_mtime = current_fs_time(inode->i_sb); |
5592 | inode->i_atime = inode->i_mtime; | 5607 | inode->i_atime = inode->i_mtime; |
5593 | inode->i_ctime = inode->i_mtime; | 5608 | inode->i_ctime = inode->i_mtime; |
5594 | BTRFS_I(inode)->i_otime = inode->i_mtime; | 5609 | BTRFS_I(inode)->i_otime = inode->i_mtime; |
@@ -5790,7 +5805,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5790 | if (name_len <= sizeof(tmp_name)) { | 5805 | if (name_len <= sizeof(tmp_name)) { |
5791 | name_ptr = tmp_name; | 5806 | name_ptr = tmp_name; |
5792 | } else { | 5807 | } else { |
5793 | name_ptr = kmalloc(name_len, GFP_NOFS); | 5808 | name_ptr = kmalloc(name_len, GFP_KERNEL); |
5794 | if (!name_ptr) { | 5809 | if (!name_ptr) { |
5795 | ret = -ENOMEM; | 5810 | ret = -ENOMEM; |
5796 | goto err; | 5811 | goto err; |
@@ -6172,7 +6187,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
6172 | inode_init_owner(inode, dir, mode); | 6187 | inode_init_owner(inode, dir, mode); |
6173 | inode_set_bytes(inode, 0); | 6188 | inode_set_bytes(inode, 0); |
6174 | 6189 | ||
6175 | inode->i_mtime = CURRENT_TIME; | 6190 | inode->i_mtime = current_fs_time(inode->i_sb); |
6176 | inode->i_atime = inode->i_mtime; | 6191 | inode->i_atime = inode->i_mtime; |
6177 | inode->i_ctime = inode->i_mtime; | 6192 | inode->i_ctime = inode->i_mtime; |
6178 | BTRFS_I(inode)->i_otime = inode->i_mtime; | 6193 | BTRFS_I(inode)->i_otime = inode->i_mtime; |
@@ -6285,7 +6300,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
6285 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 6300 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
6286 | name_len * 2); | 6301 | name_len * 2); |
6287 | inode_inc_iversion(parent_inode); | 6302 | inode_inc_iversion(parent_inode); |
6288 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 6303 | parent_inode->i_mtime = parent_inode->i_ctime = |
6304 | current_fs_time(parent_inode->i_sb); | ||
6289 | ret = btrfs_update_inode(trans, root, parent_inode); | 6305 | ret = btrfs_update_inode(trans, root, parent_inode); |
6290 | if (ret) | 6306 | if (ret) |
6291 | btrfs_abort_transaction(trans, root, ret); | 6307 | btrfs_abort_transaction(trans, root, ret); |
@@ -6503,7 +6519,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
6503 | BTRFS_I(inode)->dir_index = 0ULL; | 6519 | BTRFS_I(inode)->dir_index = 0ULL; |
6504 | inc_nlink(inode); | 6520 | inc_nlink(inode); |
6505 | inode_inc_iversion(inode); | 6521 | inode_inc_iversion(inode); |
6506 | inode->i_ctime = CURRENT_TIME; | 6522 | inode->i_ctime = current_fs_time(inode->i_sb); |
6507 | ihold(inode); | 6523 | ihold(inode); |
6508 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); | 6524 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); |
6509 | 6525 | ||
@@ -7414,7 +7430,26 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
7414 | cached_state, GFP_NOFS); | 7430 | cached_state, GFP_NOFS); |
7415 | 7431 | ||
7416 | if (ordered) { | 7432 | if (ordered) { |
7417 | btrfs_start_ordered_extent(inode, ordered, 1); | 7433 | /* |
7434 | * If we are doing a DIO read and the ordered extent we | ||
7435 | * found is for a buffered write, we can not wait for it | ||
7436 | * to complete and retry, because if we do so we can | ||
7437 | * deadlock with concurrent buffered writes on page | ||
7438 | * locks. This happens only if our DIO read covers more | ||
7439 | * than one extent map, if at this point has already | ||
7440 | * created an ordered extent for a previous extent map | ||
7441 | * and locked its range in the inode's io tree, and a | ||
7442 | * concurrent write against that previous extent map's | ||
7443 | * range and this range started (we unlock the ranges | ||
7444 | * in the io tree only when the bios complete and | ||
7445 | * buffered writes always lock pages before attempting | ||
7446 | * to lock range in the io tree). | ||
7447 | */ | ||
7448 | if (writing || | ||
7449 | test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) | ||
7450 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
7451 | else | ||
7452 | ret = -ENOTBLK; | ||
7418 | btrfs_put_ordered_extent(ordered); | 7453 | btrfs_put_ordered_extent(ordered); |
7419 | } else { | 7454 | } else { |
7420 | /* | 7455 | /* |
@@ -7431,9 +7466,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
7431 | * that page. | 7466 | * that page. |
7432 | */ | 7467 | */ |
7433 | ret = -ENOTBLK; | 7468 | ret = -ENOTBLK; |
7434 | break; | ||
7435 | } | 7469 | } |
7436 | 7470 | ||
7471 | if (ret) | ||
7472 | break; | ||
7473 | |||
7437 | cond_resched(); | 7474 | cond_resched(); |
7438 | } | 7475 | } |
7439 | 7476 | ||
@@ -7764,9 +7801,9 @@ static int btrfs_check_dio_repairable(struct inode *inode, | |||
7764 | } | 7801 | } |
7765 | 7802 | ||
7766 | static int dio_read_error(struct inode *inode, struct bio *failed_bio, | 7803 | static int dio_read_error(struct inode *inode, struct bio *failed_bio, |
7767 | struct page *page, u64 start, u64 end, | 7804 | struct page *page, unsigned int pgoff, |
7768 | int failed_mirror, bio_end_io_t *repair_endio, | 7805 | u64 start, u64 end, int failed_mirror, |
7769 | void *repair_arg) | 7806 | bio_end_io_t *repair_endio, void *repair_arg) |
7770 | { | 7807 | { |
7771 | struct io_failure_record *failrec; | 7808 | struct io_failure_record *failrec; |
7772 | struct bio *bio; | 7809 | struct bio *bio; |
@@ -7787,7 +7824,9 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, | |||
7787 | return -EIO; | 7824 | return -EIO; |
7788 | } | 7825 | } |
7789 | 7826 | ||
7790 | if (failed_bio->bi_vcnt > 1) | 7827 | if ((failed_bio->bi_vcnt > 1) |
7828 | || (failed_bio->bi_io_vec->bv_len | ||
7829 | > BTRFS_I(inode)->root->sectorsize)) | ||
7791 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | 7830 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; |
7792 | else | 7831 | else |
7793 | read_mode = READ_SYNC; | 7832 | read_mode = READ_SYNC; |
@@ -7795,7 +7834,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, | |||
7795 | isector = start - btrfs_io_bio(failed_bio)->logical; | 7834 | isector = start - btrfs_io_bio(failed_bio)->logical; |
7796 | isector >>= inode->i_sb->s_blocksize_bits; | 7835 | isector >>= inode->i_sb->s_blocksize_bits; |
7797 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | 7836 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, |
7798 | 0, isector, repair_endio, repair_arg); | 7837 | pgoff, isector, repair_endio, repair_arg); |
7799 | if (!bio) { | 7838 | if (!bio) { |
7800 | free_io_failure(inode, failrec); | 7839 | free_io_failure(inode, failrec); |
7801 | return -EIO; | 7840 | return -EIO; |
@@ -7825,12 +7864,17 @@ struct btrfs_retry_complete { | |||
7825 | static void btrfs_retry_endio_nocsum(struct bio *bio) | 7864 | static void btrfs_retry_endio_nocsum(struct bio *bio) |
7826 | { | 7865 | { |
7827 | struct btrfs_retry_complete *done = bio->bi_private; | 7866 | struct btrfs_retry_complete *done = bio->bi_private; |
7867 | struct inode *inode; | ||
7828 | struct bio_vec *bvec; | 7868 | struct bio_vec *bvec; |
7829 | int i; | 7869 | int i; |
7830 | 7870 | ||
7831 | if (bio->bi_error) | 7871 | if (bio->bi_error) |
7832 | goto end; | 7872 | goto end; |
7833 | 7873 | ||
7874 | ASSERT(bio->bi_vcnt == 1); | ||
7875 | inode = bio->bi_io_vec->bv_page->mapping->host; | ||
7876 | ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize); | ||
7877 | |||
7834 | done->uptodate = 1; | 7878 | done->uptodate = 1; |
7835 | bio_for_each_segment_all(bvec, bio, i) | 7879 | bio_for_each_segment_all(bvec, bio, i) |
7836 | clean_io_failure(done->inode, done->start, bvec->bv_page, 0); | 7880 | clean_io_failure(done->inode, done->start, bvec->bv_page, 0); |
@@ -7842,25 +7886,35 @@ end: | |||
7842 | static int __btrfs_correct_data_nocsum(struct inode *inode, | 7886 | static int __btrfs_correct_data_nocsum(struct inode *inode, |
7843 | struct btrfs_io_bio *io_bio) | 7887 | struct btrfs_io_bio *io_bio) |
7844 | { | 7888 | { |
7889 | struct btrfs_fs_info *fs_info; | ||
7845 | struct bio_vec *bvec; | 7890 | struct bio_vec *bvec; |
7846 | struct btrfs_retry_complete done; | 7891 | struct btrfs_retry_complete done; |
7847 | u64 start; | 7892 | u64 start; |
7893 | unsigned int pgoff; | ||
7894 | u32 sectorsize; | ||
7895 | int nr_sectors; | ||
7848 | int i; | 7896 | int i; |
7849 | int ret; | 7897 | int ret; |
7850 | 7898 | ||
7899 | fs_info = BTRFS_I(inode)->root->fs_info; | ||
7900 | sectorsize = BTRFS_I(inode)->root->sectorsize; | ||
7901 | |||
7851 | start = io_bio->logical; | 7902 | start = io_bio->logical; |
7852 | done.inode = inode; | 7903 | done.inode = inode; |
7853 | 7904 | ||
7854 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | 7905 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { |
7855 | try_again: | 7906 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); |
7907 | pgoff = bvec->bv_offset; | ||
7908 | |||
7909 | next_block_or_try_again: | ||
7856 | done.uptodate = 0; | 7910 | done.uptodate = 0; |
7857 | done.start = start; | 7911 | done.start = start; |
7858 | init_completion(&done.done); | 7912 | init_completion(&done.done); |
7859 | 7913 | ||
7860 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, | 7914 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, |
7861 | start + bvec->bv_len - 1, | 7915 | pgoff, start, start + sectorsize - 1, |
7862 | io_bio->mirror_num, | 7916 | io_bio->mirror_num, |
7863 | btrfs_retry_endio_nocsum, &done); | 7917 | btrfs_retry_endio_nocsum, &done); |
7864 | if (ret) | 7918 | if (ret) |
7865 | return ret; | 7919 | return ret; |
7866 | 7920 | ||
@@ -7868,10 +7922,15 @@ try_again: | |||
7868 | 7922 | ||
7869 | if (!done.uptodate) { | 7923 | if (!done.uptodate) { |
7870 | /* We might have another mirror, so try again */ | 7924 | /* We might have another mirror, so try again */ |
7871 | goto try_again; | 7925 | goto next_block_or_try_again; |
7872 | } | 7926 | } |
7873 | 7927 | ||
7874 | start += bvec->bv_len; | 7928 | start += sectorsize; |
7929 | |||
7930 | if (nr_sectors--) { | ||
7931 | pgoff += sectorsize; | ||
7932 | goto next_block_or_try_again; | ||
7933 | } | ||
7875 | } | 7934 | } |
7876 | 7935 | ||
7877 | return 0; | 7936 | return 0; |
@@ -7881,7 +7940,9 @@ static void btrfs_retry_endio(struct bio *bio) | |||
7881 | { | 7940 | { |
7882 | struct btrfs_retry_complete *done = bio->bi_private; | 7941 | struct btrfs_retry_complete *done = bio->bi_private; |
7883 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | 7942 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
7943 | struct inode *inode; | ||
7884 | struct bio_vec *bvec; | 7944 | struct bio_vec *bvec; |
7945 | u64 start; | ||
7885 | int uptodate; | 7946 | int uptodate; |
7886 | int ret; | 7947 | int ret; |
7887 | int i; | 7948 | int i; |
@@ -7890,13 +7951,20 @@ static void btrfs_retry_endio(struct bio *bio) | |||
7890 | goto end; | 7951 | goto end; |
7891 | 7952 | ||
7892 | uptodate = 1; | 7953 | uptodate = 1; |
7954 | |||
7955 | start = done->start; | ||
7956 | |||
7957 | ASSERT(bio->bi_vcnt == 1); | ||
7958 | inode = bio->bi_io_vec->bv_page->mapping->host; | ||
7959 | ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize); | ||
7960 | |||
7893 | bio_for_each_segment_all(bvec, bio, i) { | 7961 | bio_for_each_segment_all(bvec, bio, i) { |
7894 | ret = __readpage_endio_check(done->inode, io_bio, i, | 7962 | ret = __readpage_endio_check(done->inode, io_bio, i, |
7895 | bvec->bv_page, 0, | 7963 | bvec->bv_page, bvec->bv_offset, |
7896 | done->start, bvec->bv_len); | 7964 | done->start, bvec->bv_len); |
7897 | if (!ret) | 7965 | if (!ret) |
7898 | clean_io_failure(done->inode, done->start, | 7966 | clean_io_failure(done->inode, done->start, |
7899 | bvec->bv_page, 0); | 7967 | bvec->bv_page, bvec->bv_offset); |
7900 | else | 7968 | else |
7901 | uptodate = 0; | 7969 | uptodate = 0; |
7902 | } | 7970 | } |
@@ -7910,20 +7978,34 @@ end: | |||
7910 | static int __btrfs_subio_endio_read(struct inode *inode, | 7978 | static int __btrfs_subio_endio_read(struct inode *inode, |
7911 | struct btrfs_io_bio *io_bio, int err) | 7979 | struct btrfs_io_bio *io_bio, int err) |
7912 | { | 7980 | { |
7981 | struct btrfs_fs_info *fs_info; | ||
7913 | struct bio_vec *bvec; | 7982 | struct bio_vec *bvec; |
7914 | struct btrfs_retry_complete done; | 7983 | struct btrfs_retry_complete done; |
7915 | u64 start; | 7984 | u64 start; |
7916 | u64 offset = 0; | 7985 | u64 offset = 0; |
7986 | u32 sectorsize; | ||
7987 | int nr_sectors; | ||
7988 | unsigned int pgoff; | ||
7989 | int csum_pos; | ||
7917 | int i; | 7990 | int i; |
7918 | int ret; | 7991 | int ret; |
7919 | 7992 | ||
7993 | fs_info = BTRFS_I(inode)->root->fs_info; | ||
7994 | sectorsize = BTRFS_I(inode)->root->sectorsize; | ||
7995 | |||
7920 | err = 0; | 7996 | err = 0; |
7921 | start = io_bio->logical; | 7997 | start = io_bio->logical; |
7922 | done.inode = inode; | 7998 | done.inode = inode; |
7923 | 7999 | ||
7924 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | 8000 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { |
7925 | ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, | 8001 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); |
7926 | 0, start, bvec->bv_len); | 8002 | |
8003 | pgoff = bvec->bv_offset; | ||
8004 | next_block: | ||
8005 | csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); | ||
8006 | ret = __readpage_endio_check(inode, io_bio, csum_pos, | ||
8007 | bvec->bv_page, pgoff, start, | ||
8008 | sectorsize); | ||
7927 | if (likely(!ret)) | 8009 | if (likely(!ret)) |
7928 | goto next; | 8010 | goto next; |
7929 | try_again: | 8011 | try_again: |
@@ -7931,10 +8013,10 @@ try_again: | |||
7931 | done.start = start; | 8013 | done.start = start; |
7932 | init_completion(&done.done); | 8014 | init_completion(&done.done); |
7933 | 8015 | ||
7934 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, | 8016 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, |
7935 | start + bvec->bv_len - 1, | 8017 | pgoff, start, start + sectorsize - 1, |
7936 | io_bio->mirror_num, | 8018 | io_bio->mirror_num, |
7937 | btrfs_retry_endio, &done); | 8019 | btrfs_retry_endio, &done); |
7938 | if (ret) { | 8020 | if (ret) { |
7939 | err = ret; | 8021 | err = ret; |
7940 | goto next; | 8022 | goto next; |
@@ -7947,8 +8029,15 @@ try_again: | |||
7947 | goto try_again; | 8029 | goto try_again; |
7948 | } | 8030 | } |
7949 | next: | 8031 | next: |
7950 | offset += bvec->bv_len; | 8032 | offset += sectorsize; |
7951 | start += bvec->bv_len; | 8033 | start += sectorsize; |
8034 | |||
8035 | ASSERT(nr_sectors); | ||
8036 | |||
8037 | if (--nr_sectors) { | ||
8038 | pgoff += sectorsize; | ||
8039 | goto next_block; | ||
8040 | } | ||
7952 | } | 8041 | } |
7953 | 8042 | ||
7954 | return err; | 8043 | return err; |
@@ -8202,9 +8291,11 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
8202 | u64 file_offset = dip->logical_offset; | 8291 | u64 file_offset = dip->logical_offset; |
8203 | u64 submit_len = 0; | 8292 | u64 submit_len = 0; |
8204 | u64 map_length; | 8293 | u64 map_length; |
8205 | int nr_pages = 0; | 8294 | u32 blocksize = root->sectorsize; |
8206 | int ret; | ||
8207 | int async_submit = 0; | 8295 | int async_submit = 0; |
8296 | int nr_sectors; | ||
8297 | int ret; | ||
8298 | int i; | ||
8208 | 8299 | ||
8209 | map_length = orig_bio->bi_iter.bi_size; | 8300 | map_length = orig_bio->bi_iter.bi_size; |
8210 | ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, | 8301 | ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, |
@@ -8234,9 +8325,12 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
8234 | atomic_inc(&dip->pending_bios); | 8325 | atomic_inc(&dip->pending_bios); |
8235 | 8326 | ||
8236 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | 8327 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { |
8237 | if (map_length < submit_len + bvec->bv_len || | 8328 | nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len); |
8238 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | 8329 | i = 0; |
8239 | bvec->bv_offset) < bvec->bv_len) { | 8330 | next_block: |
8331 | if (unlikely(map_length < submit_len + blocksize || | ||
8332 | bio_add_page(bio, bvec->bv_page, blocksize, | ||
8333 | bvec->bv_offset + (i * blocksize)) < blocksize)) { | ||
8240 | /* | 8334 | /* |
8241 | * inc the count before we submit the bio so | 8335 | * inc the count before we submit the bio so |
8242 | * we know the end IO handler won't happen before | 8336 | * we know the end IO handler won't happen before |
@@ -8257,7 +8351,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
8257 | file_offset += submit_len; | 8351 | file_offset += submit_len; |
8258 | 8352 | ||
8259 | submit_len = 0; | 8353 | submit_len = 0; |
8260 | nr_pages = 0; | ||
8261 | 8354 | ||
8262 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | 8355 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, |
8263 | start_sector, GFP_NOFS); | 8356 | start_sector, GFP_NOFS); |
@@ -8275,9 +8368,14 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
8275 | bio_put(bio); | 8368 | bio_put(bio); |
8276 | goto out_err; | 8369 | goto out_err; |
8277 | } | 8370 | } |
8371 | |||
8372 | goto next_block; | ||
8278 | } else { | 8373 | } else { |
8279 | submit_len += bvec->bv_len; | 8374 | submit_len += blocksize; |
8280 | nr_pages++; | 8375 | if (--nr_sectors) { |
8376 | i++; | ||
8377 | goto next_block; | ||
8378 | } | ||
8281 | bvec++; | 8379 | bvec++; |
8282 | } | 8380 | } |
8283 | } | 8381 | } |
@@ -8642,6 +8740,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, | |||
8642 | struct extent_state *cached_state = NULL; | 8740 | struct extent_state *cached_state = NULL; |
8643 | u64 page_start = page_offset(page); | 8741 | u64 page_start = page_offset(page); |
8644 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 8742 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
8743 | u64 start; | ||
8744 | u64 end; | ||
8645 | int inode_evicting = inode->i_state & I_FREEING; | 8745 | int inode_evicting = inode->i_state & I_FREEING; |
8646 | 8746 | ||
8647 | /* | 8747 | /* |
@@ -8661,14 +8761,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, | |||
8661 | 8761 | ||
8662 | if (!inode_evicting) | 8762 | if (!inode_evicting) |
8663 | lock_extent_bits(tree, page_start, page_end, &cached_state); | 8763 | lock_extent_bits(tree, page_start, page_end, &cached_state); |
8664 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 8764 | again: |
8765 | start = page_start; | ||
8766 | ordered = btrfs_lookup_ordered_range(inode, start, | ||
8767 | page_end - start + 1); | ||
8665 | if (ordered) { | 8768 | if (ordered) { |
8769 | end = min(page_end, ordered->file_offset + ordered->len - 1); | ||
8666 | /* | 8770 | /* |
8667 | * IO on this page will never be started, so we need | 8771 | * IO on this page will never be started, so we need |
8668 | * to account for any ordered extents now | 8772 | * to account for any ordered extents now |
8669 | */ | 8773 | */ |
8670 | if (!inode_evicting) | 8774 | if (!inode_evicting) |
8671 | clear_extent_bit(tree, page_start, page_end, | 8775 | clear_extent_bit(tree, start, end, |
8672 | EXTENT_DIRTY | EXTENT_DELALLOC | | 8776 | EXTENT_DIRTY | EXTENT_DELALLOC | |
8673 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | | 8777 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
8674 | EXTENT_DEFRAG, 1, 0, &cached_state, | 8778 | EXTENT_DEFRAG, 1, 0, &cached_state, |
@@ -8685,22 +8789,26 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, | |||
8685 | 8789 | ||
8686 | spin_lock_irq(&tree->lock); | 8790 | spin_lock_irq(&tree->lock); |
8687 | set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); | 8791 | set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); |
8688 | new_len = page_start - ordered->file_offset; | 8792 | new_len = start - ordered->file_offset; |
8689 | if (new_len < ordered->truncated_len) | 8793 | if (new_len < ordered->truncated_len) |
8690 | ordered->truncated_len = new_len; | 8794 | ordered->truncated_len = new_len; |
8691 | spin_unlock_irq(&tree->lock); | 8795 | spin_unlock_irq(&tree->lock); |
8692 | 8796 | ||
8693 | if (btrfs_dec_test_ordered_pending(inode, &ordered, | 8797 | if (btrfs_dec_test_ordered_pending(inode, &ordered, |
8694 | page_start, | 8798 | start, |
8695 | PAGE_CACHE_SIZE, 1)) | 8799 | end - start + 1, 1)) |
8696 | btrfs_finish_ordered_io(ordered); | 8800 | btrfs_finish_ordered_io(ordered); |
8697 | } | 8801 | } |
8698 | btrfs_put_ordered_extent(ordered); | 8802 | btrfs_put_ordered_extent(ordered); |
8699 | if (!inode_evicting) { | 8803 | if (!inode_evicting) { |
8700 | cached_state = NULL; | 8804 | cached_state = NULL; |
8701 | lock_extent_bits(tree, page_start, page_end, | 8805 | lock_extent_bits(tree, start, end, |
8702 | &cached_state); | 8806 | &cached_state); |
8703 | } | 8807 | } |
8808 | |||
8809 | start = end + 1; | ||
8810 | if (start < page_end) | ||
8811 | goto again; | ||
8704 | } | 8812 | } |
8705 | 8813 | ||
8706 | /* | 8814 | /* |
@@ -8761,15 +8869,28 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
8761 | loff_t size; | 8869 | loff_t size; |
8762 | int ret; | 8870 | int ret; |
8763 | int reserved = 0; | 8871 | int reserved = 0; |
8872 | u64 reserved_space; | ||
8764 | u64 page_start; | 8873 | u64 page_start; |
8765 | u64 page_end; | 8874 | u64 page_end; |
8875 | u64 end; | ||
8876 | |||
8877 | reserved_space = PAGE_CACHE_SIZE; | ||
8766 | 8878 | ||
8767 | sb_start_pagefault(inode->i_sb); | 8879 | sb_start_pagefault(inode->i_sb); |
8768 | page_start = page_offset(page); | 8880 | page_start = page_offset(page); |
8769 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 8881 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
8882 | end = page_end; | ||
8770 | 8883 | ||
8884 | /* | ||
8885 | * Reserving delalloc space after obtaining the page lock can lead to | ||
8886 | * deadlock. For example, if a dirty page is locked by this function | ||
8887 | * and the call to btrfs_delalloc_reserve_space() ends up triggering | ||
8888 | * dirty page write out, then the btrfs_writepage() function could | ||
8889 | * end up waiting indefinitely to get a lock on the page currently | ||
8890 | * being processed by btrfs_page_mkwrite() function. | ||
8891 | */ | ||
8771 | ret = btrfs_delalloc_reserve_space(inode, page_start, | 8892 | ret = btrfs_delalloc_reserve_space(inode, page_start, |
8772 | PAGE_CACHE_SIZE); | 8893 | reserved_space); |
8773 | if (!ret) { | 8894 | if (!ret) { |
8774 | ret = file_update_time(vma->vm_file); | 8895 | ret = file_update_time(vma->vm_file); |
8775 | reserved = 1; | 8896 | reserved = 1; |
@@ -8803,7 +8924,7 @@ again: | |||
8803 | * we can't set the delalloc bits if there are pending ordered | 8924 | * we can't set the delalloc bits if there are pending ordered |
8804 | * extents. Drop our locks and wait for them to finish | 8925 | * extents. Drop our locks and wait for them to finish |
8805 | */ | 8926 | */ |
8806 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 8927 | ordered = btrfs_lookup_ordered_range(inode, page_start, page_end); |
8807 | if (ordered) { | 8928 | if (ordered) { |
8808 | unlock_extent_cached(io_tree, page_start, page_end, | 8929 | unlock_extent_cached(io_tree, page_start, page_end, |
8809 | &cached_state, GFP_NOFS); | 8930 | &cached_state, GFP_NOFS); |
@@ -8813,6 +8934,18 @@ again: | |||
8813 | goto again; | 8934 | goto again; |
8814 | } | 8935 | } |
8815 | 8936 | ||
8937 | if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) { | ||
8938 | reserved_space = round_up(size - page_start, root->sectorsize); | ||
8939 | if (reserved_space < PAGE_CACHE_SIZE) { | ||
8940 | end = page_start + reserved_space - 1; | ||
8941 | spin_lock(&BTRFS_I(inode)->lock); | ||
8942 | BTRFS_I(inode)->outstanding_extents++; | ||
8943 | spin_unlock(&BTRFS_I(inode)->lock); | ||
8944 | btrfs_delalloc_release_space(inode, page_start, | ||
8945 | PAGE_CACHE_SIZE - reserved_space); | ||
8946 | } | ||
8947 | } | ||
8948 | |||
8816 | /* | 8949 | /* |
8817 | * XXX - page_mkwrite gets called every time the page is dirtied, even | 8950 | * XXX - page_mkwrite gets called every time the page is dirtied, even |
8818 | * if it was already dirty, so for space accounting reasons we need to | 8951 | * if it was already dirty, so for space accounting reasons we need to |
@@ -8820,12 +8953,12 @@ again: | |||
8820 | * is probably a better way to do this, but for now keep consistent with | 8953 | * is probably a better way to do this, but for now keep consistent with |
8821 | * prepare_pages in the normal write path. | 8954 | * prepare_pages in the normal write path. |
8822 | */ | 8955 | */ |
8823 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 8956 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, |
8824 | EXTENT_DIRTY | EXTENT_DELALLOC | | 8957 | EXTENT_DIRTY | EXTENT_DELALLOC | |
8825 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | 8958 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
8826 | 0, 0, &cached_state, GFP_NOFS); | 8959 | 0, 0, &cached_state, GFP_NOFS); |
8827 | 8960 | ||
8828 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 8961 | ret = btrfs_set_extent_delalloc(inode, page_start, end, |
8829 | &cached_state); | 8962 | &cached_state); |
8830 | if (ret) { | 8963 | if (ret) { |
8831 | unlock_extent_cached(io_tree, page_start, page_end, | 8964 | unlock_extent_cached(io_tree, page_start, page_end, |
@@ -8864,7 +8997,7 @@ out_unlock: | |||
8864 | } | 8997 | } |
8865 | unlock_page(page); | 8998 | unlock_page(page); |
8866 | out: | 8999 | out: |
8867 | btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE); | 9000 | btrfs_delalloc_release_space(inode, page_start, reserved_space); |
8868 | out_noreserve: | 9001 | out_noreserve: |
8869 | sb_end_pagefault(inode->i_sb); | 9002 | sb_end_pagefault(inode->i_sb); |
8870 | return ret; | 9003 | return ret; |
@@ -9190,16 +9323,11 @@ void btrfs_destroy_cachep(void) | |||
9190 | * destroy cache. | 9323 | * destroy cache. |
9191 | */ | 9324 | */ |
9192 | rcu_barrier(); | 9325 | rcu_barrier(); |
9193 | if (btrfs_inode_cachep) | 9326 | kmem_cache_destroy(btrfs_inode_cachep); |
9194 | kmem_cache_destroy(btrfs_inode_cachep); | 9327 | kmem_cache_destroy(btrfs_trans_handle_cachep); |
9195 | if (btrfs_trans_handle_cachep) | 9328 | kmem_cache_destroy(btrfs_transaction_cachep); |
9196 | kmem_cache_destroy(btrfs_trans_handle_cachep); | 9329 | kmem_cache_destroy(btrfs_path_cachep); |
9197 | if (btrfs_transaction_cachep) | 9330 | kmem_cache_destroy(btrfs_free_space_cachep); |
9198 | kmem_cache_destroy(btrfs_transaction_cachep); | ||
9199 | if (btrfs_path_cachep) | ||
9200 | kmem_cache_destroy(btrfs_path_cachep); | ||
9201 | if (btrfs_free_space_cachep) | ||
9202 | kmem_cache_destroy(btrfs_free_space_cachep); | ||
9203 | } | 9331 | } |
9204 | 9332 | ||
9205 | int btrfs_init_cachep(void) | 9333 | int btrfs_init_cachep(void) |
@@ -9250,7 +9378,6 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
9250 | 9378 | ||
9251 | generic_fillattr(inode, stat); | 9379 | generic_fillattr(inode, stat); |
9252 | stat->dev = BTRFS_I(inode)->root->anon_dev; | 9380 | stat->dev = BTRFS_I(inode)->root->anon_dev; |
9253 | stat->blksize = PAGE_CACHE_SIZE; | ||
9254 | 9381 | ||
9255 | spin_lock(&BTRFS_I(inode)->lock); | 9382 | spin_lock(&BTRFS_I(inode)->lock); |
9256 | delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; | 9383 | delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; |
@@ -9268,7 +9395,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9268 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | 9395 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; |
9269 | struct inode *new_inode = d_inode(new_dentry); | 9396 | struct inode *new_inode = d_inode(new_dentry); |
9270 | struct inode *old_inode = d_inode(old_dentry); | 9397 | struct inode *old_inode = d_inode(old_dentry); |
9271 | struct timespec ctime = CURRENT_TIME; | ||
9272 | u64 index = 0; | 9398 | u64 index = 0; |
9273 | u64 root_objectid; | 9399 | u64 root_objectid; |
9274 | int ret; | 9400 | int ret; |
@@ -9365,9 +9491,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9365 | inode_inc_iversion(old_dir); | 9491 | inode_inc_iversion(old_dir); |
9366 | inode_inc_iversion(new_dir); | 9492 | inode_inc_iversion(new_dir); |
9367 | inode_inc_iversion(old_inode); | 9493 | inode_inc_iversion(old_inode); |
9368 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 9494 | old_dir->i_ctime = old_dir->i_mtime = |
9369 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 9495 | new_dir->i_ctime = new_dir->i_mtime = |
9370 | old_inode->i_ctime = ctime; | 9496 | old_inode->i_ctime = current_fs_time(old_dir->i_sb); |
9371 | 9497 | ||
9372 | if (old_dentry->d_parent != new_dentry->d_parent) | 9498 | if (old_dentry->d_parent != new_dentry->d_parent) |
9373 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | 9499 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); |
@@ -9392,7 +9518,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9392 | 9518 | ||
9393 | if (new_inode) { | 9519 | if (new_inode) { |
9394 | inode_inc_iversion(new_inode); | 9520 | inode_inc_iversion(new_inode); |
9395 | new_inode->i_ctime = CURRENT_TIME; | 9521 | new_inode->i_ctime = current_fs_time(new_inode->i_sb); |
9396 | if (unlikely(btrfs_ino(new_inode) == | 9522 | if (unlikely(btrfs_ino(new_inode) == |
9397 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 9523 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
9398 | root_objectid = BTRFS_I(new_inode)->location.objectid; | 9524 | root_objectid = BTRFS_I(new_inode)->location.objectid; |
@@ -9870,7 +9996,7 @@ next: | |||
9870 | *alloc_hint = ins.objectid + ins.offset; | 9996 | *alloc_hint = ins.objectid + ins.offset; |
9871 | 9997 | ||
9872 | inode_inc_iversion(inode); | 9998 | inode_inc_iversion(inode); |
9873 | inode->i_ctime = CURRENT_TIME; | 9999 | inode->i_ctime = current_fs_time(inode->i_sb); |
9874 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 10000 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
9875 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 10001 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
9876 | (actual_len > inode->i_size) && | 10002 | (actual_len > inode->i_size) && |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 48aee9846329..053e677839fe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -59,6 +59,8 @@ | |||
59 | #include "props.h" | 59 | #include "props.h" |
60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
61 | #include "qgroup.h" | 61 | #include "qgroup.h" |
62 | #include "tree-log.h" | ||
63 | #include "compression.h" | ||
62 | 64 | ||
63 | #ifdef CONFIG_64BIT | 65 | #ifdef CONFIG_64BIT |
64 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | 66 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI |
@@ -347,7 +349,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
347 | 349 | ||
348 | btrfs_update_iflags(inode); | 350 | btrfs_update_iflags(inode); |
349 | inode_inc_iversion(inode); | 351 | inode_inc_iversion(inode); |
350 | inode->i_ctime = CURRENT_TIME; | 352 | inode->i_ctime = current_fs_time(inode->i_sb); |
351 | ret = btrfs_update_inode(trans, root, inode); | 353 | ret = btrfs_update_inode(trans, root, inode); |
352 | 354 | ||
353 | btrfs_end_transaction(trans, root); | 355 | btrfs_end_transaction(trans, root); |
@@ -443,7 +445,7 @@ static noinline int create_subvol(struct inode *dir, | |||
443 | struct btrfs_root *root = BTRFS_I(dir)->root; | 445 | struct btrfs_root *root = BTRFS_I(dir)->root; |
444 | struct btrfs_root *new_root; | 446 | struct btrfs_root *new_root; |
445 | struct btrfs_block_rsv block_rsv; | 447 | struct btrfs_block_rsv block_rsv; |
446 | struct timespec cur_time = CURRENT_TIME; | 448 | struct timespec cur_time = current_fs_time(dir->i_sb); |
447 | struct inode *inode; | 449 | struct inode *inode; |
448 | int ret; | 450 | int ret; |
449 | int err; | 451 | int err; |
@@ -844,10 +846,6 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
844 | if (IS_ERR(dentry)) | 846 | if (IS_ERR(dentry)) |
845 | goto out_unlock; | 847 | goto out_unlock; |
846 | 848 | ||
847 | error = -EEXIST; | ||
848 | if (d_really_is_positive(dentry)) | ||
849 | goto out_dput; | ||
850 | |||
851 | error = btrfs_may_create(dir, dentry); | 849 | error = btrfs_may_create(dir, dentry); |
852 | if (error) | 850 | if (error) |
853 | goto out_dput; | 851 | goto out_dput; |
@@ -2097,8 +2095,6 @@ static noinline int search_ioctl(struct inode *inode, | |||
2097 | key.offset = (u64)-1; | 2095 | key.offset = (u64)-1; |
2098 | root = btrfs_read_fs_root_no_name(info, &key); | 2096 | root = btrfs_read_fs_root_no_name(info, &key); |
2099 | if (IS_ERR(root)) { | 2097 | if (IS_ERR(root)) { |
2100 | btrfs_err(info, "could not find root %llu", | ||
2101 | sk->tree_id); | ||
2102 | btrfs_free_path(path); | 2098 | btrfs_free_path(path); |
2103 | return -ENOENT; | 2099 | return -ENOENT; |
2104 | } | 2100 | } |
@@ -2476,6 +2472,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2476 | trans->block_rsv = &block_rsv; | 2472 | trans->block_rsv = &block_rsv; |
2477 | trans->bytes_reserved = block_rsv.size; | 2473 | trans->bytes_reserved = block_rsv.size; |
2478 | 2474 | ||
2475 | btrfs_record_snapshot_destroy(trans, dir); | ||
2476 | |||
2479 | ret = btrfs_unlink_subvol(trans, root, dir, | 2477 | ret = btrfs_unlink_subvol(trans, root, dir, |
2480 | dest->root_key.objectid, | 2478 | dest->root_key.objectid, |
2481 | dentry->d_name.name, | 2479 | dentry->d_name.name, |
@@ -2960,8 +2958,8 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, | |||
2960 | * of the array is bounded by len, which is in turn bounded by | 2958 | * of the array is bounded by len, which is in turn bounded by |
2961 | * BTRFS_MAX_DEDUPE_LEN. | 2959 | * BTRFS_MAX_DEDUPE_LEN. |
2962 | */ | 2960 | */ |
2963 | src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); | 2961 | src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); |
2964 | dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); | 2962 | dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); |
2965 | if (!src_pgarr || !dst_pgarr) { | 2963 | if (!src_pgarr || !dst_pgarr) { |
2966 | kfree(src_pgarr); | 2964 | kfree(src_pgarr); |
2967 | kfree(dst_pgarr); | 2965 | kfree(dst_pgarr); |
@@ -3068,6 +3066,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
3068 | ret = extent_same_check_offsets(src, loff, &len, olen); | 3066 | ret = extent_same_check_offsets(src, loff, &len, olen); |
3069 | if (ret) | 3067 | if (ret) |
3070 | goto out_unlock; | 3068 | goto out_unlock; |
3069 | ret = extent_same_check_offsets(src, dst_loff, &len, olen); | ||
3070 | if (ret) | ||
3071 | goto out_unlock; | ||
3071 | 3072 | ||
3072 | /* | 3073 | /* |
3073 | * Single inode case wants the same checks, except we | 3074 | * Single inode case wants the same checks, except we |
@@ -3217,7 +3218,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, | |||
3217 | 3218 | ||
3218 | inode_inc_iversion(inode); | 3219 | inode_inc_iversion(inode); |
3219 | if (!no_time_update) | 3220 | if (!no_time_update) |
3220 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 3221 | inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); |
3221 | /* | 3222 | /* |
3222 | * We round up to the block size at eof when determining which | 3223 | * We round up to the block size at eof when determining which |
3223 | * extents to clone above, but shouldn't round up the file size. | 3224 | * extents to clone above, but shouldn't round up the file size. |
@@ -3889,8 +3890,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, | |||
3889 | * Truncate page cache pages so that future reads will see the cloned | 3890 | * Truncate page cache pages so that future reads will see the cloned |
3890 | * data immediately and not the previous data. | 3891 | * data immediately and not the previous data. |
3891 | */ | 3892 | */ |
3892 | truncate_inode_pages_range(&inode->i_data, destoff, | 3893 | truncate_inode_pages_range(&inode->i_data, |
3893 | PAGE_CACHE_ALIGN(destoff + len) - 1); | 3894 | round_down(destoff, PAGE_CACHE_SIZE), |
3895 | round_up(destoff + len, PAGE_CACHE_SIZE) - 1); | ||
3894 | out_unlock: | 3896 | out_unlock: |
3895 | if (!same_inode) | 3897 | if (!same_inode) |
3896 | btrfs_double_inode_unlock(src, inode); | 3898 | btrfs_double_inode_unlock(src, inode); |
@@ -5031,7 +5033,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, | |||
5031 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5033 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5032 | struct btrfs_root_item *root_item = &root->root_item; | 5034 | struct btrfs_root_item *root_item = &root->root_item; |
5033 | struct btrfs_trans_handle *trans; | 5035 | struct btrfs_trans_handle *trans; |
5034 | struct timespec ct = CURRENT_TIME; | 5036 | struct timespec ct = current_fs_time(inode->i_sb); |
5035 | int ret = 0; | 5037 | int ret = 0; |
5036 | int received_uuid_changed; | 5038 | int received_uuid_changed; |
5037 | 5039 | ||
@@ -5262,8 +5264,7 @@ out_unlock: | |||
5262 | .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ | 5264 | .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ |
5263 | .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } | 5265 | .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } |
5264 | 5266 | ||
5265 | static int btrfs_ioctl_get_supported_features(struct file *file, | 5267 | int btrfs_ioctl_get_supported_features(void __user *arg) |
5266 | void __user *arg) | ||
5267 | { | 5268 | { |
5268 | static const struct btrfs_ioctl_feature_flags features[3] = { | 5269 | static const struct btrfs_ioctl_feature_flags features[3] = { |
5269 | INIT_FEATURE_FLAGS(SUPP), | 5270 | INIT_FEATURE_FLAGS(SUPP), |
@@ -5542,7 +5543,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5542 | case BTRFS_IOC_SET_FSLABEL: | 5543 | case BTRFS_IOC_SET_FSLABEL: |
5543 | return btrfs_ioctl_set_fslabel(file, argp); | 5544 | return btrfs_ioctl_set_fslabel(file, argp); |
5544 | case BTRFS_IOC_GET_SUPPORTED_FEATURES: | 5545 | case BTRFS_IOC_GET_SUPPORTED_FEATURES: |
5545 | return btrfs_ioctl_get_supported_features(file, argp); | 5546 | return btrfs_ioctl_get_supported_features(argp); |
5546 | case BTRFS_IOC_GET_FEATURES: | 5547 | case BTRFS_IOC_GET_FEATURES: |
5547 | return btrfs_ioctl_get_features(file, argp); | 5548 | return btrfs_ioctl_get_features(file, argp); |
5548 | case BTRFS_IOC_SET_FEATURES: | 5549 | case BTRFS_IOC_SET_FEATURES: |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 8c27292ea9ea..0de7da5a610d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "btrfs_inode.h" | 25 | #include "btrfs_inode.h" |
26 | #include "extent_io.h" | 26 | #include "extent_io.h" |
27 | #include "disk-io.h" | 27 | #include "disk-io.h" |
28 | #include "compression.h" | ||
28 | 29 | ||
29 | static struct kmem_cache *btrfs_ordered_extent_cache; | 30 | static struct kmem_cache *btrfs_ordered_extent_cache; |
30 | 31 | ||
@@ -1009,7 +1010,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
1009 | for (; node; node = rb_prev(node)) { | 1010 | for (; node; node = rb_prev(node)) { |
1010 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 1011 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
1011 | 1012 | ||
1012 | /* We treat this entry as if it doesnt exist */ | 1013 | /* We treat this entry as if it doesn't exist */ |
1013 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | 1014 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) |
1014 | continue; | 1015 | continue; |
1015 | if (test->file_offset + test->len <= disk_i_size) | 1016 | if (test->file_offset + test->len <= disk_i_size) |
@@ -1114,6 +1115,5 @@ int __init ordered_data_init(void) | |||
1114 | 1115 | ||
1115 | void ordered_data_exit(void) | 1116 | void ordered_data_exit(void) |
1116 | { | 1117 | { |
1117 | if (btrfs_ordered_extent_cache) | 1118 | kmem_cache_destroy(btrfs_ordered_extent_cache); |
1118 | kmem_cache_destroy(btrfs_ordered_extent_cache); | ||
1119 | } | 1119 | } |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 647ab12fdf5d..147dc6ca5de1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -295,8 +295,27 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
295 | btrfs_dev_extent_chunk_offset(l, dev_extent), | 295 | btrfs_dev_extent_chunk_offset(l, dev_extent), |
296 | btrfs_dev_extent_length(l, dev_extent)); | 296 | btrfs_dev_extent_length(l, dev_extent)); |
297 | break; | 297 | break; |
298 | case BTRFS_DEV_STATS_KEY: | 298 | case BTRFS_PERSISTENT_ITEM_KEY: |
299 | printk(KERN_INFO "\t\tdevice stats\n"); | 299 | printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n", |
300 | key.objectid, key.offset); | ||
301 | switch (key.objectid) { | ||
302 | case BTRFS_DEV_STATS_OBJECTID: | ||
303 | printk(KERN_INFO "\t\tdevice stats\n"); | ||
304 | break; | ||
305 | default: | ||
306 | printk(KERN_INFO "\t\tunknown persistent item\n"); | ||
307 | } | ||
308 | break; | ||
309 | case BTRFS_TEMPORARY_ITEM_KEY: | ||
310 | printk(KERN_INFO "\t\ttemporary item objectid %llu offset %llu\n", | ||
311 | key.objectid, key.offset); | ||
312 | switch (key.objectid) { | ||
313 | case BTRFS_BALANCE_OBJECTID: | ||
314 | printk(KERN_INFO "\t\tbalance status\n"); | ||
315 | break; | ||
316 | default: | ||
317 | printk(KERN_INFO "\t\tunknown temporary item\n"); | ||
318 | } | ||
300 | break; | 319 | break; |
301 | case BTRFS_DEV_REPLACE_KEY: | 320 | case BTRFS_DEV_REPLACE_KEY: |
302 | printk(KERN_INFO "\t\tdev replace\n"); | 321 | printk(KERN_INFO "\t\tdev replace\n"); |
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index f9e60231f685..36992128c746 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "hash.h" | 22 | #include "hash.h" |
23 | #include "transaction.h" | 23 | #include "transaction.h" |
24 | #include "xattr.h" | 24 | #include "xattr.h" |
25 | #include "compression.h" | ||
25 | 26 | ||
26 | #define BTRFS_PROP_HANDLERS_HT_BITS 8 | 27 | #define BTRFS_PROP_HANDLERS_HT_BITS 8 |
27 | static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); | 28 | static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 619f92963e27..b892914968c1 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -72,7 +72,7 @@ struct reada_extent { | |||
72 | spinlock_t lock; | 72 | spinlock_t lock; |
73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; | 73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; |
74 | int nzones; | 74 | int nzones; |
75 | struct btrfs_device *scheduled_for; | 75 | int scheduled; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | struct reada_zone { | 78 | struct reada_zone { |
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info); | |||
101 | static void __reada_start_machine(struct btrfs_fs_info *fs_info); | 101 | static void __reada_start_machine(struct btrfs_fs_info *fs_info); |
102 | 102 | ||
103 | static int reada_add_block(struct reada_control *rc, u64 logical, | 103 | static int reada_add_block(struct reada_control *rc, u64 logical, |
104 | struct btrfs_key *top, int level, u64 generation); | 104 | struct btrfs_key *top, u64 generation); |
105 | 105 | ||
106 | /* recurses */ | 106 | /* recurses */ |
107 | /* in case of err, eb might be NULL */ | 107 | /* in case of err, eb might be NULL */ |
108 | static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 108 | static void __readahead_hook(struct btrfs_fs_info *fs_info, |
109 | u64 start, int err) | 109 | struct reada_extent *re, struct extent_buffer *eb, |
110 | u64 start, int err) | ||
110 | { | 111 | { |
111 | int level = 0; | 112 | int level = 0; |
112 | int nritems; | 113 | int nritems; |
113 | int i; | 114 | int i; |
114 | u64 bytenr; | 115 | u64 bytenr; |
115 | u64 generation; | 116 | u64 generation; |
116 | struct reada_extent *re; | ||
117 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
118 | struct list_head list; | 117 | struct list_head list; |
119 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
120 | struct btrfs_device *for_dev; | ||
121 | 118 | ||
122 | if (eb) | 119 | if (eb) |
123 | level = btrfs_header_level(eb); | 120 | level = btrfs_header_level(eb); |
124 | 121 | ||
125 | /* find extent */ | ||
126 | spin_lock(&fs_info->reada_lock); | ||
127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | ||
128 | if (re) | ||
129 | re->refcnt++; | ||
130 | spin_unlock(&fs_info->reada_lock); | ||
131 | |||
132 | if (!re) | ||
133 | return -1; | ||
134 | |||
135 | spin_lock(&re->lock); | 122 | spin_lock(&re->lock); |
136 | /* | 123 | /* |
137 | * just take the full list from the extent. afterwards we | 124 | * just take the full list from the extent. afterwards we |
138 | * don't need the lock anymore | 125 | * don't need the lock anymore |
139 | */ | 126 | */ |
140 | list_replace_init(&re->extctl, &list); | 127 | list_replace_init(&re->extctl, &list); |
141 | for_dev = re->scheduled_for; | 128 | re->scheduled = 0; |
142 | re->scheduled_for = NULL; | ||
143 | spin_unlock(&re->lock); | 129 | spin_unlock(&re->lock); |
144 | 130 | ||
145 | if (err == 0) { | 131 | /* |
146 | nritems = level ? btrfs_header_nritems(eb) : 0; | 132 | * this is the error case, the extent buffer has not been |
147 | generation = btrfs_header_generation(eb); | 133 | * read correctly. We won't access anything from it and |
148 | /* | 134 | * just cleanup our data structures. Effectively this will |
149 | * FIXME: currently we just set nritems to 0 if this is a leaf, | 135 | * cut the branch below this node from read ahead. |
150 | * effectively ignoring the content. In a next step we could | 136 | */ |
151 | * trigger more readahead depending from the content, e.g. | 137 | if (err) |
152 | * fetch the checksums for the extents in the leaf. | 138 | goto cleanup; |
153 | */ | ||
154 | } else { | ||
155 | /* | ||
156 | * this is the error case, the extent buffer has not been | ||
157 | * read correctly. We won't access anything from it and | ||
158 | * just cleanup our data structures. Effectively this will | ||
159 | * cut the branch below this node from read ahead. | ||
160 | */ | ||
161 | nritems = 0; | ||
162 | generation = 0; | ||
163 | } | ||
164 | 139 | ||
140 | /* | ||
141 | * FIXME: currently we just set nritems to 0 if this is a leaf, | ||
142 | * effectively ignoring the content. In a next step we could | ||
143 | * trigger more readahead depending from the content, e.g. | ||
144 | * fetch the checksums for the extents in the leaf. | ||
145 | */ | ||
146 | if (!level) | ||
147 | goto cleanup; | ||
148 | |||
149 | nritems = btrfs_header_nritems(eb); | ||
150 | generation = btrfs_header_generation(eb); | ||
165 | for (i = 0; i < nritems; i++) { | 151 | for (i = 0; i < nritems; i++) { |
166 | struct reada_extctl *rec; | 152 | struct reada_extctl *rec; |
167 | u64 n_gen; | 153 | u64 n_gen; |
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
188 | */ | 174 | */ |
189 | #ifdef DEBUG | 175 | #ifdef DEBUG |
190 | if (rec->generation != generation) { | 176 | if (rec->generation != generation) { |
191 | btrfs_debug(root->fs_info, | 177 | btrfs_debug(fs_info, |
192 | "generation mismatch for (%llu,%d,%llu) %llu != %llu", | 178 | "generation mismatch for (%llu,%d,%llu) %llu != %llu", |
193 | key.objectid, key.type, key.offset, | 179 | key.objectid, key.type, key.offset, |
194 | rec->generation, generation); | 180 | rec->generation, generation); |
195 | } | 181 | } |
196 | #endif | 182 | #endif |
197 | if (rec->generation == generation && | 183 | if (rec->generation == generation && |
198 | btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && | 184 | btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && |
199 | btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) | 185 | btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) |
200 | reada_add_block(rc, bytenr, &next_key, | 186 | reada_add_block(rc, bytenr, &next_key, n_gen); |
201 | level - 1, n_gen); | ||
202 | } | 187 | } |
203 | } | 188 | } |
189 | |||
190 | cleanup: | ||
204 | /* | 191 | /* |
205 | * free extctl records | 192 | * free extctl records |
206 | */ | 193 | */ |
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
222 | 209 | ||
223 | reada_extent_put(fs_info, re); /* one ref for each entry */ | 210 | reada_extent_put(fs_info, re); /* one ref for each entry */ |
224 | } | 211 | } |
225 | reada_extent_put(fs_info, re); /* our ref */ | ||
226 | if (for_dev) | ||
227 | atomic_dec(&for_dev->reada_in_flight); | ||
228 | 212 | ||
229 | return 0; | 213 | return; |
230 | } | 214 | } |
231 | 215 | ||
232 | /* | 216 | /* |
233 | * start is passed separately in case eb in NULL, which may be the case with | 217 | * start is passed separately in case eb in NULL, which may be the case with |
234 | * failed I/O | 218 | * failed I/O |
235 | */ | 219 | */ |
236 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 220 | int btree_readahead_hook(struct btrfs_fs_info *fs_info, |
237 | u64 start, int err) | 221 | struct extent_buffer *eb, u64 start, int err) |
238 | { | 222 | { |
239 | int ret; | 223 | int ret = 0; |
224 | struct reada_extent *re; | ||
240 | 225 | ||
241 | ret = __readahead_hook(root, eb, start, err); | 226 | /* find extent */ |
227 | spin_lock(&fs_info->reada_lock); | ||
228 | re = radix_tree_lookup(&fs_info->reada_tree, | ||
229 | start >> PAGE_CACHE_SHIFT); | ||
230 | if (re) | ||
231 | re->refcnt++; | ||
232 | spin_unlock(&fs_info->reada_lock); | ||
233 | if (!re) { | ||
234 | ret = -1; | ||
235 | goto start_machine; | ||
236 | } | ||
242 | 237 | ||
243 | reada_start_machine(root->fs_info); | 238 | __readahead_hook(fs_info, re, eb, start, err); |
239 | reada_extent_put(fs_info, re); /* our ref */ | ||
244 | 240 | ||
241 | start_machine: | ||
242 | reada_start_machine(fs_info); | ||
245 | return ret; | 243 | return ret; |
246 | } | 244 | } |
247 | 245 | ||
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
260 | spin_lock(&fs_info->reada_lock); | 258 | spin_lock(&fs_info->reada_lock); |
261 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | 259 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, |
262 | logical >> PAGE_CACHE_SHIFT, 1); | 260 | logical >> PAGE_CACHE_SHIFT, 1); |
263 | if (ret == 1) | 261 | if (ret == 1 && logical >= zone->start && logical <= zone->end) { |
264 | kref_get(&zone->refcnt); | 262 | kref_get(&zone->refcnt); |
265 | spin_unlock(&fs_info->reada_lock); | ||
266 | |||
267 | if (ret == 1) { | ||
268 | if (logical >= zone->start && logical < zone->end) | ||
269 | return zone; | ||
270 | spin_lock(&fs_info->reada_lock); | ||
271 | kref_put(&zone->refcnt, reada_zone_release); | ||
272 | spin_unlock(&fs_info->reada_lock); | 263 | spin_unlock(&fs_info->reada_lock); |
264 | return zone; | ||
273 | } | 265 | } |
274 | 266 | ||
267 | spin_unlock(&fs_info->reada_lock); | ||
268 | |||
275 | cache = btrfs_lookup_block_group(fs_info, logical); | 269 | cache = btrfs_lookup_block_group(fs_info, logical); |
276 | if (!cache) | 270 | if (!cache) |
277 | return NULL; | 271 | return NULL; |
@@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
280 | end = start + cache->key.offset - 1; | 274 | end = start + cache->key.offset - 1; |
281 | btrfs_put_block_group(cache); | 275 | btrfs_put_block_group(cache); |
282 | 276 | ||
283 | zone = kzalloc(sizeof(*zone), GFP_NOFS); | 277 | zone = kzalloc(sizeof(*zone), GFP_KERNEL); |
284 | if (!zone) | 278 | if (!zone) |
285 | return NULL; | 279 | return NULL; |
286 | 280 | ||
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
307 | kfree(zone); | 301 | kfree(zone); |
308 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | 302 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, |
309 | logical >> PAGE_CACHE_SHIFT, 1); | 303 | logical >> PAGE_CACHE_SHIFT, 1); |
310 | if (ret == 1) | 304 | if (ret == 1 && logical >= zone->start && logical <= zone->end) |
311 | kref_get(&zone->refcnt); | 305 | kref_get(&zone->refcnt); |
306 | else | ||
307 | zone = NULL; | ||
312 | } | 308 | } |
313 | spin_unlock(&fs_info->reada_lock); | 309 | spin_unlock(&fs_info->reada_lock); |
314 | 310 | ||
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
317 | 313 | ||
318 | static struct reada_extent *reada_find_extent(struct btrfs_root *root, | 314 | static struct reada_extent *reada_find_extent(struct btrfs_root *root, |
319 | u64 logical, | 315 | u64 logical, |
320 | struct btrfs_key *top, int level) | 316 | struct btrfs_key *top) |
321 | { | 317 | { |
322 | int ret; | 318 | int ret; |
323 | struct reada_extent *re = NULL; | 319 | struct reada_extent *re = NULL; |
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
330 | u64 length; | 326 | u64 length; |
331 | int real_stripes; | 327 | int real_stripes; |
332 | int nzones = 0; | 328 | int nzones = 0; |
333 | int i; | ||
334 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | 329 | unsigned long index = logical >> PAGE_CACHE_SHIFT; |
335 | int dev_replace_is_ongoing; | 330 | int dev_replace_is_ongoing; |
331 | int have_zone = 0; | ||
336 | 332 | ||
337 | spin_lock(&fs_info->reada_lock); | 333 | spin_lock(&fs_info->reada_lock); |
338 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 334 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
@@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
343 | if (re) | 339 | if (re) |
344 | return re; | 340 | return re; |
345 | 341 | ||
346 | re = kzalloc(sizeof(*re), GFP_NOFS); | 342 | re = kzalloc(sizeof(*re), GFP_KERNEL); |
347 | if (!re) | 343 | if (!re) |
348 | return NULL; | 344 | return NULL; |
349 | 345 | ||
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
375 | struct reada_zone *zone; | 371 | struct reada_zone *zone; |
376 | 372 | ||
377 | dev = bbio->stripes[nzones].dev; | 373 | dev = bbio->stripes[nzones].dev; |
374 | |||
375 | /* cannot read ahead on missing device. */ | ||
376 | if (!dev->bdev) | ||
377 | continue; | ||
378 | |||
378 | zone = reada_find_zone(fs_info, dev, logical, bbio); | 379 | zone = reada_find_zone(fs_info, dev, logical, bbio); |
379 | if (!zone) | 380 | if (!zone) |
380 | break; | 381 | continue; |
381 | 382 | ||
382 | re->zones[nzones] = zone; | 383 | re->zones[re->nzones++] = zone; |
383 | spin_lock(&zone->lock); | 384 | spin_lock(&zone->lock); |
384 | if (!zone->elems) | 385 | if (!zone->elems) |
385 | kref_get(&zone->refcnt); | 386 | kref_get(&zone->refcnt); |
@@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
389 | kref_put(&zone->refcnt, reada_zone_release); | 390 | kref_put(&zone->refcnt, reada_zone_release); |
390 | spin_unlock(&fs_info->reada_lock); | 391 | spin_unlock(&fs_info->reada_lock); |
391 | } | 392 | } |
392 | re->nzones = nzones; | 393 | if (re->nzones == 0) { |
393 | if (nzones == 0) { | ||
394 | /* not a single zone found, error and out */ | 394 | /* not a single zone found, error and out */ |
395 | goto error; | 395 | goto error; |
396 | } | 396 | } |
397 | 397 | ||
398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | 398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ |
399 | btrfs_dev_replace_lock(&fs_info->dev_replace); | 399 | btrfs_dev_replace_lock(&fs_info->dev_replace, 0); |
400 | spin_lock(&fs_info->reada_lock); | 400 | spin_lock(&fs_info->reada_lock); |
401 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | 401 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); |
402 | if (ret == -EEXIST) { | 402 | if (ret == -EEXIST) { |
@@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
404 | BUG_ON(!re_exist); | 404 | BUG_ON(!re_exist); |
405 | re_exist->refcnt++; | 405 | re_exist->refcnt++; |
406 | spin_unlock(&fs_info->reada_lock); | 406 | spin_unlock(&fs_info->reada_lock); |
407 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 407 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
408 | goto error; | 408 | goto error; |
409 | } | 409 | } |
410 | if (ret) { | 410 | if (ret) { |
411 | spin_unlock(&fs_info->reada_lock); | 411 | spin_unlock(&fs_info->reada_lock); |
412 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 412 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
413 | goto error; | 413 | goto error; |
414 | } | 414 | } |
415 | prev_dev = NULL; | 415 | prev_dev = NULL; |
416 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( | 416 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( |
417 | &fs_info->dev_replace); | 417 | &fs_info->dev_replace); |
418 | for (i = 0; i < nzones; ++i) { | 418 | for (nzones = 0; nzones < re->nzones; ++nzones) { |
419 | dev = bbio->stripes[i].dev; | 419 | dev = re->zones[nzones]->device; |
420 | |||
420 | if (dev == prev_dev) { | 421 | if (dev == prev_dev) { |
421 | /* | 422 | /* |
422 | * in case of DUP, just add the first zone. As both | 423 | * in case of DUP, just add the first zone. As both |
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
427 | */ | 428 | */ |
428 | continue; | 429 | continue; |
429 | } | 430 | } |
430 | if (!dev->bdev) { | 431 | if (!dev->bdev) |
431 | /* | 432 | continue; |
432 | * cannot read ahead on missing device, but for RAID5/6, | 433 | |
433 | * REQ_GET_READ_MIRRORS return 1. So don't skip missing | ||
434 | * device for such case. | ||
435 | */ | ||
436 | if (nzones > 1) | ||
437 | continue; | ||
438 | } | ||
439 | if (dev_replace_is_ongoing && | 434 | if (dev_replace_is_ongoing && |
440 | dev == fs_info->dev_replace.tgtdev) { | 435 | dev == fs_info->dev_replace.tgtdev) { |
441 | /* | 436 | /* |
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
447 | prev_dev = dev; | 442 | prev_dev = dev; |
448 | ret = radix_tree_insert(&dev->reada_extents, index, re); | 443 | ret = radix_tree_insert(&dev->reada_extents, index, re); |
449 | if (ret) { | 444 | if (ret) { |
450 | while (--i >= 0) { | 445 | while (--nzones >= 0) { |
451 | dev = bbio->stripes[i].dev; | 446 | dev = re->zones[nzones]->device; |
452 | BUG_ON(dev == NULL); | 447 | BUG_ON(dev == NULL); |
453 | /* ignore whether the entry was inserted */ | 448 | /* ignore whether the entry was inserted */ |
454 | radix_tree_delete(&dev->reada_extents, index); | 449 | radix_tree_delete(&dev->reada_extents, index); |
@@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
456 | BUG_ON(fs_info == NULL); | 451 | BUG_ON(fs_info == NULL); |
457 | radix_tree_delete(&fs_info->reada_tree, index); | 452 | radix_tree_delete(&fs_info->reada_tree, index); |
458 | spin_unlock(&fs_info->reada_lock); | 453 | spin_unlock(&fs_info->reada_lock); |
459 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 454 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
460 | goto error; | 455 | goto error; |
461 | } | 456 | } |
457 | have_zone = 1; | ||
462 | } | 458 | } |
463 | spin_unlock(&fs_info->reada_lock); | 459 | spin_unlock(&fs_info->reada_lock); |
464 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 460 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
461 | |||
462 | if (!have_zone) | ||
463 | goto error; | ||
465 | 464 | ||
466 | btrfs_put_bbio(bbio); | 465 | btrfs_put_bbio(bbio); |
467 | return re; | 466 | return re; |
468 | 467 | ||
469 | error: | 468 | error: |
470 | while (nzones) { | 469 | for (nzones = 0; nzones < re->nzones; ++nzones) { |
471 | struct reada_zone *zone; | 470 | struct reada_zone *zone; |
472 | 471 | ||
473 | --nzones; | ||
474 | zone = re->zones[nzones]; | 472 | zone = re->zones[nzones]; |
475 | kref_get(&zone->refcnt); | 473 | kref_get(&zone->refcnt); |
476 | spin_lock(&zone->lock); | 474 | spin_lock(&zone->lock); |
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, | |||
531 | kref_put(&zone->refcnt, reada_zone_release); | 529 | kref_put(&zone->refcnt, reada_zone_release); |
532 | spin_unlock(&fs_info->reada_lock); | 530 | spin_unlock(&fs_info->reada_lock); |
533 | } | 531 | } |
534 | if (re->scheduled_for) | ||
535 | atomic_dec(&re->scheduled_for->reada_in_flight); | ||
536 | 532 | ||
537 | kfree(re); | 533 | kfree(re); |
538 | } | 534 | } |
@@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref) | |||
556 | } | 552 | } |
557 | 553 | ||
558 | static int reada_add_block(struct reada_control *rc, u64 logical, | 554 | static int reada_add_block(struct reada_control *rc, u64 logical, |
559 | struct btrfs_key *top, int level, u64 generation) | 555 | struct btrfs_key *top, u64 generation) |
560 | { | 556 | { |
561 | struct btrfs_root *root = rc->root; | 557 | struct btrfs_root *root = rc->root; |
562 | struct reada_extent *re; | 558 | struct reada_extent *re; |
563 | struct reada_extctl *rec; | 559 | struct reada_extctl *rec; |
564 | 560 | ||
565 | re = reada_find_extent(root, logical, top, level); /* takes one ref */ | 561 | re = reada_find_extent(root, logical, top); /* takes one ref */ |
566 | if (!re) | 562 | if (!re) |
567 | return -1; | 563 | return -1; |
568 | 564 | ||
569 | rec = kzalloc(sizeof(*rec), GFP_NOFS); | 565 | rec = kzalloc(sizeof(*rec), GFP_KERNEL); |
570 | if (!rec) { | 566 | if (!rec) { |
571 | reada_extent_put(root->fs_info, re); | 567 | reada_extent_put(root->fs_info, re); |
572 | return -ENOMEM; | 568 | return -ENOMEM; |
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
662 | u64 logical; | 658 | u64 logical; |
663 | int ret; | 659 | int ret; |
664 | int i; | 660 | int i; |
665 | int need_kick = 0; | ||
666 | 661 | ||
667 | spin_lock(&fs_info->reada_lock); | 662 | spin_lock(&fs_info->reada_lock); |
668 | if (dev->reada_curr_zone == NULL) { | 663 | if (dev->reada_curr_zone == NULL) { |
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
679 | */ | 674 | */ |
680 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, | 675 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, |
681 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); | 676 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); |
682 | if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { | 677 | if (ret == 0 || re->logical > dev->reada_curr_zone->end) { |
683 | ret = reada_pick_zone(dev); | 678 | ret = reada_pick_zone(dev); |
684 | if (!ret) { | 679 | if (!ret) { |
685 | spin_unlock(&fs_info->reada_lock); | 680 | spin_unlock(&fs_info->reada_lock); |
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
698 | 693 | ||
699 | spin_unlock(&fs_info->reada_lock); | 694 | spin_unlock(&fs_info->reada_lock); |
700 | 695 | ||
696 | spin_lock(&re->lock); | ||
697 | if (re->scheduled || list_empty(&re->extctl)) { | ||
698 | spin_unlock(&re->lock); | ||
699 | reada_extent_put(fs_info, re); | ||
700 | return 0; | ||
701 | } | ||
702 | re->scheduled = 1; | ||
703 | spin_unlock(&re->lock); | ||
704 | |||
701 | /* | 705 | /* |
702 | * find mirror num | 706 | * find mirror num |
703 | */ | 707 | */ |
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
709 | } | 713 | } |
710 | logical = re->logical; | 714 | logical = re->logical; |
711 | 715 | ||
712 | spin_lock(&re->lock); | ||
713 | if (re->scheduled_for == NULL) { | ||
714 | re->scheduled_for = dev; | ||
715 | need_kick = 1; | ||
716 | } | ||
717 | spin_unlock(&re->lock); | ||
718 | |||
719 | reada_extent_put(fs_info, re); | ||
720 | |||
721 | if (!need_kick) | ||
722 | return 0; | ||
723 | |||
724 | atomic_inc(&dev->reada_in_flight); | 716 | atomic_inc(&dev->reada_in_flight); |
725 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, | 717 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, |
726 | mirror_num, &eb); | 718 | mirror_num, &eb); |
727 | if (ret) | 719 | if (ret) |
728 | __readahead_hook(fs_info->extent_root, NULL, logical, ret); | 720 | __readahead_hook(fs_info, re, NULL, logical, ret); |
729 | else if (eb) | 721 | else if (eb) |
730 | __readahead_hook(fs_info->extent_root, eb, eb->start, ret); | 722 | __readahead_hook(fs_info, re, eb, eb->start, ret); |
731 | 723 | ||
732 | if (eb) | 724 | if (eb) |
733 | free_extent_buffer(eb); | 725 | free_extent_buffer(eb); |
734 | 726 | ||
727 | atomic_dec(&dev->reada_in_flight); | ||
728 | reada_extent_put(fs_info, re); | ||
729 | |||
735 | return 1; | 730 | return 1; |
736 | 731 | ||
737 | } | 732 | } |
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work) | |||
752 | set_task_ioprio(current, BTRFS_IOPRIO_READA); | 747 | set_task_ioprio(current, BTRFS_IOPRIO_READA); |
753 | __reada_start_machine(fs_info); | 748 | __reada_start_machine(fs_info); |
754 | set_task_ioprio(current, old_ioprio); | 749 | set_task_ioprio(current, old_ioprio); |
750 | |||
751 | atomic_dec(&fs_info->reada_works_cnt); | ||
755 | } | 752 | } |
756 | 753 | ||
757 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | 754 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) |
@@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) | |||
783 | * enqueue to workers to finish it. This will distribute the load to | 780 | * enqueue to workers to finish it. This will distribute the load to |
784 | * the cores. | 781 | * the cores. |
785 | */ | 782 | */ |
786 | for (i = 0; i < 2; ++i) | 783 | for (i = 0; i < 2; ++i) { |
787 | reada_start_machine(fs_info); | 784 | reada_start_machine(fs_info); |
785 | if (atomic_read(&fs_info->reada_works_cnt) > | ||
786 | BTRFS_MAX_MIRRORS * 2) | ||
787 | break; | ||
788 | } | ||
788 | } | 789 | } |
789 | 790 | ||
790 | static void reada_start_machine(struct btrfs_fs_info *fs_info) | 791 | static void reada_start_machine(struct btrfs_fs_info *fs_info) |
791 | { | 792 | { |
792 | struct reada_machine_work *rmw; | 793 | struct reada_machine_work *rmw; |
793 | 794 | ||
794 | rmw = kzalloc(sizeof(*rmw), GFP_NOFS); | 795 | rmw = kzalloc(sizeof(*rmw), GFP_KERNEL); |
795 | if (!rmw) { | 796 | if (!rmw) { |
796 | /* FIXME we cannot handle this properly right now */ | 797 | /* FIXME we cannot handle this properly right now */ |
797 | BUG(); | 798 | BUG(); |
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
801 | rmw->fs_info = fs_info; | 802 | rmw->fs_info = fs_info; |
802 | 803 | ||
803 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); | 804 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
805 | atomic_inc(&fs_info->reada_works_cnt); | ||
804 | } | 806 | } |
805 | 807 | ||
806 | #ifdef DEBUG | 808 | #ifdef DEBUG |
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) | |||
848 | if (ret == 0) | 850 | if (ret == 0) |
849 | break; | 851 | break; |
850 | printk(KERN_DEBUG | 852 | printk(KERN_DEBUG |
851 | " re: logical %llu size %u empty %d for %lld", | 853 | " re: logical %llu size %u empty %d scheduled %d", |
852 | re->logical, fs_info->tree_root->nodesize, | 854 | re->logical, fs_info->tree_root->nodesize, |
853 | list_empty(&re->extctl), re->scheduled_for ? | 855 | list_empty(&re->extctl), re->scheduled); |
854 | re->scheduled_for->devid : -1); | ||
855 | 856 | ||
856 | for (i = 0; i < re->nzones; ++i) { | 857 | for (i = 0; i < re->nzones; ++i) { |
857 | printk(KERN_CONT " zone %llu-%llu devs", | 858 | printk(KERN_CONT " zone %llu-%llu devs", |
@@ -878,27 +879,21 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) | |||
878 | index, 1); | 879 | index, 1); |
879 | if (ret == 0) | 880 | if (ret == 0) |
880 | break; | 881 | break; |
881 | if (!re->scheduled_for) { | 882 | if (!re->scheduled) { |
882 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | 883 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; |
883 | continue; | 884 | continue; |
884 | } | 885 | } |
885 | printk(KERN_DEBUG | 886 | printk(KERN_DEBUG |
886 | "re: logical %llu size %u list empty %d for %lld", | 887 | "re: logical %llu size %u list empty %d scheduled %d", |
887 | re->logical, fs_info->tree_root->nodesize, | 888 | re->logical, fs_info->tree_root->nodesize, |
888 | list_empty(&re->extctl), | 889 | list_empty(&re->extctl), re->scheduled); |
889 | re->scheduled_for ? re->scheduled_for->devid : -1); | ||
890 | for (i = 0; i < re->nzones; ++i) { | 890 | for (i = 0; i < re->nzones; ++i) { |
891 | printk(KERN_CONT " zone %llu-%llu devs", | 891 | printk(KERN_CONT " zone %llu-%llu devs", |
892 | re->zones[i]->start, | 892 | re->zones[i]->start, |
893 | re->zones[i]->end); | 893 | re->zones[i]->end); |
894 | for (i = 0; i < re->nzones; ++i) { | 894 | for (j = 0; j < re->zones[i]->ndevs; ++j) { |
895 | printk(KERN_CONT " zone %llu-%llu devs", | 895 | printk(KERN_CONT " %lld", |
896 | re->zones[i]->start, | 896 | re->zones[i]->devs[j]->devid); |
897 | re->zones[i]->end); | ||
898 | for (j = 0; j < re->zones[i]->ndevs; ++j) { | ||
899 | printk(KERN_CONT " %lld", | ||
900 | re->zones[i]->devs[j]->devid); | ||
901 | } | ||
902 | } | 897 | } |
903 | } | 898 | } |
904 | printk(KERN_CONT "\n"); | 899 | printk(KERN_CONT "\n"); |
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
917 | struct reada_control *rc; | 912 | struct reada_control *rc; |
918 | u64 start; | 913 | u64 start; |
919 | u64 generation; | 914 | u64 generation; |
920 | int level; | ||
921 | int ret; | 915 | int ret; |
922 | struct extent_buffer *node; | 916 | struct extent_buffer *node; |
923 | static struct btrfs_key max_key = { | 917 | static struct btrfs_key max_key = { |
@@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
926 | .offset = (u64)-1 | 920 | .offset = (u64)-1 |
927 | }; | 921 | }; |
928 | 922 | ||
929 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 923 | rc = kzalloc(sizeof(*rc), GFP_KERNEL); |
930 | if (!rc) | 924 | if (!rc) |
931 | return ERR_PTR(-ENOMEM); | 925 | return ERR_PTR(-ENOMEM); |
932 | 926 | ||
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
940 | 934 | ||
941 | node = btrfs_root_node(root); | 935 | node = btrfs_root_node(root); |
942 | start = node->start; | 936 | start = node->start; |
943 | level = btrfs_header_level(node); | ||
944 | generation = btrfs_header_generation(node); | 937 | generation = btrfs_header_generation(node); |
945 | free_extent_buffer(node); | 938 | free_extent_buffer(node); |
946 | 939 | ||
947 | ret = reada_add_block(rc, start, &max_key, level, generation); | 940 | ret = reada_add_block(rc, start, &max_key, generation); |
948 | if (ret) { | 941 | if (ret) { |
949 | kfree(rc); | 942 | kfree(rc); |
950 | return ERR_PTR(ret); | 943 | return ERR_PTR(ret); |
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
959 | int btrfs_reada_wait(void *handle) | 952 | int btrfs_reada_wait(void *handle) |
960 | { | 953 | { |
961 | struct reada_control *rc = handle; | 954 | struct reada_control *rc = handle; |
955 | struct btrfs_fs_info *fs_info = rc->root->fs_info; | ||
962 | 956 | ||
963 | while (atomic_read(&rc->elems)) { | 957 | while (atomic_read(&rc->elems)) { |
958 | if (!atomic_read(&fs_info->reada_works_cnt)) | ||
959 | reada_start_machine(fs_info); | ||
964 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, | 960 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, |
965 | 5 * HZ); | 961 | 5 * HZ); |
966 | dump_devs(rc->root->fs_info, | 962 | dump_devs(rc->root->fs_info, |
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle) | |||
977 | int btrfs_reada_wait(void *handle) | 973 | int btrfs_reada_wait(void *handle) |
978 | { | 974 | { |
979 | struct reada_control *rc = handle; | 975 | struct reada_control *rc = handle; |
976 | struct btrfs_fs_info *fs_info = rc->root->fs_info; | ||
980 | 977 | ||
981 | while (atomic_read(&rc->elems)) { | 978 | while (atomic_read(&rc->elems)) { |
982 | wait_event(rc->wait, atomic_read(&rc->elems) == 0); | 979 | if (!atomic_read(&fs_info->reada_works_cnt)) |
980 | reada_start_machine(fs_info); | ||
981 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, | ||
982 | (HZ + 9) / 10); | ||
983 | } | 983 | } |
984 | 984 | ||
985 | kref_put(&rc->refcnt, reada_control_release); | 985 | kref_put(&rc->refcnt, reada_control_release); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 2c849b08a91b..9fcd6dfc3266 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -496,7 +496,7 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, | |||
496 | struct btrfs_root *root) | 496 | struct btrfs_root *root) |
497 | { | 497 | { |
498 | struct btrfs_root_item *item = &root->root_item; | 498 | struct btrfs_root_item *item = &root->root_item; |
499 | struct timespec ct = CURRENT_TIME; | 499 | struct timespec ct = current_fs_time(root->fs_info->sb); |
500 | 500 | ||
501 | spin_lock(&root->root_item_lock); | 501 | spin_lock(&root->root_item_lock); |
502 | btrfs_set_root_ctransid(item, trans->transid); | 502 | btrfs_set_root_ctransid(item, trans->transid); |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 92bf5ee732fb..39dbdcbf4d13 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -461,7 +461,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
461 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 461 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; |
462 | int ret; | 462 | int ret; |
463 | 463 | ||
464 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); | 464 | sctx = kzalloc(sizeof(*sctx), GFP_KERNEL); |
465 | if (!sctx) | 465 | if (!sctx) |
466 | goto nomem; | 466 | goto nomem; |
467 | atomic_set(&sctx->refs, 1); | 467 | atomic_set(&sctx->refs, 1); |
@@ -472,7 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
472 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { | 472 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { |
473 | struct scrub_bio *sbio; | 473 | struct scrub_bio *sbio; |
474 | 474 | ||
475 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | 475 | sbio = kzalloc(sizeof(*sbio), GFP_KERNEL); |
476 | if (!sbio) | 476 | if (!sbio) |
477 | goto nomem; | 477 | goto nomem; |
478 | sctx->bios[i] = sbio; | 478 | sctx->bios[i] = sbio; |
@@ -611,7 +611,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
611 | u64 flags = 0; | 611 | u64 flags = 0; |
612 | u64 ref_root; | 612 | u64 ref_root; |
613 | u32 item_size; | 613 | u32 item_size; |
614 | u8 ref_level; | 614 | u8 ref_level = 0; |
615 | int ret; | 615 | int ret; |
616 | 616 | ||
617 | WARN_ON(sblock->page_count < 1); | 617 | WARN_ON(sblock->page_count < 1); |
@@ -1654,7 +1654,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | |||
1654 | again: | 1654 | again: |
1655 | if (!wr_ctx->wr_curr_bio) { | 1655 | if (!wr_ctx->wr_curr_bio) { |
1656 | wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), | 1656 | wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), |
1657 | GFP_NOFS); | 1657 | GFP_KERNEL); |
1658 | if (!wr_ctx->wr_curr_bio) { | 1658 | if (!wr_ctx->wr_curr_bio) { |
1659 | mutex_unlock(&wr_ctx->wr_lock); | 1659 | mutex_unlock(&wr_ctx->wr_lock); |
1660 | return -ENOMEM; | 1660 | return -ENOMEM; |
@@ -1671,7 +1671,8 @@ again: | |||
1671 | sbio->dev = wr_ctx->tgtdev; | 1671 | sbio->dev = wr_ctx->tgtdev; |
1672 | bio = sbio->bio; | 1672 | bio = sbio->bio; |
1673 | if (!bio) { | 1673 | if (!bio) { |
1674 | bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); | 1674 | bio = btrfs_io_bio_alloc(GFP_KERNEL, |
1675 | wr_ctx->pages_per_wr_bio); | ||
1675 | if (!bio) { | 1676 | if (!bio) { |
1676 | mutex_unlock(&wr_ctx->wr_lock); | 1677 | mutex_unlock(&wr_ctx->wr_lock); |
1677 | return -ENOMEM; | 1678 | return -ENOMEM; |
@@ -2076,7 +2077,8 @@ again: | |||
2076 | sbio->dev = spage->dev; | 2077 | sbio->dev = spage->dev; |
2077 | bio = sbio->bio; | 2078 | bio = sbio->bio; |
2078 | if (!bio) { | 2079 | if (!bio) { |
2079 | bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); | 2080 | bio = btrfs_io_bio_alloc(GFP_KERNEL, |
2081 | sctx->pages_per_rd_bio); | ||
2080 | if (!bio) | 2082 | if (!bio) |
2081 | return -ENOMEM; | 2083 | return -ENOMEM; |
2082 | sbio->bio = bio; | 2084 | sbio->bio = bio; |
@@ -2241,7 +2243,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2241 | struct scrub_block *sblock; | 2243 | struct scrub_block *sblock; |
2242 | int index; | 2244 | int index; |
2243 | 2245 | ||
2244 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); | 2246 | sblock = kzalloc(sizeof(*sblock), GFP_KERNEL); |
2245 | if (!sblock) { | 2247 | if (!sblock) { |
2246 | spin_lock(&sctx->stat_lock); | 2248 | spin_lock(&sctx->stat_lock); |
2247 | sctx->stat.malloc_errors++; | 2249 | sctx->stat.malloc_errors++; |
@@ -2259,7 +2261,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2259 | struct scrub_page *spage; | 2261 | struct scrub_page *spage; |
2260 | u64 l = min_t(u64, len, PAGE_SIZE); | 2262 | u64 l = min_t(u64, len, PAGE_SIZE); |
2261 | 2263 | ||
2262 | spage = kzalloc(sizeof(*spage), GFP_NOFS); | 2264 | spage = kzalloc(sizeof(*spage), GFP_KERNEL); |
2263 | if (!spage) { | 2265 | if (!spage) { |
2264 | leave_nomem: | 2266 | leave_nomem: |
2265 | spin_lock(&sctx->stat_lock); | 2267 | spin_lock(&sctx->stat_lock); |
@@ -2286,7 +2288,7 @@ leave_nomem: | |||
2286 | spage->have_csum = 0; | 2288 | spage->have_csum = 0; |
2287 | } | 2289 | } |
2288 | sblock->page_count++; | 2290 | sblock->page_count++; |
2289 | spage->page = alloc_page(GFP_NOFS); | 2291 | spage->page = alloc_page(GFP_KERNEL); |
2290 | if (!spage->page) | 2292 | if (!spage->page) |
2291 | goto leave_nomem; | 2293 | goto leave_nomem; |
2292 | len -= l; | 2294 | len -= l; |
@@ -2541,7 +2543,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, | |||
2541 | struct scrub_block *sblock; | 2543 | struct scrub_block *sblock; |
2542 | int index; | 2544 | int index; |
2543 | 2545 | ||
2544 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); | 2546 | sblock = kzalloc(sizeof(*sblock), GFP_KERNEL); |
2545 | if (!sblock) { | 2547 | if (!sblock) { |
2546 | spin_lock(&sctx->stat_lock); | 2548 | spin_lock(&sctx->stat_lock); |
2547 | sctx->stat.malloc_errors++; | 2549 | sctx->stat.malloc_errors++; |
@@ -2561,7 +2563,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, | |||
2561 | struct scrub_page *spage; | 2563 | struct scrub_page *spage; |
2562 | u64 l = min_t(u64, len, PAGE_SIZE); | 2564 | u64 l = min_t(u64, len, PAGE_SIZE); |
2563 | 2565 | ||
2564 | spage = kzalloc(sizeof(*spage), GFP_NOFS); | 2566 | spage = kzalloc(sizeof(*spage), GFP_KERNEL); |
2565 | if (!spage) { | 2567 | if (!spage) { |
2566 | leave_nomem: | 2568 | leave_nomem: |
2567 | spin_lock(&sctx->stat_lock); | 2569 | spin_lock(&sctx->stat_lock); |
@@ -2591,7 +2593,7 @@ leave_nomem: | |||
2591 | spage->have_csum = 0; | 2593 | spage->have_csum = 0; |
2592 | } | 2594 | } |
2593 | sblock->page_count++; | 2595 | sblock->page_count++; |
2594 | spage->page = alloc_page(GFP_NOFS); | 2596 | spage->page = alloc_page(GFP_KERNEL); |
2595 | if (!spage->page) | 2597 | if (!spage->page) |
2596 | goto leave_nomem; | 2598 | goto leave_nomem; |
2597 | len -= l; | 2599 | len -= l; |
@@ -3857,16 +3859,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
3857 | return -EIO; | 3859 | return -EIO; |
3858 | } | 3860 | } |
3859 | 3861 | ||
3860 | btrfs_dev_replace_lock(&fs_info->dev_replace); | 3862 | btrfs_dev_replace_lock(&fs_info->dev_replace, 0); |
3861 | if (dev->scrub_device || | 3863 | if (dev->scrub_device || |
3862 | (!is_dev_replace && | 3864 | (!is_dev_replace && |
3863 | btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { | 3865 | btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { |
3864 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 3866 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
3865 | mutex_unlock(&fs_info->scrub_lock); | 3867 | mutex_unlock(&fs_info->scrub_lock); |
3866 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 3868 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
3867 | return -EINPROGRESS; | 3869 | return -EINPROGRESS; |
3868 | } | 3870 | } |
3869 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 3871 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
3870 | 3872 | ||
3871 | ret = scrub_workers_get(fs_info, is_dev_replace); | 3873 | ret = scrub_workers_get(fs_info, is_dev_replace); |
3872 | if (ret) { | 3874 | if (ret) { |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 63a6152be04b..19b7bf4284ee 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "disk-io.h" | 34 | #include "disk-io.h" |
35 | #include "btrfs_inode.h" | 35 | #include "btrfs_inode.h" |
36 | #include "transaction.h" | 36 | #include "transaction.h" |
37 | #include "compression.h" | ||
37 | 38 | ||
38 | static int g_verbose = 0; | 39 | static int g_verbose = 0; |
39 | 40 | ||
@@ -304,7 +305,7 @@ static struct fs_path *fs_path_alloc(void) | |||
304 | { | 305 | { |
305 | struct fs_path *p; | 306 | struct fs_path *p; |
306 | 307 | ||
307 | p = kmalloc(sizeof(*p), GFP_NOFS); | 308 | p = kmalloc(sizeof(*p), GFP_KERNEL); |
308 | if (!p) | 309 | if (!p) |
309 | return NULL; | 310 | return NULL; |
310 | p->reversed = 0; | 311 | p->reversed = 0; |
@@ -363,11 +364,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
363 | * First time the inline_buf does not suffice | 364 | * First time the inline_buf does not suffice |
364 | */ | 365 | */ |
365 | if (p->buf == p->inline_buf) { | 366 | if (p->buf == p->inline_buf) { |
366 | tmp_buf = kmalloc(len, GFP_NOFS); | 367 | tmp_buf = kmalloc(len, GFP_KERNEL); |
367 | if (tmp_buf) | 368 | if (tmp_buf) |
368 | memcpy(tmp_buf, p->buf, old_buf_len); | 369 | memcpy(tmp_buf, p->buf, old_buf_len); |
369 | } else { | 370 | } else { |
370 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | 371 | tmp_buf = krealloc(p->buf, len, GFP_KERNEL); |
371 | } | 372 | } |
372 | if (!tmp_buf) | 373 | if (!tmp_buf) |
373 | return -ENOMEM; | 374 | return -ENOMEM; |
@@ -995,7 +996,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
995 | * values are small. | 996 | * values are small. |
996 | */ | 997 | */ |
997 | buf_len = PATH_MAX; | 998 | buf_len = PATH_MAX; |
998 | buf = kmalloc(buf_len, GFP_NOFS); | 999 | buf = kmalloc(buf_len, GFP_KERNEL); |
999 | if (!buf) { | 1000 | if (!buf) { |
1000 | ret = -ENOMEM; | 1001 | ret = -ENOMEM; |
1001 | goto out; | 1002 | goto out; |
@@ -1042,7 +1043,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1042 | buf = NULL; | 1043 | buf = NULL; |
1043 | } else { | 1044 | } else { |
1044 | char *tmp = krealloc(buf, buf_len, | 1045 | char *tmp = krealloc(buf, buf_len, |
1045 | GFP_NOFS | __GFP_NOWARN); | 1046 | GFP_KERNEL | __GFP_NOWARN); |
1046 | 1047 | ||
1047 | if (!tmp) | 1048 | if (!tmp) |
1048 | kfree(buf); | 1049 | kfree(buf); |
@@ -1303,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1303 | /* We only use this path under the commit sem */ | 1304 | /* We only use this path under the commit sem */ |
1304 | tmp_path->need_commit_sem = 0; | 1305 | tmp_path->need_commit_sem = 0; |
1305 | 1306 | ||
1306 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); | 1307 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL); |
1307 | if (!backref_ctx) { | 1308 | if (!backref_ctx) { |
1308 | ret = -ENOMEM; | 1309 | ret = -ENOMEM; |
1309 | goto out; | 1310 | goto out; |
@@ -1984,7 +1985,7 @@ static int name_cache_insert(struct send_ctx *sctx, | |||
1984 | nce_head = radix_tree_lookup(&sctx->name_cache, | 1985 | nce_head = radix_tree_lookup(&sctx->name_cache, |
1985 | (unsigned long)nce->ino); | 1986 | (unsigned long)nce->ino); |
1986 | if (!nce_head) { | 1987 | if (!nce_head) { |
1987 | nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); | 1988 | nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL); |
1988 | if (!nce_head) { | 1989 | if (!nce_head) { |
1989 | kfree(nce); | 1990 | kfree(nce); |
1990 | return -ENOMEM; | 1991 | return -ENOMEM; |
@@ -2179,7 +2180,7 @@ out_cache: | |||
2179 | /* | 2180 | /* |
2180 | * Store the result of the lookup in the name cache. | 2181 | * Store the result of the lookup in the name cache. |
2181 | */ | 2182 | */ |
2182 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); | 2183 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); |
2183 | if (!nce) { | 2184 | if (!nce) { |
2184 | ret = -ENOMEM; | 2185 | ret = -ENOMEM; |
2185 | goto out; | 2186 | goto out; |
@@ -2315,7 +2316,7 @@ static int send_subvol_begin(struct send_ctx *sctx) | |||
2315 | if (!path) | 2316 | if (!path) |
2316 | return -ENOMEM; | 2317 | return -ENOMEM; |
2317 | 2318 | ||
2318 | name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); | 2319 | name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); |
2319 | if (!name) { | 2320 | if (!name) { |
2320 | btrfs_free_path(path); | 2321 | btrfs_free_path(path); |
2321 | return -ENOMEM; | 2322 | return -ENOMEM; |
@@ -2730,7 +2731,7 @@ static int __record_ref(struct list_head *head, u64 dir, | |||
2730 | { | 2731 | { |
2731 | struct recorded_ref *ref; | 2732 | struct recorded_ref *ref; |
2732 | 2733 | ||
2733 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | 2734 | ref = kmalloc(sizeof(*ref), GFP_KERNEL); |
2734 | if (!ref) | 2735 | if (!ref) |
2735 | return -ENOMEM; | 2736 | return -ENOMEM; |
2736 | 2737 | ||
@@ -2755,7 +2756,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list) | |||
2755 | { | 2756 | { |
2756 | struct recorded_ref *new; | 2757 | struct recorded_ref *new; |
2757 | 2758 | ||
2758 | new = kmalloc(sizeof(*ref), GFP_NOFS); | 2759 | new = kmalloc(sizeof(*ref), GFP_KERNEL); |
2759 | if (!new) | 2760 | if (!new) |
2760 | return -ENOMEM; | 2761 | return -ENOMEM; |
2761 | 2762 | ||
@@ -2818,7 +2819,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | |||
2818 | struct rb_node *parent = NULL; | 2819 | struct rb_node *parent = NULL; |
2819 | struct orphan_dir_info *entry, *odi; | 2820 | struct orphan_dir_info *entry, *odi; |
2820 | 2821 | ||
2821 | odi = kmalloc(sizeof(*odi), GFP_NOFS); | 2822 | odi = kmalloc(sizeof(*odi), GFP_KERNEL); |
2822 | if (!odi) | 2823 | if (!odi) |
2823 | return ERR_PTR(-ENOMEM); | 2824 | return ERR_PTR(-ENOMEM); |
2824 | odi->ino = dir_ino; | 2825 | odi->ino = dir_ino; |
@@ -2973,7 +2974,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) | |||
2973 | struct rb_node *parent = NULL; | 2974 | struct rb_node *parent = NULL; |
2974 | struct waiting_dir_move *entry, *dm; | 2975 | struct waiting_dir_move *entry, *dm; |
2975 | 2976 | ||
2976 | dm = kmalloc(sizeof(*dm), GFP_NOFS); | 2977 | dm = kmalloc(sizeof(*dm), GFP_KERNEL); |
2977 | if (!dm) | 2978 | if (!dm) |
2978 | return -ENOMEM; | 2979 | return -ENOMEM; |
2979 | dm->ino = ino; | 2980 | dm->ino = ino; |
@@ -3040,7 +3041,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
3040 | int exists = 0; | 3041 | int exists = 0; |
3041 | int ret; | 3042 | int ret; |
3042 | 3043 | ||
3043 | pm = kmalloc(sizeof(*pm), GFP_NOFS); | 3044 | pm = kmalloc(sizeof(*pm), GFP_KERNEL); |
3044 | if (!pm) | 3045 | if (!pm) |
3045 | return -ENOMEM; | 3046 | return -ENOMEM; |
3046 | pm->parent_ino = parent_ino; | 3047 | pm->parent_ino = parent_ino; |
@@ -4280,7 +4281,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key, | |||
4280 | strncmp(name, ctx->name, name_len) == 0) { | 4281 | strncmp(name, ctx->name, name_len) == 0) { |
4281 | ctx->found_idx = num; | 4282 | ctx->found_idx = num; |
4282 | ctx->found_data_len = data_len; | 4283 | ctx->found_data_len = data_len; |
4283 | ctx->found_data = kmemdup(data, data_len, GFP_NOFS); | 4284 | ctx->found_data = kmemdup(data, data_len, GFP_KERNEL); |
4284 | if (!ctx->found_data) | 4285 | if (!ctx->found_data) |
4285 | return -ENOMEM; | 4286 | return -ENOMEM; |
4286 | return 1; | 4287 | return 1; |
@@ -4481,7 +4482,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
4481 | while (index <= last_index) { | 4482 | while (index <= last_index) { |
4482 | unsigned cur_len = min_t(unsigned, len, | 4483 | unsigned cur_len = min_t(unsigned, len, |
4483 | PAGE_CACHE_SIZE - pg_offset); | 4484 | PAGE_CACHE_SIZE - pg_offset); |
4484 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | 4485 | page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); |
4485 | if (!page) { | 4486 | if (!page) { |
4486 | ret = -ENOMEM; | 4487 | ret = -ENOMEM; |
4487 | break; | 4488 | break; |
@@ -5989,7 +5990,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5989 | goto out; | 5990 | goto out; |
5990 | } | 5991 | } |
5991 | 5992 | ||
5992 | sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); | 5993 | sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL); |
5993 | if (!sctx) { | 5994 | if (!sctx) { |
5994 | ret = -ENOMEM; | 5995 | ret = -ENOMEM; |
5995 | goto out; | 5996 | goto out; |
@@ -5997,7 +5998,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5997 | 5998 | ||
5998 | INIT_LIST_HEAD(&sctx->new_refs); | 5999 | INIT_LIST_HEAD(&sctx->new_refs); |
5999 | INIT_LIST_HEAD(&sctx->deleted_refs); | 6000 | INIT_LIST_HEAD(&sctx->deleted_refs); |
6000 | INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); | 6001 | INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); |
6001 | INIT_LIST_HEAD(&sctx->name_cache_list); | 6002 | INIT_LIST_HEAD(&sctx->name_cache_list); |
6002 | 6003 | ||
6003 | sctx->flags = arg->flags; | 6004 | sctx->flags = arg->flags; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d41e09fe8e38..00b8f37cc306 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -303,7 +303,8 @@ enum { | |||
303 | Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, | 303 | Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, |
304 | Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, | 304 | Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, |
305 | Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, | 305 | Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, |
306 | Opt_datasum, Opt_treelog, Opt_noinode_cache, | 306 | Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot, |
307 | Opt_nologreplay, Opt_norecovery, | ||
307 | #ifdef CONFIG_BTRFS_DEBUG | 308 | #ifdef CONFIG_BTRFS_DEBUG |
308 | Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, | 309 | Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, |
309 | #endif | 310 | #endif |
@@ -335,6 +336,8 @@ static const match_table_t tokens = { | |||
335 | {Opt_noacl, "noacl"}, | 336 | {Opt_noacl, "noacl"}, |
336 | {Opt_notreelog, "notreelog"}, | 337 | {Opt_notreelog, "notreelog"}, |
337 | {Opt_treelog, "treelog"}, | 338 | {Opt_treelog, "treelog"}, |
339 | {Opt_nologreplay, "nologreplay"}, | ||
340 | {Opt_norecovery, "norecovery"}, | ||
338 | {Opt_flushoncommit, "flushoncommit"}, | 341 | {Opt_flushoncommit, "flushoncommit"}, |
339 | {Opt_noflushoncommit, "noflushoncommit"}, | 342 | {Opt_noflushoncommit, "noflushoncommit"}, |
340 | {Opt_ratio, "metadata_ratio=%d"}, | 343 | {Opt_ratio, "metadata_ratio=%d"}, |
@@ -352,7 +355,8 @@ static const match_table_t tokens = { | |||
352 | {Opt_inode_cache, "inode_cache"}, | 355 | {Opt_inode_cache, "inode_cache"}, |
353 | {Opt_noinode_cache, "noinode_cache"}, | 356 | {Opt_noinode_cache, "noinode_cache"}, |
354 | {Opt_no_space_cache, "nospace_cache"}, | 357 | {Opt_no_space_cache, "nospace_cache"}, |
355 | {Opt_recovery, "recovery"}, | 358 | {Opt_recovery, "recovery"}, /* deprecated */ |
359 | {Opt_usebackuproot, "usebackuproot"}, | ||
356 | {Opt_skip_balance, "skip_balance"}, | 360 | {Opt_skip_balance, "skip_balance"}, |
357 | {Opt_check_integrity, "check_int"}, | 361 | {Opt_check_integrity, "check_int"}, |
358 | {Opt_check_integrity_including_extent_data, "check_int_data"}, | 362 | {Opt_check_integrity_including_extent_data, "check_int_data"}, |
@@ -373,7 +377,8 @@ static const match_table_t tokens = { | |||
373 | * reading in a new superblock is parsed here. | 377 | * reading in a new superblock is parsed here. |
374 | * XXX JDM: This needs to be cleaned up for remount. | 378 | * XXX JDM: This needs to be cleaned up for remount. |
375 | */ | 379 | */ |
376 | int btrfs_parse_options(struct btrfs_root *root, char *options) | 380 | int btrfs_parse_options(struct btrfs_root *root, char *options, |
381 | unsigned long new_flags) | ||
377 | { | 382 | { |
378 | struct btrfs_fs_info *info = root->fs_info; | 383 | struct btrfs_fs_info *info = root->fs_info; |
379 | substring_t args[MAX_OPT_ARGS]; | 384 | substring_t args[MAX_OPT_ARGS]; |
@@ -393,8 +398,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
393 | else if (cache_gen) | 398 | else if (cache_gen) |
394 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 399 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
395 | 400 | ||
401 | /* | ||
402 | * Even if the options are empty, we still need to do an extra check | ||
403 | * against the new flags | ||
404 | */ | ||
396 | if (!options) | 405 | if (!options) |
397 | goto out; | 406 | goto check; |
398 | 407 | ||
399 | /* | 408 | /* |
400 | * strsep changes the string, duplicate it because parse_options | 409 | * strsep changes the string, duplicate it because parse_options |
@@ -606,6 +615,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
606 | btrfs_clear_and_info(root, NOTREELOG, | 615 | btrfs_clear_and_info(root, NOTREELOG, |
607 | "enabling tree log"); | 616 | "enabling tree log"); |
608 | break; | 617 | break; |
618 | case Opt_norecovery: | ||
619 | case Opt_nologreplay: | ||
620 | btrfs_set_and_info(root, NOLOGREPLAY, | ||
621 | "disabling log replay at mount time"); | ||
622 | break; | ||
609 | case Opt_flushoncommit: | 623 | case Opt_flushoncommit: |
610 | btrfs_set_and_info(root, FLUSHONCOMMIT, | 624 | btrfs_set_and_info(root, FLUSHONCOMMIT, |
611 | "turning on flush-on-commit"); | 625 | "turning on flush-on-commit"); |
@@ -696,8 +710,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
696 | "disabling auto defrag"); | 710 | "disabling auto defrag"); |
697 | break; | 711 | break; |
698 | case Opt_recovery: | 712 | case Opt_recovery: |
699 | btrfs_info(root->fs_info, "enabling auto recovery"); | 713 | btrfs_warn(root->fs_info, |
700 | btrfs_set_opt(info->mount_opt, RECOVERY); | 714 | "'recovery' is deprecated, use 'usebackuproot' instead"); |
715 | case Opt_usebackuproot: | ||
716 | btrfs_info(root->fs_info, | ||
717 | "trying to use backup root at mount time"); | ||
718 | btrfs_set_opt(info->mount_opt, USEBACKUPROOT); | ||
701 | break; | 719 | break; |
702 | case Opt_skip_balance: | 720 | case Opt_skip_balance: |
703 | btrfs_set_opt(info->mount_opt, SKIP_BALANCE); | 721 | btrfs_set_opt(info->mount_opt, SKIP_BALANCE); |
@@ -792,6 +810,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
792 | break; | 810 | break; |
793 | } | 811 | } |
794 | } | 812 | } |
813 | check: | ||
814 | /* | ||
815 | * Extra check for current option against current flag | ||
816 | */ | ||
817 | if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) { | ||
818 | btrfs_err(root->fs_info, | ||
819 | "nologreplay must be used with ro mount option"); | ||
820 | ret = -EINVAL; | ||
821 | } | ||
795 | out: | 822 | out: |
796 | if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && | 823 | if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && |
797 | !btrfs_test_opt(root, FREE_SPACE_TREE) && | 824 | !btrfs_test_opt(root, FREE_SPACE_TREE) && |
@@ -1202,6 +1229,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1202 | seq_puts(seq, ",ssd"); | 1229 | seq_puts(seq, ",ssd"); |
1203 | if (btrfs_test_opt(root, NOTREELOG)) | 1230 | if (btrfs_test_opt(root, NOTREELOG)) |
1204 | seq_puts(seq, ",notreelog"); | 1231 | seq_puts(seq, ",notreelog"); |
1232 | if (btrfs_test_opt(root, NOLOGREPLAY)) | ||
1233 | seq_puts(seq, ",nologreplay"); | ||
1205 | if (btrfs_test_opt(root, FLUSHONCOMMIT)) | 1234 | if (btrfs_test_opt(root, FLUSHONCOMMIT)) |
1206 | seq_puts(seq, ",flushoncommit"); | 1235 | seq_puts(seq, ",flushoncommit"); |
1207 | if (btrfs_test_opt(root, DISCARD)) | 1236 | if (btrfs_test_opt(root, DISCARD)) |
@@ -1228,8 +1257,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1228 | seq_puts(seq, ",inode_cache"); | 1257 | seq_puts(seq, ",inode_cache"); |
1229 | if (btrfs_test_opt(root, SKIP_BALANCE)) | 1258 | if (btrfs_test_opt(root, SKIP_BALANCE)) |
1230 | seq_puts(seq, ",skip_balance"); | 1259 | seq_puts(seq, ",skip_balance"); |
1231 | if (btrfs_test_opt(root, RECOVERY)) | ||
1232 | seq_puts(seq, ",recovery"); | ||
1233 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1260 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
1234 | if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) | 1261 | if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) |
1235 | seq_puts(seq, ",check_int_data"); | 1262 | seq_puts(seq, ",check_int_data"); |
@@ -1685,7 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1685 | } | 1712 | } |
1686 | } | 1713 | } |
1687 | 1714 | ||
1688 | ret = btrfs_parse_options(root, data); | 1715 | ret = btrfs_parse_options(root, data, *flags); |
1689 | if (ret) { | 1716 | if (ret) { |
1690 | ret = -EINVAL; | 1717 | ret = -EINVAL; |
1691 | goto restore; | 1718 | goto restore; |
@@ -2163,6 +2190,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
2163 | break; | 2190 | break; |
2164 | ret = !(fs_devices->num_devices == fs_devices->total_devices); | 2191 | ret = !(fs_devices->num_devices == fs_devices->total_devices); |
2165 | break; | 2192 | break; |
2193 | case BTRFS_IOC_GET_SUPPORTED_FEATURES: | ||
2194 | ret = btrfs_ioctl_get_supported_features((void __user*)arg); | ||
2195 | break; | ||
2166 | } | 2196 | } |
2167 | 2197 | ||
2168 | kfree(vol); | 2198 | kfree(vol); |
@@ -2261,7 +2291,7 @@ static void btrfs_interface_exit(void) | |||
2261 | misc_deregister(&btrfs_misc); | 2291 | misc_deregister(&btrfs_misc); |
2262 | } | 2292 | } |
2263 | 2293 | ||
2264 | static void btrfs_print_info(void) | 2294 | static void btrfs_print_mod_info(void) |
2265 | { | 2295 | { |
2266 | printk(KERN_INFO "Btrfs loaded" | 2296 | printk(KERN_INFO "Btrfs loaded" |
2267 | #ifdef CONFIG_BTRFS_DEBUG | 2297 | #ifdef CONFIG_BTRFS_DEBUG |
@@ -2363,7 +2393,7 @@ static int __init init_btrfs_fs(void) | |||
2363 | 2393 | ||
2364 | btrfs_init_lockdep(); | 2394 | btrfs_init_lockdep(); |
2365 | 2395 | ||
2366 | btrfs_print_info(); | 2396 | btrfs_print_mod_info(); |
2367 | 2397 | ||
2368 | err = btrfs_run_sanity_tests(); | 2398 | err = btrfs_run_sanity_tests(); |
2369 | if (err) | 2399 | if (err) |
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 1c76d73e06dc..f54bf450bad3 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c | |||
@@ -188,12 +188,6 @@ btrfs_alloc_dummy_block_group(unsigned long length) | |||
188 | kfree(cache); | 188 | kfree(cache); |
189 | return NULL; | 189 | return NULL; |
190 | } | 190 | } |
191 | cache->fs_info = btrfs_alloc_dummy_fs_info(); | ||
192 | if (!cache->fs_info) { | ||
193 | kfree(cache->free_space_ctl); | ||
194 | kfree(cache); | ||
195 | return NULL; | ||
196 | } | ||
197 | 191 | ||
198 | cache->key.objectid = 0; | 192 | cache->key.objectid = 0; |
199 | cache->key.offset = length; | 193 | cache->key.offset = length; |
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index d05fe1ab4808..7cea4462acd5 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c | |||
@@ -485,6 +485,7 @@ static int run_test(test_func_t test_func, int bitmaps) | |||
485 | cache->bitmap_low_thresh = 0; | 485 | cache->bitmap_low_thresh = 0; |
486 | cache->bitmap_high_thresh = (u32)-1; | 486 | cache->bitmap_high_thresh = (u32)-1; |
487 | cache->needs_free_space = 1; | 487 | cache->needs_free_space = 1; |
488 | cache->fs_info = root->fs_info; | ||
488 | 489 | ||
489 | btrfs_init_dummy_trans(&trans); | 490 | btrfs_init_dummy_trans(&trans); |
490 | 491 | ||
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index e2d3da02deee..863a6a3af1f8 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "../disk-io.h" | 22 | #include "../disk-io.h" |
23 | #include "../extent_io.h" | 23 | #include "../extent_io.h" |
24 | #include "../volumes.h" | 24 | #include "../volumes.h" |
25 | #include "../compression.h" | ||
25 | 26 | ||
26 | static void insert_extent(struct btrfs_root *root, u64 start, u64 len, | 27 | static void insert_extent(struct btrfs_root *root, u64 start, u64 len, |
27 | u64 ram_bytes, u64 offset, u64 disk_bytenr, | 28 | u64 ram_bytes, u64 offset, u64 disk_bytenr, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b6031ce474f7..43885e51b882 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -637,6 +637,8 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( | |||
637 | 637 | ||
638 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 638 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
639 | trans->bytes_reserved = num_bytes; | 639 | trans->bytes_reserved = num_bytes; |
640 | trace_btrfs_space_reservation(root->fs_info, "transaction", | ||
641 | trans->transid, num_bytes, 1); | ||
640 | 642 | ||
641 | return trans; | 643 | return trans; |
642 | } | 644 | } |
@@ -1333,7 +1335,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1333 | struct dentry *dentry; | 1335 | struct dentry *dentry; |
1334 | struct extent_buffer *tmp; | 1336 | struct extent_buffer *tmp; |
1335 | struct extent_buffer *old; | 1337 | struct extent_buffer *old; |
1336 | struct timespec cur_time = CURRENT_TIME; | 1338 | struct timespec cur_time; |
1337 | int ret = 0; | 1339 | int ret = 0; |
1338 | u64 to_reserve = 0; | 1340 | u64 to_reserve = 0; |
1339 | u64 index = 0; | 1341 | u64 index = 0; |
@@ -1375,12 +1377,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1375 | rsv = trans->block_rsv; | 1377 | rsv = trans->block_rsv; |
1376 | trans->block_rsv = &pending->block_rsv; | 1378 | trans->block_rsv = &pending->block_rsv; |
1377 | trans->bytes_reserved = trans->block_rsv->reserved; | 1379 | trans->bytes_reserved = trans->block_rsv->reserved; |
1378 | 1380 | trace_btrfs_space_reservation(root->fs_info, "transaction", | |
1381 | trans->transid, | ||
1382 | trans->bytes_reserved, 1); | ||
1379 | dentry = pending->dentry; | 1383 | dentry = pending->dentry; |
1380 | parent_inode = pending->dir; | 1384 | parent_inode = pending->dir; |
1381 | parent_root = BTRFS_I(parent_inode)->root; | 1385 | parent_root = BTRFS_I(parent_inode)->root; |
1382 | record_root_in_trans(trans, parent_root); | 1386 | record_root_in_trans(trans, parent_root); |
1383 | 1387 | ||
1388 | cur_time = current_fs_time(parent_inode->i_sb); | ||
1389 | |||
1384 | /* | 1390 | /* |
1385 | * insert the directory item | 1391 | * insert the directory item |
1386 | */ | 1392 | */ |
@@ -1523,7 +1529,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1523 | 1529 | ||
1524 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 1530 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
1525 | dentry->d_name.len * 2); | 1531 | dentry->d_name.len * 2); |
1526 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 1532 | parent_inode->i_mtime = parent_inode->i_ctime = |
1533 | current_fs_time(parent_inode->i_sb); | ||
1527 | ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); | 1534 | ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); |
1528 | if (ret) { | 1535 | if (ret) { |
1529 | btrfs_abort_transaction(trans, root, ret); | 1536 | btrfs_abort_transaction(trans, root, ret); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 978c3a810893..24d03c751149 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "print-tree.h" | 26 | #include "print-tree.h" |
27 | #include "backref.h" | 27 | #include "backref.h" |
28 | #include "hash.h" | 28 | #include "hash.h" |
29 | #include "compression.h" | ||
29 | 30 | ||
30 | /* magic values for the inode_only field in btrfs_log_inode: | 31 | /* magic values for the inode_only field in btrfs_log_inode: |
31 | * | 32 | * |
@@ -1045,7 +1046,7 @@ again: | |||
1045 | 1046 | ||
1046 | /* | 1047 | /* |
1047 | * NOTE: we have searched root tree and checked the | 1048 | * NOTE: we have searched root tree and checked the |
1048 | * coresponding ref, it does not need to check again. | 1049 | * corresponding ref, it does not need to check again. |
1049 | */ | 1050 | */ |
1050 | *search_done = 1; | 1051 | *search_done = 1; |
1051 | } | 1052 | } |
@@ -4500,7 +4501,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4500 | 4501 | ||
4501 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 4502 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
4502 | 4503 | ||
4503 | btrfs_get_logged_extents(inode, &logged_list, start, end); | 4504 | /* |
4505 | * Collect ordered extents only if we are logging data. This is to | ||
4506 | * ensure a subsequent request to log this inode in LOG_INODE_ALL mode | ||
4507 | * will process the ordered extents if they still exists at the time, | ||
4508 | * because when we collect them we test and set for the flag | ||
4509 | * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the | ||
4510 | * same ordered extents. The consequence for the LOG_INODE_ALL log mode | ||
4511 | * not processing the ordered extents is that we end up logging the | ||
4512 | * corresponding file extent items, based on the extent maps in the | ||
4513 | * inode's extent_map_tree's modified_list, without logging the | ||
4514 | * respective checksums (since they may still be only attached to the | ||
4515 | * ordered extents and have not been inserted in the csum tree by | ||
4516 | * btrfs_finish_ordered_io() yet). | ||
4517 | */ | ||
4518 | if (inode_only == LOG_INODE_ALL) | ||
4519 | btrfs_get_logged_extents(inode, &logged_list, start, end); | ||
4504 | 4520 | ||
4505 | /* | 4521 | /* |
4506 | * a brute force approach to making sure we get the most uptodate | 4522 | * a brute force approach to making sure we get the most uptodate |
@@ -4772,6 +4788,42 @@ out_unlock: | |||
4772 | } | 4788 | } |
4773 | 4789 | ||
4774 | /* | 4790 | /* |
4791 | * Check if we must fallback to a transaction commit when logging an inode. | ||
4792 | * This must be called after logging the inode and is used only in the context | ||
4793 | * when fsyncing an inode requires the need to log some other inode - in which | ||
4794 | * case we can't lock the i_mutex of each other inode we need to log as that | ||
4795 | * can lead to deadlocks with concurrent fsync against other inodes (as we can | ||
4796 | * log inodes up or down in the hierarchy) or rename operations for example. So | ||
4797 | * we take the log_mutex of the inode after we have logged it and then check for | ||
4798 | * its last_unlink_trans value - this is safe because any task setting | ||
4799 | * last_unlink_trans must take the log_mutex and it must do this before it does | ||
4800 | * the actual unlink operation, so if we do this check before a concurrent task | ||
4801 | * sets last_unlink_trans it means we've logged a consistent version/state of | ||
4802 | * all the inode items, otherwise we are not sure and must do a transaction | ||
4803 | * commit (the concurrent task might have only updated last_unlink_trans before | ||
4804 | * we logged the inode or it might have also done the unlink). | ||
4805 | */ | ||
4806 | static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, | ||
4807 | struct inode *inode) | ||
4808 | { | ||
4809 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
4810 | bool ret = false; | ||
4811 | |||
4812 | mutex_lock(&BTRFS_I(inode)->log_mutex); | ||
4813 | if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { | ||
4814 | /* | ||
4815 | * Make sure any commits to the log are forced to be full | ||
4816 | * commits. | ||
4817 | */ | ||
4818 | btrfs_set_log_full_commit(fs_info, trans); | ||
4819 | ret = true; | ||
4820 | } | ||
4821 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
4822 | |||
4823 | return ret; | ||
4824 | } | ||
4825 | |||
4826 | /* | ||
4775 | * follow the dentry parent pointers up the chain and see if any | 4827 | * follow the dentry parent pointers up the chain and see if any |
4776 | * of the directories in it require a full commit before they can | 4828 | * of the directories in it require a full commit before they can |
4777 | * be logged. Returns zero if nothing special needs to be done or 1 if | 4829 | * be logged. Returns zero if nothing special needs to be done or 1 if |
@@ -4784,7 +4836,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4784 | u64 last_committed) | 4836 | u64 last_committed) |
4785 | { | 4837 | { |
4786 | int ret = 0; | 4838 | int ret = 0; |
4787 | struct btrfs_root *root; | ||
4788 | struct dentry *old_parent = NULL; | 4839 | struct dentry *old_parent = NULL; |
4789 | struct inode *orig_inode = inode; | 4840 | struct inode *orig_inode = inode; |
4790 | 4841 | ||
@@ -4816,14 +4867,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4816 | BTRFS_I(inode)->logged_trans = trans->transid; | 4867 | BTRFS_I(inode)->logged_trans = trans->transid; |
4817 | smp_mb(); | 4868 | smp_mb(); |
4818 | 4869 | ||
4819 | if (BTRFS_I(inode)->last_unlink_trans > last_committed) { | 4870 | if (btrfs_must_commit_transaction(trans, inode)) { |
4820 | root = BTRFS_I(inode)->root; | ||
4821 | |||
4822 | /* | ||
4823 | * make sure any commits to the log are forced | ||
4824 | * to be full commits | ||
4825 | */ | ||
4826 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
4827 | ret = 1; | 4871 | ret = 1; |
4828 | break; | 4872 | break; |
4829 | } | 4873 | } |
@@ -4982,6 +5026,9 @@ process_leaf: | |||
4982 | btrfs_release_path(path); | 5026 | btrfs_release_path(path); |
4983 | ret = btrfs_log_inode(trans, root, di_inode, | 5027 | ret = btrfs_log_inode(trans, root, di_inode, |
4984 | log_mode, 0, LLONG_MAX, ctx); | 5028 | log_mode, 0, LLONG_MAX, ctx); |
5029 | if (!ret && | ||
5030 | btrfs_must_commit_transaction(trans, di_inode)) | ||
5031 | ret = 1; | ||
4985 | iput(di_inode); | 5032 | iput(di_inode); |
4986 | if (ret) | 5033 | if (ret) |
4987 | goto next_dir_inode; | 5034 | goto next_dir_inode; |
@@ -5096,6 +5143,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, | |||
5096 | 5143 | ||
5097 | ret = btrfs_log_inode(trans, root, dir_inode, | 5144 | ret = btrfs_log_inode(trans, root, dir_inode, |
5098 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); | 5145 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); |
5146 | if (!ret && | ||
5147 | btrfs_must_commit_transaction(trans, dir_inode)) | ||
5148 | ret = 1; | ||
5099 | iput(dir_inode); | 5149 | iput(dir_inode); |
5100 | if (ret) | 5150 | if (ret) |
5101 | goto out; | 5151 | goto out; |
@@ -5447,6 +5497,9 @@ error: | |||
5447 | * They revolve around files that were unlinked from the directory, and | 5497 | * They revolve around files that were unlinked from the directory, and |
5448 | * this function updates the parent directory so that a full commit is | 5498 | * this function updates the parent directory so that a full commit is |
5449 | * properly done if it is fsync'd later after the unlinks are done. | 5499 | * properly done if it is fsync'd later after the unlinks are done. |
5500 | * | ||
5501 | * Must be called before the unlink operations (updates to the subvolume tree, | ||
5502 | * inodes, etc) are done. | ||
5450 | */ | 5503 | */ |
5451 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | 5504 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, |
5452 | struct inode *dir, struct inode *inode, | 5505 | struct inode *dir, struct inode *inode, |
@@ -5462,8 +5515,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | |||
5462 | * into the file. When the file is logged we check it and | 5515 | * into the file. When the file is logged we check it and |
5463 | * don't log the parents if the file is fully on disk. | 5516 | * don't log the parents if the file is fully on disk. |
5464 | */ | 5517 | */ |
5465 | if (S_ISREG(inode->i_mode)) | 5518 | if (S_ISREG(inode->i_mode)) { |
5519 | mutex_lock(&BTRFS_I(inode)->log_mutex); | ||
5466 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | 5520 | BTRFS_I(inode)->last_unlink_trans = trans->transid; |
5521 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
5522 | } | ||
5467 | 5523 | ||
5468 | /* | 5524 | /* |
5469 | * if this directory was already logged any new | 5525 | * if this directory was already logged any new |
@@ -5494,7 +5550,29 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | |||
5494 | return; | 5550 | return; |
5495 | 5551 | ||
5496 | record: | 5552 | record: |
5553 | mutex_lock(&BTRFS_I(dir)->log_mutex); | ||
5554 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | ||
5555 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | ||
5556 | } | ||
5557 | |||
5558 | /* | ||
5559 | * Make sure that if someone attempts to fsync the parent directory of a deleted | ||
5560 | * snapshot, it ends up triggering a transaction commit. This is to guarantee | ||
5561 | * that after replaying the log tree of the parent directory's root we will not | ||
5562 | * see the snapshot anymore and at log replay time we will not see any log tree | ||
5563 | * corresponding to the deleted snapshot's root, which could lead to replaying | ||
5564 | * it after replaying the log tree of the parent directory (which would replay | ||
5565 | * the snapshot delete operation). | ||
5566 | * | ||
5567 | * Must be called before the actual snapshot destroy operation (updates to the | ||
5568 | * parent root and tree of tree roots trees, etc) are done. | ||
5569 | */ | ||
5570 | void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, | ||
5571 | struct inode *dir) | ||
5572 | { | ||
5573 | mutex_lock(&BTRFS_I(dir)->log_mutex); | ||
5497 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | 5574 | BTRFS_I(dir)->last_unlink_trans = trans->transid; |
5575 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | ||
5498 | } | 5576 | } |
5499 | 5577 | ||
5500 | /* | 5578 | /* |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 6916a781ea02..a9f1b75d080d 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -79,6 +79,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root); | |||
79 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | 79 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, |
80 | struct inode *dir, struct inode *inode, | 80 | struct inode *dir, struct inode *inode, |
81 | int for_rename); | 81 | int for_rename); |
82 | void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, | ||
83 | struct inode *dir); | ||
82 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | 84 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, |
83 | struct inode *inode, struct inode *old_dir, | 85 | struct inode *inode, struct inode *old_dir, |
84 | struct dentry *parent); | 86 | struct dentry *parent); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 366b335946fa..e2b54d546b7c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -138,7 +138,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) | |||
138 | { | 138 | { |
139 | struct btrfs_fs_devices *fs_devs; | 139 | struct btrfs_fs_devices *fs_devs; |
140 | 140 | ||
141 | fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); | 141 | fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL); |
142 | if (!fs_devs) | 142 | if (!fs_devs) |
143 | return ERR_PTR(-ENOMEM); | 143 | return ERR_PTR(-ENOMEM); |
144 | 144 | ||
@@ -220,7 +220,7 @@ static struct btrfs_device *__alloc_device(void) | |||
220 | { | 220 | { |
221 | struct btrfs_device *dev; | 221 | struct btrfs_device *dev; |
222 | 222 | ||
223 | dev = kzalloc(sizeof(*dev), GFP_NOFS); | 223 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); |
224 | if (!dev) | 224 | if (!dev) |
225 | return ERR_PTR(-ENOMEM); | 225 | return ERR_PTR(-ENOMEM); |
226 | 226 | ||
@@ -733,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
733 | * uuid mutex so nothing we touch in here is going to disappear. | 733 | * uuid mutex so nothing we touch in here is going to disappear. |
734 | */ | 734 | */ |
735 | if (orig_dev->name) { | 735 | if (orig_dev->name) { |
736 | name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); | 736 | name = rcu_string_strdup(orig_dev->name->str, |
737 | GFP_KERNEL); | ||
737 | if (!name) { | 738 | if (!name) { |
738 | kfree(device); | 739 | kfree(device); |
739 | goto error; | 740 | goto error; |
@@ -1714,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1714 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); | 1715 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); |
1715 | 1716 | ||
1716 | num_devices = root->fs_info->fs_devices->num_devices; | 1717 | num_devices = root->fs_info->fs_devices->num_devices; |
1717 | btrfs_dev_replace_lock(&root->fs_info->dev_replace); | 1718 | btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0); |
1718 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { | 1719 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { |
1719 | WARN_ON(num_devices < 1); | 1720 | WARN_ON(num_devices < 1); |
1720 | num_devices--; | 1721 | num_devices--; |
1721 | } | 1722 | } |
1722 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); | 1723 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0); |
1723 | 1724 | ||
1724 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | 1725 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { |
1725 | ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; | 1726 | ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; |
@@ -2287,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2287 | goto error; | 2288 | goto error; |
2288 | } | 2289 | } |
2289 | 2290 | ||
2290 | name = rcu_string_strdup(device_path, GFP_NOFS); | 2291 | name = rcu_string_strdup(device_path, GFP_KERNEL); |
2291 | if (!name) { | 2292 | if (!name) { |
2292 | kfree(device); | 2293 | kfree(device); |
2293 | ret = -ENOMEM; | 2294 | ret = -ENOMEM; |
@@ -2748,7 +2749,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | |||
2748 | em->start + em->len < chunk_offset) { | 2749 | em->start + em->len < chunk_offset) { |
2749 | /* | 2750 | /* |
2750 | * This is a logic error, but we don't want to just rely on the | 2751 | * This is a logic error, but we don't want to just rely on the |
2751 | * user having built with ASSERT enabled, so if ASSERT doens't | 2752 | * user having built with ASSERT enabled, so if ASSERT doesn't |
2752 | * do anything we still error out. | 2753 | * do anything we still error out. |
2753 | */ | 2754 | */ |
2754 | ASSERT(0); | 2755 | ASSERT(0); |
@@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root, | |||
2966 | } | 2967 | } |
2967 | 2968 | ||
2968 | key.objectid = BTRFS_BALANCE_OBJECTID; | 2969 | key.objectid = BTRFS_BALANCE_OBJECTID; |
2969 | key.type = BTRFS_BALANCE_ITEM_KEY; | 2970 | key.type = BTRFS_TEMPORARY_ITEM_KEY; |
2970 | key.offset = 0; | 2971 | key.offset = 0; |
2971 | 2972 | ||
2972 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 2973 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
@@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root) | |||
3015 | } | 3016 | } |
3016 | 3017 | ||
3017 | key.objectid = BTRFS_BALANCE_OBJECTID; | 3018 | key.objectid = BTRFS_BALANCE_OBJECTID; |
3018 | key.type = BTRFS_BALANCE_ITEM_KEY; | 3019 | key.type = BTRFS_TEMPORARY_ITEM_KEY; |
3019 | key.offset = 0; | 3020 | key.offset = 0; |
3020 | 3021 | ||
3021 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 3022 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
@@ -3686,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3686 | } | 3687 | } |
3687 | 3688 | ||
3688 | num_devices = fs_info->fs_devices->num_devices; | 3689 | num_devices = fs_info->fs_devices->num_devices; |
3689 | btrfs_dev_replace_lock(&fs_info->dev_replace); | 3690 | btrfs_dev_replace_lock(&fs_info->dev_replace, 0); |
3690 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { | 3691 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { |
3691 | BUG_ON(num_devices < 1); | 3692 | BUG_ON(num_devices < 1); |
3692 | num_devices--; | 3693 | num_devices--; |
3693 | } | 3694 | } |
3694 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 3695 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
3695 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | 3696 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; |
3696 | if (num_devices == 1) | 3697 | if (num_devices == 1) |
3697 | allowed |= BTRFS_BLOCK_GROUP_DUP; | 3698 | allowed |= BTRFS_BLOCK_GROUP_DUP; |
@@ -3867,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) | |||
3867 | return -ENOMEM; | 3868 | return -ENOMEM; |
3868 | 3869 | ||
3869 | key.objectid = BTRFS_BALANCE_OBJECTID; | 3870 | key.objectid = BTRFS_BALANCE_OBJECTID; |
3870 | key.type = BTRFS_BALANCE_ITEM_KEY; | 3871 | key.type = BTRFS_TEMPORARY_ITEM_KEY; |
3871 | key.offset = 0; | 3872 | key.offset = 0; |
3872 | 3873 | ||
3873 | ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); | 3874 | ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); |
@@ -4118,7 +4119,7 @@ out: | |||
4118 | * Callback for btrfs_uuid_tree_iterate(). | 4119 | * Callback for btrfs_uuid_tree_iterate(). |
4119 | * returns: | 4120 | * returns: |
4120 | * 0 check succeeded, the entry is not outdated. | 4121 | * 0 check succeeded, the entry is not outdated. |
4121 | * < 0 if an error occured. | 4122 | * < 0 if an error occurred. |
4122 | * > 0 if the check failed, which means the caller shall remove the entry. | 4123 | * > 0 if the check failed, which means the caller shall remove the entry. |
4123 | */ | 4124 | */ |
4124 | static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, | 4125 | static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, |
@@ -5062,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) | |||
5062 | ret = 1; | 5063 | ret = 1; |
5063 | free_extent_map(em); | 5064 | free_extent_map(em); |
5064 | 5065 | ||
5065 | btrfs_dev_replace_lock(&fs_info->dev_replace); | 5066 | btrfs_dev_replace_lock(&fs_info->dev_replace, 0); |
5066 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) | 5067 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) |
5067 | ret++; | 5068 | ret++; |
5068 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 5069 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
5069 | 5070 | ||
5070 | return ret; | 5071 | return ret; |
5071 | } | 5072 | } |
@@ -5325,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5325 | if (!bbio_ret) | 5326 | if (!bbio_ret) |
5326 | goto out; | 5327 | goto out; |
5327 | 5328 | ||
5328 | btrfs_dev_replace_lock(dev_replace); | 5329 | btrfs_dev_replace_lock(dev_replace, 0); |
5329 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); | 5330 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); |
5330 | if (!dev_replace_is_ongoing) | 5331 | if (!dev_replace_is_ongoing) |
5331 | btrfs_dev_replace_unlock(dev_replace); | 5332 | btrfs_dev_replace_unlock(dev_replace, 0); |
5333 | else | ||
5334 | btrfs_dev_replace_set_lock_blocking(dev_replace); | ||
5332 | 5335 | ||
5333 | if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && | 5336 | if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && |
5334 | !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && | 5337 | !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && |
@@ -5751,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5751 | bbio->mirror_num = map->num_stripes + 1; | 5754 | bbio->mirror_num = map->num_stripes + 1; |
5752 | } | 5755 | } |
5753 | out: | 5756 | out: |
5754 | if (dev_replace_is_ongoing) | 5757 | if (dev_replace_is_ongoing) { |
5755 | btrfs_dev_replace_unlock(dev_replace); | 5758 | btrfs_dev_replace_clear_lock_blocking(dev_replace); |
5759 | btrfs_dev_replace_unlock(dev_replace, 0); | ||
5760 | } | ||
5756 | free_extent_map(em); | 5761 | free_extent_map(em); |
5757 | return ret; | 5762 | return ret; |
5758 | } | 5763 | } |
@@ -6705,8 +6710,8 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) | |||
6705 | int item_size; | 6710 | int item_size; |
6706 | struct btrfs_dev_stats_item *ptr; | 6711 | struct btrfs_dev_stats_item *ptr; |
6707 | 6712 | ||
6708 | key.objectid = 0; | 6713 | key.objectid = BTRFS_DEV_STATS_OBJECTID; |
6709 | key.type = BTRFS_DEV_STATS_KEY; | 6714 | key.type = BTRFS_PERSISTENT_ITEM_KEY; |
6710 | key.offset = device->devid; | 6715 | key.offset = device->devid; |
6711 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | 6716 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); |
6712 | if (ret) { | 6717 | if (ret) { |
@@ -6753,8 +6758,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, | |||
6753 | int ret; | 6758 | int ret; |
6754 | int i; | 6759 | int i; |
6755 | 6760 | ||
6756 | key.objectid = 0; | 6761 | key.objectid = BTRFS_DEV_STATS_OBJECTID; |
6757 | key.type = BTRFS_DEV_STATS_KEY; | 6762 | key.type = BTRFS_PERSISTENT_ITEM_KEY; |
6758 | key.offset = device->devid; | 6763 | key.offset = device->devid; |
6759 | 6764 | ||
6760 | path = btrfs_alloc_path(); | 6765 | path = btrfs_alloc_path(); |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6c68d6356197..145d2b89e62d 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -249,7 +249,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
249 | goto out; | 249 | goto out; |
250 | 250 | ||
251 | inode_inc_iversion(inode); | 251 | inode_inc_iversion(inode); |
252 | inode->i_ctime = CURRENT_TIME; | 252 | inode->i_ctime = current_fs_time(inode->i_sb); |
253 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); | 253 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); |
254 | ret = btrfs_update_inode(trans, root, inode); | 254 | ret = btrfs_update_inode(trans, root, inode); |
255 | BUG_ON(ret); | 255 | BUG_ON(ret); |
@@ -260,16 +260,12 @@ out: | |||
260 | 260 | ||
261 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | 261 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) |
262 | { | 262 | { |
263 | struct btrfs_key key, found_key; | 263 | struct btrfs_key key; |
264 | struct inode *inode = d_inode(dentry); | 264 | struct inode *inode = d_inode(dentry); |
265 | struct btrfs_root *root = BTRFS_I(inode)->root; | 265 | struct btrfs_root *root = BTRFS_I(inode)->root; |
266 | struct btrfs_path *path; | 266 | struct btrfs_path *path; |
267 | struct extent_buffer *leaf; | 267 | int ret = 0; |
268 | struct btrfs_dir_item *di; | ||
269 | int ret = 0, slot; | ||
270 | size_t total_size = 0, size_left = size; | 268 | size_t total_size = 0, size_left = size; |
271 | unsigned long name_ptr; | ||
272 | size_t name_len; | ||
273 | 269 | ||
274 | /* | 270 | /* |
275 | * ok we want all objects associated with this id. | 271 | * ok we want all objects associated with this id. |
@@ -291,6 +287,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
291 | goto err; | 287 | goto err; |
292 | 288 | ||
293 | while (1) { | 289 | while (1) { |
290 | struct extent_buffer *leaf; | ||
291 | int slot; | ||
292 | struct btrfs_dir_item *di; | ||
293 | struct btrfs_key found_key; | ||
294 | u32 item_size; | ||
295 | u32 cur; | ||
296 | |||
294 | leaf = path->nodes[0]; | 297 | leaf = path->nodes[0]; |
295 | slot = path->slots[0]; | 298 | slot = path->slots[0]; |
296 | 299 | ||
@@ -316,31 +319,45 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
316 | if (found_key.type > BTRFS_XATTR_ITEM_KEY) | 319 | if (found_key.type > BTRFS_XATTR_ITEM_KEY) |
317 | break; | 320 | break; |
318 | if (found_key.type < BTRFS_XATTR_ITEM_KEY) | 321 | if (found_key.type < BTRFS_XATTR_ITEM_KEY) |
319 | goto next; | 322 | goto next_item; |
320 | 323 | ||
321 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 324 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
322 | if (verify_dir_item(root, leaf, di)) | 325 | item_size = btrfs_item_size_nr(leaf, slot); |
323 | goto next; | 326 | cur = 0; |
324 | 327 | while (cur < item_size) { | |
325 | name_len = btrfs_dir_name_len(leaf, di); | 328 | u16 name_len = btrfs_dir_name_len(leaf, di); |
326 | total_size += name_len + 1; | 329 | u16 data_len = btrfs_dir_data_len(leaf, di); |
330 | u32 this_len = sizeof(*di) + name_len + data_len; | ||
331 | unsigned long name_ptr = (unsigned long)(di + 1); | ||
332 | |||
333 | if (verify_dir_item(root, leaf, di)) { | ||
334 | ret = -EIO; | ||
335 | goto err; | ||
336 | } | ||
327 | 337 | ||
328 | /* we are just looking for how big our buffer needs to be */ | 338 | total_size += name_len + 1; |
329 | if (!size) | 339 | /* |
330 | goto next; | 340 | * We are just looking for how big our buffer needs to |
341 | * be. | ||
342 | */ | ||
343 | if (!size) | ||
344 | goto next; | ||
331 | 345 | ||
332 | if (!buffer || (name_len + 1) > size_left) { | 346 | if (!buffer || (name_len + 1) > size_left) { |
333 | ret = -ERANGE; | 347 | ret = -ERANGE; |
334 | goto err; | 348 | goto err; |
335 | } | 349 | } |
336 | 350 | ||
337 | name_ptr = (unsigned long)(di + 1); | 351 | read_extent_buffer(leaf, buffer, name_ptr, name_len); |
338 | read_extent_buffer(leaf, buffer, name_ptr, name_len); | 352 | buffer[name_len] = '\0'; |
339 | buffer[name_len] = '\0'; | ||
340 | 353 | ||
341 | size_left -= name_len + 1; | 354 | size_left -= name_len + 1; |
342 | buffer += name_len + 1; | 355 | buffer += name_len + 1; |
343 | next: | 356 | next: |
357 | cur += this_len; | ||
358 | di = (struct btrfs_dir_item *)((char *)di + this_len); | ||
359 | } | ||
360 | next_item: | ||
344 | path->slots[0]++; | 361 | path->slots[0]++; |
345 | } | 362 | } |
346 | ret = total_size; | 363 | ret = total_size; |